From 025adbe8e58290798001b472aec3eb618d8fc930 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 5 May 2008 01:20:04 -0700
Subject: tipc: Simplify log buffer resizing

This patch simplifies & standardizes the way TIPC's print buffer
log is resized.  Code to terminate use of the log buffer is
eliminated by simply setting the log buffer size to 0 bytes.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/config.c |  2 +-
 net/tipc/core.c   |  4 ++--
 net/tipc/dbg.c    | 26 +++++++-------------------
 net/tipc/dbg.h    |  6 +++---
 4 files changed, 13 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/tipc/config.c b/net/tipc/config.c
index c71337a22d3..91d56f8fee9 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -529,7 +529,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 		break;
 #endif
 	case TIPC_CMD_SET_LOG_SIZE:
-		rep_tlv_buf = tipc_log_resize(req_tlv_area, req_tlv_space);
+		rep_tlv_buf = tipc_log_resize_cmd(req_tlv_area, req_tlv_space);
 		break;
 	case TIPC_CMD_DUMP_LOG:
 		rep_tlv_buf = tipc_log_dump();
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 740aac5cdfb..862d4154161 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -182,7 +182,7 @@ static int __init tipc_init(void)
 {
 	int res;
 
-	tipc_log_reinit(CONFIG_TIPC_LOG);
+	tipc_log_resize(CONFIG_TIPC_LOG);
 	info("Activated (version " TIPC_MOD_VER
 	     " compiled " __DATE__ " " __TIME__ ")\n");
 
@@ -209,7 +209,7 @@ static void __exit tipc_exit(void)
 	tipc_core_stop_net();
 	tipc_core_stop();
 	info("Deactivated\n");
-	tipc_log_stop();
+	tipc_log_resize(0);
 }
 
 module_init(tipc_init);
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index e809d2a2ce0..1a8d0a24d9e 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -350,43 +350,31 @@ void tipc_dump(struct print_buf *pb, const char *fmt, ...)
 }
 
 /**
- * tipc_log_stop - free up TIPC log print buffer
+ * tipc_log_resize - change the size of the TIPC log buffer
+ * @log_size: print buffer size to use
  */
 
-void tipc_log_stop(void)
+void tipc_log_resize(int log_size)
 {
 	spin_lock_bh(&print_lock);
 	if (TIPC_LOG->buf) {
 		kfree(TIPC_LOG->buf);
 		TIPC_LOG->buf = NULL;
 	}
-	spin_unlock_bh(&print_lock);
-}
-
-/**
- * tipc_log_reinit - (re)initialize TIPC log print buffer
- * @log_size: print buffer size to use
- */
-
-void tipc_log_reinit(int log_size)
-{
-	tipc_log_stop();
-
 	if (log_size) {
 		if (log_size < TIPC_PB_MIN_SIZE)
 			log_size = TIPC_PB_MIN_SIZE;
-		spin_lock_bh(&print_lock);
 		tipc_printbuf_init(TIPC_LOG, kmalloc(log_size, GFP_ATOMIC),
 				   log_size);
-		spin_unlock_bh(&print_lock);
 	}
+	spin_unlock_bh(&print_lock);
 }
 
 /**
- * tipc_log_resize - reconfigure size of TIPC log buffer
+ * tipc_log_resize_cmd - reconfigure size of TIPC log buffer
  */
 
-struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space)
+struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area, int req_tlv_space)
 {
 	u32 value;
 
@@ -397,7 +385,7 @@ struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space)
 	if (value != delimit(value, 0, 32768))
 		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
 						   " (log size must be 0-32768)");
-	tipc_log_reinit(value);
+	tipc_log_resize(value);
 	return tipc_cfg_reply_none();
 }
 
diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h
index c01b085000e..718a5db245b 100644
--- a/net/tipc/dbg.h
+++ b/net/tipc/dbg.h
@@ -61,10 +61,10 @@ int  tipc_printbuf_empty(struct print_buf *pb);
 int  tipc_printbuf_validate(struct print_buf *pb);
 void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from);
 
-void tipc_log_reinit(int log_size);
-void tipc_log_stop(void);
+void tipc_log_resize(int log_size);
 
-struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space);
+struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area,
+				    int req_tlv_space);
 struct sk_buff *tipc_log_dump(void);
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From fb98ec71c7f81b6db9b793aeb9d53823b6960d8b Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 5 May 2008 01:20:42 -0700
Subject: tipc: Provide feedback when log buffer resizing fails

This patch provides feedback to the user when TIPC is unable
to set its log buffer to the requested size.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/dbg.c | 13 ++++++++++---
 net/tipc/dbg.h |  4 ++--
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index 1a8d0a24d9e..c6806910598 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -2,7 +2,7 @@
  * net/tipc/dbg.c: TIPC print buffer routines for debugging
  *
  * Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -354,8 +354,10 @@ void tipc_dump(struct print_buf *pb, const char *fmt, ...)
  * @log_size: print buffer size to use
  */
 
-void tipc_log_resize(int log_size)
+int tipc_log_resize(int log_size)
 {
+	int res = 0;
+
 	spin_lock_bh(&print_lock);
 	if (TIPC_LOG->buf) {
 		kfree(TIPC_LOG->buf);
@@ -366,8 +368,11 @@ void tipc_log_resize(int log_size)
 			log_size = TIPC_PB_MIN_SIZE;
 		tipc_printbuf_init(TIPC_LOG, kmalloc(log_size, GFP_ATOMIC),
 				   log_size);
+		res = !TIPC_LOG->buf;
 	}
 	spin_unlock_bh(&print_lock);
+
+	return res;
 }
 
 /**
@@ -385,7 +390,9 @@ struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area, int req_tlv_space)
 	if (value != delimit(value, 0, 32768))
 		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
 						   " (log size must be 0-32768)");
-	tipc_log_resize(value);
+	if (tipc_log_resize(value))
+		return tipc_cfg_reply_error_string(
+			"unable to create specified log (log size is now 0)");
 	return tipc_cfg_reply_none();
 }
 
diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h
index 718a5db245b..6b00062bf04 100644
--- a/net/tipc/dbg.h
+++ b/net/tipc/dbg.h
@@ -2,7 +2,7 @@
  * net/tipc/dbg.h: Include file for TIPC print buffer routines
  *
  * Copyright (c) 1997-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -61,7 +61,7 @@ int  tipc_printbuf_empty(struct print_buf *pb);
 int  tipc_printbuf_validate(struct print_buf *pb);
 void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from);
 
-void tipc_log_resize(int log_size);
+int tipc_log_resize(int log_size);
 
 struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area,
 				    int req_tlv_space);
-- 
cgit v1.2.3-70-g09d2


From 93758c3da19e99f5377cc1413c27320882b18f4b Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 5 May 2008 01:21:12 -0700
Subject: tipc: Fix recursive spinlock invocation in print buffer code

This patch fixes two routines that allow the global print buffer
spinlock to be taken recursively.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/dbg.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index c6806910598..834319e3b7e 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -179,8 +179,10 @@ void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from)
 	}
 
 	if (pb_to->size < pb_from->size) {
-		tipc_printbuf_reset(pb_to);
-		tipc_printf(pb_to, "*** PRINT BUFFER MOVE ERROR ***");
+		strcpy(pb_to->buf, "*** PRINT BUFFER MOVE ERROR ***");
+		pb_to->buf[pb_to->size - 1] = ~0;
+		pb_to->crs = strchr(pb_to->buf, 0);
+		pb_to->next = NULL;
 		return;
 	}
 
@@ -405,27 +407,32 @@ struct sk_buff *tipc_log_dump(void)
 	struct sk_buff *reply;
 
 	spin_lock_bh(&print_lock);
-	if (!TIPC_LOG->buf)
+	if (!TIPC_LOG->buf) {
+		spin_unlock_bh(&print_lock);
 		reply = tipc_cfg_reply_ultra_string("log not activated\n");
-	else if (tipc_printbuf_empty(TIPC_LOG))
+	} else if (tipc_printbuf_empty(TIPC_LOG)) {
+		spin_unlock_bh(&print_lock);
 		reply = tipc_cfg_reply_ultra_string("log is empty\n");
+	}
 	else {
 		struct tlv_desc *rep_tlv;
 		struct print_buf pb;
 		int str_len;
 
 		str_len = min(TIPC_LOG->size, 32768u);
+		spin_unlock_bh(&print_lock);
 		reply = tipc_cfg_reply_alloc(TLV_SPACE(str_len));
 		if (reply) {
 			rep_tlv = (struct tlv_desc *)reply->data;
 			tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), str_len);
+			spin_lock_bh(&print_lock);
 			tipc_printbuf_move(&pb, TIPC_LOG);
+			spin_unlock_bh(&print_lock);
 			str_len = strlen(TLV_DATA(rep_tlv)) + 1;
 			skb_put(reply, TLV_SPACE(str_len));
 			TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
 		}
 	}
-	spin_unlock_bh(&print_lock);
 	return reply;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 40dbfae440abe6860167f12e0296bd7a1a599839 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 5 May 2008 01:21:54 -0700
Subject: tipc: Fix null pointer dereference in debug code

This patch eliminates an obsolete use of the DBG_OUTPUT print
buffer which could lead to a null pointer crash in tipc_printf()
if TIPC's debugging capabilities are configured.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 39fd1619feb..aecba5cd87d 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -41,9 +41,6 @@
 #include "msg.h"
 #include "name_distr.h"
 
-#undef  DBG_OUTPUT
-#define DBG_OUTPUT NULL
-
 #define ITEM_SIZE sizeof(struct distr_item)
 
 /**
-- 
cgit v1.2.3-70-g09d2


From c89039850bdf8047472b4ee6132048dacef2cf5a Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 5 May 2008 01:22:30 -0700
Subject: tipc: Elimination of print buffer chaining

This patch revamps TIPC's print buffer subsystem to eliminate
support for arbitrary chains of print buffers, which were
rarely needed and difficult to use safely.

In its place, print buffers can now be configured to echo their
output to the system console.  This provides an equivalent for
the only chaining currently utilized by TIPC, in a faster and
more compact manner.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.h |   7 +--
 net/tipc/dbg.c  | 163 +++++++++++++++++++++++++-------------------------------
 net/tipc/dbg.h  |   4 +-
 3 files changed, 80 insertions(+), 94 deletions(-)

(limited to 'net')

diff --git a/net/tipc/core.h b/net/tipc/core.h
index 325404fd4eb..7042ef37726 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -66,7 +66,6 @@
 
 struct tipc_msg;
 extern struct print_buf *TIPC_NULL, *TIPC_CONS, *TIPC_LOG;
-extern struct print_buf *TIPC_TEE(struct print_buf *, struct print_buf *);
 void tipc_msg_print(struct print_buf*,struct tipc_msg *,const char*);
 void tipc_printf(struct print_buf *, const char *fmt, ...);
 void tipc_dump(struct print_buf*,const char *fmt, ...);
@@ -98,11 +97,13 @@ void tipc_dump(struct print_buf*,const char *fmt, ...);
  * TIPC_CONS		   : system console
  * TIPC_LOG		   : TIPC log buffer
  * &buf			   : user-defined buffer (struct print_buf *)
- * TIPC_TEE(&buf_a,&buf_b) : list of buffers (eg. TIPC_TEE(TIPC_CONS,TIPC_LOG))
+ *
+ * Note: TIPC_LOG is configured to echo its output to the system console;
+ *       user-defined buffers can be configured to do the same thing.
  */
 
 #ifndef TIPC_OUTPUT
-#define TIPC_OUTPUT TIPC_TEE(TIPC_CONS,TIPC_LOG)
+#define TIPC_OUTPUT TIPC_LOG
 #endif
 
 #ifndef DBG_OUTPUT
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index 834319e3b7e..8ca9457250b 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -38,18 +38,44 @@
 #include "config.h"
 #include "dbg.h"
 
-static char print_string[TIPC_PB_MAX_STR];
-static DEFINE_SPINLOCK(print_lock);
+/*
+ * TIPC pre-defines the following print buffers:
+ *
+ * TIPC_NULL : null buffer (i.e. print nowhere)
+ * TIPC_CONS : system console
+ * TIPC_LOG  : TIPC log buffer
+ *
+ * Additional user-defined print buffers are also permitted.
+ */
 
-static struct print_buf null_buf = { NULL, 0, NULL, NULL };
+static struct print_buf null_buf = { NULL, 0, NULL, 0 };
 struct print_buf *TIPC_NULL = &null_buf;
 
-static struct print_buf cons_buf = { NULL, 0, NULL, NULL };
+static struct print_buf cons_buf = { NULL, 0, NULL, 1 };
 struct print_buf *TIPC_CONS = &cons_buf;
 
-static struct print_buf log_buf = { NULL, 0, NULL, NULL };
+static struct print_buf log_buf = { NULL, 0, NULL, 1 };
 struct print_buf *TIPC_LOG = &log_buf;
 
+/*
+ * Locking policy when using print buffers.
+ *
+ * 1) tipc_printf() uses 'print_lock' to protect against concurrent access to
+ * 'print_string' when writing to a print buffer. This also protects against
+ * concurrent writes to the print buffer being written to.
+ *
+ * 2) tipc_dump() and tipc_log_XXX() leverage the aforementioned
+ * use of 'print_lock' to protect against all types of concurrent operations
+ * on their associated print buffer (not just write operations).
+ *
+ * Note: All routines of the form tipc_printbuf_XXX() are lock-free, and rely
+ * on the caller to prevent simultaneous use of the print buffer(s) being
+ * manipulated.
+ */
+
+static char print_string[TIPC_PB_MAX_STR];
+static DEFINE_SPINLOCK(print_lock);
+
 
 #define FORMAT(PTR,LEN,FMT) \
 {\
@@ -60,27 +86,14 @@ struct print_buf *TIPC_LOG = &log_buf;
        *(PTR + LEN) = '\0';\
 }
 
-/*
- * Locking policy when using print buffers.
- *
- * The following routines use 'print_lock' for protection:
- * 1) tipc_printf()  - to protect its print buffer(s) and 'print_string'
- * 2) TIPC_TEE()     - to protect its print buffer(s)
- * 3) tipc_dump()    - to protect its print buffer(s) and 'print_string'
- * 4) tipc_log_XXX() - to protect TIPC_LOG
- *
- * All routines of the form tipc_printbuf_XXX() rely on the caller to prevent
- * simultaneous use of the print buffer(s) being manipulated.
- */
-
 /**
  * tipc_printbuf_init - initialize print buffer to empty
  * @pb: pointer to print buffer structure
  * @raw: pointer to character array used by print buffer
  * @size: size of character array
  *
- * Makes the print buffer a null device that discards anything written to it
- * if the character array is too small (or absent).
+ * Note: If the character array is too small (or absent), the print buffer
+ * becomes a null device that discards anything written to it.
  */
 
 void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size)
@@ -88,7 +101,7 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size)
 	pb->buf = raw;
 	pb->crs = raw;
 	pb->size = size;
-	pb->next = NULL;
+	pb->echo = 0;
 
 	if (size < TIPC_PB_MIN_SIZE) {
 		pb->buf = NULL;
@@ -105,7 +118,11 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size)
 
 void tipc_printbuf_reset(struct print_buf *pb)
 {
-	tipc_printbuf_init(pb, pb->buf, pb->size);
+	if (pb->buf != NULL) {
+		pb->crs = pb->buf;
+		pb->buf[0] = 0;
+		pb->buf[pb->size - 1] = ~0;
+	}
 }
 
 /**
@@ -182,7 +199,6 @@ void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from)
 		strcpy(pb_to->buf, "*** PRINT BUFFER MOVE ERROR ***");
 		pb_to->buf[pb_to->size - 1] = ~0;
 		pb_to->crs = strchr(pb_to->buf, 0);
-		pb_to->next = NULL;
 		return;
 	}
 
@@ -205,8 +221,8 @@ void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from)
 }
 
 /**
- * tipc_printf - append formatted output to print buffer chain
- * @pb: pointer to chain of print buffers (may be NULL)
+ * tipc_printf - append formatted output to print buffer
+ * @pb: pointer to print buffer
  * @fmt: formatted info to be printed
  */
 
@@ -215,66 +231,36 @@ void tipc_printf(struct print_buf *pb, const char *fmt, ...)
 	int chars_to_add;
 	int chars_left;
 	char save_char;
-	struct print_buf *pb_next;
 
 	spin_lock_bh(&print_lock);
+
 	FORMAT(print_string, chars_to_add, fmt);
 	if (chars_to_add >= TIPC_PB_MAX_STR)
 		strcpy(print_string, "*** PRINT BUFFER STRING TOO LONG ***");
 
-	while (pb) {
-		if (pb == TIPC_CONS)
-			printk(print_string);
-		else if (pb->buf) {
-			chars_left = pb->buf + pb->size - pb->crs - 1;
-			if (chars_to_add <= chars_left) {
-				strcpy(pb->crs, print_string);
-				pb->crs += chars_to_add;
-			} else if (chars_to_add >= (pb->size - 1)) {
-				strcpy(pb->buf, print_string + chars_to_add + 1
-				       - pb->size);
-				pb->crs = pb->buf + pb->size - 1;
-			} else {
-				strcpy(pb->buf, print_string + chars_left);
-				save_char = print_string[chars_left];
-				print_string[chars_left] = 0;
-				strcpy(pb->crs, print_string);
-				print_string[chars_left] = save_char;
-				pb->crs = pb->buf + chars_to_add - chars_left;
-			}
+	if (pb->buf) {
+		chars_left = pb->buf + pb->size - pb->crs - 1;
+		if (chars_to_add <= chars_left) {
+			strcpy(pb->crs, print_string);
+			pb->crs += chars_to_add;
+		} else if (chars_to_add >= (pb->size - 1)) {
+			strcpy(pb->buf, print_string + chars_to_add + 1
+			       - pb->size);
+			pb->crs = pb->buf + pb->size - 1;
+		} else {
+			strcpy(pb->buf, print_string + chars_left);
+			save_char = print_string[chars_left];
+			print_string[chars_left] = 0;
+			strcpy(pb->crs, print_string);
+			print_string[chars_left] = save_char;
+			pb->crs = pb->buf + chars_to_add - chars_left;
 		}
-		pb_next = pb->next;
-		pb->next = NULL;
-		pb = pb_next;
 	}
-	spin_unlock_bh(&print_lock);
-}
 
-/**
- * TIPC_TEE - perform next output operation on both print buffers
- * @b0: pointer to chain of print buffers (may be NULL)
- * @b1: pointer to print buffer to add to chain
- *
- * Returns pointer to print buffer chain.
- */
-
-struct print_buf *TIPC_TEE(struct print_buf *b0, struct print_buf *b1)
-{
-	struct print_buf *pb = b0;
+	if (pb->echo)
+		printk(print_string);
 
-	if (!b0 || (b0 == b1))
-		return b1;
-
-	spin_lock_bh(&print_lock);
-	while (pb->next) {
-		if ((pb->next == b1) || (pb->next == b0))
-			pb->next = pb->next->next;
-		else
-			pb = pb->next;
-	}
-	pb->next = b1;
 	spin_unlock_bh(&print_lock);
-	return b0;
 }
 
 /**
@@ -323,31 +309,28 @@ static void printbuf_dump(struct print_buf *pb)
 }
 
 /**
- * tipc_dump - dump non-console print buffer(s) to console
- * @pb: pointer to chain of print buffers
+ * tipc_dump - dump (non-console) print buffer to console
+ * @pb: pointer to print buffer
  */
 
 void tipc_dump(struct print_buf *pb, const char *fmt, ...)
 {
-	struct print_buf *pb_next;
 	int len;
 
+	if (pb == TIPC_CONS)
+		return;
+
 	spin_lock_bh(&print_lock);
+
 	FORMAT(print_string, len, fmt);
 	printk(print_string);
 
-	for (; pb; pb = pb->next) {
-		if (pb != TIPC_CONS) {
-			printk("\n---- Start of %s log dump ----\n\n",
-			       (pb == TIPC_LOG) ? "global" : "local");
-			printbuf_dump(pb);
-			tipc_printbuf_reset(pb);
-			printk("\n---- End of dump ----\n");
-		}
-		pb_next = pb->next;
-		pb->next = NULL;
-		pb = pb_next;
-	}
+	printk("\n---- Start of %s log dump ----\n\n",
+	       (pb == TIPC_LOG) ? "global" : "local");
+	printbuf_dump(pb);
+	tipc_printbuf_reset(pb);
+	printk("\n---- End of dump ----\n");
+
 	spin_unlock_bh(&print_lock);
 }
 
@@ -368,8 +351,10 @@ int tipc_log_resize(int log_size)
 	if (log_size) {
 		if (log_size < TIPC_PB_MIN_SIZE)
 			log_size = TIPC_PB_MIN_SIZE;
+		res = TIPC_LOG->echo;
 		tipc_printbuf_init(TIPC_LOG, kmalloc(log_size, GFP_ATOMIC),
 				   log_size);
+		TIPC_LOG->echo = res;
 		res = !TIPC_LOG->buf;
 	}
 	spin_unlock_bh(&print_lock);
diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h
index 6b00062bf04..5ef1bc8f64e 100644
--- a/net/tipc/dbg.h
+++ b/net/tipc/dbg.h
@@ -42,14 +42,14 @@
  * @buf: pointer to character array containing print buffer contents
  * @size: size of character array
  * @crs: pointer to first unused space in character array (i.e. final NUL)
- * @next: used to link print buffers when printing to more than one at a time
+ * @echo: echo output to system console if non-zero
  */
 
 struct print_buf {
 	char *buf;
 	u32 size;
 	char *crs;
-	struct print_buf *next;
+	int echo;
 };
 
 #define TIPC_PB_MIN_SIZE 64	/* minimum size for a print buffer's array */
-- 
cgit v1.2.3-70-g09d2


From 7d3aa71239f588215b5a7c359f05155b192d8081 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 5 May 2008 01:22:59 -0700
Subject: [TIPC]: Cosmetic cleanup of print buffer code

This patch contains changes to make TIPC's print buffer code
conform more closely to Linux kernel coding guidelines.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.h |  6 ++++--
 net/tipc/dbg.c  | 14 +++++++-------
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/tipc/core.h b/net/tipc/core.h
index 7042ef37726..bc633552e9f 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -2,7 +2,7 @@
  * net/tipc/core.h: Include file for TIPC global declarations
  *
  * Copyright (c) 2005-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -65,7 +65,9 @@
 #define assert(i)  BUG_ON(!(i))
 
 struct tipc_msg;
-extern struct print_buf *TIPC_NULL, *TIPC_CONS, *TIPC_LOG;
+extern struct print_buf *const TIPC_NULL;
+extern struct print_buf *const TIPC_CONS;
+extern struct print_buf *const TIPC_LOG;
 void tipc_msg_print(struct print_buf*,struct tipc_msg *,const char*);
 void tipc_printf(struct print_buf *, const char *fmt, ...);
 void tipc_dump(struct print_buf*,const char *fmt, ...);
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index 8ca9457250b..cda496815a0 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -49,13 +49,13 @@
  */
 
 static struct print_buf null_buf = { NULL, 0, NULL, 0 };
-struct print_buf *TIPC_NULL = &null_buf;
+struct print_buf *const TIPC_NULL = &null_buf;
 
 static struct print_buf cons_buf = { NULL, 0, NULL, 1 };
-struct print_buf *TIPC_CONS = &cons_buf;
+struct print_buf *const TIPC_CONS = &cons_buf;
 
 static struct print_buf log_buf = { NULL, 0, NULL, 1 };
-struct print_buf *TIPC_LOG = &log_buf;
+struct print_buf *const TIPC_LOG = &log_buf;
 
 /*
  * Locking policy when using print buffers.
@@ -107,7 +107,7 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size)
 		pb->buf = NULL;
 	} else if (raw) {
 		pb->buf[0] = 0;
-		pb->buf[size-1] = ~0;
+		pb->buf[size - 1] = ~0;
 	}
 }
 
@@ -118,7 +118,7 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size)
 
 void tipc_printbuf_reset(struct print_buf *pb)
 {
-	if (pb->buf != NULL) {
+	if (pb->buf) {
 		pb->crs = pb->buf;
 		pb->buf[0] = 0;
 		pb->buf[pb->size - 1] = ~0;
@@ -158,7 +158,7 @@ int tipc_printbuf_validate(struct print_buf *pb)
 
 	if (pb->buf[pb->size - 1] == 0) {
 		cp_buf = kmalloc(pb->size, GFP_ATOMIC);
-		if (cp_buf != NULL){
+		if (cp_buf) {
 			tipc_printbuf_init(&cb, cp_buf, pb->size);
 			tipc_printbuf_move(&cb, pb);
 			tipc_printbuf_move(pb, &cb);
@@ -205,7 +205,7 @@ void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from)
 	/* Copy data from char after cursor to end (if used) */
 
 	len = pb_from->buf + pb_from->size - pb_from->crs - 2;
-	if ((pb_from->buf[pb_from->size-1] == 0) && (len > 0)) {
+	if ((pb_from->buf[pb_from->size - 1] == 0) && (len > 0)) {
 		strcpy(pb_to->buf, pb_from->crs + 1);
 		pb_to->crs = pb_to->buf + len;
 	} else
-- 
cgit v1.2.3-70-g09d2


From 6063da9d74d4da812ae0d8f233b7e320e15765e3 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 5 May 2008 01:23:34 -0700
Subject: tipc: Cosmetic cleanup of system & debug output declarations

This patch contains changes to make TIPC's system & debug
message declarations more readable.  Declarations have been
regrouped and recommented to make it easier to understand
what output is generated in both standard and debugging modes.
In addition, oversize lines have been fixed to respect the
80 character upper bound used in the kernel.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.h | 117 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 67 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/net/tipc/core.h b/net/tipc/core.h
index bc633552e9f..d7f3b3c96b9 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -59,87 +59,104 @@
 #include <linux/vmalloc.h>
 
 /*
- * TIPC debugging code
+ * TIPC sanity test macros
  */
 
 #define assert(i)  BUG_ON(!(i))
 
-struct tipc_msg;
-extern struct print_buf *const TIPC_NULL;
-extern struct print_buf *const TIPC_CONS;
-extern struct print_buf *const TIPC_LOG;
-void tipc_msg_print(struct print_buf*,struct tipc_msg *,const char*);
-void tipc_printf(struct print_buf *, const char *fmt, ...);
-void tipc_dump(struct print_buf*,const char *fmt, ...);
-
-#ifdef CONFIG_TIPC_DEBUG
-
 /*
- * TIPC debug support included:
- * - system messages are printed to TIPC_OUTPUT print buffer
- * - debug messages are printed to DBG_OUTPUT print buffer
+ * TIPC system monitoring code
  */
 
-#define err(fmt, arg...)  tipc_printf(TIPC_OUTPUT, KERN_ERR "TIPC: " fmt, ## arg)
-#define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_WARNING "TIPC: " fmt, ## arg)
-#define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_NOTICE "TIPC: " fmt, ## arg)
-
-#define dbg(fmt, arg...)  do {if (DBG_OUTPUT != TIPC_NULL) tipc_printf(DBG_OUTPUT, fmt, ## arg);} while(0)
-#define msg_dbg(msg, txt) do {if (DBG_OUTPUT != TIPC_NULL) tipc_msg_print(DBG_OUTPUT, msg, txt);} while(0)
-#define dump(fmt, arg...) do {if (DBG_OUTPUT != TIPC_NULL) tipc_dump(DBG_OUTPUT, fmt, ##arg);} while(0)
-
-
 /*
- * By default, TIPC_OUTPUT is defined to be system console and TIPC log buffer,
- * while DBG_OUTPUT is the null print buffer.  These defaults can be changed
- * here, or on a per .c file basis, by redefining these symbols.  The following
- * print buffer options are available:
+ * TIPC's print buffer subsystem supports the following print buffers:
  *
- * TIPC_NULL		   : null buffer (i.e. print nowhere)
- * TIPC_CONS		   : system console
- * TIPC_LOG		   : TIPC log buffer
- * &buf			   : user-defined buffer (struct print_buf *)
+ * TIPC_NULL : null buffer (i.e. print nowhere)
+ * TIPC_CONS : system console
+ * TIPC_LOG  : TIPC log buffer
+ * &buf	     : user-defined buffer (struct print_buf *)
  *
  * Note: TIPC_LOG is configured to echo its output to the system console;
  *       user-defined buffers can be configured to do the same thing.
  */
 
+extern struct print_buf *const TIPC_NULL;
+extern struct print_buf *const TIPC_CONS;
+extern struct print_buf *const TIPC_LOG;
+
+void tipc_printf(struct print_buf *, const char *fmt, ...);
+void tipc_msg_print(struct print_buf *, struct tipc_msg *, const char *);
+void tipc_dump(struct print_buf *, const char *fmt, ...);
+
+/*
+ * TIPC_OUTPUT is the destination print buffer for system messages.
+ */
+
 #ifndef TIPC_OUTPUT
 #define TIPC_OUTPUT TIPC_LOG
 #endif
 
-#ifndef DBG_OUTPUT
-#define DBG_OUTPUT TIPC_NULL
-#endif
-
-#else
-
 /*
- * TIPC debug support not included:
- * - system messages are printed to system console
- * - debug messages are not printed
+ * TIPC can be configured to send system messages to TIPC_OUTPUT
+ * or to the system console only.
  */
 
+#ifdef CONFIG_TIPC_DEBUG
+
+#define err(fmt, arg...)  tipc_printf(TIPC_OUTPUT, \
+					KERN_ERR "TIPC: " fmt, ## arg)
+#define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, \
+					KERN_WARNING "TIPC: " fmt, ## arg)
+#define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, \
+					KERN_NOTICE "TIPC: " fmt, ## arg)
+
+#else
+
 #define err(fmt, arg...)  printk(KERN_ERR "TIPC: " fmt , ## arg)
 #define info(fmt, arg...) printk(KERN_INFO "TIPC: " fmt , ## arg)
 #define warn(fmt, arg...) printk(KERN_WARNING "TIPC: " fmt , ## arg)
 
-#define dbg(fmt, arg...) do {} while (0)
-#define msg_dbg(msg,txt) do {} while (0)
-#define dump(fmt,arg...) do {} while (0)
+#endif
 
+/*
+ * DBG_OUTPUT is the destination print buffer for debug messages.
+ * It defaults to the the null print buffer, but can be redefined
+ * (typically in the individual .c files being debugged) to allow
+ * selected debug messages to be generated where needed.
+ */
+
+#ifndef DBG_OUTPUT
+#define DBG_OUTPUT TIPC_NULL
+#endif
 
 /*
- * TIPC_OUTPUT is defined to be the system console, while DBG_OUTPUT is
- * the null print buffer.  Thes ensures that any system or debug messages
- * that are generated without using the above macros are handled correctly.
+ * TIPC can be configured to send debug messages to the specified print buffer
+ * (typically DBG_OUTPUT) or to suppress them entirely.
  */
 
-#undef  TIPC_OUTPUT
-#define TIPC_OUTPUT TIPC_CONS
+#ifdef CONFIG_TIPC_DEBUG
 
-#undef  DBG_OUTPUT
-#define DBG_OUTPUT TIPC_NULL
+#define dbg(fmt, arg...)  \
+	do { \
+		if (DBG_OUTPUT != TIPC_NULL) \
+			tipc_printf(DBG_OUTPUT, fmt, ## arg); \
+	} while (0)
+#define msg_dbg(msg, txt) \
+	do { \
+		if (DBG_OUTPUT != TIPC_NULL) \
+			tipc_msg_print(DBG_OUTPUT, msg, txt); \
+	} while (0)
+#define dump(fmt, arg...) \
+	do { \
+		if (DBG_OUTPUT != TIPC_NULL) \
+			tipc_dump(DBG_OUTPUT, fmt, ##arg); \
+	} while (0)
+
+#else
+
+#define dbg(fmt, arg...)	do {} while (0)
+#define msg_dbg(msg, txt)	do {} while (0)
+#define dump(fmt, arg...)	do {} while (0)
 
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 48c971394626173eaf1c33441ea1d900c88b21a3 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 5 May 2008 01:24:06 -0700
Subject: tipc: Exclude debug-only print buffer code when not debugging

This patch modifies TIPC to only exclude debug-related print buffer
routines when debugging capabilities are not required.  It also
fixes up some related #defines that exceed 80 characters.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.h | 12 ++++++++----
 net/tipc/dbg.c  |  8 ++++++--
 net/tipc/link.c | 20 ++++++++++++++++----
 net/tipc/msg.c  | 10 +++++++---
 4 files changed, 37 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/tipc/core.h b/net/tipc/core.h
index d7f3b3c96b9..0d783bcc6f9 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -85,8 +85,6 @@ extern struct print_buf *const TIPC_CONS;
 extern struct print_buf *const TIPC_LOG;
 
 void tipc_printf(struct print_buf *, const char *fmt, ...);
-void tipc_msg_print(struct print_buf *, struct tipc_msg *, const char *);
-void tipc_dump(struct print_buf *, const char *fmt, ...);
 
 /*
  * TIPC_OUTPUT is the destination print buffer for system messages.
@@ -144,20 +142,26 @@ void tipc_dump(struct print_buf *, const char *fmt, ...);
 #define msg_dbg(msg, txt) \
 	do { \
 		if (DBG_OUTPUT != TIPC_NULL) \
-			tipc_msg_print(DBG_OUTPUT, msg, txt); \
+			tipc_msg_dbg(DBG_OUTPUT, msg, txt); \
 	} while (0)
 #define dump(fmt, arg...) \
 	do { \
 		if (DBG_OUTPUT != TIPC_NULL) \
-			tipc_dump(DBG_OUTPUT, fmt, ##arg); \
+			tipc_dump_dbg(DBG_OUTPUT, fmt, ##arg); \
 	} while (0)
 
+void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *);
+void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
+
 #else
 
 #define dbg(fmt, arg...)	do {} while (0)
 #define msg_dbg(msg, txt)	do {} while (0)
 #define dump(fmt, arg...)	do {} while (0)
 
+#define tipc_msg_dbg(...)	do {} while (0)
+#define tipc_dump_dbg(...)	do {} while (0)
+
 #endif
 
 
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index cda496815a0..29ecae85166 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -263,6 +263,8 @@ void tipc_printf(struct print_buf *pb, const char *fmt, ...)
 	spin_unlock_bh(&print_lock);
 }
 
+#ifdef CONFIG_TIPC_DEBUG
+
 /**
  * print_to_console - write string of bytes to console in multiple chunks
  */
@@ -309,11 +311,11 @@ static void printbuf_dump(struct print_buf *pb)
 }
 
 /**
- * tipc_dump - dump (non-console) print buffer to console
+ * tipc_dump_dbg - dump (non-console) print buffer to console
  * @pb: pointer to print buffer
  */
 
-void tipc_dump(struct print_buf *pb, const char *fmt, ...)
+void tipc_dump_dbg(struct print_buf *pb, const char *fmt, ...)
 {
 	int len;
 
@@ -334,6 +336,8 @@ void tipc_dump(struct print_buf *pb, const char *fmt, ...)
 	spin_unlock_bh(&print_lock);
 }
 
+#endif
+
 /**
  * tipc_log_resize - change the size of the TIPC log buffer
  * @log_size: print buffer size to use
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2a26a16e269..bd206ebe4ee 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -147,9 +147,21 @@ static void link_print(struct link *l_ptr, struct print_buf *buf,
 
 #define LINK_LOG_BUF_SIZE 0
 
-#define dbg_link(fmt, arg...)  do {if (LINK_LOG_BUF_SIZE) tipc_printf(&l_ptr->print_buf, fmt, ## arg); } while(0)
-#define dbg_link_msg(msg, txt) do {if (LINK_LOG_BUF_SIZE) tipc_msg_print(&l_ptr->print_buf, msg, txt); } while(0)
-#define dbg_link_state(txt) do {if (LINK_LOG_BUF_SIZE) link_print(l_ptr, &l_ptr->print_buf, txt); } while(0)
+#define dbg_link(fmt, arg...) \
+	do { \
+		if (LINK_LOG_BUF_SIZE) \
+			tipc_printf(&l_ptr->print_buf, fmt, ## arg); \
+	} while (0)
+#define dbg_link_msg(msg, txt) \
+	do { \
+		if (LINK_LOG_BUF_SIZE) \
+			tipc_msg_dbg(&l_ptr->print_buf, msg, txt); \
+	} while (0)
+#define dbg_link_state(txt) \
+	do { \
+		if (LINK_LOG_BUF_SIZE) \
+			link_print(l_ptr, &l_ptr->print_buf, txt); \
+	} while (0)
 #define dbg_link_dump() do { \
 	if (LINK_LOG_BUF_SIZE) { \
 		tipc_printf(LOG, "\n\nDumping link <%s>:\n", l_ptr->name); \
@@ -1651,7 +1663,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
 	struct tipc_msg *msg = buf_msg(buf);
 
 	warn("Retransmission failure on link <%s>\n", l_ptr->name);
-	tipc_msg_print(TIPC_OUTPUT, msg, ">RETR-FAIL>");
+	tipc_msg_dbg(TIPC_OUTPUT, msg, ">RETR-FAIL>");
 
 	if (l_ptr->addr) {
 
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 696a8633df7..38abebaae88 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -41,7 +41,9 @@
 #include "bearer.h"
 
 
-void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str)
+#ifdef CONFIG_TIPC_DEBUG
+
+void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 {
 	u32 usr = msg_user(msg);
 	tipc_printf(buf, str);
@@ -315,9 +317,11 @@ void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str
 	}
 	tipc_printf(buf, "\n");
 	if ((usr == CHANGEOVER_PROTOCOL) && (msg_msgcnt(msg))) {
-		tipc_msg_print(buf,msg_get_wrapped(msg),"      /");
+		tipc_msg_dbg(buf, msg_get_wrapped(msg), "      /");
 	}
 	if ((usr == MSG_FRAGMENTER) && (msg_type(msg) == FIRST_FRAGMENT)) {
-		tipc_msg_print(buf,msg_get_wrapped(msg),"      /");
+		tipc_msg_dbg(buf, msg_get_wrapped(msg), "      /");
 	}
 }
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From a7ca0268b5dfffcaa8a1fe40c6eccdeac50fa3ea Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 5 May 2008 01:24:36 -0700
Subject: tipc: Update version number to TIPC 1.6.4

This patch updates TIPC's version number to 1.6.4.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 862d4154161..6d6aa5a3c24 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -49,7 +49,7 @@
 #include "config.h"
 
 
-#define TIPC_MOD_VER "1.6.3"
+#define TIPC_MOD_VER "1.6.4"
 
 #ifndef CONFIG_TIPC_ZONES
 #define CONFIG_TIPC_ZONES 3
-- 
cgit v1.2.3-70-g09d2


From 1c014420583564ac09e3b67006f2e7050861e66b Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Thu, 17 Apr 2008 19:41:02 +0200
Subject: mac80211: Replace ieee80211_tx_control->key_idx with
 ieee80211_key_conf

The hw_key_idx inside the ieee80211_key_conf structure does
not provide all the information drivers might need to perform
hardware encryption.

This is in particular true for rt2x00 who needs to know the
key algorithm and whether it is a shared or pairwise key.

By passing the ieee80211_key_conf pointer it assures us that
drivers can make full use of all information that it should know
about a particular key.

Additionally this patch updates all drivers to grab the hw_key_idx from
the ieee80211_key_conf structure.

v2: Removed bogus u16 cast
v3: Add warning about ieee80211_tx_control pointers
v4: Update warning about ieee80211_tx_control pointers

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/base.c           |  2 +-
 drivers/net/wireless/b43/xmit.c             |  2 +-
 drivers/net/wireless/b43legacy/xmit.c       |  2 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  7 ++++---
 drivers/net/wireless/iwlwifi/iwl4965-base.c |  6 +++---
 include/net/mac80211.h                      | 19 +++++++++++++++----
 net/mac80211/wep.c                          |  2 +-
 net/mac80211/wpa.c                          |  8 ++++----
 8 files changed, 30 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 4e5c8fc3520..3854619f351 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -1319,7 +1319,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf,
 	pktlen = skb->len;
 
 	if (!(ctl->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) {
-		keyidx = ctl->key_idx;
+		keyidx = ctl->hw_key->hw_key_idx;
 		pktlen += ctl->icv_len;
 	}
 
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 19aefbfb2c9..88491947a20 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -235,7 +235,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 
 	plcp_fragment_len = fragment_len + FCS_LEN;
 	if (use_encryption) {
-		u8 key_idx = (u16) (txctl->key_idx);
+		u8 key_idx = txctl->hw_key->hw_key_idx;
 		struct b43_key *key;
 		int wlhdr_len;
 		size_t iv_len;
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index dcad2491a60..fc83dab6e2c 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -232,7 +232,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 
 	plcp_fragment_len = fragment_len + FCS_LEN;
 	if (use_encryption) {
-		u8 key_idx = (u16)(txctl->key_idx);
+		u8 key_idx = txctl->hw_key->hw_key_idx;
 		struct b43legacy_key *key;
 		int wlhdr_len;
 		size_t iv_len;
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 13925b627e3..7bdffa9cfea 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -2391,7 +2391,8 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl3945_priv *priv,
 				      struct sk_buff *skb_frag,
 				      int last_frag)
 {
-	struct iwl3945_hw_key *keyinfo = &priv->stations[ctl->key_idx].keyinfo;
+	struct iwl3945_hw_key *keyinfo =
+	    &priv->stations[ctl->hw_key->hw_key_idx].keyinfo;
 
 	switch (keyinfo->alg) {
 	case ALG_CCMP:
@@ -2414,7 +2415,7 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl3945_priv *priv,
 
 	case ALG_WEP:
 		cmd->cmd.tx.sec_ctl = TX_CMD_SEC_WEP |
-		    (ctl->key_idx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT;
+		    (ctl->hw_key->hw_key_idx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT;
 
 		if (keyinfo->keylen == 13)
 			cmd->cmd.tx.sec_ctl |= TX_CMD_SEC_KEY128;
@@ -2422,7 +2423,7 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl3945_priv *priv,
 		memcpy(&cmd->cmd.tx.key[3], keyinfo->key, keyinfo->keylen);
 
 		IWL_DEBUG_TX("Configuring packet for WEP encryption "
-			     "with key %d\n", ctl->key_idx);
+			     "with key %d\n", ctl->hw_key->hw_key_idx);
 		break;
 
 	default:
diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c
index c8cbf70600d..e43ea5377d8 100644
--- a/drivers/net/wireless/iwlwifi/iwl4965-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c
@@ -1926,7 +1926,7 @@ static void iwl4965_build_tx_cmd_hwcrypto(struct iwl_priv *priv,
 	struct iwl_wep_key *wepkey;
 	int keyidx = 0;
 
-	BUG_ON(ctl->key_idx > 3);
+	BUG_ON(ctl->hw_key->hw_key_idx > 3);
 
 	switch (keyinfo->alg) {
 	case ALG_CCMP:
@@ -1945,11 +1945,11 @@ static void iwl4965_build_tx_cmd_hwcrypto(struct iwl_priv *priv,
 		break;
 
 	case ALG_WEP:
-		wepkey = &priv->wep_keys[ctl->key_idx];
+		wepkey = &priv->wep_keys[ctl->hw_key->hw_key_idx];
 		cmd->cmd.tx.sec_ctl = 0;
 		if (priv->default_wep_key) {
 			/* the WEP key was sent as static */
-			keyidx = ctl->key_idx;
+			keyidx = ctl->hw_key->hw_key_idx;
 			memcpy(&cmd->cmd.tx.key[3], wepkey->key,
 							wepkey->key_size);
 			if (wepkey->key_size == WEP_KEY_LEN_128)
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 4a80d74975e..27ef9f761ac 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -286,8 +286,17 @@ enum mac80211_tx_control_flags {
 
 /* Transmit control fields. This data structure is passed to low-level driver
  * with each TX frame. The low-level driver is responsible for configuring
- * the hardware to use given values (depending on what is supported). */
-
+ * the hardware to use given values (depending on what is supported).
+ *
+ * NOTE: Be careful with using the pointers outside of the ieee80211_ops->tx()
+ * context (i.e. when defering the work to a workqueue).
+ * The vif pointer is valid until the it has been removed with the
+ * ieee80211_ops->remove_interface() callback funtion.
+ * The hw_key pointer is valid until it has been removed with the
+ * ieee80211_ops->set_key() callback function.
+ * The tx_rate and alt_retry_rate pointers are valid until the phy is
+ * deregistered.
+ */
 struct ieee80211_tx_control {
 	struct ieee80211_vif *vif;
 	struct ieee80211_rate *tx_rate;
@@ -298,9 +307,11 @@ struct ieee80211_tx_control {
 	/* retry rate for the last retries */
 	struct ieee80211_rate *alt_retry_rate;
 
+	/* Key used for hardware encryption
+	 * NULL if IEEE80211_TXCTL_DO_NOT_ENCRYPT is set */
+	struct ieee80211_key_conf *hw_key;
+
 	u32 flags;		/* tx control flags defined above */
-	u8 key_idx;		/* keyidx from hw->set_key(), undefined if
-				 * IEEE80211_TXCTL_DO_NOT_ENCRYPT is set */
 	u8 retry_limit;		/* 1 = only first attempt, 2 = one retry, ..
 				 * This could be used when set_retry_limit
 				 * is not implemented by the driver */
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index affcecd78c1..3cbae42ec50 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -337,7 +337,7 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 		if (ieee80211_wep_encrypt(tx->local, skb, tx->key))
 			return -1;
 	} else {
-		tx->control->key_idx = tx->key->conf.hw_key_idx;
+		tx->control->hw_key = &tx->key->conf;
 		if (tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) {
 			if (!ieee80211_wep_add_iv(tx->local, skb, tx->key))
 				return -1;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 45709ada8fe..42f3654e1c5 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -228,7 +228,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx,
 					    0x7f),
 				      (u8) key->u.tkip.iv16);
 
-		tx->control->key_idx = tx->key->conf.hw_key_idx;
+		tx->control->hw_key = &tx->key->conf;
 		return 0;
 	}
 
@@ -256,7 +256,7 @@ ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx)
 	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) &&
 	    !wpa_test) {
 		/* hwaccel - with no need for preallocated room for IV/ICV */
-		tx->control->key_idx = tx->key->conf.hw_key_idx;
+		tx->control->hw_key = &tx->key->conf;
 		return TX_CONTINUE;
 	}
 
@@ -478,7 +478,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx,
 
 	if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
 		/* hwaccel - with preallocated room for CCMP header */
-		tx->control->key_idx = key->conf.hw_key_idx;
+		tx->control->hw_key = &tx->key->conf;
 		return 0;
 	}
 
@@ -505,7 +505,7 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx)
 	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
 		/* hwaccel - with no need for preallocated room for CCMP "
 		 * header or MIC fields */
-		tx->control->key_idx = tx->key->conf.hw_key_idx;
+		tx->control->hw_key = &tx->key->conf;
 		return TX_CONTINUE;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 988c0f723d0b1abb399e6e71d8bf3f8bf1949a70 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 17 Apr 2008 19:21:22 +0200
Subject: mac80211: a few code cleanups

This has some code cleanups (some inspired by checkpatch), I got
bored at probably a third of the output though so if somebody
else wants to...

Signed-off-by: Johannes Berg <johannes@sipsolutions.net
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/aes_ccm.c        |  2 +-
 net/mac80211/aes_ccm.h        |  2 +-
 net/mac80211/debugfs_netdev.c |  2 +-
 net/mac80211/ieee80211_i.h    |  6 +++---
 net/mac80211/iface.c          |  3 +--
 net/mac80211/main.c           |  6 +++---
 net/mac80211/mlme.c           | 31 ++++++++++++++-----------------
 net/mac80211/tkip.h           |  4 ++--
 net/mac80211/wep.h            |  2 +-
 9 files changed, 27 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 59f1691f62c..4d4c2dfcf9a 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -134,7 +134,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
 }
 
 
-struct crypto_cipher * ieee80211_aes_key_setup_encrypt(const u8 key[])
+struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
 {
 	struct crypto_cipher *tfm;
 
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index 885f19030b2..8cd0f14aab4 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -14,7 +14,7 @@
 
 #define AES_BLOCK_LEN 16
 
-struct crypto_cipher * ieee80211_aes_key_setup_encrypt(const u8 key[]);
+struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]);
 void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
 			       u8 *b_0, u8 *aad, u8 *data, size_t data_len,
 			       u8 *cdata, u8 *mic);
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index e3326d04694..3ae5493d728 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -528,7 +528,7 @@ void ieee80211_debugfs_change_if_type(struct ieee80211_sub_if_data *sdata,
 	add_files(sdata);
 }
 
-static int netdev_notify(struct notifier_block * nb,
+static int netdev_notify(struct notifier_block *nb,
 			 unsigned long state,
 			 void *ndev)
 {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c7314bf4bec..a05e1db3699 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -919,9 +919,9 @@ ieee80211_rx_result ieee80211_sta_rx_scan(
 void ieee80211_rx_bss_list_init(struct net_device *dev);
 void ieee80211_rx_bss_list_deinit(struct net_device *dev);
 int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len);
-struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev,
-					 struct sk_buff *skb, u8 *bssid,
-					 u8 *addr);
+struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
+					struct sk_buff *skb, u8 *bssid,
+					u8 *addr);
 int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason);
 int ieee80211_sta_disassociate(struct net_device *dev, u16 reason);
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 80954a51218..f41c7e0de62 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -33,9 +33,8 @@ static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata)
 {
 	int i;
 
-	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
+	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
 		__skb_queue_purge(&sdata->fragments[i].skb_list);
-	}
 }
 
 /* Must be called with rtnl lock held. */
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 9ad4e3631b6..e19be27a3de 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -385,8 +385,8 @@ static int ieee80211_open(struct net_device *dev)
 	 * yet be effective. Trigger execution of ieee80211_sta_work
 	 * to fix this.
 	 */
-	if(sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	   sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 		queue_work(local->hw.workqueue, &ifsta->work);
 	}
@@ -1482,7 +1482,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
 		return;
 	}
 
-	rthdr = (struct ieee80211_tx_status_rtap_hdr*)
+	rthdr = (struct ieee80211_tx_status_rtap_hdr *)
 				skb_push(skb, sizeof(*rthdr));
 
 	memset(rthdr, 0, sizeof(*rthdr));
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a5e5c31c23a..de09f58d968 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -306,28 +306,24 @@ static void ieee80211_sta_wmm_params(struct net_device *dev,
 		switch (aci) {
 		case 1:
 			queue = IEEE80211_TX_QUEUE_DATA3;
-			if (acm) {
+			if (acm)
 				local->wmm_acm |= BIT(0) | BIT(3);
-			}
 			break;
 		case 2:
 			queue = IEEE80211_TX_QUEUE_DATA1;
-			if (acm) {
+			if (acm)
 				local->wmm_acm |= BIT(4) | BIT(5);
-			}
 			break;
 		case 3:
 			queue = IEEE80211_TX_QUEUE_DATA0;
-			if (acm) {
+			if (acm)
 				local->wmm_acm |= BIT(6) | BIT(7);
-			}
 			break;
 		case 0:
 		default:
 			queue = IEEE80211_TX_QUEUE_DATA2;
-			if (acm) {
+			if (acm)
 				local->wmm_acm |= BIT(1) | BIT(2);
-			}
 			break;
 		}
 
@@ -706,9 +702,8 @@ static void ieee80211_send_assoc(struct net_device *dev,
 	if (bss) {
 		if (bss->capability & WLAN_CAPABILITY_PRIVACY)
 			capab |= WLAN_CAPABILITY_PRIVACY;
-		if (bss->wmm_ie) {
+		if (bss->wmm_ie)
 			wmm = 1;
-		}
 		ieee80211_rx_bss_put(dev, bss);
 	}
 
@@ -1805,9 +1800,8 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev,
 	       " (reason=%d)\n",
 	       dev->name, print_mac(mac, mgmt->sa), reason_code);
 
-	if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) {
+	if (ifsta->flags & IEEE80211_STA_AUTHENTICATED)
 		printk(KERN_DEBUG "%s: deauthenticated\n", dev->name);
-	}
 
 	if (ifsta->state == IEEE80211_AUTHENTICATE ||
 	    ifsta->state == IEEE80211_ASSOCIATE ||
@@ -3517,10 +3511,12 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
 	bss->beacon_int = local->hw.conf.beacon_int;
 	bss->last_update = jiffies;
 	bss->capability = WLAN_CAPABILITY_IBSS;
-	if (sdata->default_key) {
+
+	if (sdata->default_key)
 		bss->capability |= WLAN_CAPABILITY_PRIVACY;
-	} else
+	else
 		sdata->drop_unencrypted = 0;
+
 	bss->supp_rates_len = sband->n_bitrates;
 	pos = bss->supp_rates;
 	for (i = 0; i < sband->n_bitrates; i++) {
@@ -4203,6 +4199,7 @@ int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+
 	kfree(ifsta->extra_ie);
 	if (len == 0) {
 		ifsta->extra_ie = NULL;
@@ -4220,9 +4217,9 @@ int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len)
 }
 
 
-struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev,
-					 struct sk_buff *skb, u8 *bssid,
-					 u8 *addr)
+struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
+					struct sk_buff *skb, u8 *bssid,
+					u8 *addr)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct sta_info *sta;
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
index b7c2ee763d9..1fa0bb4dba3 100644
--- a/net/mac80211/tkip.h
+++ b/net/mac80211/tkip.h
@@ -13,8 +13,8 @@
 #include <linux/crypto.h>
 #include "key.h"
 
-u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
-			   u8 iv0, u8 iv1, u8 iv2);
+u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
+			  u8 iv0, u8 iv1, u8 iv2);
 void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta,
 				  u16 *phase1key);
 void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h
index 363779c5065..e587172115b 100644
--- a/net/mac80211/wep.h
+++ b/net/mac80211/wep.h
@@ -26,7 +26,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb,
 			  struct ieee80211_key *key);
 int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb,
 			  struct ieee80211_key *key);
-u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key);
+u8 *ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key);
 
 ieee80211_rx_result
 ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx);
-- 
cgit v1.2.3-70-g09d2


From c6adbd2158fee972adcc6232de5e2ef375f1f782 Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Thu, 17 Apr 2008 21:11:18 +0200
Subject: mac80211: Add IEEE80211_KEY_FLAG_PAIRWISE

This adds a new flag to the ieee80211_key_conf structure.
This flag will inform the driver the key is pairwise rather then
a shared key.

This is important for drivers who support both types of keys,
and need to be informed which type of key this is. Alternative
would be drivers checking the address argument of set_key(),
but it will be safer when mac80211 is more explicit.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 3 +++
 net/mac80211/key.c     | 7 +++++++
 2 files changed, 10 insertions(+)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 27ef9f761ac..740c11ca066 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -621,11 +621,14 @@ enum ieee80211_key_alg {
  * @IEEE80211_KEY_FLAG_GENERATE_MMIC: This flag should be set by
  *	the driver for a TKIP key if it requires Michael MIC
  *	generation in software.
+ * @IEEE80211_KEY_FLAG_PAIRWISE: Set by mac80211, this flag indicates
+ *	that the key is pairwise rather then a shared key.
  */
 enum ieee80211_key_flags {
 	IEEE80211_KEY_FLAG_WMM_STA	= 1<<0,
 	IEEE80211_KEY_FLAG_GENERATE_IV	= 1<<1,
 	IEEE80211_KEY_FLAG_GENERATE_MMIC= 1<<2,
+	IEEE80211_KEY_FLAG_PAIRWISE	= 1<<3,
 };
 
 /**
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 150d66dbda9..88b211af7c1 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -323,6 +323,13 @@ void ieee80211_key_link(struct ieee80211_key *key,
 		 */
 		if (sta->flags & WLAN_STA_WME)
 			key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
+
+		/*
+		 * This key is for a specific sta interface,
+		 * inform the driver that it should try to store
+		 * this key as pairwise key.
+		 */
+		key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE;
 	} else {
 		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
 			struct sta_info *ap;
-- 
cgit v1.2.3-70-g09d2


From 3acea5b616c6d85008700a9d51cb02a81b2d0c67 Mon Sep 17 00:00:00 2001
From: Ester Kummer <ester.kummer@intel.com>
Date: Thu, 17 Apr 2008 16:05:14 -0700
Subject: mac80211: correct skb allocation

This patch corrects the allocation of skb in ADDBA req/resp and DELBA
it removes redundant space u.addba_* are already counted in sizeof(*mgmt)

Signed-off-by: Ester Kummer <ester.kummer@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index de09f58d968..63b391d0125 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1093,8 +1093,8 @@ static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
 	struct ieee80211_mgmt *mgmt;
 	u16 capab;
 
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 +
-					sizeof(mgmt->u.action.u.addba_resp));
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer "
 		       "for addba resp frame\n", dev->name);
@@ -1142,9 +1142,7 @@ void ieee80211_send_addba_request(struct net_device *dev, const u8 *da,
 	struct ieee80211_mgmt *mgmt;
 	u16 capab;
 
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 +
-				sizeof(mgmt->u.action.u.addba_req));
-
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
 
 	if (!skb) {
 		printk(KERN_ERR "%s: failed to allocate buffer "
@@ -1406,8 +1404,7 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
 	struct ieee80211_mgmt *mgmt;
 	u16 params;
 
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 +
-					sizeof(mgmt->u.action.u.delba));
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
 
 	if (!skb) {
 		printk(KERN_ERR "%s: failed to allocate buffer "
-- 
cgit v1.2.3-70-g09d2


From 712590de5e5c977ff96efa3a16fcc03b0e976c3c Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Mon, 21 Apr 2008 11:47:51 +0300
Subject: make sta_rx_agg_session_timer_expired() static

sta_rx_agg_session_timer_expired() can now become static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 1 -
 net/mac80211/mlme.c        | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a05e1db3699..cabdc1439d5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -940,7 +940,6 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
 
 void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *da,
 				u16 tid, u16 initiator, u16 reason);
-void sta_rx_agg_session_timer_expired(unsigned long data);
 void sta_addba_resp_timer_expired(unsigned long data);
 void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr);
 u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 63b391d0125..e860d0bacea 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -87,6 +87,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 				    u8 *ssid, size_t ssid_len);
 static int ieee80211_sta_config_auth(struct net_device *dev,
 				     struct ieee80211_if_sta *ifsta);
+static void sta_rx_agg_session_timer_expired(unsigned long data);
 
 
 void ieee802_11_parse_elems(u8 *start, size_t len,
@@ -1608,7 +1609,7 @@ timer_expired_exit:
  * resetting it after each frame that arrives from the originator.
  * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
  */
-void sta_rx_agg_session_timer_expired(unsigned long data)
+static void sta_rx_agg_session_timer_expired(unsigned long data)
 {
 	/* not an elegant detour, but there is no choice as the timer passes
 	 * only one argument, and verious sta_info are needed here, so init
-- 
cgit v1.2.3-70-g09d2


From c12cf2109702c052688391f4171f239effb241bf Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Mon, 21 Apr 2008 11:48:28 +0300
Subject: remove ieee80211_tx_frame()

After the softmac removal ieee80211_tx_frame() was no longer used.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/ieee80211.h      |  3 --
 net/ieee80211/ieee80211_tx.c | 86 --------------------------------------------
 2 files changed, 89 deletions(-)

(limited to 'net')

diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h
index 529816bfbc5..facd0ffedc6 100644
--- a/include/net/ieee80211.h
+++ b/include/net/ieee80211.h
@@ -1262,9 +1262,6 @@ extern int ieee80211_set_encryption(struct ieee80211_device *ieee);
 /* ieee80211_tx.c */
 extern int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev);
 extern void ieee80211_txb_free(struct ieee80211_txb *);
-extern int ieee80211_tx_frame(struct ieee80211_device *ieee,
-			      struct ieee80211_hdr *frame, int hdr_len,
-			      int total_len, int encrypt_mpdu);
 
 /* ieee80211_rx.c */
 extern void ieee80211_rx_any(struct ieee80211_device *ieee,
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index d8b02603cbe..d996547f7a6 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -542,90 +542,4 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 1;
 }
 
-/* Incoming 802.11 strucure is converted to a TXB
- * a block of 802.11 fragment packets (stored as skbs) */
-int ieee80211_tx_frame(struct ieee80211_device *ieee,
-		       struct ieee80211_hdr *frame, int hdr_len, int total_len,
-		       int encrypt_mpdu)
-{
-	struct ieee80211_txb *txb = NULL;
-	unsigned long flags;
-	struct net_device_stats *stats = &ieee->stats;
-	struct sk_buff *skb_frag;
-	int priority = -1;
-	int fraglen = total_len;
-	int headroom = ieee->tx_headroom;
-	struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx];
-
-	spin_lock_irqsave(&ieee->lock, flags);
-
-	if (encrypt_mpdu && (!ieee->sec.encrypt || !crypt))
-		encrypt_mpdu = 0;
-
-	/* If there is no driver handler to take the TXB, dont' bother
-	 * creating it... */
-	if (!ieee->hard_start_xmit) {
-		printk(KERN_WARNING "%s: No xmit handler.\n", ieee->dev->name);
-		goto success;
-	}
-
-	if (unlikely(total_len < 24)) {
-		printk(KERN_WARNING "%s: skb too small (%d).\n",
-		       ieee->dev->name, total_len);
-		goto success;
-	}
-
-	if (encrypt_mpdu) {
-		frame->frame_ctl |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
-		fraglen += crypt->ops->extra_mpdu_prefix_len +
-			   crypt->ops->extra_mpdu_postfix_len;
-		headroom += crypt->ops->extra_mpdu_prefix_len;
-	}
-
-	/* When we allocate the TXB we allocate enough space for the reserve
-	 * and full fragment bytes (bytes_per_frag doesn't include prefix,
-	 * postfix, header, FCS, etc.) */
-	txb = ieee80211_alloc_txb(1, fraglen, headroom, GFP_ATOMIC);
-	if (unlikely(!txb)) {
-		printk(KERN_WARNING "%s: Could not allocate TXB\n",
-		       ieee->dev->name);
-		goto failed;
-	}
-	txb->encrypted = 0;
-	txb->payload_size = fraglen;
-
-	skb_frag = txb->fragments[0];
-
-	memcpy(skb_put(skb_frag, total_len), frame, total_len);
-
-	if (ieee->config &
-	    (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS))
-		skb_put(skb_frag, 4);
-
-	/* To avoid overcomplicating things, we do the corner-case frame
-	 * encryption in software. The only real situation where encryption is
-	 * needed here is during software-based shared key authentication. */
-	if (encrypt_mpdu)
-		ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len);
-
-      success:
-	spin_unlock_irqrestore(&ieee->lock, flags);
-
-	if (txb) {
-		if ((*ieee->hard_start_xmit) (txb, ieee->dev, priority) == 0) {
-			stats->tx_packets++;
-			stats->tx_bytes += txb->payload_size;
-			return 0;
-		}
-		ieee80211_txb_free(txb);
-	}
-	return 0;
-
-      failed:
-	spin_unlock_irqrestore(&ieee->lock, flags);
-	stats->tx_errors++;
-	return 1;
-}
-
-EXPORT_SYMBOL(ieee80211_tx_frame);
 EXPORT_SYMBOL(ieee80211_txb_free);
-- 
cgit v1.2.3-70-g09d2


From 7eafd25d9559bd0f652449c222d38d63412e3d4a Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Mon, 21 Apr 2008 11:48:34 +0300
Subject: remove ieee80211_wx_{get,set}_auth()

After the bcm43xx removal ieee80211_wx_{get,set}_auth() were no longer
used.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/ieee80211.h      |  8 ----
 net/ieee80211/ieee80211_wx.c | 89 --------------------------------------------
 2 files changed, 97 deletions(-)

(limited to 'net')

diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h
index facd0ffedc6..b31399e1fd8 100644
--- a/include/net/ieee80211.h
+++ b/include/net/ieee80211.h
@@ -1309,14 +1309,6 @@ extern int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
 extern int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee,
 				      struct iw_request_info *info,
 				      union iwreq_data *wrqu, char *extra);
-extern int ieee80211_wx_set_auth(struct net_device *dev,
-				 struct iw_request_info *info,
-				 union iwreq_data *wrqu,
-				 char *extra);
-extern int ieee80211_wx_get_auth(struct net_device *dev,
-				 struct iw_request_info *info,
-				 union iwreq_data *wrqu,
-				 char *extra);
 
 static inline void ieee80211_increment_scans(struct ieee80211_device *ieee)
 {
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 623489afa62..822606b615c 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -744,98 +744,9 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee,
 	return 0;
 }
 
-int ieee80211_wx_set_auth(struct net_device *dev,
-			  struct iw_request_info *info,
-			  union iwreq_data *wrqu,
-			  char *extra)
-{
-	struct ieee80211_device *ieee = netdev_priv(dev);
-	unsigned long flags;
-	int err = 0;
-
-	spin_lock_irqsave(&ieee->lock, flags);
-
-	switch (wrqu->param.flags & IW_AUTH_INDEX) {
-	case IW_AUTH_WPA_VERSION:
-	case IW_AUTH_CIPHER_PAIRWISE:
-	case IW_AUTH_CIPHER_GROUP:
-	case IW_AUTH_KEY_MGMT:
-		/*
-		 * Host AP driver does not use these parameters and allows
-		 * wpa_supplicant to control them internally.
-		 */
-		break;
-	case IW_AUTH_TKIP_COUNTERMEASURES:
-		break;		/* FIXME */
-	case IW_AUTH_DROP_UNENCRYPTED:
-		ieee->drop_unencrypted = !!wrqu->param.value;
-		break;
-	case IW_AUTH_80211_AUTH_ALG:
-		break;		/* FIXME */
-	case IW_AUTH_WPA_ENABLED:
-		ieee->privacy_invoked = ieee->wpa_enabled = !!wrqu->param.value;
-		break;
-	case IW_AUTH_RX_UNENCRYPTED_EAPOL:
-		ieee->ieee802_1x = !!wrqu->param.value;
-		break;
-	case IW_AUTH_PRIVACY_INVOKED:
-		ieee->privacy_invoked = !!wrqu->param.value;
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-	spin_unlock_irqrestore(&ieee->lock, flags);
-	return err;
-}
-
-int ieee80211_wx_get_auth(struct net_device *dev,
-			  struct iw_request_info *info,
-			  union iwreq_data *wrqu,
-			  char *extra)
-{
-	struct ieee80211_device *ieee = netdev_priv(dev);
-	unsigned long flags;
-	int err = 0;
-
-	spin_lock_irqsave(&ieee->lock, flags);
-
-	switch (wrqu->param.flags & IW_AUTH_INDEX) {
-	case IW_AUTH_WPA_VERSION:
-	case IW_AUTH_CIPHER_PAIRWISE:
-	case IW_AUTH_CIPHER_GROUP:
-	case IW_AUTH_KEY_MGMT:
-	case IW_AUTH_TKIP_COUNTERMEASURES:		/* FIXME */
-	case IW_AUTH_80211_AUTH_ALG:			/* FIXME */
-		/*
-		 * Host AP driver does not use these parameters and allows
-		 * wpa_supplicant to control them internally.
-		 */
-		err = -EOPNOTSUPP;
-		break;
-	case IW_AUTH_DROP_UNENCRYPTED:
-		wrqu->param.value = ieee->drop_unencrypted;
-		break;
-	case IW_AUTH_WPA_ENABLED:
-		wrqu->param.value = ieee->wpa_enabled;
-		break;
-	case IW_AUTH_RX_UNENCRYPTED_EAPOL:
-		wrqu->param.value = ieee->ieee802_1x;
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-	spin_unlock_irqrestore(&ieee->lock, flags);
-	return err;
-}
-
 EXPORT_SYMBOL(ieee80211_wx_set_encodeext);
 EXPORT_SYMBOL(ieee80211_wx_get_encodeext);
 
 EXPORT_SYMBOL(ieee80211_wx_get_scan);
 EXPORT_SYMBOL(ieee80211_wx_set_encode);
 EXPORT_SYMBOL(ieee80211_wx_get_encode);
-
-EXPORT_SYMBOL_GPL(ieee80211_wx_set_auth);
-EXPORT_SYMBOL_GPL(ieee80211_wx_get_auth);
-- 
cgit v1.2.3-70-g09d2


From e100bb64bf7cdeae7f742a65ee1985649a7fd1b4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 30 Apr 2008 18:51:21 +0200
Subject: mac80211: QoS related cleanups

This
 * makes the queue number passed to drivers a u16
   (as it will be with skb_get_queue_mapping)
 * removes the useless queue number defines
 * splits hw->queues into hw->queues/ampdu_queues
 * removes the debugfs files for per-queue counters
 * removes some dead QoS code
 * removes the beacon queue configuration for IBSS
   so that the drivers now never get a queue number
   bigger than (hw->queues + hw->ampdu_queues - 1)
   for tx and only in the range 0..hw->queues-1 for
   conf_tx.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/b43/main.c             |   3 +-
 drivers/net/wireless/b43legacy/main.c       |   3 +-
 drivers/net/wireless/iwlwifi/iwl-core.c     |   2 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |   2 +-
 drivers/net/wireless/iwlwifi/iwl4965-base.c |   2 +-
 drivers/net/wireless/p54/p54common.c        |   2 +-
 drivers/net/wireless/rt2x00/rt2400pci.c     |   5 +-
 drivers/net/wireless/rt2x00/rt2x00.h        |   2 +-
 drivers/net/wireless/rt2x00/rt2x00mac.c     |   2 +-
 drivers/net/wireless/rt2x00/rt2x00queue.h   |   7 +-
 include/net/mac80211.h                      |  66 ++++++---------
 net/mac80211/debugfs.c                      |  43 ----------
 net/mac80211/debugfs_sta.c                  |  38 ---------
 net/mac80211/ieee80211_i.h                  |   8 +-
 net/mac80211/main.c                         |   5 ++
 net/mac80211/mlme.c                         |  23 ++----
 net/mac80211/rx.c                           |   5 --
 net/mac80211/sta_info.c                     |   2 +-
 net/mac80211/wme.c                          | 120 ++++++++++++----------------
 19 files changed, 100 insertions(+), 240 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index cf546e367d3..c6e79a15d3c 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -2996,8 +2996,7 @@ static void b43_qos_update_work(struct work_struct *work)
 	mutex_unlock(&wl->mutex);
 }
 
-static int b43_op_conf_tx(struct ieee80211_hw *hw,
-			  int _queue,
+static int b43_op_conf_tx(struct ieee80211_hw *hw, u16 _queue,
 			  const struct ieee80211_tx_queue_params *params)
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 14a5eea2573..1ccc51e90a8 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2383,8 +2383,7 @@ out:
 	return NETDEV_TX_OK;
 }
 
-static int b43legacy_op_conf_tx(struct ieee80211_hw *hw,
-				int queue,
+static int b43legacy_op_conf_tx(struct ieee80211_hw *hw, u16 queue,
 				const struct ieee80211_tx_queue_params *params)
 {
 	return 0;
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 27e5f496bc1..c4b5c1a100f 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -568,7 +568,7 @@ static void iwlcore_init_hw(struct iwl_priv *priv)
 	hw->queues = 4;
 #ifdef CONFIG_IWL4965_HT
 	/* Enhanced value; more queues, to support 11n aggregation */
-	hw->queues = 16;
+	hw->ampdu_queues = 12;
 #endif /* CONFIG_IWL4965_HT */
 }
 
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index bad367cfbee..7040cde8bc2 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -7143,7 +7143,7 @@ static int iwl3945_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	return rc;
 }
 
-static int iwl3945_mac_conf_tx(struct ieee80211_hw *hw, int queue,
+static int iwl3945_mac_conf_tx(struct ieee80211_hw *hw, u16 queue,
 			   const struct ieee80211_tx_queue_params *params)
 {
 	struct iwl3945_priv *priv = hw->priv;
diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c
index 5363be7a305..4406fc72d88 100644
--- a/drivers/net/wireless/iwlwifi/iwl4965-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c
@@ -6339,7 +6339,7 @@ static int iwl4965_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	return ret;
 }
 
-static int iwl4965_mac_conf_tx(struct ieee80211_hw *hw, int queue,
+static int iwl4965_mac_conf_tx(struct ieee80211_hw *hw, u16 queue,
 			   const struct ieee80211_tx_queue_params *params)
 {
 	struct iwl_priv *priv = hw->priv;
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 59a26978f87..34b91ccd8ae 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -935,7 +935,7 @@ static void p54_configure_filter(struct ieee80211_hw *dev,
 	}
 }
 
-static int p54_conf_tx(struct ieee80211_hw *dev, int queue,
+static int p54_conf_tx(struct ieee80211_hw *dev, u16 queue,
 		       const struct ieee80211_tx_queue_params *params)
 {
 	struct p54_common *priv = dev->priv;
diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c
index 705bc2d41dc..247fbbfb0f2 100644
--- a/drivers/net/wireless/rt2x00/rt2400pci.c
+++ b/drivers/net/wireless/rt2x00/rt2400pci.c
@@ -1442,8 +1442,7 @@ static int rt2400pci_set_retry_limit(struct ieee80211_hw *hw,
 	return 0;
 }
 
-static int rt2400pci_conf_tx(struct ieee80211_hw *hw,
-			     int queue,
+static int rt2400pci_conf_tx(struct ieee80211_hw *hw, u16 queue,
 			     const struct ieee80211_tx_queue_params *params)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
@@ -1453,7 +1452,7 @@ static int rt2400pci_conf_tx(struct ieee80211_hw *hw,
 	 * per queue. So by default we only configure the TX queue,
 	 * and ignore all other configurations.
 	 */
-	if (queue != IEEE80211_TX_QUEUE_DATA0)
+	if (queue != 0)
 		return -EINVAL;
 
 	if (rt2x00mac_conf_tx(hw, queue, params))
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 48a9af47921..79bd9c9f896 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -997,7 +997,7 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw,
 				struct ieee80211_vif *vif,
 				struct ieee80211_bss_conf *bss_conf,
 				u32 changes);
-int rt2x00mac_conf_tx(struct ieee80211_hw *hw, int queue,
+int rt2x00mac_conf_tx(struct ieee80211_hw *hw, u16 queue,
 		      const struct ieee80211_tx_queue_params *params);
 
 /*
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 7bc5129484f..767e0ffce04 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -517,7 +517,7 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL_GPL(rt2x00mac_bss_info_changed);
 
-int rt2x00mac_conf_tx(struct ieee80211_hw *hw, int queue_idx,
+int rt2x00mac_conf_tx(struct ieee80211_hw *hw, u16 queue_idx,
 		      const struct ieee80211_tx_queue_params *params)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.h b/drivers/net/wireless/rt2x00/rt2x00queue.h
index 29fe2652555..d1707a7ca41 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.h
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.h
@@ -86,12 +86,9 @@ enum data_queue_qid {
 static inline enum data_queue_qid mac80211_queue_to_qid(unsigned int queue)
 {
 	/* Regular TX queues are mapped directly */
-	if (queue < NUM_TX_DATA_QUEUES)
+	if (queue < 4)
 		return queue;
-	else if (queue == IEEE80211_TX_QUEUE_BEACON)
-		return QID_BEACON;
-	else if (queue == IEEE80211_TX_QUEUE_AFTER_BEACON)
-		return QID_ATIM;
+	WARN_ON(1);
 	return QID_OTHER;
 }
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ef701d6fd66..75a34609eed 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -97,6 +97,18 @@ struct ieee80211_ht_bss_info {
 	u8 bss_op_mode; /* use IEEE80211_HT_IE_ */
 };
 
+/**
+ * enum ieee80211_max_queues - maximum number of queues
+ *
+ * @IEEE80211_MAX_QUEUES: Maximum number of regular device queues.
+ * @IEEE80211_MAX_AMPDU_QUEUES: Maximum number of queues usable
+ *	for A-MPDU operation.
+ */
+enum ieee80211_max_queues {
+	IEEE80211_MAX_QUEUES =		16,
+	IEEE80211_MAX_AMPDU_QUEUES =	16,
+};
+
 /**
  * struct ieee80211_tx_queue_params - transmit queue configuration
  *
@@ -129,42 +141,6 @@ struct ieee80211_tx_queue_stats {
 	unsigned int count;
 };
 
-/**
- * enum ieee80211_tx_queue - transmit queue number
- *
- * These constants are used with some callbacks that take a
- * queue number to set parameters for a queue.
- *
- * @IEEE80211_TX_QUEUE_DATA0: data queue 0
- * @IEEE80211_TX_QUEUE_DATA1: data queue 1
- * @IEEE80211_TX_QUEUE_DATA2: data queue 2
- * @IEEE80211_TX_QUEUE_DATA3: data queue 3
- * @IEEE80211_TX_QUEUE_DATA4: data queue 4
- * @IEEE80211_TX_QUEUE_SVP: ??
- * @NUM_TX_DATA_QUEUES: number of data queues
- * @IEEE80211_TX_QUEUE_AFTER_BEACON: transmit queue for frames to be
- *	sent after a beacon
- * @IEEE80211_TX_QUEUE_BEACON: transmit queue for beacon frames
- * @NUM_TX_DATA_QUEUES_AMPDU: adding more queues for A-MPDU
- */
-enum ieee80211_tx_queue {
-	IEEE80211_TX_QUEUE_DATA0,
-	IEEE80211_TX_QUEUE_DATA1,
-	IEEE80211_TX_QUEUE_DATA2,
-	IEEE80211_TX_QUEUE_DATA3,
-	IEEE80211_TX_QUEUE_DATA4,
-	IEEE80211_TX_QUEUE_SVP,
-
-	NUM_TX_DATA_QUEUES,
-
-/* due to stupidity in the sub-ioctl userspace interface, the items in
- * this struct need to have fixed values. As soon as it is removed, we can
- * fix these entries. */
-	IEEE80211_TX_QUEUE_AFTER_BEACON = 6,
-	IEEE80211_TX_QUEUE_BEACON = 7,
-	NUM_TX_DATA_QUEUES_AMPDU = 16
-};
-
 struct ieee80211_low_level_stats {
 	unsigned int dot11ACKFailureCount;
 	unsigned int dot11RTSFailureCount;
@@ -315,7 +291,7 @@ struct ieee80211_tx_control {
 				 * position represents antenna number used */
 	u8 icv_len;		/* length of the ICV/MIC field in octets */
 	u8 iv_len;		/* length of the IV field in octets */
-	u8 queue;		/* hardware queue to use for this frame;
+	u16 queue;		/* hardware queue to use for this frame;
 				 * 0 = highest, hw->queues-1 = lowest */
 	u16 aid;		/* Station AID */
 	int type;	/* internal */
@@ -772,7 +748,14 @@ enum ieee80211_hw_flags {
  * @max_noise: like @max_rssi, but for the noise value.
  *
  * @queues: number of available hardware transmit queues for
- *	data packets. WMM/QoS requires at least four.
+ *	data packets. WMM/QoS requires at least four, these
+ *	queues need to have configurable access parameters.
+ *
+ * @ampdu_queues: number of available hardware transmit queues
+ *	for A-MPDU packets, these have no access parameters
+ *	because they're used only for A-MPDU frames. Note that
+ *	mac80211 will not currently use any of the regular queues
+ *	for aggregation.
  *
  * @rate_control_algorithm: rate control algorithm for this hardware.
  *	If unset (NULL), the default algorithm will be used. Must be
@@ -791,7 +774,7 @@ struct ieee80211_hw {
 	unsigned int extra_tx_headroom;
 	int channel_change_time;
 	int vif_data_size;
-	u8 queues;
+	u16 queues, ampdu_queues;
 	s8 max_rssi;
 	s8 max_signal;
 	s8 max_noise;
@@ -1069,8 +1052,7 @@ enum ieee80211_ampdu_mlme_action {
  *	of assocaited station or AP.
  *
  * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max),
- *	bursting) for a hardware TX queue. The @queue parameter uses the
- *	%IEEE80211_TX_QUEUE_* constants. Must be atomic.
+ *	bursting) for a hardware TX queue. Must be atomic.
  *
  * @get_tx_stats: Get statistics of the current TX queue status. This is used
  *	to get number of currently queued packets (queue length), maximum queue
@@ -1150,7 +1132,7 @@ struct ieee80211_ops {
 			       u32 short_retry, u32 long_retr);
 	void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			enum sta_notify_cmd, const u8 *addr);
-	int (*conf_tx)(struct ieee80211_hw *hw, int queue,
+	int (*conf_tx)(struct ieee80211_hw *hw, u16 queue,
 		       const struct ieee80211_tx_queue_params *params);
 	int (*get_tx_stats)(struct ieee80211_hw *hw,
 			    struct ieee80211_tx_queue_stats *stats);
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 1cccbfd781f..d20d90eead1 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -197,45 +197,6 @@ DEBUGFS_STATS_FILE(rx_handlers_fragments, 20, "%u",
 DEBUGFS_STATS_FILE(tx_status_drop, 20, "%u",
 		   local->tx_status_drop);
 
-static ssize_t stats_wme_rx_queue_read(struct file *file,
-				       char __user *userbuf,
-				       size_t count, loff_t *ppos)
-{
-	struct ieee80211_local *local = file->private_data;
-	char buf[NUM_RX_DATA_QUEUES*15], *p = buf;
-	int i;
-
-	for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
-		p += scnprintf(p, sizeof(buf)+buf-p,
-			       "%u\n", local->wme_rx_queue[i]);
-
-	return simple_read_from_buffer(userbuf, count, ppos, buf, p-buf);
-}
-
-static const struct file_operations stats_wme_rx_queue_ops = {
-	.read = stats_wme_rx_queue_read,
-	.open = mac80211_open_file_generic,
-};
-
-static ssize_t stats_wme_tx_queue_read(struct file *file,
-				       char __user *userbuf,
-				       size_t count, loff_t *ppos)
-{
-	struct ieee80211_local *local = file->private_data;
-	char buf[NUM_TX_DATA_QUEUES*15], *p = buf;
-	int i;
-
-	for (i = 0; i < NUM_TX_DATA_QUEUES; i++)
-		p += scnprintf(p, sizeof(buf)+buf-p,
-			       "%u\n", local->wme_tx_queue[i]);
-
-	return simple_read_from_buffer(userbuf, count, ppos, buf, p-buf);
-}
-
-static const struct file_operations stats_wme_tx_queue_ops = {
-	.read = stats_wme_tx_queue_read,
-	.open = mac80211_open_file_generic,
-};
 #endif
 
 DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount);
@@ -303,8 +264,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	DEBUGFS_STATS_ADD(rx_expand_skb_head2);
 	DEBUGFS_STATS_ADD(rx_handlers_fragments);
 	DEBUGFS_STATS_ADD(tx_status_drop);
-	DEBUGFS_STATS_ADD(wme_tx_queue);
-	DEBUGFS_STATS_ADD(wme_rx_queue);
 #endif
 	DEBUGFS_STATS_ADD(dot11ACKFailureCount);
 	DEBUGFS_STATS_ADD(dot11RTSFailureCount);
@@ -356,8 +315,6 @@ void debugfs_hw_del(struct ieee80211_local *local)
 	DEBUGFS_STATS_DEL(rx_expand_skb_head2);
 	DEBUGFS_STATS_DEL(rx_handlers_fragments);
 	DEBUGFS_STATS_DEL(tx_status_drop);
-	DEBUGFS_STATS_DEL(wme_tx_queue);
-	DEBUGFS_STATS_DEL(wme_rx_queue);
 #endif
 	DEBUGFS_STATS_DEL(dot11ACKFailureCount);
 	DEBUGFS_STATS_DEL(dot11RTSFailureCount);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 6d47a1d31b3..676a93202ff 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -123,36 +123,6 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
 }
 STA_OPS(last_seq_ctrl);
 
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
-static ssize_t sta_wme_rx_queue_read(struct file *file, char __user *userbuf,
-				     size_t count, loff_t *ppos)
-{
-	char buf[15*NUM_RX_DATA_QUEUES], *p = buf;
-	int i;
-	struct sta_info *sta = file->private_data;
-	for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
-		p += scnprintf(p, sizeof(buf)+buf-p, "%u ",
-			       sta->wme_rx_queue[i]);
-	p += scnprintf(p, sizeof(buf)+buf-p, "\n");
-	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
-}
-STA_OPS(wme_rx_queue);
-
-static ssize_t sta_wme_tx_queue_read(struct file *file, char __user *userbuf,
-				     size_t count, loff_t *ppos)
-{
-	char buf[15*NUM_TX_DATA_QUEUES], *p = buf;
-	int i;
-	struct sta_info *sta = file->private_data;
-	for (i = 0; i < NUM_TX_DATA_QUEUES; i++)
-		p += scnprintf(p, sizeof(buf)+buf-p, "%u ",
-			       sta->wme_tx_queue[i]);
-	p += scnprintf(p, sizeof(buf)+buf-p, "\n");
-	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
-}
-STA_OPS(wme_tx_queue);
-#endif
-
 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
@@ -293,10 +263,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
 	DEBUGFS_ADD(num_ps_buf_frames);
 	DEBUGFS_ADD(inactive_ms);
 	DEBUGFS_ADD(last_seq_ctrl);
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
-	DEBUGFS_ADD(wme_rx_queue);
-	DEBUGFS_ADD(wme_tx_queue);
-#endif
 	DEBUGFS_ADD(agg_status);
 }
 
@@ -306,10 +272,6 @@ void ieee80211_sta_debugfs_remove(struct sta_info *sta)
 	DEBUGFS_DEL(num_ps_buf_frames);
 	DEBUGFS_DEL(inactive_ms);
 	DEBUGFS_DEL(last_seq_ctrl);
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
-	DEBUGFS_DEL(wme_rx_queue);
-	DEBUGFS_DEL(wme_tx_queue);
-#endif
 	DEBUGFS_DEL(agg_status);
 
 	debugfs_remove(sta->debugfs.dir);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index cabdc1439d5..d82ed20a344 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -610,8 +610,8 @@ struct ieee80211_local {
 	struct sta_info *sta_hash[STA_HASH_SIZE];
 	struct timer_list sta_cleanup;
 
-	unsigned long state[NUM_TX_DATA_QUEUES_AMPDU];
-	struct ieee80211_tx_stored_packet pending_packet[NUM_TX_DATA_QUEUES_AMPDU];
+	unsigned long state[IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_AMPDU_QUEUES];
+	struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_AMPDU_QUEUES];
 	struct tasklet_struct tx_pending_tasklet;
 
 	/* number of interfaces with corresponding IFF_ flags */
@@ -705,8 +705,6 @@ struct ieee80211_local {
 	unsigned int rx_expand_skb_head2;
 	unsigned int rx_handlers_fragments;
 	unsigned int tx_status_drop;
-	unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES];
-	unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES];
 #define I802_DEBUG_INC(c) (c)++
 #else /* CONFIG_MAC80211_DEBUG_COUNTERS */
 #define I802_DEBUG_INC(c) do { } while (0)
@@ -764,8 +762,6 @@ struct ieee80211_local {
 			struct dentry *rx_expand_skb_head2;
 			struct dentry *rx_handlers_fragments;
 			struct dentry *tx_status_drop;
-			struct dentry *wme_tx_queue;
-			struct dentry *wme_rx_queue;
 #endif
 			struct dentry *dot11ACKFailureCount;
 			struct dentry *dot11RTSFailureCount;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index e19be27a3de..55e76117da9 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1745,6 +1745,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		goto fail_wep;
 	}
 
+	if (hw->queues > IEEE80211_MAX_QUEUES)
+		hw->queues = IEEE80211_MAX_QUEUES;
+	if (hw->ampdu_queues > IEEE80211_MAX_AMPDU_QUEUES)
+		hw->ampdu_queues = IEEE80211_MAX_AMPDU_QUEUES;
+
 	ieee80211_install_qdisc(local->mdev);
 
 	/* add one default STA interface */
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e860d0bacea..55b85ae5bc1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -257,19 +257,8 @@ static void ieee80211_sta_def_wmm_params(struct net_device *dev,
 		qparam.cw_max = 1023;
 		qparam.txop = 0;
 
-		for (i = IEEE80211_TX_QUEUE_DATA0; i < NUM_TX_DATA_QUEUES; i++)
-			local->ops->conf_tx(local_to_hw(local),
-					   i + IEEE80211_TX_QUEUE_DATA0,
-					   &qparam);
-
-		if (ibss) {
-			/* IBSS uses different parameters for Beacon sending */
-			qparam.cw_min++;
-			qparam.cw_min *= 2;
-			qparam.cw_min--;
-			local->ops->conf_tx(local_to_hw(local),
-					   IEEE80211_TX_QUEUE_BEACON, &qparam);
-		}
+		for (i = 0; i < local_to_hw(local)->queues; i++)
+			local->ops->conf_tx(local_to_hw(local), i, &qparam);
 	}
 }
 
@@ -306,23 +295,23 @@ static void ieee80211_sta_wmm_params(struct net_device *dev,
 
 		switch (aci) {
 		case 1:
-			queue = IEEE80211_TX_QUEUE_DATA3;
+			queue = 3;
 			if (acm)
 				local->wmm_acm |= BIT(0) | BIT(3);
 			break;
 		case 2:
-			queue = IEEE80211_TX_QUEUE_DATA1;
+			queue = 1;
 			if (acm)
 				local->wmm_acm |= BIT(4) | BIT(5);
 			break;
 		case 3:
-			queue = IEEE80211_TX_QUEUE_DATA0;
+			queue = 0;
 			if (acm)
 				local->wmm_acm |= BIT(6) | BIT(7);
 			break;
 		case 0:
 		default:
-			queue = IEEE80211_TX_QUEUE_DATA2;
+			queue = 2;
 			if (acm)
 				local->wmm_acm |= BIT(1) | BIT(2);
 			break;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 02f436a8606..e8b89c89e87 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -275,11 +275,6 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
 		}
 	}
 
-	I802_DEBUG_INC(rx->local->wme_rx_queue[tid]);
-	/* only a debug counter, sta might not be assigned properly yet */
-	if (rx->sta)
-		I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]);
-
 	rx->queue = tid;
 	/* Set skb->priority to 1d tag if highest order bit of TID is not set.
 	 * For now, set skb->priority to 0 for other cases. */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 7d4fe4a5292..631943e8af8 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -257,7 +257,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		 * sta_rx_agg_session_timer_expired for useage */
 		sta->timer_to_tid[i] = i;
 		/* tid to tx queue: initialize according to HW (0 is valid) */
-		sta->tid_to_tx_q[i] = local->hw.queues;
+		sta->tid_to_tx_q[i] = local->hw.queues + local->hw.ampdu_queues;
 		/* rx */
 		sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE;
 		sta->ampdu_mlme.tid_rx[i] = NULL;
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 64faa3dc488..5eddf1f32ed 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -19,16 +19,22 @@
 #include "wme.h"
 
 /* maximum number of hardware queues we support. */
-#define TC_80211_MAX_QUEUES 16
+#define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES)
+/* current number of hardware queues we support. */
+#define QD_NUM(hw) ((hw)->queues + (hw)->ampdu_queues)
 
+/*
+ * Default mapping in classifier to work with default
+ * queue setup.
+ */
 const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 };
 
 struct ieee80211_sched_data
 {
-	unsigned long qdisc_pool[BITS_TO_LONGS(TC_80211_MAX_QUEUES)];
+	unsigned long qdisc_pool[BITS_TO_LONGS(QD_MAX_QUEUES)];
 	struct tcf_proto *filter_list;
-	struct Qdisc *queues[TC_80211_MAX_QUEUES];
-	struct sk_buff_head requeued[TC_80211_MAX_QUEUES];
+	struct Qdisc *queues[QD_MAX_QUEUES];
+	struct sk_buff_head requeued[QD_MAX_QUEUES];
 };
 
 static const char llc_ip_hdr[8] = {0xAA, 0xAA, 0x3, 0, 0, 0, 0x08, 0};
@@ -95,7 +101,7 @@ static inline int wme_downgrade_ac(struct sk_buff *skb)
 
 /* positive return value indicates which queue to use
  * negative return value indicates to drop the frame */
-static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd)
+static int classify80211(struct sk_buff *skb, struct Qdisc *qd)
 {
 	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -106,7 +112,7 @@ static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd)
 	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) {
 		/* management frames go on AC_VO queue, but are sent
 		* without QoS control fields */
-		return IEEE80211_TX_QUEUE_DATA0;
+		return 0;
 	}
 
 	if (0 /* injected */) {
@@ -141,14 +147,16 @@ static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd)
 static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 {
 	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_hw *hw = &local->hw;
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
 	struct ieee80211_tx_packet_data *pkt_data =
 		(struct ieee80211_tx_packet_data *) skb->cb;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	unsigned short fc = le16_to_cpu(hdr->frame_control);
 	struct Qdisc *qdisc;
-	int err, queue;
 	struct sta_info *sta;
+	int err;
+	u16 queue;
 	u8 tid;
 
 	if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) {
@@ -158,7 +166,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
 		if (sta) {
 			int ampdu_queue = sta->tid_to_tx_q[tid];
-			if ((ampdu_queue < local->hw.queues) &&
+			if ((ampdu_queue < QD_NUM(hw)) &&
 			    test_bit(ampdu_queue, q->qdisc_pool)) {
 				queue = ampdu_queue;
 				pkt_data->flags |= IEEE80211_TXPD_AMPDU;
@@ -174,6 +182,9 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 
 	queue = classify80211(skb, qd);
 
+	if (unlikely(queue >= local->hw.queues))
+		queue = local->hw.queues - 1;
+
 	/* now we know the 1d priority, fill in the QoS header if there is one
 	 */
 	if (WLAN_FC_IS_QOS_DATA(fc)) {
@@ -193,8 +204,8 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 		sta = sta_info_get(local, hdr->addr1);
 		if (sta) {
 			int ampdu_queue = sta->tid_to_tx_q[tid];
-			if ((ampdu_queue < local->hw.queues) &&
-				test_bit(ampdu_queue, q->qdisc_pool)) {
+			if ((ampdu_queue < QD_NUM(hw)) &&
+			    test_bit(ampdu_queue, q->qdisc_pool)) {
 				queue = ampdu_queue;
 				pkt_data->flags |= IEEE80211_TXPD_AMPDU;
 			} else {
@@ -205,17 +216,6 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 		rcu_read_unlock();
 	}
 
-	if (unlikely(queue >= local->hw.queues)) {
-#if 0
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "%s - queue=%d (hw does not "
-			       "support) -> %d\n",
-			       __func__, queue, local->hw.queues - 1);
-		}
-#endif
-		queue = local->hw.queues - 1;
-	}
-
 	if (unlikely(queue < 0)) {
 			kfree_skb(skb);
 			err = NET_XMIT_DROP;
@@ -270,7 +270,7 @@ static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc* qd)
 	int queue;
 
 	/* check all the h/w queues in numeric/priority order */
-	for (queue = 0; queue < hw->queues; queue++) {
+	for (queue = 0; queue < QD_NUM(hw); queue++) {
 		/* see if there is room in this hardware queue */
 		if ((test_bit(IEEE80211_LINK_STATE_XOFF,
 				&local->state[queue])) ||
@@ -308,7 +308,7 @@ static void wme_qdiscop_reset(struct Qdisc* qd)
 
 	/* QUESTION: should we have some hardware flush functionality here? */
 
-	for (queue = 0; queue < hw->queues; queue++) {
+	for (queue = 0; queue < QD_NUM(hw); queue++) {
 		skb_queue_purge(&q->requeued[queue]);
 		qdisc_reset(q->queues[queue]);
 	}
@@ -326,7 +326,7 @@ static void wme_qdiscop_destroy(struct Qdisc* qd)
 	tcf_destroy_chain(q->filter_list);
 	q->filter_list = NULL;
 
-	for (queue=0; queue < hw->queues; queue++) {
+	for (queue = 0; queue < QD_NUM(hw); queue++) {
 		skb_queue_purge(&q->requeued[queue]);
 		qdisc_destroy(q->queues[queue]);
 		q->queues[queue] = &noop_qdisc;
@@ -337,17 +337,6 @@ static void wme_qdiscop_destroy(struct Qdisc* qd)
 /* called whenever parameters are updated on existing qdisc */
 static int wme_qdiscop_tune(struct Qdisc *qd, struct nlattr *opt)
 {
-/*	struct ieee80211_sched_data *q = qdisc_priv(qd);
-*/
-	/* check our options block is the right size */
-	/* copy any options to our local structure */
-/*	Ignore options block for now - always use static mapping
-	struct tc_ieee80211_qopt *qopt = nla_data(opt);
-
-	if (opt->nla_len < nla_attr_size(sizeof(*qopt)))
-		return -EINVAL;
-	memcpy(q->tag2queue, qopt->tag2queue, sizeof(qopt->tag2queue));
-*/
 	return 0;
 }
 
@@ -358,7 +347,7 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
 	struct net_device *dev = qd->dev;
 	struct ieee80211_local *local;
-	int queues;
+	struct ieee80211_hw *hw;
 	int err = 0, i;
 
 	/* check that device is a mac80211 device */
@@ -366,29 +355,26 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 	    dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid)
 		return -EINVAL;
 
-	/* check this device is an ieee80211 master type device */
-	if (dev->type != ARPHRD_IEEE80211)
+	local = wdev_priv(dev->ieee80211_ptr);
+	hw = &local->hw;
+
+	/* only allow on master dev */
+	if (dev != local->mdev)
 		return -EINVAL;
 
-	/* check that there is no qdisc currently attached to device
-	 * this ensures that we will be the root qdisc. (I can't find a better
-	 * way to test this explicitly) */
-	if (dev->qdisc_sleeping != &noop_qdisc)
+	/* ensure that we are root qdisc */
+	if (qd->parent != TC_H_ROOT)
 		return -EINVAL;
 
 	if (qd->flags & TCQ_F_INGRESS)
 		return -EINVAL;
 
-	local = wdev_priv(dev->ieee80211_ptr);
-	queues = local->hw.queues;
-
 	/* if options were passed in, set them */
-	if (opt) {
+	if (opt)
 		err = wme_qdiscop_tune(qd, opt);
-	}
 
 	/* create child queues */
-	for (i = 0; i < queues; i++) {
+	for (i = 0; i < QD_NUM(hw); i++) {
 		skb_queue_head_init(&q->requeued[i]);
 		q->queues[i] = qdisc_create_dflt(qd->dev, &pfifo_qdisc_ops,
 						 qd->handle);
@@ -398,8 +384,8 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 		}
 	}
 
-	/* reserve all legacy QoS queues */
-	for (i = 0; i < min(IEEE80211_TX_QUEUE_DATA4, queues); i++)
+	/* non-aggregation queues: reserve/mark as used */
+	for (i = 0; i < local->hw.queues; i++)
 		set_bit(i, q->qdisc_pool);
 
 	return err;
@@ -407,16 +393,6 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 
 static int wme_qdiscop_dump(struct Qdisc *qd, struct sk_buff *skb)
 {
-/*	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	unsigned char *p = skb->tail;
-	struct tc_ieee80211_qopt opt;
-
-	memcpy(&opt.tag2queue, q->tag2queue, TC_80211_MAX_TAG + 1);
-	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
-*/	return skb->len;
-/*
-nla_put_failure:
-	skb_trim(skb, p - skb->data);*/
 	return -1;
 }
 
@@ -429,7 +405,7 @@ static int wme_classop_graft(struct Qdisc *qd, unsigned long arg,
 	struct ieee80211_hw *hw = &local->hw;
 	unsigned long queue = arg - 1;
 
-	if (queue >= hw->queues)
+	if (queue >= QD_NUM(hw))
 		return -EINVAL;
 
 	if (!new)
@@ -453,7 +429,7 @@ wme_classop_leaf(struct Qdisc *qd, unsigned long arg)
 	struct ieee80211_hw *hw = &local->hw;
 	unsigned long queue = arg - 1;
 
-	if (queue >= hw->queues)
+	if (queue >= QD_NUM(hw))
 		return NULL;
 
 	return q->queues[queue];
@@ -466,7 +442,7 @@ static unsigned long wme_classop_get(struct Qdisc *qd, u32 classid)
 	struct ieee80211_hw *hw = &local->hw;
 	unsigned long queue = TC_H_MIN(classid);
 
-	if (queue - 1 >= hw->queues)
+	if (queue - 1 >= QD_NUM(hw))
 		return 0;
 
 	return queue;
@@ -492,7 +468,7 @@ static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent,
 	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 
-	if (cl - 1 > hw->queues)
+	if (cl - 1 > QD_NUM(hw))
 		return -ENOENT;
 
 	/* TODO: put code to program hardware queue parameters here,
@@ -509,7 +485,7 @@ static int wme_classop_delete(struct Qdisc *qd, unsigned long cl)
 	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 
-	if (cl - 1 > hw->queues)
+	if (cl - 1 > QD_NUM(hw))
 		return -ENOENT;
 	return 0;
 }
@@ -522,7 +498,7 @@ static int wme_classop_dump_class(struct Qdisc *qd, unsigned long cl,
 	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 
-	if (cl - 1 > hw->queues)
+	if (cl - 1 > QD_NUM(hw))
 		return -ENOENT;
 	tcm->tcm_handle = TC_H_MIN(cl);
 	tcm->tcm_parent = qd->handle;
@@ -540,7 +516,7 @@ static void wme_classop_walk(struct Qdisc *qd, struct qdisc_walker *arg)
 	if (arg->stop)
 		return;
 
-	for (queue = 0; queue < hw->queues; queue++) {
+	for (queue = 0; queue < QD_NUM(hw); queue++) {
 		if (arg->count < arg->skip) {
 			arg->count++;
 			continue;
@@ -657,10 +633,13 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 	DECLARE_MAC_BUF(mac);
 
 	/* prepare the filter and save it for the SW queue
-	 * matching the recieved HW queue */
+	 * matching the received HW queue */
+
+	if (!local->hw.ampdu_queues)
+		return -EPERM;
 
 	/* try to get a Qdisc from the pool */
-	for (i = IEEE80211_TX_QUEUE_BEACON; i < local->hw.queues; i++)
+	for (i = local->hw.queues; i < QD_NUM(&local->hw); i++)
 		if (!test_and_set_bit(i, q->qdisc_pool)) {
 			ieee80211_stop_queue(local_to_hw(local), i);
 			sta->tid_to_tx_q[tid] = i;
@@ -689,13 +668,14 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 				   struct sta_info *sta, u16 tid,
 				   u8 requeue)
 {
+	struct ieee80211_hw *hw = &local->hw;
 	struct ieee80211_sched_data *q =
 		qdisc_priv(local->mdev->qdisc_sleeping);
 	int agg_queue = sta->tid_to_tx_q[tid];
 
 	/* return the qdisc to the pool */
 	clear_bit(agg_queue, q->qdisc_pool);
-	sta->tid_to_tx_q[tid] = local->hw.queues;
+	sta->tid_to_tx_q[tid] = QD_NUM(hw);
 
 	if (requeue)
 		ieee80211_requeue(local, agg_queue);
-- 
cgit v1.2.3-70-g09d2


From 3df5ee60f1ee559b1417397461891f8b483e8089 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Thu, 1 May 2008 17:07:32 -0400
Subject: wireless: fix warning introduced by "mac80211: QoS related cleanups"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/mac80211/wme.c: In function ‘wme_qdiscop_enqueue’:
net/mac80211/wme.c:219: warning: comparison is always false due to limited range of data type

drivers/net/wireless/p54/p54common.c: In function ‘p54_conf_tx’:
drivers/net/wireless/p54/p54common.c:947: warning: comparison is always false due to limited range of data type

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/p54/p54common.c |  2 +-
 net/mac80211/wme.c                   | 23 +++++++++--------------
 2 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 34b91ccd8ae..33d608a60d7 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -944,7 +944,7 @@ static int p54_conf_tx(struct ieee80211_hw *dev, u16 queue,
 	vdcf = (struct p54_tx_control_vdcf *)(((struct p54_control_hdr *)
 		((void *)priv->cached_vdcf + priv->tx_hdr_len))->data);
 
-	if ((params) && !((queue < 0) || (queue > 4))) {
+	if ((params) && !(queue > 4)) {
 		P54_SET_QUEUE(vdcf->queue[queue], params->aifs,
 			params->cw_min, params->cw_max, params->txop);
 	} else
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 5eddf1f32ed..b1e20ca03ff 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -216,20 +216,15 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 		rcu_read_unlock();
 	}
 
-	if (unlikely(queue < 0)) {
-			kfree_skb(skb);
-			err = NET_XMIT_DROP;
-	} else {
-		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
-		pkt_data->queue = (unsigned int) queue;
-		qdisc = q->queues[queue];
-		err = qdisc->enqueue(skb, qdisc);
-		if (err == NET_XMIT_SUCCESS) {
-			qd->q.qlen++;
-			qd->bstats.bytes += skb->len;
-			qd->bstats.packets++;
-			return NET_XMIT_SUCCESS;
-		}
+	tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
+	pkt_data->queue = (unsigned int) queue;
+	qdisc = q->queues[queue];
+	err = qdisc->enqueue(skb, qdisc);
+	if (err == NET_XMIT_SUCCESS) {
+		qd->q.qlen++;
+		qd->bstats.bytes += skb->len;
+		qd->bstats.packets++;
+		return NET_XMIT_SUCCESS;
 	}
 	qd->qstats.drops++;
 	return err;
-- 
cgit v1.2.3-70-g09d2


From 5c5e12898af0978a780991950be12d0d73c02f04 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 3 May 2008 00:44:09 +0200
Subject: mac80211: fix wme code

In commit e100bb64bf7cdeae7f742a65ee1985649a7fd1b4 (mac80211:
QoS related cleanups) I accidentally changed a variable from
int to u16 causing a warning that a comparison for < 0 was always
false. John thought this was a missing deletion of code and removed
the warning by deleting the never executed branch of code in commit
3df5ee60f1ee559b1417397461891f8b483e8089 (wireless: fix warning
introduced by "mac80211: QoS related cleanups") but the problem really
was my mistake of using a u16 variable for the queue variable when
that variable can also contain an error code. This patch restores
the original code and variable type.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wme.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index b1e20ca03ff..8ffff27fe00 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -155,8 +155,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 	unsigned short fc = le16_to_cpu(hdr->frame_control);
 	struct Qdisc *qdisc;
 	struct sta_info *sta;
-	int err;
-	u16 queue;
+	int err, queue;
 	u8 tid;
 
 	if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) {
@@ -216,15 +215,20 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 		rcu_read_unlock();
 	}
 
-	tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
-	pkt_data->queue = (unsigned int) queue;
-	qdisc = q->queues[queue];
-	err = qdisc->enqueue(skb, qdisc);
-	if (err == NET_XMIT_SUCCESS) {
-		qd->q.qlen++;
-		qd->bstats.bytes += skb->len;
-		qd->bstats.packets++;
-		return NET_XMIT_SUCCESS;
+	if (unlikely(queue < 0)) {
+			kfree_skb(skb);
+			err = NET_XMIT_DROP;
+	} else {
+		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
+		pkt_data->queue = (unsigned int) queue;
+		qdisc = q->queues[queue];
+		err = qdisc->enqueue(skb, qdisc);
+		if (err == NET_XMIT_SUCCESS) {
+			qd->q.qlen++;
+			qd->bstats.bytes += skb->len;
+			qd->bstats.packets++;
+			return NET_XMIT_SUCCESS;
+		}
 	}
 	qd->qstats.drops++;
 	return err;
-- 
cgit v1.2.3-70-g09d2


From d364d9276b54af16fcb4db83f1315b620daec102 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 9 May 2008 15:13:26 -0700
Subject: sctp: Bring SCTP_DELAYED_ACK socket option into API compliance

Brings delayed_ack socket option set/get into line with the latest ietf
socket extensions API draft, while maintaining backwards compatibility.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |   4 +
 include/net/sctp/user.h    |  28 ++++--
 net/sctp/associola.c       |   3 +
 net/sctp/sm_sideeffect.c   |  17 +++-
 net/sctp/socket.c          | 242 ++++++++++++++++++++++++++++-----------------
 5 files changed, 194 insertions(+), 100 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0ce0443c5b7..e11151702be 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -300,6 +300,7 @@ struct sctp_sock {
 
 	/* The default SACK delay timeout for new associations. */
 	__u32 sackdelay;
+	__u32 sackfreq;
 
 	/* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */
 	__u32 param_flags;
@@ -938,6 +939,7 @@ struct sctp_transport {
 
 	/* SACK delay timeout */
 	unsigned long sackdelay;
+	__u32 sackfreq;
 
 	/* When was the last time (in jiffies) that we heard from this
 	 * transport?  We use this to pick new active and retran paths.
@@ -1542,6 +1544,7 @@ struct sctp_association {
 		 *             : SACK's are not delayed (see Section 6).
 		 */
 		__u8    sack_needed;     /* Do we need to sack the peer? */
+		__u32	sack_cnt;
 
 		/* These are capabilities which our peer advertised.  */
 		__u8	ecn_capable;	 /* Can peer do ECN? */
@@ -1651,6 +1654,7 @@ struct sctp_association {
 
 	/* SACK delay timeout */
 	unsigned long sackdelay;
+	__u32 sackfreq;
 
 
 	unsigned long timeouts[SCTP_NUM_TIMEOUT_TYPES];
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 9619b9d35c9..31371d2553e 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -93,8 +93,9 @@ enum sctp_optname {
 #define SCTP_STATUS SCTP_STATUS
 	SCTP_GET_PEER_ADDR_INFO,
 #define SCTP_GET_PEER_ADDR_INFO SCTP_GET_PEER_ADDR_INFO
-	SCTP_DELAYED_ACK_TIME,
-#define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK_TIME
+	SCTP_DELAYED_ACK,
+#define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK
+#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK
 	SCTP_CONTEXT,	/* Receive Context */
 #define SCTP_CONTEXT SCTP_CONTEXT
 	SCTP_FRAGMENT_INTERLEAVE,
@@ -618,13 +619,26 @@ struct sctp_authkeyid {
 };
 
 
-/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/*
+ * 7.1.23.  Get or set delayed ack timer (SCTP_DELAYED_SACK)
  *
- *   This options will get or set the delayed ack timer.  The time is set
- *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
- *   endpoints default delayed ack timer value.  If the assoc_id field is
- *   non-zero, then the set or get effects the specified association.
+ * This option will effect the way delayed acks are performed.  This
+ * option allows you to get or set the delayed ack time, in
+ * milliseconds.  It also allows changing the delayed ack frequency.
+ * Changing the frequency to 1 disables the delayed sack algorithm.  If
+ * the assoc_id is 0, then this sets or gets the endpoints default
+ * values.  If the assoc_id field is non-zero, then the set or get
+ * effects the specified association for the one to many model (the
+ * assoc_id field is ignored by the one to one model).  Note that if
+ * sack_delay or sack_freq are 0 when setting this option, then the
+ * current values will remain unchanged.
  */
+struct sctp_sack_info {
+	sctp_assoc_t	sack_assoc_id;
+	uint32_t	sack_delay;
+	uint32_t	sack_freq;
+};
+
 struct sctp_assoc_value {
     sctp_assoc_t            assoc_id;
     uint32_t                assoc_value;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b4cd2b71953..7b79d1e781a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -136,6 +136,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
 	/* Set association default SACK delay */
 	asoc->sackdelay = msecs_to_jiffies(sp->sackdelay);
+	asoc->sackfreq = sp->sackfreq;
 
 	/* Set the association default flags controlling
 	 * Heartbeat, SACK delay, and Path MTU Discovery.
@@ -261,6 +262,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	 * already received one packet.]
 	 */
 	asoc->peer.sack_needed = 1;
+	asoc->peer.sack_cnt = 0;
 
 	/* Assume that the peer will tell us if he recognizes ASCONF
 	 * as part of INIT exchange.
@@ -615,6 +617,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	 * association configured value.
 	 */
 	peer->sackdelay = asoc->sackdelay;
+	peer->sackfreq = asoc->sackfreq;
 
 	/* Enable/disable heartbeat, SACK delay, and path MTU discovery
 	 * based on association setting.
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 23a9f1a95b7..b083312c725 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -190,20 +190,28 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
 	 * unacknowledged DATA chunk. ...
 	 */
 	if (!asoc->peer.sack_needed) {
-		/* We will need a SACK for the next packet.  */
-		asoc->peer.sack_needed = 1;
+		asoc->peer.sack_cnt++;
 
 		/* Set the SACK delay timeout based on the
 		 * SACK delay for the last transport
 		 * data was received from, or the default
 		 * for the association.
 		 */
-		if (trans)
+		if (trans) {
+			/* We will need a SACK for the next packet.  */
+			if (asoc->peer.sack_cnt >= trans->sackfreq - 1)
+				asoc->peer.sack_needed = 1;
+
 			asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] =
 				trans->sackdelay;
-		else
+		} else {
+			/* We will need a SACK for the next packet.  */
+			if (asoc->peer.sack_cnt >= asoc->sackfreq - 1)
+				asoc->peer.sack_needed = 1;
+
 			asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] =
 				asoc->sackdelay;
+		}
 
 		/* Restart the SACK timer. */
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
@@ -216,6 +224,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
 			goto nomem;
 
 		asoc->peer.sack_needed = 0;
+		asoc->peer.sack_cnt = 0;
 
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(sack));
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e7e3baf7009..66985871401 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2305,74 +2305,98 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
 	return 0;
 }
 
-/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
- *
- *   This options will get or set the delayed ack timer.  The time is set
- *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
- *   endpoints default delayed ack timer value.  If the assoc_id field is
- *   non-zero, then the set or get effects the specified association.
- *
- *   struct sctp_assoc_value {
- *       sctp_assoc_t            assoc_id;
- *       uint32_t                assoc_value;
- *   };
- *
- *     assoc_id    - This parameter, indicates which association the
- *                   user is preforming an action upon. Note that if
- *                   this field's value is zero then the endpoints
- *                   default value is changed (effecting future
- *                   associations only).
- *
- *     assoc_value - This parameter contains the number of milliseconds
- *                   that the user is requesting the delayed ACK timer
- *                   be set to. Note that this value is defined in
- *                   the standard to be between 200 and 500 milliseconds.
- *
- *                   Note: a value of zero will leave the value alone,
- *                   but disable SACK delay. A non-zero value will also
- *                   enable SACK delay.
+/*
+ * 7.1.23.  Get or set delayed ack timer (SCTP_DELAYED_SACK)
+ *
+ * This option will effect the way delayed acks are performed.  This
+ * option allows you to get or set the delayed ack time, in
+ * milliseconds.  It also allows changing the delayed ack frequency.
+ * Changing the frequency to 1 disables the delayed sack algorithm.  If
+ * the assoc_id is 0, then this sets or gets the endpoints default
+ * values.  If the assoc_id field is non-zero, then the set or get
+ * effects the specified association for the one to many model (the
+ * assoc_id field is ignored by the one to one model).  Note that if
+ * sack_delay or sack_freq are 0 when setting this option, then the
+ * current values will remain unchanged.
+ *
+ * struct sctp_sack_info {
+ *     sctp_assoc_t            sack_assoc_id;
+ *     uint32_t                sack_delay;
+ *     uint32_t                sack_freq;
+ * };
+ *
+ * sack_assoc_id -  This parameter, indicates which association the user
+ *    is performing an action upon.  Note that if this field's value is
+ *    zero then the endpoints default value is changed (effecting future
+ *    associations only).
+ *
+ * sack_delay -  This parameter contains the number of milliseconds that
+ *    the user is requesting the delayed ACK timer be set to.  Note that
+ *    this value is defined in the standard to be between 200 and 500
+ *    milliseconds.
+ *
+ * sack_freq -  This parameter contains the number of packets that must
+ *    be received before a sack is sent without waiting for the delay
+ *    timer to expire.  The default value for this is 2, setting this
+ *    value to 1 will disable the delayed sack algorithm.
  */
 
-static int sctp_setsockopt_delayed_ack_time(struct sock *sk,
+static int sctp_setsockopt_delayed_ack(struct sock *sk,
 					    char __user *optval, int optlen)
 {
-	struct sctp_assoc_value  params;
+	struct sctp_sack_info    params;
 	struct sctp_transport   *trans = NULL;
 	struct sctp_association *asoc = NULL;
 	struct sctp_sock        *sp = sctp_sk(sk);
 
-	if (optlen != sizeof(struct sctp_assoc_value))
-		return - EINVAL;
+	if (optlen == sizeof(struct sctp_sack_info)) {
+		if (copy_from_user(&params, optval, optlen))
+			return -EFAULT;
 
-	if (copy_from_user(&params, optval, optlen))
-		return -EFAULT;
+		if (params.sack_delay == 0 && params.sack_freq == 0)
+			return 0;
+	} else if (optlen == sizeof(struct sctp_assoc_value)) {
+		printk(KERN_WARNING "SCTP: Use of struct sctp_sack_info "
+		       "in delayed_ack socket option deprecated\n");
+		printk(KERN_WARNING "SCTP: struct sctp_sack_info instead\n");
+		if (copy_from_user(&params, optval, optlen))
+			return -EFAULT;
+
+		if (params.sack_delay == 0)
+			params.sack_freq = 1;
+		else
+			params.sack_freq = 0;
+	} else
+		return - EINVAL;
 
 	/* Validate value parameter. */
-	if (params.assoc_value > 500)
+	if (params.sack_delay > 500)
 		return -EINVAL;
 
-	/* Get association, if assoc_id != 0 and the socket is a one
+	/* Get association, if sack_assoc_id != 0 and the socket is a one
 	 * to many style socket, and an association was not found, then
 	 * the id was invalid.
 	 */
-	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc && params.assoc_id && sctp_style(sk, UDP))
+	asoc = sctp_id2assoc(sk, params.sack_assoc_id);
+	if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP))
 		return -EINVAL;
 
-	if (params.assoc_value) {
+	if (params.sack_delay) {
 		if (asoc) {
 			asoc->sackdelay =
-				msecs_to_jiffies(params.assoc_value);
+				msecs_to_jiffies(params.sack_delay);
 			asoc->param_flags =
 				(asoc->param_flags & ~SPP_SACKDELAY) |
 				SPP_SACKDELAY_ENABLE;
 		} else {
-			sp->sackdelay = params.assoc_value;
+			sp->sackdelay = params.sack_delay;
 			sp->param_flags =
 				(sp->param_flags & ~SPP_SACKDELAY) |
 				SPP_SACKDELAY_ENABLE;
 		}
-	} else {
+	}
+
+	if (params.sack_freq == 1) {
 		if (asoc) {
 			asoc->param_flags =
 				(asoc->param_flags & ~SPP_SACKDELAY) |
@@ -2382,22 +2406,40 @@ static int sctp_setsockopt_delayed_ack_time(struct sock *sk,
 				(sp->param_flags & ~SPP_SACKDELAY) |
 				SPP_SACKDELAY_DISABLE;
 		}
+	} else if (params.sack_freq > 1) {
+		if (asoc) {
+			asoc->sackfreq = params.sack_freq;
+			asoc->param_flags =
+				(asoc->param_flags & ~SPP_SACKDELAY) |
+				SPP_SACKDELAY_ENABLE;
+		} else {
+			sp->sackfreq = params.sack_freq;
+			sp->param_flags =
+				(sp->param_flags & ~SPP_SACKDELAY) |
+				SPP_SACKDELAY_ENABLE;
+		}
 	}
 
 	/* If change is for association, also apply to each transport. */
 	if (asoc) {
 		list_for_each_entry(trans, &asoc->peer.transport_addr_list,
 				transports) {
-			if (params.assoc_value) {
+			if (params.sack_delay) {
 				trans->sackdelay =
-					msecs_to_jiffies(params.assoc_value);
+					msecs_to_jiffies(params.sack_delay);
 				trans->param_flags =
 					(trans->param_flags & ~SPP_SACKDELAY) |
 					SPP_SACKDELAY_ENABLE;
-			} else {
+			}
+			if (params.sack_delay == 1) {
 				trans->param_flags =
 					(trans->param_flags & ~SPP_SACKDELAY) |
 					SPP_SACKDELAY_DISABLE;
+			} else if (params.sack_freq > 1) {
+				trans->sackfreq = params.sack_freq;
+				trans->param_flags =
+					(trans->param_flags & ~SPP_SACKDELAY) |
+					SPP_SACKDELAY_ENABLE;
 			}
 		}
 	}
@@ -3186,8 +3228,8 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen);
 		break;
 
-	case SCTP_DELAYED_ACK_TIME:
-		retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen);
+	case SCTP_DELAYED_ACK:
+		retval = sctp_setsockopt_delayed_ack(sk, optval, optlen);
 		break;
 	case SCTP_PARTIAL_DELIVERY_POINT:
 		retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen);
@@ -3446,6 +3488,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->pathmaxrxt  = sctp_max_retrans_path;
 	sp->pathmtu     = 0; // allow default discovery
 	sp->sackdelay   = sctp_sack_timeout;
+	sp->sackfreq	= 3;
 	sp->param_flags = SPP_HB_ENABLE |
 			  SPP_PMTUD_ENABLE |
 			  SPP_SACKDELAY_ENABLE;
@@ -3999,70 +4042,91 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 	return 0;
 }
 
-/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
- *
- *   This options will get or set the delayed ack timer.  The time is set
- *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
- *   endpoints default delayed ack timer value.  If the assoc_id field is
- *   non-zero, then the set or get effects the specified association.
- *
- *   struct sctp_assoc_value {
- *       sctp_assoc_t            assoc_id;
- *       uint32_t                assoc_value;
- *   };
+/*
+ * 7.1.23.  Get or set delayed ack timer (SCTP_DELAYED_SACK)
+ *
+ * This option will effect the way delayed acks are performed.  This
+ * option allows you to get or set the delayed ack time, in
+ * milliseconds.  It also allows changing the delayed ack frequency.
+ * Changing the frequency to 1 disables the delayed sack algorithm.  If
+ * the assoc_id is 0, then this sets or gets the endpoints default
+ * values.  If the assoc_id field is non-zero, then the set or get
+ * effects the specified association for the one to many model (the
+ * assoc_id field is ignored by the one to one model).  Note that if
+ * sack_delay or sack_freq are 0 when setting this option, then the
+ * current values will remain unchanged.
+ *
+ * struct sctp_sack_info {
+ *     sctp_assoc_t            sack_assoc_id;
+ *     uint32_t                sack_delay;
+ *     uint32_t                sack_freq;
+ * };
  *
- *     assoc_id    - This parameter, indicates which association the
- *                   user is preforming an action upon. Note that if
- *                   this field's value is zero then the endpoints
- *                   default value is changed (effecting future
- *                   associations only).
+ * sack_assoc_id -  This parameter, indicates which association the user
+ *    is performing an action upon.  Note that if this field's value is
+ *    zero then the endpoints default value is changed (effecting future
+ *    associations only).
  *
- *     assoc_value - This parameter contains the number of milliseconds
- *                   that the user is requesting the delayed ACK timer
- *                   be set to. Note that this value is defined in
- *                   the standard to be between 200 and 500 milliseconds.
+ * sack_delay -  This parameter contains the number of milliseconds that
+ *    the user is requesting the delayed ACK timer be set to.  Note that
+ *    this value is defined in the standard to be between 200 and 500
+ *    milliseconds.
  *
- *                   Note: a value of zero will leave the value alone,
- *                   but disable SACK delay. A non-zero value will also
- *                   enable SACK delay.
+ * sack_freq -  This parameter contains the number of packets that must
+ *    be received before a sack is sent without waiting for the delay
+ *    timer to expire.  The default value for this is 2, setting this
+ *    value to 1 will disable the delayed sack algorithm.
  */
-static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len,
+static int sctp_getsockopt_delayed_ack(struct sock *sk, int len,
 					    char __user *optval,
 					    int __user *optlen)
 {
-	struct sctp_assoc_value  params;
+	struct sctp_sack_info    params;
 	struct sctp_association *asoc = NULL;
 	struct sctp_sock        *sp = sctp_sk(sk);
 
-	if (len < sizeof(struct sctp_assoc_value))
-		return - EINVAL;
+	if (len >= sizeof(struct sctp_sack_info)) {
+		len = sizeof(struct sctp_sack_info);
 
-	len = sizeof(struct sctp_assoc_value);
-
-	if (copy_from_user(&params, optval, len))
-		return -EFAULT;
+		if (copy_from_user(&params, optval, len))
+			return -EFAULT;
+	} else if (len == sizeof(struct sctp_assoc_value)) {
+		printk(KERN_WARNING "SCTP: Use of struct sctp_sack_info "
+		       "in delayed_ack socket option deprecated\n");
+		printk(KERN_WARNING "SCTP: struct sctp_sack_info instead\n");
+		if (copy_from_user(&params, optval, len))
+			return -EFAULT;
+	} else
+		return - EINVAL;
 
-	/* Get association, if assoc_id != 0 and the socket is a one
+	/* Get association, if sack_assoc_id != 0 and the socket is a one
 	 * to many style socket, and an association was not found, then
 	 * the id was invalid.
 	 */
-	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc && params.assoc_id && sctp_style(sk, UDP))
+	asoc = sctp_id2assoc(sk, params.sack_assoc_id);
+	if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP))
 		return -EINVAL;
 
 	if (asoc) {
 		/* Fetch association values. */
-		if (asoc->param_flags & SPP_SACKDELAY_ENABLE)
-			params.assoc_value = jiffies_to_msecs(
+		if (asoc->param_flags & SPP_SACKDELAY_ENABLE) {
+			params.sack_delay = jiffies_to_msecs(
 				asoc->sackdelay);
-		else
-			params.assoc_value = 0;
+			params.sack_freq = asoc->sackfreq;
+
+		} else {
+			params.sack_delay = 0;
+			params.sack_freq = 1;
+		}
 	} else {
 		/* Fetch socket values. */
-		if (sp->param_flags & SPP_SACKDELAY_ENABLE)
-			params.assoc_value  = sp->sackdelay;
-		else
-			params.assoc_value  = 0;
+		if (sp->param_flags & SPP_SACKDELAY_ENABLE) {
+			params.sack_delay  = sp->sackdelay;
+			params.sack_freq = sp->sackfreq;
+		} else {
+			params.sack_delay  = 0;
+			params.sack_freq = 1;
+		}
 	}
 
 	if (copy_to_user(optval, &params, len))
@@ -5218,8 +5282,8 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_peer_addr_params(sk, len, optval,
 							  optlen);
 		break;
-	case SCTP_DELAYED_ACK_TIME:
-		retval = sctp_getsockopt_delayed_ack_time(sk, len, optval,
+	case SCTP_DELAYED_ACK:
+		retval = sctp_getsockopt_delayed_ack(sk, len, optval,
 							  optlen);
 		break;
 	case SCTP_INITMSG:
-- 
cgit v1.2.3-70-g09d2


From 88a0a948e752bb9b617a8c55417a9fd9b0257199 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 9 May 2008 15:14:11 -0700
Subject: sctp: Support the new specification of sctp_connectx()

The specification of sctp_connectx() has been changed to return
an association id.  We've added a new socket option that will
return the association id as the return value from the setsockopt()
call.  The library that implements sctp_connectx() interface will
implement both socket options.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/user.h |  6 +++--
 net/sctp/socket.c       | 70 +++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 63 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 31371d2553e..f205b10f0ab 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -137,12 +137,14 @@ enum sctp_optname {
 #define SCTP_GET_LOCAL_ADDRS_NUM_OLD	SCTP_GET_LOCAL_ADDRS_NUM_OLD
 	SCTP_GET_LOCAL_ADDRS_OLD, 	/* Get all local addresss. */
 #define SCTP_GET_LOCAL_ADDRS_OLD	SCTP_GET_LOCAL_ADDRS_OLD
-	SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */
-#define SCTP_SOCKOPT_CONNECTX	SCTP_SOCKOPT_CONNECTX
+	SCTP_SOCKOPT_CONNECTX_OLD, /* CONNECTX old requests. */
+#define SCTP_SOCKOPT_CONNECTX_OLD	SCTP_SOCKOPT_CONNECTX_OLD
 	SCTP_GET_PEER_ADDRS, 	/* Get all peer addresss. */
 #define SCTP_GET_PEER_ADDRS	SCTP_GET_PEER_ADDRS
 	SCTP_GET_LOCAL_ADDRS, 	/* Get all local addresss. */
 #define SCTP_GET_LOCAL_ADDRS	SCTP_GET_LOCAL_ADDRS
+	SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */
+#define SCTP_SOCKOPT_CONNECTX	SCTP_SOCKOPT_CONNECTX
 };
 
 /*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 66985871401..81600eea05d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -956,7 +956,8 @@ out:
  */
 static int __sctp_connect(struct sock* sk,
 			  struct sockaddr *kaddrs,
-			  int addrs_size)
+			  int addrs_size,
+			  sctp_assoc_t *assoc_id)
 {
 	struct sctp_sock *sp;
 	struct sctp_endpoint *ep;
@@ -1111,6 +1112,8 @@ static int __sctp_connect(struct sock* sk,
 	timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
 
 	err = sctp_wait_for_connect(asoc, &timeo);
+	if (!err && assoc_id)
+		*assoc_id = asoc->assoc_id;
 
 	/* Don't free association on exit. */
 	asoc = NULL;
@@ -1128,7 +1131,8 @@ out_free:
 /* Helper for tunneling sctp_connectx() requests through sctp_setsockopt()
  *
  * API 8.9
- * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt);
+ * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt,
+ * 			sctp_assoc_t *asoc);
  *
  * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses.
  * If the sd is an IPv6 socket, the addresses passed can either be IPv4
@@ -1144,8 +1148,10 @@ out_free:
  * representation is termed a "packed array" of addresses). The caller
  * specifies the number of addresses in the array with addrcnt.
  *
- * On success, sctp_connectx() returns 0. On failure, sctp_connectx() returns
- * -1, and sets errno to the appropriate error code.
+ * On success, sctp_connectx() returns 0. It also sets the assoc_id to
+ * the association id of the new association.  On failure, sctp_connectx()
+ * returns -1, and sets errno to the appropriate error code.  The assoc_id
+ * is not touched by the kernel.
  *
  * For SCTP, the port given in each socket address must be the same, or
  * sctp_connectx() will fail, setting errno to EINVAL.
@@ -1182,11 +1188,12 @@ out_free:
  * addrs     The pointer to the addresses in user land
  * addrssize Size of the addrs buffer
  *
- * Returns 0 if ok, <0 errno code on error.
+ * Returns >=0 if ok, <0 errno code on error.
  */
-SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
+SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk,
 				      struct sockaddr __user *addrs,
-				      int addrs_size)
+				      int addrs_size,
+				      sctp_assoc_t *assoc_id)
 {
 	int err = 0;
 	struct sockaddr *kaddrs;
@@ -1209,13 +1216,46 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
 	if (__copy_from_user(kaddrs, addrs, addrs_size)) {
 		err = -EFAULT;
 	} else {
-		err = __sctp_connect(sk, kaddrs, addrs_size);
+		err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
 	}
 
 	kfree(kaddrs);
+
 	return err;
 }
 
+/*
+ * This is an older interface.  It's kept for backward compatibility
+ * to the option that doesn't provide association id.
+ */
+SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk,
+				      struct sockaddr __user *addrs,
+				      int addrs_size)
+{
+	return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL);
+}
+
+/*
+ * New interface for the API.  The since the API is done with a socket
+ * option, to make it simple we feed back the association id is as a return
+ * indication to the call.  Error is always negative and association id is
+ * always positive.
+ */
+SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
+				      struct sockaddr __user *addrs,
+				      int addrs_size)
+{
+	sctp_assoc_t assoc_id = 0;
+	int err = 0;
+
+	err = __sctp_setsockopt_connectx(sk, addrs, addrs_size, &assoc_id);
+
+	if (err)
+		return err;
+	else
+		return assoc_id;
+}
+
 /* API 3.1.4 close() - UDP Style Syntax
  * Applications use close() to perform graceful shutdown (as described in
  * Section 10.1 of [SCTP]) on ALL the associations currently represented
@@ -3206,10 +3246,18 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 					       optlen, SCTP_BINDX_REM_ADDR);
 		break;
 
+	case SCTP_SOCKOPT_CONNECTX_OLD:
+		/* 'optlen' is the size of the addresses buffer. */
+		retval = sctp_setsockopt_connectx_old(sk,
+					    (struct sockaddr __user *)optval,
+					    optlen);
+		break;
+
 	case SCTP_SOCKOPT_CONNECTX:
 		/* 'optlen' is the size of the addresses buffer. */
-		retval = sctp_setsockopt_connectx(sk, (struct sockaddr __user *)optval,
-					       optlen);
+		retval = sctp_setsockopt_connectx(sk,
+					    (struct sockaddr __user *)optval,
+					    optlen);
 		break;
 
 	case SCTP_DISABLE_FRAGMENTS:
@@ -3336,7 +3384,7 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr,
 		/* Pass correct addr len to common routine (so it knows there
 		 * is only one address being passed.
 		 */
-		err = __sctp_connect(sk, addr, af->sockaddr_len);
+		err = __sctp_connect(sk, addr, af->sockaddr_len, NULL);
 	}
 
 	sctp_release_sock(sk);
-- 
cgit v1.2.3-70-g09d2


From 20c2c1fd6c842caf70dcb1d94b9d58861949fd3d Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 9 May 2008 15:14:50 -0700
Subject: sctp: add sctp/remaddr table to complete RFC remote address table OID

Add support for RFC3873 remote address table OID.

      +--(5) sctpAssocRemAddrTable
      |   |
      |   |--(-) sctpAssocId (shared index)
      |   |
      |   +--(1) sctpAssocRemAddrType (index)
      .   |
      .   +--(2) sctpAssocRemAddr (index)
      .   |
          +--(3) sctpAssocRemAddrActive
          |
          +--(4) sctpAssocRemAddrHBActive
          |
          +--(5) sctpAssocRemAddrRTO
          |
          +--(6) sctpAssocRemAddrMaxPathRtx
          |
          +--(7) sctpAssocRemAddrRtx
          |
          +--(8) sctpAssocRemAddrStartTime

This patch places all the requsite data in /proc/net/sctp/remaddr.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h |   2 +
 net/sctp/proc.c         | 141 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/sctp/protocol.c     |   3 ++
 3 files changed, 146 insertions(+)

(limited to 'net')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 90b1e8d23b1..5672d489e92 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -179,6 +179,8 @@ int sctp_eps_proc_init(void);
 void sctp_eps_proc_exit(void);
 int sctp_assocs_proc_init(void);
 void sctp_assocs_proc_exit(void);
+int sctp_remaddr_proc_init(void);
+void sctp_remaddr_proc_exit(void);
 
 
 /*
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 0aba759cb9b..5dd89831ece 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -383,3 +383,144 @@ void sctp_assocs_proc_exit(void)
 {
 	remove_proc_entry("assocs", proc_net_sctp);
 }
+
+static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos >= sctp_assoc_hashsize)
+		return NULL;
+
+	if (*pos < 0)
+		*pos = 0;
+
+	if (*pos == 0)
+		seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX "
+				"REM_ADDR_RTX  START\n");
+
+	return (void *)pos;
+}
+
+static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	if (++*pos >= sctp_assoc_hashsize)
+		return NULL;
+
+	return pos;
+}
+
+static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v)
+{
+	return;
+}
+
+static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
+{
+	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
+	struct sctp_association *assoc;
+	struct hlist_node *node;
+	struct sctp_transport *tsp;
+	int    hash = *(loff_t *)v;
+
+	if (hash >= sctp_assoc_hashsize)
+		return -ENOMEM;
+
+	head = &sctp_assoc_hashtable[hash];
+	sctp_local_bh_disable();
+	read_lock(&head->lock);
+	sctp_for_each_hentry(epb, node, &head->chain) {
+		assoc = sctp_assoc(epb);
+		list_for_each_entry(tsp, &assoc->peer.transport_addr_list,
+					transports) {
+			/*
+			 * The remote address (ADDR)
+			 */
+			tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr);
+			seq_printf(seq, " ");
+
+			/*
+			 * The association ID (ASSOC_ID)
+			 */
+			seq_printf(seq, "%d ", tsp->asoc->assoc_id);
+
+			/*
+			 * If the Heartbeat is active (HB_ACT)
+			 * Note: 1 = Active, 0 = Inactive
+			 */
+			seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer));
+
+			/*
+			 * Retransmit time out (RTO)
+			 */
+			seq_printf(seq, "%lu ", tsp->rto);
+
+			/*
+			 * Maximum path retransmit count (PATH_MAX_RTX)
+			 */
+			seq_printf(seq, "%d ", tsp->pathmaxrxt);
+
+			/*
+			 * remote address retransmit count (REM_ADDR_RTX)
+			 * Note: We don't have a way to tally this at the moment
+			 * so lets just leave it as zero for the moment
+			 */
+			seq_printf(seq, "0 ");
+
+			/*
+			 * remote address start time (START).  This is also not
+			 * currently implemented, but we can record it with a
+			 * jiffies marker in a subsequent patch
+			 */
+			seq_printf(seq, "0");
+
+			seq_printf(seq, "\n");
+		}
+	}
+
+	read_unlock(&head->lock);
+	sctp_local_bh_enable();
+
+	return 0;
+
+}
+
+static const struct seq_operations sctp_remaddr_ops = {
+	.start = sctp_remaddr_seq_start,
+	.next  = sctp_remaddr_seq_next,
+	.stop  = sctp_remaddr_seq_stop,
+	.show  = sctp_remaddr_seq_show,
+};
+
+/* Cleanup the proc fs entry for 'remaddr' object. */
+void sctp_remaddr_proc_exit(void)
+{
+	remove_proc_entry("remaddr", proc_net_sctp);
+}
+
+static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &sctp_remaddr_ops);
+}
+
+static const struct file_operations sctp_remaddr_seq_fops = {
+	.open = sctp_remaddr_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+int __init sctp_remaddr_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+	p = create_proc_entry("remaddr", S_IRUGO, proc_net_sctp);
+	if (!p)
+		return -ENOMEM;
+	p->proc_fops = &sctp_remaddr_seq_fops;
+
+	return 0;
+}
+
+void sctp_assoc_proc_exit(void)
+{
+	remove_proc_entry("remaddr", proc_net_sctp);
+}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 0ec234b762c..b8bd9e01449 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -113,6 +113,8 @@ static __init int sctp_proc_init(void)
 		goto out_nomem;
 	if (sctp_assocs_proc_init())
 		goto out_nomem;
+	if (sctp_remaddr_proc_init())
+		goto out_nomem;
 
 	return 0;
 
@@ -129,6 +131,7 @@ static void sctp_proc_exit(void)
 	sctp_snmp_proc_exit();
 	sctp_eps_proc_exit();
 	sctp_assocs_proc_exit();
+	sctp_remaddr_proc_exit();
 
 	if (proc_net_sctp) {
 		proc_net_sctp = NULL;
-- 
cgit v1.2.3-70-g09d2


From 4e3e6dcb43c3669a8817cb3d0f920f91661afd98 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 12 May 2008 15:41:53 -0700
Subject: tipc: Enhancements to name table initialization

This patch enhances the initialization of TIPC's name table
by removing a pointless spinlock operation, and by using
kcalloc() to detect requests for an oversized name table.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ac7dfdda797..892373e498e 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1050,15 +1050,12 @@ void tipc_nametbl_dump(void)
 
 int tipc_nametbl_init(void)
 {
-	int array_size = sizeof(struct hlist_head) * tipc_nametbl_size;
-
-	table.types = kzalloc(array_size, GFP_ATOMIC);
+	table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head),
+			      GFP_ATOMIC);
 	if (!table.types)
 		return -ENOMEM;
 
-	write_lock_bh(&tipc_nametbl_lock);
 	table.local_publ_count = 0;
-	write_unlock_bh(&tipc_nametbl_lock);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 7ef43ebaa538e0cc9063cbf84593a05091bcace2 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 12 May 2008 15:42:28 -0700
Subject: tipc: Fix race condition when creating socket or native port

This patch eliminates the (very remote) chance of a crash resulting
from a partially initialized socket or native port unexpectedly
receiving a message.  Now, during the creation of a socket or native
port, the underlying generic port's lock is not released until all
initialization required to handle incoming messages has been done.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tipc/tipc_port.h |  3 ++-
 net/tipc/port.c              | 20 ++++++++++++--------
 net/tipc/ref.c               | 12 ++++++++----
 net/tipc/socket.c            |  5 ++++-
 net/tipc/subscr.c            |  1 +
 5 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h
index 11105bcc445..9923e41a821 100644
--- a/include/net/tipc/tipc_port.h
+++ b/include/net/tipc/tipc_port.h
@@ -84,7 +84,8 @@ struct tipc_port {
 u32 tipc_createport_raw(void *usr_handle,
 			u32 (*dispatcher)(struct tipc_port *, struct sk_buff *),
 			void (*wakeup)(struct tipc_port *),
-			const u32 importance);
+			const u32 importance,
+			struct tipc_port **tp_ptr);
 
 int tipc_reject_msg(struct sk_buff *buf, u32 err);
 
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 2f5806410c6..757de38fe6a 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -211,15 +211,18 @@ exit:
 }
 
 /**
- * tipc_createport_raw - create a native TIPC port
+ * tipc_createport_raw - create a generic TIPC port
  *
- * Returns local port reference
+ * Returns port reference, or 0 if unable to create it
+ *
+ * Note: The newly created port is returned in the locked state.
  */
 
 u32 tipc_createport_raw(void *usr_handle,
 			u32 (*dispatcher)(struct tipc_port *, struct sk_buff *),
 			void (*wakeup)(struct tipc_port *),
-			const u32 importance)
+			const u32 importance,
+			struct tipc_port **tp_ptr)
 {
 	struct port *p_ptr;
 	struct tipc_msg *msg;
@@ -237,7 +240,6 @@ u32 tipc_createport_raw(void *usr_handle,
 		return 0;
 	}
 
-	tipc_port_lock(ref);
 	p_ptr->publ.usr_handle = usr_handle;
 	p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
 	p_ptr->publ.ref = ref;
@@ -262,7 +264,7 @@ u32 tipc_createport_raw(void *usr_handle,
 	INIT_LIST_HEAD(&p_ptr->port_list);
 	list_add_tail(&p_ptr->port_list, &ports);
 	spin_unlock_bh(&tipc_port_list_lock);
-	tipc_port_unlock(p_ptr);
+	*tp_ptr = &p_ptr->publ;
 	return ref;
 }
 
@@ -1053,6 +1055,7 @@ int tipc_createport(u32 user_ref,
 {
 	struct user_port *up_ptr;
 	struct port *p_ptr;
+	struct tipc_port *tp_ptr;
 	u32 ref;
 
 	up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
@@ -1060,12 +1063,13 @@ int tipc_createport(u32 user_ref,
 		warn("Port creation failed, no memory\n");
 		return -ENOMEM;
 	}
-	ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance);
-	p_ptr = tipc_port_lock(ref);
-	if (!p_ptr) {
+	ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup,
+				  importance, &tp_ptr);
+	if (ref == 0) {
 		kfree(up_ptr);
 		return -ENOMEM;
 	}
+	p_ptr = (struct port *)tp_ptr;
 
 	p_ptr->user_port = up_ptr;
 	up_ptr->user_ref = user_ref;
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 89cbab24d08..a101de86824 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -142,9 +142,13 @@ void tipc_ref_table_stop(void)
 /**
  * tipc_ref_acquire - create reference to an object
  *
- * Return a unique reference value which can be translated back to the pointer
- * 'object' at a later time.  Also, pass back a pointer to the lock protecting
- * the object, but without locking it.
+ * Register an object pointer in reference table and lock the object.
+ * Returns a unique reference value that is used from then on to retrieve the
+ * object pointer, or to determine that the object has been deregistered.
+ *
+ * Note: The object is returned in the locked state so that the caller can
+ * register a partially initialized object, without running the risk that
+ * the object will be accessed before initialization is complete.
  */
 
 u32 tipc_ref_acquire(void *object, spinlock_t **lock)
@@ -178,13 +182,13 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
 		ref = (next_plus_upper & ~index_mask) + index;
 		entry->ref = ref;
 		entry->object = object;
-		spin_unlock_bh(&entry->lock);
 		*lock = &entry->lock;
 	}
 	else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
 		index = tipc_ref_table.init_point++;
 		entry = &(tipc_ref_table.entries[index]);
 		spin_lock_init(&entry->lock);
+		spin_lock_bh(&entry->lock);
 		ref = tipc_ref_table.start_mask + index;
 		entry->ref = ref;
 		entry->object = object;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 230f9ca2ad6..38f48795b40 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -188,6 +188,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol)
 	const struct proto_ops *ops;
 	socket_state state;
 	struct sock *sk;
+	struct tipc_port *tp_ptr;
 	u32 portref;
 
 	/* Validate arguments */
@@ -225,7 +226,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol)
 	/* Allocate TIPC port for socket to use */
 
 	portref = tipc_createport_raw(sk, &dispatch, &wakeupdispatch,
-				      TIPC_LOW_IMPORTANCE);
+				      TIPC_LOW_IMPORTANCE, &tp_ptr);
 	if (unlikely(portref == 0)) {
 		sk_free(sk);
 		return -ENOMEM;
@@ -241,6 +242,8 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol)
 	sk->sk_backlog_rcv = backlog_rcv;
 	tipc_sk(sk)->p = tipc_get_port(portref);
 
+	spin_unlock_bh(tp_ptr->lock);
+
 	if (sock->state == SS_READY) {
 		tipc_set_portunreturnable(portref, 1);
 		if (sock->type == SOCK_DGRAM)
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 8c01ccd3626..8f8d0a6c1c1 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -474,6 +474,7 @@ static void subscr_named_msg_event(void *usr_handle,
 		kfree(subscriber);
 		return;
 	}
+	spin_unlock_bh(subscriber->lock);
 
 	/* Establish a connection to subscriber */
 
-- 
cgit v1.2.3-70-g09d2


From ae7245cbf27ee6b6423bc363cbe01c93e57befda Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 1 May 2008 22:19:33 -0700
Subject: wireless: use get/put_unaligned_* helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh_hwmp.c |  2 +-
 net/wireless/radiotap.c  | 16 +++++++---------
 2 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 3df809222d1..23689733e29 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -26,7 +26,7 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae)
 {
 	if (ae)
 		offset += 6;
-	return le32_to_cpu(get_unaligned((__le32 *) (preq_elem + offset)));
+	return get_unaligned_le32(preq_elem + offset);
 }
 
 /* HWMP IE processing macros */
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 28fbd0b0b56..f591871a7b4 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -59,23 +59,21 @@ int ieee80211_radiotap_iterator_init(
 		return -EINVAL;
 
 	/* sanity check for allowed length and radiotap length field */
-	if (max_length < le16_to_cpu(get_unaligned(&radiotap_header->it_len)))
+	if (max_length < get_unaligned_le16(&radiotap_header->it_len))
 		return -EINVAL;
 
 	iterator->rtheader = radiotap_header;
-	iterator->max_length = le16_to_cpu(get_unaligned(
-						&radiotap_header->it_len));
+	iterator->max_length = get_unaligned_le16(&radiotap_header->it_len);
 	iterator->arg_index = 0;
-	iterator->bitmap_shifter = le32_to_cpu(get_unaligned(
-						&radiotap_header->it_present));
+	iterator->bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present);
 	iterator->arg = (u8 *)radiotap_header + sizeof(*radiotap_header);
 	iterator->this_arg = NULL;
 
 	/* find payload start allowing for extended bitmap(s) */
 
 	if (unlikely(iterator->bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT))) {
-		while (le32_to_cpu(get_unaligned((__le32 *)iterator->arg)) &
-				   (1<<IEEE80211_RADIOTAP_EXT)) {
+		while (get_unaligned_le32(iterator->arg) &
+		       (1 << IEEE80211_RADIOTAP_EXT)) {
 			iterator->arg += sizeof(u32);
 
 			/*
@@ -241,8 +239,8 @@ int ieee80211_radiotap_iterator_next(
 			if (iterator->bitmap_shifter & 1) {
 				/* b31 was set, there is more */
 				/* move to next u32 bitmap */
-				iterator->bitmap_shifter = le32_to_cpu(
-					get_unaligned(iterator->next_bitmap));
+				iterator->bitmap_shifter =
+				    get_unaligned_le32(iterator->next_bitmap);
 				iterator->next_bitmap++;
 			} else
 				/* no more bitmaps: end */
-- 
cgit v1.2.3-70-g09d2


From 8c046c8c64ba81dd87468ddaf2db4a5d926b988b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 2 May 2008 13:47:45 -0700
Subject: mac80211: tkip.c use kernel-provided infrastructure

Use kernel-provided bit rotation and unaligned access infrastructure rather
than opencoding it.

Some minor spacing adjustments as well.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tkip.c | 145 +++++++++++++++++-----------------------------------
 1 file changed, 46 insertions(+), 99 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 09093da24af..a7c3febc5a4 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -8,23 +8,22 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/netdevice.h>
+#include <asm/unaligned.h>
 
 #include <net/mac80211.h>
 #include "key.h"
 #include "tkip.h"
 #include "wep.h"
 
-
-/* TKIP key mixing functions */
-
-
 #define PHASE1_LOOP_COUNT 8
 
-
-/* 2-byte by 2-byte subset of the full AES S-box table; second part of this
- * table is identical to first part but byte-swapped */
+/*
+ * 2-byte by 2-byte subset of the full AES S-box table; second part of this
+ * table is identical to first part but byte-swapped
+ */
 static const u16 tkip_sbox[256] =
 {
 	0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154,
@@ -61,53 +60,13 @@ static const u16 tkip_sbox[256] =
 	0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A,
 };
 
-
-static inline u16 Mk16(u8 x, u8 y)
-{
-	return ((u16) x << 8) | (u16) y;
-}
-
-
-static inline u8 Hi8(u16 v)
-{
-	return v >> 8;
-}
-
-
-static inline u8 Lo8(u16 v)
-{
-	return v & 0xff;
-}
-
-
-static inline u16 Hi16(u32 v)
-{
-	return v >> 16;
-}
-
-
-static inline u16 Lo16(u32 v)
-{
-	return v & 0xffff;
-}
-
-
-static inline u16 RotR1(u16 v)
+static u16 tkipS(u16 val)
 {
-	return (v >> 1) | ((v & 0x0001) << 15);
-}
-
-
-static inline u16 tkip_S(u16 val)
-{
-	u16 a = tkip_sbox[Hi8(val)];
-
-	return tkip_sbox[Lo8(val)] ^ Hi8(a) ^ (Lo8(a) << 8);
+	return tkip_sbox[val & 0xff] ^ swab16(tkip_sbox[val >> 8]);
 }
 
-
-
-/* P1K := Phase1(TA, TK, TSC)
+/*
+ * P1K := Phase1(TA, TK, TSC)
  * TA = transmitter address (48 bits)
  * TK = dot11DefaultKeyValue or dot11KeyMappingValue (128 bits)
  * TSC = TKIP sequence counter (48 bits, only 32 msb bits used)
@@ -118,23 +77,22 @@ static void tkip_mixing_phase1(const u8 *ta, const u8 *tk, u32 tsc_IV32,
 {
 	int i, j;
 
-	p1k[0] = Lo16(tsc_IV32);
-	p1k[1] = Hi16(tsc_IV32);
-	p1k[2] = Mk16(ta[1], ta[0]);
-	p1k[3] = Mk16(ta[3], ta[2]);
-	p1k[4] = Mk16(ta[5], ta[4]);
+	p1k[0] = tsc_IV32 & 0xFFFF;
+	p1k[1] = tsc_IV32 >> 16;
+	p1k[2] = get_unaligned_le16(ta + 0);
+	p1k[3] = get_unaligned_le16(ta + 2);
+	p1k[4] = get_unaligned_le16(ta + 4);
 
 	for (i = 0; i < PHASE1_LOOP_COUNT; i++) {
 		j = 2 * (i & 1);
-		p1k[0] += tkip_S(p1k[4] ^ Mk16(tk[ 1 + j], tk[ 0 + j]));
-		p1k[1] += tkip_S(p1k[0] ^ Mk16(tk[ 5 + j], tk[ 4 + j]));
-		p1k[2] += tkip_S(p1k[1] ^ Mk16(tk[ 9 + j], tk[ 8 + j]));
-		p1k[3] += tkip_S(p1k[2] ^ Mk16(tk[13 + j], tk[12 + j]));
-		p1k[4] += tkip_S(p1k[3] ^ Mk16(tk[ 1 + j], tk[ 0 + j])) + i;
+		p1k[0] += tkipS(p1k[4] ^ get_unaligned_le16(tk + 0 + j));
+		p1k[1] += tkipS(p1k[0] ^ get_unaligned_le16(tk + 4 + j));
+		p1k[2] += tkipS(p1k[1] ^ get_unaligned_le16(tk + 8 + j));
+		p1k[3] += tkipS(p1k[2] ^ get_unaligned_le16(tk + 12 + j));
+		p1k[4] += tkipS(p1k[3] ^ get_unaligned_le16(tk + 0 + j)) + i;
 	}
 }
 
-
 static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16,
 			       u8 *rc4key)
 {
@@ -148,31 +106,29 @@ static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16,
 	ppk[4] = p1k[4];
 	ppk[5] = p1k[4] + tsc_IV16;
 
-	ppk[0] += tkip_S(ppk[5] ^ Mk16(tk[ 1], tk[ 0]));
-	ppk[1] += tkip_S(ppk[0] ^ Mk16(tk[ 3], tk[ 2]));
-	ppk[2] += tkip_S(ppk[1] ^ Mk16(tk[ 5], tk[ 4]));
-	ppk[3] += tkip_S(ppk[2] ^ Mk16(tk[ 7], tk[ 6]));
-	ppk[4] += tkip_S(ppk[3] ^ Mk16(tk[ 9], tk[ 8]));
-	ppk[5] += tkip_S(ppk[4] ^ Mk16(tk[11], tk[10]));
-	ppk[0] +=  RotR1(ppk[5] ^ Mk16(tk[13], tk[12]));
-	ppk[1] +=  RotR1(ppk[0] ^ Mk16(tk[15], tk[14]));
-	ppk[2] +=  RotR1(ppk[1]);
-	ppk[3] +=  RotR1(ppk[2]);
-	ppk[4] +=  RotR1(ppk[3]);
-	ppk[5] +=  RotR1(ppk[4]);
-
-	rc4key[0] = Hi8(tsc_IV16);
-	rc4key[1] = (Hi8(tsc_IV16) | 0x20) & 0x7f;
-	rc4key[2] = Lo8(tsc_IV16);
-	rc4key[3] = Lo8((ppk[5] ^ Mk16(tk[1], tk[0])) >> 1);
-
-	for (i = 0; i < 6; i++) {
-		rc4key[4 + 2 * i] = Lo8(ppk[i]);
-		rc4key[5 + 2 * i] = Hi8(ppk[i]);
-	}
+	ppk[0] += tkipS(ppk[5] ^ get_unaligned_le16(tk + 0));
+	ppk[1] += tkipS(ppk[0] ^ get_unaligned_le16(tk + 2));
+	ppk[2] += tkipS(ppk[1] ^ get_unaligned_le16(tk + 4));
+	ppk[3] += tkipS(ppk[2] ^ get_unaligned_le16(tk + 6));
+	ppk[4] += tkipS(ppk[3] ^ get_unaligned_le16(tk + 8));
+	ppk[5] += tkipS(ppk[4] ^ get_unaligned_le16(tk + 10));
+	ppk[0] += ror16(ppk[5] ^ get_unaligned_le16(tk + 12), 1);
+	ppk[1] += ror16(ppk[0] ^ get_unaligned_le16(tk + 14), 1);
+	ppk[2] += ror16(ppk[1], 1);
+	ppk[3] += ror16(ppk[2], 1);
+	ppk[4] += ror16(ppk[3], 1);
+	ppk[5] += ror16(ppk[4], 1);
+
+	rc4key[0] = tsc_IV16 >> 8;
+	rc4key[1] = ((tsc_IV16 >> 8) | 0x20) & 0x7f;
+	rc4key[2] = tsc_IV16 & 0xFF;
+	rc4key[3] = ((ppk[5] ^ get_unaligned_le16(tk)) >> 1) & 0xFF;
+
+	rc4key += 4;
+	for (i = 0; i < 6; i++)
+		put_unaligned_le16(ppk[i], rc4key + 2 * i);
 }
 
-
 /* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets
  * of the IV. Returns pointer to the octet following IVs (i.e., beginning of
  * the packet payload). */
@@ -183,14 +139,10 @@ u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
 	*pos++ = iv1;
 	*pos++ = iv2;
 	*pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */;
-	*pos++ = key->u.tkip.iv32 & 0xff;
-	*pos++ = (key->u.tkip.iv32 >> 8) & 0xff;
-	*pos++ = (key->u.tkip.iv32 >> 16) & 0xff;
-	*pos++ = (key->u.tkip.iv32 >> 24) & 0xff;
-	return pos;
+	put_unaligned_le32(key->u.tkip.iv32, pos);
+	return pos + 4;
 }
 
-
 void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta,
 				  u16 *phase1key)
 {
@@ -228,10 +180,8 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 	u16 iv16;
 	u32 iv32;
 
-	iv16 = data[hdr_len] << 8;
-	iv16 += data[hdr_len + 2];
-	iv32 = data[hdr_len + 4] | (data[hdr_len + 5] << 8) |
-	       (data[hdr_len + 6] << 16) | (data[hdr_len + 7] << 24);
+	iv16 = data[hdr_len + 2] | (data[hdr_len] << 8);
+	iv32 = get_unaligned_le32(data + hdr_len + 4);
 
 #ifdef CONFIG_TKIP_DEBUG
 	printk(KERN_DEBUG "TKIP encrypt: iv16 = 0x%04x, iv32 = 0x%08x\n",
@@ -281,7 +231,6 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
 	ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len);
 }
 
-
 /* Decrypt packet payload with TKIP using @key. @pos is a pointer to the
  * beginning of the buffer containing IEEE 802.11 header payload, i.e.,
  * including IV, Ext. IV, real data, Michael MIC, ICV. @payload_len is the
@@ -302,7 +251,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 
 	iv16 = (pos[0] << 8) | pos[2];
 	keyid = pos[3];
-	iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24);
+	iv32 = get_unaligned_le32(pos + 4);
 	pos += 8;
 #ifdef CONFIG_TKIP_DEBUG
 	{
@@ -409,5 +358,3 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 
 	return res;
 }
-
-
-- 
cgit v1.2.3-70-g09d2


From 3434fbd39862d471c92b66c28cd449deea8e9f90 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 3 May 2008 00:59:37 +0200
Subject: mac80211: require four hardware queues for QoS/HT

This patch makes mac80211 only announce QoS/HT support when
the underlying hardware has four (or more) queues.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Ron Rindjunksi <ron.rindjunksi@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c |  3 ++-
 net/mac80211/mlme.c  | 21 ++++++++++++++-------
 net/mac80211/tx.c    |  4 ++--
 3 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f41c7e0de62..5a9a3c6ef48 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -157,9 +157,10 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
 		ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN |
 			IEEE80211_AUTH_ALG_SHARED_KEY;
 		ifsta->flags |= IEEE80211_STA_CREATE_IBSS |
-			IEEE80211_STA_WMM_ENABLED |
 			IEEE80211_STA_AUTO_BSSID_SEL |
 			IEEE80211_STA_AUTO_CHANNEL_SEL;
+		if (sdata->local->hw.queues >= 4)
+			ifsta->flags |= IEEE80211_STA_WMM_ENABLED;
 
 		msdata = IEEE80211_DEV_TO_SUB_IF(sdata->local->mdev);
 		sdata->bss = &msdata->u.ap;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 55b85ae5bc1..108c6fc42fe 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -272,6 +272,12 @@ static void ieee80211_sta_wmm_params(struct net_device *dev,
 	int count;
 	u8 *pos;
 
+	if (!(ifsta->flags & IEEE80211_STA_WMM_ENABLED))
+		return;
+
+	if (!wmm_param)
+		return;
+
 	if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1)
 		return;
 	count = wmm_param[6] & 0x0f;
@@ -763,8 +769,10 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		*pos++ = 1; /* WME ver */
 		*pos++ = 0;
 	}
+
 	/* wmm support is a must to HT */
-	if (wmm && sband->ht_info.ht_supported) {
+	if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) &&
+	    sband->ht_info.ht_supported) {
 		__le16 tmp = cpu_to_le16(sband->ht_info.cap);
 		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2);
 		*pos++ = WLAN_EID_HT_CAPABILITY;
@@ -2021,7 +2029,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	else
 		sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
 
-	if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) {
+	if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
+	    (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
 		struct ieee80211_ht_bss_info bss_info;
 		ieee80211_ht_cap_ie_to_ht_info(
 				(struct ieee80211_ht_cap *)
@@ -2034,7 +2043,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 
 	rate_control_rate_init(sta, local);
 
-	if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
+	if (elems.wmm_param) {
 		sta->flags |= WLAN_STA_WME;
 		rcu_read_unlock();
 		ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
@@ -2817,10 +2826,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 
 	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
-	if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
-		ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
-					 elems.wmm_param_len);
-	}
+	ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
+				 elems.wmm_param_len);
 
 	/* Do not send changes to driver if we are scanning. This removes
 	 * requirement that driver's bss_info_changed function needs to be
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f35eaea98e7..3a03d7807c3 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1490,8 +1490,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		rcu_read_unlock();
 	}
 
-	/* receiver is QoS enabled, use a QoS type frame */
-	if (sta_flags & WLAN_STA_WME) {
+	/* receiver and we are QoS enabled, use a QoS type frame */
+	if (sta_flags & WLAN_STA_WME && local->hw.queues >= 4) {
 		fc |= IEEE80211_STYPE_QOS_DATA;
 		hdrlen += 2;
 	}
-- 
cgit v1.2.3-70-g09d2


From 07346f81e87d6e4cca7ae9adfa711d0c61c87b56 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 3 May 2008 01:02:02 +0200
Subject: mac80211: proper STA info locking

As discussed earlier, we can unify locking in struct sta_info
and use just a single spinlock protecting all members of the
structure that need protection. Many don't, but one of the
especially bad ones is the 'flags' member that can currently
be clobbered when RX and TX is being processed on different
CPUs at the same time.

Because having four spinlocks for different, mostly exclusive
parts of a single structure is overkill, this patch also kills
the ampdu and mesh plink spinlocks and uses just a single one
for everything. Because none of the spinlocks are nested, this
is safe.

It remains to be seen whether or not we should make the sta
flags use atomic bit operations instead, for now though this
is a safe thing and using atomic operations instead will be
very simple using the new static inline functions this patch
introduces for accessing sta->flags.

Since spin_lock_bh() is used with this lock, there shouldn't
be any contention even if aggregation is enabled at around the
same time as both requires frame transmission/reception which
is in a bh context.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Tomas Winkler <tomasw@gmail.com>
Cc: Ron Rindjunsky <ron.rindjunsky@intel.com>
Cc: Luis Carlos Cobo <luisca@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-4965-rs.c |  4 +-
 net/mac80211/cfg.c                         |  2 +
 net/mac80211/debugfs_sta.c                 | 15 ++---
 net/mac80211/key.c                         |  4 +-
 net/mac80211/main.c                        | 31 ++++++-----
 net/mac80211/mesh_plink.c                  | 88 +++++++++++++++---------------
 net/mac80211/mlme.c                        | 38 ++++++-------
 net/mac80211/rx.c                          | 17 +++---
 net/mac80211/sta_info.c                    | 14 ++---
 net/mac80211/sta_info.h                    | 72 +++++++++++++++++++++---
 net/mac80211/tx.c                          | 21 +++----
 11 files changed, 181 insertions(+), 125 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/iwlwifi/iwl-4965-rs.c b/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
index 67356d9a0c5..e3444c4ea1a 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
@@ -360,9 +360,9 @@ static void rs_tl_turn_on_agg_for_tid(struct iwl_priv *priv,
 	unsigned long state;
 	DECLARE_MAC_BUF(mac);
 
-	spin_lock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_lock_bh(&sta->lock);
 	state = sta->ampdu_mlme.tid_state_tx[tid];
-	spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_unlock_bh(&sta->lock);
 
 	if (state == HT_AGG_STATE_IDLE &&
 	    rs_tl_get_load(lq_data, tid) > IWL_AGG_LOAD_THRESHOLD) {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 699d97b8de5..3cef80dcd0e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -602,6 +602,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 	 */
 
 	if (params->station_flags & STATION_FLAG_CHANGED) {
+		spin_lock_bh(&sta->lock);
 		sta->flags &= ~WLAN_STA_AUTHORIZED;
 		if (params->station_flags & STATION_FLAG_AUTHORIZED)
 			sta->flags |= WLAN_STA_AUTHORIZED;
@@ -613,6 +614,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 		sta->flags &= ~WLAN_STA_WME;
 		if (params->station_flags & STATION_FLAG_WME)
 			sta->flags |= WLAN_STA_WME;
+		spin_unlock_bh(&sta->lock);
 	}
 
 	/*
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 676a93202ff..1f00273d99c 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -74,14 +74,15 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
 {
 	char buf[100];
 	struct sta_info *sta = file->private_data;
+	u32 staflags = get_sta_flags(sta);
 	int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s",
-		sta->flags & WLAN_STA_AUTH ? "AUTH\n" : "",
-		sta->flags & WLAN_STA_ASSOC ? "ASSOC\n" : "",
-		sta->flags & WLAN_STA_PS ? "PS\n" : "",
-		sta->flags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "",
-		sta->flags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "",
-		sta->flags & WLAN_STA_WME ? "WME\n" : "",
-		sta->flags & WLAN_STA_WDS ? "WDS\n" : "");
+		staflags & WLAN_STA_AUTH ? "AUTH\n" : "",
+		staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "",
+		staflags & WLAN_STA_PS ? "PS\n" : "",
+		staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "",
+		staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "",
+		staflags & WLAN_STA_WME ? "WME\n" : "",
+		staflags & WLAN_STA_WDS ? "WDS\n" : "");
 	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
 }
 STA_OPS(flags);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 88b211af7c1..d4893bd1775 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -321,7 +321,7 @@ void ieee80211_key_link(struct ieee80211_key *key,
 		 * some hardware cannot handle TKIP with QoS, so
 		 * we indicate whether QoS could be in use.
 		 */
-		if (sta->flags & WLAN_STA_WME)
+		if (test_sta_flags(sta, WLAN_STA_WME))
 			key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
 
 		/*
@@ -342,7 +342,7 @@ void ieee80211_key_link(struct ieee80211_key *key,
 			/* same here, the AP could be using QoS */
 			ap = sta_info_get(key->local, key->sdata->u.sta.bssid);
 			if (ap) {
-				if (ap->flags & WLAN_STA_WME)
+				if (test_sta_flags(ap, WLAN_STA_WME))
 					key->conf.flags |=
 						IEEE80211_KEY_FLAG_WMM_STA;
 			}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 55e76117da9..f277407f0f5 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -346,6 +346,7 @@ static int ieee80211_open(struct net_device *dev)
 			goto err_del_interface;
 		}
 
+		/* no locking required since STA is not live yet */
 		sta->flags |= WLAN_STA_AUTHORIZED;
 
 		res = sta_info_insert(sta);
@@ -588,7 +589,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 		return -ENOENT;
 	}
 
-	spin_lock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_lock_bh(&sta->lock);
 
 	/* we have tried too many times, receiver does not want A-MPDU */
 	if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
@@ -691,7 +692,7 @@ start_ba_err:
 	spin_unlock_bh(&local->mdev->queue_lock);
 	ret = -EBUSY;
 start_ba_exit:
-	spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_unlock_bh(&sta->lock);
 	rcu_read_unlock();
 	return ret;
 }
@@ -719,7 +720,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
 
 	/* check if the TID is in aggregation */
 	state = &sta->ampdu_mlme.tid_state_tx[tid];
-	spin_lock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_lock_bh(&sta->lock);
 
 	if (*state != HT_AGG_STATE_OPERATIONAL) {
 		ret = -ENOENT;
@@ -749,7 +750,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
 	}
 
 stop_BA_exit:
-	spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_unlock_bh(&sta->lock);
 	rcu_read_unlock();
 	return ret;
 }
@@ -778,12 +779,12 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	}
 
 	state = &sta->ampdu_mlme.tid_state_tx[tid];
-	spin_lock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_lock_bh(&sta->lock);
 
 	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
 		printk(KERN_DEBUG "addBA was not requested yet, state is %d\n",
 				*state);
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_unlock_bh(&sta->lock);
 		rcu_read_unlock();
 		return;
 	}
@@ -796,7 +797,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 		printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
 		ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
 	}
-	spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_unlock_bh(&sta->lock);
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
@@ -830,10 +831,10 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	}
 	state = &sta->ampdu_mlme.tid_state_tx[tid];
 
-	spin_lock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_lock_bh(&sta->lock);
 	if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
 		printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_unlock_bh(&sta->lock);
 		rcu_read_unlock();
 		return;
 	}
@@ -860,7 +861,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	sta->ampdu_mlme.addba_req_num[tid] = 0;
 	kfree(sta->ampdu_mlme.tid_tx[tid]);
 	sta->ampdu_mlme.tid_tx[tid] = NULL;
-	spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_unlock_bh(&sta->lock);
 
 	rcu_read_unlock();
 }
@@ -1315,7 +1316,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	 * packet. If the STA went to power save mode, this will happen
 	 * happen when it wakes up for the next time.
 	 */
-	sta->flags |= WLAN_STA_CLEAR_PS_FILT;
+	set_sta_flags(sta, WLAN_STA_CLEAR_PS_FILT);
 
 	/*
 	 * This code races in the following way:
@@ -1347,7 +1348,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	 *      can be unknown, for example with different interrupt status
 	 *	bits.
 	 */
-	if (sta->flags & WLAN_STA_PS &&
+	if (test_sta_flags(sta, WLAN_STA_PS) &&
 	    skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) {
 		ieee80211_remove_tx_extra(local, sta->key, skb,
 					  &status->control);
@@ -1355,7 +1356,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 		return;
 	}
 
-	if (!(sta->flags & WLAN_STA_PS) &&
+	if (!test_sta_flags(sta, WLAN_STA_PS) &&
 	    !(status->control.flags & IEEE80211_TXCTL_REQUEUE)) {
 		/* Software retry the packet once */
 		status->control.flags |= IEEE80211_TXCTL_REQUEUE;
@@ -1370,7 +1371,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 		       "queue_len=%d PS=%d @%lu\n",
 		       wiphy_name(local->hw.wiphy),
 		       skb_queue_len(&sta->tx_filtered),
-		       !!(sta->flags & WLAN_STA_PS), jiffies);
+		       !!test_sta_flags(sta, WLAN_STA_PS), jiffies);
 	dev_kfree_skb(skb);
 }
 
@@ -1399,7 +1400,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
 		struct sta_info *sta;
 		sta = sta_info_get(local, hdr->addr1);
 		if (sta) {
-			if (sta->flags & WLAN_STA_PS) {
+			if (test_sta_flags(sta, WLAN_STA_PS)) {
 				/*
 				 * The STA is in power save mode, so assume
 				 * that this TX packet failed because of that.
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 37f0c2b94ae..9efeb1f0702 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -79,7 +79,7 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
  *
  * @sta: mes peer link to restart
  *
- * Locking: this function must be called holding sta->plink_lock
+ * Locking: this function must be called holding sta->lock
  */
 static inline void mesh_plink_fsm_restart(struct sta_info *sta)
 {
@@ -105,7 +105,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata,
 	if (!sta)
 		return NULL;
 
-	sta->flags |= WLAN_STA_AUTHORIZED;
+	sta->flags = WLAN_STA_AUTHORIZED;
 	sta->supp_rates[local->hw.conf.channel->band] = rates;
 
 	return sta;
@@ -118,7 +118,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata,
  *
  * All mesh paths with this peer as next hop will be flushed
  *
- * Locking: the caller must hold sta->plink_lock
+ * Locking: the caller must hold sta->lock
  */
 static void __mesh_plink_deactivate(struct sta_info *sta)
 {
@@ -139,9 +139,9 @@ static void __mesh_plink_deactivate(struct sta_info *sta)
  */
 void mesh_plink_deactivate(struct sta_info *sta)
 {
-	spin_lock_bh(&sta->plink_lock);
+	spin_lock_bh(&sta->lock);
 	__mesh_plink_deactivate(sta);
-	spin_unlock_bh(&sta->plink_lock);
+	spin_unlock_bh(&sta->lock);
 }
 
 static int mesh_plink_frame_tx(struct net_device *dev,
@@ -270,10 +270,10 @@ static void mesh_plink_timer(unsigned long data)
 	 */
 	sta = (struct sta_info *) data;
 
-	spin_lock_bh(&sta->plink_lock);
+	spin_lock_bh(&sta->lock);
 	if (sta->ignore_plink_timer) {
 		sta->ignore_plink_timer = false;
-		spin_unlock_bh(&sta->plink_lock);
+		spin_unlock_bh(&sta->lock);
 		return;
 	}
 	mpl_dbg("Mesh plink timer for %s fired on state %d\n",
@@ -298,7 +298,7 @@ static void mesh_plink_timer(unsigned long data)
 					     rand % sta->plink_timeout;
 			++sta->plink_retries;
 			mod_plink_timer(sta, sta->plink_timeout);
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid,
 					    0, 0);
 			break;
@@ -311,7 +311,7 @@ static void mesh_plink_timer(unsigned long data)
 			reason = cpu_to_le16(MESH_CONFIRM_TIMEOUT);
 		sta->plink_state = PLINK_HOLDING;
 		mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
-		spin_unlock_bh(&sta->plink_lock);
+		spin_unlock_bh(&sta->lock);
 		mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid,
 				    reason);
 		break;
@@ -319,10 +319,10 @@ static void mesh_plink_timer(unsigned long data)
 		/* holding timer */
 		del_timer(&sta->plink_timer);
 		mesh_plink_fsm_restart(sta);
-		spin_unlock_bh(&sta->plink_lock);
+		spin_unlock_bh(&sta->lock);
 		break;
 	default:
-		spin_unlock_bh(&sta->plink_lock);
+		spin_unlock_bh(&sta->lock);
 		break;
 	}
 }
@@ -344,16 +344,16 @@ int mesh_plink_open(struct sta_info *sta)
 	DECLARE_MAC_BUF(mac);
 #endif
 
-	spin_lock_bh(&sta->plink_lock);
+	spin_lock_bh(&sta->lock);
 	get_random_bytes(&llid, 2);
 	sta->llid = llid;
 	if (sta->plink_state != PLINK_LISTEN) {
-		spin_unlock_bh(&sta->plink_lock);
+		spin_unlock_bh(&sta->lock);
 		return -EBUSY;
 	}
 	sta->plink_state = PLINK_OPN_SNT;
 	mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata));
-	spin_unlock_bh(&sta->plink_lock);
+	spin_unlock_bh(&sta->lock);
 	mpl_dbg("Mesh plink: starting establishment with %s\n",
 		print_mac(mac, sta->addr));
 
@@ -367,10 +367,10 @@ void mesh_plink_block(struct sta_info *sta)
 	DECLARE_MAC_BUF(mac);
 #endif
 
-	spin_lock_bh(&sta->plink_lock);
+	spin_lock_bh(&sta->lock);
 	__mesh_plink_deactivate(sta);
 	sta->plink_state = PLINK_BLOCKED;
-	spin_unlock_bh(&sta->plink_lock);
+	spin_unlock_bh(&sta->lock);
 }
 
 int mesh_plink_close(struct sta_info *sta)
@@ -383,14 +383,14 @@ int mesh_plink_close(struct sta_info *sta)
 
 	mpl_dbg("Mesh plink: closing link with %s\n",
 			print_mac(mac, sta->addr));
-	spin_lock_bh(&sta->plink_lock);
+	spin_lock_bh(&sta->lock);
 	sta->reason = cpu_to_le16(MESH_LINK_CANCELLED);
 	reason = sta->reason;
 
 	if (sta->plink_state == PLINK_LISTEN ||
 	    sta->plink_state == PLINK_BLOCKED) {
 		mesh_plink_fsm_restart(sta);
-		spin_unlock_bh(&sta->plink_lock);
+		spin_unlock_bh(&sta->lock);
 		return 0;
 	} else if (sta->plink_state == PLINK_ESTAB) {
 		__mesh_plink_deactivate(sta);
@@ -402,7 +402,7 @@ int mesh_plink_close(struct sta_info *sta)
 	sta->plink_state = PLINK_HOLDING;
 	llid = sta->llid;
 	plid = sta->plid;
-	spin_unlock_bh(&sta->plink_lock);
+	spin_unlock_bh(&sta->lock);
 	mesh_plink_frame_tx(sta->sdata->dev, PLINK_CLOSE, sta->addr, llid,
 			    plid, reason);
 	return 0;
@@ -490,7 +490,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			/* avoid warning */
 			break;
 		}
-		spin_lock_bh(&sta->plink_lock);
+		spin_lock_bh(&sta->lock);
 	} else if (!sta) {
 		/* ftype == PLINK_OPEN */
 		u64 rates;
@@ -512,9 +512,9 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			return;
 		}
 		event = OPN_ACPT;
-		spin_lock_bh(&sta->plink_lock);
+		spin_lock_bh(&sta->lock);
 	} else {
-		spin_lock_bh(&sta->plink_lock);
+		spin_lock_bh(&sta->lock);
 		switch (ftype) {
 		case PLINK_OPEN:
 			if (!mesh_plink_free_count(sdata) ||
@@ -551,7 +551,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			break;
 		default:
 			mpl_dbg("Mesh plink: unknown frame subtype\n");
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			rcu_read_unlock();
 			return;
 		}
@@ -568,7 +568,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 		switch (event) {
 		case CLS_ACPT:
 			mesh_plink_fsm_restart(sta);
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			break;
 		case OPN_ACPT:
 			sta->plink_state = PLINK_OPN_RCVD;
@@ -576,14 +576,14 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			get_random_bytes(&llid, 2);
 			sta->llid = llid;
 			mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata));
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid,
 					    0, 0);
 			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr,
 					    llid, plid, 0);
 			break;
 		default:
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			break;
 		}
 		break;
@@ -603,7 +603,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 				sta->ignore_plink_timer = true;
 
 			llid = sta->llid;
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
@@ -612,7 +612,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			sta->plink_state = PLINK_OPN_RCVD;
 			sta->plid = plid;
 			llid = sta->llid;
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid,
 					    plid, 0);
 			break;
@@ -622,10 +622,10 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 					     dot11MeshConfirmTimeout(sdata)))
 				sta->ignore_plink_timer = true;
 
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			break;
 		default:
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			break;
 		}
 		break;
@@ -645,13 +645,13 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 				sta->ignore_plink_timer = true;
 
 			llid = sta->llid;
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
 			llid = sta->llid;
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid,
 					    plid, 0);
 			break;
@@ -659,12 +659,12 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			del_timer(&sta->plink_timer);
 			sta->plink_state = PLINK_ESTAB;
 			mesh_plink_inc_estab_count(sdata);
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mpl_dbg("Mesh plink with %s ESTABLISHED\n",
 					print_mac(mac, sta->addr));
 			break;
 		default:
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			break;
 		}
 		break;
@@ -684,7 +684,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 				sta->ignore_plink_timer = true;
 
 			llid = sta->llid;
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
@@ -692,14 +692,14 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			del_timer(&sta->plink_timer);
 			sta->plink_state = PLINK_ESTAB;
 			mesh_plink_inc_estab_count(sdata);
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mpl_dbg("Mesh plink with %s ESTABLISHED\n",
 					print_mac(mac, sta->addr));
 			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid,
 					    plid, 0);
 			break;
 		default:
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			break;
 		}
 		break;
@@ -713,18 +713,18 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			sta->plink_state = PLINK_HOLDING;
 			llid = sta->llid;
 			mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
 			llid = sta->llid;
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid,
 					    plid, 0);
 			break;
 		default:
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			break;
 		}
 		break;
@@ -734,7 +734,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			if (del_timer(&sta->plink_timer))
 				sta->ignore_plink_timer = 1;
 			mesh_plink_fsm_restart(sta);
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			break;
 		case OPN_ACPT:
 		case CNF_ACPT:
@@ -742,19 +742,19 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 		case CNF_RJCT:
 			llid = sta->llid;
 			reason = sta->reason;
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
 		default:
-			spin_unlock_bh(&sta->plink_lock);
+			spin_unlock_bh(&sta->lock);
 		}
 		break;
 	default:
 		/* should not get here, PLINK_BLOCKED is dealt with at the
 		 * beggining of the function
 		 */
-		spin_unlock_bh(&sta->plink_lock);
+		spin_unlock_bh(&sta->lock);
 		break;
 	}
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 108c6fc42fe..6c4b27be35d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1241,7 +1241,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
 
 
 	/* examine state machine */
-	spin_lock_bh(&sta->ampdu_mlme.ampdu_rx);
+	spin_lock_bh(&sta->lock);
 
 	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
@@ -1308,7 +1308,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
 	tid_agg_rx->stored_mpdu_num = 0;
 	status = WLAN_STATUS_SUCCESS;
 end:
-	spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
+	spin_unlock_bh(&sta->lock);
 
 end_no_lock:
 	ieee80211_send_addba_resp(sta->sdata->dev, sta->addr, tid,
@@ -1340,10 +1340,10 @@ static void ieee80211_sta_process_addba_resp(struct net_device *dev,
 
 	state = &sta->ampdu_mlme.tid_state_tx[tid];
 
-	spin_lock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_lock_bh(&sta->lock);
 
 	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_unlock_bh(&sta->lock);
 		printk(KERN_DEBUG "state not HT_ADDBA_REQUESTED_MSK:"
 			"%d\n", *state);
 		goto addba_resp_exit;
@@ -1351,7 +1351,7 @@ static void ieee80211_sta_process_addba_resp(struct net_device *dev,
 
 	if (mgmt->u.action.u.addba_resp.dialog_token !=
 		sta->ampdu_mlme.tid_tx[tid]->dialog_token) {
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_unlock_bh(&sta->lock);
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
@@ -1375,7 +1375,7 @@ static void ieee80211_sta_process_addba_resp(struct net_device *dev,
 			ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
 		}
 
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_unlock_bh(&sta->lock);
 		printk(KERN_DEBUG "recipient accepted agg: tid %d \n", tid);
 	} else {
 		printk(KERN_DEBUG "recipient rejected agg: tid %d \n", tid);
@@ -1383,7 +1383,7 @@ static void ieee80211_sta_process_addba_resp(struct net_device *dev,
 		sta->ampdu_mlme.addba_req_num[tid]++;
 		/* this will allow the state check in stop_BA_session */
 		*state = HT_AGG_STATE_OPERATIONAL;
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_unlock_bh(&sta->lock);
 		ieee80211_stop_tx_ba_session(hw, sta->addr, tid,
 					     WLAN_BACK_INITIATOR);
 	}
@@ -1453,17 +1453,17 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
 	}
 
 	/* check if TID is in operational state */
-	spin_lock_bh(&sta->ampdu_mlme.ampdu_rx);
+	spin_lock_bh(&sta->lock);
 	if (sta->ampdu_mlme.tid_state_rx[tid]
 				!= HT_AGG_STATE_OPERATIONAL) {
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
+		spin_unlock_bh(&sta->lock);
 		rcu_read_unlock();
 		return;
 	}
 	sta->ampdu_mlme.tid_state_rx[tid] =
 		HT_AGG_STATE_REQ_STOP_BA_MSK |
 		(initiator << HT_AGG_STATE_INITIATOR_SHIFT);
-	spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
+	spin_unlock_bh(&sta->lock);
 
 	/* stop HW Rx aggregation. ampdu_action existence
 	 * already verified in session init so we add the BUG_ON */
@@ -1540,10 +1540,10 @@ static void ieee80211_sta_process_delba(struct net_device *dev,
 		ieee80211_sta_stop_rx_ba_session(dev, sta->addr, tid,
 						 WLAN_BACK_INITIATOR, 0);
 	else { /* WLAN_BACK_RECIPIENT */
-		spin_lock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_lock_bh(&sta->lock);
 		sta->ampdu_mlme.tid_state_tx[tid] =
 				HT_AGG_STATE_OPERATIONAL;
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_unlock_bh(&sta->lock);
 		ieee80211_stop_tx_ba_session(&local->hw, sta->addr, tid,
 					     WLAN_BACK_RECIPIENT);
 	}
@@ -1580,9 +1580,9 @@ void sta_addba_resp_timer_expired(unsigned long data)
 
 	state = &sta->ampdu_mlme.tid_state_tx[tid];
 	/* check if the TID waits for addBA response */
-	spin_lock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_lock_bh(&sta->lock);
 	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_unlock_bh(&sta->lock);
 		*state = HT_AGG_STATE_IDLE;
 		printk(KERN_DEBUG "timer expired on tid %d but we are not "
 				"expecting addBA response there", tid);
@@ -1593,7 +1593,7 @@ void sta_addba_resp_timer_expired(unsigned long data)
 
 	/* go through the state check in stop_BA_session */
 	*state = HT_AGG_STATE_OPERATIONAL;
-	spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+	spin_unlock_bh(&sta->lock);
 	ieee80211_stop_tx_ba_session(hw, temp_sta->addr, tid,
 				     WLAN_BACK_INITIATOR);
 
@@ -1984,8 +1984,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	 *	  to between the sta_info_alloc() and sta_info_insert() above.
 	 */
 
-	sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP |
-		      WLAN_STA_AUTHORIZED;
+	set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP |
+			   WLAN_STA_AUTHORIZED);
 
 	rates = 0;
 	basic_rates = 0;
@@ -2044,7 +2044,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	rate_control_rate_init(sta, local);
 
 	if (elems.wmm_param) {
-		sta->flags |= WLAN_STA_WME;
+		set_sta_flags(sta, WLAN_STA_WME);
 		rcu_read_unlock();
 		ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
 					 elems.wmm_param_len);
@@ -4237,7 +4237,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 	if (!sta)
 		return NULL;
 
-	sta->flags |= WLAN_STA_AUTHORIZED;
+	set_sta_flags(sta, WLAN_STA_AUTHORIZED);
 
 	sta->supp_rates[local->hw.conf.channel->band] =
 		sdata->u.sta.supp_rates_bits[local->hw.conf.channel->band];
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e8b89c89e87..b399e09ec7a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -479,7 +479,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 		      ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL &&
 		       (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) &&
 		     rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-		     (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) {
+		     (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) {
 		if ((!(rx->fc & IEEE80211_FCTL_FROMDS) &&
 		     !(rx->fc & IEEE80211_FCTL_TODS) &&
 		     (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)
@@ -630,8 +630,7 @@ static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta)
 
 	if (sdata->bss)
 		atomic_inc(&sdata->bss->num_sta_ps);
-	sta->flags |= WLAN_STA_PS;
-	sta->flags &= ~WLAN_STA_PSPOLL;
+	set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL);
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n",
 	       dev->name, print_mac(mac, sta->addr), sta->aid);
@@ -652,7 +651,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 	if (sdata->bss)
 		atomic_dec(&sdata->bss->num_sta_ps);
 
-	sta->flags &= ~(WLAN_STA_PS | WLAN_STA_PSPOLL);
+	clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL);
 
 	if (!skb_queue_empty(&sta->ps_tx_buf))
 		sta_info_clear_tim_bit(sta);
@@ -727,9 +726,10 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) {
 		/* Change STA power saving mode only in the end of a frame
 		 * exchange sequence */
-		if ((sta->flags & WLAN_STA_PS) && !(rx->fc & IEEE80211_FCTL_PM))
+		if (test_sta_flags(sta, WLAN_STA_PS) &&
+		    !(rx->fc & IEEE80211_FCTL_PM))
 			rx->sent_ps_buffered += ap_sta_ps_end(dev, sta);
-		else if (!(sta->flags & WLAN_STA_PS) &&
+		else if (!test_sta_flags(sta, WLAN_STA_PS) &&
 			 (rx->fc & IEEE80211_FCTL_PM))
 			ap_sta_ps_start(dev, sta);
 	}
@@ -983,7 +983,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
 		 * Tell TX path to send one frame even though the STA may
 		 * still remain is PS mode after this frame exchange.
 		 */
-		rx->sta->flags |= WLAN_STA_PSPOLL;
+		set_sta_flags(rx->sta, WLAN_STA_PSPOLL);
 
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 		printk(KERN_DEBUG "STA %s aid %d: PS Poll (entries after %d)\n",
@@ -1046,7 +1046,8 @@ ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx)
 static int
 ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx)
 {
-	if (unlikely(!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED))) {
+	if (unlikely(!rx->sta ||
+	    !test_sta_flags(rx->sta, WLAN_STA_AUTHORIZED))) {
 #ifdef CONFIG_MAC80211_DEBUG
 		if (net_ratelimit())
 			printk(KERN_DEBUG "%s: dropped frame "
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 631943e8af8..baf5e474688 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -202,14 +202,12 @@ void sta_info_destroy(struct sta_info *sta)
 		dev_kfree_skb_any(skb);
 
 	for (i = 0; i <  STA_TID_NUM; i++) {
-		spin_lock_bh(&sta->ampdu_mlme.ampdu_rx);
+		spin_lock_bh(&sta->lock);
 		if (sta->ampdu_mlme.tid_rx[i])
 		  del_timer_sync(&sta->ampdu_mlme.tid_rx[i]->session_timer);
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
-		spin_lock_bh(&sta->ampdu_mlme.ampdu_tx);
 		if (sta->ampdu_mlme.tid_tx[i])
 		  del_timer_sync(&sta->ampdu_mlme.tid_tx[i]->addba_resp_timer);
-		spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx);
+		spin_unlock_bh(&sta->lock);
 	}
 
 	__sta_info_free(local, sta);
@@ -236,6 +234,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	if (!sta)
 		return NULL;
 
+	spin_lock_init(&sta->lock);
+
 	memcpy(sta->addr, addr, ETH_ALEN);
 	sta->local = local;
 	sta->sdata = sdata;
@@ -249,8 +249,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		return NULL;
 	}
 
-	spin_lock_init(&sta->ampdu_mlme.ampdu_rx);
-	spin_lock_init(&sta->ampdu_mlme.ampdu_tx);
 	for (i = 0; i < STA_TID_NUM; i++) {
 		/* timer_to_tid must be initialized with identity mapping to
 		 * enable session_timer's data differentiation. refer to
@@ -276,7 +274,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 
 #ifdef CONFIG_MAC80211_MESH
 	sta->plink_state = PLINK_LISTEN;
-	spin_lock_init(&sta->plink_lock);
 	init_timer(&sta->plink_timer);
 #endif
 
@@ -437,8 +434,7 @@ void __sta_info_unlink(struct sta_info **sta)
 
 	list_del(&(*sta)->list);
 
-	if ((*sta)->flags & WLAN_STA_PS) {
-		(*sta)->flags &= ~WLAN_STA_PS;
+	if (test_and_clear_sta_flags(*sta, WLAN_STA_PS)) {
 		if (sdata->bss)
 			atomic_dec(&sdata->bss->num_sta_ps);
 		__sta_info_clear_tim_bit(sdata->bss, *sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index f8c95bc9659..97a61c39ad9 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -129,23 +129,19 @@ enum plink_state {
  *
  * @tid_state_rx: TID's state in Rx session state machine.
  * @tid_rx: aggregation info for Rx per TID
- * @ampdu_rx: for locking sections in aggregation Rx flow
  * @tid_state_tx: TID's state in Tx session state machine.
  * @tid_tx: aggregation info for Tx per TID
  * @addba_req_num: number of times addBA request has been sent.
- * @ampdu_tx: for locking sectionsi in aggregation Tx flow
  * @dialog_token_allocator: dialog token enumerator for each new session;
  */
 struct sta_ampdu_mlme {
 	/* rx */
 	u8 tid_state_rx[STA_TID_NUM];
 	struct tid_ampdu_rx *tid_rx[STA_TID_NUM];
-	spinlock_t ampdu_rx;
 	/* tx */
 	u8 tid_state_tx[STA_TID_NUM];
 	struct tid_ampdu_tx *tid_tx[STA_TID_NUM];
 	u8 addba_req_num[STA_TID_NUM];
-	spinlock_t ampdu_tx;
 	u8 dialog_token_allocator;
 };
 
@@ -177,6 +173,8 @@ struct sta_ampdu_mlme {
  * @rx_bytes: Number of bytes received from this STA
  * @supp_rates: Bitmap of supported rates (per band)
  * @ht_info: HT capabilities of this STA
+ * @lock: used for locking all fields that require locking, see comments
+ *	in the header file.
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -187,6 +185,7 @@ struct sta_info {
 	struct ieee80211_key *key;
 	struct rate_control_ref *rate_ctrl;
 	void *rate_ctrl_priv;
+	spinlock_t lock;
 	struct ieee80211_ht_info ht_info;
 	u64 supp_rates[IEEE80211_NUM_BANDS];
 	u8 addr[ETH_ALEN];
@@ -199,7 +198,7 @@ struct sta_info {
 	 */
 	u8 pin_status;
 
-	/* frequently updated information, needs locking? */
+	/* frequently updated information, locked with lock spinlock */
 	u32 flags;
 
 	/*
@@ -251,7 +250,7 @@ struct sta_info {
 	int channel_use_raw;
 
 	/*
-	 * Aggregation information, comes with own locking.
+	 * Aggregation information, locked with lock.
 	 */
 	struct sta_ampdu_mlme ampdu_mlme;
 	u8 timer_to_tid[STA_TID_NUM];	/* identity mapping to ID timers */
@@ -270,9 +269,6 @@ struct sta_info {
 	enum plink_state plink_state;
 	u32 plink_timeout;
 	struct timer_list plink_timer;
-	spinlock_t plink_lock;	/* For peer_state reads / updates and other
-				   updates in the structure. Ensures robust
-				   transitions for the peerlink FSM */
 #endif
 
 #ifdef CONFIG_MAC80211_DEBUGFS
@@ -299,6 +295,64 @@ static inline enum plink_state sta_plink_state(struct sta_info *sta)
 	return PLINK_LISTEN;
 }
 
+static inline void set_sta_flags(struct sta_info *sta, const u32 flags)
+{
+	spin_lock_bh(&sta->lock);
+	sta->flags |= flags;
+	spin_unlock_bh(&sta->lock);
+}
+
+static inline void clear_sta_flags(struct sta_info *sta, const u32 flags)
+{
+	spin_lock_bh(&sta->lock);
+	sta->flags &= ~flags;
+	spin_unlock_bh(&sta->lock);
+}
+
+static inline void set_and_clear_sta_flags(struct sta_info *sta,
+					   const u32 set, const u32 clear)
+{
+	spin_lock_bh(&sta->lock);
+	sta->flags |= set;
+	sta->flags &= ~clear;
+	spin_unlock_bh(&sta->lock);
+}
+
+static inline u32 test_sta_flags(struct sta_info *sta, const u32 flags)
+{
+	u32 ret;
+
+	spin_lock_bh(&sta->lock);
+	ret = sta->flags & flags;
+	spin_unlock_bh(&sta->lock);
+
+	return ret;
+}
+
+static inline u32 test_and_clear_sta_flags(struct sta_info *sta,
+					   const u32 flags)
+{
+	u32 ret;
+
+	spin_lock_bh(&sta->lock);
+	ret = sta->flags & flags;
+	sta->flags &= ~flags;
+	spin_unlock_bh(&sta->lock);
+
+	return ret;
+}
+
+static inline u32 get_sta_flags(struct sta_info *sta)
+{
+	u32 ret;
+
+	spin_lock_bh(&sta->lock);
+	ret = sta->flags;
+	spin_unlock_bh(&sta->lock);
+
+	return ret;
+}
+
 
 /* Maximum number of concurrently registered stations */
 #define MAX_STA_COUNT 2007
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3a03d7807c3..5f31a6233e1 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -256,7 +256,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 	if (tx->flags & IEEE80211_TX_PS_BUFFERED)
 		return TX_CONTINUE;
 
-	sta_flags = tx->sta ? tx->sta->flags : 0;
+	sta_flags = tx->sta ? get_sta_flags(tx->sta) : 0;
 
 	if (likely(tx->flags & IEEE80211_TX_UNICAST)) {
 		if (unlikely(!(sta_flags & WLAN_STA_ASSOC) &&
@@ -391,6 +391,7 @@ static ieee80211_tx_result
 ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 {
 	struct sta_info *sta = tx->sta;
+	u32 staflags;
 	DECLARE_MAC_BUF(mac);
 
 	if (unlikely(!sta ||
@@ -398,8 +399,10 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 		      (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP)))
 		return TX_CONTINUE;
 
-	if (unlikely((sta->flags & WLAN_STA_PS) &&
-		     !(sta->flags & WLAN_STA_PSPOLL))) {
+	staflags = get_sta_flags(sta);
+
+	if (unlikely((staflags & WLAN_STA_PS) &&
+		     !(staflags & WLAN_STA_PSPOLL))) {
 		struct ieee80211_tx_packet_data *pkt_data;
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 		printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries "
@@ -430,13 +433,13 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 		return TX_QUEUED;
 	}
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
-	else if (unlikely(sta->flags & WLAN_STA_PS)) {
+	else if (unlikely(test_sta_flags(sta, WLAN_STA_PS))) {
 		printk(KERN_DEBUG "%s: STA %s in PS mode, but pspoll "
 		       "set -> send frame\n", tx->dev->name,
 		       print_mac(mac, sta->addr));
 	}
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
-	sta->flags &= ~WLAN_STA_PSPOLL;
+	clear_sta_flags(sta, WLAN_STA_PSPOLL);
 
 	return TX_CONTINUE;
 }
@@ -697,7 +700,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) &&
 	    (tx->rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
 	    tx->sdata->bss_conf.use_short_preamble &&
-	    (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) {
+	    (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) {
 		tx->control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE;
 	}
 
@@ -1025,10 +1028,8 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 
 	if (!tx->sta)
 		control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT;
-	else if (tx->sta->flags & WLAN_STA_CLEAR_PS_FILT) {
+	else if (test_and_clear_sta_flags(tx->sta, WLAN_STA_CLEAR_PS_FILT))
 		control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT;
-		tx->sta->flags &= ~WLAN_STA_CLEAR_PS_FILT;
-	}
 
 	hdrlen = ieee80211_get_hdrlen(tx->fc);
 	if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) {
@@ -1486,7 +1487,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		rcu_read_lock();
 		sta = sta_info_get(local, hdr.addr1);
 		if (sta)
-			sta_flags = sta->flags;
+			sta_flags = get_sta_flags(sta);
 		rcu_read_unlock();
 	}
 
-- 
cgit v1.2.3-70-g09d2


From fac371d9f09f461dfe9fbbceb2a38e2e12164dda Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 7 May 2008 23:18:09 +0200
Subject: mac80211: fix queue constant confusion

In commit 31ccc476b77234f6afb3 (mac80211: QoS related cleanups) I
accidentally changed these to use IEEE80211_MAX_AMPDU_QUEUES twice
which obviously is wrong, it should be IEEE80211_MAX_QUEUES once.
Currently harmless as they're both the same value anyway.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d82ed20a344..173b7fcb302 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -610,8 +610,8 @@ struct ieee80211_local {
 	struct sta_info *sta_hash[STA_HASH_SIZE];
 	struct timer_list sta_cleanup;
 
-	unsigned long state[IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_AMPDU_QUEUES];
-	struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_AMPDU_QUEUES];
+	unsigned long state[IEEE80211_MAX_QUEUES + IEEE80211_MAX_AMPDU_QUEUES];
+	struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_QUEUES + IEEE80211_MAX_AMPDU_QUEUES];
 	struct tasklet_struct tx_pending_tasklet;
 
 	/* number of interfaces with corresponding IFF_ flags */
-- 
cgit v1.2.3-70-g09d2


From 566bfe5a8bcde13188a356f77666f8115813cf31 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 8 May 2008 19:15:40 +0200
Subject: mac80211: use hardware flags for signal/noise units

trying to clean up the signal/noise code. the previous code in mac80211 had
confusing names for the related variables, did not have much definition of
what units of signal and noise were provided and used implicit mechanisms from
the wireless extensions.

this patch introduces hardware capability flags to let the hardware specify
clearly if it can provide signal and noise level values and which units it can
provide. this also anticipates possible new units like RCPI in the future.

for signal:

  IEEE80211_HW_SIGNAL_UNSPEC - unspecified, unknown, hw specific
  IEEE80211_HW_SIGNAL_DB     - dB difference to unspecified reference point
  IEEE80211_HW_SIGNAL_DBM    - dBm, difference to 1mW

for noise we currently only have dBm:

  IEEE80211_HW_NOISE_DBM     - dBm, difference to 1mW

if IEEE80211_HW_SIGNAL_UNSPEC or IEEE80211_HW_SIGNAL_DB is used the driver has
to provide the maximum value (max_signal) it reports in order for applications
to make sense of the signal values.

i tried my best to find out for each driver what it can provide and update it
but i'm not sure (?) for some of them and used the more conservative guess in
doubt. this can be fixed easily after this patch has been merged by changing
the hardware flags of the driver.

DRIVER          SIGNAL    MAX	NOISE   QUAL
-----------------------------------------------------------------
adm8211         unspec(?) 100   n/a     missing
at76_usb        unspec(?) (?)   unused  missing
ath5k           dBm             dBm     percent rssi
b43legacy       dBm             dBm     percent jssi(?)
b43             dBm             dBm     percent jssi(?)
iwl-3945        dBm             dBm     percent snr+more
iwl-4965        dBm             dBm     percent snr+more
p54             unspec    127   n/a     missing
rt2x00          dBm	        n/a     percent rssi+tx/rx frame success
  rt2400        dBm             n/a
  rt2500pci     dBm             n/a
  rt2500usb     dBm             n/a
  rt61pci       dBm             n/a
  rt73usb       dBm             n/a
rtl8180         unspec(?) 65    n/a     (?)
rtl8187         unspec(?) 65    (?)     noise(?)
zd1211          dB(?)     100   n/a     percent

drivers/net/wireless/ath5k/base.c:      Changes-licensed-under: 3-Clause-BSD

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/adm8211.c              |  7 +++--
 drivers/net/wireless/ath5k/base.c           | 23 +++-----------
 drivers/net/wireless/b43/main.c             |  8 ++---
 drivers/net/wireless/b43/xmit.c             |  5 ++-
 drivers/net/wireless/b43legacy/main.c       |  7 ++---
 drivers/net/wireless/b43legacy/xmit.c       |  4 +--
 drivers/net/wireless/iwlwifi/iwl-3945.c     | 18 +++++------
 drivers/net/wireless/iwlwifi/iwl-4965.c     | 12 ++++----
 drivers/net/wireless/iwlwifi/iwl-core.c     | 16 +++-------
 drivers/net/wireless/iwlwifi/iwl3945-base.c | 15 +++------
 drivers/net/wireless/p54/p54common.c        |  7 +++--
 drivers/net/wireless/rt2x00/rt2400pci.c     |  5 ++-
 drivers/net/wireless/rt2x00/rt2500pci.c     |  6 ++--
 drivers/net/wireless/rt2x00/rt2500usb.c     |  6 ++--
 drivers/net/wireless/rt2x00/rt2x00dev.c     |  4 +--
 drivers/net/wireless/rt2x00/rt61pci.c       |  5 ++-
 drivers/net/wireless/rt2x00/rt73usb.c       |  5 ++-
 drivers/net/wireless/rtl8180_dev.c          |  9 +++---
 drivers/net/wireless/rtl8187_dev.c          | 10 +++---
 drivers/net/wireless/zd1211rw/zd_mac.c      | 12 ++++----
 include/net/mac80211.h                      | 47 +++++++++++++++++++++--------
 net/ieee80211/ieee80211_rx.c                |  2 +-
 net/mac80211/debugfs_sta.c                  |  2 +-
 net/mac80211/ieee80211_i.h                  |  2 +-
 net/mac80211/main.c                         | 10 +++---
 net/mac80211/mlme.c                         | 12 ++++----
 net/mac80211/rx.c                           |  4 +--
 net/mac80211/sta_info.h                     |  2 +-
 net/mac80211/wext.c                         | 28 ++++++++++++-----
 29 files changed, 148 insertions(+), 145 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index d93a1de77eb..7af5d8851f6 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -445,9 +445,9 @@ static void adm8211_interrupt_rci(struct ieee80211_hw *dev)
 			struct ieee80211_rx_status rx_status = {0};
 
 			if (priv->pdev->revision < ADM8211_REV_CA)
-				rx_status.ssi = rssi;
+				rx_status.signal = rssi;
 			else
-				rx_status.ssi = 100 - rssi;
+				rx_status.signal = 100 - rssi;
 
 			rx_status.rate_idx = rate;
 
@@ -1893,9 +1893,10 @@ static int __devinit adm8211_probe(struct pci_dev *pdev,
 
 	dev->extra_tx_headroom = sizeof(struct adm8211_tx_hdr);
 	/* dev->flags = IEEE80211_HW_RX_INCLUDES_FCS in promisc mode */
+	dev->flags = IEEE80211_HW_SIGNAL_UNSPEC;
 
 	dev->channel_change_time = 1000;
-	dev->max_rssi = 100;	/* FIXME: find better value */
+	dev->max_signal = 100;    /* FIXME: find better value */
 
 	dev->queues = 1; /* ADM8211C supports more, maybe ADM8211B too */
 
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 3201c160434..dd8adaddd7a 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -458,13 +458,11 @@ ath5k_pci_probe(struct pci_dev *pdev,
 
 	/* Initialize driver private data */
 	SET_IEEE80211_DEV(hw, &pdev->dev);
-	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS;
+	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
+		    IEEE80211_HW_SIGNAL_DBM |
+		    IEEE80211_HW_NOISE_DBM;
 	hw->extra_tx_headroom = 2;
 	hw->channel_change_time = 5000;
-	/* these names are misleading */
-	hw->max_rssi = -110; /* signal in dBm */
-	hw->max_noise = -110; /* noise in dBm */
-	hw->max_signal = 100; /* we will provide a percentage based on rssi */
 	sc = hw->priv;
 	sc->hw = hw;
 	sc->pdev = pdev;
@@ -1893,20 +1891,9 @@ accept:
 		rxs.freq = sc->curchan->center_freq;
 		rxs.band = sc->curband->band;
 
-		/*
-		 * signal quality:
-		 * the names here are misleading and the usage of these
-		 * values by iwconfig makes it even worse
-		 */
-		/* noise floor in dBm, from the last noise calibration */
 		rxs.noise = sc->ah->ah_noise_floor;
-		/* signal level in dBm */
-		rxs.ssi = rxs.noise + rs.rs_rssi;
-		/*
-		 * "signal" is actually displayed as Link Quality by iwconfig
-		 * we provide a percentage based on rssi (assuming max rssi 64)
-		 */
-		rxs.signal = rs.rs_rssi * 100 / 64;
+		rxs.signal = rxs.noise + rs.rs_rssi;
+		rxs.qual = rs.rs_rssi * 100 / 64;
 
 		rxs.antenna = rs.rs_antenna;
 		rxs.rate_idx = ath5k_hw_to_driver_rix(sc, rs.rs_rate);
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 7c23aa8705c..fc23ba5309b 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4466,10 +4466,10 @@ static int b43_wireless_init(struct ssb_device *dev)
 
 	/* fill hw info */
 	hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
-		    IEEE80211_HW_RX_INCLUDES_FCS;
-	hw->max_signal = 100;
-	hw->max_rssi = -110;
-	hw->max_noise = -110;
+		    IEEE80211_HW_RX_INCLUDES_FCS |
+		    IEEE80211_HW_SIGNAL_DBM |
+		    IEEE80211_HW_NOISE_DBM;
+
 	hw->queues = b43_modparam_qos ? 4 : 1;
 	SET_IEEE80211_DEV(hw, dev->dev);
 	if (is_valid_ether_addr(sprom->et1mac))
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 88491947a20..afce9338d83 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -581,12 +581,11 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr)
 		//      and also find out what the maximum possible value is.
 		//      Fill status.ssi and status.signal fields.
 	} else {
-		status.ssi = b43_rssi_postprocess(dev, rxhdr->jssi,
+		status.signal = b43_rssi_postprocess(dev, rxhdr->jssi,
 						  (phystat0 & B43_RX_PHYST0_OFDM),
 						  (phystat0 & B43_RX_PHYST0_GAINCTL),
 						  (phystat3 & B43_RX_PHYST3_TRSTATE));
-		/* the next line looks wrong, but is what mac80211 wants */
-		status.signal = (rxhdr->jssi * 100) / B43_RX_MAX_SSI;
+		status.qual = (rxhdr->jssi * 100) / B43_RX_MAX_SSI;
 	}
 
 	if (phystat0 & B43_RX_PHYST0_OFDM)
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index d3820dce3c4..7755c59e080 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -3718,10 +3718,9 @@ static int b43legacy_wireless_init(struct ssb_device *dev)
 
 	/* fill hw info */
 	hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
-		    IEEE80211_HW_RX_INCLUDES_FCS;
-	hw->max_signal = 100;
-	hw->max_rssi = -110;
-	hw->max_noise = -110;
+		    IEEE80211_HW_RX_INCLUDES_FCS |
+		    IEEE80211_HW_SIGNAL_DBM |
+		    IEEE80211_HW_NOISE_DBM;
 	hw->queues = 1; /* FIXME: hardware has more queues */
 	SET_IEEE80211_DEV(hw, dev->dev);
 	if (is_valid_ether_addr(sprom->et1mac))
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index fc83dab6e2c..bed9e041d6c 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -532,12 +532,12 @@ void b43legacy_rx(struct b43legacy_wldev *dev,
 		}
 	}
 
-	status.ssi = b43legacy_rssi_postprocess(dev, jssi,
+	status.signal = b43legacy_rssi_postprocess(dev, jssi,
 				      (phystat0 & B43legacy_RX_PHYST0_OFDM),
 				      (phystat0 & B43legacy_RX_PHYST0_GAINCTL),
 				      (phystat3 & B43legacy_RX_PHYST3_TRSTATE));
 	status.noise = dev->stats.link_noise;
-	status.signal = (jssi * 100) / B43legacy_RX_MAX_SSI;
+	status.qual = (jssi * 100) / B43legacy_RX_MAX_SSI;
 	/* change to support A PHY */
 	if (phystat0 & B43legacy_RX_PHYST0_OFDM)
 		status.rate_idx = b43legacy_plcp_get_bitrate_idx_ofdm(plcp, false);
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index 8464397f781..d7beeff2642 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -520,7 +520,7 @@ static void iwl3945_add_radiotap(struct iwl3945_priv *priv,
 {
 	/* First cache any information we need before we overwrite
 	 * the information provided in the skb from the hardware */
-	s8 signal = stats->ssi;
+	s8 signal = stats->signal;
 	s8 noise = 0;
 	int rate = stats->rate_idx;
 	u64 tsf = stats->mactime;
@@ -693,7 +693,7 @@ static void iwl3945_rx_reply_rx(struct iwl3945_priv *priv,
 	}
 
 	/* Convert 3945's rssi indicator to dBm */
-	rx_status.ssi = rx_stats->rssi - IWL_RSSI_OFFSET;
+	rx_status.signal = rx_stats->rssi - IWL_RSSI_OFFSET;
 
 	/* Set default noise value to -127 */
 	if (priv->last_rx_noise == 0)
@@ -712,21 +712,21 @@ static void iwl3945_rx_reply_rx(struct iwl3945_priv *priv,
 	 * Calculate rx_status.signal (quality indicator in %) based on SNR. */
 	if (rx_stats_noise_diff) {
 		snr = rx_stats_sig_avg / rx_stats_noise_diff;
-		rx_status.noise = rx_status.ssi -
+		rx_status.noise = rx_status.signal -
 					iwl3945_calc_db_from_ratio(snr);
-		rx_status.signal = iwl3945_calc_sig_qual(rx_status.ssi,
+		rx_status.qual = iwl3945_calc_sig_qual(rx_status.signal,
 							 rx_status.noise);
 
 	/* If noise info not available, calculate signal quality indicator (%)
 	 *   using just the dBm signal level. */
 	} else {
 		rx_status.noise = priv->last_rx_noise;
-		rx_status.signal = iwl3945_calc_sig_qual(rx_status.ssi, 0);
+		rx_status.qual = iwl3945_calc_sig_qual(rx_status.signal, 0);
 	}
 
 
 	IWL_DEBUG_STATS("Rssi %d noise %d qual %d sig_avg %d noise_diff %d\n",
-			rx_status.ssi, rx_status.noise, rx_status.signal,
+			rx_status.signal, rx_status.noise, rx_status.qual,
 			rx_stats_sig_avg, rx_stats_noise_diff);
 
 	header = (struct ieee80211_hdr *)IWL_RX_DATA(pkt);
@@ -736,8 +736,8 @@ static void iwl3945_rx_reply_rx(struct iwl3945_priv *priv,
 	IWL_DEBUG_STATS_LIMIT("[%c] %d RSSI:%d Signal:%u, Noise:%u, Rate:%u\n",
 			      network_packet ? '*' : ' ',
 			      le16_to_cpu(rx_hdr->channel),
-			      rx_status.ssi, rx_status.ssi,
-			      rx_status.ssi, rx_status.rate_idx);
+			      rx_status.signal, rx_status.signal,
+			      rx_status.noise, rx_status.rate_idx);
 
 #ifdef CONFIG_IWL3945_DEBUG
 	if (iwl3945_debug_level & (IWL_DL_RX))
@@ -748,7 +748,7 @@ static void iwl3945_rx_reply_rx(struct iwl3945_priv *priv,
 	if (network_packet) {
 		priv->last_beacon_time = le32_to_cpu(rx_end->beacon_timestamp);
 		priv->last_tsf = le64_to_cpu(rx_end->timestamp);
-		priv->last_rx_rssi = rx_status.ssi;
+		priv->last_rx_rssi = rx_status.signal;
 		priv->last_rx_noise = rx_status.noise;
 	}
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index b7036e01145..45cbd7789bd 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -2362,7 +2362,7 @@ static void iwl4965_add_radiotap(struct iwl_priv *priv,
 				 struct ieee80211_rx_status *stats,
 				 u32 ampdu_status)
 {
-	s8 signal = stats->ssi;
+	s8 signal = stats->signal;
 	s8 noise = 0;
 	int rate = stats->rate_idx;
 	u64 tsf = stats->mactime;
@@ -2963,7 +2963,7 @@ static void iwl4965_rx_reply_rx(struct iwl_priv *priv,
 	priv->ucode_beacon_time = le32_to_cpu(rx_start->beacon_time_stamp);
 
 	/* Find max signal strength (dBm) among 3 antenna/receiver chains */
-	rx_status.ssi = iwl4965_calc_rssi(priv, rx_start);
+	rx_status.signal = iwl4965_calc_rssi(priv, rx_start);
 
 	/* Meaningful noise values are available only from beacon statistics,
 	 *   which are gathered only when associated, and indicate noise
@@ -2972,11 +2972,11 @@ static void iwl4965_rx_reply_rx(struct iwl_priv *priv,
 	if (iwl_is_associated(priv) &&
 	    !test_bit(STATUS_SCANNING, &priv->status)) {
 		rx_status.noise = priv->last_rx_noise;
-		rx_status.signal = iwl4965_calc_sig_qual(rx_status.ssi,
+		rx_status.qual = iwl4965_calc_sig_qual(rx_status.signal,
 							 rx_status.noise);
 	} else {
 		rx_status.noise = IWL_NOISE_MEAS_NOT_AVAILABLE;
-		rx_status.signal = iwl4965_calc_sig_qual(rx_status.ssi, 0);
+		rx_status.qual = iwl4965_calc_sig_qual(rx_status.signal, 0);
 	}
 
 	/* Reset beacon noise level if not associated. */
@@ -2988,7 +2988,7 @@ static void iwl4965_rx_reply_rx(struct iwl_priv *priv,
 	iwl4965_dbg_report_frame(priv, pkt, header, 1);
 
 	IWL_DEBUG_STATS_LIMIT("Rssi %d, noise %d, qual %d, TSF %llu\n",
-			      rx_status.ssi, rx_status.noise, rx_status.signal,
+			      rx_status.signal, rx_status.noise, rx_status.signal,
 			      (unsigned long long)rx_status.mactime);
 
 
@@ -3000,7 +3000,7 @@ static void iwl4965_rx_reply_rx(struct iwl_priv *priv,
 
 	network_packet = iwl4965_is_network_packet(priv, header);
 	if (network_packet) {
-		priv->last_rx_rssi = rx_status.ssi;
+		priv->last_rx_rssi = rx_status.signal;
 		priv->last_beacon_time =  priv->ucode_beacon_time;
 		priv->last_tsf = le64_to_cpu(rx_start->timestamp);
 	}
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 27e56b89d94..d3cbad2bf87 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -746,18 +746,10 @@ static void iwlcore_init_hw(struct iwl_priv *priv)
 	struct ieee80211_hw *hw = priv->hw;
 	hw->rate_control_algorithm = "iwl-4965-rs";
 
-	/* Tell mac80211 and its clients (e.g. Wireless Extensions)
-	 *	 the range of signal quality values that we'll provide.
-	 * Negative values for level/noise indicate that we'll provide dBm.
-	 * For WE, at least, non-0 values here *enable* display of values
-	 *	 in app (iwconfig). */
-	hw->max_rssi = -20; /* signal level, negative indicates dBm */
-	hw->max_noise = -20;	/* noise level, negative indicates dBm */
-	hw->max_signal = 100;	/* link quality indication (%) */
-
-	/* Tell mac80211 our Tx characteristics */
-	hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE;
-
+	/* Tell mac80211 our characteristics */
+	hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
+		    IEEE80211_HW_SIGNAL_DBM |
+		    IEEE80211_HW_NOISE_DBM;
 	/* Default value; 4 EDCA QOS priorities */
 	hw->queues = 4;
 #ifdef CONFIG_IWL4965_HT
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 4baa185ba50..c1234ff4fc9 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -8029,17 +8029,10 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e
 
 	priv->ibss_beacon = NULL;
 
-	/* Tell mac80211 and its clients (e.g. Wireless Extensions)
-	 *   the range of signal quality values that we'll provide.
-	 * Negative values for level/noise indicate that we'll provide dBm.
-	 * For WE, at least, non-0 values here *enable* display of values
-	 *   in app (iwconfig). */
-	hw->max_rssi = -20;	/* signal level, negative indicates dBm */
-	hw->max_noise = -20;	/* noise level, negative indicates dBm */
-	hw->max_signal = 100;	/* link quality indication (%) */
-
-	/* Tell mac80211 our Tx characteristics */
-	hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE;
+	/* Tell mac80211 our characteristics */
+	hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
+		    IEEE80211_HW_SIGNAL_DBM |
+		    IEEE80211_HW_NOISE_DBM;
 
 	/* 4 EDCA QOS priorities */
 	hw->queues = 4;
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 33d608a60d7..9cbef5bce0f 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -355,7 +355,7 @@ static void p54_rx_data(struct ieee80211_hw *dev, struct sk_buff *skb)
 	struct ieee80211_rx_status rx_status = {0};
 	u16 freq = le16_to_cpu(hdr->freq);
 
-	rx_status.ssi = hdr->rssi;
+	rx_status.signal = hdr->rssi;
 	/* XX correct? */
 	rx_status.rate_idx = hdr->rate & 0xf;
 	rx_status.freq = freq;
@@ -1000,9 +1000,10 @@ struct ieee80211_hw *p54_init_common(size_t priv_data_len)
 	skb_queue_head_init(&priv->tx_queue);
 	dev->wiphy->bands[IEEE80211_BAND_2GHZ] = &band_2GHz;
 	dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | /* not sure */
-		    IEEE80211_HW_RX_INCLUDES_FCS;
+		     IEEE80211_HW_RX_INCLUDES_FCS |
+		     IEEE80211_HW_SIGNAL_UNSPEC;
 	dev->channel_change_time = 1000;	/* TODO: find actual value */
-	dev->max_rssi = 127;
+	dev->max_signal = 127;
 
 	priv->tx_stats[0].limit = 5;
 	dev->queues = 1;
diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c
index 247fbbfb0f2..afa565c6362 100644
--- a/drivers/net/wireless/rt2x00/rt2400pci.c
+++ b/drivers/net/wireless/rt2x00/rt2400pci.c
@@ -1361,10 +1361,9 @@ static void rt2400pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
 	/*
 	 * Initialize all hw fields.
 	 */
-	rt2x00dev->hw->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING;
+	rt2x00dev->hw->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
+			       IEEE80211_HW_SIGNAL_DBM;
 	rt2x00dev->hw->extra_tx_headroom = 0;
-	rt2x00dev->hw->max_signal = MAX_SIGNAL;
-	rt2x00dev->hw->max_rssi = MAX_RX_SSI;
 	rt2x00dev->hw->queues = 2;
 
 	SET_IEEE80211_DEV(rt2x00dev->hw, &rt2x00dev_pci(rt2x00dev)->dev);
diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c
index 0d53c75d55d..c06f1b5e588 100644
--- a/drivers/net/wireless/rt2x00/rt2500pci.c
+++ b/drivers/net/wireless/rt2x00/rt2500pci.c
@@ -1680,10 +1680,10 @@ static void rt2500pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
 	/*
 	 * Initialize all hw fields.
 	 */
-	rt2x00dev->hw->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING;
+	rt2x00dev->hw->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
+			       IEEE80211_HW_SIGNAL_DBM;
+
 	rt2x00dev->hw->extra_tx_headroom = 0;
-	rt2x00dev->hw->max_signal = MAX_SIGNAL;
-	rt2x00dev->hw->max_rssi = MAX_RX_SSI;
 	rt2x00dev->hw->queues = 2;
 
 	SET_IEEE80211_DEV(rt2x00dev->hw, &rt2x00dev_pci(rt2x00dev)->dev);
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index 80b34d47114..88bafdf8f0f 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -1587,10 +1587,10 @@ static void rt2500usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
 	rt2x00dev->hw->flags =
 	    IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
 	    IEEE80211_HW_RX_INCLUDES_FCS |
-	    IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING;
+	    IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
+	    IEEE80211_HW_SIGNAL_DBM;
+
 	rt2x00dev->hw->extra_tx_headroom = TXD_DESC_SIZE;
-	rt2x00dev->hw->max_signal = MAX_SIGNAL;
-	rt2x00dev->hw->max_rssi = MAX_RX_SSI;
 	rt2x00dev->hw->queues = 2;
 
 	SET_IEEE80211_DEV(rt2x00dev->hw, &rt2x00dev_usb(rt2x00dev)->dev);
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 9929b15e28b..f51a50b31e1 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -600,9 +600,9 @@ void rt2x00lib_rxdone(struct queue_entry *entry,
 	rt2x00dev->link.qual.rx_success++;
 
 	rx_status->rate_idx = idx;
-	rx_status->signal =
+	rx_status->qual =
 	    rt2x00lib_calculate_link_signal(rt2x00dev, rxdesc->rssi);
-	rx_status->ssi = rxdesc->rssi;
+	rx_status->signal = rxdesc->rssi;
 	rx_status->flag = rxdesc->flags;
 	rx_status->antenna = rt2x00dev->link.ant.active.rx;
 
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index 98af4d26583..eaf23b90877 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -2245,10 +2245,9 @@ static void rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
 	 */
 	rt2x00dev->hw->flags =
 	    IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
-	    IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING;
+	    IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
+	    IEEE80211_HW_SIGNAL_DBM;
 	rt2x00dev->hw->extra_tx_headroom = 0;
-	rt2x00dev->hw->max_signal = MAX_SIGNAL;
-	rt2x00dev->hw->max_rssi = MAX_RX_SSI;
 	rt2x00dev->hw->queues = 4;
 
 	SET_IEEE80211_DEV(rt2x00dev->hw, &rt2x00dev_pci(rt2x00dev)->dev);
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index 351d95c4f50..51c5575ed02 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -1830,10 +1830,9 @@ static void rt73usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
 	 */
 	rt2x00dev->hw->flags =
 	    IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
-	    IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING;
+	    IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
+	    IEEE80211_HW_SIGNAL_DBM;
 	rt2x00dev->hw->extra_tx_headroom = TXD_DESC_SIZE;
-	rt2x00dev->hw->max_signal = MAX_SIGNAL;
-	rt2x00dev->hw->max_rssi = MAX_RX_SSI;
 	rt2x00dev->hw->queues = 4;
 
 	SET_IEEE80211_DEV(rt2x00dev->hw, &rt2x00dev_usb(rt2x00dev)->dev);
diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c
index c181f23e930..c220998cee6 100644
--- a/drivers/net/wireless/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl8180_dev.c
@@ -132,8 +132,8 @@ static void rtl8180_handle_rx(struct ieee80211_hw *dev)
 
 			rx_status.antenna = (flags2 >> 15) & 1;
 			/* TODO: improve signal/rssi reporting */
-			rx_status.signal = flags2 & 0xFF;
-			rx_status.ssi = (flags2 >> 8) & 0x7F;
+			rx_status.qual = flags2 & 0xFF;
+			rx_status.signal = (flags2 >> 8) & 0x7F;
 			/* XXX: is this correct? */
 			rx_status.rate_idx = (flags >> 20) & 0xF;
 			rx_status.freq = dev->conf.channel->center_freq;
@@ -894,9 +894,10 @@ static int __devinit rtl8180_probe(struct pci_dev *pdev,
 	dev->wiphy->bands[IEEE80211_BAND_2GHZ] = &priv->band;
 
 	dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
-		     IEEE80211_HW_RX_INCLUDES_FCS;
+		     IEEE80211_HW_RX_INCLUDES_FCS |
+		     IEEE80211_HW_SIGNAL_UNSPEC;
 	dev->queues = 1;
-	dev->max_rssi = 65;
+	dev->max_signal = 65;
 
 	reg = rtl818x_ioread32(priv, &priv->map->TX_CONF);
 	reg &= RTL818X_TX_CONF_HWVER_MASK;
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index d5787b37e1f..e14c8424868 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -261,8 +261,8 @@ static void rtl8187_rx_cb(struct urb *urb)
 	}
 
 	rx_status.antenna = (hdr->signal >> 7) & 1;
-	rx_status.signal = 64 - min(hdr->noise, (u8)64);
-	rx_status.ssi = signal;
+	rx_status.qual = 64 - min(hdr->noise, (u8)64);
+	rx_status.signal = signal;
 	rx_status.rate_idx = rate;
 	rx_status.freq = dev->conf.channel->center_freq;
 	rx_status.band = dev->conf.channel->band;
@@ -740,11 +740,11 @@ static int __devinit rtl8187_probe(struct usb_interface *intf,
 
 	priv->mode = IEEE80211_IF_TYPE_MNTR;
 	dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
-		     IEEE80211_HW_RX_INCLUDES_FCS;
+		     IEEE80211_HW_RX_INCLUDES_FCS |
+		     IEEE80211_HW_SIGNAL_UNSPEC;
 	dev->extra_tx_headroom = sizeof(struct rtl8187_tx_hdr);
 	dev->queues = 1;
-	dev->max_rssi = 65;
-	dev->max_signal = 64;
+	dev->max_signal = 65;
 
 	eeprom.data = dev;
 	eeprom.register_read = rtl8187_eeprom_register_read;
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index ee4331229f1..0c736735e21 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -638,7 +638,7 @@ static int filter_ack(struct ieee80211_hw *hw, struct ieee80211_hdr *rx_hdr,
 
 			memset(&status, 0, sizeof(status));
 			status.flags = IEEE80211_TX_STATUS_ACK;
-			status.ack_signal = stats->ssi;
+			status.ack_signal = stats->signal;
 			__skb_unlink(skb, q);
 			tx_status(hw, skb, &status, 1);
 			goto out;
@@ -691,8 +691,8 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length)
 
 	stats.freq = zd_channels[_zd_chip_get_channel(&mac->chip) - 1].center_freq;
 	stats.band = IEEE80211_BAND_2GHZ;
-	stats.ssi = status->signal_strength;
-	stats.signal = zd_rx_qual_percent(buffer,
+	stats.signal = status->signal_strength;
+	stats.qual = zd_rx_qual_percent(buffer,
 		                          length - sizeof(struct rx_status),
 		                          status);
 
@@ -997,10 +997,10 @@ struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf)
 	hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &mac->band;
 
 	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
-		    IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE;
-	hw->max_rssi = 100;
-	hw->max_signal = 100;
+		    IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
+		    IEEE80211_HW_SIGNAL_DB;
 
+	hw->max_signal = 100;
 	hw->queues = 1;
 	hw->extra_tx_headroom = sizeof(struct zd_ctrlset);
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 75a34609eed..909956c97c4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -336,13 +336,16 @@ enum mac80211_rx_flags {
  * The low-level driver should provide this information (the subset
  * supported by hardware) to the 802.11 code with each received
  * frame.
+ *
  * @mactime: value in microseconds of the 64-bit Time Synchronization Function
  * 	(TSF) timer when the first data symbol (MPDU) arrived at the hardware.
  * @band: the active band when this frame was received
  * @freq: frequency the radio was tuned to when receiving this frame, in MHz
- * @ssi: signal strength when receiving this frame
- * @signal: used as 'qual' in statistics reporting
- * @noise: PHY noise when receiving this frame
+ * @signal: signal strength when receiving this frame, either in dBm, in dB or
+ *	unspecified depending on the hardware capabilities flags
+ *	@IEEE80211_HW_SIGNAL_*
+ * @noise: noise when receiving this frame, in dBm.
+ * @qual: overall signal quality indication, in percent (0-100).
  * @antenna: antenna used
  * @rate_idx: index of data rate into band's supported rates
  * @flag: %RX_FLAG_*
@@ -351,9 +354,9 @@ struct ieee80211_rx_status {
 	u64 mactime;
 	enum ieee80211_band band;
 	int freq;
-	int ssi;
 	int signal;
 	int noise;
+	int qual;
 	int antenna;
 	int rate_idx;
 	int flag;
@@ -392,7 +395,8 @@ enum ieee80211_tx_status_flags {
  * 	relevant only if IEEE80211_TX_STATUS_AMPDU was set.
  * @ampdu_ack_map: block ack bit map for the aggregation.
  * 	relevant only if IEEE80211_TX_STATUS_AMPDU was set.
- * @ack_signal: signal strength of the ACK frame
+ * @ack_signal: signal strength of the ACK frame either in dBm, dB or unspec
+ *	depending on hardware capabilites flags @IEEE80211_HW_SIGNAL_*
  */
 struct ieee80211_tx_status {
 	struct ieee80211_tx_control control;
@@ -703,6 +707,25 @@ enum ieee80211_tkip_key_type {
  * @IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE:
  *	Hardware is not capable of receiving frames with short preamble on
  *	the 2.4 GHz band.
+ *
+ * @IEEE80211_HW_SIGNAL_UNSPEC:
+ *	Hardware can provide signal values but we don't know its units. We
+ *	expect values between 0 and @max_signal.
+ *	If possible please provide dB or dBm instead.
+ *
+ * @IEEE80211_HW_SIGNAL_DB:
+ *	Hardware gives signal values in dB, decibel difference from an
+ *	arbitrary, fixed reference. We expect values between 0 and @max_signal.
+ *	If possible please provide dBm instead.
+ *
+ * @IEEE80211_HW_SIGNAL_DBM:
+ *	Hardware gives signal values in dBm, decibel difference from
+ *	one milliwatt. This is the preferred method since it is standardized
+ *	between different devices. @max_signal does not need to be set.
+ *
+ * @IEEE80211_HW_NOISE_DBM:
+ *	Hardware can provide noise (radio interference) values in units dBm,
+ *      decibel difference from one milliwatt.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE		= 1<<0,
@@ -710,6 +733,10 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING	= 1<<2,
 	IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE		= 1<<3,
 	IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE	= 1<<4,
+	IEEE80211_HW_SIGNAL_UNSPEC			= 1<<5,
+	IEEE80211_HW_SIGNAL_DB				= 1<<6,
+	IEEE80211_HW_SIGNAL_DBM				= 1<<7,
+	IEEE80211_HW_NOISE_DBM				= 1<<8,
 };
 
 /**
@@ -740,12 +767,8 @@ enum ieee80211_hw_flags {
  *
  * @channel_change_time: time (in microseconds) it takes to change channels.
  *
- * @max_rssi: Maximum value for ssi in RX information, use
- *	negative numbers for dBm and 0 to indicate no support.
- *
- * @max_signal: like @max_rssi, but for the signal value.
- *
- * @max_noise: like @max_rssi, but for the noise value.
+ * @max_signal: Maximum value for signal (rssi) in RX information, used
+ *     only when @IEEE80211_HW_SIGNAL_UNSPEC or @IEEE80211_HW_SIGNAL_DB
  *
  * @queues: number of available hardware transmit queues for
  *	data packets. WMM/QoS requires at least four, these
@@ -775,9 +798,7 @@ struct ieee80211_hw {
 	int channel_change_time;
 	int vif_data_size;
 	u16 queues, ampdu_queues;
-	s8 max_rssi;
 	s8 max_signal;
-	s8 max_noise;
 };
 
 /**
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 200ee1e6372..69dbc342a46 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -391,7 +391,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 
 		wstats.updated = 0;
 		if (rx_stats->mask & IEEE80211_STATMASK_RSSI) {
-			wstats.level = rx_stats->rssi;
+			wstats.level = rx_stats->signal;
 			wstats.updated |= IW_QUAL_LEVEL_UPDATED;
 		} else
 			wstats.updated |= IW_QUAL_LEVEL_INVALID;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 1f00273d99c..a2cc0284c9d 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -63,8 +63,8 @@ STA_FILE(tx_fragments, tx_fragments, LU);
 STA_FILE(tx_filtered, tx_filtered_count, LU);
 STA_FILE(tx_retry_failed, tx_retry_failed, LU);
 STA_FILE(tx_retry_count, tx_retry_count, LU);
-STA_FILE(last_rssi, last_rssi, D);
 STA_FILE(last_signal, last_signal, D);
+STA_FILE(last_qual, last_qual, D);
 STA_FILE(last_noise, last_noise, D);
 STA_FILE(channel_use, channel_use, D);
 STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 173b7fcb302..ed0d9b35ae6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -82,7 +82,7 @@ struct ieee80211_sta_bss {
 	u16 capability; /* host byte order */
 	enum ieee80211_band band;
 	int freq;
-	int rssi, signal, noise;
+	int signal, noise, qual;
 	u8 *wpa_ie;
 	size_t wpa_ie_len;
 	u8 *rsn_ie;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f277407f0f5..390df48dfd4 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1702,13 +1702,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	local->hw.conf.beacon_int = 1000;
 
-	local->wstats_flags |= local->hw.max_rssi ?
-			       IW_QUAL_LEVEL_UPDATED : IW_QUAL_LEVEL_INVALID;
-	local->wstats_flags |= local->hw.max_signal ?
+	local->wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC |
+						  IEEE80211_HW_SIGNAL_DB |
+						  IEEE80211_HW_SIGNAL_DBM) ?
 			       IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID;
-	local->wstats_flags |= local->hw.max_noise ?
+	local->wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ?
 			       IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID;
-	if (local->hw.max_rssi < 0 || local->hw.max_noise < 0)
+	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
 		local->wstats_flags |= IW_QUAL_DBM;
 
 	result = sta_info_start(local);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6c4b27be35d..dda30e98aa1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1959,8 +1959,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 					   local->hw.conf.channel->center_freq,
 					   ifsta->ssid, ifsta->ssid_len);
 		if (bss) {
-			sta->last_rssi = bss->rssi;
 			sta->last_signal = bss->signal;
+			sta->last_qual = bss->qual;
 			sta->last_noise = bss->noise;
 			ieee80211_rx_bss_put(dev, bss);
 		}
@@ -2629,9 +2629,9 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 
 	bss->timestamp = beacon_timestamp;
 	bss->last_update = jiffies;
-	bss->rssi = rx_status->ssi;
 	bss->signal = rx_status->signal;
 	bss->noise = rx_status->noise;
+	bss->qual = rx_status->qual;
 	if (!beacon && !bss->probe_resp)
 		bss->probe_resp = true;
 
@@ -3427,9 +3427,9 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
 		    !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len))
 			continue;
 
-		if (!selected || top_rssi < bss->rssi) {
+		if (!selected || top_rssi < bss->signal) {
 			selected = bss;
-			top_rssi = bss->rssi;
+			top_rssi = bss->signal;
 		}
 	}
 	if (selected)
@@ -4060,8 +4060,8 @@ ieee80211_sta_scan_result(struct net_device *dev,
 
 	memset(&iwe, 0, sizeof(iwe));
 	iwe.cmd = IWEVQUAL;
-	iwe.u.qual.qual = bss->signal;
-	iwe.u.qual.level = bss->rssi;
+	iwe.u.qual.qual = bss->qual;
+	iwe.u.qual.level = bss->signal;
 	iwe.u.qual.noise = bss->noise;
 	iwe.u.qual.updated = local->wstats_flags;
 	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b399e09ec7a..474f13662c8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -209,7 +209,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 				cpu_to_le16(IEEE80211_CHAN_DYN |
 					    IEEE80211_CHAN_2GHZ);
 
-		rtfixed->antsignal = status->ssi;
+		rtfixed->antsignal = status->signal;
 		rthdr->it_len = cpu_to_le16(rtap_len);
 	}
 
@@ -719,8 +719,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 
 	sta->rx_fragments++;
 	sta->rx_bytes += rx->skb->len;
-	sta->last_rssi = rx->status->ssi;
 	sta->last_signal = rx->status->signal;
+	sta->last_qual = rx->status->qual;
 	sta->last_noise = rx->status->noise;
 
 	if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) {
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 97a61c39ad9..e89cc165554 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -216,8 +216,8 @@ struct sta_info {
 				       * from this STA */
 	unsigned long rx_fragments; /* number of received MPDUs */
 	unsigned long rx_dropped; /* number of dropped MPDUs from this STA */
-	int last_rssi; /* RSSI of last received frame from this STA */
 	int last_signal; /* signal of last received frame from this STA */
+	int last_qual;  /* qual of last received frame from this STA */
 	int last_noise; /* noise of last received frame from this STA */
 	/* last received seq/frag number from this STA (per RX queue) */
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 76e1de1dc73..6a342a9a40c 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -169,14 +169,26 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev,
 	range->num_encoding_sizes = 2;
 	range->max_encoding_tokens = NUM_DEFAULT_KEYS;
 
-	range->max_qual.qual = local->hw.max_signal;
-	range->max_qual.level = local->hw.max_rssi;
-	range->max_qual.noise = local->hw.max_noise;
+	if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC ||
+	    local->hw.flags & IEEE80211_HW_SIGNAL_DB)
+		range->max_qual.level = local->hw.max_signal;
+	else if  (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+		range->max_qual.level = -110;
+	else
+		range->max_qual.level = 0;
+
+	if (local->hw.flags & IEEE80211_HW_NOISE_DBM)
+		range->max_qual.noise = -110;
+	else
+		range->max_qual.noise = 0;
+
+	range->max_qual.qual = 100;
 	range->max_qual.updated = local->wstats_flags;
 
-	range->avg_qual.qual = local->hw.max_signal/2;
-	range->avg_qual.level = 0;
-	range->avg_qual.noise = 0;
+	range->avg_qual.qual = 50;
+	/* not always true but better than nothing */
+	range->avg_qual.level = range->max_qual.level / 2;
+	range->avg_qual.noise = range->max_qual.noise / 2;
 	range->avg_qual.updated = local->wstats_flags;
 
 	range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 |
@@ -996,8 +1008,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev
 		wstats->qual.noise = 0;
 		wstats->qual.updated = IW_QUAL_ALL_INVALID;
 	} else {
-		wstats->qual.level = sta->last_rssi;
-		wstats->qual.qual = sta->last_signal;
+		wstats->qual.level = sta->last_signal;
+		wstats->qual.qual = sta->last_qual;
 		wstats->qual.noise = sta->last_noise;
 		wstats->qual.updated = local->wstats_flags;
 	}
-- 
cgit v1.2.3-70-g09d2


From 601ae7f25aea58f208a7f640f6174aac0652403a Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 8 May 2008 19:22:43 +0200
Subject: mac80211: make rx radiotap header more flexible

use hw flags and rx flags to determine which fields are present in the header
and use all available information from the driver.

make sure radiotap header starts at a naturally aligned address (mod 8) for
all radiotap fields.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 195 +++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 133 insertions(+), 62 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 474f13662c8..1159a43a3df 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -77,6 +77,134 @@ static inline int should_drop_frame(struct ieee80211_rx_status *status,
 	return 0;
 }
 
+static int
+ieee80211_rx_radiotap_len(struct ieee80211_local *local,
+			  struct ieee80211_rx_status *status)
+{
+	int len;
+
+	/* always present fields */
+	len = sizeof(struct ieee80211_radiotap_header) + 9;
+
+	if (status->flag & RX_FLAG_TSFT)
+		len += 8;
+	if (local->hw.flags & IEEE80211_HW_SIGNAL_DB ||
+	    local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+		len += 1;
+	if (local->hw.flags & IEEE80211_HW_NOISE_DBM)
+		len += 1;
+
+	if (len & 1) /* padding for RX_FLAGS if necessary */
+		len++;
+
+	/* make sure radiotap starts at a naturally aligned address */
+	if (len % 8)
+		len = roundup(len, 8);
+
+	return len;
+}
+
+/**
+ * ieee80211_add_rx_radiotap_header - add radiotap header
+ *
+ * add a radiotap header containing all the fields which the hardware provided.
+ */
+static void
+ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
+				 struct sk_buff *skb,
+				 struct ieee80211_rx_status *status,
+				 struct ieee80211_rate *rate,
+				 int rtap_len)
+{
+	struct ieee80211_radiotap_header *rthdr;
+	unsigned char *pos;
+
+	rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len);
+	memset(rthdr, 0, rtap_len);
+
+	/* radiotap header, set always present flags */
+	rthdr->it_present =
+		cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) |
+			    (1 << IEEE80211_RADIOTAP_RATE) |
+			    (1 << IEEE80211_RADIOTAP_CHANNEL) |
+			    (1 << IEEE80211_RADIOTAP_ANTENNA) |
+			    (1 << IEEE80211_RADIOTAP_RX_FLAGS));
+	rthdr->it_len = cpu_to_le16(rtap_len);
+
+	pos = (unsigned char *)(rthdr+1);
+
+	/* the order of the following fields is important */
+
+	/* IEEE80211_RADIOTAP_TSFT */
+	if (status->flag & RX_FLAG_TSFT) {
+		*(__le64 *)pos = cpu_to_le64(status->mactime);
+		rthdr->it_present |=
+			cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
+		pos += 8;
+	}
+
+	/* IEEE80211_RADIOTAP_FLAGS */
+	if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
+		*pos |= IEEE80211_RADIOTAP_F_FCS;
+	pos++;
+
+	/* IEEE80211_RADIOTAP_RATE */
+	*pos = rate->bitrate / 5;
+	pos++;
+
+	/* IEEE80211_RADIOTAP_CHANNEL */
+	*(__le16 *)pos = cpu_to_le16(status->freq);
+	pos += 2;
+	if (status->band == IEEE80211_BAND_5GHZ)
+		*(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM |
+					     IEEE80211_CHAN_5GHZ);
+	else
+		*(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_DYN |
+					     IEEE80211_CHAN_2GHZ);
+	pos += 2;
+
+	/* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */
+	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) {
+		*pos = status->signal;
+		rthdr->it_present |=
+			cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL);
+		pos++;
+	}
+
+	/* IEEE80211_RADIOTAP_DBM_ANTNOISE */
+	if (local->hw.flags & IEEE80211_HW_NOISE_DBM) {
+		*pos = status->noise;
+		rthdr->it_present |=
+			cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTNOISE);
+		pos++;
+	}
+
+	/* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */
+
+	/* IEEE80211_RADIOTAP_ANTENNA */
+	*pos = status->antenna;
+	pos++;
+
+	/* IEEE80211_RADIOTAP_DB_ANTSIGNAL */
+	if (local->hw.flags & IEEE80211_HW_SIGNAL_DB) {
+		*pos = status->signal;
+		rthdr->it_present |=
+			cpu_to_le32(1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL);
+		pos++;
+	}
+
+	/* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */
+
+	/* IEEE80211_RADIOTAP_RX_FLAGS */
+	/* ensure 2 byte alignment for the 2 byte field as required */
+	if ((pos - (unsigned char *)rthdr) & 1)
+		pos++;
+	/* FIXME: when radiotap gets a 'bad PLCP' flag use it here */
+	if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
+		*(__le16 *)pos |= cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS);
+	pos += 2;
+}
+
 /*
  * This function copies a received frame to all monitor interfaces and
  * returns a cleaned-up SKB that no longer includes the FCS nor the
@@ -89,17 +217,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 {
 	struct ieee80211_sub_if_data *sdata;
 	int needed_headroom = 0;
-	struct ieee80211_radiotap_header *rthdr;
-	__le64 *rttsft = NULL;
-	struct ieee80211_rtap_fixed_data {
-		u8 flags;
-		u8 rate;
-		__le16 chan_freq;
-		__le16 chan_flags;
-		u8 antsignal;
-		u8 padding_for_rxflags;
-		__le16 rx_flags;
-	} __attribute__ ((packed)) *rtfixed;
 	struct sk_buff *skb, *skb2;
 	struct net_device *prev_dev = NULL;
 	int present_fcs_len = 0;
@@ -116,8 +233,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 	if (status->flag & RX_FLAG_RADIOTAP)
 		rtap_len = ieee80211_get_radiotap_len(origskb->data);
 	else
-		/* room for radiotap header, always present fields and TSFT */
-		needed_headroom = sizeof(*rthdr) + sizeof(*rtfixed) + 8;
+		/* room for the radiotap header based on driver features */
+		needed_headroom = ieee80211_rx_radiotap_len(local, status);
 
 	if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
 		present_fcs_len = FCS_LEN;
@@ -163,55 +280,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 	}
 
 	/* if necessary, prepend radiotap information */
-	if (!(status->flag & RX_FLAG_RADIOTAP)) {
-		rtfixed = (void *) skb_push(skb, sizeof(*rtfixed));
-		rtap_len = sizeof(*rthdr) + sizeof(*rtfixed);
-		if (status->flag & RX_FLAG_TSFT) {
-			rttsft = (void *) skb_push(skb, sizeof(*rttsft));
-			rtap_len += 8;
-		}
-		rthdr = (void *) skb_push(skb, sizeof(*rthdr));
-		memset(rthdr, 0, sizeof(*rthdr));
-		memset(rtfixed, 0, sizeof(*rtfixed));
-		rthdr->it_present =
-			cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) |
-				    (1 << IEEE80211_RADIOTAP_RATE) |
-				    (1 << IEEE80211_RADIOTAP_CHANNEL) |
-				    (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) |
-				    (1 << IEEE80211_RADIOTAP_RX_FLAGS));
-		rtfixed->flags = 0;
-		if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
-			rtfixed->flags |= IEEE80211_RADIOTAP_F_FCS;
-
-		if (rttsft) {
-			*rttsft = cpu_to_le64(status->mactime);
-			rthdr->it_present |=
-				cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
-		}
-
-		/* FIXME: when radiotap gets a 'bad PLCP' flag use it here */
-		rtfixed->rx_flags = 0;
-		if (status->flag &
-		    (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
-			rtfixed->rx_flags |=
-				cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS);
-
-		rtfixed->rate = rate->bitrate / 5;
-
-		rtfixed->chan_freq = cpu_to_le16(status->freq);
-
-		if (status->band == IEEE80211_BAND_5GHZ)
-			rtfixed->chan_flags =
-				cpu_to_le16(IEEE80211_CHAN_OFDM |
-					    IEEE80211_CHAN_5GHZ);
-		else
-			rtfixed->chan_flags =
-				cpu_to_le16(IEEE80211_CHAN_DYN |
-					    IEEE80211_CHAN_2GHZ);
-
-		rtfixed->antsignal = status->signal;
-		rthdr->it_len = cpu_to_le16(rtap_len);
-	}
+	if (!(status->flag & RX_FLAG_RADIOTAP))
+		ieee80211_add_rx_radiotap_header(local, skb, status, rate,
+						 needed_headroom);
 
 	skb_reset_mac_header(skb);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
-- 
cgit v1.2.3-70-g09d2


From 2940bb69fd84047e78fdb1868a8b894df5584255 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 8 May 2008 14:30:18 -0700
Subject: wireless: Add missing locking to cfg80211_dev_rename

device_rename only performs useful and race free validity
checking at the optional sysfs level so depending on it
for all of the validity checking in cfg80211_dev_rename
is racy.

Instead implement all of the needed validity checking
and locking in cfg80211_dev_rename.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/core.c | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 80afacdae46..f1da0b93bc5 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -143,8 +143,11 @@ void cfg80211_put_dev(struct cfg80211_registered_device *drv)
 int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
 			char *newname)
 {
+	struct cfg80211_registered_device *drv;
 	int idx, taken = -1, result, digits;
 
+	mutex_lock(&cfg80211_drv_mutex);
+
 	/* prohibit calling the thing phy%d when %d is not its number */
 	sscanf(newname, PHY_NAME "%d%n", &idx, &taken);
 	if (taken == strlen(newname) && idx != rdev->idx) {
@@ -156,14 +159,30 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
 		 * deny the name if it is phy<idx> where <idx> is printed
 		 * without leading zeroes. taken == strlen(newname) here
 		 */
+		result = -EINVAL;
 		if (taken == strlen(PHY_NAME) + digits)
-			return -EINVAL;
+			goto out_unlock;
+	}
+
+
+	/* Ignore nop renames */
+	result = 0;
+	if (strcmp(newname, dev_name(&rdev->wiphy.dev)) == 0)
+		goto out_unlock;
+
+	/* Ensure another device does not already have this name. */
+	list_for_each_entry(drv, &cfg80211_drv_list, list) {
+		result = -EINVAL;
+		if (strcmp(newname, dev_name(&drv->wiphy.dev)) == 0)
+			goto out_unlock;
 	}
 
-	/* this will check for collisions */
+	/* this will only check for collisions in sysfs
+	 * which is not even always compiled in.
+	 */
 	result = device_rename(&rdev->wiphy.dev, newname);
 	if (result)
-		return result;
+		goto out_unlock;
 
 	if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
 			    rdev->wiphy.debugfsdir,
@@ -172,9 +191,13 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
 		printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n",
 		       newname);
 
-	nl80211_notify_dev_rename(rdev);
+	result = 0;
+out_unlock:
+	mutex_unlock(&cfg80211_drv_mutex);
+	if (result == 0)
+		nl80211_notify_dev_rename(rdev);
 
-	return 0;
+	return result;
 }
 
 /* exported functions */
-- 
cgit v1.2.3-70-g09d2


From b0a6717994a4e00ee19372e1bdaab53572ae025c Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Tue, 13 May 2008 15:03:02 +0200
Subject: mac80211: Set IEEE80211_TXPD_REQ_TX_STATUS for all TX frames

All interfaces should set the IEEE80211_TXPD_REQ_TX_STATUS flag for all TX frames
which will force the master interface to set the IEEE80211_TX_CTL_REQ_TX_STATUS
flag. This in turn will allow drivers to check for that flag before reporting
the TX status to mac80211.

This is very usefull when frames (like beacons, RTS and CTS-to-self) should not
be reported back to mac80211. Later we could add more extensive checks to
exclude more frames from being reported, or let mac80211 decide if it wants
the frame for status reporting or not.

v2: Monitor interfaces should also set IEEE80211_TXPD_REQ_TX_STATUS

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5f31a6233e1..4c25fd5d76a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1337,6 +1337,8 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb,
 	pkt_data->ifindex = dev->ifindex;
 
 	pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
+	/* Interfaces should always request a status report */
+	pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
 
 	/*
 	 * fix up the pointers accounting for the radiotap
@@ -1618,6 +1620,9 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	if (ethertype == ETH_P_PAE)
 		pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME;
 
+	/* Interfaces should always request a status report */
+	pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
+
 	skb->dev = local->mdev;
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
-- 
cgit v1.2.3-70-g09d2


From 82524746c27fa418c250a56dd7606b9d3fc79826 Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <fbuihuu@gmail.com>
Date: Mon, 12 May 2008 21:21:05 +0200
Subject: rcu: split list.h and move rcu-protected lists into rculist.h

Move rcu-protected lists from list.h into a new header file rculist.h.

This is done because list are a very used primitive structure all over the
kernel and it's currently impossible to include other header files in this
list.h without creating some circular dependencies.

For example, list.h implements rcu-protected list and uses rcu_dereference()
without including rcupdate.h.  It actually compiles because users of
rcu_dereference() are macros.  Others RCU functions could be used too but
aren't probably because of this.

Therefore this patch creates rculist.h which includes rcupdates without to
many changes/troubles.

Signed-off-by: Franck Bui-Huu <fbuihuu@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Josh Triplett <josh@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/sn/kernel/irq.c                       |   1 +
 crypto/async_tx/async_tx.c                      |   1 +
 drivers/infiniband/hw/ipath/ipath_verbs.c       |   1 +
 drivers/infiniband/hw/ipath/ipath_verbs_mcast.c |   3 +-
 drivers/net/macvlan.c                           |   2 +-
 include/linux/dcache.h                          |   1 +
 include/linux/list.h                            | 367 ----------------------
 include/linux/rculist.h                         | 396 ++++++++++++++++++++++++
 kernel/pid.c                                    |   1 +
 lib/textsearch.c                                |   1 +
 net/802/psnap.c                                 |   1 +
 net/8021q/vlan.c                                |   1 +
 net/bridge/br_fdb.c                             |   1 +
 net/bridge/br_stp.c                             |   1 +
 net/netlabel/netlabel_domainhash.c              |   3 +-
 15 files changed, 409 insertions(+), 372 deletions(-)
 create mode 100644 include/linux/rculist.h

(limited to 'net')

diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 53351c3cd7b..96c31b4180c 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -11,6 +11,7 @@
 #include <linux/irq.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
+#include <linux/rculist.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/intr.h>
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index c6e772fc5cc..095c798d317 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -23,6 +23,7 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  *
  */
+#include <linux/rculist.h>
 #include <linux/kernel.h>
 #include <linux/async_tx.h>
 
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index e0ec540042b..5d830d87ebc 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -35,6 +35,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <linux/io.h>
 #include <linux/utsname.h>
+#include <linux/rculist.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index 9e5abf9c309..d73e3223287 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -31,8 +31,7 @@
  * SOFTWARE.
  */
 
-#include <linux/list.h>
-#include <linux/rcupdate.h>
+#include <linux/rculist.h>
 
 #include "ipath_verbs.h"
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index c36a03ae9bf..860d75d81f8 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -20,7 +20,7 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 2a6639407c8..1f5cebf10a2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -3,6 +3,7 @@
 
 #include <asm/atomic.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/spinlock.h>
 #include <linux/cache.h>
 #include <linux/rcupdate.h>
diff --git a/include/linux/list.h b/include/linux/list.h
index 08cf4f65188..139ec41d9c2 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -84,65 +84,6 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head)
 	__list_add(new, head->prev, head);
 }
 
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_add_rcu(struct list_head * new,
-		struct list_head * prev, struct list_head * next)
-{
-	new->next = next;
-	new->prev = prev;
-	smp_wmb();
-	next->prev = new;
-	prev->next = new;
-}
-
-/**
- * list_add_rcu - add a new entry to rcu-protected list
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as list_add_rcu()
- * or list_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * list_for_each_entry_rcu().
- */
-static inline void list_add_rcu(struct list_head *new, struct list_head *head)
-{
-	__list_add_rcu(new, head, head->next);
-}
-
-/**
- * list_add_tail_rcu - add a new entry to rcu-protected list
- * @new: new entry to be added
- * @head: list head to add it before
- *
- * Insert a new entry before the specified head.
- * This is useful for implementing queues.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as list_add_tail_rcu()
- * or list_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * list_for_each_entry_rcu().
- */
-static inline void list_add_tail_rcu(struct list_head *new,
-					struct list_head *head)
-{
-	__list_add_rcu(new, head->prev, head);
-}
-
 /*
  * Delete a list entry by making the prev/next entries
  * point to each other.
@@ -173,36 +114,6 @@ static inline void list_del(struct list_head *entry)
 extern void list_del(struct list_head *entry);
 #endif
 
-/**
- * list_del_rcu - deletes entry from list without re-initialization
- * @entry: the element to delete from the list.
- *
- * Note: list_empty() on entry does not return true after this,
- * the entry is in an undefined state. It is useful for RCU based
- * lockfree traversal.
- *
- * In particular, it means that we can not poison the forward
- * pointers that may still be used for walking the list.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as list_del_rcu()
- * or list_add_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * list_for_each_entry_rcu().
- *
- * Note that the caller is not permitted to immediately free
- * the newly deleted entry.  Instead, either synchronize_rcu()
- * or call_rcu() must be used to defer freeing until an RCU
- * grace period has elapsed.
- */
-static inline void list_del_rcu(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-	entry->prev = LIST_POISON2;
-}
-
 /**
  * list_replace - replace old entry by new one
  * @old : the element to be replaced
@@ -226,25 +137,6 @@ static inline void list_replace_init(struct list_head *old,
 	INIT_LIST_HEAD(old);
 }
 
-/**
- * list_replace_rcu - replace old entry by new one
- * @old : the element to be replaced
- * @new : the new element to insert
- *
- * The @old entry will be replaced with the @new entry atomically.
- * Note: @old should not be empty.
- */
-static inline void list_replace_rcu(struct list_head *old,
-				struct list_head *new)
-{
-	new->next = old->next;
-	new->prev = old->prev;
-	smp_wmb();
-	new->next->prev = new;
-	new->prev->next = new;
-	old->prev = LIST_POISON2;
-}
-
 /**
  * list_del_init - deletes entry from list and reinitialize it.
  * @entry: the element to delete from the list.
@@ -368,62 +260,6 @@ static inline void list_splice_init(struct list_head *list,
 	}
 }
 
-/**
- * list_splice_init_rcu - splice an RCU-protected list into an existing list.
- * @list:	the RCU-protected list to splice
- * @head:	the place in the list to splice the first list into
- * @sync:	function to sync: synchronize_rcu(), synchronize_sched(), ...
- *
- * @head can be RCU-read traversed concurrently with this function.
- *
- * Note that this function blocks.
- *
- * Important note: the caller must take whatever action is necessary to
- *	prevent any other updates to @head.  In principle, it is possible
- *	to modify the list as soon as sync() begins execution.
- *	If this sort of thing becomes necessary, an alternative version
- *	based on call_rcu() could be created.  But only if -really-
- *	needed -- there is no shortage of RCU API members.
- */
-static inline void list_splice_init_rcu(struct list_head *list,
-					struct list_head *head,
-					void (*sync)(void))
-{
-	struct list_head *first = list->next;
-	struct list_head *last = list->prev;
-	struct list_head *at = head->next;
-
-	if (list_empty(head))
-		return;
-
-	/* "first" and "last" tracking list, so initialize it. */
-
-	INIT_LIST_HEAD(list);
-
-	/*
-	 * At this point, the list body still points to the source list.
-	 * Wait for any readers to finish using the list before splicing
-	 * the list body into the new list.  Any new readers will see
-	 * an empty list.
-	 */
-
-	sync();
-
-	/*
-	 * Readers are finished with the source list, so perform splice.
-	 * The order is important if the new list is global and accessible
-	 * to concurrent RCU readers.  Note that RCU readers are not
-	 * permitted to traverse the prev pointers without excluding
-	 * this function.
-	 */
-
-	last->next = at;
-	smp_wmb();
-	head->next = first;
-	first->prev = head;
-	at->prev = last;
-}
-
 /**
  * list_entry - get the struct for this entry
  * @ptr:	the &struct list_head pointer.
@@ -629,57 +465,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
 	     &pos->member != (head); 					\
 	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
 
-/**
- * list_for_each_rcu	-	iterate over an rcu-protected list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * This list-traversal primitive may safely run concurrently with
- * the _rcu list-mutation primitives such as list_add_rcu()
- * as long as the traversal is guarded by rcu_read_lock().
- */
-#define list_for_each_rcu(pos, head) \
-	for (pos = rcu_dereference((head)->next); \
-		prefetch(pos->next), pos != (head); \
-		pos = rcu_dereference(pos->next))
-
-#define __list_for_each_rcu(pos, head) \
-	for (pos = rcu_dereference((head)->next); \
-		pos != (head); \
-		pos = rcu_dereference(pos->next))
-
-/**
- * list_for_each_entry_rcu	-	iterate over rcu list of given type
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * This list-traversal primitive may safely run concurrently with
- * the _rcu list-mutation primitives such as list_add_rcu()
- * as long as the traversal is guarded by rcu_read_lock().
- */
-#define list_for_each_entry_rcu(pos, head, member) \
-	for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
-		prefetch(pos->member.next), &pos->member != (head); \
-		pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
-
-
-/**
- * list_for_each_continue_rcu
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * Iterate over an rcu-protected list, continuing after current point.
- *
- * This list-traversal primitive may safely run concurrently with
- * the _rcu list-mutation primitives such as list_add_rcu()
- * as long as the traversal is guarded by rcu_read_lock().
- */
-#define list_for_each_continue_rcu(pos, head) \
-	for ((pos) = rcu_dereference((pos)->next); \
-		prefetch((pos)->next), (pos) != (head); \
-		(pos) = rcu_dereference((pos)->next))
-
 /*
  * Double linked lists with a single pointer list head.
  * Mostly useful for hash tables where the two pointer list head is
@@ -730,31 +515,6 @@ static inline void hlist_del(struct hlist_node *n)
 	n->pprev = LIST_POISON2;
 }
 
-/**
- * hlist_del_rcu - deletes entry from hash list without re-initialization
- * @n: the element to delete from the hash list.
- *
- * Note: list_unhashed() on entry does not return true after this,
- * the entry is in an undefined state. It is useful for RCU based
- * lockfree traversal.
- *
- * In particular, it means that we can not poison the forward
- * pointers that may still be used for walking the hash list.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as hlist_add_head_rcu()
- * or hlist_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * hlist_for_each_entry().
- */
-static inline void hlist_del_rcu(struct hlist_node *n)
-{
-	__hlist_del(n);
-	n->pprev = LIST_POISON2;
-}
-
 static inline void hlist_del_init(struct hlist_node *n)
 {
 	if (!hlist_unhashed(n)) {
@@ -763,27 +523,6 @@ static inline void hlist_del_init(struct hlist_node *n)
 	}
 }
 
-/**
- * hlist_replace_rcu - replace old entry by new one
- * @old : the element to be replaced
- * @new : the new element to insert
- *
- * The @old entry will be replaced with the @new entry atomically.
- */
-static inline void hlist_replace_rcu(struct hlist_node *old,
-					struct hlist_node *new)
-{
-	struct hlist_node *next = old->next;
-
-	new->next = next;
-	new->pprev = old->pprev;
-	smp_wmb();
-	if (next)
-		new->next->pprev = &new->next;
-	*new->pprev = new;
-	old->pprev = LIST_POISON2;
-}
-
 static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
 {
 	struct hlist_node *first = h->first;
@@ -794,38 +533,6 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
 	n->pprev = &h->first;
 }
 
-
-/**
- * hlist_add_head_rcu
- * @n: the element to add to the hash list.
- * @h: the list to add to.
- *
- * Description:
- * Adds the specified element to the specified hlist,
- * while permitting racing traversals.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as hlist_add_head_rcu()
- * or hlist_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * hlist_for_each_entry_rcu(), used to prevent memory-consistency
- * problems on Alpha CPUs.  Regardless of the type of CPU, the
- * list-traversal primitive must be guarded by rcu_read_lock().
- */
-static inline void hlist_add_head_rcu(struct hlist_node *n,
-					struct hlist_head *h)
-{
-	struct hlist_node *first = h->first;
-	n->next = first;
-	n->pprev = &h->first;
-	smp_wmb();
-	if (first)
-		first->pprev = &n->next;
-	h->first = n;
-}
-
 /* next must be != NULL */
 static inline void hlist_add_before(struct hlist_node *n,
 					struct hlist_node *next)
@@ -847,63 +554,6 @@ static inline void hlist_add_after(struct hlist_node *n,
 		next->next->pprev  = &next->next;
 }
 
-/**
- * hlist_add_before_rcu
- * @n: the new element to add to the hash list.
- * @next: the existing element to add the new element before.
- *
- * Description:
- * Adds the specified element to the specified hlist
- * before the specified node while permitting racing traversals.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as hlist_add_head_rcu()
- * or hlist_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * hlist_for_each_entry_rcu(), used to prevent memory-consistency
- * problems on Alpha CPUs.
- */
-static inline void hlist_add_before_rcu(struct hlist_node *n,
-					struct hlist_node *next)
-{
-	n->pprev = next->pprev;
-	n->next = next;
-	smp_wmb();
-	next->pprev = &n->next;
-	*(n->pprev) = n;
-}
-
-/**
- * hlist_add_after_rcu
- * @prev: the existing element to add the new element after.
- * @n: the new element to add to the hash list.
- *
- * Description:
- * Adds the specified element to the specified hlist
- * after the specified node while permitting racing traversals.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as hlist_add_head_rcu()
- * or hlist_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * hlist_for_each_entry_rcu(), used to prevent memory-consistency
- * problems on Alpha CPUs.
- */
-static inline void hlist_add_after_rcu(struct hlist_node *prev,
-				       struct hlist_node *n)
-{
-	n->next = prev->next;
-	n->pprev = &prev->next;
-	smp_wmb();
-	prev->next = n;
-	if (n->next)
-		n->next->pprev = &n->next;
-}
-
 #define hlist_entry(ptr, type, member) container_of(ptr,type,member)
 
 #define hlist_for_each(pos, head) \
@@ -964,21 +614,4 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
 	     pos = n)
 
-/**
- * hlist_for_each_entry_rcu - iterate over rcu list of given type
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- *
- * This list-traversal primitive may safely run concurrently with
- * the _rcu list-mutation primitives such as hlist_add_head_rcu()
- * as long as the traversal is guarded by rcu_read_lock().
- */
-#define hlist_for_each_entry_rcu(tpos, pos, head, member)		 \
-	for (pos = rcu_dereference((head)->first);			 \
-	        pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = rcu_dereference(pos->next))
-
 #endif
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
new file mode 100644
index 00000000000..aa9b3eb1568
--- /dev/null
+++ b/include/linux/rculist.h
@@ -0,0 +1,396 @@
+#ifndef _LINUX_RCULIST_H
+#define _LINUX_RCULIST_H
+
+#ifdef __KERNEL__
+
+/*
+ * RCU-protected list version
+ */
+#include <linux/list.h>
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add_rcu(struct list_head *new,
+		struct list_head *prev, struct list_head *next)
+{
+	new->next = next;
+	new->prev = prev;
+	smp_wmb();
+	next->prev = new;
+	prev->next = new;
+}
+
+/**
+ * list_add_rcu - add a new entry to rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as list_add_rcu()
+ * or list_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * list_for_each_entry_rcu().
+ */
+static inline void list_add_rcu(struct list_head *new, struct list_head *head)
+{
+	__list_add_rcu(new, head, head->next);
+}
+
+/**
+ * list_add_tail_rcu - add a new entry to rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as list_add_tail_rcu()
+ * or list_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * list_for_each_entry_rcu().
+ */
+static inline void list_add_tail_rcu(struct list_head *new,
+					struct list_head *head)
+{
+	__list_add_rcu(new, head->prev, head);
+}
+
+/**
+ * list_del_rcu - deletes entry from list without re-initialization
+ * @entry: the element to delete from the list.
+ *
+ * Note: list_empty() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the list.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as list_del_rcu()
+ * or list_add_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * list_for_each_entry_rcu().
+ *
+ * Note that the caller is not permitted to immediately free
+ * the newly deleted entry.  Instead, either synchronize_rcu()
+ * or call_rcu() must be used to defer freeing until an RCU
+ * grace period has elapsed.
+ */
+static inline void list_del_rcu(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_replace_rcu - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * The @old entry will be replaced with the @new entry atomically.
+ * Note: @old should not be empty.
+ */
+static inline void list_replace_rcu(struct list_head *old,
+				struct list_head *new)
+{
+	new->next = old->next;
+	new->prev = old->prev;
+	smp_wmb();
+	new->next->prev = new;
+	new->prev->next = new;
+	old->prev = LIST_POISON2;
+}
+
+/**
+ * list_splice_init_rcu - splice an RCU-protected list into an existing list.
+ * @list:	the RCU-protected list to splice
+ * @head:	the place in the list to splice the first list into
+ * @sync:	function to sync: synchronize_rcu(), synchronize_sched(), ...
+ *
+ * @head can be RCU-read traversed concurrently with this function.
+ *
+ * Note that this function blocks.
+ *
+ * Important note: the caller must take whatever action is necessary to
+ *	prevent any other updates to @head.  In principle, it is possible
+ *	to modify the list as soon as sync() begins execution.
+ *	If this sort of thing becomes necessary, an alternative version
+ *	based on call_rcu() could be created.  But only if -really-
+ *	needed -- there is no shortage of RCU API members.
+ */
+static inline void list_splice_init_rcu(struct list_head *list,
+					struct list_head *head,
+					void (*sync)(void))
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+	struct list_head *at = head->next;
+
+	if (list_empty(head))
+		return;
+
+	/* "first" and "last" tracking list, so initialize it. */
+
+	INIT_LIST_HEAD(list);
+
+	/*
+	 * At this point, the list body still points to the source list.
+	 * Wait for any readers to finish using the list before splicing
+	 * the list body into the new list.  Any new readers will see
+	 * an empty list.
+	 */
+
+	sync();
+
+	/*
+	 * Readers are finished with the source list, so perform splice.
+	 * The order is important if the new list is global and accessible
+	 * to concurrent RCU readers.  Note that RCU readers are not
+	 * permitted to traverse the prev pointers without excluding
+	 * this function.
+	 */
+
+	last->next = at;
+	smp_wmb();
+	head->next = first;
+	first->prev = head;
+	at->prev = last;
+}
+
+/**
+ * list_for_each_rcu	-	iterate over an rcu-protected list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+#define list_for_each_rcu(pos, head) \
+	for (pos = (head)->next; \
+		prefetch(rcu_dereference(pos)->next), pos != (head); \
+		pos = pos->next)
+
+#define __list_for_each_rcu(pos, head) \
+	for (pos = (head)->next; \
+		rcu_dereference(pos) != (head); \
+		pos = pos->next)
+
+/**
+ * list_for_each_safe_rcu
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ *
+ * Iterate over an rcu-protected list, safe against removal of list entry.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+#define list_for_each_safe_rcu(pos, n, head) \
+	for (pos = (head)->next; \
+		n = rcu_dereference(pos)->next, pos != (head); \
+		pos = n)
+
+/**
+ * list_for_each_entry_rcu	-	iterate over rcu list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+#define list_for_each_entry_rcu(pos, head, member) \
+	for (pos = list_entry((head)->next, typeof(*pos), member); \
+		prefetch(rcu_dereference(pos)->member.next), \
+			&pos->member != (head); \
+		pos = list_entry(pos->member.next, typeof(*pos), member))
+
+
+/**
+ * list_for_each_continue_rcu
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ *
+ * Iterate over an rcu-protected list, continuing after current point.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+#define list_for_each_continue_rcu(pos, head) \
+	for ((pos) = (pos)->next; \
+		prefetch(rcu_dereference((pos))->next), (pos) != (head); \
+		(pos) = (pos)->next)
+
+/**
+ * hlist_del_rcu - deletes entry from hash list without re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: list_unhashed() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the hash list.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_entry().
+ */
+static inline void hlist_del_rcu(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_replace_rcu - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * The @old entry will be replaced with the @new entry atomically.
+ */
+static inline void hlist_replace_rcu(struct hlist_node *old,
+					struct hlist_node *new)
+{
+	struct hlist_node *next = old->next;
+
+	new->next = next;
+	new->pprev = old->pprev;
+	smp_wmb();
+	if (next)
+		new->next->pprev = &new->next;
+	*new->pprev = new;
+	old->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_add_head_rcu
+ * @n: the element to add to the hash list.
+ * @h: the list to add to.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist,
+ * while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.  Regardless of the type of CPU, the
+ * list-traversal primitive must be guarded by rcu_read_lock().
+ */
+static inline void hlist_add_head_rcu(struct hlist_node *n,
+					struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	n->pprev = &h->first;
+	smp_wmb();
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+}
+
+/**
+ * hlist_add_before_rcu
+ * @n: the new element to add to the hash list.
+ * @next: the existing element to add the new element before.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist
+ * before the specified node while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.
+ */
+static inline void hlist_add_before_rcu(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	smp_wmb();
+	next->pprev = &n->next;
+	*(n->pprev) = n;
+}
+
+/**
+ * hlist_add_after_rcu
+ * @prev: the existing element to add the new element after.
+ * @n: the new element to add to the hash list.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist
+ * after the specified node while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_add_head_rcu()
+ * or hlist_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.
+ */
+static inline void hlist_add_after_rcu(struct hlist_node *prev,
+				       struct hlist_node *n)
+{
+	n->next = prev->next;
+	n->pprev = &prev->next;
+	smp_wmb();
+	prev->next = n;
+	if (n->next)
+		n->next->pprev = &n->next;
+}
+
+/**
+ * hlist_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+#define hlist_for_each_entry_rcu(tpos, pos, head, member)		 \
+	for (pos = (head)->first;					 \
+	     rcu_dereference(pos) && ({ prefetch(pos->next); 1; }) &&	 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
+	     pos = pos->next)
+
+#endif	/* __KERNEL__ */
+#endif
diff --git a/kernel/pid.c b/kernel/pid.c
index 20d59fa2d49..30bd5d4b2ac 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/init.h>
+#include <linux/rculist.h>
 #include <linux/bootmem.h>
 #include <linux/hash.h>
 #include <linux/pid_namespace.h>
diff --git a/lib/textsearch.c b/lib/textsearch.c
index be8bda3862f..a3e500ad51d 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -97,6 +97,7 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/init.h>
+#include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/err.h>
 #include <linux/textsearch.h>
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 31128cb92a2..ea464393144 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/in.h>
 #include <linux/init.h>
+#include <linux/rculist.h>
 
 static LIST_HEAD(snap_list);
 static DEFINE_SPINLOCK(snap_lock);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2a739adaa92..e7ddbfa0e02 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -27,6 +27,7 @@
 #include <linux/mm.h>
 #include <linux/in.h>
 #include <linux/init.h>
+#include <linux/rculist.h>
 #include <net/p8022.h>
 #include <net/arp.h>
 #include <linux/rtnetlink.h>
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 72c5976a5ce..142060f0205 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/rculist.h>
 #include <linux/spinlock.h>
 #include <linux/times.h>
 #include <linux/netdevice.h>
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index e38034aa56f..9e96ffcd29a 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -13,6 +13,7 @@
  *	2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
+#include <linux/rculist.h>
 
 #include "br_private.h"
 #include "br_private_stp.h"
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 02c2f7c0b25..643c032a3a5 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -30,8 +30,7 @@
  */
 
 #include <linux/types.h>
-#include <linux/rcupdate.h>
-#include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
-- 
cgit v1.2.3-70-g09d2


From 711bbdd659b685b45d3f28b29a00f17be6484f38 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 17 May 2008 08:26:25 +0200
Subject: rculist.h: fix include in net/netfilter/nf_conntrack_netlink.c

this file has rculist dependency but did not explicitly include it,
which broke the build.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 net/netfilter/nf_conntrack_netlink.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0edefcfc594..077bcd22879 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/rculist.h>
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/skbuff.h>
-- 
cgit v1.2.3-70-g09d2


From e15f880409c807bb589e9492263564e80f0de6e9 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 19 May 2008 13:27:31 -0700
Subject: tipc: Add support for customized subscription overlap handling

This patch enables TIPC's topology server code to do customized
overlap detection handling on a per-subscription basis.  (This
capability is needed to support the upcoming introduction of
multi-cluster TIPC networks.)

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c | 4 +++-
 net/tipc/subscr.h | 8 ++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 8f8d0a6c1c1..81e2bd5f241 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -157,7 +157,8 @@ void tipc_subscr_report_overlap(struct subscription *sub,
 		return;
 	if (!must && !(sub->filter & TIPC_SUB_PORTS))
 		return;
-	subscr_send_event(sub, found_lower, found_upper, event, port_ref, node);
+
+	sub->event_cb(sub, found_lower, found_upper, event, port_ref, node);
 }
 
 /**
@@ -372,6 +373,7 @@ static void subscr_subscribe(struct tipc_subscr *s,
 		subscr_terminate(subscriber);
 		return;
 	}
+	sub->event_cb = subscr_send_event;
 	memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
 	INIT_LIST_HEAD(&sub->subscription_list);
 	INIT_LIST_HEAD(&sub->nameseq_list);
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 93a8e674fac..d9553683290 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -37,11 +37,18 @@
 #ifndef _TIPC_SUBSCR_H
 #define _TIPC_SUBSCR_H
 
+struct subscription;
+
+typedef void (*tipc_subscr_event) (struct subscription *sub,
+				   u32 found_lower, u32 found_upper,
+				   u32 event, u32 port_ref, u32 node);
+
 /**
  * struct subscription - TIPC network topology subscription object
  * @seq: name sequence associated with subscription
  * @timeout: duration of subscription (in ms)
  * @filter: event filtering to be done for subscription
+ * @event_cb: routine invoked when a subscription event is detected
  * @evt: template for events generated by subscription
  * @subscription_list: adjacent subscriptions in subscriber's subscription list
  * @nameseq_list: adjacent subscriptions in name sequence's subscription list
@@ -53,6 +60,7 @@ struct subscription {
 	struct tipc_name_seq seq;
 	u32 timeout;
 	u32 filter;
+	tipc_subscr_event event_cb;
 	struct tipc_event evt;
 	struct list_head subscription_list;
 	struct list_head nameseq_list;
-- 
cgit v1.2.3-70-g09d2


From 8e9501f5188d90eed737240453c32cad01849c96 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 19 May 2008 13:28:32 -0700
Subject: tipc: Add support for customized subscription endianness

This patch enables TIPC's topology server code to do customized
endianness conversions on a per-subscription basis.  (This
capability is needed to support the upcoming consolidation of
subscriber and subscription object references.)

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c | 35 ++++++++++++++++-------------------
 net/tipc/subscr.h |  2 ++
 2 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 81e2bd5f241..a62e5d30638 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -47,7 +47,6 @@
  * @subscriber_list: adjacent subscribers in top. server's list of subscribers
  * @subscription_list: list of subscription objects for this subscriber
  * @port_ref: object reference to port used to communicate with subscriber
- * @swap: indicates if subscriber uses opposite endianness in its messages
  */
 
 struct subscriber {
@@ -56,7 +55,6 @@ struct subscriber {
 	struct list_head subscriber_list;
 	struct list_head subscription_list;
 	u32 port_ref;
-	int swap;
 };
 
 /**
@@ -109,11 +107,11 @@ static void subscr_send_event(struct subscription *sub,
 	msg_sect.iov_base = (void *)&sub->evt;
 	msg_sect.iov_len = sizeof(struct tipc_event);
 
-	sub->evt.event = htohl(event, sub->owner->swap);
-	sub->evt.found_lower = htohl(found_lower, sub->owner->swap);
-	sub->evt.found_upper = htohl(found_upper, sub->owner->swap);
-	sub->evt.port.ref = htohl(port_ref, sub->owner->swap);
-	sub->evt.port.node = htohl(node, sub->owner->swap);
+	sub->evt.event = htohl(event, sub->swap);
+	sub->evt.found_lower = htohl(found_lower, sub->swap);
+	sub->evt.found_upper = htohl(found_upper, sub->swap);
+	sub->evt.port.ref = htohl(port_ref, sub->swap);
+	sub->evt.port.node = htohl(node, sub->swap);
 	tipc_send(sub->owner->port_ref, 1, &msg_sect);
 }
 
@@ -324,18 +322,16 @@ static void subscr_subscribe(struct tipc_subscr *s,
 			     struct subscriber *subscriber)
 {
 	struct subscription *sub;
+	int swap;
 
-	/* Determine/update subscriber's endianness */
+	/* Determine subscriber's endianness */
 
-	if (s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE))
-		subscriber->swap = 0;
-	else
-		subscriber->swap = 1;
+	swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
 
 	/* Detect & process a subscription cancellation request */
 
-	if (s->filter & htohl(TIPC_SUB_CANCEL, subscriber->swap)) {
-		s->filter &= ~htohl(TIPC_SUB_CANCEL, subscriber->swap);
+	if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
+		s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
 		subscr_cancel(s, subscriber);
 		return;
 	}
@@ -360,11 +356,11 @@ static void subscr_subscribe(struct tipc_subscr *s,
 
 	/* Initialize subscription object */
 
-	sub->seq.type = htohl(s->seq.type, subscriber->swap);
-	sub->seq.lower = htohl(s->seq.lower, subscriber->swap);
-	sub->seq.upper = htohl(s->seq.upper, subscriber->swap);
-	sub->timeout = htohl(s->timeout, subscriber->swap);
-	sub->filter = htohl(s->filter, subscriber->swap);
+	sub->seq.type = htohl(s->seq.type, swap);
+	sub->seq.lower = htohl(s->seq.lower, swap);
+	sub->seq.upper = htohl(s->seq.upper, swap);
+	sub->timeout = htohl(s->timeout, swap);
+	sub->filter = htohl(s->filter, swap);
 	if ((!(sub->filter & TIPC_SUB_PORTS)
 	     == !(sub->filter & TIPC_SUB_SERVICE))
 	    || (sub->seq.lower > sub->seq.upper)) {
@@ -378,6 +374,7 @@ static void subscr_subscribe(struct tipc_subscr *s,
 	INIT_LIST_HEAD(&sub->subscription_list);
 	INIT_LIST_HEAD(&sub->nameseq_list);
 	list_add(&sub->subscription_list, &subscriber->subscription_list);
+	sub->swap = swap;
 	atomic_inc(&topsrv.subscription_count);
 	if (sub->timeout != TIPC_WAIT_FOREVER) {
 		k_init_timer(&sub->timer,
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index d9553683290..3e3e0265146 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -49,6 +49,7 @@ typedef void (*tipc_subscr_event) (struct subscription *sub,
  * @timeout: duration of subscription (in ms)
  * @filter: event filtering to be done for subscription
  * @event_cb: routine invoked when a subscription event is detected
+ * @swap: indicates if subscriber uses opposite endianness in its messages
  * @evt: template for events generated by subscription
  * @subscription_list: adjacent subscriptions in subscriber's subscription list
  * @nameseq_list: adjacent subscriptions in name sequence's subscription list
@@ -61,6 +62,7 @@ struct subscription {
 	u32 timeout;
 	u32 filter;
 	tipc_subscr_event event_cb;
+	int swap;
 	struct tipc_event evt;
 	struct list_head subscription_list;
 	struct list_head nameseq_list;
-- 
cgit v1.2.3-70-g09d2


From fc5ad582709ce9c7b9ab7b70c1e5b5e2cfc384db Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 19 May 2008 13:29:06 -0700
Subject: tipc: Fix bug in topology server byte swapping routine

This patch fixes TIPC's topology server so that it does byte swapping
correctly when endianness conversion is required.  (Note: This bug only
impacted an application if it issues a subscription request to a
topology server on another node, rather than the server on it's own
node; since the topology server is normally not accessible by off-node
applications, most TIPC applications were not impacted by the bug.)

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index a62e5d30638..dde23f1e754 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -86,9 +86,7 @@ static struct top_srv topsrv = { 0 };
 
 static u32 htohl(u32 in, int swap)
 {
-	char *c = (char *)&in;
-
-	return swap ? ((c[3] << 3) + (c[2] << 2) + (c[1] << 1) + c[0]) : in;
+	return swap ? (u32)___constant_swab32(in) : in;
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 28353e7fad1d224687220a448950dc552645a50a Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 19 May 2008 13:29:47 -0700
Subject: tipc: Consolidate subscriber & subscriber port references

This patch modifies TIPC's network topology service so that it
only requires a single reference table entry per subscriber
connection, rather than two.  This is achieved by letting the
reference to the server port communicating with the subscriber
act as the reference to the subscriber object itself.  (Since
the subscriber cannot exist without its port, and vice versa,
this dual role for the reference is perfectly natural.)  This
consolidation reduces the size of the reference table by 50%
in the default configuration.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.c   |   3 +-
 net/tipc/subscr.c | 199 +++++++++++++++++++++++++++++++-----------------------
 net/tipc/subscr.h |   4 +-
 3 files changed, 116 insertions(+), 90 deletions(-)

(limited to 'net')

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 6d6aa5a3c24..3d97386af09 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -164,8 +164,7 @@ int tipc_core_start(void)
 	tipc_mode = TIPC_NODE_MODE;
 
 	if ((res = tipc_handler_start()) ||
-	    (res = tipc_ref_table_init(tipc_max_ports + tipc_max_subscriptions,
-				       tipc_random)) ||
+	    (res = tipc_ref_table_init(tipc_max_ports, tipc_random)) ||
 	    (res = tipc_reg_start()) ||
 	    (res = tipc_nametbl_init()) ||
 	    (res = tipc_k_signal((Handler)tipc_subscr_start, 0)) ||
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index dde23f1e754..7c62791eb0c 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -38,23 +38,22 @@
 #include "dbg.h"
 #include "subscr.h"
 #include "name_table.h"
+#include "port.h"
 #include "ref.h"
 
 /**
  * struct subscriber - TIPC network topology subscriber
- * @ref: object reference to subscriber object itself
- * @lock: pointer to spinlock controlling access to subscriber object
+ * @port_ref: object reference to server port connecting to subscriber
+ * @lock: pointer to spinlock controlling access to subscriber's server port
  * @subscriber_list: adjacent subscribers in top. server's list of subscribers
  * @subscription_list: list of subscription objects for this subscriber
- * @port_ref: object reference to port used to communicate with subscriber
  */
 
 struct subscriber {
-	u32 ref;
+	u32 port_ref;
 	spinlock_t *lock;
 	struct list_head subscriber_list;
 	struct list_head subscription_list;
-	u32 port_ref;
 };
 
 /**
@@ -91,6 +90,9 @@ static u32 htohl(u32 in, int swap)
 
 /**
  * subscr_send_event - send a message containing a tipc_event to the subscriber
+ *
+ * Note: Must not hold subscriber's server port lock, since tipc_send() will
+ *       try to take the lock if the message is rejected and returned!
  */
 
 static void subscr_send_event(struct subscription *sub,
@@ -110,7 +112,7 @@ static void subscr_send_event(struct subscription *sub,
 	sub->evt.found_upper = htohl(found_upper, sub->swap);
 	sub->evt.port.ref = htohl(port_ref, sub->swap);
 	sub->evt.port.node = htohl(node, sub->swap);
-	tipc_send(sub->owner->port_ref, 1, &msg_sect);
+	tipc_send(sub->server_ref, 1, &msg_sect);
 }
 
 /**
@@ -163,20 +165,18 @@ void tipc_subscr_report_overlap(struct subscription *sub,
 
 static void subscr_timeout(struct subscription *sub)
 {
-	struct subscriber *subscriber;
-	u32 subscriber_ref;
+	struct port *server_port;
 
-	/* Validate subscriber reference (in case subscriber is terminating) */
+	/* Validate server port reference (in case subscriber is terminating) */
 
-	subscriber_ref = sub->owner->ref;
-	subscriber = (struct subscriber *)tipc_ref_lock(subscriber_ref);
-	if (subscriber == NULL)
+	server_port = tipc_port_lock(sub->server_ref);
+	if (server_port == NULL)
 		return;
 
 	/* Validate timeout (in case subscription is being cancelled) */
 
 	if (sub->timeout == TIPC_WAIT_FOREVER) {
-		tipc_ref_unlock(subscriber_ref);
+		tipc_port_unlock(server_port);
 		return;
 	}
 
@@ -184,19 +184,21 @@ static void subscr_timeout(struct subscription *sub)
 
 	tipc_nametbl_unsubscribe(sub);
 
-	/* Notify subscriber of timeout, then unlink subscription */
+	/* Unlink subscription from subscriber */
 
-	subscr_send_event(sub,
-			  sub->evt.s.seq.lower,
-			  sub->evt.s.seq.upper,
-			  TIPC_SUBSCR_TIMEOUT,
-			  0,
-			  0);
 	list_del(&sub->subscription_list);
 
+	/* Release subscriber's server port */
+
+	tipc_port_unlock(server_port);
+
+	/* Notify subscriber of timeout */
+
+	subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
+			  TIPC_SUBSCR_TIMEOUT, 0, 0);
+
 	/* Now destroy subscription */
 
-	tipc_ref_unlock(subscriber_ref);
 	k_term_timer(&sub->timer);
 	kfree(sub);
 	atomic_dec(&topsrv.subscription_count);
@@ -205,7 +207,7 @@ static void subscr_timeout(struct subscription *sub)
 /**
  * subscr_del - delete a subscription within a subscription list
  *
- * Called with subscriber locked.
+ * Called with subscriber port locked.
  */
 
 static void subscr_del(struct subscription *sub)
@@ -219,7 +221,7 @@ static void subscr_del(struct subscription *sub)
 /**
  * subscr_terminate - terminate communication with a subscriber
  *
- * Called with subscriber locked.  Routine must temporarily release this lock
+ * Called with subscriber port locked.  Routine must temporarily release lock
  * to enable subscription timeout routine(s) to finish without deadlocking;
  * the lock is then reclaimed to allow caller to release it upon return.
  * (This should work even in the unlikely event some other thread creates
@@ -229,14 +231,21 @@ static void subscr_del(struct subscription *sub)
 
 static void subscr_terminate(struct subscriber *subscriber)
 {
+	u32 port_ref;
 	struct subscription *sub;
 	struct subscription *sub_temp;
 
 	/* Invalidate subscriber reference */
 
-	tipc_ref_discard(subscriber->ref);
+	port_ref = subscriber->port_ref;
+	subscriber->port_ref = 0;
 	spin_unlock_bh(subscriber->lock);
 
+	/* Sever connection to subscriber */
+
+	tipc_shutdown(port_ref);
+	tipc_deleteport(port_ref);
+
 	/* Destroy any existing subscriptions for subscriber */
 
 	list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
@@ -250,27 +259,25 @@ static void subscr_terminate(struct subscriber *subscriber)
 		subscr_del(sub);
 	}
 
-	/* Sever connection to subscriber */
-
-	tipc_shutdown(subscriber->port_ref);
-	tipc_deleteport(subscriber->port_ref);
-
 	/* Remove subscriber from topology server's subscriber list */
 
 	spin_lock_bh(&topsrv.lock);
 	list_del(&subscriber->subscriber_list);
 	spin_unlock_bh(&topsrv.lock);
 
-	/* Now destroy subscriber */
+	/* Reclaim subscriber lock */
 
 	spin_lock_bh(subscriber->lock);
+
+	/* Now destroy subscriber */
+
 	kfree(subscriber);
 }
 
 /**
  * subscr_cancel - handle subscription cancellation request
  *
- * Called with subscriber locked.  Routine must temporarily release this lock
+ * Called with subscriber port locked.  Routine must temporarily release lock
  * to enable the subscription timeout routine to finish without deadlocking;
  * the lock is then reclaimed to allow caller to release it upon return.
  *
@@ -313,11 +320,11 @@ static void subscr_cancel(struct tipc_subscr *s,
 /**
  * subscr_subscribe - create subscription for subscriber
  *
- * Called with subscriber locked
+ * Called with subscriber port locked.
  */
 
-static void subscr_subscribe(struct tipc_subscr *s,
-			     struct subscriber *subscriber)
+static struct subscription *subscr_subscribe(struct tipc_subscr *s,
+					     struct subscriber *subscriber)
 {
 	struct subscription *sub;
 	int swap;
@@ -331,7 +338,7 @@ static void subscr_subscribe(struct tipc_subscr *s,
 	if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
 		s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
 		subscr_cancel(s, subscriber);
-		return;
+		return NULL;
 	}
 
 	/* Refuse subscription if global limit exceeded */
@@ -340,16 +347,16 @@ static void subscr_subscribe(struct tipc_subscr *s,
 		warn("Subscription rejected, subscription limit reached (%u)\n",
 		     tipc_max_subscriptions);
 		subscr_terminate(subscriber);
-		return;
+		return NULL;
 	}
 
 	/* Allocate subscription object */
 
-	sub = kzalloc(sizeof(*sub), GFP_ATOMIC);
+	sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
 	if (!sub) {
 		warn("Subscription rejected, no memory\n");
 		subscr_terminate(subscriber);
-		return;
+		return NULL;
 	}
 
 	/* Initialize subscription object */
@@ -365,40 +372,41 @@ static void subscr_subscribe(struct tipc_subscr *s,
 		warn("Subscription rejected, illegal request\n");
 		kfree(sub);
 		subscr_terminate(subscriber);
-		return;
+		return NULL;
 	}
 	sub->event_cb = subscr_send_event;
-	memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
-	INIT_LIST_HEAD(&sub->subscription_list);
 	INIT_LIST_HEAD(&sub->nameseq_list);
 	list_add(&sub->subscription_list, &subscriber->subscription_list);
+	sub->server_ref = subscriber->port_ref;
 	sub->swap = swap;
+	memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
 	atomic_inc(&topsrv.subscription_count);
 	if (sub->timeout != TIPC_WAIT_FOREVER) {
 		k_init_timer(&sub->timer,
 			     (Handler)subscr_timeout, (unsigned long)sub);
 		k_start_timer(&sub->timer, sub->timeout);
 	}
-	sub->owner = subscriber;
-	tipc_nametbl_subscribe(sub);
+
+	return sub;
 }
 
 /**
  * subscr_conn_shutdown_event - handle termination request from subscriber
+ *
+ * Called with subscriber's server port unlocked.
  */
 
 static void subscr_conn_shutdown_event(void *usr_handle,
-				       u32 portref,
+				       u32 port_ref,
 				       struct sk_buff **buf,
 				       unsigned char const *data,
 				       unsigned int size,
 				       int reason)
 {
-	struct subscriber *subscriber;
+	struct subscriber *subscriber = usr_handle;
 	spinlock_t *subscriber_lock;
 
-	subscriber = tipc_ref_lock((u32)(unsigned long)usr_handle);
-	if (subscriber == NULL)
+	if (tipc_port_lock(port_ref) == NULL)
 		return;
 
 	subscriber_lock = subscriber->lock;
@@ -408,6 +416,8 @@ static void subscr_conn_shutdown_event(void *usr_handle,
 
 /**
  * subscr_conn_msg_event - handle new subscription request from subscriber
+ *
+ * Called with subscriber's server port unlocked.
  */
 
 static void subscr_conn_msg_event(void *usr_handle,
@@ -416,20 +426,46 @@ static void subscr_conn_msg_event(void *usr_handle,
 				  const unchar *data,
 				  u32 size)
 {
-	struct subscriber *subscriber;
+	struct subscriber *subscriber = usr_handle;
 	spinlock_t *subscriber_lock;
+	struct subscription *sub;
+
+	/*
+	 * Lock subscriber's server port (& make a local copy of lock pointer,
+	 * in case subscriber is deleted while processing subscription request)
+	 */
 
-	subscriber = tipc_ref_lock((u32)(unsigned long)usr_handle);
-	if (subscriber == NULL)
+	if (tipc_port_lock(port_ref) == NULL)
 		return;
 
 	subscriber_lock = subscriber->lock;
-	if (size != sizeof(struct tipc_subscr))
-		subscr_terminate(subscriber);
-	else
-		subscr_subscribe((struct tipc_subscr *)data, subscriber);
 
-	spin_unlock_bh(subscriber_lock);
+	if (size != sizeof(struct tipc_subscr)) {
+		subscr_terminate(subscriber);
+		spin_unlock_bh(subscriber_lock);
+	} else {
+		sub = subscr_subscribe((struct tipc_subscr *)data, subscriber);
+		spin_unlock_bh(subscriber_lock);
+		if (sub != NULL) {
+
+			/*
+			 * We must release the server port lock before adding a
+			 * subscription to the name table since TIPC needs to be
+			 * able to (re)acquire the port lock if an event message
+			 * issued by the subscription process is rejected and
+			 * returned.  The subscription cannot be deleted while
+			 * it is being added to the name table because:
+			 * a) the single-threading of the native API port code
+			 *    ensures the subscription cannot be cancelled and
+			 *    the subscriber connection cannot be broken, and
+			 * b) the name table lock ensures the subscription
+			 *    timeout code cannot delete the subscription,
+			 * so the subscription object is still protected.
+			 */
+
+			tipc_nametbl_subscribe(sub);
+		}
+	}
 }
 
 /**
@@ -445,16 +481,10 @@ static void subscr_named_msg_event(void *usr_handle,
 				   struct tipc_portid const *orig,
 				   struct tipc_name_seq const *dest)
 {
-	struct subscriber *subscriber;
-	struct iovec msg_sect = {NULL, 0};
-	spinlock_t *subscriber_lock;
+	static struct iovec msg_sect = {NULL, 0};
 
-	dbg("subscr_named_msg_event: orig = %x own = %x,\n",
-	    orig->node, tipc_own_addr);
-	if (size && (size != sizeof(struct tipc_subscr))) {
-		warn("Subscriber rejected, invalid subscription size\n");
-		return;
-	}
+	struct subscriber *subscriber;
+	u32 server_port_ref;
 
 	/* Create subscriber object */
 
@@ -465,18 +495,11 @@ static void subscr_named_msg_event(void *usr_handle,
 	}
 	INIT_LIST_HEAD(&subscriber->subscription_list);
 	INIT_LIST_HEAD(&subscriber->subscriber_list);
-	subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock);
-	if (subscriber->ref == 0) {
-		warn("Subscriber rejected, reference table exhausted\n");
-		kfree(subscriber);
-		return;
-	}
-	spin_unlock_bh(subscriber->lock);
 
-	/* Establish a connection to subscriber */
+	/* Create server port & establish connection to subscriber */
 
 	tipc_createport(topsrv.user_ref,
-			(void *)(unsigned long)subscriber->ref,
+			subscriber,
 			importance,
 			NULL,
 			NULL,
@@ -488,32 +511,36 @@ static void subscr_named_msg_event(void *usr_handle,
 			&subscriber->port_ref);
 	if (subscriber->port_ref == 0) {
 		warn("Subscriber rejected, unable to create port\n");
-		tipc_ref_discard(subscriber->ref);
 		kfree(subscriber);
 		return;
 	}
 	tipc_connect2port(subscriber->port_ref, orig);
 
+	/* Lock server port (& save lock address for future use) */
+
+	subscriber->lock = tipc_port_lock(subscriber->port_ref)->publ.lock;
 
 	/* Add subscriber to topology server's subscriber list */
 
-	tipc_ref_lock(subscriber->ref);
 	spin_lock_bh(&topsrv.lock);
 	list_add(&subscriber->subscriber_list, &topsrv.subscriber_list);
 	spin_unlock_bh(&topsrv.lock);
 
-	/*
-	 * Subscribe now if message contains a subscription,
-	 * otherwise send an empty response to complete connection handshaking
-	 */
+	/* Unlock server port */
 
-	subscriber_lock = subscriber->lock;
-	if (size)
-		subscr_subscribe((struct tipc_subscr *)data, subscriber);
-	else
-		tipc_send(subscriber->port_ref, 1, &msg_sect);
+	server_port_ref = subscriber->port_ref;
+	spin_unlock_bh(subscriber->lock);
 
-	spin_unlock_bh(subscriber_lock);
+	/* Send an ACK- to complete connection handshaking */
+
+	tipc_send(server_port_ref, 1, &msg_sect);
+
+	/* Handle optional subscription request */
+
+	if (size != 0) {
+		subscr_conn_msg_event(subscriber, server_port_ref,
+				      buf, data, size);
+	}
 }
 
 int tipc_subscr_start(void)
@@ -572,8 +599,8 @@ void tipc_subscr_stop(void)
 		list_for_each_entry_safe(subscriber, subscriber_temp,
 					 &topsrv.subscriber_list,
 					 subscriber_list) {
-			tipc_ref_lock(subscriber->ref);
 			subscriber_lock = subscriber->lock;
+			spin_lock_bh(subscriber_lock);
 			subscr_terminate(subscriber);
 			spin_unlock_bh(subscriber_lock);
 		}
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 3e3e0265146..b9af687b636 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -49,12 +49,12 @@ typedef void (*tipc_subscr_event) (struct subscription *sub,
  * @timeout: duration of subscription (in ms)
  * @filter: event filtering to be done for subscription
  * @event_cb: routine invoked when a subscription event is detected
+ * @server_ref: object reference of server port associated with subscription
  * @swap: indicates if subscriber uses opposite endianness in its messages
  * @evt: template for events generated by subscription
  * @subscription_list: adjacent subscriptions in subscriber's subscription list
  * @nameseq_list: adjacent subscriptions in name sequence's subscription list
  * @timer_ref: reference to timer governing subscription duration (may be NULL)
- * @owner: pointer to subscriber object associated with this subscription
  */
 
 struct subscription {
@@ -62,12 +62,12 @@ struct subscription {
 	u32 timeout;
 	u32 filter;
 	tipc_subscr_event event_cb;
+	u32 server_ref;
 	int swap;
 	struct tipc_event evt;
 	struct list_head subscription_list;
 	struct list_head nameseq_list;
 	struct timer_list timer;
-	struct subscriber *owner;
 };
 
 int tipc_subscr_overlap(struct subscription * sub,
-- 
cgit v1.2.3-70-g09d2


From 5b06c85c3b96fa8db632f1ee94f99a2bd0215f3a Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 19 May 2008 13:30:13 -0700
Subject: tipc: Cosmetic cleanup of topology service code

This patch contains a set of cosmetic changes to TIPC's network
topology service subsystem, including:

- updates to comments (including copyright dates)
- re-ordering structure fields to group them more logically
- removal of optional debugging code that is no longer required
- minor changes to whitespace to conform to Linux coding conventions

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c |  8 +++-----
 net/tipc/subscr.h | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 7c62791eb0c..0326d3060bc 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -1,8 +1,8 @@
 /*
- * net/tipc/subscr.c: TIPC subscription service
+ * net/tipc/subscr.c: TIPC network topology service
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,10 +36,10 @@
 
 #include "core.h"
 #include "dbg.h"
-#include "subscr.h"
 #include "name_table.h"
 #include "port.h"
 #include "ref.h"
+#include "subscr.h"
 
 /**
  * struct subscriber - TIPC network topology subscriber
@@ -149,8 +149,6 @@ void tipc_subscr_report_overlap(struct subscription *sub,
 				u32 node,
 				int must)
 {
-	dbg("Rep overlap %u:%u,%u<->%u,%u\n", sub->seq.type, sub->seq.lower,
-	    sub->seq.upper, found_lower, found_upper);
 	if (!tipc_subscr_overlap(sub, found_lower, found_upper))
 		return;
 	if (!must && !(sub->filter & TIPC_SUB_PORTS))
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index b9af687b636..45d89bf4d20 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -1,8 +1,8 @@
 /*
- * net/tipc/subscr.h: Include file for TIPC subscription service
+ * net/tipc/subscr.h: Include file for TIPC network topology service
  *
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -49,12 +49,12 @@ typedef void (*tipc_subscr_event) (struct subscription *sub,
  * @timeout: duration of subscription (in ms)
  * @filter: event filtering to be done for subscription
  * @event_cb: routine invoked when a subscription event is detected
+ * @timer: timer governing subscription duration (optional)
+ * @nameseq_list: adjacent subscriptions in name sequence's subscription list
+ * @subscription_list: adjacent subscriptions in subscriber's subscription list
  * @server_ref: object reference of server port associated with subscription
  * @swap: indicates if subscriber uses opposite endianness in its messages
  * @evt: template for events generated by subscription
- * @subscription_list: adjacent subscriptions in subscriber's subscription list
- * @nameseq_list: adjacent subscriptions in name sequence's subscription list
- * @timer_ref: reference to timer governing subscription duration (may be NULL)
  */
 
 struct subscription {
@@ -62,19 +62,19 @@ struct subscription {
 	u32 timeout;
 	u32 filter;
 	tipc_subscr_event event_cb;
+	struct timer_list timer;
+	struct list_head nameseq_list;
+	struct list_head subscription_list;
 	u32 server_ref;
 	int swap;
 	struct tipc_event evt;
-	struct list_head subscription_list;
-	struct list_head nameseq_list;
-	struct timer_list timer;
 };
 
-int tipc_subscr_overlap(struct subscription * sub,
+int tipc_subscr_overlap(struct subscription *sub,
 			u32 found_lower,
 			u32 found_upper);
 
-void tipc_subscr_report_overlap(struct subscription * sub,
+void tipc_subscr_report_overlap(struct subscription *sub,
 				u32 found_lower,
 				u32 found_upper,
 				u32 event,
-- 
cgit v1.2.3-70-g09d2


From d62c612ef8a66be534a3ada598cfa28d40cd0b3c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 May 2008 13:45:33 -0700
Subject: netns: Introduce sysctl root for read-only net sysctls.

This one stores all ctl-heads in one list and restricts the
permissions not give write access to non-init net namespaces.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h |  3 +++
 net/sysctl_net.c            | 30 ++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index aa540e6be50..8df751b3be5 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -201,8 +201,11 @@ extern void unregister_pernet_gen_device(int id, struct pernet_operations *);
 struct ctl_path;
 struct ctl_table;
 struct ctl_table_header;
+
 extern struct ctl_table_header *register_net_sysctl_table(struct net *net,
 	const struct ctl_path *path, struct ctl_table *table);
+extern struct ctl_table_header *register_net_sysctl_rotable(
+	const struct ctl_path *path, struct ctl_table *table);
 extern void unregister_net_sysctl_table(struct ctl_table_header *header);
 
 #endif /* __NET_NET_NAMESPACE_H */
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index b4f0525f91a..d8e79162724 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -40,6 +40,27 @@ static struct ctl_table_root net_sysctl_root = {
 	.lookup = net_ctl_header_lookup,
 };
 
+static LIST_HEAD(net_sysctl_ro_tables);
+static struct list_head *net_ctl_ro_header_lookup(struct ctl_table_root *root,
+		struct nsproxy *namespaces)
+{
+	return &net_sysctl_ro_tables;
+}
+
+static int net_ctl_ro_header_perms(struct ctl_table_root *root,
+		struct nsproxy *namespaces, struct ctl_table *table)
+{
+	if (namespaces->net_ns == &init_net)
+		return table->mode;
+	else
+		return table->mode & ~0222;
+}
+
+static struct ctl_table_root net_sysctl_ro_root = {
+	.lookup = net_ctl_ro_header_lookup,
+	.permissions = net_ctl_ro_header_perms,
+};
+
 static int sysctl_net_init(struct net *net)
 {
 	INIT_LIST_HEAD(&net->sysctl_table_headers);
@@ -64,6 +85,7 @@ static __init int sysctl_init(void)
 	if (ret)
 		goto out;
 	register_sysctl_root(&net_sysctl_root);
+	register_sysctl_root(&net_sysctl_ro_root);
 out:
 	return ret;
 }
@@ -80,6 +102,14 @@ struct ctl_table_header *register_net_sysctl_table(struct net *net,
 }
 EXPORT_SYMBOL_GPL(register_net_sysctl_table);
 
+struct ctl_table_header *register_net_sysctl_rotable(const
+		struct ctl_path *path, struct ctl_table *table)
+{
+	return __register_sysctl_paths(&net_sysctl_ro_root,
+			&init_nsproxy, path, table);
+}
+EXPORT_SYMBOL_GPL(register_net_sysctl_rotable);
+
 void unregister_net_sysctl_table(struct ctl_table_header *header)
 {
 	unregister_sysctl_table(header);
-- 
cgit v1.2.3-70-g09d2


From d5a4502e9efa534212484fd691339f6469cf95ff Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 May 2008 13:49:52 -0700
Subject: netns: Register net/core/ sysctls at read-only root.

Most of the net/core/xxx sysctls are read-only now, but this
goal is achieved with excessive memory consumption in each
namespace - the whole table is cloned and most of the entries
in it are ~= 0222.

Split it into two parts and register (the largest) one at the
read-only root.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5fc80105724..a570e2af22c 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -124,14 +124,6 @@ static struct ctl_table net_core_table[] = {
 	},
 #endif /* CONFIG_XFRM */
 #endif /* CONFIG_NET */
-	{
-		.ctl_name	= NET_CORE_SOMAXCONN,
-		.procname	= "somaxconn",
-		.data		= &init_net.core.sysctl_somaxconn,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
 	{
 		.ctl_name	= NET_CORE_BUDGET,
 		.procname	= "netdev_budget",
@@ -151,6 +143,18 @@ static struct ctl_table net_core_table[] = {
 	{ .ctl_name = 0 }
 };
 
+static struct ctl_table netns_core_table[] = {
+	{
+		.ctl_name	= NET_CORE_SOMAXCONN,
+		.procname	= "somaxconn",
+		.data		= &init_net.core.sysctl_somaxconn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+
 static __net_initdata struct ctl_path net_core_path[] = {
 	{ .procname = "net", .ctl_name = CTL_NET, },
 	{ .procname = "core", .ctl_name = NET_CORE, },
@@ -159,23 +163,17 @@ static __net_initdata struct ctl_path net_core_path[] = {
 
 static __net_init int sysctl_core_net_init(struct net *net)
 {
-	struct ctl_table *tbl, *tmp;
+	struct ctl_table *tbl;
 
 	net->core.sysctl_somaxconn = SOMAXCONN;
 
-	tbl = net_core_table;
+	tbl = netns_core_table;
 	if (net != &init_net) {
-		tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL);
+		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
 		if (tbl == NULL)
 			goto err_dup;
 
-		for (tmp = tbl; tmp->procname; tmp++) {
-			if (tmp->data >= (void *)&init_net &&
-					tmp->data < (void *)(&init_net + 1))
-				tmp->data += (char *)net - (char *)&init_net;
-			else
-				tmp->mode &= ~0222;
-		}
+		tbl[0].data = &net->core.sysctl_somaxconn;
 	}
 
 	net->core.sysctl_hdr = register_net_sysctl_table(net,
@@ -186,7 +184,7 @@ static __net_init int sysctl_core_net_init(struct net *net)
 	return 0;
 
 err_reg:
-	if (tbl != net_core_table)
+	if (tbl != netns_core_table)
 		kfree(tbl);
 err_dup:
 	return -ENOMEM;
@@ -198,7 +196,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
 
 	tbl = net->core.sysctl_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->core.sysctl_hdr);
-	BUG_ON(tbl == net_core_table);
+	BUG_ON(tbl == netns_core_table);
 	kfree(tbl);
 }
 
@@ -209,6 +207,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
 
 static __init int sysctl_core_init(void)
 {
+	register_net_sysctl_rotable(net_core_path, net_core_table);
 	return register_pernet_subsys(&sysctl_core_ops);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 0a64b4b811025ce0386ad84d81504e4ff7985856 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 May 2008 13:51:29 -0700
Subject: inet: Rename fragmentation sysctl-related functions/variables.

The fragments sysctls also contains some, that are to be
visible, but read-only in net namespaces.

The naming in net/core/sysctl_net_core.c is - tables, that are
to be registered in namespaces have a "ns" word in their names.
So rename ones in ipv4/ip_fragment.c and ipv6/reassembly.c to
fit this.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 18 +++++++++---------
 net/ipv6/reassembly.c  | 18 +++++++++---------
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cd6ce6ac635..7f102eeb618 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -598,7 +598,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 #ifdef CONFIG_SYSCTL
 static int zero;
 
-static struct ctl_table ip4_frags_ctl_table[] = {
+static struct ctl_table ip4_frags_ns_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
 		.procname	= "ipfrag_high_thresh",
@@ -644,14 +644,14 @@ static struct ctl_table ip4_frags_ctl_table[] = {
 	{ }
 };
 
-static int ip4_frags_ctl_register(struct net *net)
+static int ip4_frags_ns_ctl_register(struct net *net)
 {
 	struct ctl_table *table;
 	struct ctl_table_header *hdr;
 
-	table = ip4_frags_ctl_table;
+	table = ip4_frags_ns_ctl_table;
 	if (net != &init_net) {
-		table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL);
+		table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
 		if (table == NULL)
 			goto err_alloc;
 
@@ -676,7 +676,7 @@ err_alloc:
 	return -ENOMEM;
 }
 
-static void ip4_frags_ctl_unregister(struct net *net)
+static void ip4_frags_ns_ctl_unregister(struct net *net)
 {
 	struct ctl_table *table;
 
@@ -685,12 +685,12 @@ static void ip4_frags_ctl_unregister(struct net *net)
 	kfree(table);
 }
 #else
-static inline int ip4_frags_ctl_register(struct net *net)
+static inline int ip4_frags_ns_ctl_register(struct net *net)
 {
 	return 0;
 }
 
-static inline void ip4_frags_ctl_unregister(struct net *net)
+static inline void ip4_frags_ns_ctl_unregister(struct net *net)
 {
 }
 #endif
@@ -714,12 +714,12 @@ static int ipv4_frags_init_net(struct net *net)
 
 	inet_frags_init_net(&net->ipv4.frags);
 
-	return ip4_frags_ctl_register(net);
+	return ip4_frags_ns_ctl_register(net);
 }
 
 static void ipv4_frags_exit_net(struct net *net)
 {
-	ip4_frags_ctl_unregister(net);
+	ip4_frags_ns_ctl_unregister(net);
 	inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 798cabc7535..7e008de8711 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -632,7 +632,7 @@ static struct inet6_protocol frag_protocol =
 };
 
 #ifdef CONFIG_SYSCTL
-static struct ctl_table ip6_frags_ctl_table[] = {
+static struct ctl_table ip6_frags_ns_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
 		.procname	= "ip6frag_high_thresh",
@@ -670,14 +670,14 @@ static struct ctl_table ip6_frags_ctl_table[] = {
 	{ }
 };
 
-static int ip6_frags_sysctl_register(struct net *net)
+static int ip6_frags_ns_sysctl_register(struct net *net)
 {
 	struct ctl_table *table;
 	struct ctl_table_header *hdr;
 
-	table = ip6_frags_ctl_table;
+	table = ip6_frags_ns_ctl_table;
 	if (net != &init_net) {
-		table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL);
+		table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
 		if (table == NULL)
 			goto err_alloc;
 
@@ -701,7 +701,7 @@ err_alloc:
 	return -ENOMEM;
 }
 
-static void ip6_frags_sysctl_unregister(struct net *net)
+static void ip6_frags_ns_sysctl_unregister(struct net *net)
 {
 	struct ctl_table *table;
 
@@ -710,12 +710,12 @@ static void ip6_frags_sysctl_unregister(struct net *net)
 	kfree(table);
 }
 #else
-static inline int ip6_frags_sysctl_register(struct net *net)
+static inline int ip6_frags_ns_sysctl_register(struct net *net)
 {
 	return 0;
 }
 
-static inline void ip6_frags_sysctl_unregister(struct net *net)
+static inline void ip6_frags_ns_sysctl_unregister(struct net *net)
 {
 }
 #endif
@@ -728,12 +728,12 @@ static int ipv6_frags_init_net(struct net *net)
 
 	inet_frags_init_net(&net->ipv6.frags);
 
-	return ip6_frags_sysctl_register(net);
+	return ip6_frags_ns_sysctl_register(net);
 }
 
 static void ipv6_frags_exit_net(struct net *net)
 {
-	ip6_frags_sysctl_unregister(net);
+	ip6_frags_ns_sysctl_unregister(net);
 	inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 0002c630c4ee7a3c6b1d87e34bfd6ce9694b49be Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 May 2008 13:52:28 -0700
Subject: ipv6: In fragmentation code, handle error returned from
 register_pernet_subsys.

The error code is ignored now, but ipv6 is a module and one can
be loaded under memory pressure, so the error may occur (in theory).

Besides, I'm going to handle error returned from registering a
read-only part of the table, so ignoring this one, while handing
the other one would look strange.

(However, this possibility of error is rather small, so I'm not
 sure whether this is a candidate for current net tree).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7e008de8711..130d6f6b6a6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -750,7 +750,9 @@ int __init ipv6_frag_init(void)
 	if (ret)
 		goto out;
 
-	register_pernet_subsys(&ip6_frags_ops);
+	ret = register_pernet_subsys(&ip6_frags_ops);
+	if (ret)
+		goto err_pernet;
 
 	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
@@ -763,6 +765,10 @@ int __init ipv6_frag_init(void)
 	inet_frags_init(&ip6_frags);
 out:
 	return ret;
+
+err_pernet:
+	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+	goto out;
 }
 
 void ipv6_frag_exit(void)
-- 
cgit v1.2.3-70-g09d2


From 7d291ebb834278e30c211b26fb7076adcb636ad9 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 May 2008 13:53:02 -0700
Subject: inet: Register fragmentation some ctls at read-only root.

Parts of fragments-related sysctls are read-only, but this is
done by cloning all the tables and dropping write-bits from
mode. Do the same but with read-only root.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 16 ++++++++++++++--
 net/ipv6/reassembly.c  | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7f102eeb618..be1cb89a8d5 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -624,6 +624,10 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
 		.proc_handler	= &proc_dointvec_jiffies,
 		.strategy	= &sysctl_jiffies
 	},
+	{ }
+};
+
+static struct ctl_table ip4_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
 		.procname	= "ipfrag_secret_interval",
@@ -658,8 +662,6 @@ static int ip4_frags_ns_ctl_register(struct net *net)
 		table[0].data = &net->ipv4.frags.high_thresh;
 		table[1].data = &net->ipv4.frags.low_thresh;
 		table[2].data = &net->ipv4.frags.timeout;
-		table[3].mode &= ~0222;
-		table[4].mode &= ~0222;
 	}
 
 	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
@@ -684,6 +686,11 @@ static void ip4_frags_ns_ctl_unregister(struct net *net)
 	unregister_net_sysctl_table(net->ipv4.frags_hdr);
 	kfree(table);
 }
+
+static void ip4_frags_ctl_register(void)
+{
+	register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
+}
 #else
 static inline int ip4_frags_ns_ctl_register(struct net *net)
 {
@@ -693,6 +700,10 @@ static inline int ip4_frags_ns_ctl_register(struct net *net)
 static inline void ip4_frags_ns_ctl_unregister(struct net *net)
 {
 }
+
+static inline void ip4_frags_ctl_register(void)
+{
+}
 #endif
 
 static int ipv4_frags_init_net(struct net *net)
@@ -730,6 +741,7 @@ static struct pernet_operations ip4_frags_ops = {
 
 void __init ipfrag_init(void)
 {
+	ip4_frags_ctl_register();
 	register_pernet_subsys(&ip4_frags_ops);
 	ip4_frags.hashfn = ip4_hashfn;
 	ip4_frags.constructor = ip4_frag_init;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 130d6f6b6a6..9391a6949b9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -658,6 +658,10 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
 		.proc_handler	= &proc_dointvec_jiffies,
 		.strategy	= &sysctl_jiffies,
 	},
+	{ }
+};
+
+static struct ctl_table ip6_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
 		.procname	= "ip6frag_secret_interval",
@@ -684,7 +688,6 @@ static int ip6_frags_ns_sysctl_register(struct net *net)
 		table[0].data = &net->ipv6.frags.high_thresh;
 		table[1].data = &net->ipv6.frags.low_thresh;
 		table[2].data = &net->ipv6.frags.timeout;
-		table[3].mode &= ~0222;
 	}
 
 	hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
@@ -709,6 +712,20 @@ static void ip6_frags_ns_sysctl_unregister(struct net *net)
 	unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
 	kfree(table);
 }
+
+static struct ctl_table_header *ip6_ctl_header;
+
+static int ip6_frags_sysctl_register(void)
+{
+	ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path,
+			ip6_frags_ctl_table);
+	return ip6_ctl_header == NULL ? -ENOMEM : 0;
+}
+
+static void ip6_frags_sysctl_unregister(void)
+{
+	unregister_net_sysctl_table(ip6_ctl_header);
+}
 #else
 static inline int ip6_frags_ns_sysctl_register(struct net *net)
 {
@@ -718,6 +735,15 @@ static inline int ip6_frags_ns_sysctl_register(struct net *net)
 static inline void ip6_frags_ns_sysctl_unregister(struct net *net)
 {
 }
+
+static inline int ip6_frags_sysctl_register(void)
+{
+	return 0;
+}
+
+static inline void ip6_frags_sysctl_unregister(void)
+{
+}
 #endif
 
 static int ipv6_frags_init_net(struct net *net)
@@ -750,6 +776,10 @@ int __init ipv6_frag_init(void)
 	if (ret)
 		goto out;
 
+	ret = ip6_frags_sysctl_register();
+	if (ret)
+		goto err_sysctl;
+
 	ret = register_pernet_subsys(&ip6_frags_ops);
 	if (ret)
 		goto err_pernet;
@@ -767,6 +797,8 @@ out:
 	return ret;
 
 err_pernet:
+	ip6_frags_sysctl_unregister();
+err_sysctl:
 	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 	goto out;
 }
@@ -774,6 +806,7 @@ err_pernet:
 void ipv6_frag_exit(void)
 {
 	inet_frags_fini(&ip6_frags);
+	ip6_frags_sysctl_unregister();
 	unregister_pernet_subsys(&ip6_frags_ops);
 	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 }
-- 
cgit v1.2.3-70-g09d2


From 34ac2573e88c4f80fc5e219d8012ea383a788803 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 19 May 2008 13:53:30 -0700
Subject: ipv6: Register some net/ipv6/ core sysctls at read-only root.

There are some sysctls left to be switched to read-only,
but they are all in ipv6, so complete with them.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 3804dcbbfab..5c99274558b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -37,6 +37,10 @@ static ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ipv6_table[] = {
 	{
 		.ctl_name	= NET_IPV6_MLD_MAX_MSF,
 		.procname	= "mld_max_msf",
@@ -80,12 +84,6 @@ static int ipv6_sysctl_net_init(struct net *net)
 
 	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
 
-	/* We don't want this value to be per namespace, it should be global
-	   to all namespaces, so make it read-only when we are not in the
-	   init network namespace */
-	if (net != &init_net)
-		ipv6_table[3].mode = 0444;
-
 	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
 							   ipv6_table);
 	if (!net->ipv6.sysctl.table)
@@ -126,12 +124,29 @@ static struct pernet_operations ipv6_sysctl_net_ops = {
 	.exit = ipv6_sysctl_net_exit,
 };
 
+static struct ctl_table_header *ip6_header;
+
 int ipv6_sysctl_register(void)
 {
-	return register_pernet_subsys(&ipv6_sysctl_net_ops);
+	int err = -ENOMEM;;
+
+	ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table);
+	if (ip6_header == NULL)
+		goto out;
+
+	err = register_pernet_subsys(&ipv6_sysctl_net_ops);
+	if (err)
+		goto err_pernet;
+out:
+	return err;
+
+err_pernet:
+	unregister_net_sysctl_table(ip6_header);
+	goto out;
 }
 
 void ipv6_sysctl_unregister(void)
 {
+	unregister_net_sysctl_table(ip6_header);
 	unregister_pernet_subsys(&ipv6_sysctl_net_ops);
 }
-- 
cgit v1.2.3-70-g09d2


From 96e74088f1da4d9a53735a4a57a4f984f86b75c6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 21 May 2008 14:12:46 -0700
Subject: net: The dev->get_stats pointer is not NULL nowadays.

And so does the pointer is returns, but sysfs and netlinks still
check for both cases.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c |  9 ++++-----
 net/core/rtnetlink.c | 20 ++++++++------------
 2 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 90e2177af08..dccd737ea2e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -242,11 +242,11 @@ static ssize_t netstat_show(const struct device *d,
 			offset % sizeof(unsigned long) != 0);
 
 	read_lock(&dev_base_lock);
-	if (dev_isalive(dev) && dev->get_stats &&
-	    (stats = (*dev->get_stats)(dev)))
+	if (dev_isalive(dev)) {
+		stats = dev->get_stats(dev);
 		ret = sprintf(buf, fmt_ulong,
 			      *(unsigned long *)(((u8 *) stats) + offset));
-
+	}
 	read_unlock(&dev_base_lock);
 	return ret;
 }
@@ -457,8 +457,7 @@ int netdev_register_kobject(struct net_device *net)
 	strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
 
 #ifdef CONFIG_SYSFS
-	if (net->get_stats)
-		*groups++ = &netstat_group;
+	*groups++ = &netstat_group;
 
 #ifdef CONFIG_WIRELESS_EXT
 	if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cf857c4dc7b..ca32ddb8ad1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -606,6 +606,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
+	struct net_device_stats *stats;
+	struct nlattr *attr;
 
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
 	if (nlh == NULL)
@@ -652,19 +654,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
 	}
 
-	if (dev->get_stats) {
-		struct net_device_stats *stats = dev->get_stats(dev);
-		if (stats) {
-			struct nlattr *attr;
+	attr = nla_reserve(skb, IFLA_STATS,
+			sizeof(struct rtnl_link_stats));
+	if (attr == NULL)
+		goto nla_put_failure;
 
-			attr = nla_reserve(skb, IFLA_STATS,
-					   sizeof(struct rtnl_link_stats));
-			if (attr == NULL)
-				goto nla_put_failure;
-
-			copy_rtnl_link_stats(nla_data(attr), stats);
-		}
-	}
+	stats = dev->get_stats(dev);
+	copy_rtnl_link_stats(nla_data(attr), stats);
 
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
-- 
cgit v1.2.3-70-g09d2


From a339f1c881fdb8092ef9b118610307e10e885fc8 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 21 May 2008 14:13:47 -0700
Subject: bridge: Use on-device stats instead of private ones.

Even though bridges require 6 fields from struct net_device_stats,
the on-device stats are always there, so we may just use them.

The br_dev_get_stats is no longer required after this.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c  | 11 ++---------
 net/bridge/br_forward.c |  2 +-
 net/bridge/br_input.c   | 10 +++++-----
 net/bridge/br_private.h |  1 -
 4 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index bf7787395fe..626c7795ae3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -21,12 +21,6 @@
 #include <asm/uaccess.h>
 #include "br_private.h"
 
-static struct net_device_stats *br_dev_get_stats(struct net_device *dev)
-{
-	struct net_bridge *br = netdev_priv(dev);
-	return &br->statistics;
-}
-
 /* net device transmit always called with no BH (preempt_disabled) */
 int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -34,8 +28,8 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	const unsigned char *dest = skb->data;
 	struct net_bridge_fdb_entry *dst;
 
-	br->statistics.tx_packets++;
-	br->statistics.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
 
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
@@ -161,7 +155,6 @@ void br_dev_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->do_ioctl = br_dev_ioctl;
-	dev->get_stats = br_dev_get_stats;
 	dev->hard_start_xmit = br_dev_xmit;
 	dev->open = br_dev_open;
 	dev->set_multicast_list = br_dev_set_multicast_list;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index bdd7c35c3c7..a4711674b3d 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -115,7 +115,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
 				struct sk_buff *skb2;
 
 				if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
-					br->statistics.tx_dropped++;
+					br->dev->stats.tx_dropped++;
 					kfree_skb(skb);
 					return;
 				}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 255c00f60ce..fa0f5711a99 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -24,13 +24,13 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 
 static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb)
 {
-	struct net_device *indev;
+	struct net_device *indev, *brdev = br->dev;
 
-	br->statistics.rx_packets++;
-	br->statistics.rx_bytes += skb->len;
+	brdev->stats.rx_packets++;
+	brdev->stats.rx_bytes += skb->len;
 
 	indev = skb->dev;
-	skb->dev = br->dev;
+	skb->dev = brdev;
 
 	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
 		netif_receive_skb);
@@ -64,7 +64,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
 	dst = NULL;
 
 	if (is_multicast_ether_addr(dest)) {
-		br->statistics.multicast++;
+		br->dev->stats.multicast++;
 		skb2 = skb;
 	} else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) {
 		skb2 = skb;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c11b554fd10..0243cb489ed 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -90,7 +90,6 @@ struct net_bridge
 	spinlock_t			lock;
 	struct list_head		port_list;
 	struct net_device		*dev;
-	struct net_device_stats		statistics;
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
 	struct list_head		age_list;
-- 
cgit v1.2.3-70-g09d2


From addd68eb6fe6f10017974947c90cc85ab2848cf2 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 21 May 2008 14:14:22 -0700
Subject: ipgre: Use on-device stats instead of private ones.

Just switch from tunnel->stat to tunnel->dev->stats. The ip_tunnel->stat
member itself will be removed after I fix its other users (very soon).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2ada033406d..eede36e5570 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -617,6 +617,8 @@ static int ipgre_rcv(struct sk_buff *skb)
 	read_lock(&ipgre_lock);
 	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
 					iph->saddr, iph->daddr, key)) != NULL) {
+		struct net_device_stats *stats = &tunnel->dev->stats;
+
 		secpath_reset(skb);
 
 		skb->protocol = *(__be16*)(h + 2);
@@ -641,28 +643,28 @@ static int ipgre_rcv(struct sk_buff *skb)
 			/* Looped back packet, drop it! */
 			if (skb->rtable->fl.iif == 0)
 				goto drop;
-			tunnel->stat.multicast++;
+			stats->multicast++;
 			skb->pkt_type = PACKET_BROADCAST;
 		}
 #endif
 
 		if (((flags&GRE_CSUM) && csum) ||
 		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
-			tunnel->stat.rx_crc_errors++;
-			tunnel->stat.rx_errors++;
+			stats->rx_crc_errors++;
+			stats->rx_errors++;
 			goto drop;
 		}
 		if (tunnel->parms.i_flags&GRE_SEQ) {
 			if (!(flags&GRE_SEQ) ||
 			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
-				tunnel->stat.rx_fifo_errors++;
-				tunnel->stat.rx_errors++;
+				stats->rx_fifo_errors++;
+				stats->rx_errors++;
 				goto drop;
 			}
 			tunnel->i_seqno = seqno + 1;
 		}
-		tunnel->stat.rx_packets++;
-		tunnel->stat.rx_bytes += skb->len;
+		stats->rx_packets++;
+		stats->rx_bytes += skb->len;
 		skb->dev = tunnel->dev;
 		dst_release(skb->dst);
 		skb->dst = NULL;
@@ -684,7 +686,7 @@ drop_nolock:
 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	struct net_device_stats *stats = &tunnel->stat;
+	struct net_device_stats *stats = &tunnel->dev->stats;
 	struct iphdr  *old_iph = ip_hdr(skb);
 	struct iphdr  *tiph;
 	u8     tos;
@@ -698,7 +700,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	int    mtu;
 
 	if (tunnel->recursion++) {
-		tunnel->stat.collisions++;
+		stats->collisions++;
 		goto tx_error;
 	}
 
@@ -714,7 +716,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		/* NBMA tunnel */
 
 		if (skb->dst == NULL) {
-			tunnel->stat.tx_fifo_errors++;
+			stats->tx_fifo_errors++;
 			goto tx_error;
 		}
 
@@ -765,7 +767,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 						.tos = RT_TOS(tos) } },
 				    .proto = IPPROTO_GRE };
 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tunnel->stat.tx_carrier_errors++;
+			stats->tx_carrier_errors++;
 			goto tx_error;
 		}
 	}
@@ -773,7 +775,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
-		tunnel->stat.collisions++;
+		stats->collisions++;
 		goto tx_error;
 	}
 
@@ -1098,11 +1100,6 @@ done:
 	return err;
 }
 
-static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
-{
-	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
-}
-
 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -1228,7 +1225,6 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 	dev->uninit		= ipgre_tunnel_uninit;
 	dev->destructor 	= free_netdev;
 	dev->hard_start_xmit	= ipgre_tunnel_xmit;
-	dev->get_stats		= ipgre_tunnel_get_stats;
 	dev->do_ioctl		= ipgre_tunnel_ioctl;
 	dev->change_mtu		= ipgre_tunnel_change_mtu;
 
-- 
cgit v1.2.3-70-g09d2


From 50f59cea075875d84018a5fc62cf2f5e6173a919 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 21 May 2008 14:15:16 -0700
Subject: ipip: Use on-device stats instead of private ones.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipip.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 149111f08e8..26c85c23ca4 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -496,8 +496,8 @@ static int ipip_rcv(struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_IP);
 		skb->pkt_type = PACKET_HOST;
 
-		tunnel->stat.rx_packets++;
-		tunnel->stat.rx_bytes += skb->len;
+		tunnel->dev->stats.rx_packets++;
+		tunnel->dev->stats.rx_bytes += skb->len;
 		skb->dev = tunnel->dev;
 		dst_release(skb->dst);
 		skb->dst = NULL;
@@ -520,7 +520,7 @@ static int ipip_rcv(struct sk_buff *skb)
 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	struct net_device_stats *stats = &tunnel->stat;
+	struct net_device_stats *stats = &tunnel->dev->stats;
 	struct iphdr  *tiph = &tunnel->parms.iph;
 	u8     tos = tunnel->parms.iph.tos;
 	__be16 df = tiph->frag_off;
@@ -533,7 +533,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	int    mtu;
 
 	if (tunnel->recursion++) {
-		tunnel->stat.collisions++;
+		stats->collisions++;
 		goto tx_error;
 	}
 
@@ -546,7 +546,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!dst) {
 		/* NBMA tunnel */
 		if ((rt = skb->rtable) == NULL) {
-			tunnel->stat.tx_fifo_errors++;
+			stats->tx_fifo_errors++;
 			goto tx_error;
 		}
 		if ((dst = rt->rt_gateway) == 0)
@@ -561,7 +561,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 						.tos = RT_TOS(tos) } },
 				    .proto = IPPROTO_IPIP };
 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tunnel->stat.tx_carrier_errors++;
+			stats->tx_carrier_errors++;
 			goto tx_error_icmp;
 		}
 	}
@@ -569,7 +569,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
-		tunnel->stat.collisions++;
+		stats->collisions++;
 		goto tx_error;
 	}
 
@@ -579,7 +579,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 
 	if (mtu < 68) {
-		tunnel->stat.collisions++;
+		stats->collisions++;
 		ip_rt_put(rt);
 		goto tx_error;
 	}
@@ -813,11 +813,6 @@ done:
 	return err;
 }
 
-static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
-{
-	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
-}
-
 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
@@ -830,7 +825,6 @@ static void ipip_tunnel_setup(struct net_device *dev)
 {
 	dev->uninit		= ipip_tunnel_uninit;
 	dev->hard_start_xmit	= ipip_tunnel_xmit;
-	dev->get_stats		= ipip_tunnel_get_stats;
 	dev->do_ioctl		= ipip_tunnel_ioctl;
 	dev->change_mtu		= ipip_tunnel_change_mtu;
 	dev->destructor		= free_netdev;
-- 
cgit v1.2.3-70-g09d2


From 4eecc107a85a5882e253bd97310d4e96300a2068 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 21 May 2008 14:15:46 -0700
Subject: sit: Use on-device stats instead of private ones.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5a6fab95569..b0ee9618763 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -578,13 +578,13 @@ static int ipip6_rcv(struct sk_buff *skb)
 
 		if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
 		    !isatap_chksrc(skb, iph, tunnel)) {
-			tunnel->stat.rx_errors++;
+			tunnel->dev->stats.rx_errors++;
 			read_unlock(&ipip6_lock);
 			kfree_skb(skb);
 			return 0;
 		}
-		tunnel->stat.rx_packets++;
-		tunnel->stat.rx_bytes += skb->len;
+		tunnel->dev->stats.rx_packets++;
+		tunnel->dev->stats.rx_bytes += skb->len;
 		skb->dev = tunnel->dev;
 		dst_release(skb->dst);
 		skb->dst = NULL;
@@ -624,7 +624,7 @@ static inline __be32 try_6to4(struct in6_addr *v6dst)
 static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	struct net_device_stats *stats = &tunnel->stat;
+	struct net_device_stats *stats = &tunnel->dev->stats;
 	struct iphdr  *tiph = &tunnel->parms.iph;
 	struct ipv6hdr *iph6 = ipv6_hdr(skb);
 	u8     tos = tunnel->parms.iph.tos;
@@ -638,7 +638,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	int addr_type;
 
 	if (tunnel->recursion++) {
-		tunnel->stat.collisions++;
+		stats->collisions++;
 		goto tx_error;
 	}
 
@@ -705,20 +705,20 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 				    .oif = tunnel->parms.link,
 				    .proto = IPPROTO_IPV6 };
 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tunnel->stat.tx_carrier_errors++;
+			stats->tx_carrier_errors++;
 			goto tx_error_icmp;
 		}
 	}
 	if (rt->rt_type != RTN_UNICAST) {
 		ip_rt_put(rt);
-		tunnel->stat.tx_carrier_errors++;
+		stats->tx_carrier_errors++;
 		goto tx_error_icmp;
 	}
 	tdev = rt->u.dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
-		tunnel->stat.collisions++;
+		stats->collisions++;
 		goto tx_error;
 	}
 
@@ -728,7 +728,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 
 	if (mtu < 68) {
-		tunnel->stat.collisions++;
+		stats->collisions++;
 		ip_rt_put(rt);
 		goto tx_error;
 	}
@@ -1003,11 +1003,6 @@ done:
 	return err;
 }
 
-static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev)
-{
-	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
-}
-
 static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
@@ -1021,7 +1016,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	dev->uninit		= ipip6_tunnel_uninit;
 	dev->destructor 	= free_netdev;
 	dev->hard_start_xmit	= ipip6_tunnel_xmit;
-	dev->get_stats		= ipip6_tunnel_get_stats;
 	dev->do_ioctl		= ipip6_tunnel_ioctl;
 	dev->change_mtu		= ipip6_tunnel_change_mtu;
 
-- 
cgit v1.2.3-70-g09d2


From 2f4c02d40c0d59f4efc59801ad1498cb0f8550ea Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 21 May 2008 14:16:14 -0700
Subject: ipmr: Ipip tunnel uses on-device stats.

The ipmr uses ipip tunnels for its purposes and updates the
tunnels' stats, but the ipip driver is already switched to
use on-device ones.

Actually, this is a part of the patch #4 from this set.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 11700a4dcd9..65f12005785 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1230,8 +1230,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	if (vif->flags & VIFF_TUNNEL) {
 		ip_encap(skb, vif->local, vif->remote);
 		/* FIXME: extra output firewall step used to be here. --RR */
-		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
-		((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
+		vif->dev->stats.tx_packets++;
+		vif->dev->stats.tx_bytes += skb->len;
 	}
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
-- 
cgit v1.2.3-70-g09d2


From 3dca02af38c11a970160387ab36ae6043feb03cd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 21 May 2008 14:17:05 -0700
Subject: ip6tnl: Use on-device stats instead of private ones.

This tunnel uses its own private structure and requires separate
patch to switch from private stats to on-device ones.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_tunnel.h |  1 -
 net/ipv6/ip6_tunnel.c    | 26 ++++++--------------------
 2 files changed, 6 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 6512d85f11b..3780592ebe8 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -19,7 +19,6 @@
 struct ip6_tnl {
 	struct ip6_tnl *next;	/* next tunnel in list */
 	struct net_device *dev;	/* virtual device associated with tunnel */
-	struct net_device_stats stat;	/* statistics for tunnel device */
 	int recursion;		/* depth of hard_start_xmit recursion */
 	struct ip6_tnl_parm parms;	/* tunnel configuration parameters */
 	struct flowi fl;	/* flowi template for xmit */
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2bda3ba100b..37814810ac4 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -711,7 +711,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 		}
 
 		if (!ip6_tnl_rcv_ctl(t)) {
-			t->stat.rx_dropped++;
+			t->dev->stats.rx_dropped++;
 			read_unlock(&ip6_tnl_lock);
 			goto discard;
 		}
@@ -728,8 +728,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 
 		dscp_ecn_decapsulate(t, ipv6h, skb);
 
-		t->stat.rx_packets++;
-		t->stat.rx_bytes += skb->len;
+		t->dev->stats.rx_packets++;
+		t->dev->stats.rx_bytes += skb->len;
 		netif_rx(skb);
 		read_unlock(&ip6_tnl_lock);
 		return 0;
@@ -849,7 +849,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 			 __u32 *pmtu)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	struct net_device_stats *stats = &t->stat;
+	struct net_device_stats *stats = &t->dev->stats;
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct ipv6_tel_txoption opt;
 	struct dst_entry *dst;
@@ -1043,11 +1043,11 @@ static int
 ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	struct net_device_stats *stats = &t->stat;
+	struct net_device_stats *stats = &t->dev->stats;
 	int ret;
 
 	if (t->recursion++) {
-		t->stat.collisions++;
+		stats->collisions++;
 		goto tx_err;
 	}
 
@@ -1288,19 +1288,6 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return err;
 }
 
-/**
- * ip6_tnl_get_stats - return the stats for tunnel device
- *   @dev: virtual device associated with tunnel
- *
- * Return: stats for device
- **/
-
-static struct net_device_stats *
-ip6_tnl_get_stats(struct net_device *dev)
-{
-	return &(((struct ip6_tnl *)netdev_priv(dev))->stat);
-}
-
 /**
  * ip6_tnl_change_mtu - change mtu manually for tunnel device
  *   @dev: virtual device associated with tunnel
@@ -1334,7 +1321,6 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
 	dev->uninit = ip6_tnl_dev_uninit;
 	dev->destructor = free_netdev;
 	dev->hard_start_xmit = ip6_tnl_xmit;
-	dev->get_stats = ip6_tnl_get_stats;
 	dev->do_ioctl = ip6_tnl_ioctl;
 	dev->change_mtu = ip6_tnl_change_mtu;
 
-- 
cgit v1.2.3-70-g09d2


From cf3677ae19c2f62979b39143f5d2f6b3dfb3b3e4 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 21 May 2008 14:17:33 -0700
Subject: ipmr: Use on-device stats instead of private ones.

These devices use the private area of appropriate size for
statistics. Turning them to use on-device ones make them
"privless" and thus - really small wrt kmalloc cache, they
are allocated from.

Besides, code looks nicer, because of absence of multi-braced
type casts and dereferences.

[ Fix build failures -DaveM ]

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 65f12005785..a34da4977c7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -181,26 +181,20 @@ static int reg_vif_num = -1;
 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	read_lock(&mrt_lock);
-	((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
-	((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
 	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
 	return 0;
 }
 
-static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
-{
-	return (struct net_device_stats*)netdev_priv(dev);
-}
-
 static void reg_vif_setup(struct net_device *dev)
 {
 	dev->type		= ARPHRD_PIMREG;
 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->hard_start_xmit	= reg_vif_xmit;
-	dev->get_stats		= reg_vif_get_stats;
 	dev->destructor		= free_netdev;
 }
 
@@ -209,8 +203,7 @@ static struct net_device *ipmr_reg_vif(void)
 	struct net_device *dev;
 	struct in_device *in_dev;
 
-	dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
-			   reg_vif_setup);
+	dev = alloc_netdev(0, "pimreg", reg_vif_setup);
 
 	if (dev == NULL)
 		return NULL;
@@ -1170,8 +1163,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	if (vif->flags & VIFF_REGISTER) {
 		vif->pkt_out++;
 		vif->bytes_out+=skb->len;
-		((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
-		((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
+		vif->dev->stats.tx_bytes += skb->len;
+		vif->dev->stats.tx_packets++;
 		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
 		kfree_skb(skb);
 		return;
@@ -1487,8 +1480,8 @@ int pim_rcv_v1(struct sk_buff * skb)
 	skb->pkt_type = PACKET_HOST;
 	dst_release(skb->dst);
 	skb->dst = NULL;
-	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
-	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
+	reg_dev->stats.rx_bytes += skb->len;
+	reg_dev->stats.rx_packets++;
 	nf_reset(skb);
 	netif_rx(skb);
 	dev_put(reg_dev);
@@ -1542,8 +1535,8 @@ static int pim_rcv(struct sk_buff * skb)
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
 	dst_release(skb->dst);
-	((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
-	((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
+	reg_dev->stats.rx_bytes += skb->len;
+	reg_dev->stats.rx_packets++;
 	skb->dst = NULL;
 	nf_reset(skb);
 	netif_rx(skb);
-- 
cgit v1.2.3-70-g09d2


From dc58c78c047fb01f4c13e7de91abc5eb931920b3 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 21 May 2008 14:17:54 -0700
Subject: ip6mr: Use on-device stats instead of private ones.

Similar to ipmr.

[ Fix build failures -DaveM ]

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2de3c464fe7..bf268b38696 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -388,8 +388,8 @@ static int pim6_rcv(struct sk_buff *skb)
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
 	dst_release(skb->dst);
-	((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len;
-	((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++;
+	reg_dev->stats.rx_bytes += skb->len;
+	reg_dev->stats.rx_packets++;
 	skb->dst = NULL;
 	nf_reset(skb);
 	netif_rx(skb);
@@ -409,26 +409,20 @@ static struct inet6_protocol pim6_protocol = {
 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	read_lock(&mrt_lock);
-	((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len;
-	((struct net_device_stats *)netdev_priv(dev))->tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
 	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
 	return 0;
 }
 
-static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
-{
-	return (struct net_device_stats *)netdev_priv(dev);
-}
-
 static void reg_vif_setup(struct net_device *dev)
 {
 	dev->type		= ARPHRD_PIMREG;
 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->hard_start_xmit	= reg_vif_xmit;
-	dev->get_stats		= reg_vif_get_stats;
 	dev->destructor		= free_netdev;
 }
 
@@ -436,9 +430,7 @@ static struct net_device *ip6mr_reg_vif(void)
 {
 	struct net_device *dev;
 
-	dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg",
-			   reg_vif_setup);
-
+	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
 	if (dev == NULL)
 		return NULL;
 
@@ -1377,8 +1369,8 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 	if (vif->flags & MIFF_REGISTER) {
 		vif->pkt_out++;
 		vif->bytes_out += skb->len;
-		((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len;
-		((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++;
+		vif->dev->stats.tx_bytes += skb->len;
+		vif->dev->stats.tx_packets++;
 		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
 		kfree_skb(skb);
 		return 0;
-- 
cgit v1.2.3-70-g09d2


From 59f0c4523fdea865fab7d69d878269992a9d08dd Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 21 May 2008 14:52:30 -0700
Subject: tipc: Fix skb_under_panic when configuring TIPC without privileges

This patch prevents a TIPC configuration command requiring network
administrator privileges from triggering an skbuff underrun if it
is issued by a process lacking those privileges.  The revised error
handling code avoids the use of a potentially uninitialized global
variable by transforming the unauthorized command into a new command,
then following the standard command processing path to generate the
required error message.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h | 10 +++++++++-
 net/tipc/config.c           |  6 +++++-
 net/tipc/netlink.c          | 16 +++++++++-------
 3 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index b0c916d1f37..2bc6fa4adeb 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -2,7 +2,7 @@
  * include/linux/tipc_config.h: Include file for TIPC configuration interface
  * 
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -135,6 +135,14 @@
 #define  TIPC_CMD_SET_MAX_SLAVES    0x800A    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_NETID         0x800B    /* tx unsigned, rx none */
 
+/*
+ * Reserved commands:
+ * May not be issued by any process.
+ * Used internally by TIPC.
+ */
+
+#define  TIPC_CMD_NOT_NET_ADMIN     0xC001    /* tx none, rx none */
+
 /*
  * TLV types defined for TIPC
  */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 91d56f8fee9..16e7cb74969 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -2,7 +2,7 @@
  * net/tipc/config.c: TIPC configuration management code
  *
  * Copyright (c) 2002-2006, Ericsson AB
- * Copyright (c) 2004-2006, Wind River Systems
+ * Copyright (c) 2004-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -602,6 +602,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_NETID:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
 		break;
+	case TIPC_CMD_NOT_NET_ADMIN:
+		rep_tlv_buf =
+			tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN);
+		break;
 	default:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 							  " (unknown command)");
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 6a7f7b4c259..c387217bb23 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -2,7 +2,7 @@
  * net/tipc/netlink.c: TIPC configuration handling
  *
  * Copyright (c) 2005-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -45,15 +45,17 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
 	struct nlmsghdr *req_nlh = info->nlhdr;
 	struct tipc_genlmsghdr *req_userhdr = info->userhdr;
 	int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN);
+	u16 cmd;
 
 	if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN)))
-		rep_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN);
+		cmd = TIPC_CMD_NOT_NET_ADMIN;
 	else
-		rep_buf = tipc_cfg_do_cmd(req_userhdr->dest,
-					  req_userhdr->cmd,
-					  NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN,
-					  NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN),
-					  hdr_space);
+		cmd = req_userhdr->cmd;
+
+	rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd,
+			NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN,
+			NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN),
+			hdr_space);
 
 	if (rep_buf) {
 		skb_push(rep_buf, hdr_space);
-- 
cgit v1.2.3-70-g09d2


From 2ecb0924d7791372a70ef8f1174e37b329b955c3 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 21 May 2008 14:53:00 -0700
Subject: tipc: Prevent node object duplication due to simultaneous discovery

This patch ensures that the simultaneous discovery of the same
neighboring node by multiple interfaces does not cause TIPC to add
the node into its internal data structures more than once.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'net')

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 598f4d3a009..34e9a2bb7c1 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -52,16 +52,40 @@ static void node_established_contact(struct node *n_ptr);
 
 struct node *tipc_nodes = NULL;	/* sorted list of nodes within cluster */
 
+static DEFINE_SPINLOCK(node_create_lock);
+
 u32 tipc_own_tag = 0;
 
+/**
+ * tipc_node_create - create neighboring node
+ *
+ * Currently, this routine is called by neighbor discovery code, which holds
+ * net_lock for reading only.  We must take node_create_lock to ensure a node
+ * isn't created twice if two different bearers discover the node at the same
+ * time.  (It would be preferable to switch to holding net_lock in write mode,
+ * but this is a non-trivial change.)
+ */
+
 struct node *tipc_node_create(u32 addr)
 {
 	struct cluster *c_ptr;
 	struct node *n_ptr;
 	struct node **curr_node;
 
+	spin_lock_bh(&node_create_lock);
+
+	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
+		if (addr < n_ptr->addr)
+			break;
+		if (addr == n_ptr->addr) {
+			spin_unlock_bh(&node_create_lock);
+			return n_ptr;
+		}
+	}
+
 	n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC);
 	if (!n_ptr) {
+		spin_unlock_bh(&node_create_lock);
 		warn("Node creation failed, no memory\n");
 		return NULL;
 	}
@@ -71,6 +95,7 @@ struct node *tipc_node_create(u32 addr)
 		c_ptr = tipc_cltr_create(addr);
 	}
 	if (!c_ptr) {
+		spin_unlock_bh(&node_create_lock);
 		kfree(n_ptr);
 		return NULL;
 	}
@@ -91,6 +116,7 @@ struct node *tipc_node_create(u32 addr)
 		}
 	}
 	(*curr_node) = n_ptr;
+	spin_unlock_bh(&node_create_lock);
 	return n_ptr;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 811102ca23dfdde5ee8b782b3a4bbff44c499cb2 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 21 May 2008 14:53:34 -0700
Subject: tipc: Optimize null pointer check during neighbor discovery

This patch optimizes TIPC neighbor discovery code to avoid testing for
a null node pointer when the pointer is already known to be non-null.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/discover.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 5d643e5721e..faeaf06d377 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -200,9 +200,8 @@ void tipc_disc_recv_msg(struct sk_buff *buf)
 		dbg(" in own cluster\n");
 		if (n_ptr == NULL) {
 			n_ptr = tipc_node_create(orig);
-		}
-		if (n_ptr == NULL) {
-			return;
+			if (!n_ptr)
+				return;
 		}
 		spin_lock_bh(&n_ptr->lock);
 		link = n_ptr->links[b_ptr->identity];
-- 
cgit v1.2.3-70-g09d2


From 6d4a6672c8263f98544d2b91690dc7074b144090 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 21 May 2008 14:54:12 -0700
Subject: tipc: Update "previous node" indicators when node address changes

This patch ensures that the "previous node" field in any existing
TIPC port message header templates is updated properly when a TIPC
network address is assigned to the node.  (Previously, only the
"originating node" field was updated.)

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index 757de38fe6a..2c64ad88e3c 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -780,6 +780,7 @@ void tipc_port_reinit(void)
 		msg = &p_ptr->publ.phdr;
 		if (msg_orignode(msg) == tipc_own_addr)
 			break;
+		msg_set_prevnode(msg, tipc_own_addr);
 		msg_set_orignode(msg, tipc_own_addr);
 	}
 	spin_unlock_bh(&tipc_port_list_lock);
-- 
cgit v1.2.3-70-g09d2


From 03194379a77b02df3404ec4848a50c6784e9a8a5 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 21 May 2008 14:55:04 -0700
Subject: tipc: Fix initialization sequence problems when entering network mode

This patch ensures that TIPC's topology service and configuration
service are shut down before switching into "network mode".  This
ensures that TIPC does not mistakenly try to send unnecessary
"publication withdraw" messages to other nodes before it is fully
initialized for sending off-node messages.  Note that the node's
current network address is now updated only after the two services
are shut down; this ensures that any existing connections to the
topology server are terminated correctly using the old address.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/config.c |  3 +--
 net/tipc/core.c   |  4 ++--
 net/tipc/core.h   |  2 +-
 net/tipc/net.c    | 10 +++++++---
 net/tipc/net.h    |  2 +-
 5 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/tipc/config.c b/net/tipc/config.c
index 16e7cb74969..ca3544d030c 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -293,7 +293,6 @@ static struct sk_buff *cfg_set_own_addr(void)
 	if (tipc_mode == TIPC_NET_MODE)
 		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 						   " (cannot change node address once assigned)");
-	tipc_own_addr = addr;
 
 	/*
 	 * Must release all spinlocks before calling start_net() because
@@ -306,7 +305,7 @@ static struct sk_buff *cfg_set_own_addr(void)
 	 */
 
 	spin_unlock_bh(&config_lock);
-	tipc_core_start_net();
+	tipc_core_start_net(addr);
 	spin_lock_bh(&config_lock);
 	return tipc_cfg_reply_none();
 }
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3d97386af09..3256bd7d398 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -117,11 +117,11 @@ void tipc_core_stop_net(void)
  * start_net - start TIPC networking sub-systems
  */
 
-int tipc_core_start_net(void)
+int tipc_core_start_net(unsigned long addr)
 {
 	int res;
 
-	if ((res = tipc_net_start()) ||
+	if ((res = tipc_net_start(addr)) ||
 	    (res = tipc_eth_media_start())) {
 		tipc_core_stop_net();
 	}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index bd78d1705c0..a881f92a853 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -202,7 +202,7 @@ extern atomic_t tipc_user_count;
 
 extern int  tipc_core_start(void);
 extern void tipc_core_stop(void);
-extern int  tipc_core_start_net(void);
+extern int  tipc_core_start_net(unsigned long addr);
 extern void tipc_core_stop_net(void);
 extern int  tipc_handler_start(void);
 extern void tipc_handler_stop(void);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index c39c76201e8..cc51fa48367 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -266,7 +266,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
 	tipc_link_send(buf, dnode, msg_link_selector(msg));
 }
 
-int tipc_net_start(void)
+int tipc_net_start(u32 addr)
 {
 	char addr_string[16];
 	int res;
@@ -274,6 +274,10 @@ int tipc_net_start(void)
 	if (tipc_mode != TIPC_NODE_MODE)
 		return -ENOPROTOOPT;
 
+	tipc_subscr_stop();
+	tipc_cfg_stop();
+
+	tipc_own_addr = addr;
 	tipc_mode = TIPC_NET_MODE;
 	tipc_named_reinit();
 	tipc_port_reinit();
@@ -284,10 +288,10 @@ int tipc_net_start(void)
 	    (res = tipc_bclink_init())) {
 		return res;
 	}
-	tipc_subscr_stop();
-	tipc_cfg_stop();
+
 	tipc_k_signal((Handler)tipc_subscr_start, 0);
 	tipc_k_signal((Handler)tipc_cfg_init, 0);
+
 	info("Started in network mode\n");
 	info("Own node address %s, network identity %u\n",
 	     addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
diff --git a/net/tipc/net.h b/net/tipc/net.h
index a6a0e9976ac..d154ac2bda9 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -58,7 +58,7 @@ void tipc_net_route_msg(struct sk_buff *buf);
 struct node *tipc_net_select_remote_node(u32 addr, u32 ref);
 u32 tipc_net_select_router(u32 addr, u32 ref);
 
-int tipc_net_start(void);
+int tipc_net_start(u32 addr);
 void tipc_net_stop(void);
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 402d7752ed253369b7ab037e2d778e52d59c19ed Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 6 May 2008 18:53:43 +0400
Subject: mac80211: Brush up error paths in mesh_path_add.

There are already tree paths, that do incremental rollbacks, so
merge them together, rename labels and format the code to look a
bit nicer.

(I do not mind dropping/delaying this patch however).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh_pathtbl.c | 42 ++++++++++++++++++++----------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 99c2d360888..7097ef98199 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -158,19 +158,14 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 	if (atomic_add_unless(&sdata->u.sta.mpaths, 1, MESH_MAX_MPATHS) == 0)
 		return -ENOSPC;
 
+	err = -ENOMEM;
 	new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL);
-	if (!new_mpath) {
-		atomic_dec(&sdata->u.sta.mpaths);
-		err = -ENOMEM;
-		goto endadd2;
-	}
+	if (!new_mpath)
+		goto err_path_alloc;
+
 	new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL);
-	if (!new_node) {
-		kfree(new_mpath);
-		atomic_dec(&sdata->u.sta.mpaths);
-		err = -ENOMEM;
-		goto endadd2;
-	}
+	if (!new_node)
+		goto err_node_alloc;
 
 	read_lock(&pathtbl_resize_lock);
 	memcpy(new_mpath->dst, dst, ETH_ALEN);
@@ -189,16 +184,11 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 
 	spin_lock(&mesh_paths->hashwlock[hash_idx]);
 
+	err = -EEXIST;
 	hlist_for_each_entry(node, n, bucket, list) {
 		mpath = node->mpath;
-		if (mpath->dev == dev && memcmp(dst, mpath->dst, ETH_ALEN)
-				== 0) {
-			err = -EEXIST;
-			atomic_dec(&sdata->u.sta.mpaths);
-			kfree(new_node);
-			kfree(new_mpath);
-			goto endadd;
-		}
+		if (mpath->dev == dev && memcmp(dst, mpath->dst, ETH_ALEN) == 0)
+			goto err_exists;
 	}
 
 	hlist_add_head_rcu(&new_node->list, bucket);
@@ -206,10 +196,9 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 		mesh_paths->mean_chain_len * (mesh_paths->hash_mask + 1))
 		grow = 1;
 
-endadd:
 	spin_unlock(&mesh_paths->hashwlock[hash_idx]);
 	read_unlock(&pathtbl_resize_lock);
-	if (!err && grow) {
+	if (grow) {
 		struct mesh_table *oldtbl, *newtbl;
 
 		write_lock(&pathtbl_resize_lock);
@@ -225,7 +214,16 @@ endadd:
 		synchronize_rcu();
 		mesh_table_free(oldtbl, false);
 	}
-endadd2:
+	return 0;
+
+err_exists:
+	spin_unlock(&mesh_paths->hashwlock[hash_idx]);
+	read_unlock(&pathtbl_resize_lock);
+	kfree(new_node);
+err_node_alloc:
+	kfree(new_mpath);
+err_path_alloc:
+	atomic_dec(&sdata->u.sta.mpaths);
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 5194ee82b4aafc35b32c96db11bdacea9bf548be Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 7 May 2008 19:44:20 +0400
Subject: mac80211: Fix one more call to synchronize_rcu in atomic context.

(This set applies OK without the previous one of 4 patches,
 but with some fuzz in the 7th one)

The mesh_path_node_free() does so under hashwlock.

But, this one is called
1. from mesh_path_add() after an old hash is hidden and
   synchronize_rcu() is calld
2. mesh_pathtbl_unregister(), when the module is being
   unloaded and no devices exist to mess with this hash.

So, it seems to me, that simply removing the call is OK.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh_pathtbl.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 7097ef98199..0b6c4bfe3e7 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -458,7 +458,6 @@ static void mesh_path_node_free(struct hlist_node *p, bool free_leafs)
 	struct mpath_node *node = hlist_entry(p, struct mpath_node, list);
 	mpath = node->mpath;
 	hlist_del_rcu(p);
-	synchronize_rcu();
 	if (free_leafs)
 		kfree(mpath);
 	kfree(node);
-- 
cgit v1.2.3-70-g09d2


From 4caf86c6928cfaca270327bc944f901c2e2a8f50 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 7 May 2008 19:47:01 +0400
Subject: mac80211: Prepare mesh_table_grow to failing copy_node callback.

The mesh_path_node_copy() performs kmalloc() and thus - may fail
(well, it does not now, but I'm fixing this right now). Its caller -
the mesh_table_grow() - isn't prepared for such a trick yet.

This preparation is just flush the new hash and make copy_node()
return an int value.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh.c         | 15 +++++++++++++--
 net/mac80211/mesh.h         |  2 +-
 net/mac80211/mesh_pathtbl.c |  3 ++-
 3 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 697ef67f96b..ca81d0065eb 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -349,7 +349,7 @@ struct mesh_table *mesh_table_grow(struct mesh_table *tbl)
 {
 	struct mesh_table *newtbl;
 	struct hlist_head *oldhash;
-	struct hlist_node *p;
+	struct hlist_node *p, *q;
 	int err = 0;
 	int i;
 
@@ -373,13 +373,24 @@ struct mesh_table *mesh_table_grow(struct mesh_table *tbl)
 	oldhash = tbl->hash_buckets;
 	for (i = 0; i <= tbl->hash_mask; i++)
 		hlist_for_each(p, &oldhash[i])
-			tbl->copy_node(p, newtbl);
+			if (tbl->copy_node(p, newtbl) < 0)
+				goto errcopy;
 
 endgrow:
 	if (err)
 		return NULL;
 	else
 		return newtbl;
+
+errcopy:
+	for (i = 0; i <= newtbl->hash_mask; i++) {
+		hlist_for_each_safe(p, q, &newtbl->hash_buckets[i])
+			tbl->free_node(p, 0);
+	}
+	kfree(newtbl->hash_buckets);
+	kfree(newtbl->hashwlock);
+	kfree(newtbl);
+	return NULL;
 }
 
 /**
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 2e161f6d828..669eafafe49 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -109,7 +109,7 @@ struct mesh_table {
 	__u32 hash_rnd;			/* Used for hash generation */
 	atomic_t entries;		/* Up to MAX_MESH_NEIGHBOURS */
 	void (*free_node) (struct hlist_node *p, bool free_leafs);
-	void (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl);
+	int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl);
 	int size_order;
 	int mean_chain_len;
 };
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 0b6c4bfe3e7..512bfa112c6 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -463,7 +463,7 @@ static void mesh_path_node_free(struct hlist_node *p, bool free_leafs)
 	kfree(node);
 }
 
-static void mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl)
+static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl)
 {
 	struct mesh_path *mpath;
 	struct mpath_node *node, *new_node;
@@ -476,6 +476,7 @@ static void mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl)
 	hash_idx = mesh_table_hash(mpath->dst, mpath->dev, newtbl);
 	hlist_add_head(&new_node->list,
 			&newtbl->hash_buckets[hash_idx]);
+	return 0;
 }
 
 int mesh_pathtbl_init(void)
-- 
cgit v1.2.3-70-g09d2


From 00242c40a167113688dd7ed46ec94e8a4c47f603 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 7 May 2008 19:48:14 +0400
Subject: mac80211: Report allocation failure from mesh_path_node_copy.

Now - return the -ENOMEM in case kmalloc fails.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh_pathtbl.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 512bfa112c6..fe1ceeea07f 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -469,9 +469,12 @@ static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl)
 	struct mpath_node *node, *new_node;
 	u32 hash_idx;
 
+	new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL);
+	if (new_node == NULL)
+		return -ENOMEM;
+
 	node = hlist_entry(p, struct mpath_node, list);
 	mpath = node->mpath;
-	new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL);
 	new_node->mpath = mpath;
 	hash_idx = mesh_table_hash(mpath->dst, mpath->dev, newtbl);
 	hlist_add_head(&new_node->list,
-- 
cgit v1.2.3-70-g09d2


From 8566dc3fca470454461b161677a5ae3bb3a8c1b8 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 7 May 2008 19:51:51 +0400
Subject: mac80211: Fix sleeping allocation under lock in mesh_path_node_copy.

The mesh_path_node_copy() can be called like this:
mesh_path_add
 `- write_lock(&pathtbl_resize_lock); /* ! */
 `- mesh_table_grow
     `- ->copy_node
           `- mesh_path_node_copy

thus, the GFP_KERNEL is not suitable here.

The acceptable fix, I suppose, is make this allocation GPF_ATOMIC -
the mpath_node being allocated is 4 pointers, i.e. this allocation
is small enough to survive even under a moderate memory pressure.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh_pathtbl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index fe1ceeea07f..1154398ecea 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -469,7 +469,7 @@ static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl)
 	struct mpath_node *node, *new_node;
 	u32 hash_idx;
 
-	new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL);
+	new_node = kmalloc(sizeof(struct mpath_node), GFP_ATOMIC);
 	if (new_node == NULL)
 		return -ENOMEM;
 
-- 
cgit v1.2.3-70-g09d2


From 3282aea9ea5644a5b0161ad0fbd70fbf1099a470 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 7 May 2008 19:54:04 +0400
Subject: mac80211: Do not report false error from mesh_path_add.

In case the hash grow failed, it is not fair to return error -
the new node _was_ _actually_ added in this case.

Besides, after my previous patch, this grow is more likely
to fail on large hashes.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh_pathtbl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 1154398ecea..947b13b4072 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -206,7 +206,7 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 		newtbl = mesh_table_grow(mesh_paths);
 		if (!newtbl) {
 			write_unlock(&pathtbl_resize_lock);
-			return -ENOMEM;
+			return 0;
 		}
 		rcu_assign_pointer(mesh_paths, newtbl);
 		write_unlock(&pathtbl_resize_lock);
-- 
cgit v1.2.3-70-g09d2


From a3538b19a6d226f9d3d9b18865468370009dec55 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 7 May 2008 19:55:59 +0400
Subject: mac80211: Merge error paths in mesh_table_grow().

This is the first (of two) clean ups after the fixes above.

The err variable is not even required after this cleaning.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index ca81d0065eb..cbce001f8f2 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -350,20 +350,15 @@ struct mesh_table *mesh_table_grow(struct mesh_table *tbl)
 	struct mesh_table *newtbl;
 	struct hlist_head *oldhash;
 	struct hlist_node *p, *q;
-	int err = 0;
 	int i;
 
 	if (atomic_read(&tbl->entries)
-			< tbl->mean_chain_len * (tbl->hash_mask + 1)) {
-		err = -EPERM;
+			< tbl->mean_chain_len * (tbl->hash_mask + 1))
 		goto endgrow;
-	}
 
 	newtbl = mesh_table_alloc(tbl->size_order + 1);
-	if (!newtbl) {
-		err = -ENOMEM;
+	if (!newtbl)
 		goto endgrow;
-	}
 
 	newtbl->free_node = tbl->free_node;
 	newtbl->mean_chain_len = tbl->mean_chain_len;
@@ -376,11 +371,7 @@ struct mesh_table *mesh_table_grow(struct mesh_table *tbl)
 			if (tbl->copy_node(p, newtbl) < 0)
 				goto errcopy;
 
-endgrow:
-	if (err)
-		return NULL;
-	else
-		return newtbl;
+	return newtbl;
 
 errcopy:
 	for (i = 0; i <= newtbl->hash_mask; i++) {
@@ -390,6 +381,7 @@ errcopy:
 	kfree(newtbl->hash_buckets);
 	kfree(newtbl->hashwlock);
 	kfree(newtbl);
+endgrow:
 	return NULL;
 }
 
-- 
cgit v1.2.3-70-g09d2


From bd9b448f4c0a514559bdae4ca18ca3e8cd999c6d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 7 May 2008 19:57:11 +0400
Subject: mac80211: Consolidate hash kfree-ing in mesh.c.

There are already two places, that kfree the mesh_table and
its buckets.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index cbce001f8f2..b5933b27149 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -315,6 +315,13 @@ struct mesh_table *mesh_table_alloc(int size_order)
 	return newtbl;
 }
 
+static void __mesh_table_free(struct mesh_table *tbl)
+{
+	kfree(tbl->hash_buckets);
+	kfree(tbl->hashwlock);
+	kfree(tbl);
+}
+
 void mesh_table_free(struct mesh_table *tbl, bool free_leafs)
 {
 	struct hlist_head *mesh_hash;
@@ -330,9 +337,7 @@ void mesh_table_free(struct mesh_table *tbl, bool free_leafs)
 		}
 		spin_unlock(&tbl->hashwlock[i]);
 	}
-	kfree(tbl->hash_buckets);
-	kfree(tbl->hashwlock);
-	kfree(tbl);
+	__mesh_table_free(tbl);
 }
 
 static void ieee80211_mesh_path_timer(unsigned long data)
@@ -378,9 +383,7 @@ errcopy:
 		hlist_for_each_safe(p, q, &newtbl->hash_buckets[i])
 			tbl->free_node(p, 0);
 	}
-	kfree(newtbl->hash_buckets);
-	kfree(newtbl->hashwlock);
-	kfree(newtbl);
+	__mesh_table_free(tbl);
 endgrow:
 	return NULL;
 }
-- 
cgit v1.2.3-70-g09d2


From 1bd3dff549537d3d9b92d0b284f4cc2be264a56d Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 14 May 2008 16:26:16 -0700
Subject: mac80211: michael.c use kernel-provided infrastructure

Replace private implementation of bit rotation and unaligned access helpers
with kernel-provided implementation.

Fold xswap helper in its one usage in the michael_block macro.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/michael.c | 60 ++++++++++++--------------------------------------
 1 file changed, 14 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index 0f844f7895f..c1e5897fcf7 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -8,71 +8,39 @@
  */
 
 #include <linux/types.h>
+#include <linux/bitops.h>
+#include <asm/unaligned.h>
 
 #include "michael.h"
 
-static inline u32 rotr(u32 val, int bits)
-{
-	return (val >> bits) | (val << (32 - bits));
-}
-
-
-static inline u32 rotl(u32 val, int bits)
-{
-	return (val << bits) | (val >> (32 - bits));
-}
-
-
-static inline u32 xswap(u32 val)
-{
-	return ((val & 0xff00ff00) >> 8) | ((val & 0x00ff00ff) << 8);
-}
-
-
 #define michael_block(l, r) \
 do { \
-	r ^= rotl(l, 17); \
+	r ^= rol32(l, 17); \
 	l += r; \
-	r ^= xswap(l); \
+	r ^= ((l & 0xff00ff00) >> 8) | ((l & 0x00ff00ff) << 8); \
 	l += r; \
-	r ^= rotl(l, 3); \
+	r ^= rol32(l, 3); \
 	l += r; \
-	r ^= rotr(l, 2); \
+	r ^= ror32(l, 2); \
 	l += r; \
 } while (0)
 
-
-static inline u32 michael_get32(u8 *data)
-{
-	return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
-}
-
-
-static inline void michael_put32(u32 val, u8 *data)
-{
-	data[0] = val & 0xff;
-	data[1] = (val >> 8) & 0xff;
-	data[2] = (val >> 16) & 0xff;
-	data[3] = (val >> 24) & 0xff;
-}
-
-
 void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
 		 u8 *data, size_t data_len, u8 *mic)
 {
 	u32 l, r, val;
 	size_t block, blocks, left;
 
-	l = michael_get32(key);
-	r = michael_get32(key + 4);
+	l = get_unaligned_le32(key);
+	r = get_unaligned_le32(key + 4);
 
 	/* A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC
 	 * calculation, but it is _not_ transmitted */
-	l ^= michael_get32(da);
+	l ^= get_unaligned_le32(da);
 	michael_block(l, r);
-	l ^= da[4] | (da[5] << 8) | (sa[0] << 16) | (sa[1] << 24);
+	l ^= get_unaligned_le16(&da[4]) | (get_unaligned_le16(sa) << 16);
 	michael_block(l, r);
-	l ^= michael_get32(&sa[2]);
+	l ^= get_unaligned_le32(&sa[2]);
 	michael_block(l, r);
 	l ^= priority;
 	michael_block(l, r);
@@ -82,7 +50,7 @@ void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
 	left = data_len % 4;
 
 	for (block = 0; block < blocks; block++) {
-		l ^= michael_get32(&data[block * 4]);
+		l ^= get_unaligned_le32(&data[block * 4]);
 		michael_block(l, r);
 	}
 
@@ -99,6 +67,6 @@ void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
 	/* last block is zero, so l ^ 0 = l */
 	michael_block(l, r);
 
-	michael_put32(l, mic);
-	michael_put32(r, mic + 4);
+	put_unaligned_le32(l, mic);
+	put_unaligned_le32(r, mic + 4);
 }
-- 
cgit v1.2.3-70-g09d2


From 1b19ca396621dcba573b20d4625526f5a460c886 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 14 May 2008 16:26:17 -0700
Subject: mac80211: introduce struct michael_mic_ctx and static helpers

Replace the existing macro with a static function, significantly shrinks the
size of the produced object file.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/michael.c | 75 +++++++++++++++++++++++++++-----------------------
 net/mac80211/michael.h |  4 +++
 2 files changed, 44 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index c1e5897fcf7..4e151e0caeb 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -13,46 +13,52 @@
 
 #include "michael.h"
 
-#define michael_block(l, r) \
-do { \
-	r ^= rol32(l, 17); \
-	l += r; \
-	r ^= ((l & 0xff00ff00) >> 8) | ((l & 0x00ff00ff) << 8); \
-	l += r; \
-	r ^= rol32(l, 3); \
-	l += r; \
-	r ^= ror32(l, 2); \
-	l += r; \
-} while (0)
+static void michael_block(struct michael_mic_ctx *mctx, u32 val)
+{
+	mctx->l ^= val;
+	mctx->r ^= rol32(mctx->l, 17);
+	mctx->l += mctx->r;
+	mctx->r ^= ((mctx->l & 0xff00ff00) >> 8) |
+		   ((mctx->l & 0x00ff00ff) << 8);
+	mctx->l += mctx->r;
+	mctx->r ^= rol32(mctx->l, 3);
+	mctx->l += mctx->r;
+	mctx->r ^= ror32(mctx->l, 2);
+	mctx->l += mctx->r;
+}
+
+static void michael_mic_hdr(struct michael_mic_ctx *mctx,
+			    u8 *key, u8 *da, u8 *sa, u8 priority)
+{
+	mctx->l = get_unaligned_le32(key);
+	mctx->r = get_unaligned_le32(key + 4);
+
+	/*
+	 * A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC
+	 * calculation, but it is _not_ transmitted
+	 */
+	michael_block(mctx, get_unaligned_le32(da));
+	michael_block(mctx, get_unaligned_le16(&da[4]) |
+			    (get_unaligned_le16(sa) << 16));
+	michael_block(mctx, get_unaligned_le32(&sa[2]));
+	michael_block(mctx, priority);
+}
 
 void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
 		 u8 *data, size_t data_len, u8 *mic)
 {
-	u32 l, r, val;
+	u32 val;
 	size_t block, blocks, left;
+	struct michael_mic_ctx mctx;
 
-	l = get_unaligned_le32(key);
-	r = get_unaligned_le32(key + 4);
-
-	/* A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC
-	 * calculation, but it is _not_ transmitted */
-	l ^= get_unaligned_le32(da);
-	michael_block(l, r);
-	l ^= get_unaligned_le16(&da[4]) | (get_unaligned_le16(sa) << 16);
-	michael_block(l, r);
-	l ^= get_unaligned_le32(&sa[2]);
-	michael_block(l, r);
-	l ^= priority;
-	michael_block(l, r);
+	michael_mic_hdr(&mctx, key, da, sa, priority);
 
 	/* Real data */
 	blocks = data_len / 4;
 	left = data_len % 4;
 
-	for (block = 0; block < blocks; block++) {
-		l ^= get_unaligned_le32(&data[block * 4]);
-		michael_block(l, r);
-	}
+	for (block = 0; block < blocks; block++)
+		michael_block(&mctx, get_unaligned_le32(&data[block * 4]));
 
 	/* Partial block of 0..3 bytes and padding: 0x5a + 4..7 zeros to make
 	 * total length a multiple of 4. */
@@ -62,11 +68,10 @@ void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
 		left--;
 		val |= data[blocks * 4 + left];
 	}
-	l ^= val;
-	michael_block(l, r);
-	/* last block is zero, so l ^ 0 = l */
-	michael_block(l, r);
 
-	put_unaligned_le32(l, mic);
-	put_unaligned_le32(r, mic + 4);
+	michael_block(&mctx, val);
+	michael_block(&mctx, 0);
+
+	put_unaligned_le32(mctx.l, mic);
+	put_unaligned_le32(mctx.r, mic + 4);
 }
diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h
index 2e6aebabeea..7d5707bf884 100644
--- a/net/mac80211/michael.h
+++ b/net/mac80211/michael.h
@@ -14,6 +14,10 @@
 
 #define MICHAEL_MIC_LEN 8
 
+struct michael_mic_ctx {
+	u32 l, r;
+};
+
 void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
 		 u8 *data, size_t data_len, u8 *mic);
 
-- 
cgit v1.2.3-70-g09d2


From a7b6f0c5558ad03281b8064d6a4ab2e124dea991 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 14 May 2008 16:26:18 -0700
Subject: mac80211: add const, remove unused function, make one function static

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/michael.c |  7 +++----
 net/mac80211/michael.h |  4 ++--
 net/mac80211/tkip.c    | 12 ++----------
 net/mac80211/tkip.h    |  4 ----
 4 files changed, 7 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index 4e151e0caeb..1fcdf38cf60 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -6,7 +6,6 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <asm/unaligned.h>
@@ -28,7 +27,7 @@ static void michael_block(struct michael_mic_ctx *mctx, u32 val)
 }
 
 static void michael_mic_hdr(struct michael_mic_ctx *mctx,
-			    u8 *key, u8 *da, u8 *sa, u8 priority)
+			const u8 *key, const u8 *da, const u8 *sa, u8 priority)
 {
 	mctx->l = get_unaligned_le32(key);
 	mctx->r = get_unaligned_le32(key + 4);
@@ -44,8 +43,8 @@ static void michael_mic_hdr(struct michael_mic_ctx *mctx,
 	michael_block(mctx, priority);
 }
 
-void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
-		 u8 *data, size_t data_len, u8 *mic)
+void michael_mic(const u8 *key, const u8 *da, const u8 *sa, u8 priority,
+		 const u8 *data, size_t data_len, u8 *mic)
 {
 	u32 val;
 	size_t block, blocks, left;
diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h
index 7d5707bf884..69b4501f13b 100644
--- a/net/mac80211/michael.h
+++ b/net/mac80211/michael.h
@@ -18,7 +18,7 @@ struct michael_mic_ctx {
 	u32 l, r;
 };
 
-void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority,
-		 u8 *data, size_t data_len, u8 *mic);
+void michael_mic(const u8 *key, const u8 *da, const u8 *sa, u8 priority,
+		 const u8 *data, size_t data_len, u8 *mic);
 
 #endif /* MICHAEL_H */
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index a7c3febc5a4..8cdf053cb83 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -6,7 +6,6 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/types.h>
@@ -132,7 +131,7 @@ static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16,
 /* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets
  * of the IV. Returns pointer to the octet following IVs (i.e., beginning of
  * the packet payload). */
-u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
+u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
 			   u8 iv0, u8 iv1, u8 iv2)
 {
 	*pos++ = iv0;
@@ -143,14 +142,7 @@ u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
 	return pos + 4;
 }
 
-void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta,
-				  u16 *phase1key)
-{
-	tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-			   key->u.tkip.iv32, phase1key);
-}
-
-void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
+static void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
 			       u8 *rc4key)
 {
 	/* Calculate per-packet key */
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
index 1fa0bb4dba3..b890427fc95 100644
--- a/net/mac80211/tkip.h
+++ b/net/mac80211/tkip.h
@@ -15,10 +15,6 @@
 
 u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
 			  u8 iv0, u8 iv1, u8 iv2);
-void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta,
-				  u16 *phase1key);
-void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
-			       u8 *rc4key);
 void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
 				 struct ieee80211_key *key,
 				 u8 *pos, size_t payload_len, u8 *ta);
-- 
cgit v1.2.3-70-g09d2


From b0f76b335f8b1c324b4b2be06369d391b26a7cc9 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 14 May 2008 16:26:19 -0700
Subject: mac80211: add a struct to hold tkip context

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c         |  4 ++--
 net/mac80211/debugfs_key.c |  8 +++----
 net/mac80211/key.h         | 17 ++++++++-------
 net/mac80211/tkip.c        | 54 +++++++++++++++++++++++-----------------------
 net/mac80211/wpa.c         | 16 +++++++-------
 5 files changed, 50 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 3cef80dcd0e..dbf0563c397 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -256,8 +256,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
 	case ALG_TKIP:
 		params.cipher = WLAN_CIPHER_SUITE_TKIP;
 
-		iv32 = key->u.tkip.iv32;
-		iv16 = key->u.tkip.iv16;
+		iv32 = key->u.tkip.tx.iv32;
+		iv16 = key->u.tkip.tx.iv16;
 
 		if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
 		    sdata->local->ops->get_tkip_seq)
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 19efc3a6a93..7439b63df5d 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -97,8 +97,8 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
 		break;
 	case ALG_TKIP:
 		len = scnprintf(buf, sizeof(buf), "%08x %04x\n",
-				key->u.tkip.iv32,
-				key->u.tkip.iv16);
+				key->u.tkip.tx.iv32,
+				key->u.tkip.tx.iv16);
 		break;
 	case ALG_CCMP:
 		tpn = key->u.ccmp.tx_pn;
@@ -128,8 +128,8 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
 		for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
 			p += scnprintf(p, sizeof(buf)+buf-p,
 				       "%08x %04x\n",
-				       key->u.tkip.iv32_rx[i],
-				       key->u.tkip.iv16_rx[i]);
+				       key->u.tkip.rx[i].iv32,
+				       key->u.tkip.rx[i].iv16);
 		len = p - buf;
 		break;
 	case ALG_CCMP:
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index f52c3df1fe9..a0f774aafa4 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -69,6 +69,13 @@ enum ieee80211_internal_key_flags {
 	KEY_FLAG_TODO_ADD_DEBUGFS	= BIT(5),
 };
 
+struct tkip_ctx {
+	u32 iv32;
+	u16 iv16;
+	u16 p1k[5];
+	int initialized;
+};
+
 struct ieee80211_key {
 	struct ieee80211_local *local;
 	struct ieee80211_sub_if_data *sdata;
@@ -85,16 +92,10 @@ struct ieee80211_key {
 	union {
 		struct {
 			/* last used TSC */
-			u32 iv32;
-			u16 iv16;
-			u16 p1k[5];
-			int tx_initialized;
+			struct tkip_ctx tx;
 
 			/* last received RSC */
-			u32 iv32_rx[NUM_RX_DATA_QUEUES];
-			u16 iv16_rx[NUM_RX_DATA_QUEUES];
-			u16 p1k_rx[NUM_RX_DATA_QUEUES][5];
-			int rx_initialized[NUM_RX_DATA_QUEUES];
+			struct tkip_ctx rx[NUM_RX_DATA_QUEUES];
 		} tkip;
 		struct {
 			u8 tx_pn[6];
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 8cdf053cb83..d74c91e23a7 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -138,7 +138,7 @@ u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
 	*pos++ = iv1;
 	*pos++ = iv2;
 	*pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */;
-	put_unaligned_le32(key->u.tkip.iv32, pos);
+	put_unaligned_le32(key->u.tkip.tx.iv32, pos);
 	return pos + 4;
 }
 
@@ -146,16 +146,16 @@ static void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
 			       u8 *rc4key)
 {
 	/* Calculate per-packet key */
-	if (key->u.tkip.iv16 == 0 || !key->u.tkip.tx_initialized) {
+	if (key->u.tkip.tx.iv16 == 0 || !key->u.tkip.tx.initialized) {
 		/* IV16 wrapped around - perform TKIP phase 1 */
 		tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-				   key->u.tkip.iv32, key->u.tkip.p1k);
-		key->u.tkip.tx_initialized = 1;
+				   key->u.tkip.tx.iv32, key->u.tkip.tx.p1k);
+		key->u.tkip.tx.initialized = 1;
 	}
 
-	tkip_mixing_phase2(key->u.tkip.p1k,
+	tkip_mixing_phase2(key->u.tkip.tx.p1k,
 			   &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-			   key->u.tkip.iv16, rc4key);
+			   key->u.tkip.tx.iv16, rc4key);
 }
 
 void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
@@ -179,9 +179,9 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 	printk(KERN_DEBUG "TKIP encrypt: iv16 = 0x%04x, iv32 = 0x%08x\n",
 			iv16, iv32);
 
-	if (iv32 != key->u.tkip.iv32) {
+	if (iv32 != key->u.tkip.tx.iv32) {
 		printk(KERN_DEBUG "skb: iv32 = 0x%08x key: iv32 = 0x%08x\n",
-			iv32, key->u.tkip.iv32);
+			iv32, key->u.tkip.tx.iv32);
 		printk(KERN_DEBUG "Wrap around of iv16 in the middle of a "
 			"fragmented packet\n");
 	}
@@ -190,19 +190,19 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 	/* Update the p1k only when the iv16 in the packet wraps around, this
 	 * might occur after the wrap around of iv16 in the key in case of
 	 * fragmented packets. */
-	if (iv16 == 0 || !key->u.tkip.tx_initialized) {
+	if (iv16 == 0 || !key->u.tkip.tx.initialized) {
 		/* IV16 wrapped around - perform TKIP phase 1 */
 		tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-			iv32, key->u.tkip.p1k);
-		key->u.tkip.tx_initialized = 1;
+			iv32, key->u.tkip.tx.p1k);
+		key->u.tkip.tx.initialized = 1;
 	}
 
 	if (type == IEEE80211_TKIP_P1_KEY) {
-		memcpy(outkey, key->u.tkip.p1k, sizeof(u16) * 5);
+		memcpy(outkey, key->u.tkip.tx.p1k, sizeof(u16) * 5);
 		return;
 	}
 
-	tkip_mixing_phase2(key->u.tkip.p1k,
+	tkip_mixing_phase2(key->u.tkip.tx.p1k,
 		&key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],	iv16, outkey);
 }
 EXPORT_SYMBOL(ieee80211_get_tkip_key);
@@ -263,33 +263,33 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 	if ((keyid >> 6) != key->conf.keyidx)
 		return TKIP_DECRYPT_INVALID_KEYIDX;
 
-	if (key->u.tkip.rx_initialized[queue] &&
-	    (iv32 < key->u.tkip.iv32_rx[queue] ||
-	     (iv32 == key->u.tkip.iv32_rx[queue] &&
-	      iv16 <= key->u.tkip.iv16_rx[queue]))) {
+	if (key->u.tkip.rx[queue].initialized &&
+	    (iv32 < key->u.tkip.rx[queue].iv32 ||
+	     (iv32 == key->u.tkip.rx[queue].iv32 &&
+	      iv16 <= key->u.tkip.rx[queue].iv16))) {
 #ifdef CONFIG_TKIP_DEBUG
 		DECLARE_MAC_BUF(mac);
 		printk(KERN_DEBUG "TKIP replay detected for RX frame from "
 		       "%s (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n",
 		       print_mac(mac, ta),
-		       iv32, iv16, key->u.tkip.iv32_rx[queue],
-		       key->u.tkip.iv16_rx[queue]);
+		       iv32, iv16, key->u.tkip.rx[queue].iv32,
+		       key->u.tkip.rx[queue].iv16);
 #endif /* CONFIG_TKIP_DEBUG */
 		return TKIP_DECRYPT_REPLAY;
 	}
 
 	if (only_iv) {
 		res = TKIP_DECRYPT_OK;
-		key->u.tkip.rx_initialized[queue] = 1;
+		key->u.tkip.rx[queue].initialized = 1;
 		goto done;
 	}
 
-	if (!key->u.tkip.rx_initialized[queue] ||
-	    key->u.tkip.iv32_rx[queue] != iv32) {
-		key->u.tkip.rx_initialized[queue] = 1;
+	if (!key->u.tkip.rx[queue].initialized ||
+	    key->u.tkip.rx[queue].iv32 != iv32) {
+		key->u.tkip.rx[queue].initialized = 1;
 		/* IV16 wrapped around - perform TKIP phase 1 */
 		tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-				   iv32, key->u.tkip.p1k_rx[queue]);
+				   iv32, key->u.tkip.rx[queue].p1k);
 #ifdef CONFIG_TKIP_DEBUG
 		{
 			int i;
@@ -303,7 +303,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 			printk("\n");
 			printk(KERN_DEBUG "TKIP decrypt: P1K=");
 			for (i = 0; i < 5; i++)
-				printk("%04x ", key->u.tkip.p1k_rx[queue][i]);
+				printk("%04x ", key->u.tkip.rx[queue].p1k[i]);
 			printk("\n");
 		}
 #endif /* CONFIG_TKIP_DEBUG */
@@ -318,11 +318,11 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 
 			key->local->ops->update_tkip_key(
 				local_to_hw(key->local), &key->conf,
-				sta_addr, iv32, key->u.tkip.p1k_rx[queue]);
+				sta_addr, iv32, key->u.tkip.rx[queue].p1k);
 		}
 	}
 
-	tkip_mixing_phase2(key->u.tkip.p1k_rx[queue],
+	tkip_mixing_phase2(key->u.tkip.rx[queue].p1k,
 			   &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
 			   iv16, rc4key);
 #ifdef CONFIG_TKIP_DEBUG
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 42f3654e1c5..d7304490d2e 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -176,8 +176,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 	skb_trim(skb, skb->len - MICHAEL_MIC_LEN);
 
 	/* update IV in key information to be able to detect replays */
-	rx->key->u.tkip.iv32_rx[rx->queue] = rx->tkip_iv32;
-	rx->key->u.tkip.iv16_rx[rx->queue] = rx->tkip_iv16;
+	rx->key->u.tkip.rx[rx->queue].iv32 = rx->tkip_iv32;
+	rx->key->u.tkip.rx[rx->queue].iv16 = rx->tkip_iv16;
 
 	return RX_CONTINUE;
 }
@@ -214,19 +214,19 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx,
 	pos += hdrlen;
 
 	/* Increase IV for the frame */
-	key->u.tkip.iv16++;
-	if (key->u.tkip.iv16 == 0)
-		key->u.tkip.iv32++;
+	key->u.tkip.tx.iv16++;
+	if (key->u.tkip.tx.iv16 == 0)
+		key->u.tkip.tx.iv32++;
 
 	if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
 		hdr = (struct ieee80211_hdr *)skb->data;
 
 		/* hwaccel - with preallocated room for IV */
 		ieee80211_tkip_add_iv(pos, key,
-				      (u8) (key->u.tkip.iv16 >> 8),
-				      (u8) (((key->u.tkip.iv16 >> 8) | 0x20) &
+				      (u8) (key->u.tkip.tx.iv16 >> 8),
+				      (u8) (((key->u.tkip.tx.iv16 >> 8) | 0x20) &
 					    0x7f),
-				      (u8) key->u.tkip.iv16);
+				      (u8) key->u.tkip.tx.iv16);
 
 		tx->control->hw_key = &tx->key->conf;
 		return 0;
-- 
cgit v1.2.3-70-g09d2


From 82a57447fa66bf138cd55206f33eea21ee257335 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 14 May 2008 16:26:19 -0700
Subject: mac80211: tkip.c use struct tkip_ctx in phase 1 key mixing

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tkip.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index d74c91e23a7..e9ee247e13f 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -71,10 +71,12 @@ static u16 tkipS(u16 val)
  * TSC = TKIP sequence counter (48 bits, only 32 msb bits used)
  * P1K: 80 bits
  */
-static void tkip_mixing_phase1(const u8 *ta, const u8 *tk, u32 tsc_IV32,
-			       u16 *p1k)
+static void tkip_mixing_phase1(struct ieee80211_key *key, const u8 *ta,
+			       struct tkip_ctx *ctx, u32 tsc_IV32)
 {
 	int i, j;
+	const u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
+	u16 *p1k = ctx->p1k;
 
 	p1k[0] = tsc_IV32 & 0xFFFF;
 	p1k[1] = tsc_IV32 >> 16;
@@ -90,6 +92,7 @@ static void tkip_mixing_phase1(const u8 *ta, const u8 *tk, u32 tsc_IV32,
 		p1k[3] += tkipS(p1k[2] ^ get_unaligned_le16(tk + 12 + j));
 		p1k[4] += tkipS(p1k[3] ^ get_unaligned_le16(tk + 0 + j)) + i;
 	}
+	ctx->initialized = 1;
 }
 
 static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16,
@@ -146,12 +149,8 @@ static void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
 			       u8 *rc4key)
 {
 	/* Calculate per-packet key */
-	if (key->u.tkip.tx.iv16 == 0 || !key->u.tkip.tx.initialized) {
-		/* IV16 wrapped around - perform TKIP phase 1 */
-		tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-				   key->u.tkip.tx.iv32, key->u.tkip.tx.p1k);
-		key->u.tkip.tx.initialized = 1;
-	}
+	if (key->u.tkip.tx.iv16 == 0 || !key->u.tkip.tx.initialized)
+		tkip_mixing_phase1(key, ta, &key->u.tkip.tx, key->u.tkip.tx.iv32);
 
 	tkip_mixing_phase2(key->u.tkip.tx.p1k,
 			   &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
@@ -190,12 +189,8 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 	/* Update the p1k only when the iv16 in the packet wraps around, this
 	 * might occur after the wrap around of iv16 in the key in case of
 	 * fragmented packets. */
-	if (iv16 == 0 || !key->u.tkip.tx.initialized) {
-		/* IV16 wrapped around - perform TKIP phase 1 */
-		tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-			iv32, key->u.tkip.tx.p1k);
-		key->u.tkip.tx.initialized = 1;
-	}
+	if (iv16 == 0 || !key->u.tkip.tx.initialized)
+		tkip_mixing_phase1(key, ta, &key->u.tkip.tx, iv32);
 
 	if (type == IEEE80211_TKIP_P1_KEY) {
 		memcpy(outkey, key->u.tkip.tx.p1k, sizeof(u16) * 5);
@@ -286,10 +281,8 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 
 	if (!key->u.tkip.rx[queue].initialized ||
 	    key->u.tkip.rx[queue].iv32 != iv32) {
-		key->u.tkip.rx[queue].initialized = 1;
 		/* IV16 wrapped around - perform TKIP phase 1 */
-		tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-				   iv32, key->u.tkip.rx[queue].p1k);
+		tkip_mixing_phase1(key, ta, &key->u.tkip.rx[queue], iv32);
 #ifdef CONFIG_TKIP_DEBUG
 		{
 			int i;
-- 
cgit v1.2.3-70-g09d2


From 3c83809917dce9bbd880f6b08edc2d0ccac14a25 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 14 May 2008 16:26:20 -0700
Subject: mac80211: tkip.c use struct tkip_ctx in phase 2 key mixing

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tkip.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index e9ee247e13f..a00cf1ea771 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -95,10 +95,12 @@ static void tkip_mixing_phase1(struct ieee80211_key *key, const u8 *ta,
 	ctx->initialized = 1;
 }
 
-static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16,
-			       u8 *rc4key)
+static void tkip_mixing_phase2(struct ieee80211_key *key, struct tkip_ctx *ctx,
+			       u16 tsc_IV16, u8 *rc4key)
 {
 	u16 ppk[6];
+	const u16 *p1k = ctx->p1k;
+	const u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
 	int i;
 
 	ppk[0] = p1k[0];
@@ -152,9 +154,7 @@ static void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
 	if (key->u.tkip.tx.iv16 == 0 || !key->u.tkip.tx.initialized)
 		tkip_mixing_phase1(key, ta, &key->u.tkip.tx, key->u.tkip.tx.iv32);
 
-	tkip_mixing_phase2(key->u.tkip.tx.p1k,
-			   &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-			   key->u.tkip.tx.iv16, rc4key);
+	tkip_mixing_phase2(key, &key->u.tkip.tx, key->u.tkip.tx.iv16, rc4key);
 }
 
 void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
@@ -197,8 +197,7 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 		return;
 	}
 
-	tkip_mixing_phase2(key->u.tkip.tx.p1k,
-		&key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],	iv16, outkey);
+	tkip_mixing_phase2(key, &key->u.tkip.tx, iv16, outkey);
 }
 EXPORT_SYMBOL(ieee80211_get_tkip_key);
 
@@ -315,9 +314,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 		}
 	}
 
-	tkip_mixing_phase2(key->u.tkip.rx[queue].p1k,
-			   &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY],
-			   iv16, rc4key);
+	tkip_mixing_phase2(key, &key->u.tkip.rx[queue], iv16, rc4key);
 #ifdef CONFIG_TKIP_DEBUG
 	{
 		int i;
-- 
cgit v1.2.3-70-g09d2


From edcdf8b21ac920e06b4180246123fe43b022e020 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Thu, 15 May 2008 13:53:55 +0800
Subject: mac80211: separate Tx and Rx MCS when configuring HT

This patch follows the 11n spec in separation between Tx and Rx MCS
capabilities. Up until now, when configuring the HT possible set of Tx
MCS only Rx MCS were considered, assuming they are the same as the Tx MCS.
This patch fixed this by looking at low level driver Tx capabilities.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl4965-base.c | 14 +++--
 include/linux/ieee80211.h                   | 18 ++++--
 net/mac80211/main.c                         | 96 ++++++++++++++++++-----------
 3 files changed, 83 insertions(+), 45 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c
index 1a2d3768867..c713b27ec65 100644
--- a/drivers/net/wireless/iwlwifi/iwl4965-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c
@@ -3958,6 +3958,13 @@ static void iwl4965_dealloc_ucode_pci(struct iwl_priv *priv)
 	iwl_free_fw_desc(priv->pci_dev, &priv->ucode_boot);
 }
 
+static void iwl4965_nic_start(struct iwl_priv *priv)
+{
+	/* Remove all resets to allow NIC to operate */
+	iwl_write32(priv, CSR_RESET, 0);
+}
+
+
 /**
  * iwl4965_read_ucode - Read uCode images from disk file.
  *
@@ -4447,8 +4454,7 @@ static int __iwl4965_up(struct iwl_priv *priv)
 		}
 
 		/* start card; "initialize" will load runtime ucode */
-		/* Remove all resets to allow NIC to operate */
-		iwl_write32(priv, CSR_RESET, 0);
+		iwl4965_nic_start(priv);
 
 		IWL_DEBUG_INFO(DRV_NAME " is coming up\n");
 
@@ -6842,10 +6848,6 @@ static int __init iwl4965_init(void)
 
 	return ret;
 
-
-#ifdef CONFIG_IWLWIFI_DEBUG
-	pci_unregister_driver(&iwl_driver);
-#endif
 error_register:
 	iwl4965_rate_control_unregister();
 	return ret;
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a9102bc78b6..3c2ac0c37aa 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -306,8 +306,18 @@ struct ieee80211_ht_addt_info {
 #define IEEE80211_HT_CAP_SGI_40			0x0040
 #define IEEE80211_HT_CAP_DELAY_BA		0x0400
 #define IEEE80211_HT_CAP_MAX_AMSDU		0x0800
+/* 802.11n HT capability AMPDU settings */
 #define IEEE80211_HT_CAP_AMPDU_FACTOR		0x03
 #define IEEE80211_HT_CAP_AMPDU_DENSITY		0x1C
+/* 802.11n HT capability MSC set */
+#define IEEE80211_SUPP_MCS_SET_UEQM		4
+#define IEEE80211_HT_CAP_MAX_STREAMS		4
+#define IEEE80211_SUPP_MCS_SET_LEN		10
+/* maximum streams the spec allows */
+#define IEEE80211_HT_CAP_MCS_TX_DEFINED		0x01
+#define IEEE80211_HT_CAP_MCS_TX_RX_DIFF		0x02
+#define IEEE80211_HT_CAP_MCS_TX_STREAMS		0x0C
+#define IEEE80211_HT_CAP_MCS_TX_UEQM		0x10
 /* 802.11n HT IE masks */
 #define IEEE80211_HT_IE_CHA_SEC_OFFSET		0x03
 #define IEEE80211_HT_IE_CHA_WIDTH		0x04
@@ -316,10 +326,10 @@ struct ieee80211_ht_addt_info {
 #define IEEE80211_HT_IE_NON_HT_STA_PRSNT	0x0010
 
 /* MIMO Power Save Modes */
-#define WLAN_HT_CAP_MIMO_PS_STATIC         0
-#define WLAN_HT_CAP_MIMO_PS_DYNAMIC        1
-#define WLAN_HT_CAP_MIMO_PS_INVALID        2
-#define WLAN_HT_CAP_MIMO_PS_DISABLED       3
+#define WLAN_HT_CAP_MIMO_PS_STATIC	0
+#define WLAN_HT_CAP_MIMO_PS_DYNAMIC	1
+#define WLAN_HT_CAP_MIMO_PS_INVALID	2
+#define WLAN_HT_CAP_MIMO_PS_DISABLED	3
 
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 36016363d22..b0fddb7de54 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -35,8 +35,6 @@
 #include "debugfs.h"
 #include "debugfs_netdev.h"
 
-#define SUPP_MCS_SET_LEN 16
-
 /*
  * For seeing transmitted packets on monitor interfaces
  * we have a radiotap header too.
@@ -1068,56 +1066,84 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_ht_info ht_conf;
 	struct ieee80211_ht_bss_info ht_bss_conf;
-	int i;
 	u32 changed = 0;
+	int i;
+	u8 max_tx_streams = IEEE80211_HT_CAP_MAX_STREAMS;
+	u8 tx_mcs_set_cap;
 
 	sband = local->hw.wiphy->bands[conf->channel->band];
 
+	memset(&ht_conf, 0, sizeof(struct ieee80211_ht_info));
+	memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info));
+
 	/* HT is not supported */
 	if (!sband->ht_info.ht_supported) {
 		conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
-		return 0;
+		goto out;
 	}
 
-	memset(&ht_conf, 0, sizeof(struct ieee80211_ht_info));
-	memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info));
-
-	if (enable_ht) {
-		if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE))
+	/* disable HT */
+	if (!enable_ht) {
+		if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)
 			changed |= BSS_CHANGED_HT;
+		conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
+		conf->ht_conf.ht_supported = 0;
+		goto out;
+	}
 
-		conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE;
-		ht_conf.ht_supported = 1;
 
-		ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap;
-		ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS);
-		ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS;
+	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE))
+		changed |= BSS_CHANGED_HT;
 
-		for (i = 0; i < SUPP_MCS_SET_LEN; i++)
-			ht_conf.supp_mcs_set[i] =
-					sband->ht_info.supp_mcs_set[i] &
-					req_ht_cap->supp_mcs_set[i];
+	conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE;
+	ht_conf.ht_supported = 1;
 
-		ht_bss_conf.primary_channel = req_bss_cap->primary_channel;
-		ht_bss_conf.bss_cap = req_bss_cap->bss_cap;
-		ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode;
+	ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap;
+	ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS);
+	ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS;
+	ht_bss_conf.primary_channel = req_bss_cap->primary_channel;
+	ht_bss_conf.bss_cap = req_bss_cap->bss_cap;
+	ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode;
 
-		ht_conf.ampdu_factor = req_ht_cap->ampdu_factor;
-		ht_conf.ampdu_density = req_ht_cap->ampdu_density;
+	ht_conf.ampdu_factor = req_ht_cap->ampdu_factor;
+	ht_conf.ampdu_density = req_ht_cap->ampdu_density;
 
-		/* if bss configuration changed store the new one */
-		if (memcmp(&conf->ht_conf, &ht_conf, sizeof(ht_conf)) ||
-		    memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) {
-			changed |= BSS_CHANGED_HT;
-			memcpy(&conf->ht_conf, &ht_conf, sizeof(ht_conf));
-			memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf));
-		}
-	} else {
-		if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)
-			changed |= BSS_CHANGED_HT;
-		conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
-	}
+	/* Bits 96-100 */
+	tx_mcs_set_cap = sband->ht_info.supp_mcs_set[12];
 
+	/* configure suppoerted Tx MCS according to requested MCS
+	 * (based in most cases on Rx capabilities of peer) and self
+	 * Tx MCS capabilities (as defined by low level driver HW
+	 * Tx capabilities) */
+	if (!(tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_DEFINED))
+		goto check_changed;
+
+	/* Counting from 0 therfore + 1 */
+	if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_RX_DIFF)
+		max_tx_streams = ((tx_mcs_set_cap &
+				IEEE80211_HT_CAP_MCS_TX_STREAMS) >> 2) + 1;
+
+	for (i = 0; i < max_tx_streams; i++)
+		ht_conf.supp_mcs_set[i] =
+			sband->ht_info.supp_mcs_set[i] &
+					req_ht_cap->supp_mcs_set[i];
+
+	if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_UEQM)
+		for (i = IEEE80211_SUPP_MCS_SET_UEQM;
+		     i < IEEE80211_SUPP_MCS_SET_LEN; i++)
+			ht_conf.supp_mcs_set[i] =
+				sband->ht_info.supp_mcs_set[i] &
+					req_ht_cap->supp_mcs_set[i];
+
+check_changed:
+	/* if bss configuration changed store the new one */
+	if (memcmp(&conf->ht_conf, &ht_conf, sizeof(ht_conf)) ||
+	    memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) {
+		changed |= BSS_CHANGED_HT;
+		memcpy(&conf->ht_conf, &ht_conf, sizeof(ht_conf));
+		memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf));
+	}
+out:
 	return changed;
 }
 
-- 
cgit v1.2.3-70-g09d2


From c4680470a34a4f39af3d0a5c40f70befd8701908 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 15 May 2008 12:55:25 +0200
Subject: mac80211: fix bugs in queue handling functions

Commit 55c308c1315bc7267dbb88011c208fd743cdce31
("mac80211: QoS related cleanups") introduced another bug,
the queue handling functions that operate on all queues now
only operated on the first queues, not the A-MPDU queues as
expected. This patch fixes this.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/util.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 24a465c4df0..9cd07e1031a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -355,7 +355,7 @@ void ieee80211_start_queues(struct ieee80211_hw *hw)
 	struct ieee80211_local *local = hw_to_local(hw);
 	int i;
 
-	for (i = 0; i < local->hw.queues; i++)
+	for (i = 0; i < hw->queues + hw->ampdu_queues; i++)
 		clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]);
 	if (!ieee80211_qdisc_installed(local->mdev))
 		netif_start_queue(local->mdev);
@@ -366,7 +366,7 @@ void ieee80211_stop_queues(struct ieee80211_hw *hw)
 {
 	int i;
 
-	for (i = 0; i < hw->queues; i++)
+	for (i = 0; i < hw->queues + hw->ampdu_queues; i++)
 		ieee80211_stop_queue(hw, i);
 }
 EXPORT_SYMBOL(ieee80211_stop_queues);
@@ -375,7 +375,7 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw)
 {
 	int i;
 
-	for (i = 0; i < hw->queues; i++)
+	for (i = 0; i < hw->queues + hw->ampdu_queues; i++)
 		ieee80211_wake_queue(hw, i);
 }
 EXPORT_SYMBOL(ieee80211_wake_queues);
-- 
cgit v1.2.3-70-g09d2


From 36d6825b91bc492b65b6333c369cd96a2fc8c903 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 15 May 2008 12:55:26 +0200
Subject: mac80211: let drivers wake but not start queues

Having drivers start queues is just confusing, their ->start()
callback can block and do whatever is necessary, so let mac80211
start queues and have drivers wake queues when necessary (to get
packets flowing again right away.)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/adm8211.c              |  2 +-
 drivers/net/wireless/ath5k/base.c           |  2 +-
 drivers/net/wireless/b43/main.c             |  1 -
 drivers/net/wireless/b43legacy/main.c       |  1 -
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  2 +-
 drivers/net/wireless/iwlwifi/iwl4965-base.c |  2 +-
 drivers/net/wireless/p54/p54common.c        |  3 ---
 drivers/net/wireless/p54/p54pci.c           |  2 +-
 drivers/net/wireless/rt2x00/rt2x00dev.c     |  4 ++--
 include/net/mac80211.h                      |  8 --------
 net/mac80211/main.c                         |  8 +++++++-
 net/mac80211/util.c                         | 12 ------------
 12 files changed, 14 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index 7af5d8851f6..79dfca546c8 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -2015,7 +2015,7 @@ static int adm8211_resume(struct pci_dev *pdev)
 
 	if (priv->mode != IEEE80211_IF_TYPE_INVALID) {
 		adm8211_start(dev);
-		ieee80211_start_queues(dev);
+		ieee80211_wake_queues(dev);
 	}
 
 	return 0;
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index c76ada17878..3f16ad66bdb 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -1599,7 +1599,7 @@ ath5k_txq_cleanup(struct ath5k_softc *sc)
 					sc->txqs[i].link);
 			}
 	}
-	ieee80211_start_queues(sc->hw); /* XXX move to callers */
+	ieee80211_wake_queues(sc->hw); /* XXX move to callers */
 
 	for (i = 0; i < ARRAY_SIZE(sc->txqs); i++)
 		if (sc->txqs[i].setup)
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index fc23ba5309b..9445a604a96 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -3497,7 +3497,6 @@ static int b43_wireless_core_start(struct b43_wldev *dev)
 	/* Start data flow (TX/RX). */
 	b43_mac_enable(dev);
 	b43_interrupt_enable(dev, dev->irq_savedstate);
-	ieee80211_start_queues(dev->wl->hw);
 
 	/* Start maintainance work */
 	b43_periodic_tasks_setup(dev);
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 7755c59e080..b05a507ed44 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2794,7 +2794,6 @@ static int b43legacy_wireless_core_start(struct b43legacy_wldev *dev)
 	/* Start data flow (TX/RX) */
 	b43legacy_mac_enable(dev);
 	b43legacy_interrupt_enable(dev, dev->irq_savedstate);
-	ieee80211_start_queues(dev->wl->hw);
 
 	/* Start maintenance work */
 	b43legacy_periodic_tasks_setup(dev);
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index b8fb8d8d95f..54cde8a7b5f 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -5823,7 +5823,7 @@ static void iwl3945_alive_start(struct iwl3945_priv *priv)
 	if (iwl3945_is_rfkill(priv))
 		return;
 
-	ieee80211_start_queues(priv->hw);
+	ieee80211_wake_queues(priv->hw);
 
 	priv->active_rate = priv->rates_mask;
 	priv->active_rate_basic = priv->rates_mask & IWL_BASIC_RATES_MASK;
diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c
index 48c59cbefb4..db4f606bad5 100644
--- a/drivers/net/wireless/iwlwifi/iwl4965-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c
@@ -3367,7 +3367,7 @@ static void iwl4965_alive_start(struct iwl_priv *priv)
 	if (iwl_is_rfkill(priv))
 		return;
 
-	ieee80211_start_queues(priv->hw);
+	ieee80211_wake_queues(priv->hw);
 
 	priv->active_rate = priv->rates_mask;
 	priv->active_rate_basic = priv->rates_mask & IWL_BASIC_RATES_MASK;
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 9cbef5bce0f..3d35fe6a8f5 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -375,9 +375,6 @@ static void inline p54_wake_free_queues(struct ieee80211_hw *dev)
 	struct p54_common *priv = dev->priv;
 	int i;
 
-	/* ieee80211_start_queues is great if all queues are really empty.
-	 * But, what if some are full? */
-
 	for (i = 0; i < dev->queues; i++)
 		if (priv->tx_stats[i].len < priv->tx_stats[i].limit)
 			ieee80211_wake_queue(dev, i);
diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
index fa527723fbe..7dd4add4bf4 100644
--- a/drivers/net/wireless/p54/p54pci.c
+++ b/drivers/net/wireless/p54/p54pci.c
@@ -665,7 +665,7 @@ static int p54p_resume(struct pci_dev *pdev)
 
 	if (priv->common.mode != IEEE80211_IF_TYPE_INVALID) {
 		p54p_open(dev);
-		ieee80211_start_queues(dev);
+		ieee80211_wake_queues(dev);
 	}
 
 	return 0;
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 171f445962d..d341764e1b2 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -125,7 +125,7 @@ int rt2x00lib_enable_radio(struct rt2x00_dev *rt2x00dev)
 	/*
 	 * Start the TX queues.
 	 */
-	ieee80211_start_queues(rt2x00dev->hw);
+	ieee80211_wake_queues(rt2x00dev->hw);
 
 	return 0;
 }
@@ -1186,7 +1186,7 @@ int rt2x00lib_resume(struct rt2x00_dev *rt2x00dev)
 	 * In that case we have disabled the TX queue and should
 	 * now enable it again
 	 */
-	ieee80211_start_queues(rt2x00dev->hw);
+	ieee80211_wake_queues(rt2x00dev->hw);
 
 	/*
 	 * During interface iteration we might have changed the
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 909956c97c4..f00fc76a734 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1566,14 +1566,6 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue);
  */
 void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue);
 
-/**
- * ieee80211_start_queues - start all queues
- * @hw: pointer to as obtained from ieee80211_alloc_hw().
- *
- * Drivers should use this function instead of netif_start_queue.
- */
-void ieee80211_start_queues(struct ieee80211_hw *hw);
-
 /**
  * ieee80211_stop_queues - stop all queues
  * @hw: pointer as obtained from ieee80211_alloc_hw().
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b0fddb7de54..9761d9bd5a7 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -110,7 +110,13 @@ static int ieee80211_master_open(struct net_device *dev)
 			break;
 		}
 	}
-	return res;
+
+	if (res)
+		return res;
+
+	netif_start_queue(local->mdev);
+
+	return 0;
 }
 
 static int ieee80211_master_stop(struct net_device *dev)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 9cd07e1031a..800c15aff6e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -350,18 +350,6 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue)
 }
 EXPORT_SYMBOL(ieee80211_stop_queue);
 
-void ieee80211_start_queues(struct ieee80211_hw *hw)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	int i;
-
-	for (i = 0; i < hw->queues + hw->ampdu_queues; i++)
-		clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]);
-	if (!ieee80211_qdisc_installed(local->mdev))
-		netif_start_queue(local->mdev);
-}
-EXPORT_SYMBOL(ieee80211_start_queues);
-
 void ieee80211_stop_queues(struct ieee80211_hw *hw)
 {
 	int i;
-- 
cgit v1.2.3-70-g09d2


From 2e92e6f2c50b4baf85cca968f0e6f1b5c0df7d39 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 15 May 2008 12:55:27 +0200
Subject: mac80211: use rate index in TX control

This patch modifies struct ieee80211_tx_control to give band
info and the rate index (instead of rate pointers) to drivers.
This mostly serves to reduce the TX control structure size to
make it fit into skb->cb so that the fragmentation code can
put it there and we can think about passing it to drivers that
way in the future.

The rt2x00 driver update was done by Ivo, thanks.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/adm8211.c              |   6 +-
 drivers/net/wireless/ath5k/base.c           |   8 ++-
 drivers/net/wireless/b43/main.c             |   2 +-
 drivers/net/wireless/b43/xmit.c             |  13 ++--
 drivers/net/wireless/b43legacy/xmit.c       |   9 ++-
 drivers/net/wireless/iwlwifi/iwl-3945-rs.c  |   6 +-
 drivers/net/wireless/iwlwifi/iwl-3945.c     |   7 +-
 drivers/net/wireless/iwlwifi/iwl-4965-rs.c  |  12 ++--
 drivers/net/wireless/iwlwifi/iwl-4965.c     |  10 +--
 drivers/net/wireless/iwlwifi/iwl-tx.c       |   8 ++-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |   4 +-
 drivers/net/wireless/iwlwifi/iwl4965-base.c |   2 +-
 drivers/net/wireless/p54/p54common.c        |   2 +-
 drivers/net/wireless/rt2x00/rt2x00queue.c   |   9 ++-
 drivers/net/wireless/rtl8180_dev.c          |  19 +++--
 drivers/net/wireless/rtl8187_dev.c          |  10 +--
 drivers/net/wireless/zd1211rw/zd_mac.c      |   7 +-
 include/net/mac80211.h                      |  51 ++++++++++----
 net/mac80211/ieee80211_i.h                  |   8 +--
 net/mac80211/mlme.c                         |   6 +-
 net/mac80211/rate.c                         |  12 ++--
 net/mac80211/rate.h                         |  25 +++----
 net/mac80211/rc80211_pid_algo.c             |   6 +-
 net/mac80211/tx.c                           | 104 ++++++++++++++++------------
 net/mac80211/util.c                         |  10 ++-
 25 files changed, 205 insertions(+), 151 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index 79dfca546c8..22db664a58d 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -1693,10 +1693,10 @@ static int adm8211_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	size_t payload_len, hdrlen;
 	int plcp, dur, len, plcp_signal, short_preamble;
 	struct ieee80211_hdr *hdr;
+	struct ieee80211_rate *txrate = ieee80211_get_tx_rate(dev, control);
 
-	short_preamble = !!(control->tx_rate->flags &
-					IEEE80211_TXCTL_SHORT_PREAMBLE);
-	plcp_signal = control->tx_rate->bitrate;
+	short_preamble = !!(txrate->flags & IEEE80211_TXCTL_SHORT_PREAMBLE);
+	plcp_signal = txrate->bitrate;
 
 	hdr = (struct ieee80211_hdr *)skb->data;
 	fc = le16_to_cpu(hdr->frame_control) & ~IEEE80211_FCTL_PROTECTED;
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 3f16ad66bdb..32ee351a765 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -1323,7 +1323,8 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf,
 
 	ret = ah->ah_setup_tx_desc(ah, ds, pktlen,
 		ieee80211_get_hdrlen_from_skb(skb), AR5K_PKT_TYPE_NORMAL,
-		(sc->power_level * 2), ctl->tx_rate->hw_value,
+		(sc->power_level * 2),
+		ieee80211_get_tx_rate(sc->hw, ctl)->hw_value,
 		ctl->retry_limit, keyidx, 0, flags, 0, 0);
 	if (ret)
 		goto err_unmap;
@@ -2046,7 +2047,8 @@ ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf,
 	ret = ah->ah_setup_tx_desc(ah, ds, skb->len,
 			ieee80211_get_hdrlen_from_skb(skb),
 			AR5K_PKT_TYPE_BEACON, (sc->power_level * 2),
-			ctl->tx_rate->hw_value, 1, AR5K_TXKEYIX_INVALID,
+			ieee80211_get_tx_rate(sc->hw, ctl)->hw_value,
+			1, AR5K_TXKEYIX_INVALID,
 			antenna, flags, 0, 0);
 	if (ret)
 		goto err_unmap;
@@ -2654,7 +2656,7 @@ ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 		memmove(skb->data, skb->data+pad, hdrlen);
 	}
 
-	sc->led_txrate = ctl->tx_rate->hw_value;
+	sc->led_txrate = ieee80211_get_tx_rate(hw, ctl)->hw_value;
 
 	spin_lock_irqsave(&sc->txbuflock, flags);
 	if (list_empty(&sc->txbuf)) {
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 9445a604a96..e428645352b 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -1372,7 +1372,7 @@ static void b43_write_beacon_template(struct b43_wldev *dev,
 	bcn = (const struct ieee80211_mgmt *)(dev->wl->current_beacon->data);
 	len = min((size_t) dev->wl->current_beacon->len,
 		  0x200 - sizeof(struct b43_plcp_hdr6));
-	rate = dev->wl->beacon_txctl.tx_rate->hw_value;
+	rate = ieee80211_get_tx_rate(dev->wl->hw, &dev->wl->beacon_txctl)->hw_value;
 
 	b43_write_template_common(dev, (const u8 *)bcn,
 				  len, ram_offset, shm_size_offset, rate);
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index afce9338d83..9b682a3cf5e 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -201,13 +201,14 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 	u32 mac_ctl = 0;
 	u16 phy_ctl = 0;
 	u8 extra_ft = 0;
+	struct ieee80211_rate *txrate;
 
 	memset(txhdr, 0, sizeof(*txhdr));
 
-	WARN_ON(!txctl->tx_rate);
-	rate = txctl->tx_rate ? txctl->tx_rate->hw_value : B43_CCK_RATE_1MB;
+	txrate = ieee80211_get_tx_rate(dev->wl->hw, txctl);
+	rate = txrate ? txrate->hw_value : B43_CCK_RATE_1MB;
 	rate_ofdm = b43_is_ofdm_rate(rate);
-	fbrate = txctl->alt_retry_rate ? : txctl->tx_rate;
+	fbrate = ieee80211_get_alt_retry_rate(dev->wl->hw, txctl) ? : txrate;
 	rate_fb = fbrate->hw_value;
 	rate_fb_ofdm = b43_is_ofdm_rate(rate_fb);
 
@@ -336,9 +337,11 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		int rts_rate, rts_rate_fb;
 		int rts_rate_ofdm, rts_rate_fb_ofdm;
 		struct b43_plcp_hdr6 *plcp;
+		struct ieee80211_rate *rts_cts_rate;
 
-		WARN_ON(!txctl->rts_cts_rate);
-		rts_rate = txctl->rts_cts_rate ? txctl->rts_cts_rate->hw_value : B43_CCK_RATE_1MB;
+		rts_cts_rate = ieee80211_get_rts_cts_rate(dev->wl->hw, txctl);
+
+		rts_rate = rts_cts_rate ? rts_cts_rate->hw_value : B43_CCK_RATE_1MB;
 		rts_rate_ofdm = b43_is_ofdm_rate(rts_rate);
 		rts_rate_fb = b43_calc_fallback_rate(rts_rate);
 		rts_rate_fb_ofdm = b43_is_ofdm_rate(rts_rate_fb);
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index bed9e041d6c..55dc251bf51 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -201,15 +201,18 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 	unsigned int plcp_fragment_len;
 	u32 mac_ctl = 0;
 	u16 phy_ctl = 0;
+	struct ieee80211_rate *tx_rate;
 
 	wlhdr = (const struct ieee80211_hdr *)fragment_data;
 	fctl = le16_to_cpu(wlhdr->frame_control);
 
 	memset(txhdr, 0, sizeof(*txhdr));
 
-	rate = txctl->tx_rate->hw_value;
+	tx_rate = ieee80211_get_tx_rate(dev->wl->hw, txctl);
+
+	rate = tx_rate->hw_value;
 	rate_ofdm = b43legacy_is_ofdm_rate(rate);
-	rate_fb = txctl->alt_retry_rate ? : txctl->tx_rate;
+	rate_fb = ieee80211_get_alt_retry_rate(dev->wl->hw, txctl) ? : tx_rate;
 	rate_fb_ofdm = b43legacy_is_ofdm_rate(rate_fb->hw_value);
 
 	txhdr->mac_frame_ctl = wlhdr->frame_control;
@@ -312,7 +315,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 		int rts_rate_ofdm;
 		int rts_rate_fb_ofdm;
 
-		rts_rate = txctl->rts_cts_rate->hw_value;
+		rts_rate = ieee80211_get_rts_cts_rate(dev->wl->hw, txctl)->hw_value;
 		rts_rate_ofdm = b43legacy_is_ofdm_rate(rts_rate);
 		rts_rate_fb = b43legacy_calc_fallback_rate(rts_rate);
 		rts_rate_fb_ofdm = b43legacy_is_ofdm_rate(rts_rate_fb);
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
index e51eeeff699..f3ca02fe961 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
@@ -464,7 +464,7 @@ static void rs_tx_status(void *priv_rate,
 
 
 	retries = tx_resp->retry_count;
-	first_index = tx_resp->control.tx_rate->hw_value;
+	first_index = sband->bitrates[tx_resp->control.tx_rate_idx].hw_value;
 	if ((first_index < 0) || (first_index >= IWL_RATE_COUNT)) {
 		IWL_DEBUG_RATE("leave: Rate out of bounds: %d\n", first_index);
 		return;
@@ -669,7 +669,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 	    is_multicast_ether_addr(hdr->addr1) ||
 	    !sta || !sta->rate_ctrl_priv) {
 		IWL_DEBUG_RATE("leave: No STA priv data to update!\n");
-		sel->rate = rate_lowest(local, sband, sta);
+		sel->rate_idx = rate_lowest_index(local, sband, sta);
 		rcu_read_unlock();
 		return;
 	}
@@ -813,7 +813,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 
 	IWL_DEBUG_RATE("leave: %d\n", index);
 
-	sel->rate = &sband->bitrates[sta->txrate_idx];
+	sel->rate_idx = sta->txrate_idx;
 }
 
 static struct rate_control_ops rs_ops = {
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index ad4e7b74ca2..f8e691f88ab 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -331,7 +331,9 @@ static void iwl3945_rx_reply_tx(struct iwl3945_priv *priv,
 			tx_resp->rate, tx_resp->failure_frame);
 
 	rate_idx = iwl3945_hwrate_to_plcp_idx(tx_resp->rate);
-	tx_status->control.tx_rate = &priv->ieee_rates[rate_idx];
+	if (tx_status->control.band == IEEE80211_BAND_5GHZ)
+		rate_idx -= IWL_FIRST_OFDM_RATE;
+	tx_status->control.tx_rate_idx = rate_idx;
 	IWL_DEBUG_TX_REPLY("Tx queue reclaim %d\n", index);
 	iwl3945_tx_queue_reclaim(priv, txq_id, index);
 
@@ -962,7 +964,8 @@ void iwl3945_hw_build_tx_cmd_rate(struct iwl3945_priv *priv,
 			      struct ieee80211_hdr *hdr, int sta_id, int tx_id)
 {
 	unsigned long flags;
-	u16 rate_index = min(ctrl->tx_rate->hw_value & 0xffff, IWL_RATE_COUNT - 1);
+	u16 hw_value = ieee80211_get_tx_rate(priv->hw, ctrl)->hw_value;
+	u16 rate_index = min(hw_value & 0xffff, IWL_RATE_COUNT - 1);
 	u16 rate_mask;
 	int rate;
 	u8 rts_retry_limit;
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965-rs.c b/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
index 2adc2281c77..7993a1d8302 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
@@ -862,7 +862,7 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 	if (priv->band == IEEE80211_BAND_5GHZ)
 		rs_index -= IWL_FIRST_OFDM_RATE;
 
-	if ((tx_resp->control.tx_rate == NULL) ||
+	if ((tx_resp->control.tx_rate_idx < 0) ||
 	    (tbl_type.is_SGI ^
 		!!(tx_resp->control.flags & IEEE80211_TXCTL_SHORT_GI)) ||
 	    (tbl_type.is_fat ^
@@ -875,7 +875,7 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 	    (!!(tx_rate & RATE_MCS_GF_MSK) ^
 		!!(tx_resp->control.flags & IEEE80211_TXCTL_GREEN_FIELD)) ||
 	    (hw->wiphy->bands[priv->band]->bitrates[rs_index].bitrate !=
-		tx_resp->control.tx_rate->bitrate)) {
+	     hw->wiphy->bands[tx_resp->control.band]->bitrates[tx_resp->control.tx_rate_idx].bitrate)) {
 		IWL_DEBUG_RATE("initial rate does not match 0x%x\n", tx_rate);
 		goto out;
 	}
@@ -2154,7 +2154,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 	fc = le16_to_cpu(hdr->frame_control);
 	if (!ieee80211_is_data(fc) || is_multicast_ether_addr(hdr->addr1) ||
 	    !sta || !sta->rate_ctrl_priv) {
-		sel->rate = rate_lowest(local, sband, sta);
+		sel->rate_idx = rate_lowest_index(local, sband, sta);
 		goto out;
 	}
 
@@ -2184,11 +2184,13 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 
 done:
 	if ((i < 0) || (i > IWL_RATE_COUNT)) {
-		sel->rate = rate_lowest(local, sband, sta);
+		sel->rate_idx = rate_lowest_index(local, sband, sta);
 		goto out;
 	}
 
-	sel->rate = &priv->ieee_rates[i];
+	if (sband->band == IEEE80211_BAND_5GHZ)
+		i -= IWL_FIRST_OFDM_RATE;
+	sel->rate_idx = i;
 out:
 	rcu_read_unlock();
 }
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index f848a5b0f62..fb670b5cfeb 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -373,14 +373,10 @@ void iwl4965_hwrate_to_tx_control(struct iwl_priv *priv, u32 rate_n_flags,
 		control->flags |= IEEE80211_TXCTL_DUP_DATA;
 	if (rate_n_flags & RATE_MCS_SGI_MSK)
 		control->flags |= IEEE80211_TXCTL_SHORT_GI;
-	/* since iwl4965_hwrate_to_plcp_idx is band indifferent, we always use
-	 * IEEE80211_BAND_2GHZ band as it contains all the rates */
 	rate_index = iwl4965_hwrate_to_plcp_idx(rate_n_flags);
-	if (rate_index == -1)
-		control->tx_rate = NULL;
-	else
-		control->tx_rate =
-			&priv->bands[IEEE80211_BAND_2GHZ].bitrates[rate_index];
+	if (control->band == IEEE80211_BAND_5GHZ)
+		rate_index -= IWL_FIRST_OFDM_RATE;
+	control->tx_rate_idx = rate_index;
 }
 
 int iwl4965_hw_rxq_stop(struct iwl_priv *priv)
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index f32cddabdf6..4b5149c8c32 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -578,7 +578,10 @@ static void iwl_tx_cmd_build_rate(struct iwl_priv *priv,
 	u8 data_retry_limit = 0;
 	u8 rate_plcp;
 	u16 rate_flags = 0;
-	int rate_idx = min(ctrl->tx_rate->hw_value & 0xffff, IWL_RATE_COUNT - 1);
+	int rate_idx;
+
+	rate_idx = min(ieee80211_get_tx_rate(priv->hw, ctrl)->hw_value & 0xffff,
+			IWL_RATE_COUNT - 1);
 
 	rate_plcp = iwl_rates[rate_idx].plcp;
 
@@ -723,7 +726,8 @@ int iwl_tx_skb(struct iwl_priv *priv,
 		goto drop_unlock;
 	}
 
-	if ((ctl->tx_rate->hw_value & 0xFF) == IWL_INVALID_RATE) {
+	if ((ieee80211_get_tx_rate(priv->hw, ctl)->hw_value & 0xFF) ==
+	     IWL_INVALID_RATE) {
 		IWL_ERROR("ERROR: No TX rate available.\n");
 		goto drop_unlock;
 	}
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 54cde8a7b5f..a28b4c9f652 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -2581,7 +2581,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv,
 		goto drop_unlock;
 	}
 
-	if ((ctl->tx_rate->hw_value & 0xFF) == IWL_INVALID_RATE) {
+	if ((ieee80211_get_tx_rate(priv->hw, ctl)->hw_value & 0xFF) == IWL_INVALID_RATE) {
 		IWL_ERROR("ERROR: No TX rate available.\n");
 		goto drop_unlock;
 	}
@@ -6694,7 +6694,7 @@ static int iwl3945_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	}
 
 	IWL_DEBUG_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
-		     ctl->tx_rate->bitrate);
+		     ieee80211_get_tx_rate(hw, ctl)->bitrate);
 
 	if (iwl3945_tx_skb(priv, skb, ctl))
 		dev_kfree_skb_any(skb);
diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c
index db4f606bad5..1fad6227aa5 100644
--- a/drivers/net/wireless/iwlwifi/iwl4965-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c
@@ -4281,7 +4281,7 @@ static int iwl4965_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	}
 
 	IWL_DEBUG_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
-		     ctl->tx_rate->bitrate);
+		     ieee80211_get_tx_rate(hw, ctl)->bitrate);
 
 	if (iwl_tx_skb(priv, skb, ctl))
 		dev_kfree_skb_any(skb);
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 3d35fe6a8f5..3ca9386561f 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -592,7 +592,7 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	txhdr->padding2 = 0;
 
 	/* TODO: add support for alternate retry TX rates */
-	rate = control->tx_rate->hw_value;
+	rate = ieee80211_get_tx_rate(dev, control)->hw_value;
 	if (control->flags & IEEE80211_TXCTL_SHORT_PREAMBLE)
 		rate |= 0x10;
 	if (control->flags & IEEE80211_TXCTL_USE_RTS_CTS)
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index 19c10629c76..5cf4c2f5926 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -33,8 +33,10 @@ void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
 				      struct txentry_desc *txdesc,
 				      struct ieee80211_tx_control *control)
 {
+	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data;
-	struct ieee80211_rate *rate = control->tx_rate;
+	struct ieee80211_rate *rate =
+	    ieee80211_get_tx_rate(rt2x00dev->hw, control);
 	const struct rt2x00_rate *hwrate;
 	unsigned int data_length;
 	unsigned int duration;
@@ -77,8 +79,9 @@ void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
 			__set_bit(ENTRY_TXD_CTS_FRAME, &txdesc->flags);
 			__clear_bit(ENTRY_TXD_ACK, &txdesc->flags);
 		}
-		if (control->rts_cts_rate)
-			rate = control->rts_cts_rate;
+		if (control->rts_cts_rate_idx >= 0)
+			rate =
+			    ieee80211_get_rts_cts_rate(rt2x00dev->hw, control);
 	}
 
 	/*
diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c
index c220998cee6..6263209b889 100644
--- a/drivers/net/wireless/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl8180_dev.c
@@ -257,24 +257,21 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	mapping = pci_map_single(priv->pdev, skb->data,
 				 skb->len, PCI_DMA_TODEVICE);
 
-	BUG_ON(!control->tx_rate);
-
 	tx_flags = RTL8180_TX_DESC_FLAG_OWN | RTL8180_TX_DESC_FLAG_FS |
 		   RTL8180_TX_DESC_FLAG_LS |
-		   (control->tx_rate->hw_value << 24) | skb->len;
+		   (ieee80211_get_tx_rate(dev, control)->hw_value << 24) |
+		   skb->len;
 
 	if (priv->r8185)
 		tx_flags |= RTL8180_TX_DESC_FLAG_DMA |
 			    RTL8180_TX_DESC_FLAG_NO_ENC;
 
 	if (control->flags & IEEE80211_TXCTL_USE_RTS_CTS) {
-		BUG_ON(!control->rts_cts_rate);
 		tx_flags |= RTL8180_TX_DESC_FLAG_RTS;
-		tx_flags |= control->rts_cts_rate->hw_value << 19;
+		tx_flags |= ieee80211_get_rts_cts_rate(dev, control)->hw_value << 19;
 	} else if (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) {
-		BUG_ON(!control->rts_cts_rate);
 		tx_flags |= RTL8180_TX_DESC_FLAG_CTS;
-		tx_flags |= control->rts_cts_rate->hw_value << 19;
+		tx_flags |= ieee80211_get_rts_cts_rate(dev, control)->hw_value << 19;
 	}
 
 	*((struct ieee80211_tx_control **) skb->cb) =
@@ -288,9 +285,9 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 		unsigned int remainder;
 
 		plcp_len = DIV_ROUND_UP(16 * (skb->len + 4),
-					(control->tx_rate->bitrate * 2) / 10);
+				(ieee80211_get_tx_rate(dev, control)->bitrate * 2) / 10);
 		remainder = (16 * (skb->len + 4)) %
-			    ((control->tx_rate->bitrate * 2) / 10);
+			    ((ieee80211_get_tx_rate(dev, control)->bitrate * 2) / 10);
 		if (remainder > 0 && remainder <= 6)
 			plcp_len |= 1 << 15;
 	}
@@ -303,8 +300,8 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	entry->plcp_len = cpu_to_le16(plcp_len);
 	entry->tx_buf = cpu_to_le32(mapping);
 	entry->frame_len = cpu_to_le32(skb->len);
-	entry->flags2 = control->alt_retry_rate != NULL ?
-			control->alt_retry_rate->bitrate << 4 : 0;
+	entry->flags2 = control->alt_retry_rate_idx >= 0 ?
+		ieee80211_get_alt_retry_rate(dev, control)->bitrate << 4 : 0;
 	entry->retry_limit = control->retry_limit;
 	entry->flags = cpu_to_le32(tx_flags);
 	__skb_queue_tail(&ring->queue, skb);
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index e14c8424868..86a09b49681 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -179,21 +179,17 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	flags = skb->len;
 	flags |= RTL8187_TX_FLAG_NO_ENCRYPT;
 
-	BUG_ON(!control->tx_rate);
-
-	flags |= control->tx_rate->hw_value << 24;
+	flags |= ieee80211_get_tx_rate(dev, control)->hw_value << 24;
 	if (ieee80211_get_morefrag((struct ieee80211_hdr *)skb->data))
 		flags |= RTL8187_TX_FLAG_MORE_FRAG;
 	if (control->flags & IEEE80211_TXCTL_USE_RTS_CTS) {
-		BUG_ON(!control->rts_cts_rate);
 		flags |= RTL8187_TX_FLAG_RTS;
-		flags |= control->rts_cts_rate->hw_value << 19;
+		flags |= ieee80211_get_rts_cts_rate(dev, control)->hw_value << 19;
 		rts_dur = ieee80211_rts_duration(dev, priv->vif,
 						 skb->len, control);
 	} else if (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) {
-		BUG_ON(!control->rts_cts_rate);
 		flags |= RTL8187_TX_FLAG_CTS;
-		flags |= control->rts_cts_rate->hw_value << 19;
+		flags |= ieee80211_get_rts_cts_rate(dev, control)->hw_value << 19;
 	}
 
 	hdr = (struct rtl8187_tx_hdr *)skb_push(skb, sizeof(*hdr));
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 0c736735e21..99c508c09e5 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -523,14 +523,17 @@ static int fill_ctrlset(struct zd_mac *mac,
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	unsigned int frag_len = skb->len + FCS_LEN;
 	unsigned int packet_length;
+	struct ieee80211_rate *txrate;
 	struct zd_ctrlset *cs = (struct zd_ctrlset *)
 		skb_push(skb, sizeof(struct zd_ctrlset));
 
 	ZD_ASSERT(frag_len <= 0xffff);
 
-	cs->modulation = control->tx_rate->hw_value;
+	txrate = ieee80211_get_tx_rate(mac->hw, control);
+
+	cs->modulation = txrate->hw_value;
 	if (control->flags & IEEE80211_TXCTL_SHORT_PREAMBLE)
-		cs->modulation = control->tx_rate->hw_value_short;
+		cs->modulation = txrate->hw_value_short;
 
 	cs->tx_length = cpu_to_le16(frag_len);
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f00fc76a734..0df91bea6c1 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -266,27 +266,26 @@ enum mac80211_tx_control_flags {
  * ieee80211_ops->remove_interface() callback funtion.
  * The hw_key pointer is valid until it has been removed with the
  * ieee80211_ops->set_key() callback function.
- * The tx_rate and alt_retry_rate pointers are valid until the phy is
- * deregistered.
  */
 struct ieee80211_tx_control {
-	struct ieee80211_vif *vif;
-	struct ieee80211_rate *tx_rate;
+	u32 flags;		/* tx control flags defined above */
+
+	s8 tx_rate_idx,		/* Transmit rate (indexes registered rates) */
+	   rts_cts_rate_idx,	/* Transmit rate for RTS/CTS frame */
+	   alt_retry_rate_idx;	/* retry rate for the last retries */
 
-	/* Transmit rate for RTS/CTS frame */
-	struct ieee80211_rate *rts_cts_rate;
+	s8 retry_limit;		/* 1 = only first attempt, 2 = one retry, ..
+				 * This could be used when set_retry_limit
+				 * is not implemented by the driver */
 
-	/* retry rate for the last retries */
-	struct ieee80211_rate *alt_retry_rate;
+	struct ieee80211_vif *vif;
 
 	/* Key used for hardware encryption
 	 * NULL if IEEE80211_TXCTL_DO_NOT_ENCRYPT is set */
 	struct ieee80211_key_conf *hw_key;
 
-	u32 flags;		/* tx control flags defined above */
-	u8 retry_limit;		/* 1 = only first attempt, 2 = one retry, ..
-				 * This could be used when set_retry_limit
-				 * is not implemented by the driver */
+	enum ieee80211_band band;
+
 	u8 antenna_sel_tx; 	/* 0 = default/diversity, otherwise bit
 				 * position represents antenna number used */
 	u8 icv_len;		/* length of the ICV/MIC field in octets */
@@ -298,6 +297,7 @@ struct ieee80211_tx_control {
 };
 
 
+
 /**
  * enum mac80211_rx_flags - receive flags
  *
@@ -823,6 +823,33 @@ static inline void SET_IEEE80211_PERM_ADDR(struct ieee80211_hw *hw, u8 *addr)
 	memcpy(hw->wiphy->perm_addr, addr, ETH_ALEN);
 }
 
+static inline struct ieee80211_rate *
+ieee80211_get_tx_rate(const struct ieee80211_hw *hw,
+		      const struct ieee80211_tx_control *c)
+{
+	if (WARN_ON(c->tx_rate_idx < 0))
+		return NULL;
+	return &hw->wiphy->bands[c->band]->bitrates[c->tx_rate_idx];
+}
+
+static inline struct ieee80211_rate *
+ieee80211_get_rts_cts_rate(const struct ieee80211_hw *hw,
+			   const struct ieee80211_tx_control *c)
+{
+	if (c->rts_cts_rate_idx < 0)
+		return NULL;
+	return &hw->wiphy->bands[c->band]->bitrates[c->rts_cts_rate_idx];
+}
+
+static inline struct ieee80211_rate *
+ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
+			     const struct ieee80211_tx_control *c)
+{
+	if (c->alt_retry_rate_idx < 0)
+		return NULL;
+	return &hw->wiphy->bands[c->band]->bitrates[c->alt_retry_rate_idx];
+}
+
 /**
  * DOC: Hardware crypto acceleration
  *
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ed0d9b35ae6..a4cccd1b7d5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -159,11 +159,11 @@ struct ieee80211_tx_data {
 
 	struct ieee80211_tx_control *control;
 	struct ieee80211_channel *channel;
-	struct ieee80211_rate *rate;
+	s8 rate_idx;
 	/* use this rate (if set) for last fragment; rate can
 	 * be set to lower rate for the first fragments, e.g.,
 	 * when using CTS protection with IEEE 802.11g. */
-	struct ieee80211_rate *last_frag_rate;
+	s8 last_frag_rate_idx;
 
 	/* Extra fragments (in addition to the first fragment
 	 * in skb) */
@@ -225,9 +225,9 @@ struct ieee80211_tx_stored_packet {
 	struct ieee80211_tx_control control;
 	struct sk_buff *skb;
 	struct sk_buff **extra_frag;
-	struct ieee80211_rate *last_frag_rate;
+	s8 last_frag_rate_idx;
 	int num_extra_frag;
-	unsigned int last_frag_rate_ctrl_probe;
+	bool last_frag_rate_ctrl_probe;
 };
 
 struct beacon_data {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7877d3b3f4c..604149369dc 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2406,15 +2406,15 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 
 		memset(&control, 0, sizeof(control));
 		rate_control_get_rate(dev, sband, skb, &ratesel);
-		if (!ratesel.rate) {
+		if (ratesel.rate_idx < 0) {
 			printk(KERN_DEBUG "%s: Failed to determine TX rate "
 			       "for IBSS beacon\n", dev->name);
 			break;
 		}
 		control.vif = &sdata->vif;
-		control.tx_rate = ratesel.rate;
+		control.tx_rate_idx = ratesel.rate_idx;
 		if (sdata->bss_conf.use_short_preamble &&
-		    ratesel.rate->flags & IEEE80211_RATE_SHORT_PREAMBLE)
+		    sband->bitrates[ratesel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
 			control.flags |= IEEE80211_TXCTL_SHORT_PREAMBLE;
 		control.antenna_sel_tx = local->hw.conf.antenna_sel_tx;
 		control.flags |= IEEE80211_TXCTL_NO_ACK;
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 841df93807f..0388c090dfe 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -176,20 +176,24 @@ void rate_control_get_rate(struct net_device *dev,
 	rcu_read_lock();
 	sta = sta_info_get(local, hdr->addr1);
 
-	memset(sel, 0, sizeof(struct rate_selection));
+	sel->rate_idx = -1;
+	sel->nonerp_idx = -1;
+	sel->probe_idx = -1;
 
 	ref->ops->get_rate(ref->priv, dev, sband, skb, sel);
 
+	BUG_ON(sel->rate_idx < 0);
+
 	/* Select a non-ERP backup rate. */
-	if (!sel->nonerp) {
+	if (sel->nonerp_idx < 0) {
 		for (i = 0; i < sband->n_bitrates; i++) {
 			struct ieee80211_rate *rate = &sband->bitrates[i];
-			if (sel->rate->bitrate < rate->bitrate)
+			if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate)
 				break;
 
 			if (rate_supported(sta, sband->band, i) &&
 			    !(rate->flags & IEEE80211_RATE_ERP_G))
-				sel->nonerp = rate;
+				sel->nonerp_idx = i;
 		}
 	}
 
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 5b45f33cb76..a29148dcca9 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -19,14 +19,15 @@
 #include "ieee80211_i.h"
 #include "sta_info.h"
 
-/* TODO: kdoc */
+/**
+ * struct rate_selection - rate selection for rate control algos
+ * @rate: selected transmission rate index
+ * @nonerp: Non-ERP rate to use instead if ERP cannot be used
+ * @probe: rate for probing (or -1)
+ *
+ */
 struct rate_selection {
-	/* Selected transmission rate */
-	struct ieee80211_rate *rate;
-	/* Non-ERP rate to use if mac80211 decides it cannot use an ERP rate */
-	struct ieee80211_rate *nonerp;
-	/* probe with this rate, or NULL for no probing */
-	struct ieee80211_rate *probe;
+	s8 rate_idx, nonerp_idx, probe_idx;
 };
 
 struct rate_control_ops {
@@ -138,7 +139,7 @@ static inline int rate_supported(struct sta_info *sta,
 	return (sta == NULL || sta->supp_rates[band] & BIT(index));
 }
 
-static inline int
+static inline s8
 rate_lowest_index(struct ieee80211_local *local,
 		  struct ieee80211_supported_band *sband,
 		  struct sta_info *sta)
@@ -155,14 +156,6 @@ rate_lowest_index(struct ieee80211_local *local,
 	return 0;
 }
 
-static inline struct ieee80211_rate *
-rate_lowest(struct ieee80211_local *local,
-	    struct ieee80211_supported_band *sband,
-	    struct sta_info *sta)
-{
-	return &sband->bitrates[rate_lowest_index(local, sband, sta)];
-}
-
 
 /* functions for rate control related to a device */
 int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index a849b745bdb..14cde36f507 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -266,7 +266,7 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 
 	/* Ignore all frames that were sent with a different rate than the rate
 	 * we currently advise mac80211 to use. */
-	if (status->control.tx_rate != &sband->bitrates[sta->txrate_idx])
+	if (status->control.tx_rate_idx != sta->txrate_idx)
 		goto unlock;
 
 	spinfo = sta->rate_ctrl_priv;
@@ -330,7 +330,7 @@ static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
 	fc = le16_to_cpu(hdr->frame_control);
 	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
 	    is_multicast_ether_addr(hdr->addr1) || !sta) {
-		sel->rate = rate_lowest(local, sband, sta);
+		sel->rate_idx = rate_lowest_index(local, sband, sta);
 		rcu_read_unlock();
 		return;
 	}
@@ -349,7 +349,7 @@ static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
 
 	rcu_read_unlock();
 
-	sel->rate = &sband->bitrates[rateidx];
+	sel->rate_idx = rateidx;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	rate_control_pid_event_tx_rate(
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index aecec2a72b0..99c3860bc0e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -91,11 +91,12 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 			      int next_frag_len)
 {
 	int rate, mrate, erp, dur, i;
-	struct ieee80211_rate *txrate = tx->rate;
+	struct ieee80211_rate *txrate;
 	struct ieee80211_local *local = tx->local;
 	struct ieee80211_supported_band *sband;
 
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+	sband = local->hw.wiphy->bands[tx->channel->band];
+	txrate = &sband->bitrates[tx->rate_idx];
 
 	erp = 0;
 	if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
@@ -610,40 +611,40 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	struct rate_selection rsel;
 	struct ieee80211_supported_band *sband;
 
-	sband = tx->local->hw.wiphy->bands[tx->local->hw.conf.channel->band];
+	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
-	if (likely(!tx->rate)) {
+	if (likely(tx->rate_idx < 0)) {
 		rate_control_get_rate(tx->dev, sband, tx->skb, &rsel);
-		tx->rate = rsel.rate;
-		if (unlikely(rsel.probe)) {
+		tx->rate_idx = rsel.rate_idx;
+		if (unlikely(rsel.probe_idx >= 0)) {
 			tx->control->flags |=
 				IEEE80211_TXCTL_RATE_CTRL_PROBE;
 			tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG;
-			tx->control->alt_retry_rate = tx->rate;
-			tx->rate = rsel.probe;
+			tx->control->alt_retry_rate_idx = tx->rate_idx;
+			tx->rate_idx = rsel.probe_idx;
 		} else
-			tx->control->alt_retry_rate = NULL;
+			tx->control->alt_retry_rate_idx = -1;
 
-		if (!tx->rate)
+		if (unlikely(tx->rate_idx < 0))
 			return TX_DROP;
 	} else
-		tx->control->alt_retry_rate = NULL;
+		tx->control->alt_retry_rate_idx = -1;
 
 	if (tx->sdata->bss_conf.use_cts_prot &&
-	    (tx->flags & IEEE80211_TX_FRAGMENTED) && rsel.nonerp) {
-		tx->last_frag_rate = tx->rate;
-		if (rsel.probe)
+	    (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) {
+		tx->last_frag_rate_idx = tx->rate_idx;
+		if (rsel.probe_idx >= 0)
 			tx->flags &= ~IEEE80211_TX_PROBE_LAST_FRAG;
 		else
 			tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG;
-		tx->rate = rsel.nonerp;
-		tx->control->tx_rate = rsel.nonerp;
+		tx->rate_idx = rsel.nonerp_idx;
+		tx->control->tx_rate_idx = rsel.nonerp_idx;
 		tx->control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE;
 	} else {
-		tx->last_frag_rate = tx->rate;
-		tx->control->tx_rate = tx->rate;
+		tx->last_frag_rate_idx = tx->rate_idx;
+		tx->control->tx_rate_idx = tx->rate_idx;
 	}
-	tx->control->tx_rate = tx->rate;
+	tx->control->tx_rate_idx = tx->rate_idx;
 
 	return TX_CONTINUE;
 }
@@ -655,6 +656,9 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	u16 fc = le16_to_cpu(hdr->frame_control);
 	u16 dur;
 	struct ieee80211_tx_control *control = tx->control;
+	struct ieee80211_supported_band *sband;
+
+	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
 	if (!control->retry_limit) {
 		if (!is_multicast_ether_addr(hdr->addr1)) {
@@ -681,14 +685,14 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 		 * frames.
 		 * TODO: The last fragment could still use multiple retry
 		 * rates. */
-		control->alt_retry_rate = NULL;
+		control->alt_retry_rate_idx = -1;
 	}
 
 	/* Use CTS protection for unicast frames sent using extended rates if
 	 * there are associated non-ERP stations and RTS/CTS is not configured
 	 * for the frame. */
 	if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) &&
-	    (tx->rate->flags & IEEE80211_RATE_ERP_G) &&
+	    (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_ERP_G) &&
 	    (tx->flags & IEEE80211_TX_UNICAST) &&
 	    tx->sdata->bss_conf.use_cts_prot &&
 	    !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS))
@@ -698,7 +702,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	 * short preambles at the selected rate and short preambles are
 	 * available on the network at the current point in time. */
 	if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) &&
-	    (tx->rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
+	    (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
 	    tx->sdata->bss_conf.use_short_preamble &&
 	    (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) {
 		tx->control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE;
@@ -715,32 +719,32 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) ||
 	    (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) {
 		struct ieee80211_supported_band *sband;
-		struct ieee80211_rate *rate, *baserate;
+		struct ieee80211_rate *rate;
+		s8 baserate = -1;
 		int idx;
 
-		sband = tx->local->hw.wiphy->bands[
-				tx->local->hw.conf.channel->band];
+		sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
 		/* Do not use multiple retry rates when using RTS/CTS */
-		control->alt_retry_rate = NULL;
+		control->alt_retry_rate_idx = -1;
 
 		/* Use min(data rate, max base rate) as CTS/RTS rate */
-		rate = tx->rate;
-		baserate = NULL;
+		rate = &sband->bitrates[tx->rate_idx];
 
 		for (idx = 0; idx < sband->n_bitrates; idx++) {
 			if (sband->bitrates[idx].bitrate > rate->bitrate)
 				continue;
 			if (tx->sdata->basic_rates & BIT(idx) &&
-			    (!baserate ||
-			     (baserate->bitrate < sband->bitrates[idx].bitrate)))
-				baserate = &sband->bitrates[idx];
+			    (baserate < 0 ||
+			     (sband->bitrates[baserate].bitrate
+			      < sband->bitrates[idx].bitrate)))
+				baserate = idx;
 		}
 
-		if (baserate)
-			control->rts_cts_rate = baserate;
+		if (baserate >= 0)
+			control->rts_cts_rate_idx = baserate;
 		else
-			control->rts_cts_rate = &sband->bitrates[0];
+			control->rts_cts_rate_idx = 0;
 	}
 
 	if (tx->sta) {
@@ -768,7 +772,11 @@ ieee80211_tx_h_load_stats(struct ieee80211_tx_data *tx)
 	struct sk_buff *skb = tx->skb;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	u32 load = 0, hdrtime;
-	struct ieee80211_rate *rate = tx->rate;
+	struct ieee80211_rate *rate;
+	struct ieee80211_supported_band *sband;
+
+	sband = tx->local->hw.wiphy->bands[tx->channel->band];
+	rate = &sband->bitrates[tx->rate_idx];
 
 	/* TODO: this could be part of tx_status handling, so that the number
 	 * of retries would be known; TX rate should in that case be stored
@@ -803,7 +811,7 @@ ieee80211_tx_h_load_stats(struct ieee80211_tx_data *tx)
 		for (i = 0; i < tx->num_extra_frag; i++) {
 			load += 2 * hdrtime;
 			load += tx->extra_frag[i]->len *
-				tx->rate->bitrate;
+				rate->bitrate;
 		}
 	}
 
@@ -859,7 +867,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 	int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len);
 	struct ieee80211_tx_control *control = tx->control;
 
-	sband = tx->local->hw.wiphy->bands[tx->local->hw.conf.channel->band];
+	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
 	control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
 	tx->flags |= IEEE80211_TX_INJECTED;
@@ -899,7 +907,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 				r = &sband->bitrates[i];
 
 				if (r->bitrate == target_rate) {
-					tx->rate = r;
+					tx->rate_idx = i;
 					break;
 				}
 			}
@@ -1097,7 +1105,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 			if (__ieee80211_queue_stopped(local, control->queue))
 				return IEEE80211_TX_FRAG_AGAIN;
 			if (i == tx->num_extra_frag) {
-				control->tx_rate = tx->last_frag_rate;
+				control->tx_rate_idx = tx->last_frag_rate_idx;
 
 				if (tx->flags & IEEE80211_TX_PROBE_LAST_FRAG)
 					control->flags |=
@@ -1155,6 +1163,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
 
 	sta = tx.sta;
 	tx.channel = local->hw.conf.channel;
+	control->band = tx.channel->band;
 
 	for (handler = ieee80211_tx_handlers; *handler != NULL;
 	     handler++) {
@@ -1187,7 +1196,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
 				next_len = tx.extra_frag[i + 1]->len;
 			} else {
 				next_len = 0;
-				tx.rate = tx.last_frag_rate;
+				tx.rate_idx = tx.last_frag_rate_idx;
 			}
 			dur = ieee80211_duration(&tx, 0, next_len);
 			hdr->duration_id = cpu_to_le16(dur);
@@ -1224,7 +1233,7 @@ retry:
 		store->skb = skb;
 		store->extra_frag = tx.extra_frag;
 		store->num_extra_frag = tx.num_extra_frag;
-		store->last_frag_rate = tx.last_frag_rate;
+		store->last_frag_rate_idx = tx.last_frag_rate_idx;
 		store->last_frag_rate_ctrl_probe =
 			!!(tx.flags & IEEE80211_TX_PROBE_LAST_FRAG);
 	}
@@ -1685,7 +1694,7 @@ void ieee80211_tx_pending(unsigned long data)
 		tx.control = &store->control;
 		tx.extra_frag = store->extra_frag;
 		tx.num_extra_frag = store->num_extra_frag;
-		tx.last_frag_rate = store->last_frag_rate;
+		tx.last_frag_rate_idx = store->last_frag_rate_idx;
 		tx.flags = 0;
 		if (store->last_frag_rate_ctrl_probe)
 			tx.flags |= IEEE80211_TX_PROBE_LAST_FRAG;
@@ -1789,9 +1798,10 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	struct ieee80211_mgmt *mgmt;
 	int *num_beacons;
 	bool err = true;
+	enum ieee80211_band band = local->hw.conf.channel->band;
 	u8 *pos;
 
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+	sband = local->hw.wiphy->bands[band];
 
 	rcu_read_lock();
 
@@ -1885,8 +1895,9 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	}
 
 	if (control) {
+		control->band = band;
 		rate_control_get_rate(local->mdev, sband, skb, &rsel);
-		if (!rsel.rate) {
+		if (unlikely(rsel.rate_idx < 0)) {
 			if (net_ratelimit()) {
 				printk(KERN_DEBUG "%s: ieee80211_beacon_get: "
 				       "no rate found\n",
@@ -1898,9 +1909,9 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 		}
 
 		control->vif = vif;
-		control->tx_rate = rsel.rate;
+		control->tx_rate_idx = rsel.rate_idx;
 		if (sdata->bss_conf.use_short_preamble &&
-		    rsel.rate->flags & IEEE80211_RATE_SHORT_PREAMBLE)
+		    sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
 			control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE;
 		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
 		control->flags |= IEEE80211_TXCTL_NO_ACK;
@@ -2006,6 +2017,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	sta = tx.sta;
 	tx.flags |= IEEE80211_TX_PS_BUFFERED;
 	tx.channel = local->hw.conf.channel;
+	control->band = tx.channel->band;
 
 	for (handler = ieee80211_tx_handlers; *handler != NULL; handler++) {
 		res = (*handler)(&tx);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 800c15aff6e..65a34fddeb0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -266,10 +266,13 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
 	bool short_preamble;
 	int erp;
 	u16 dur;
+	struct ieee80211_supported_band *sband;
+
+	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
 	short_preamble = sdata->bss_conf.use_short_preamble;
 
-	rate = frame_txctl->rts_cts_rate;
+	rate = &sband->bitrates[frame_txctl->rts_cts_rate_idx];
 
 	erp = 0;
 	if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
@@ -300,10 +303,13 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
 	bool short_preamble;
 	int erp;
 	u16 dur;
+	struct ieee80211_supported_band *sband;
+
+	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
 	short_preamble = sdata->bss_conf.use_short_preamble;
 
-	rate = frame_txctl->rts_cts_rate;
+	rate = &sband->bitrates[frame_txctl->rts_cts_rate_idx];
 	erp = 0;
 	if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
 		erp = rate->flags & IEEE80211_RATE_ERP_G;
-- 
cgit v1.2.3-70-g09d2


From e24549485f859be6518929bb1c9c0257d79f033d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 15 May 2008 12:55:28 +0200
Subject: mac80211: reorder some transmit handlers

The next patch will require that transmit handlers that are after
fragmentation are aware of the fact that the control info is also
fragmented. To make that easier, this patch moves a number of
transmit handlers before fragmentation.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 234 +++++++++++++++++++++++++++---------------------------
 1 file changed, 118 insertions(+), 116 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 99c3860bc0e..666158f02a8 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -505,106 +505,6 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
-static ieee80211_tx_result
-ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
-{
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
-	size_t hdrlen, per_fragm, num_fragm, payload_len, left;
-	struct sk_buff **frags, *first, *frag;
-	int i;
-	u16 seq;
-	u8 *pos;
-	int frag_threshold = tx->local->fragmentation_threshold;
-
-	if (!(tx->flags & IEEE80211_TX_FRAGMENTED))
-		return TX_CONTINUE;
-
-	first = tx->skb;
-
-	hdrlen = ieee80211_get_hdrlen(tx->fc);
-	payload_len = first->len - hdrlen;
-	per_fragm = frag_threshold - hdrlen - FCS_LEN;
-	num_fragm = DIV_ROUND_UP(payload_len, per_fragm);
-
-	frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC);
-	if (!frags)
-		goto fail;
-
-	hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS);
-	seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ;
-	pos = first->data + hdrlen + per_fragm;
-	left = payload_len - per_fragm;
-	for (i = 0; i < num_fragm - 1; i++) {
-		struct ieee80211_hdr *fhdr;
-		size_t copylen;
-
-		if (left <= 0)
-			goto fail;
-
-		/* reserve enough extra head and tail room for possible
-		 * encryption */
-		frag = frags[i] =
-			dev_alloc_skb(tx->local->tx_headroom +
-				      frag_threshold +
-				      IEEE80211_ENCRYPT_HEADROOM +
-				      IEEE80211_ENCRYPT_TAILROOM);
-		if (!frag)
-			goto fail;
-		/* Make sure that all fragments use the same priority so
-		 * that they end up using the same TX queue */
-		frag->priority = first->priority;
-		skb_reserve(frag, tx->local->tx_headroom +
-				  IEEE80211_ENCRYPT_HEADROOM);
-		fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen);
-		memcpy(fhdr, first->data, hdrlen);
-		if (i == num_fragm - 2)
-			fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS);
-		fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG));
-		copylen = left > per_fragm ? per_fragm : left;
-		memcpy(skb_put(frag, copylen), pos, copylen);
-
-		pos += copylen;
-		left -= copylen;
-	}
-	skb_trim(first, hdrlen + per_fragm);
-
-	tx->num_extra_frag = num_fragm - 1;
-	tx->extra_frag = frags;
-
-	return TX_CONTINUE;
-
- fail:
-	printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name);
-	if (frags) {
-		for (i = 0; i < num_fragm - 1; i++)
-			if (frags[i])
-				dev_kfree_skb(frags[i]);
-		kfree(frags);
-	}
-	I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment);
-	return TX_DROP;
-}
-
-static ieee80211_tx_result
-ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
-{
-	if (!tx->key)
-		return TX_CONTINUE;
-
-	switch (tx->key->conf.alg) {
-	case ALG_WEP:
-		return ieee80211_crypto_wep_encrypt(tx);
-	case ALG_TKIP:
-		return ieee80211_crypto_tkip_encrypt(tx);
-	case ALG_CCMP:
-		return ieee80211_crypto_ccmp_encrypt(tx);
-	}
-
-	/* not reached */
-	WARN_ON(1);
-	return TX_DROP;
-}
-
 static ieee80211_tx_result
 ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 {
@@ -747,26 +647,114 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 			control->rts_cts_rate_idx = 0;
 	}
 
-	if (tx->sta) {
+	if (tx->sta)
 		control->aid = tx->sta->aid;
-		tx->sta->tx_packets++;
-		tx->sta->tx_fragments++;
-		tx->sta->tx_bytes += tx->skb->len;
-		if (tx->extra_frag) {
-			int i;
-			tx->sta->tx_fragments += tx->num_extra_frag;
-			for (i = 0; i < tx->num_extra_frag; i++) {
-				tx->sta->tx_bytes +=
-					tx->extra_frag[i]->len;
-			}
-		}
+
+	return TX_CONTINUE;
+}
+
+static ieee80211_tx_result
+ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
+	size_t hdrlen, per_fragm, num_fragm, payload_len, left;
+	struct sk_buff **frags, *first, *frag;
+	int i;
+	u16 seq;
+	u8 *pos;
+	int frag_threshold = tx->local->fragmentation_threshold;
+
+	if (!(tx->flags & IEEE80211_TX_FRAGMENTED))
+		return TX_CONTINUE;
+
+	first = tx->skb;
+
+	hdrlen = ieee80211_get_hdrlen(tx->fc);
+	payload_len = first->len - hdrlen;
+	per_fragm = frag_threshold - hdrlen - FCS_LEN;
+	num_fragm = DIV_ROUND_UP(payload_len, per_fragm);
+
+	frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC);
+	if (!frags)
+		goto fail;
+
+	hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS);
+	seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ;
+	pos = first->data + hdrlen + per_fragm;
+	left = payload_len - per_fragm;
+	for (i = 0; i < num_fragm - 1; i++) {
+		struct ieee80211_hdr *fhdr;
+		size_t copylen;
+
+		if (left <= 0)
+			goto fail;
+
+		/* reserve enough extra head and tail room for possible
+		 * encryption */
+		frag = frags[i] =
+			dev_alloc_skb(tx->local->tx_headroom +
+				      frag_threshold +
+				      IEEE80211_ENCRYPT_HEADROOM +
+				      IEEE80211_ENCRYPT_TAILROOM);
+		if (!frag)
+			goto fail;
+		/* Make sure that all fragments use the same priority so
+		 * that they end up using the same TX queue */
+		frag->priority = first->priority;
+		skb_reserve(frag, tx->local->tx_headroom +
+				  IEEE80211_ENCRYPT_HEADROOM);
+		fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen);
+		memcpy(fhdr, first->data, hdrlen);
+		if (i == num_fragm - 2)
+			fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS);
+		fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG));
+		copylen = left > per_fragm ? per_fragm : left;
+		memcpy(skb_put(frag, copylen), pos, copylen);
+
+		pos += copylen;
+		left -= copylen;
 	}
+	skb_trim(first, hdrlen + per_fragm);
+
+	tx->num_extra_frag = num_fragm - 1;
+	tx->extra_frag = frags;
 
 	return TX_CONTINUE;
+
+ fail:
+	printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name);
+	if (frags) {
+		for (i = 0; i < num_fragm - 1; i++)
+			if (frags[i])
+				dev_kfree_skb(frags[i]);
+		kfree(frags);
+	}
+	I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment);
+	return TX_DROP;
 }
 
 static ieee80211_tx_result
-ieee80211_tx_h_load_stats(struct ieee80211_tx_data *tx)
+ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
+{
+	if (!tx->key)
+		return TX_CONTINUE;
+
+	switch (tx->key->conf.alg) {
+	case ALG_WEP:
+		return ieee80211_crypto_wep_encrypt(tx);
+	case ALG_TKIP:
+		return ieee80211_crypto_tkip_encrypt(tx);
+	case ALG_CCMP:
+		return ieee80211_crypto_ccmp_encrypt(tx);
+	}
+
+	/* not reached */
+	WARN_ON(1);
+	return TX_DROP;
+}
+
+static ieee80211_tx_result
+ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_local *local = tx->local;
 	struct sk_buff *skb = tx->skb;
@@ -822,6 +810,20 @@ ieee80211_tx_h_load_stats(struct ieee80211_tx_data *tx)
 		tx->sta->channel_use_raw += load;
 	tx->sdata->channel_use_raw += load;
 
+	if (tx->sta) {
+		tx->sta->tx_packets++;
+		tx->sta->tx_fragments++;
+		tx->sta->tx_bytes += tx->skb->len;
+		if (tx->extra_frag) {
+			int i;
+			tx->sta->tx_fragments += tx->num_extra_frag;
+			for (i = 0; i < tx->num_extra_frag; i++) {
+				tx->sta->tx_bytes +=
+					tx->extra_frag[i]->len;
+			}
+		}
+	}
+
 	return TX_CONTINUE;
 }
 
@@ -834,11 +836,11 @@ static ieee80211_tx_handler ieee80211_tx_handlers[] =
 	ieee80211_tx_h_ps_buf,
 	ieee80211_tx_h_select_key,
 	ieee80211_tx_h_michael_mic_add,
-	ieee80211_tx_h_fragment,
-	ieee80211_tx_h_encrypt,
 	ieee80211_tx_h_rate_ctrl,
 	ieee80211_tx_h_misc,
-	ieee80211_tx_h_load_stats,
+	ieee80211_tx_h_fragment,
+	ieee80211_tx_h_encrypt,
+	ieee80211_tx_h_stats,
 	NULL
 };
 
-- 
cgit v1.2.3-70-g09d2


From e039fa4a4195ac4ee895e6f3d1334beed63256fe Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 15 May 2008 12:55:29 +0200
Subject: mac80211: move TX info into skb->cb

This patch converts mac80211 and all drivers to have transmit
information and status in skb->cb rather than allocating extra
memory for it and copying all the data around. To make it fit,
a union is used where only data that is necessary for all steps
is kept outside of the union.

A number of fixes were done by Ivo, as well as the rt2x00 part
of this patch.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/adm8211.c              |  31 ++-
 drivers/net/wireless/adm8211.h              |   1 -
 drivers/net/wireless/ath5k/base.c           |  70 +++----
 drivers/net/wireless/ath5k/base.h           |   1 -
 drivers/net/wireless/b43/b43.h              |   1 -
 drivers/net/wireless/b43/dma.c              |  45 ++--
 drivers/net/wireless/b43/dma.h              |   3 +-
 drivers/net/wireless/b43/main.c             |  33 ++-
 drivers/net/wireless/b43/pio.c              |  36 ++--
 drivers/net/wireless/b43/pio.h              |   8 +-
 drivers/net/wireless/b43/xmit.c             |  58 +++---
 drivers/net/wireless/b43/xmit.h             |   4 +-
 drivers/net/wireless/b43legacy/dma.c        |  36 ++--
 drivers/net/wireless/b43legacy/dma.h        |   4 +-
 drivers/net/wireless/b43legacy/main.c       |  12 +-
 drivers/net/wireless/b43legacy/pio.c        |  19 +-
 drivers/net/wireless/b43legacy/pio.h        |   4 +-
 drivers/net/wireless/b43legacy/xmit.c       |  44 ++--
 drivers/net/wireless/b43legacy/xmit.h       |   2 +-
 drivers/net/wireless/iwlwifi/iwl-3945-rs.c  |  12 +-
 drivers/net/wireless/iwlwifi/iwl-3945.c     |  24 +--
 drivers/net/wireless/iwlwifi/iwl-3945.h     |   3 +-
 drivers/net/wireless/iwlwifi/iwl-4965-rs.c  |  49 +++--
 drivers/net/wireless/iwlwifi/iwl-4965.c     |  28 +--
 drivers/net/wireless/iwlwifi/iwl-core.h     |   3 +-
 drivers/net/wireless/iwlwifi/iwl-dev.h      |   5 +-
 drivers/net/wireless/iwlwifi/iwl-tx.c       |  40 ++--
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  48 ++---
 drivers/net/wireless/iwlwifi/iwl4965-base.c |  52 ++---
 drivers/net/wireless/p54/p54common.c        |  90 ++++----
 drivers/net/wireless/p54/p54common.h        |   1 -
 drivers/net/wireless/rt2x00/rt2400pci.c     |   8 +-
 drivers/net/wireless/rt2x00/rt2500pci.c     |   8 +-
 drivers/net/wireless/rt2x00/rt2500usb.c     |   9 +-
 drivers/net/wireless/rt2x00/rt2x00.h        |  10 +-
 drivers/net/wireless/rt2x00/rt2x00dev.c     |  49 +++--
 drivers/net/wireless/rt2x00/rt2x00mac.c     |  74 ++++---
 drivers/net/wireless/rt2x00/rt2x00pci.c     |  11 +-
 drivers/net/wireless/rt2x00/rt2x00pci.h     |   6 +-
 drivers/net/wireless/rt2x00/rt2x00queue.c   |  31 ++-
 drivers/net/wireless/rt2x00/rt2x00queue.h   |  15 +-
 drivers/net/wireless/rt2x00/rt2x00usb.c     |  12 +-
 drivers/net/wireless/rt2x00/rt2x00usb.h     |   9 +-
 drivers/net/wireless/rt2x00/rt61pci.c       |   8 +-
 drivers/net/wireless/rt2x00/rt73usb.c       |   8 +-
 drivers/net/wireless/rtl8180_dev.c          |  57 +++--
 drivers/net/wireless/rtl8187.h              |   6 -
 drivers/net/wireless/rtl8187_dev.c          |  41 ++--
 drivers/net/wireless/zd1211rw/zd_mac.c      | 156 ++++----------
 drivers/net/wireless/zd1211rw/zd_mac.h      |  16 --
 drivers/net/wireless/zd1211rw/zd_usb.c      |   6 +-
 include/net/mac80211.h                      | 299 ++++++++++++---------------
 net/mac80211/ieee80211_i.h                  |  18 +-
 net/mac80211/main.c                         | 128 ++++--------
 net/mac80211/mlme.c                         |  30 +--
 net/mac80211/rate.h                         |   8 +-
 net/mac80211/rc80211_pid.h                  |   4 +-
 net/mac80211/rc80211_pid_algo.c             |  18 +-
 net/mac80211/rc80211_pid_debugfs.c          |   8 +-
 net/mac80211/rx.c                           |  10 +-
 net/mac80211/sta_info.c                     |   6 +-
 net/mac80211/sta_info.h                     |   2 +-
 net/mac80211/tx.c                           | 310 ++++++++++++++--------------
 net/mac80211/util.c                         |  10 +-
 net/mac80211/wep.c                          |   9 +-
 net/mac80211/wme.c                          |  22 +-
 net/mac80211/wpa.c                          |  65 +++---
 67 files changed, 980 insertions(+), 1274 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index 22db664a58d..0ba55ba9395 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -324,7 +324,7 @@ static void adm8211_interrupt_tci(struct ieee80211_hw *dev)
 	for (dirty_tx = priv->dirty_tx; priv->cur_tx - dirty_tx; dirty_tx++) {
 		unsigned int entry = dirty_tx % priv->tx_ring_size;
 		u32 status = le32_to_cpu(priv->tx_ring[entry].status);
-		struct ieee80211_tx_status tx_status;
+		struct ieee80211_tx_info *txi;
 		struct adm8211_tx_ring_info *info;
 		struct sk_buff *skb;
 
@@ -334,24 +334,23 @@ static void adm8211_interrupt_tci(struct ieee80211_hw *dev)
 
 		info = &priv->tx_buffers[entry];
 		skb = info->skb;
+		txi = IEEE80211_SKB_CB(skb);
 
 		/* TODO: check TDES0_STATUS_TUF and TDES0_STATUS_TRO */
 
 		pci_unmap_single(priv->pdev, info->mapping,
 				 info->skb->len, PCI_DMA_TODEVICE);
 
-		memset(&tx_status, 0, sizeof(tx_status));
+		memset(&txi->status, 0, sizeof(txi->status));
 		skb_pull(skb, sizeof(struct adm8211_tx_hdr));
 		memcpy(skb_push(skb, info->hdrlen), skb->cb, info->hdrlen);
-		memcpy(&tx_status.control, &info->tx_control,
-		       sizeof(tx_status.control));
-		if (!(tx_status.control.flags & IEEE80211_TXCTL_NO_ACK)) {
+		if (!(txi->flags & IEEE80211_TX_CTL_NO_ACK)) {
 			if (status & TDES0_STATUS_ES)
-				tx_status.excessive_retries = 1;
+				txi->status.excessive_retries = 1;
 			else
-				tx_status.flags |= IEEE80211_TX_STATUS_ACK;
+				txi->flags |= IEEE80211_TX_STAT_ACK;
 		}
-		ieee80211_tx_status_irqsafe(dev, skb, &tx_status);
+		ieee80211_tx_status_irqsafe(dev, skb);
 
 		info->skb = NULL;
 	}
@@ -1638,7 +1637,6 @@ static void adm8211_calc_durations(int *dur, int *plcp, size_t payload_len, int
 /* Transmit skb w/adm8211_tx_hdr (802.11 header created by hardware) */
 static void adm8211_tx_raw(struct ieee80211_hw *dev, struct sk_buff *skb,
 			   u16 plcp_signal,
-			   struct ieee80211_tx_control *control,
 			   size_t hdrlen)
 {
 	struct adm8211_priv *priv = dev->priv;
@@ -1664,7 +1662,6 @@ static void adm8211_tx_raw(struct ieee80211_hw *dev, struct sk_buff *skb,
 
 	priv->tx_buffers[entry].skb = skb;
 	priv->tx_buffers[entry].mapping = mapping;
-	memcpy(&priv->tx_buffers[entry].tx_control, control, sizeof(*control));
 	priv->tx_buffers[entry].hdrlen = hdrlen;
 	priv->tx_ring[entry].buffer1 = cpu_to_le32(mapping);
 
@@ -1685,17 +1682,17 @@ static void adm8211_tx_raw(struct ieee80211_hw *dev, struct sk_buff *skb,
 }
 
 /* Put adm8211_tx_hdr on skb and transmit */
-static int adm8211_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
-		      struct ieee80211_tx_control *control)
+static int adm8211_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 {
 	struct adm8211_tx_hdr *txhdr;
 	u16 fc;
 	size_t payload_len, hdrlen;
 	int plcp, dur, len, plcp_signal, short_preamble;
 	struct ieee80211_hdr *hdr;
-	struct ieee80211_rate *txrate = ieee80211_get_tx_rate(dev, control);
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_rate *txrate = ieee80211_get_tx_rate(dev, info);
 
-	short_preamble = !!(txrate->flags & IEEE80211_TXCTL_SHORT_PREAMBLE);
+	short_preamble = !!(txrate->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE);
 	plcp_signal = txrate->bitrate;
 
 	hdr = (struct ieee80211_hdr *)skb->data;
@@ -1730,15 +1727,15 @@ static int adm8211_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	if (short_preamble)
 		txhdr->header_control |= cpu_to_le16(ADM8211_TXHDRCTL_SHORT_PREAMBLE);
 
-	if (control->flags & IEEE80211_TXCTL_USE_RTS_CTS)
+	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
 		txhdr->header_control |= cpu_to_le16(ADM8211_TXHDRCTL_ENABLE_RTS);
 
 	if (fc & IEEE80211_FCTL_PROTECTED)
 		txhdr->header_control |= cpu_to_le16(ADM8211_TXHDRCTL_ENABLE_WEP_ENGINE);
 
-	txhdr->retry_limit = control->retry_limit;
+	txhdr->retry_limit = info->control.retry_limit;
 
-	adm8211_tx_raw(dev, skb, plcp_signal, control, hdrlen);
+	adm8211_tx_raw(dev, skb, plcp_signal, hdrlen);
 
 	return NETDEV_TX_OK;
 }
diff --git a/drivers/net/wireless/adm8211.h b/drivers/net/wireless/adm8211.h
index 8d7c564b3b0..9b190ee26e9 100644
--- a/drivers/net/wireless/adm8211.h
+++ b/drivers/net/wireless/adm8211.h
@@ -443,7 +443,6 @@ struct adm8211_rx_ring_info {
 struct adm8211_tx_ring_info {
 	struct sk_buff *skb;
 	dma_addr_t mapping;
-	struct ieee80211_tx_control tx_control;
 	size_t hdrlen;
 };
 
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 32ee351a765..7d97934265d 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -167,8 +167,7 @@ static struct pci_driver ath5k_pci_driver = {
 /*
  * Prototypes - MAC 802.11 stack related functions
  */
-static int ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
-		struct ieee80211_tx_control *ctl);
+static int ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb);
 static int ath5k_reset(struct ieee80211_hw *hw);
 static int ath5k_start(struct ieee80211_hw *hw);
 static void ath5k_stop(struct ieee80211_hw *hw);
@@ -196,8 +195,7 @@ static int ath5k_get_tx_stats(struct ieee80211_hw *hw,
 static u64 ath5k_get_tsf(struct ieee80211_hw *hw);
 static void ath5k_reset_tsf(struct ieee80211_hw *hw);
 static int ath5k_beacon_update(struct ieee80211_hw *hw,
-		struct sk_buff *skb,
-		struct ieee80211_tx_control *ctl);
+		struct sk_buff *skb);
 
 static struct ieee80211_ops ath5k_hw_ops = {
 	.tx 		= ath5k_tx,
@@ -251,9 +249,7 @@ static void	ath5k_desc_free(struct ath5k_softc *sc,
 static int 	ath5k_rxbuf_setup(struct ath5k_softc *sc,
 				struct ath5k_buf *bf);
 static int 	ath5k_txbuf_setup(struct ath5k_softc *sc,
-				struct ath5k_buf *bf,
-				struct ieee80211_tx_control *ctl);
-
+				struct ath5k_buf *bf);
 static inline void ath5k_txbuf_free(struct ath5k_softc *sc,
 				struct ath5k_buf *bf)
 {
@@ -289,8 +285,7 @@ static void 	ath5k_tx_processq(struct ath5k_softc *sc,
 static void 	ath5k_tasklet_tx(unsigned long data);
 /* Beacon handling */
 static int 	ath5k_beacon_setup(struct ath5k_softc *sc,
-				struct ath5k_buf *bf,
-				struct ieee80211_tx_control *ctl);
+					struct ath5k_buf *bf);
 static void 	ath5k_beacon_send(struct ath5k_softc *sc);
 static void 	ath5k_beacon_config(struct ath5k_softc *sc);
 static void	ath5k_beacon_update_timers(struct ath5k_softc *sc, u64 bc_tsf);
@@ -1295,37 +1290,36 @@ ath5k_rxbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 }
 
 static int
-ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf,
-		struct ieee80211_tx_control *ctl)
+ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 {
 	struct ath5k_hw *ah = sc->ah;
 	struct ath5k_txq *txq = sc->txq;
 	struct ath5k_desc *ds = bf->desc;
 	struct sk_buff *skb = bf->skb;
+	struct ieee80211_tx_info *info = (void*) skb->cb;
 	unsigned int pktlen, flags, keyidx = AR5K_TXKEYIX_INVALID;
 	int ret;
 
 	flags = AR5K_TXDESC_INTREQ | AR5K_TXDESC_CLRDMASK;
-	bf->ctl = *ctl;
+
 	/* XXX endianness */
 	bf->skbaddr = pci_map_single(sc->pdev, skb->data, skb->len,
 			PCI_DMA_TODEVICE);
 
-	if (ctl->flags & IEEE80211_TXCTL_NO_ACK)
+	if (info->flags & IEEE80211_TX_CTL_NO_ACK)
 		flags |= AR5K_TXDESC_NOACK;
 
 	pktlen = skb->len;
 
-	if (!(ctl->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) {
-		keyidx = ctl->hw_key->hw_key_idx;
-		pktlen += ctl->icv_len;
+	if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) {
+		keyidx = info->control.hw_key->hw_key_idx;
+		pktlen += info->control.icv_len;
 	}
-
 	ret = ah->ah_setup_tx_desc(ah, ds, pktlen,
 		ieee80211_get_hdrlen_from_skb(skb), AR5K_PKT_TYPE_NORMAL,
 		(sc->power_level * 2),
-		ieee80211_get_tx_rate(sc->hw, ctl)->hw_value,
-		ctl->retry_limit, keyidx, 0, flags, 0, 0);
+		ieee80211_get_tx_rate(sc->hw, info)->hw_value,
+		info->control.retry_limit, keyidx, 0, flags, 0, 0);
 	if (ret)
 		goto err_unmap;
 
@@ -1927,11 +1921,11 @@ next:
 static void
 ath5k_tx_processq(struct ath5k_softc *sc, struct ath5k_txq *txq)
 {
-	struct ieee80211_tx_status txs = {};
 	struct ath5k_tx_status ts = {};
 	struct ath5k_buf *bf, *bf0;
 	struct ath5k_desc *ds;
 	struct sk_buff *skb;
+	struct ieee80211_tx_info *info;
 	int ret;
 
 	spin_lock(&txq->lock);
@@ -1951,24 +1945,25 @@ ath5k_tx_processq(struct ath5k_softc *sc, struct ath5k_txq *txq)
 		}
 
 		skb = bf->skb;
+		info = (void*) skb->cb;
 		bf->skb = NULL;
+
 		pci_unmap_single(sc->pdev, bf->skbaddr, skb->len,
 				PCI_DMA_TODEVICE);
 
-		txs.control = bf->ctl;
-		txs.retry_count = ts.ts_shortretry + ts.ts_longretry / 6;
+		info->status.retry_count = ts.ts_shortretry + ts.ts_longretry / 6;
 		if (unlikely(ts.ts_status)) {
 			sc->ll_stats.dot11ACKFailureCount++;
 			if (ts.ts_status & AR5K_TXERR_XRETRY)
-				txs.excessive_retries = 1;
+				info->status.excessive_retries = 1;
 			else if (ts.ts_status & AR5K_TXERR_FILT)
-				txs.flags |= IEEE80211_TX_STATUS_TX_FILTERED;
+				info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
 		} else {
-			txs.flags |= IEEE80211_TX_STATUS_ACK;
-			txs.ack_signal = ts.ts_rssi;
+			info->flags |= IEEE80211_TX_STAT_ACK;
+			info->status.ack_signal = ts.ts_rssi;
 		}
 
-		ieee80211_tx_status(sc->hw, skb, &txs);
+		ieee80211_tx_status(sc->hw, skb);
 		sc->tx_stats[txq->qnum].count++;
 
 		spin_lock(&sc->txbuflock);
@@ -2005,10 +2000,10 @@ ath5k_tasklet_tx(unsigned long data)
  * Setup the beacon frame for transmit.
  */
 static int
-ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf,
-		struct ieee80211_tx_control *ctl)
+ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 {
 	struct sk_buff *skb = bf->skb;
+	struct	ieee80211_tx_info *info = (void*) skb->cb;
 	struct ath5k_hw *ah = sc->ah;
 	struct ath5k_desc *ds;
 	int ret, antenna = 0;
@@ -2047,7 +2042,7 @@ ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf,
 	ret = ah->ah_setup_tx_desc(ah, ds, skb->len,
 			ieee80211_get_hdrlen_from_skb(skb),
 			AR5K_PKT_TYPE_BEACON, (sc->power_level * 2),
-			ieee80211_get_tx_rate(sc->hw, ctl)->hw_value,
+			ieee80211_get_tx_rate(sc->hw, info)->hw_value,
 			1, AR5K_TXKEYIX_INVALID,
 			antenna, flags, 0, 0);
 	if (ret)
@@ -2626,11 +2621,11 @@ ath5k_led_event(struct ath5k_softc *sc, int event)
 \********************/
 
 static int
-ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
-			struct ieee80211_tx_control *ctl)
+ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct ath5k_softc *sc = hw->priv;
 	struct ath5k_buf *bf;
+	struct ieee80211_tx_info *info = (void*) skb->cb;
 	unsigned long flags;
 	int hdrlen;
 	int pad;
@@ -2656,13 +2651,13 @@ ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 		memmove(skb->data, skb->data+pad, hdrlen);
 	}
 
-	sc->led_txrate = ieee80211_get_tx_rate(hw, ctl)->hw_value;
+	sc->led_txrate = ieee80211_get_tx_rate(hw, info)->hw_value;
 
 	spin_lock_irqsave(&sc->txbuflock, flags);
 	if (list_empty(&sc->txbuf)) {
 		ATH5K_ERR(sc, "no further txbuf available, dropping packet\n");
 		spin_unlock_irqrestore(&sc->txbuflock, flags);
-		ieee80211_stop_queue(hw, ctl->queue);
+		ieee80211_stop_queue(hw, info->queue);
 		return -1;
 	}
 	bf = list_first_entry(&sc->txbuf, struct ath5k_buf, list);
@@ -2674,7 +2669,7 @@ ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 
 	bf->skb = skb;
 
-	if (ath5k_txbuf_setup(sc, bf, ctl)) {
+	if (ath5k_txbuf_setup(sc, bf)) {
 		bf->skb = NULL;
 		spin_lock_irqsave(&sc->txbuflock, flags);
 		list_add_tail(&bf->list, &sc->txbuf);
@@ -3052,8 +3047,7 @@ ath5k_reset_tsf(struct ieee80211_hw *hw)
 }
 
 static int
-ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
-			struct ieee80211_tx_control *ctl)
+ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct ath5k_softc *sc = hw->priv;
 	int ret;
@@ -3069,7 +3063,7 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
 
 	ath5k_txbuf_free(sc, sc->bbuf);
 	sc->bbuf->skb = skb;
-	ret = ath5k_beacon_setup(sc, sc->bbuf, ctl);
+	ret = ath5k_beacon_setup(sc, sc->bbuf);
 	if (ret)
 		sc->bbuf->skb = NULL;
 	else
diff --git a/drivers/net/wireless/ath5k/base.h b/drivers/net/wireless/ath5k/base.h
index ecb17495488..bb4b26d523a 100644
--- a/drivers/net/wireless/ath5k/base.h
+++ b/drivers/net/wireless/ath5k/base.h
@@ -60,7 +60,6 @@ struct ath5k_buf {
 	dma_addr_t		daddr;	/* physical addr of desc */
 	struct sk_buff		*skb;	/* skbuff for buf */
 	dma_addr_t		skbaddr;/* physical addr of skb data */
-	struct ieee80211_tx_control ctl;
 };
 
 /*
diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h
index 0c2bc061e8f..aa493830a82 100644
--- a/drivers/net/wireless/b43/b43.h
+++ b/drivers/net/wireless/b43/b43.h
@@ -733,7 +733,6 @@ struct b43_wl {
 	/* The beacon we are currently using (AP or IBSS mode).
 	 * This beacon stuff is protected by the irq_lock. */
 	struct sk_buff *current_beacon;
-	struct ieee80211_tx_control beacon_txctl;
 	bool beacon0_uploaded;
 	bool beacon1_uploaded;
 	struct work_struct beacon_update_trigger;
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index f50e2014ffb..aced9866d81 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -1131,10 +1131,10 @@ struct b43_dmaring *parse_cookie(struct b43_wldev *dev, u16 cookie, int *slot)
 }
 
 static int dma_tx_fragment(struct b43_dmaring *ring,
-			   struct sk_buff *skb,
-			   struct ieee80211_tx_control *ctl)
+			   struct sk_buff *skb)
 {
 	const struct b43_dma_ops *ops = ring->ops;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	u8 *header;
 	int slot, old_top_slot, old_used_slots;
 	int err;
@@ -1158,7 +1158,7 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
 	header = &(ring->txhdr_cache[slot * hdrsize]);
 	cookie = generate_cookie(ring, slot);
 	err = b43_generate_txhdr(ring->dev, header,
-				 skb->data, skb->len, ctl, cookie);
+				 skb->data, skb->len, info, cookie);
 	if (unlikely(err)) {
 		ring->current_slot = old_top_slot;
 		ring->used_slots = old_used_slots;
@@ -1180,7 +1180,6 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
 	desc = ops->idx2desc(ring, slot, &meta);
 	memset(meta, 0, sizeof(*meta));
 
-	memcpy(&meta->txstat.control, ctl, sizeof(*ctl));
 	meta->skb = skb;
 	meta->is_last_fragment = 1;
 
@@ -1210,7 +1209,7 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
 
 	ops->fill_descriptor(ring, desc, meta->dmaaddr, skb->len, 0, 1, 1);
 
-	if (ctl->flags & IEEE80211_TXCTL_SEND_AFTER_DTIM) {
+	if (info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) {
 		/* Tell the firmware about the cookie of the last
 		 * mcast frame, so it can clear the more-data bit in it. */
 		b43_shm_write16(ring->dev, B43_SHM_SHARED,
@@ -1281,16 +1280,16 @@ static struct b43_dmaring * select_ring_by_priority(struct b43_wldev *dev,
 	return ring;
 }
 
-int b43_dma_tx(struct b43_wldev *dev,
-	       struct sk_buff *skb, struct ieee80211_tx_control *ctl)
+int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
 {
 	struct b43_dmaring *ring;
 	struct ieee80211_hdr *hdr;
 	int err = 0;
 	unsigned long flags;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	hdr = (struct ieee80211_hdr *)skb->data;
-	if (ctl->flags & IEEE80211_TXCTL_SEND_AFTER_DTIM) {
+	if (info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) {
 		/* The multicast ring will be sent after the DTIM */
 		ring = dev->dma.tx_ring_mcast;
 		/* Set the more-data bit. Ucode will clear it on
@@ -1298,7 +1297,7 @@ int b43_dma_tx(struct b43_wldev *dev,
 		hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA);
 	} else {
 		/* Decide by priority where to put this frame. */
-		ring = select_ring_by_priority(dev, ctl->queue);
+		ring = select_ring_by_priority(dev, info->queue);
 	}
 
 	spin_lock_irqsave(&ring->lock, flags);
@@ -1316,9 +1315,9 @@ int b43_dma_tx(struct b43_wldev *dev,
 	/* Assign the queue number to the ring (if not already done before)
 	 * so TX status handling can use it. The queue to ring mapping is
 	 * static, so we don't need to store it per frame. */
-	ring->queue_prio = ctl->queue;
+	ring->queue_prio = info->queue;
 
-	err = dma_tx_fragment(ring, skb, ctl);
+	err = dma_tx_fragment(ring, skb);
 	if (unlikely(err == -ENOKEY)) {
 		/* Drop this packet, as we don't have the encryption key
 		 * anymore and must not transmit it unencrypted. */
@@ -1334,7 +1333,7 @@ int b43_dma_tx(struct b43_wldev *dev,
 	if ((free_slots(ring) < SLOTS_PER_PACKET) ||
 	    should_inject_overflow(ring)) {
 		/* This TX ring is full. */
-		ieee80211_stop_queue(dev->wl->hw, ctl->queue);
+		ieee80211_stop_queue(dev->wl->hw, info->queue);
 		ring->stopped = 1;
 		if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
 			b43dbg(dev->wl, "Stopped TX ring %d\n", ring->index);
@@ -1377,13 +1376,19 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
 					 b43_txhdr_size(dev), 1);
 
 		if (meta->is_last_fragment) {
-			B43_WARN_ON(!meta->skb);
-			/* Call back to inform the ieee80211 subsystem about the
-			 * status of the transmission.
-			 * Some fields of txstat are already filled in dma_tx().
+			struct ieee80211_tx_info *info;
+
+			BUG_ON(!meta->skb);
+
+			info = IEEE80211_SKB_CB(meta->skb);
+
+			memset(&info->status, 0, sizeof(info->status));
+
+			/*
+			 * Call back to inform the ieee80211 subsystem about
+			 * the status of the transmission.
 			 */
-			frame_succeed = b43_fill_txstatus_report(
-						&(meta->txstat), status);
+			frame_succeed = b43_fill_txstatus_report(info, status);
 #ifdef CONFIG_B43_DEBUG
 			if (frame_succeed)
 				ring->nr_succeed_tx_packets++;
@@ -1391,8 +1396,8 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
 				ring->nr_failed_tx_packets++;
 			ring->nr_total_packet_tries += status->frame_count;
 #endif /* DEBUG */
-			ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb,
-						    &(meta->txstat));
+			ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb);
+
 			/* skb is freed by ieee80211_tx_status_irqsafe() */
 			meta->skb = NULL;
 		} else {
diff --git a/drivers/net/wireless/b43/dma.h b/drivers/net/wireless/b43/dma.h
index 20acf885dba..d1eb5c0848a 100644
--- a/drivers/net/wireless/b43/dma.h
+++ b/drivers/net/wireless/b43/dma.h
@@ -181,7 +181,6 @@ struct b43_dmadesc_meta {
 	dma_addr_t dmaaddr;
 	/* ieee80211 TX status. Only used once per 802.11 frag. */
 	bool is_last_fragment;
-	struct ieee80211_tx_status txstat;
 };
 
 struct b43_dmaring;
@@ -285,7 +284,7 @@ void b43_dma_get_tx_stats(struct b43_wldev *dev,
 			  struct ieee80211_tx_queue_stats *stats);
 
 int b43_dma_tx(struct b43_wldev *dev,
-	       struct sk_buff *skb, struct ieee80211_tx_control *ctl);
+	       struct sk_buff *skb);
 void b43_dma_handle_txstatus(struct b43_wldev *dev,
 			     const struct b43_txstatus *status);
 
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index e428645352b..3622d76de1e 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -1368,18 +1368,18 @@ static void b43_write_beacon_template(struct b43_wldev *dev,
 	unsigned int rate;
 	u16 ctl;
 	int antenna;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(dev->wl->current_beacon);
 
 	bcn = (const struct ieee80211_mgmt *)(dev->wl->current_beacon->data);
 	len = min((size_t) dev->wl->current_beacon->len,
 		  0x200 - sizeof(struct b43_plcp_hdr6));
-	rate = ieee80211_get_tx_rate(dev->wl->hw, &dev->wl->beacon_txctl)->hw_value;
+	rate = ieee80211_get_tx_rate(dev->wl->hw, info)->hw_value;
 
 	b43_write_template_common(dev, (const u8 *)bcn,
 				  len, ram_offset, shm_size_offset, rate);
 
 	/* Write the PHY TX control parameters. */
-	antenna = b43_antenna_from_ieee80211(dev,
-			dev->wl->beacon_txctl.antenna_sel_tx);
+	antenna = b43_antenna_from_ieee80211(dev, info->antenna_sel_tx);
 	antenna = b43_antenna_to_phyctl(antenna);
 	ctl = b43_shm_read16(dev, B43_SHM_SHARED, B43_SHM_SH_BEACPHYCTL);
 	/* We can't send beacons with short preamble. Would get PHY errors. */
@@ -1613,8 +1613,7 @@ static void b43_beacon_update_trigger_work(struct work_struct *work)
 
 /* Asynchronously update the packet templates in template RAM.
  * Locking: Requires wl->irq_lock to be locked. */
-static void b43_update_templates(struct b43_wl *wl, struct sk_buff *beacon,
-				 const struct ieee80211_tx_control *txctl)
+static void b43_update_templates(struct b43_wl *wl, struct sk_buff *beacon)
 {
 	/* This is the top half of the ansynchronous beacon update.
 	 * The bottom half is the beacon IRQ.
@@ -1625,7 +1624,6 @@ static void b43_update_templates(struct b43_wl *wl, struct sk_buff *beacon,
 	if (wl->current_beacon)
 		dev_kfree_skb_any(wl->current_beacon);
 	wl->current_beacon = beacon;
-	memcpy(&wl->beacon_txctl, txctl, sizeof(wl->beacon_txctl));
 	wl->beacon0_uploaded = 0;
 	wl->beacon1_uploaded = 0;
 	queue_work(wl->hw->workqueue, &wl->beacon_update_trigger);
@@ -2813,8 +2811,7 @@ static int b43_rng_init(struct b43_wl *wl)
 }
 
 static int b43_op_tx(struct ieee80211_hw *hw,
-		     struct sk_buff *skb,
-		     struct ieee80211_tx_control *ctl)
+		     struct sk_buff *skb)
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
 	struct b43_wldev *dev = wl->current_dev;
@@ -2836,9 +2833,9 @@ static int b43_op_tx(struct ieee80211_hw *hw,
 	err = -ENODEV;
 	if (likely(b43_status(dev) >= B43_STAT_STARTED)) {
 		if (b43_using_pio_transfers(dev))
-			err = b43_pio_tx(dev, skb, ctl);
+			err = b43_pio_tx(dev, skb);
 		else
-			err = b43_dma_tx(dev, skb, ctl);
+			err = b43_dma_tx(dev, skb);
 	}
 
 	read_unlock_irqrestore(&wl->tx_lock, flags);
@@ -3429,10 +3426,8 @@ static int b43_op_config_interface(struct ieee80211_hw *hw,
 		if (b43_is_mode(wl, IEEE80211_IF_TYPE_AP)) {
 			B43_WARN_ON(conf->type != IEEE80211_IF_TYPE_AP);
 			b43_set_ssid(dev, conf->ssid, conf->ssid_len);
-			if (conf->beacon) {
-				b43_update_templates(wl, conf->beacon,
-						     conf->beacon_control);
-			}
+			if (conf->beacon)
+				b43_update_templates(wl, conf->beacon);
 		}
 		b43_write_mac_bssid_templates(dev);
 	}
@@ -4118,31 +4113,29 @@ static int b43_op_beacon_set_tim(struct ieee80211_hw *hw, int aid, int set)
 	struct b43_wl *wl = hw_to_b43_wl(hw);
 	struct sk_buff *beacon;
 	unsigned long flags;
-	struct ieee80211_tx_control txctl;
 
 	/* We could modify the existing beacon and set the aid bit in
 	 * the TIM field, but that would probably require resizing and
 	 * moving of data within the beacon template.
 	 * Simply request a new beacon and let mac80211 do the hard work. */
-	beacon = ieee80211_beacon_get(hw, wl->vif, &txctl);
+	beacon = ieee80211_beacon_get(hw, wl->vif);
 	if (unlikely(!beacon))
 		return -ENOMEM;
 	spin_lock_irqsave(&wl->irq_lock, flags);
-	b43_update_templates(wl, beacon, &txctl);
+	b43_update_templates(wl, beacon);
 	spin_unlock_irqrestore(&wl->irq_lock, flags);
 
 	return 0;
 }
 
 static int b43_op_ibss_beacon_update(struct ieee80211_hw *hw,
-				     struct sk_buff *beacon,
-				     struct ieee80211_tx_control *ctl)
+				     struct sk_buff *beacon)
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
 	unsigned long flags;
 
 	spin_lock_irqsave(&wl->irq_lock, flags);
-	b43_update_templates(wl, beacon, ctl);
+	b43_update_templates(wl, beacon);
 	spin_unlock_irqrestore(&wl->irq_lock, flags);
 
 	return 0;
diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c
index 08c8a087f30..284786a94e7 100644
--- a/drivers/net/wireless/b43/pio.c
+++ b/drivers/net/wireless/b43/pio.c
@@ -446,29 +446,27 @@ static void pio_tx_frame_4byte_queue(struct b43_pio_txpacket *pack,
 }
 
 static int pio_tx_frame(struct b43_pio_txqueue *q,
-			struct sk_buff *skb,
-			struct ieee80211_tx_control *ctl)
+			struct sk_buff *skb)
 {
 	struct b43_pio_txpacket *pack;
 	struct b43_txhdr txhdr;
 	u16 cookie;
 	int err;
 	unsigned int hdrlen;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	B43_WARN_ON(list_empty(&q->packets_list));
 	pack = list_entry(q->packets_list.next,
 			  struct b43_pio_txpacket, list);
-	memset(&pack->txstat, 0, sizeof(pack->txstat));
-	memcpy(&pack->txstat.control, ctl, sizeof(*ctl));
 
 	cookie = generate_cookie(q, pack);
 	hdrlen = b43_txhdr_size(q->dev);
 	err = b43_generate_txhdr(q->dev, (u8 *)&txhdr, skb->data,
-				 skb->len, ctl, cookie);
+				 skb->len, info, cookie);
 	if (err)
 		return err;
 
-	if (ctl->flags & IEEE80211_TXCTL_SEND_AFTER_DTIM) {
+	if (info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) {
 		/* Tell the firmware about the cookie of the last
 		 * mcast frame, so it can clear the more-data bit in it. */
 		b43_shm_write16(q->dev, B43_SHM_SHARED,
@@ -492,17 +490,18 @@ static int pio_tx_frame(struct b43_pio_txqueue *q,
 	return 0;
 }
 
-int b43_pio_tx(struct b43_wldev *dev,
-	       struct sk_buff *skb, struct ieee80211_tx_control *ctl)
+int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb)
 {
 	struct b43_pio_txqueue *q;
 	struct ieee80211_hdr *hdr;
 	unsigned long flags;
 	unsigned int hdrlen, total_len;
 	int err = 0;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	hdr = (struct ieee80211_hdr *)skb->data;
-	if (ctl->flags & IEEE80211_TXCTL_SEND_AFTER_DTIM) {
+
+	if (info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) {
 		/* The multicast queue will be sent after the DTIM. */
 		q = dev->pio.tx_queue_mcast;
 		/* Set the frame More-Data bit. Ucode will clear it
@@ -510,7 +509,7 @@ int b43_pio_tx(struct b43_wldev *dev,
 		hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA);
 	} else {
 		/* Decide by priority where to put this frame. */
-		q = select_queue_by_priority(dev, ctl->queue);
+		q = select_queue_by_priority(dev, info->queue);
 	}
 
 	spin_lock_irqsave(&q->lock, flags);
@@ -533,7 +532,7 @@ int b43_pio_tx(struct b43_wldev *dev,
 	if (total_len > (q->buffer_size - q->buffer_used)) {
 		/* Not enough memory on the queue. */
 		err = -EBUSY;
-		ieee80211_stop_queue(dev->wl->hw, ctl->queue);
+		ieee80211_stop_queue(dev->wl->hw, info->queue);
 		q->stopped = 1;
 		goto out_unlock;
 	}
@@ -541,9 +540,9 @@ int b43_pio_tx(struct b43_wldev *dev,
 	/* Assign the queue number to the ring (if not already done before)
 	 * so TX status handling can use it. The mac80211-queue to b43-queue
 	 * mapping is static, so we don't need to store it per frame. */
-	q->queue_prio = ctl->queue;
+	q->queue_prio = info->queue;
 
-	err = pio_tx_frame(q, skb, ctl);
+	err = pio_tx_frame(q, skb);
 	if (unlikely(err == -ENOKEY)) {
 		/* Drop this packet, as we don't have the encryption key
 		 * anymore and must not transmit it unencrypted. */
@@ -561,7 +560,7 @@ int b43_pio_tx(struct b43_wldev *dev,
 	if (((q->buffer_size - q->buffer_used) < roundup(2 + 2 + 6, 4)) ||
 	    (q->free_packet_slots == 0)) {
 		/* The queue is full. */
-		ieee80211_stop_queue(dev->wl->hw, ctl->queue);
+		ieee80211_stop_queue(dev->wl->hw, info->queue);
 		q->stopped = 1;
 	}
 
@@ -578,6 +577,7 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev,
 	struct b43_pio_txqueue *q;
 	struct b43_pio_txpacket *pack = NULL;
 	unsigned int total_len;
+	struct ieee80211_tx_info *info;
 
 	q = parse_cookie(dev, status->cookie, &pack);
 	if (unlikely(!q))
@@ -586,15 +586,17 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev,
 
 	spin_lock(&q->lock); /* IRQs are already disabled. */
 
-	b43_fill_txstatus_report(&(pack->txstat), status);
+	info = (void *)pack->skb;
+	memset(&info->status, 0, sizeof(info->status));
+
+	b43_fill_txstatus_report(info, status);
 
 	total_len = pack->skb->len + b43_txhdr_size(dev);
 	total_len = roundup(total_len, 4);
 	q->buffer_used -= total_len;
 	q->free_packet_slots += 1;
 
-	ieee80211_tx_status_irqsafe(dev->wl->hw, pack->skb,
-				    &(pack->txstat));
+	ieee80211_tx_status_irqsafe(dev->wl->hw, pack->skb);
 	pack->skb = NULL;
 	list_add(&pack->list, &q->packets_list);
 
diff --git a/drivers/net/wireless/b43/pio.h b/drivers/net/wireless/b43/pio.h
index e2ec676cc9e..6c174c91ca2 100644
--- a/drivers/net/wireless/b43/pio.h
+++ b/drivers/net/wireless/b43/pio.h
@@ -62,8 +62,6 @@ struct b43_pio_txpacket {
 	struct b43_pio_txqueue *queue;
 	/* The TX data packet. */
 	struct sk_buff *skb;
-	/* The status meta data. */
-	struct ieee80211_tx_status txstat;
 	/* Index in the (struct b43_pio_txqueue)->packets array. */
 	u8 index;
 
@@ -167,8 +165,7 @@ int b43_pio_init(struct b43_wldev *dev);
 void b43_pio_stop(struct b43_wldev *dev);
 void b43_pio_free(struct b43_wldev *dev);
 
-int b43_pio_tx(struct b43_wldev *dev,
-	       struct sk_buff *skb, struct ieee80211_tx_control *ctl);
+int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb);
 void b43_pio_handle_txstatus(struct b43_wldev *dev,
 			     const struct b43_txstatus *status);
 void b43_pio_get_tx_stats(struct b43_wldev *dev,
@@ -193,8 +190,7 @@ static inline void b43_pio_stop(struct b43_wldev *dev)
 {
 }
 static inline int b43_pio_tx(struct b43_wldev *dev,
-			     struct sk_buff *skb,
-			     struct ieee80211_tx_control *ctl)
+			     struct sk_buff *skb)
 {
 	return 0;
 }
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 9b682a3cf5e..f9e1cff2aec 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -185,14 +185,14 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		       u8 *_txhdr,
 		       const unsigned char *fragment_data,
 		       unsigned int fragment_len,
-		       const struct ieee80211_tx_control *txctl,
+		       const struct ieee80211_tx_info *info,
 		       u16 cookie)
 {
 	struct b43_txhdr *txhdr = (struct b43_txhdr *)_txhdr;
 	const struct b43_phy *phy = &dev->phy;
 	const struct ieee80211_hdr *wlhdr =
 	    (const struct ieee80211_hdr *)fragment_data;
-	int use_encryption = (!(txctl->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT));
+	int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT));
 	u16 fctl = le16_to_cpu(wlhdr->frame_control);
 	struct ieee80211_rate *fbrate;
 	u8 rate, rate_fb;
@@ -205,10 +205,10 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 
 	memset(txhdr, 0, sizeof(*txhdr));
 
-	txrate = ieee80211_get_tx_rate(dev->wl->hw, txctl);
+	txrate = ieee80211_get_tx_rate(dev->wl->hw, info);
 	rate = txrate ? txrate->hw_value : B43_CCK_RATE_1MB;
 	rate_ofdm = b43_is_ofdm_rate(rate);
-	fbrate = ieee80211_get_alt_retry_rate(dev->wl->hw, txctl) ? : txrate;
+	fbrate = ieee80211_get_alt_retry_rate(dev->wl->hw, info) ? : txrate;
 	rate_fb = fbrate->hw_value;
 	rate_fb_ofdm = b43_is_ofdm_rate(rate_fb);
 
@@ -228,15 +228,13 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		 * use the original dur_id field. */
 		txhdr->dur_fb = wlhdr->duration_id;
 	} else {
-		txhdr->dur_fb = ieee80211_generic_frame_duration(dev->wl->hw,
-								 txctl->vif,
-								 fragment_len,
-								 fbrate);
+		txhdr->dur_fb = ieee80211_generic_frame_duration(
+			dev->wl->hw, info->control.vif, fragment_len, fbrate);
 	}
 
 	plcp_fragment_len = fragment_len + FCS_LEN;
 	if (use_encryption) {
-		u8 key_idx = txctl->hw_key->hw_key_idx;
+		u8 key_idx = info->control.hw_key->hw_key_idx;
 		struct b43_key *key;
 		int wlhdr_len;
 		size_t iv_len;
@@ -254,7 +252,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		}
 
 		/* Hardware appends ICV. */
-		plcp_fragment_len += txctl->icv_len;
+		plcp_fragment_len += info->control.icv_len;
 
 		key_idx = b43_kidx_to_fw(dev, key_idx);
 		mac_ctl |= (key_idx << B43_TXH_MAC_KEYIDX_SHIFT) &
@@ -262,7 +260,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		mac_ctl |= (key->algorithm << B43_TXH_MAC_KEYALG_SHIFT) &
 			   B43_TXH_MAC_KEYALG;
 		wlhdr_len = ieee80211_get_hdrlen(fctl);
-		iv_len = min((size_t) txctl->iv_len,
+		iv_len = min((size_t) info->control.iv_len,
 			     ARRAY_SIZE(txhdr->iv));
 		memcpy(txhdr->iv, ((u8 *) wlhdr) + wlhdr_len, iv_len);
 	}
@@ -293,10 +291,10 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		phy_ctl |= B43_TXH_PHY_ENC_OFDM;
 	else
 		phy_ctl |= B43_TXH_PHY_ENC_CCK;
-	if (txctl->flags & IEEE80211_TXCTL_SHORT_PREAMBLE)
+	if (info->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE)
 		phy_ctl |= B43_TXH_PHY_SHORTPRMBL;
 
-	switch (b43_ieee80211_antenna_sanitize(dev, txctl->antenna_sel_tx)) {
+	switch (b43_ieee80211_antenna_sanitize(dev, info->antenna_sel_tx)) {
 	case 0: /* Default */
 		phy_ctl |= B43_TXH_PHY_ANT01AUTO;
 		break;
@@ -317,21 +315,21 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 	}
 
 	/* MAC control */
-	if (!(txctl->flags & IEEE80211_TXCTL_NO_ACK))
+	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
 		mac_ctl |= B43_TXH_MAC_ACK;
 	if (!(((fctl & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) &&
 	      ((fctl & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)))
 		mac_ctl |= B43_TXH_MAC_HWSEQ;
-	if (txctl->flags & IEEE80211_TXCTL_FIRST_FRAGMENT)
+	if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
 		mac_ctl |= B43_TXH_MAC_STMSDU;
 	if (phy->type == B43_PHYTYPE_A)
 		mac_ctl |= B43_TXH_MAC_5GHZ;
-	if (txctl->flags & IEEE80211_TXCTL_LONG_RETRY_LIMIT)
+	if (info->flags & IEEE80211_TX_CTL_LONG_RETRY_LIMIT)
 		mac_ctl |= B43_TXH_MAC_LONGFRAME;
 
 	/* Generate the RTS or CTS-to-self frame */
-	if ((txctl->flags & IEEE80211_TXCTL_USE_RTS_CTS) ||
-	    (txctl->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) {
+	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
+	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) {
 		unsigned int len;
 		struct ieee80211_hdr *hdr;
 		int rts_rate, rts_rate_fb;
@@ -339,14 +337,14 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		struct b43_plcp_hdr6 *plcp;
 		struct ieee80211_rate *rts_cts_rate;
 
-		rts_cts_rate = ieee80211_get_rts_cts_rate(dev->wl->hw, txctl);
+		rts_cts_rate = ieee80211_get_rts_cts_rate(dev->wl->hw, info);
 
 		rts_rate = rts_cts_rate ? rts_cts_rate->hw_value : B43_CCK_RATE_1MB;
 		rts_rate_ofdm = b43_is_ofdm_rate(rts_rate);
 		rts_rate_fb = b43_calc_fallback_rate(rts_rate);
 		rts_rate_fb_ofdm = b43_is_ofdm_rate(rts_rate_fb);
 
-		if (txctl->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) {
+		if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
 			struct ieee80211_cts *cts;
 
 			if (b43_is_old_txhdr_format(dev)) {
@@ -356,9 +354,9 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 				cts = (struct ieee80211_cts *)
 					(txhdr->new_format.rts_frame);
 			}
-			ieee80211_ctstoself_get(dev->wl->hw, txctl->vif,
+			ieee80211_ctstoself_get(dev->wl->hw, info->control.vif,
 						fragment_data, fragment_len,
-						txctl, cts);
+						info, cts);
 			mac_ctl |= B43_TXH_MAC_SENDCTS;
 			len = sizeof(struct ieee80211_cts);
 		} else {
@@ -371,9 +369,9 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 				rts = (struct ieee80211_rts *)
 					(txhdr->new_format.rts_frame);
 			}
-			ieee80211_rts_get(dev->wl->hw, txctl->vif,
+			ieee80211_rts_get(dev->wl->hw, info->control.vif,
 					  fragment_data, fragment_len,
-					  txctl, rts);
+					  info, rts);
 			mac_ctl |= B43_TXH_MAC_SENDRTS;
 			len = sizeof(struct ieee80211_rts);
 		}
@@ -687,27 +685,27 @@ void b43_handle_txstatus(struct b43_wldev *dev,
 /* Fill out the mac80211 TXstatus report based on the b43-specific
  * txstatus report data. This returns a boolean whether the frame was
  * successfully transmitted. */
-bool b43_fill_txstatus_report(struct ieee80211_tx_status *report,
+bool b43_fill_txstatus_report(struct ieee80211_tx_info *report,
 			      const struct b43_txstatus *status)
 {
 	bool frame_success = 1;
 
 	if (status->acked) {
 		/* The frame was ACKed. */
-		report->flags |= IEEE80211_TX_STATUS_ACK;
+		report->flags |= IEEE80211_TX_STAT_ACK;
 	} else {
 		/* The frame was not ACKed... */
-		if (!(report->control.flags & IEEE80211_TXCTL_NO_ACK)) {
+		if (!(report->flags & IEEE80211_TX_CTL_NO_ACK)) {
 			/* ...but we expected an ACK. */
 			frame_success = 0;
-			report->excessive_retries = 1;
+			report->status.excessive_retries = 1;
 		}
 	}
 	if (status->frame_count == 0) {
 		/* The frame was not transmitted at all. */
-		report->retry_count = 0;
+		report->status.retry_count = 0;
 	} else
-		report->retry_count = status->frame_count - 1;
+		report->status.retry_count = status->frame_count - 1;
 
 	return frame_success;
 }
diff --git a/drivers/net/wireless/b43/xmit.h b/drivers/net/wireless/b43/xmit.h
index b05f44e0d62..0215faf4754 100644
--- a/drivers/net/wireless/b43/xmit.h
+++ b/drivers/net/wireless/b43/xmit.h
@@ -178,7 +178,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		       u8 * txhdr,
 		       const unsigned char *fragment_data,
 		       unsigned int fragment_len,
-		       const struct ieee80211_tx_control *txctl, u16 cookie);
+		       const struct ieee80211_tx_info *txctl, u16 cookie);
 
 /* Transmit Status */
 struct b43_txstatus {
@@ -294,7 +294,7 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr);
 
 void b43_handle_txstatus(struct b43_wldev *dev,
 			 const struct b43_txstatus *status);
-bool b43_fill_txstatus_report(struct ieee80211_tx_status *report,
+bool b43_fill_txstatus_report(struct ieee80211_tx_info *report,
 			      const struct b43_txstatus *status);
 
 void b43_tx_suspend(struct b43_wldev *dev);
diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c
index d6686f713b6..c1c501d963b 100644
--- a/drivers/net/wireless/b43legacy/dma.c
+++ b/drivers/net/wireless/b43legacy/dma.c
@@ -1205,10 +1205,10 @@ struct b43legacy_dmaring *parse_cookie(struct b43legacy_wldev *dev,
 }
 
 static int dma_tx_fragment(struct b43legacy_dmaring *ring,
-			    struct sk_buff *skb,
-			    struct ieee80211_tx_control *ctl)
+			    struct sk_buff *skb)
 {
 	const struct b43legacy_dma_ops *ops = ring->ops;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	u8 *header;
 	int slot, old_top_slot, old_used_slots;
 	int err;
@@ -1231,7 +1231,7 @@ static int dma_tx_fragment(struct b43legacy_dmaring *ring,
 	header = &(ring->txhdr_cache[slot * sizeof(
 			       struct b43legacy_txhdr_fw3)]);
 	err = b43legacy_generate_txhdr(ring->dev, header,
-				 skb->data, skb->len, ctl,
+				 skb->data, skb->len, info,
 				 generate_cookie(ring, slot));
 	if (unlikely(err)) {
 		ring->current_slot = old_top_slot;
@@ -1255,7 +1255,6 @@ static int dma_tx_fragment(struct b43legacy_dmaring *ring,
 	desc = ops->idx2desc(ring, slot, &meta);
 	memset(meta, 0, sizeof(*meta));
 
-	memcpy(&meta->txstat.control, ctl, sizeof(*ctl));
 	meta->skb = skb;
 	meta->is_last_fragment = 1;
 
@@ -1323,14 +1322,14 @@ int should_inject_overflow(struct b43legacy_dmaring *ring)
 }
 
 int b43legacy_dma_tx(struct b43legacy_wldev *dev,
-		     struct sk_buff *skb,
-		     struct ieee80211_tx_control *ctl)
+		     struct sk_buff *skb)
 {
 	struct b43legacy_dmaring *ring;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int err = 0;
 	unsigned long flags;
 
-	ring = priority_to_txring(dev, ctl->queue);
+	ring = priority_to_txring(dev, info->queue);
 	spin_lock_irqsave(&ring->lock, flags);
 	B43legacy_WARN_ON(!ring->tx);
 	if (unlikely(free_slots(ring) < SLOTS_PER_PACKET)) {
@@ -1343,7 +1342,7 @@ int b43legacy_dma_tx(struct b43legacy_wldev *dev,
 	 * That would be a mac80211 bug. */
 	B43legacy_BUG_ON(ring->stopped);
 
-	err = dma_tx_fragment(ring, skb, ctl);
+	err = dma_tx_fragment(ring, skb);
 	if (unlikely(err == -ENOKEY)) {
 		/* Drop this packet, as we don't have the encryption key
 		 * anymore and must not transmit it unencrypted. */
@@ -1401,26 +1400,29 @@ void b43legacy_dma_handle_txstatus(struct b43legacy_wldev *dev,
 					 1);
 
 		if (meta->is_last_fragment) {
-			B43legacy_WARN_ON(!meta->skb);
+			struct ieee80211_tx_info *info;
+			BUG_ON(!meta->skb);
+			info = IEEE80211_SKB_CB(meta->skb);
 			/* Call back to inform the ieee80211 subsystem about the
 			 * status of the transmission.
 			 * Some fields of txstat are already filled in dma_tx().
 			 */
+
+			memset(&info->status, 0, sizeof(info->status));
+
 			if (status->acked) {
-				meta->txstat.flags |= IEEE80211_TX_STATUS_ACK;
+				info->flags |= IEEE80211_TX_STAT_ACK;
 			} else {
-				if (!(meta->txstat.control.flags
-				      & IEEE80211_TXCTL_NO_ACK))
-					 meta->txstat.excessive_retries = 1;
+				if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
+					 info->status.excessive_retries = 1;
 			}
 			if (status->frame_count == 0) {
 				/* The frame was not transmitted at all. */
-				meta->txstat.retry_count = 0;
+				info->status.retry_count = 0;
 			} else
-				meta->txstat.retry_count = status->frame_count
+				info->status.retry_count = status->frame_count
 							   - 1;
-			ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb,
-						    &(meta->txstat));
+			ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb);
 			/* skb is freed by ieee80211_tx_status_irqsafe() */
 			meta->skb = NULL;
 		} else {
diff --git a/drivers/net/wireless/b43legacy/dma.h b/drivers/net/wireless/b43legacy/dma.h
index 2dd488c5be2..67537a7495f 100644
--- a/drivers/net/wireless/b43legacy/dma.h
+++ b/drivers/net/wireless/b43legacy/dma.h
@@ -195,7 +195,6 @@ struct b43legacy_dmadesc_meta {
 	dma_addr_t dmaaddr;
 	/* ieee80211 TX status. Only used once per 802.11 frag. */
 	bool is_last_fragment;
-	struct ieee80211_tx_status txstat;
 };
 
 struct b43legacy_dmaring;
@@ -297,8 +296,7 @@ void b43legacy_dma_get_tx_stats(struct b43legacy_wldev *dev,
 				struct ieee80211_tx_queue_stats *stats);
 
 int b43legacy_dma_tx(struct b43legacy_wldev *dev,
-		     struct sk_buff *skb,
-		     struct ieee80211_tx_control *ctl);
+		     struct sk_buff *skb);
 void b43legacy_dma_handle_txstatus(struct b43legacy_wldev *dev,
 				   const struct b43legacy_txstatus *status);
 
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index b05a507ed44..f706ca65f15 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2358,8 +2358,7 @@ static int b43legacy_rng_init(struct b43legacy_wl *wl)
 }
 
 static int b43legacy_op_tx(struct ieee80211_hw *hw,
-			   struct sk_buff *skb,
-			   struct ieee80211_tx_control *ctl)
+			   struct sk_buff *skb)
 {
 	struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw);
 	struct b43legacy_wldev *dev = wl->current_dev;
@@ -2373,10 +2372,10 @@ static int b43legacy_op_tx(struct ieee80211_hw *hw,
 	/* DMA-TX is done without a global lock. */
 	if (b43legacy_using_pio(dev)) {
 		spin_lock_irqsave(&wl->irq_lock, flags);
-		err = b43legacy_pio_tx(dev, skb, ctl);
+		err = b43legacy_pio_tx(dev, skb);
 		spin_unlock_irqrestore(&wl->irq_lock, flags);
 	} else
-		err = b43legacy_dma_tx(dev, skb, ctl);
+		err = b43legacy_dma_tx(dev, skb);
 out:
 	if (unlikely(err))
 		return NETDEV_TX_BUSY;
@@ -3409,7 +3408,7 @@ static int b43legacy_op_beacon_set_tim(struct ieee80211_hw *hw,
 	 * field, but that would probably require resizing and moving of data
 	 * within the beacon template. Simply request a new beacon and let
 	 * mac80211 do the hard work. */
-	beacon = ieee80211_beacon_get(hw, wl->vif, NULL);
+	beacon = ieee80211_beacon_get(hw, wl->vif);
 	if (unlikely(!beacon))
 		return -ENOMEM;
 	spin_lock_irqsave(&wl->irq_lock, flags);
@@ -3420,8 +3419,7 @@ static int b43legacy_op_beacon_set_tim(struct ieee80211_hw *hw,
 }
 
 static int b43legacy_op_ibss_beacon_update(struct ieee80211_hw *hw,
-					   struct sk_buff *beacon,
-					   struct ieee80211_tx_control *ctl)
+					   struct sk_buff *beacon)
 {
 	struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw);
 	unsigned long flags;
diff --git a/drivers/net/wireless/b43legacy/pio.c b/drivers/net/wireless/b43legacy/pio.c
index 8d3d27d3cd6..a86c7647fa2 100644
--- a/drivers/net/wireless/b43legacy/pio.c
+++ b/drivers/net/wireless/b43legacy/pio.c
@@ -196,7 +196,7 @@ static int pio_tx_write_fragment(struct b43legacy_pioqueue *queue,
 	B43legacy_WARN_ON(skb_shinfo(skb)->nr_frags != 0);
 	err = b43legacy_generate_txhdr(queue->dev,
 				 txhdr, skb->data, skb->len,
-				 &packet->txstat.control,
+				 IEEE80211_SKB_CB(skb),
 				 generate_cookie(queue, packet));
 	if (err)
 		return err;
@@ -463,8 +463,7 @@ err_destroy0:
 }
 
 int b43legacy_pio_tx(struct b43legacy_wldev *dev,
-		     struct sk_buff *skb,
-		     struct ieee80211_tx_control *ctl)
+		     struct sk_buff *skb)
 {
 	struct b43legacy_pioqueue *queue = dev->pio.queue1;
 	struct b43legacy_pio_txpacket *packet;
@@ -476,9 +475,6 @@ int b43legacy_pio_tx(struct b43legacy_wldev *dev,
 			    list);
 	packet->skb = skb;
 
-	memset(&packet->txstat, 0, sizeof(packet->txstat));
-	memcpy(&packet->txstat.control, ctl, sizeof(*ctl));
-
 	list_move_tail(&packet->list, &queue->txqueue);
 	queue->nr_txfree--;
 	queue->nr_tx_packets++;
@@ -494,6 +490,7 @@ void b43legacy_pio_handle_txstatus(struct b43legacy_wldev *dev,
 {
 	struct b43legacy_pioqueue *queue;
 	struct b43legacy_pio_txpacket *packet;
+	struct ieee80211_tx_info *info;
 
 	queue = parse_cookie(dev, status->cookie, &packet);
 	B43legacy_WARN_ON(!queue);
@@ -505,11 +502,13 @@ void b43legacy_pio_handle_txstatus(struct b43legacy_wldev *dev,
 	queue->tx_devq_used -= (packet->skb->len +
 				sizeof(struct b43legacy_txhdr_fw3));
 
+	info = IEEE80211_SKB_CB(packet->skb);
+	memset(&info->status, 0, sizeof(info->status));
+
 	if (status->acked)
-		packet->txstat.flags |= IEEE80211_TX_STATUS_ACK;
-	packet->txstat.retry_count = status->frame_count - 1;
-	ieee80211_tx_status_irqsafe(dev->wl->hw, packet->skb,
-				    &(packet->txstat));
+		info->flags |= IEEE80211_TX_STAT_ACK;
+	info->status.retry_count = status->frame_count - 1;
+	ieee80211_tx_status_irqsafe(dev->wl->hw, packet->skb);
 	packet->skb = NULL;
 
 	free_txpacket(packet, 1);
diff --git a/drivers/net/wireless/b43legacy/pio.h b/drivers/net/wireless/b43legacy/pio.h
index 5bfed0c4003..49310dfaf2d 100644
--- a/drivers/net/wireless/b43legacy/pio.h
+++ b/drivers/net/wireless/b43legacy/pio.h
@@ -41,7 +41,6 @@ struct b43legacy_xmitstatus;
 struct b43legacy_pio_txpacket {
 	struct b43legacy_pioqueue *queue;
 	struct sk_buff *skb;
-	struct ieee80211_tx_status txstat;
 	struct list_head list;
 };
 
@@ -104,8 +103,7 @@ int b43legacy_pio_init(struct b43legacy_wldev *dev);
 void b43legacy_pio_free(struct b43legacy_wldev *dev);
 
 int b43legacy_pio_tx(struct b43legacy_wldev *dev,
-		   struct sk_buff *skb,
-		   struct ieee80211_tx_control *ctl);
+		   struct sk_buff *skb);
 void b43legacy_pio_handle_txstatus(struct b43legacy_wldev *dev,
 				 const struct b43legacy_txstatus *status);
 void b43legacy_pio_get_tx_stats(struct b43legacy_wldev *dev,
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index 55dc251bf51..82dc04d5944 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -188,11 +188,11 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 			       struct b43legacy_txhdr_fw3 *txhdr,
 			       const unsigned char *fragment_data,
 			       unsigned int fragment_len,
-			       const struct ieee80211_tx_control *txctl,
+			       const struct ieee80211_tx_info *info,
 			       u16 cookie)
 {
 	const struct ieee80211_hdr *wlhdr;
-	int use_encryption = (!(txctl->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT));
+	int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT));
 	u16 fctl;
 	u8 rate;
 	struct ieee80211_rate *rate_fb;
@@ -208,11 +208,11 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 
 	memset(txhdr, 0, sizeof(*txhdr));
 
-	tx_rate = ieee80211_get_tx_rate(dev->wl->hw, txctl);
+	tx_rate = ieee80211_get_tx_rate(dev->wl->hw, info);
 
 	rate = tx_rate->hw_value;
 	rate_ofdm = b43legacy_is_ofdm_rate(rate);
-	rate_fb = ieee80211_get_alt_retry_rate(dev->wl->hw, txctl) ? : tx_rate;
+	rate_fb = ieee80211_get_alt_retry_rate(dev->wl->hw, info) ? : tx_rate;
 	rate_fb_ofdm = b43legacy_is_ofdm_rate(rate_fb->hw_value);
 
 	txhdr->mac_frame_ctl = wlhdr->frame_control;
@@ -228,14 +228,14 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 		txhdr->dur_fb = wlhdr->duration_id;
 	} else {
 		txhdr->dur_fb = ieee80211_generic_frame_duration(dev->wl->hw,
-							 txctl->vif,
+							 info->control.vif,
 							 fragment_len,
 							 rate_fb);
 	}
 
 	plcp_fragment_len = fragment_len + FCS_LEN;
 	if (use_encryption) {
-		u8 key_idx = txctl->hw_key->hw_key_idx;
+		u8 key_idx = info->control.hw_key->hw_key_idx;
 		struct b43legacy_key *key;
 		int wlhdr_len;
 		size_t iv_len;
@@ -245,7 +245,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 
 		if (key->enabled) {
 			/* Hardware appends ICV. */
-			plcp_fragment_len += txctl->icv_len;
+			plcp_fragment_len += info->control.icv_len;
 
 			key_idx = b43legacy_kidx_to_fw(dev, key_idx);
 			mac_ctl |= (key_idx << B43legacy_TX4_MAC_KEYIDX_SHIFT) &
@@ -254,7 +254,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 				   B43legacy_TX4_MAC_KEYALG_SHIFT) &
 				   B43legacy_TX4_MAC_KEYALG;
 			wlhdr_len = ieee80211_get_hdrlen(fctl);
-			iv_len = min((size_t)txctl->iv_len,
+			iv_len = min((size_t)info->control.iv_len,
 				     ARRAY_SIZE(txhdr->iv));
 			memcpy(txhdr->iv, ((u8 *)wlhdr) + wlhdr_len, iv_len);
 		} else {
@@ -278,7 +278,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 		phy_ctl |= B43legacy_TX4_PHY_OFDM;
 	if (dev->short_preamble)
 		phy_ctl |= B43legacy_TX4_PHY_SHORTPRMBL;
-	switch (txctl->antenna_sel_tx) {
+	switch (info->antenna_sel_tx) {
 	case 0:
 		phy_ctl |= B43legacy_TX4_PHY_ANTLAST;
 		break;
@@ -293,21 +293,21 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 	}
 
 	/* MAC control */
-	if (!(txctl->flags & IEEE80211_TXCTL_NO_ACK))
+	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
 		mac_ctl |= B43legacy_TX4_MAC_ACK;
 	if (!(((fctl & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) &&
 	      ((fctl & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)))
 		mac_ctl |= B43legacy_TX4_MAC_HWSEQ;
-	if (txctl->flags & IEEE80211_TXCTL_FIRST_FRAGMENT)
+	if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
 		mac_ctl |= B43legacy_TX4_MAC_STMSDU;
 	if (rate_fb_ofdm)
 		mac_ctl |= B43legacy_TX4_MAC_FALLBACKOFDM;
-	if (txctl->flags & IEEE80211_TXCTL_LONG_RETRY_LIMIT)
+	if (info->flags & IEEE80211_TX_CTL_LONG_RETRY_LIMIT)
 		mac_ctl |= B43legacy_TX4_MAC_LONGFRAME;
 
 	/* Generate the RTS or CTS-to-self frame */
-	if ((txctl->flags & IEEE80211_TXCTL_USE_RTS_CTS) ||
-	    (txctl->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) {
+	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
+	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) {
 		unsigned int len;
 		struct ieee80211_hdr *hdr;
 		int rts_rate;
@@ -315,26 +315,26 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 		int rts_rate_ofdm;
 		int rts_rate_fb_ofdm;
 
-		rts_rate = ieee80211_get_rts_cts_rate(dev->wl->hw, txctl)->hw_value;
+		rts_rate = ieee80211_get_rts_cts_rate(dev->wl->hw, info)->hw_value;
 		rts_rate_ofdm = b43legacy_is_ofdm_rate(rts_rate);
 		rts_rate_fb = b43legacy_calc_fallback_rate(rts_rate);
 		rts_rate_fb_ofdm = b43legacy_is_ofdm_rate(rts_rate_fb);
 		if (rts_rate_fb_ofdm)
 			mac_ctl |= B43legacy_TX4_MAC_CTSFALLBACKOFDM;
 
-		if (txctl->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) {
+		if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
 			ieee80211_ctstoself_get(dev->wl->hw,
-						txctl->vif,
+						info->control.vif,
 						fragment_data,
-						fragment_len, txctl,
+						fragment_len, info,
 						(struct ieee80211_cts *)
 						(txhdr->rts_frame));
 			mac_ctl |= B43legacy_TX4_MAC_SENDCTS;
 			len = sizeof(struct ieee80211_cts);
 		} else {
 			ieee80211_rts_get(dev->wl->hw,
-					  txctl->vif,
-					  fragment_data, fragment_len, txctl,
+					  info->control.vif,
+					  fragment_data, fragment_len, info,
 					  (struct ieee80211_rts *)
 					  (txhdr->rts_frame));
 			mac_ctl |= B43legacy_TX4_MAC_SENDRTS;
@@ -365,12 +365,12 @@ int b43legacy_generate_txhdr(struct b43legacy_wldev *dev,
 			      u8 *txhdr,
 			      const unsigned char *fragment_data,
 			      unsigned int fragment_len,
-			      const struct ieee80211_tx_control *txctl,
+			      const struct ieee80211_tx_info *info,
 			      u16 cookie)
 {
 	return generate_txhdr_fw3(dev, (struct b43legacy_txhdr_fw3 *)txhdr,
 			   fragment_data, fragment_len,
-			   txctl, cookie);
+			   info, cookie);
 }
 
 static s8 b43legacy_rssi_postprocess(struct b43legacy_wldev *dev,
diff --git a/drivers/net/wireless/b43legacy/xmit.h b/drivers/net/wireless/b43legacy/xmit.h
index bab47928a0c..e56777e0fea 100644
--- a/drivers/net/wireless/b43legacy/xmit.h
+++ b/drivers/net/wireless/b43legacy/xmit.h
@@ -80,7 +80,7 @@ int b43legacy_generate_txhdr(struct b43legacy_wldev *dev,
 			      u8 *txhdr,
 			      const unsigned char *fragment_data,
 			      unsigned int fragment_len,
-			      const struct ieee80211_tx_control *txctl,
+			      const struct ieee80211_tx_info *info,
 			      u16 cookie);
 
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
index f3ca02fe961..10c64bdb314 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
@@ -445,8 +445,7 @@ static int rs_adjust_next_rate(struct iwl3945_priv *priv, int rate)
  */
 static void rs_tx_status(void *priv_rate,
 			 struct net_device *dev,
-			 struct sk_buff *skb,
-			 struct ieee80211_tx_status *tx_resp)
+			 struct sk_buff *skb)
 {
 	u8 retries, current_count;
 	int scale_rate_index, first_index, last_index;
@@ -457,14 +456,15 @@ static void rs_tx_status(void *priv_rate,
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct iwl3945_rs_sta *rs_sta;
 	struct ieee80211_supported_band *sband;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	IWL_DEBUG_RATE("enter\n");
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
 
-	retries = tx_resp->retry_count;
-	first_index = sband->bitrates[tx_resp->control.tx_rate_idx].hw_value;
+	retries = info->status.retry_count;
+	first_index = sband->bitrates[info->tx_rate_idx].hw_value;
 	if ((first_index < 0) || (first_index >= IWL_RATE_COUNT)) {
 		IWL_DEBUG_RATE("leave: Rate out of bounds: %d\n", first_index);
 		return;
@@ -525,11 +525,11 @@ static void rs_tx_status(void *priv_rate,
 	/* Update the last index window with success/failure based on ACK */
 	IWL_DEBUG_RATE("Update rate %d with %s.\n",
 		       last_index,
-		       (tx_resp->flags & IEEE80211_TX_STATUS_ACK) ?
+		       (info->flags & IEEE80211_TX_STAT_ACK) ?
 		       "success" : "failure");
 	iwl3945_collect_tx_data(rs_sta,
 			    &rs_sta->win[last_index],
-			    tx_resp->flags & IEEE80211_TX_STATUS_ACK, 1);
+			    info->flags & IEEE80211_TX_STAT_ACK, 1);
 
 	/* We updated the rate scale window -- if its been more than
 	 * flush_time since the last run, schedule the flush
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index f8e691f88ab..0ba6889dfd4 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -283,8 +283,7 @@ static void iwl3945_tx_queue_reclaim(struct iwl3945_priv *priv,
 		q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
 
 		tx_info = &txq->txb[txq->q.read_ptr];
-		ieee80211_tx_status_irqsafe(priv->hw, tx_info->skb[0],
-					    &tx_info->status);
+		ieee80211_tx_status_irqsafe(priv->hw, tx_info->skb[0]);
 		tx_info->skb[0] = NULL;
 		iwl3945_hw_txq_free_tfd(priv, txq);
 	}
@@ -306,7 +305,7 @@ static void iwl3945_rx_reply_tx(struct iwl3945_priv *priv,
 	int txq_id = SEQ_TO_QUEUE(sequence);
 	int index = SEQ_TO_INDEX(sequence);
 	struct iwl3945_tx_queue *txq = &priv->txq[txq_id];
-	struct ieee80211_tx_status *tx_status;
+	struct ieee80211_tx_info *info;
 	struct iwl3945_tx_resp *tx_resp = (void *)&pkt->u.raw[0];
 	u32  status = le32_to_cpu(tx_resp->status);
 	int rate_idx;
@@ -319,21 +318,22 @@ static void iwl3945_rx_reply_tx(struct iwl3945_priv *priv,
 		return;
 	}
 
-	tx_status = &(txq->txb[txq->q.read_ptr].status);
+	info = IEEE80211_SKB_CB(txq->txb[txq->q.read_ptr].skb[0]);
+	memset(&info->status, 0, sizeof(info->status));
 
-	tx_status->retry_count = tx_resp->failure_frame;
+	info->status.retry_count = tx_resp->failure_frame;
 	/* tx_status->rts_retry_count = tx_resp->failure_rts; */
-	tx_status->flags = ((status & TX_STATUS_MSK) == TX_STATUS_SUCCESS) ?
-				IEEE80211_TX_STATUS_ACK : 0;
+	info->flags |= ((status & TX_STATUS_MSK) == TX_STATUS_SUCCESS) ?
+				IEEE80211_TX_STAT_ACK : 0;
 
 	IWL_DEBUG_TX("Tx queue %d Status %s (0x%08x) plcp rate %d retries %d\n",
 			txq_id, iwl3945_get_tx_fail_reason(status), status,
 			tx_resp->rate, tx_resp->failure_frame);
 
 	rate_idx = iwl3945_hwrate_to_plcp_idx(tx_resp->rate);
-	if (tx_status->control.band == IEEE80211_BAND_5GHZ)
+	if (info->band == IEEE80211_BAND_5GHZ)
 		rate_idx -= IWL_FIRST_OFDM_RATE;
-	tx_status->control.tx_rate_idx = rate_idx;
+	info->tx_rate_idx = rate_idx;
 	IWL_DEBUG_TX_REPLY("Tx queue reclaim %d\n", index);
 	iwl3945_tx_queue_reclaim(priv, txq_id, index);
 
@@ -960,11 +960,11 @@ u8 iwl3945_hw_find_station(struct iwl3945_priv *priv, const u8 *addr)
 */
 void iwl3945_hw_build_tx_cmd_rate(struct iwl3945_priv *priv,
 			      struct iwl3945_cmd *cmd,
-			      struct ieee80211_tx_control *ctrl,
+			      struct ieee80211_tx_info *info,
 			      struct ieee80211_hdr *hdr, int sta_id, int tx_id)
 {
 	unsigned long flags;
-	u16 hw_value = ieee80211_get_tx_rate(priv->hw, ctrl)->hw_value;
+	u16 hw_value = ieee80211_get_tx_rate(priv->hw, info)->hw_value;
 	u16 rate_index = min(hw_value & 0xffff, IWL_RATE_COUNT - 1);
 	u16 rate_mask;
 	int rate;
@@ -977,7 +977,7 @@ void iwl3945_hw_build_tx_cmd_rate(struct iwl3945_priv *priv,
 	tx_flags = cmd->cmd.tx.tx_flags;
 
 	/* We need to figure out how to get the sta->supp_rates while
-	 * in this running context; perhaps encoding into ctrl->tx_rate? */
+	 * in this running context */
 	rate_mask = IWL_RATES_MASK;
 
 	spin_lock_irqsave(&priv->sta_lock, flags);
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h
index 9fdc1405e85..835c5b4320e 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.h
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.h
@@ -124,7 +124,6 @@ int iwl3945_x2_queue_used(const struct iwl3945_queue *q, int i);
 
 /* One for each TFD */
 struct iwl3945_tx_info {
-	struct ieee80211_tx_status status;
 	struct sk_buff *skb[MAX_NUM_OF_TBS];
 };
 
@@ -645,7 +644,7 @@ extern unsigned int iwl3945_hw_get_beacon_cmd(struct iwl3945_priv *priv,
 extern int iwl3945_hw_get_rx_read(struct iwl3945_priv *priv);
 extern void iwl3945_hw_build_tx_cmd_rate(struct iwl3945_priv *priv,
 				     struct iwl3945_cmd *cmd,
-				     struct ieee80211_tx_control *ctrl,
+				     struct ieee80211_tx_info *info,
 				     struct ieee80211_hdr *hdr,
 				     int sta_id, int tx_id);
 extern int iwl3945_hw_reg_send_txpower(struct iwl3945_priv *priv);
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965-rs.c b/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
index 7993a1d8302..f28b3cc272d 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
@@ -785,8 +785,7 @@ out:
  * mac80211 sends us Tx status
  */
 static void rs_tx_status(void *priv_rate, struct net_device *dev,
-			 struct sk_buff *skb,
-			 struct ieee80211_tx_status *tx_resp)
+			 struct sk_buff *skb)
 {
 	int status;
 	u8 retries;
@@ -798,6 +797,7 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 	struct iwl_priv *priv = (struct iwl_priv *)priv_rate;
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = local_to_hw(local);
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct iwl4965_rate_scale_data *window = NULL;
 	struct iwl4965_rate_scale_data *search_win = NULL;
 	u32 tx_rate;
@@ -813,11 +813,11 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 		return;
 
 	/* This packet was aggregated but doesn't carry rate scale info */
-	if ((tx_resp->control.flags & IEEE80211_TXCTL_AMPDU) &&
-	    !(tx_resp->flags & IEEE80211_TX_STATUS_AMPDU))
+	if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
+	    !(info->flags & IEEE80211_TX_STAT_AMPDU))
 		return;
 
-	retries = tx_resp->retry_count;
+	retries = info->status.retry_count;
 
 	if (retries > 15)
 		retries = 15;
@@ -862,20 +862,20 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 	if (priv->band == IEEE80211_BAND_5GHZ)
 		rs_index -= IWL_FIRST_OFDM_RATE;
 
-	if ((tx_resp->control.tx_rate_idx < 0) ||
+	if ((info->tx_rate_idx < 0) ||
 	    (tbl_type.is_SGI ^
-		!!(tx_resp->control.flags & IEEE80211_TXCTL_SHORT_GI)) ||
+		!!(info->flags & IEEE80211_TX_CTL_SHORT_GI)) ||
 	    (tbl_type.is_fat ^
-		!!(tx_resp->control.flags & IEEE80211_TXCTL_40_MHZ_WIDTH)) ||
+		!!(info->flags & IEEE80211_TX_CTL_40_MHZ_WIDTH)) ||
 	    (tbl_type.is_dup ^
-		!!(tx_resp->control.flags & IEEE80211_TXCTL_DUP_DATA)) ||
-	    (tbl_type.ant_type ^ tx_resp->control.antenna_sel_tx) ||
+		!!(info->flags & IEEE80211_TX_CTL_DUP_DATA)) ||
+	    (tbl_type.ant_type ^ info->antenna_sel_tx) ||
 	    (!!(tx_rate & RATE_MCS_HT_MSK) ^
-		!!(tx_resp->control.flags & IEEE80211_TXCTL_OFDM_HT)) ||
+		!!(info->flags & IEEE80211_TX_CTL_OFDM_HT)) ||
 	    (!!(tx_rate & RATE_MCS_GF_MSK) ^
-		!!(tx_resp->control.flags & IEEE80211_TXCTL_GREEN_FIELD)) ||
+		!!(info->flags & IEEE80211_TX_CTL_GREEN_FIELD)) ||
 	    (hw->wiphy->bands[priv->band]->bitrates[rs_index].bitrate !=
-	     hw->wiphy->bands[tx_resp->control.band]->bitrates[tx_resp->control.tx_rate_idx].bitrate)) {
+	     hw->wiphy->bands[info->band]->bitrates[info->tx_rate_idx].bitrate)) {
 		IWL_DEBUG_RATE("initial rate does not match 0x%x\n", tx_rate);
 		goto out;
 	}
@@ -929,10 +929,7 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 	rs_get_tbl_info_from_mcs(tx_rate, priv->band, &tbl_type, &rs_index);
 
 	/* Update frame history window with "success" if Tx got ACKed ... */
-	if (tx_resp->flags & IEEE80211_TX_STATUS_ACK)
-		status = 1;
-	else
-		status = 0;
+	status = !!(info->flags & IEEE80211_TX_STAT_ACK);
 
 	/* If type matches "search" table,
 	 * add final tx status to "search" history */
@@ -943,10 +940,10 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 			tpt = search_tbl->expected_tpt[rs_index];
 		else
 			tpt = 0;
-		if (tx_resp->control.flags & IEEE80211_TXCTL_AMPDU)
+		if (info->flags & IEEE80211_TX_CTL_AMPDU)
 			rs_collect_tx_data(search_win, rs_index, tpt,
-					   tx_resp->ampdu_ack_len,
-					   tx_resp->ampdu_ack_map);
+					   info->status.ampdu_ack_len,
+					   info->status.ampdu_ack_map);
 		else
 			rs_collect_tx_data(search_win, rs_index, tpt,
 					   1, status);
@@ -959,10 +956,10 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 			tpt = curr_tbl->expected_tpt[rs_index];
 		else
 			tpt = 0;
-		if (tx_resp->control.flags & IEEE80211_TXCTL_AMPDU)
+		if (info->flags & IEEE80211_TX_CTL_AMPDU)
 			rs_collect_tx_data(window, rs_index, tpt,
-					   tx_resp->ampdu_ack_len,
-					   tx_resp->ampdu_ack_map);
+					   info->status.ampdu_ack_len,
+					   info->status.ampdu_ack_map);
 		else
 			rs_collect_tx_data(window, rs_index, tpt,
 					   1, status);
@@ -971,10 +968,10 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 	/* If not searching for new mode, increment success/failed counter
 	 * ... these help determine when to start searching again */
 	if (lq_sta->stay_in_tbl) {
-		if (tx_resp->control.flags & IEEE80211_TXCTL_AMPDU) {
-			lq_sta->total_success += tx_resp->ampdu_ack_map;
+		if (info->flags & IEEE80211_TX_CTL_AMPDU) {
+			lq_sta->total_success += info->status.ampdu_ack_map;
 			lq_sta->total_failed +=
-			     (tx_resp->ampdu_ack_len - tx_resp->ampdu_ack_map);
+			     (info->status.ampdu_ack_len - info->status.ampdu_ack_map);
 		} else {
 			if (status)
 				lq_sta->total_success++;
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index fb670b5cfeb..ca9ca92bb7f 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -357,22 +357,22 @@ int iwl4965_hwrate_to_plcp_idx(u32 rate_n_flags)
  * translate ucode response to mac80211 tx status control values
  */
 void iwl4965_hwrate_to_tx_control(struct iwl_priv *priv, u32 rate_n_flags,
-				  struct ieee80211_tx_control *control)
+				  struct ieee80211_tx_info *control)
 {
 	int rate_index;
 
 	control->antenna_sel_tx =
 		((rate_n_flags & RATE_MCS_ANT_ABC_MSK) >> RATE_MCS_ANT_POS);
 	if (rate_n_flags & RATE_MCS_HT_MSK)
-		control->flags |= IEEE80211_TXCTL_OFDM_HT;
+		control->flags |= IEEE80211_TX_CTL_OFDM_HT;
 	if (rate_n_flags & RATE_MCS_GF_MSK)
-		control->flags |= IEEE80211_TXCTL_GREEN_FIELD;
+		control->flags |= IEEE80211_TX_CTL_GREEN_FIELD;
 	if (rate_n_flags & RATE_MCS_FAT_MSK)
-		control->flags |= IEEE80211_TXCTL_40_MHZ_WIDTH;
+		control->flags |= IEEE80211_TX_CTL_40_MHZ_WIDTH;
 	if (rate_n_flags & RATE_MCS_DUP_MSK)
-		control->flags |= IEEE80211_TXCTL_DUP_DATA;
+		control->flags |= IEEE80211_TX_CTL_DUP_DATA;
 	if (rate_n_flags & RATE_MCS_SGI_MSK)
-		control->flags |= IEEE80211_TXCTL_SHORT_GI;
+		control->flags |= IEEE80211_TX_CTL_SHORT_GI;
 	rate_index = iwl4965_hwrate_to_plcp_idx(rate_n_flags);
 	if (control->band == IEEE80211_BAND_5GHZ)
 		rate_index -= IWL_FIRST_OFDM_RATE;
@@ -3007,7 +3007,7 @@ static int iwl4965_tx_status_reply_compressed_ba(struct iwl_priv *priv,
 	u16 scd_flow = le16_to_cpu(ba_resp->scd_flow);
 	u64 bitmap;
 	int successes = 0;
-	struct ieee80211_tx_status *tx_status;
+	struct ieee80211_tx_info *info;
 
 	if (unlikely(!agg->wait_for_ba))  {
 		IWL_ERROR("Received BA when not expected\n");
@@ -3045,13 +3045,13 @@ static int iwl4965_tx_status_reply_compressed_ba(struct iwl_priv *priv,
 			agg->start_idx + i);
 	}
 
-	tx_status = &priv->txq[scd_flow].txb[agg->start_idx].status;
-	tx_status->flags = IEEE80211_TX_STATUS_ACK;
-	tx_status->flags |= IEEE80211_TX_STATUS_AMPDU;
-	tx_status->ampdu_ack_map = successes;
-	tx_status->ampdu_ack_len = agg->frame_count;
-	iwl4965_hwrate_to_tx_control(priv, agg->rate_n_flags,
-				     &tx_status->control);
+	info = IEEE80211_SKB_CB(priv->txq[scd_flow].txb[agg->start_idx].skb[0]);
+	memset(&info->status, 0, sizeof(info->status));
+	info->flags = IEEE80211_TX_STAT_ACK;
+	info->flags |= IEEE80211_TX_STAT_AMPDU;
+	info->status.ampdu_ack_map = successes;
+	info->status.ampdu_ack_len = agg->frame_count;
+	iwl4965_hwrate_to_tx_control(priv, agg->rate_n_flags, info);
 
 	IWL_DEBUG_TX_REPLY("Bitmap %llx\n", (unsigned long long)bitmap);
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h
index a8d062f7b87..ad7422eadab 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.h
+++ b/drivers/net/wireless/iwlwifi/iwl-core.h
@@ -207,8 +207,7 @@ void iwl_rx_allocate(struct iwl_priv *priv);
 * TX
 ******************************************************/
 int iwl_txq_ctx_reset(struct iwl_priv *priv);
-int iwl_tx_skb(struct iwl_priv *priv,
-		struct sk_buff *skb, struct ieee80211_tx_control *ctl);
+int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb);
 /* FIXME: remove when free Tx is fully merged into iwlcore */
 int iwl_hw_txq_free_tfd(struct iwl_priv *priv, struct iwl_tx_queue *txq);
 void iwl_hw_txq_ctx_free(struct iwl_priv *priv);
diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h
index 820542bac44..f7fd8ea6177 100644
--- a/drivers/net/wireless/iwlwifi/iwl-dev.h
+++ b/drivers/net/wireless/iwlwifi/iwl-dev.h
@@ -119,7 +119,6 @@ struct iwl_queue {
 
 /* One for each TFD */
 struct iwl_tx_info {
-	struct ieee80211_tx_status status;
 	struct sk_buff *skb[MAX_NUM_OF_TBS];
 };
 
@@ -693,7 +692,7 @@ extern unsigned int iwl4965_hw_get_beacon_cmd(struct iwl_priv *priv,
 				 struct iwl_frame *frame, u8 rate);
 extern void iwl4965_hw_build_tx_cmd_rate(struct iwl_priv *priv,
 				     struct iwl_cmd *cmd,
-				     struct ieee80211_tx_control *ctrl,
+				     struct ieee80211_tx_info *info,
 				     struct ieee80211_hdr *hdr,
 				     int sta_id, int tx_id);
 extern int iwl4965_hw_reg_send_txpower(struct iwl_priv *priv);
@@ -749,7 +748,7 @@ extern void iwl4965_update_rate_scaling(struct iwl_priv *priv, u8 mode);
 extern void iwl4965_rf_kill_ct_config(struct iwl_priv *priv);
 extern void iwl4965_hwrate_to_tx_control(struct iwl_priv *priv,
 					 u32 rate_n_flags,
-					 struct ieee80211_tx_control *control);
+					 struct ieee80211_tx_info *info);
 
 #ifdef CONFIG_IWL4965_HT
 extern void iwl4965_init_ht_hw_capab(const struct iwl_priv *priv,
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 4b5149c8c32..a61293ba3f6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -502,7 +502,7 @@ int iwl_txq_ctx_reset(struct iwl_priv *priv)
  */
 static void iwl_tx_cmd_build_basic(struct iwl_priv *priv,
 				  struct iwl_tx_cmd *tx_cmd,
-				  struct ieee80211_tx_control *ctrl,
+				  struct ieee80211_tx_info *info,
 				  struct ieee80211_hdr *hdr,
 				  int is_unicast, u8 std_id)
 {
@@ -510,7 +510,7 @@ static void iwl_tx_cmd_build_basic(struct iwl_priv *priv,
 	__le32 tx_flags = tx_cmd->tx_flags;
 
 	tx_cmd->stop_time.life_time = TX_CMD_LIFE_TIME_INFINITE;
-	if (!(ctrl->flags & IEEE80211_TXCTL_NO_ACK)) {
+	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) {
 		tx_flags |= TX_CMD_FLG_ACK_MSK;
 		if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)
 			tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK;
@@ -538,10 +538,10 @@ static void iwl_tx_cmd_build_basic(struct iwl_priv *priv,
 		tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK;
 	}
 
-	if (ctrl->flags & IEEE80211_TXCTL_USE_RTS_CTS) {
+	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
 		tx_flags |= TX_CMD_FLG_RTS_MSK;
 		tx_flags &= ~TX_CMD_FLG_CTS_MSK;
-	} else if (ctrl->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) {
+	} else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
 		tx_flags &= ~TX_CMD_FLG_RTS_MSK;
 		tx_flags |= TX_CMD_FLG_CTS_MSK;
 	}
@@ -570,7 +570,7 @@ static void iwl_tx_cmd_build_basic(struct iwl_priv *priv,
 
 static void iwl_tx_cmd_build_rate(struct iwl_priv *priv,
 			      struct iwl_tx_cmd *tx_cmd,
-			      struct ieee80211_tx_control *ctrl,
+			      struct ieee80211_tx_info *info,
 			      u16 fc, int sta_id,
 			      int is_hcca)
 {
@@ -580,7 +580,7 @@ static void iwl_tx_cmd_build_rate(struct iwl_priv *priv,
 	u16 rate_flags = 0;
 	int rate_idx;
 
-	rate_idx = min(ieee80211_get_tx_rate(priv->hw, ctrl)->hw_value & 0xffff,
+	rate_idx = min(ieee80211_get_tx_rate(priv->hw, info)->hw_value & 0xffff,
 			IWL_RATE_COUNT - 1);
 
 	rate_plcp = iwl_rates[rate_idx].plcp;
@@ -637,18 +637,18 @@ static void iwl_tx_cmd_build_rate(struct iwl_priv *priv,
 }
 
 static void iwl_tx_cmd_build_hwcrypto(struct iwl_priv *priv,
-				      struct ieee80211_tx_control *ctl,
+				      struct ieee80211_tx_info *info,
 				      struct iwl_tx_cmd *tx_cmd,
 				      struct sk_buff *skb_frag,
 				      int sta_id)
 {
-	struct ieee80211_key_conf *keyconf = ctl->hw_key;
+	struct ieee80211_key_conf *keyconf = info->control.hw_key;
 
 	switch (keyconf->alg) {
 	case ALG_CCMP:
 		tx_cmd->sec_ctl = TX_CMD_SEC_CCM;
 		memcpy(tx_cmd->key, keyconf->key, keyconf->keylen);
-		if (ctl->flags & IEEE80211_TXCTL_AMPDU)
+		if (info->flags & IEEE80211_TX_CTL_AMPDU)
 			tx_cmd->tx_flags |= TX_CMD_FLG_AGG_CCMP_MSK;
 		IWL_DEBUG_TX("tx_cmd with aes hwcrypto\n");
 		break;
@@ -690,13 +690,13 @@ static void iwl_update_tx_stats(struct iwl_priv *priv, u16 fc, u16 len)
 /*
  * start REPLY_TX command process
  */
-int iwl_tx_skb(struct iwl_priv *priv,
-		struct sk_buff *skb, struct ieee80211_tx_control *ctl)
+int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct iwl_tfd_frame *tfd;
 	u32 *control_flags;
-	int txq_id = ctl->queue;
+	int txq_id = info->queue;
 	struct iwl_tx_queue *txq = NULL;
 	struct iwl_queue *q = NULL;
 	dma_addr_t phys_addr;
@@ -726,7 +726,7 @@ int iwl_tx_skb(struct iwl_priv *priv,
 		goto drop_unlock;
 	}
 
-	if ((ieee80211_get_tx_rate(priv->hw, ctl)->hw_value & 0xFF) ==
+	if ((ieee80211_get_tx_rate(priv->hw, info)->hw_value & 0xFF) ==
 	     IWL_INVALID_RATE) {
 		IWL_ERROR("ERROR: No TX rate available.\n");
 		goto drop_unlock;
@@ -782,7 +782,7 @@ int iwl_tx_skb(struct iwl_priv *priv,
 		seq_number += 0x10;
 #ifdef CONFIG_IWL4965_HT
 		/* aggregation is on for this <sta,tid> */
-		if (ctl->flags & IEEE80211_TXCTL_AMPDU)
+		if (info->flags & IEEE80211_TX_CTL_AMPDU)
 			txq_id = priv->stations[sta_id].tid[tid].agg.txq_id;
 		priv->stations[sta_id].tid[tid].tfds_in_queue++;
 #endif /* CONFIG_IWL4965_HT */
@@ -803,8 +803,6 @@ int iwl_tx_skb(struct iwl_priv *priv,
 	/* Set up driver data for this TFD */
 	memset(&(txq->txb[q->write_ptr]), 0, sizeof(struct iwl_tx_info));
 	txq->txb[q->write_ptr].skb[0] = skb;
-	memcpy(&(txq->txb[q->write_ptr].status.control),
-	       ctl, sizeof(struct ieee80211_tx_control));
 
 	/* Set up first empty entry in queue's array of Tx/cmd buffers */
 	out_cmd = &txq->cmd[idx];
@@ -854,8 +852,8 @@ int iwl_tx_skb(struct iwl_priv *priv,
 	 * first entry */
 	iwl_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
 
-	if (!(ctl->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT))
-		iwl_tx_cmd_build_hwcrypto(priv, ctl, tx_cmd, skb, sta_id);
+	if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+		iwl_tx_cmd_build_hwcrypto(priv, info, tx_cmd, skb, sta_id);
 
 	/* Set up TFD's 2nd entry to point directly to remainder of skb,
 	 * if any (802.11 null frames have no payload). */
@@ -874,10 +872,10 @@ int iwl_tx_skb(struct iwl_priv *priv,
 	len = (u16)skb->len;
 	tx_cmd->len = cpu_to_le16(len);
 	/* TODO need this for burst mode later on */
-	iwl_tx_cmd_build_basic(priv, tx_cmd, ctl, hdr, unicast, sta_id);
+	iwl_tx_cmd_build_basic(priv, tx_cmd, info, hdr, unicast, sta_id);
 
 	/* set is_hcca to 0; it probably will never be implemented */
-	iwl_tx_cmd_build_rate(priv, tx_cmd, ctl, fc, sta_id, 0);
+	iwl_tx_cmd_build_rate(priv, tx_cmd, info, fc, sta_id, 0);
 
 	iwl_update_tx_stats(priv, fc, len);
 
@@ -919,7 +917,7 @@ int iwl_tx_skb(struct iwl_priv *priv,
 			spin_unlock_irqrestore(&priv->lock, flags);
 		}
 
-		ieee80211_stop_queue(priv->hw, ctl->queue);
+		ieee80211_stop_queue(priv->hw, info->queue);
 	}
 
 	return 0;
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index a28b4c9f652..a740a1817d1 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -2376,13 +2376,13 @@ static int iwl3945_set_mode(struct iwl3945_priv *priv, int mode)
 }
 
 static void iwl3945_build_tx_cmd_hwcrypto(struct iwl3945_priv *priv,
-				      struct ieee80211_tx_control *ctl,
+				      struct ieee80211_tx_info *info,
 				      struct iwl3945_cmd *cmd,
 				      struct sk_buff *skb_frag,
 				      int last_frag)
 {
 	struct iwl3945_hw_key *keyinfo =
-	    &priv->stations[ctl->hw_key->hw_key_idx].keyinfo;
+	    &priv->stations[info->control.hw_key->hw_key_idx].keyinfo;
 
 	switch (keyinfo->alg) {
 	case ALG_CCMP:
@@ -2405,7 +2405,7 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl3945_priv *priv,
 
 	case ALG_WEP:
 		cmd->cmd.tx.sec_ctl = TX_CMD_SEC_WEP |
-		    (ctl->hw_key->hw_key_idx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT;
+		    (info->control.hw_key->hw_key_idx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT;
 
 		if (keyinfo->keylen == 13)
 			cmd->cmd.tx.sec_ctl |= TX_CMD_SEC_KEY128;
@@ -2413,7 +2413,7 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl3945_priv *priv,
 		memcpy(&cmd->cmd.tx.key[3], keyinfo->key, keyinfo->keylen);
 
 		IWL_DEBUG_TX("Configuring packet for WEP encryption "
-			     "with key %d\n", ctl->hw_key->hw_key_idx);
+			     "with key %d\n", info->control.hw_key->hw_key_idx);
 		break;
 
 	default:
@@ -2427,7 +2427,7 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl3945_priv *priv,
  */
 static void iwl3945_build_tx_cmd_basic(struct iwl3945_priv *priv,
 				  struct iwl3945_cmd *cmd,
-				  struct ieee80211_tx_control *ctrl,
+				  struct ieee80211_tx_info *info,
 				  struct ieee80211_hdr *hdr,
 				  int is_unicast, u8 std_id)
 {
@@ -2435,7 +2435,7 @@ static void iwl3945_build_tx_cmd_basic(struct iwl3945_priv *priv,
 	__le32 tx_flags = cmd->cmd.tx.tx_flags;
 
 	cmd->cmd.tx.stop_time.life_time = TX_CMD_LIFE_TIME_INFINITE;
-	if (!(ctrl->flags & IEEE80211_TXCTL_NO_ACK)) {
+	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) {
 		tx_flags |= TX_CMD_FLG_ACK_MSK;
 		if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)
 			tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK;
@@ -2459,10 +2459,10 @@ static void iwl3945_build_tx_cmd_basic(struct iwl3945_priv *priv,
 		tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK;
 	}
 
-	if (ctrl->flags & IEEE80211_TXCTL_USE_RTS_CTS) {
+	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
 		tx_flags |= TX_CMD_FLG_RTS_MSK;
 		tx_flags &= ~TX_CMD_FLG_CTS_MSK;
-	} else if (ctrl->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) {
+	} else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
 		tx_flags &= ~TX_CMD_FLG_RTS_MSK;
 		tx_flags |= TX_CMD_FLG_CTS_MSK;
 	}
@@ -2546,13 +2546,13 @@ static int iwl3945_get_sta_id(struct iwl3945_priv *priv, struct ieee80211_hdr *h
 /*
  * start REPLY_TX command process
  */
-static int iwl3945_tx_skb(struct iwl3945_priv *priv,
-		      struct sk_buff *skb, struct ieee80211_tx_control *ctl)
+static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct iwl3945_tfd_frame *tfd;
 	u32 *control_flags;
-	int txq_id = ctl->queue;
+	int txq_id = info->queue;
 	struct iwl3945_tx_queue *txq = NULL;
 	struct iwl3945_queue *q = NULL;
 	dma_addr_t phys_addr;
@@ -2581,7 +2581,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv,
 		goto drop_unlock;
 	}
 
-	if ((ieee80211_get_tx_rate(priv->hw, ctl)->hw_value & 0xFF) == IWL_INVALID_RATE) {
+	if ((ieee80211_get_tx_rate(priv->hw, info)->hw_value & 0xFF) == IWL_INVALID_RATE) {
 		IWL_ERROR("ERROR: No TX rate available.\n");
 		goto drop_unlock;
 	}
@@ -2650,8 +2650,6 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv,
 	/* Set up driver data for this TFD */
 	memset(&(txq->txb[q->write_ptr]), 0, sizeof(struct iwl3945_tx_info));
 	txq->txb[q->write_ptr].skb[0] = skb;
-	memcpy(&(txq->txb[q->write_ptr].status.control),
-	       ctl, sizeof(struct ieee80211_tx_control));
 
 	/* Init first empty entry in queue's array of Tx/cmd buffers */
 	out_cmd = &txq->cmd[idx];
@@ -2700,8 +2698,8 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv,
 	 * first entry */
 	iwl3945_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
 
-	if (!(ctl->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT))
-		iwl3945_build_tx_cmd_hwcrypto(priv, ctl, out_cmd, skb, 0);
+	if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+		iwl3945_build_tx_cmd_hwcrypto(priv, info, out_cmd, skb, 0);
 
 	/* Set up TFD's 2nd entry to point directly to remainder of skb,
 	 * if any (802.11 null frames have no payload). */
@@ -2726,10 +2724,10 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv,
 	out_cmd->cmd.tx.len = cpu_to_le16(len);
 
 	/* TODO need this for burst mode later on */
-	iwl3945_build_tx_cmd_basic(priv, out_cmd, ctl, hdr, unicast, sta_id);
+	iwl3945_build_tx_cmd_basic(priv, out_cmd, info, hdr, unicast, sta_id);
 
 	/* set is_hcca to 0; it probably will never be implemented */
-	iwl3945_hw_build_tx_cmd_rate(priv, out_cmd, ctl, hdr, sta_id, 0);
+	iwl3945_hw_build_tx_cmd_rate(priv, out_cmd, info, hdr, sta_id, 0);
 
 	out_cmd->cmd.tx.tx_flags &= ~TX_CMD_FLG_ANT_A_MSK;
 	out_cmd->cmd.tx.tx_flags &= ~TX_CMD_FLG_ANT_B_MSK;
@@ -2767,7 +2765,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv,
 			spin_unlock_irqrestore(&priv->lock, flags);
 		}
 
-		ieee80211_stop_queue(priv->hw, ctl->queue);
+		ieee80211_stop_queue(priv->hw, info->queue);
 	}
 
 	return 0;
@@ -3230,7 +3228,7 @@ static void iwl3945_bg_beacon_update(struct work_struct *work)
 	struct sk_buff *beacon;
 
 	/* Pull updated AP beacon from mac80211. will fail if not in AP mode */
-	beacon = ieee80211_beacon_get(priv->hw, priv->vif, NULL);
+	beacon = ieee80211_beacon_get(priv->hw, priv->vif);
 
 	if (!beacon) {
 		IWL_ERROR("update beacon failed\n");
@@ -6681,8 +6679,7 @@ static void iwl3945_mac_stop(struct ieee80211_hw *hw)
 	IWL_DEBUG_MAC80211("leave\n");
 }
 
-static int iwl3945_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
-		      struct ieee80211_tx_control *ctl)
+static int iwl3945_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct iwl3945_priv *priv = hw->priv;
 
@@ -6694,9 +6691,9 @@ static int iwl3945_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	}
 
 	IWL_DEBUG_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
-		     ieee80211_get_tx_rate(hw, ctl)->bitrate);
+		     ieee80211_get_tx_rate(hw, IEEE80211_SKB_CB(skb))->bitrate);
 
-	if (iwl3945_tx_skb(priv, skb, ctl))
+	if (iwl3945_tx_skb(priv, skb))
 		dev_kfree_skb_any(skb);
 
 	IWL_DEBUG_MAC80211("leave\n");
@@ -7333,8 +7330,7 @@ static void iwl3945_mac_reset_tsf(struct ieee80211_hw *hw)
 
 }
 
-static int iwl3945_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
-				 struct ieee80211_tx_control *control)
+static int iwl3945_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct iwl3945_priv *priv = hw->priv;
 	unsigned long flags;
diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c
index 1fad6227aa5..07c52b69d1e 100644
--- a/drivers/net/wireless/iwlwifi/iwl4965-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c
@@ -1606,17 +1606,7 @@ static int iwl4965_get_measurement(struct iwl_priv *priv,
 static void iwl4965_txstatus_to_ieee(struct iwl_priv *priv,
 				     struct iwl_tx_info *tx_sta)
 {
-
-	tx_sta->status.ack_signal = 0;
-	tx_sta->status.excessive_retries = 0;
-
-	if (in_interrupt())
-		ieee80211_tx_status_irqsafe(priv->hw,
-					    tx_sta->skb[0], &(tx_sta->status));
-	else
-		ieee80211_tx_status(priv->hw,
-				    tx_sta->skb[0], &(tx_sta->status));
-
+	ieee80211_tx_status_irqsafe(priv->hw, tx_sta->skb[0]);
 	tx_sta->skb[0] = NULL;
 }
 
@@ -1710,7 +1700,7 @@ static int iwl4965_tx_status_reply_tx(struct iwl_priv *priv,
 {
 	u16 status;
 	struct agg_tx_status *frame_status = &tx_resp->status;
-	struct ieee80211_tx_status *tx_status = NULL;
+	struct ieee80211_tx_info *info = NULL;
 	struct ieee80211_hdr *hdr = NULL;
 	int i, sh;
 	int txq_id, idx;
@@ -1736,14 +1726,14 @@ static int iwl4965_tx_status_reply_tx(struct iwl_priv *priv,
 		IWL_DEBUG_TX_REPLY("FrameCnt = %d, StartIdx=%d idx=%d\n",
 				   agg->frame_count, agg->start_idx, idx);
 
-		tx_status = &(priv->txq[txq_id].txb[idx].status);
-		tx_status->retry_count = tx_resp->failure_frame;
-		tx_status->control.flags &= ~IEEE80211_TXCTL_AMPDU;
-		tx_status->flags = iwl4965_is_tx_success(status)?
-			IEEE80211_TX_STATUS_ACK : 0;
+		info = IEEE80211_SKB_CB(priv->txq[txq_id].txb[idx].skb[0]);
+		info->status.retry_count = tx_resp->failure_frame;
+		info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+		info->flags |= iwl4965_is_tx_success(status)?
+			IEEE80211_TX_STAT_ACK : 0;
 		iwl4965_hwrate_to_tx_control(priv,
 					     le32_to_cpu(tx_resp->rate_n_flags),
-					     &tx_status->control);
+					     info);
 		/* FIXME: code repetition end */
 
 		IWL_DEBUG_TX_REPLY("1 Frame 0x%x failure :%d\n",
@@ -1830,7 +1820,7 @@ static void iwl4965_rx_reply_tx(struct iwl_priv *priv,
 	int txq_id = SEQ_TO_QUEUE(sequence);
 	int index = SEQ_TO_INDEX(sequence);
 	struct iwl_tx_queue *txq = &priv->txq[txq_id];
-	struct ieee80211_tx_status *tx_status;
+	struct ieee80211_tx_info *info;
 	struct iwl4965_tx_resp *tx_resp = (void *)&pkt->u.raw[0];
 	u32  status = le32_to_cpu(tx_resp->status);
 #ifdef CONFIG_IWL4965_HT
@@ -1848,6 +1838,9 @@ static void iwl4965_rx_reply_tx(struct iwl_priv *priv,
 		return;
 	}
 
+	info = IEEE80211_SKB_CB(txq->txb[txq->q.read_ptr].skb[0]);
+	memset(&info->status, 0, sizeof(info->status));
+
 #ifdef CONFIG_IWL4965_HT
 	hdr = iwl4965_tx_queue_get_hdr(priv, txq_id, index);
 	fc = le16_to_cpu(hdr->frame_control);
@@ -1902,13 +1895,12 @@ static void iwl4965_rx_reply_tx(struct iwl_priv *priv,
 		}
 	} else {
 #endif /* CONFIG_IWL4965_HT */
-	tx_status = &(txq->txb[txq->q.read_ptr].status);
 
-	tx_status->retry_count = tx_resp->failure_frame;
-	tx_status->flags =
-	    iwl4965_is_tx_success(status) ? IEEE80211_TX_STATUS_ACK : 0;
+	info->status.retry_count = tx_resp->failure_frame;
+	info->flags |=
+	    iwl4965_is_tx_success(status) ? IEEE80211_TX_STAT_ACK : 0;
 	iwl4965_hwrate_to_tx_control(priv, le32_to_cpu(tx_resp->rate_n_flags),
-				     &tx_status->control);
+				     info);
 
 	IWL_DEBUG_TX("Tx queue %d Status %s (0x%08x) rate_n_flags 0x%x "
 		     "retries %d\n", txq_id, iwl4965_get_tx_fail_reason(status),
@@ -2053,7 +2045,7 @@ static void iwl4965_bg_beacon_update(struct work_struct *work)
 	struct sk_buff *beacon;
 
 	/* Pull updated AP beacon from mac80211. will fail if not in AP mode */
-	beacon = ieee80211_beacon_get(priv->hw, priv->vif, NULL);
+	beacon = ieee80211_beacon_get(priv->hw, priv->vif);
 
 	if (!beacon) {
 		IWL_ERROR("update beacon failed\n");
@@ -4268,8 +4260,7 @@ static void iwl4965_mac_stop(struct ieee80211_hw *hw)
 	IWL_DEBUG_MAC80211("leave\n");
 }
 
-static int iwl4965_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
-		      struct ieee80211_tx_control *ctl)
+static int iwl4965_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct iwl_priv *priv = hw->priv;
 
@@ -4281,9 +4272,9 @@ static int iwl4965_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	}
 
 	IWL_DEBUG_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
-		     ieee80211_get_tx_rate(hw, ctl)->bitrate);
+		     ieee80211_get_tx_rate(hw, IEEE80211_SKB_CB(skb))->bitrate);
 
-	if (iwl_tx_skb(priv, skb, ctl))
+	if (iwl_tx_skb(priv, skb))
 		dev_kfree_skb_any(skb);
 
 	IWL_DEBUG_MAC80211("leave\n");
@@ -5065,8 +5056,7 @@ static void iwl4965_mac_reset_tsf(struct ieee80211_hw *hw)
 	IWL_DEBUG_MAC80211("leave\n");
 }
 
-static int iwl4965_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
-				 struct ieee80211_tx_control *control)
+static int iwl4965_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct iwl_priv *priv = hw->priv;
 	unsigned long flags;
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 3ca9386561f..850857932e2 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -394,7 +394,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb)
 	while (entry != (struct sk_buff *)&priv->tx_queue) {
 		range = (struct memrecord *)&entry->cb;
 		if (range->start_addr == addr) {
-			struct ieee80211_tx_status status;
+			struct ieee80211_tx_info *info = IEEE80211_SKB_CB(entry);
 			struct p54_control_hdr *entry_hdr;
 			struct p54_tx_control_allocdata *entry_data;
 			int pad = 0;
@@ -406,30 +406,23 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb)
 
 			last_addr = range->end_addr;
 			__skb_unlink(entry, &priv->tx_queue);
-			if (!range->control) {
-				kfree_skb(entry);
-				break;
-			}
-			memset(&status, 0, sizeof(status));
-			memcpy(&status.control, range->control,
-			       sizeof(status.control));
-			kfree(range->control);
-			priv->tx_stats[status.control.queue].len--;
+			memset(&info->status, 0, sizeof(info->status));
+			priv->tx_stats[info->queue].len--;
 			entry_hdr = (struct p54_control_hdr *) entry->data;
 			entry_data = (struct p54_tx_control_allocdata *) entry_hdr->data;
 			if ((entry_hdr->magic1 & cpu_to_le16(0x4000)) != 0)
 				pad = entry_data->align[0];
 
-			if (!(status.control.flags & IEEE80211_TXCTL_NO_ACK)) {
+			if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) {
 				if (!(payload->status & 0x01))
-					status.flags |= IEEE80211_TX_STATUS_ACK;
+					info->flags |= IEEE80211_TX_STAT_ACK;
 				else
-					status.excessive_retries = 1;
+					info->status.excessive_retries = 1;
 			}
-			status.retry_count = payload->retries - 1;
-			status.ack_signal = le16_to_cpu(payload->ack_rssi);
+			info->status.retry_count = payload->retries - 1;
+			info->status.ack_signal = le16_to_cpu(payload->ack_rssi);
 			skb_pull(entry, sizeof(*hdr) + pad + sizeof(*entry_data));
-			ieee80211_tx_status_irqsafe(dev, entry, &status);
+			ieee80211_tx_status_irqsafe(dev, entry);
 			break;
 		} else
 			last_addr = range->end_addr;
@@ -494,13 +487,11 @@ EXPORT_SYMBOL_GPL(p54_rx);
  * allocated areas.
  */
 static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb,
-			       struct p54_control_hdr *data, u32 len,
-			       struct ieee80211_tx_control *control)
+			       struct p54_control_hdr *data, u32 len)
 {
 	struct p54_common *priv = dev->priv;
 	struct sk_buff *entry = priv->tx_queue.next;
 	struct sk_buff *target_skb = NULL;
-	struct memrecord *range;
 	u32 last_addr = priv->rx_start;
 	u32 largest_hole = 0;
 	u32 target_addr = priv->rx_start;
@@ -512,7 +503,8 @@ static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb,
 	left = skb_queue_len(&priv->tx_queue);
 	while (left--) {
 		u32 hole_size;
-		range = (struct memrecord *)&entry->cb;
+		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(entry);
+		struct memrecord *range = (void *)info->driver_data;
 		hole_size = range->start_addr - last_addr;
 		if (!target_skb && hole_size >= len) {
 			target_skb = entry->prev;
@@ -527,17 +519,18 @@ static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb,
 		target_skb = priv->tx_queue.prev;
 		largest_hole = max(largest_hole, priv->rx_end - last_addr - len);
 		if (!skb_queue_empty(&priv->tx_queue)) {
-			range = (struct memrecord *)&target_skb->cb;
+			struct ieee80211_tx_info *info = IEEE80211_SKB_CB(target_skb);
+			struct memrecord *range = (void *)info->driver_data;
 			target_addr = range->end_addr;
 		}
 	} else
 		largest_hole = max(largest_hole, priv->rx_end - last_addr);
 
 	if (skb) {
-		range = (struct memrecord *)&skb->cb;
+		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+		struct memrecord *range = (void *)info->driver_data;
 		range->start_addr = target_addr;
 		range->end_addr = target_addr + len;
-		range->control = control;
 		__skb_queue_after(&priv->tx_queue, target_skb, skb);
 		if (largest_hole < IEEE80211_MAX_RTS_THRESHOLD + 0x170 +
 				   sizeof(struct p54_control_hdr))
@@ -548,32 +541,27 @@ static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb,
 	data->req_id = cpu_to_le32(target_addr + 0x70);
 }
 
-static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
-		  struct ieee80211_tx_control *control)
+static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 {
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_tx_queue_stats *current_queue;
 	struct p54_common *priv = dev->priv;
 	struct p54_control_hdr *hdr;
 	struct p54_tx_control_allocdata *txhdr;
-	struct ieee80211_tx_control *control_copy;
 	size_t padding, len;
 	u8 rate;
 
-	current_queue = &priv->tx_stats[control->queue];
+	current_queue = &priv->tx_stats[info->queue];
 	if (unlikely(current_queue->len > current_queue->limit))
 		return NETDEV_TX_BUSY;
 	current_queue->len++;
 	current_queue->count++;
 	if (current_queue->len == current_queue->limit)
-		ieee80211_stop_queue(dev, control->queue);
+		ieee80211_stop_queue(dev, info->queue);
 
 	padding = (unsigned long)(skb->data - (sizeof(*hdr) + sizeof(*txhdr))) & 3;
 	len = skb->len;
 
-	control_copy = kmalloc(sizeof(*control), GFP_ATOMIC);
-	if (control_copy)
-		memcpy(control_copy, control, sizeof(*control));
-
 	txhdr = (struct p54_tx_control_allocdata *)
 			skb_push(skb, sizeof(*txhdr) + padding);
 	hdr = (struct p54_control_hdr *) skb_push(skb, sizeof(*hdr));
@@ -583,35 +571,37 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	else
 		hdr->magic1 = cpu_to_le16(0x0010);
 	hdr->len = cpu_to_le16(len);
-	hdr->type = (control->flags & IEEE80211_TXCTL_NO_ACK) ? 0 : cpu_to_le16(1);
-	hdr->retry1 = hdr->retry2 = control->retry_limit;
-	p54_assign_address(dev, skb, hdr, skb->len, control_copy);
+	hdr->type = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 0 : cpu_to_le16(1);
+	hdr->retry1 = hdr->retry2 = info->control.retry_limit;
 
 	memset(txhdr->wep_key, 0x0, 16);
 	txhdr->padding = 0;
 	txhdr->padding2 = 0;
 
 	/* TODO: add support for alternate retry TX rates */
-	rate = ieee80211_get_tx_rate(dev, control)->hw_value;
-	if (control->flags & IEEE80211_TXCTL_SHORT_PREAMBLE)
+	rate = ieee80211_get_tx_rate(dev, info)->hw_value;
+	if (info->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE)
 		rate |= 0x10;
-	if (control->flags & IEEE80211_TXCTL_USE_RTS_CTS)
+	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
 		rate |= 0x40;
-	else if (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)
+	else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
 		rate |= 0x20;
 	memset(txhdr->rateset, rate, 8);
 	txhdr->wep_key_present = 0;
 	txhdr->wep_key_len = 0;
-	txhdr->frame_type = cpu_to_le32(control->queue + 4);
+	txhdr->frame_type = cpu_to_le32(info->queue + 4);
 	txhdr->magic4 = 0;
-	txhdr->antenna = (control->antenna_sel_tx == 0) ?
-		2 : control->antenna_sel_tx - 1;
+	txhdr->antenna = (info->antenna_sel_tx == 0) ?
+		2 : info->antenna_sel_tx - 1;
 	txhdr->output_power = 0x7f; // HW Maximum
-	txhdr->magic5 = (control->flags & IEEE80211_TXCTL_NO_ACK) ?
+	txhdr->magic5 = (info->flags & IEEE80211_TX_CTL_NO_ACK) ?
 		0 : ((rate > 0x3) ? cpu_to_le32(0x33) : cpu_to_le32(0x23));
 	if (padding)
 		txhdr->align[0] = padding;
 
+	/* modifies skb->cb and with it info, so must be last! */
+	p54_assign_address(dev, skb, hdr, skb->len);
+
 	priv->tx(dev, hdr, skb->len, 0);
 	return 0;
 }
@@ -634,7 +624,7 @@ static int p54_set_filter(struct ieee80211_hw *dev, u16 filter_type,
 	filter = (struct p54_tx_control_filter *) hdr->data;
 	hdr->magic1 = cpu_to_le16(0x8001);
 	hdr->len = cpu_to_le16(sizeof(*filter));
-	p54_assign_address(dev, NULL, hdr, sizeof(*hdr) + sizeof(*filter), NULL);
+	p54_assign_address(dev, NULL, hdr, sizeof(*hdr) + sizeof(*filter));
 	hdr->type = cpu_to_le16(P54_CONTROL_TYPE_FILTER_SET);
 
 	filter->filter_type = cpu_to_le16(filter_type);
@@ -678,7 +668,7 @@ static int p54_set_freq(struct ieee80211_hw *dev, __le16 freq)
 	hdr->magic1 = cpu_to_le16(0x8001);
 	hdr->len = cpu_to_le16(sizeof(*chan));
 	hdr->type = cpu_to_le16(P54_CONTROL_TYPE_CHANNEL_CHANGE);
-	p54_assign_address(dev, NULL, hdr, sizeof(*hdr) + payload_len, NULL);
+	p54_assign_address(dev, NULL, hdr, sizeof(*hdr) + payload_len);
 
 	chan->magic1 = cpu_to_le16(0x1);
 	chan->magic2 = cpu_to_le16(0x0);
@@ -751,7 +741,7 @@ static int p54_set_leds(struct ieee80211_hw *dev, int mode, int link, int act)
 	hdr->magic1 = cpu_to_le16(0x8001);
 	hdr->len = cpu_to_le16(sizeof(*led));
 	hdr->type = cpu_to_le16(P54_CONTROL_TYPE_LED);
-	p54_assign_address(dev, NULL, hdr, sizeof(*hdr) + sizeof(*led), NULL);
+	p54_assign_address(dev, NULL, hdr, sizeof(*hdr) + sizeof(*led));
 
 	led = (struct p54_tx_control_led *) hdr->data;
 	led->mode = cpu_to_le16(mode);
@@ -801,7 +791,7 @@ static void p54_set_vdcf(struct ieee80211_hw *dev)
 
 	hdr = (void *)priv->cached_vdcf + priv->tx_hdr_len;
 
-	p54_assign_address(dev, NULL, hdr, sizeof(*hdr) + sizeof(*vdcf), NULL);
+	p54_assign_address(dev, NULL, hdr, sizeof(*hdr) + sizeof(*vdcf));
 
 	vdcf = (struct p54_tx_control_vdcf *) hdr->data;
 
@@ -837,12 +827,8 @@ static void p54_stop(struct ieee80211_hw *dev)
 {
 	struct p54_common *priv = dev->priv;
 	struct sk_buff *skb;
-	while ((skb = skb_dequeue(&priv->tx_queue))) {
-		struct memrecord *range = (struct memrecord *)&skb->cb;
-		if (range->control)
-			kfree(range->control);
+	while ((skb = skb_dequeue(&priv->tx_queue)))
 		kfree_skb(skb);
-	}
 	priv->stop(dev);
 	priv->mode = IEEE80211_IF_TYPE_INVALID;
 }
diff --git a/drivers/net/wireless/p54/p54common.h b/drivers/net/wireless/p54/p54common.h
index c15b56e1d75..2245fcce92d 100644
--- a/drivers/net/wireless/p54/p54common.h
+++ b/drivers/net/wireless/p54/p54common.h
@@ -152,7 +152,6 @@ struct pda_pa_curve_data {
 struct memrecord {
 	u32 start_addr;
 	u32 end_addr;
-	struct ieee80211_tx_control *control;
 };
 
 struct p54_eeprom_lm86 {
diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c
index 4fcba9b4635..900140d3b30 100644
--- a/drivers/net/wireless/rt2x00/rt2400pci.c
+++ b/drivers/net/wireless/rt2x00/rt2400pci.c
@@ -1484,11 +1484,11 @@ static u64 rt2400pci_get_tsf(struct ieee80211_hw *hw)
 	return tsf;
 }
 
-static int rt2400pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
-				   struct ieee80211_tx_control *control)
+static int rt2400pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
-	struct rt2x00_intf *intf = vif_to_intf(control->vif);
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+	struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
 	struct queue_entry_priv_pci *entry_priv;
 	struct skb_frame_desc *skbdesc;
 	struct txentry_desc txdesc;
@@ -1504,7 +1504,7 @@ static int rt2400pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
 	 * for our information.
 	 */
 	intf->beacon->skb = skb;
-	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc, control);
+	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc);
 
 	/*
 	 * Fill in skb descriptor
diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c
index 06e87cdff45..673350953b8 100644
--- a/drivers/net/wireless/rt2x00/rt2500pci.c
+++ b/drivers/net/wireless/rt2x00/rt2500pci.c
@@ -1799,11 +1799,11 @@ static u64 rt2500pci_get_tsf(struct ieee80211_hw *hw)
 	return tsf;
 }
 
-static int rt2500pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
-				   struct ieee80211_tx_control *control)
+static int rt2500pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
-	struct rt2x00_intf *intf = vif_to_intf(control->vif);
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+	struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
 	struct queue_entry_priv_pci *entry_priv;
 	struct skb_frame_desc *skbdesc;
 	struct txentry_desc txdesc;
@@ -1820,7 +1820,7 @@ static int rt2500pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
 	 * for our information.
 	 */
 	intf->beacon->skb = skb;
-	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc, control);
+	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc);
 
 	/*
 	 * Fill in skb descriptor
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index 4122c5ebe7c..cca1504550d 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -1666,13 +1666,12 @@ static int rt2500usb_probe_hw(struct rt2x00_dev *rt2x00dev)
 /*
  * IEEE80211 stack callback functions.
  */
-static int rt2500usb_beacon_update(struct ieee80211_hw *hw,
-				   struct sk_buff *skb,
-				   struct ieee80211_tx_control *control)
+static int rt2500usb_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
 	struct usb_device *usb_dev = rt2x00dev_usb_dev(rt2x00dev);
-	struct rt2x00_intf *intf = vif_to_intf(control->vif);
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+	struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
 	struct queue_entry_priv_usb_bcn *bcn_priv;
 	struct skb_frame_desc *skbdesc;
 	struct txentry_desc txdesc;
@@ -1691,7 +1690,7 @@ static int rt2500usb_beacon_update(struct ieee80211_hw *hw,
 	 * for our information.
 	 */
 	intf->beacon->skb = skb;
-	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc, control);
+	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc);
 
 	/*
 	 * Add the descriptor in front of the skb.
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 1900d4c0e84..5c7220ea46e 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -542,8 +542,7 @@ struct rt2x00lib_ops {
 			       struct sk_buff *skb,
 			       struct txentry_desc *txdesc);
 	int (*write_tx_data) (struct rt2x00_dev *rt2x00dev,
-			      struct data_queue *queue, struct sk_buff *skb,
-			      struct ieee80211_tx_control *control);
+			      struct data_queue *queue, struct sk_buff *skb);
 	int (*get_tx_data_len) (struct rt2x00_dev *rt2x00dev,
 				struct sk_buff *skb);
 	void (*kick_tx_queue) (struct rt2x00_dev *rt2x00dev,
@@ -930,7 +929,6 @@ static inline u16 get_duration_res(const unsigned int size, const u8 rate)
  * rt2x00queue_create_tx_descriptor - Create TX descriptor from mac80211 input
  * @entry: The entry which will be used to transfer the TX frame.
  * @txdesc: rt2x00 TX descriptor which will be initialized by this function.
- * @control: mac80211 TX control structure from where we read the information.
  *
  * This function will initialize the &struct txentry_desc based on information
  * from mac80211. This descriptor can then be used by rt2x00lib and the drivers
@@ -943,8 +941,7 @@ static inline u16 get_duration_res(const unsigned int size, const u8 rate)
  * the &struct txentry_desc structure.
  */
 void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
-				      struct txentry_desc *txdesc,
-				      struct ieee80211_tx_control *control);
+				      struct txentry_desc *txdesc);
 
 /**
  * rt2x00queue_write_tx_descriptor - Write TX descriptor to hardware
@@ -1001,8 +998,7 @@ void rt2x00lib_rxdone(struct queue_entry *entry,
 /*
  * mac80211 handlers.
  */
-int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
-		 struct ieee80211_tx_control *control);
+int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb);
 int rt2x00mac_start(struct ieee80211_hw *hw);
 void rt2x00mac_stop(struct ieee80211_hw *hw);
 int rt2x00mac_add_interface(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index d341764e1b2..69e233610c9 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -415,7 +415,6 @@ static void rt2x00lib_intf_scheduled_iter(void *data, u8 *mac,
 	struct rt2x00_dev *rt2x00dev = data;
 	struct rt2x00_intf *intf = vif_to_intf(vif);
 	struct sk_buff *skb;
-	struct ieee80211_tx_control control;
 	struct ieee80211_bss_conf conf;
 	int delayed_flags;
 
@@ -433,9 +432,9 @@ static void rt2x00lib_intf_scheduled_iter(void *data, u8 *mac,
 	spin_unlock(&intf->lock);
 
 	if (delayed_flags & DELAYED_UPDATE_BEACON) {
-		skb = ieee80211_beacon_get(rt2x00dev->hw, vif, &control);
-		if (skb && rt2x00dev->ops->hw->beacon_update(rt2x00dev->hw,
-							     skb, &control))
+		skb = ieee80211_beacon_get(rt2x00dev->hw, vif);
+		if (skb &&
+		    rt2x00dev->ops->hw->beacon_update(rt2x00dev->hw, skb))
 			dev_kfree_skb(skb);
 	}
 
@@ -494,8 +493,13 @@ void rt2x00lib_txdone(struct queue_entry *entry,
 		      struct txdone_entry_desc *txdesc)
 {
 	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
-	struct skb_frame_desc *skbdesc;
-	struct ieee80211_tx_status tx_status;
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb);
+
+	/*
+	 * Send frame to debugfs immediately, after this call is completed
+	 * we are going to overwrite the skb->cb array.
+	 */
+	rt2x00debug_dump_frame(rt2x00dev, DUMP_FRAME_TXDONE, entry->skb);
 
 	/*
 	 * Update TX statistics.
@@ -508,21 +512,20 @@ void rt2x00lib_txdone(struct queue_entry *entry,
 	/*
 	 * Initialize TX status
 	 */
-	tx_status.flags = 0;
-	tx_status.ack_signal = 0;
-	tx_status.excessive_retries =
+	memset(&tx_info->status, 0, sizeof(tx_info->status));
+	tx_info->status.ack_signal = 0;
+	tx_info->status.excessive_retries =
 	    test_bit(TXDONE_EXCESSIVE_RETRY, &txdesc->flags);
-	tx_status.retry_count = txdesc->retry;
-	memcpy(&tx_status.control, txdesc->control, sizeof(*txdesc->control));
+	tx_info->status.retry_count = txdesc->retry;
 
-	if (!(tx_status.control.flags & IEEE80211_TXCTL_NO_ACK)) {
+	if (!(tx_info->flags & IEEE80211_TX_CTL_NO_ACK)) {
 		if (test_bit(TXDONE_SUCCESS, &txdesc->flags))
-			tx_status.flags |= IEEE80211_TX_STATUS_ACK;
+			tx_info->flags |= IEEE80211_TX_STAT_ACK;
 		else if (test_bit(TXDONE_FAILURE, &txdesc->flags))
 			rt2x00dev->low_level_stats.dot11ACKFailureCount++;
 	}
 
-	if (tx_status.control.flags & IEEE80211_TXCTL_USE_RTS_CTS) {
+	if (tx_info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
 		if (test_bit(TXDONE_SUCCESS, &txdesc->flags))
 			rt2x00dev->low_level_stats.dot11RTSSuccessCount++;
 		else if (test_bit(TXDONE_FAILURE, &txdesc->flags))
@@ -530,19 +533,13 @@ void rt2x00lib_txdone(struct queue_entry *entry,
 	}
 
 	/*
-	 * Send the tx_status to debugfs. Only send the status report
-	 * to mac80211 when the frame originated from there. If this was
-	 * a extra frame coming through a mac80211 library call (RTS/CTS)
-	 * then we should not send the status report back.
-	 * If send to mac80211, mac80211 will clean up the skb structure,
-	 * otherwise we have to do it ourself.
+	 * Only send the status report to mac80211 when TX status was
+	 * requested by it. If this was a extra frame coming through
+	 * a mac80211 library call (RTS/CTS) then we should not send the
+	 * status report back.
 	 */
-	rt2x00debug_dump_frame(rt2x00dev, DUMP_FRAME_TXDONE, entry->skb);
-
-	skbdesc = get_skb_frame_desc(entry->skb);
-	if (!(skbdesc->flags & FRAME_DESC_DRIVER_GENERATED))
-		ieee80211_tx_status_irqsafe(rt2x00dev->hw,
-					    entry->skb, &tx_status);
+	if (tx_info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS)
+		ieee80211_tx_status_irqsafe(rt2x00dev->hw, entry->skb);
 	else
 		dev_kfree_skb_irq(entry->skb);
 	entry->skb = NULL;
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index c5cedb29b87..b5379b027b1 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -31,14 +31,15 @@
 
 static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 				struct data_queue *queue,
-				struct sk_buff *frag_skb,
-				struct ieee80211_tx_control *control)
+				struct sk_buff *frag_skb)
 {
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(frag_skb);
 	struct skb_frame_desc *skbdesc;
+	struct ieee80211_tx_info *rts_info;
 	struct sk_buff *skb;
 	int size;
 
-	if (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)
+	if (tx_info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
 		size = sizeof(struct ieee80211_cts);
 	else
 		size = sizeof(struct ieee80211_rts);
@@ -52,13 +53,33 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 	skb_reserve(skb, rt2x00dev->hw->extra_tx_headroom);
 	skb_put(skb, size);
 
-	if (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)
-		ieee80211_ctstoself_get(rt2x00dev->hw, control->vif,
-					frag_skb->data, frag_skb->len, control,
+	/*
+	 * Copy TX information over from original frame to
+	 * RTS/CTS frame. Note that we set the no encryption flag
+	 * since we don't want this frame to be encrypted.
+	 * RTS frames should be acked, while CTS-to-self frames
+	 * should not. The ready for TX flag is cleared to prevent
+	 * it being automatically send when the descriptor is
+	 * written to the hardware.
+	 */
+	memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb));
+	rts_info = IEEE80211_SKB_CB(skb);
+	rts_info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT;
+	rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS;
+
+	if (tx_info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
+		rts_info->flags |= IEEE80211_TX_CTL_NO_ACK;
+	else
+		rts_info->flags &= ~IEEE80211_TX_CTL_NO_ACK;
+
+	if (tx_info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
+		ieee80211_ctstoself_get(rt2x00dev->hw, tx_info->control.vif,
+					frag_skb->data, size, tx_info,
 					(struct ieee80211_cts *)(skb->data));
 	else
-		ieee80211_rts_get(rt2x00dev->hw, control->vif,
-				  frag_skb->data, frag_skb->len, control,
+		ieee80211_rts_get(rt2x00dev->hw, tx_info->control.vif,
+				  frag_skb->data, size, tx_info,
 				  (struct ieee80211_rts *)(skb->data));
 
 	/*
@@ -68,7 +89,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 	memset(skbdesc, 0, sizeof(*skbdesc));
 	skbdesc->flags |= FRAME_DESC_DRIVER_GENERATED;
 
-	if (rt2x00dev->ops->lib->write_tx_data(rt2x00dev, queue, skb, control)) {
+	if (rt2x00dev->ops->lib->write_tx_data(rt2x00dev, queue, skb)) {
 		WARNING(rt2x00dev, "Failed to send RTS/CTS frame.\n");
 		return NETDEV_TX_BUSY;
 	}
@@ -76,14 +97,13 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 	return NETDEV_TX_OK;
 }
 
-int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
-		 struct ieee80211_tx_control *control)
+int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data;
-	enum data_queue_qid qid = mac80211_queue_to_qid(control->queue);
+	enum data_queue_qid qid = mac80211_queue_to_qid(tx_info->queue);
 	struct data_queue *queue;
-	struct skb_frame_desc *skbdesc;
 	u16 frame_control;
 
 	/*
@@ -100,7 +120,7 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	/*
 	 * Determine which queue to put packet on.
 	 */
-	if (control->flags & IEEE80211_TXCTL_SEND_AFTER_DTIM &&
+	if (tx_info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM &&
 	    test_bit(DRIVER_REQUIRE_ATIM_QUEUE, &rt2x00dev->flags))
 		queue = rt2x00queue_get_queue(rt2x00dev, QID_ATIM);
 	else
@@ -125,33 +145,27 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	 */
 	frame_control = le16_to_cpu(ieee80211hdr->frame_control);
 	if (!is_rts_frame(frame_control) && !is_cts_frame(frame_control) &&
-	    (control->flags & (IEEE80211_TXCTL_USE_RTS_CTS |
-			       IEEE80211_TXCTL_USE_CTS_PROTECT)) &&
+	    (tx_info->flags & (IEEE80211_TX_CTL_USE_RTS_CTS |
+			       IEEE80211_TX_CTL_USE_CTS_PROTECT)) &&
 	    !rt2x00dev->ops->hw->set_rts_threshold) {
 		if (rt2x00queue_available(queue) <= 1) {
-			ieee80211_stop_queue(rt2x00dev->hw, control->queue);
+			ieee80211_stop_queue(rt2x00dev->hw, tx_info->queue);
 			return NETDEV_TX_BUSY;
 		}
 
-		if (rt2x00mac_tx_rts_cts(rt2x00dev, queue, skb, control)) {
-			ieee80211_stop_queue(rt2x00dev->hw, control->queue);
+		if (rt2x00mac_tx_rts_cts(rt2x00dev, queue, skb)) {
+			ieee80211_stop_queue(rt2x00dev->hw, tx_info->queue);
 			return NETDEV_TX_BUSY;
 		}
 	}
 
-	/*
-	 * Initialize skb descriptor
-	 */
-	skbdesc = get_skb_frame_desc(skb);
-	memset(skbdesc, 0, sizeof(*skbdesc));
-
-	if (rt2x00dev->ops->lib->write_tx_data(rt2x00dev, queue, skb, control)) {
-		ieee80211_stop_queue(rt2x00dev->hw, control->queue);
+	if (rt2x00dev->ops->lib->write_tx_data(rt2x00dev, queue, skb)) {
+		ieee80211_stop_queue(rt2x00dev->hw, tx_info->queue);
 		return NETDEV_TX_BUSY;
 	}
 
 	if (rt2x00queue_full(queue))
-		ieee80211_stop_queue(rt2x00dev->hw, control->queue);
+		ieee80211_stop_queue(rt2x00dev->hw, tx_info->queue);
 
 	if (rt2x00dev->ops->lib->kick_tx_queue)
 		rt2x00dev->ops->lib->kick_tx_queue(rt2x00dev, qid);
@@ -380,9 +394,7 @@ int rt2x00mac_config_interface(struct ieee80211_hw *hw,
 	if (conf->type != IEEE80211_IF_TYPE_AP || !conf->beacon)
 		return 0;
 
-	status = rt2x00dev->ops->hw->beacon_update(rt2x00dev->hw,
-						   conf->beacon,
-						   conf->beacon_control);
+	status = rt2x00dev->ops->hw->beacon_update(rt2x00dev->hw, conf->beacon);
 	if (status)
 		dev_kfree_skb(conf->beacon);
 
diff --git a/drivers/net/wireless/rt2x00/rt2x00pci.c b/drivers/net/wireless/rt2x00/rt2x00pci.c
index fa7de41be04..70a3d135f64 100644
--- a/drivers/net/wireless/rt2x00/rt2x00pci.c
+++ b/drivers/net/wireless/rt2x00/rt2x00pci.c
@@ -35,8 +35,7 @@
  * TX data handlers.
  */
 int rt2x00pci_write_tx_data(struct rt2x00_dev *rt2x00dev,
-			    struct data_queue *queue, struct sk_buff *skb,
-			    struct ieee80211_tx_control *control)
+			    struct data_queue *queue, struct sk_buff *skb)
 {
 	struct queue_entry *entry = rt2x00queue_get_entry(queue, Q_INDEX);
 	struct queue_entry_priv_pci *entry_priv = entry->priv_data;
@@ -64,19 +63,19 @@ int rt2x00pci_write_tx_data(struct rt2x00_dev *rt2x00dev,
 	 * for our information.
 	 */
 	entry->skb = skb;
-	rt2x00queue_create_tx_descriptor(entry, &txdesc, control);
+	rt2x00queue_create_tx_descriptor(entry, &txdesc);
 
 	/*
 	 * Fill in skb descriptor
 	 */
 	skbdesc = get_skb_frame_desc(skb);
+	memset(skbdesc, 0, sizeof(*skbdesc));
 	skbdesc->data = skb->data;
 	skbdesc->data_len = skb->len;
 	skbdesc->desc = entry_priv->desc;
 	skbdesc->desc_len = queue->desc_size;
 	skbdesc->entry = entry;
 
-	memcpy(&entry_priv->control, control, sizeof(entry_priv->control));
 	memcpy(entry_priv->data, skb->data, skb->len);
 
 	rt2x00queue_write_tx_descriptor(entry, &txdesc);
@@ -164,9 +163,9 @@ void rt2x00pci_txdone(struct rt2x00_dev *rt2x00dev, struct queue_entry *entry,
 		      struct txdone_entry_desc *txdesc)
 {
 	struct queue_entry_priv_pci *entry_priv = entry->priv_data;
+	enum data_queue_qid qid = skb_get_queue_mapping(entry->skb);
 	u32 word;
 
-	txdesc->control = &entry_priv->control;
 	rt2x00lib_txdone(entry, txdesc);
 
 	/*
@@ -187,7 +186,7 @@ void rt2x00pci_txdone(struct rt2x00_dev *rt2x00dev, struct queue_entry *entry,
 	 * is reenabled when the txdone handler has finished.
 	 */
 	if (!rt2x00queue_full(entry->queue))
-		ieee80211_wake_queue(rt2x00dev->hw, entry_priv->control.queue);
+		ieee80211_wake_queue(rt2x00dev->hw, qid);
 
 }
 EXPORT_SYMBOL_GPL(rt2x00pci_txdone);
diff --git a/drivers/net/wireless/rt2x00/rt2x00pci.h b/drivers/net/wireless/rt2x00/rt2x00pci.h
index 557d15a888a..37c851e442c 100644
--- a/drivers/net/wireless/rt2x00/rt2x00pci.h
+++ b/drivers/net/wireless/rt2x00/rt2x00pci.h
@@ -91,8 +91,7 @@ rt2x00pci_register_multiwrite(struct rt2x00_dev *rt2x00dev,
  * TX data handlers.
  */
 int rt2x00pci_write_tx_data(struct rt2x00_dev *rt2x00dev,
-			    struct data_queue *queue, struct sk_buff *skb,
-			    struct ieee80211_tx_control *control);
+			    struct data_queue *queue, struct sk_buff *skb);
 
 /**
  * struct queue_entry_priv_pci: Per entry PCI specific information
@@ -101,7 +100,6 @@ int rt2x00pci_write_tx_data(struct rt2x00_dev *rt2x00dev,
  * @desc_dma: DMA pointer to &desc.
  * @data: Pointer to device's entry memory.
  * @data_dma: DMA pointer to &data.
- * @control: mac80211 control structure used to transmit data.
  */
 struct queue_entry_priv_pci {
 	__le32 *desc;
@@ -109,8 +107,6 @@ struct queue_entry_priv_pci {
 
 	void *data;
 	dma_addr_t data_dma;
-
-	struct ieee80211_tx_control control;
 };
 
 /**
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index 5cf4c2f5926..e69ef4b1923 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -30,13 +30,13 @@
 #include "rt2x00lib.h"
 
 void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
-				      struct txentry_desc *txdesc,
-				      struct ieee80211_tx_control *control)
+				      struct txentry_desc *txdesc)
 {
 	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data;
 	struct ieee80211_rate *rate =
-	    ieee80211_get_tx_rate(rt2x00dev->hw, control);
+	    ieee80211_get_tx_rate(rt2x00dev->hw, tx_info);
 	const struct rt2x00_rate *hwrate;
 	unsigned int data_length;
 	unsigned int duration;
@@ -64,7 +64,7 @@ void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
 	/*
 	 * Check whether this frame is to be acked.
 	 */
-	if (!(control->flags & IEEE80211_TXCTL_NO_ACK))
+	if (!(tx_info->flags & IEEE80211_TX_CTL_NO_ACK))
 		__set_bit(ENTRY_TXD_ACK, &txdesc->flags);
 
 	/*
@@ -72,23 +72,20 @@ void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
 	 */
 	if (is_rts_frame(frame_control) || is_cts_frame(frame_control)) {
 		__set_bit(ENTRY_TXD_BURST, &txdesc->flags);
-		if (is_rts_frame(frame_control)) {
+		if (is_rts_frame(frame_control))
 			__set_bit(ENTRY_TXD_RTS_FRAME, &txdesc->flags);
-			__set_bit(ENTRY_TXD_ACK, &txdesc->flags);
-		} else {
+		else
 			__set_bit(ENTRY_TXD_CTS_FRAME, &txdesc->flags);
-			__clear_bit(ENTRY_TXD_ACK, &txdesc->flags);
-		}
-		if (control->rts_cts_rate_idx >= 0)
+		if (tx_info->control.rts_cts_rate_idx >= 0)
 			rate =
-			    ieee80211_get_rts_cts_rate(rt2x00dev->hw, control);
+			    ieee80211_get_rts_cts_rate(rt2x00dev->hw, tx_info);
 	}
 
 	/*
 	 * Determine retry information.
 	 */
-	txdesc->retry_limit = control->retry_limit;
-	if (control->flags & IEEE80211_TXCTL_LONG_RETRY_LIMIT)
+	txdesc->retry_limit = tx_info->control.retry_limit;
+	if (tx_info->flags & IEEE80211_TX_CTL_LONG_RETRY_LIMIT)
 		__set_bit(ENTRY_TXD_RETRY_MODE, &txdesc->flags);
 
 	/*
@@ -113,7 +110,7 @@ void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
 	 */
 	if (test_bit(ENTRY_TXD_RTS_FRAME, &txdesc->flags)) {
 		txdesc->ifs = IFS_SIFS;
-	} else if (control->flags & IEEE80211_TXCTL_FIRST_FRAGMENT) {
+	} else if (tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT) {
 		__set_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags);
 		txdesc->ifs = IFS_BACKOFF;
 	} else {
@@ -179,8 +176,10 @@ void rt2x00queue_write_tx_descriptor(struct queue_entry *entry,
 
 	/*
 	 * We are done writing the frame to the queue entry,
-	 * if this entry is a RTS of CTS-to-self frame we are done,
-	 * otherwise we need to kick the queue.
+	 * also kick the queue in case the correct flags are set,
+	 * note that this will automatically filter beacons and
+	 * RTS/CTS frames since those frames don't have this flag
+	 * set.
 	 */
 	if (rt2x00dev->ops->lib->kick_tx_queue &&
 	    !(skbdesc->flags & FRAME_DESC_DRIVER_GENERATED))
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.h b/drivers/net/wireless/rt2x00/rt2x00queue.h
index c6edc52873c..f263fe422f8 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.h
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.h
@@ -105,8 +105,8 @@ enum skb_frame_desc_flags {
 /**
  * struct skb_frame_desc: Descriptor information for the skb buffer
  *
- * This structure is placed over the skb->cb array, this means that
- * this structure should not exceed the size of that array (48 bytes).
+ * This structure is placed over the driver_data array, this means that
+ * this structure should not exceed the size of that array (40 bytes).
  *
  * @flags: Frame flags, see &enum skb_frame_desc_flags.
  * @data: Pointer to data part of frame (Start of ieee80211 header).
@@ -129,10 +129,15 @@ struct skb_frame_desc {
 	struct queue_entry *entry;
 };
 
+/**
+ * get_skb_frame_desc - Obtain the rt2x00 frame descriptor from a sk_buff.
+ * @skb: &struct sk_buff from where we obtain the &struct skb_frame_desc
+ */
 static inline struct skb_frame_desc* get_skb_frame_desc(struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(struct skb_frame_desc) > sizeof(skb->cb));
-	return (struct skb_frame_desc *)&skb->cb[0];
+	BUILD_BUG_ON(sizeof(struct skb_frame_desc) >
+		     IEEE80211_TX_INFO_DRIVER_DATA_SIZE);
+	return (struct skb_frame_desc *)&IEEE80211_SKB_CB(skb)->driver_data;
 }
 
 /**
@@ -189,12 +194,10 @@ enum txdone_entry_desc_flags {
  * Summary of information that has been read from the TX frame descriptor
  * after the device is done with transmission.
  *
- * @control: Control structure which was used to transmit the frame.
  * @flags: TX done flags (See &enum txdone_entry_desc_flags).
  * @retry: Retry count.
  */
 struct txdone_entry_desc {
-	struct ieee80211_tx_control *control;
 	unsigned long flags;
 	int retry;
 };
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index dcee1b4f152..52d12fdc0cc 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -129,9 +129,9 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
 {
 	struct queue_entry *entry = (struct queue_entry *)urb->context;
 	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
-	struct queue_entry_priv_usb *entry_priv = entry->priv_data;
 	struct txdone_entry_desc txdesc;
 	__le32 *txd = (__le32 *)entry->skb->data;
+	enum data_queue_qid qid = skb_get_queue_mapping(entry->skb);
 	u32 word;
 
 	if (!test_bit(DEVICE_ENABLED_RADIO, &rt2x00dev->flags) ||
@@ -159,7 +159,6 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
 	else
 		__set_bit(TXDONE_FAILURE, &txdesc.flags);
 	txdesc.retry = 0;
-	txdesc.control = &entry_priv->control;
 
 	rt2x00lib_txdone(entry, &txdesc);
 
@@ -175,12 +174,11 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
 	 * is reenabled when the txdone handler has finished.
 	 */
 	if (!rt2x00queue_full(entry->queue))
-		ieee80211_wake_queue(rt2x00dev->hw, entry_priv->control.queue);
+		ieee80211_wake_queue(rt2x00dev->hw, qid);
 }
 
 int rt2x00usb_write_tx_data(struct rt2x00_dev *rt2x00dev,
-			    struct data_queue *queue, struct sk_buff *skb,
-			    struct ieee80211_tx_control *control)
+			    struct data_queue *queue, struct sk_buff *skb)
 {
 	struct usb_device *usb_dev = rt2x00dev_usb_dev(rt2x00dev);
 	struct queue_entry *entry = rt2x00queue_get_entry(queue, Q_INDEX);
@@ -206,7 +204,7 @@ int rt2x00usb_write_tx_data(struct rt2x00_dev *rt2x00dev,
 	 * for our information.
 	 */
 	entry->skb = skb;
-	rt2x00queue_create_tx_descriptor(entry, &txdesc, control);
+	rt2x00queue_create_tx_descriptor(entry, &txdesc);
 
 	/*
 	 * Add the descriptor in front of the skb.
@@ -218,13 +216,13 @@ int rt2x00usb_write_tx_data(struct rt2x00_dev *rt2x00dev,
 	 * Fill in skb descriptor
 	 */
 	skbdesc = get_skb_frame_desc(skb);
+	memset(skbdesc, 0, sizeof(*skbdesc));
 	skbdesc->data = skb->data + queue->desc_size;
 	skbdesc->data_len = skb->len - queue->desc_size;
 	skbdesc->desc = skb->data;
 	skbdesc->desc_len = queue->desc_size;
 	skbdesc->entry = entry;
 
-	memcpy(&entry_priv->control, control, sizeof(entry_priv->control));
 	rt2x00queue_write_tx_descriptor(entry, &txdesc);
 
 	/*
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.h b/drivers/net/wireless/rt2x00/rt2x00usb.h
index 15b404aa714..26f53f868af 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.h
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.h
@@ -216,19 +216,15 @@ void rt2x00usb_disable_radio(struct rt2x00_dev *rt2x00dev);
  * TX data handlers.
  */
 int rt2x00usb_write_tx_data(struct rt2x00_dev *rt2x00dev,
-			    struct data_queue *queue, struct sk_buff *skb,
-			    struct ieee80211_tx_control *control);
+			    struct data_queue *queue, struct sk_buff *skb);
 
 /**
  * struct queue_entry_priv_usb: Per entry USB specific information
  *
  * @urb: Urb structure used for device communication.
- * @control: mac80211 control structure used to transmit data.
  */
 struct queue_entry_priv_usb {
 	struct urb *urb;
-
-	struct ieee80211_tx_control control;
 };
 
 /**
@@ -239,15 +235,12 @@ struct queue_entry_priv_usb {
  * with beacons.
  *
  * @urb: Urb structure used for device communication.
- * @control: mac80211 control structure used to transmit data.
  * @guardian_data: Set to 0, used for sending the guardian data.
  * @guardian_urb: Urb structure used to send the guardian data.
  */
 struct queue_entry_priv_usb_bcn {
 	struct urb *urb;
 
-	struct ieee80211_tx_control control;
-
 	unsigned int guardian_data;
 	struct urb *guardian_urb;
 };
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index 7598b6e1578..e13ed5ced26 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -2357,11 +2357,11 @@ static u64 rt61pci_get_tsf(struct ieee80211_hw *hw)
 	return tsf;
 }
 
-static int rt61pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
-				 struct ieee80211_tx_control *control)
+static int rt61pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
-	struct rt2x00_intf *intf = vif_to_intf(control->vif);
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+	struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
 	struct queue_entry_priv_pci *entry_priv;
 	struct skb_frame_desc *skbdesc;
 	struct txentry_desc txdesc;
@@ -2377,7 +2377,7 @@ static int rt61pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
 	 * for our information.
 	 */
 	intf->beacon->skb = skb;
-	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc, control);
+	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc);
 
 	entry_priv = intf->beacon->priv_data;
 	memset(entry_priv->desc, 0, intf->beacon->queue->desc_size);
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index e55bcbdfc1e..26c2e0a1a30 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -1948,11 +1948,11 @@ static u64 rt73usb_get_tsf(struct ieee80211_hw *hw)
 #define rt73usb_get_tsf	NULL
 #endif
 
-static int rt73usb_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
-				 struct ieee80211_tx_control *control)
+static int rt73usb_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
-	struct rt2x00_intf *intf = vif_to_intf(control->vif);
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+	struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
 	struct skb_frame_desc *skbdesc;
 	struct txentry_desc txdesc;
 	unsigned int beacon_base;
@@ -1967,7 +1967,7 @@ static int rt73usb_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
 	 * for our information.
 	 */
 	intf->beacon->skb = skb;
-	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc, control);
+	rt2x00queue_create_tx_descriptor(intf->beacon, &txdesc);
 
 	/*
 	 * Add the descriptor in front of the skb.
diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c
index 6263209b889..4427bc9f78a 100644
--- a/drivers/net/wireless/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl8180_dev.c
@@ -170,34 +170,29 @@ static void rtl8180_handle_tx(struct ieee80211_hw *dev, unsigned int prio)
 	while (skb_queue_len(&ring->queue)) {
 		struct rtl8180_tx_desc *entry = &ring->desc[ring->idx];
 		struct sk_buff *skb;
-		struct ieee80211_tx_status status;
-		struct ieee80211_tx_control *control;
+		struct ieee80211_tx_info *info;
 		u32 flags = le32_to_cpu(entry->flags);
 
 		if (flags & RTL8180_TX_DESC_FLAG_OWN)
 			return;
 
-		memset(&status, 0, sizeof(status));
-
 		ring->idx = (ring->idx + 1) % ring->entries;
 		skb = __skb_dequeue(&ring->queue);
 		pci_unmap_single(priv->pdev, le32_to_cpu(entry->tx_buf),
 				 skb->len, PCI_DMA_TODEVICE);
 
-		control = *((struct ieee80211_tx_control **)skb->cb);
-		if (control)
-			memcpy(&status.control, control, sizeof(*control));
-		kfree(control);
+		info = IEEE80211_SKB_CB(skb);
+		memset(&info->status, 0, sizeof(info->status));
 
-		if (!(status.control.flags & IEEE80211_TXCTL_NO_ACK)) {
+		if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) {
 			if (flags & RTL8180_TX_DESC_FLAG_TX_OK)
-				status.flags = IEEE80211_TX_STATUS_ACK;
+				info->flags |= IEEE80211_TX_STAT_ACK;
 			else
-				status.excessive_retries = 1;
+				info->status.excessive_retries = 1;
 		}
-		status.retry_count = flags & 0xFF;
+		info->status.retry_count = flags & 0xFF;
 
-		ieee80211_tx_status_irqsafe(dev, skb, &status);
+		ieee80211_tx_status_irqsafe(dev, skb);
 		if (ring->entries - skb_queue_len(&ring->queue) == 2)
 			ieee80211_wake_queue(dev, prio);
 	}
@@ -238,9 +233,9 @@ static irqreturn_t rtl8180_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
-		      struct ieee80211_tx_control *control)
+static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 {
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct rtl8180_priv *priv = dev->priv;
 	struct rtl8180_tx_ring *ring;
 	struct rtl8180_tx_desc *entry;
@@ -251,7 +246,7 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	u16 plcp_len = 0;
 	__le16 rts_duration = 0;
 
-	prio = control->queue;
+	prio = info->queue;
 	ring = &priv->tx_ring[prio];
 
 	mapping = pci_map_single(priv->pdev, skb->data,
@@ -259,35 +254,32 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 
 	tx_flags = RTL8180_TX_DESC_FLAG_OWN | RTL8180_TX_DESC_FLAG_FS |
 		   RTL8180_TX_DESC_FLAG_LS |
-		   (ieee80211_get_tx_rate(dev, control)->hw_value << 24) |
+		   (ieee80211_get_tx_rate(dev, info)->hw_value << 24) |
 		   skb->len;
 
 	if (priv->r8185)
 		tx_flags |= RTL8180_TX_DESC_FLAG_DMA |
 			    RTL8180_TX_DESC_FLAG_NO_ENC;
 
-	if (control->flags & IEEE80211_TXCTL_USE_RTS_CTS) {
+	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
 		tx_flags |= RTL8180_TX_DESC_FLAG_RTS;
-		tx_flags |= ieee80211_get_rts_cts_rate(dev, control)->hw_value << 19;
-	} else if (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) {
+		tx_flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
+	} else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
 		tx_flags |= RTL8180_TX_DESC_FLAG_CTS;
-		tx_flags |= ieee80211_get_rts_cts_rate(dev, control)->hw_value << 19;
+		tx_flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
 	}
 
-	*((struct ieee80211_tx_control **) skb->cb) =
-		kmemdup(control, sizeof(*control), GFP_ATOMIC);
-
-	if (control->flags & IEEE80211_TXCTL_USE_RTS_CTS)
+	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
 		rts_duration = ieee80211_rts_duration(dev, priv->vif, skb->len,
-						      control);
+						      info);
 
 	if (!priv->r8185) {
 		unsigned int remainder;
 
 		plcp_len = DIV_ROUND_UP(16 * (skb->len + 4),
-				(ieee80211_get_tx_rate(dev, control)->bitrate * 2) / 10);
+				(ieee80211_get_tx_rate(dev, info)->bitrate * 2) / 10);
 		remainder = (16 * (skb->len + 4)) %
-			    ((ieee80211_get_tx_rate(dev, control)->bitrate * 2) / 10);
+			    ((ieee80211_get_tx_rate(dev, info)->bitrate * 2) / 10);
 		if (remainder > 0 && remainder <= 6)
 			plcp_len |= 1 << 15;
 	}
@@ -300,13 +292,13 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	entry->plcp_len = cpu_to_le16(plcp_len);
 	entry->tx_buf = cpu_to_le32(mapping);
 	entry->frame_len = cpu_to_le32(skb->len);
-	entry->flags2 = control->alt_retry_rate_idx >= 0 ?
-		ieee80211_get_alt_retry_rate(dev, control)->bitrate << 4 : 0;
-	entry->retry_limit = control->retry_limit;
+	entry->flags2 = info->control.alt_retry_rate_idx >= 0 ?
+		ieee80211_get_alt_retry_rate(dev, info)->bitrate << 4 : 0;
+	entry->retry_limit = info->control.retry_limit;
 	entry->flags = cpu_to_le32(tx_flags);
 	__skb_queue_tail(&ring->queue, skb);
 	if (ring->entries - skb_queue_len(&ring->queue) < 2)
-		ieee80211_stop_queue(dev, control->queue);
+		ieee80211_stop_queue(dev, info->queue);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	rtl818x_iowrite8(priv, &priv->map->TX_DMA_POLLING, (1 << (prio + 4)));
@@ -522,7 +514,6 @@ static void rtl8180_free_tx_ring(struct ieee80211_hw *dev, unsigned int prio)
 
 		pci_unmap_single(priv->pdev, le32_to_cpu(entry->tx_buf),
 				 skb->len, PCI_DMA_TODEVICE);
-		kfree(*((struct ieee80211_tx_control **) skb->cb));
 		kfree_skb(skb);
 		ring->idx = (ring->idx + 1) % ring->entries;
 	}
diff --git a/drivers/net/wireless/rtl8187.h b/drivers/net/wireless/rtl8187.h
index 076d88b6db0..a0cfb666de0 100644
--- a/drivers/net/wireless/rtl8187.h
+++ b/drivers/net/wireless/rtl8187.h
@@ -44,12 +44,6 @@ struct rtl8187_rx_hdr {
 	__le64 mac_time;
 } __attribute__((packed));
 
-struct rtl8187_tx_info {
-	struct ieee80211_tx_control *control;
-	struct urb *urb;
-	struct ieee80211_hw *dev;
-};
-
 struct rtl8187_tx_hdr {
 	__le32 flags;
 #define RTL8187_TX_FLAG_NO_ENCRYPT	(1 << 15)
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index 86a09b49681..b581ef8a637 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -145,27 +145,22 @@ void rtl8187_write_phy(struct ieee80211_hw *dev, u8 addr, u32 data)
 
 static void rtl8187_tx_cb(struct urb *urb)
 {
-	struct ieee80211_tx_status status;
 	struct sk_buff *skb = (struct sk_buff *)urb->context;
-	struct rtl8187_tx_info *info = (struct rtl8187_tx_info *)skb->cb;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hw *hw = info->driver_data[0];
 
-	memset(&status, 0, sizeof(status));
-
-	usb_free_urb(info->urb);
-	if (info->control)
-		memcpy(&status.control, info->control, sizeof(status.control));
-	kfree(info->control);
+	usb_free_urb(info->driver_data[1]);
 	skb_pull(skb, sizeof(struct rtl8187_tx_hdr));
-	status.flags |= IEEE80211_TX_STATUS_ACK;
-	ieee80211_tx_status_irqsafe(info->dev, skb, &status);
+	memset(&info->status, 0, sizeof(info->status));
+	info->flags |= IEEE80211_TX_STAT_ACK;
+	ieee80211_tx_status_irqsafe(hw, skb);
 }
 
-static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
-		      struct ieee80211_tx_control *control)
+static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 {
 	struct rtl8187_priv *priv = dev->priv;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct rtl8187_tx_hdr *hdr;
-	struct rtl8187_tx_info *info;
 	struct urb *urb;
 	__le16 rts_dur = 0;
 	u32 flags;
@@ -179,29 +174,27 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb,
 	flags = skb->len;
 	flags |= RTL8187_TX_FLAG_NO_ENCRYPT;
 
-	flags |= ieee80211_get_tx_rate(dev, control)->hw_value << 24;
+	flags |= ieee80211_get_tx_rate(dev, info)->hw_value << 24;
 	if (ieee80211_get_morefrag((struct ieee80211_hdr *)skb->data))
 		flags |= RTL8187_TX_FLAG_MORE_FRAG;
-	if (control->flags & IEEE80211_TXCTL_USE_RTS_CTS) {
+	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
 		flags |= RTL8187_TX_FLAG_RTS;
-		flags |= ieee80211_get_rts_cts_rate(dev, control)->hw_value << 19;
+		flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
 		rts_dur = ieee80211_rts_duration(dev, priv->vif,
-						 skb->len, control);
-	} else if (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) {
+						 skb->len, info);
+	} else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
 		flags |= RTL8187_TX_FLAG_CTS;
-		flags |= ieee80211_get_rts_cts_rate(dev, control)->hw_value << 19;
+		flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
 	}
 
 	hdr = (struct rtl8187_tx_hdr *)skb_push(skb, sizeof(*hdr));
 	hdr->flags = cpu_to_le32(flags);
 	hdr->len = 0;
 	hdr->rts_duration = rts_dur;
-	hdr->retry = cpu_to_le32(control->retry_limit << 8);
+	hdr->retry = cpu_to_le32(info->control.retry_limit << 8);
 
-	info = (struct rtl8187_tx_info *)skb->cb;
-	info->control = kmemdup(control, sizeof(*control), GFP_ATOMIC);
-	info->urb = urb;
-	info->dev = dev;
+	info->driver_data[0] = dev;
+	info->driver_data[1] = urb;
 	usb_fill_bulk_urb(urb, priv->udev, usb_sndbulkpipe(priv->udev, 2),
 			  hdr, skb->len, rtl8187_tx_cb, skb);
 	usb_submit_urb(urb, GFP_ATOMIC);
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 99c508c09e5..edb1aefb4ad 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -224,36 +224,6 @@ out:
 	return r;
 }
 
-/**
- * clear_tx_skb_control_block - clears the control block of tx skbuffs
- * @skb: a &struct sk_buff pointer
- *
- * This clears the control block of skbuff buffers, which were transmitted to
- * the device. Notify that the function is not thread-safe, so prevent
- * multiple calls.
- */
-static void clear_tx_skb_control_block(struct sk_buff *skb)
-{
-	struct zd_tx_skb_control_block *cb =
-		(struct zd_tx_skb_control_block *)skb->cb;
-
-	kfree(cb->control);
-	cb->control = NULL;
-}
-
-/**
- * kfree_tx_skb - frees a tx skbuff
- * @skb: a &struct sk_buff pointer
- *
- * Frees the tx skbuff. Frees also the allocated control structure in the
- * control block if necessary.
- */
-static void kfree_tx_skb(struct sk_buff *skb)
-{
-	clear_tx_skb_control_block(skb);
-	dev_kfree_skb_any(skb);
-}
-
 static void zd_op_stop(struct ieee80211_hw *hw)
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
@@ -276,40 +246,15 @@ static void zd_op_stop(struct ieee80211_hw *hw)
 
 
 	while ((skb = skb_dequeue(ack_wait_queue)))
-		kfree_tx_skb(skb);
-}
-
-/**
- * init_tx_skb_control_block - initializes skb control block
- * @skb: a &sk_buff pointer
- * @dev: pointer to the mac80221 device
- * @control: mac80211 tx control applying for the frame in @skb
- *
- * Initializes the control block of the skbuff to be transmitted.
- */
-static int init_tx_skb_control_block(struct sk_buff *skb,
-				     struct ieee80211_hw *hw,
-	                             struct ieee80211_tx_control *control)
-{
-	struct zd_tx_skb_control_block *cb =
-		(struct zd_tx_skb_control_block *)skb->cb;
-
-	ZD_ASSERT(sizeof(*cb) <= sizeof(skb->cb));
-	memset(cb, 0, sizeof(*cb));
-	cb->hw= hw;
-	cb->control = kmalloc(sizeof(*control), GFP_ATOMIC);
-	if (cb->control == NULL)
-		return -ENOMEM;
-	memcpy(cb->control, control, sizeof(*control));
-
-	return 0;
+		dev_kfree_skb_any(skb);
 }
 
 /**
  * tx_status - reports tx status of a packet if required
  * @hw - a &struct ieee80211_hw pointer
  * @skb - a sk-buffer
- * @status - the tx status of the packet without control information
+ * @flags: extra flags to set in the TX status info
+ * @ackssi: ACK signal strength
  * @success - True for successfull transmission of the frame
  *
  * This information calls ieee80211_tx_status_irqsafe() if required by the
@@ -319,18 +264,17 @@ static int init_tx_skb_control_block(struct sk_buff *skb,
  * If no status information has been requested, the skb is freed.
  */
 static void tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
-	              struct ieee80211_tx_status *status,
-		      bool success)
+		      u32 flags, int ackssi, bool success)
 {
-	struct zd_tx_skb_control_block *cb = (struct zd_tx_skb_control_block *)
-		skb->cb;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+	memset(&info->status, 0, sizeof(info->status));
 
-	ZD_ASSERT(cb->control != NULL);
-	memcpy(&status->control, cb->control, sizeof(status->control));
 	if (!success)
-		status->excessive_retries = 1;
-	clear_tx_skb_control_block(skb);
-	ieee80211_tx_status_irqsafe(hw, skb, status);
+		info->status.excessive_retries = 1;
+	info->flags |= flags;
+	info->status.ack_signal = ackssi;
+	ieee80211_tx_status_irqsafe(hw, skb);
 }
 
 /**
@@ -345,15 +289,12 @@ void zd_mac_tx_failed(struct ieee80211_hw *hw)
 {
 	struct sk_buff_head *q = &zd_hw_mac(hw)->ack_wait_queue;
 	struct sk_buff *skb;
-	struct ieee80211_tx_status status;
 
 	skb = skb_dequeue(q);
 	if (skb == NULL)
 		return;
 
-	memset(&status, 0, sizeof(status));
-
-	tx_status(hw, skb, &status, 0);
+	tx_status(hw, skb, 0, 0, 0);
 }
 
 /**
@@ -368,28 +309,20 @@ void zd_mac_tx_failed(struct ieee80211_hw *hw)
  */
 void zd_mac_tx_to_dev(struct sk_buff *skb, int error)
 {
-	struct zd_tx_skb_control_block *cb =
-		(struct zd_tx_skb_control_block *)skb->cb;
-	struct ieee80211_hw *hw = cb->hw;
-
-	if (likely(cb->control)) {
-		skb_pull(skb, sizeof(struct zd_ctrlset));
-		if (unlikely(error ||
-		    (cb->control->flags & IEEE80211_TXCTL_NO_ACK)))
-		{
-			struct ieee80211_tx_status status;
-			memset(&status, 0, sizeof(status));
-			tx_status(hw, skb, &status, !error);
-		} else {
-			struct sk_buff_head *q =
-				&zd_hw_mac(hw)->ack_wait_queue;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hw *hw = info->driver_data[0];
 
-			skb_queue_tail(q, skb);
-			while (skb_queue_len(q) > ZD_MAC_MAX_ACK_WAITERS)
-				zd_mac_tx_failed(hw);
-		}
+	skb_pull(skb, sizeof(struct zd_ctrlset));
+	if (unlikely(error ||
+	    (info->flags & IEEE80211_TX_CTL_NO_ACK))) {
+		tx_status(hw, skb, 0, 0, !error);
 	} else {
-		kfree_tx_skb(skb);
+		struct sk_buff_head *q =
+			&zd_hw_mac(hw)->ack_wait_queue;
+
+		skb_queue_tail(q, skb);
+		while (skb_queue_len(q) > ZD_MAC_MAX_ACK_WAITERS)
+			zd_mac_tx_failed(hw);
 	}
 }
 
@@ -454,7 +387,7 @@ static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs,
 	cs->control = 0;
 
 	/* First fragment */
-	if (flags & IEEE80211_TXCTL_FIRST_FRAGMENT)
+	if (flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
 		cs->control |= ZD_CS_NEED_RANDOM_BACKOFF;
 
 	/* Multicast */
@@ -466,10 +399,10 @@ static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs,
 	    (IEEE80211_FTYPE_CTL|IEEE80211_STYPE_PSPOLL))
 		cs->control |= ZD_CS_PS_POLL_FRAME;
 
-	if (flags & IEEE80211_TXCTL_USE_RTS_CTS)
+	if (flags & IEEE80211_TX_CTL_USE_RTS_CTS)
 		cs->control |= ZD_CS_RTS;
 
-	if (flags & IEEE80211_TXCTL_USE_CTS_PROTECT)
+	if (flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
 		cs->control |= ZD_CS_SELF_CTS;
 
 	/* FIXME: Management frame? */
@@ -516,8 +449,7 @@ void zd_mac_config_beacon(struct ieee80211_hw *hw, struct sk_buff *beacon)
 }
 
 static int fill_ctrlset(struct zd_mac *mac,
-			struct sk_buff *skb,
-			struct ieee80211_tx_control *control)
+			struct sk_buff *skb)
 {
 	int r;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -526,18 +458,19 @@ static int fill_ctrlset(struct zd_mac *mac,
 	struct ieee80211_rate *txrate;
 	struct zd_ctrlset *cs = (struct zd_ctrlset *)
 		skb_push(skb, sizeof(struct zd_ctrlset));
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	ZD_ASSERT(frag_len <= 0xffff);
 
-	txrate = ieee80211_get_tx_rate(mac->hw, control);
+	txrate = ieee80211_get_tx_rate(mac->hw, info);
 
 	cs->modulation = txrate->hw_value;
-	if (control->flags & IEEE80211_TXCTL_SHORT_PREAMBLE)
+	if (info->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE)
 		cs->modulation = txrate->hw_value_short;
 
 	cs->tx_length = cpu_to_le16(frag_len);
 
-	cs_set_control(mac, cs, hdr, control->flags);
+	cs_set_control(mac, cs, hdr, info->flags);
 
 	packet_length = frag_len + sizeof(struct zd_ctrlset) + 10;
 	ZD_ASSERT(packet_length <= 0xffff);
@@ -582,24 +515,21 @@ static int fill_ctrlset(struct zd_mac *mac,
  * control block of the skbuff will be initialized. If necessary the incoming
  * mac80211 queues will be stopped.
  */
-static int zd_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
-		     struct ieee80211_tx_control *control)
+static int zd_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int r;
 
-	r = fill_ctrlset(mac, skb, control);
+	r = fill_ctrlset(mac, skb);
 	if (r)
 		return r;
 
-	r = init_tx_skb_control_block(skb, hw, control);
-	if (r)
-		return r;
+	info->driver_data[0] = hw;
+
 	r = zd_usb_tx(&mac->chip.usb, skb);
-	if (r) {
-		clear_tx_skb_control_block(skb);
+	if (r)
 		return r;
-	}
 	return 0;
 }
 
@@ -637,13 +567,8 @@ static int filter_ack(struct ieee80211_hw *hw, struct ieee80211_hdr *rx_hdr,
 		tx_hdr = (struct ieee80211_hdr *)skb->data;
 		if (likely(!compare_ether_addr(tx_hdr->addr2, rx_hdr->addr1)))
 		{
-			struct ieee80211_tx_status status;
-
-			memset(&status, 0, sizeof(status));
-			status.flags = IEEE80211_TX_STATUS_ACK;
-			status.ack_signal = stats->signal;
 			__skb_unlink(skb, q);
-			tx_status(hw, skb, &status, 1);
+			tx_status(hw, skb, IEEE80211_TX_STAT_ACK, stats->signal, 1);
 			goto out;
 		}
 	}
@@ -947,8 +872,7 @@ static void zd_op_bss_info_changed(struct ieee80211_hw *hw,
 }
 
 static int zd_op_beacon_update(struct ieee80211_hw *hw,
-			       struct sk_buff *skb,
-			       struct ieee80211_tx_control *ctl)
+			       struct sk_buff *skb)
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
 	zd_mac_config_beacon(hw, skb);
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.h b/drivers/net/wireless/zd1211rw/zd_mac.h
index 71170244d2c..18c1d56d3dd 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.h
+++ b/drivers/net/wireless/zd1211rw/zd_mac.h
@@ -149,22 +149,6 @@ struct housekeeping {
 	struct delayed_work link_led_work;
 };
 
-/**
- * struct zd_tx_skb_control_block - control block for tx skbuffs
- * @control: &struct ieee80211_tx_control pointer
- * @context: context pointer
- *
- * This structure is used to fill the cb field in an &sk_buff to transmit.
- * The control field is NULL, if there is no requirement from the mac80211
- * stack to report about the packet ACK. This is the case if the flag
- * IEEE80211_TXCTL_NO_ACK is not set in &struct ieee80211_tx_control.
- */
-struct zd_tx_skb_control_block {
-	struct ieee80211_tx_control *control;
-	struct ieee80211_hw *hw;
-	void *context;
-};
-
 #define ZD_MAC_STATS_BUFFER_SIZE 16
 
 #define ZD_MAC_MAX_ACK_WAITERS 10
diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c
index 12e24f04ddd..c8a0b34aecc 100644
--- a/drivers/net/wireless/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zd1211rw/zd_usb.c
@@ -869,7 +869,7 @@ static void tx_urb_complete(struct urb *urb)
 {
 	int r;
 	struct sk_buff *skb;
-	struct zd_tx_skb_control_block *cb;
+	struct ieee80211_tx_info *info;
 	struct zd_usb *usb;
 
 	switch (urb->status) {
@@ -893,8 +893,8 @@ free_urb:
 	 * grab 'usb' pointer before handing off the skb (since
 	 * it might be freed by zd_mac_tx_to_dev or mac80211)
 	 */
-	cb = (struct zd_tx_skb_control_block *)skb->cb;
-	usb = &zd_hw_mac(cb->hw)->chip.usb;
+	info = IEEE80211_SKB_CB(skb);
+	usb = &zd_hw_mac(info->driver_data[0])->chip.usb;
 	zd_mac_tx_to_dev(skb, urb->status);
 	free_tx_urb(usb, urb);
 	tx_dec_submitted_urbs(usb);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0df91bea6c1..54960b83db7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -201,101 +201,127 @@ struct ieee80211_bss_conf {
 };
 
 /**
- * enum mac80211_tx_control_flags - flags to describe Tx configuration for
- * 				    the Tx frame
- *
- * These flags are used with the @flags member of &ieee80211_tx_control
- *
- * @IEEE80211_TXCTL_REQ_TX_STATUS: request TX status callback for this frame.
- * @IEEE80211_TXCTL_DO_NOT_ENCRYPT: send this frame without encryption;
- * 				    e.g., for EAPOL frame
- * @IEEE80211_TXCTL_USE_RTS_CTS: use RTS-CTS before sending frame
- * @IEEE80211_TXCTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g.,
- * 				     for combined 802.11g / 802.11b networks)
- * @IEEE80211_TXCTL_NO_ACK: tell the low level not to wait for an ack
- * @IEEE80211_TXCTL_RATE_CTRL_PROBE
- * @EEE80211_TXCTL_CLEAR_PS_FILT: clear powersave filter
- *                                 for destination station
- * @IEEE80211_TXCTL_REQUEUE:
- * @IEEE80211_TXCTL_FIRST_FRAGMENT: this is a first fragment of the frame
- * @IEEE80211_TXCTL_LONG_RETRY_LIMIT: this frame should be send using the
- * 				      through set_retry_limit configured long
- * 				      retry value
- * @IEEE80211_TXCTL_EAPOL_FRAME: internal to mac80211
- * @IEEE80211_TXCTL_SEND_AFTER_DTIM: send this frame after DTIM beacon
- * @IEEE80211_TXCTL_AMPDU: this frame should be sent as part of an A-MPDU
- * @IEEE80211_TXCTL_OFDM_HT: this frame can be sent in HT OFDM rates. number
- * 			     of streams when this flag is on can be extracted
- *			     from antenna_sel_tx, so if 1 antenna is marked
- *			     use SISO, 2 antennas marked use MIMO, n antennas
- *			     marked use MIMO_n.
- * @IEEE80211_TXCTL_GREEN_FIELD: use green field protection for this frame
- * @IEEE80211_TXCTL_40_MHZ_WIDTH: send this frame using 40 Mhz channel width
- * @IEEE80211_TXCTL_DUP_DATA: duplicate data frame on both 20 Mhz channels
- * @IEEE80211_TXCTL_SHORT_GI: send this frame using short guard interval
+ * enum mac80211_tx_flags - flags to transmission information/status
+ *
+ * These flags are used with the @flags member of &ieee80211_tx_info
+ *
+ * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame.
+ * @IEEE80211_TX_CTL_DO_NOT_ENCRYPT: send this frame without encryption;
+ *	e.g., for EAPOL frame
+ * @IEEE80211_TX_CTL_USE_RTS_CTS: use RTS-CTS before sending frame
+ * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g.,
+ *	for combined 802.11g / 802.11b networks)
+ * @IEEE80211_TX_CTL_NO_ACK: tell the low level not to wait for an ack
+ * @IEEE80211_TX_CTL_RATE_CTRL_PROBE
+ * @IEEE80211_TX_CTL_CLEAR_PS_FILT: clear powersave filter for destination
+ *	station
+ * @IEEE80211_TX_CTL_REQUEUE:
+ * @IEEE80211_TX_CTL_FIRST_FRAGMENT: this is a first fragment of the frame
+ * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the
+ *	through set_retry_limit configured long retry value
+ * @IEEE80211_TX_CTL_EAPOL_FRAME: internal to mac80211
+ * @IEEE80211_TX_CTL_SEND_AFTER_DTIM: send this frame after DTIM beacon
+ * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU
+ * @IEEE80211_TX_CTL_OFDM_HT: this frame can be sent in HT OFDM rates. number
+ *	of streams when this flag is on can be extracted from antenna_sel_tx,
+ *	so if 1 antenna is marked use SISO, 2 antennas marked use MIMO, n
+ *	antennas marked use MIMO_n.
+ * @IEEE80211_TX_CTL_GREEN_FIELD: use green field protection for this frame
+ * @IEEE80211_TX_CTL_40_MHZ_WIDTH: send this frame using 40 Mhz channel width
+ * @IEEE80211_TX_CTL_DUP_DATA: duplicate data frame on both 20 Mhz channels
+ * @IEEE80211_TX_CTL_SHORT_GI: send this frame using short guard interval
+ * @IEEE80211_TX_STAT_TX_FILTERED: The frame was not transmitted
+ *	because the destination STA was in powersave mode.
+ * @IEEE80211_TX_STAT_ACK: Frame was acknowledged
+ * @IEEE80211_TX_STAT_AMPDU: The frame was aggregated, so status
+ * 	is for the whole aggregation.
  */
 enum mac80211_tx_control_flags {
-	IEEE80211_TXCTL_REQ_TX_STATUS		= (1<<0),
-	IEEE80211_TXCTL_DO_NOT_ENCRYPT		= (1<<1),
-	IEEE80211_TXCTL_USE_RTS_CTS		= (1<<2),
-	IEEE80211_TXCTL_USE_CTS_PROTECT		= (1<<3),
-	IEEE80211_TXCTL_NO_ACK			= (1<<4),
-	IEEE80211_TXCTL_RATE_CTRL_PROBE		= (1<<5),
-	IEEE80211_TXCTL_CLEAR_PS_FILT		= (1<<6),
-	IEEE80211_TXCTL_REQUEUE			= (1<<7),
-	IEEE80211_TXCTL_FIRST_FRAGMENT		= (1<<8),
-	IEEE80211_TXCTL_SHORT_PREAMBLE		= (1<<9),
-	IEEE80211_TXCTL_LONG_RETRY_LIMIT	= (1<<10),
-	IEEE80211_TXCTL_EAPOL_FRAME		= (1<<11),
-	IEEE80211_TXCTL_SEND_AFTER_DTIM		= (1<<12),
-	IEEE80211_TXCTL_AMPDU			= (1<<13),
-	IEEE80211_TXCTL_OFDM_HT			= (1<<14),
-	IEEE80211_TXCTL_GREEN_FIELD		= (1<<15),
-	IEEE80211_TXCTL_40_MHZ_WIDTH		= (1<<16),
-	IEEE80211_TXCTL_DUP_DATA		= (1<<17),
-	IEEE80211_TXCTL_SHORT_GI		= (1<<18),
+	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
+	IEEE80211_TX_CTL_DO_NOT_ENCRYPT		= BIT(1),
+	IEEE80211_TX_CTL_USE_RTS_CTS		= BIT(2),
+	IEEE80211_TX_CTL_USE_CTS_PROTECT	= BIT(3),
+	IEEE80211_TX_CTL_NO_ACK			= BIT(4),
+	IEEE80211_TX_CTL_RATE_CTRL_PROBE	= BIT(5),
+	IEEE80211_TX_CTL_CLEAR_PS_FILT		= BIT(6),
+	IEEE80211_TX_CTL_REQUEUE		= BIT(7),
+	IEEE80211_TX_CTL_FIRST_FRAGMENT		= BIT(8),
+	IEEE80211_TX_CTL_SHORT_PREAMBLE		= BIT(9),
+	IEEE80211_TX_CTL_LONG_RETRY_LIMIT	= BIT(10),
+	IEEE80211_TX_CTL_EAPOL_FRAME		= BIT(11),
+	IEEE80211_TX_CTL_SEND_AFTER_DTIM	= BIT(12),
+	IEEE80211_TX_CTL_AMPDU			= BIT(13),
+	IEEE80211_TX_CTL_OFDM_HT		= BIT(14),
+	IEEE80211_TX_CTL_GREEN_FIELD		= BIT(15),
+	IEEE80211_TX_CTL_40_MHZ_WIDTH		= BIT(16),
+	IEEE80211_TX_CTL_DUP_DATA		= BIT(17),
+	IEEE80211_TX_CTL_SHORT_GI		= BIT(18),
+	IEEE80211_TX_CTL_INJECTED		= BIT(19),
+	IEEE80211_TX_STAT_TX_FILTERED		= BIT(20),
+	IEEE80211_TX_STAT_ACK			= BIT(21),
+	IEEE80211_TX_STAT_AMPDU			= BIT(22),
 };
 
-/* Transmit control fields. This data structure is passed to low-level driver
- * with each TX frame. The low-level driver is responsible for configuring
- * the hardware to use given values (depending on what is supported).
- *
- * NOTE: Be careful with using the pointers outside of the ieee80211_ops->tx()
- * context (i.e. when defering the work to a workqueue).
- * The vif pointer is valid until the it has been removed with the
- * ieee80211_ops->remove_interface() callback funtion.
- * The hw_key pointer is valid until it has been removed with the
- * ieee80211_ops->set_key() callback function.
- */
-struct ieee80211_tx_control {
-	u32 flags;		/* tx control flags defined above */
-
-	s8 tx_rate_idx,		/* Transmit rate (indexes registered rates) */
-	   rts_cts_rate_idx,	/* Transmit rate for RTS/CTS frame */
-	   alt_retry_rate_idx;	/* retry rate for the last retries */
-
-	s8 retry_limit;		/* 1 = only first attempt, 2 = one retry, ..
-				 * This could be used when set_retry_limit
-				 * is not implemented by the driver */
 
-	struct ieee80211_vif *vif;
-
-	/* Key used for hardware encryption
-	 * NULL if IEEE80211_TXCTL_DO_NOT_ENCRYPT is set */
-	struct ieee80211_key_conf *hw_key;
+#define IEEE80211_TX_INFO_DRIVER_DATA_SIZE \
+	(sizeof(((struct sk_buff *)0)->cb) - 8)
+#define IEEE80211_TX_INFO_DRIVER_DATA_PTRS \
+	(IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *))
 
-	enum ieee80211_band band;
+/**
+ * struct ieee80211_tx_info - skb transmit information
+ *
+ * This structure is placed in skb->cb for three uses:
+ *  (1) mac80211 TX control - mac80211 tells the driver what to do
+ *  (2) driver internal use (if applicable)
+ *  (3) TX status information - driver tells mac80211 what happened
+ *
+ * @flags: transmit info flags, defined above
+ * @retry_count: number of retries
+ * @excessive_retries: set to 1 if the frame was retried many times
+ *	but not acknowledged
+ * @ampdu_ack_len: number of aggregated frames.
+ * 	relevant only if IEEE80211_TX_STATUS_AMPDU was set.
+ * @ampdu_ack_map: block ack bit map for the aggregation.
+ * 	relevant only if IEEE80211_TX_STATUS_AMPDU was set.
+ * @ack_signal: signal strength of the ACK frame
+ */
+struct ieee80211_tx_info {
+	/* common information */
+	u32 flags;
+	u8 band;
+	s8 tx_rate_idx;
+	u8 antenna_sel_tx;
 
-	u8 antenna_sel_tx; 	/* 0 = default/diversity, otherwise bit
-				 * position represents antenna number used */
-	u8 icv_len;		/* length of the ICV/MIC field in octets */
-	u8 iv_len;		/* length of the IV field in octets */
-	u16 queue;		/* hardware queue to use for this frame;
-				 * 0 = highest, hw->queues-1 = lowest */
-	u16 aid;		/* Station AID */
-	int type;	/* internal */
+	u8 queue; /* use skb_queue_mapping soon */
+
+	union {
+		struct {
+			struct ieee80211_vif *vif;
+			struct ieee80211_key_conf *hw_key;
+			unsigned long jiffies;
+			int ifindex;
+			u16 aid;
+			s8 rts_cts_rate_idx, alt_retry_rate_idx;
+			u8 retry_limit;
+			u8 icv_len;
+			u8 iv_len;
+		} control;
+		struct {
+			u64 ampdu_ack_map;
+			int ack_signal;
+			u8 retry_count;
+			bool excessive_retries;
+			u8 ampdu_ack_len;
+		} status;
+		void *driver_data[IEEE80211_TX_INFO_DRIVER_DATA_PTRS];
+	};
 };
 
+static inline struct ieee80211_tx_info *IEEE80211_SKB_CB(struct sk_buff *skb)
+{
+	return (struct ieee80211_tx_info *)skb->cb;
+}
 
 
 /**
@@ -362,52 +388,6 @@ struct ieee80211_rx_status {
 	int flag;
 };
 
-/**
- * enum ieee80211_tx_status_flags - transmit status flags
- *
- * Status flags to indicate various transmit conditions.
- *
- * @IEEE80211_TX_STATUS_TX_FILTERED: The frame was not transmitted
- *	because the destination STA was in powersave mode.
- * @IEEE80211_TX_STATUS_ACK: Frame was acknowledged
- * @IEEE80211_TX_STATUS_AMPDU: The frame was aggregated, so status
- * 	is for the whole aggregation.
- */
-enum ieee80211_tx_status_flags {
-	IEEE80211_TX_STATUS_TX_FILTERED	= 1<<0,
-	IEEE80211_TX_STATUS_ACK		= 1<<1,
-	IEEE80211_TX_STATUS_AMPDU	= 1<<2,
-};
-
-/**
- * struct ieee80211_tx_status - transmit status
- *
- * As much information as possible should be provided for each transmitted
- * frame with ieee80211_tx_status().
- *
- * @control: a copy of the &struct ieee80211_tx_control passed to the driver
- *	in the tx() callback.
- * @flags: transmit status flags, defined above
- * @retry_count: number of retries
- * @excessive_retries: set to 1 if the frame was retried many times
- *	but not acknowledged
- * @ampdu_ack_len: number of aggregated frames.
- * 	relevant only if IEEE80211_TX_STATUS_AMPDU was set.
- * @ampdu_ack_map: block ack bit map for the aggregation.
- * 	relevant only if IEEE80211_TX_STATUS_AMPDU was set.
- * @ack_signal: signal strength of the ACK frame either in dBm, dB or unspec
- *	depending on hardware capabilites flags @IEEE80211_HW_SIGNAL_*
- */
-struct ieee80211_tx_status {
-	struct ieee80211_tx_control control;
-	u8 flags;
-	u8 retry_count;
-	bool excessive_retries;
-	u8 ampdu_ack_len;
-	u64 ampdu_ack_map;
-	int ack_signal;
-};
-
 /**
  * enum ieee80211_conf_flags - configuration flags
  *
@@ -563,7 +543,6 @@ struct ieee80211_if_conf {
 	u8 *ssid;
 	size_t ssid_len;
 	struct sk_buff *beacon;
-	struct ieee80211_tx_control *beacon_control;
 };
 
 /**
@@ -825,7 +804,7 @@ static inline void SET_IEEE80211_PERM_ADDR(struct ieee80211_hw *hw, u8 *addr)
 
 static inline struct ieee80211_rate *
 ieee80211_get_tx_rate(const struct ieee80211_hw *hw,
-		      const struct ieee80211_tx_control *c)
+		      const struct ieee80211_tx_info *c)
 {
 	if (WARN_ON(c->tx_rate_idx < 0))
 		return NULL;
@@ -834,20 +813,20 @@ ieee80211_get_tx_rate(const struct ieee80211_hw *hw,
 
 static inline struct ieee80211_rate *
 ieee80211_get_rts_cts_rate(const struct ieee80211_hw *hw,
-			   const struct ieee80211_tx_control *c)
+			   const struct ieee80211_tx_info *c)
 {
-	if (c->rts_cts_rate_idx < 0)
+	if (c->control.rts_cts_rate_idx < 0)
 		return NULL;
-	return &hw->wiphy->bands[c->band]->bitrates[c->rts_cts_rate_idx];
+	return &hw->wiphy->bands[c->band]->bitrates[c->control.rts_cts_rate_idx];
 }
 
 static inline struct ieee80211_rate *
 ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
-			     const struct ieee80211_tx_control *c)
+			     const struct ieee80211_tx_info *c)
 {
-	if (c->alt_retry_rate_idx < 0)
+	if (c->control.alt_retry_rate_idx < 0)
 		return NULL;
-	return &hw->wiphy->bands[c->band]->bitrates[c->alt_retry_rate_idx];
+	return &hw->wiphy->bands[c->band]->bitrates[c->control.alt_retry_rate_idx];
 }
 
 /**
@@ -1142,8 +1121,7 @@ enum ieee80211_ampdu_mlme_action {
  * 	that TX/RX_STOP can pass NULL for this parameter.
  */
 struct ieee80211_ops {
-	int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb,
-		  struct ieee80211_tx_control *control);
+	int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb);
 	int (*start)(struct ieee80211_hw *hw);
 	void (*stop)(struct ieee80211_hw *hw);
 	int (*add_interface)(struct ieee80211_hw *hw,
@@ -1187,8 +1165,7 @@ struct ieee80211_ops {
 	u64 (*get_tsf)(struct ieee80211_hw *hw);
 	void (*reset_tsf)(struct ieee80211_hw *hw);
 	int (*beacon_update)(struct ieee80211_hw *hw,
-			     struct sk_buff *skb,
-			     struct ieee80211_tx_control *control);
+			     struct sk_buff *skb);
 	int (*tx_last_beacon)(struct ieee80211_hw *hw);
 	int (*ampdu_action)(struct ieee80211_hw *hw,
 			    enum ieee80211_ampdu_mlme_action action,
@@ -1384,13 +1361,9 @@ void ieee80211_rx_irqsafe(struct ieee80211_hw *hw,
  *
  * @hw: the hardware the frame was transmitted by
  * @skb: the frame that was transmitted, owned by mac80211 after this call
- * @status: status information for this frame; the status pointer need not
- *	be valid after this function returns and is not freed by mac80211,
- *	it is recommended that it points to a stack area
  */
 void ieee80211_tx_status(struct ieee80211_hw *hw,
-			 struct sk_buff *skb,
-			 struct ieee80211_tx_status *status);
+			 struct sk_buff *skb);
 
 /**
  * ieee80211_tx_status_irqsafe - irq-safe transmit status callback
@@ -1403,13 +1376,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw,
  *
  * @hw: the hardware the frame was transmitted by
  * @skb: the frame that was transmitted, owned by mac80211 after this call
- * @status: status information for this frame; the status pointer need not
- *	be valid after this function returns and is not freed by mac80211,
- *	it is recommended that it points to a stack area
  */
 void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
-				 struct sk_buff *skb,
-				 struct ieee80211_tx_status *status);
+				 struct sk_buff *skb);
 
 /**
  * ieee80211_beacon_get - beacon generation function
@@ -1425,8 +1394,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
  * is responsible of freeing it.
  */
 struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
-				     struct ieee80211_vif *vif,
-				     struct ieee80211_tx_control *control);
+				     struct ieee80211_vif *vif);
 
 /**
  * ieee80211_rts_get - RTS frame generation function
@@ -1434,7 +1402,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
  * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf.
  * @frame: pointer to the frame that is going to be protected by the RTS.
  * @frame_len: the frame length (in octets).
- * @frame_txctl: &struct ieee80211_tx_control of the frame.
+ * @frame_txctl: &struct ieee80211_tx_info of the frame.
  * @rts: The buffer where to store the RTS frame.
  *
  * If the RTS frames are generated by the host system (i.e., not in
@@ -1444,7 +1412,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
  */
 void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 		       const void *frame, size_t frame_len,
-		       const struct ieee80211_tx_control *frame_txctl,
+		       const struct ieee80211_tx_info *frame_txctl,
 		       struct ieee80211_rts *rts);
 
 /**
@@ -1452,7 +1420,7 @@ void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
  * @hw: pointer obtained from ieee80211_alloc_hw().
  * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf.
  * @frame_len: the length of the frame that is going to be protected by the RTS.
- * @frame_txctl: &struct ieee80211_tx_control of the frame.
+ * @frame_txctl: &struct ieee80211_tx_info of the frame.
  *
  * If the RTS is generated in firmware, but the host system must provide
  * the duration field, the low-level driver uses this function to receive
@@ -1460,7 +1428,7 @@ void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
  */
 __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
 			      struct ieee80211_vif *vif, size_t frame_len,
-			      const struct ieee80211_tx_control *frame_txctl);
+			      const struct ieee80211_tx_info *frame_txctl);
 
 /**
  * ieee80211_ctstoself_get - CTS-to-self frame generation function
@@ -1468,7 +1436,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
  * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf.
  * @frame: pointer to the frame that is going to be protected by the CTS-to-self.
  * @frame_len: the frame length (in octets).
- * @frame_txctl: &struct ieee80211_tx_control of the frame.
+ * @frame_txctl: &struct ieee80211_tx_info of the frame.
  * @cts: The buffer where to store the CTS-to-self frame.
  *
  * If the CTS-to-self frames are generated by the host system (i.e., not in
@@ -1479,7 +1447,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
 void ieee80211_ctstoself_get(struct ieee80211_hw *hw,
 			     struct ieee80211_vif *vif,
 			     const void *frame, size_t frame_len,
-			     const struct ieee80211_tx_control *frame_txctl,
+			     const struct ieee80211_tx_info *frame_txctl,
 			     struct ieee80211_cts *cts);
 
 /**
@@ -1487,7 +1455,7 @@ void ieee80211_ctstoself_get(struct ieee80211_hw *hw,
  * @hw: pointer obtained from ieee80211_alloc_hw().
  * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf.
  * @frame_len: the length of the frame that is going to be protected by the CTS-to-self.
- * @frame_txctl: &struct ieee80211_tx_control of the frame.
+ * @frame_txctl: &struct ieee80211_tx_info of the frame.
  *
  * If the CTS-to-self is generated in firmware, but the host system must provide
  * the duration field, the low-level driver uses this function to receive
@@ -1496,7 +1464,7 @@ void ieee80211_ctstoself_get(struct ieee80211_hw *hw,
 __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
 				    struct ieee80211_vif *vif,
 				    size_t frame_len,
-				    const struct ieee80211_tx_control *frame_txctl);
+				    const struct ieee80211_tx_info *frame_txctl);
 
 /**
  * ieee80211_generic_frame_duration - Calculate the duration field for a frame
@@ -1535,8 +1503,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
  * use common code for all beacons.
  */
 struct sk_buff *
-ieee80211_get_buffered_bc(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
-			  struct ieee80211_tx_control *control);
+ieee80211_get_buffered_bc(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 
 /**
  * ieee80211_get_hdrlen_from_skb - get header length from data
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a4cccd1b7d5..79a65b3ee02 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2,6 +2,7 @@
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007-2008	Johannes Berg <johannes@sipsolutions.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -147,7 +148,6 @@ typedef unsigned __bitwise__ ieee80211_tx_result;
 #define IEEE80211_TX_UNICAST		BIT(1)
 #define IEEE80211_TX_PS_BUFFERED	BIT(2)
 #define IEEE80211_TX_PROBE_LAST_FRAG	BIT(3)
-#define IEEE80211_TX_INJECTED		BIT(4)
 
 struct ieee80211_tx_data {
 	struct sk_buff *skb;
@@ -157,7 +157,6 @@ struct ieee80211_tx_data {
 	struct sta_info *sta;
 	struct ieee80211_key *key;
 
-	struct ieee80211_tx_control *control;
 	struct ieee80211_channel *channel;
 	s8 rate_idx;
 	/* use this rate (if set) for last fragment; rate can
@@ -207,22 +206,7 @@ struct ieee80211_rx_data {
 	u16 tkip_iv16;
 };
 
-/* flags used in struct ieee80211_tx_packet_data.flags */
-#define IEEE80211_TXPD_REQ_TX_STATUS	BIT(0)
-#define IEEE80211_TXPD_DO_NOT_ENCRYPT	BIT(1)
-#define IEEE80211_TXPD_REQUEUE		BIT(2)
-#define IEEE80211_TXPD_EAPOL_FRAME	BIT(3)
-#define IEEE80211_TXPD_AMPDU		BIT(4)
-/* Stored in sk_buff->cb */
-struct ieee80211_tx_packet_data {
-	int ifindex;
-	unsigned long jiffies;
-	unsigned int flags;
-	u8 queue;
-};
-
 struct ieee80211_tx_stored_packet {
-	struct ieee80211_tx_control control;
 	struct sk_buff *skb;
 	struct sk_buff **extra_frag;
 	s8 last_frag_rate_idx;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 9761d9bd5a7..8f1ff7ef166 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -971,8 +971,7 @@ void ieee80211_if_setup(struct net_device *dev)
 /* everything else */
 
 static int __ieee80211_if_config(struct net_device *dev,
-				 struct sk_buff *beacon,
-				 struct ieee80211_tx_control *control)
+				 struct sk_buff *beacon)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
@@ -990,13 +989,11 @@ static int __ieee80211_if_config(struct net_device *dev,
 		conf.ssid_len = sdata->u.sta.ssid_len;
 	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
 		conf.beacon = beacon;
-		conf.beacon_control = control;
 		ieee80211_start_mesh(dev);
 	} else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
 		conf.ssid = sdata->u.ap.ssid;
 		conf.ssid_len = sdata->u.ap.ssid_len;
 		conf.beacon = beacon;
-		conf.beacon_control = control;
 	}
 	return local->ops->config_interface(local_to_hw(local),
 					    &sdata->vif, &conf);
@@ -1009,23 +1006,21 @@ int ieee80211_if_config(struct net_device *dev)
 	if (sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT &&
 	    (local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE))
 		return ieee80211_if_config_beacon(dev);
-	return __ieee80211_if_config(dev, NULL, NULL);
+	return __ieee80211_if_config(dev, NULL);
 }
 
 int ieee80211_if_config_beacon(struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_tx_control control;
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct sk_buff *skb;
 
 	if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE))
 		return 0;
-	skb = ieee80211_beacon_get(local_to_hw(local), &sdata->vif,
-				   &control);
+	skb = ieee80211_beacon_get(local_to_hw(local), &sdata->vif);
 	if (!skb)
 		return -ENOMEM;
-	return __ieee80211_if_config(dev, skb, &control);
+	return __ieee80211_if_config(dev, skb);
 }
 
 int ieee80211_hw_config(struct ieee80211_local *local)
@@ -1180,38 +1175,20 @@ void ieee80211_reset_erp_info(struct net_device *dev)
 }
 
 void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
-				 struct sk_buff *skb,
-				 struct ieee80211_tx_status *status)
+				 struct sk_buff *skb)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_tx_status *saved;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int tmp;
 
 	skb->dev = local->mdev;
-	saved = kmalloc(sizeof(struct ieee80211_tx_status), GFP_ATOMIC);
-	if (unlikely(!saved)) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "%s: Not enough memory, "
-			       "dropping tx status", skb->dev->name);
-		/* should be dev_kfree_skb_irq, but due to this function being
-		 * named _irqsafe instead of just _irq we can't be sure that
-		 * people won't call it from non-irq contexts */
-		dev_kfree_skb_any(skb);
-		return;
-	}
-	memcpy(saved, status, sizeof(struct ieee80211_tx_status));
-	/* copy pointer to saved status into skb->cb for use by tasklet */
-	memcpy(skb->cb, &saved, sizeof(saved));
-
 	skb->pkt_type = IEEE80211_TX_STATUS_MSG;
-	skb_queue_tail(status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS ?
+	skb_queue_tail(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS ?
 		       &local->skb_queue : &local->skb_queue_unreliable, skb);
 	tmp = skb_queue_len(&local->skb_queue) +
 		skb_queue_len(&local->skb_queue_unreliable);
 	while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT &&
 	       (skb = skb_dequeue(&local->skb_queue_unreliable))) {
-		memcpy(&saved, skb->cb, sizeof(saved));
-		kfree(saved);
 		dev_kfree_skb_irq(skb);
 		tmp--;
 		I802_DEBUG_INC(local->tx_status_drop);
@@ -1225,7 +1202,6 @@ static void ieee80211_tasklet_handler(unsigned long data)
 	struct ieee80211_local *local = (struct ieee80211_local *) data;
 	struct sk_buff *skb;
 	struct ieee80211_rx_status rx_status;
-	struct ieee80211_tx_status *tx_status;
 	struct ieee80211_ra_tid *ra_tid;
 
 	while ((skb = skb_dequeue(&local->skb_queue)) ||
@@ -1240,12 +1216,8 @@ static void ieee80211_tasklet_handler(unsigned long data)
 			__ieee80211_rx(local_to_hw(local), skb, &rx_status);
 			break;
 		case IEEE80211_TX_STATUS_MSG:
-			/* get pointer to saved status out of skb->cb */
-			memcpy(&tx_status, skb->cb, sizeof(tx_status));
 			skb->pkt_type = 0;
-			ieee80211_tx_status(local_to_hw(local),
-					    skb, tx_status);
-			kfree(tx_status);
+			ieee80211_tx_status(local_to_hw(local), skb);
 			break;
 		case IEEE80211_DELBA_MSG:
 			ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
@@ -1274,24 +1246,15 @@ static void ieee80211_tasklet_handler(unsigned long data)
  * Also, tx_packet_data in cb is restored from tx_control. */
 static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
 				      struct ieee80211_key *key,
-				      struct sk_buff *skb,
-				      struct ieee80211_tx_control *control)
+				      struct sk_buff *skb)
 {
 	int hdrlen, iv_len, mic_len;
-	struct ieee80211_tx_packet_data *pkt_data;
-
-	pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
-	pkt_data->ifindex = vif_to_sdata(control->vif)->dev->ifindex;
-	pkt_data->flags = 0;
-	if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS)
-		pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
-	if (control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)
-		pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
-	if (control->flags & IEEE80211_TXCTL_REQUEUE)
-		pkt_data->flags |= IEEE80211_TXPD_REQUEUE;
-	if (control->flags & IEEE80211_TXCTL_EAPOL_FRAME)
-		pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME;
-	pkt_data->queue = control->queue;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+	info->flags &=	IEEE80211_TX_CTL_REQ_TX_STATUS |
+			IEEE80211_TX_CTL_DO_NOT_ENCRYPT |
+			IEEE80211_TX_CTL_REQUEUE |
+			IEEE80211_TX_CTL_EAPOL_FRAME;
 
 	hdrlen = ieee80211_get_hdrlen_from_skb(skb);
 
@@ -1338,9 +1301,10 @@ no_key:
 
 static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 					    struct sta_info *sta,
-					    struct sk_buff *skb,
-					    struct ieee80211_tx_status *status)
+					    struct sk_buff *skb)
 {
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
 	sta->tx_filtered_count++;
 
 	/*
@@ -1382,18 +1346,16 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	 */
 	if (test_sta_flags(sta, WLAN_STA_PS) &&
 	    skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) {
-		ieee80211_remove_tx_extra(local, sta->key, skb,
-					  &status->control);
+		ieee80211_remove_tx_extra(local, sta->key, skb);
 		skb_queue_tail(&sta->tx_filtered, skb);
 		return;
 	}
 
 	if (!test_sta_flags(sta, WLAN_STA_PS) &&
-	    !(status->control.flags & IEEE80211_TXCTL_REQUEUE)) {
+	    !(info->flags & IEEE80211_TX_CTL_REQUEUE)) {
 		/* Software retry the packet once */
-		status->control.flags |= IEEE80211_TXCTL_REQUEUE;
-		ieee80211_remove_tx_extra(local, sta->key, skb,
-					  &status->control);
+		info->flags |= IEEE80211_TX_CTL_REQUEUE;
+		ieee80211_remove_tx_extra(local, sta->key, skb);
 		dev_queue_xmit(skb);
 		return;
 	}
@@ -1407,28 +1369,20 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	dev_kfree_skb(skb);
 }
 
-void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
-			 struct ieee80211_tx_status *status)
+void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct sk_buff *skb2;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	u16 frag, type;
 	struct ieee80211_tx_status_rtap_hdr *rthdr;
 	struct ieee80211_sub_if_data *sdata;
 	struct net_device *prev_dev = NULL;
 
-	if (!status) {
-		printk(KERN_ERR
-		       "%s: ieee80211_tx_status called with NULL status\n",
-		       wiphy_name(local->hw.wiphy));
-		dev_kfree_skb(skb);
-		return;
-	}
-
 	rcu_read_lock();
 
-	if (status->excessive_retries) {
+	if (info->status.excessive_retries) {
 		struct sta_info *sta;
 		sta = sta_info_get(local, hdr->addr1);
 		if (sta) {
@@ -1437,27 +1391,23 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
 				 * The STA is in power save mode, so assume
 				 * that this TX packet failed because of that.
 				 */
-				status->excessive_retries = 0;
-				status->flags |= IEEE80211_TX_STATUS_TX_FILTERED;
-				ieee80211_handle_filtered_frame(local, sta,
-								skb, status);
+				ieee80211_handle_filtered_frame(local, sta, skb);
 				rcu_read_unlock();
 				return;
 			}
 		}
 	}
 
-	if (status->flags & IEEE80211_TX_STATUS_TX_FILTERED) {
+	if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) {
 		struct sta_info *sta;
 		sta = sta_info_get(local, hdr->addr1);
 		if (sta) {
-			ieee80211_handle_filtered_frame(local, sta, skb,
-							status);
+			ieee80211_handle_filtered_frame(local, sta, skb);
 			rcu_read_unlock();
 			return;
 		}
 	} else
-		rate_control_tx_status(local->mdev, skb, status);
+		rate_control_tx_status(local->mdev, skb);
 
 	rcu_read_unlock();
 
@@ -1471,14 +1421,14 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
 	frag = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG;
 	type = le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_FTYPE;
 
-	if (status->flags & IEEE80211_TX_STATUS_ACK) {
+	if (info->flags & IEEE80211_TX_STAT_ACK) {
 		if (frag == 0) {
 			local->dot11TransmittedFrameCount++;
 			if (is_multicast_ether_addr(hdr->addr1))
 				local->dot11MulticastTransmittedFrameCount++;
-			if (status->retry_count > 0)
+			if (info->status.retry_count > 0)
 				local->dot11RetryCount++;
-			if (status->retry_count > 1)
+			if (info->status.retry_count > 1)
 				local->dot11MultipleRetryCount++;
 		}
 
@@ -1524,17 +1474,17 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
 		cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) |
 			    (1 << IEEE80211_RADIOTAP_DATA_RETRIES));
 
-	if (!(status->flags & IEEE80211_TX_STATUS_ACK) &&
+	if (!(info->flags & IEEE80211_TX_STAT_ACK) &&
 	    !is_multicast_ether_addr(hdr->addr1))
 		rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL);
 
-	if ((status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) &&
-	    (status->control.flags & IEEE80211_TXCTL_USE_CTS_PROTECT))
+	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) &&
+	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT))
 		rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS);
-	else if (status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS)
+	else if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
 		rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS);
 
-	rthdr->data_retries = status->retry_count;
+	rthdr->data_retries = info->status.retry_count;
 
 	/* XXX: is this sufficient for BPF? */
 	skb_set_mac_header(skb, 0);
@@ -1895,7 +1845,9 @@ static int __init ieee80211_init(void)
 	struct sk_buff *skb;
 	int ret;
 
-	BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb));
+	BUILD_BUG_ON(sizeof(struct ieee80211_tx_info) > sizeof(skb->cb));
+	BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, driver_data) +
+	             IEEE80211_TX_INFO_DRIVER_DATA_SIZE > sizeof(skb->cb));
 
 	ret = rc80211_pid_init();
 	if (ret)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 604149369dc..9a264379d7b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -578,7 +578,7 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
 		      int encrypt)
 {
 	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_tx_packet_data *pkt_data;
+	struct ieee80211_tx_info *info;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	skb->dev = sdata->local->mdev;
@@ -586,11 +586,11 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
 	skb_set_network_header(skb, 0);
 	skb_set_transport_header(skb, 0);
 
-	pkt_data = (struct ieee80211_tx_packet_data *) skb->cb;
-	memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
-	pkt_data->ifindex = sdata->dev->ifindex;
+	info = IEEE80211_SKB_CB(skb);
+	memset(info, 0, sizeof(struct ieee80211_tx_info));
+	info->control.ifindex = sdata->dev->ifindex;
 	if (!encrypt)
-		pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
+		info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
 
 	dev_queue_xmit(skb);
 }
@@ -2314,7 +2314,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 	int res, rates, i, j;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	struct ieee80211_tx_control control;
+	struct ieee80211_tx_info *control;
 	struct rate_selection ratesel;
 	u8 *pos;
 	struct ieee80211_sub_if_data *sdata;
@@ -2404,21 +2404,22 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 			memcpy(pos, &bss->supp_rates[8], rates);
 		}
 
-		memset(&control, 0, sizeof(control));
+		control = IEEE80211_SKB_CB(skb);
+
 		rate_control_get_rate(dev, sband, skb, &ratesel);
 		if (ratesel.rate_idx < 0) {
 			printk(KERN_DEBUG "%s: Failed to determine TX rate "
 			       "for IBSS beacon\n", dev->name);
 			break;
 		}
-		control.vif = &sdata->vif;
-		control.tx_rate_idx = ratesel.rate_idx;
+		control->control.vif = &sdata->vif;
+		control->tx_rate_idx = ratesel.rate_idx;
 		if (sdata->bss_conf.use_short_preamble &&
 		    sband->bitrates[ratesel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
-			control.flags |= IEEE80211_TXCTL_SHORT_PREAMBLE;
-		control.antenna_sel_tx = local->hw.conf.antenna_sel_tx;
-		control.flags |= IEEE80211_TXCTL_NO_ACK;
-		control.retry_limit = 1;
+			control->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
+		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
+		control->flags |= IEEE80211_TX_CTL_NO_ACK;
+		control->control.retry_limit = 1;
 
 		ifsta->probe_resp = skb_copy(skb, GFP_ATOMIC);
 		if (ifsta->probe_resp) {
@@ -2433,8 +2434,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 		}
 
 		if (local->ops->beacon_update &&
-		    local->ops->beacon_update(local_to_hw(local),
-					     skb, &control) == 0) {
+		    local->ops->beacon_update(local_to_hw(local), skb) == 0) {
 			printk(KERN_DEBUG "%s: Configured IBSS beacon "
 			       "template\n", dev->name);
 			skb = NULL;
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index a29148dcca9..0ed9c8a2f56 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -34,8 +34,7 @@ struct rate_control_ops {
 	struct module *module;
 	const char *name;
 	void (*tx_status)(void *priv, struct net_device *dev,
-			  struct sk_buff *skb,
-			  struct ieee80211_tx_status *status);
+			  struct sk_buff *skb);
 	void (*get_rate)(void *priv, struct net_device *dev,
 			 struct ieee80211_supported_band *band,
 			 struct sk_buff *skb,
@@ -77,13 +76,12 @@ struct rate_control_ref *rate_control_get(struct rate_control_ref *ref);
 void rate_control_put(struct rate_control_ref *ref);
 
 static inline void rate_control_tx_status(struct net_device *dev,
-					  struct sk_buff *skb,
-					  struct ieee80211_tx_status *status)
+					  struct sk_buff *skb)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct rate_control_ref *ref = local->rate_ctrl;
 
-	ref->ops->tx_status(ref->priv, dev, skb, status);
+	ref->ops->tx_status(ref->priv, dev, skb);
 }
 
 
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 04afc13ed82..2078803d358 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -61,7 +61,7 @@ enum rc_pid_event_type {
 union rc_pid_event_data {
 	/* RC_PID_EVENT_TX_STATUS */
 	struct {
-		struct ieee80211_tx_status tx_status;
+		struct ieee80211_tx_info tx_status;
 	};
 	/* RC_PID_EVENT_TYPE_RATE_CHANGE */
 	/* RC_PID_EVENT_TYPE_TX_RATE */
@@ -158,7 +158,7 @@ struct rc_pid_debugfs_entries {
 };
 
 void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
-					     struct ieee80211_tx_status *stat);
+				      struct ieee80211_tx_info *stat);
 
 void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf,
 					       int index, int rate);
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 14cde36f507..e8945413e4a 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -237,8 +237,7 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 }
 
 static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
-				       struct sk_buff *skb,
-				       struct ieee80211_tx_status *status)
+				       struct sk_buff *skb)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -248,6 +247,7 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 	struct rc_pid_sta_info *spinfo;
 	unsigned long period;
 	struct ieee80211_supported_band *sband;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	rcu_read_lock();
 
@@ -266,28 +266,28 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 
 	/* Ignore all frames that were sent with a different rate than the rate
 	 * we currently advise mac80211 to use. */
-	if (status->control.tx_rate_idx != sta->txrate_idx)
+	if (info->tx_rate_idx != sta->txrate_idx)
 		goto unlock;
 
 	spinfo = sta->rate_ctrl_priv;
 	spinfo->tx_num_xmit++;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
-	rate_control_pid_event_tx_status(&spinfo->events, status);
+	rate_control_pid_event_tx_status(&spinfo->events, info);
 #endif
 
 	/* We count frames that totally failed to be transmitted as two bad
 	 * frames, those that made it out but had some retries as one good and
 	 * one bad frame. */
-	if (status->excessive_retries) {
+	if (info->status.excessive_retries) {
 		spinfo->tx_num_failed += 2;
 		spinfo->tx_num_xmit++;
-	} else if (status->retry_count) {
+	} else if (info->status.retry_count) {
 		spinfo->tx_num_failed++;
 		spinfo->tx_num_xmit++;
 	}
 
-	if (status->excessive_retries) {
+	if (info->status.excessive_retries) {
 		sta->tx_retry_failed++;
 		sta->tx_num_consecutive_failures++;
 		sta->tx_num_mpdu_fail++;
@@ -295,8 +295,8 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 		sta->tx_num_consecutive_failures = 0;
 		sta->tx_num_mpdu_ok++;
 	}
-	sta->tx_retry_count += status->retry_count;
-	sta->tx_num_mpdu_fail += status->retry_count;
+	sta->tx_retry_count += info->status.retry_count;
+	sta->tx_num_mpdu_fail += info->status.retry_count;
 
 	/* Update PID controller state. */
 	period = (HZ * pinfo->sampling_period + 500) / 1000;
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index ff5c380f3c1..8121d3bc683 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -39,11 +39,11 @@ static void rate_control_pid_event(struct rc_pid_event_buffer *buf,
 }
 
 void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
-					     struct ieee80211_tx_status *stat)
+				      struct ieee80211_tx_info *stat)
 {
 	union rc_pid_event_data evd;
 
-	memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_status));
+	memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_info));
 	rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd);
 }
 
@@ -167,8 +167,8 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
 	switch (ev->type) {
 	case RC_PID_EVENT_TYPE_TX_STATUS:
 		p += snprintf(pb + p, length - p, "tx_status %u %u",
-			      ev->data.tx_status.excessive_retries,
-			      ev->data.tx_status.retry_count);
+			      ev->data.tx_status.status.excessive_retries,
+			      ev->data.tx_status.status.retry_count);
 		break;
 	case RC_PID_EVENT_TYPE_RATE_CHANGE:
 		p += snprintf(pb + p, length - p, "rate_change %d %d",
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fa68305fd59..cf0de3b0fe2 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -714,7 +714,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 	struct sk_buff *skb;
 	int sent = 0;
 	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_tx_packet_data *pkt_data;
+	struct ieee80211_tx_info *info;
 	DECLARE_MAC_BUF(mac);
 
 	sdata = sta->sdata;
@@ -734,13 +734,13 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 
 	/* Send all buffered frames to the station */
 	while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) {
-		pkt_data = (struct ieee80211_tx_packet_data *) skb->cb;
+		info = IEEE80211_SKB_CB(skb);
 		sent++;
-		pkt_data->flags |= IEEE80211_TXPD_REQUEUE;
+		info->flags |= IEEE80211_TX_CTL_REQUEUE;
 		dev_queue_xmit(skb);
 	}
 	while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) {
-		pkt_data = (struct ieee80211_tx_packet_data *) skb->cb;
+		info = IEEE80211_SKB_CB(skb);
 		local->total_ps_buffered--;
 		sent++;
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
@@ -748,7 +748,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 		       "since STA not sleeping anymore\n", dev->name,
 		       print_mac(mac, sta->addr), sta->aid);
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
-		pkt_data->flags |= IEEE80211_TXPD_REQUEUE;
+		info->flags |= IEEE80211_TX_CTL_REQUEUE;
 		dev_queue_xmit(skb);
 	}
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index baf5e474688..ef3149324d5 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -511,20 +511,20 @@ static inline int sta_info_buffer_expired(struct ieee80211_local *local,
 					  struct sta_info *sta,
 					  struct sk_buff *skb)
 {
-	struct ieee80211_tx_packet_data *pkt_data;
+	struct ieee80211_tx_info *info;
 	int timeout;
 
 	if (!skb)
 		return 0;
 
-	pkt_data = (struct ieee80211_tx_packet_data *) skb->cb;
+	info = IEEE80211_SKB_CB(skb);
 
 	/* Timeout: (2 * listen_interval * beacon_int * 1024 / 1000000) sec */
 	timeout = (sta->listen_interval * local->hw.conf.beacon_int * 32 /
 		   15625) * HZ;
 	if (timeout < STA_TX_BUFFER_EXPIRE)
 		timeout = STA_TX_BUFFER_EXPIRE;
-	return time_after(jiffies, pkt_data->jiffies + timeout);
+	return time_after(jiffies, info->control.jiffies + timeout);
 }
 
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index e89cc165554..f592290e42b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -32,7 +32,7 @@
  * @WLAN_STA_WDS: Station is one of our WDS peers.
  * @WLAN_STA_PSPOLL: Station has just PS-polled us.
  * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the
- *	IEEE80211_TXCTL_CLEAR_PS_FILT control flag) when the next
+ *	IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next
  *	frame to this station is transmitted.
  */
 enum ieee80211_sta_info_flags {
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 666158f02a8..ac9a4af7ad4 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -238,12 +238,12 @@ static ieee80211_tx_result
 ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	struct sk_buff *skb = tx->skb;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	u32 sta_flags;
 
-	if (unlikely(tx->flags & IEEE80211_TX_INJECTED))
+	if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED))
 		return TX_CONTINUE;
 
 	if (unlikely(tx->local->sta_sw_scanning) &&
@@ -348,6 +348,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 static ieee80211_tx_result
 ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 {
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+
 	/*
 	 * broadcast/multicast frame
 	 *
@@ -383,7 +385,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 	}
 
 	/* buffered in hardware */
-	tx->control->flags |= IEEE80211_TXCTL_SEND_AFTER_DTIM;
+	info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM;
 
 	return TX_CONTINUE;
 }
@@ -392,6 +394,7 @@ static ieee80211_tx_result
 ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 {
 	struct sta_info *sta = tx->sta;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	u32 staflags;
 	DECLARE_MAC_BUF(mac);
 
@@ -404,7 +407,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 
 	if (unlikely((staflags & WLAN_STA_PS) &&
 		     !(staflags & WLAN_STA_PSPOLL))) {
-		struct ieee80211_tx_packet_data *pkt_data;
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 		printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries "
 		       "before %d)\n",
@@ -428,8 +430,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 		if (skb_queue_empty(&sta->ps_tx_buf))
 			sta_info_set_tim_bit(sta);
 
-		pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb;
-		pkt_data->jiffies = jiffies;
+		info->control.jiffies = jiffies;
 		skb_queue_tail(&sta->ps_tx_buf, tx->skb);
 		return TX_QUEUED;
 	}
@@ -461,17 +462,18 @@ static ieee80211_tx_result
 ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_key *key;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	u16 fc = tx->fc;
 
-	if (unlikely(tx->control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT))
+	if (unlikely(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
 		tx->key = NULL;
 	else if (tx->sta && (key = rcu_dereference(tx->sta->key)))
 		tx->key = key;
 	else if ((key = rcu_dereference(tx->sdata->default_key)))
 		tx->key = key;
 	else if (tx->sdata->drop_unencrypted &&
-		 !(tx->control->flags & IEEE80211_TXCTL_EAPOL_FRAME) &&
-		 !(tx->flags & IEEE80211_TX_INJECTED)) {
+		 !(info->flags & IEEE80211_TX_CTL_EAPOL_FRAME) &&
+		 !(info->flags & IEEE80211_TX_CTL_INJECTED)) {
 		I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
 		return TX_DROP;
 	} else
@@ -500,7 +502,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	}
 
 	if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
-		tx->control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
+		info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
 
 	return TX_CONTINUE;
 }
@@ -510,6 +512,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 {
 	struct rate_selection rsel;
 	struct ieee80211_supported_band *sband;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
@@ -517,18 +520,17 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 		rate_control_get_rate(tx->dev, sband, tx->skb, &rsel);
 		tx->rate_idx = rsel.rate_idx;
 		if (unlikely(rsel.probe_idx >= 0)) {
-			tx->control->flags |=
-				IEEE80211_TXCTL_RATE_CTRL_PROBE;
+			info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 			tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG;
-			tx->control->alt_retry_rate_idx = tx->rate_idx;
+			info->control.alt_retry_rate_idx = tx->rate_idx;
 			tx->rate_idx = rsel.probe_idx;
 		} else
-			tx->control->alt_retry_rate_idx = -1;
+			info->control.alt_retry_rate_idx = -1;
 
 		if (unlikely(tx->rate_idx < 0))
 			return TX_DROP;
 	} else
-		tx->control->alt_retry_rate_idx = -1;
+		info->control.alt_retry_rate_idx = -1;
 
 	if (tx->sdata->bss_conf.use_cts_prot &&
 	    (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) {
@@ -538,13 +540,13 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 		else
 			tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG;
 		tx->rate_idx = rsel.nonerp_idx;
-		tx->control->tx_rate_idx = rsel.nonerp_idx;
-		tx->control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE;
+		info->tx_rate_idx = rsel.nonerp_idx;
+		info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 	} else {
 		tx->last_frag_rate_idx = tx->rate_idx;
-		tx->control->tx_rate_idx = tx->rate_idx;
+		info->tx_rate_idx = tx->rate_idx;
 	}
-	tx->control->tx_rate_idx = tx->rate_idx;
+	info->tx_rate_idx = tx->rate_idx;
 
 	return TX_CONTINUE;
 }
@@ -555,28 +557,32 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
 	u16 fc = le16_to_cpu(hdr->frame_control);
 	u16 dur;
-	struct ieee80211_tx_control *control = tx->control;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	struct ieee80211_supported_band *sband;
 
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
-	if (!control->retry_limit) {
+	if (tx->sta)
+		info->control.aid = tx->sta->aid;
+
+	if (!info->control.retry_limit) {
 		if (!is_multicast_ether_addr(hdr->addr1)) {
-			if (tx->skb->len + FCS_LEN > tx->local->rts_threshold
+			int len = min_t(int, tx->skb->len + FCS_LEN,
+					tx->local->fragmentation_threshold);
+			if (len > tx->local->rts_threshold
 			    && tx->local->rts_threshold <
-					IEEE80211_MAX_RTS_THRESHOLD) {
-				control->flags |=
-					IEEE80211_TXCTL_USE_RTS_CTS;
-				control->flags |=
-					IEEE80211_TXCTL_LONG_RETRY_LIMIT;
-				control->retry_limit =
+						IEEE80211_MAX_RTS_THRESHOLD) {
+				info->flags |= IEEE80211_TX_CTL_USE_RTS_CTS;
+				info->flags |=
+					IEEE80211_TX_CTL_LONG_RETRY_LIMIT;
+				info->control.retry_limit =
 					tx->local->long_retry_limit;
 			} else {
-				control->retry_limit =
+				info->control.retry_limit =
 					tx->local->short_retry_limit;
 			}
 		} else {
-			control->retry_limit = 1;
+			info->control.retry_limit = 1;
 		}
 	}
 
@@ -585,7 +591,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 		 * frames.
 		 * TODO: The last fragment could still use multiple retry
 		 * rates. */
-		control->alt_retry_rate_idx = -1;
+		info->control.alt_retry_rate_idx = -1;
 	}
 
 	/* Use CTS protection for unicast frames sent using extended rates if
@@ -595,8 +601,8 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	    (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_ERP_G) &&
 	    (tx->flags & IEEE80211_TX_UNICAST) &&
 	    tx->sdata->bss_conf.use_cts_prot &&
-	    !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS))
-		control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT;
+	    !(info->flags & IEEE80211_TX_CTL_USE_RTS_CTS))
+		info->flags |= IEEE80211_TX_CTL_USE_CTS_PROTECT;
 
 	/* Transmit data frames using short preambles if the driver supports
 	 * short preambles at the selected rate and short preambles are
@@ -605,7 +611,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	    (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
 	    tx->sdata->bss_conf.use_short_preamble &&
 	    (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) {
-		tx->control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE;
+		info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
 	}
 
 	/* Setup duration field for the first fragment of the frame. Duration
@@ -616,8 +622,8 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 				 tx->extra_frag[0]->len : 0);
 	hdr->duration_id = cpu_to_le16(dur);
 
-	if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) ||
-	    (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) {
+	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
+	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) {
 		struct ieee80211_supported_band *sband;
 		struct ieee80211_rate *rate;
 		s8 baserate = -1;
@@ -626,7 +632,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 		sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
 		/* Do not use multiple retry rates when using RTS/CTS */
-		control->alt_retry_rate_idx = -1;
+		info->control.alt_retry_rate_idx = -1;
 
 		/* Use min(data rate, max base rate) as CTS/RTS rate */
 		rate = &sband->bitrates[tx->rate_idx];
@@ -642,13 +648,13 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 		}
 
 		if (baserate >= 0)
-			control->rts_cts_rate_idx = baserate;
+			info->control.rts_cts_rate_idx = baserate;
 		else
-			control->rts_cts_rate_idx = 0;
+			info->control.rts_cts_rate_idx = 0;
 	}
 
 	if (tx->sta)
-		control->aid = tx->sta->aid;
+		info->control.aid = tx->sta->aid;
 
 	return TX_CONTINUE;
 }
@@ -762,6 +768,7 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 	u32 load = 0, hdrtime;
 	struct ieee80211_rate *rate;
 	struct ieee80211_supported_band *sband;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 	rate = &sband->bitrates[tx->rate_idx];
@@ -786,9 +793,9 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 	if (!is_multicast_ether_addr(hdr->addr1))
 		load += hdrtime;
 
-	if (tx->control->flags & IEEE80211_TXCTL_USE_RTS_CTS)
+	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
 		load += 2 * hdrtime;
-	else if (tx->control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)
+	else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
 		load += hdrtime;
 
 	/* TODO: optimise again */
@@ -839,6 +846,7 @@ static ieee80211_tx_handler ieee80211_tx_handlers[] =
 	ieee80211_tx_h_rate_ctrl,
 	ieee80211_tx_h_misc,
 	ieee80211_tx_h_fragment,
+	/* handlers after fragment must be aware of tx info fragmentation! */
 	ieee80211_tx_h_encrypt,
 	ieee80211_tx_h_stats,
 	NULL
@@ -867,12 +875,12 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 		(struct ieee80211_radiotap_header *) skb->data;
 	struct ieee80211_supported_band *sband;
 	int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len);
-	struct ieee80211_tx_control *control = tx->control;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
-	control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
-	tx->flags |= IEEE80211_TX_INJECTED;
+	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	info->flags |= IEEE80211_TX_CTL_INJECTED;
 	tx->flags &= ~IEEE80211_TX_FRAGMENTED;
 
 	/*
@@ -920,7 +928,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 			 * radiotap uses 0 for 1st ant, mac80211 is 1 for
 			 * 1st ant
 			 */
-			control->antenna_sel_tx = (*iterator.this_arg) + 1;
+			info->antenna_sel_tx = (*iterator.this_arg) + 1;
 			break;
 
 #if 0
@@ -944,8 +952,8 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 				skb_trim(skb, skb->len - FCS_LEN);
 			}
 			if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP)
-				control->flags &=
-					~IEEE80211_TXCTL_DO_NOT_ENCRYPT;
+				info->flags &=
+					~IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
 			if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG)
 				tx->flags |= IEEE80211_TX_FRAGMENTED;
 			break;
@@ -980,12 +988,12 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 static ieee80211_tx_result
 __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 		       struct sk_buff *skb,
-		       struct net_device *dev,
-		       struct ieee80211_tx_control *control)
+		       struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	int hdrlen;
 
@@ -994,7 +1002,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	tx->dev = dev; /* use original interface */
 	tx->local = local;
 	tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	tx->control = control;
+	tx->channel = local->hw.conf.channel;
 	/*
 	 * Set this flag (used below to indicate "automatic fragmentation"),
 	 * it will be cleared/left by radiotap as desired.
@@ -1021,10 +1029,10 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 
 	if (is_multicast_ether_addr(hdr->addr1)) {
 		tx->flags &= ~IEEE80211_TX_UNICAST;
-		control->flags |= IEEE80211_TXCTL_NO_ACK;
+		info->flags |= IEEE80211_TX_CTL_NO_ACK;
 	} else {
 		tx->flags |= IEEE80211_TX_UNICAST;
-		control->flags &= ~IEEE80211_TXCTL_NO_ACK;
+		info->flags &= ~IEEE80211_TX_CTL_NO_ACK;
 	}
 
 	if (tx->flags & IEEE80211_TX_FRAGMENTED) {
@@ -1037,16 +1045,16 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	}
 
 	if (!tx->sta)
-		control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT;
+		info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
 	else if (test_and_clear_sta_flags(tx->sta, WLAN_STA_CLEAR_PS_FILT))
-		control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT;
+		info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
 
 	hdrlen = ieee80211_get_hdrlen(tx->fc);
 	if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) {
 		u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)];
 		tx->ethertype = (pos[0] << 8) | pos[1];
 	}
-	control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT;
+	info->flags |= IEEE80211_TX_CTL_FIRST_FRAGMENT;
 
 	return TX_CONTINUE;
 }
@@ -1056,14 +1064,12 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
  */
 static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 				struct sk_buff *skb,
-				struct net_device *mdev,
-				struct ieee80211_tx_control *control)
+				struct net_device *mdev)
 {
-	struct ieee80211_tx_packet_data *pkt_data;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct net_device *dev;
 
-	pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
-	dev = dev_get_by_index(&init_net, pkt_data->ifindex);
+	dev = dev_get_by_index(&init_net, info->control.ifindex);
 	if (unlikely(dev && !is_ieee80211_device(dev, mdev))) {
 		dev_put(dev);
 		dev = NULL;
@@ -1071,7 +1077,7 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	if (unlikely(!dev))
 		return -ENODEV;
 	/* initialises tx with control */
-	__ieee80211_tx_prepare(tx, skb, dev, control);
+	__ieee80211_tx_prepare(tx, skb, dev);
 	dev_put(dev);
 	return 0;
 }
@@ -1079,7 +1085,7 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 			  struct ieee80211_tx_data *tx)
 {
-	struct ieee80211_tx_control *control = tx->control;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int ret, i;
 
 	if (!ieee80211_qdisc_installed(local->mdev) &&
@@ -1090,39 +1096,39 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 	if (skb) {
 		ieee80211_dump_frame(wiphy_name(local->hw.wiphy),
 				     "TX to low-level driver", skb);
-		ret = local->ops->tx(local_to_hw(local), skb, control);
+		ret = local->ops->tx(local_to_hw(local), skb);
 		if (ret)
 			return IEEE80211_TX_AGAIN;
 		local->mdev->trans_start = jiffies;
 		ieee80211_led_tx(local, 1);
 	}
 	if (tx->extra_frag) {
-		control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS |
-				    IEEE80211_TXCTL_USE_CTS_PROTECT |
-				    IEEE80211_TXCTL_CLEAR_PS_FILT |
-				    IEEE80211_TXCTL_FIRST_FRAGMENT);
 		for (i = 0; i < tx->num_extra_frag; i++) {
 			if (!tx->extra_frag[i])
 				continue;
-			if (__ieee80211_queue_stopped(local, control->queue))
+			info = IEEE80211_SKB_CB(tx->extra_frag[i]);
+			info->flags &= ~(IEEE80211_TX_CTL_USE_RTS_CTS |
+					 IEEE80211_TX_CTL_USE_CTS_PROTECT |
+					 IEEE80211_TX_CTL_CLEAR_PS_FILT |
+					 IEEE80211_TX_CTL_FIRST_FRAGMENT);
+			if (__ieee80211_queue_stopped(local, info->queue))
 				return IEEE80211_TX_FRAG_AGAIN;
 			if (i == tx->num_extra_frag) {
-				control->tx_rate_idx = tx->last_frag_rate_idx;
+				info->tx_rate_idx = tx->last_frag_rate_idx;
 
 				if (tx->flags & IEEE80211_TX_PROBE_LAST_FRAG)
-					control->flags |=
-						IEEE80211_TXCTL_RATE_CTRL_PROBE;
+					info->flags |=
+						IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 				else
-					control->flags &=
-						~IEEE80211_TXCTL_RATE_CTRL_PROBE;
+					info->flags &=
+						~IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 			}
 
 			ieee80211_dump_frame(wiphy_name(local->hw.wiphy),
 					     "TX to low-level driver",
 					     tx->extra_frag[i]);
 			ret = local->ops->tx(local_to_hw(local),
-					    tx->extra_frag[i],
-					    control);
+					    tx->extra_frag[i]);
 			if (ret)
 				return IEEE80211_TX_FRAG_AGAIN;
 			local->mdev->trans_start = jiffies;
@@ -1135,17 +1141,18 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 	return IEEE80211_TX_OK;
 }
 
-static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
-			struct ieee80211_tx_control *control)
+static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct sta_info *sta;
 	ieee80211_tx_handler *handler;
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result res = TX_DROP, res_prepare;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int ret, i;
+	int queue = info->queue;
 
-	WARN_ON(__ieee80211_queue_pending(local, control->queue));
+	WARN_ON(__ieee80211_queue_pending(local, queue));
 
 	if (unlikely(skb->len < 10)) {
 		dev_kfree_skb(skb);
@@ -1155,7 +1162,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
 	rcu_read_lock();
 
 	/* initialises tx */
-	res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control);
+	res_prepare = __ieee80211_tx_prepare(&tx, skb, dev);
 
 	if (res_prepare == TX_DROP) {
 		dev_kfree_skb(skb);
@@ -1165,7 +1172,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
 
 	sta = tx.sta;
 	tx.channel = local->hw.conf.channel;
-	control->band = tx.channel->band;
+	info->band = tx.channel->band;
 
 	for (handler = ieee80211_tx_handlers; *handler != NULL;
 	     handler++) {
@@ -1174,7 +1181,8 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
 			break;
 	}
 
-	skb = tx.skb; /* handlers are allowed to change skb */
+	if (WARN_ON(tx.skb != skb))
+		goto drop;
 
 	if (unlikely(res == TX_DROP)) {
 		I802_DEBUG_INC(local->tx_handlers_drop);
@@ -1209,12 +1217,12 @@ retry:
 	ret = __ieee80211_tx(local, skb, &tx);
 	if (ret) {
 		struct ieee80211_tx_stored_packet *store =
-			&local->pending_packet[control->queue];
+			&local->pending_packet[info->queue];
 
 		if (ret == IEEE80211_TX_FRAG_AGAIN)
 			skb = NULL;
 		set_bit(IEEE80211_LINK_STATE_PENDING,
-			&local->state[control->queue]);
+			&local->state[queue]);
 		smp_mb();
 		/* When the driver gets out of buffers during sending of
 		 * fragments and calls ieee80211_stop_queue, there is
@@ -1225,13 +1233,11 @@ retry:
 		 * called with IEEE80211_LINK_STATE_PENDING. Prevent this by
 		 * continuing transmitting here when that situation is
 		 * possible to have happened. */
-		if (!__ieee80211_queue_stopped(local, control->queue)) {
+		if (!__ieee80211_queue_stopped(local, queue)) {
 			clear_bit(IEEE80211_LINK_STATE_PENDING,
-				  &local->state[control->queue]);
+				  &local->state[queue]);
 			goto retry;
 		}
-		memcpy(&store->control, control,
-		       sizeof(struct ieee80211_tx_control));
 		store->skb = skb;
 		store->extra_frag = tx.extra_frag;
 		store->num_extra_frag = tx.num_extra_frag;
@@ -1258,21 +1264,14 @@ retry:
 int ieee80211_master_start_xmit(struct sk_buff *skb,
 				struct net_device *dev)
 {
-	struct ieee80211_tx_control control;
-	struct ieee80211_tx_packet_data *pkt_data;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct net_device *odev = NULL;
 	struct ieee80211_sub_if_data *osdata;
 	int headroom;
 	int ret;
 
-	/*
-	 * copy control out of the skb so other people can use skb->cb
-	 */
-	pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
-	memset(&control, 0, sizeof(struct ieee80211_tx_control));
-
-	if (pkt_data->ifindex)
-		odev = dev_get_by_index(&init_net, pkt_data->ifindex);
+	if (info->control.ifindex)
+		odev = dev_get_by_index(&init_net, info->control.ifindex);
 	if (unlikely(odev && !is_ieee80211_device(odev, dev))) {
 		dev_put(odev);
 		odev = NULL;
@@ -1285,6 +1284,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 		dev_kfree_skb(skb);
 		return 0;
 	}
+
 	osdata = IEEE80211_DEV_TO_SUB_IF(odev);
 
 	headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM;
@@ -1296,21 +1296,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 		}
 	}
 
-	control.vif = &osdata->vif;
-	control.type = osdata->vif.type;
-	if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS)
-		control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS;
-	if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT)
-		control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
-	if (pkt_data->flags & IEEE80211_TXPD_REQUEUE)
-		control.flags |= IEEE80211_TXCTL_REQUEUE;
-	if (pkt_data->flags & IEEE80211_TXPD_EAPOL_FRAME)
-		control.flags |= IEEE80211_TXCTL_EAPOL_FRAME;
-	if (pkt_data->flags & IEEE80211_TXPD_AMPDU)
-		control.flags |= IEEE80211_TXCTL_AMPDU;
-	control.queue = pkt_data->queue;
-
-	ret = ieee80211_tx(odev, skb, &control);
+	info->control.vif = &osdata->vif;
+	ret = ieee80211_tx(odev, skb);
 	dev_put(odev);
 
 	return ret;
@@ -1320,7 +1307,7 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb,
 				 struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_tx_packet_data *pkt_data;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_radiotap_header *prthdr =
 		(struct ieee80211_radiotap_header *)skb->data;
 	u16 len_rthdr;
@@ -1342,14 +1329,12 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb,
 
 	skb->dev = local->mdev;
 
-	pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
-	memset(pkt_data, 0, sizeof(*pkt_data));
 	/* needed because we set skb device to master */
-	pkt_data->ifindex = dev->ifindex;
+	info->control.ifindex = dev->ifindex;
 
-	pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
+	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
 	/* Interfaces should always request a status report */
-	pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
+	info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
 
 	/*
 	 * fix up the pointers accounting for the radiotap
@@ -1393,7 +1378,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 			       struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_tx_packet_data *pkt_data;
+	struct ieee80211_tx_info *info;
 	struct ieee80211_sub_if_data *sdata;
 	int ret = 1, head_need;
 	u16 ethertype, hdrlen,  meshhdrlen = 0, fc;
@@ -1625,14 +1610,14 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	nh_pos += hdrlen;
 	h_pos += hdrlen;
 
-	pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
-	memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
-	pkt_data->ifindex = dev->ifindex;
+	info = IEEE80211_SKB_CB(skb);
+	memset(info, 0, sizeof(*info));
+	info->control.ifindex = dev->ifindex;
 	if (ethertype == ETH_P_PAE)
-		pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME;
+		info->flags |= IEEE80211_TX_CTL_EAPOL_FRAME;
 
 	/* Interfaces should always request a status report */
-	pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
+	info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
 
 	skb->dev = local->mdev;
 	dev->stats.tx_packets++;
@@ -1693,7 +1678,6 @@ void ieee80211_tx_pending(unsigned long data)
 			continue;
 		}
 		store = &local->pending_packet[i];
-		tx.control = &store->control;
 		tx.extra_frag = store->extra_frag;
 		tx.num_extra_frag = store->num_extra_frag;
 		tx.last_frag_rate_idx = store->last_frag_rate_idx;
@@ -1786,11 +1770,11 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
 }
 
 struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
-				     struct ieee80211_vif *vif,
-				     struct ieee80211_tx_control *control)
+				     struct ieee80211_vif *vif)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct sk_buff *skb;
+	struct ieee80211_tx_info *info;
 	struct net_device *bdev;
 	struct ieee80211_sub_if_data *sdata = NULL;
 	struct ieee80211_if_ap *ap = NULL;
@@ -1896,31 +1880,32 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 		goto out;
 	}
 
-	if (control) {
-		control->band = band;
-		rate_control_get_rate(local->mdev, sband, skb, &rsel);
-		if (unlikely(rsel.rate_idx < 0)) {
-			if (net_ratelimit()) {
-				printk(KERN_DEBUG "%s: ieee80211_beacon_get: "
-				       "no rate found\n",
-				       wiphy_name(local->hw.wiphy));
-			}
-			dev_kfree_skb(skb);
-			skb = NULL;
-			goto out;
-		}
+	info = IEEE80211_SKB_CB(skb);
+
+	info->band = band;
+	rate_control_get_rate(local->mdev, sband, skb, &rsel);
 
-		control->vif = vif;
-		control->tx_rate_idx = rsel.rate_idx;
-		if (sdata->bss_conf.use_short_preamble &&
-		    sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
-			control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE;
-		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
-		control->flags |= IEEE80211_TXCTL_NO_ACK;
-		control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
-		control->retry_limit = 1;
-		control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT;
+	if (unlikely(rsel.rate_idx < 0)) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "%s: ieee80211_beacon_get: "
+			       "no rate found\n",
+			       wiphy_name(local->hw.wiphy));
+		}
+		dev_kfree_skb(skb);
+		skb = NULL;
+		goto out;
 	}
+
+	info->control.vif = vif;
+	info->tx_rate_idx = rsel.rate_idx;
+	if (sdata->bss_conf.use_short_preamble &&
+	    sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
+		info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
+	info->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
+	info->flags |= IEEE80211_TX_CTL_NO_ACK;
+	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	info->control.retry_limit = 1;
+	info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
 	(*num_beacons)++;
 out:
 	rcu_read_unlock();
@@ -1930,7 +1915,7 @@ EXPORT_SYMBOL(ieee80211_beacon_get);
 
 void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 		       const void *frame, size_t frame_len,
-		       const struct ieee80211_tx_control *frame_txctl,
+		       const struct ieee80211_tx_info *frame_txctl,
 		       struct ieee80211_rts *rts)
 {
 	const struct ieee80211_hdr *hdr = frame;
@@ -1947,7 +1932,7 @@ EXPORT_SYMBOL(ieee80211_rts_get);
 
 void ieee80211_ctstoself_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			     const void *frame, size_t frame_len,
-			     const struct ieee80211_tx_control *frame_txctl,
+			     const struct ieee80211_tx_info *frame_txctl,
 			     struct ieee80211_cts *cts)
 {
 	const struct ieee80211_hdr *hdr = frame;
@@ -1963,8 +1948,7 @@ EXPORT_SYMBOL(ieee80211_ctstoself_get);
 
 struct sk_buff *
 ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
-			  struct ieee80211_vif *vif,
-			  struct ieee80211_tx_control *control)
+			  struct ieee80211_vif *vif)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct sk_buff *skb;
@@ -1976,6 +1960,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_if_ap *bss = NULL;
 	struct beacon_data *beacon;
+	struct ieee80211_tx_info *info;
 
 	sdata = vif_to_sdata(vif);
 	bdev = sdata->dev;
@@ -1995,7 +1980,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 
 	if (bss->dtim_count != 0)
 		return NULL; /* send buffered bc/mc only after DTIM beacon */
-	memset(control, 0, sizeof(*control));
+
 	while (1) {
 		skb = skb_dequeue(&bss->ps_bc_buf);
 		if (!skb)
@@ -2012,21 +1997,26 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 				cpu_to_le16(IEEE80211_FCTL_MOREDATA);
 		}
 
-		if (!ieee80211_tx_prepare(&tx, skb, local->mdev, control))
+		if (!ieee80211_tx_prepare(&tx, skb, local->mdev))
 			break;
 		dev_kfree_skb_any(skb);
 	}
+
+	info = IEEE80211_SKB_CB(skb);
+
 	sta = tx.sta;
 	tx.flags |= IEEE80211_TX_PS_BUFFERED;
 	tx.channel = local->hw.conf.channel;
-	control->band = tx.channel->band;
+	info->band = tx.channel->band;
 
 	for (handler = ieee80211_tx_handlers; *handler != NULL; handler++) {
 		res = (*handler)(&tx);
 		if (res == TX_DROP || res == TX_QUEUED)
 			break;
 	}
-	skb = tx.skb; /* handlers are allowed to change skb */
+
+	if (WARN_ON(tx.skb != skb))
+		return NULL;
 
 	if (res == TX_DROP) {
 		I802_DEBUG_INC(local->tx_handlers_drop);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 65a34fddeb0..d9109dee461 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -258,7 +258,7 @@ EXPORT_SYMBOL(ieee80211_generic_frame_duration);
 
 __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
 			      struct ieee80211_vif *vif, size_t frame_len,
-			      const struct ieee80211_tx_control *frame_txctl)
+			      const struct ieee80211_tx_info *frame_txctl)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate;
@@ -272,7 +272,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
 
 	short_preamble = sdata->bss_conf.use_short_preamble;
 
-	rate = &sband->bitrates[frame_txctl->rts_cts_rate_idx];
+	rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx];
 
 	erp = 0;
 	if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
@@ -295,7 +295,7 @@ EXPORT_SYMBOL(ieee80211_rts_duration);
 __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
 				    struct ieee80211_vif *vif,
 				    size_t frame_len,
-				    const struct ieee80211_tx_control *frame_txctl)
+				    const struct ieee80211_tx_info *frame_txctl)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate;
@@ -309,7 +309,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
 
 	short_preamble = sdata->bss_conf.use_short_preamble;
 
-	rate = &sband->bitrates[frame_txctl->rts_cts_rate_idx];
+	rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx];
 	erp = 0;
 	if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
 		erp = rate->flags & IEEE80211_RATE_ERP_G;
@@ -317,7 +317,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
 	/* Data frame duration */
 	dur = ieee80211_frame_duration(local, frame_len, rate->bitrate,
 				       erp, short_preamble);
-	if (!(frame_txctl->flags & IEEE80211_TXCTL_NO_ACK)) {
+	if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) {
 		/* ACK duration */
 		dur += ieee80211_frame_duration(local, 10, rate->bitrate,
 						erp, short_preamble);
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 3cbae42ec50..1e7f03dd8f6 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -333,11 +333,16 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
 
 static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 {
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+	info->control.iv_len = WEP_IV_LEN;
+	info->control.icv_len = WEP_ICV_LEN;
+
 	if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) {
+		info->control.hw_key = &tx->key->conf;
 		if (ieee80211_wep_encrypt(tx->local, skb, tx->key))
 			return -1;
 	} else {
-		tx->control->hw_key = &tx->key->conf;
 		if (tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) {
 			if (!ieee80211_wep_add_iv(tx->local, skb, tx->key))
 				return -1;
@@ -349,8 +354,6 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 ieee80211_tx_result
 ieee80211_crypto_wep_encrypt(struct ieee80211_tx_data *tx)
 {
-	tx->control->iv_len = WEP_IV_LEN;
-	tx->control->icv_len = WEP_ICV_LEN;
 	ieee80211_tx_set_protected(tx);
 
 	if (wep_encrypt_skb(tx, tx->skb) < 0) {
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index c87baf4ce97..477690f4dca 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -149,8 +149,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_tx_packet_data *pkt_data =
-		(struct ieee80211_tx_packet_data *) skb->cb;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	unsigned short fc = le16_to_cpu(hdr->frame_control);
 	struct Qdisc *qdisc;
@@ -158,8 +157,8 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 	int err, queue;
 	u8 tid;
 
-	if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) {
-		queue = pkt_data->queue;
+	if (info->flags & IEEE80211_TX_CTL_REQUEUE) {
+		queue = info->queue;
 		rcu_read_lock();
 		sta = sta_info_get(local, hdr->addr1);
 		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
@@ -168,9 +167,9 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 			if ((ampdu_queue < QD_NUM(hw)) &&
 			    test_bit(ampdu_queue, q->qdisc_pool)) {
 				queue = ampdu_queue;
-				pkt_data->flags |= IEEE80211_TXPD_AMPDU;
+				info->flags |= IEEE80211_TX_CTL_AMPDU;
 			} else {
-				pkt_data->flags &= ~IEEE80211_TXPD_AMPDU;
+				info->flags &= ~IEEE80211_TX_CTL_AMPDU;
 			}
 		}
 		rcu_read_unlock();
@@ -206,9 +205,9 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 			if ((ampdu_queue < QD_NUM(hw)) &&
 			    test_bit(ampdu_queue, q->qdisc_pool)) {
 				queue = ampdu_queue;
-				pkt_data->flags |= IEEE80211_TXPD_AMPDU;
+				info->flags |= IEEE80211_TX_CTL_AMPDU;
 			} else {
-				pkt_data->flags &= ~IEEE80211_TXPD_AMPDU;
+				info->flags &= ~IEEE80211_TX_CTL_AMPDU;
 			}
 		}
 
@@ -220,7 +219,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 			err = NET_XMIT_DROP;
 	} else {
 		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
-		pkt_data->queue = (unsigned int) queue;
+		info->queue = (unsigned int) queue;
 		qdisc = q->queues[queue];
 		err = qdisc->enqueue(skb, qdisc);
 		if (err == NET_XMIT_SUCCESS) {
@@ -241,13 +240,12 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 static int wme_qdiscop_requeue(struct sk_buff *skb, struct Qdisc* qd)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_tx_packet_data *pkt_data =
-		(struct ieee80211_tx_packet_data *) skb->cb;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct Qdisc *qdisc;
 	int err;
 
 	/* we recorded which queue to use earlier! */
-	qdisc = q->queues[pkt_data->queue];
+	qdisc = q->queues[info->queue];
 
 	if ((err = qdisc->ops->requeue(skb, qdisc)) == 0) {
 		qd->q.qlen++;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index d7304490d2e..d6635f6e561 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -183,15 +183,25 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 }
 
 
-static int tkip_encrypt_skb(struct ieee80211_tx_data *tx,
-			    struct sk_buff *skb, int test)
+static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_key *key = tx->key;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int hdrlen, len, tailneed;
 	u16 fc;
 	u8 *pos;
 
+	info->control.icv_len = TKIP_ICV_LEN;
+	info->control.iv_len = TKIP_IV_LEN;
+
+	if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) &&
+	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
+		/* hwaccel - with no need for preallocated room for IV/ICV */
+		info->control.hw_key = &tx->key->conf;
+		return TX_CONTINUE;
+	}
+
 	fc = le16_to_cpu(hdr->frame_control);
 	hdrlen = ieee80211_get_hdrlen(fc);
 	len = skb->len - hdrlen;
@@ -228,7 +238,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx,
 					    0x7f),
 				      (u8) key->u.tkip.tx.iv16);
 
-		tx->control->hw_key = &tx->key->conf;
+		info->control.hw_key = &tx->key->conf;
 		return 0;
 	}
 
@@ -246,28 +256,16 @@ ieee80211_tx_result
 ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx)
 {
 	struct sk_buff *skb = tx->skb;
-	int wpa_test = 0, test = 0;
 
-	tx->control->icv_len = TKIP_ICV_LEN;
-	tx->control->iv_len = TKIP_IV_LEN;
 	ieee80211_tx_set_protected(tx);
 
-	if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) &&
-	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) &&
-	    !wpa_test) {
-		/* hwaccel - with no need for preallocated room for IV/ICV */
-		tx->control->hw_key = &tx->key->conf;
-		return TX_CONTINUE;
-	}
-
-	if (tkip_encrypt_skb(tx, skb, test) < 0)
+	if (tkip_encrypt_skb(tx, skb) < 0)
 		return TX_DROP;
 
 	if (tx->extra_frag) {
 		int i;
 		for (i = 0; i < tx->num_extra_frag; i++) {
-			if (tkip_encrypt_skb(tx, tx->extra_frag[i], test)
-			    < 0)
+			if (tkip_encrypt_skb(tx, tx->extra_frag[i]) < 0)
 				return TX_DROP;
 		}
 	}
@@ -429,16 +427,27 @@ static inline int ccmp_hdr2pn(u8 *pn, u8 *hdr)
 }
 
 
-static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx,
-			    struct sk_buff *skb, int test)
+static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_key *key = tx->key;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int hdrlen, len, tailneed;
 	u16 fc;
 	u8 *pos, *pn, *b_0, *aad, *scratch;
 	int i;
 
+	info->control.icv_len = CCMP_MIC_LEN;
+	info->control.iv_len = CCMP_HDR_LEN;
+
+	if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) &&
+	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
+		/* hwaccel - with no need for preallocated room for CCMP "
+		 * header or MIC fields */
+		info->control.hw_key = &tx->key->conf;
+		return TX_CONTINUE;
+	}
+
 	scratch = key->u.ccmp.tx_crypto_buf;
 	b_0 = scratch + 3 * AES_BLOCK_LEN;
 	aad = scratch + 4 * AES_BLOCK_LEN;
@@ -478,7 +487,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx,
 
 	if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
 		/* hwaccel - with preallocated room for CCMP header */
-		tx->control->hw_key = &tx->key->conf;
+		info->control.hw_key = &tx->key->conf;
 		return 0;
 	}
 
@@ -495,28 +504,16 @@ ieee80211_tx_result
 ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx)
 {
 	struct sk_buff *skb = tx->skb;
-	int test = 0;
 
-	tx->control->icv_len = CCMP_MIC_LEN;
-	tx->control->iv_len = CCMP_HDR_LEN;
 	ieee80211_tx_set_protected(tx);
 
-	if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) &&
-	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
-		/* hwaccel - with no need for preallocated room for CCMP "
-		 * header or MIC fields */
-		tx->control->hw_key = &tx->key->conf;
-		return TX_CONTINUE;
-	}
-
-	if (ccmp_encrypt_skb(tx, skb, test) < 0)
+	if (ccmp_encrypt_skb(tx, skb) < 0)
 		return TX_DROP;
 
 	if (tx->extra_frag) {
 		int i;
 		for (i = 0; i < tx->num_extra_frag; i++) {
-			if (ccmp_encrypt_skb(tx, tx->extra_frag[i], test)
-			    < 0)
+			if (ccmp_encrypt_skb(tx, tx->extra_frag[i]) < 0)
 				return TX_DROP;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From eefce91a384a64c7bbf913eb08c4adfb911c3639 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 17 May 2008 00:57:13 +0200
Subject: mac80211: dont allow fragmentation and requeuing on A-MPDU queues

There really is no reason for a driver to reject a frame on
an A-MPDU queue when it can stop that queue for any period
of time and is given frames one by one. Hence, disallow it
with a big warning and reduce mac80211-internal state.

Also add a warning when we try to fragment a frame destined
for an A-MPDU queue and drop it, the actual bug needs to be
fixed elsewhere but I'm not exactly sure how to yet.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     |  6 ++++--
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/tx.c          | 23 +++++++++++++++++++++--
 3 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 54960b83db7..4df39eb9115 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -986,8 +986,10 @@ enum ieee80211_ampdu_mlme_action {
  * @tx: Handler that 802.11 module calls for each transmitted frame.
  *	skb contains the buffer starting from the IEEE 802.11 header.
  *	The low-level driver should send the frame out based on
- *	configuration in the TX control data. Must be implemented and
- *	atomic.
+ *	configuration in the TX control data. This handler should,
+ *	preferably, never fail and stop queues appropriately, more
+ *	importantly, however, it must never fail for A-MPDU-queues.
+ *	Must be implemented and atomic.
  *
  * @start: Called before the first netdevice attached to the hardware
  *	is enabled. This should turn on the hardware and must turn on
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 79a65b3ee02..86a861251e8 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -595,7 +595,7 @@ struct ieee80211_local {
 	struct timer_list sta_cleanup;
 
 	unsigned long state[IEEE80211_MAX_QUEUES + IEEE80211_MAX_AMPDU_QUEUES];
-	struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_QUEUES + IEEE80211_MAX_AMPDU_QUEUES];
+	struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_QUEUES];
 	struct tasklet_struct tx_pending_tasklet;
 
 	/* number of interfaces with corresponding IFF_ flags */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ac9a4af7ad4..6268bbca148 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -673,6 +673,16 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 	if (!(tx->flags & IEEE80211_TX_FRAGMENTED))
 		return TX_CONTINUE;
 
+	/*
+	 * Warn when submitting a fragmented A-MPDU frame and drop it.
+	 * This is an error and needs to be fixed elsewhere, but when
+	 * done needs to take care of monitor interfaces (injection)
+	 * etc.
+	 */
+	if (WARN_ON(tx->flags & IEEE80211_TX_CTL_AMPDU ||
+		    IEEE80211_SKB_CB(tx->skb)->queue >= tx->local->hw.queues))
+		return TX_DROP;
+
 	first = tx->skb;
 
 	hdrlen = ieee80211_get_hdrlen(tx->fc);
@@ -1216,8 +1226,17 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb)
 retry:
 	ret = __ieee80211_tx(local, skb, &tx);
 	if (ret) {
-		struct ieee80211_tx_stored_packet *store =
-			&local->pending_packet[info->queue];
+		struct ieee80211_tx_stored_packet *store;
+
+		/*
+		 * Since there are no fragmented frames on A-MPDU
+		 * queues, there's no reason for a driver to reject
+		 * a frame there, warn and drop it.
+		 */
+		if (WARN_ON(queue >= local->hw.queues))
+			goto drop;
+
+		store = &local->pending_packet[queue];
 
 		if (ret == IEEE80211_TX_FRAG_AGAIN)
 			skb = NULL;
-- 
cgit v1.2.3-70-g09d2


From e2530083609148a7835b54c431f6b8956407c1f6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 17 May 2008 00:57:14 +0200
Subject: mac80211: use multi-queue master netdevice

This patch updates mac80211 and drivers to be multi-queue aware and
use that instead of the internal queue mapping. Also does a number
of cleanups in various pieces of the code that fall out and reduces
internal mac80211 state size.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/base.c           |  2 +-
 drivers/net/wireless/b43/dma.c              |  7 ++-
 drivers/net/wireless/b43/pio.c              |  8 +--
 drivers/net/wireless/b43legacy/dma.c        |  3 +-
 drivers/net/wireless/iwlwifi/iwl-tx.c       |  4 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  4 +-
 drivers/net/wireless/p54/p54common.c        |  8 +--
 drivers/net/wireless/rt2x00/rt2x00mac.c     | 10 +--
 drivers/net/wireless/rt2x00/rt2x00queue.h   | 13 ----
 drivers/net/wireless/rtl8180_dev.c          |  4 +-
 include/net/mac80211.h                      | 20 +++++-
 net/mac80211/Kconfig                        | 14 ++++-
 net/mac80211/Makefile                       |  2 +-
 net/mac80211/ieee80211_i.h                  | 16 +++--
 net/mac80211/iface.c                        |  2 +-
 net/mac80211/main.c                         | 29 ++++++---
 net/mac80211/sta_info.c                     |  2 +-
 net/mac80211/tx.c                           | 97 +++++++++++++----------------
 net/mac80211/util.c                         | 29 ++++-----
 net/mac80211/wme.c                          | 14 ++---
 net/mac80211/wme.h                          |  2 +-
 21 files changed, 158 insertions(+), 132 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 7d97934265d..18e9422d26d 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -2657,7 +2657,7 @@ ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	if (list_empty(&sc->txbuf)) {
 		ATH5K_ERR(sc, "no further txbuf available, dropping packet\n");
 		spin_unlock_irqrestore(&sc->txbuflock, flags);
-		ieee80211_stop_queue(hw, info->queue);
+		ieee80211_stop_queue(hw, skb_get_queue_mapping(skb));
 		return -1;
 	}
 	bf = list_first_entry(&sc->txbuf, struct ath5k_buf, list);
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index aced9866d81..b4eadd908be 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -1297,7 +1297,8 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
 		hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA);
 	} else {
 		/* Decide by priority where to put this frame. */
-		ring = select_ring_by_priority(dev, info->queue);
+		ring = select_ring_by_priority(
+			dev, skb_get_queue_mapping(skb));
 	}
 
 	spin_lock_irqsave(&ring->lock, flags);
@@ -1315,7 +1316,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
 	/* Assign the queue number to the ring (if not already done before)
 	 * so TX status handling can use it. The queue to ring mapping is
 	 * static, so we don't need to store it per frame. */
-	ring->queue_prio = info->queue;
+	ring->queue_prio = skb_get_queue_mapping(skb);
 
 	err = dma_tx_fragment(ring, skb);
 	if (unlikely(err == -ENOKEY)) {
@@ -1333,7 +1334,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
 	if ((free_slots(ring) < SLOTS_PER_PACKET) ||
 	    should_inject_overflow(ring)) {
 		/* This TX ring is full. */
-		ieee80211_stop_queue(dev->wl->hw, info->queue);
+		ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
 		ring->stopped = 1;
 		if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
 			b43dbg(dev->wl, "Stopped TX ring %d\n", ring->index);
diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c
index 284786a94e7..8b1555d95f1 100644
--- a/drivers/net/wireless/b43/pio.c
+++ b/drivers/net/wireless/b43/pio.c
@@ -509,7 +509,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb)
 		hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA);
 	} else {
 		/* Decide by priority where to put this frame. */
-		q = select_queue_by_priority(dev, info->queue);
+		q = select_queue_by_priority(dev, skb_get_queue_mapping(skb));
 	}
 
 	spin_lock_irqsave(&q->lock, flags);
@@ -532,7 +532,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb)
 	if (total_len > (q->buffer_size - q->buffer_used)) {
 		/* Not enough memory on the queue. */
 		err = -EBUSY;
-		ieee80211_stop_queue(dev->wl->hw, info->queue);
+		ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
 		q->stopped = 1;
 		goto out_unlock;
 	}
@@ -540,7 +540,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb)
 	/* Assign the queue number to the ring (if not already done before)
 	 * so TX status handling can use it. The mac80211-queue to b43-queue
 	 * mapping is static, so we don't need to store it per frame. */
-	q->queue_prio = info->queue;
+	q->queue_prio = skb_get_queue_mapping(skb);
 
 	err = pio_tx_frame(q, skb);
 	if (unlikely(err == -ENOKEY)) {
@@ -560,7 +560,7 @@ int b43_pio_tx(struct b43_wldev *dev, struct sk_buff *skb)
 	if (((q->buffer_size - q->buffer_used) < roundup(2 + 2 + 6, 4)) ||
 	    (q->free_packet_slots == 0)) {
 		/* The queue is full. */
-		ieee80211_stop_queue(dev->wl->hw, info->queue);
+		ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
 		q->stopped = 1;
 	}
 
diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c
index c1c501d963b..33cc256c5ba 100644
--- a/drivers/net/wireless/b43legacy/dma.c
+++ b/drivers/net/wireless/b43legacy/dma.c
@@ -1325,11 +1325,10 @@ int b43legacy_dma_tx(struct b43legacy_wldev *dev,
 		     struct sk_buff *skb)
 {
 	struct b43legacy_dmaring *ring;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int err = 0;
 	unsigned long flags;
 
-	ring = priority_to_txring(dev, info->queue);
+	ring = priority_to_txring(dev, skb_get_queue_mapping(skb));
 	spin_lock_irqsave(&ring->lock, flags);
 	B43legacy_WARN_ON(!ring->tx);
 	if (unlikely(free_slots(ring) < SLOTS_PER_PACKET)) {
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index a61293ba3f6..0ebab967d5e 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -696,7 +696,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct iwl_tfd_frame *tfd;
 	u32 *control_flags;
-	int txq_id = info->queue;
+	int txq_id = skb_get_queue_mapping(skb);
 	struct iwl_tx_queue *txq = NULL;
 	struct iwl_queue *q = NULL;
 	dma_addr_t phys_addr;
@@ -917,7 +917,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
 			spin_unlock_irqrestore(&priv->lock, flags);
 		}
 
-		ieee80211_stop_queue(priv->hw, info->queue);
+		ieee80211_stop_queue(priv->hw, skb_get_queue_mapping(skb));
 	}
 
 	return 0;
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index a740a1817d1..e0f52e26441 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -2552,7 +2552,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb)
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct iwl3945_tfd_frame *tfd;
 	u32 *control_flags;
-	int txq_id = info->queue;
+	int txq_id = skb_get_queue_mapping(skb);
 	struct iwl3945_tx_queue *txq = NULL;
 	struct iwl3945_queue *q = NULL;
 	dma_addr_t phys_addr;
@@ -2765,7 +2765,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb)
 			spin_unlock_irqrestore(&priv->lock, flags);
 		}
 
-		ieee80211_stop_queue(priv->hw, info->queue);
+		ieee80211_stop_queue(priv->hw, skb_get_queue_mapping(skb));
 	}
 
 	return 0;
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 850857932e2..91ac9208b77 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -407,7 +407,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb)
 			last_addr = range->end_addr;
 			__skb_unlink(entry, &priv->tx_queue);
 			memset(&info->status, 0, sizeof(info->status));
-			priv->tx_stats[info->queue].len--;
+			priv->tx_stats[skb_get_queue_mapping(skb)].len--;
 			entry_hdr = (struct p54_control_hdr *) entry->data;
 			entry_data = (struct p54_tx_control_allocdata *) entry_hdr->data;
 			if ((entry_hdr->magic1 & cpu_to_le16(0x4000)) != 0)
@@ -551,13 +551,13 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	size_t padding, len;
 	u8 rate;
 
-	current_queue = &priv->tx_stats[info->queue];
+	current_queue = &priv->tx_stats[skb_get_queue_mapping(skb)];
 	if (unlikely(current_queue->len > current_queue->limit))
 		return NETDEV_TX_BUSY;
 	current_queue->len++;
 	current_queue->count++;
 	if (current_queue->len == current_queue->limit)
-		ieee80211_stop_queue(dev, info->queue);
+		ieee80211_stop_queue(dev, skb_get_queue_mapping(skb));
 
 	padding = (unsigned long)(skb->data - (sizeof(*hdr) + sizeof(*txhdr))) & 3;
 	len = skb->len;
@@ -589,7 +589,7 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	memset(txhdr->rateset, rate, 8);
 	txhdr->wep_key_present = 0;
 	txhdr->wep_key_len = 0;
-	txhdr->frame_type = cpu_to_le32(info->queue + 4);
+	txhdr->frame_type = cpu_to_le32(skb_get_queue_mapping(skb) + 4);
 	txhdr->magic4 = 0;
 	txhdr->antenna = (info->antenna_sel_tx == 0) ?
 		2 : info->antenna_sel_tx - 1;
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index b5379b027b1..c05e05b5888 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -102,7 +102,7 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	struct rt2x00_dev *rt2x00dev = hw->priv;
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data;
-	enum data_queue_qid qid = mac80211_queue_to_qid(tx_info->queue);
+	enum data_queue_qid qid = skb_get_queue_mapping(skb);
 	struct data_queue *queue;
 	u16 frame_control;
 
@@ -149,23 +149,23 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 			       IEEE80211_TX_CTL_USE_CTS_PROTECT)) &&
 	    !rt2x00dev->ops->hw->set_rts_threshold) {
 		if (rt2x00queue_available(queue) <= 1) {
-			ieee80211_stop_queue(rt2x00dev->hw, tx_info->queue);
+			ieee80211_stop_queue(rt2x00dev->hw, qid);
 			return NETDEV_TX_BUSY;
 		}
 
 		if (rt2x00mac_tx_rts_cts(rt2x00dev, queue, skb)) {
-			ieee80211_stop_queue(rt2x00dev->hw, tx_info->queue);
+			ieee80211_stop_queue(rt2x00dev->hw, qid);
 			return NETDEV_TX_BUSY;
 		}
 	}
 
 	if (rt2x00dev->ops->lib->write_tx_data(rt2x00dev, queue, skb)) {
-		ieee80211_stop_queue(rt2x00dev->hw, tx_info->queue);
+		ieee80211_stop_queue(rt2x00dev->hw, qid);
 		return NETDEV_TX_BUSY;
 	}
 
 	if (rt2x00queue_full(queue))
-		ieee80211_stop_queue(rt2x00dev->hw, tx_info->queue);
+		ieee80211_stop_queue(rt2x00dev->hw, qid);
 
 	if (rt2x00dev->ops->lib->kick_tx_queue)
 		rt2x00dev->ops->lib->kick_tx_queue(rt2x00dev, qid);
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.h b/drivers/net/wireless/rt2x00/rt2x00queue.h
index f263fe422f8..4d00ced14cc 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.h
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.h
@@ -79,19 +79,6 @@ enum data_queue_qid {
 	QID_ATIM,
 };
 
-/**
- * mac80211_queue_to_qid - Convert mac80211 queue to rt2x00 qid
- * @queue: mac80211 queue.
- */
-static inline enum data_queue_qid mac80211_queue_to_qid(unsigned int queue)
-{
-	/* Regular TX queues are mapped directly */
-	if (queue < 4)
-		return queue;
-	WARN_ON(1);
-	return QID_OTHER;
-}
-
 /**
  * enum skb_frame_desc_flags: Flags for &struct skb_frame_desc
  *
diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c
index 4427bc9f78a..b7172a12c05 100644
--- a/drivers/net/wireless/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl8180_dev.c
@@ -246,7 +246,7 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	u16 plcp_len = 0;
 	__le16 rts_duration = 0;
 
-	prio = info->queue;
+	prio = skb_get_queue_mapping(skb);
 	ring = &priv->tx_ring[prio];
 
 	mapping = pci_map_single(priv->pdev, skb->data,
@@ -298,7 +298,7 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	entry->flags = cpu_to_le32(tx_flags);
 	__skb_queue_tail(&ring->queue, skb);
 	if (ring->entries - skb_queue_len(&ring->queue) < 2)
-		ieee80211_stop_queue(dev, info->queue);
+		ieee80211_stop_queue(dev, skb_get_queue_mapping(skb));
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	rtl818x_iowrite8(priv, &priv->map->TX_DMA_POLLING, (1 << (prio + 4)));
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 4df39eb9115..c80e3be8f79 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -293,7 +293,7 @@ struct ieee80211_tx_info {
 	s8 tx_rate_idx;
 	u8 antenna_sel_tx;
 
-	u8 queue; /* use skb_queue_mapping soon */
+	/* 1 byte hole */
 
 	union {
 		struct {
@@ -802,6 +802,24 @@ static inline void SET_IEEE80211_PERM_ADDR(struct ieee80211_hw *hw, u8 *addr)
 	memcpy(hw->wiphy->perm_addr, addr, ETH_ALEN);
 }
 
+static inline int ieee80211_num_regular_queues(struct ieee80211_hw *hw)
+{
+#ifdef CONFIG_MAC80211_QOS
+	return hw->queues;
+#else
+	return 1;
+#endif
+}
+
+static inline int ieee80211_num_queues(struct ieee80211_hw *hw)
+{
+#ifdef CONFIG_MAC80211_QOS
+	return hw->queues + hw->ampdu_queues;
+#else
+	return 1;
+#endif
+}
+
 static inline struct ieee80211_rate *
 ieee80211_get_tx_rate(const struct ieee80211_hw *hw,
 		      const struct ieee80211_tx_info *c)
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index a24b459dd45..590e00b2766 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -7,11 +7,23 @@ config MAC80211
 	select CRC32
 	select WIRELESS_EXT
 	select CFG80211
-	select NET_SCH_FIFO
 	---help---
 	  This option enables the hardware independent IEEE 802.11
 	  networking stack.
 
+config MAC80211_QOS
+	def_bool y
+	depends on MAC80211
+	depends on NET_SCHED
+	depends on NETDEVICES_MULTIQUEUE
+
+comment "QoS/HT support disabled"
+	depends on MAC80211 && !MAC80211_QOS
+comment "QoS/HT support needs CONFIG_NET_SCHED"
+	depends on MAC80211 && !NET_SCHED
+comment "QoS/HT support needs CONFIG_NETDEVICES_MULTIQUEUE"
+	depends on MAC80211 && !NETDEVICES_MULTIQUEUE
+
 menu "Rate control algorithm selection"
 	depends on MAC80211 != n
 
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 4e5847fd316..1d2a4e010e5 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -29,7 +29,7 @@ mac80211-y := \
 	event.o
 
 mac80211-$(CONFIG_MAC80211_LEDS) += led.o
-mac80211-$(CONFIG_NET_SCHED) += wme.o
+mac80211-$(CONFIG_MAC80211_QOS) += wme.o
 mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
 	debugfs.o \
 	debugfs_sta.o \
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 86a861251e8..7d614cdcefc 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -594,7 +594,7 @@ struct ieee80211_local {
 	struct sta_info *sta_hash[STA_HASH_SIZE];
 	struct timer_list sta_cleanup;
 
-	unsigned long state[IEEE80211_MAX_QUEUES + IEEE80211_MAX_AMPDU_QUEUES];
+	unsigned long queues_pending[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)];
 	struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_QUEUES];
 	struct tasklet_struct tx_pending_tasklet;
 
@@ -758,6 +758,15 @@ struct ieee80211_local {
 #endif
 };
 
+static inline int ieee80211_is_multiqueue(struct ieee80211_local *local)
+{
+#ifdef CONFIG_MAC80211_QOS
+	return netif_is_multiqueue(local->mdev);
+#else
+	return 0;
+#endif
+}
+
 /* this struct represents 802.11n's RA/TID combination */
 struct ieee80211_ra_tid {
 	u8 ra[ETH_ALEN];
@@ -827,11 +836,6 @@ static inline struct ieee80211_hw *local_to_hw(
 	return &local->hw;
 }
 
-enum ieee80211_link_state_t {
-	IEEE80211_LINK_STATE_XOFF = 0,
-	IEEE80211_LINK_STATE_PENDING,
-};
-
 struct sta_attribute {
 	struct attribute attr;
 	ssize_t (*show)(const struct sta_info *, char *buf);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 3c64e42eb12..98447270238 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -168,7 +168,7 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
 		ifsta->flags |= IEEE80211_STA_CREATE_IBSS |
 			IEEE80211_STA_AUTO_BSSID_SEL |
 			IEEE80211_STA_AUTO_CHANNEL_SEL;
-		if (sdata->local->hw.queues >= 4)
+		if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4)
 			ifsta->flags |= IEEE80211_STA_WMM_ENABLED;
 
 		msdata = IEEE80211_DEV_TO_SUB_IF(sdata->local->mdev);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 8f1ff7ef166..97d4a537ca2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1634,12 +1634,32 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (result < 0)
 		return result;
 
+	/*
+	 * We use the number of queues for feature tests (QoS, HT) internally
+	 * so restrict them appropriately.
+	 */
+#ifdef CONFIG_MAC80211_QOS
+	if (hw->queues > IEEE80211_MAX_QUEUES)
+		hw->queues = IEEE80211_MAX_QUEUES;
+	if (hw->ampdu_queues > IEEE80211_MAX_AMPDU_QUEUES)
+		hw->ampdu_queues = IEEE80211_MAX_AMPDU_QUEUES;
+	if (hw->queues < 4)
+		hw->ampdu_queues = 0;
+#else
+	hw->queues = 1;
+	hw->ampdu_queues = 0;
+#endif
+
 	/* for now, mdev needs sub_if_data :/ */
-	mdev = alloc_netdev(sizeof(struct ieee80211_sub_if_data),
-			    "wmaster%d", ether_setup);
+	mdev = alloc_netdev_mq(sizeof(struct ieee80211_sub_if_data),
+			       "wmaster%d", ether_setup,
+			       ieee80211_num_queues(hw));
 	if (!mdev)
 		goto fail_mdev_alloc;
 
+	if (ieee80211_num_queues(hw) > 1)
+		mdev->features |= NETIF_F_MULTI_QUEUE;
+
 	sdata = IEEE80211_DEV_TO_SUB_IF(mdev);
 	mdev->ieee80211_ptr = &sdata->wdev;
 	sdata->wdev.wiphy = local->hw.wiphy;
@@ -1728,11 +1748,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		goto fail_wep;
 	}
 
-	if (hw->queues > IEEE80211_MAX_QUEUES)
-		hw->queues = IEEE80211_MAX_QUEUES;
-	if (hw->ampdu_queues > IEEE80211_MAX_AMPDU_QUEUES)
-		hw->ampdu_queues = IEEE80211_MAX_AMPDU_QUEUES;
-
 	ieee80211_install_qdisc(local->mdev);
 
 	/* add one default STA interface */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index ef3149324d5..c24770cb02c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -255,7 +255,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		 * sta_rx_agg_session_timer_expired for useage */
 		sta->timer_to_tid[i] = i;
 		/* tid to tx queue: initialize according to HW (0 is valid) */
-		sta->tid_to_tx_q[i] = local->hw.queues + local->hw.ampdu_queues;
+		sta->tid_to_tx_q[i] = ieee80211_num_queues(&local->hw);
 		/* rx */
 		sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE;
 		sta->ampdu_mlme.tid_rx[i] = NULL;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6268bbca148..9273651d3d7 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -213,18 +213,6 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 	return dur;
 }
 
-static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local,
-					    int queue)
-{
-	return test_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]);
-}
-
-static inline int __ieee80211_queue_pending(const struct ieee80211_local *local,
-					    int queue)
-{
-	return test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue]);
-}
-
 static int inline is_ieee80211_device(struct net_device *dev,
 				      struct net_device *master)
 {
@@ -680,7 +668,8 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 	 * etc.
 	 */
 	if (WARN_ON(tx->flags & IEEE80211_TX_CTL_AMPDU ||
-		    IEEE80211_SKB_CB(tx->skb)->queue >= tx->local->hw.queues))
+		    skb_get_queue_mapping(tx->skb) >=
+			ieee80211_num_regular_queues(&tx->local->hw)))
 		return TX_DROP;
 
 	first = tx->skb;
@@ -1098,11 +1087,9 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int ret, i;
 
-	if (!ieee80211_qdisc_installed(local->mdev) &&
-	    __ieee80211_queue_stopped(local, 0)) {
-		netif_stop_queue(local->mdev);
+	if (netif_subqueue_stopped(local->mdev, skb))
 		return IEEE80211_TX_AGAIN;
-	}
+
 	if (skb) {
 		ieee80211_dump_frame(wiphy_name(local->hw.wiphy),
 				     "TX to low-level driver", skb);
@@ -1121,7 +1108,8 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 					 IEEE80211_TX_CTL_USE_CTS_PROTECT |
 					 IEEE80211_TX_CTL_CLEAR_PS_FILT |
 					 IEEE80211_TX_CTL_FIRST_FRAGMENT);
-			if (__ieee80211_queue_stopped(local, info->queue))
+			if (netif_subqueue_stopped(local->mdev,
+						   tx->extra_frag[i]))
 				return IEEE80211_TX_FRAG_AGAIN;
 			if (i == tx->num_extra_frag) {
 				info->tx_rate_idx = tx->last_frag_rate_idx;
@@ -1160,9 +1148,11 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb)
 	ieee80211_tx_result res = TX_DROP, res_prepare;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int ret, i;
-	int queue = info->queue;
+	u16 queue;
 
-	WARN_ON(__ieee80211_queue_pending(local, queue));
+	queue = skb_get_queue_mapping(skb);
+
+	WARN_ON(test_bit(queue, local->queues_pending));
 
 	if (unlikely(skb->len < 10)) {
 		dev_kfree_skb(skb);
@@ -1233,28 +1223,28 @@ retry:
 		 * queues, there's no reason for a driver to reject
 		 * a frame there, warn and drop it.
 		 */
-		if (WARN_ON(queue >= local->hw.queues))
+		if (WARN_ON(queue >= ieee80211_num_regular_queues(&local->hw)))
 			goto drop;
 
 		store = &local->pending_packet[queue];
 
 		if (ret == IEEE80211_TX_FRAG_AGAIN)
 			skb = NULL;
-		set_bit(IEEE80211_LINK_STATE_PENDING,
-			&local->state[queue]);
+		set_bit(queue, local->queues_pending);
 		smp_mb();
-		/* When the driver gets out of buffers during sending of
-		 * fragments and calls ieee80211_stop_queue, there is
-		 * a small window between IEEE80211_LINK_STATE_XOFF and
-		 * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer
+		/*
+		 * When the driver gets out of buffers during sending of
+		 * fragments and calls ieee80211_stop_queue, the netif
+		 * subqueue is stopped. There is, however, a small window
+		 * in which the PENDING bit is not yet set. If a buffer
 		 * gets available in that window (i.e. driver calls
 		 * ieee80211_wake_queue), we would end up with ieee80211_tx
-		 * called with IEEE80211_LINK_STATE_PENDING. Prevent this by
+		 * called with the PENDING bit still set. Prevent this by
 		 * continuing transmitting here when that situation is
-		 * possible to have happened. */
-		if (!__ieee80211_queue_stopped(local, queue)) {
-			clear_bit(IEEE80211_LINK_STATE_PENDING,
-				  &local->state[queue]);
+		 * possible to have happened.
+		 */
+		if (!__netif_subqueue_stopped(local->mdev, queue)) {
+			clear_bit(queue, local->queues_pending);
 			goto retry;
 		}
 		store->skb = skb;
@@ -1509,7 +1499,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	}
 
 	/* receiver and we are QoS enabled, use a QoS type frame */
-	if (sta_flags & WLAN_STA_WME && local->hw.queues >= 4) {
+	if (sta_flags & WLAN_STA_WME &&
+	    ieee80211_num_regular_queues(&local->hw) >= 4) {
 		fc |= IEEE80211_STYPE_QOS_DATA;
 		hdrlen += 2;
 	}
@@ -1661,41 +1652,51 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	return ret;
 }
 
-/* helper functions for pending packets for when queues are stopped */
 
+/*
+ * ieee80211_clear_tx_pending may not be called in a context where
+ * it is possible that it packets could come in again.
+ */
 void ieee80211_clear_tx_pending(struct ieee80211_local *local)
 {
 	int i, j;
 	struct ieee80211_tx_stored_packet *store;
 
-	for (i = 0; i < local->hw.queues; i++) {
-		if (!__ieee80211_queue_pending(local, i))
+	for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) {
+		if (!test_bit(i, local->queues_pending))
 			continue;
 		store = &local->pending_packet[i];
 		kfree_skb(store->skb);
 		for (j = 0; j < store->num_extra_frag; j++)
 			kfree_skb(store->extra_frag[j]);
 		kfree(store->extra_frag);
-		clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[i]);
+		clear_bit(i, local->queues_pending);
 	}
 }
 
+/*
+ * Transmit all pending packets. Called from tasklet, locks master device
+ * TX lock so that no new packets can come in.
+ */
 void ieee80211_tx_pending(unsigned long data)
 {
 	struct ieee80211_local *local = (struct ieee80211_local *)data;
 	struct net_device *dev = local->mdev;
 	struct ieee80211_tx_stored_packet *store;
 	struct ieee80211_tx_data tx;
-	int i, ret, reschedule = 0;
+	int i, ret;
 
 	netif_tx_lock_bh(dev);
-	for (i = 0; i < local->hw.queues; i++) {
-		if (__ieee80211_queue_stopped(local, i))
+	for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) {
+		/* Check that this queue is ok */
+		if (__netif_subqueue_stopped(local->mdev, i))
 			continue;
-		if (!__ieee80211_queue_pending(local, i)) {
-			reschedule = 1;
+
+		if (!test_bit(i, local->queues_pending)) {
+			ieee80211_wake_queue(&local->hw, i);
 			continue;
 		}
+
 		store = &local->pending_packet[i];
 		tx.extra_frag = store->extra_frag;
 		tx.num_extra_frag = store->num_extra_frag;
@@ -1708,19 +1709,11 @@ void ieee80211_tx_pending(unsigned long data)
 			if (ret == IEEE80211_TX_FRAG_AGAIN)
 				store->skb = NULL;
 		} else {
-			clear_bit(IEEE80211_LINK_STATE_PENDING,
-				  &local->state[i]);
-			reschedule = 1;
+			clear_bit(i, local->queues_pending);
+			ieee80211_wake_queue(&local->hw, i);
 		}
 	}
 	netif_tx_unlock_bh(dev);
-	if (reschedule) {
-		if (!ieee80211_qdisc_installed(dev)) {
-			if (!__ieee80211_queue_stopped(local, 0))
-				netif_wake_queue(dev);
-		} else
-			netif_schedule(dev);
-	}
 }
 
 /* functions for drivers to get certain frames */
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d9109dee461..4f7180b287d 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -331,17 +331,15 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	if (test_and_clear_bit(IEEE80211_LINK_STATE_XOFF,
-			       &local->state[queue])) {
-		if (test_bit(IEEE80211_LINK_STATE_PENDING,
-			     &local->state[queue]))
-			tasklet_schedule(&local->tx_pending_tasklet);
-		else
-			if (!ieee80211_qdisc_installed(local->mdev)) {
-				if (queue == 0)
-					netif_wake_queue(local->mdev);
-			} else
-				__netif_schedule(local->mdev);
+	if (test_bit(queue, local->queues_pending)) {
+		tasklet_schedule(&local->tx_pending_tasklet);
+	} else {
+		if (ieee80211_is_multiqueue(local)) {
+			netif_wake_subqueue(local->mdev, queue);
+		} else {
+			WARN_ON(queue != 0);
+			netif_wake_queue(local->mdev);
+		}
 	}
 }
 EXPORT_SYMBOL(ieee80211_wake_queue);
@@ -350,9 +348,12 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	if (!ieee80211_qdisc_installed(local->mdev) && queue == 0)
+	if (ieee80211_is_multiqueue(local)) {
+		netif_stop_subqueue(local->mdev, queue);
+	} else {
+		WARN_ON(queue != 0);
 		netif_stop_queue(local->mdev);
-	set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]);
+	}
 }
 EXPORT_SYMBOL(ieee80211_stop_queue);
 
@@ -360,7 +361,7 @@ void ieee80211_stop_queues(struct ieee80211_hw *hw)
 {
 	int i;
 
-	for (i = 0; i < hw->queues + hw->ampdu_queues; i++)
+	for (i = 0; i < ieee80211_num_queues(hw); i++)
 		ieee80211_stop_queue(hw, i);
 }
 EXPORT_SYMBOL(ieee80211_stop_queues);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 477690f4dca..14a9ff10a1e 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -158,7 +158,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 	u8 tid;
 
 	if (info->flags & IEEE80211_TX_CTL_REQUEUE) {
-		queue = info->queue;
+		queue = skb_get_queue_mapping(skb);
 		rcu_read_lock();
 		sta = sta_info_get(local, hdr->addr1);
 		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
@@ -219,7 +219,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 			err = NET_XMIT_DROP;
 	} else {
 		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
-		info->queue = (unsigned int) queue;
+		skb_set_queue_mapping(skb, queue);
 		qdisc = q->queues[queue];
 		err = qdisc->enqueue(skb, qdisc);
 		if (err == NET_XMIT_SUCCESS) {
@@ -240,12 +240,11 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 static int wme_qdiscop_requeue(struct sk_buff *skb, struct Qdisc* qd)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct Qdisc *qdisc;
 	int err;
 
 	/* we recorded which queue to use earlier! */
-	qdisc = q->queues[info->queue];
+	qdisc = q->queues[skb_get_queue_mapping(skb)];
 
 	if ((err = qdisc->ops->requeue(skb, qdisc)) == 0) {
 		qd->q.qlen++;
@@ -269,11 +268,8 @@ static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc* qd)
 	/* check all the h/w queues in numeric/priority order */
 	for (queue = 0; queue < QD_NUM(hw); queue++) {
 		/* see if there is room in this hardware queue */
-		if ((test_bit(IEEE80211_LINK_STATE_XOFF,
-				&local->state[queue])) ||
-		    (test_bit(IEEE80211_LINK_STATE_PENDING,
-				&local->state[queue])) ||
-			 (!test_bit(queue, q->qdisc_pool)))
+		if (__netif_subqueue_stopped(local->mdev, queue) ||
+		    !test_bit(queue, q->qdisc_pool))
 			continue;
 
 		/* there is space - try and get a frame */
diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h
index fcc6b05508c..bbdb5334481 100644
--- a/net/mac80211/wme.h
+++ b/net/mac80211/wme.h
@@ -31,7 +31,7 @@ static inline int WLAN_FC_IS_QOS_DATA(u16 fc)
 	return (fc & 0x8C) == 0x88;
 }
 
-#ifdef CONFIG_NET_SCHED
+#ifdef CONFIG_MAC80211_QOS
 void ieee80211_install_qdisc(struct net_device *dev);
 int ieee80211_qdisc_installed(struct net_device *dev);
 int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
-- 
cgit v1.2.3-70-g09d2


From 9e72ebd686a7f39facdfff639386055f1ad7dc88 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 21 May 2008 17:33:42 +0200
Subject: mac80211: remove channel use statistics

The useless channel use statistics are quite a lot of code, currently
use integer divisions in the packet fast path, are rather inaccurate
since they do not account for retries and finally nobody even cares.
Hence, remove them completely.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs_netdev.c |  9 ------
 net/mac80211/debugfs_sta.c    |  1 -
 net/mac80211/ieee80211_i.h    | 32 -------------------
 net/mac80211/rx.c             | 62 ++----------------------------------
 net/mac80211/sta_info.h       |  4 ---
 net/mac80211/tx.c             | 73 ++++++-------------------------------------
 6 files changed, 12 insertions(+), 169 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 3ae5493d728..b2089b2da48 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -155,7 +155,6 @@ static const struct file_operations name##_ops = {			\
 		__IEEE80211_IF_WFILE(name)
 
 /* common attributes */
-IEEE80211_IF_FILE(channel_use, channel_use, DEC);
 IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC);
 
 /* STA/IBSS attributes */
@@ -248,7 +247,6 @@ IEEE80211_IF_WFILE(min_discovery_timeout,
 
 static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 {
-	DEBUGFS_ADD(channel_use, sta);
 	DEBUGFS_ADD(drop_unencrypted, sta);
 	DEBUGFS_ADD(state, sta);
 	DEBUGFS_ADD(bssid, sta);
@@ -269,7 +267,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 
 static void add_ap_files(struct ieee80211_sub_if_data *sdata)
 {
-	DEBUGFS_ADD(channel_use, ap);
 	DEBUGFS_ADD(drop_unencrypted, ap);
 	DEBUGFS_ADD(num_sta_ps, ap);
 	DEBUGFS_ADD(dtim_count, ap);
@@ -281,14 +278,12 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
 
 static void add_wds_files(struct ieee80211_sub_if_data *sdata)
 {
-	DEBUGFS_ADD(channel_use, wds);
 	DEBUGFS_ADD(drop_unencrypted, wds);
 	DEBUGFS_ADD(peer, wds);
 }
 
 static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
-	DEBUGFS_ADD(channel_use, vlan);
 	DEBUGFS_ADD(drop_unencrypted, vlan);
 }
 
@@ -376,7 +371,6 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
 
 static void del_sta_files(struct ieee80211_sub_if_data *sdata)
 {
-	DEBUGFS_DEL(channel_use, sta);
 	DEBUGFS_DEL(drop_unencrypted, sta);
 	DEBUGFS_DEL(state, sta);
 	DEBUGFS_DEL(bssid, sta);
@@ -397,7 +391,6 @@ static void del_sta_files(struct ieee80211_sub_if_data *sdata)
 
 static void del_ap_files(struct ieee80211_sub_if_data *sdata)
 {
-	DEBUGFS_DEL(channel_use, ap);
 	DEBUGFS_DEL(drop_unencrypted, ap);
 	DEBUGFS_DEL(num_sta_ps, ap);
 	DEBUGFS_DEL(dtim_count, ap);
@@ -409,14 +402,12 @@ static void del_ap_files(struct ieee80211_sub_if_data *sdata)
 
 static void del_wds_files(struct ieee80211_sub_if_data *sdata)
 {
-	DEBUGFS_DEL(channel_use, wds);
 	DEBUGFS_DEL(drop_unencrypted, wds);
 	DEBUGFS_DEL(peer, wds);
 }
 
 static void del_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
-	DEBUGFS_DEL(channel_use, vlan);
 	DEBUGFS_DEL(drop_unencrypted, vlan);
 }
 
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index a2cc0284c9d..79a062782d5 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -66,7 +66,6 @@ STA_FILE(tx_retry_count, tx_retry_count, LU);
 STA_FILE(last_signal, last_signal, D);
 STA_FILE(last_qual, last_qual, D);
 STA_FILE(last_noise, last_noise, D);
-STA_FILE(channel_use, channel_use, D);
 STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU);
 
 static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 7d614cdcefc..3f8601cafff 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -201,7 +201,6 @@ struct ieee80211_rx_data {
 	unsigned int flags;
 	int sent_ps_buffered;
 	int queue;
-	int load;
 	u32 tkip_iv32;
 	u16 tkip_iv16;
 };
@@ -448,14 +447,11 @@ struct ieee80211_sub_if_data {
 		struct ieee80211_if_sta sta;
 		u32 mntr_flags;
 	} u;
-	int channel_use;
-	int channel_use_raw;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct dentry *debugfsdir;
 	union {
 		struct {
-			struct dentry *channel_use;
 			struct dentry *drop_unencrypted;
 			struct dentry *state;
 			struct dentry *bssid;
@@ -474,7 +470,6 @@ struct ieee80211_sub_if_data {
 			struct dentry *num_beacons_sta;
 		} sta;
 		struct {
-			struct dentry *channel_use;
 			struct dentry *drop_unencrypted;
 			struct dentry *num_sta_ps;
 			struct dentry *dtim_count;
@@ -484,12 +479,10 @@ struct ieee80211_sub_if_data {
 			struct dentry *num_buffered_multicast;
 		} ap;
 		struct {
-			struct dentry *channel_use;
 			struct dentry *drop_unencrypted;
 			struct dentry *peer;
 		} wds;
 		struct {
-			struct dentry *channel_use;
 			struct dentry *drop_unencrypted;
 		} vlan;
 		struct {
@@ -661,9 +654,6 @@ struct ieee80211_local {
 	     assoc_led_name[32], radio_led_name[32];
 #endif
 
-	u32 channel_use;
-	u32 channel_use_raw;
-
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct work_struct sta_debugfs_add;
 #endif
@@ -861,28 +851,6 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 
 /* ieee80211_ioctl.c */
 extern const struct iw_handler_def ieee80211_iw_handler_def;
-
-
-/* Least common multiple of the used rates (in 100 kbps). This is used to
- * calculate rate_inv values for each rate so that only integers are needed. */
-#define CHAN_UTIL_RATE_LCM 95040
-/* 1 usec is 1/8 * (95040/10) = 1188 */
-#define CHAN_UTIL_PER_USEC 1188
-/* Amount of bits to shift the result right to scale the total utilization
- * to values that will not wrap around 32-bit integers. */
-#define CHAN_UTIL_SHIFT 9
-/* Theoretical maximum of channel utilization counter in 10 ms (stat_time=1):
- * (CHAN_UTIL_PER_USEC * 10000) >> CHAN_UTIL_SHIFT = 23203. So dividing the
- * raw value with about 23 should give utilization in 10th of a percentage
- * (1/1000). However, utilization is only estimated and not all intervals
- * between frames etc. are calculated. 18 seems to give numbers that are closer
- * to the real maximum. */
-#define CHAN_UTIL_PER_10MS 18
-#define CHAN_UTIL_HDR_LONG (202 * CHAN_UTIL_PER_USEC)
-#define CHAN_UTIL_HDR_SHORT (40 * CHAN_UTIL_PER_USEC)
-
-
-/* ieee80211_ioctl.c */
 int ieee80211_set_freq(struct ieee80211_local *local, int freq);
 /* ieee80211_sta.c */
 void ieee80211_sta_timer(unsigned long data);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index cf0de3b0fe2..9400a9766a7 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -387,50 +387,8 @@ static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx)
 }
 
 
-static u32 ieee80211_rx_load_stats(struct ieee80211_local *local,
-				   struct sk_buff *skb,
-				   struct ieee80211_rx_status *status,
-				   struct ieee80211_rate *rate)
-{
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	u32 load = 0, hdrtime;
-
-	/* Estimate total channel use caused by this frame */
-
-	/* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values,
-	 * 1 usec = 1/8 * (1080 / 10) = 13.5 */
-
-	if (status->band == IEEE80211_BAND_5GHZ ||
-	    (status->band == IEEE80211_BAND_5GHZ &&
-	     rate->flags & IEEE80211_RATE_ERP_G))
-		hdrtime = CHAN_UTIL_HDR_SHORT;
-	else
-		hdrtime = CHAN_UTIL_HDR_LONG;
-
-	load = hdrtime;
-	if (!is_multicast_ether_addr(hdr->addr1))
-		load += hdrtime;
-
-	/* TODO: optimise again */
-	load += skb->len * CHAN_UTIL_RATE_LCM / rate->bitrate;
-
-	/* Divide channel_use by 8 to avoid wrapping around the counter */
-	load >>= CHAN_UTIL_SHIFT;
-
-	return load;
-}
-
 /* rx handlers */
 
-static ieee80211_rx_result
-ieee80211_rx_h_if_stats(struct ieee80211_rx_data *rx)
-{
-	if (rx->sta)
-		rx->sta->channel_use_raw += rx->load;
-	rx->sdata->channel_use_raw += rx->load;
-	return RX_CONTINUE;
-}
-
 static ieee80211_rx_result
 ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
 {
@@ -1780,7 +1738,6 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx)
 typedef ieee80211_rx_result (*ieee80211_rx_handler)(struct ieee80211_rx_data *);
 static ieee80211_rx_handler ieee80211_rx_handlers[] =
 {
-	ieee80211_rx_h_if_stats,
 	ieee80211_rx_h_passive_scan,
 	ieee80211_rx_h_check,
 	ieee80211_rx_h_decrypt,
@@ -1939,7 +1896,6 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 					 struct sk_buff *skb,
 					 struct ieee80211_rx_status *status,
-					 u32 load,
 					 struct ieee80211_rate *rate)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
@@ -1958,7 +1914,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 	rx.local = local;
 
 	rx.status = status;
-	rx.load = load;
 	rx.rate = rate;
 	rx.fc = le16_to_cpu(hdr->frame_control);
 	type = rx.fc & IEEE80211_FCTL_FTYPE;
@@ -2067,7 +2022,6 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 	struct ieee80211_rx_status status;
 	u16 head_seq_num, buf_size;
 	int index;
-	u32 pkt_load;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_rate *rate;
 
@@ -2102,12 +2056,9 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 					sizeof(status));
 				sband = local->hw.wiphy->bands[status.band];
 				rate = &sband->bitrates[status.rate_idx];
-				pkt_load = ieee80211_rx_load_stats(local,
-						tid_agg_rx->reorder_buf[index],
-						&status, rate);
 				__ieee80211_rx_handle_packet(hw,
 					tid_agg_rx->reorder_buf[index],
-					&status, pkt_load, rate);
+					&status, rate);
 				tid_agg_rx->stored_mpdu_num--;
 				tid_agg_rx->reorder_buf[index] = NULL;
 			}
@@ -2149,11 +2100,8 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 			sizeof(status));
 		sband = local->hw.wiphy->bands[status.band];
 		rate = &sband->bitrates[status.rate_idx];
-		pkt_load = ieee80211_rx_load_stats(local,
-					tid_agg_rx->reorder_buf[index],
-					&status, rate);
 		__ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index],
-					     &status, pkt_load, rate);
+					     &status, rate);
 		tid_agg_rx->stored_mpdu_num--;
 		tid_agg_rx->reorder_buf[index] = NULL;
 		tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
@@ -2232,7 +2180,6 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 		    struct ieee80211_rx_status *status)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	u32 pkt_load;
 	struct ieee80211_rate *rate = NULL;
 	struct ieee80211_supported_band *sband;
 
@@ -2272,11 +2219,8 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 		return;
 	}
 
-	pkt_load = ieee80211_rx_load_stats(local, skb, status, rate);
-	local->channel_use_raw += pkt_load;
-
 	if (!ieee80211_rx_reorder_ampdu(local, skb))
-		__ieee80211_rx_handle_packet(hw, skb, status, pkt_load, rate);
+		__ieee80211_rx_handle_packet(hw, skb, status, rate);
 
 	rcu_read_unlock();
 }
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index f592290e42b..95753f860ac 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -245,10 +245,6 @@ struct sta_info {
 	unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES];
 #endif
 
-	/* Debug counters, no locking doesn't matter */
-	int channel_use;
-	int channel_use_raw;
-
 	/*
 	 * Aggregation information, locked with lock.
 	 */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9273651d3d7..baa1be0671e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -761,73 +761,18 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
 static ieee80211_tx_result
 ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 {
-	struct ieee80211_local *local = tx->local;
-	struct sk_buff *skb = tx->skb;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	u32 load = 0, hdrtime;
-	struct ieee80211_rate *rate;
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-
-	sband = tx->local->hw.wiphy->bands[tx->channel->band];
-	rate = &sband->bitrates[tx->rate_idx];
-
-	/* TODO: this could be part of tx_status handling, so that the number
-	 * of retries would be known; TX rate should in that case be stored
-	 * somewhere with the packet */
-
-	/* Estimate total channel use caused by this frame */
-
-	/* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values,
-	 * 1 usec = 1/8 * (1080 / 10) = 13.5 */
-
-	if (tx->channel->band == IEEE80211_BAND_5GHZ ||
-	    (tx->channel->band == IEEE80211_BAND_2GHZ &&
-	     rate->flags & IEEE80211_RATE_ERP_G))
-		hdrtime = CHAN_UTIL_HDR_SHORT;
-	else
-		hdrtime = CHAN_UTIL_HDR_LONG;
-
-	load = hdrtime;
-	if (!is_multicast_ether_addr(hdr->addr1))
-		load += hdrtime;
-
-	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
-		load += 2 * hdrtime;
-	else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
-		load += hdrtime;
+	int i;
 
-	/* TODO: optimise again */
-	load += skb->len * CHAN_UTIL_RATE_LCM / rate->bitrate;
+	if (!tx->sta)
+		return TX_CONTINUE;
 
+	tx->sta->tx_packets++;
+	tx->sta->tx_fragments++;
+	tx->sta->tx_bytes += tx->skb->len;
 	if (tx->extra_frag) {
-		int i;
-		for (i = 0; i < tx->num_extra_frag; i++) {
-			load += 2 * hdrtime;
-			load += tx->extra_frag[i]->len *
-				rate->bitrate;
-		}
-	}
-
-	/* Divide channel_use by 8 to avoid wrapping around the counter */
-	load >>= CHAN_UTIL_SHIFT;
-	local->channel_use_raw += load;
-	if (tx->sta)
-		tx->sta->channel_use_raw += load;
-	tx->sdata->channel_use_raw += load;
-
-	if (tx->sta) {
-		tx->sta->tx_packets++;
-		tx->sta->tx_fragments++;
-		tx->sta->tx_bytes += tx->skb->len;
-		if (tx->extra_frag) {
-			int i;
-			tx->sta->tx_fragments += tx->num_extra_frag;
-			for (i = 0; i < tx->num_extra_frag; i++) {
-				tx->sta->tx_bytes +=
-					tx->extra_frag[i]->len;
-			}
-		}
+		tx->sta->tx_fragments += tx->num_extra_frag;
+		for (i = 0; i < tx->num_extra_frag; i++)
+			tx->sta->tx_bytes += tx->extra_frag[i]->len;
 	}
 
 	return TX_CONTINUE;
-- 
cgit v1.2.3-70-g09d2


From 2ba4cc319ab26c56205d4f23724c4748a553c845 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 13 May 2008 15:37:05 +0200
Subject: rcu: fix nf_conntrack_helper.c build bug

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 net/netfilter/nf_conntrack_helper.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 7d1b1170374..8e0b4c8f62a 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -20,6 +20,7 @@
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/rculist.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
-- 
cgit v1.2.3-70-g09d2


From 0e12f848b337fc034ceb3c0d03d75f8de1b8cc96 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 12 May 2008 21:21:13 +0200
Subject: net: use performance variant for_each_cpu_mask_nr

Change references from for_each_cpu_mask to for_each_cpu_mask_nr
where appropriate

Reviewed-by: Paul Jackson <pj@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 net/core/dev.c  | 4 ++--
 net/iucv/iucv.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 58296307787..ee61b987f4d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2239,7 +2239,7 @@ out:
 	 */
 	if (!cpus_empty(net_dma.channel_mask)) {
 		int chan_idx;
-		for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
+		for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
 			struct dma_chan *chan = net_dma.channels[chan_idx];
 			if (chan)
 				dma_async_memcpy_issue_pending(chan);
@@ -4300,7 +4300,7 @@ static void net_dma_rebalance(struct net_dma *net_dma)
 	i = 0;
 	cpu = first_cpu(cpu_online_map);
 
-	for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
+	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
 		chan = net_dma->channels[chan_idx];
 
 		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 91897076213..8de51107059 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -497,7 +497,7 @@ static void iucv_setmask_up(void)
 	/* Disable all cpu but the first in cpu_irq_cpumask. */
 	cpumask = iucv_irq_cpumask;
 	cpu_clear(first_cpu(iucv_irq_cpumask), cpumask);
-	for_each_cpu_mask(cpu, cpumask)
+	for_each_cpu_mask_nr(cpu, cpumask)
 		smp_call_function_single(cpu, iucv_block_cpu, NULL, 0, 1);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 3f9b48a7584851997702cdc3f58e7811b5546397 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 12 May 2008 21:21:13 +0200
Subject: net: Pass reference to cpumask variable in net/sunrpc/svc.c

  * Pass reference to cpumask variable instead of using stack.

For inclusion into sched-devel/latest tree.

Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 net/sunrpc/svc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 01c7e311b90..d43cf8ddff6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -603,7 +603,7 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
 	error = kernel_thread((int (*)(void *)) func, rqstp, 0);
 
 	if (have_oldmask)
-		set_cpus_allowed(current, oldmask);
+		set_cpus_allowed_ptr(current, &oldmask);
 
 	if (error < 0)
 		goto out_thread;
-- 
cgit v1.2.3-70-g09d2


From 962cf36c5bf6d2840b8d66ee9a606fae2f540bbd Mon Sep 17 00:00:00 2001
From: "Carlos R. Mafra" <crmafra2@gmail.com>
Date: Thu, 15 May 2008 11:15:37 -0300
Subject: Remove argument from open_softirq which is always NULL

As git-grep shows, open_softirq() is always called with the last argument
being NULL

block/blk-core.c:       open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
kernel/hrtimer.c:       open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL);
kernel/rcuclassic.c:    open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
kernel/rcupreempt.c:    open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
kernel/sched.c: open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
kernel/softirq.c:       open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
kernel/softirq.c:       open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
kernel/timer.c: open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
net/core/dev.c: open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
net/core/dev.c: open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);

This observation has already been made by Matthew Wilcox in June 2002
(http://www.cs.helsinki.fi/linux/linux-kernel/2002-25/0687.html)

"I notice that none of the current softirq routines use the data element
passed to them."

and the situation hasn't changed since them. So it appears we can safely
remove that extra argument to save 128 (54) bytes of kernel data (text).

Signed-off-by: Carlos R. Mafra <crmafra@ift.unesp.br>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 block/blk-core.c          | 2 +-
 include/linux/interrupt.h | 3 +--
 kernel/hrtimer.c          | 2 +-
 kernel/rcuclassic.c       | 2 +-
 kernel/rcupreempt.c       | 2 +-
 kernel/sched.c            | 2 +-
 kernel/softirq.c          | 7 +++----
 kernel/timer.c            | 2 +-
 net/core/dev.c            | 4 ++--
 9 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/block/blk-core.c b/block/blk-core.c
index 6a9cc0d22a6..75fdc65136e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2048,7 +2048,7 @@ int __init blk_dev_init(void)
 	for_each_possible_cpu(i)
 		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
 
-	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
+	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
 	register_hotcpu_notifier(&blk_cpu_notifier);
 
 	return 0;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f1fc7470d26..a86186dd047 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -285,12 +285,11 @@ enum
 struct softirq_action
 {
 	void	(*action)(struct softirq_action *);
-	void	*data;
 };
 
 asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
-extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
+extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
 #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
 extern void raise_softirq_irqoff(unsigned int nr);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 421be5fe5cc..861b4088092 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1669,7 +1669,7 @@ void __init hrtimers_init(void)
 			  (void *)(long)smp_processor_id());
 	register_cpu_notifier(&hrtimers_nb);
 #ifdef CONFIG_HIGH_RES_TIMERS
-	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL);
+	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
 #endif
 }
 
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index f4ffbd0f306..f6e01f3ae9c 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -529,7 +529,7 @@ static void __cpuinit rcu_online_cpu(int cpu)
 
 	rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
 	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
-	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
+	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 }
 
 static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index e1cdf196a51..9dd827db359 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1125,7 +1125,7 @@ void __init __rcu_init(void)
 	for_each_online_cpu(cpu)
 		rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,	(void *)(long) cpu);
 
-	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
+	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 }
 
 /*
diff --git a/kernel/sched.c b/kernel/sched.c
index cfa222a9153..56ea3a203a5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8154,7 +8154,7 @@ void __init sched_init(void)
 #endif
 
 #ifdef CONFIG_SMP
-	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
+	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
 #endif
 
 #ifdef CONFIG_RT_MUTEXES
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 36e06174004..059256874e9 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -347,9 +347,8 @@ void raise_softirq(unsigned int nr)
 	local_irq_restore(flags);
 }
 
-void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+void open_softirq(int nr, void (*action)(struct softirq_action *))
 {
-	softirq_vec[nr].data = data;
 	softirq_vec[nr].action = action;
 }
 
@@ -503,8 +502,8 @@ void __init softirq_init(void)
 			&per_cpu(tasklet_hi_vec, cpu).head;
 	}
 
-	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
-	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
+	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
 }
 
 static int ksoftirqd(void * __bind_cpu)
diff --git a/kernel/timer.c b/kernel/timer.c
index ceacc662657..b4da888497f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1502,7 +1502,7 @@ void __init init_timers(void)
 
 	BUG_ON(err == NOTIFY_BAD);
 	register_cpu_notifier(&timers_nb);
-	open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
+	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
 
 /**
diff --git a/net/core/dev.c b/net/core/dev.c
index 58296307787..cf0e16731dc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4563,8 +4563,8 @@ static int __init net_dev_init(void)
 
 	dev_boot_phase = 0;
 
-	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
-	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
+	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
 
 	hotcpu_notifier(dev_cpu_callback, 0);
 	dst_init();
-- 
cgit v1.2.3-70-g09d2


From 866988edacebb6f1982266b51fc344c969f481ec Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Sun, 25 May 2008 23:41:40 -0700
Subject: wanrouter: Push down BKL

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wanrouter/wanmain.c | 6 ++++--
 net/wanrouter/wanproc.c | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 9ab31a3ce3a..b210a88d096 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -350,9 +350,9 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
  *	o execute requested action or pass command to the device driver
  */
 
-int wanrouter_ioctl(struct inode *inode, struct file *file,
-		unsigned int cmd, unsigned long arg)
+long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int err = 0;
 	struct proc_dir_entry *dent;
 	struct wan_device *wandev;
@@ -372,6 +372,7 @@ int wanrouter_ioctl(struct inode *inode, struct file *file,
 	if (wandev->magic != ROUTER_MAGIC)
 		return -EINVAL;
 
+	lock_kernel();
 	switch (cmd) {
 	case ROUTER_SETUP:
 		err = wanrouter_device_setup(wandev, data);
@@ -403,6 +404,7 @@ int wanrouter_ioctl(struct inode *inode, struct file *file,
 			err = wandev->ioctl(wandev, cmd, arg);
 		else err = -EINVAL;
 	}
+	unlock_kernel();
 	return err;
 }
 
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index 5bebe40bf4e..267f7ff4982 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -278,7 +278,7 @@ static const struct file_operations wandev_fops = {
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
 	.release = single_release,
-	.ioctl	 = wanrouter_ioctl,
+	.unlocked_ioctl  = wanrouter_ioctl,
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 54064600981ab0fe977b0e760732c30f4472d693 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Sun, 25 May 2008 23:43:11 -0700
Subject: irda: Push BKL down into irda ioctl handlers

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irnet/irnet_ppp.c | 54 +++++++++++++++++++++++++---------------------
 net/irda/irnet/irnet_ppp.h |  7 +++---
 2 files changed, 33 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index e0eab5927c4..f6e54fa97f4 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -628,8 +628,8 @@ dev_irnet_poll(struct file *	file,
  * This is the way pppd configure us and control us while the PPP
  * instance is active.
  */
-static int
-dev_irnet_ioctl(struct inode *	inode,
+static long
+dev_irnet_ioctl(
 		struct file *	file,
 		unsigned int	cmd,
 		unsigned long	arg)
@@ -660,6 +660,7 @@ dev_irnet_ioctl(struct inode *	inode,
 	{
 	  DEBUG(FS_INFO, "Entering PPP discipline.\n");
 	  /* PPP channel setup (ap->chan in configued in dev_irnet_open())*/
+	  lock_kernel();
 	  err = ppp_register_channel(&ap->chan);
 	  if(err == 0)
 	    {
@@ -672,12 +673,14 @@ dev_irnet_ioctl(struct inode *	inode,
 	    }
 	  else
 	    DERROR(FS_ERROR, "Can't setup PPP channel...\n");
+          unlock_kernel();
 	}
       else
 	{
 	  /* In theory, should be N_TTY */
 	  DEBUG(FS_INFO, "Exiting PPP discipline.\n");
 	  /* Disconnect from the generic PPP layer */
+	  lock_kernel();
 	  if(ap->ppp_open)
 	    {
 	      ap->ppp_open = 0;
@@ -686,24 +689,20 @@ dev_irnet_ioctl(struct inode *	inode,
 	  else
 	    DERROR(FS_ERROR, "Channel not registered !\n");
 	  err = 0;
+	  unlock_kernel();
 	}
       break;
 
       /* Query PPP channel and unit number */
     case PPPIOCGCHAN:
-      if(!ap->ppp_open)
-	break;
-      if(put_user(ppp_channel_index(&ap->chan), (int __user *)argp))
-	break;
-      DEBUG(FS_INFO, "Query channel.\n");
-      err = 0;
+      if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan),
+						(int __user *)argp))
+	err = 0;
       break;
     case PPPIOCGUNIT:
-      if(!ap->ppp_open)
-	break;
-      if(put_user(ppp_unit_number(&ap->chan), (int __user *)argp))
-	break;
-      DEBUG(FS_INFO, "Query unit number.\n");
+      lock_kernel();
+      if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan),
+						(int __user *)argp))
       err = 0;
       break;
 
@@ -723,34 +722,39 @@ dev_irnet_ioctl(struct inode *	inode,
       DEBUG(FS_INFO, "Standard PPP ioctl.\n");
       if(!capable(CAP_NET_ADMIN))
 	err = -EPERM;
-      else
+      else {
+	lock_kernel();
 	err = ppp_irnet_ioctl(&ap->chan, cmd, arg);
+	unlock_kernel();
+      }
       break;
 
       /* TTY IOCTLs : Pretend that we are a tty, to keep pppd happy */
       /* Get termios */
     case TCGETS:
       DEBUG(FS_INFO, "Get termios.\n");
+      lock_kernel();
 #ifndef TCGETS2
-      if(kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios))
-	break;
+      if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios))
+	err = 0;
 #else
       if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios))
-	break;
+	err = 0;
 #endif
-      err = 0;
+      unlock_kernel();
       break;
       /* Set termios */
     case TCSETSF:
       DEBUG(FS_INFO, "Set termios.\n");
+      lock_kernel();
 #ifndef TCGETS2
-      if(user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp))
-	break;
+      if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp))
+	err = 0;
 #else
-      if(user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp))
-	break;
+      if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp))
+	err = 0;
 #endif
-      err = 0;
+      unlock_kernel();
       break;
 
       /* Set DTR/RTS */
@@ -773,7 +777,9 @@ dev_irnet_ioctl(struct inode *	inode,
        * We should also worry that we don't accept junk here and that
        * we get rid of our own buffers */
 #ifdef FLUSH_TO_PPP
+      lock_kernel();
       ppp_output_wakeup(&ap->chan);
+      unlock_kernel();
 #endif /* FLUSH_TO_PPP */
       err = 0;
       break;
@@ -788,7 +794,7 @@ dev_irnet_ioctl(struct inode *	inode,
 
     default:
       DERROR(FS_ERROR, "Unsupported ioctl (0x%X)\n", cmd);
-      err = -ENOIOCTLCMD;
+      err = -ENOTTY;
     }
 
   DEXIT(FS_TRACE, " - err = 0x%X\n", err);
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
index d2beb7df8f7..d9f8bd4ebd0 100644
--- a/net/irda/irnet/irnet_ppp.h
+++ b/net/irda/irnet/irnet_ppp.h
@@ -76,9 +76,8 @@ static ssize_t
 static unsigned int
 	dev_irnet_poll(struct file *,
 		       poll_table *);
-static int
-	dev_irnet_ioctl(struct inode *,
-			struct file *,
+static long
+	dev_irnet_ioctl(struct file *,
 			unsigned int,
 			unsigned long);
 /* ------------------------ PPP INTERFACE ------------------------ */
@@ -102,7 +101,7 @@ static struct file_operations irnet_device_fops =
 	.read		= dev_irnet_read,
 	.write		= dev_irnet_write,
 	.poll		= dev_irnet_poll,
-	.ioctl		= dev_irnet_ioctl,
+	.unlocked_ioctl	= dev_irnet_ioctl,
 	.open		= dev_irnet_open,
 	.release	= dev_irnet_close
   /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */
-- 
cgit v1.2.3-70-g09d2


From 23c0752a25d73ccc4547700e8a57d5ae2f2edf56 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 29 May 2008 10:38:53 +0200
Subject: mac80211: clean up skb reallocation code

This cleans up the skb reallocation code to avoid problems with
skb->truesize, not resize an skb twice for a single output path
because we didn't expand it enough during the first copy and also
removes the code to further expand it during crypto operations
which will no longer be necessary.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c  | 97 ++++++++++++++++++++++++++++++++++++++----------------
 net/mac80211/wep.c | 10 ++----
 net/mac80211/wpa.c | 58 ++++++++++++++------------------
 3 files changed, 95 insertions(+), 70 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index baa1be0671e..dac44cbd036 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1215,6 +1215,45 @@ retry:
 
 /* device xmit handlers */
 
+static int ieee80211_skb_resize(struct ieee80211_local *local,
+				struct sk_buff *skb,
+				int head_need, bool may_encrypt)
+{
+	int tail_need = 0;
+
+	/*
+	 * This could be optimised, devices that do full hardware
+	 * crypto (including TKIP MMIC) need no tailroom... But we
+	 * have no drivers for such devices currently.
+	 */
+	if (may_encrypt) {
+		tail_need = IEEE80211_ENCRYPT_TAILROOM;
+		tail_need -= skb_tailroom(skb);
+		tail_need = max_t(int, tail_need, 0);
+	}
+
+	if (head_need || tail_need) {
+		/* Sorry. Can't account for this any more */
+		skb_orphan(skb);
+	}
+
+	if (skb_header_cloned(skb))
+		I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
+	else
+		I802_DEBUG_INC(local->tx_expand_skb_head);
+
+	if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) {
+		printk(KERN_DEBUG "%s: failed to reallocate TX buffer\n",
+		       wiphy_name(local->hw.wiphy));
+		return -ENOMEM;
+	}
+
+	/* update truesize too */
+	skb->truesize += head_need + tail_need;
+
+	return 0;
+}
+
 int ieee80211_master_start_xmit(struct sk_buff *skb,
 				struct net_device *dev)
 {
@@ -1222,6 +1261,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 	struct net_device *odev = NULL;
 	struct ieee80211_sub_if_data *osdata;
 	int headroom;
+	bool may_encrypt;
 	int ret;
 
 	if (info->control.ifindex)
@@ -1241,13 +1281,18 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 
 	osdata = IEEE80211_DEV_TO_SUB_IF(odev);
 
-	headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM;
-	if (skb_headroom(skb) < headroom) {
-		if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) {
-			dev_kfree_skb(skb);
-			dev_put(odev);
-			return 0;
-		}
+	may_encrypt = !(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT);
+
+	headroom = osdata->local->tx_headroom;
+	if (may_encrypt)
+		headroom += IEEE80211_ENCRYPT_HEADROOM;
+	headroom -= skb_headroom(skb);
+	headroom = max_t(int, 0, headroom);
+
+	if (ieee80211_skb_resize(osdata->local, skb, headroom, may_encrypt)) {
+		dev_kfree_skb(skb);
+		dev_put(odev);
+		return 0;
 	}
 
 	info->control.vif = &osdata->vif;
@@ -1509,32 +1554,26 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	 * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and
 	 * alloc_skb() (net/core/skbuff.c)
 	 */
-	head_need = hdrlen + encaps_len + meshhdrlen + local->tx_headroom;
-	head_need -= skb_headroom(skb);
+	head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
 
-	/* We are going to modify skb data, so make a copy of it if happens to
-	 * be cloned. This could happen, e.g., with Linux bridge code passing
-	 * us broadcast frames. */
+	/*
+	 * So we need to modify the skb header and hence need a copy of
+	 * that. The head_need variable above doesn't, so far, include
+	 * the needed header space that we don't need right away. If we
+	 * can, then we don't reallocate right now but only after the
+	 * frame arrives at the master device (if it does...)
+	 *
+	 * If we cannot, however, then we will reallocate to include all
+	 * the ever needed space. Also, if we need to reallocate it anyway,
+	 * make it big enough for everything we may ever need.
+	 */
 
 	if (head_need > 0 || skb_header_cloned(skb)) {
-#if 0
-		printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes "
-		       "of headroom\n", dev->name, head_need);
-#endif
-
-		if (skb_header_cloned(skb))
-			I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
-		else
-			I802_DEBUG_INC(local->tx_expand_skb_head);
-		/* Since we have to reallocate the buffer, make sure that there
-		 * is enough room for possible WEP IV/ICV and TKIP (8 bytes
-		 * before payload and 12 after). */
-		if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8),
-				     12, GFP_ATOMIC)) {
-			printk(KERN_DEBUG "%s: failed to reallocate TX buffer"
-			       "\n", dev->name);
+		head_need += IEEE80211_ENCRYPT_HEADROOM;
+		head_need += local->tx_headroom;
+		head_need = max_t(int, 0, head_need);
+		if (ieee80211_skb_resize(local, skb, head_need, true))
 			goto fail;
-		}
 	}
 
 	if (encaps_data) {
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 1e7f03dd8f6..c9fd1291b19 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -93,13 +93,9 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local,
 	fc |= IEEE80211_FCTL_PROTECTED;
 	hdr->frame_control = cpu_to_le16(fc);
 
-	if ((skb_headroom(skb) < WEP_IV_LEN ||
-	     skb_tailroom(skb) < WEP_ICV_LEN)) {
-		I802_DEBUG_INC(local->tx_expand_skb_head);
-		if (unlikely(pskb_expand_head(skb, WEP_IV_LEN, WEP_ICV_LEN,
-					      GFP_ATOMIC)))
-			return NULL;
-	}
+	if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN ||
+		    skb_headroom(skb) < WEP_IV_LEN))
+		return NULL;
 
 	hdrlen = ieee80211_get_hdrlen(fc);
 	newhdr = skb_push(skb, WEP_IV_LEN);
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index d6635f6e561..9f6fd20374e 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -79,6 +79,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 	struct sk_buff *skb = tx->skb;
 	int authenticator;
 	int wpa_test = 0;
+	int tail;
 
 	fc = tx->fc;
 
@@ -98,16 +99,13 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 		return TX_CONTINUE;
 	}
 
-	if (skb_tailroom(skb) < MICHAEL_MIC_LEN) {
-		I802_DEBUG_INC(tx->local->tx_expand_skb_head);
-		if (unlikely(pskb_expand_head(skb, TKIP_IV_LEN,
-					      MICHAEL_MIC_LEN + TKIP_ICV_LEN,
-					      GFP_ATOMIC))) {
-			printk(KERN_DEBUG "%s: failed to allocate more memory "
-			       "for Michael MIC\n", tx->dev->name);
-			return TX_DROP;
-		}
-	}
+	tail = MICHAEL_MIC_LEN;
+	if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+		tail += TKIP_ICV_LEN;
+
+	if (WARN_ON(skb_tailroom(skb) < tail ||
+		    skb_headroom(skb) < TKIP_IV_LEN))
+		return TX_DROP;
 
 #if 0
 	authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */
@@ -188,7 +186,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_key *key = tx->key;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	int hdrlen, len, tailneed;
+	int hdrlen, len, tail;
 	u16 fc;
 	u8 *pos;
 
@@ -199,7 +197,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
 		/* hwaccel - with no need for preallocated room for IV/ICV */
 		info->control.hw_key = &tx->key->conf;
-		return TX_CONTINUE;
+		return 0;
 	}
 
 	fc = le16_to_cpu(hdr->frame_control);
@@ -207,17 +205,13 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	len = skb->len - hdrlen;
 
 	if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
-		tailneed = 0;
+		tail = 0;
 	else
-		tailneed = TKIP_ICV_LEN;
-
-	if ((skb_headroom(skb) < TKIP_IV_LEN ||
-	     skb_tailroom(skb) < tailneed)) {
-		I802_DEBUG_INC(tx->local->tx_expand_skb_head);
-		if (unlikely(pskb_expand_head(skb, TKIP_IV_LEN, tailneed,
-					      GFP_ATOMIC)))
-			return -1;
-	}
+		tail = TKIP_ICV_LEN;
+
+	if (WARN_ON(skb_tailroom(skb) < tail ||
+		    skb_headroom(skb) < TKIP_IV_LEN))
+		return -1;
 
 	pos = skb_push(skb, TKIP_IV_LEN);
 	memmove(pos, pos + TKIP_IV_LEN, hdrlen);
@@ -432,7 +426,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_key *key = tx->key;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	int hdrlen, len, tailneed;
+	int hdrlen, len, tail;
 	u16 fc;
 	u8 *pos, *pn, *b_0, *aad, *scratch;
 	int i;
@@ -445,7 +439,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 		/* hwaccel - with no need for preallocated room for CCMP "
 		 * header or MIC fields */
 		info->control.hw_key = &tx->key->conf;
-		return TX_CONTINUE;
+		return 0;
 	}
 
 	scratch = key->u.ccmp.tx_crypto_buf;
@@ -457,17 +451,13 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	len = skb->len - hdrlen;
 
 	if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
-		tailneed = 0;
+		tail = 0;
 	else
-		tailneed = CCMP_MIC_LEN;
-
-	if ((skb_headroom(skb) < CCMP_HDR_LEN ||
-	     skb_tailroom(skb) < tailneed)) {
-		I802_DEBUG_INC(tx->local->tx_expand_skb_head);
-		if (unlikely(pskb_expand_head(skb, CCMP_HDR_LEN, tailneed,
-					      GFP_ATOMIC)))
-			return -1;
-	}
+		tail = CCMP_MIC_LEN;
+
+	if (WARN_ON(skb_tailroom(skb) < tail ||
+		    skb_headroom(skb) < CCMP_HDR_LEN))
+		return -1;
 
 	pos = skb_push(skb, CCMP_HDR_LEN);
 	memmove(pos, pos + CCMP_HDR_LEN, hdrlen);
-- 
cgit v1.2.3-70-g09d2


From 747cf5e924a469a15a454b88a813236460b30975 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Tue, 27 May 2008 17:50:51 +0300
Subject: mac80211: fix ieee80211_get_buffered_bc

fix bss not initialized in ieee80211_get_buffered_bc
and unbalanced locking

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index dac44cbd036..16af30811f9 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1947,7 +1947,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 			  struct ieee80211_vif *vif)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	struct sta_info *sta;
 	ieee80211_tx_handler *handler;
 	struct ieee80211_tx_data tx;
@@ -1960,7 +1960,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 
 	sdata = vif_to_sdata(vif);
 	bdev = sdata->dev;
-
+	bss = &sdata->u.ap;
 
 	if (!bss)
 		return NULL;
@@ -1968,19 +1968,16 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	rcu_read_lock();
 	beacon = rcu_dereference(bss->beacon);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon ||
-	    !beacon->head) {
-		rcu_read_unlock();
-		return NULL;
-	}
+	if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon || !beacon->head)
+		goto out;
 
 	if (bss->dtim_count != 0)
-		return NULL; /* send buffered bc/mc only after DTIM beacon */
+		goto out; /* send buffered bc/mc only after DTIM beacon */
 
 	while (1) {
 		skb = skb_dequeue(&bss->ps_bc_buf);
 		if (!skb)
-			return NULL;
+			goto out;
 		local->total_ps_buffered--;
 
 		if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) {
@@ -2023,6 +2020,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 		skb = NULL;
 	}
 
+out:
 	rcu_read_unlock();
 
 	return skb;
-- 
cgit v1.2.3-70-g09d2


From b83f4e15e65d94f6f56924b0b06a77a7ca2b4d8a Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Tue, 27 May 2008 17:50:52 +0300
Subject: mac80211: fix deadlock in sta->lock

This patch fixes a deadlock of sta->lock use, occurring while changing
tx aggregation states, as dev_queue_xmit end up in new function
test_and_clear_sta_flags that uses that lock thus leading to deadlock

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 97d4a537ca2..f79f6b9938a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -589,8 +589,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	sta = sta_info_get(local, ra);
 	if (!sta) {
 		printk(KERN_DEBUG "Could not find the station\n");
-		rcu_read_unlock();
-		return -ENOENT;
+		ret = -ENOENT;
+		goto exit;
 	}
 
 	spin_lock_bh(&sta->lock);
@@ -598,7 +598,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	/* we have tried too many times, receiver does not want A-MPDU */
 	if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
 		ret = -EBUSY;
-		goto start_ba_exit;
+		goto err_unlock_sta;
 	}
 
 	state = &sta->ampdu_mlme.tid_state_tx[tid];
@@ -609,7 +609,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 				 "idle on tid %u\n", tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 		ret = -EAGAIN;
-		goto start_ba_exit;
+		goto err_unlock_sta;
 	}
 
 	/* prepare A-MPDU MLME for Tx aggregation */
@@ -620,7 +620,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 			printk(KERN_ERR "allocate tx mlme to tid %d failed\n",
 					tid);
 		ret = -ENOMEM;
-		goto start_ba_exit;
+		goto err_unlock_sta;
 	}
 	/* Tx timer */
 	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function =
@@ -643,7 +643,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 		printk(KERN_DEBUG "BA request denied - queue unavailable for"
 					" tid %d\n", tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
-		goto start_ba_err;
+		goto err_unlock_queue;
 	}
 	sdata = sta->sdata;
 
@@ -665,12 +665,13 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 					" tid %d\n", tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 		*state = HT_AGG_STATE_IDLE;
-		goto start_ba_err;
+		goto err_unlock_queue;
 	}
 
 	/* Will put all the packets in the new SW queue */
 	ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
 	spin_unlock_bh(&local->mdev->queue_lock);
+	spin_unlock_bh(&sta->lock);
 
 	/* send an addBA request */
 	sta->ampdu_mlme.dialog_token_allocator++;
@@ -678,25 +679,26 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 			sta->ampdu_mlme.dialog_token_allocator;
 	sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
 
+
 	ieee80211_send_addba_request(sta->sdata->dev, ra, tid,
 			 sta->ampdu_mlme.tid_tx[tid]->dialog_token,
 			 sta->ampdu_mlme.tid_tx[tid]->ssn,
 			 0x40, 5000);
-
 	/* activate the timer for the recipient's addBA response */
 	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
 				jiffies + ADDBA_RESP_INTERVAL;
 	add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
 	printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
-	goto start_ba_exit;
+	goto exit;
 
-start_ba_err:
+err_unlock_queue:
 	kfree(sta->ampdu_mlme.tid_tx[tid]);
 	sta->ampdu_mlme.tid_tx[tid] = NULL;
 	spin_unlock_bh(&local->mdev->queue_lock);
 	ret = -EBUSY;
-start_ba_exit:
+err_unlock_sta:
 	spin_unlock_bh(&sta->lock);
+exit:
 	rcu_read_unlock();
 	return ret;
 }
@@ -835,10 +837,11 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	}
 	state = &sta->ampdu_mlme.tid_state_tx[tid];
 
-	spin_lock_bh(&sta->lock);
+	/* NOTE: no need to use sta->lock in this state check, as
+	 * ieee80211_stop_tx_ba_session will let only
+	 * one stop call to pass through per sta/tid */
 	if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
 		printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
-		spin_unlock_bh(&sta->lock);
 		rcu_read_unlock();
 		return;
 	}
@@ -861,6 +864,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	 * ieee80211_wake_queue is not used here as this queue is not
 	 * necessarily stopped */
 	netif_schedule(local->mdev);
+	spin_lock_bh(&sta->lock);
 	*state = HT_AGG_STATE_IDLE;
 	sta->ampdu_mlme.addba_req_num[tid] = 0;
 	kfree(sta->ampdu_mlme.tid_tx[tid]);
-- 
cgit v1.2.3-70-g09d2


From e623157b8d778a63736b0f41c04acc57c4f61ae0 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 27 May 2008 20:00:11 +0300
Subject: mac80211: sends HT IE to user level through wext

This patch adds HT IE in the scan list that is returned to user level
through wext. This is useful to let wpa_supplicant if a bss supports 11n or
not: WEP and TKIP are not supported in 11n.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 9a264379d7b..6faa7006681 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4128,6 +4128,14 @@ ieee80211_sta_scan_result(struct net_device *dev,
 						  bss->rsn_ie);
 	}
 
+	if (bss && bss->ht_ie) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVGENIE;
+		iwe.u.data.length = bss->ht_ie_len;
+		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
+						  bss->ht_ie);
+	}
+
 	if (bss && bss->supp_rates_len > 0) {
 		/* display all supported rates in readable format */
 		char *p = current_ev + IW_EV_LCP_LEN;
-- 
cgit v1.2.3-70-g09d2


From 9306102ea5696a3815f8d24ac0c0fbd1e19be7d3 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 29 May 2008 16:35:23 +0800
Subject: mac80211: allow disable FAT in specific configurations

This patch allows to disable FAT channel in specific configurations.

For example the configuration (8, +1), (primary channel 8, extension
channel 12) isn't permitted in U.S., but (8, -1), (primary channel 8,
extension channel 4) is. When FAT channel configuration is not
permitted, FAT channel should be reported as not supported in the
capabilities of the HT IE in association request. And sssociation is
performed on 20Mhz channel.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  2 ++
 include/net/wireless.h     |  6 ++++++
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/mlme.c        | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 54 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 3c2ac0c37aa..9300f37cd7e 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -320,6 +320,8 @@ struct ieee80211_ht_addt_info {
 #define IEEE80211_HT_CAP_MCS_TX_UEQM		0x10
 /* 802.11n HT IE masks */
 #define IEEE80211_HT_IE_CHA_SEC_OFFSET		0x03
+#define IEEE80211_HT_IE_CHA_SEC_ABOVE 		0x01
+#define IEEE80211_HT_IE_CHA_SEC_BELOW 		0x03
 #define IEEE80211_HT_IE_CHA_WIDTH		0x04
 #define IEEE80211_HT_IE_HT_PROTECTION		0x0003
 #define IEEE80211_HT_IE_NON_GF_STA_PRSNT	0x0004
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 667b4080d30..9324f8dd183 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -39,12 +39,18 @@ enum ieee80211_band {
  *	on this channel.
  * @IEEE80211_CHAN_NO_IBSS: IBSS is not allowed on this channel.
  * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel.
+ * @IEEE80211_CHAN_NO_FAT_ABOVE: extension channel above this channel
+ * 	is not permitted.
+ * @IEEE80211_CHAN_NO_FAT_BELOW: extension channel below this channel
+ * 	is not permitted.
  */
 enum ieee80211_channel_flags {
 	IEEE80211_CHAN_DISABLED		= 1<<0,
 	IEEE80211_CHAN_PASSIVE_SCAN	= 1<<1,
 	IEEE80211_CHAN_NO_IBSS		= 1<<2,
 	IEEE80211_CHAN_RADAR		= 1<<3,
+	IEEE80211_CHAN_NO_FAT_ABOVE	= 1<<4,
+	IEEE80211_CHAN_NO_FAT_BELOW	= 1<<5,
 };
 
 /**
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3f8601cafff..432011cd364 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -92,6 +92,8 @@ struct ieee80211_sta_bss {
 	size_t wmm_ie_len;
 	u8 *ht_ie;
 	size_t ht_ie_len;
+	u8 *ht_add_ie;
+	size_t ht_add_ie_len;
 #ifdef CONFIG_MAC80211_MESH
 	u8 *mesh_id;
 	size_t mesh_id_len;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6faa7006681..d30c11337b0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -808,8 +808,29 @@ static void ieee80211_send_assoc(struct net_device *dev,
 
 	/* wmm support is a must to HT */
 	if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) &&
-	    sband->ht_info.ht_supported) {
-		__le16 tmp = cpu_to_le16(sband->ht_info.cap);
+	    sband->ht_info.ht_supported && bss->ht_add_ie) {
+		struct ieee80211_ht_addt_info *ht_add_info =
+			(struct ieee80211_ht_addt_info *)bss->ht_add_ie;
+		u16 cap = sband->ht_info.cap;
+		__le16 tmp;
+		u32 flags = local->hw.conf.channel->flags;
+
+		switch (ht_add_info->ht_param & IEEE80211_HT_IE_CHA_SEC_OFFSET) {
+		case IEEE80211_HT_IE_CHA_SEC_ABOVE:
+			if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) {
+				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH;
+				cap &= ~IEEE80211_HT_CAP_SGI_40;
+			}
+			break;
+		case IEEE80211_HT_IE_CHA_SEC_BELOW:
+			if (flags & IEEE80211_CHAN_NO_FAT_BELOW) {
+				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH;
+				cap &= ~IEEE80211_HT_CAP_SGI_40;
+			}
+			break;
+		}
+
+		tmp = cpu_to_le16(cap);
 		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2);
 		*pos++ = WLAN_EID_HT_CAPABILITY;
 		*pos++ = sizeof(struct ieee80211_ht_cap);
@@ -2264,6 +2285,7 @@ static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
 	kfree(bss->rsn_ie);
 	kfree(bss->wmm_ie);
 	kfree(bss->ht_ie);
+	kfree(bss->ht_add_ie);
 	kfree(bss_mesh_id(bss));
 	kfree(bss_mesh_cfg(bss));
 	kfree(bss);
@@ -2640,6 +2662,26 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 		bss->ht_ie_len = 0;
 	}
 
+	if (elems.ht_info_elem &&
+	     (!bss->ht_add_ie ||
+	     bss->ht_add_ie_len != elems.ht_info_elem_len ||
+	     memcmp(bss->ht_add_ie, elems.ht_info_elem,
+			elems.ht_info_elem_len))) {
+		kfree(bss->ht_add_ie);
+		bss->ht_add_ie =
+			kmalloc(elems.ht_info_elem_len + 2, GFP_ATOMIC);
+		if (bss->ht_add_ie) {
+			memcpy(bss->ht_add_ie, elems.ht_info_elem - 2,
+				elems.ht_info_elem_len + 2);
+			bss->ht_add_ie_len = elems.ht_info_elem_len + 2;
+		} else
+			bss->ht_add_ie_len = 0;
+	} else if (!elems.ht_info_elem && bss->ht_add_ie) {
+		kfree(bss->ht_add_ie);
+		bss->ht_add_ie = NULL;
+		bss->ht_add_ie_len = 0;
+	}
+
 	bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
 	bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
 
-- 
cgit v1.2.3-70-g09d2


From 5854a32e6cb672d182ce378c69f0f7470137a062 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 2 Jun 2008 09:38:04 +0200
Subject: mac80211: fix rate control initialisation

In commit 2e92e6f2c50b4baf85cca968f0e6f1b5c0df7d39 ("mac80211: use rate
index in TX control") I forgot to initialise a few new variables to -1 which
means that the rate control algorithm is never triggered and 0 is used as
the only rate index, effectively fixing the transmit bitrate at the lowest
supported.

This patch adds the missing initialisation.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Bisected-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 16af30811f9..332ddcb1068 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -947,6 +947,8 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	tx->local = local;
 	tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	tx->channel = local->hw.conf.channel;
+	tx->rate_idx = -1;
+	tx->last_frag_rate_idx = -1;
 	/*
 	 * Set this flag (used below to indicate "automatic fragmentation"),
 	 * it will be cleared/left by radiotap as desired.
-- 
cgit v1.2.3-70-g09d2


From 2b2121417eff64125bdb7f322d3b533e06d73dae Mon Sep 17 00:00:00 2001
From: Pavel Roskin <proski@gnu.org>
Date: Mon, 2 Jun 2008 07:54:50 -0400
Subject: mac80211: fix panic when using hardware WEP

e039fa4a4195ac4ee895e6f3d1334beed63256fe ("mac80211: move TX info into
skb->cb") misplaced code for setting hardware WEP keys.  Move it back.
This fixes kernel panic in b43 if WEP is used and hardware encryption
is enabled.

Signed-off-by: Pavel Roskin <proski@gnu.org>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index c9fd1291b19..e7b6344c900 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -335,10 +335,10 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	info->control.icv_len = WEP_ICV_LEN;
 
 	if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) {
-		info->control.hw_key = &tx->key->conf;
 		if (ieee80211_wep_encrypt(tx->local, skb, tx->key))
 			return -1;
 	} else {
+		info->control.hw_key = &tx->key->conf;
 		if (tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) {
 			if (!ieee80211_wep_add_iv(tx->local, skb, tx->key))
 				return -1;
-- 
cgit v1.2.3-70-g09d2


From b97e77e0446f0702de7fa0f5d2c52acf42d0289f Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 2 Jun 2008 20:31:56 +0300
Subject: mac80211: fix unbalanced locking in ieee80211_get_buffered_bc

This patch fixes unbalanced locking in ieee80211_get_buffered_bc

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 332ddcb1068..4214d039fbc 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2011,7 +2011,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	}
 
 	if (WARN_ON(tx.skb != skb))
-		return NULL;
+		res = TX_DROP;
 
 	if (res == TX_DROP) {
 		I802_DEBUG_INC(local->tx_handlers_drop);
-- 
cgit v1.2.3-70-g09d2


From 2d892986e82306b8ad96285fb54b9999523331e0 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 2 Jun 2008 20:31:57 +0300
Subject: mac80211: removing shadowed sband

This patch removes doubly defined sband variable

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4214d039fbc..1ad9e664f28 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -612,13 +612,10 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 
 	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
 	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) {
-		struct ieee80211_supported_band *sband;
 		struct ieee80211_rate *rate;
 		s8 baserate = -1;
 		int idx;
 
-		sband = tx->local->hw.wiphy->bands[tx->channel->band];
-
 		/* Do not use multiple retry rates when using RTS/CTS */
 		info->control.alt_retry_rate_idx = -1;
 
-- 
cgit v1.2.3-70-g09d2


From 84b07c1638c36ae937d4930b467001a0d22904e5 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:28:21 -0700
Subject: tipc: Fix bug in connection setup via native API

This patch fixes a bug that prevented TIPC from receiving a
connection setup request message on a native TIPC port.
The revised connection setup logic ensures that validation
of the source of a connection-based message is skipped if
the port is not yet connected to a peer.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index 2c64ad88e3c..0bd3e6192c4 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -842,13 +842,10 @@ static void port_dispatcher_sigh(void *dummy)
 
 				tipc_port_unlock(p_ptr);
 				if (unlikely(!connected)) {
-					if (unlikely(published))
+					if (tipc_connect2port(dref, &orig))
 						goto reject;
-					tipc_connect2port(dref,&orig);
-				}
-				if (unlikely(msg_origport(msg) != peer_port))
-					goto reject;
-				if (unlikely(msg_orignode(msg) != peer_node))
+				} else if ((msg_origport(msg) != peer_port) ||
+					   (msg_orignode(msg) != peer_node))
 					goto reject;
 				if (unlikely(!cb))
 					goto reject;
-- 
cgit v1.2.3-70-g09d2


From 5307e46957e76d71f02d2d736030ad92cdb3dd8c Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:28:45 -0700
Subject: tipc: Standardize error checking on incoming messages via native API

This patch re-orders & re-groups the error checks performed on
messages being delivered to native API ports, in order to clarify the
similarities and differences required for the various message types.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index 0bd3e6192c4..4dfef9e798e 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -841,14 +841,14 @@ static void port_dispatcher_sigh(void *dummy)
 				u32 peer_node = port_peernode(p_ptr);
 
 				tipc_port_unlock(p_ptr);
+				if (unlikely(!cb))
+					goto reject;
 				if (unlikely(!connected)) {
 					if (tipc_connect2port(dref, &orig))
 						goto reject;
 				} else if ((msg_origport(msg) != peer_port) ||
 					   (msg_orignode(msg) != peer_node))
 					goto reject;
-				if (unlikely(!cb))
-					goto reject;
 				if (unlikely(++p_ptr->publ.conn_unacked >=
 					     TIPC_FLOW_CONTROL_WIN))
 					tipc_acknowledge(dref,
@@ -862,9 +862,7 @@ static void port_dispatcher_sigh(void *dummy)
 				tipc_msg_event cb = up_ptr->msg_cb;
 
 				tipc_port_unlock(p_ptr);
-				if (unlikely(connected))
-					goto reject;
-				if (unlikely(!cb))
+				if (unlikely(!cb || connected))
 					goto reject;
 				skb_pull(buf, msg_hdr_sz(msg));
 				cb(usr_handle, dref, &buf, msg_data(msg),
@@ -877,11 +875,7 @@ static void port_dispatcher_sigh(void *dummy)
 				tipc_named_msg_event cb = up_ptr->named_msg_cb;
 
 				tipc_port_unlock(p_ptr);
-				if (unlikely(connected))
-					goto reject;
-				if (unlikely(!cb))
-					goto reject;
-				if (unlikely(!published))
+				if (unlikely(!cb || connected || !published))
 					goto reject;
 				dseq.type =  msg_nametype(msg);
 				dseq.lower = msg_nameinst(msg);
@@ -908,11 +902,10 @@ err:
 				u32 peer_node = port_peernode(p_ptr);
 
 				tipc_port_unlock(p_ptr);
-				if (!connected || !cb)
-					break;
-				if (msg_origport(msg) != peer_port)
+				if (!cb || !connected)
 					break;
-				if (msg_orignode(msg) != peer_node)
+				if ((msg_origport(msg) != peer_port) ||
+				    (msg_orignode(msg) != peer_node))
 					break;
 				tipc_disconnect(dref);
 				skb_pull(buf, msg_hdr_sz(msg));
@@ -924,7 +917,7 @@ err:
 				tipc_msg_err_event cb = up_ptr->err_cb;
 
 				tipc_port_unlock(p_ptr);
-				if (connected || !cb)
+				if (!cb || connected)
 					break;
 				skb_pull(buf, msg_hdr_sz(msg));
 				cb(usr_handle, dref, &buf, msg_data(msg),
@@ -937,7 +930,7 @@ err:
 					up_ptr->named_err_cb;
 
 				tipc_port_unlock(p_ptr);
-				if (connected || !cb)
+				if (!cb || connected)
 					break;
 				dseq.type =  msg_nametype(msg);
 				dseq.lower = msg_nameinst(msg);
-- 
cgit v1.2.3-70-g09d2


From e0d4e3d0d72cfae7b7eac14e39e12dfc6b406313 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:29:09 -0700
Subject: tipc: Fix bugs in message error code display when debugging

This patch corrects two problems in the display of error code
information in TIPC messages when debugging:
- no longer tries to display error code in NAME_DISTRIBUTOR
  messages, which don't have the error field
- now displays error code in 24 byte data messages, which do
  have the error field

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 38abebaae88..73dcd00d674 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -230,13 +230,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 
 	switch (usr) {
 	case CONN_MANAGER:
-	case NAME_DISTRIBUTOR:
 	case TIPC_LOW_IMPORTANCE:
 	case TIPC_MEDIUM_IMPORTANCE:
 	case TIPC_HIGH_IMPORTANCE:
 	case TIPC_CRITICAL_IMPORTANCE:
-		if (msg_short(msg))
-			break;	/* No error */
 		switch (msg_errcode(msg)) {
 		case TIPC_OK:
 			break;
-- 
cgit v1.2.3-70-g09d2


From a686e6859e976712e28f6af927cd52a6a3bb372a Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:29:39 -0700
Subject: tipc: Fix minor bugs in link session number handling

This patch introduces a new, out-of-range value to indicate that
a link endpoint does not have an existing session established
with its peer, eliminating the risk that the previously used
"invalid session number" value (i.e. zero) might eventually be
assigned as a valid session number and cause incorrect link
behavior.

The patch also introduces explicit bit masking when assigning a
new link session number to ensure it does not exceed 16 bits.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index bd206ebe4ee..b8c1231e314 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -50,6 +50,12 @@
 #include "bcast.h"
 
 
+/*
+ * Out-of-range value for link session numbers
+ */
+
+#define INVALID_SESSION 0x10000
+
 /*
  * Limit for deferred reception queue:
  */
@@ -464,7 +470,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 	msg = l_ptr->pmsg;
 	msg_init(msg, LINK_PROTOCOL, RESET_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr);
 	msg_set_size(msg, sizeof(l_ptr->proto_msg));
-	msg_set_session(msg, tipc_random);
+	msg_set_session(msg, (tipc_random & 0xffff));
 	msg_set_bearer_id(msg, b_ptr->identity);
 	strcpy((char *)msg_data(msg), if_name);
 
@@ -705,10 +711,10 @@ void tipc_link_reset(struct link *l_ptr)
 	u32 checkpoint = l_ptr->next_in_no;
 	int was_active_link = tipc_link_is_active(l_ptr);
 
-	msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1);
+	msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff));
 
-	/* Link is down, accept any session: */
-	l_ptr->peer_session = 0;
+	/* Link is down, accept any session */
+	l_ptr->peer_session = INVALID_SESSION;
 
 	/* Prepare for max packet size negotiation */
 	link_init_max_pkt(l_ptr);
@@ -2275,7 +2281,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 	switch (msg_type(msg)) {
 
 	case RESET_MSG:
-		if (!link_working_unknown(l_ptr) && l_ptr->peer_session) {
+		if (!link_working_unknown(l_ptr) &&
+		    (l_ptr->peer_session != INVALID_SESSION)) {
 			if (msg_session(msg) == l_ptr->peer_session) {
 				dbg("Duplicate RESET: %u<->%u\n",
 				    msg_session(msg), l_ptr->peer_session);
-- 
cgit v1.2.3-70-g09d2


From 1265a02108c508b508112cdeac922aad03e0146a Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:32:35 -0700
Subject: tipc: Minor optimizations to received message processing

This patch enhances TIPC's handler for incoming messages in two
ways:
- the trivial, single-use routine for processing non-sequenced
  messages has been merged into the main handler
- the interface that received a message is now identified without
  having to access and/or modify the associated sk_buff

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/discover.c |  4 ++--
 net/tipc/discover.h |  2 +-
 net/tipc/link.c     | 27 +++++----------------------
 3 files changed, 8 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index faeaf06d377..ada213aac4d 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -156,11 +156,11 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
 /**
  * tipc_disc_recv_msg - handle incoming link setup message (request or response)
  * @buf: buffer containing message
+ * @b_ptr: bearer that message arrived on
  */
 
-void tipc_disc_recv_msg(struct sk_buff *buf)
+void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 {
-	struct bearer *b_ptr = (struct bearer *)TIPC_SKB_CB(buf)->handle;
 	struct link *link;
 	struct tipc_media_addr media_addr;
 	struct tipc_msg *msg = buf_msg(buf);
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index 9fd7587b143..c36eaeb7d5d 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -48,7 +48,7 @@ struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
 void tipc_disc_update_link_req(struct link_req *req);
 void tipc_disc_stop_link_req(struct link_req *req);
 
-void tipc_disc_recv_msg(struct sk_buff *buf);
+void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr);
 
 void tipc_disc_link_event(u32 addr, char *name, int up);
 #if 0
diff --git a/net/tipc/link.c b/net/tipc/link.c
index b8c1231e314..c62ebfea930 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1766,21 +1766,6 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
 	l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0;
 }
 
-/*
- * link_recv_non_seq: Receive packets which are outside
- *                    the link sequence flow
- */
-
-static void link_recv_non_seq(struct sk_buff *buf)
-{
-	struct tipc_msg *msg = buf_msg(buf);
-
-	if (msg_user(msg) ==  LINK_CONFIG)
-		tipc_disc_recv_msg(buf);
-	else
-		tipc_bclink_recv_pkt(buf);
-}
-
 /**
  * link_insert_deferred_queue - insert deferred messages back into receive chain
  */
@@ -1857,7 +1842,7 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
 {
 	read_lock_bh(&tipc_net_lock);
 	while (head) {
-		struct bearer *b_ptr;
+		struct bearer *b_ptr = (struct bearer *)tb_ptr;
 		struct node *n_ptr;
 		struct link *l_ptr;
 		struct sk_buff *crs;
@@ -1868,9 +1853,6 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
 		u32 released = 0;
 		int type;
 
-		b_ptr = (struct bearer *)tb_ptr;
-		TIPC_SKB_CB(buf)->handle = b_ptr;
-
 		head = head->next;
 
 		/* Ensure message is well-formed */
@@ -1889,7 +1871,10 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
 		msg = buf_msg(buf);
 
 		if (unlikely(msg_non_seq(msg))) {
-			link_recv_non_seq(buf);
+			if (msg_user(msg) ==  LINK_CONFIG)
+				tipc_disc_recv_msg(buf, b_ptr);
+			else
+				tipc_bclink_recv_pkt(buf);
 			continue;
 		}
 
@@ -1996,8 +1981,6 @@ deliver:
 						if (link_recv_changeover_msg(&l_ptr, &buf)) {
 							msg = buf_msg(buf);
 							seq_no = msg_seqno(msg);
-							TIPC_SKB_CB(buf)->handle
-								= b_ptr;
 							if (type == ORIGINAL_MSG)
 								goto deliver;
 							goto protocol_check;
-- 
cgit v1.2.3-70-g09d2


From 9c396a7bfb4fe74e444be09069651280da520944 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:36:58 -0700
Subject: tipc: Prevent access of non-existent field in short message header

This patch eliminates a case where TIPC's link code could try reading
a field that is not present in a short message header.  (The random
value obtained was not being used, but the read operation could result
in an invalid memory access exception in extremely rare circumstances.)

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index c62ebfea930..022cb2f107a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2674,10 +2674,12 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
 	u32 pack_sz = link_max_pkt(l_ptr);
 	u32 fragm_sz = pack_sz - INT_H_SIZE;
 	u32 fragm_no = 1;
-	u32 destaddr = msg_destnode(inmsg);
+	u32 destaddr;
 
 	if (msg_short(inmsg))
 		destaddr = l_ptr->addr;
+	else
+		destaddr = msg_destnode(inmsg);
 
 	if (msg_routed(inmsg))
 		msg_set_prevnode(inmsg, tipc_own_addr);
-- 
cgit v1.2.3-70-g09d2


From 757152175666681d54d370500e41a756cfedd4fc Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:37:34 -0700
Subject: tipc: Optimize message initialization routine

This patch eliminates the rarely-used "error code" argument
when initializing a TIPC message header, since the default
value of zero is the desired result in most cases; the few
exceptional cases now set the error code explicitly.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c      |  2 +-
 net/tipc/cluster.c    |  2 +-
 net/tipc/discover.c   |  3 +--
 net/tipc/link.c       | 12 ++++++------
 net/tipc/msg.h        |  3 +--
 net/tipc/name_distr.c |  3 +--
 net/tipc/port.c       |  9 +++++----
 7 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index e7880172ef1..0052c0747d0 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -276,7 +276,7 @@ static void bclink_send_nack(struct node *n_ptr)
 	if (buf) {
 		msg = buf_msg(buf);
 		msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
-			 TIPC_OK, INT_H_SIZE, n_ptr->addr);
+			 INT_H_SIZE, n_ptr->addr);
 		msg_set_mc_netid(msg, tipc_net_id);
 		msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in));
 		msg_set_bcgap_after(msg, n_ptr->bclink.gap_after);
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 4bb3404f610..bc1db474fe0 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -238,7 +238,7 @@ static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest)
 	if (buf) {
 		msg = buf_msg(buf);
 		memset((char *)msg, 0, size);
-		msg_init(msg, ROUTE_DISTRIBUTOR, 0, TIPC_OK, INT_H_SIZE, dest);
+		msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest);
 	}
 	return buf;
 }
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index ada213aac4d..64c20284e0f 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -120,8 +120,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
 
 	if (buf) {
 		msg = buf_msg(buf);
-		msg_init(msg, LINK_CONFIG, type, TIPC_OK, DSC_H_SIZE,
-			 dest_domain);
+		msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain);
 		msg_set_non_seq(msg);
 		msg_set_req_links(msg, req_links);
 		msg_set_dest_domain(msg, dest_domain);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 022cb2f107a..9784a8e963b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -468,7 +468,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 
 	l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg;
 	msg = l_ptr->pmsg;
-	msg_init(msg, LINK_PROTOCOL, RESET_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr);
+	msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr);
 	msg_set_size(msg, sizeof(l_ptr->proto_msg));
 	msg_set_session(msg, (tipc_random & 0xffff));
 	msg_set_bearer_id(msg, b_ptr->identity);
@@ -1128,7 +1128,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 
 			if (bundler) {
 				msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
-					 TIPC_OK, INT_H_SIZE, l_ptr->addr);
+					 INT_H_SIZE, l_ptr->addr);
 				skb_copy_to_linear_data(bundler, &bundler_hdr,
 							INT_H_SIZE);
 				skb_trim(bundler, INT_H_SIZE);
@@ -1392,7 +1392,7 @@ again:
 
 	msg_dbg(hdr, ">FRAGMENTING>");
 	msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
-		 TIPC_OK, INT_H_SIZE, msg_destnode(hdr));
+		 INT_H_SIZE, msg_destnode(hdr));
 	msg_set_link_selector(&fragm_hdr, sender->publ.ref);
 	msg_set_size(&fragm_hdr, max_pkt);
 	msg_set_fragm_no(&fragm_hdr, 1);
@@ -2426,7 +2426,7 @@ void tipc_link_changeover(struct link *l_ptr)
 	}
 
 	msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
-		 ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr);
+		 ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr);
 	msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
 	msg_set_msgcnt(&tunnel_hdr, msgcount);
 	dbg("Link changeover requires %u tunnel messages\n", msgcount);
@@ -2481,7 +2481,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
 	struct tipc_msg tunnel_hdr;
 
 	msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
-		 DUPLICATE_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr);
+		 DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr);
 	msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size);
 	msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
 	iter = l_ptr->first_out;
@@ -2687,7 +2687,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
 	/* Prepare reusable fragment header: */
 
 	msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
-		 TIPC_OK, INT_H_SIZE, destaddr);
+		 INT_H_SIZE, destaddr);
 	msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg));
 	msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++));
 	msg_set_fragm_no(&fragm_hdr, fragm_no);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index ad487e8abcc..3418ffa72c9 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -696,7 +696,7 @@ static inline u32 msg_tot_importance(struct tipc_msg *m)
 
 
 static inline void msg_init(struct tipc_msg *m, u32 user, u32 type,
-			    u32 err, u32 hsize, u32 destnode)
+			    u32 hsize, u32 destnode)
 {
 	memset(m, 0, hsize);
 	msg_set_version(m);
@@ -705,7 +705,6 @@ static inline void msg_init(struct tipc_msg *m, u32 user, u32 type,
 	msg_set_size(m, hsize);
 	msg_set_prevnode(m, tipc_own_addr);
 	msg_set_type(m, type);
-	msg_set_errcode(m, err);
 	if (!msg_short(m)) {
 		msg_set_orignode(m, tipc_own_addr);
 		msg_set_destnode(m, destnode);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index aecba5cd87d..10a69894e2f 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -103,8 +103,7 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
 
 	if (buf != NULL) {
 		msg = buf_msg(buf);
-		msg_init(msg, NAME_DISTRIBUTOR, type, TIPC_OK,
-			 LONG_H_SIZE, dest);
+		msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest);
 		msg_set_size(msg, LONG_H_SIZE + size);
 	}
 	return buf;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 4dfef9e798e..e3e9c121afb 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -244,8 +244,7 @@ u32 tipc_createport_raw(void *usr_handle,
 	p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
 	p_ptr->publ.ref = ref;
 	msg = &p_ptr->publ.phdr;
-	msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE,
-		 0);
+	msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
 	msg_set_orignode(msg, tipc_own_addr);
 	msg_set_prevnode(msg, tipc_own_addr);
 	msg_set_origport(msg, ref);
@@ -404,7 +403,8 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 	buf = buf_acquire(LONG_H_SIZE);
 	if (buf) {
 		msg = buf_msg(buf);
-		msg_init(msg, usr, type, err, LONG_H_SIZE, destnode);
+		msg_init(msg, usr, type, LONG_H_SIZE, destnode);
+		msg_set_errcode(msg, err);
 		msg_set_destport(msg, destport);
 		msg_set_origport(msg, origport);
 		msg_set_destnode(msg, destnode);
@@ -448,7 +448,8 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 		return data_sz;
 	}
 	rmsg = buf_msg(rbuf);
-	msg_init(rmsg, imp, msg_type(msg), err, hdr_sz, msg_orignode(msg));
+	msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg));
+	msg_set_errcode(rmsg, err);
 	msg_set_destport(rmsg, msg_origport(msg));
 	msg_set_prevnode(rmsg, tipc_own_addr);
 	msg_set_origport(rmsg, msg_destport(msg));
-- 
cgit v1.2.3-70-g09d2


From 0f15d36453bbd02d404445dace49f4ae072f44e5 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:37:59 -0700
Subject: tipc: Prevent display of name table types with no publications

This patch adds a check to prevent TIPC's name table display code
from listing a name type entry if it exists only to hold subscription
info, rather than published names.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 892373e498e..4455f13cfaf 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -905,6 +905,9 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth,
 	struct sub_seq *sseq;
 	char typearea[11];
 
+	if (seq->first_free == 0)
+		return;
+
 	sprintf(typearea, "%-10u", seq->type);
 
 	if (depth == 1) {
-- 
cgit v1.2.3-70-g09d2


From 307fdf5e7defcacf84db21cda18770b2bf5f5196 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:38:22 -0700
Subject: tipc: Add missing spinlock in name table display code

This patch ensures that the display code that traverses the
publication lists belonging to a name table entry take its
associated spinlock, to protect against a possible change to
one of its "head of list" pointers caused by a simultaneous
name table lookup operation by another thread of control.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 4455f13cfaf..096f7bd240a 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -74,7 +74,7 @@ struct sub_seq {
  * @first_free: array index of first unused sub-sequence entry
  * @ns_list: links to adjacent name sequences in hash chain
  * @subscriptions: list of subscriptions for this 'type'
- * @lock: spinlock controlling access to name sequence structure
+ * @lock: spinlock controlling access to publication lists of all sub-sequences
  */
 
 struct name_seq {
@@ -918,7 +918,9 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth,
 	for (sseq = seq->sseqs; sseq != &seq->sseqs[seq->first_free]; sseq++) {
 		if ((lowbound <= sseq->upper) && (upbound >= sseq->lower)) {
 			tipc_printf(buf, "%s ", typearea);
+			spin_lock_bh(&seq->lock);
 			subseq_list(sseq, buf, depth, index);
+			spin_unlock_bh(&seq->lock);
 			sprintf(typearea, "%10s", " ");
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From bd7845337b105e090dd18912d511139945fa7586 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:47:30 -0700
Subject: tipc: Expand link sequence gap field to 13 bits

This patch increases the "sequence gap" field of the LINK_PROTOCOL
message header from 8 bits to 13 bits (utilizing 5 previously
unused 0 bits).  This ensures that the field is big enough to
indicate the loss of up to 8191 consecutive messages on the link,
thereby accommodating the current worst-case scenario of 4000
lost messages.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 3418ffa72c9..b23619fab96 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -2,7 +2,7 @@
  * net/tipc/msg.h: Include file for TIPC message header routines
  *
  * Copyright (c) 2000-2007, Ericsson AB
- * Copyright (c) 2005-2007, Wind River Systems
+ * Copyright (c) 2005-2008, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -325,7 +325,7 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    w0:|vers |msg usr|hdr sz |n|resrv|            packet size          |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w1:|m typ|rsv=0|   sequence gap    |       broadcast ack no        |
+   w1:|m typ|      sequence gap       |       broadcast ack no        |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    w2:| link level ack no/bc_gap_from |     seq no / bcast_gap_to     |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -388,12 +388,12 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
 
 static inline u32 msg_seq_gap(struct tipc_msg *m)
 {
-	return msg_bits(m, 1, 16, 0xff);
+	return msg_bits(m, 1, 16, 0x1fff);
 }
 
 static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n)
 {
-	msg_set_bits(m, 1, 16, 0xff, n);
+	msg_set_bits(m, 1, 16, 0x1fff, n);
 }
 
 static inline u32 msg_req_links(struct tipc_msg *m)
-- 
cgit v1.2.3-70-g09d2


From 9bef54383d16568da19cfe46bdc52cdedb9bb8da Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:47:55 -0700
Subject: tipc: Message header creation optimizations

This patch eliminates several cases where message header fields
were being set to the same value twice.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index e3e9c121afb..93014f9bc95 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -244,11 +244,8 @@ u32 tipc_createport_raw(void *usr_handle,
 	p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
 	p_ptr->publ.ref = ref;
 	msg = &p_ptr->publ.phdr;
-	msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
-	msg_set_orignode(msg, tipc_own_addr);
-	msg_set_prevnode(msg, tipc_own_addr);
+	msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
 	msg_set_origport(msg, ref);
-	msg_set_importance(msg,importance);
 	p_ptr->last_in_seqno = 41;
 	p_ptr->sent = 1;
 	INIT_LIST_HEAD(&p_ptr->wait_list);
@@ -407,7 +404,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 		msg_set_errcode(msg, err);
 		msg_set_destport(msg, destport);
 		msg_set_origport(msg, origport);
-		msg_set_destnode(msg, destnode);
 		msg_set_orignode(msg, orignode);
 		msg_set_transp_seqno(msg, seqno);
 		msg_set_msgcnt(msg, ack);
@@ -451,7 +447,6 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 	msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg));
 	msg_set_errcode(rmsg, err);
 	msg_set_destport(rmsg, msg_origport(msg));
-	msg_set_prevnode(rmsg, tipc_own_addr);
 	msg_set_origport(rmsg, msg_destport(msg));
 	if (msg_short(msg))
 		msg_set_orignode(rmsg, tipc_own_addr);
-- 
cgit v1.2.3-70-g09d2


From 99c145939bc1f65f9b946f2b9dd7bfc1f44783d6 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:48:25 -0700
Subject: tipc: Fix bugs in rejection of message with short header

This patch ensures that TIPC doesn't try to access non-existent
message header fields when rejecting a message with a short header.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index 93014f9bc95..2e0cff408ff 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -448,13 +448,15 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 	msg_set_errcode(rmsg, err);
 	msg_set_destport(rmsg, msg_origport(msg));
 	msg_set_origport(rmsg, msg_destport(msg));
-	if (msg_short(msg))
+	if (msg_short(msg)) {
 		msg_set_orignode(rmsg, tipc_own_addr);
-	else
+		/* leave name type & instance as zeroes */
+	} else {
 		msg_set_orignode(rmsg, msg_destnode(msg));
+		msg_set_nametype(rmsg, msg_nametype(msg));
+		msg_set_nameinst(rmsg, msg_nameinst(msg));
+	}
 	msg_set_size(rmsg, data_sz + hdr_sz);
-	msg_set_nametype(rmsg, msg_nametype(msg));
-	msg_set_nameinst(rmsg, msg_nameinst(msg));
 	skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz);
 
 	/* send self-abort message when rejecting on a connected port */
-- 
cgit v1.2.3-70-g09d2


From 40aecb1b13f50d96616abb612c17e59457f54263 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Wed, 4 Jun 2008 17:54:48 -0700
Subject: tipc: Message rejection rework preparatory changes

This patch defines a few new message header manipulation routines,
and generalizes the usefulness of another, in preparation for upcoming
rework of TIPC's message rejection code.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c    |  2 +-
 net/tipc/discover.c |  2 +-
 net/tipc/msg.h      | 31 +++++++++++++++++++++++++++++--
 3 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 0052c0747d0..a5883b1452f 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -571,7 +571,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 		assert(tipc_cltr_bcast_nodes.count != 0);
 		bcbuf_set_acks(buf, tipc_cltr_bcast_nodes.count);
 		msg = buf_msg(buf);
-		msg_set_non_seq(msg);
+		msg_set_non_seq(msg, 1);
 		msg_set_mc_netid(msg, tipc_net_id);
 	}
 
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 64c20284e0f..1657f0e795f 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -121,7 +121,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
 	if (buf) {
 		msg = buf_msg(buf);
 		msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain);
-		msg_set_non_seq(msg);
+		msg_set_non_seq(msg, 1);
 		msg_set_req_links(msg, req_links);
 		msg_set_dest_domain(msg, dest_domain);
 		msg_set_bc_netid(msg, tipc_net_id);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index b23619fab96..7ee6ae23814 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -75,6 +75,14 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
 	m->hdr[w] |= htonl(val);
 }
 
+static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b)
+{
+	u32 temp = msg->hdr[a];
+
+	msg->hdr[a] = msg->hdr[b];
+	msg->hdr[b] = temp;
+}
+
 /*
  * Word 0
  */
@@ -119,9 +127,9 @@ static inline int msg_non_seq(struct tipc_msg *m)
 	return msg_bits(m, 0, 20, 1);
 }
 
-static inline void msg_set_non_seq(struct tipc_msg *m)
+static inline void msg_set_non_seq(struct tipc_msg *m, u32 n)
 {
-	msg_set_bits(m, 0, 20, 1, 1);
+	msg_set_bits(m, 0, 20, 1, n);
 }
 
 static inline int msg_dest_droppable(struct tipc_msg *m)
@@ -224,6 +232,25 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n)
 	msg_set_bits(m, 2, 0, 0xffff, n);
 }
 
+/*
+ * TIPC may utilize the "link ack #" and "link seq #" fields of a short
+ * message header to hold the destination node for the message, since the
+ * normal "dest node" field isn't present.  This cache is only referenced
+ * when required, so populating the cache of a longer message header is
+ * harmless (as long as the header has the two link sequence fields present).
+ *
+ * Note: Host byte order is OK here, since the info never goes off-card.
+ */
+
+static inline u32 msg_destnode_cache(struct tipc_msg *m)
+{
+	return m->hdr[2];
+}
+
+static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode)
+{
+	m->hdr[2] = dnode;
+}
 
 /*
  * Words 3-10
-- 
cgit v1.2.3-70-g09d2


From 9457afee85e0dfc2b5075a391d6f34463b4c2b90 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 5 Jun 2008 11:23:39 -0700
Subject: netlink: Remove nonblock parameter from netlink_attachskb

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  | 2 +-
 ipc/mqueue.c             | 2 +-
 net/netlink/af_netlink.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index bec1062a25a..9ff1b54908f 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -193,7 +193,7 @@ extern int netlink_unregister_notifier(struct notifier_block *nb);
 
 /* finegrained unicast helpers: */
 struct sock *netlink_getsockbyfilp(struct file *filp);
-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
+int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		      long *timeo, struct sock *ssk);
 void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
 int netlink_sendskb(struct sock *sk, struct sk_buff *skb);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index b3b69fd5133..3e84b958186 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1054,7 +1054,7 @@ retry:
 			}
 
 			timeo = MAX_SCHEDULE_TIMEOUT;
-			ret = netlink_attachskb(sock, nc, 0, &timeo, NULL);
+			ret = netlink_attachskb(sock, nc, &timeo, NULL);
 			if (ret == 1)
 		       		goto retry;
 			if (ret) {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9b97f8006c9..6507c02dbe0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -759,7 +759,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
  * 0: continue
  * 1: repeat lookup - reference dropped while waiting for socket memory.
  */
-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
+int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		      long *timeo, struct sock *ssk)
 {
 	struct netlink_sock *nlk;
@@ -892,7 +892,7 @@ retry:
 		return err;
 	}
 
-	err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk);
+	err = netlink_attachskb(sk, skb, &timeo, ssk);
 	if (err == 1)
 		goto retry;
 	if (err)
-- 
cgit v1.2.3-70-g09d2


From 7bfe8bdb80d5988483b01177b09b9c8d1605dffb Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Mon, 9 Jun 2008 15:45:05 -0700
Subject: sctp: Fix problems with the new SCTP_DELAYED_ACK code

The default sack frequency should be 2.  Also fix copy/paste
error when updating all transports.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 81600eea05d..253e5ea7e1e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2471,7 +2471,7 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
 					(trans->param_flags & ~SPP_SACKDELAY) |
 					SPP_SACKDELAY_ENABLE;
 			}
-			if (params.sack_delay == 1) {
+			if (params.sack_freq == 1) {
 				trans->param_flags =
 					(trans->param_flags & ~SPP_SACKDELAY) |
 					SPP_SACKDELAY_DISABLE;
@@ -3536,7 +3536,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->pathmaxrxt  = sctp_max_retrans_path;
 	sp->pathmtu     = 0; // allow default discovery
 	sp->sackdelay   = sctp_sack_timeout;
-	sp->sackfreq	= 3;
+	sp->sackfreq	= 2;
 	sp->param_flags = SPP_HB_ENABLE |
 			  SPP_PMTUD_ENABLE |
 			  SPP_SACKDELAY_ENABLE;
-- 
cgit v1.2.3-70-g09d2


From f1494ed1d318542baa9480cfd44d040a92635129 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 9 Jun 2008 15:49:57 -0700
Subject: iucv: fix section mismatch warning.

WARNING: net/iucv/built-in.o(.exit.text+0x9c): Section mismatch in
reference from the function iucv_exit() to the variable
.cpuinit.data:iucv_cpu_notifier

This warning is caused by a reference from unregister_hotcpu_notifier()
from an exit function to a cpuinitdata annotated data structurre.
This is a false positive warning since for the non CPU_HOTPLUG case
unregister_hotcpu_notifier() is a nop.
Use __refdata instead of __cpuinitdata to get rid of the warning.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Ursula Braun <braunu@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 91897076213..2d43175b0cd 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -598,7 +598,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata iucv_cpu_notifier = {
+static struct notifier_block __refdata iucv_cpu_notifier = {
 	.notifier_call = iucv_cpu_notify,
 };
 
-- 
cgit v1.2.3-70-g09d2


From 7b9d1b22a382aa221018c19880ee22c44467feec Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 9 Jun 2008 15:50:30 -0700
Subject: iucv: prevent cpu hotplug when walking cpu_online_map.

The code used preempt_disable() to prevent cpu hotplug, however that
doesn't protect for cpus being added. So use get_online_cpus() instead.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Ursula Braun <braunu@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 2d43175b0cd..531a206ce7a 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -474,14 +474,14 @@ static void iucv_setmask_mp(void)
 {
 	int cpu;
 
-	preempt_disable();
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		/* Enable all cpus with a declared buffer. */
 		if (cpu_isset(cpu, iucv_buffer_cpumask) &&
 		    !cpu_isset(cpu, iucv_irq_cpumask))
 			smp_call_function_single(cpu, iucv_allow_cpu,
 						 NULL, 0, 1);
-	preempt_enable();
+	put_online_cpus();
 }
 
 /**
@@ -521,16 +521,17 @@ static int iucv_enable(void)
 		goto out;
 	/* Declare per cpu buffers. */
 	rc = -EIO;
-	preempt_disable();
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1);
-	preempt_enable();
 	if (cpus_empty(iucv_buffer_cpumask))
 		/* No cpu could declare an iucv buffer. */
 		goto out_path;
+	put_online_cpus();
 	return 0;
 
 out_path:
+	put_online_cpus();
 	kfree(iucv_path_table);
 out:
 	return rc;
@@ -545,7 +546,9 @@ out:
  */
 static void iucv_disable(void)
 {
+	get_online_cpus();
 	on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1);
+	put_online_cpus();
 	kfree(iucv_path_table);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 469689a4dd476c1be6750deea5f59528a17b8b4a Mon Sep 17 00:00:00 2001
From: Ursula Braun <braunu@de.ibm.com>
Date: Mon, 9 Jun 2008 15:51:03 -0700
Subject: af_iucv: exploit target message class support of IUCV

The first 4 bytes of data to be sent are stored additionally into
the message class field of the send request. A receiving target
program (not an af_iucv socket program) can make use of this
information to pre-screen incoming messages.

Signed-off-by: Ursula Braun <braunu@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 7b0038f45b1..58e4aee3e69 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -644,6 +644,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 		}
 
 		txmsg.class = 0;
+		memcpy(&txmsg.class, skb->data, skb->len >= 4 ? 4 : skb->len);
 		txmsg.tag = iucv->send_tag++;
 		memcpy(skb->cb, &txmsg.tag, 4);
 		skb_queue_tail(&iucv->send_skb_q, skb);
-- 
cgit v1.2.3-70-g09d2


From 93f65158723ceb7078ee9a0fd4830c0de00f4b9e Mon Sep 17 00:00:00 2001
From: Kuo-lang Tseng <kuo-lang.tseng@intel.com>
Date: Mon, 9 Jun 2008 15:55:45 -0700
Subject: netfilter: ebtables: add IPv6 support

It implements matching functions for IPv6 address & traffic class
(merged from the patch sent by Jan Engelhardt [jengelh@computergmbh.de]
http://marc.info/?l=netfilter-devel&m=120182168424052&w=2), protocol,
and layer-4 port id. Corresponding watcher logging function is also
added for IPv6.

Signed-off-by: Kuo-lang Tseng <kuo-lang.tseng@intel.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_bridge/ebt_ip6.h |  40 +++++++++
 include/linux/netfilter_bridge/ebt_log.h |   3 +-
 net/bridge/netfilter/Kconfig             |   9 ++
 net/bridge/netfilter/Makefile            |   1 +
 net/bridge/netfilter/ebt_ip6.c           | 144 +++++++++++++++++++++++++++++++
 net/bridge/netfilter/ebt_log.c           |  64 ++++++++++----
 6 files changed, 244 insertions(+), 17 deletions(-)
 create mode 100644 include/linux/netfilter_bridge/ebt_ip6.h
 create mode 100644 net/bridge/netfilter/ebt_ip6.c

(limited to 'net')

diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h
new file mode 100644
index 00000000000..2273c3ae33c
--- /dev/null
+++ b/include/linux/netfilter_bridge/ebt_ip6.h
@@ -0,0 +1,40 @@
+/*
+ *  ebt_ip6
+ *
+ *	Authors:
+ * Kuo-Lang Tseng <kuo-lang.tseng@intel.com>
+ * Manohar Castelino <manohar.r.castelino@intel.com>
+ *
+ *  Jan 11, 2008
+ *
+ */
+
+#ifndef __LINUX_BRIDGE_EBT_IP6_H
+#define __LINUX_BRIDGE_EBT_IP6_H
+
+#define EBT_IP6_SOURCE 0x01
+#define EBT_IP6_DEST 0x02
+#define EBT_IP6_TCLASS 0x04
+#define EBT_IP6_PROTO 0x08
+#define EBT_IP6_SPORT 0x10
+#define EBT_IP6_DPORT 0x20
+#define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\
+		      EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT)
+#define EBT_IP6_MATCH "ip6"
+
+/* the same values are used for the invflags */
+struct ebt_ip6_info
+{
+	struct in6_addr saddr;
+	struct in6_addr daddr;
+	struct in6_addr smsk;
+	struct in6_addr dmsk;
+	uint8_t  tclass;
+	uint8_t  protocol;
+	uint8_t  bitmask;
+	uint8_t  invflags;
+	uint16_t sport[2];
+	uint16_t dport[2];
+};
+
+#endif
diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h
index 96e231ae755..b76e653157e 100644
--- a/include/linux/netfilter_bridge/ebt_log.h
+++ b/include/linux/netfilter_bridge/ebt_log.h
@@ -4,7 +4,8 @@
 #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */
 #define EBT_LOG_ARP 0x02
 #define EBT_LOG_NFLOG 0x04
-#define EBT_LOG_MASK (EBT_LOG_IP | EBT_LOG_ARP)
+#define EBT_LOG_IP6 0x08
+#define EBT_LOG_MASK (EBT_LOG_IP | EBT_LOG_ARP | EBT_LOG_IP6)
 #define EBT_LOG_PREFIX_SIZE 30
 #define EBT_LOG_WATCHER "log"
 
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 7beeefa0f9c..fb684c2ff8b 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -83,6 +83,15 @@ config BRIDGE_EBT_IP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config BRIDGE_EBT_IP6
+	tristate "ebt: IP6 filter support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the IP6 match, which allows basic IPV6 header field
+	  filtering.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config BRIDGE_EBT_LIMIT
 	tristate "ebt: limit match support"
 	depends on BRIDGE_NF_EBTABLES
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 83715d73a50..dd960645b41 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o
 obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o
 obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o
 obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o
+obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip6.o
 obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o
 obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o
 obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
new file mode 100644
index 00000000000..36efb3a7524
--- /dev/null
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -0,0 +1,144 @@
+/*
+ *  ebt_ip6
+ *
+ *	Authors:
+ *	Manohar Castelino <manohar.r.castelino@intel.com>
+ *	Kuo-Lang Tseng <kuo-lang.tseng@intel.com>
+ *	Jan Engelhardt <jengelh@computergmbh.de>
+ *
+ * Summary:
+ * This is just a modification of the IPv4 code written by
+ * Bart De Schuymer <bdschuym@pandora.be>
+ * with the changes required to support IPv6
+ *
+ *  Jan, 2008
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_ip6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <linux/in.h>
+#include <linux/module.h>
+#include <net/dsfield.h>
+
+struct tcpudphdr {
+	__be16 src;
+	__be16 dst;
+};
+
+static int ebt_filter_ip6(const struct sk_buff *skb,
+   const struct net_device *in,
+   const struct net_device *out, const void *data,
+   unsigned int datalen)
+{
+	const struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
+	const struct ipv6hdr *ih6;
+	struct ipv6hdr _ip6h;
+	const struct tcpudphdr *pptr;
+	struct tcpudphdr _ports;
+	struct in6_addr tmp_addr;
+	int i;
+
+	ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
+	if (ih6 == NULL)
+		return EBT_NOMATCH;
+	if (info->bitmask & EBT_IP6_TCLASS &&
+	   FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS))
+		return EBT_NOMATCH;
+	for (i = 0; i < 4; i++)
+		tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] &
+			info->smsk.in6_u.u6_addr32[i];
+	if (info->bitmask & EBT_IP6_SOURCE &&
+		FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0),
+			EBT_IP6_SOURCE))
+		return EBT_NOMATCH;
+	for (i = 0; i < 4; i++)
+		tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] &
+			info->dmsk.in6_u.u6_addr32[i];
+	if (info->bitmask & EBT_IP6_DEST &&
+	   FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST))
+		return EBT_NOMATCH;
+	if (info->bitmask & EBT_IP6_PROTO) {
+		uint8_t nexthdr = ih6->nexthdr;
+		int offset_ph;
+
+		offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr);
+		if (offset_ph == -1)
+			return EBT_NOMATCH;
+		if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
+			return EBT_NOMATCH;
+		if (!(info->bitmask & EBT_IP6_DPORT) &&
+		    !(info->bitmask & EBT_IP6_SPORT))
+			return EBT_MATCH;
+		pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
+					  &_ports);
+		if (pptr == NULL)
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_IP6_DPORT) {
+			u32 dst = ntohs(pptr->dst);
+			if (FWINV(dst < info->dport[0] ||
+				  dst > info->dport[1], EBT_IP6_DPORT))
+				return EBT_NOMATCH;
+		}
+		if (info->bitmask & EBT_IP6_SPORT) {
+			u32 src = ntohs(pptr->src);
+			if (FWINV(src < info->sport[0] ||
+				  src > info->sport[1], EBT_IP6_SPORT))
+			return EBT_NOMATCH;
+		}
+		return EBT_MATCH;
+	}
+	return EBT_MATCH;
+}
+
+static int ebt_ip6_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_ip6_info)))
+		return -EINVAL;
+	if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO)
+		return -EINVAL;
+	if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK)
+		return -EINVAL;
+	if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) {
+		if (info->invflags & EBT_IP6_PROTO)
+			return -EINVAL;
+		if (info->protocol != IPPROTO_TCP &&
+		    info->protocol != IPPROTO_UDP &&
+		    info->protocol != IPPROTO_UDPLITE &&
+		    info->protocol != IPPROTO_SCTP &&
+		    info->protocol != IPPROTO_DCCP)
+			 return -EINVAL;
+	}
+	if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1])
+		return -EINVAL;
+	if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_match filter_ip6 =
+{
+	.name		= EBT_IP6_MATCH,
+	.match		= ebt_filter_ip6,
+	.check		= ebt_ip6_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init ebt_ip6_init(void)
+{
+	return ebt_register_match(&filter_ip6);
+}
+
+static void __exit ebt_ip6_fini(void)
+{
+	ebt_unregister_match(&filter_ip6);
+}
+
+module_init(ebt_ip6_init);
+module_exit(ebt_ip6_fini);
+MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match");
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 0b209e4aad0..c883ec8a28b 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -18,6 +18,9 @@
 #include <linux/if_arp.h>
 #include <linux/spinlock.h>
 #include <net/netfilter/nf_log.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <linux/in6.h>
 
 static DEFINE_SPINLOCK(ebt_log_lock);
 
@@ -58,6 +61,27 @@ static void print_MAC(const unsigned char *p)
 		printk("%02x%c", *p, i == ETH_ALEN - 1 ? ' ':':');
 }
 
+static void
+print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
+{
+	if (protocol == IPPROTO_TCP ||
+	    protocol == IPPROTO_UDP ||
+	    protocol == IPPROTO_UDPLITE ||
+	    protocol == IPPROTO_SCTP ||
+	    protocol == IPPROTO_DCCP) {
+		const struct tcpudphdr *pptr;
+		struct tcpudphdr _ports;
+
+		pptr = skb_header_pointer(skb, offset,
+					  sizeof(_ports), &_ports);
+		if (pptr == NULL) {
+			printk(" INCOMPLETE TCP/UDP header");
+			return;
+		}
+		printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
+	}
+}
+
 #define myNIPQUAD(a) a[0], a[1], a[2], a[3]
 static void
 ebt_log_packet(unsigned int pf, unsigned int hooknum,
@@ -95,23 +119,31 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum,
 		printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP "
 		       "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr),
 		       NIPQUAD(ih->daddr), ih->tos, ih->protocol);
-		if (ih->protocol == IPPROTO_TCP ||
-		    ih->protocol == IPPROTO_UDP ||
-		    ih->protocol == IPPROTO_UDPLITE ||
-		    ih->protocol == IPPROTO_SCTP ||
-		    ih->protocol == IPPROTO_DCCP) {
-			const struct tcpudphdr *pptr;
-			struct tcpudphdr _ports;
-
-			pptr = skb_header_pointer(skb, ih->ihl*4,
-						  sizeof(_ports), &_ports);
-			if (pptr == NULL) {
-				printk(" INCOMPLETE TCP/UDP header");
-				goto out;
-			}
-			printk(" SPT=%u DPT=%u", ntohs(pptr->src),
-			   ntohs(pptr->dst));
+		print_ports(skb, ih->protocol, ih->ihl*4);
+		goto out;
+	}
+
+	if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto ==
+	   htons(ETH_P_IPV6)) {
+		const struct ipv6hdr *ih;
+		struct ipv6hdr _iph;
+		uint8_t nexthdr;
+		int offset_ph;
+
+		ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+		if (ih == NULL) {
+			printk(" INCOMPLETE IPv6 header");
+			goto out;
 		}
+		printk(" IPv6 SRC=%x:%x:%x:%x:%x:%x:%x:%x "
+		       "IPv6 DST=%x:%x:%x:%x:%x:%x:%x:%x, IPv6 "
+		       "priority=0x%01X, Next Header=%d", NIP6(ih->saddr),
+		       NIP6(ih->daddr), ih->priority, ih->nexthdr);
+		nexthdr = ih->nexthdr;
+		offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr);
+		if (offset_ph == -1)
+			goto out;
+		print_ports(skb, nexthdr, offset_ph);
 		goto out;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 0adf9d67489cd30bab8eb93f7de81a674e44e1c3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 9 Jun 2008 15:56:20 -0700
Subject: netfilter: ctnetlink: group errors into logical errno sets

This patch groups ctnetlink errors into three logical sets:

* Malformed messages: if ctnetlink receives a message without some mandatory
attribute, then it returns EINVAL.
* Unsupported operations: if userspace tries to perform an unsupported
operation, then it returns EOPNOTSUPP.
* Unchangeable: if userspace tries to change some attribute of the
conntrack object that can only be set once, then it returns EBUSY.

This patch reduces the number of -EINVAL from 23 to 14 and it results in
5 -EBUSY and 6 -EOPNOTSUPP.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0edefcfc594..13918c1fbf6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -4,7 +4,7 @@
  * (C) 2001 by Jay Schulist <jschlst@samba.org>
  * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org>
  * (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005-2007 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org>
  *
  * Initial connection tracking via netlink development funded and
  * generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -891,20 +891,19 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 
 	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
 		/* unchangeable */
-		return -EINVAL;
+		return -EBUSY;
 
 	if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
 		/* SEEN_REPLY bit can only be set */
-		return -EINVAL;
-
+		return -EBUSY;
 
 	if (d & IPS_ASSURED && !(status & IPS_ASSURED))
 		/* ASSURED bit can only be set */
-		return -EINVAL;
+		return -EBUSY;
 
 	if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
 #ifndef CONFIG_NF_NAT_NEEDED
-		return -EINVAL;
+		return -EOPNOTSUPP;
 #else
 		struct nf_nat_range range;
 
@@ -945,7 +944,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
 
 	/* don't change helper of sibling connections */
 	if (ct->master)
-		return -EINVAL;
+		return -EBUSY;
 
 	err = ctnetlink_parse_help(cda[CTA_HELP], &helpname);
 	if (err < 0)
@@ -963,7 +962,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
 
 	helper = __nf_conntrack_helper_find_byname(helpname);
 	if (helper == NULL)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	if (help) {
 		if (help->helper == helper)
@@ -1258,12 +1257,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
 		/* we only allow nat config for new conntracks */
 		if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
-			err = -EINVAL;
+			err = -EOPNOTSUPP;
 			goto out_unlock;
 		}
 		/* can't link an existing conntrack to a master */
 		if (cda[CTA_TUPLE_MASTER]) {
-			err = -EINVAL;
+			err = -EOPNOTSUPP;
 			goto out_unlock;
 		}
 		err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h),
@@ -1608,7 +1607,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		h = __nf_conntrack_helper_find_byname(name);
 		if (!h) {
 			spin_unlock_bh(&nf_conntrack_lock);
-			return -EINVAL;
+			return -EOPNOTSUPP;
 		}
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, n, next,
-- 
cgit v1.2.3-70-g09d2


From a258860e01b80e8f554a4ab1a6c95e6042eb8b73 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 9 Jun 2008 15:56:39 -0700
Subject: netfilter: ctnetlink: add full support for SCTP to ctnetlink

This patch adds full support for SCTP to ctnetlink. This includes three
new attributes: state, original vtag and reply vtag.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink_conntrack.h | 10 ++++
 net/netfilter/nf_conntrack_proto_sctp.c       | 80 +++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

(limited to 'net')

diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 0a383ac083c..759bc043dc6 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -81,6 +81,7 @@ enum ctattr_protoinfo {
 	CTA_PROTOINFO_UNSPEC,
 	CTA_PROTOINFO_TCP,
 	CTA_PROTOINFO_DCCP,
+	CTA_PROTOINFO_SCTP,
 	__CTA_PROTOINFO_MAX
 };
 #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
@@ -103,6 +104,15 @@ enum ctattr_protoinfo_dccp {
 };
 #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1)
 
+enum ctattr_protoinfo_sctp {
+	CTA_PROTOINFO_SCTP_UNSPEC,
+	CTA_PROTOINFO_SCTP_STATE,
+	CTA_PROTOINFO_SCTP_VTAG_ORIGINAL,
+	CTA_PROTOINFO_SCTP_VTAG_REPLY,
+	__CTA_PROTOINFO_SCTP_MAX
+};
+#define CTA_PROTOINFO_SCTP_MAX (__CTA_PROTOINFO_SCTP_MAX - 1)
+
 enum ctattr_counters {
 	CTA_COUNTERS_UNSPEC,
 	CTA_COUNTERS_PACKETS,		/* old 64bit counters */
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index cbf2e27a22b..41183a4d2d6 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -463,6 +463,82 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
+			  const struct nf_conn *ct)
+{
+	struct nlattr *nest_parms;
+
+	read_lock_bh(&sctp_lock);
+	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
+	if (!nest_parms)
+		goto nla_put_failure;
+
+	NLA_PUT_U8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state);
+
+	NLA_PUT_BE32(skb,
+		     CTA_PROTOINFO_SCTP_VTAG_ORIGINAL,
+		     htonl(ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]));
+
+	NLA_PUT_BE32(skb,
+		     CTA_PROTOINFO_SCTP_VTAG_REPLY,
+		     htonl(ct->proto.sctp.vtag[IP_CT_DIR_REPLY]));
+
+	read_unlock_bh(&sctp_lock);
+
+	nla_nest_end(skb, nest_parms);
+
+	return 0;
+
+nla_put_failure:
+	read_unlock_bh(&sctp_lock);
+	return -1;
+}
+
+static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = {
+	[CTA_PROTOINFO_SCTP_STATE]	    = { .type = NLA_U8 },
+	[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]  = { .type = NLA_U32 },
+	[CTA_PROTOINFO_SCTP_VTAG_REPLY]     = { .type = NLA_U32 },
+};
+
+static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
+{
+	struct nlattr *attr = cda[CTA_PROTOINFO_SCTP];
+	struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1];
+	int err;
+
+	/* updates may not contain the internal protocol info, skip parsing */
+	if (!attr)
+		return 0;
+
+	err = nla_parse_nested(tb,
+			       CTA_PROTOINFO_SCTP_MAX,
+			       attr,
+			       sctp_nla_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[CTA_PROTOINFO_SCTP_STATE] ||
+	    !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] ||
+	    !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
+		return -EINVAL;
+
+	write_lock_bh(&sctp_lock);
+	ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
+	ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
+		ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]));
+	ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
+		ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]));
+	write_unlock_bh(&sctp_lock);
+
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SYSCTL
 static unsigned int sctp_sysctl_table_users;
 static struct ctl_table_header *sctp_sysctl_header;
@@ -591,6 +667,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.new 			= sctp_new,
 	.me 			= THIS_MODULE,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.to_nlattr		= sctp_to_nlattr,
+	.from_nlattr		= nlattr_to_sctp,
 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
 	.nla_policy		= nf_ct_port_nla_policy,
@@ -617,6 +695,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.new 			= sctp_new,
 	.me 			= THIS_MODULE,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.to_nlattr		= sctp_to_nlattr,
+	.from_nlattr		= nlattr_to_sctp,
 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
 	.nla_policy		= nf_ct_port_nla_policy,
-- 
cgit v1.2.3-70-g09d2


From 560ee653b67074b805f1b661988a72a0e58811a5 Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Mon, 9 Jun 2008 15:57:24 -0700
Subject: netfilter: ip_tables: add iptables security table for mandatory
 access control rules

The following patch implements a new "security" table for iptables, so
that MAC (SELinux etc.) networking rules can be managed separately to
standard DAC rules.

This is to help with distro integration of the new secmark-based
network controls, per various previous discussions.

The need for a separate table arises from the fact that existing tools
and usage of iptables will likely clash with centralized MAC policy
management.

The SECMARK and CONNSECMARK targets will still be valid in the mangle
table to prevent breakage of existing users.

Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4.h        |   1 +
 include/net/netns/ipv4.h              |   1 +
 net/ipv4/netfilter/Kconfig            |  12 +++
 net/ipv4/netfilter/Makefile           |   1 +
 net/ipv4/netfilter/iptable_security.c | 180 ++++++++++++++++++++++++++++++++++
 net/netfilter/xt_CONNSECMARK.c        |  10 +-
 net/netfilter/xt_SECMARK.c            |  10 +-
 7 files changed, 209 insertions(+), 6 deletions(-)
 create mode 100644 net/ipv4/netfilter/iptable_security.c

(limited to 'net')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index 650318b0c40..29c7727ff0e 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -60,6 +60,7 @@ enum nf_ip_hook_priorities {
 	NF_IP_PRI_MANGLE = -150,
 	NF_IP_PRI_NAT_DST = -100,
 	NF_IP_PRI_FILTER = 0,
+	NF_IP_PRI_SECURITY = 50,
 	NF_IP_PRI_NAT_SRC = 100,
 	NF_IP_PRI_SELINUX_LAST = 225,
 	NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX,
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 34ee348a2cf..6ef90b5fafb 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -36,6 +36,7 @@ struct netns_ipv4 {
 	struct xt_table		*iptable_mangle;
 	struct xt_table		*iptable_raw;
 	struct xt_table		*arptable_filter;
+	struct xt_table		*iptable_security;
 #endif
 
 	int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 2767841a8ce..6e251402506 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -365,6 +365,18 @@ config IP_NF_RAW
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+# security table for MAC policy
+config IP_NF_SECURITY
+	tristate "Security table"
+	depends on IP_NF_IPTABLES
+	depends on SECURITY
+	default m if NETFILTER_ADVANCED=n
+	help
+	  This option adds a `security' table to iptables, for use
+	  with Mandatory Access Control (MAC) policy.
+	 
+	  If unsure, say N.
+
 # ARP tables
 config IP_NF_ARPTABLES
 	tristate "ARP tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index d9b92fbf557..3f31291f37c 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
 obj-$(CONFIG_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
+obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 
 # matches
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
new file mode 100644
index 00000000000..2b472ac2263
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -0,0 +1,180 @@
+/*
+ * "security" table
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("iptables security table, for MAC rules");
+
+#define SECURITY_VALID_HOOKS	(1 << NF_INET_LOCAL_IN) | \
+				(1 << NF_INET_FORWARD) | \
+				(1 << NF_INET_LOCAL_OUT)
+
+static struct
+{
+	struct ipt_replace repl;
+	struct ipt_standard entries[3];
+	struct ipt_error term;
+} initial_table __initdata = {
+	.repl = {
+		.name = "security",
+		.valid_hooks = SECURITY_VALID_HOOKS,
+		.num_entries = 4,
+		.size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+		.hook_entry = {
+			[NF_INET_LOCAL_IN] 	= 0,
+			[NF_INET_FORWARD] 	= sizeof(struct ipt_standard),
+			[NF_INET_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 2,
+		},
+		.underflow = {
+			[NF_INET_LOCAL_IN] 	= 0,
+			[NF_INET_FORWARD] 	= sizeof(struct ipt_standard),
+			[NF_INET_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 2,
+		},
+	},
+	.entries = {
+		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_IN */
+		IPT_STANDARD_INIT(NF_ACCEPT),	/* FORWARD */
+		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
+	},
+	.term = IPT_ERROR_INIT,			/* ERROR */
+};
+
+static struct xt_table security_table = {
+	.name		= "security",
+	.valid_hooks	= SECURITY_VALID_HOOKS,
+	.lock		= __RW_LOCK_UNLOCKED(security_table.lock),
+	.me		= THIS_MODULE,
+	.af		= AF_INET,
+};
+
+static unsigned int
+ipt_local_in_hook(unsigned int hook,
+		  struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  int (*okfn)(struct sk_buff *))
+{
+	return ipt_do_table(skb, hook, in, out,
+			    nf_local_in_net(in, out)->ipv4.iptable_security);
+}
+
+static unsigned int
+ipt_forward_hook(unsigned int hook,
+		 struct sk_buff *skb,
+		 const struct net_device *in,
+		 const struct net_device *out,
+		 int (*okfn)(struct sk_buff *))
+{
+	return ipt_do_table(skb, hook, in, out,
+			    nf_forward_net(in, out)->ipv4.iptable_security);
+}
+
+static unsigned int
+ipt_local_out_hook(unsigned int hook,
+		   struct sk_buff *skb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	/* Somebody is playing with raw sockets. */
+	if (skb->len < sizeof(struct iphdr)
+	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
+		if (net_ratelimit())
+			printk(KERN_INFO "iptable_security: ignoring short "
+			       "SOCK_RAW packet.\n");
+		return NF_ACCEPT;
+	}
+	return ipt_do_table(skb, hook, in, out,
+			    nf_local_out_net(in, out)->ipv4.iptable_security);
+}
+
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
+	{
+		.hook		= ipt_local_in_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_SECURITY,
+	},
+	{
+		.hook		= ipt_forward_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_INET_FORWARD,
+		.priority	= NF_IP_PRI_SECURITY,
+	},
+	{
+		.hook		= ipt_local_out_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_SECURITY,
+	},
+};
+
+static int __net_init iptable_security_net_init(struct net *net)
+{
+	net->ipv4.iptable_security =
+		ipt_register_table(net, &security_table, &initial_table.repl);
+
+	if (IS_ERR(net->ipv4.iptable_security))
+		return PTR_ERR(net->ipv4.iptable_security);
+
+	return 0;
+}
+
+static void __net_exit iptable_security_net_exit(struct net *net)
+{
+	ipt_unregister_table(net->ipv4.iptable_security);
+}
+
+static struct pernet_operations iptable_security_net_ops = {
+	.init = iptable_security_net_init,
+	.exit = iptable_security_net_exit,
+};
+
+static int __init iptable_security_init(void)
+{
+	int ret;
+
+	ret = register_pernet_subsys(&iptable_security_net_ops);
+        if (ret < 0)
+		return ret;
+
+	ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+	if (ret < 0)
+		goto cleanup_table;
+
+	return ret;
+
+cleanup_table:
+	unregister_pernet_subsys(&iptable_security_net_ops);
+	return ret;
+}
+
+static void __exit iptable_security_fini(void)
+{
+	nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+	unregister_pernet_subsys(&iptable_security_net_ops);
+}
+
+module_init(iptable_security_init);
+module_exit(iptable_security_fini);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 211189eb2b6..76ca1f2421e 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -8,7 +8,7 @@
  *   Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
  *    by Henrik Nordstrom <hno@marasystems.com>
  *
- * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -94,6 +94,12 @@ connsecmark_tg_check(const char *tablename, const void *entry,
 {
 	const struct xt_connsecmark_target_info *info = targinfo;
 
+	if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) {
+		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
+		       "or \'security\' tables, not \'%s\'.\n", tablename);
+		return false;
+	}
+
 	switch (info->mode) {
 	case CONNSECMARK_SAVE:
 	case CONNSECMARK_RESTORE:
@@ -126,7 +132,6 @@ static struct xt_target connsecmark_tg_reg[] __read_mostly = {
 		.destroy	= connsecmark_tg_destroy,
 		.target		= connsecmark_tg,
 		.targetsize	= sizeof(struct xt_connsecmark_target_info),
-		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
 	{
@@ -136,7 +141,6 @@ static struct xt_target connsecmark_tg_reg[] __read_mostly = {
 		.destroy	= connsecmark_tg_destroy,
 		.target		= connsecmark_tg,
 		.targetsize	= sizeof(struct xt_connsecmark_target_info),
-		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
 };
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index c0284856ccd..94f87ee7552 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -5,7 +5,7 @@
  * Based on the nfmark match by:
  * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
  *
- * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -89,6 +89,12 @@ secmark_tg_check(const char *tablename, const void *entry,
 {
 	struct xt_secmark_target_info *info = targinfo;
 
+	if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) {
+		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
+		       "or \'security\' tables, not \'%s\'.\n", tablename);
+		return false;
+	}
+
 	if (mode && mode != info->mode) {
 		printk(KERN_INFO PFX "mode already set to %hu cannot mix with "
 		       "rules for mode %hu\n", mode, info->mode);
@@ -127,7 +133,6 @@ static struct xt_target secmark_tg_reg[] __read_mostly = {
 		.destroy	= secmark_tg_destroy,
 		.target		= secmark_tg,
 		.targetsize	= sizeof(struct xt_secmark_target_info),
-		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
 	{
@@ -137,7 +142,6 @@ static struct xt_target secmark_tg_reg[] __read_mostly = {
 		.destroy	= secmark_tg_destroy,
 		.target		= secmark_tg,
 		.targetsize	= sizeof(struct xt_secmark_target_info),
-		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
 };
-- 
cgit v1.2.3-70-g09d2


From 17e6e59f0a1d7188d783c15dc3ccebd95a0840cd Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Mon, 9 Jun 2008 15:58:05 -0700
Subject: netfilter: ip6_tables: add ip6tables security table

This is a port of the IPv4 security table for IPv6.

Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv6.h         |   1 +
 include/net/netns/ipv6.h               |   1 +
 net/ipv6/netfilter/Kconfig             |  12 +++
 net/ipv6/netfilter/Makefile            |   1 +
 net/ipv6/netfilter/ip6table_security.c | 172 +++++++++++++++++++++++++++++++++
 5 files changed, 187 insertions(+)
 create mode 100644 net/ipv6/netfilter/ip6table_security.c

(limited to 'net')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 3475a65dae9..fd50988b83e 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -64,6 +64,7 @@ enum nf_ip6_hook_priorities {
 	NF_IP6_PRI_MANGLE = -150,
 	NF_IP6_PRI_NAT_DST = -100,
 	NF_IP6_PRI_FILTER = 0,
+	NF_IP6_PRI_SECURITY = 50,
 	NF_IP6_PRI_NAT_SRC = 100,
 	NF_IP6_PRI_SELINUX_LAST = 225,
 	NF_IP6_PRI_LAST = INT_MAX,
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index ac053be6c25..5bacd838e88 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -35,6 +35,7 @@ struct netns_ipv6 {
 	struct xt_table		*ip6table_filter;
 	struct xt_table		*ip6table_mangle;
 	struct xt_table		*ip6table_raw;
+	struct xt_table		*ip6table_security;
 #endif
 	struct rt6_info         *ip6_null_entry;
 	struct rt6_statistics   *rt6_stats;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6cae5475737..689dec899c5 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -208,5 +208,17 @@ config IP6_NF_RAW
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+# security table for MAC policy
+config IP6_NF_SECURITY
+       tristate "Security table"
+       depends on IP6_NF_IPTABLES
+       depends on SECURITY
+       default m if NETFILTER_ADVANCED=n
+       help
+         This option adds a `security' table to iptables, for use
+         with Mandatory Access Control (MAC) policy.
+        
+         If unsure, say N.
+
 endmenu
 
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index fbf2c14ed88..3f17c948eef 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
 obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
 obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
+obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 
 # objects for l3 independent conntrack
 nf_conntrack_ipv6-objs  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
new file mode 100644
index 00000000000..063a3d9c3c6
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -0,0 +1,172 @@
+/*
+ * "security" table for IPv6
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
+
+#define SECURITY_VALID_HOOKS	(1 << NF_INET_LOCAL_IN) | \
+				(1 << NF_INET_FORWARD) | \
+				(1 << NF_INET_LOCAL_OUT)
+
+static struct
+{
+	struct ip6t_replace repl;
+	struct ip6t_standard entries[3];
+	struct ip6t_error term;
+} initial_table __initdata = {
+	.repl = {
+		.name = "security",
+		.valid_hooks = SECURITY_VALID_HOOKS,
+		.num_entries = 4,
+		.size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
+		.hook_entry = {
+			[NF_INET_LOCAL_IN] 	= 0,
+			[NF_INET_FORWARD] 	= sizeof(struct ip6t_standard),
+			[NF_INET_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 2,
+		},
+		.underflow = {
+			[NF_INET_LOCAL_IN] 	= 0,
+			[NF_INET_FORWARD] 	= sizeof(struct ip6t_standard),
+			[NF_INET_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 2,
+		},
+	},
+	.entries = {
+		IP6T_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_IN */
+		IP6T_STANDARD_INIT(NF_ACCEPT),	/* FORWARD */
+		IP6T_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
+	},
+	.term = IP6T_ERROR_INIT,		/* ERROR */
+};
+
+static struct xt_table security_table = {
+	.name		= "security",
+	.valid_hooks	= SECURITY_VALID_HOOKS,
+	.lock		= __RW_LOCK_UNLOCKED(security_table.lock),
+	.me		= THIS_MODULE,
+	.af		= AF_INET6,
+};
+
+static unsigned int
+ip6t_local_in_hook(unsigned int hook,
+		   struct sk_buff *skb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	return ip6t_do_table(skb, hook, in, out,
+			     init_net.ipv6.ip6table_security);
+}
+
+static unsigned int
+ip6t_forward_hook(unsigned int hook,
+		  struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  int (*okfn)(struct sk_buff *))
+{
+	return ip6t_do_table(skb, hook, in, out,
+			     init_net.ipv6.ip6table_security);
+}
+
+static unsigned int
+ip6t_local_out_hook(unsigned int hook,
+		    struct sk_buff *skb,
+		    const struct net_device *in,
+		    const struct net_device *out,
+		    int (*okfn)(struct sk_buff *))
+{
+	/* TBD: handle short packets via raw socket */
+	return ip6t_do_table(skb, hook, in, out,
+			     init_net.ipv6.ip6table_security);
+}
+
+static struct nf_hook_ops ip6t_ops[] __read_mostly = {
+	{
+		.hook		= ip6t_local_in_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_SECURITY,
+	},
+	{
+		.hook		= ip6t_forward_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_INET_FORWARD,
+		.priority	= NF_IP6_PRI_SECURITY,
+	},
+	{
+		.hook		= ip6t_local_out_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_SECURITY,
+	},
+};
+
+static int __net_init ip6table_security_net_init(struct net *net)
+{
+	net->ipv6.ip6table_security =
+		ip6t_register_table(net, &security_table, &initial_table.repl);
+
+	if (IS_ERR(net->ipv6.ip6table_security))
+		return PTR_ERR(net->ipv6.ip6table_security);
+
+	return 0;
+}
+
+static void __net_exit ip6table_security_net_exit(struct net *net)
+{
+	ip6t_unregister_table(net->ipv6.ip6table_security);
+}
+
+static struct pernet_operations ip6table_security_net_ops = {
+	.init = ip6table_security_net_init,
+	.exit = ip6table_security_net_exit,
+};
+
+static int __init ip6table_security_init(void)
+{
+	int ret;
+
+	ret = register_pernet_subsys(&ip6table_security_net_ops);
+	if (ret < 0)
+		return ret;
+
+	ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
+	if (ret < 0)
+		goto cleanup_table;
+
+	return ret;
+
+cleanup_table:
+	unregister_pernet_subsys(&ip6table_security_net_ops);
+	return ret;
+}
+
+static void __exit ip6table_security_fini(void)
+{
+	nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
+	unregister_pernet_subsys(&ip6table_security_net_ops);
+}
+
+module_init(ip6table_security_init);
+module_exit(ip6table_security_fini);
-- 
cgit v1.2.3-70-g09d2


From 31d8519c9cf87e9d8a0cc5b9734fda02af66d7e2 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Mon, 9 Jun 2008 15:58:39 -0700
Subject: netfilter: nf_conntrack_extend: use krealloc() in
 nf_conntrack_extend.c V2

The ksize() API is going away because it is being abused and it doesn't even
work consistenly across different allocators. Therefore, convert
net/netfilter/nf_conntrack_extend.c to use krealloc().

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_extend.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bcc19fa4ed1..ba1c4915e9e 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -88,13 +88,11 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 	newlen = newoff + t->len;
 	rcu_read_unlock();
 
-	if (newlen >= ksize(ct->ext)) {
-		new = kmalloc(newlen, gfp);
-		if (!new)
-			return NULL;
-
-		memcpy(new, ct->ext, ct->ext->len);
+	new = krealloc(ct->ext, newlen, gfp);
+	if (!new)
+		return NULL;
 
+	if (new != ct->ext) {
 		for (i = 0; i < NF_CT_EXT_NUM; i++) {
 			if (!nf_ct_ext_exist(ct, i))
 				continue;
-- 
cgit v1.2.3-70-g09d2


From 51091764f26ec36c02e35166f083193a30f426fc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 9 Jun 2008 15:59:06 -0700
Subject: netfilter: nf_conntrack: add nf_ct_kill()

Encapsulate the common

	if (del_timer(&ct->timeout))
		ct->timeout.function((unsigned long)ct)

sequence in a new function.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack.h           | 2 ++
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   | 5 ++---
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 5 ++---
 net/netfilter/nf_conntrack_core.c              | 7 +++++++
 net/netfilter/nf_conntrack_netlink.c           | 3 +--
 net/netfilter/nf_conntrack_proto_dccp.c        | 3 +--
 net/netfilter/nf_conntrack_proto_tcp.c         | 9 +++------
 7 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 2dbd6c015b9..fc19ab23eea 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -223,6 +223,8 @@ static inline void nf_ct_refresh(struct nf_conn *ct,
 	__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
 }
 
+extern void nf_ct_kill(struct nf_conn *ct);
+
 /* These are for NAT.  Icky. */
 /* Update TCP window tracking data when NAT mangles the packet */
 extern void nf_conntrack_tcp_update(const struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 78ab19accac..0e21a46184f 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -87,9 +87,8 @@ static int icmp_packet(struct nf_conn *ct,
 	   means this will only run once even if count hits zero twice
 	   (theoretically possible with SMP) */
 	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-		if (atomic_dec_and_test(&ct->proto.icmp.count)
-		    && del_timer(&ct->timeout))
-			ct->timeout.function((unsigned long)ct);
+		if (atomic_dec_and_test(&ct->proto.icmp.count))
+			nf_ct_kill(ct);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
 		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index ee713b03e9e..fe081b90e05 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -89,9 +89,8 @@ static int icmpv6_packet(struct nf_conn *ct,
 	   means this will only run once even if count hits zero twice
 	   (theoretically possible with SMP) */
 	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-		if (atomic_dec_and_test(&ct->proto.icmp.count)
-		    && del_timer(&ct->timeout))
-			ct->timeout.function((unsigned long)ct);
+		if (atomic_dec_and_test(&ct->proto.icmp.count))
+			nf_ct_kill(ct);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
 		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c4b1799da5d..79b07c35eb8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -848,6 +848,13 @@ acct:
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
+void nf_ct_kill(struct nf_conn *ct)
+{
+	if (del_timer(&ct->timeout))
+		ct->timeout.function((unsigned long)ct);
+}
+EXPORT_SYMBOL_GPL(nf_ct_kill);
+
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 
 #include <linux/netfilter/nfnetlink.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 13918c1fbf6..ab655f660df 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -812,9 +812,8 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			return -ENOENT;
 		}
 	}
-	if (del_timer(&ct->timeout))
-		ct->timeout.function((unsigned long)ct);
 
+	nf_ct_kill(ct);
 	nf_ct_put(ct);
 
 	return 0;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index afb4a1861d2..223742f371f 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -475,8 +475,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	if (type == DCCP_PKT_RESET &&
 	    !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
 		/* Tear down connection immediately if only reply is a RESET */
-		if (del_timer(&ct->timeout))
-			ct->timeout.function((unsigned long)ct);
+		nf_ct_kill(ct);
 		return NF_ACCEPT;
 	}
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index ba94004fe32..c4aa11e0140 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -843,8 +843,7 @@ static int tcp_packet(struct nf_conn *ct,
 			/* Attempt to reopen a closed/aborted connection.
 			 * Delete this connection and look up again. */
 			write_unlock_bh(&tcp_lock);
-			if (del_timer(&ct->timeout))
-				ct->timeout.function((unsigned long)ct);
+			nf_ct_kill(ct);
 			return -NF_REPEAT;
 		}
 		/* Fall through */
@@ -877,8 +876,7 @@ static int tcp_packet(struct nf_conn *ct,
 			if (LOG_INVALID(IPPROTO_TCP))
 				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 					  "nf_ct_tcp: killing out of sync session ");
-			if (del_timer(&ct->timeout))
-				ct->timeout.function((unsigned long)ct);
+			nf_ct_kill(ct);
 			return -NF_DROP;
 		}
 		ct->proto.tcp.last_index = index;
@@ -961,8 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
 		   problem case, so we can delete the conntrack
 		   immediately.  --RR */
 		if (th->rst) {
-			if (del_timer(&ct->timeout))
-				ct->timeout.function((unsigned long)ct);
+			nf_ct_kill(ct);
 			return NF_ACCEPT;
 		}
 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
-- 
cgit v1.2.3-70-g09d2


From 718d4ad98e272daebc258e49dc02f52a6a8de9d3 Mon Sep 17 00:00:00 2001
From: Fabian Hugelshofer <hugelshofer2006@gmx.ch>
Date: Mon, 9 Jun 2008 15:59:40 -0700
Subject: netfilter: nf_conntrack: properly account terminating packets

Currently the last packet of a connection isn't accounted when its causing
abnormal termination.

Introduces nf_ct_kill_acct() which increments the accounting counters on
conntrack kill. The new function was necessary, because there are calls
to nf_ct_kill() which don't need accounting:

nf_conntrack_proto_tcp.c line ~847:
Kills ct and returns NF_REPEAT. We don't want to count twice.

nf_conntrack_proto_tcp.c line ~880:
Kills ct and returns NF_DROP. I think we don't want to count dropped
packets.

nf_conntrack_netlink.c line ~824:
As far as I can see ctnetlink_del_conntrack() is used to destroy a
conntrack on behalf of the user. There is an sk_buff, but I don't think
this is an actual packet. Incrementing counters here is therefore not
desired.

Signed-off-by: Fabian Hugelshofer <hugelshofer2006@gmx.ch>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack.h           | 19 ++++++++++++++++++-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  2 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  2 +-
 net/netfilter/nf_conntrack_core.c              | 16 ++++++++++++++--
 net/netfilter/nf_conntrack_proto_dccp.c        |  2 +-
 net/netfilter/nf_conntrack_proto_tcp.c         |  2 +-
 6 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index fc19ab23eea..d77dec768dc 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -223,7 +223,24 @@ static inline void nf_ct_refresh(struct nf_conn *ct,
 	__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
 }
 
-extern void nf_ct_kill(struct nf_conn *ct);
+extern void __nf_ct_kill_acct(struct nf_conn *ct,
+				enum ip_conntrack_info ctinfo,
+				const struct sk_buff *skb,
+				int do_acct);
+
+/* kill conntrack and do accounting */
+static inline void nf_ct_kill_acct(struct nf_conn *ct,
+				enum ip_conntrack_info ctinfo,
+				const struct sk_buff *skb)
+{
+	__nf_ct_kill_acct(ct, ctinfo, skb, 1);
+}
+
+/* kill conntrack without accounting */
+static inline void nf_ct_kill(struct nf_conn *ct)
+{
+	__nf_ct_kill_acct(ct, 0, NULL, 0);
+}
 
 /* These are for NAT.  Icky. */
 /* Update TCP window tracking data when NAT mangles the packet */
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 0e21a46184f..97791048fa9 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -88,7 +88,7 @@ static int icmp_packet(struct nf_conn *ct,
 	   (theoretically possible with SMP) */
 	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
 		if (atomic_dec_and_test(&ct->proto.icmp.count))
-			nf_ct_kill(ct);
+			nf_ct_kill_acct(ct, ctinfo, skb);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
 		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index fe081b90e05..14d47d83354 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -90,7 +90,7 @@ static int icmpv6_packet(struct nf_conn *ct,
 	   (theoretically possible with SMP) */
 	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
 		if (atomic_dec_and_test(&ct->proto.icmp.count))
-			nf_ct_kill(ct);
+			nf_ct_kill_acct(ct, ctinfo, skb);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
 		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 79b07c35eb8..e6d645221d5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -848,12 +848,24 @@ acct:
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
-void nf_ct_kill(struct nf_conn *ct)
+void __nf_ct_kill_acct(struct nf_conn *ct,
+		enum ip_conntrack_info ctinfo,
+		const struct sk_buff *skb,
+		int do_acct)
 {
+#ifdef CONFIG_NF_CT_ACCT
+	if (do_acct) {
+		spin_lock_bh(&nf_conntrack_lock);
+		ct->counters[CTINFO2DIR(ctinfo)].packets++;
+		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
+			skb->len - skb_network_offset(skb);
+		spin_unlock_bh(&nf_conntrack_lock);
+	}
+#endif
 	if (del_timer(&ct->timeout))
 		ct->timeout.function((unsigned long)ct);
 }
-EXPORT_SYMBOL_GPL(nf_ct_kill);
+EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 223742f371f..e7866dd3cde 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -475,7 +475,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	if (type == DCCP_PKT_RESET &&
 	    !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
 		/* Tear down connection immediately if only reply is a RESET */
-		nf_ct_kill(ct);
+		nf_ct_kill_acct(ct, ctinfo, skb);
 		return NF_ACCEPT;
 	}
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index c4aa11e0140..8db13fba10b 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -959,7 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
 		   problem case, so we can delete the conntrack
 		   immediately.  --RR */
 		if (th->rst) {
-			nf_ct_kill(ct);
+			nf_ct_kill_acct(ct, ctinfo, skb);
 			return NF_ACCEPT;
 		}
 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
-- 
cgit v1.2.3-70-g09d2


From e57dce60c7478fdeeb9a1ebd311261ec901afe4d Mon Sep 17 00:00:00 2001
From: Fabian Hugelshofer <hugelshofer2006@gmx.ch>
Date: Mon, 9 Jun 2008 15:59:58 -0700
Subject: netfilter: ctnetlink: include conntrack status in destroy event
 message

When a conntrack is destroyed, the connection status does not get
exported to netlink. I don't see a reason for not doing so. This patch
exports the status on all conntrack events.

Signed-off-by: Fabian Hugelshofer <hugelshofer2006@gmx.ch>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ab655f660df..63c4e1f299b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -475,14 +475,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	if (ctnetlink_dump_id(skb, ct) < 0)
 		goto nla_put_failure;
 
+	if (ctnetlink_dump_status(skb, ct) < 0)
+		goto nla_put_failure;
+
 	if (events & IPCT_DESTROY) {
 		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
 		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
 			goto nla_put_failure;
 	} else {
-		if (ctnetlink_dump_status(skb, ct) < 0)
-			goto nla_put_failure;
-
 		if (ctnetlink_dump_timeout(skb, ct) < 0)
 			goto nla_put_failure;
 
-- 
cgit v1.2.3-70-g09d2


From e64bda89b8fe81cce9b4a20885d2c204c2d52532 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Mon, 9 Jun 2008 16:00:45 -0700
Subject: netfilter: {ip,ip6,nfnetlink}_queue: misc cleanups

- No need to perform data_len = 0 in the switch command, since data_len
  is initialized to 0 in the beginning of the ipq_build_packet_message()
  method.

- {ip,ip6}_queue: We can reach nlmsg_failure only from one place; skb is
  sure to be NULL when getting there; since skb is NULL, there is no need
  to check this fact and call kfree_skb().

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_queue.c   | 3 ---
 net/ipv6/netfilter/ip6_queue.c  | 3 ---
 net/netfilter/nfnetlink_queue.c | 1 -
 3 files changed, 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 26a37cedcf2..aa33a4a7a71 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -156,7 +156,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 	case IPQ_COPY_META:
 	case IPQ_COPY_NONE:
 		size = NLMSG_SPACE(sizeof(*pmsg));
-		data_len = 0;
 		break;
 
 	case IPQ_COPY_PACKET:
@@ -224,8 +223,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 	return skb;
 
 nlmsg_failure:
-	if (skb)
-		kfree_skb(skb);
 	*errp = -EINVAL;
 	printk(KERN_ERR "ip_queue: error creating packet message\n");
 	return NULL;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 2eff3ae8977..1b8815f6153 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -159,7 +159,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 	case IPQ_COPY_META:
 	case IPQ_COPY_NONE:
 		size = NLMSG_SPACE(sizeof(*pmsg));
-		data_len = 0;
 		break;
 
 	case IPQ_COPY_PACKET:
@@ -226,8 +225,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 	return skb;
 
 nlmsg_failure:
-	if (skb)
-		kfree_skb(skb);
 	*errp = -EINVAL;
 	printk(KERN_ERR "ip6_queue: error creating packet message\n");
 	return NULL;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3447025ce06..04e9c965f8c 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -243,7 +243,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	switch ((enum nfqnl_config_mode)queue->copy_mode) {
 	case NFQNL_COPY_META:
 	case NFQNL_COPY_NONE:
-		data_len = 0;
 		break;
 
 	case NFQNL_COPY_PACKET:
-- 
cgit v1.2.3-70-g09d2


From 9a727a250c676334efdcb71a5b2ad4603addda06 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 10 Jun 2008 13:31:23 -0400
Subject: net/mac80211/ieee80211_i.h: fix-up merge damage

These definitions were originally removed in "mac80211: remove channel
use statistics".

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 884be4d100f..b19bd16703b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -853,27 +853,6 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 
 /* ieee80211_ioctl.c */
 extern const struct iw_handler_def ieee80211_iw_handler_def;
-
-/* Least common multiple of the used rates (in 100 kbps). This is used to
- * calculate rate_inv values for each rate so that only integers are needed. */
-#define CHAN_UTIL_RATE_LCM 95040
-/* 1 usec is 1/8 * (95040/10) = 1188 */
-#define CHAN_UTIL_PER_USEC 1188
-/* Amount of bits to shift the result right to scale the total utilization
- * to values that will not wrap around 32-bit integers. */
-#define CHAN_UTIL_SHIFT 9
-/* Theoretical maximum of channel utilization counter in 10 ms (stat_time=1):
- * (CHAN_UTIL_PER_USEC * 10000) >> CHAN_UTIL_SHIFT = 23203. So dividing the
- * raw value with about 23 should give utilization in 10th of a percentage
- * (1/1000). However, utilization is only estimated and not all intervals
- * between frames etc. are calculated. 18 seems to give numbers that are closer
- * to the real maximum. */
-#define CHAN_UTIL_PER_10MS 18
-#define CHAN_UTIL_HDR_LONG (202 * CHAN_UTIL_PER_USEC)
-#define CHAN_UTIL_HDR_SHORT (40 * CHAN_UTIL_PER_USEC)
-
-
-/* ieee80211_ioctl.c */
 int ieee80211_set_freq(struct net_device *dev, int freq);
 
 /* ieee80211_sta.c */
-- 
cgit v1.2.3-70-g09d2


From 573bf470e693f73a6ac437b17a64a10902ba54bf Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 10 Jun 2008 15:40:04 -0700
Subject: ipv4 addr: Send netlink notification for address label changes

Makes people happy who try to keep a list of addresses up to date by
listening to notifications.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 79a7ef6209f..61011e1d580 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1013,7 +1013,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 		if (named++ == 0)
-			continue;
+			goto skip;
 		dot = strchr(old, ':');
 		if (dot == NULL) {
 			sprintf(old, ":%d", named);
@@ -1024,6 +1024,8 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
 		} else {
 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
 		}
+skip:
+		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9cba632e24ef18e2905c18997a8f24e8d7a29e71 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 23 Apr 2008 14:34:00 +0300
Subject: ipv6 mcast: Remove unused macro (MLDV2_QQIC) from mcast.c.

This patch removes  MLDV2_QQIC macro from mcast.c
as it is unused.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/mcast.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fd632dd7f98..ee30ec4c3a6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -164,7 +164,6 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 	((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \
 	(MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))
 
-#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
 #define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
 
 #define IPV6_MLD_MAX_MSF	64
-- 
cgit v1.2.3-70-g09d2


From 7d120c55df02a2b87f4aa317f1f04e398398dcdc Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 23 Apr 2008 14:35:13 +0300
Subject: ipv6 mroute: Use MRT6_VERSION instead of MRT_VERSION in ip6mr.c.

MRT6_VERSION should be used instead of MRT_VERSION in ip6mr.c.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ip6mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index bf268b38696..0b11b378d89 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1240,7 +1240,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 
 #endif
 	/*
-	 *	Spurious command, or MRT_VERSION which you cannot
+	 *	Spurious command, or MRT6_VERSION which you cannot
 	 *	set.
 	 */
 	default:
-- 
cgit v1.2.3-70-g09d2


From 2b5ead46442d80928cce987ae6acf3fe99968ad8 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 13 May 2008 01:16:24 +0900
Subject: ipv6 addrconf: Introduce addrconf_is_prefix_route() helper.

This inline function, for readability, returns if the route
is a "prefix" route regardless if it was installed by RA or by
hand.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/addrconf.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 147588f4c7c..9ea4e62741e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -231,6 +231,12 @@ static inline int addrconf_qdisc_ok(struct net_device *dev)
 	return (dev->qdisc != &noop_qdisc);
 }
 
+/* Check if a route is valid prefix route */
+static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
+{
+	return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0);
+}
+
 static void addrconf_del_timer(struct inet6_ifaddr *ifp)
 {
 	if (del_timer(&ifp->timer))
@@ -777,7 +783,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 		ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
 		rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1);
 
-		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+		if (rt && addrconf_is_prefix_route(rt)) {
 			if (onlink == 0) {
 				ip6_del_rt(rt);
 				rt = NULL;
@@ -1788,7 +1794,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 		rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL,
 				dev->ifindex, 1);
 
-		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+		if (rt && addrconf_is_prefix_route(rt)) {
 			/* Autoconf prefix route */
 			if (valid_lft == 0) {
 				ip6_del_rt(rt);
-- 
cgit v1.2.3-70-g09d2


From 3de232554a91adc74e80dc15c304be806bd7e1f9 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Wed, 28 May 2008 14:51:24 +0200
Subject: ipv6 netns: Address labels per namespace

This pacth makes IPv6 address labels per network namespace.
It keeps the global label tables, ip6addrlbl_table, but
adds a 'net' member to each ip6addrlbl_entry.
This new member is taken into account when matching labels.

Changelog
=========
* v1: Initial version
* v2:
  * Minize the penalty when network namespaces are not configured:
      *  the 'net' member is added only if CONFIG_NET_NS is
         defined. This saves space when network namespaces are not
         configured.
      * 'net' value is retrieved with the inlined function
         ip6addrlbl_net() that always return &init_net when
         CONFIG_NET_NS is not defined.
  * 'net' member in ip6addrlbl_entry renamed to the less generic
    'lbl_net' name (helps code search).

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/addrconf.h |   3 +-
 net/ipv6/addrconf.c    |  12 +++---
 net/ipv6/addrlabel.c   | 106 ++++++++++++++++++++++++++++++++++---------------
 3 files changed, 84 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index bbd3d583c6e..06b28142b3a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -121,7 +121,8 @@ static inline int addrconf_finite_timeout(unsigned long timeout)
  */
 extern int			ipv6_addr_label_init(void);
 extern void			ipv6_addr_label_rtnl_register(void);
-extern u32			ipv6_addr_label(const struct in6_addr *addr,
+extern u32			ipv6_addr_label(struct net *net,
+						const struct in6_addr *addr,
 						int type, int ifindex);
 
 /*
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9ea4e62741e..fa43374e85c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -964,7 +964,8 @@ static inline int ipv6_saddr_preferred(int type)
 	return 0;
 }
 
-static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score,
+static int ipv6_get_saddr_eval(struct net *net,
+			       struct ipv6_saddr_score *score,
 			       struct ipv6_saddr_dst *dst,
 			       int i)
 {
@@ -1043,7 +1044,8 @@ static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score,
 		break;
 	case IPV6_SADDR_RULE_LABEL:
 		/* Rule 6: Prefer matching label */
-		ret = ipv6_addr_label(&score->ifa->addr, score->addr_type,
+		ret = ipv6_addr_label(net,
+				      &score->ifa->addr, score->addr_type,
 				      score->ifa->idev->dev->ifindex) == dst->label;
 		break;
 #ifdef CONFIG_IPV6_PRIVACY
@@ -1097,7 +1099,7 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev,
 	dst.addr = daddr;
 	dst.ifindex = dst_dev ? dst_dev->ifindex : 0;
 	dst.scope = __ipv6_addr_src_scope(dst_type);
-	dst.label = ipv6_addr_label(daddr, dst_type, dst.ifindex);
+	dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex);
 	dst.prefs = prefs;
 
 	hiscore->rule = -1;
@@ -1165,8 +1167,8 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev,
 			for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
 				int minihiscore, miniscore;
 
-				minihiscore = ipv6_get_saddr_eval(hiscore, &dst, i);
-				miniscore = ipv6_get_saddr_eval(score, &dst, i);
+				minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
+				miniscore = ipv6_get_saddr_eval(net, score, &dst, i);
 
 				if (minihiscore > miniscore) {
 					if (i == IPV6_SADDR_RULE_SCOPE &&
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 9bfa8846f26..08909039d87 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -29,6 +29,9 @@
  */
 struct ip6addrlbl_entry
 {
+#ifdef CONFIG_NET_NS
+	struct net *lbl_net;
+#endif
 	struct in6_addr prefix;
 	int prefixlen;
 	int ifindex;
@@ -46,6 +49,16 @@ static struct ip6addrlbl_table
 	u32 seq;
 } ip6addrlbl_table;
 
+static inline
+struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
+{
+#ifdef CONFIG_NET_NS
+	return lbl->lbl_net;
+#else
+	return &init_net;
+#endif
+}
+
 /*
  * Default policy table (RFC3484 + extensions)
  *
@@ -65,7 +78,7 @@ static struct ip6addrlbl_table
 
 #define IPV6_ADDR_LABEL_DEFAULT	0xffffffffUL
 
-static const __initdata struct ip6addrlbl_init_table
+static const __net_initdata struct ip6addrlbl_init_table
 {
 	const struct in6_addr *prefix;
 	int prefixlen;
@@ -108,6 +121,9 @@ static const __initdata struct ip6addrlbl_init_table
 /* Object management */
 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
 {
+#ifdef CONFIG_NET_NS
+	release_net(p->lbl_net);
+#endif
 	kfree(p);
 }
 
@@ -128,10 +144,13 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
 }
 
 /* Find label */
-static int __ip6addrlbl_match(struct ip6addrlbl_entry *p,
+static int __ip6addrlbl_match(struct net *net,
+			      struct ip6addrlbl_entry *p,
 			      const struct in6_addr *addr,
 			      int addrtype, int ifindex)
 {
+	if (!net_eq(ip6addrlbl_net(p), net))
+		return 0;
 	if (p->ifindex && p->ifindex != ifindex)
 		return 0;
 	if (p->addrtype && p->addrtype != addrtype)
@@ -141,19 +160,21 @@ static int __ip6addrlbl_match(struct ip6addrlbl_entry *p,
 	return 1;
 }
 
-static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr,
+static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
+						  const struct in6_addr *addr,
 						  int type, int ifindex)
 {
 	struct hlist_node *pos;
 	struct ip6addrlbl_entry *p;
 	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
-		if (__ip6addrlbl_match(p, addr, type, ifindex))
+		if (__ip6addrlbl_match(net, p, addr, type, ifindex))
 			return p;
 	}
 	return NULL;
 }
 
-u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
+u32 ipv6_addr_label(struct net *net,
+		    const struct in6_addr *addr, int type, int ifindex)
 {
 	u32 label;
 	struct ip6addrlbl_entry *p;
@@ -161,7 +182,7 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
 	type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
 
 	rcu_read_lock();
-	p = __ipv6_addr_label(addr, type, ifindex);
+	p = __ipv6_addr_label(net, addr, type, ifindex);
 	label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
 	rcu_read_unlock();
 
@@ -174,7 +195,8 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
 }
 
 /* allocate one entry */
-static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
+						 const struct in6_addr *prefix,
 						 int prefixlen, int ifindex,
 						 u32 label)
 {
@@ -216,6 +238,9 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
 	newp->addrtype = addrtype;
 	newp->label = label;
 	INIT_HLIST_NODE(&newp->list);
+#ifdef CONFIG_NET_NS
+	newp->lbl_net = hold_net(net);
+#endif
 	atomic_set(&newp->refcnt, 1);
 	return newp;
 }
@@ -237,6 +262,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
 		hlist_for_each_entry_safe(p, pos, n,
 					  &ip6addrlbl_table.head, list) {
 			if (p->prefixlen == newp->prefixlen &&
+			    net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
 			    p->ifindex == newp->ifindex &&
 			    ipv6_addr_equal(&p->prefix, &newp->prefix)) {
 				if (!replace) {
@@ -261,7 +287,8 @@ out:
 }
 
 /* add a label */
-static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
+static int ip6addrlbl_add(struct net *net,
+			  const struct in6_addr *prefix, int prefixlen,
 			  int ifindex, u32 label, int replace)
 {
 	struct ip6addrlbl_entry *newp;
@@ -274,7 +301,7 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
 			(unsigned int)label,
 			replace);
 
-	newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
+	newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
 	if (IS_ERR(newp))
 		return PTR_ERR(newp);
 	spin_lock(&ip6addrlbl_table.lock);
@@ -286,7 +313,8 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
 }
 
 /* remove a label */
-static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+static int __ip6addrlbl_del(struct net *net,
+			    const struct in6_addr *prefix, int prefixlen,
 			    int ifindex)
 {
 	struct ip6addrlbl_entry *p = NULL;
@@ -300,6 +328,7 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
 
 	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
 		if (p->prefixlen == prefixlen &&
+		    net_eq(ip6addrlbl_net(p), net) &&
 		    p->ifindex == ifindex &&
 		    ipv6_addr_equal(&p->prefix, prefix)) {
 			hlist_del_rcu(&p->list);
@@ -311,7 +340,8 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
 	return ret;
 }
 
-static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+static int ip6addrlbl_del(struct net *net,
+			  const struct in6_addr *prefix, int prefixlen,
 			  int ifindex)
 {
 	struct in6_addr prefix_buf;
@@ -324,13 +354,13 @@ static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
 
 	ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
 	spin_lock(&ip6addrlbl_table.lock);
-	ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex);
+	ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
 	spin_unlock(&ip6addrlbl_table.lock);
 	return ret;
 }
 
 /* add default label */
-static __init int ip6addrlbl_init(void)
+static int __net_init ip6addrlbl_net_init(struct net *net)
 {
 	int err = 0;
 	int i;
@@ -338,7 +368,8 @@ static __init int ip6addrlbl_init(void)
 	ADDRLABEL(KERN_DEBUG "%s()\n", __func__);
 
 	for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
-		int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix,
+		int ret = ip6addrlbl_add(net,
+					 ip6addrlbl_init_table[i].prefix,
 					 ip6addrlbl_init_table[i].prefixlen,
 					 0,
 					 ip6addrlbl_init_table[i].label, 0);
@@ -349,11 +380,32 @@ static __init int ip6addrlbl_init(void)
 	return err;
 }
 
+static void __net_exit ip6addrlbl_net_exit(struct net *net)
+{
+	struct ip6addrlbl_entry *p = NULL;
+	struct hlist_node *pos, *n;
+
+	/* Remove all labels belonging to the exiting net */
+	spin_lock(&ip6addrlbl_table.lock);
+	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+		if (net_eq(ip6addrlbl_net(p), net)) {
+			hlist_del_rcu(&p->list);
+			ip6addrlbl_put(p);
+		}
+	}
+	spin_unlock(&ip6addrlbl_table.lock);
+}
+
+static struct pernet_operations ipv6_addr_label_ops = {
+	.init = ip6addrlbl_net_init,
+	.exit = ip6addrlbl_net_exit,
+};
+
 int __init ipv6_addr_label_init(void)
 {
 	spin_lock_init(&ip6addrlbl_table.lock);
 
-	return ip6addrlbl_init();
+	return register_pernet_subsys(&ipv6_addr_label_ops);
 }
 
 static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
@@ -371,9 +423,6 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
 	u32 label;
 	int err = 0;
 
-	if (net != &init_net)
-		return 0;
-
 	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
 	if (err < 0)
 		return err;
@@ -385,7 +434,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	if (ifal->ifal_index &&
-	    !__dev_get_by_index(&init_net, ifal->ifal_index))
+	    !__dev_get_by_index(net, ifal->ifal_index))
 		return -EINVAL;
 
 	if (!tb[IFAL_ADDRESS])
@@ -403,12 +452,12 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	switch(nlh->nlmsg_type) {
 	case RTM_NEWADDRLABEL:
-		err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen,
+		err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
 				     ifal->ifal_index, label,
 				     nlh->nlmsg_flags & NLM_F_REPLACE);
 		break;
 	case RTM_DELADDRLABEL:
-		err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen,
+		err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen,
 				     ifal->ifal_index);
 		break;
 	default:
@@ -458,12 +507,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int idx = 0, s_idx = cb->args[0];
 	int err;
 
-	if (net != &init_net)
-		return 0;
-
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
-		if (idx >= s_idx) {
+		if (idx >= s_idx &&
+		    net_eq(ip6addrlbl_net(p), net)) {
 			if ((err = ip6addrlbl_fill(skb, p,
 						   ip6addrlbl_table.seq,
 						   NETLINK_CB(cb->skb).pid,
@@ -499,9 +546,6 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 	struct ip6addrlbl_entry *p;
 	struct sk_buff *skb;
 
-	if (net != &init_net)
-		return 0;
-
 	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
 	if (err < 0)
 		return err;
@@ -513,7 +557,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 		return -EINVAL;
 
 	if (ifal->ifal_index &&
-	    !__dev_get_by_index(&init_net, ifal->ifal_index))
+	    !__dev_get_by_index(net, ifal->ifal_index))
 		return -EINVAL;
 
 	if (!tb[IFAL_ADDRESS])
@@ -524,7 +568,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 		return -EINVAL;
 
 	rcu_read_lock();
-	p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index);
+	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
 	if (p && ip6addrlbl_hold(p))
 		p = NULL;
 	lseq = ip6addrlbl_table.seq;
@@ -552,7 +596,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 		goto out;
 	}
 
-	err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
 out:
 	return err;
 }
-- 
cgit v1.2.3-70-g09d2


From 9e8b4ed8bb7d2f0f91dccf0abf648771d76e7a01 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 28 Apr 2008 00:50:45 +0900
Subject: key: Introduce pfkey_sockaddr_len() for raw sockaddr{} length.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/key/af_key.c | 50 +++++++++++++++++++-------------------------------
 1 file changed, 19 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9bba7ac5fee..02c8aba4239 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -579,6 +579,19 @@ static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
 	return (proto ? proto : IPSEC_PROTO_ANY);
 }
 
+static inline int pfkey_sockaddr_len(sa_family_t family)
+{
+	switch (family) {
+	case AF_INET:
+		return sizeof(struct sockaddr_in);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		return sizeof(struct sockaddr_in6);
+#endif
+	}
+	return 0;
+}
+
 static int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr,
 				     xfrm_address_t *xaddr)
 {
@@ -642,20 +655,11 @@ static struct  xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **
 }
 
 #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))
+
 static int
 pfkey_sockaddr_size(sa_family_t family)
 {
-	switch (family) {
-	case AF_INET:
-		return PFKEY_ALIGN8(sizeof(struct sockaddr_in));
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case AF_INET6:
-		return PFKEY_ALIGN8(sizeof(struct sockaddr_in6));
-#endif
-	default:
-		return 0;
-	}
-	/* NOTREACHED */
+	return PFKEY_ALIGN8(pfkey_sockaddr_len(family));
 }
 
 static inline int pfkey_mode_from_xfrm(int mode)
@@ -1952,9 +1956,7 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
 
 	for (i=0; i<xp->xfrm_nr; i++) {
 		t = xp->xfrm_vec + i;
-		socklen += (t->encap_family == AF_INET ?
-			    sizeof(struct sockaddr_in) :
-			    sizeof(struct sockaddr_in6));
+		socklen += pfkey_sockaddr_len(t->encap_family);
 	}
 
 	return sizeof(struct sadb_msg) +
@@ -1996,9 +1998,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
 	int i;
 	int size;
 	int sockaddr_size = pfkey_sockaddr_size(xp->family);
-	int socklen = (xp->family == AF_INET ?
-		       sizeof(struct sockaddr_in) :
-		       sizeof(struct sockaddr_in6));
+	int socklen = pfkey_sockaddr_len(xp->family);
 
 	size = pfkey_xfrm_policy2msg_size(xp);
 
@@ -2122,9 +2122,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
 
 		req_size = sizeof(struct sadb_x_ipsecrequest);
 		if (t->mode == XFRM_MODE_TUNNEL)
-			req_size += ((t->encap_family == AF_INET ?
-				     sizeof(struct sockaddr_in) :
-				     sizeof(struct sockaddr_in6)) * 2);
+			req_size += pfkey_sockaddr_len(t->encap_family) * 2;
 		else
 			size -= 2*socklen;
 		rq = (void*)skb_put(skb, req_size);
@@ -2459,17 +2457,7 @@ out:
 #ifdef CONFIG_NET_KEY_MIGRATE
 static int pfkey_sockaddr_pair_size(sa_family_t family)
 {
-	switch (family) {
-	case AF_INET:
-		return PFKEY_ALIGN8(sizeof(struct sockaddr_in) * 2);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case AF_INET6:
-		return PFKEY_ALIGN8(sizeof(struct sockaddr_in6) * 2);
-#endif
-	default:
-		return 0;
-	}
-	/* NOTREACHED */
+	return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2);
 }
 
 static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq,
-- 
cgit v1.2.3-70-g09d2


From e5b56652c11baf144eb713c33ff70a1b5d6d09bc Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 28 Apr 2008 01:46:41 +0900
Subject: key: Share common code path to fill sockaddr{}.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/key/af_key.c | 458 ++++++++++++++-----------------------------------------
 1 file changed, 111 insertions(+), 347 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 02c8aba4239..2b05cb6c15c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -691,6 +691,36 @@ static inline int pfkey_mode_to_xfrm(int mode)
 	}
 }
 
+static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port,
+				       struct sockaddr *sa,
+				       unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+	    {
+		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+		sin->sin_family = AF_INET;
+		sin->sin_port = port;
+		sin->sin_addr.s_addr = xaddr->a4;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+		return 32;
+	    }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+	    {
+		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = port;
+		sin6->sin6_flowinfo = 0;
+		ipv6_addr_copy(&sin6->sin6_addr, (struct in6_addr *)xaddr->a6);
+		sin6->sin6_scope_id = 0;
+		return 128;
+	    }
+#endif
+	}
+	return 0;
+}
+
 static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x,
 					      int add_keys, int hsc)
 {
@@ -701,13 +731,9 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x,
 	struct sadb_address *addr;
 	struct sadb_key *key;
 	struct sadb_x_sa2 *sa2;
-	struct sockaddr_in *sin;
 	struct sadb_x_sec_ctx *sec_ctx;
 	struct xfrm_sec_ctx *xfrm_ctx;
 	int ctx_size = 0;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct sockaddr_in6 *sin6;
-#endif
 	int size;
 	int auth_key_size = 0;
 	int encrypt_key_size = 0;
@@ -865,29 +891,12 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x,
 	   protocol's number." - RFC2367 */
 	addr->sadb_address_proto = 0;
 	addr->sadb_address_reserved = 0;
-	if (x->props.family == AF_INET) {
-		addr->sadb_address_prefixlen = 32;
 
-		sin = (struct sockaddr_in *) (addr + 1);
-		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = x->props.saddr.a4;
-		sin->sin_port = 0;
-		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (x->props.family == AF_INET6) {
-		addr->sadb_address_prefixlen = 128;
-
-		sin6 = (struct sockaddr_in6 *) (addr + 1);
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = 0;
-		sin6->sin6_flowinfo = 0;
-		memcpy(&sin6->sin6_addr, x->props.saddr.a6,
-		       sizeof(struct in6_addr));
-		sin6->sin6_scope_id = 0;
-	}
-#endif
-	else
+	addr->sadb_address_prefixlen =
+		pfkey_sockaddr_fill(&x->props.saddr, 0,
+				    (struct sockaddr *) (addr + 1),
+				    x->props.family);
+	if (!addr->sadb_address_prefixlen)
 		BUG();
 
 	/* dst address */
@@ -898,70 +907,32 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x,
 			sizeof(uint64_t);
 	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
 	addr->sadb_address_proto = 0;
-	addr->sadb_address_prefixlen = 32; /* XXX */
 	addr->sadb_address_reserved = 0;
-	if (x->props.family == AF_INET) {
-		sin = (struct sockaddr_in *) (addr + 1);
-		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = x->id.daddr.a4;
-		sin->sin_port = 0;
-		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 
-		if (x->sel.saddr.a4 != x->props.saddr.a4) {
-			addr = (struct sadb_address*) skb_put(skb,
-				sizeof(struct sadb_address)+sockaddr_size);
-			addr->sadb_address_len =
-				(sizeof(struct sadb_address)+sockaddr_size)/
-				sizeof(uint64_t);
-			addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
-			addr->sadb_address_proto =
-				pfkey_proto_from_xfrm(x->sel.proto);
-			addr->sadb_address_prefixlen = x->sel.prefixlen_s;
-			addr->sadb_address_reserved = 0;
-
-			sin = (struct sockaddr_in *) (addr + 1);
-			sin->sin_family = AF_INET;
-			sin->sin_addr.s_addr = x->sel.saddr.a4;
-			sin->sin_port = x->sel.sport;
-			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-		}
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (x->props.family == AF_INET6) {
-		addr->sadb_address_prefixlen = 128;
+	addr->sadb_address_prefixlen =
+		pfkey_sockaddr_fill(&x->id.daddr, 0,
+				    (struct sockaddr *) (addr + 1),
+				    x->props.family);
+	if (!addr->sadb_address_prefixlen)
+		BUG();
 
-		sin6 = (struct sockaddr_in6 *) (addr + 1);
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = 0;
-		sin6->sin6_flowinfo = 0;
-		memcpy(&sin6->sin6_addr, x->id.daddr.a6, sizeof(struct in6_addr));
-		sin6->sin6_scope_id = 0;
+	if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr,
+			  x->props.family)) {
+		addr = (struct sadb_address*) skb_put(skb,
+			sizeof(struct sadb_address)+sockaddr_size);
+		addr->sadb_address_len =
+			(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+		addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
+		addr->sadb_address_proto =
+			pfkey_proto_from_xfrm(x->sel.proto);
+		addr->sadb_address_prefixlen = x->sel.prefixlen_s;
+		addr->sadb_address_reserved = 0;
 
-		if (memcmp (x->sel.saddr.a6, x->props.saddr.a6,
-			    sizeof(struct in6_addr))) {
-			addr = (struct sadb_address *) skb_put(skb,
-				sizeof(struct sadb_address)+sockaddr_size);
-			addr->sadb_address_len =
-				(sizeof(struct sadb_address)+sockaddr_size)/
-				sizeof(uint64_t);
-			addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
-			addr->sadb_address_proto =
-				pfkey_proto_from_xfrm(x->sel.proto);
-			addr->sadb_address_prefixlen = x->sel.prefixlen_s;
-			addr->sadb_address_reserved = 0;
-
-			sin6 = (struct sockaddr_in6 *) (addr + 1);
-			sin6->sin6_family = AF_INET6;
-			sin6->sin6_port = x->sel.sport;
-			sin6->sin6_flowinfo = 0;
-			memcpy(&sin6->sin6_addr, x->sel.saddr.a6,
-			       sizeof(struct in6_addr));
-			sin6->sin6_scope_id = 0;
-		}
+		pfkey_sockaddr_fill(&x->sel.saddr, x->sel.sport,
+				    (struct sockaddr *) (addr + 1),
+				    x->props.family);
 	}
-#endif
-	else
-		BUG();
 
 	/* auth key */
 	if (add_keys && auth_key_size) {
@@ -1989,12 +1960,8 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
 	struct sadb_address *addr;
 	struct sadb_lifetime *lifetime;
 	struct sadb_x_policy *pol;
-	struct sockaddr_in   *sin;
 	struct sadb_x_sec_ctx *sec_ctx;
 	struct xfrm_sec_ctx *xfrm_ctx;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct sockaddr_in6  *sin6;
-#endif
 	int i;
 	int size;
 	int sockaddr_size = pfkey_sockaddr_size(xp->family);
@@ -2016,26 +1983,10 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
 	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
 	addr->sadb_address_prefixlen = xp->selector.prefixlen_s;
 	addr->sadb_address_reserved = 0;
-	/* src address */
-	if (xp->family == AF_INET) {
-		sin = (struct sockaddr_in *) (addr + 1);
-		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = xp->selector.saddr.a4;
-		sin->sin_port = xp->selector.sport;
-		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (xp->family == AF_INET6) {
-		sin6 = (struct sockaddr_in6 *) (addr + 1);
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = xp->selector.sport;
-		sin6->sin6_flowinfo = 0;
-		memcpy(&sin6->sin6_addr, xp->selector.saddr.a6,
-		       sizeof(struct in6_addr));
-		sin6->sin6_scope_id = 0;
-	}
-#endif
-	else
+	if (!pfkey_sockaddr_fill(&xp->selector.saddr,
+				 xp->selector.sport,
+				 (struct sockaddr *) (addr + 1),
+				 xp->family))
 		BUG();
 
 	/* dst address */
@@ -2048,26 +1999,10 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
 	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
 	addr->sadb_address_prefixlen = xp->selector.prefixlen_d;
 	addr->sadb_address_reserved = 0;
-	if (xp->family == AF_INET) {
-		sin = (struct sockaddr_in *) (addr + 1);
-		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = xp->selector.daddr.a4;
-		sin->sin_port = xp->selector.dport;
-		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (xp->family == AF_INET6) {
-		sin6 = (struct sockaddr_in6 *) (addr + 1);
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = xp->selector.dport;
-		sin6->sin6_flowinfo = 0;
-		memcpy(&sin6->sin6_addr, xp->selector.daddr.a6,
-		       sizeof(struct in6_addr));
-		sin6->sin6_scope_id = 0;
-	}
-#endif
-	else
-		BUG();
+
+	pfkey_sockaddr_fill(&xp->selector.daddr, xp->selector.dport,
+			    (struct sockaddr *) (addr + 1),
+			    xp->family);
 
 	/* hard time */
 	lifetime = (struct sadb_lifetime *)  skb_put(skb,
@@ -2121,10 +2056,13 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
 		int mode;
 
 		req_size = sizeof(struct sadb_x_ipsecrequest);
-		if (t->mode == XFRM_MODE_TUNNEL)
-			req_size += pfkey_sockaddr_len(t->encap_family) * 2;
-		else
+		if (t->mode == XFRM_MODE_TUNNEL) {
+			socklen = pfkey_sockaddr_len(t->encap_family);
+			req_size += socklen * 2;
+		} else {
 			size -= 2*socklen;
+			socklen = 0;
+		}
 		rq = (void*)skb_put(skb, req_size);
 		pol->sadb_x_policy_len += req_size/8;
 		memset(rq, 0, sizeof(*rq));
@@ -2139,42 +2077,15 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
 		if (t->optional)
 			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE;
 		rq->sadb_x_ipsecrequest_reqid = t->reqid;
+
 		if (t->mode == XFRM_MODE_TUNNEL) {
-			switch (t->encap_family) {
-			case AF_INET:
-				sin = (void*)(rq+1);
-				sin->sin_family = AF_INET;
-				sin->sin_addr.s_addr = t->saddr.a4;
-				sin->sin_port = 0;
-				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-				sin++;
-				sin->sin_family = AF_INET;
-				sin->sin_addr.s_addr = t->id.daddr.a4;
-				sin->sin_port = 0;
-				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-				break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-			case AF_INET6:
-				sin6 = (void*)(rq+1);
-				sin6->sin6_family = AF_INET6;
-				sin6->sin6_port = 0;
-				sin6->sin6_flowinfo = 0;
-				memcpy(&sin6->sin6_addr, t->saddr.a6,
-				       sizeof(struct in6_addr));
-				sin6->sin6_scope_id = 0;
-
-				sin6++;
-				sin6->sin6_family = AF_INET6;
-				sin6->sin6_port = 0;
-				sin6->sin6_flowinfo = 0;
-				memcpy(&sin6->sin6_addr, t->id.daddr.a6,
-				       sizeof(struct in6_addr));
-				sin6->sin6_scope_id = 0;
-				break;
-#endif
-			default:
-				break;
-			}
+			u8 *sa = (void *)(rq + 1);
+			pfkey_sockaddr_fill(&t->saddr, 0,
+					    (struct sockaddr *)sa,
+					    t->encap_family);
+			pfkey_sockaddr_fill(&t->id.daddr, 0,
+					    (struct sockaddr *) (sa + socklen),
+					    t->encap_family);
 		}
 	}
 
@@ -3079,10 +2990,6 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	struct sadb_msg *hdr;
 	struct sadb_address *addr;
 	struct sadb_x_policy *pol;
-	struct sockaddr_in *sin;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct sockaddr_in6 *sin6;
-#endif
 	int sockaddr_size;
 	int size;
 	struct sadb_x_sec_ctx *sec_ctx;
@@ -3131,29 +3038,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
 	addr->sadb_address_proto = 0;
 	addr->sadb_address_reserved = 0;
-	if (x->props.family == AF_INET) {
-		addr->sadb_address_prefixlen = 32;
-
-		sin = (struct sockaddr_in *) (addr + 1);
-		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = x->props.saddr.a4;
-		sin->sin_port = 0;
-		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (x->props.family == AF_INET6) {
-		addr->sadb_address_prefixlen = 128;
-
-		sin6 = (struct sockaddr_in6 *) (addr + 1);
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = 0;
-		sin6->sin6_flowinfo = 0;
-		memcpy(&sin6->sin6_addr,
-		       x->props.saddr.a6, sizeof(struct in6_addr));
-		sin6->sin6_scope_id = 0;
-	}
-#endif
-	else
+	addr->sadb_address_prefixlen =
+		pfkey_sockaddr_fill(&x->props.saddr, 0,
+				    (struct sockaddr *) (addr + 1),
+				    x->props.family);
+	if (!addr->sadb_address_prefixlen)
 		BUG();
 
 	/* dst address */
@@ -3165,29 +3054,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
 	addr->sadb_address_proto = 0;
 	addr->sadb_address_reserved = 0;
-	if (x->props.family == AF_INET) {
-		addr->sadb_address_prefixlen = 32;
-
-		sin = (struct sockaddr_in *) (addr + 1);
-		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = x->id.daddr.a4;
-		sin->sin_port = 0;
-		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (x->props.family == AF_INET6) {
-		addr->sadb_address_prefixlen = 128;
-
-		sin6 = (struct sockaddr_in6 *) (addr + 1);
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = 0;
-		sin6->sin6_flowinfo = 0;
-		memcpy(&sin6->sin6_addr,
-		       x->id.daddr.a6, sizeof(struct in6_addr));
-		sin6->sin6_scope_id = 0;
-	}
-#endif
-	else
+	addr->sadb_address_prefixlen =
+		pfkey_sockaddr_fill(&x->id.daddr, 0,
+				    (struct sockaddr *) (addr + 1),
+				    x->props.family);
+	if (!addr->sadb_address_prefixlen)
 		BUG();
 
 	pol = (struct sadb_x_policy *)  skb_put(skb, sizeof(struct sadb_x_policy));
@@ -3313,10 +3184,6 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	struct sadb_sa *sa;
 	struct sadb_address *addr;
 	struct sadb_x_nat_t_port *n_port;
-	struct sockaddr_in *sin;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct sockaddr_in6 *sin6;
-#endif
 	int sockaddr_size;
 	int size;
 	__u8 satype = (x->id.proto == IPPROTO_ESP ? SADB_SATYPE_ESP : 0);
@@ -3380,29 +3247,11 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
 	addr->sadb_address_proto = 0;
 	addr->sadb_address_reserved = 0;
-	if (x->props.family == AF_INET) {
-		addr->sadb_address_prefixlen = 32;
-
-		sin = (struct sockaddr_in *) (addr + 1);
-		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = x->props.saddr.a4;
-		sin->sin_port = 0;
-		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (x->props.family == AF_INET6) {
-		addr->sadb_address_prefixlen = 128;
-
-		sin6 = (struct sockaddr_in6 *) (addr + 1);
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = 0;
-		sin6->sin6_flowinfo = 0;
-		memcpy(&sin6->sin6_addr,
-		       x->props.saddr.a6, sizeof(struct in6_addr));
-		sin6->sin6_scope_id = 0;
-	}
-#endif
-	else
+	addr->sadb_address_prefixlen =
+		pfkey_sockaddr_fill(&x->props.saddr, 0,
+				    (struct sockaddr *) (addr + 1),
+				    x->props.family);
+	if (!addr->sadb_address_prefixlen)
 		BUG();
 
 	/* NAT_T_SPORT (old port) */
@@ -3421,28 +3270,11 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
 	addr->sadb_address_proto = 0;
 	addr->sadb_address_reserved = 0;
-	if (x->props.family == AF_INET) {
-		addr->sadb_address_prefixlen = 32;
-
-		sin = (struct sockaddr_in *) (addr + 1);
-		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = ipaddr->a4;
-		sin->sin_port = 0;
-		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (x->props.family == AF_INET6) {
-		addr->sadb_address_prefixlen = 128;
-
-		sin6 = (struct sockaddr_in6 *) (addr + 1);
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = 0;
-		sin6->sin6_flowinfo = 0;
-		memcpy(&sin6->sin6_addr, &ipaddr->a6, sizeof(struct in6_addr));
-		sin6->sin6_scope_id = 0;
-	}
-#endif
-	else
+	addr->sadb_address_prefixlen =
+		pfkey_sockaddr_fill(ipaddr, 0,
+				    (struct sockaddr *) (addr + 1),
+				    x->props.family);
+	if (!addr->sadb_address_prefixlen)
 		BUG();
 
 	/* NAT_T_DPORT (new port) */
@@ -3460,10 +3292,6 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
 			    struct xfrm_selector *sel)
 {
 	struct sadb_address *addr;
-	struct sockaddr_in *sin;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct sockaddr_in6 *sin6;
-#endif
 	addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize);
 	addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8;
 	addr->sadb_address_exttype = type;
@@ -3472,50 +3300,16 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
 
 	switch (type) {
 	case SADB_EXT_ADDRESS_SRC:
-		if (sel->family == AF_INET) {
-			addr->sadb_address_prefixlen = sel->prefixlen_s;
-			sin = (struct sockaddr_in *)(addr + 1);
-			sin->sin_family = AF_INET;
-			memcpy(&sin->sin_addr.s_addr, &sel->saddr,
-			       sizeof(sin->sin_addr.s_addr));
-			sin->sin_port = 0;
-			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-		else if (sel->family == AF_INET6) {
-			addr->sadb_address_prefixlen = sel->prefixlen_s;
-			sin6 = (struct sockaddr_in6 *)(addr + 1);
-			sin6->sin6_family = AF_INET6;
-			sin6->sin6_port = 0;
-			sin6->sin6_flowinfo = 0;
-			sin6->sin6_scope_id = 0;
-			memcpy(&sin6->sin6_addr.s6_addr, &sel->saddr,
-			       sizeof(sin6->sin6_addr.s6_addr));
-		}
-#endif
+		addr->sadb_address_prefixlen = sel->prefixlen_s;
+		pfkey_sockaddr_fill(&sel->saddr, 0,
+				    (struct sockaddr *)(addr + 1),
+				    sel->family);
 		break;
 	case SADB_EXT_ADDRESS_DST:
-		if (sel->family == AF_INET) {
-			addr->sadb_address_prefixlen = sel->prefixlen_d;
-			sin = (struct sockaddr_in *)(addr + 1);
-			sin->sin_family = AF_INET;
-			memcpy(&sin->sin_addr.s_addr, &sel->daddr,
-			       sizeof(sin->sin_addr.s_addr));
-			sin->sin_port = 0;
-			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-		else if (sel->family == AF_INET6) {
-			addr->sadb_address_prefixlen = sel->prefixlen_d;
-			sin6 = (struct sockaddr_in6 *)(addr + 1);
-			sin6->sin6_family = AF_INET6;
-			sin6->sin6_port = 0;
-			sin6->sin6_flowinfo = 0;
-			sin6->sin6_scope_id = 0;
-			memcpy(&sin6->sin6_addr.s6_addr, &sel->daddr,
-			       sizeof(sin6->sin6_addr.s6_addr));
-		}
-#endif
+		addr->sadb_address_prefixlen = sel->prefixlen_d;
+		pfkey_sockaddr_fill(&sel->daddr, 0,
+				    (struct sockaddr *)(addr + 1),
+				    sel->family);
 		break;
 	default:
 		return -EINVAL;
@@ -3530,10 +3324,8 @@ static int set_ipsecrequest(struct sk_buff *skb,
 			    xfrm_address_t *src, xfrm_address_t *dst)
 {
 	struct sadb_x_ipsecrequest *rq;
-	struct sockaddr_in *sin;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct sockaddr_in6 *sin6;
-#endif
+	u8 *sa;
+	int socklen = pfkey_sockaddr_len(family);
 	int size_req;
 
 	size_req = sizeof(struct sadb_x_ipsecrequest) +
@@ -3547,38 +3339,10 @@ static int set_ipsecrequest(struct sk_buff *skb,
 	rq->sadb_x_ipsecrequest_level = level;
 	rq->sadb_x_ipsecrequest_reqid = reqid;
 
-	switch (family) {
-	case AF_INET:
-		sin = (struct sockaddr_in *)(rq + 1);
-		sin->sin_family = AF_INET;
-		memcpy(&sin->sin_addr.s_addr, src,
-		       sizeof(sin->sin_addr.s_addr));
-		sin++;
-		sin->sin_family = AF_INET;
-		memcpy(&sin->sin_addr.s_addr, dst,
-		       sizeof(sin->sin_addr.s_addr));
-		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case AF_INET6:
-		sin6 = (struct sockaddr_in6 *)(rq + 1);
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = 0;
-		sin6->sin6_flowinfo = 0;
-		sin6->sin6_scope_id = 0;
-		memcpy(&sin6->sin6_addr.s6_addr, src,
-		       sizeof(sin6->sin6_addr.s6_addr));
-		sin6++;
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = 0;
-		sin6->sin6_flowinfo = 0;
-		sin6->sin6_scope_id = 0;
-		memcpy(&sin6->sin6_addr.s6_addr, dst,
-		       sizeof(sin6->sin6_addr.s6_addr));
-		break;
-#endif
-	default:
+	sa = (u8 *) (rq + 1);
+	if (!pfkey_sockaddr_fill(src, 0, (struct sockaddr *)sa, family) ||
+	    !pfkey_sockaddr_fill(dst, 0, (struct sockaddr *)(sa + socklen), family))
 		return -EINVAL;
-	}
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 5f95ac9111f75aa240dc3bcabffc0f047f13cb64 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 28 Apr 2008 02:46:24 +0900
Subject: key: Share common code path to extract address from sockaddr{}.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/key/af_key.c | 107 ++++++++++++++++++++-----------------------------------
 1 file changed, 38 insertions(+), 69 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 2b05cb6c15c..771bd61d630 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -592,25 +592,30 @@ static inline int pfkey_sockaddr_len(sa_family_t family)
 	return 0;
 }
 
-static int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr,
-				     xfrm_address_t *xaddr)
+static
+int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr)
 {
-	switch (((struct sockaddr*)(addr + 1))->sa_family) {
+	switch (sa->sa_family) {
 	case AF_INET:
 		xaddr->a4 =
-			((struct sockaddr_in *)(addr + 1))->sin_addr.s_addr;
+			((struct sockaddr_in *)sa)->sin_addr.s_addr;
 		return AF_INET;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case AF_INET6:
 		memcpy(xaddr->a6,
-		       &((struct sockaddr_in6 *)(addr + 1))->sin6_addr,
+		       &((struct sockaddr_in6 *)sa)->sin6_addr,
 		       sizeof(struct in6_addr));
 		return AF_INET6;
 #endif
-	default:
-		return 0;
 	}
-	/* NOTREACHED */
+	return 0;
+}
+
+static
+int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr)
+{
+	return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1),
+				      xaddr);
 }
 
 static struct  xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **ext_hdrs)
@@ -1828,10 +1833,6 @@ static int
 parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
 {
 	struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr;
-	struct sockaddr_in *sin;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct sockaddr_in6 *sin6;
-#endif
 	int mode;
 
 	if (xp->xfrm_nr >= XFRM_MAX_DEPTH)
@@ -1856,31 +1857,19 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
 
 	/* addresses present only in tunnel mode */
 	if (t->mode == XFRM_MODE_TUNNEL) {
-		struct sockaddr *sa;
-		sa = (struct sockaddr *)(rq+1);
-		switch(sa->sa_family) {
-		case AF_INET:
-			sin = (struct sockaddr_in*)sa;
-			t->saddr.a4 = sin->sin_addr.s_addr;
-			sin++;
-			if (sin->sin_family != AF_INET)
-				return -EINVAL;
-			t->id.daddr.a4 = sin->sin_addr.s_addr;
-			break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-		case AF_INET6:
-			sin6 = (struct sockaddr_in6*)sa;
-			memcpy(t->saddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr));
-			sin6++;
-			if (sin6->sin6_family != AF_INET6)
-				return -EINVAL;
-			memcpy(t->id.daddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr));
-			break;
-#endif
-		default:
+		u8 *sa = (u8 *) (rq + 1);
+		int family, socklen;
+
+		family = pfkey_sockaddr_extract((struct sockaddr *)sa,
+						&t->saddr);
+		if (!family)
 			return -EINVAL;
-		}
-		t->encap_family = sa->sa_family;
+
+		socklen = pfkey_sockaddr_len(family);
+		if (pfkey_sockaddr_extract((struct sockaddr *)(sa + socklen),
+					   &t->id.daddr) != family)
+			return -EINVAL;
+		t->encap_family = family;
 	} else
 		t->encap_family = xp->family;
 
@@ -2375,44 +2364,24 @@ static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq,
 			       xfrm_address_t *saddr, xfrm_address_t *daddr,
 			       u16 *family)
 {
-	struct sockaddr *sa = (struct sockaddr *)(rq + 1);
+	u8 *sa = (u8 *) (rq + 1);
+	int af, socklen;
+
 	if (rq->sadb_x_ipsecrequest_len <
-	    pfkey_sockaddr_pair_size(sa->sa_family))
+	    pfkey_sockaddr_pair_size(((struct sockaddr *)sa)->sa_family))
 		return -EINVAL;
 
-	switch (sa->sa_family) {
-	case AF_INET:
-		{
-			struct sockaddr_in *sin;
-			sin = (struct sockaddr_in *)sa;
-			if ((sin+1)->sin_family != AF_INET)
-				return -EINVAL;
-			memcpy(&saddr->a4, &sin->sin_addr, sizeof(saddr->a4));
-			sin++;
-			memcpy(&daddr->a4, &sin->sin_addr, sizeof(daddr->a4));
-			*family = AF_INET;
-			break;
-		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case AF_INET6:
-		{
-			struct sockaddr_in6 *sin6;
-			sin6 = (struct sockaddr_in6 *)sa;
-			if ((sin6+1)->sin6_family != AF_INET6)
-				return -EINVAL;
-			memcpy(&saddr->a6, &sin6->sin6_addr,
-			       sizeof(saddr->a6));
-			sin6++;
-			memcpy(&daddr->a6, &sin6->sin6_addr,
-			       sizeof(daddr->a6));
-			*family = AF_INET6;
-			break;
-		}
-#endif
-	default:
+	af = pfkey_sockaddr_extract((struct sockaddr *) sa,
+				    saddr);
+	if (!af)
+		return -EINVAL;
+
+	socklen = pfkey_sockaddr_len(af);
+	if (pfkey_sockaddr_extract((struct sockaddr *) (sa + socklen),
+				   daddr) != af)
 		return -EINVAL;
-	}
 
+	*family = af;
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 81b302a321a0d99ff172b8cb2a8de17bff2f9499 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 28 Apr 2008 03:17:38 +0900
Subject: key: Use xfrm_addr_cmp() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/key/af_key.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 771bd61d630..841af9f2d5e 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -767,14 +767,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x,
 	}
 
 	/* identity & sensitivity */
-
-	if ((x->props.family == AF_INET &&
-	     x->sel.saddr.a4 != x->props.saddr.a4)
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	    || (x->props.family == AF_INET6 &&
-		memcmp (x->sel.saddr.a6, x->props.saddr.a6, sizeof (struct in6_addr)))
-#endif
-		)
+	if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, x->props.family))
 		size += sizeof(struct sadb_address) + sockaddr_size;
 
 	if (add_keys) {
-- 
cgit v1.2.3-70-g09d2


From 7d5d5525bd88313e6fd90c0659665aee5114bc2d Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 17 Apr 2008 12:29:53 +0900
Subject: tcp md5sig: Share MD5 Signature option parser between IPv4 and IPv6.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/tcp.h    |  2 ++
 net/ipv4/tcp_input.c | 40 ++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c  | 42 +-----------------------------------------
 net/ipv6/tcp_ipv6.c  | 33 +--------------------------------
 4 files changed, 44 insertions(+), 73 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 633147cb6bb..b1c4b78f1cb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -399,6 +399,8 @@ extern void			tcp_parse_options(struct sk_buff *skb,
 						  struct tcp_options_received *opt_rx,
 						  int estab);
 
+extern u8			*tcp_parse_md5sig_option(struct tcphdr *th);
+
 /*
  *	TCP v4 functions exported for the inet6 API
  */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eba873e9b56..f331e67f232 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3450,6 +3450,43 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 	return 1;
 }
 
+#ifdef CONFIG_TCP_MD5SIG
+/*
+ * Parse MD5 Signature option
+ */
+u8 *tcp_parse_md5sig_option(struct tcphdr *th)
+{
+	int length = (th->doff << 2) - sizeof (*th);
+	u8 *ptr = (u8*)(th + 1);
+
+	/* If the TCP option is too short, we can short cut */
+	if (length < TCPOLEN_MD5SIG)
+		return NULL;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch(opcode) {
+		case TCPOPT_EOL:
+			return NULL;
+		case TCPOPT_NOP:
+			length--;
+			continue;
+		default:
+			opsize = *ptr++;
+			if (opsize < 2 || opsize > length)
+				return NULL;
+			if (opcode == TCPOPT_MD5SIG)
+				return ptr;
+		}
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+	return NULL;
+}
+#endif
+
 static inline void tcp_store_ts_recent(struct tcp_sock *tp)
 {
 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
@@ -5467,6 +5504,9 @@ EXPORT_SYMBOL(sysctl_tcp_ecn);
 EXPORT_SYMBOL(sysctl_tcp_reordering);
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 EXPORT_SYMBOL(tcp_parse_options);
+#ifdef CONFIG_TCP_MD5SIG
+EXPORT_SYMBOL(tcp_parse_md5sig_option);
+#endif
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
 EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2..56f55093364 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1134,52 +1134,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 	struct tcp_md5sig_key *hash_expected;
 	const struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
-	int length = (th->doff << 2) - sizeof(struct tcphdr);
 	int genhash;
-	unsigned char *ptr;
 	unsigned char newhash[16];
 
 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
+	hash_location = tcp_parse_md5sig_option(th);
 
-	/*
-	 * If the TCP option length is less than the TCP_MD5SIG
-	 * option length, then we can shortcut
-	 */
-	if (length < TCPOLEN_MD5SIG) {
-		if (hash_expected)
-			return 1;
-		else
-			return 0;
-	}
-
-	/* Okay, we can't shortcut - we have to grub through the options */
-	ptr = (unsigned char *)(th + 1);
-	while (length > 0) {
-		int opcode = *ptr++;
-		int opsize;
-
-		switch (opcode) {
-		case TCPOPT_EOL:
-			goto done_opts;
-		case TCPOPT_NOP:
-			length--;
-			continue;
-		default:
-			opsize = *ptr++;
-			if (opsize < 2)
-				goto done_opts;
-			if (opsize > length)
-				goto done_opts;
-
-			if (opcode == TCPOPT_MD5SIG) {
-				hash_location = ptr;
-				goto done_opts;
-			}
-		}
-		ptr += opsize-2;
-		length -= opsize;
-	}
-done_opts:
 	/* We've parsed the options - do we have a hash? */
 	if (!hash_expected && !hash_location)
 		return 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 715965f0fac..dd4ddb30a3a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -844,43 +844,12 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 	struct tcp_md5sig_key *hash_expected;
 	struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
-	int length = (th->doff << 2) - sizeof (*th);
 	int genhash;
-	u8 *ptr;
 	u8 newhash[16];
 
 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
+	hash_location = tcp_parse_md5sig_option(th);
 
-	/* If the TCP option is too short, we can short cut */
-	if (length < TCPOLEN_MD5SIG)
-		return hash_expected ? 1 : 0;
-
-	/* parse options */
-	ptr = (u8*)(th + 1);
-	while (length > 0) {
-		int opcode = *ptr++;
-		int opsize;
-
-		switch(opcode) {
-		case TCPOPT_EOL:
-			goto done_opts;
-		case TCPOPT_NOP:
-			length--;
-			continue;
-		default:
-			opsize = *ptr++;
-			if (opsize < 2 || opsize > length)
-				goto done_opts;
-			if (opcode == TCPOPT_MD5SIG) {
-				hash_location = ptr;
-				goto done_opts;
-			}
-		}
-		ptr += opsize - 2;
-		length -= opsize;
-	}
-
-done_opts:
 	/* do we have a hash as expected? */
 	if (!hash_expected) {
 		if (!hash_location)
-- 
cgit v1.2.3-70-g09d2


From 076fb7223357769c39f3ddf900bba6752369c76a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 17 Apr 2008 12:48:12 +0900
Subject: tcp md5sig: Remove redundant protocol argument.

Protocol is always TCP, so remove useless protocol argument.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/tcp.h     |  2 --
 net/ipv4/tcp_ipv4.c   | 20 ++++++++------------
 net/ipv4/tcp_output.c |  3 +--
 net/ipv6/tcp_ipv6.c   | 17 +++++++----------
 4 files changed, 16 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b1c4b78f1cb..ae2549c45fc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1123,7 +1123,6 @@ extern int			tcp_v4_calc_md5_hash(char *md5_hash,
 						     struct dst_entry *dst,
 						     struct request_sock *req,
 						     struct tcphdr *th,
-						     int protocol,
 						     unsigned int tcplen);
 extern struct tcp_md5sig_key	*tcp_v4_md5_lookup(struct sock *sk,
 						   struct sock *addr_sk);
@@ -1373,7 +1372,6 @@ struct tcp_sock_af_ops {
 						  struct dst_entry *dst,
 						  struct request_sock *req,
 						  struct tcphdr *th,
-						  int protocol,
 						  unsigned int len);
 	int			(*md5_add) (struct sock *sk,
 					    struct sock *addr_sk,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 56f55093364..f25445d21bd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -95,8 +95,7 @@ static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
 						   __be32 addr);
 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				   __be32 saddr, __be32 daddr,
-				   struct tcphdr *th, int protocol,
-				   unsigned int tcplen);
+				   struct tcphdr *th, unsigned int tcplen);
 #endif
 
 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
@@ -586,8 +585,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 					key,
 					ip_hdr(skb)->daddr,
 					ip_hdr(skb)->saddr,
-					&rep.th, IPPROTO_TCP,
-					arg.iov[0].iov_len);
+					&rep.th, arg.iov[0].iov_len);
 	}
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -680,8 +678,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 					key,
 					ip_hdr(skb)->daddr,
 					ip_hdr(skb)->saddr,
-					&rep.th, IPPROTO_TCP,
-					arg.iov[0].iov_len);
+					&rep.th, arg.iov[0].iov_len);
 	}
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -1006,7 +1003,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 
 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				   __be32 saddr, __be32 daddr,
-				   struct tcphdr *th, int protocol,
+				   struct tcphdr *th,
 				   unsigned int tcplen)
 {
 	struct scatterlist sg[4];
@@ -1039,7 +1036,7 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 	bp->saddr = saddr;
 	bp->daddr = daddr;
 	bp->pad = 0;
-	bp->protocol = protocol;
+	bp->protocol = IPPROTO_TCP;
 	bp->len = htons(tcplen);
 
 	sg_init_table(sg, 4);
@@ -1099,7 +1096,7 @@ int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 			 struct sock *sk,
 			 struct dst_entry *dst,
 			 struct request_sock *req,
-			 struct tcphdr *th, int protocol,
+			 struct tcphdr *th,
 			 unsigned int tcplen)
 {
 	__be32 saddr, daddr;
@@ -1115,7 +1112,7 @@ int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 	}
 	return tcp_v4_do_calc_md5_hash(md5_hash, key,
 				       saddr, daddr,
-				       th, protocol, tcplen);
+				       th, tcplen);
 }
 
 EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
@@ -1166,8 +1163,7 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 	genhash = tcp_v4_do_calc_md5_hash(newhash,
 					  hash_expected,
 					  iph->saddr, iph->daddr,
-					  th, sk->sk_protocol,
-					  skb->len);
+					  th, skb->len);
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 		if (net_ratelimit()) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ad993ecb481..f3ffd674edd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -607,7 +607,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 					       md5,
 					       sk, NULL, NULL,
 					       tcp_hdr(skb),
-					       sk->sk_protocol,
 					       skb->len);
 	}
 #endif
@@ -2266,7 +2265,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		tp->af_specific->calc_md5_hash(md5_hash_location,
 					       md5,
 					       NULL, dst, req,
-					       tcp_hdr(skb), sk->sk_protocol,
+					       tcp_hdr(skb),
 					       skb->len);
 	}
 #endif
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index dd4ddb30a3a..334d21c23da 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -736,8 +736,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
 static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				   struct in6_addr *saddr,
 				   struct in6_addr *daddr,
-				   struct tcphdr *th, int protocol,
-				   unsigned int tcplen)
+				   struct tcphdr *th, unsigned int tcplen)
 {
 	struct scatterlist sg[4];
 	__u16 data_len;
@@ -761,7 +760,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 	ipv6_addr_copy(&bp->saddr, saddr);
 	ipv6_addr_copy(&bp->daddr, daddr);
 	bp->len = htonl(tcplen);
-	bp->protocol = htonl(protocol);
+	bp->protocol = htonl(IPPROTO_TCP);
 
 	sg_init_table(sg, 4);
 
@@ -821,8 +820,7 @@ static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				struct sock *sk,
 				struct dst_entry *dst,
 				struct request_sock *req,
-				struct tcphdr *th, int protocol,
-				unsigned int tcplen)
+				struct tcphdr *th, unsigned int tcplen)
 {
 	struct in6_addr *saddr, *daddr;
 
@@ -835,7 +833,7 @@ static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 	}
 	return tcp_v6_do_calc_md5_hash(md5_hash, key,
 				       saddr, daddr,
-				       th, protocol, tcplen);
+				       th, tcplen);
 }
 
 static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
@@ -879,8 +877,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 	genhash = tcp_v6_do_calc_md5_hash(newhash,
 					  hash_expected,
 					  &ip6h->saddr, &ip6h->daddr,
-					  th, sk->sk_protocol,
-					  skb->len);
+					  th, skb->len);
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 		if (net_ratelimit()) {
 			printk(KERN_INFO "MD5 Hash %s for "
@@ -1020,7 +1017,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
 					&ipv6_hdr(skb)->daddr,
 					&ipv6_hdr(skb)->saddr,
-					t1, IPPROTO_TCP, tot_len);
+					t1, tot_len);
 	}
 #endif
 
@@ -1126,7 +1123,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 		tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
 					&ipv6_hdr(skb)->daddr,
 					&ipv6_hdr(skb)->saddr,
-					t1, IPPROTO_TCP, tot_len);
+					t1, tot_len);
 	}
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 8d26d76dd4a4c87ef037a44a42a0608ffc730199 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 17 Apr 2008 13:19:16 +0900
Subject: tcp md5sig: Share most of hash calcucaltion bits between IPv4 and
 IPv6.

We can share most part of the hash calculation code because
the only difference between IPv4 and IPv6 is their pseudo headers.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/tcp.h   |  7 ++++++
 net/ipv4/tcp.c      | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c | 52 +++------------------------------------
 net/ipv6/tcp_ipv6.c | 53 ++++------------------------------------
 4 files changed, 86 insertions(+), 96 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index ae2549c45fc..eac26b73bcc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1117,6 +1117,13 @@ struct tcp_md5sig_pool {
 #define TCP_MD5SIG_MAXKEYS	(~(u32)0)	/* really?! */
 
 /* - functions */
+extern int			tcp_calc_md5_hash(char *md5_hash,
+						  struct tcp_md5sig_key *key,
+						  int bplen,
+						  struct tcphdr *th,
+						  unsigned int tcplen,
+						  struct tcp_md5sig_pool *hp);
+
 extern int			tcp_v4_calc_md5_hash(char *md5_hash,
 						     struct tcp_md5sig_key *key,
 						     struct sock *sk,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab66683b804..6efbae0e551 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2459,6 +2459,76 @@ static unsigned long tcp_md5sig_users;
 static struct tcp_md5sig_pool **tcp_md5sig_pool;
 static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
 
+int tcp_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
+		      int bplen,
+		      struct tcphdr *th, unsigned int tcplen,
+		      struct tcp_md5sig_pool *hp)
+{
+	struct scatterlist sg[4];
+	__u16 data_len;
+	int block = 0;
+	__sum16 cksum;
+	struct hash_desc *desc = &hp->md5_desc;
+	int err;
+	unsigned int nbytes = 0;
+
+	sg_init_table(sg, 4);
+
+	/* 1. The TCP pseudo-header */
+	sg_set_buf(&sg[block++], &hp->md5_blk, bplen);
+	nbytes += bplen;
+
+	/* 2. The TCP header, excluding options, and assuming a
+	 * checksum of zero
+	 */
+	cksum = th->check;
+	th->check = 0;
+	sg_set_buf(&sg[block++], th, sizeof(*th));
+	nbytes += sizeof(*th);
+
+	/* 3. The TCP segment data (if any) */
+	data_len = tcplen - (th->doff << 2);
+	if (data_len > 0) {
+		u8 *data = (u8 *)th + (th->doff << 2);
+		sg_set_buf(&sg[block++], data, data_len);
+		nbytes += data_len;
+	}
+
+	/* 4. an independently-specified key or password, known to both
+	 * TCPs and presumably connection-specific
+	 */
+	sg_set_buf(&sg[block++], key->key, key->keylen);
+	nbytes += key->keylen;
+
+	sg_mark_end(&sg[block - 1]);
+
+	/* Now store the hash into the packet */
+	err = crypto_hash_init(desc);
+	if (err) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "%s(): hash_init failed\n", __func__);
+		return -1;
+	}
+	err = crypto_hash_update(desc, sg, nbytes);
+	if (err) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "%s(): hash_update failed\n", __func__);
+		return -1;
+	}
+	err = crypto_hash_final(desc, md5_hash);
+	if (err) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "%s(): hash_final failed\n", __func__);
+		return -1;
+	}
+
+	/* Reset header */
+	th->check = cksum;
+
+	return 0;
+}
+EXPORT_SYMBOL(tcp_calc_md5_hash);
+
 static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
 {
 	int cpu;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f25445d21bd..e331cdbd095 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1006,15 +1006,9 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				   struct tcphdr *th,
 				   unsigned int tcplen)
 {
-	struct scatterlist sg[4];
-	__u16 data_len;
-	int block = 0;
-	__sum16 old_checksum;
 	struct tcp_md5sig_pool *hp;
 	struct tcp4_pseudohdr *bp;
-	struct hash_desc *desc;
 	int err;
-	unsigned int nbytes = 0;
 
 	/*
 	 * Okay, so RFC2385 is turned on for this connection,
@@ -1026,10 +1020,9 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 		goto clear_hash_noput;
 
 	bp = &hp->md5_blk.ip4;
-	desc = &hp->md5_desc;
 
 	/*
-	 * 1. the TCP pseudo-header (in the order: source IP address,
+	 * The TCP pseudo-header (in the order: source IP address,
 	 * destination IP address, zero-padded protocol number, and
 	 * segment length)
 	 */
@@ -1039,50 +1032,13 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 	bp->protocol = IPPROTO_TCP;
 	bp->len = htons(tcplen);
 
-	sg_init_table(sg, 4);
-
-	sg_set_buf(&sg[block++], bp, sizeof(*bp));
-	nbytes += sizeof(*bp);
-
-	/* 2. the TCP header, excluding options, and assuming a
-	 * checksum of zero/
-	 */
-	old_checksum = th->check;
-	th->check = 0;
-	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
-	nbytes += sizeof(struct tcphdr);
-
-	/* 3. the TCP segment data (if any) */
-	data_len = tcplen - (th->doff << 2);
-	if (data_len > 0) {
-		unsigned char *data = (unsigned char *)th + (th->doff << 2);
-		sg_set_buf(&sg[block++], data, data_len);
-		nbytes += data_len;
-	}
-
-	/* 4. an independently-specified key or password, known to both
-	 * TCPs and presumably connection-specific
-	 */
-	sg_set_buf(&sg[block++], key->key, key->keylen);
-	nbytes += key->keylen;
-
-	sg_mark_end(&sg[block - 1]);
-
-	/* Now store the Hash into the packet */
-	err = crypto_hash_init(desc);
-	if (err)
-		goto clear_hash;
-	err = crypto_hash_update(desc, sg, nbytes);
-	if (err)
-		goto clear_hash;
-	err = crypto_hash_final(desc, md5_hash);
+	err = tcp_calc_md5_hash(md5_hash, key, sizeof(*bp),
+				th, tcplen, hp);
 	if (err)
 		goto clear_hash;
 
-	/* Reset header, and free up the crypto */
+	/* Free up the crypto pool */
 	tcp_put_md5sig_pool();
-	th->check = old_checksum;
-
 out:
 	return 0;
 clear_hash:
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 334d21c23da..0ae0311082f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -738,23 +738,17 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				   struct in6_addr *daddr,
 				   struct tcphdr *th, unsigned int tcplen)
 {
-	struct scatterlist sg[4];
-	__u16 data_len;
-	int block = 0;
-	__sum16 cksum;
 	struct tcp_md5sig_pool *hp;
 	struct tcp6_pseudohdr *bp;
-	struct hash_desc *desc;
 	int err;
-	unsigned int nbytes = 0;
 
 	hp = tcp_get_md5sig_pool();
 	if (!hp) {
 		printk(KERN_WARNING "%s(): hash pool not found...\n", __func__);
 		goto clear_hash_noput;
 	}
+
 	bp = &hp->md5_blk.ip6;
-	desc = &hp->md5_desc;
 
 	/* 1. TCP pseudo-header (RFC2460) */
 	ipv6_addr_copy(&bp->saddr, saddr);
@@ -762,51 +756,14 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 	bp->len = htonl(tcplen);
 	bp->protocol = htonl(IPPROTO_TCP);
 
-	sg_init_table(sg, 4);
-
-	sg_set_buf(&sg[block++], bp, sizeof(*bp));
-	nbytes += sizeof(*bp);
-
-	/* 2. TCP header, excluding options */
-	cksum = th->check;
-	th->check = 0;
-	sg_set_buf(&sg[block++], th, sizeof(*th));
-	nbytes += sizeof(*th);
-
-	/* 3. TCP segment data (if any) */
-	data_len = tcplen - (th->doff << 2);
-	if (data_len > 0) {
-		u8 *data = (u8 *)th + (th->doff << 2);
-		sg_set_buf(&sg[block++], data, data_len);
-		nbytes += data_len;
-	}
-
-	/* 4. shared key */
-	sg_set_buf(&sg[block++], key->key, key->keylen);
-	nbytes += key->keylen;
-
-	sg_mark_end(&sg[block - 1]);
+	err = tcp_calc_md5_hash(md5_hash, key, sizeof(*bp),
+				th, tcplen, hp);
 
-	/* Now store the hash into the packet */
-	err = crypto_hash_init(desc);
-	if (err) {
-		printk(KERN_WARNING "%s(): hash_init failed\n", __func__);
-		goto clear_hash;
-	}
-	err = crypto_hash_update(desc, sg, nbytes);
-	if (err) {
-		printk(KERN_WARNING "%s(): hash_update failed\n", __func__);
-		goto clear_hash;
-	}
-	err = crypto_hash_final(desc, md5_hash);
-	if (err) {
-		printk(KERN_WARNING "%s(): hash_final failed\n", __func__);
+	if (err)
 		goto clear_hash;
-	}
 
-	/* Reset header, and free up the crypto */
+	/* Free up the crypto pool */
 	tcp_put_md5sig_pool();
-	th->check = cksum;
 out:
 	return 0;
 clear_hash:
-- 
cgit v1.2.3-70-g09d2


From 9501f9722922f2e80e1f9dc6682311d65c2b5690 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 18 Apr 2008 12:45:16 +0900
Subject: tcp md5sig: Let the caller pass appropriate key for
 tcp_v{4,6}_do_calc_md5_hash().

As we do for other socket/timewait-socket specific parameters,
let the callers pass appropriate arguments to
tcp_v{4,6}_do_calc_md5_hash().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/tcp.h   | 10 ++++++++++
 net/ipv4/tcp_ipv4.c | 50 ++++++++++++++++++++------------------------------
 net/ipv6/tcp_ipv6.c | 33 ++++++++++++---------------------
 3 files changed, 42 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index eac26b73bcc..07005ebb47a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1142,6 +1142,16 @@ extern int			tcp_v4_md5_do_add(struct sock *sk,
 extern int			tcp_v4_md5_do_del(struct sock *sk,
 						  __be32 addr);
 
+#ifdef CONFIG_TCP_MD5SIG
+#define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_keylen ? 		 \
+				 &(struct tcp_md5sig_key) {		 \
+					.key = (twsk)->tw_md5_key,	 \
+					.keylen = (twsk)->tw_md5_keylen, \
+				} : NULL)
+#else
+#define tcp_twsk_md5_key(twsk)	NULL
+#endif
+
 extern struct tcp_md5sig_pool	**tcp_alloc_md5sig_pool(void);
 extern void			tcp_free_md5sig_pool(void);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e331cdbd095..f7ff2a64a7f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -96,6 +96,12 @@ static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 				   __be32 saddr, __be32 daddr,
 				   struct tcphdr *th, unsigned int tcplen);
+#else
+static inline
+struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
+{
+	return NULL;
+}
 #endif
 
 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
@@ -604,9 +610,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
    outside socket context is ugly, certainly. What can I do?
  */
 
-static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
-			    struct sk_buff *skb, u32 seq, u32 ack,
-			    u32 win, u32 ts)
+static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+			    u32 win, u32 ts, int oif,
+			    struct tcp_md5sig_key *key)
 {
 	struct tcphdr *th = tcp_hdr(skb);
 	struct {
@@ -618,10 +624,6 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 			];
 	} rep;
 	struct ip_reply_arg arg;
-#ifdef CONFIG_TCP_MD5SIG
-	struct tcp_md5sig_key *key;
-	struct tcp_md5sig_key tw_key;
-#endif
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
@@ -647,23 +649,6 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 	rep.th.window  = htons(win);
 
 #ifdef CONFIG_TCP_MD5SIG
-	/*
-	 * The SKB holds an imcoming packet, but may not have a valid ->sk
-	 * pointer. This is especially the case when we're dealing with a
-	 * TIME_WAIT ack, because the sk structure is long gone, and only
-	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
-	 * structure, and we use it in preference.  I believe that (twsk ||
-	 * skb->sk) holds true, but we program defensively.
-	 */
-	if (!twsk && skb->sk) {
-		key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
-	} else if (twsk && twsk->tw_md5_keylen) {
-		tw_key.key = twsk->tw_md5_key;
-		tw_key.keylen = twsk->tw_md5_keylen;
-		key = &tw_key;
-	} else
-		key = NULL;
-
 	if (key) {
 		int offset = (ts) ? 3 : 0;
 
@@ -685,8 +670,8 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 				      ip_hdr(skb)->saddr, /* XXX */
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
-	if (twsk)
-		arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
+	if (oif)
+		arg.bound_dev_if = oif;
 
 	ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb,
 		      &arg, arg.iov[0].iov_len);
@@ -699,9 +684,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	struct inet_timewait_sock *tw = inet_twsk(sk);
 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-	tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-			tcptw->tw_ts_recent);
+			tcptw->tw_ts_recent,
+			tw->tw_bound_dev_if,
+			tcp_twsk_md5_key(tcptw)
+			);
 
 	inet_twsk_put(tw);
 }
@@ -709,9 +697,11 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
 				  struct request_sock *req)
 {
-	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
+	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
-			req->ts_recent);
+			req->ts_recent,
+			0,
+			tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr));
 }
 
 /*
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0ae0311082f..ecdbb9f4654 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -82,6 +82,12 @@ static struct inet_connection_sock_af_ops ipv6_specific;
 #ifdef CONFIG_TCP_MD5SIG
 static struct tcp_sock_af_ops tcp_sock_ipv6_specific;
 static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
+#else
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+						   struct in6_addr *addr)
+{
+	return NULL;
+}
 #endif
 
 static void tcp_v6_hash(struct sock *sk)
@@ -1011,8 +1017,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	kfree_skb(buff);
 }
 
-static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
-			    struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
+			    struct tcp_md5sig_key *key)
 {
 	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
@@ -1021,22 +1027,6 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 	struct sock *ctl_sk = net->ipv6.tcp_sk;
 	unsigned int tot_len = sizeof(struct tcphdr);
 	__be32 *topt;
-#ifdef CONFIG_TCP_MD5SIG
-	struct tcp_md5sig_key *key;
-	struct tcp_md5sig_key tw_key;
-#endif
-
-#ifdef CONFIG_TCP_MD5SIG
-	if (!tw && skb->sk) {
-		key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr);
-	} else if (tw && tw->tw_md5_keylen) {
-		tw_key.key = tw->tw_md5_key;
-		tw_key.keylen = tw->tw_md5_keylen;
-		key = &tw_key;
-	} else {
-		key = NULL;
-	}
-#endif
 
 	if (ts)
 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
@@ -1116,16 +1106,17 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	struct inet_timewait_sock *tw = inet_twsk(sk);
 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-	tcp_v6_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-			tcptw->tw_ts_recent);
+			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));
 
 	inet_twsk_put(tw);
 }
 
 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
 {
-	tcp_v6_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
+	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
+			tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr));
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 0b040829952d84bf2a62526f0e24b624e0699447 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 10 Jun 2008 22:46:50 -0700
Subject: net: remove CVS keywords

This patch removes CVS keywords that weren't updated for a long time
from comments.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h          | 2 --
 include/linux/if_ppp.h             | 2 --
 include/linux/if_tun.h             | 2 --
 include/linux/ip6_tunnel.h         | 4 ----
 include/linux/ppp-comp.h           | 2 --
 include/linux/ppp_defs.h           | 2 --
 include/linux/sunrpc/auth_gss.h    | 2 --
 include/linux/sunrpc/gss_api.h     | 2 --
 include/linux/sunrpc/svcauth_gss.h | 3 ---
 include/net/inetpeer.h             | 2 --
 include/net/ip6_tunnel.h           | 4 ----
 include/net/ipconfig.h             | 2 --
 include/net/ipv6.h                 | 2 --
 include/net/snmp.h                 | 2 --
 net/bluetooth/bnep/bnep.h          | 4 ----
 net/bluetooth/bnep/core.c          | 4 ----
 net/bluetooth/bnep/netdev.c        | 4 ----
 net/bluetooth/bnep/sock.c          | 4 ----
 net/bluetooth/rfcomm/core.c        | 2 --
 net/bluetooth/rfcomm/sock.c        | 2 --
 net/bluetooth/rfcomm/tty.c         | 2 --
 net/bridge/br.c                    | 2 --
 net/bridge/br_device.c             | 2 --
 net/bridge/br_fdb.c                | 2 --
 net/bridge/br_forward.c            | 2 --
 net/bridge/br_if.c                 | 2 --
 net/bridge/br_input.c              | 2 --
 net/bridge/br_ioctl.c              | 2 --
 net/bridge/br_notify.c             | 2 --
 net/bridge/br_private.h            | 2 --
 net/bridge/br_private_stp.h        | 2 --
 net/bridge/br_stp.c                | 2 --
 net/bridge/br_stp_bpdu.c           | 2 --
 net/bridge/br_stp_if.c             | 2 --
 net/bridge/br_stp_timer.c          | 2 --
 net/core/skbuff.c                  | 2 --
 net/core/sock.c                    | 2 --
 net/ipv4/af_inet.c                 | 2 --
 net/ipv4/arp.c                     | 2 --
 net/ipv4/devinet.c                 | 2 --
 net/ipv4/fib_frontend.c            | 2 --
 net/ipv4/fib_hash.c                | 2 --
 net/ipv4/fib_semantics.c           | 2 --
 net/ipv4/fib_trie.c                | 2 --
 net/ipv4/icmp.c                    | 2 --
 net/ipv4/igmp.c                    | 2 --
 net/ipv4/inet_diag.c               | 2 --
 net/ipv4/inetpeer.c                | 2 --
 net/ipv4/ip_forward.c              | 2 --
 net/ipv4/ip_fragment.c             | 2 --
 net/ipv4/ip_input.c                | 2 --
 net/ipv4/ip_options.c              | 2 --
 net/ipv4/ip_output.c               | 2 --
 net/ipv4/ip_sockglue.c             | 2 --
 net/ipv4/ipconfig.c                | 2 --
 net/ipv4/ipip.c                    | 2 --
 net/ipv4/ipmr.c                    | 2 --
 net/ipv4/ipvs/ip_vs_app.c          | 2 --
 net/ipv4/ipvs/ip_vs_conn.c         | 2 --
 net/ipv4/ipvs/ip_vs_core.c         | 2 --
 net/ipv4/ipvs/ip_vs_ctl.c          | 2 --
 net/ipv4/ipvs/ip_vs_dh.c           | 2 --
 net/ipv4/ipvs/ip_vs_est.c          | 2 --
 net/ipv4/ipvs/ip_vs_ftp.c          | 2 --
 net/ipv4/ipvs/ip_vs_lblc.c         | 2 --
 net/ipv4/ipvs/ip_vs_lblcr.c        | 2 --
 net/ipv4/ipvs/ip_vs_lc.c           | 2 --
 net/ipv4/ipvs/ip_vs_nq.c           | 2 --
 net/ipv4/ipvs/ip_vs_proto.c        | 2 --
 net/ipv4/ipvs/ip_vs_proto_ah.c     | 2 --
 net/ipv4/ipvs/ip_vs_proto_esp.c    | 2 --
 net/ipv4/ipvs/ip_vs_proto_tcp.c    | 2 --
 net/ipv4/ipvs/ip_vs_proto_udp.c    | 2 --
 net/ipv4/ipvs/ip_vs_rr.c           | 2 --
 net/ipv4/ipvs/ip_vs_sched.c        | 2 --
 net/ipv4/ipvs/ip_vs_sed.c          | 2 --
 net/ipv4/ipvs/ip_vs_sh.c           | 2 --
 net/ipv4/ipvs/ip_vs_sync.c         | 2 --
 net/ipv4/ipvs/ip_vs_wlc.c          | 2 --
 net/ipv4/ipvs/ip_vs_wrr.c          | 2 --
 net/ipv4/ipvs/ip_vs_xmit.c         | 2 --
 net/ipv4/proc.c                    | 2 --
 net/ipv4/protocol.c                | 2 --
 net/ipv4/raw.c                     | 2 --
 net/ipv4/route.c                   | 2 --
 net/ipv4/syncookies.c              | 2 --
 net/ipv4/sysctl_net_ipv4.c         | 2 --
 net/ipv4/tcp.c                     | 2 --
 net/ipv4/tcp_diag.c                | 2 --
 net/ipv4/tcp_input.c               | 2 --
 net/ipv4/tcp_ipv4.c                | 2 --
 net/ipv4/tcp_minisocks.c           | 2 --
 net/ipv4/tcp_output.c              | 2 --
 net/ipv4/tcp_timer.c               | 2 --
 net/ipv4/udp.c                     | 2 --
 net/ipv4/udplite.c                 | 2 --
 net/ipv6/addrconf.c                | 2 --
 net/ipv6/af_inet6.c                | 2 --
 net/ipv6/datagram.c                | 2 --
 net/ipv6/exthdrs.c                 | 2 --
 net/ipv6/icmp.c                    | 2 --
 net/ipv6/ip6_fib.c                 | 2 --
 net/ipv6/ip6_input.c               | 2 --
 net/ipv6/ip6_output.c              | 2 --
 net/ipv6/ip6_tunnel.c              | 2 --
 net/ipv6/ipv6_sockglue.c           | 2 --
 net/ipv6/mcast.c                   | 2 --
 net/ipv6/proc.c                    | 2 --
 net/ipv6/protocol.c                | 2 --
 net/ipv6/raw.c                     | 2 --
 net/ipv6/reassembly.c              | 2 --
 net/ipv6/route.c                   | 2 --
 net/ipv6/sit.c                     | 2 --
 net/ipv6/tcp_ipv6.c                | 2 --
 net/ipv6/udp.c                     | 2 --
 net/ipv6/udplite.c                 | 2 --
 net/packet/af_packet.c             | 2 --
 net/sched/sch_htb.c                | 2 --
 net/sunrpc/auth_gss/auth_gss.c     | 2 --
 net/sysctl_net.c                   | 1 -
 net/unix/af_unix.c                 | 2 --
 121 files changed, 254 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 950e13d09e0..6badb3e2c4e 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -4,8 +4,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: if_bridge.h,v 1.1 2000/02/18 16:47:01 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/include/linux/if_ppp.h b/include/linux/if_ppp.h
index 0f2f70d4e48..c3b1f856270 100644
--- a/include/linux/if_ppp.h
+++ b/include/linux/if_ppp.h
@@ -1,5 +1,3 @@
-/*	$Id: if_ppp.h,v 1.21 2000/03/27 06:03:36 paulus Exp $	*/
-
 /*
  * if_ppp.h - Point-to-Point Protocol definitions.
  *
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 8c71fe2fb1f..18f31b6187a 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -11,8 +11,6 @@
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  *  GNU General Public License for more details.
- *
- *  $Id: if_tun.h,v 1.2 2001/06/01 18:39:47 davem Exp $
  */
 
 #ifndef __IF_TUN_H
diff --git a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h
index af3f4a70f3d..1e7cc4af40d 100644
--- a/include/linux/ip6_tunnel.h
+++ b/include/linux/ip6_tunnel.h
@@ -1,7 +1,3 @@
-/*
- * $Id$
- */
-
 #ifndef _IP6_TUNNEL_H
 #define _IP6_TUNNEL_H
 
diff --git a/include/linux/ppp-comp.h b/include/linux/ppp-comp.h
index e86a7a5cf35..b8d4ddd2273 100644
--- a/include/linux/ppp-comp.h
+++ b/include/linux/ppp-comp.h
@@ -23,8 +23,6 @@
  * ON AN "AS IS" BASIS, AND THE AUSTRALIAN NATIONAL UNIVERSITY HAS NO
  * OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
  * OR MODIFICATIONS.
- *
- * $Id: ppp-comp.h,v 1.6 1997/11/27 06:04:44 paulus Exp $
  */
 
 /*
diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h
index c6b13ff8502..6e8adc77522 100644
--- a/include/linux/ppp_defs.h
+++ b/include/linux/ppp_defs.h
@@ -1,5 +1,3 @@
-/*	$Id: ppp_defs.h,v 1.2 1994/09/21 01:31:06 paulus Exp $	*/
-
 /*
  * ppp_defs.h - PPP definitions.
  *
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index fec6899bf35..d48d4e605f7 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -7,8 +7,6 @@
  * Andy Adamson <andros@umich.edu>
  * Bruce Fields <bfields@umich.edu>
  * Copyright (c) 2000 The Regents of the University of Michigan
- *
- * $Id$
  */
 
 #ifndef _LINUX_SUNRPC_AUTH_GSS_H
diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index 459c5fc11d5..03f33330ece 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -7,8 +7,6 @@
  * Andy Adamson <andros@umich.edu>
  * Bruce Fields <bfields@umich.edu>
  * Copyright (c) 2000 The Regents of the University of Michigan
- *
- * $Id$
  */
 
 #ifndef _LINUX_SUNRPC_GSS_API_H
diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
index 417a1def56d..c9165d9771a 100644
--- a/include/linux/sunrpc/svcauth_gss.h
+++ b/include/linux/sunrpc/svcauth_gss.h
@@ -3,9 +3,6 @@
  *
  * Bruce Fields <bfields@umich.edu>
  * Copyright (c) 2002 The Regents of the Unviersity of Michigan
- *
- * $Id$
- *
  */
 
 #ifndef _LINUX_SUNRPC_SVCAUTH_GSS_H
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index ad8404b5611..15e1f8fe4c1 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -1,8 +1,6 @@
 /*
  *		INETPEER - A storage for permanent information about peers
  *
- *  Version:	$Id: inetpeer.h,v 1.2 2002/01/12 07:54:56 davem Exp $
- *
  *  Authors:	Andrey V. Savochkin <saw@msu.ru>
  */
 
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 3780592ebe8..83b4e008b16 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -1,7 +1,3 @@
-/*
- * $Id$
- */
-
 #ifndef _NET_IP6_TUNNEL_H
 #define _NET_IP6_TUNNEL_H
 
diff --git a/include/net/ipconfig.h b/include/net/ipconfig.h
index 3924d7d2cb1..c74cc1bd5a0 100644
--- a/include/net/ipconfig.h
+++ b/include/net/ipconfig.h
@@ -1,6 +1,4 @@
 /*
- *  $Id: ipconfig.h,v 1.4 2001/04/30 04:51:46 davem Exp $
- *
  *  Copyright (C) 1997 Martin Mares
  *
  *  Automatic IP Layer Configuration
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e0a612bc9c4..7f7db8d5793 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -4,8 +4,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: ipv6.h,v 1.1 2002/05/20 15:13:07 jgrimm Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
diff --git a/include/net/snmp.h b/include/net/snmp.h
index ce2f4850751..57c93628695 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -14,8 +14,6 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  *
- *		$Id: snmp.h,v 1.19 2001/06/14 13:40:46 davem Exp $
- *
  */
  
 #ifndef _SNMP_H
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index e69244dd8de..b69bf4e7c48 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -16,10 +16,6 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
 
-/*
- * $Id: bnep.h,v 1.5 2002/08/04 21:23:58 maxk Exp $
- */
-
 #ifndef _BNEP_H
 #define _BNEP_H
 
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index f85d94643aa..1d98a1b80da 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -25,10 +25,6 @@
    SOFTWARE IS DISCLAIMED.
 */
 
-/*
- * $Id: core.c,v 1.20 2002/08/04 21:23:58 maxk Exp $
- */
-
 #include <linux/module.h>
 
 #include <linux/kernel.h>
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 95e3837e431..d9fa0ab2c87 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -25,10 +25,6 @@
    SOFTWARE IS DISCLAIMED.
 */
 
-/*
- * $Id: netdev.c,v 1.8 2002/08/04 21:23:58 maxk Exp $
- */
-
 #include <linux/module.h>
 
 #include <linux/socket.h>
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 201e5b1ce47..8ffb57f2303 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -24,10 +24,6 @@
    SOFTWARE IS DISCLAIMED.
 */
 
-/*
- * $Id: sock.c,v 1.4 2002/08/04 21:23:58 maxk Exp $
- */
-
 #include <linux/module.h>
 
 #include <linux/types.h>
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 0c2c93735e9..b4fb84e398e 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -23,8 +23,6 @@
 
 /*
  * Bluetooth RFCOMM core.
- *
- * $Id: core.c,v 1.42 2002/10/01 23:26:25 maxk Exp $
  */
 
 #include <linux/module.h>
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 5083adcbfae..c9054487670 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -23,8 +23,6 @@
 
 /*
  * RFCOMM sockets.
- *
- * $Id: sock.c,v 1.24 2002/10/03 01:00:34 maxk Exp $
  */
 
 #include <linux/module.h>
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index c9191871c1e..be84f4fc147 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -23,8 +23,6 @@
 
 /*
  * RFCOMM TTY.
- *
- * $Id: tty.c,v 1.24 2002/10/03 01:54:38 holtmann Exp $
  */
 
 #include <linux/module.h>
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 8f3c58e5f7a..cede010f4dd 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br.c,v 1.47 2001/12/24 00:56:41 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 626c7795ae3..a6ffc6c2a69 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_device.c,v 1.6 2001/12/24 00:59:55 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 72c5976a5ce..4de74cdd091 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_fdb.c,v 1.6 2002/01/17 00:57:07 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index a4711674b3d..512645727f5 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_forward.c,v 1.4 2001/08/14 22:05:57 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c2397f503b0..143c954681b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_if.c,v 1.7 2001/12/24 00:59:55 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index fa0f5711a99..0145e941671 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_input.c,v 1.10 2001/12/24 04:50:20 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 0655a5f07f5..eeee218eed8 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_ioctl.c,v 1.4 2000/11/08 05:16:40 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 00644a544e3..88d8ec7b314 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_notify.c,v 1.2 2000/02/21 15:51:34 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0243cb489ed..83ff5861c2d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -4,8 +4,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_private.h,v 1.7 2001/12/24 00:59:55 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index e29f01ac1ad..8b650f7fbfa 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -4,8 +4,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_private_stp.h,v 1.3 2001/02/05 06:03:47 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index e38034aa56f..284d1b2fa1f 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_stp.c,v 1.4 2000/06/19 10:13:35 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index ddeb6e5d45d..9dc2de65696 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_stp_bpdu.c,v 1.3 2001/11/10 02:35:25 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 1a430eccec9..1a4e5c37a0c 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_stp_if.c,v 1.4 2001/04/14 21:14:39 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 77f5255e691..772a140bfdf 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_stp_timer.c,v 1.3 2000/05/05 02:17:17 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1e556d31211..3e18f8525e8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4,8 +4,6 @@
  *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
  *			Florian La Roche <rzsfl@rz.uni-sb.de>
  *
- *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
- *
  *	Fixes:
  *		Alan Cox	:	Fixed the worst of the load
  *					balancer bugs.
diff --git a/net/core/sock.c b/net/core/sock.c
index 88094cb09c0..3879bf65897 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -7,8 +7,6 @@
  *		handler for protocols to use and generic option handler.
  *
  *
- * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Florian La Roche, <flla@stud.uni-sb.de>
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 24eca23c2db..42bd24b64b5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,8 +5,6 @@
  *
  *		PF_INET protocol family socket handler.
  *
- * Version:	$Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Florian La Roche, <flla@stud.uni-sb.de>
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9b539fa9fe1..20c515a1be2 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,6 +1,4 @@
 /* linux/net/ipv4/arp.c
- *
- * Version:	$Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $
  *
  * Copyright (C) 1994 by Florian  La Roche
  *
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 61011e1d580..f8c0b0aea93 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,8 +1,6 @@
 /*
  *	NET3	IP device support routines.
  *
- *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
- *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
  *		as published by the Free Software Foundation; either version
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0b2ac6a3d90..5ad01d63f83 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -5,8 +5,6 @@
  *
  *		IPv4 Forwarding Information Base: FIB frontend.
  *
- * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  *		This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 2e2fc3376ac..eeec4bf982b 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -5,8 +5,6 @@
  *
  *		IPv4 FIB: lookup engine and maintenance routines.
  *
- * Version:	$Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  *		This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3b83c34019f..9335eba683c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -5,8 +5,6 @@
  *
  *		IPv4 Forwarding Information Base: semantics.
  *
- * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  *		This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4b02d14e7ab..394db9c941a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -22,8 +22,6 @@
  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
  *
- * Version:	$Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
- *
  *
  * Code from fib_hash has been reused which includes the following header:
  *
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 87397351dda..aa7cf46853b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,8 +3,6 @@
  *
  *		Alan Cox, <alan@redhat.com>
  *
- *	Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2769dc4a4c8..68e84a933e9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,8 +8,6 @@
  *	the older version didn't come out right using gcc 2.5.8, the newer one
  *	seems to fall out with gcc 2.6.2.
  *
- *	Version: $Id: igmp.c,v 1.47 2002/02/01 22:01:03 davem Exp $
- *
  *	Authors:
  *		Alan Cox <Alan.Cox@linux.org>
  *
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index da97695e709..c10036e7a46 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1,8 +1,6 @@
 /*
  * inet_diag.c	Module for monitoring INET transport protocols sockets.
  *
- * Version:	$Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  *	This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index af995198f64..a456ceeac3f 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -3,8 +3,6 @@
  *
  *  This source is covered by the GNU GPL, the same as all kernel sources.
  *
- *  Version:	$Id: inetpeer.c,v 1.7 2001/09/20 21:22:50 davem Exp $
- *
  *  Authors:	Andrey V. Savochkin <saw@msu.ru>
  */
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 4813c39b438..37d36a3f33c 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -5,8 +5,6 @@
  *
  *		The IP forwarding functionality.
  *
- * Version:	$Id: ip_forward.c,v 1.48 2000/12/13 18:31:48 davem Exp $
- *
  * Authors:	see ip.c
  *
  * Fixes:
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index be1cb89a8d5..91e32140731 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,8 +5,6 @@
  *
  *		The IP fragmentation functionality.
  *
- * Version:	$Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $
- *
  * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
  *		Alan Cox <Alan.Cox@linux.org>
  *
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ff77a4a7f9e..7c26428ea67 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -5,8 +5,6 @@
  *
  *		The Internet Protocol (IP) module.
  *
- * Version:	$Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Donald Becker, <becker@super.org>
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 33126ad2cfd..be3f18a7a40 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -5,8 +5,6 @@
  *
  *		The options processing module for ip.c
  *
- * Version:	$Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $
- *
  * Authors:	A.N.Kuznetsov
  *
  */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e527628f56c..f1278eecf56 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,8 +5,6 @@
  *
  *		The Internet Protocol (IP) output module.
  *
- * Version:	$Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Donald Becker, <becker@super.org>
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index e0514e82308..105d92a039b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -5,8 +5,6 @@
  *
  *		The IP to API glue.
  *
- * Version:	$Id: ip_sockglue.c,v 1.62 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	see ip.c
  *
  * Fixes:
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index ed45037ce9b..b88aa9afa42 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,6 +1,4 @@
 /*
- *  $Id: ipconfig.c,v 1.46 2002/02/01 22:01:04 davem Exp $
- *
  *  Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or
  *  user-supplied information to configure own IP address and routes.
  *
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 86d8836551b..4c6d2caf920 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -1,8 +1,6 @@
 /*
  *	Linux NET3:	IP/IP protocol decoder.
  *
- *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
- *
  *	Authors:
  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
  *
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a34da4977c7..300ab0c2919 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -9,8 +9,6 @@
  *	as published by the Free Software Foundation; either version
  *	2 of the License, or (at your option) any later version.
  *
- *	Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
- *
  *	Fixes:
  *	Michael Chastain	:	Incorrect size of copying.
  *	Alan Cox		:	Added the cache manager code
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 535abe0c45e..1f1897a1a70 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -1,8 +1,6 @@
 /*
  * ip_vs_app.c: Application module support for IPVS
  *
- * Version:     $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  *              This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 65f1ba11275..f8bdae47a77 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -5,8 +5,6 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
- * Version:     $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *              Julian Anastasov <ja@ssi.bg>
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 963981a9d50..bcf6276ba4b 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -5,8 +5,6 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
- * Version:     $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *              Julian Anastasov <ja@ssi.bg>
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 94c5767c8e0..9a5ace0b4dd 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -5,8 +5,6 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
- * Version:     $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *              Julian Anastasov <ja@ssi.bg>
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index dcf5d46aaa5..8afc1503ed2 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Destination Hashing scheduling module
  *
- * Version:     $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@gnuchina.org>
  *
  *              Inspired by the consistent hashing scheduler patch from
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index dfa0d713c80..bc04eedd6db 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -1,8 +1,6 @@
 /*
  * ip_vs_est.c: simple rate estimator for IPVS
  *
- * Version:     $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  *              This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 59aa166b767..c1c758e4f73 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -1,8 +1,6 @@
 /*
  * ip_vs_ftp.c: IPVS ftp application module
  *
- * Version:	$Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $
- *
  * Authors:	Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  * Changes:
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 3888642706a..0efa3db4b18 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Locality-Based Least-Connection scheduling module
  *
- * Version:     $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@gnuchina.org>
  *
  *              This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index daa260eb21c..8e3bbeb4513 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Locality-Based Least-Connection with Replication scheduler
  *
- * Version:     $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@gnuchina.org>
  *
  *              This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index d88fef90a64..ac9f08e065d 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Least-Connection Scheduling module
  *
- * Version:     $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  *              This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index bc2a9e5f2a7..a46bf258d42 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Never Queue scheduling module
  *
- * Version:     $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  *              This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 4b1c16cbb16..876714f23d6 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -1,8 +1,6 @@
 /*
  * ip_vs_proto.c: transport protocol load balancing support for IPVS
  *
- * Version:     $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Julian Anastasov <ja@ssi.bg>
  *
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 4bf835e1d86..73e0ea87c1f 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -1,8 +1,6 @@
 /*
  * ip_vs_proto_ah.c:	AH IPSec load balancing support for IPVS
  *
- * Version:     $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- *
  * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
  *		Wensong Zhang <wensong@linuxvirtualserver.org>
  *
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index db6a6b7b1a0..21d70c8ffa5 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -1,8 +1,6 @@
 /*
  * ip_vs_proto_esp.c:	ESP IPSec load balancing support for IPVS
  *
- * Version:     $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- *
  * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
  *		Wensong Zhang <wensong@linuxvirtualserver.org>
  *
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index b83dc14b0a4..d0ea467986a 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -1,8 +1,6 @@
 /*
  * ip_vs_proto_tcp.c:	TCP load balancing support for IPVS
  *
- * Version:     $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Julian Anastasov <ja@ssi.bg>
  *
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 75771cb3cd6..c6be5d56823 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -1,8 +1,6 @@
 /*
  * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
  *
- * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Julian Anastasov <ja@ssi.bg>
  *
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index 433f8a94792..c8db12d39e6 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Round-Robin Scheduling module
  *
- * Version:     $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 121a32b1b75..b6476730985 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -5,8 +5,6 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
- * Version:     $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index dd7c128f9db..2a7d3135818 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Shortest Expected Delay scheduling module
  *
- * Version:     $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  *              This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 1b25b00ef1e..b8fdfac6500 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Source Hashing scheduling module
  *
- * Version:     $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@gnuchina.org>
  *
  *              This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index eff54efe035..2d4a86f7332 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -5,8 +5,6 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
- * Version:     $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  * ip_vs_sync:  sync connection info from master load balancer to backups
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index 8a9d913261d..772c3cb4eca 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Weighted Least-Connection Scheduling module
  *
- * Version:     $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
index 85c680add6d..1d6932d7dc9 100644
--- a/net/ipv4/ipvs/ip_vs_wrr.c
+++ b/net/ipv4/ipvs/ip_vs_wrr.c
@@ -1,8 +1,6 @@
 /*
  * IPVS:        Weighted Round-Robin Scheduling module
  *
- * Version:     $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  *              This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index f63006caea0..9892d4aca42 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -1,8 +1,6 @@
 /*
  * ip_vs_xmit.c: various packet transmitters for IPVS
  *
- * Version:     $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Julian Anastasov <ja@ssi.bg>
  *
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 552169b41b1..eb5cee279c5 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -7,8 +7,6 @@
  *		PROC file system.  It is mainly used for debugging and
  *		statistics.
  *
- * Version:	$Id: proc.c,v 1.45 2001/05/16 16:45:35 davem Exp $
- *
  * Authors:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
  *		Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de>
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 971ab9356e5..ea50da0649f 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -5,8 +5,6 @@
  *
  *		INET protocol dispatch tables.
  *
- * Version:	$Id: protocol.c,v 1.14 2001/05/18 02:25:49 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index e7e091d365f..1d0c97c8712 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -5,8 +5,6 @@
  *
  *		RAW - implementation of IP "raw" sockets.
  *
- * Version:	$Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 96be336064f..fe3a0223728 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,8 +5,6 @@
  *
  *		ROUTE - implementation of the IP router.
  *
- * Version:	$Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 73ba98921d6..6317d3c8dc0 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -8,8 +8,6 @@
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
- *
- *  $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
  */
 
 #include <linux/tcp.h>
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c437f804ee3..90160700320 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,8 +1,6 @@
 /*
  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
  *
- * $Id: sysctl_net_ipv4.c,v 1.50 2001/10/20 00:00:11 davem Exp $
- *
  * Begun April 1, 1996, Mike Shaver.
  * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
  */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab66683b804..ad66b09e0bc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,8 +5,6 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 2fbcc7d1b1a..838d491dfda 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -1,8 +1,6 @@
 /*
  * tcp_diag.c	Module for monitoring TCP transport protocols sockets.
  *
- * Version:	$Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  *	This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eba873e9b56..b68c3c7d906 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,8 +5,6 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_input.c,v 1.243 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2..f2926ae1de5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,8 +5,6 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
- *
  *		IPv4 specific functions
  *
  *
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5c..1276cab85e3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -5,8 +5,6 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ad993ecb481..b171ac65cca 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,8 +5,6 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2a..e77e7ae0bf2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,8 +5,6 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 56fcda3694b..355e6d62d48 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -5,8 +5,6 @@
  *
  *		The User Datagram Protocol (UDP).
  *
- * Version:	$Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 72ce26b6c4d..4ad16b6d513 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -1,8 +1,6 @@
 /*
  *  UDPLITE     An implementation of the UDP-Lite protocol (RFC 3828).
  *
- *  Version:    $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $
- *
  *  Authors:    Gerrit Renker       <gerrit@erg.abdn.ac.uk>
  *
  *  Changes:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 147588f4c7c..deb38bf0337 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6,8 +6,6 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
  *
- *	$Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e84b3fd17fb..350457c761e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,8 +7,6 @@
  *
  *	Adapted from linux/net/ipv4/af_inet.c
  *
- *	$Id: af_inet6.c,v 1.66 2002/02/01 22:01:04 davem Exp $
- *
  * 	Fixes:
  *	piggy, Karl Knutson	:	Socket protocol table
  * 	Hideaki YOSHIFUJI	:	sin6_scope_id support
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index b9c2de84a8a..8cdb6b65ee9 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 3cd1c993d52..602ea826f0a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -7,8 +7,6 @@
  *	Andi Kleen		<ak@muc.de>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
  *
- *	$Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index d42dd16d348..399d41f6543 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
- *
  *	Based on net/ipv4/icmp.c
  *
  *	RFC 1885
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1ee4fa17c12..4de2b9efcac 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 4e5c8615832..f77a6011c30 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -6,8 +6,6 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Ian P. Morris		<I.P.Morris@soton.ac.uk>
  *
- *	$Id: ip6_input.c,v 1.19 2000/12/13 18:31:50 davem Exp $
- *
  *	Based in linux/net/ipv4/ip_input.c
  *
  *	This program is free software; you can redistribute it and/or
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 48cdce9c696..40a2813a63d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
- *
  *	Based on linux/net/ipv4/ip_output.c
  *
  *	This program is free software; you can redistribute it and/or
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 37814810ac4..17c7b098cdb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -6,8 +6,6 @@
  *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
  *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
  *
- *	$Id$
- *
  *      Based on:
  *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  *
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 26b83e512a0..237ebbb9383 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -7,8 +7,6 @@
  *
  *	Based on linux/net/ipv4/ip_sockglue.c
  *
- *	$Id: ipv6_sockglue.c,v 1.41 2002/02/01 22:01:04 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fd632dd7f98..fbb2d12c41b 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: mcast.c,v 1.40 2002/02/08 03:57:19 davem Exp $
- *
  *	Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
  *
  *	This program is free software; you can redistribute it and/or
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index df0736a4caf..cbc7e514d3e 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -7,8 +7,6 @@
  *		PROC file system.  This is very similar to the IPv4 version,
  *		except it reports the sockets in the INET6 address family.
  *
- * Version:	$Id: proc.c,v 1.17 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	David S. Miller (davem@caip.rutgers.edu)
  * 		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  *
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index f929f47b925..9ab78915991 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -5,8 +5,6 @@
  *
  *		PF_INET6 protocol dispatch tables.
  *
- * Version:	$Id: protocol.c,v 1.10 2001/05/18 02:25:49 davem Exp $
- *
  * Authors:	Pedro Roque	<roque@di.fc.ul.pt>
  *
  *		This program is free software; you can redistribute it and/or
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8fee9a15b2d..e03c1898ab2 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -7,8 +7,6 @@
  *
  *	Adapted from linux/net/ipv4/raw.c
  *
- *	$Id: raw.c,v 1.51 2002/02/01 22:01:04 davem Exp $
- *
  *	Fixes:
  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
  *	YOSHIFUJI,H.@USAGI	:	raw checksum (RFC2292(bis) compliance)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 9391a6949b9..13509f906d8 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: reassembly.c,v 1.26 2001/03/07 22:00:57 davem Exp $
- *
  *	Based on: net/ipv4/ip_fragment.c
  *
  *	This program is free software; you can redistribute it and/or
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 220cffe9e63..edae81319b5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 6b8f0583b63..b0c5080420a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -6,8 +6,6 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
  *
- *	$Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      as published by the Free Software Foundation; either version
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 715965f0fac..155499197fc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,8 +5,6 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
- *
  *	Based on:
  *	linux/net/ipv4/tcp.c
  *	linux/net/ipv4/tcp_input.c
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index dd309626ae9..e0693fffc9b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -7,8 +7,6 @@
  *
  *	Based on linux/ipv4/udp.c
  *
- *	$Id: udp.c,v 1.65 2002/02/01 22:01:04 davem Exp $
- *
  *	Fixes:
  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 491efd00a86..f6cdcb348e0 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -2,8 +2,6 @@
  *  UDPLITEv6   An implementation of the UDP-Lite protocol over IPv6.
  *              See also net/ipv4/udplite.c
  *
- *  Version:    $Id: udplite.c,v 1.9 2006/10/19 08:28:10 gerrit Exp $
- *
  *  Authors:    Gerrit Renker       <gerrit@erg.abdn.ac.uk>
  *
  *  Changes:
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2cee87da444..beca6402f1c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -5,8 +5,6 @@
  *
  *		PACKET - implements raw packet sockets.
  *
- * Version:	$Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5bc1ed49018..21307185903 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -24,8 +24,6 @@
  *		Jiri Fojtasek
  *			fixed requeue routine
  *		and many others. thanks.
- *
- * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
  */
 #include <linux/module.h>
 #include <linux/types.h>
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index cc12d5f5d5d..019d4b4478c 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -33,8 +33,6 @@
  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Id$
  */
 
 
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index d8e79162724..007c1a6708e 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -4,7 +4,6 @@
  * Begun April 1, 1996, Mike Shaver.
  * Added /proc/sys/net directories for each protocol family. [MS]
  *
- * $Log: sysctl_net.c,v $
  * Revision 1.2  1996/05/08  20:24:40  shaver
  * Added bits for NET_BRIDGE and the NET_IPV4_ARP stuff and
  * NET_IPV4_IP_FORWARD.
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e18cd3628db..392e80e3268 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -8,8 +8,6 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  *
- * Version:	$Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
- *
  * Fixes:
  *		Linus Torvalds	:	Assorted bug cures.
  *		Niibe Yutaka	:	async I/O support.
-- 
cgit v1.2.3-70-g09d2


From d2c3cc0070d32bf6cabe6b82942c3e80eae0bfc3 Mon Sep 17 00:00:00 2001
From: Tony Vroon <tony@linx.net>
Date: Wed, 11 Jun 2008 16:23:56 -0400
Subject: mac80211: implement EU regulatory domain

Implement missing EU regulatory domain for mac80211. Based on the
information in IEEE 802.11-2007 (specifically pages 1142, 1143 & 1148)
and ETSI 301 893 (V1.4.1).
With thanks to Johannes Berg.

Signed-off-by: Tony Vroon <tony@linx.net>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 185488da246..855bff4b325 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -80,6 +80,23 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = {
 				     IEEE80211_CHAN_RADAR),
 };
 
+static const struct ieee80211_channel_range ieee80211_EU_channels[] = {
+	/* IEEE 802.11b/g, channels 1..13 */
+	RANGE_PWR(2412, 2472, 20, 6, 0),
+	/* IEEE 802.11a, channel 36*/
+	RANGE_PWR(5180, 5180, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN),
+	/* IEEE 802.11a, channel 40*/
+	RANGE_PWR(5200, 5200, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN),
+	/* IEEE 802.11a, channel 44*/
+	RANGE_PWR(5220, 5220, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN),
+	/* IEEE 802.11a, channels 48..64 */
+	RANGE_PWR(5240, 5320, 23, 6, IEEE80211_CHAN_NO_IBSS |
+				     IEEE80211_CHAN_RADAR),
+	/* IEEE 802.11a, channels 100..140 */
+	RANGE_PWR(5500, 5700, 30, 6, IEEE80211_CHAN_NO_IBSS |
+				     IEEE80211_CHAN_RADAR),
+};
+
 #define REGDOM(_code)							\
 	{								\
 		.code = __stringify(_code),				\
@@ -90,6 +107,7 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = {
 static const struct ieee80211_regdomain ieee80211_regdoms[] = {
 	REGDOM(US),
 	REGDOM(JP),
+	REGDOM(EU),
 };
 
 
-- 
cgit v1.2.3-70-g09d2


From 8d5e0d58b39eed9b0f1064f4a7f2b215869b7e71 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Thu, 12 Jun 2008 15:42:29 +0300
Subject: mac80211: do not fragment while aggregation is in use

This patch denies the use of framentation while ampdu is used.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1ad9e664f28..195cb6dd02a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -660,9 +660,8 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 
 	/*
 	 * Warn when submitting a fragmented A-MPDU frame and drop it.
-	 * This is an error and needs to be fixed elsewhere, but when
-	 * done needs to take care of monitor interfaces (injection)
-	 * etc.
+	 * This scenario is handled in __ieee80211_tx_prepare but extra
+	 * caution taken here as fragmented ampdu may cause Tx stop.
 	 */
 	if (WARN_ON(tx->flags & IEEE80211_TX_CTL_AMPDU ||
 		    skb_get_queue_mapping(tx->skb) >=
@@ -981,7 +980,8 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	if (tx->flags & IEEE80211_TX_FRAGMENTED) {
 		if ((tx->flags & IEEE80211_TX_UNICAST) &&
 		    skb->len + FCS_LEN > local->fragmentation_threshold &&
-		    !local->ops->set_frag_threshold)
+		    !local->ops->set_frag_threshold &&
+		    !(info->flags & IEEE80211_TX_CTL_AMPDU))
 			tx->flags |= IEEE80211_TX_FRAGMENTED;
 		else
 			tx->flags &= ~IEEE80211_TX_FRAGMENTED;
-- 
cgit v1.2.3-70-g09d2


From dc0ae30c31ee6ef83992bb692f37dfbba08a2ef6 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 12 Jun 2008 22:38:37 +0300
Subject: mac80211: fix beacon interval value

This patch fixes setting beacon interval

1. in register_hw it honors value requested by the driver
2. It uses default 100 instead of 1000 or 10000. Scanning for beacon
interval ~1sec and above is not sane

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 3 ++-
 net/mac80211/mlme.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b182f018a18..5c5396edad3 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1707,7 +1707,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	debugfs_hw_add(local);
 
-	local->hw.conf.beacon_int = 1000;
+	if (local->hw.conf.beacon_int < 10)
+		local->hw.conf.beacon_int = 100;
 
 	local->wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC |
 						  IEEE80211_HW_SIGNAL_DB |
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7f05820dc62..46f20374083 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3583,7 +3583,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
 	sband = local->hw.wiphy->bands[bss->band];
 
 	if (local->hw.conf.beacon_int == 0)
-		local->hw.conf.beacon_int = 10000;
+		local->hw.conf.beacon_int = 100;
 	bss->beacon_int = local->hw.conf.beacon_int;
 	bss->last_update = jiffies;
 	bss->capability = WLAN_CAPABILITY_IBSS;
-- 
cgit v1.2.3-70-g09d2


From c9c6950c14ffc0e30e592fec1ebcb203ad3dff10 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 11 Jun 2008 14:21:57 -0700
Subject: mac80211: make ieee80211_get_hdrlen_from_skb return unsigned

Many callers already expect it to.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 2 +-
 net/mac80211/util.c    | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1196de85f8d..f9b391faaf7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1535,7 +1535,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
  *
  * @skb: the frame
  */
-int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb);
+unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb);
 
 /**
  * ieee80211_get_hdrlen - get header length from frame control
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6513bc2d270..f3c30d00b21 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -133,14 +133,14 @@ int ieee80211_get_hdrlen(u16 fc)
 }
 EXPORT_SYMBOL(ieee80211_get_hdrlen);
 
-int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
+unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
 {
-	const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *) skb->data;
-	int hdrlen;
+	const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)skb->data;
+	unsigned int hdrlen;
 
 	if (unlikely(skb->len < 10))
 		return 0;
-	hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control));
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	if (unlikely(hdrlen > skb->len))
 		return 0;
 	return hdrlen;
-- 
cgit v1.2.3-70-g09d2


From 6693be7124cb8e4f15f0d80ed6e3e50678771737 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 11 Jun 2008 14:21:57 -0700
Subject: mac80211: add utility function to get header length

Take a __le16 directly rather than a host-endian value.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h |  6 ++++++
 net/mac80211/util.c    | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f9b391faaf7..7ab4ff6159a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1547,6 +1547,12 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb);
  */
 int ieee80211_get_hdrlen(u16 fc);
 
+/**
+ * ieee80211_hdrlen - get header length in bytes from frame control
+ * @fc: frame control field in little-endian format
+ */
+unsigned int ieee80211_hdrlen(__le16 fc);
+
 /**
  * ieee80211_get_tkip_key - get a TKIP rc4 for skb
  *
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index f3c30d00b21..9f365a3af96 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -133,6 +133,38 @@ int ieee80211_get_hdrlen(u16 fc)
 }
 EXPORT_SYMBOL(ieee80211_get_hdrlen);
 
+unsigned int ieee80211_hdrlen(__le16 fc)
+{
+	unsigned int hdrlen = 24;
+
+	if (ieee80211_is_data(fc)) {
+		if (ieee80211_has_a4(fc))
+			hdrlen = 30;
+		if (ieee80211_is_data_qos(fc))
+			hdrlen += IEEE80211_QOS_CTL_LEN;
+		goto out;
+	}
+
+	if (ieee80211_is_ctl(fc)) {
+		/*
+		 * ACK and CTS are 10 bytes, all others 16. To see how
+		 * to get this condition consider
+		 *   subtype mask:   0b0000000011110000 (0x00F0)
+		 *   ACK subtype:    0b0000000011010000 (0x00D0)
+		 *   CTS subtype:    0b0000000011000000 (0x00C0)
+		 *   bits that matter:         ^^^      (0x00E0)
+		 *   value of those: 0b0000000011000000 (0x00C0)
+		 */
+		if ((fc & cpu_to_le16(0x00E0)) == cpu_to_le16(0x00C0))
+			hdrlen = 10;
+		else
+			hdrlen = 16;
+	}
+out:
+	return hdrlen;
+}
+EXPORT_SYMBOL(ieee80211_hdrlen);
+
 unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
 {
 	const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)skb->data;
-- 
cgit v1.2.3-70-g09d2


From d5184cacf3eeaeb6ae0c7a02aa44fe589beeda23 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 11 Jun 2008 14:21:58 -0700
Subject: mac80211: wpa.c use new access helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wpa.c | 63 ++++++++++++++----------------------------------------
 1 file changed, 16 insertions(+), 47 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 9f6fd20374e..9834cecaca3 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -24,46 +24,22 @@ static int ieee80211_get_hdr_info(const struct sk_buff *skb, u8 **sa, u8 **da,
 {
 	struct ieee80211_hdr *hdr;
 	size_t hdrlen;
-	u16 fc;
-	int a4_included;
-	u8 *pos;
+	__le16 fc;
 
-	hdr = (struct ieee80211_hdr *) skb->data;
-	fc = le16_to_cpu(hdr->frame_control);
-
-	hdrlen = 24;
-	if ((fc & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) ==
-	    (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) {
-		hdrlen += ETH_ALEN;
-		*sa = hdr->addr4;
-		*da = hdr->addr3;
-	} else if (fc & IEEE80211_FCTL_FROMDS) {
-		*sa = hdr->addr3;
-		*da = hdr->addr1;
-	} else if (fc & IEEE80211_FCTL_TODS) {
-		*sa = hdr->addr2;
-		*da = hdr->addr3;
-	} else {
-		*sa = hdr->addr2;
-		*da = hdr->addr1;
-	}
+	hdr = (struct ieee80211_hdr *)skb->data;
+	fc = hdr->frame_control;
 
-	if (fc & 0x80)
-		hdrlen += 2;
+	hdrlen = ieee80211_hdrlen(fc);
+
+	*sa = ieee80211_get_SA(hdr);
+	*da = ieee80211_get_DA(hdr);
 
 	*data = skb->data + hdrlen;
 	*data_len = skb->len - hdrlen;
 
-	a4_included = (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
-		(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS);
-	if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
-	    fc & IEEE80211_STYPE_QOS_DATA) {
-		pos = (u8 *) &hdr->addr4;
-		if (a4_included)
-			pos += 6;
-		*qos_tid = pos[0] & 0x0f;
-		*qos_tid |= 0x80; /* qos_included flag */
-	} else
+	if (ieee80211_is_data_qos(fc))
+		*qos_tid = (*ieee80211_get_qos_ctl(hdr) & 0x0f) | 0x80;
+	else
 		*qos_tid = 0;
 
 	return skb->len < hdrlen ? -1 : 0;
@@ -186,8 +162,8 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_key *key = tx->key;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	int hdrlen, len, tail;
-	u16 fc;
+	unsigned int hdrlen;
+	int len, tail;
 	u8 *pos;
 
 	info->control.icv_len = TKIP_ICV_LEN;
@@ -200,8 +176,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 		return 0;
 	}
 
-	fc = le16_to_cpu(hdr->frame_control);
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	len = skb->len - hdrlen;
 
 	if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
@@ -272,14 +247,12 @@ ieee80211_rx_result
 ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
-	u16 fc;
 	int hdrlen, res, hwaccel = 0, wpa_test = 0;
 	struct ieee80211_key *key = rx->key;
 	struct sk_buff *skb = rx->skb;
 	DECLARE_MAC_BUF(mac);
 
-	fc = le16_to_cpu(hdr->frame_control);
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
 	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
 		return RX_CONTINUE;
@@ -427,7 +400,6 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	struct ieee80211_key *key = tx->key;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int hdrlen, len, tail;
-	u16 fc;
 	u8 *pos, *pn, *b_0, *aad, *scratch;
 	int i;
 
@@ -446,8 +418,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	b_0 = scratch + 3 * AES_BLOCK_LEN;
 	aad = scratch + 4 * AES_BLOCK_LEN;
 
-	fc = le16_to_cpu(hdr->frame_control);
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	len = skb->len - hdrlen;
 
 	if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
@@ -516,7 +487,6 @@ ieee80211_rx_result
 ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
-	u16 fc;
 	int hdrlen;
 	struct ieee80211_key *key = rx->key;
 	struct sk_buff *skb = rx->skb;
@@ -524,8 +494,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 	int data_len;
 	DECLARE_MAC_BUF(mac);
 
-	fc = le16_to_cpu(hdr->frame_control);
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
 	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
 		return RX_CONTINUE;
-- 
cgit v1.2.3-70-g09d2


From a494bb1cae40dd0a98682826d91ddf533cbc864e Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 11 Jun 2008 14:21:58 -0700
Subject: mac80211: use new helpers in util.c - ieee80211_get_bssid()

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/util.c | 41 +++++++++++++++++++----------------------
 1 file changed, 19 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 9f365a3af96..ce62b163b82 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -45,38 +45,37 @@ const unsigned char bridge_tunnel_header[] __aligned(2) =
 u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
 			enum ieee80211_if_types type)
 {
-	u16 fc;
+	__le16 fc = hdr->frame_control;
 
 	 /* drop ACK/CTS frames and incorrect hdr len (ctrl) */
 	if (len < 16)
 		return NULL;
 
-	fc = le16_to_cpu(hdr->frame_control);
-
-	switch (fc & IEEE80211_FCTL_FTYPE) {
-	case IEEE80211_FTYPE_DATA:
+	if (ieee80211_is_data(fc)) {
 		if (len < 24) /* drop incorrect hdr len (data) */
 			return NULL;
-		switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
-		case IEEE80211_FCTL_TODS:
-			return hdr->addr1;
-		case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
+
+		if (ieee80211_has_a4(fc))
 			return NULL;
-		case IEEE80211_FCTL_FROMDS:
+		if (ieee80211_has_tods(fc))
+			return hdr->addr1;
+		if (ieee80211_has_fromds(fc))
 			return hdr->addr2;
-		case 0:
-			return hdr->addr3;
-		}
-		break;
-	case IEEE80211_FTYPE_MGMT:
+
+		return hdr->addr3;
+	}
+
+	if (ieee80211_is_mgmt(fc)) {
 		if (len < 24) /* drop incorrect hdr len (mgmt) */
 			return NULL;
 		return hdr->addr3;
-	case IEEE80211_FTYPE_CTL:
-		if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)
+	}
+
+	if (ieee80211_is_ctl(fc)) {
+		if(ieee80211_is_pspoll(fc))
 			return hdr->addr1;
-		else if ((fc & IEEE80211_FCTL_STYPE) ==
-						IEEE80211_STYPE_BACK_REQ) {
+
+		if (ieee80211_is_back_req(fc)) {
 			switch (type) {
 			case IEEE80211_IF_TYPE_STA:
 				return hdr->addr2;
@@ -84,11 +83,9 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
 			case IEEE80211_IF_TYPE_VLAN:
 				return hdr->addr1;
 			default:
-				return NULL;
+				break; /* fall through to the return */
 			}
 		}
-		else
-			return NULL;
 	}
 
 	return NULL;
-- 
cgit v1.2.3-70-g09d2


From 002aaf4ea6be3247c246d274979359c3bc93c82a Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 11 Jun 2008 14:21:59 -0700
Subject: mac80211: wme.c use new helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wme.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 14a9ff10a1e..d8c2f9688b2 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -105,11 +105,8 @@ static int classify80211(struct sk_buff *skb, struct Qdisc *qd)
 {
 	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	unsigned short fc = le16_to_cpu(hdr->frame_control);
-	int qos;
 
-	/* see if frame is data or non data frame */
-	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) {
+	if (!ieee80211_is_data(hdr->frame_control)) {
 		/* management frames go on AC_VO queue, but are sent
 		* without QoS control fields */
 		return 0;
@@ -119,10 +116,7 @@ static int classify80211(struct sk_buff *skb, struct Qdisc *qd)
 		/* use AC from radiotap */
 	}
 
-	/* is this a QoS frame? */
-	qos = fc & IEEE80211_STYPE_QOS_DATA;
-
-	if (!qos) {
+	if (!ieee80211_is_data_qos(hdr->frame_control)) {
 		skb->priority = 0; /* required for correct WPA/11i MIC */
 		return ieee802_1d_to_ac[skb->priority];
 	}
@@ -151,7 +145,6 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	unsigned short fc = le16_to_cpu(hdr->frame_control);
 	struct Qdisc *qdisc;
 	struct sta_info *sta;
 	int err, queue;
@@ -185,16 +178,15 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 
 	/* now we know the 1d priority, fill in the QoS header if there is one
 	 */
-	if (WLAN_FC_IS_QOS_DATA(fc)) {
-		u8 *p = skb->data + ieee80211_get_hdrlen(fc) - 2;
+	if (ieee80211_is_data_qos(hdr->frame_control)) {
+		u8 *p = ieee80211_get_qos_ctl(hdr);
 		u8 ack_policy = 0;
 		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
 		if (local->wifi_wme_noack_test)
 			ack_policy |= QOS_CONTROL_ACK_POLICY_NOACK <<
 					QOS_CONTROL_ACK_POLICY_SHIFT;
 		/* qos header is 2 bytes, second reserved */
-		*p = ack_policy | tid;
-		p++;
+		*p++ = ack_policy | tid;
 		*p = 0;
 
 		rcu_read_lock();
-- 
cgit v1.2.3-70-g09d2


From 87228f57434108d8463ff10fd408d8d1273a23d2 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 11 Jun 2008 14:21:59 -0700
Subject: mac80211: rx.c use new helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a3643fd86af..e80336f8f1e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -67,12 +67,9 @@ static inline int should_drop_frame(struct ieee80211_rx_status *status,
 		return 1;
 	if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len))
 		return 1;
-	if (((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) ==
-			cpu_to_le16(IEEE80211_FTYPE_CTL)) &&
-	    ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) !=
-			cpu_to_le16(IEEE80211_STYPE_PSPOLL)) &&
-	    ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) !=
-			cpu_to_le16(IEEE80211_STYPE_BACK_REQ)))
+	if (ieee80211_is_ctl(hdr->frame_control) &&
+	    !ieee80211_is_pspoll(hdr->frame_control) &&
+	    !ieee80211_is_back_req(hdr->frame_control))
 		return 1;
 	return 0;
 }
@@ -2118,7 +2115,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct sta_info *sta;
 	struct tid_ampdu_rx *tid_agg_rx;
-	u16 fc, sc;
+	u16 sc;
 	u16 mpdu_seq_num;
 	u8 ret = 0, *qc;
 	int tid;
@@ -2127,14 +2124,12 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
 	if (!sta)
 		return ret;
 
-	fc = le16_to_cpu(hdr->frame_control);
-
 	/* filter the QoS data rx stream according to
 	 * STA/TID and check if this STA/TID is on aggregation */
-	if (!WLAN_FC_IS_QOS_DATA(fc))
+	if (!ieee80211_is_data_qos(hdr->frame_control))
 		goto end_reorder;
 
-	qc = skb->data + ieee80211_get_hdrlen(fc) - QOS_CONTROL_LEN;
+	qc = ieee80211_get_qos_ctl(hdr);
 	tid = qc[0] & QOS_CONTROL_TID_MASK;
 
 	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL)
@@ -2143,7 +2138,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
 	tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
 
 	/* null data frames are excluded */
-	if (unlikely(fc & IEEE80211_STYPE_NULLFUNC))
+	if (unlikely(ieee80211_is_nullfunc(hdr->frame_control)))
 		goto end_reorder;
 
 	/* new un-ordered ampdu frame - process it */
-- 
cgit v1.2.3-70-g09d2


From c801242c38de247d82f12f6bf28bd19a280a12ae Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 11 Jun 2008 14:22:00 -0700
Subject: mac80211: tkip.c consolidate tkip IV writing in helper

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tkip.c | 25 ++++++++++++++-----------
 net/mac80211/tkip.h |  4 ++--
 net/mac80211/wpa.c  |  8 +-------
 3 files changed, 17 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index a00cf1ea771..a2c8ca1100b 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -64,6 +64,15 @@ static u16 tkipS(u16 val)
 	return tkip_sbox[val & 0xff] ^ swab16(tkip_sbox[val >> 8]);
 }
 
+static u8 *write_tkip_iv(u8 *pos, u16 iv16)
+{
+	*pos++ = iv16 >> 8;
+	*pos++ = ((iv16 >> 8) | 0x20) & 0x7f;
+	*pos++ = iv16 & 0xFF;
+	return pos;
+}
+
+
 /*
  * P1K := Phase1(TA, TK, TSC)
  * TA = transmitter address (48 bits)
@@ -123,12 +132,9 @@ static void tkip_mixing_phase2(struct ieee80211_key *key, struct tkip_ctx *ctx,
 	ppk[4] += ror16(ppk[3], 1);
 	ppk[5] += ror16(ppk[4], 1);
 
-	rc4key[0] = tsc_IV16 >> 8;
-	rc4key[1] = ((tsc_IV16 >> 8) | 0x20) & 0x7f;
-	rc4key[2] = tsc_IV16 & 0xFF;
-	rc4key[3] = ((ppk[5] ^ get_unaligned_le16(tk)) >> 1) & 0xFF;
+	rc4key = write_tkip_iv(rc4key, tsc_IV16);
+	*rc4key++ = ((ppk[5] ^ get_unaligned_le16(tk)) >> 1) & 0xFF;
 
-	rc4key += 4;
 	for (i = 0; i < 6; i++)
 		put_unaligned_le16(ppk[i], rc4key + 2 * i);
 }
@@ -136,12 +142,9 @@ static void tkip_mixing_phase2(struct ieee80211_key *key, struct tkip_ctx *ctx,
 /* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets
  * of the IV. Returns pointer to the octet following IVs (i.e., beginning of
  * the packet payload). */
-u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
-			   u8 iv0, u8 iv1, u8 iv2)
+u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16)
 {
-	*pos++ = iv0;
-	*pos++ = iv1;
-	*pos++ = iv2;
+	pos = write_tkip_iv(pos, iv16);
 	*pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */;
 	put_unaligned_le32(key->u.tkip.tx.iv32, pos);
 	return pos + 4;
@@ -213,7 +216,7 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
 	u8 rc4key[16];
 
 	ieee80211_tkip_gen_rc4key(key, ta, rc4key);
-	pos = ieee80211_tkip_add_iv(pos, key, rc4key[0], rc4key[1], rc4key[2]);
+	pos = ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16);
 	ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len);
 }
 
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
index b890427fc95..d4714383f5f 100644
--- a/net/mac80211/tkip.h
+++ b/net/mac80211/tkip.h
@@ -13,8 +13,8 @@
 #include <linux/crypto.h>
 #include "key.h"
 
-u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key,
-			  u8 iv0, u8 iv1, u8 iv2);
+u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16);
+
 void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
 				 struct ieee80211_key *key,
 				 u8 *pos, size_t payload_len, u8 *ta);
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 9834cecaca3..345e10e9b31 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -198,14 +198,8 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 		key->u.tkip.tx.iv32++;
 
 	if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
-		hdr = (struct ieee80211_hdr *)skb->data;
-
 		/* hwaccel - with preallocated room for IV */
-		ieee80211_tkip_add_iv(pos, key,
-				      (u8) (key->u.tkip.tx.iv16 >> 8),
-				      (u8) (((key->u.tkip.tx.iv16 >> 8) | 0x20) &
-					    0x7f),
-				      (u8) key->u.tkip.tx.iv16);
+		ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16);
 
 		info->control.hw_key = &tx->key->conf;
 		return 0;
-- 
cgit v1.2.3-70-g09d2


From 7c70537f97fe35f46762247a4bda72c16d585736 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 11 Jun 2008 14:22:00 -0700
Subject: mac80211: tkip.c fold ieee80211_gen_rc4key into its one caller

Also change the arguments of the phase1, 2 key mixing to take
a pointer to the encrytion key and the tkip_ctx in the same
order.

Do the dereference of the encryption key in the callers.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tkip.c | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index a2c8ca1100b..8a7dac742b7 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -72,7 +72,6 @@ static u8 *write_tkip_iv(u8 *pos, u16 iv16)
 	return pos;
 }
 
-
 /*
  * P1K := Phase1(TA, TK, TSC)
  * TA = transmitter address (48 bits)
@@ -80,11 +79,10 @@ static u8 *write_tkip_iv(u8 *pos, u16 iv16)
  * TSC = TKIP sequence counter (48 bits, only 32 msb bits used)
  * P1K: 80 bits
  */
-static void tkip_mixing_phase1(struct ieee80211_key *key, const u8 *ta,
-			       struct tkip_ctx *ctx, u32 tsc_IV32)
+static void tkip_mixing_phase1(const u8 *tk, struct tkip_ctx *ctx,
+			       const u8 *ta, u32 tsc_IV32)
 {
 	int i, j;
-	const u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
 	u16 *p1k = ctx->p1k;
 
 	p1k[0] = tsc_IV32 & 0xFFFF;
@@ -104,12 +102,11 @@ static void tkip_mixing_phase1(struct ieee80211_key *key, const u8 *ta,
 	ctx->initialized = 1;
 }
 
-static void tkip_mixing_phase2(struct ieee80211_key *key, struct tkip_ctx *ctx,
+static void tkip_mixing_phase2(const u8 *tk, struct tkip_ctx *ctx,
 			       u16 tsc_IV16, u8 *rc4key)
 {
 	u16 ppk[6];
 	const u16 *p1k = ctx->p1k;
-	const u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
 	int i;
 
 	ppk[0] = p1k[0];
@@ -150,16 +147,6 @@ u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16)
 	return pos + 4;
 }
 
-static void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta,
-			       u8 *rc4key)
-{
-	/* Calculate per-packet key */
-	if (key->u.tkip.tx.iv16 == 0 || !key->u.tkip.tx.initialized)
-		tkip_mixing_phase1(key, ta, &key->u.tkip.tx, key->u.tkip.tx.iv32);
-
-	tkip_mixing_phase2(key, &key->u.tkip.tx, key->u.tkip.tx.iv16, rc4key);
-}
-
 void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 			struct sk_buff *skb, enum ieee80211_tkip_key_type type,
 			u8 *outkey)
@@ -170,6 +157,7 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 	u8 *data = (u8 *) hdr;
 	u16 fc = le16_to_cpu(hdr->frame_control);
 	int hdr_len = ieee80211_get_hdrlen(fc);
+	u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
 	u8 *ta = hdr->addr2;
 	u16 iv16;
 	u32 iv32;
@@ -193,14 +181,14 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 	 * might occur after the wrap around of iv16 in the key in case of
 	 * fragmented packets. */
 	if (iv16 == 0 || !key->u.tkip.tx.initialized)
-		tkip_mixing_phase1(key, ta, &key->u.tkip.tx, iv32);
+		tkip_mixing_phase1(tk, &key->u.tkip.tx, ta, iv32);
 
 	if (type == IEEE80211_TKIP_P1_KEY) {
 		memcpy(outkey, key->u.tkip.tx.p1k, sizeof(u16) * 5);
 		return;
 	}
 
-	tkip_mixing_phase2(key, &key->u.tkip.tx, iv16, outkey);
+	tkip_mixing_phase2(tk, &key->u.tkip.tx, iv16, outkey);
 }
 EXPORT_SYMBOL(ieee80211_get_tkip_key);
 
@@ -214,8 +202,15 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
 				 u8 *pos, size_t payload_len, u8 *ta)
 {
 	u8 rc4key[16];
+	struct tkip_ctx *ctx = &key->u.tkip.tx;
+	const u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
+
+	/* Calculate per-packet key */
+	if (ctx->iv16 == 0 || !ctx->initialized)
+		tkip_mixing_phase1(tk, ctx, ta, ctx->iv32);
+
+	tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key);
 
-	ieee80211_tkip_gen_rc4key(key, ta, rc4key);
 	pos = ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16);
 	ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len);
 }
@@ -234,6 +229,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 	u32 iv16;
 	u8 rc4key[16], keyid, *pos = payload;
 	int res;
+	const u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
 
 	if (payload_len < 12)
 		return -1;
@@ -284,7 +280,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 	if (!key->u.tkip.rx[queue].initialized ||
 	    key->u.tkip.rx[queue].iv32 != iv32) {
 		/* IV16 wrapped around - perform TKIP phase 1 */
-		tkip_mixing_phase1(key, ta, &key->u.tkip.rx[queue], iv32);
+		tkip_mixing_phase1(tk, &key->u.tkip.rx[queue], ta, iv32);
 #ifdef CONFIG_TKIP_DEBUG
 		{
 			int i;
@@ -317,7 +313,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 		}
 	}
 
-	tkip_mixing_phase2(key, &key->u.tkip.rx[queue], iv16, rc4key);
+	tkip_mixing_phase2(tk, &key->u.tkip.rx[queue], iv16, rc4key);
 #ifdef CONFIG_TKIP_DEBUG
 	{
 		int i;
-- 
cgit v1.2.3-70-g09d2


From c644bce95f287e763a0b49e5d03f0fe6256f6d2e Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 11 Jun 2008 14:22:02 -0700
Subject: mac80211: tkip.c use a local struct tkip_ctx in
 ieee80211_get_tkip_key

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tkip.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 8a7dac742b7..e710243d82e 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -153,25 +153,27 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 {
 	struct ieee80211_key *key = (struct ieee80211_key *)
 			container_of(keyconf, struct ieee80211_key, conf);
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	u8 *data = (u8 *) hdr;
-	u16 fc = le16_to_cpu(hdr->frame_control);
-	int hdr_len = ieee80211_get_hdrlen(fc);
-	u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
-	u8 *ta = hdr->addr2;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	u8 *data;
+	const u8 *tk;
+	struct tkip_ctx *ctx;
 	u16 iv16;
 	u32 iv32;
 
-	iv16 = data[hdr_len + 2] | (data[hdr_len] << 8);
-	iv32 = get_unaligned_le32(data + hdr_len + 4);
+	data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control);
+	iv16 = data[2] | (data[0] << 8);
+	iv32 = get_unaligned_le32(&data[4]);
+
+	tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
+	ctx = &key->u.tkip.tx;
 
 #ifdef CONFIG_TKIP_DEBUG
 	printk(KERN_DEBUG "TKIP encrypt: iv16 = 0x%04x, iv32 = 0x%08x\n",
 			iv16, iv32);
 
-	if (iv32 != key->u.tkip.tx.iv32) {
+	if (iv32 != ctx->iv32) {
 		printk(KERN_DEBUG "skb: iv32 = 0x%08x key: iv32 = 0x%08x\n",
-			iv32, key->u.tkip.tx.iv32);
+			iv32, ctx->iv32);
 		printk(KERN_DEBUG "Wrap around of iv16 in the middle of a "
 			"fragmented packet\n");
 	}
@@ -180,15 +182,15 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 	/* Update the p1k only when the iv16 in the packet wraps around, this
 	 * might occur after the wrap around of iv16 in the key in case of
 	 * fragmented packets. */
-	if (iv16 == 0 || !key->u.tkip.tx.initialized)
-		tkip_mixing_phase1(tk, &key->u.tkip.tx, ta, iv32);
+	if (iv16 == 0 || !ctx->initialized)
+		tkip_mixing_phase1(tk, ctx, hdr->addr2, iv32);
 
 	if (type == IEEE80211_TKIP_P1_KEY) {
-		memcpy(outkey, key->u.tkip.tx.p1k, sizeof(u16) * 5);
+		memcpy(outkey, ctx->p1k, sizeof(u16) * 5);
 		return;
 	}
 
-	tkip_mixing_phase2(tk, &key->u.tkip.tx, iv16, outkey);
+	tkip_mixing_phase2(tk, ctx, iv16, outkey);
 }
 EXPORT_SYMBOL(ieee80211_get_tkip_key);
 
-- 
cgit v1.2.3-70-g09d2


From 87291c0269e77b029282676448fed3706a54211a Mon Sep 17 00:00:00 2001
From: Vladimir Koutny <vlado@work.ksp.sk>
Date: Fri, 13 Jun 2008 16:50:44 +0200
Subject: mac80211: eliminate IBSS warning in rate_lowest_index()

In IBSS mode prior to join/creation of new IBSS it is possible that
a frame from unknown station is received and an ibss_add_sta() is
called. This will cause a warning in rate_lowest_index() since the
list of supported rates of our station is not initialized yet.

The fix is to add ibss stations with a rate we received that frame
at; this single-element set will be extended later based on beacon
data. Also there is no need to store stations from a foreign IBSS.

Signed-off-by: Vladimir Koutny <vlado@ksp.sk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/mlme.c        | 15 +++++++++++----
 net/mac80211/rx.c          | 10 ++++++++--
 3 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b19bd16703b..14fccf16b80 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -876,7 +876,7 @@ void ieee80211_rx_bss_list_deinit(struct net_device *dev);
 int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len);
 struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 					struct sk_buff *skb, u8 *bssid,
-					u8 *addr);
+					u8 *addr, u64 supp_rates);
 int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason);
 int ieee80211_sta_disassociate(struct net_device *dev, u16 reason);
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 46f20374083..55659a730dc 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2863,7 +2863,8 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 				       dev->name, print_mac(mac, mgmt->bssid));
 			ieee80211_sta_join_ibss(dev, &sdata->u.sta, bss);
 			ieee80211_ibss_add_sta(dev, NULL,
-					       mgmt->bssid, mgmt->sa);
+					       mgmt->bssid, mgmt->sa,
+					       BIT(rx_status->rate_idx));
 		}
 	}
 
@@ -4307,12 +4308,13 @@ int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len)
 
 struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 					struct sk_buff *skb, u8 *bssid,
-					u8 *addr)
+					u8 *addr, u64 supp_rates)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	DECLARE_MAC_BUF(mac);
+	int band = local->hw.conf.channel->band;
 
 	/* TODO: Could consider removing the least recently used entry and
 	 * allow new one to be added. */
@@ -4324,6 +4326,9 @@ struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 		return NULL;
 	}
 
+	if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid))
+		return NULL;
+
 	printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n",
 	       wiphy_name(local->hw.wiphy), print_mac(mac, addr), dev->name);
 
@@ -4333,8 +4338,10 @@ struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 
 	set_sta_flags(sta, WLAN_STA_AUTHORIZED);
 
-	sta->supp_rates[local->hw.conf.channel->band] =
-		sdata->u.sta.supp_rates_bits[local->hw.conf.channel->band];
+	if (supp_rates)
+		sta->supp_rates[band] = supp_rates;
+	else
+		sta->supp_rates[band] = sdata->u.sta.supp_rates_bits[band];
 
 	rate_control_rate_init(sta, local);
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e80336f8f1e..c32a0bcd53b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1823,8 +1823,13 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 		if (!bssid)
 			return 0;
 		if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT &&
-		    (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON)
+		    (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) {
+			if (!rx->sta)
+				rx->sta = ieee80211_ibss_add_sta(sdata->dev,
+						rx->skb, bssid, hdr->addr2,
+						BIT(rx->status->rate_idx));
 			return 1;
+		}
 		else if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) {
 			if (!(rx->flags & IEEE80211_RX_IN_SCAN))
 				return 0;
@@ -1837,7 +1842,8 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			rx->flags &= ~IEEE80211_RX_RA_MATCH;
 		} else if (!rx->sta)
 			rx->sta = ieee80211_ibss_add_sta(sdata->dev, rx->skb,
-							 bssid, hdr->addr2);
+						bssid, hdr->addr2,
+						BIT(rx->status->rate_idx));
 		break;
 	case IEEE80211_IF_TYPE_MESH_POINT:
 		if (!multicast &&
-- 
cgit v1.2.3-70-g09d2


From 7d06b2e053d2d536348e3a0f6bb02982a41bea37 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Sat, 14 Jun 2008 17:04:49 -0700
Subject: net: change proto destroy method to return void

Change struct proto destroy function pointer to return void.  Noticed
by Al Viro.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h      | 2 +-
 include/net/tcp.h       | 2 +-
 include/net/transp_v6.h | 2 +-
 net/dccp/dccp.h         | 2 +-
 net/dccp/ipv6.c         | 4 ++--
 net/dccp/proto.c        | 4 +---
 net/ipv4/raw.c          | 3 +--
 net/ipv4/tcp_ipv4.c     | 4 +---
 net/ipv4/udp.c          | 3 +--
 net/ipv4/udp_impl.h     | 2 +-
 net/ipv6/af_inet6.c     | 4 +---
 net/ipv6/raw.c          | 4 ++--
 net/ipv6/tcp_ipv6.c     | 4 ++--
 net/ipv6/udp.c          | 4 +---
 net/ipv6/udp_impl.h     | 2 +-
 net/sctp/socket.c       | 3 +--
 16 files changed, 19 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index dc42b44c2aa..0a80961a83c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -524,7 +524,7 @@ struct proto {
 	int			(*ioctl)(struct sock *sk, int cmd,
 					 unsigned long arg);
 	int			(*init)(struct sock *sk);
-	int			(*destroy)(struct sock *sk);
+	void			(*destroy)(struct sock *sk);
 	void			(*shutdown)(struct sock *sk, int how);
 	int			(*setsockopt)(struct sock *sk, int level, 
 					int optname, char __user *optval,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b5a1b9eb12e..6b827cc6c52 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1366,7 +1366,7 @@ extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);
 extern struct request_sock_ops tcp_request_sock_ops;
 extern struct request_sock_ops tcp6_request_sock_ops;
 
-extern int tcp_v4_destroy_sock(struct sock *sk);
+extern void tcp_v4_destroy_sock(struct sock *sk);
 
 extern int tcp_v4_gso_send_check(struct sk_buff *skb);
 extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index 112934a3288..876b6f2bb4f 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -53,7 +53,7 @@ extern int			datagram_send_ctl(struct net *net,
  */
 extern struct inet_connection_sock_af_ops ipv4_specific;
 
-extern int inet6_destroy_sock(struct sock *sk);
+extern void inet6_destroy_sock(struct sock *sk);
 
 #endif
 
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f44d492d3b7..1b2cea244e1 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -262,7 +262,7 @@ extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				const struct dccp_hdr *dh, const unsigned len);
 
 extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
-extern int dccp_destroy_sock(struct sock *sk);
+extern void dccp_destroy_sock(struct sock *sk);
 
 extern void		dccp_close(struct sock *sk, long timeout);
 extern struct sk_buff	*dccp_make_response(struct sock *sk,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index f7fe2a572d7..eec3c471789 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1091,10 +1091,10 @@ static int dccp_v6_init_sock(struct sock *sk)
 	return err;
 }
 
-static int dccp_v6_destroy_sock(struct sock *sk)
+static void dccp_v6_destroy_sock(struct sock *sk)
 {
 	dccp_destroy_sock(sk);
-	return inet6_destroy_sock(sk);
+	inet6_destroy_sock(sk);
 }
 
 static struct timewait_sock_ops dccp6_timewait_sock_ops = {
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9dfe2470962..a0b56009611 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -237,7 +237,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 
 EXPORT_SYMBOL_GPL(dccp_init_sock);
 
-int dccp_destroy_sock(struct sock *sk)
+void dccp_destroy_sock(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_minisock *dmsk = dccp_msk(sk);
@@ -268,8 +268,6 @@ int dccp_destroy_sock(struct sock *sk)
 
 	/* clean up feature negotiation state */
 	dccp_feat_clean(dmsk);
-
-	return 0;
 }
 
 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1d0c97c8712..36035a0c6dc 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -606,12 +606,11 @@ static void raw_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 }
 
-static int raw_destroy(struct sock *sk)
+static void raw_destroy(struct sock *sk)
 {
 	lock_sock(sk);
 	ip_flush_pending_frames(sk);
 	release_sock(sk);
-	return 0;
 }
 
 /* This gets rid of all the nasties in af_inet. -DaveM */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b219a7a7cd0..64b385f6593 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1775,7 +1775,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	return 0;
 }
 
-int tcp_v4_destroy_sock(struct sock *sk)
+void tcp_v4_destroy_sock(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -1819,8 +1819,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	}
 
 	atomic_dec(&tcp_sockets_allocated);
-
-	return 0;
 }
 
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 355e6d62d48..eba790dcd16 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1253,12 +1253,11 @@ int udp_rcv(struct sk_buff *skb)
 	return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
 }
 
-int udp_destroy_sock(struct sock *sk)
+void udp_destroy_sock(struct sock *sk)
 {
 	lock_sock(sk);
 	udp_flush_pending_frames(sk);
 	release_sock(sk);
-	return 0;
 }
 
 /*
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 7288bf7977f..2e9bad2fa1b 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -26,7 +26,7 @@ extern int	udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 extern int	udp_sendpage(struct sock *sk, struct page *page, int offset,
 			     size_t size, int flags);
 extern int	udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
-extern int	udp_destroy_sock(struct sock *sk);
+extern void	udp_destroy_sock(struct sock *sk);
 
 #ifdef CONFIG_PROC_FS
 extern int	udp4_seq_show(struct seq_file *seq, void *v);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 350457c761e..3ce8d2f318c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -371,7 +371,7 @@ int inet6_release(struct socket *sock)
 
 EXPORT_SYMBOL(inet6_release);
 
-int inet6_destroy_sock(struct sock *sk)
+void inet6_destroy_sock(struct sock *sk)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff *skb;
@@ -389,8 +389,6 @@ int inet6_destroy_sock(struct sock *sk)
 
 	if ((opt = xchg(&np->opt, NULL)) != NULL)
 		sock_kfree_s(sk, opt, opt->tot_len);
-
-	return 0;
 }
 
 EXPORT_SYMBOL_GPL(inet6_destroy_sock);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 70a57e45bf0..456777d7a40 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1162,13 +1162,13 @@ static void rawv6_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 }
 
-static int raw6_destroy(struct sock *sk)
+static void raw6_destroy(struct sock *sk)
 {
 	lock_sock(sk);
 	ip6_flush_pending_frames(sk);
 	release_sock(sk);
 
-	return inet6_destroy_sock(sk);
+	inet6_destroy_sock(sk);
 }
 
 static int rawv6_init_sk(struct sock *sk)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ebed5d3adb8..daefc18d50a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1872,7 +1872,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	return 0;
 }
 
-static int tcp_v6_destroy_sock(struct sock *sk)
+static void tcp_v6_destroy_sock(struct sock *sk)
 {
 #ifdef CONFIG_TCP_MD5SIG
 	/* Clean up the MD5 key list */
@@ -1880,7 +1880,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
 		tcp_v6_clear_md5_list(sk);
 #endif
 	tcp_v4_destroy_sock(sk);
-	return inet6_destroy_sock(sk);
+	inet6_destroy_sock(sk);
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e0693fffc9b..09687f7a856 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -879,15 +879,13 @@ do_confirm:
 	goto out;
 }
 
-int udpv6_destroy_sock(struct sock *sk)
+void udpv6_destroy_sock(struct sock *sk)
 {
 	lock_sock(sk);
 	udp_v6_flush_pending_frames(sk);
 	release_sock(sk);
 
 	inet6_destroy_sock(sk);
-
-	return 0;
 }
 
 /*
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 321b81a4d41..92dd7da766d 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -29,7 +29,7 @@ extern int	udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 			      struct msghdr *msg, size_t len,
 			      int noblock, int flags, int *addr_len);
 extern int	udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
-extern int	udpv6_destroy_sock(struct sock *sk);
+extern void	udpv6_destroy_sock(struct sock *sk);
 
 #ifdef CONFIG_PROC_FS
 extern int	udp6_seq_show(struct seq_file *seq, void *v);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 253e5ea7e1e..f98650cc48d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3588,7 +3588,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 }
 
 /* Cleanup any SCTP per socket resources.  */
-SCTP_STATIC int sctp_destroy_sock(struct sock *sk)
+SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
 {
 	struct sctp_endpoint *ep;
 
@@ -3598,7 +3598,6 @@ SCTP_STATIC int sctp_destroy_sock(struct sock *sk)
 	ep = sctp_sk(sk)->ep;
 	sctp_endpoint_free(ep);
 	atomic_dec(&sctp_sockets_allocated);
-	return 0;
 }
 
 /* API 4.1.7 shutdown() - TCP Style Syntax
-- 
cgit v1.2.3-70-g09d2


From d6266281f8175e3ad68c28b20a609b278b47ade5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 16 Jun 2008 17:11:50 -0700
Subject: udp: introduce a udp_hashfn function

Currently the chain to store a UDP socket is calculated with
simple (x & (UDP_HTABLE_SIZE - 1)). But taking net into account
would make this calculation a bit more complex, so moving it into
a function would help.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h |  5 +++++
 net/ipv4/udp.c      | 12 ++++++------
 net/ipv6/udp.c      |  4 ++--
 3 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 581ca2c14c5..9c94312b2de 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -46,6 +46,11 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
 
 #define UDP_HTABLE_SIZE		128
 
+static inline int udp_hashfn(const unsigned num)
+{
+	return num & (UDP_HTABLE_SIZE - 1);
+}
+
 struct udp_sock {
 	/* inet_sock has to be the first member */
 	struct inet_sock inet;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index eba790dcd16..d8f527d1570 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -134,7 +134,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
 	struct sock *sk;
 	struct hlist_node *node;
 
-	sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
+	sk_for_each(sk, node, &udptable[udp_hashfn(num)])
 		if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
 			return 1;
 	return 0;
@@ -174,7 +174,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
 			int size = 0;
 
-			head = &udptable[rover & (UDP_HTABLE_SIZE - 1)];
+			head = &udptable[udp_hashfn(rover)];
 			if (hlist_empty(head))
 				goto gotit;
 
@@ -211,7 +211,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 gotit:
 		snum = rover;
 	} else {
-		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+		head = &udptable[udp_hashfn(snum)];
 
 		sk_for_each(sk2, node, head)
 			if (sk2->sk_hash == snum                             &&
@@ -227,7 +227,7 @@ gotit:
 	inet_sk(sk)->num = snum;
 	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {
-		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+		head = &udptable[udp_hashfn(snum)];
 		sk_add_node(sk, head);
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	}
@@ -264,7 +264,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	int badness = -1;
 
 	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	sk_for_each(sk, node, &udptable[udp_hashfn(hnum)]) {
 		struct inet_sock *inet = inet_sk(sk);
 
 		if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -1068,7 +1068,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	sk = sk_head(&udptable[udp_hashfn(ntohs(uh->dest))]);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 09687f7a856..6e4a822ba65 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -65,7 +65,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 	int badness = -1;
 
 	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+	sk_for_each(sk, node, &udptable[udp_hashfn(hnum)]) {
 		struct inet_sock *inet = inet_sk(sk);
 
 		if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -361,7 +361,7 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	sk = sk_head(&udptable[udp_hashfn(ntohs(uh->dest))]);
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (!sk) {
-- 
cgit v1.2.3-70-g09d2


From e31634931d00081c75e3fb3f3ec51a50dbf108bb Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 16 Jun 2008 17:12:11 -0700
Subject: udp: provide a struct net pointer for __udp[46]_lib_mcast_deliver

They both calculate the hash chain, but currently do not have
a struct net pointer, so pass one there via additional argument,
all the more so their callers already have such.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c |  9 ++++++---
 net/ipv6/udp.c | 12 ++++++++----
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d8f527d1570..6b0acb438f3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1059,7 +1059,7 @@ drop:
  *	Note: called only from the BH handler context,
  *	so we don't need to lock the hashes.
  */
-static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
+static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 				    struct udphdr  *uh,
 				    __be32 saddr, __be32 daddr,
 				    struct hlist_head udptable[])
@@ -1156,6 +1156,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	struct rtable *rt = (struct rtable*)skb->dst;
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
+	struct net *net;
 
 	/*
 	 *  Validate the packet.
@@ -1177,10 +1178,12 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (udp4_csum_init(skb, uh, proto))
 		goto csum_error;
 
+	net = dev_net(skb->dev);
 	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
-		return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
+		return __udp4_lib_mcast_deliver(net, skb, uh,
+				saddr, daddr, udptable);
 
-	sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr,
+	sk = __udp4_lib_lookup(net, saddr, uh->source, daddr,
 			uh->dest, inet_iif(skb), udptable);
 
 	if (sk != NULL) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6e4a822ba65..80fb72c4897 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -353,8 +353,9 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
  * Note: called only from the BH handler context,
  * so we don't need to lock the hashes.
  */
-static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
-			   struct in6_addr *daddr, struct hlist_head udptable[])
+static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
+		struct in6_addr *saddr, struct in6_addr *daddr,
+		struct hlist_head udptable[])
 {
 	struct sock *sk, *sk2;
 	const struct udphdr *uh = udp_hdr(skb);
@@ -435,6 +436,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	struct net_device *dev = skb->dev;
 	struct in6_addr *saddr, *daddr;
 	u32 ulen = 0;
+	struct net *net;
 
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 		goto short_packet;
@@ -469,11 +471,13 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (udp6_csum_init(skb, uh, proto))
 		goto discard;
 
+	net = dev_net(skb->dev);
 	/*
 	 *	Multicast receive code
 	 */
 	if (ipv6_addr_is_multicast(daddr))
-		return __udp6_lib_mcast_deliver(skb, saddr, daddr, udptable);
+		return __udp6_lib_mcast_deliver(net, skb,
+				saddr, daddr, udptable);
 
 	/* Unicast */
 
@@ -481,7 +485,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	 * check socket cache ... must talk to Alan about his plans
 	 * for sock caches... i'll skip this for now.
 	 */
-	sk = __udp6_lib_lookup(dev_net(skb->dev), saddr, uh->source,
+	sk = __udp6_lib_lookup(net, saddr, uh->source,
 			       daddr, uh->dest, inet6_iif(skb), udptable);
 
 	if (sk == NULL) {
-- 
cgit v1.2.3-70-g09d2


From 19c7578fb22b0aef103222cae9b522f03ae489d6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 16 Jun 2008 17:12:29 -0700
Subject: udp: add struct net argument to udp_hashfn

Every caller already has this one. The new argument is currently
unused, but this will be fixed shortly.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h |  2 +-
 net/ipv4/udp.c      | 12 ++++++------
 net/ipv6/udp.c      |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 9c94312b2de..3deccac2e81 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -46,7 +46,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
 
 #define UDP_HTABLE_SIZE		128
 
-static inline int udp_hashfn(const unsigned num)
+static inline int udp_hashfn(struct net *net, const unsigned num)
 {
 	return num & (UDP_HTABLE_SIZE - 1);
 }
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6b0acb438f3..11eabf13614 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -134,7 +134,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
 	struct sock *sk;
 	struct hlist_node *node;
 
-	sk_for_each(sk, node, &udptable[udp_hashfn(num)])
+	sk_for_each(sk, node, &udptable[udp_hashfn(net, num)])
 		if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
 			return 1;
 	return 0;
@@ -174,7 +174,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
 			int size = 0;
 
-			head = &udptable[udp_hashfn(rover)];
+			head = &udptable[udp_hashfn(net, rover)];
 			if (hlist_empty(head))
 				goto gotit;
 
@@ -211,7 +211,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 gotit:
 		snum = rover;
 	} else {
-		head = &udptable[udp_hashfn(snum)];
+		head = &udptable[udp_hashfn(net, snum)];
 
 		sk_for_each(sk2, node, head)
 			if (sk2->sk_hash == snum                             &&
@@ -227,7 +227,7 @@ gotit:
 	inet_sk(sk)->num = snum;
 	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {
-		head = &udptable[udp_hashfn(snum)];
+		head = &udptable[udp_hashfn(net, snum)];
 		sk_add_node(sk, head);
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	}
@@ -264,7 +264,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	int badness = -1;
 
 	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udptable[udp_hashfn(hnum)]) {
+	sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
 		struct inet_sock *inet = inet_sk(sk);
 
 		if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -1068,7 +1068,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udptable[udp_hashfn(ntohs(uh->dest))]);
+	sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 80fb72c4897..432edaa882f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -65,7 +65,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 	int badness = -1;
 
 	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udptable[udp_hashfn(hnum)]) {
+	sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
 		struct inet_sock *inet = inet_sk(sk);
 
 		if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -362,7 +362,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	int dif;
 
 	read_lock(&udp_hash_lock);
-	sk = sk_head(&udptable[udp_hashfn(ntohs(uh->dest))]);
+	sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (!sk) {
-- 
cgit v1.2.3-70-g09d2


From 7f635ab71eef8da012320c0092b662d6af8c1e69 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 16 Jun 2008 17:12:49 -0700
Subject: inet: add struct net argument to inet_bhashfn

Binding to some port in many namespaces may create too long
chains in bhash-es, so prepare the hashfn to take struct net
into account.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h   |  3 ++-
 net/ipv4/inet_connection_sock.c |  6 ++++--
 net/ipv4/inet_hashtables.c      | 11 +++++++----
 net/ipv4/inet_timewait_sock.c   |  6 ++++--
 4 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 735b926a349..61dd3317089 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -201,7 +201,8 @@ extern struct inet_bind_bucket *
 extern void inet_bind_bucket_destroy(struct kmem_cache *cachep,
 				     struct inet_bind_bucket *tb);
 
-static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
+static inline int inet_bhashfn(struct net *net,
+		const __u16 lport, const int bhash_size)
 {
 	return lport & (bhash_size - 1);
 }
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 045e799d3e1..4c804b3c287 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -103,7 +103,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 		rover = net_random() % remaining + low;
 
 		do {
-			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
+			head = &hashinfo->bhash[inet_bhashfn(net, rover,
+					hashinfo->bhash_size)];
 			spin_lock(&head->lock);
 			inet_bind_bucket_for_each(tb, node, &head->chain)
 				if (tb->ib_net == net && tb->port == rover)
@@ -130,7 +131,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 		 */
 		snum = rover;
 	} else {
-		head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
+		head = &hashinfo->bhash[inet_bhashfn(net, snum,
+				hashinfo->bhash_size)];
 		spin_lock(&head->lock);
 		inet_bind_bucket_for_each(tb, node, &head->chain)
 			if (tb->ib_net == net && tb->port == snum)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2023d37b270..dc1b78de899 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -70,7 +70,8 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 static void __inet_put_port(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
+	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num,
+			hashinfo->bhash_size);
 	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
 	struct inet_bind_bucket *tb;
 
@@ -95,7 +96,8 @@ EXPORT_SYMBOL(inet_put_port);
 void __inet_inherit_port(struct sock *sk, struct sock *child)
 {
 	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
-	const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
+	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num,
+			table->bhash_size);
 	struct inet_bind_hashbucket *head = &table->bhash[bhash];
 	struct inet_bind_bucket *tb;
 
@@ -438,7 +440,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
 			port = low + (i + offset) % remaining;
-			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+			head = &hinfo->bhash[inet_bhashfn(net, port,
+					hinfo->bhash_size)];
 			spin_lock(&head->lock);
 
 			/* Does not bother with rcv_saddr checks,
@@ -493,7 +496,7 @@ ok:
 		goto out;
 	}
 
-	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+	head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
 	tb  = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index ce16e9ac24c..06006a5ac8b 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -32,7 +32,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 	write_unlock(lock);
 
 	/* Disassociate with bind bucket. */
-	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+			hashinfo->bhash_size)];
 	spin_lock(&bhead->lock);
 	tb = tw->tw_tb;
 	__hlist_del(&tw->tw_bind_node);
@@ -81,7 +82,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num,
+			hashinfo->bhash_size)];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	BUG_TRAP(icsk->icsk_bind_hash);
-- 
cgit v1.2.3-70-g09d2


From 2086a65078bd24682bdcf413d9c91d81988b8359 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 16 Jun 2008 17:13:08 -0700
Subject: inet: add struct net argument to inet_lhashfn

Listening-on-one-port sockets in many namespaces produce long
chains in the listening_hash-es, so prepare the inet_lhashfn to
take struct net into account.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 4 ++--
 net/ipv4/inet_hashtables.c    | 2 +-
 net/ipv6/inet6_hashtables.c   | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 61dd3317089..26336cdcdc1 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -211,14 +211,14 @@ extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 			   const unsigned short snum);
 
 /* These can have wildcards, don't try too hard. */
-static inline int inet_lhashfn(const unsigned short num)
+static inline int inet_lhashfn(struct net *net, const unsigned short num)
 {
 	return num & (INET_LHTABLE_SIZE - 1);
 }
 
 static inline int inet_sk_listen_hashfn(const struct sock *sk)
 {
-	return inet_lhashfn(inet_sk(sk)->num);
+	return inet_lhashfn(sock_net(sk), inet_sk(sk)->num);
 }
 
 /* Caller must disable local BH processing. */
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index dc1b78de899..4f597b3175e 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -194,7 +194,7 @@ struct sock *__inet_lookup_listener(struct net *net,
 	const struct hlist_head *head;
 
 	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+	head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
 	if (!hlist_empty(head)) {
 		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 580014aea4d..b940156ca4f 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -104,7 +104,8 @@ struct sock *inet6_lookup_listener(struct net *net,
 	int score, hiscore = 0;
 
 	read_lock(&hashinfo->lhash_lock);
-	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
+	sk_for_each(sk, node,
+			&hashinfo->listening_hash[inet_lhashfn(net, hnum)]) {
 		if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
 				sk->sk_family == PF_INET6) {
 			const struct ipv6_pinfo *np = inet6_sk(sk);
-- 
cgit v1.2.3-70-g09d2


From 9f26b3add3783c0af869ea2207871da5dafefffa Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 16 Jun 2008 17:13:27 -0700
Subject: inet: add struct net argument to inet_ehashfn

Although this hash takes addresses into account, the ehash chains
can also be too long when, for instance, communications via lo occur.
So, prepare the inet_hashfn to take struct net into account.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h    | 6 ++++--
 net/ipv4/inet_hashtables.c | 6 +++---
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 9fabe5b3891..85bb420c5d8 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -171,7 +171,8 @@ extern int inet_sk_rebuild_header(struct sock *sk);
 extern u32 inet_ehash_secret;
 extern void build_ehash_secret(void);
 
-static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport,
+static inline unsigned int inet_ehashfn(struct net *net,
+					const __be32 laddr, const __u16 lport,
 					const __be32 faddr, const __be16 fport)
 {
 	return jhash_3words((__force __u32) laddr,
@@ -187,8 +188,9 @@ static inline int inet_sk_ehashfn(const struct sock *sk)
 	const __u16 lport = inet->num;
 	const __be32 faddr = inet->daddr;
 	const __be16 fport = inet->dport;
+	struct net *net = sock_net(sk);
 
-	return inet_ehashfn(laddr, lport, faddr, fport);
+	return inet_ehashfn(net, laddr, lport, faddr, fport);
 }
 
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 4f597b3175e..eca5899729e 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -227,7 +227,7 @@ struct sock * __inet_lookup_established(struct net *net,
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
+	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
 	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
 
@@ -267,13 +267,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	int dif = sk->sk_bound_dev_if;
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+	struct net *net = sock_net(sk);
+	unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_node *node;
 	struct inet_timewait_sock *tw;
-	struct net *net = sock_net(sk);
 
 	prefetch(head->chain.first);
 	write_lock(lock);
-- 
cgit v1.2.3-70-g09d2


From 33de014c63646f69f36f3673e3b4676f931dc878 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 16 Jun 2008 17:13:48 -0700
Subject: inet6: add struct net argument to inet6_ehashfn

Same as for inet_hashfn, prepare its ipv6 incarnation.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_hashtables.h | 7 +++++--
 net/ipv6/inet6_hashtables.c    | 6 +++---
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 62a5b691858..72f13a9928e 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -28,7 +28,8 @@
 struct inet_hashinfo;
 
 /* I have no idea if this is a good hash for v6 or not. -DaveM */
-static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
+static inline unsigned int inet6_ehashfn(struct net *net,
+				const struct in6_addr *laddr, const u16 lport,
 				const struct in6_addr *faddr, const __be16 fport)
 {
 	u32 ports = (lport ^ (__force u16)fport);
@@ -46,7 +47,9 @@ static inline int inet6_sk_ehashfn(const struct sock *sk)
 	const struct in6_addr *faddr = &np->daddr;
 	const __u16 lport = inet->num;
 	const __be16 fport = inet->dport;
-	return inet6_ehashfn(laddr, lport, faddr, fport);
+	struct net *net = sock_net(sk);
+
+	return inet6_ehashfn(net, laddr, lport, faddr, fport);
 }
 
 extern void __inet6_hash(struct sock *sk);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b940156ca4f..a9cc8ab33a4 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -68,7 +68,7 @@ struct sock *__inet6_lookup_established(struct net *net,
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport);
+	unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
 	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
 
@@ -166,14 +166,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	const struct in6_addr *saddr = &np->daddr;
 	const int dif = sk->sk_bound_dev_if;
 	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
-	const unsigned int hash = inet6_ehashfn(daddr, lport, saddr,
+	struct net *net = sock_net(sk);
+	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
 						inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_node *node;
 	struct inet_timewait_sock *tw;
-	struct net *net = sock_net(sk);
 
 	prefetch(head->chain.first);
 	write_lock(lock);
-- 
cgit v1.2.3-70-g09d2


From 25519a2a769d42fc2733a8f119682272d99b1304 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 21 Dec 2007 03:22:38 -0800
Subject: wext: Remove inline from get_priv_size() and adjust_priv_size().

The compiler inlines when appropriate.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 947188a5b93..e83d74affd7 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -500,7 +500,7 @@ static int call_commit_handler(struct net_device *dev)
 /*
  * Calculate size of private arguments
  */
-static inline int get_priv_size(__u16	args)
+static int get_priv_size(__u16 args)
 {
 	int	num = args & IW_PRIV_SIZE_MASK;
 	int	type = (args & IW_PRIV_TYPE_MASK) >> 12;
@@ -512,8 +512,7 @@ static inline int get_priv_size(__u16	args)
 /*
  * Re-calculate the size of private arguments
  */
-static inline int adjust_priv_size(__u16		args,
-				   union iwreq_data *	wrqu)
+static int adjust_priv_size(__u16 args, union iwreq_data *wrqu)
 {
 	int	num = wrqu->data.length;
 	int	max = args & IW_PRIV_SIZE_MASK;
-- 
cgit v1.2.3-70-g09d2


From 208887d4cc5a5c1eeb68bd170e21e32b1129cd94 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 21 Dec 2007 03:24:24 -0800
Subject: wext: Make adjust_priv_size() take a "struct iw_point *".

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index e83d74affd7..cd2cf9fec10 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -512,9 +512,9 @@ static int get_priv_size(__u16 args)
 /*
  * Re-calculate the size of private arguments
  */
-static int adjust_priv_size(__u16 args, union iwreq_data *wrqu)
+static int adjust_priv_size(__u16 args, struct iw_point *iwp)
 {
-	int	num = wrqu->data.length;
+	int	num = iwp->length;
 	int	max = args & IW_PRIV_SIZE_MASK;
 	int	type = (args & IW_PRIV_TYPE_MASK) >> 12;
 
@@ -976,7 +976,7 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
 			 * avoid leaking kernel bits outside. */
 			if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) {
 				extra_size = adjust_priv_size(descr->get_args,
-							      &(iwr->u));
+							      &(iwr->u.data));
 			}
 
 			err = copy_to_user(iwr->u.data.pointer, extra,
-- 
cgit v1.2.3-70-g09d2


From 84149b0fca08f9ec554dfc28dabc39839fdf8a06 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 21 Dec 2007 03:27:17 -0800
Subject: wext: Extract standard call iw_point handling into seperate function.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 258 +++++++++++++++++++++++++++-------------------------
 1 file changed, 134 insertions(+), 124 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index cd2cf9fec10..d17c0f45acc 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -694,6 +694,138 @@ void wext_proc_exit(struct net *net)
  */
 
 /* ---------------------------------------------------------------- */
+static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
+				   const struct iw_ioctl_description *descr,
+				   iw_handler handler, struct net_device *dev,
+				   struct iw_request_info *info)
+{
+	int err, extra_size, user_length = 0, essid_compat = 0;
+	char *extra;
+
+	/* Calculate space needed by arguments. Always allocate
+	 * for max space.
+	 */
+	extra_size = descr->max_tokens * descr->token_size;
+
+	/* Check need for ESSID compatibility for WE < 21 */
+	switch (cmd) {
+	case SIOCSIWESSID:
+	case SIOCGIWESSID:
+	case SIOCSIWNICKN:
+	case SIOCGIWNICKN:
+		if (iwp->length == descr->max_tokens + 1)
+			essid_compat = 1;
+		else if (IW_IS_SET(cmd) && (iwp->length != 0)) {
+			char essid[IW_ESSID_MAX_SIZE + 1];
+
+			err = copy_from_user(essid, iwp->pointer,
+					     iwp->length *
+					     descr->token_size);
+			if (err)
+				return -EFAULT;
+
+			if (essid[iwp->length - 1] == '\0')
+				essid_compat = 1;
+		}
+		break;
+	default:
+		break;
+	}
+
+	iwp->length -= essid_compat;
+
+	/* Check what user space is giving us */
+	if (IW_IS_SET(cmd)) {
+		/* Check NULL pointer */
+		if (!iwp->pointer && iwp->length != 0)
+			return -EFAULT;
+		/* Check if number of token fits within bounds */
+		if (iwp->length > descr->max_tokens)
+			return -E2BIG;
+		if (iwp->length < descr->min_tokens)
+			return -EINVAL;
+	} else {
+		/* Check NULL pointer */
+		if (!iwp->pointer)
+			return -EFAULT;
+		/* Save user space buffer size for checking */
+		user_length = iwp->length;
+
+		/* Don't check if user_length > max to allow forward
+		 * compatibility. The test user_length < min is
+		 * implied by the test at the end.
+		 */
+
+		/* Support for very large requests */
+		if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
+		    (user_length > descr->max_tokens)) {
+			/* Allow userspace to GET more than max so
+			 * we can support any size GET requests.
+			 * There is still a limit : -ENOMEM.
+			 */
+			extra_size = user_length * descr->token_size;
+
+			/* Note : user_length is originally a __u16,
+			 * and token_size is controlled by us,
+			 * so extra_size won't get negative and
+			 * won't overflow...
+			 */
+		}
+	}
+
+	/* kzalloc() ensures NULL-termination for essid_compat. */
+	extra = kzalloc(extra_size, GFP_KERNEL);
+	if (!extra)
+		return -ENOMEM;
+
+	/* If it is a SET, get all the extra data in here */
+	if (IW_IS_SET(cmd) && (iwp->length != 0)) {
+		if (copy_from_user(extra, iwp->pointer,
+				   iwp->length *
+				   descr->token_size)) {
+			err = -EFAULT;
+			goto out;
+		}
+	}
+
+	err = handler(dev, info, (union iwreq_data *) iwp, extra);
+
+	iwp->length += essid_compat;
+
+	/* If we have something to return to the user */
+	if (!err && IW_IS_GET(cmd)) {
+		/* Check if there is enough buffer up there */
+		if (user_length < iwp->length) {
+			err = -E2BIG;
+			goto out;
+		}
+
+		if (copy_to_user(iwp->pointer, extra,
+				 iwp->length *
+				 descr->token_size)) {
+			err = -EFAULT;
+			goto out;
+		}
+	}
+
+	/* Generate an event to notify listeners of the change */
+	if ((descr->flags & IW_DESCR_FLAG_EVENT) && err == -EIWCOMMIT) {
+		union iwreq_data *data = (union iwreq_data *) iwp;
+
+		if (descr->flags & IW_DESCR_FLAG_RESTRICT)
+			/* If the event is restricted, don't
+			 * export the payload.
+			 */
+			wireless_send_event(dev, cmd, data, NULL);
+		else
+			wireless_send_event(dev, cmd, data, extra);
+	}
+
+out:
+	kfree(extra);
+	return err;
+}
+
 /*
  * Wrapper to call a standard Wireless Extension handler.
  * We do various checks and also take care of moving data between
@@ -729,130 +861,8 @@ static int ioctl_standard_call(struct net_device *	dev,
 		   ((ret == 0) || (ret == -EIWCOMMIT)))
 			wireless_send_event(dev, cmd, &(iwr->u), NULL);
 	} else {
-		char *	extra;
-		int	extra_size;
-		int	user_length = 0;
-		int	err;
-		int	essid_compat = 0;
-
-		/* Calculate space needed by arguments. Always allocate
-		 * for max space. Easier, and won't last long... */
-		extra_size = descr->max_tokens * descr->token_size;
-
-		/* Check need for ESSID compatibility for WE < 21 */
-		switch (cmd) {
-		case SIOCSIWESSID:
-		case SIOCGIWESSID:
-		case SIOCSIWNICKN:
-		case SIOCGIWNICKN:
-			if (iwr->u.data.length == descr->max_tokens + 1)
-				essid_compat = 1;
-			else if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
-				char essid[IW_ESSID_MAX_SIZE + 1];
-
-				err = copy_from_user(essid, iwr->u.data.pointer,
-						     iwr->u.data.length *
-						     descr->token_size);
-				if (err)
-					return -EFAULT;
-
-				if (essid[iwr->u.data.length - 1] == '\0')
-					essid_compat = 1;
-			}
-			break;
-		default:
-			break;
-		}
-
-		iwr->u.data.length -= essid_compat;
-
-		/* Check what user space is giving us */
-		if (IW_IS_SET(cmd)) {
-			/* Check NULL pointer */
-			if ((iwr->u.data.pointer == NULL) &&
-			   (iwr->u.data.length != 0))
-				return -EFAULT;
-			/* Check if number of token fits within bounds */
-			if (iwr->u.data.length > descr->max_tokens)
-				return -E2BIG;
-			if (iwr->u.data.length < descr->min_tokens)
-				return -EINVAL;
-		} else {
-			/* Check NULL pointer */
-			if (iwr->u.data.pointer == NULL)
-				return -EFAULT;
-			/* Save user space buffer size for checking */
-			user_length = iwr->u.data.length;
-
-			/* Don't check if user_length > max to allow forward
-			 * compatibility. The test user_length < min is
-			 * implied by the test at the end. */
-
-			/* Support for very large requests */
-			if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
-			   (user_length > descr->max_tokens)) {
-				/* Allow userspace to GET more than max so
-				 * we can support any size GET requests.
-				 * There is still a limit : -ENOMEM. */
-				extra_size = user_length * descr->token_size;
-				/* Note : user_length is originally a __u16,
-				 * and token_size is controlled by us,
-				 * so extra_size won't get negative and
-				 * won't overflow... */
-			}
-		}
-
-		/* Create the kernel buffer */
-		/*    kzalloc ensures NULL-termination for essid_compat */
-		extra = kzalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL)
-			return -ENOMEM;
-
-		/* If it is a SET, get all the extra data in here */
-		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
-			err = copy_from_user(extra, iwr->u.data.pointer,
-					     iwr->u.data.length *
-					     descr->token_size);
-			if (err) {
-				kfree(extra);
-				return -EFAULT;
-			}
-		}
-
-		/* Call the handler */
-		ret = handler(dev, &info, &(iwr->u), extra);
-
-		iwr->u.data.length += essid_compat;
-
-		/* If we have something to return to the user */
-		if (!ret && IW_IS_GET(cmd)) {
-			/* Check if there is enough buffer up there */
-			if (user_length < iwr->u.data.length) {
-				kfree(extra);
-				return -E2BIG;
-			}
-
-			err = copy_to_user(iwr->u.data.pointer, extra,
-					   iwr->u.data.length *
-					   descr->token_size);
-			if (err)
-				ret =  -EFAULT;
-		}
-
-		/* Generate an event to notify listeners of the change */
-		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
-		   ((ret == 0) || (ret == -EIWCOMMIT))) {
-			if (descr->flags & IW_DESCR_FLAG_RESTRICT)
-				/* If the event is restricted, don't
-				 * export the payload */
-				wireless_send_event(dev, cmd, &(iwr->u), NULL);
-			else
-				wireless_send_event(dev, cmd, &(iwr->u),
-						    extra);
-		}
-
-		/* Cleanup - I told you it wasn't that long ;-) */
-		kfree(extra);
+		ret = ioctl_standard_iw_point(&iwr->u.data, cmd, descr,
+					      handler, dev, &info);
 	}
 
 	/* Call commit handler if needed and defined */
-- 
cgit v1.2.3-70-g09d2


From d88174e4d295f0880e5f9cb6d42f26b0367c8fd9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 21 Dec 2007 03:33:46 -0800
Subject: wext: Extract private call iw_point handling into seperate functions.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 141 +++++++++++++++++++++++++++-------------------------
 1 file changed, 74 insertions(+), 67 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index d17c0f45acc..a1cd19add6d 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -890,25 +890,22 @@ static int ioctl_standard_call(struct net_device *	dev,
  * a iw_handler but process it in your ioctl handler (i.e. use the
  * old driver API).
  */
-static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
-			      unsigned int cmd, iw_handler handler)
+static int get_priv_descr_and_size(struct net_device *dev, unsigned int cmd,
+				   const struct iw_priv_args **descrp)
 {
-	struct iwreq *			iwr = (struct iwreq *) ifr;
-	const struct iw_priv_args *	descr = NULL;
-	struct iw_request_info		info;
-	int				extra_size = 0;
-	int				i;
-	int				ret = -EINVAL;
+	const struct iw_priv_args *descr;
+	int i, extra_size;
 
-	/* Get the description of the IOCTL */
-	for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
+	descr = NULL;
+	for (i = 0; i < dev->wireless_handlers->num_private_args; i++) {
 		if (cmd == dev->wireless_handlers->private_args[i].cmd) {
-			descr = &(dev->wireless_handlers->private_args[i]);
+			descr = &dev->wireless_handlers->private_args[i];
 			break;
 		}
+	}
 
-	/* Compute the size of the set/get arguments */
-	if (descr != NULL) {
+	extra_size = 0;
+	if (descr) {
 		if (IW_IS_SET(cmd)) {
 			int	offset = 0;	/* For sub-ioctls */
 			/* Check for sub-ioctl handler */
@@ -933,72 +930,82 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
 				extra_size = 0;
 		}
 	}
+	*descrp = descr;
+	return extra_size;
+}
 
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
+static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd,
+				  const struct iw_priv_args *descr,
+				  iw_handler handler, struct net_device *dev,
+				  struct iw_request_info *info, int extra_size)
+{
+	char *extra;
+	int err;
 
-	/* Check if we have a pointer to user space data or not. */
-	if (extra_size == 0) {
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
-	} else {
-		char *	extra;
-		int	err;
+	/* Check what user space is giving us */
+	if (IW_IS_SET(cmd)) {
+		if (!iwp->pointer && iwp->length != 0)
+			return -EFAULT;
 
-		/* Check what user space is giving us */
-		if (IW_IS_SET(cmd)) {
-			/* Check NULL pointer */
-			if ((iwr->u.data.pointer == NULL) &&
-			   (iwr->u.data.length != 0))
-				return -EFAULT;
+		if (iwp->length > (descr->set_args & IW_PRIV_SIZE_MASK))
+			return -E2BIG;
+	} else if (!iwp->pointer)
+		return -EFAULT;
 
-			/* Does it fits within bounds ? */
-			if (iwr->u.data.length > (descr->set_args &
-						 IW_PRIV_SIZE_MASK))
-				return -E2BIG;
-		} else if (iwr->u.data.pointer == NULL)
-			return -EFAULT;
+	extra = kmalloc(extra_size, GFP_KERNEL);
+	if (!extra)
+		return -ENOMEM;
 
-		/* Always allocate for max space. Easier, and won't last
-		 * long... */
-		extra = kmalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL)
-			return -ENOMEM;
-
-		/* If it is a SET, get all the extra data in here */
-		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
-			err = copy_from_user(extra, iwr->u.data.pointer,
-					     extra_size);
-			if (err) {
-				kfree(extra);
-				return -EFAULT;
-			}
+	/* If it is a SET, get all the extra data in here */
+	if (IW_IS_SET(cmd) && (iwp->length != 0)) {
+		if (copy_from_user(extra, iwp->pointer, extra_size)) {
+			err = -EFAULT;
+			goto out;
 		}
+	}
 
-		/* Call the handler */
-		ret = handler(dev, &info, &(iwr->u), extra);
+	/* Call the handler */
+	err = handler(dev, info, (union iwreq_data *) iwp, extra);
 
-		/* If we have something to return to the user */
-		if (!ret && IW_IS_GET(cmd)) {
+	/* If we have something to return to the user */
+	if (!err && IW_IS_GET(cmd)) {
+		/* Adjust for the actual length if it's variable,
+		 * avoid leaking kernel bits outside.
+		 */
+		if (!(descr->get_args & IW_PRIV_SIZE_FIXED))
+			extra_size = adjust_priv_size(descr->get_args, iwp);
 
-			/* Adjust for the actual length if it's variable,
-			 * avoid leaking kernel bits outside. */
-			if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) {
-				extra_size = adjust_priv_size(descr->get_args,
-							      &(iwr->u.data));
-			}
+		if (copy_to_user(iwp->pointer, extra, extra_size))
+			err =  -EFAULT;
+	}
 
-			err = copy_to_user(iwr->u.data.pointer, extra,
-					   extra_size);
-			if (err)
-				ret =  -EFAULT;
-		}
+out:
+	kfree(extra);
+	return err;
+}
 
-		/* Cleanup - I told you it wasn't that long ;-) */
-		kfree(extra);
-	}
+static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
+			      unsigned int cmd, iw_handler handler)
+{
+	struct iwreq *iwr = (struct iwreq *) ifr;
+	int extra_size = 0, ret = -EINVAL;
+	const struct iw_priv_args *descr;
+	struct iw_request_info info;
 
+	extra_size = get_priv_descr_and_size(dev, cmd, &descr);
+
+	/* Prepare the call */
+	info.cmd = cmd;
+	info.flags = 0;
+
+	/* Check if we have a pointer to user space data or not. */
+	if (extra_size == 0) {
+		/* No extra arguments. Trivial to handle */
+		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
+	} else {
+		ret = ioctl_private_iw_point(&iwr->u.data, cmd, descr,
+					     handler, dev, &info, extra_size);
+	}
 
 	/* Call commit handler if needed and defined */
 	if (ret == -EIWCOMMIT)
-- 
cgit v1.2.3-70-g09d2


From 67dd7608078b17f63f29ff2108fc5bf2407ddcec Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 21 Dec 2007 03:36:31 -0800
Subject: wext: Pull ioctl permission checking out into helper function.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index a1cd19add6d..e96559ea0fb 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -1061,18 +1061,26 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned i
 	return -EOPNOTSUPP;
 }
 
+/* If command is `set a parameter', or `get the encoding parameters',
+ * check if the user has the right to do it.
+ */
+static int wext_permission_check(unsigned int cmd)
+{
+	if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT)
+	    && !capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+
 /* entry point from dev ioctl */
 int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
 		      void __user *arg)
 {
-	int ret;
+	int ret = wext_permission_check(cmd);
 
-	/* If command is `set a parameter', or
-	 * `get the encoding parameters', check if
-	 * the user has the right to do it */
-	if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT)
-	    && !capable(CAP_NET_ADMIN))
-		return -EPERM;
+	if (ret)
+		return ret;
 
 	dev_load(net, ifr->ifr_name);
 	rtnl_lock();
-- 
cgit v1.2.3-70-g09d2


From ca1e8bb8e4e89e2769e2b39eb29fdcfc5c19cf89 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 21 Dec 2007 03:41:45 -0800
Subject: wext: Parameterize the standard/private handlers.

The WEXT standard and private handlers to use are now
arguments to wireless_process_ioctl().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index e96559ea0fb..929d24120f3 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -1015,11 +1015,17 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
 }
 
 /* ---------------------------------------------------------------- */
+typedef int (*wext_ioctl_func)(struct net_device *, struct ifreq *,
+			       unsigned int, iw_handler);
+
 /*
  * Main IOCTl dispatcher.
  * Check the type of IOCTL and call the appropriate wrapper...
  */
-static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd)
+static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
+				  unsigned int cmd,
+				  wext_ioctl_func standard,
+				  wext_ioctl_func private)
 {
 	struct net_device *dev;
 	iw_handler	handler;
@@ -1035,12 +1041,12 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned i
 	 * Note that 'cmd' is already filtered in dev_ioctl() with
 	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
 	if (cmd == SIOCGIWSTATS)
-		return ioctl_standard_call(dev, ifr, cmd,
-					   &iw_handler_get_iwstats);
+		return standard(dev, ifr, cmd,
+				&iw_handler_get_iwstats);
 
 	if (cmd == SIOCGIWPRIV && dev->wireless_handlers)
-		return ioctl_standard_call(dev, ifr, cmd,
-					   &iw_handler_get_private);
+		return standard(dev, ifr, cmd,
+				&iw_handler_get_private);
 
 	/* Basic check */
 	if (!netif_device_present(dev))
@@ -1051,9 +1057,9 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned i
 	if (handler) {
 		/* Standard and private are not the same */
 		if (cmd < SIOCIWFIRSTPRIV)
-			return ioctl_standard_call(dev, ifr, cmd, handler);
+			return standard(dev, ifr, cmd, handler);
 		else
-			return ioctl_private_call(dev, ifr, cmd, handler);
+			return private(dev, ifr, cmd, handler);
 	}
 	/* Old driver API : call driver ioctl handler */
 	if (dev->do_ioctl)
@@ -1084,7 +1090,9 @@ int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
 
 	dev_load(net, ifr->ifr_name);
 	rtnl_lock();
-	ret = wireless_process_ioctl(net, ifr, cmd);
+	ret = wireless_process_ioctl(net, ifr, cmd,
+				     ioctl_standard_call,
+				     ioctl_private_call);
 	rtnl_unlock();
 	if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct iwreq)))
 		return -EFAULT;
-- 
cgit v1.2.3-70-g09d2


From d2911255590d9ca561a481b9dbebcfcbbf38fa4e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 21 Dec 2007 03:46:01 -0800
Subject: wext: Pass iwreq pointer down into standard/private handlers.

They have no need to see the object as an ifreq.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 929d24120f3..e9c88172ec5 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -832,11 +832,10 @@ out:
  * user space and kernel space.
  */
 static int ioctl_standard_call(struct net_device *	dev,
-			       struct ifreq *		ifr,
+			       struct iwreq		*iwr,
 			       unsigned int		cmd,
 			       iw_handler		handler)
 {
-	struct iwreq *				iwr = (struct iwreq *) ifr;
 	const struct iw_ioctl_description *	descr;
 	struct iw_request_info			info;
 	int					ret = -EINVAL;
@@ -984,10 +983,9 @@ out:
 	return err;
 }
 
-static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
+static int ioctl_private_call(struct net_device *dev, struct iwreq *iwr,
 			      unsigned int cmd, iw_handler handler)
 {
-	struct iwreq *iwr = (struct iwreq *) ifr;
 	int extra_size = 0, ret = -EINVAL;
 	const struct iw_priv_args *descr;
 	struct iw_request_info info;
@@ -1015,7 +1013,7 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
 }
 
 /* ---------------------------------------------------------------- */
-typedef int (*wext_ioctl_func)(struct net_device *, struct ifreq *,
+typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *,
 			       unsigned int, iw_handler);
 
 /*
@@ -1027,6 +1025,7 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 				  wext_ioctl_func standard,
 				  wext_ioctl_func private)
 {
+	struct iwreq *iwr = (struct iwreq *) ifr;
 	struct net_device *dev;
 	iw_handler	handler;
 
@@ -1041,11 +1040,11 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 	 * Note that 'cmd' is already filtered in dev_ioctl() with
 	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
 	if (cmd == SIOCGIWSTATS)
-		return standard(dev, ifr, cmd,
+		return standard(dev, iwr, cmd,
 				&iw_handler_get_iwstats);
 
 	if (cmd == SIOCGIWPRIV && dev->wireless_handlers)
-		return standard(dev, ifr, cmd,
+		return standard(dev, iwr, cmd,
 				&iw_handler_get_private);
 
 	/* Basic check */
@@ -1057,9 +1056,9 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 	if (handler) {
 		/* Standard and private are not the same */
 		if (cmd < SIOCIWFIRSTPRIV)
-			return standard(dev, ifr, cmd, handler);
+			return standard(dev, iwr, cmd, handler);
 		else
-			return private(dev, ifr, cmd, handler);
+			return private(dev, iwr, cmd, handler);
 	}
 	/* Old driver API : call driver ioctl handler */
 	if (dev->do_ioctl)
-- 
cgit v1.2.3-70-g09d2


From a67fa76d8be4e24e2d61cd76438a893d4c2886f7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 3 Jun 2008 07:36:30 -0700
Subject: wext: Pull top-level ioctl dispatch logic into helper function.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index e9c88172ec5..09022cbb58b 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -1079,8 +1079,10 @@ static int wext_permission_check(unsigned int cmd)
 }
 
 /* entry point from dev ioctl */
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
-		      void __user *arg)
+static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
+			       unsigned int cmd,
+			       wext_ioctl_func standard,
+			       wext_ioctl_func private)
 {
 	int ret = wext_permission_check(cmd);
 
@@ -1089,12 +1091,24 @@ int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
 
 	dev_load(net, ifr->ifr_name);
 	rtnl_lock();
-	ret = wireless_process_ioctl(net, ifr, cmd,
-				     ioctl_standard_call,
-				     ioctl_private_call);
+	ret = wireless_process_ioctl(net, ifr, cmd, standard, private);
 	rtnl_unlock();
-	if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct iwreq)))
+
+	return ret;
+}
+
+int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+		      void __user *arg)
+{
+	int ret = wext_ioctl_dispatch(net, ifr, cmd,
+				      ioctl_standard_call,
+				      ioctl_private_call);
+
+	if (ret >= 0 &&
+	    IW_IS_GET(cmd) &&
+	    copy_to_user(arg, ifr, sizeof(struct iwreq)))
 		return -EFAULT;
+
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 87de87d5e47f94b4ea647a5bd1bc8dc1f7930db4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 3 Jun 2008 09:14:03 -0700
Subject: wext: Dispatch and handle compat ioctls entirely in
 net/wireless/wext.c

Next we can kill the hacks in fs/compat_ioctl.c and also
dispatch compat ioctls down into the driver and 80211 protocol
helper layers in order to handle iw_point objects embedded in
stream replies which need to be translated.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/compat_ioctl.c        |   6 ---
 include/linux/wireless.h |  13 ++++++
 include/net/wext.h       |   7 ++++
 net/socket.c             |  10 +++++
 net/wireless/wext.c      | 104 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 134 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 97dba0d9234..8ab850bf2ee 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1757,12 +1757,6 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
 	return sys_ioctl(fd, cmd, (unsigned long)tdata);
 }
 
-struct compat_iw_point {
-	compat_caddr_t pointer;
-	__u16 length;
-	__u16 flags;
-};
-
 static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	struct iwreq __user *iwr;
diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index 4a95a0e5eec..79d84687582 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -677,6 +677,19 @@ struct	iw_point
   __u16		flags;		/* Optional params */
 };
 
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+
+#include <linux/compat.h>
+
+struct compat_iw_point {
+	compat_caddr_t pointer;
+	__u16 length;
+	__u16 flags;
+};
+#endif
+#endif
+
 /*
  *	A frequency
  *	For numbers lower than 10^9, we encode the number in 'm' and
diff --git a/include/net/wext.h b/include/net/wext.h
index 80b31d826b7..6d76a39a9c5 100644
--- a/include/net/wext.h
+++ b/include/net/wext.h
@@ -12,6 +12,8 @@ extern int wext_proc_init(struct net *net);
 extern void wext_proc_exit(struct net *net);
 extern int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
 			     void __user *arg);
+extern int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
+				    unsigned long arg);
 #else
 static inline int wext_proc_init(struct net *net)
 {
@@ -26,6 +28,11 @@ static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned
 {
 	return -EINVAL;
 }
+static inline int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
+					   unsigned long arg)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif /* __NET_WEXT_H */
diff --git a/net/socket.c b/net/socket.c
index 66c4a8cf6db..81fe8251304 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -90,6 +90,7 @@
 #include <asm/unistd.h>
 
 #include <net/compat.h>
+#include <net/wext.h>
 
 #include <net/sock.h>
 #include <linux/netfilter.h>
@@ -2210,10 +2211,19 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd,
 {
 	struct socket *sock = file->private_data;
 	int ret = -ENOIOCTLCMD;
+	struct sock *sk;
+	struct net *net;
+
+	sk = sock->sk;
+	net = sock_net(sk);
 
 	if (sock->ops->compat_ioctl)
 		ret = sock->ops->compat_ioctl(sock, cmd, arg);
 
+	if (ret == -ENOIOCTLCMD &&
+	    (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
+		ret = compat_wext_handle_ioctl(net, cmd, arg);
+
 	return ret;
 }
 #endif
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 09022cbb58b..1a4636a9fcd 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -1112,6 +1112,110 @@ int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+static int compat_standard_call(struct net_device	*dev,
+				struct iwreq		*iwr,
+				unsigned int		cmd,
+				iw_handler		handler)
+{
+	const struct iw_ioctl_description *descr;
+	struct compat_iw_point *iwp_compat;
+	struct iw_request_info info;
+	struct iw_point iwp;
+	int err;
+
+	descr = standard_ioctl + (cmd - SIOCIWFIRST);
+
+	if (descr->header_type != IW_HEADER_TYPE_POINT)
+		return ioctl_standard_call(dev, iwr, cmd, handler);
+
+	iwp_compat = (struct compat_iw_point *) &iwr->u.data;
+	iwp.pointer = compat_ptr(iwp_compat->pointer);
+	iwp.length = iwp_compat->length;
+	iwp.flags = iwp_compat->flags;
+
+	info.cmd = cmd;
+	info.flags = 0;
+
+	err = ioctl_standard_iw_point(&iwp, cmd, descr, handler, dev, &info);
+
+	iwp_compat->pointer = ptr_to_compat(iwp.pointer);
+	iwp_compat->length = iwp.length;
+	iwp_compat->flags = iwp.flags;
+
+	return err;
+}
+
+static int compat_private_call(struct net_device *dev, struct iwreq *iwr,
+			       unsigned int cmd, iw_handler handler)
+{
+	const struct iw_priv_args *descr;
+	struct iw_request_info info;
+	int ret, extra_size;
+
+	extra_size = get_priv_descr_and_size(dev, cmd, &descr);
+
+	/* Prepare the call */
+	info.cmd = cmd;
+	info.flags = 0;
+
+	/* Check if we have a pointer to user space data or not. */
+	if (extra_size == 0) {
+		/* No extra arguments. Trivial to handle */
+		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
+	} else {
+		struct compat_iw_point *iwp_compat;
+		struct iw_point iwp;
+
+		iwp_compat = (struct compat_iw_point *) &iwr->u.data;
+		iwp.pointer = compat_ptr(iwp_compat->pointer);
+		iwp.length = iwp_compat->length;
+		iwp.flags = iwp_compat->flags;
+
+		ret = ioctl_private_iw_point(&iwp, cmd, descr,
+					     handler, dev, &info, extra_size);
+
+		iwp_compat->pointer = ptr_to_compat(iwp.pointer);
+		iwp_compat->length = iwp.length;
+		iwp_compat->flags = iwp.flags;
+	}
+
+	/* Call commit handler if needed and defined */
+	if (ret == -EIWCOMMIT)
+		ret = call_commit_handler(dev);
+
+	return ret;
+}
+
+int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
+			     unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct iwreq iwr;
+	char *colon;
+	int ret;
+
+	if (copy_from_user(&iwr, argp, sizeof(struct iwreq)))
+		return -EFAULT;
+
+	iwr.ifr_name[IFNAMSIZ-1] = 0;
+	colon = strchr(iwr.ifr_name, ':');
+	if (colon)
+		*colon = 0;
+
+	ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd,
+				  compat_standard_call,
+				  compat_private_call);
+
+	if (ret >= 0 &&
+	    IW_IS_GET(cmd) &&
+	    copy_to_user(argp, &iwr, sizeof(struct iwreq)))
+		return -EFAULT;
+
+	return ret;
+}
+#endif
+
 /************************* EVENT PROCESSING *************************/
 /*
  * Process events generated by the wireless layer or the driver.
-- 
cgit v1.2.3-70-g09d2


From 0f5cabba49021d36e9f76bd97d7fa0f4a408063f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 3 Jun 2008 07:39:16 -0700
Subject: wext: Create IW_REQUEST_FLAG_COMPAT and set it as needed.

Now low-level WEXT ioctl handlers can do compat handling
when necessary.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/iw_handler.h |  2 +-
 net/wireless/wext.c      | 73 ++++++++++++++++++++++--------------------------
 2 files changed, 34 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 369d50e08b9..c99a8eec84e 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -256,7 +256,7 @@
 #define EIWCOMMIT	EINPROGRESS
 
 /* Flags available in struct iw_request_info */
-#define IW_REQUEST_FLAG_NONE	0x0000	/* No flag so far */
+#define IW_REQUEST_FLAG_COMPAT	0x0001	/* Compat ioctl call */
 
 /* Type of headers we know about (basically union iwreq_data) */
 #define IW_HEADER_TYPE_NULL	0	/* Not available */
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 1a4636a9fcd..273a8435999 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -834,10 +834,10 @@ out:
 static int ioctl_standard_call(struct net_device *	dev,
 			       struct iwreq		*iwr,
 			       unsigned int		cmd,
+			       struct iw_request_info	*info,
 			       iw_handler		handler)
 {
 	const struct iw_ioctl_description *	descr;
-	struct iw_request_info			info;
 	int					ret = -EINVAL;
 
 	/* Get the description of the IOCTL */
@@ -845,15 +845,11 @@ static int ioctl_standard_call(struct net_device *	dev,
 		return -EOPNOTSUPP;
 	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
 
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
 	/* Check if we have a pointer to user space data or not */
 	if (descr->header_type != IW_HEADER_TYPE_POINT) {
 
 		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, &(iwr->u), NULL);
+		ret = handler(dev, info, &(iwr->u), NULL);
 
 		/* Generate an event to notify listeners of the change */
 		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
@@ -861,7 +857,7 @@ static int ioctl_standard_call(struct net_device *	dev,
 			wireless_send_event(dev, cmd, &(iwr->u), NULL);
 	} else {
 		ret = ioctl_standard_iw_point(&iwr->u.data, cmd, descr,
-					      handler, dev, &info);
+					      handler, dev, info);
 	}
 
 	/* Call commit handler if needed and defined */
@@ -984,25 +980,21 @@ out:
 }
 
 static int ioctl_private_call(struct net_device *dev, struct iwreq *iwr,
-			      unsigned int cmd, iw_handler handler)
+			      unsigned int cmd, struct iw_request_info *info,
+			      iw_handler handler)
 {
 	int extra_size = 0, ret = -EINVAL;
 	const struct iw_priv_args *descr;
-	struct iw_request_info info;
 
 	extra_size = get_priv_descr_and_size(dev, cmd, &descr);
 
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
 	/* Check if we have a pointer to user space data or not. */
 	if (extra_size == 0) {
 		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
+		ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u));
 	} else {
 		ret = ioctl_private_iw_point(&iwr->u.data, cmd, descr,
-					     handler, dev, &info, extra_size);
+					     handler, dev, info, extra_size);
 	}
 
 	/* Call commit handler if needed and defined */
@@ -1014,7 +1006,8 @@ static int ioctl_private_call(struct net_device *dev, struct iwreq *iwr,
 
 /* ---------------------------------------------------------------- */
 typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *,
-			       unsigned int, iw_handler);
+			       unsigned int, struct iw_request_info *,
+			       iw_handler);
 
 /*
  * Main IOCTl dispatcher.
@@ -1022,6 +1015,7 @@ typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *,
  */
 static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 				  unsigned int cmd,
+				  struct iw_request_info *info,
 				  wext_ioctl_func standard,
 				  wext_ioctl_func private)
 {
@@ -1040,11 +1034,11 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 	 * Note that 'cmd' is already filtered in dev_ioctl() with
 	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
 	if (cmd == SIOCGIWSTATS)
-		return standard(dev, iwr, cmd,
+		return standard(dev, iwr, cmd, info,
 				&iw_handler_get_iwstats);
 
 	if (cmd == SIOCGIWPRIV && dev->wireless_handlers)
-		return standard(dev, iwr, cmd,
+		return standard(dev, iwr, cmd, info,
 				&iw_handler_get_private);
 
 	/* Basic check */
@@ -1056,9 +1050,9 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 	if (handler) {
 		/* Standard and private are not the same */
 		if (cmd < SIOCIWFIRSTPRIV)
-			return standard(dev, iwr, cmd, handler);
+			return standard(dev, iwr, cmd, info, handler);
 		else
-			return private(dev, iwr, cmd, handler);
+			return private(dev, iwr, cmd, info, handler);
 	}
 	/* Old driver API : call driver ioctl handler */
 	if (dev->do_ioctl)
@@ -1080,7 +1074,7 @@ static int wext_permission_check(unsigned int cmd)
 
 /* entry point from dev ioctl */
 static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
-			       unsigned int cmd,
+			       unsigned int cmd, struct iw_request_info *info,
 			       wext_ioctl_func standard,
 			       wext_ioctl_func private)
 {
@@ -1091,7 +1085,7 @@ static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
 
 	dev_load(net, ifr->ifr_name);
 	rtnl_lock();
-	ret = wireless_process_ioctl(net, ifr, cmd, standard, private);
+	ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private);
 	rtnl_unlock();
 
 	return ret;
@@ -1100,10 +1094,12 @@ static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
 int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
 		      void __user *arg)
 {
-	int ret = wext_ioctl_dispatch(net, ifr, cmd,
-				      ioctl_standard_call,
-				      ioctl_private_call);
+	struct iw_request_info info = { .cmd = cmd, .flags = 0 };
+	int ret;
 
+	ret = wext_ioctl_dispatch(net, ifr, cmd, &info,
+				  ioctl_standard_call,
+				  ioctl_private_call);
 	if (ret >= 0 &&
 	    IW_IS_GET(cmd) &&
 	    copy_to_user(arg, ifr, sizeof(struct iwreq)))
@@ -1116,28 +1112,25 @@ int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
 static int compat_standard_call(struct net_device	*dev,
 				struct iwreq		*iwr,
 				unsigned int		cmd,
+				struct iw_request_info	*info,
 				iw_handler		handler)
 {
 	const struct iw_ioctl_description *descr;
 	struct compat_iw_point *iwp_compat;
-	struct iw_request_info info;
 	struct iw_point iwp;
 	int err;
 
 	descr = standard_ioctl + (cmd - SIOCIWFIRST);
 
 	if (descr->header_type != IW_HEADER_TYPE_POINT)
-		return ioctl_standard_call(dev, iwr, cmd, handler);
+		return ioctl_standard_call(dev, iwr, cmd, info, handler);
 
 	iwp_compat = (struct compat_iw_point *) &iwr->u.data;
 	iwp.pointer = compat_ptr(iwp_compat->pointer);
 	iwp.length = iwp_compat->length;
 	iwp.flags = iwp_compat->flags;
 
-	info.cmd = cmd;
-	info.flags = 0;
-
-	err = ioctl_standard_iw_point(&iwp, cmd, descr, handler, dev, &info);
+	err = ioctl_standard_iw_point(&iwp, cmd, descr, handler, dev, info);
 
 	iwp_compat->pointer = ptr_to_compat(iwp.pointer);
 	iwp_compat->length = iwp.length;
@@ -1147,22 +1140,18 @@ static int compat_standard_call(struct net_device	*dev,
 }
 
 static int compat_private_call(struct net_device *dev, struct iwreq *iwr,
-			       unsigned int cmd, iw_handler handler)
+			       unsigned int cmd, struct iw_request_info *info,
+			       iw_handler handler)
 {
 	const struct iw_priv_args *descr;
-	struct iw_request_info info;
 	int ret, extra_size;
 
 	extra_size = get_priv_descr_and_size(dev, cmd, &descr);
 
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
 	/* Check if we have a pointer to user space data or not. */
 	if (extra_size == 0) {
 		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
+		ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u));
 	} else {
 		struct compat_iw_point *iwp_compat;
 		struct iw_point iwp;
@@ -1173,7 +1162,7 @@ static int compat_private_call(struct net_device *dev, struct iwreq *iwr,
 		iwp.flags = iwp_compat->flags;
 
 		ret = ioctl_private_iw_point(&iwp, cmd, descr,
-					     handler, dev, &info, extra_size);
+					     handler, dev, info, extra_size);
 
 		iwp_compat->pointer = ptr_to_compat(iwp.pointer);
 		iwp_compat->length = iwp.length;
@@ -1191,6 +1180,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 			     unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
+	struct iw_request_info info;
 	struct iwreq iwr;
 	char *colon;
 	int ret;
@@ -1203,7 +1193,10 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 	if (colon)
 		*colon = 0;
 
-	ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd,
+	info.cmd = cmd;
+	info.flags = IW_REQUEST_FLAG_COMPAT;
+
+	ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info,
 				  compat_standard_call,
 				  compat_private_call);
 
-- 
cgit v1.2.3-70-g09d2


From ccc580571cf0799d0460a085a7632b77753f083e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 16 Jun 2008 18:50:49 -0700
Subject: wext: Emit event stream entries correctly when compat.

Three major portions to this change:

1) Add IW_EV_COMPAT_LCP_LEN, IW_EV_COMPAT_POINT_OFF,
   and IW_EV_COMPAT_POINT_LEN helper defines.

2) Delete iw_stream_check_add_*(), they are unused.

3) Add iw_request_info argument to iwe_stream_add_*(), and use it to
   size the event and pointer lengths correctly depending upon whether
   IW_REQUEST_FLAG_COMPAT is set or not.

4) The mechanical transformations to the drivers and wireless stack
   bits to get the iw_request_info passed down into the routines
   modified in #3.  Also, explicit references to IW_EV_LCP_LEN are
   replaced with iwe_stream_lcp_len(info).

With a lot of help and bug fixes from Masakazu Mokuno.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ps3_gelic_wireless.c           |  30 +++---
 drivers/net/wireless/airo.c                |  43 ++++++---
 drivers/net/wireless/atmel.c               |  24 +++--
 drivers/net/wireless/hostap/hostap.h       |   3 +-
 drivers/net/wireless/hostap/hostap_ap.c    |  32 +++----
 drivers/net/wireless/hostap/hostap_ioctl.c |  58 +++++------
 drivers/net/wireless/libertas/scan.c       |  36 +++----
 drivers/net/wireless/orinoco.c             |  30 +++---
 drivers/net/wireless/prism54/isl_ioctl.c   |  49 +++++-----
 drivers/net/wireless/rndis_wlan.c          |  32 ++++---
 drivers/net/wireless/wl3501_cs.c           |  10 +-
 drivers/net/wireless/zd1201.c              |  21 ++--
 include/linux/wireless.h                   |  15 +++
 include/net/iw_handler.h                   | 149 ++++++++++-------------------
 net/ieee80211/ieee80211_wx.c               |  48 ++++++----
 net/mac80211/ieee80211_i.h                 |   5 +-
 net/mac80211/mlme.c                        |  66 +++++++------
 net/mac80211/wext.c                        |   2 +-
 18 files changed, 345 insertions(+), 308 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ps3_gelic_wireless.c b/drivers/net/ps3_gelic_wireless.c
index aa963ac1e37..6b2dee0cf3a 100644
--- a/drivers/net/ps3_gelic_wireless.c
+++ b/drivers/net/ps3_gelic_wireless.c
@@ -571,6 +571,7 @@ static void gelic_wl_parse_ie(u8 *data, size_t len,
  * independent format
  */
 static char *gelic_wl_translate_scan(struct net_device *netdev,
+				     struct iw_request_info *info,
 				     char *ev,
 				     char *stop,
 				     struct gelic_wl_scan_info *network)
@@ -588,26 +589,26 @@ static char *gelic_wl_translate_scan(struct net_device *netdev,
 	iwe.cmd = SIOCGIWAP;
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 	memcpy(iwe.u.ap_addr.sa_data, &scan->bssid[2], ETH_ALEN);
-	ev = iwe_stream_add_event(ev, stop, &iwe, IW_EV_ADDR_LEN);
+	ev = iwe_stream_add_event(info, ev, stop, &iwe, IW_EV_ADDR_LEN);
 
 	/* ESSID */
 	iwe.cmd = SIOCGIWESSID;
 	iwe.u.data.flags = 1;
 	iwe.u.data.length = strnlen(scan->essid, 32);
-	ev = iwe_stream_add_point(ev, stop, &iwe, scan->essid);
+	ev = iwe_stream_add_point(info, ev, stop, &iwe, scan->essid);
 
 	/* FREQUENCY */
 	iwe.cmd = SIOCGIWFREQ;
 	iwe.u.freq.m = be16_to_cpu(scan->channel);
 	iwe.u.freq.e = 0; /* table value in MHz */
 	iwe.u.freq.i = 0;
-	ev = iwe_stream_add_event(ev, stop, &iwe, IW_EV_FREQ_LEN);
+	ev = iwe_stream_add_event(info, ev, stop, &iwe, IW_EV_FREQ_LEN);
 
 	/* RATES */
 	iwe.cmd = SIOCGIWRATE;
 	iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
 	/* to stuff multiple values in one event */
-	tmp = ev + IW_EV_LCP_LEN;
+	tmp = ev + iwe_stream_lcp_len(info);
 	/* put them in ascendant order (older is first) */
 	i = 0;
 	j = 0;
@@ -620,16 +621,16 @@ static char *gelic_wl_translate_scan(struct net_device *netdev,
 		else
 		    rate = scan->rate[i++] & 0x7f;
 		iwe.u.bitrate.value = rate * 500000; /* 500kbps unit */
-		tmp = iwe_stream_add_value(ev, tmp, stop, &iwe,
+		tmp = iwe_stream_add_value(info, ev, tmp, stop, &iwe,
 					   IW_EV_PARAM_LEN);
 	}
 	while (j < network->rate_ext_len) {
 		iwe.u.bitrate.value = (scan->ext_rate[j++] & 0x7f) * 500000;
-		tmp = iwe_stream_add_value(ev, tmp, stop, &iwe,
+		tmp = iwe_stream_add_value(info, ev, tmp, stop, &iwe,
 					   IW_EV_PARAM_LEN);
 	}
 	/* Check if we added any rate */
-	if (IW_EV_LCP_LEN < (tmp - ev))
+	if (iwe_stream_lcp_len(info) < (tmp - ev))
 		ev = tmp;
 
 	/* ENCODE */
@@ -639,7 +640,7 @@ static char *gelic_wl_translate_scan(struct net_device *netdev,
 	else
 		iwe.u.data.flags = IW_ENCODE_DISABLED;
 	iwe.u.data.length = 0;
-	ev = iwe_stream_add_point(ev, stop, &iwe, scan->essid);
+	ev = iwe_stream_add_point(info, ev, stop, &iwe, scan->essid);
 
 	/* MODE */
 	iwe.cmd = SIOCGIWMODE;
@@ -649,7 +650,7 @@ static char *gelic_wl_translate_scan(struct net_device *netdev,
 			iwe.u.mode = IW_MODE_MASTER;
 		else
 			iwe.u.mode = IW_MODE_ADHOC;
-		ev = iwe_stream_add_event(ev, stop, &iwe, IW_EV_UINT_LEN);
+		ev = iwe_stream_add_event(info, ev, stop, &iwe, IW_EV_UINT_LEN);
 	}
 
 	/* QUAL */
@@ -659,7 +660,7 @@ static char *gelic_wl_translate_scan(struct net_device *netdev,
 	iwe.u.qual.level = be16_to_cpu(scan->rssi);
 	iwe.u.qual.qual = be16_to_cpu(scan->rssi);
 	iwe.u.qual.noise = 0;
-	ev  = iwe_stream_add_event(ev, stop, &iwe, IW_EV_QUAL_LEN);
+	ev  = iwe_stream_add_event(info, ev, stop, &iwe, IW_EV_QUAL_LEN);
 
 	/* RSN */
 	memset(&iwe, 0, sizeof(iwe));
@@ -669,7 +670,7 @@ static char *gelic_wl_translate_scan(struct net_device *netdev,
 		if (len) {
 			iwe.cmd = IWEVGENIE;
 			iwe.u.data.length = len;
-			ev = iwe_stream_add_point(ev, stop, &iwe, buf);
+			ev = iwe_stream_add_point(info, ev, stop, &iwe, buf);
 		}
 	} else {
 		/* this scan info has IE data */
@@ -684,7 +685,7 @@ static char *gelic_wl_translate_scan(struct net_device *netdev,
 			memcpy(buf, ie_info.wpa.data, ie_info.wpa.len);
 			iwe.cmd = IWEVGENIE;
 			iwe.u.data.length = ie_info.wpa.len;
-			ev = iwe_stream_add_point(ev, stop, &iwe, buf);
+			ev = iwe_stream_add_point(info, ev, stop, &iwe, buf);
 		}
 
 		if (ie_info.rsn.len && (ie_info.rsn.len <= sizeof(buf))) {
@@ -692,7 +693,7 @@ static char *gelic_wl_translate_scan(struct net_device *netdev,
 			memcpy(buf, ie_info.rsn.data, ie_info.rsn.len);
 			iwe.cmd = IWEVGENIE;
 			iwe.u.data.length = ie_info.rsn.len;
-			ev = iwe_stream_add_point(ev, stop, &iwe, buf);
+			ev = iwe_stream_add_point(info, ev, stop, &iwe, buf);
 		}
 	}
 
@@ -737,7 +738,8 @@ static int gelic_wl_get_scan(struct net_device *netdev,
 		if (wl->scan_age == 0 ||
 		    time_after(scan_info->last_scanned + wl->scan_age,
 			       this_time))
-			ev = gelic_wl_translate_scan(netdev, ev, stop,
+			ev = gelic_wl_translate_scan(netdev, info,
+						     ev, stop,
 						     scan_info);
 		else
 			pr_debug("%s:entry too old\n", __func__);
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index e30f8b79ea8..73d66a80c4a 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -7156,6 +7156,7 @@ out:
  * format that the Wireless Tools will understand - Jean II
  */
 static inline char *airo_translate_scan(struct net_device *dev,
+					struct iw_request_info *info,
 					char *current_ev,
 					char *end_buf,
 					BSSListRid *bss)
@@ -7172,7 +7173,8 @@ static inline char *airo_translate_scan(struct net_device *dev,
 	iwe.cmd = SIOCGIWAP;
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 	memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN);
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_ADDR_LEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+					  &iwe, IW_EV_ADDR_LEN);
 
 	/* Other entries will be displayed in the order we give them */
 
@@ -7182,7 +7184,8 @@ static inline char *airo_translate_scan(struct net_device *dev,
 		iwe.u.data.length = 32;
 	iwe.cmd = SIOCGIWESSID;
 	iwe.u.data.flags = 1;
-	current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, bss->ssid);
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+					  &iwe, bss->ssid);
 
 	/* Add mode */
 	iwe.cmd = SIOCGIWMODE;
@@ -7192,7 +7195,8 @@ static inline char *airo_translate_scan(struct net_device *dev,
 			iwe.u.mode = IW_MODE_MASTER;
 		else
 			iwe.u.mode = IW_MODE_ADHOC;
-		current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_UINT_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_UINT_LEN);
 	}
 
 	/* Add frequency */
@@ -7203,7 +7207,8 @@ static inline char *airo_translate_scan(struct net_device *dev,
 	 */
 	iwe.u.freq.m = frequency_list[iwe.u.freq.m - 1] * 100000;
 	iwe.u.freq.e = 1;
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_FREQ_LEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+					  &iwe, IW_EV_FREQ_LEN);
 
 	dBm = le16_to_cpu(bss->dBm);
 
@@ -7223,7 +7228,8 @@ static inline char *airo_translate_scan(struct net_device *dev,
 				| IW_QUAL_DBM;
 	}
 	iwe.u.qual.noise = ai->wstats.qual.noise;
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_QUAL_LEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+					  &iwe, IW_EV_QUAL_LEN);
 
 	/* Add encryption capability */
 	iwe.cmd = SIOCGIWENCODE;
@@ -7232,11 +7238,12 @@ static inline char *airo_translate_scan(struct net_device *dev,
 	else
 		iwe.u.data.flags = IW_ENCODE_DISABLED;
 	iwe.u.data.length = 0;
-	current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, bss->ssid);
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+					  &iwe, bss->ssid);
 
 	/* Rate : stuffing multiple values in a single event require a bit
 	 * more of magic - Jean II */
-	current_val = current_ev + IW_EV_LCP_LEN;
+	current_val = current_ev + iwe_stream_lcp_len(info);
 
 	iwe.cmd = SIOCGIWRATE;
 	/* Those two flags are ignored... */
@@ -7249,10 +7256,12 @@ static inline char *airo_translate_scan(struct net_device *dev,
 		/* Bit rate given in 500 kb/s units (+ 0x80) */
 		iwe.u.bitrate.value = ((bss->rates[i] & 0x7f) * 500000);
 		/* Add new value to event */
-		current_val = iwe_stream_add_value(current_ev, current_val, end_buf, &iwe, IW_EV_PARAM_LEN);
+		current_val = iwe_stream_add_value(info, current_ev,
+						   current_val, end_buf,
+						   &iwe, IW_EV_PARAM_LEN);
 	}
 	/* Check if we added any event */
-	if((current_val - current_ev) > IW_EV_LCP_LEN)
+	if ((current_val - current_ev) > iwe_stream_lcp_len(info))
 		current_ev = current_val;
 
 	/* Beacon interval */
@@ -7261,7 +7270,8 @@ static inline char *airo_translate_scan(struct net_device *dev,
 		iwe.cmd = IWEVCUSTOM;
 		sprintf(buf, "bcn_int=%d", bss->beaconInterval);
 		iwe.u.data.length = strlen(buf);
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, buf);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, buf);
 		kfree(buf);
 	}
 
@@ -7295,8 +7305,10 @@ static inline char *airo_translate_scan(struct net_device *dev,
 					iwe.cmd = IWEVGENIE;
 					iwe.u.data.length = min(info_element->len + 2,
 								  MAX_WPA_IE_LEN);
-					current_ev = iwe_stream_add_point(current_ev, end_buf,
-							&iwe, (char *) info_element);
+					current_ev = iwe_stream_add_point(
+							info, current_ev,
+							end_buf, &iwe,
+							(char *) info_element);
 				}
 				break;
 
@@ -7304,8 +7316,9 @@ static inline char *airo_translate_scan(struct net_device *dev,
 				iwe.cmd = IWEVGENIE;
 				iwe.u.data.length = min(info_element->len + 2,
 							  MAX_WPA_IE_LEN);
-				current_ev = iwe_stream_add_point(current_ev, end_buf,
-						&iwe, (char *) info_element);
+				current_ev = iwe_stream_add_point(
+					info, current_ev, end_buf,
+					&iwe, (char *) info_element);
 				break;
 
 			default:
@@ -7344,7 +7357,7 @@ static int airo_get_scan(struct net_device *dev,
 
 	list_for_each_entry (net, &ai->network_list, list) {
 		/* Translate to WE format this entry */
-		current_ev = airo_translate_scan(dev, current_ev,
+		current_ev = airo_translate_scan(dev, info, current_ev,
 						 extra + dwrq->length,
 						 &net->bss);
 
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index 7bb2646ae0e..28b6ff3eaa3 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -2310,30 +2310,40 @@ static int atmel_get_scan(struct net_device *dev,
 		iwe.cmd = SIOCGIWAP;
 		iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 		memcpy(iwe.u.ap_addr.sa_data, priv->BSSinfo[i].BSSID, 6);
-		current_ev = iwe_stream_add_event(current_ev, extra + IW_SCAN_MAX_DATA, &iwe, IW_EV_ADDR_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev,
+						  extra + IW_SCAN_MAX_DATA,
+						  &iwe, IW_EV_ADDR_LEN);
 
 		iwe.u.data.length =  priv->BSSinfo[i].SSIDsize;
 		if (iwe.u.data.length > 32)
 			iwe.u.data.length = 32;
 		iwe.cmd = SIOCGIWESSID;
 		iwe.u.data.flags = 1;
-		current_ev = iwe_stream_add_point(current_ev, extra + IW_SCAN_MAX_DATA, &iwe, priv->BSSinfo[i].SSID);
+		current_ev = iwe_stream_add_point(info, current_ev,
+						  extra + IW_SCAN_MAX_DATA,
+						  &iwe, priv->BSSinfo[i].SSID);
 
 		iwe.cmd = SIOCGIWMODE;
 		iwe.u.mode = priv->BSSinfo[i].BSStype;
-		current_ev = iwe_stream_add_event(current_ev, extra + IW_SCAN_MAX_DATA, &iwe, IW_EV_UINT_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev,
+						  extra + IW_SCAN_MAX_DATA,
+						  &iwe, IW_EV_UINT_LEN);
 
 		iwe.cmd = SIOCGIWFREQ;
 		iwe.u.freq.m = priv->BSSinfo[i].channel;
 		iwe.u.freq.e = 0;
-		current_ev = iwe_stream_add_event(current_ev, extra + IW_SCAN_MAX_DATA, &iwe, IW_EV_FREQ_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev,
+						  extra + IW_SCAN_MAX_DATA,
+						  &iwe, IW_EV_FREQ_LEN);
 
 		/* Add quality statistics */
 		iwe.cmd = IWEVQUAL;
 		iwe.u.qual.level = priv->BSSinfo[i].RSSI;
 		iwe.u.qual.qual  = iwe.u.qual.level;
 		/* iwe.u.qual.noise  = SOMETHING */
-		current_ev = iwe_stream_add_event(current_ev, extra + IW_SCAN_MAX_DATA , &iwe, IW_EV_QUAL_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev,
+						  extra + IW_SCAN_MAX_DATA,
+						  &iwe, IW_EV_QUAL_LEN);
 
 
 		iwe.cmd = SIOCGIWENCODE;
@@ -2342,7 +2352,9 @@ static int atmel_get_scan(struct net_device *dev,
 		else
 			iwe.u.data.flags = IW_ENCODE_DISABLED;
 		iwe.u.data.length = 0;
-		current_ev = iwe_stream_add_point(current_ev, extra + IW_SCAN_MAX_DATA, &iwe, NULL);
+		current_ev = iwe_stream_add_point(info, current_ev,
+						  extra + IW_SCAN_MAX_DATA,
+						  &iwe, NULL);
 	}
 
 	/* Length of data */
diff --git a/drivers/net/wireless/hostap/hostap.h b/drivers/net/wireless/hostap/hostap.h
index 547ba84dc79..3a386a636cc 100644
--- a/drivers/net/wireless/hostap/hostap.h
+++ b/drivers/net/wireless/hostap/hostap.h
@@ -67,7 +67,8 @@ void * ap_crypt_get_ptrs(struct ap_data *ap, u8 *addr, int permanent,
 int prism2_ap_get_sta_qual(local_info_t *local, struct sockaddr addr[],
 			   struct iw_quality qual[], int buf_size,
 			   int aplist);
-int prism2_ap_translate_scan(struct net_device *dev, char *buffer);
+int prism2_ap_translate_scan(struct net_device *dev,
+			     struct iw_request_info *info, char *buffer);
 int prism2_hostapd(struct ap_data *ap, struct prism2_hostapd_param *param);
 
 
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index 0acd9589c48..06b23df8f69 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -2420,7 +2420,8 @@ int prism2_ap_get_sta_qual(local_info_t *local, struct sockaddr addr[],
 
 /* Translate our list of Access Points & Stations to a card independant
  * format that the Wireless Tools will understand - Jean II */
-int prism2_ap_translate_scan(struct net_device *dev, char *buffer)
+int prism2_ap_translate_scan(struct net_device *dev,
+			     struct iw_request_info *info, char *buffer)
 {
 	struct hostap_interface *iface;
 	local_info_t *local;
@@ -2449,8 +2450,8 @@ int prism2_ap_translate_scan(struct net_device *dev, char *buffer)
 		iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 		memcpy(iwe.u.ap_addr.sa_data, sta->addr, ETH_ALEN);
 		iwe.len = IW_EV_ADDR_LEN;
-		current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
-						  IW_EV_ADDR_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_ADDR_LEN);
 
 		/* Use the mode to indicate if it's a station or
 		 * an Access Point */
@@ -2461,8 +2462,8 @@ int prism2_ap_translate_scan(struct net_device *dev, char *buffer)
 		else
 			iwe.u.mode = IW_MODE_INFRA;
 		iwe.len = IW_EV_UINT_LEN;
-		current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
-						  IW_EV_UINT_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_UINT_LEN);
 
 		/* Some quality */
 		memset(&iwe, 0, sizeof(iwe));
@@ -2477,8 +2478,8 @@ int prism2_ap_translate_scan(struct net_device *dev, char *buffer)
 		iwe.u.qual.noise = HFA384X_LEVEL_TO_dBm(sta->last_rx_silence);
 		iwe.u.qual.updated = sta->last_rx_updated;
 		iwe.len = IW_EV_QUAL_LEN;
-		current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
-						  IW_EV_QUAL_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_QUAL_LEN);
 
 #ifndef PRISM2_NO_KERNEL_IEEE80211_MGMT
 		if (sta->ap) {
@@ -2486,8 +2487,8 @@ int prism2_ap_translate_scan(struct net_device *dev, char *buffer)
 			iwe.cmd = SIOCGIWESSID;
 			iwe.u.data.length = sta->u.ap.ssid_len;
 			iwe.u.data.flags = 1;
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
-							  &iwe,
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf, &iwe,
 							  sta->u.ap.ssid);
 
 			memset(&iwe, 0, sizeof(iwe));
@@ -2497,10 +2498,9 @@ int prism2_ap_translate_scan(struct net_device *dev, char *buffer)
 					IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
 			else
 				iwe.u.data.flags = IW_ENCODE_DISABLED;
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
-							  &iwe,
-							  sta->u.ap.ssid
-							  /* 0 byte memcpy */);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf, &iwe,
+							  sta->u.ap.ssid);
 
 			if (sta->u.ap.channel > 0 &&
 			    sta->u.ap.channel <= FREQ_COUNT) {
@@ -2510,7 +2510,7 @@ int prism2_ap_translate_scan(struct net_device *dev, char *buffer)
 					* 100000;
 				iwe.u.freq.e = 1;
 				current_ev = iwe_stream_add_event(
-					current_ev, end_buf, &iwe,
+					info, current_ev, end_buf, &iwe,
 					IW_EV_FREQ_LEN);
 			}
 
@@ -2519,8 +2519,8 @@ int prism2_ap_translate_scan(struct net_device *dev, char *buffer)
 			sprintf(buf, "beacon_interval=%d",
 				sta->listen_interval);
 			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
-							  &iwe, buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf, &iwe, buf);
 		}
 #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
 
diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c
index 0ca0bfeb0ad..ed52d98317c 100644
--- a/drivers/net/wireless/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/hostap/hostap_ioctl.c
@@ -1793,6 +1793,7 @@ static int prism2_ioctl_siwscan(struct net_device *dev,
 
 #ifndef PRISM2_NO_STATION_MODES
 static char * __prism2_translate_scan(local_info_t *local,
+				      struct iw_request_info *info,
 				      struct hfa384x_hostscan_result *scan,
 				      struct hostap_bss_info *bss,
 				      char *current_ev, char *end_buf)
@@ -1823,7 +1824,7 @@ static char * __prism2_translate_scan(local_info_t *local,
 	iwe.cmd = SIOCGIWAP;
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 	memcpy(iwe.u.ap_addr.sa_data, bssid, ETH_ALEN);
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
 					  IW_EV_ADDR_LEN);
 
 	/* Other entries will be displayed in the order we give them */
@@ -1832,7 +1833,8 @@ static char * __prism2_translate_scan(local_info_t *local,
 	iwe.cmd = SIOCGIWESSID;
 	iwe.u.data.length = ssid_len;
 	iwe.u.data.flags = 1;
-	current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, ssid);
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+					  &iwe, ssid);
 
 	memset(&iwe, 0, sizeof(iwe));
 	iwe.cmd = SIOCGIWMODE;
@@ -1847,8 +1849,8 @@ static char * __prism2_translate_scan(local_info_t *local,
 			iwe.u.mode = IW_MODE_MASTER;
 		else
 			iwe.u.mode = IW_MODE_ADHOC;
-		current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
-						  IW_EV_UINT_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_UINT_LEN);
 	}
 
 	memset(&iwe, 0, sizeof(iwe));
@@ -1864,8 +1866,8 @@ static char * __prism2_translate_scan(local_info_t *local,
 	if (chan > 0) {
 		iwe.u.freq.m = freq_list[chan - 1] * 100000;
 		iwe.u.freq.e = 1;
-		current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
-						  IW_EV_FREQ_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_FREQ_LEN);
 	}
 
 	if (scan) {
@@ -1884,8 +1886,8 @@ static char * __prism2_translate_scan(local_info_t *local,
 			| IW_QUAL_NOISE_UPDATED
 			| IW_QUAL_QUAL_INVALID
 			| IW_QUAL_DBM;
-		current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
-						  IW_EV_QUAL_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_QUAL_LEN);
 	}
 
 	memset(&iwe, 0, sizeof(iwe));
@@ -1895,13 +1897,13 @@ static char * __prism2_translate_scan(local_info_t *local,
 	else
 		iwe.u.data.flags = IW_ENCODE_DISABLED;
 	iwe.u.data.length = 0;
-	current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, "");
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe, "");
 
 	/* TODO: add SuppRates into BSS table */
 	if (scan) {
 		memset(&iwe, 0, sizeof(iwe));
 		iwe.cmd = SIOCGIWRATE;
-		current_val = current_ev + IW_EV_LCP_LEN;
+		current_val = current_ev + iwe_stream_lcp_len(info);
 		pos = scan->sup_rates;
 		for (i = 0; i < sizeof(scan->sup_rates); i++) {
 			if (pos[i] == 0)
@@ -1909,11 +1911,11 @@ static char * __prism2_translate_scan(local_info_t *local,
 			/* Bit rate given in 500 kb/s units (+ 0x80) */
 			iwe.u.bitrate.value = ((pos[i] & 0x7f) * 500000);
 			current_val = iwe_stream_add_value(
-				current_ev, current_val, end_buf, &iwe,
+				info, current_ev, current_val, end_buf, &iwe,
 				IW_EV_PARAM_LEN);
 		}
 		/* Check if we added any event */
-		if ((current_val - current_ev) > IW_EV_LCP_LEN)
+		if ((current_val - current_ev) > iwe_stream_lcp_len(info))
 			current_ev = current_val;
 	}
 
@@ -1924,15 +1926,15 @@ static char * __prism2_translate_scan(local_info_t *local,
 		iwe.cmd = IWEVCUSTOM;
 		sprintf(buf, "bcn_int=%d", le16_to_cpu(scan->beacon_interval));
 		iwe.u.data.length = strlen(buf);
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
-						  buf);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, buf);
 
 		memset(&iwe, 0, sizeof(iwe));
 		iwe.cmd = IWEVCUSTOM;
 		sprintf(buf, "resp_rate=%d", le16_to_cpu(scan->rate));
 		iwe.u.data.length = strlen(buf);
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
-						  buf);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, buf);
 
 		if (local->last_scan_type == PRISM2_HOSTSCAN &&
 		    (capabilities & WLAN_CAPABILITY_IBSS)) {
@@ -1940,8 +1942,8 @@ static char * __prism2_translate_scan(local_info_t *local,
 			iwe.cmd = IWEVCUSTOM;
 			sprintf(buf, "atim=%d", le16_to_cpu(scan->atim));
 			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
-							  &iwe, buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf, &iwe, buf);
 		}
 	}
 	kfree(buf);
@@ -1950,16 +1952,16 @@ static char * __prism2_translate_scan(local_info_t *local,
 		memset(&iwe, 0, sizeof(iwe));
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = bss->wpa_ie_len;
-		current_ev = iwe_stream_add_point(
-			current_ev, end_buf, &iwe, bss->wpa_ie);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, bss->wpa_ie);
 	}
 
 	if (bss && bss->rsn_ie_len > 0 && bss->rsn_ie_len <= MAX_WPA_IE_LEN) {
 		memset(&iwe, 0, sizeof(iwe));
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = bss->rsn_ie_len;
-		current_ev = iwe_stream_add_point(
-			current_ev, end_buf, &iwe, bss->rsn_ie);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, bss->rsn_ie);
 	}
 
 	return current_ev;
@@ -1969,6 +1971,7 @@ static char * __prism2_translate_scan(local_info_t *local,
 /* Translate scan data returned from the card to a card independant
  * format that the Wireless Tools will understand - Jean II */
 static inline int prism2_translate_scan(local_info_t *local,
+					struct iw_request_info *info,
 					char *buffer, int buflen)
 {
 	struct hfa384x_hostscan_result *scan;
@@ -1999,13 +2002,14 @@ static inline int prism2_translate_scan(local_info_t *local,
 			if (memcmp(bss->bssid, scan->bssid, ETH_ALEN) == 0) {
 				bss->included = 1;
 				current_ev = __prism2_translate_scan(
-					local, scan, bss, current_ev, end_buf);
+					local, info, scan, bss, current_ev,
+					end_buf);
 				found++;
 			}
 		}
 		if (!found) {
 			current_ev = __prism2_translate_scan(
-				local, scan, NULL, current_ev, end_buf);
+				local, info, scan, NULL, current_ev, end_buf);
 		}
 		/* Check if there is space for one more entry */
 		if ((end_buf - current_ev) <= IW_EV_ADDR_LEN) {
@@ -2023,7 +2027,7 @@ static inline int prism2_translate_scan(local_info_t *local,
 		bss = list_entry(ptr, struct hostap_bss_info, list);
 		if (bss->included)
 			continue;
-		current_ev = __prism2_translate_scan(local, NULL, bss,
+		current_ev = __prism2_translate_scan(local, info, NULL, bss,
 						     current_ev, end_buf);
 		/* Check if there is space for one more entry */
 		if ((end_buf - current_ev) <= IW_EV_ADDR_LEN) {
@@ -2070,7 +2074,7 @@ static inline int prism2_ioctl_giwscan_sta(struct net_device *dev,
 	}
 	local->scan_timestamp = 0;
 
-	res = prism2_translate_scan(local, extra, data->length);
+	res = prism2_translate_scan(local, info, extra, data->length);
 
 	if (res >= 0) {
 		data->length = res;
@@ -2103,7 +2107,7 @@ static int prism2_ioctl_giwscan(struct net_device *dev,
 		 * Jean II */
 
 		/* Translate to WE format */
-		res = prism2_ap_translate_scan(dev, extra);
+		res = prism2_ap_translate_scan(dev, info, extra);
 		if (res >= 0) {
 			printk(KERN_DEBUG "Scan result translation succeeded "
 			       "(length=%d)\n", res);
diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c
index d448c9702a0..343ed38f772 100644
--- a/drivers/net/wireless/libertas/scan.c
+++ b/drivers/net/wireless/libertas/scan.c
@@ -776,8 +776,9 @@ out:
 #define MAX_CUSTOM_LEN 64
 
 static inline char *lbs_translate_scan(struct lbs_private *priv,
-				       char *start, char *stop,
-				       struct bss_descriptor *bss)
+					    struct iw_request_info *info,
+					    char *start, char *stop,
+					    struct bss_descriptor *bss)
 {
 	struct chan_freq_power *cfp;
 	char *current_val;	/* For rates */
@@ -801,24 +802,24 @@ static inline char *lbs_translate_scan(struct lbs_private *priv,
 	iwe.cmd = SIOCGIWAP;
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 	memcpy(iwe.u.ap_addr.sa_data, &bss->bssid, ETH_ALEN);
-	start = iwe_stream_add_event(start, stop, &iwe, IW_EV_ADDR_LEN);
+	start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_ADDR_LEN);
 
 	/* SSID */
 	iwe.cmd = SIOCGIWESSID;
 	iwe.u.data.flags = 1;
 	iwe.u.data.length = min((uint32_t) bss->ssid_len, (uint32_t) IW_ESSID_MAX_SIZE);
-	start = iwe_stream_add_point(start, stop, &iwe, bss->ssid);
+	start = iwe_stream_add_point(info, start, stop, &iwe, bss->ssid);
 
 	/* Mode */
 	iwe.cmd = SIOCGIWMODE;
 	iwe.u.mode = bss->mode;
-	start = iwe_stream_add_event(start, stop, &iwe, IW_EV_UINT_LEN);
+	start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_UINT_LEN);
 
 	/* Frequency */
 	iwe.cmd = SIOCGIWFREQ;
 	iwe.u.freq.m = (long)cfp->freq * 100000;
 	iwe.u.freq.e = 1;
-	start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN);
+	start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_FREQ_LEN);
 
 	/* Add quality statistics */
 	iwe.cmd = IWEVQUAL;
@@ -852,7 +853,7 @@ static inline char *lbs_translate_scan(struct lbs_private *priv,
 		nf = priv->NF[TYPE_RXPD][TYPE_AVG] / AVG_SCALE;
 		iwe.u.qual.level = CAL_RSSI(snr, nf);
 	}
-	start = iwe_stream_add_event(start, stop, &iwe, IW_EV_QUAL_LEN);
+	start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_QUAL_LEN);
 
 	/* Add encryption capability */
 	iwe.cmd = SIOCGIWENCODE;
@@ -862,9 +863,9 @@ static inline char *lbs_translate_scan(struct lbs_private *priv,
 		iwe.u.data.flags = IW_ENCODE_DISABLED;
 	}
 	iwe.u.data.length = 0;
-	start = iwe_stream_add_point(start, stop, &iwe, bss->ssid);
+	start = iwe_stream_add_point(info, start, stop, &iwe, bss->ssid);
 
-	current_val = start + IW_EV_LCP_LEN;
+	current_val = start + iwe_stream_lcp_len(info);
 
 	iwe.cmd = SIOCGIWRATE;
 	iwe.u.bitrate.fixed = 0;
@@ -874,19 +875,19 @@ static inline char *lbs_translate_scan(struct lbs_private *priv,
 	for (j = 0; bss->rates[j] && (j < sizeof(bss->rates)); j++) {
 		/* Bit rate given in 500 kb/s units */
 		iwe.u.bitrate.value = bss->rates[j] * 500000;
-		current_val = iwe_stream_add_value(start, current_val,
-					 stop, &iwe, IW_EV_PARAM_LEN);
+		current_val = iwe_stream_add_value(info, start, current_val,
+						   stop, &iwe, IW_EV_PARAM_LEN);
 	}
 	if ((bss->mode == IW_MODE_ADHOC) && priv->adhoccreate
 	    && !lbs_ssid_cmp(priv->curbssparams.ssid,
 			     priv->curbssparams.ssid_len,
 			     bss->ssid, bss->ssid_len)) {
 		iwe.u.bitrate.value = 22 * 500000;
-		current_val = iwe_stream_add_value(start, current_val,
+		current_val = iwe_stream_add_value(info, start, current_val,
 						   stop, &iwe, IW_EV_PARAM_LEN);
 	}
 	/* Check if we added any event */
-	if((current_val - start) > IW_EV_LCP_LEN)
+	if ((current_val - start) > iwe_stream_lcp_len(info))
 		start = current_val;
 
 	memset(&iwe, 0, sizeof(iwe));
@@ -895,7 +896,7 @@ static inline char *lbs_translate_scan(struct lbs_private *priv,
 		memcpy(buf, bss->wpa_ie, bss->wpa_ie_len);
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = bss->wpa_ie_len;
-		start = iwe_stream_add_point(start, stop, &iwe, buf);
+		start = iwe_stream_add_point(info, start, stop, &iwe, buf);
 	}
 
 	memset(&iwe, 0, sizeof(iwe));
@@ -904,7 +905,7 @@ static inline char *lbs_translate_scan(struct lbs_private *priv,
 		memcpy(buf, bss->rsn_ie, bss->rsn_ie_len);
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = bss->rsn_ie_len;
-		start = iwe_stream_add_point(start, stop, &iwe, buf);
+		start = iwe_stream_add_point(info, start, stop, &iwe, buf);
 	}
 
 	if (bss->mesh) {
@@ -915,7 +916,8 @@ static inline char *lbs_translate_scan(struct lbs_private *priv,
 		p += snprintf(p, MAX_CUSTOM_LEN, "mesh-type: olpc");
 		iwe.u.data.length = p - custom;
 		if (iwe.u.data.length)
-			start = iwe_stream_add_point(start, stop, &iwe, custom);
+			start = iwe_stream_add_point(info, start, stop,
+						     &iwe, custom);
 	}
 
 out:
@@ -1036,7 +1038,7 @@ int lbs_get_scan(struct net_device *dev, struct iw_request_info *info,
 		}
 
 		/* Translate to WE format this entry */
-		next_ev = lbs_translate_scan(priv, ev, stop, iter_bss);
+		next_ev = lbs_translate_scan(priv, info, ev, stop, iter_bss);
 		if (next_ev == NULL)
 			continue;
 		ev = next_ev;
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index 6d13a0d15a0..b047306bf38 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -4046,6 +4046,7 @@ static int orinoco_ioctl_setscan(struct net_device *dev,
  * format that the Wireless Tools will understand - Jean II
  * Return message length or -errno for fatal errors */
 static inline char *orinoco_translate_scan(struct net_device *dev,
+					   struct iw_request_info *info,
 					   char *current_ev,
 					   char *end_buf,
 					   union hermes_scan_info *bss,
@@ -4062,7 +4063,8 @@ static inline char *orinoco_translate_scan(struct net_device *dev,
 	iwe.cmd = SIOCGIWAP;
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 	memcpy(iwe.u.ap_addr.sa_data, bss->a.bssid, ETH_ALEN);
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_ADDR_LEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+					  &iwe, IW_EV_ADDR_LEN);
 
 	/* Other entries will be displayed in the order we give them */
 
@@ -4072,7 +4074,8 @@ static inline char *orinoco_translate_scan(struct net_device *dev,
 		iwe.u.data.length = 32;
 	iwe.cmd = SIOCGIWESSID;
 	iwe.u.data.flags = 1;
-	current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, bss->a.essid);
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+					  &iwe, bss->a.essid);
 
 	/* Add mode */
 	iwe.cmd = SIOCGIWMODE;
@@ -4082,7 +4085,8 @@ static inline char *orinoco_translate_scan(struct net_device *dev,
 			iwe.u.mode = IW_MODE_MASTER;
 		else
 			iwe.u.mode = IW_MODE_ADHOC;
-		current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_UINT_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_UINT_LEN);
 	}
 
 	channel = bss->s.channel;
@@ -4091,7 +4095,7 @@ static inline char *orinoco_translate_scan(struct net_device *dev,
 		iwe.cmd = SIOCGIWFREQ;
 		iwe.u.freq.m = channel_frequency[channel-1] * 100000;
 		iwe.u.freq.e = 1;
-		current_ev = iwe_stream_add_event(current_ev, end_buf,
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
 						  &iwe, IW_EV_FREQ_LEN);
 	}
 
@@ -4106,7 +4110,8 @@ static inline char *orinoco_translate_scan(struct net_device *dev,
 		iwe.u.qual.qual = iwe.u.qual.level - iwe.u.qual.noise;
 	else
 		iwe.u.qual.qual = 0;
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_QUAL_LEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+					  &iwe, IW_EV_QUAL_LEN);
 
 	/* Add encryption capability */
 	iwe.cmd = SIOCGIWENCODE;
@@ -4115,7 +4120,8 @@ static inline char *orinoco_translate_scan(struct net_device *dev,
 	else
 		iwe.u.data.flags = IW_ENCODE_DISABLED;
 	iwe.u.data.length = 0;
-	current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, bss->a.essid);
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+					  &iwe, bss->a.essid);
 
 	/* Add EXTRA: Age to display seconds since last beacon/probe response
 	 * for given network. */
@@ -4126,11 +4132,12 @@ static inline char *orinoco_translate_scan(struct net_device *dev,
 		      jiffies_to_msecs(jiffies - last_scanned));
 	iwe.u.data.length = p - custom;
 	if (iwe.u.data.length)
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, custom);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, custom);
 
 	/* Bit rate is not available in Lucent/Agere firmwares */
 	if (priv->firmware_type != FIRMWARE_TYPE_AGERE) {
-		char *current_val = current_ev + IW_EV_LCP_LEN;
+		char *current_val = current_ev + iwe_stream_lcp_len(info);
 		int i;
 		int step;
 
@@ -4149,12 +4156,13 @@ static inline char *orinoco_translate_scan(struct net_device *dev,
 				break;
 			/* Bit rate given in 500 kb/s units (+ 0x80) */
 			iwe.u.bitrate.value = ((bss->p.rates[i] & 0x7f) * 500000);
-			current_val = iwe_stream_add_value(current_ev, current_val,
+			current_val = iwe_stream_add_value(info, current_ev,
+							   current_val,
 							   end_buf, &iwe,
 							   IW_EV_PARAM_LEN);
 		}
 		/* Check if we added any event */
-		if ((current_val - current_ev) > IW_EV_LCP_LEN)
+		if ((current_val - current_ev) > iwe_stream_lcp_len(info))
 			current_ev = current_val;
 	}
 
@@ -4190,7 +4198,7 @@ static int orinoco_ioctl_getscan(struct net_device *dev,
 
 	list_for_each_entry(bss, &priv->bss_list, list) {
 		/* Translate to WE format this entry */
-		current_ev = orinoco_translate_scan(dev, current_ev,
+		current_ev = orinoco_translate_scan(dev, info, current_ev,
 						    extra + srq->length,
 						    &bss->bss,
 						    bss->last_scanned);
diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c
index 5b375b28903..97fa14e0a47 100644
--- a/drivers/net/wireless/prism54/isl_ioctl.c
+++ b/drivers/net/wireless/prism54/isl_ioctl.c
@@ -571,8 +571,9 @@ prism54_set_scan(struct net_device *dev, struct iw_request_info *info,
  */
 
 static char *
-prism54_translate_bss(struct net_device *ndev, char *current_ev,
-		      char *end_buf, struct obj_bss *bss, char noise)
+prism54_translate_bss(struct net_device *ndev, struct iw_request_info *info,
+		      char *current_ev, char *end_buf, struct obj_bss *bss,
+		      char noise)
 {
 	struct iw_event iwe;	/* Temporary buffer */
 	short cap;
@@ -584,8 +585,8 @@ prism54_translate_bss(struct net_device *ndev, char *current_ev,
 	memcpy(iwe.u.ap_addr.sa_data, bss->address, 6);
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 	iwe.cmd = SIOCGIWAP;
-	current_ev =
-	    iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_ADDR_LEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+					  &iwe, IW_EV_ADDR_LEN);
 
 	/* The following entries will be displayed in the same order we give them */
 
@@ -593,7 +594,7 @@ prism54_translate_bss(struct net_device *ndev, char *current_ev,
 	iwe.u.data.length = bss->ssid.length;
 	iwe.u.data.flags = 1;
 	iwe.cmd = SIOCGIWESSID;
-	current_ev = iwe_stream_add_point(current_ev, end_buf,
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
 					  &iwe, bss->ssid.octets);
 
 	/* Capabilities */
@@ -610,9 +611,8 @@ prism54_translate_bss(struct net_device *ndev, char *current_ev,
 		iwe.u.mode = IW_MODE_ADHOC;
 	iwe.cmd = SIOCGIWMODE;
 	if (iwe.u.mode)
-		current_ev =
-		    iwe_stream_add_event(current_ev, end_buf, &iwe,
-					 IW_EV_UINT_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_UINT_LEN);
 
 	/* Encryption capability */
 	if (cap & CAP_CRYPT)
@@ -621,14 +621,15 @@ prism54_translate_bss(struct net_device *ndev, char *current_ev,
 		iwe.u.data.flags = IW_ENCODE_DISABLED;
 	iwe.u.data.length = 0;
 	iwe.cmd = SIOCGIWENCODE;
-	current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, NULL);
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+					  &iwe, NULL);
 
 	/* Add frequency. (short) bss->channel is the frequency in MHz */
 	iwe.u.freq.m = bss->channel;
 	iwe.u.freq.e = 6;
 	iwe.cmd = SIOCGIWFREQ;
-	current_ev =
-	    iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_FREQ_LEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+					  &iwe, IW_EV_FREQ_LEN);
 
 	/* Add quality statistics */
 	iwe.u.qual.level = bss->rssi;
@@ -636,20 +637,20 @@ prism54_translate_bss(struct net_device *ndev, char *current_ev,
 	/* do a simple SNR for quality */
 	iwe.u.qual.qual = bss->rssi - noise;
 	iwe.cmd = IWEVQUAL;
-	current_ev =
-	    iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_QUAL_LEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+					  &iwe, IW_EV_QUAL_LEN);
 
 	/* Add WPA/RSN Information Element, if any */
 	wpa_ie_len = prism54_wpa_bss_ie_get(priv, bss->address, wpa_ie);
 	if (wpa_ie_len > 0) {
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = min(wpa_ie_len, (size_t)MAX_WPA_IE_LEN);
-		current_ev = iwe_stream_add_point(current_ev, end_buf,
-				&iwe, wpa_ie);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, wpa_ie);
 	}
 	/* Do the bitrates */
 	{
-		char *	current_val = current_ev + IW_EV_LCP_LEN;
+		char *current_val = current_ev + iwe_stream_lcp_len(info);
 		int i;
 		int mask;
 
@@ -662,14 +663,14 @@ prism54_translate_bss(struct net_device *ndev, char *current_ev,
 		for(i = 0; i < sizeof(scan_rate_list); i++) {
 			if(bss->rates & mask) {
 				iwe.u.bitrate.value = (scan_rate_list[i] * 500000);
-				current_val = iwe_stream_add_value(current_ev, current_val,
-								   end_buf, &iwe,
-								   IW_EV_PARAM_LEN);
+				current_val = iwe_stream_add_value(
+					info, current_ev, current_val,
+					end_buf, &iwe, IW_EV_PARAM_LEN);
 			}
 			mask <<= 1;
 		}
 		/* Check if we added any event */
-		if ((current_val - current_ev) > IW_EV_LCP_LEN)
+		if ((current_val - current_ev) > iwe_stream_lcp_len(info))
 			current_ev = current_val;
 	}
 
@@ -710,7 +711,7 @@ prism54_get_scan(struct net_device *ndev, struct iw_request_info *info,
 
 	/* ok now, scan the list and translate its info */
 	for (i = 0; i < (int) bsslist->nr; i++) {
-		current_ev = prism54_translate_bss(ndev, current_ev,
+		current_ev = prism54_translate_bss(ndev, info, current_ev,
 						   extra + dwrq->length,
 						   &(bsslist->bsslist[i]),
 						   noise);
@@ -2704,6 +2705,7 @@ prism2_ioctl_scan_req(struct net_device *ndev,
                      struct prism2_hostapd_param *param)
 {
 	islpci_private *priv = netdev_priv(ndev);
+	struct iw_request_info info;
 	int i, rvalue;
 	struct obj_bsslist *bsslist;
 	u32 noise = 0;
@@ -2727,9 +2729,12 @@ prism2_ioctl_scan_req(struct net_device *ndev,
 	rvalue |= mgt_get_request(priv, DOT11_OID_BSSLIST, 0, NULL, &r);
 	bsslist = r.ptr;
 
+	info.cmd = PRISM54_HOSTAPD;
+	info.flags = 0;
+
 	/* ok now, scan the list and translate its info */
 	for (i = 0; i < min(IW_MAX_AP, (int) bsslist->nr); i++)
-		current_ev = prism54_translate_bss(ndev, current_ev,
+		current_ev = prism54_translate_bss(ndev, &info, current_ev,
 						   extra + IW_SCAN_MAX_DATA,
 						   &(bsslist->bsslist[i]),
 						   noise);
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index a36d2c85e26..65c50025c88 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -1648,7 +1648,9 @@ static int rndis_iw_set_scan(struct net_device *dev,
 
 
 static char *rndis_translate_scan(struct net_device *dev,
-    char *cev, char *end_buf, struct ndis_80211_bssid_ex *bssid)
+				  struct iw_request_info *info, char *cev,
+				  char *end_buf,
+				  struct ndis_80211_bssid_ex *bssid)
 {
 #ifdef DEBUG
 	struct usbnet *usbdev = dev->priv;
@@ -1667,14 +1669,14 @@ static char *rndis_translate_scan(struct net_device *dev,
 	iwe.cmd = SIOCGIWAP;
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 	memcpy(iwe.u.ap_addr.sa_data, bssid->mac, ETH_ALEN);
-	cev = iwe_stream_add_event(cev, end_buf, &iwe, IW_EV_ADDR_LEN);
+	cev = iwe_stream_add_event(info, cev, end_buf, &iwe, IW_EV_ADDR_LEN);
 
 	devdbg(usbdev, "SSID(%d) %s", le32_to_cpu(bssid->ssid.length),
 						bssid->ssid.essid);
 	iwe.cmd = SIOCGIWESSID;
 	iwe.u.essid.length = le32_to_cpu(bssid->ssid.length);
 	iwe.u.essid.flags = 1;
-	cev = iwe_stream_add_point(cev, end_buf, &iwe, bssid->ssid.essid);
+	cev = iwe_stream_add_point(info, cev, end_buf, &iwe, bssid->ssid.essid);
 
 	devdbg(usbdev, "MODE %d", le32_to_cpu(bssid->net_infra));
 	iwe.cmd = SIOCGIWMODE;
@@ -1690,12 +1692,12 @@ static char *rndis_translate_scan(struct net_device *dev,
 		iwe.u.mode = IW_MODE_AUTO;
 		break;
 	}
-	cev = iwe_stream_add_event(cev, end_buf, &iwe, IW_EV_UINT_LEN);
+	cev = iwe_stream_add_event(info, cev, end_buf, &iwe, IW_EV_UINT_LEN);
 
 	devdbg(usbdev, "FREQ %d kHz", le32_to_cpu(bssid->config.ds_config));
 	iwe.cmd = SIOCGIWFREQ;
 	dsconfig_to_freq(le32_to_cpu(bssid->config.ds_config), &iwe.u.freq);
-	cev = iwe_stream_add_event(cev, end_buf, &iwe, IW_EV_FREQ_LEN);
+	cev = iwe_stream_add_event(info, cev, end_buf, &iwe, IW_EV_FREQ_LEN);
 
 	devdbg(usbdev, "QUAL %d", le32_to_cpu(bssid->rssi));
 	iwe.cmd = IWEVQUAL;
@@ -1704,7 +1706,7 @@ static char *rndis_translate_scan(struct net_device *dev,
 	iwe.u.qual.updated = IW_QUAL_QUAL_UPDATED
 			| IW_QUAL_LEVEL_UPDATED
 			| IW_QUAL_NOISE_INVALID;
-	cev = iwe_stream_add_event(cev, end_buf, &iwe, IW_EV_QUAL_LEN);
+	cev = iwe_stream_add_event(info, cev, end_buf, &iwe, IW_EV_QUAL_LEN);
 
 	devdbg(usbdev, "ENCODE %d", le32_to_cpu(bssid->privacy));
 	iwe.cmd = SIOCGIWENCODE;
@@ -1714,10 +1716,10 @@ static char *rndis_translate_scan(struct net_device *dev,
 	else
 		iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
 
-	cev = iwe_stream_add_point(cev, end_buf, &iwe, NULL);
+	cev = iwe_stream_add_point(info, cev, end_buf, &iwe, NULL);
 
 	devdbg(usbdev, "RATES:");
-	current_val = cev + IW_EV_LCP_LEN;
+	current_val = cev + iwe_stream_lcp_len(info);
 	iwe.cmd = SIOCGIWRATE;
 	for (i = 0; i < sizeof(bssid->rates); i++) {
 		if (bssid->rates[i] & 0x7f) {
@@ -1725,13 +1727,13 @@ static char *rndis_translate_scan(struct net_device *dev,
 				((bssid->rates[i] & 0x7f) *
 				500000);
 			devdbg(usbdev, " %d", iwe.u.bitrate.value);
-			current_val = iwe_stream_add_value(cev,
+			current_val = iwe_stream_add_value(info, cev,
 				current_val, end_buf, &iwe,
 				IW_EV_PARAM_LEN);
 		}
 	}
 
-	if ((current_val - cev) > IW_EV_LCP_LEN)
+	if ((current_val - cev) > iwe_stream_lcp_len(info))
 		cev = current_val;
 
 	beacon = le32_to_cpu(bssid->config.beacon_period);
@@ -1739,14 +1741,14 @@ static char *rndis_translate_scan(struct net_device *dev,
 	iwe.cmd = IWEVCUSTOM;
 	snprintf(sbuf, sizeof(sbuf), "bcn_int=%d", beacon);
 	iwe.u.data.length = strlen(sbuf);
-	cev = iwe_stream_add_point(cev, end_buf, &iwe, sbuf);
+	cev = iwe_stream_add_point(info, cev, end_buf, &iwe, sbuf);
 
 	atim = le32_to_cpu(bssid->config.atim_window);
 	devdbg(usbdev, "ATIM %d", atim);
 	iwe.cmd = IWEVCUSTOM;
 	snprintf(sbuf, sizeof(sbuf), "atim=%u", atim);
 	iwe.u.data.length = strlen(sbuf);
-	cev = iwe_stream_add_point(cev, end_buf, &iwe, sbuf);
+	cev = iwe_stream_add_point(info, cev, end_buf, &iwe, sbuf);
 
 	ie = (void *)(bssid->ies + sizeof(struct ndis_80211_fixed_ies));
 	ie_len = min(bssid_len - (int)sizeof(*bssid),
@@ -1760,7 +1762,7 @@ static char *rndis_translate_scan(struct net_device *dev,
 					(ie->id == MFIE_TYPE_RSN) ? 2 : 1);
 			iwe.cmd = IWEVGENIE;
 			iwe.u.data.length = min(ie->len + 2, MAX_WPA_IE_LEN);
-			cev = iwe_stream_add_point(cev, end_buf, &iwe,
+			cev = iwe_stream_add_point(info, cev, end_buf, &iwe,
 								(u8 *)ie);
 		}
 
@@ -1803,8 +1805,8 @@ static int rndis_iw_get_scan(struct net_device *dev,
 	devdbg(usbdev, "SIOCGIWSCAN: %d BSSIDs found", count);
 
 	while (count && ((void *)bssid + bssid_len) <= (buf + len)) {
-		cev = rndis_translate_scan(dev, cev, extra + IW_SCAN_MAX_DATA,
-									bssid);
+		cev = rndis_translate_scan(dev, info, cev,
+					   extra + IW_SCAN_MAX_DATA, bssid);
 		bssid = (void *)bssid + bssid_len;
 		bssid_len = le32_to_cpu(bssid->length);
 		count--;
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index 42a36b3f3ff..377141995e3 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -1624,25 +1624,25 @@ static int wl3501_get_scan(struct net_device *dev, struct iw_request_info *info,
 		iwe.cmd			= SIOCGIWAP;
 		iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 		memcpy(iwe.u.ap_addr.sa_data, this->bss_set[i].bssid, ETH_ALEN);
-		current_ev = iwe_stream_add_event(current_ev,
+		current_ev = iwe_stream_add_event(info, current_ev,
 						  extra + IW_SCAN_MAX_DATA,
 						  &iwe, IW_EV_ADDR_LEN);
 		iwe.cmd		  = SIOCGIWESSID;
 		iwe.u.data.flags  = 1;
 		iwe.u.data.length = this->bss_set[i].ssid.el.len;
-		current_ev = iwe_stream_add_point(current_ev,
+		current_ev = iwe_stream_add_point(info, current_ev,
 						  extra + IW_SCAN_MAX_DATA,
 						  &iwe,
 						  this->bss_set[i].ssid.essid);
 		iwe.cmd	   = SIOCGIWMODE;
 		iwe.u.mode = this->bss_set[i].bss_type;
-		current_ev = iwe_stream_add_event(current_ev,
+		current_ev = iwe_stream_add_event(info, current_ev,
 						  extra + IW_SCAN_MAX_DATA,
 						  &iwe, IW_EV_UINT_LEN);
 		iwe.cmd = SIOCGIWFREQ;
 		iwe.u.freq.m = this->bss_set[i].ds_pset.chan;
 		iwe.u.freq.e = 0;
-		current_ev = iwe_stream_add_event(current_ev,
+		current_ev = iwe_stream_add_event(info, current_ev,
 						  extra + IW_SCAN_MAX_DATA,
 						  &iwe, IW_EV_FREQ_LEN);
 		iwe.cmd = SIOCGIWENCODE;
@@ -1651,7 +1651,7 @@ static int wl3501_get_scan(struct net_device *dev, struct iw_request_info *info,
 		else
 			iwe.u.data.flags = IW_ENCODE_DISABLED;
 		iwe.u.data.length = 0;
-		current_ev = iwe_stream_add_point(current_ev,
+		current_ev = iwe_stream_add_point(info, current_ev,
 						  extra + IW_SCAN_MAX_DATA,
 						  &iwe, NULL);
 	}
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index d5c0c66188c..07e4d1f7320 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -1152,32 +1152,36 @@ static int zd1201_get_scan(struct net_device *dev,
 		iwe.cmd = SIOCGIWAP;
 		iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 		memcpy(iwe.u.ap_addr.sa_data, zd->rxdata+i+6, 6);
-		cev = iwe_stream_add_event(cev, end_buf, &iwe, IW_EV_ADDR_LEN);
+		cev = iwe_stream_add_event(info, cev, end_buf,
+					   &iwe, IW_EV_ADDR_LEN);
 
 		iwe.cmd = SIOCGIWESSID;
 		iwe.u.data.length = zd->rxdata[i+16];
 		iwe.u.data.flags = 1;
-		cev = iwe_stream_add_point(cev, end_buf, &iwe, zd->rxdata+i+18);
+		cev = iwe_stream_add_point(info, cev, end_buf,
+					   &iwe, zd->rxdata+i+18);
 
 		iwe.cmd = SIOCGIWMODE;
 		if (zd->rxdata[i+14]&0x01)
 			iwe.u.mode = IW_MODE_MASTER;
 		else
 			iwe.u.mode = IW_MODE_ADHOC;
-		cev = iwe_stream_add_event(cev, end_buf, &iwe, IW_EV_UINT_LEN);
+		cev = iwe_stream_add_event(info, cev, end_buf,
+					   &iwe, IW_EV_UINT_LEN);
 		
 		iwe.cmd = SIOCGIWFREQ;
 		iwe.u.freq.m = zd->rxdata[i+0];
 		iwe.u.freq.e = 0;
-		cev = iwe_stream_add_event(cev, end_buf, &iwe, IW_EV_FREQ_LEN);
+		cev = iwe_stream_add_event(info, cev, end_buf,
+					   &iwe, IW_EV_FREQ_LEN);
 		
 		iwe.cmd = SIOCGIWRATE;
 		iwe.u.bitrate.fixed = 0;
 		iwe.u.bitrate.disabled = 0;
 		for (j=0; j<10; j++) if (zd->rxdata[i+50+j]) {
 			iwe.u.bitrate.value = (zd->rxdata[i+50+j]&0x7f)*500000;
-			cev=iwe_stream_add_event(cev, end_buf, &iwe,
-			    IW_EV_PARAM_LEN);
+			cev = iwe_stream_add_event(info, cev, end_buf,
+						   &iwe, IW_EV_PARAM_LEN);
 		}
 		
 		iwe.cmd = SIOCGIWENCODE;
@@ -1186,14 +1190,15 @@ static int zd1201_get_scan(struct net_device *dev,
 			iwe.u.data.flags = IW_ENCODE_ENABLED;
 		else
 			iwe.u.data.flags = IW_ENCODE_DISABLED;
-		cev = iwe_stream_add_point(cev, end_buf, &iwe, NULL);
+		cev = iwe_stream_add_point(info, cev, end_buf, &iwe, NULL);
 		
 		iwe.cmd = IWEVQUAL;
 		iwe.u.qual.qual = zd->rxdata[i+4];
 		iwe.u.qual.noise= zd->rxdata[i+2]/10-100;
 		iwe.u.qual.level = (256+zd->rxdata[i+4]*100)/255-100;
 		iwe.u.qual.updated = 7;
-		cev = iwe_stream_add_event(cev, end_buf, &iwe, IW_EV_QUAL_LEN);
+		cev = iwe_stream_add_event(info, cev, end_buf,
+					   &iwe, IW_EV_QUAL_LEN);
 	}
 
 	if (!enabled_save)
diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index 79d84687582..d7958f9b52c 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -1113,6 +1113,21 @@ struct iw_event
 #define IW_EV_POINT_LEN	(IW_EV_LCP_LEN + sizeof(struct iw_point) - \
 			 IW_EV_POINT_OFF)
 
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+struct __compat_iw_event {
+	__u16		len;			/* Real length of this stuff */
+	__u16		cmd;			/* Wireless IOCTL */
+	compat_caddr_t	pointer;
+};
+#define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer)
+#define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length)
+#define IW_EV_COMPAT_POINT_LEN	\
+	(IW_EV_COMPAT_LCP_LEN + sizeof(struct compat_iw_point) - \
+	 IW_EV_COMPAT_POINT_OFF)
+#endif
+#endif
+
 /* Size of the Event prefix when packed in stream */
 #define IW_EV_LCP_PK_LEN	(4)
 /* Size of the various events when packed in stream */
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index c99a8eec84e..51b9a37de99 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -478,105 +478,58 @@ extern void wireless_spy_update(struct net_device *	dev,
  * Function that are so simple that it's more efficient inlining them
  */
 
-/*------------------------------------------------------------------*/
-/*
- * Wrapper to add an Wireless Event to a stream of events.
- */
-static inline char *
-iwe_stream_add_event(char *	stream,		/* Stream of events */
-		     char *	ends,		/* End of stream */
-		     struct iw_event *iwe,	/* Payload */
-		     int	event_len)	/* Real size of payload */
+static inline int iwe_stream_lcp_len(struct iw_request_info *info)
 {
-	/* Check if it's possible */
-	if(likely((stream + event_len) < ends)) {
-		iwe->len = event_len;
-		/* Beware of alignement issues on 64 bits */
-		memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
-		memcpy(stream + IW_EV_LCP_LEN,
-		       ((char *) iwe) + IW_EV_LCP_LEN,
-		       event_len - IW_EV_LCP_LEN);
-		stream += event_len;
-	}
-	return stream;
+#ifdef CONFIG_COMPAT
+	if (info->flags & IW_REQUEST_FLAG_COMPAT)
+		return IW_EV_COMPAT_LCP_LEN;
+#endif
+	return IW_EV_LCP_LEN;
 }
 
-/*------------------------------------------------------------------*/
-/*
- * Wrapper to add an short Wireless Event containing a pointer to a
- * stream of events.
- */
-static inline char *
-iwe_stream_add_point(char *	stream,		/* Stream of events */
-		     char *	ends,		/* End of stream */
-		     struct iw_event *iwe,	/* Payload length + flags */
-		     char *	extra)		/* More payload */
+static inline int iwe_stream_point_len(struct iw_request_info *info)
 {
-	int	event_len = IW_EV_POINT_LEN + iwe->u.data.length;
-	/* Check if it's possible */
-	if(likely((stream + event_len) < ends)) {
-		iwe->len = event_len;
-		memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
-		memcpy(stream + IW_EV_LCP_LEN,
-		       ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
-		       IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
-		memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length);
-		stream += event_len;
-	}
-	return stream;
+#ifdef CONFIG_COMPAT
+	if (info->flags & IW_REQUEST_FLAG_COMPAT)
+		return IW_EV_COMPAT_POINT_LEN;
+#endif
+	return IW_EV_POINT_LEN;
 }
 
-/*------------------------------------------------------------------*/
-/*
- * Wrapper to add a value to a Wireless Event in a stream of events.
- * Be careful, this one is tricky to use properly :
- * At the first run, you need to have (value = event + IW_EV_LCP_LEN).
- */
-static inline char *
-iwe_stream_add_value(char *	event,		/* Event in the stream */
-		     char *	value,		/* Value in event */
-		     char *	ends,		/* End of stream */
-		     struct iw_event *iwe,	/* Payload */
-		     int	event_len)	/* Real size of payload */
+static inline int iwe_stream_event_len_adjust(struct iw_request_info *info,
+					      int event_len)
 {
-	/* Don't duplicate LCP */
-	event_len -= IW_EV_LCP_LEN;
-
-	/* Check if it's possible */
-	if(likely((value + event_len) < ends)) {
-		/* Add new value */
-		memcpy(value, (char *) iwe + IW_EV_LCP_LEN, event_len);
-		value += event_len;
-		/* Patch LCP */
-		iwe->len = value - event;
-		memcpy(event, (char *) iwe, IW_EV_LCP_LEN);
+#ifdef CONFIG_COMPAT
+	if (info->flags & IW_REQUEST_FLAG_COMPAT) {
+		event_len -= IW_EV_LCP_LEN;
+		event_len += IW_EV_COMPAT_LCP_LEN;
 	}
-	return value;
+#endif
+
+	return event_len;
 }
 
 /*------------------------------------------------------------------*/
 /*
  * Wrapper to add an Wireless Event to a stream of events.
- * Same as above, with explicit error check...
  */
 static inline char *
-iwe_stream_check_add_event(char *	stream,		/* Stream of events */
-			   char *	ends,		/* End of stream */
-			   struct iw_event *iwe,	/* Payload */
-			   int		event_len,	/* Size of payload */
-			   int *	perr)		/* Error report */
+iwe_stream_add_event(struct iw_request_info *info, char *stream, char *ends,
+		     struct iw_event *iwe, int event_len)
 {
-	/* Check if it's possible, set error if not */
+	int lcp_len = iwe_stream_lcp_len(info);
+
+	event_len = iwe_stream_event_len_adjust(info, event_len);
+
+	/* Check if it's possible */
 	if(likely((stream + event_len) < ends)) {
 		iwe->len = event_len;
 		/* Beware of alignement issues on 64 bits */
 		memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
-		memcpy(stream + IW_EV_LCP_LEN,
-		       ((char *) iwe) + IW_EV_LCP_LEN,
-		       event_len - IW_EV_LCP_LEN);
+		memcpy(stream + lcp_len, &iwe->u,
+		       event_len - lcp_len);
 		stream += event_len;
-	} else
-		*perr = -E2BIG;
+	}
 	return stream;
 }
 
@@ -584,27 +537,25 @@ iwe_stream_check_add_event(char *	stream,		/* Stream of events */
 /*
  * Wrapper to add an short Wireless Event containing a pointer to a
  * stream of events.
- * Same as above, with explicit error check...
  */
 static inline char *
-iwe_stream_check_add_point(char *	stream,		/* Stream of events */
-			   char *	ends,		/* End of stream */
-			   struct iw_event *iwe,	/* Payload length + flags */
-			   char *	extra,		/* More payload */
-			   int *	perr)		/* Error report */
+iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends,
+		     struct iw_event *iwe, char *extra)
 {
-	int	event_len = IW_EV_POINT_LEN + iwe->u.data.length;
+	int event_len = iwe_stream_point_len(info) + iwe->u.data.length;
+	int point_len = iwe_stream_point_len(info);
+	int lcp_len   = iwe_stream_lcp_len(info);
+
 	/* Check if it's possible */
 	if(likely((stream + event_len) < ends)) {
 		iwe->len = event_len;
 		memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
-		memcpy(stream + IW_EV_LCP_LEN,
-		       ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
+		memcpy(stream + lcp_len,
+		       ((char *) &iwe->u) + IW_EV_POINT_OFF,
 		       IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
-		memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length);
+		memcpy(stream + point_len, extra, iwe->u.data.length);
 		stream += event_len;
-	} else
-		*perr = -E2BIG;
+	}
 	return stream;
 }
 
@@ -613,29 +564,25 @@ iwe_stream_check_add_point(char *	stream,		/* Stream of events */
  * Wrapper to add a value to a Wireless Event in a stream of events.
  * Be careful, this one is tricky to use properly :
  * At the first run, you need to have (value = event + IW_EV_LCP_LEN).
- * Same as above, with explicit error check...
  */
 static inline char *
-iwe_stream_check_add_value(char *	event,		/* Event in the stream */
-			   char *	value,		/* Value in event */
-			   char *	ends,		/* End of stream */
-			   struct iw_event *iwe,	/* Payload */
-			   int		event_len,	/* Size of payload */
-			   int *	perr)		/* Error report */
+iwe_stream_add_value(struct iw_request_info *info, char *event, char *value,
+		     char *ends, struct iw_event *iwe, int event_len)
 {
+	int lcp_len = iwe_stream_lcp_len(info);
+
 	/* Don't duplicate LCP */
 	event_len -= IW_EV_LCP_LEN;
 
 	/* Check if it's possible */
 	if(likely((value + event_len) < ends)) {
 		/* Add new value */
-		memcpy(value, (char *) iwe + IW_EV_LCP_LEN, event_len);
+		memcpy(value, &iwe->u, event_len);
 		value += event_len;
 		/* Patch LCP */
 		iwe->len = value - event;
-		memcpy(event, (char *) iwe, IW_EV_LCP_LEN);
-	} else
-		*perr = -E2BIG;
+		memcpy(event, (char *) iwe, lcp_len);
+	}
 	return value;
 }
 
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 822606b615c..973832dd7fa 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -43,8 +43,9 @@ static const char *ieee80211_modes[] = {
 
 #define MAX_CUSTOM_LEN 64
 static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
-					   char *start, char *stop,
-					   struct ieee80211_network *network)
+				      char *start, char *stop,
+				      struct ieee80211_network *network,
+				      struct iw_request_info *info)
 {
 	char custom[MAX_CUSTOM_LEN];
 	char *p;
@@ -57,7 +58,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 	iwe.cmd = SIOCGIWAP;
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 	memcpy(iwe.u.ap_addr.sa_data, network->bssid, ETH_ALEN);
-	start = iwe_stream_add_event(start, stop, &iwe, IW_EV_ADDR_LEN);
+	start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_ADDR_LEN);
 
 	/* Remaining entries will be displayed in the order we provide them */
 
@@ -66,17 +67,19 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 	iwe.u.data.flags = 1;
 	if (network->flags & NETWORK_EMPTY_ESSID) {
 		iwe.u.data.length = sizeof("<hidden>");
-		start = iwe_stream_add_point(start, stop, &iwe, "<hidden>");
+		start = iwe_stream_add_point(info, start, stop,
+					     &iwe, "<hidden>");
 	} else {
 		iwe.u.data.length = min(network->ssid_len, (u8) 32);
-		start = iwe_stream_add_point(start, stop, &iwe, network->ssid);
+		start = iwe_stream_add_point(info, start, stop,
+					     &iwe, network->ssid);
 	}
 
 	/* Add the protocol name */
 	iwe.cmd = SIOCGIWNAME;
 	snprintf(iwe.u.name, IFNAMSIZ, "IEEE 802.11%s",
 		 ieee80211_modes[network->mode]);
-	start = iwe_stream_add_event(start, stop, &iwe, IW_EV_CHAR_LEN);
+	start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_CHAR_LEN);
 
 	/* Add mode */
 	iwe.cmd = SIOCGIWMODE;
@@ -86,7 +89,8 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 		else
 			iwe.u.mode = IW_MODE_ADHOC;
 
-		start = iwe_stream_add_event(start, stop, &iwe, IW_EV_UINT_LEN);
+		start = iwe_stream_add_event(info, start, stop,
+					     &iwe, IW_EV_UINT_LEN);
 	}
 
 	/* Add channel and frequency */
@@ -95,7 +99,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 	iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel);
 	iwe.u.freq.e = 6;
 	iwe.u.freq.i = 0;
-	start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN);
+	start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_FREQ_LEN);
 
 	/* Add encryption capability */
 	iwe.cmd = SIOCGIWENCODE;
@@ -104,12 +108,13 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 	else
 		iwe.u.data.flags = IW_ENCODE_DISABLED;
 	iwe.u.data.length = 0;
-	start = iwe_stream_add_point(start, stop, &iwe, network->ssid);
+	start = iwe_stream_add_point(info, start, stop,
+				     &iwe, network->ssid);
 
 	/* Add basic and extended rates */
 	/* Rate : stuffing multiple values in a single event require a bit
 	 * more of magic - Jean II */
-	current_val = start + IW_EV_LCP_LEN;
+	current_val = start + iwe_stream_lcp_len(info);
 	iwe.cmd = SIOCGIWRATE;
 	/* Those two flags are ignored... */
 	iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
@@ -124,17 +129,19 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 		/* Bit rate given in 500 kb/s units (+ 0x80) */
 		iwe.u.bitrate.value = ((rate & 0x7f) * 500000);
 		/* Add new value to event */
-		current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN);
+		current_val = iwe_stream_add_value(info, start, current_val,
+						   stop, &iwe, IW_EV_PARAM_LEN);
 	}
 	for (; j < network->rates_ex_len; j++) {
 		rate = network->rates_ex[j] & 0x7F;
 		/* Bit rate given in 500 kb/s units (+ 0x80) */
 		iwe.u.bitrate.value = ((rate & 0x7f) * 500000);
 		/* Add new value to event */
-		current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN);
+		current_val = iwe_stream_add_value(info, start, current_val,
+						   stop, &iwe, IW_EV_PARAM_LEN);
 	}
 	/* Check if we added any rate */
-	if((current_val - start) > IW_EV_LCP_LEN)
+	if ((current_val - start) > iwe_stream_lcp_len(info))
 		start = current_val;
 
 	/* Add quality statistics */
@@ -181,14 +188,14 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 		iwe.u.qual.level = network->stats.signal;
 	}
 
-	start = iwe_stream_add_event(start, stop, &iwe, IW_EV_QUAL_LEN);
+	start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_QUAL_LEN);
 
 	iwe.cmd = IWEVCUSTOM;
 	p = custom;
 
 	iwe.u.data.length = p - custom;
 	if (iwe.u.data.length)
-		start = iwe_stream_add_point(start, stop, &iwe, custom);
+		start = iwe_stream_add_point(info, start, stop, &iwe, custom);
 
 	memset(&iwe, 0, sizeof(iwe));
 	if (network->wpa_ie_len) {
@@ -196,7 +203,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 		memcpy(buf, network->wpa_ie, network->wpa_ie_len);
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = network->wpa_ie_len;
-		start = iwe_stream_add_point(start, stop, &iwe, buf);
+		start = iwe_stream_add_point(info, start, stop, &iwe, buf);
 	}
 
 	memset(&iwe, 0, sizeof(iwe));
@@ -205,7 +212,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 		memcpy(buf, network->rsn_ie, network->rsn_ie_len);
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = network->rsn_ie_len;
-		start = iwe_stream_add_point(start, stop, &iwe, buf);
+		start = iwe_stream_add_point(info, start, stop, &iwe, buf);
 	}
 
 	/* Add EXTRA: Age to display seconds since last beacon/probe response
@@ -217,7 +224,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 		      jiffies_to_msecs(jiffies - network->last_scanned));
 	iwe.u.data.length = p - custom;
 	if (iwe.u.data.length)
-		start = iwe_stream_add_point(start, stop, &iwe, custom);
+		start = iwe_stream_add_point(info, start, stop, &iwe, custom);
 
 	/* Add spectrum management information */
 	iwe.cmd = -1;
@@ -238,7 +245,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 
 	if (iwe.cmd == IWEVCUSTOM) {
 		iwe.u.data.length = p - custom;
-		start = iwe_stream_add_point(start, stop, &iwe, custom);
+		start = iwe_stream_add_point(info, start, stop, &iwe, custom);
 	}
 
 	return start;
@@ -272,7 +279,8 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee,
 
 		if (ieee->scan_age == 0 ||
 		    time_after(network->last_scanned + ieee->scan_age, jiffies))
-			ev = ieee80211_translate_scan(ieee, ev, stop, network);
+			ev = ieee80211_translate_scan(ieee, ev, stop, network,
+						      info);
 		else
 			IEEE80211_DEBUG_SCAN("Not showing network '%s ("
 					     "%s)' due to age (%dms).\n",
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 14fccf16b80..80a9e7c07b4 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -24,6 +24,7 @@
 #include <linux/spinlock.h>
 #include <linux/etherdevice.h>
 #include <net/wireless.h>
+#include <net/iw_handler.h>
 #include "key.h"
 #include "sta_info.h"
 
@@ -867,7 +868,9 @@ int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid);
 int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len);
 void ieee80211_sta_req_auth(struct net_device *dev,
 			    struct ieee80211_if_sta *ifsta);
-int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len);
+int ieee80211_sta_scan_results(struct net_device *dev,
+			       struct iw_request_info *info,
+			       char *buf, size_t len);
 ieee80211_rx_result ieee80211_sta_rx_scan(
 	struct net_device *dev, struct sk_buff *skb,
 	struct ieee80211_rx_status *rx_status);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 55659a730dc..e06d6450f21 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4087,6 +4087,7 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len)
 
 static char *
 ieee80211_sta_scan_result(struct net_device *dev,
+			  struct iw_request_info *info,
 			  struct ieee80211_sta_bss *bss,
 			  char *current_ev, char *end_buf)
 {
@@ -4101,7 +4102,7 @@ ieee80211_sta_scan_result(struct net_device *dev,
 	iwe.cmd = SIOCGIWAP;
 	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
 	memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN);
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
 					  IW_EV_ADDR_LEN);
 
 	memset(&iwe, 0, sizeof(iwe));
@@ -4109,13 +4110,13 @@ ieee80211_sta_scan_result(struct net_device *dev,
 	if (bss_mesh_cfg(bss)) {
 		iwe.u.data.length = bss_mesh_id_len(bss);
 		iwe.u.data.flags = 1;
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
-						  bss_mesh_id(bss));
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, bss_mesh_id(bss));
 	} else {
 		iwe.u.data.length = bss->ssid_len;
 		iwe.u.data.flags = 1;
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
-						  bss->ssid);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, bss->ssid);
 	}
 
 	if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)
@@ -4128,22 +4129,22 @@ ieee80211_sta_scan_result(struct net_device *dev,
 			iwe.u.mode = IW_MODE_MASTER;
 		else
 			iwe.u.mode = IW_MODE_ADHOC;
-		current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
-						  IW_EV_UINT_LEN);
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_UINT_LEN);
 	}
 
 	memset(&iwe, 0, sizeof(iwe));
 	iwe.cmd = SIOCGIWFREQ;
 	iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq);
 	iwe.u.freq.e = 0;
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
 					  IW_EV_FREQ_LEN);
 
 	memset(&iwe, 0, sizeof(iwe));
 	iwe.cmd = SIOCGIWFREQ;
 	iwe.u.freq.m = bss->freq;
 	iwe.u.freq.e = 6;
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
 					  IW_EV_FREQ_LEN);
 	memset(&iwe, 0, sizeof(iwe));
 	iwe.cmd = IWEVQUAL;
@@ -4151,7 +4152,7 @@ ieee80211_sta_scan_result(struct net_device *dev,
 	iwe.u.qual.level = bss->signal;
 	iwe.u.qual.noise = bss->noise;
 	iwe.u.qual.updated = local->wstats_flags;
-	current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
 					  IW_EV_QUAL_LEN);
 
 	memset(&iwe, 0, sizeof(iwe));
@@ -4161,35 +4162,36 @@ ieee80211_sta_scan_result(struct net_device *dev,
 	else
 		iwe.u.data.flags = IW_ENCODE_DISABLED;
 	iwe.u.data.length = 0;
-	current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, "");
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+					  &iwe, "");
 
 	if (bss && bss->wpa_ie) {
 		memset(&iwe, 0, sizeof(iwe));
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = bss->wpa_ie_len;
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
-						  bss->wpa_ie);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, bss->wpa_ie);
 	}
 
 	if (bss && bss->rsn_ie) {
 		memset(&iwe, 0, sizeof(iwe));
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = bss->rsn_ie_len;
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
-						  bss->rsn_ie);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, bss->rsn_ie);
 	}
 
 	if (bss && bss->ht_ie) {
 		memset(&iwe, 0, sizeof(iwe));
 		iwe.cmd = IWEVGENIE;
 		iwe.u.data.length = bss->ht_ie_len;
-		current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
-						  bss->ht_ie);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, bss->ht_ie);
 	}
 
 	if (bss && bss->supp_rates_len > 0) {
 		/* display all supported rates in readable format */
-		char *p = current_ev + IW_EV_LCP_LEN;
+		char *p = current_ev + iwe_stream_lcp_len(info);
 		int i;
 
 		memset(&iwe, 0, sizeof(iwe));
@@ -4200,7 +4202,7 @@ ieee80211_sta_scan_result(struct net_device *dev,
 		for (i = 0; i < bss->supp_rates_len; i++) {
 			iwe.u.bitrate.value = ((bss->supp_rates[i] &
 							0x7f) * 500000);
-			p = iwe_stream_add_value(current_ev, p,
+			p = iwe_stream_add_value(info, current_ev, p,
 					end_buf, &iwe, IW_EV_PARAM_LEN);
 		}
 		current_ev = p;
@@ -4214,7 +4216,8 @@ ieee80211_sta_scan_result(struct net_device *dev,
 			iwe.cmd = IWEVCUSTOM;
 			sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp));
 			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
 							  &iwe, buf);
 			kfree(buf);
 		}
@@ -4229,31 +4232,36 @@ ieee80211_sta_scan_result(struct net_device *dev,
 			iwe.cmd = IWEVCUSTOM;
 			sprintf(buf, "Mesh network (version %d)", cfg[0]);
 			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
 							  &iwe, buf);
 			sprintf(buf, "Path Selection Protocol ID: "
 				"0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3],
 							cfg[4]);
 			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
 							  &iwe, buf);
 			sprintf(buf, "Path Selection Metric ID: "
 				"0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7],
 							cfg[8]);
 			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
 							  &iwe, buf);
 			sprintf(buf, "Congestion Control Mode ID: "
 				"0x%02X%02X%02X%02X", cfg[9], cfg[10],
 							cfg[11], cfg[12]);
 			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
 							  &iwe, buf);
 			sprintf(buf, "Channel Precedence: "
 				"0x%02X%02X%02X%02X", cfg[13], cfg[14],
 							cfg[15], cfg[16]);
 			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(current_ev, end_buf,
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
 							  &iwe, buf);
 			kfree(buf);
 		}
@@ -4263,7 +4271,9 @@ ieee80211_sta_scan_result(struct net_device *dev,
 }
 
 
-int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len)
+int ieee80211_sta_scan_results(struct net_device *dev,
+			       struct iw_request_info *info,
+			       char *buf, size_t len)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	char *current_ev = buf;
@@ -4276,8 +4286,8 @@ int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len)
 			spin_unlock_bh(&local->sta_bss_lock);
 			return -E2BIG;
 		}
-		current_ev = ieee80211_sta_scan_result(dev, bss, current_ev,
-						       end_buf);
+		current_ev = ieee80211_sta_scan_result(dev, info, bss,
+						       current_ev, end_buf);
 	}
 	spin_unlock_bh(&local->sta_bss_lock);
 	return current_ev - buf;
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 5af3862e719..f47d13bdf7f 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -567,7 +567,7 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev,
 	if (local->sta_sw_scanning || local->sta_hw_scanning)
 		return -EAGAIN;
 
-	res = ieee80211_sta_scan_results(dev, extra, data->length);
+	res = ieee80211_sta_scan_results(dev, info, extra, data->length);
 	if (res >= 0) {
 		data->length = res;
 		return 0;
-- 
cgit v1.2.3-70-g09d2


From 22196d3648581b253f927186b30075fb005287b0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 01:06:01 -0700
Subject: decnet: Remove SOCK_SLEEP_{PRE,POST} usage.

Just expand the wait sequence.  And as a nice side-effect
the timeout is respected now.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/af_decnet.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index fc2efe899e9..931bdf9cb75 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1719,6 +1719,8 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
 	 * See if there is data ready to read, sleep if there isn't
 	 */
 	for(;;) {
+		DEFINE_WAIT(wait);
+
 		if (sk->sk_err)
 			goto out;
 
@@ -1748,14 +1750,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
 			goto out;
 		}
 
-		set_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
-		SOCK_SLEEP_PRE(sk)
-
-		if (!dn_data_ready(sk, queue, flags, target))
-			schedule();
-
-		SOCK_SLEEP_POST(sk)
-		clear_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
+		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		finish_wait(sk->sk_sleep, &wait);
 	}
 
 	for(skb = queue->next; skb != (struct sk_buff *)queue; skb = nskb) {
@@ -2002,18 +2001,19 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
 		 * size.
 		 */
 		if (dn_queue_too_long(scp, queue, flags)) {
+			DEFINE_WAIT(wait);
+
 			if (flags & MSG_DONTWAIT) {
 				err = -EWOULDBLOCK;
 				goto out;
 			}
 
-			SOCK_SLEEP_PRE(sk)
-
-			if (dn_queue_too_long(scp, queue, flags))
-				schedule();
-
-			SOCK_SLEEP_POST(sk)
-
+			prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+			set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+			sk_wait_event(sk, &timeo,
+				      !dn_queue_too_long(scp, queue, flags));
+			clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+			finish_wait(sk->sk_sleep, &wait);
 			continue;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From ee5850defcbd98d3a9cb3e0ae93511e7c89bdecd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 01:21:03 -0700
Subject: llc: Use sock_graft() instead of by-hand version.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/af_llc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 97101dcde4c..5bcc452a247 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -128,10 +128,8 @@ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock)
 
 static void llc_ui_sk_init(struct socket *sock, struct sock *sk)
 {
+	sock_graft(sk, sock);
 	sk->sk_type	= sock->type;
-	sk->sk_sleep	= &sock->wait;
-	sk->sk_socket	= sock;
-	sock->sk	= sk;
 	sock->ops	= &llc_ui_ops;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9375cb8a1232d2a15fe34bec4d3474872e02faec Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 02:20:54 -0700
Subject: ax25: Use sock_graft() and remove bogus sk_socket and sk_sleep init.

The way that listening sockets work in ax25 is that the packet input
code path creates new socks via ax25_make_new() and attaches them
to the incoming SKB.  This SKB gets queued up into the listening
socket's receive queue.

When accept()'d the sock gets hooked up to the real parent socket.
Alternatively, if the listening socket is closed and released, any
unborn socks stuff up in the receive queue get released.

So during this time period these sockets are unreachable in any
other way, so no wakeup events nor references to their ->sk_socket
and ->sk_sleep members can occur.  And even if they do, all such
paths have to make NULL checks.

So do not deceptively initialize them in ax25_make_new() to the
values in the listening socket.  Leave them at NULL.

Finally, use sock_graft() in ax25_accept().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 2712544cf0c..97eaa23ad9e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -893,13 +893,11 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
 
 	sk->sk_destruct = ax25_free_sock;
 	sk->sk_type     = osk->sk_type;
-	sk->sk_socket   = osk->sk_socket;
 	sk->sk_priority = osk->sk_priority;
 	sk->sk_protocol = osk->sk_protocol;
 	sk->sk_rcvbuf   = osk->sk_rcvbuf;
 	sk->sk_sndbuf   = osk->sk_sndbuf;
 	sk->sk_state    = TCP_ESTABLISHED;
-	sk->sk_sleep    = osk->sk_sleep;
 	sock_copy_flags(sk, osk);
 
 	oax25 = ax25_sk(osk);
@@ -1361,13 +1359,11 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 		goto out;
 
 	newsk		 = skb->sk;
-	newsk->sk_socket = newsock;
-	newsk->sk_sleep	 = &newsock->wait;
+	sock_graft(newsk, newsock);
 
 	/* Now attach up the new socket */
 	kfree_skb(skb);
 	sk->sk_ack_backlog--;
-	newsock->sk    = newsk;
 	newsock->state = SS_CONNECTED;
 
 out:
-- 
cgit v1.2.3-70-g09d2


From 7b66767f969edcbdd573aca8063beee7534d242b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 02:36:44 -0700
Subject: netrom: Use sock_graft() and remove bogus sk_socket and sk_sleep
 init.

This is the netrom variant of changeset
9375cb8a1232d2a15fe34bec4d3474872e02faec
("ax25: Use sock_graft() and remove bogus sk_socket and sk_sleep init.")

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netrom/af_netrom.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 4bae8b998ca..58779624cdb 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -475,13 +475,11 @@ static struct sock *nr_make_new(struct sock *osk)
 	sock_init_data(NULL, sk);
 
 	sk->sk_type     = osk->sk_type;
-	sk->sk_socket   = osk->sk_socket;
 	sk->sk_priority = osk->sk_priority;
 	sk->sk_protocol = osk->sk_protocol;
 	sk->sk_rcvbuf   = osk->sk_rcvbuf;
 	sk->sk_sndbuf   = osk->sk_sndbuf;
 	sk->sk_state    = TCP_ESTABLISHED;
-	sk->sk_sleep    = osk->sk_sleep;
 	sock_copy_flags(sk, osk);
 
 	skb_queue_head_init(&nr->ack_queue);
@@ -810,13 +808,11 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 		goto out_release;
 
 	newsk = skb->sk;
-	newsk->sk_socket = newsock;
-	newsk->sk_sleep = &newsock->wait;
+	sock_graft(newsk, newsock);
 
 	/* Now attach up the new socket */
 	kfree_skb(skb);
 	sk_acceptq_removed(sk);
-	newsock->sk = newsk;
 
 out_release:
 	release_sock(sk);
-- 
cgit v1.2.3-70-g09d2


From 44ccff1f539c8c5bbfc1eacd41cb9ef65022a4ca Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 02:39:21 -0700
Subject: rose: Use sock_graft() and remove bogus sk_socket and sk_sleep init.

This is the rose variant of changeset
9375cb8a1232d2a15fe34bec4d3474872e02faec
("ax25: Use sock_graft() and remove bogus sk_socket and sk_sleep init.")

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rose/af_rose.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 1ebf6529440..af86bcb604e 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -566,13 +566,11 @@ static struct sock *rose_make_new(struct sock *osk)
 #endif
 
 	sk->sk_type     = osk->sk_type;
-	sk->sk_socket   = osk->sk_socket;
 	sk->sk_priority = osk->sk_priority;
 	sk->sk_protocol = osk->sk_protocol;
 	sk->sk_rcvbuf   = osk->sk_rcvbuf;
 	sk->sk_sndbuf   = osk->sk_sndbuf;
 	sk->sk_state    = TCP_ESTABLISHED;
-	sk->sk_sleep    = osk->sk_sleep;
 	sock_copy_flags(sk, osk);
 
 	init_timer(&rose->timer);
@@ -924,14 +922,12 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 		goto out_release;
 
 	newsk = skb->sk;
-	newsk->sk_socket = newsock;
-	newsk->sk_sleep = &newsock->wait;
+	sock_graft(newsk, newsock);
 
 	/* Now attach up the new socket */
 	skb->sk = NULL;
 	kfree_skb(skb);
 	sk->sk_ack_backlog--;
-	newsock->sk = newsk;
 
 out_release:
 	release_sock(sk);
-- 
cgit v1.2.3-70-g09d2


From b61d38e05542200f6d001b5ea9975dc0dc5bab27 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 02:44:35 -0700
Subject: x25: Use sock_graft() and remove bogus sk_socket and sk_sleep init.

This is the x25 variant of changeset
9375cb8a1232d2a15fe34bec4d3474872e02faec
("ax25: Use sock_graft() and remove bogus sk_socket and sk_sleep init.")

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 6ba67c523c1..bcb091f713e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -555,13 +555,11 @@ static struct sock *x25_make_new(struct sock *osk)
 	x25 = x25_sk(sk);
 
 	sk->sk_type        = osk->sk_type;
-	sk->sk_socket      = osk->sk_socket;
 	sk->sk_priority    = osk->sk_priority;
 	sk->sk_protocol    = osk->sk_protocol;
 	sk->sk_rcvbuf      = osk->sk_rcvbuf;
 	sk->sk_sndbuf      = osk->sk_sndbuf;
 	sk->sk_state       = TCP_ESTABLISHED;
-	sk->sk_sleep       = osk->sk_sleep;
 	sk->sk_backlog_rcv = osk->sk_backlog_rcv;
 	sock_copy_flags(sk, osk);
 
@@ -808,14 +806,12 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
 	if (!skb->sk)
 		goto out2;
 	newsk		 = skb->sk;
-	newsk->sk_socket = newsock;
-	newsk->sk_sleep  = &newsock->wait;
+	sock_graft(newsk, newsock);
 
 	/* Now attach up the new socket */
 	skb->sk = NULL;
 	kfree_skb(skb);
 	sk->sk_ack_backlog--;
-	newsock->sk    = newsk;
 	newsock->state = SS_CONNECTED;
 	rc = 0;
 out2:
-- 
cgit v1.2.3-70-g09d2


From 0efffaf9d57f54a864b739176d1f659a69486e8b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 03:01:47 -0700
Subject: econet: Use sock_orphan() instead of open-coded (and buggy) variant.

It doesn't grab the sk_callback_lock, it doesn't NULL out
the sk->sk_sleep waitqueue pointer, etc.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 7c9bb13b153..d35127bb84e 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -573,9 +573,7 @@ static int econet_release(struct socket *sock)
 
 	sk->sk_state_change(sk);	/* It is useless. Just for sanity. */
 
-	sock->sk = NULL;
-	sk->sk_socket = NULL;
-	sock_set_flag(sk, SOCK_DEAD);
+	sock_orphan(sk);
 
 	/* Purge queues */
 
-- 
cgit v1.2.3-70-g09d2


From c751e4f8b32a3869bb4fec12100952abd9baa0e1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 03:05:13 -0700
Subject: x25: Use sock_orphan() instead of open-coded (and buggy) variant.

It doesn't grab the sk_callback_lock, it doesn't NULL out
the sk->sk_sleep waitqueue pointer, etc.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index bcb091f713e..7b1c6ef0455 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -612,8 +612,7 @@ static int x25_release(struct socket *sock)
 			break;
 	}
 
-	sock->sk	= NULL;
-	sk->sk_socket	= NULL;	/* Not used, but we should do this */
+	sock_orphan(sk);
 out:
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 48c5732f4ac0621a2fdde006d55a6621a47a728f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 03:19:58 -0700
Subject: netrom: Kill spurious NULL'ing of sk->sk_socket.

In nr_release(), one code path calls sock_orphan() which
will NULL out sk->sk_socket already.

In the other case, handling states other than NR_STATE_{0,1,2,3},
seems to not be possible other than due to bugs.  Even for an
uninitialized nr->state value, that would be zero or NR_STATE_0.
It might be wise to stick a WARN_ON() here.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netrom/af_netrom.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 58779624cdb..74884f4a625 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -536,11 +536,9 @@ static int nr_release(struct socket *sock)
 		sk->sk_state_change(sk);
 		sock_orphan(sk);
 		sock_set_flag(sk, SOCK_DESTROY);
-		sk->sk_socket   = NULL;
 		break;
 
 	default:
-		sk->sk_socket = NULL;
 		break;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 3d00fb9eb11ac49d4035f756d116deeeaf99a26b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 17 Jun 2008 15:54:14 -0700
Subject: sctp: fix error path in sctp_proc_init

After the sctp_remaddr_proc_init failed, the proper rollback is
not the sctp_remaddr_proc_exit, but the sctp_assocs_proc_exit.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 23aaffb97ca..98c6a882016 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -119,7 +119,7 @@ static __init int sctp_proc_init(void)
 	return 0;
 
 out_remaddr_proc_init:
-	sctp_remaddr_proc_exit();
+	sctp_assocs_proc_exit();
 out_assocs_proc_init:
 	sctp_eps_proc_exit();
 out_eps_proc_init:
-- 
cgit v1.2.3-70-g09d2


From 43aa1920117801fe9ae3d1fad886b62511e09bee Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 17 Jun 2008 16:09:45 -0700
Subject: bridge: handle process all link-local frames

Any frame addressed to link-local addresses should be processed by local
receive path. The earlier code would process them only if STP was enabled.
Since there are other frames like LACP for bonding, we should always
process them.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 0145e941671..30b88777c3d 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -134,14 +134,11 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 		if (skb->protocol == htons(ETH_P_PAUSE))
 			goto drop;
 
-		/* Process STP BPDU's through normal netif_receive_skb() path */
-		if (p->br->stp_enabled != BR_NO_STP) {
-			if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
-				    NULL, br_handle_local_finish))
-				return NULL;
-			else
-				return skb;
-		}
+		if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
+			    NULL, br_handle_local_finish))
+			return NULL;	/* frame consumed by filter */
+		else
+			return skb;	/* continue processing */
 	}
 
 	switch (p->state) {
-- 
cgit v1.2.3-70-g09d2


From 92c0574f11598c8036f81e27d2e8bdd6eed7d76d Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 17 Jun 2008 16:10:06 -0700
Subject: bridge: make bridge address settings sticky

Normally, the bridge just chooses the smallest mac address as the
bridge id and mac address of bridge device. But if the administrator
has explictly set the interface address then don't change it.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c  | 1 +
 net/bridge/br_private.h | 2 ++
 net/bridge/br_stp_if.c  | 4 ++++
 3 files changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index a6ffc6c2a69..d9449df7cad 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -87,6 +87,7 @@ static int br_set_mac_address(struct net_device *dev, void *p)
 	spin_lock_bh(&br->lock);
 	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
 	br_stp_change_bridge_id(br, addr->sa_data);
+	br->flags |= BR_SET_MAC_ADDR;
 	spin_unlock_bh(&br->lock);
 
 	return 0;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 83ff5861c2d..8593c9f6a30 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -92,6 +92,8 @@ struct net_bridge
 	struct hlist_head		hash[BR_HASH_SIZE];
 	struct list_head		age_list;
 	unsigned long			feature_mask;
+	unsigned long			flags;
+#define BR_SET_MAC_ADDR		0x00000001
 
 	/* STP */
 	bridge_id			designated_root;
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 1a4e5c37a0c..9a52ac5b452 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -214,6 +214,10 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
 	const unsigned char *addr = br_mac_zero;
 	struct net_bridge_port *p;
 
+	/* user has chosen a value so keep it */
+	if (br->flags & BR_SET_MAC_ADDR)
+		return;
+
 	list_for_each_entry(p, &br->port_list, list) {
 		if (addr == br_mac_zero ||
 		    memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0)
-- 
cgit v1.2.3-70-g09d2


From f586287e0fed366d80822666f70487472ab8793a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 17 Jun 2008 16:16:13 -0700
Subject: bridge: fix IPV6=n build

Fix bridge netfilter code so that it uses CONFIG_IPV6 as needed:

net/built-in.o: In function `ebt_filter_ip6':
ebt_ip6.c:(.text+0x87c37): undefined reference to `ipv6_skip_exthdr'
net/built-in.o: In function `ebt_log_packet':
ebt_log.c:(.text+0x88dee): undefined reference to `ipv6_skip_exthdr'
make[1]: *** [.tmp_vmlinux1] Error 1

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/Kconfig   | 2 +-
 net/bridge/netfilter/Makefile  | 2 +-
 net/bridge/netfilter/ebt_log.c | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index fb684c2ff8b..540df4106be 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -85,7 +85,7 @@ config BRIDGE_EBT_IP
 
 config BRIDGE_EBT_IP6
 	tristate "ebt: IP6 filter support"
-	depends on BRIDGE_NF_EBTABLES
+	depends on BRIDGE_NF_EBTABLES && IPV6
 	help
 	  This option adds the IP6 match, which allows basic IPV6 header field
 	  filtering.
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index dd960645b41..0718699540b 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o
 obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o
 obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o
 obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o
-obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip6.o
+obj-$(CONFIG_BRIDGE_EBT_IP6) += ebt_ip6.o
 obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o
 obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o
 obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index c883ec8a28b..2f430d4ae91 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -123,6 +123,7 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum,
 		goto out;
 	}
 
+#if defined(CONFIG_BRIDGE_EBT_IP6) || defined(CONFIG_BRIDGE_EBT_IP6_MODULE)
 	if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto ==
 	   htons(ETH_P_IPV6)) {
 		const struct ipv6hdr *ih;
@@ -146,6 +147,7 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum,
 		print_ports(skb, nexthdr, offset_ph);
 		goto out;
 	}
+#endif
 
 	if ((bitmask & EBT_LOG_ARP) &&
 	    ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) ||
-- 
cgit v1.2.3-70-g09d2


From 61c33e012964ce358b42d2a1e9cd309af5dab02b Mon Sep 17 00:00:00 2001
From: Mitchell Blank Jr <mitch@sfgoth.com>
Date: Tue, 17 Jun 2008 16:20:06 -0700
Subject: atm: use const where reasonable

From: Mitchell Blank Jr <mitch@sfgoth.com>

Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/addr.c   | 10 +++++-----
 net/atm/addr.h   |  4 ++--
 net/atm/br2684.c | 14 +++++++-------
 net/atm/common.c |  8 ++++----
 net/atm/lec.c    | 55 +++++++++++++++++++++++++++----------------------------
 net/atm/lec.h    | 10 +++++-----
 6 files changed, 50 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/net/atm/addr.c b/net/atm/addr.c
index 6afa77d63bb..82e85abc303 100644
--- a/net/atm/addr.c
+++ b/net/atm/addr.c
@@ -9,7 +9,7 @@
 #include "signaling.h"
 #include "addr.h"
 
-static int check_addr(struct sockaddr_atmsvc *addr)
+static int check_addr(const struct sockaddr_atmsvc *addr)
 {
 	int i;
 
@@ -23,7 +23,7 @@ static int check_addr(struct sockaddr_atmsvc *addr)
 	return -EINVAL;
 }
 
-static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b)
+static int identical(const struct sockaddr_atmsvc *a, const struct sockaddr_atmsvc *b)
 {
 	if (*a->sas_addr.prv)
 		if (memcmp(a->sas_addr.prv, b->sas_addr.prv, ATM_ESA_LEN))
@@ -35,7 +35,7 @@ static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b)
 	return !strcmp(a->sas_addr.pub, b->sas_addr.pub);
 }
 
-static void notify_sigd(struct atm_dev *dev)
+static void notify_sigd(const struct atm_dev *dev)
 {
 	struct sockaddr_atmpvc pvc;
 
@@ -63,7 +63,7 @@ void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t atype)
 		notify_sigd(dev);
 }
 
-int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
+int atm_add_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr,
 		 enum atm_addr_type_t atype)
 {
 	unsigned long flags;
@@ -98,7 +98,7 @@ int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
 	return 0;
 }
 
-int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
+int atm_del_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr,
 		 enum atm_addr_type_t atype)
 {
 	unsigned long flags;
diff --git a/net/atm/addr.h b/net/atm/addr.h
index f39433ad45d..6837e9e7eb1 100644
--- a/net/atm/addr.h
+++ b/net/atm/addr.h
@@ -10,9 +10,9 @@
 #include <linux/atmdev.h>
 
 void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t type);
-int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
+int atm_add_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr,
 		 enum atm_addr_type_t type);
-int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
+int atm_del_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr,
 		 enum atm_addr_type_t type);
 int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user *buf,
 		 size_t size, enum atm_addr_type_t type);
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 05fafdc2eea..8d9a6f15888 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -52,12 +52,12 @@ static void skb_debug(const struct sk_buff *skb)
 #define ETHERTYPE_IPV6	0x86, 0xdd
 #define PAD_BRIDGED	0x00, 0x00
 
-static unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 };
-static unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 };
-static unsigned char llc_oui_pid_pad[] =
+static const unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 };
+static const unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 };
+static const unsigned char llc_oui_pid_pad[] =
 			{ LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED };
-static unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
-static unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
+static const unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
+static const unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
 
 enum br2684_encaps {
 	e_vc = BR2684_ENCAPS_VC,
@@ -217,8 +217,8 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 	return 1;
 }
 
-static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb,
-						   struct br2684_dev *brdev)
+static inline struct br2684_vcc *pick_outgoing_vcc(const struct sk_buff *skb,
+						   const struct br2684_dev *brdev)
 {
 	return list_empty(&brdev->brvccs) ? NULL : list_entry_brvcc(brdev->brvccs.next);	/* 1 vcc/dev right now */
 }
diff --git a/net/atm/common.c b/net/atm/common.c
index c865517ba44..d34edbe754c 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -262,7 +262,7 @@ static int adjust_tp(struct atm_trafprm *tp,unsigned char aal)
 }
 
 
-static int check_ci(struct atm_vcc *vcc, short vpi, int vci)
+static int check_ci(const struct atm_vcc *vcc, short vpi, int vci)
 {
 	struct hlist_head *head = &vcc_hash[vci &
 					(VCC_HTABLE_SIZE - 1)];
@@ -290,7 +290,7 @@ static int check_ci(struct atm_vcc *vcc, short vpi, int vci)
 }
 
 
-static int find_ci(struct atm_vcc *vcc, short *vpi, int *vci)
+static int find_ci(const struct atm_vcc *vcc, short *vpi, int *vci)
 {
 	static short p;        /* poor man's per-device cache */
 	static int c;
@@ -646,7 +646,7 @@ static int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos)
 }
 
 
-static int check_tp(struct atm_trafprm *tp)
+static int check_tp(const struct atm_trafprm *tp)
 {
 	/* @@@ Should be merged with adjust_tp */
 	if (!tp->traffic_class || tp->traffic_class == ATM_ANYCLASS) return 0;
@@ -663,7 +663,7 @@ static int check_tp(struct atm_trafprm *tp)
 }
 
 
-static int check_qos(struct atm_qos *qos)
+static int check_qos(const struct atm_qos *qos)
 {
 	int error;
 
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 653aca3573a..5799fb52365 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -65,36 +65,36 @@ static int lec_close(struct net_device *dev);
 static struct net_device_stats *lec_get_stats(struct net_device *dev);
 static void lec_init(struct net_device *dev);
 static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
-					  unsigned char *mac_addr);
+					  const unsigned char *mac_addr);
 static int lec_arp_remove(struct lec_priv *priv,
 			  struct lec_arp_table *to_remove);
 /* LANE2 functions */
-static void lane2_associate_ind(struct net_device *dev, u8 *mac_address,
-				u8 *tlvs, u32 sizeoftlvs);
-static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
+static void lane2_associate_ind(struct net_device *dev, const u8 *mac_address,
+				const u8 *tlvs, u32 sizeoftlvs);
+static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force,
 			 u8 **tlvs, u32 *sizeoftlvs);
-static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
-			       u8 *tlvs, u32 sizeoftlvs);
+static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst,
+			       const u8 *tlvs, u32 sizeoftlvs);
 
-static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
+static int lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr,
 			   unsigned long permanent);
 static void lec_arp_check_empties(struct lec_priv *priv,
 				  struct atm_vcc *vcc, struct sk_buff *skb);
 static void lec_arp_destroy(struct lec_priv *priv);
 static void lec_arp_init(struct lec_priv *priv);
 static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
-				       unsigned char *mac_to_find,
+				       const unsigned char *mac_to_find,
 				       int is_rdesc,
 				       struct lec_arp_table **ret_entry);
-static void lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
-			   unsigned char *atm_addr, unsigned long remoteflag,
+static void lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
+			   const unsigned char *atm_addr, unsigned long remoteflag,
 			   unsigned int targetless_le_arp);
 static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id);
 static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc);
 static void lec_set_flush_tran_id(struct lec_priv *priv,
-				  unsigned char *atm_addr,
+				  const unsigned char *atm_addr,
 				  unsigned long tran_id);
-static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
+static void lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
 			  struct atm_vcc *vcc,
 			  void (*old_push) (struct atm_vcc *vcc,
 					    struct sk_buff *skb));
@@ -634,7 +634,7 @@ static struct atm_dev lecatm_dev = {
  */
 static int
 send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
-	     unsigned char *mac_addr, unsigned char *atm_addr,
+	     const unsigned char *mac_addr, const unsigned char *atm_addr,
 	     struct sk_buff *data)
 {
 	struct sock *sk;
@@ -705,10 +705,9 @@ static void lec_init(struct net_device *dev)
 	dev->set_multicast_list = lec_set_multicast_list;
 	dev->do_ioctl = NULL;
 	printk("%s: Initialized!\n", dev->name);
-	return;
 }
 
-static unsigned char lec_ctrl_magic[] = {
+static const unsigned char lec_ctrl_magic[] = {
 	0xff,
 	0x00,
 	0x01,
@@ -1276,7 +1275,7 @@ module_exit(lane_module_cleanup);
  * lec will be used.
  * If dst_mac == NULL, targetless LE_ARP will be sent
  */
-static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
+static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force,
 			 u8 **tlvs, u32 *sizeoftlvs)
 {
 	unsigned long flags;
@@ -1322,8 +1321,8 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
  * Returns 1 for success, 0 for failure (out of memory)
  *
  */
-static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
-			       u8 *tlvs, u32 sizeoftlvs)
+static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst,
+			       const u8 *tlvs, u32 sizeoftlvs)
 {
 	int retval;
 	struct sk_buff *skb;
@@ -1358,8 +1357,8 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
  * LANE2: 3.1.5, LE_ASSOCIATE.indication
  *
  */
-static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr,
-				u8 *tlvs, u32 sizeoftlvs)
+static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
+				const u8 *tlvs, u32 sizeoftlvs)
 {
 #if 0
 	int i = 0;
@@ -1744,7 +1743,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
  * Find entry by mac_address
  */
 static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
-					  unsigned char *mac_addr)
+					  const unsigned char *mac_addr)
 {
 	struct hlist_node *node;
 	struct hlist_head *head;
@@ -1764,7 +1763,7 @@ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
 }
 
 static struct lec_arp_table *make_entry(struct lec_priv *priv,
-					unsigned char *mac_addr)
+					const unsigned char *mac_addr)
 {
 	struct lec_arp_table *to_return;
 
@@ -1921,7 +1920,7 @@ restart:
  *
  */
 static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
-				       unsigned char *mac_to_find, int is_rdesc,
+				       const unsigned char *mac_to_find, int is_rdesc,
 				       struct lec_arp_table **ret_entry)
 {
 	unsigned long flags;
@@ -2017,7 +2016,7 @@ out:
 }
 
 static int
-lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
+lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr,
 		unsigned long permanent)
 {
 	unsigned long flags;
@@ -2047,8 +2046,8 @@ lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
  * Notifies:  Response to arp_request (atm_addr != NULL)
  */
 static void
-lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
-	       unsigned char *atm_addr, unsigned long remoteflag,
+lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
+	       const unsigned char *atm_addr, unsigned long remoteflag,
 	       unsigned int targetless_le_arp)
 {
 	unsigned long flags;
@@ -2148,7 +2147,7 @@ out:
  * Notifies: Vcc setup ready
  */
 static void
-lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
+lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
 	      struct atm_vcc *vcc,
 	      void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb))
 {
@@ -2336,7 +2335,7 @@ restart:
 
 static void
 lec_set_flush_tran_id(struct lec_priv *priv,
-		      unsigned char *atm_addr, unsigned long tran_id)
+		      const unsigned char *atm_addr, unsigned long tran_id)
 {
 	unsigned long flags;
 	struct hlist_node *node;
diff --git a/net/atm/lec.h b/net/atm/lec.h
index b41cda7ea1e..0d376682c1a 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -42,12 +42,12 @@ struct lecdatahdr_8025 {
  *
  */
 struct lane2_ops {
-	int (*resolve) (struct net_device *dev, u8 *dst_mac, int force,
+	int (*resolve) (struct net_device *dev, const u8 *dst_mac, int force,
 			u8 **tlvs, u32 *sizeoftlvs);
-	int (*associate_req) (struct net_device *dev, u8 *lan_dst,
-			      u8 *tlvs, u32 sizeoftlvs);
-	void (*associate_indicator) (struct net_device *dev, u8 *mac_addr,
-				     u8 *tlvs, u32 sizeoftlvs);
+	int (*associate_req) (struct net_device *dev, const u8 *lan_dst,
+			      const u8 *tlvs, u32 sizeoftlvs);
+	void (*associate_indicator) (struct net_device *dev, const u8 *mac_addr,
+				     const u8 *tlvs, u32 sizeoftlvs);
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From fe2c802ab62aa63d276deafa905875f3455f2621 Mon Sep 17 00:00:00 2001
From: Bernard Pidoux <f6bvp@amsat.org>
Date: Tue, 17 Jun 2008 17:08:32 -0700
Subject: rose: improving AX25 routing frames via ROSE network

ROSE network is organized through nodes connected via hamradio or Internet.
AX25 packet radio frames sent to a remote ROSE address destination are routed
through these nodes.

Without the present patch, automatic routing mechanism did not work optimally
due to an improper parameter checking.

rose_get_neigh() function is called either by rose_connect() or by
rose_route_frame().

In the case of a call from rose_connect(), f0 timer is checked to find if a connection
is already pending. In that case it returns the address of the neighbour, or returns a NULL otherwise.

When called by rose_route_frame() the purpose was to route a packet AX25 frame
through an adjacent node given a destination rose address.
However, in that case, t0 timer checked does not indicate if the adjacent node
is actually connected even if the timer is not null. Thus, for each frame sent, the
function often tried to start a new connexion even if the adjacent node was already connected.

The patch adds a "new" parameter that is true when the function is called by
rose route_frame().
This instructs rose_get_neigh() to check node parameter "restarted".
If restarted is true it means that the route to the destination address is opened via a neighbour
node already connected.
If "restarted" is false the function returns a NULL.
In that case the calling function will initiate a new connection as before.

This results in a fast routing of frames, from nodes to nodes, until
destination is reached, as originaly specified by ROSE protocole.

Signed-off-by: Bernard Pidoux <f6bvp@amsat.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rose.h    |  2 +-
 net/rose/af_rose.c    |  4 ++--
 net/rose/rose_route.c | 29 ++++++++++++++++++-----------
 3 files changed, 21 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/include/net/rose.h b/include/net/rose.h
index e5bb084d875..cbd5364b2c8 100644
--- a/include/net/rose.h
+++ b/include/net/rose.h
@@ -201,7 +201,7 @@ extern void rose_link_device_down(struct net_device *);
 extern struct net_device *rose_dev_first(void);
 extern struct net_device *rose_dev_get(rose_address *);
 extern struct rose_route *rose_route_free_lci(unsigned int, struct rose_neigh *);
-extern struct rose_neigh *rose_get_neigh(rose_address *, unsigned char *, unsigned char *);
+extern struct rose_neigh *rose_get_neigh(rose_address *, unsigned char *, unsigned char *, int);
 extern int  rose_rt_ioctl(unsigned int, void __user *);
 extern void rose_link_failed(ax25_cb *, int);
 extern int  rose_route_frame(struct sk_buff *, ax25_cb *);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index af86bcb604e..46461a69cd0 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -757,7 +757,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
 	sock->state = SS_UNCONNECTED;
 
 	rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause,
-					 &diagnostic);
+					 &diagnostic, 0);
 	if (!rose->neighbour) {
 		err = -ENETUNREACH;
 		goto out_release;
@@ -853,7 +853,7 @@ rose_try_next_neigh:
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
 	/* Try next neighbour */
-		rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic);
+		rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0);
 		if (rose->neighbour)
 			goto rose_try_next_neigh;
 
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index bd593871c81..a81066a1010 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -662,27 +662,34 @@ struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neig
 }
 
 /*
- *	Find a neighbour given a ROSE address.
+ *	Find a neighbour or a route given a ROSE address.
  */
 struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
-	unsigned char *diagnostic)
+	unsigned char *diagnostic, int new)
 {
 	struct rose_neigh *res = NULL;
 	struct rose_node *node;
 	int failed = 0;
 	int i;
 
-	spin_lock_bh(&rose_node_list_lock);
+	if (!new) spin_lock_bh(&rose_node_list_lock);
 	for (node = rose_node_list; node != NULL; node = node->next) {
 		if (rosecmpm(addr, &node->address, node->mask) == 0) {
 			for (i = 0; i < node->count; i++) {
-				if (!rose_ftimer_running(node->neighbour[i])) {
-					res = node->neighbour[i];
-					goto out;
-				} else
-					failed = 1;
+				if (new) {
+					if (node->neighbour[i]->restarted) {
+						res = node->neighbour[i];
+						goto out;
+					}
+				}
+				else {
+					if (!rose_ftimer_running(node->neighbour[i])) {
+						res = node->neighbour[i];
+						goto out;
+					} else
+						failed = 1;
+				}
 			}
-			break;
 		}
 	}
 
@@ -695,7 +702,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
 	}
 
 out:
-	spin_unlock_bh(&rose_node_list_lock);
+	if (!new) spin_unlock_bh(&rose_node_list_lock);
 
 	return res;
 }
@@ -1018,7 +1025,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
 		rose_route = rose_route->next;
 	}
 
-	if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic)) == NULL) {
+	if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic, 1)) == NULL) {
 		rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic);
 		goto out;
 	}
-- 
cgit v1.2.3-70-g09d2


From c1da4ac752b8b0411791d26c678fcf23d2eed242 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@voltaire.com>
Date: Fri, 13 Jun 2008 18:12:00 -0700
Subject: net/core: add NETDEV_BONDING_FAILOVER event

Add NETDEV_BONDING_FAILOVER event to be used in a successive patch
by bonding to announce fail-over for the active-backup mode through the
netdev events notifier chain mechanism. Such an event can be of use for the
RDMA CM (communication manager) to let native RDMA ULPs (eg NFS-RDMA, iSER)
always be aligned with the IP stack, in the sense that they use the same
ports/links as the stack does. More usages can be done to allow monitoring
tools based on netlink events being aware to bonding fail-over.

Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/netdevice.h | 1 +
 include/linux/notifier.h  | 1 +
 net/core/dev.c            | 6 ++++++
 3 files changed, 8 insertions(+)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f27fd200933..e92fc839ab1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1479,6 +1479,7 @@ extern void		__dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
+extern void		netdev_bonding_change(struct net_device *dev);
 extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 0ff6224d172..bd3d72ddf33 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -197,6 +197,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_GOING_DOWN	0x0009
 #define NETDEV_CHANGENAME	0x000A
 #define NETDEV_FEAT_CHANGE	0x000B
+#define NETDEV_BONDING_FAILOVER 0x000C
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/core/dev.c b/net/core/dev.c
index 68d8df0992a..0e45742e715 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -961,6 +961,12 @@ void netdev_state_change(struct net_device *dev)
 	}
 }
 
+void netdev_bonding_change(struct net_device *dev)
+{
+	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
+}
+EXPORT_SYMBOL(netdev_bonding_change);
+
 /**
  *	dev_load 	- load a network module
  *	@net: the applicable net namespace
-- 
cgit v1.2.3-70-g09d2


From b8a9787eddb0e4665f31dd1d64584732b2b5d051 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <fubar@us.ibm.com>
Date: Fri, 13 Jun 2008 18:12:04 -0700
Subject: bonding: Allow setting max_bonds to zero

	Permit bonding to function rationally if max_bonds is set to
zero.  This will load the module, but create no master devices (which can
be created via sysfs).

	Requires some change to bond_create_sysfs; currently, the
netdev sysfs directory is determined from the first bonding device created,
but this is no longer possible.  Instead, an interface from net/core is
created to create and destroy files in net_class.

	Based on a patch submitted by Phil Oester <kernel@linuxaces.com>.
Modified by Jay Vosburgh to fix the sysfs issue mentioned above and to
update the documentation.

Signed-off-by: Phil Oester <kernel@linuxace.com>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 Documentation/networking/bonding.txt |  3 ++-
 drivers/net/bonding/bond_main.c      |  6 +++---
 drivers/net/bonding/bond_sysfs.c     | 22 +++-------------------
 include/linux/netdevice.h            |  3 +++
 net/core/net-sysfs.c                 | 13 +++++++++++++
 5 files changed, 24 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 370b7da73ab..7fa7fe71d7a 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -376,7 +376,8 @@ max_bonds
 	Specifies the number of bonding devices to create for this
 	instance of the bonding driver.  E.g., if max_bonds is 3, and
 	the bonding driver is not already loaded, then bond0, bond1
-	and bond2 will be created.  The default value is 1.
+	and bond2 will be created.  The default value is 1.  Specifying
+	a value of 0 will load bonding, but will not create any devices.
 
 miimon
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3b6d66a8ab9..d57b65dc2c7 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4750,11 +4750,11 @@ static int bond_check_params(struct bond_params *params)
 		}
 	}
 
-	if (max_bonds < 1 || max_bonds > INT_MAX) {
+	if (max_bonds < 0 || max_bonds > INT_MAX) {
 		printk(KERN_WARNING DRV_NAME
 		       ": Warning: max_bonds (%d) not in range %d-%d, so it "
 		       "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
-		       max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS);
+		       max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);
 		max_bonds = BOND_DEFAULT_MAX_BONDS;
 	}
 
@@ -4953,7 +4953,7 @@ static int bond_check_params(struct bond_params *params)
 
 		printk("\n");
 
-	} else {
+	} else if (max_bonds) {
 		/* miimon and arp_interval not set, we need one so things
 		 * work as expected, see bonding.txt for details
 		 */
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index dd265c69b0d..6caac0ffb2f 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -53,7 +53,6 @@ extern struct bond_parm_tbl arp_validate_tbl[];
 extern struct bond_parm_tbl fail_over_mac_tbl[];
 
 static int expected_refcount = -1;
-static struct class *netdev_class;
 /*--------------------------- Data Structures -----------------------------*/
 
 /* Bonding sysfs lock.  Why can't we just use the subsystem lock?
@@ -1447,19 +1446,9 @@ static struct attribute_group bonding_group = {
  */
 int bond_create_sysfs(void)
 {
-	int ret = 0;
-	struct bonding *firstbond;
-
-	/* get the netdev class pointer */
-	firstbond = container_of(bond_dev_list.next, struct bonding, bond_list);
-	if (!firstbond)
-		return -ENODEV;
-
-	netdev_class = firstbond->dev->dev.class;
-	if (!netdev_class)
-		return -ENODEV;
+	int ret;
 
-	ret = class_create_file(netdev_class, &class_attr_bonding_masters);
+	ret = netdev_class_create_file(&class_attr_bonding_masters);
 	/*
 	 * Permit multiple loads of the module by ignoring failures to
 	 * create the bonding_masters sysfs file.  Bonding devices
@@ -1478,10 +1467,6 @@ int bond_create_sysfs(void)
 			printk(KERN_ERR
 			       "network device named %s already exists in sysfs",
 			       class_attr_bonding_masters.attr.name);
-		else {
-			netdev_class = NULL;
-			return 0;
-		}
 	}
 
 	return ret;
@@ -1493,8 +1478,7 @@ int bond_create_sysfs(void)
  */
 void bond_destroy_sysfs(void)
 {
-	if (netdev_class)
-		class_remove_file(netdev_class, &class_attr_bonding_masters);
+	netdev_class_remove_file(&class_attr_bonding_masters);
 }
 
 /*
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e92fc839ab1..9ccbfac3fd9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1506,6 +1506,9 @@ extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 extern void dev_seq_stop(struct seq_file *seq, void *v);
 #endif
 
+extern int netdev_class_create_file(struct class_attribute *class_attr);
+extern void netdev_class_remove_file(struct class_attribute *class_attr);
+
 extern void linkwatch_run_queue(void);
 
 extern int netdev_compute_features(unsigned long all, unsigned long one);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index dccd737ea2e..3f794131921 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -468,6 +468,19 @@ int netdev_register_kobject(struct net_device *net)
 	return device_add(dev);
 }
 
+int netdev_class_create_file(struct class_attribute *class_attr)
+{
+	return class_create_file(&net_class, class_attr);
+}
+
+void netdev_class_remove_file(struct class_attribute *class_attr)
+{
+	class_remove_file(&net_class, class_attr);
+}
+
+EXPORT_SYMBOL(netdev_class_create_file);
+EXPORT_SYMBOL(netdev_class_remove_file);
+
 void netdev_initialize_kobject(struct net_device *net)
 {
 	struct device *device = &(net->dev);
-- 
cgit v1.2.3-70-g09d2


From cb61cb9b8b5ef6c2697d84e5015e314626eb2fba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 17 Jun 2008 21:04:56 -0700
Subject: udp: sk_drops handling

In commits 33c732c36169d7022ad7d6eb474b0c9be43a2dc1 ([IPV4]: Add raw
drops counter) and a92aa318b4b369091fd80433c80e62838db8bc1c ([IPV6]:
Add raw drops counter), Wang Chen added raw drops counter for
/proc/net/raw & /proc/net/raw6

This patch adds this capability to UDP sockets too (/proc/net/udp &
/proc/net/udp6).

This means that 'RcvbufErrors' errors found in /proc/net/snmp can be also
be examined for each udp socket.

# grep Udp: /proc/net/snmp
Udp: InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors
Udp: 23971006 75 899420 16390693 146348 0

# cat /proc/net/udp
 sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt  ---
uid  timeout inode ref pointer drops
 75: 00000000:02CB 00000000:0000 07 00000000:00000000 00:00000000 00000000  ---
  0        0 2358 2 ffff81082a538c80 0
111: 00000000:006F 00000000:0000 07 00000000:00000000 00:00000000 00000000  ---
  0        0 2286 2 ffff81042dd35c80 146348

In this example, only port 111 (0x006F) was flooded by messages that
user program could not read fast enough. 146348 messages were lost.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  2 +-
 net/ipv4/raw.c     |  2 +-
 net/ipv4/udp.c     | 11 +++++++----
 net/ipv6/raw.c     |  2 +-
 net/ipv6/udp.c     | 11 +++++++----
 5 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 83f74b11d09..a7c30412de6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -166,7 +166,7 @@ struct sock_common {
   *	@sk_err: last error
   *	@sk_err_soft: errors that don't cause failure but are the cause of a
   *		      persistent failure not just 'timed out'
-  *	@sk_drops: raw drops counter
+  *	@sk_drops: raw/udp drops counter
   *	@sk_ack_backlog: current listen backlog
   *	@sk_max_ack_backlog: listen backlog set in listen()
   *	@sk_priority: %SO_PRIORITY setting
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7d449468409..925fdf18cf9 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -944,7 +944,7 @@ static int raw_seq_show(struct seq_file *seq, void *v)
 	if (v == SEQ_START_TOKEN)
 		seq_printf(seq, "  sl  local_address rem_address   st tx_queue "
 				"rx_queue tr tm->when retrnsmt   uid  timeout "
-				"inode  drops\n");
+				"inode ref pointer drops\n");
 	else
 		raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
 	return 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 11eabf13614..3bbf6fb6e4f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1040,8 +1040,10 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
-		if (rc == -ENOMEM)
+		if (rc == -ENOMEM) {
 			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
+			atomic_inc(&sk->sk_drops);
+		}
 		goto drop;
 	}
 
@@ -1629,12 +1631,13 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
 	__u16 srcp	  = ntohs(inet->sport);
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p%n",
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
 		bucket, src, srcp, dest, destp, sp->sk_state,
 		atomic_read(&sp->sk_wmem_alloc),
 		atomic_read(&sp->sk_rmem_alloc),
 		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
-		atomic_read(&sp->sk_refcnt), sp, len);
+		atomic_read(&sp->sk_refcnt), sp,
+		atomic_read(&sp->sk_drops), len);
 }
 
 int udp4_seq_show(struct seq_file *seq, void *v)
@@ -1643,7 +1646,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "%-127s\n",
 			   "  sl  local_address rem_address   st tx_queue "
 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
-			   "inode");
+			   "inode ref pointer drops");
 	else {
 		struct udp_iter_state *state = seq->private;
 		int len;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 456777d7a40..34cfb3f41c2 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1251,7 +1251,7 @@ static int raw6_seq_show(struct seq_file *seq, void *v)
 			   "local_address                         "
 			   "remote_address                        "
 			   "st tx_queue rx_queue tr tm->when retrnsmt"
-			   "   uid  timeout inode  drops\n");
+			   "   uid  timeout inode ref pointer drops\n");
 	else
 		raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
 	return 0;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 432edaa882f..f91e1df0d25 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -297,8 +297,10 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
-		if (rc == -ENOMEM)
+		if (rc == -ENOMEM) {
 			UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
+			atomic_inc(&sk->sk_drops);
+		}
 		goto drop;
 	}
 
@@ -955,7 +957,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
 	srcp  = ntohs(inet->sport);
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
 		   bucket,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -967,7 +969,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
 		   0, 0L, 0,
 		   sock_i_uid(sp), 0,
 		   sock_i_ino(sp),
-		   atomic_read(&sp->sk_refcnt), sp);
+		   atomic_read(&sp->sk_refcnt), sp,
+		   atomic_read(&sp->sk_drops));
 }
 
 int udp6_seq_show(struct seq_file *seq, void *v)
@@ -978,7 +981,7 @@ int udp6_seq_show(struct seq_file *seq, void *v)
 			   "local_address                         "
 			   "remote_address                        "
 			   "st tx_queue rx_queue tr tm->when retrnsmt"
-			   "   uid  timeout inode\n");
+			   "   uid  timeout inode ref pointer drops\n");
 	else
 		udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 30902dc3cb0ea1cfc7ac2b17bcf478ff98420d74 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 21:26:37 -0700
Subject: ax25: Fix std timer socket destroy handling.

Tihomir Heidelberg - 9a4gl, reports:

--------------------
I would like to direct you attention to one problem existing in ax.25
kernel since 2.4. If listening socket is closed and its SKB queue is
released but those sockets get weird. Those "unAccepted()" sockets
should be destroyed in ax25_std_heartbeat_expiry, but it will not
happen. And there is also a note about that in ax25_std_timer.c:
/* Magic here: If we listen() and a new link dies before it
is accepted() it isn't 'dead' so doesn't get removed. */

This issue cause ax25d to stop accepting new connections and I had to
restarted ax25d approximately each day and my services were unavailable.
Also netstat -n -l shows invalid source and device for those listening
sockets. It is strange why ax25d's listening socket get weird because of
this issue, but definitely when I solved this bug I do not have problems
with ax25d anymore and my ax25d can run for months without problems.
--------------------

Actually as far as I can see, this problem is even in releases
as far back as 2.2.x as well.

It seems senseless to special case this test on TCP_LISTEN state.
Anything still stuck in state 0 has no external references and
we can just simply kill it off directly.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/ax25_std_timer.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 96e4b927325..cdc7e751ef3 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -39,11 +39,9 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
 
 	switch (ax25->state) {
 	case AX25_STATE_0:
-		/* Magic here: If we listen() and a new link dies before it
-		   is accepted() it isn't 'dead' so doesn't get removed. */
-		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
-		    (sk->sk_state == TCP_LISTEN &&
-		     sock_flag(sk, SOCK_DEAD))) {
+		if (!sk ||
+		    sock_flag(sk, SOCK_DESTROY) ||
+		    sock_flag(sk, SOCK_DEAD)) {
 			if (sk) {
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);
-- 
cgit v1.2.3-70-g09d2


From 972692e0db9b0a62329ca394062b58917ddbd03c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Jun 2008 22:41:38 -0700
Subject: net: Add sk_set_socket() helper.

In order to more easily grep for all things that set
sk->sk_socket, add sk_set_socket() helper inline function.

Suggested (although only half-seriously) by Evgeniy Polyakov.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 9 +++++++--
 net/core/sock.c    | 4 ++--
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index a7c30412de6..3f4897ab432 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -990,6 +990,11 @@ static inline void sock_put(struct sock *sk)
 extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
 			  const int nested);
 
+static inline void sk_set_socket(struct sock *sk, struct socket *sock)
+{
+	sk->sk_socket = sock;
+}
+
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
  * Note that parent inode held reference count on this struct sock,
@@ -1001,7 +1006,7 @@ static inline void sock_orphan(struct sock *sk)
 {
 	write_lock_bh(&sk->sk_callback_lock);
 	sock_set_flag(sk, SOCK_DEAD);
-	sk->sk_socket = NULL;
+	sk_set_socket(sk, NULL);
 	sk->sk_sleep  = NULL;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1011,7 +1016,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 	write_lock_bh(&sk->sk_callback_lock);
 	sk->sk_sleep = &parent->wait;
 	parent->sk = sk;
-	sk->sk_socket = parent;
+	sk_set_socket(sk, parent);
 	security_sock_graft(sk, parent);
 	write_unlock_bh(&sk->sk_callback_lock);
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 3879bf65897..2c0ba52e530 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1066,7 +1066,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		 * to be taken into account in all callers. -acme
 		 */
 		sk_refcnt_debug_inc(newsk);
-		newsk->sk_socket = NULL;
+		sk_set_socket(newsk, NULL);
 		newsk->sk_sleep	 = NULL;
 
 		if (newsk->sk_prot->sockets_allocated)
@@ -1702,7 +1702,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
 	sk->sk_sndbuf		=	sysctl_wmem_default;
 	sk->sk_state		=	TCP_CLOSE;
-	sk->sk_socket		=	sock;
+	sk_set_socket(sk, sock);
 
 	sock_set_flag(sk, SOCK_ZAPPED);
 
-- 
cgit v1.2.3-70-g09d2


From dd574dbfcc9e74e7dd8fd59ae0075d23e71a3da1 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 18 Jun 2008 00:51:09 -0700
Subject: ipv6: minor cleanup in net/ipv6/tcp_ipv6.c [RESEND ].

In net/ipv6/tcp_ipv6.c:

  - Remove unneeded tcp_v6_send_check() declaration.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index daefc18d50a..09be09cc1aa 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -70,8 +70,6 @@
 
 static void	tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
 static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
-static void	tcp_v6_send_check(struct sock *sk, int len,
-				  struct sk_buff *skb);
 
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
 
-- 
cgit v1.2.3-70-g09d2


From dad9b335c6940de2746a9788eb456d09cf102f81 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Wed, 18 Jun 2008 01:48:28 -0700
Subject: netdevice: Fix promiscuity and allmulti overflow

Max of promiscuity and allmulti plus positive @inc can cause overflow.
Fox example: when allmulti=0xFFFFFFFF, any caller give dev_set_allmulti() a
positive @inc will cause allmulti be off.
This is not what we want, though it's rare case.
The fix is that only negative @inc will cause allmulti or promiscuity be off
and when any caller makes the counters touch the roof, we return error.

Change of v2:
Change void function dev_set_promiscuity/allmulti to return int.
So callers can get the overflow error.
Caller's fix will be done later.

Change of v3:
1. Since we return error to caller, we don't need to print KERN_ERROR,
KERN_WARNING is enough.
2. In dev_set_promiscuity(), if __dev_set_promiscuity() failed, we
return at once.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++--
 net/core/dev.c            | 55 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 47 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4bf613cd9e2..45dce2b58d4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1476,8 +1476,8 @@ extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *ad
 extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
 extern int		__dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
 extern void		__dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
-extern void		dev_set_promiscuity(struct net_device *dev, int inc);
-extern void		dev_set_allmulti(struct net_device *dev, int inc);
+extern int		dev_set_promiscuity(struct net_device *dev, int inc);
+extern int		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
 extern void		netdev_bonding_change(struct net_device *dev);
 extern void		netdev_features_change(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0e45742e715..a495f712d38 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2771,16 +2771,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 	return 0;
 }
 
-static void __dev_set_promiscuity(struct net_device *dev, int inc)
+static int __dev_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
 
 	ASSERT_RTNL();
 
-	if ((dev->promiscuity += inc) == 0)
-		dev->flags &= ~IFF_PROMISC;
-	else
-		dev->flags |= IFF_PROMISC;
+	dev->flags |= IFF_PROMISC;
+	dev->promiscuity += inc;
+	if (dev->promiscuity == 0) {
+		/*
+		 * Avoid overflow.
+		 * If inc causes overflow, untouch promisc and return error.
+		 */
+		if (inc < 0)
+			dev->flags &= ~IFF_PROMISC;
+		else {
+			dev->promiscuity -= inc;
+			printk(KERN_WARNING "%s: promiscuity touches roof, "
+				"set promiscuity failed, promiscuity feature "
+				"of device might be broken.\n", dev->name);
+			return -EOVERFLOW;
+		}
+	}
 	if (dev->flags != old_flags) {
 		printk(KERN_INFO "device %s %s promiscuous mode\n",
 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
@@ -2798,6 +2811,7 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc)
 		if (dev->change_rx_flags)
 			dev->change_rx_flags(dev, IFF_PROMISC);
 	}
+	return 0;
 }
 
 /**
@@ -2809,14 +2823,19 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc)
  *	remains above zero the interface remains promiscuous. Once it hits zero
  *	the device reverts back to normal filtering operation. A negative inc
  *	value is used to drop promiscuity on the device.
+ *	Return 0 if successful or a negative errno code on error.
  */
-void dev_set_promiscuity(struct net_device *dev, int inc)
+int dev_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
+	int err;
 
-	__dev_set_promiscuity(dev, inc);
+	err = __dev_set_promiscuity(dev, inc);
+	if (!err)
+		return err;
 	if (dev->flags != old_flags)
 		dev_set_rx_mode(dev);
+	return err;
 }
 
 /**
@@ -2829,22 +2848,38 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
  *	to all interfaces. Once it hits zero the device reverts back to normal
  *	filtering operation. A negative @inc value is used to drop the counter
  *	when releasing a resource needing all multicasts.
+ *	Return 0 if successful or a negative errno code on error.
  */
 
-void dev_set_allmulti(struct net_device *dev, int inc)
+int dev_set_allmulti(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
 
 	ASSERT_RTNL();
 
 	dev->flags |= IFF_ALLMULTI;
-	if ((dev->allmulti += inc) == 0)
-		dev->flags &= ~IFF_ALLMULTI;
+	dev->allmulti += inc;
+	if (dev->allmulti == 0) {
+		/*
+		 * Avoid overflow.
+		 * If inc causes overflow, untouch allmulti and return error.
+		 */
+		if (inc < 0)
+			dev->flags &= ~IFF_ALLMULTI;
+		else {
+			dev->allmulti -= inc;
+			printk(KERN_WARNING "%s: allmulti touches roof, "
+				"set allmulti failed, allmulti feature of "
+				"device might be broken.\n", dev->name);
+			return -EOVERFLOW;
+		}
+	}
 	if (dev->flags ^ old_flags) {
 		if (dev->change_rx_flags)
 			dev->change_rx_flags(dev, IFF_ALLMULTI);
 		dev_set_rx_mode(dev);
 	}
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 7115e632f90952454ab6426e0d2151327162a30f Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Thu, 19 Jun 2008 16:07:48 -0700
Subject: sctp: Validate Initiate Tag when handling ICMP message

This patch add to validate initiate tag and chunk type if verification
tag is 0 when handling ICMP message.

RFC 4960, Appendix C. ICMP Handling

ICMP6) An implementation MUST validate that the Verification Tag
contained in the ICMP message matches the Verification Tag of the peer.
If the Verification Tag is not 0 and does NOT match, discard the ICMP
message.  If it is 0 and the ICMP message contains enough bytes to
verify that the chunk type is an INIT chunk and that the Initiate Tag
matches the tag of the peer, continue with ICMP7.  If the ICMP message
is too short or the chunk type or the Initiate Tag does not match,
silently discard the packet.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/input.c b/net/sctp/input.c
index ca6b022b1df..d354a23972d 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -430,6 +430,9 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
 	struct sock *sk = NULL;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport = NULL;
+	struct sctp_init_chunk *chunkhdr;
+	__u32 vtag = ntohl(sctphdr->vtag);
+	int len = skb->len - ((void *)sctphdr - (void *)skb->data);
 
 	*app = NULL; *tpp = NULL;
 
@@ -451,8 +454,28 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
 
 	sk = asoc->base.sk;
 
-	if (ntohl(sctphdr->vtag) != asoc->c.peer_vtag) {
-		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	/* RFC 4960, Appendix C. ICMP Handling
+	 *
+	 * ICMP6) An implementation MUST validate that the Verification Tag
+	 * contained in the ICMP message matches the Verification Tag of
+	 * the peer.  If the Verification Tag is not 0 and does NOT
+	 * match, discard the ICMP message.  If it is 0 and the ICMP
+	 * message contains enough bytes to verify that the chunk type is
+	 * an INIT chunk and that the Initiate Tag matches the tag of the
+	 * peer, continue with ICMP7.  If the ICMP message is too short
+	 * or the chunk type or the Initiate Tag does not match, silently
+	 * discard the packet.
+	 */
+	if (vtag == 0) {
+		chunkhdr = (struct sctp_init_chunk *)((void *)sctphdr
+				+ sizeof(struct sctphdr));
+		if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t)
+			  + sizeof(__be32) ||
+		    chunkhdr->chunk_hdr.type != SCTP_CID_INIT ||
+		    ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) {
+			goto out;
+		}
+	} else if (vtag != asoc->c.peer_vtag) {
 		goto out;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 2e3216cd54b142ba605e87522e15f42e0c4e3996 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 19 Jun 2008 16:08:18 -0700
Subject: sctp: Follow security requirement of responding with 1 packet

RFC 4960, Section 11.4. Protection of Non-SCTP-Capable Hosts

When an SCTP stack receives a packet containing multiple control or
DATA chunks and the processing of the packet requires the sending of
multiple chunks in response, the sender of the response chunk(s) MUST
NOT send more than one packet.  If bundling is supported, multiple
response chunks that fit into a single packet MAY be bundled together
into one single response packet.  If bundling is not supported, then
the sender MUST NOT send more than one response chunk and MUST
discard all other responses.  Note that this rule does NOT apply to a
SACK chunk, since a SACK chunk is, in itself, a response to DATA and
a SACK does not require a response of more DATA.

We implement this by not servicing our outqueue until we reach the end
of the packet.  This enables maximum bundling.  We also identify
'response' chunks and make sure that we only send 1 packet when sending
such chunks.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  2 +-
 net/sctp/associola.c       |  1 +
 net/sctp/output.c          |  7 +++++--
 net/sctp/outqueue.c        | 34 ++++++++++++++++++++++++----------
 net/sctp/sm_sideeffect.c   | 27 ++++++++++++++++++---------
 net/sctp/sm_statefuns.c    | 16 +++++-----------
 6 files changed, 54 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index fbc27ac8a09..82116e84ee3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -827,7 +827,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *,
 				     __u16 sport, __u16 dport);
 struct sctp_packet *sctp_packet_config(struct sctp_packet *, __u32 vtag, int);
 sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *,
-                                       struct sctp_chunk *);
+                                       struct sctp_chunk *, int);
 sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *,
                                      struct sctp_chunk *);
 int sctp_packet_transmit(struct sctp_packet *);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 35b6a023a6d..ff1dc5bd936 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1025,6 +1025,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 	struct sctp_chunk *chunk;
 	struct sock *sk;
 	struct sctp_inq *inqueue;
+	struct sctp_outq *outq;
 	int state;
 	sctp_subtype_t subtype;
 	int error = 0;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 6d45bae93b4..abcd00dc05e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -157,7 +157,8 @@ void sctp_packet_free(struct sctp_packet *packet)
  * packet can be sent only after receiving the COOKIE_ACK.
  */
 sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
-				       struct sctp_chunk *chunk)
+				       struct sctp_chunk *chunk,
+				       int one_packet)
 {
 	sctp_xmit_t retval;
 	int error = 0;
@@ -175,7 +176,9 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
 			/* If we have an empty packet, then we can NOT ever
 			 * return PMTU_FULL.
 			 */
-			retval = sctp_packet_append_chunk(packet, chunk);
+			if (!one_packet)
+				retval = sctp_packet_append_chunk(packet,
+								  chunk);
 		}
 		break;
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index ace6770e904..70ead8dc348 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -702,6 +702,7 @@ int sctp_outq_uncork(struct sctp_outq *q)
 	return error;
 }
 
+
 /*
  * Try to flush an outqueue.
  *
@@ -725,6 +726,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 	sctp_xmit_t status;
 	int error = 0;
 	int start_timer = 0;
+	int one_packet = 0;
 
 	/* These transports have chunks to send. */
 	struct list_head transport_list;
@@ -830,20 +832,33 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			if (sctp_test_T_bit(chunk)) {
 				packet->vtag = asoc->c.my_vtag;
 			}
-		case SCTP_CID_SACK:
-		case SCTP_CID_HEARTBEAT:
+		/* The following chunks are "response" chunks, i.e.
+		 * they are generated in response to something we
+		 * received.  If we are sending these, then we can
+		 * send only 1 packet containing these chunks.
+		 */
 		case SCTP_CID_HEARTBEAT_ACK:
-		case SCTP_CID_SHUTDOWN:
 		case SCTP_CID_SHUTDOWN_ACK:
-		case SCTP_CID_ERROR:
-		case SCTP_CID_COOKIE_ECHO:
 		case SCTP_CID_COOKIE_ACK:
-		case SCTP_CID_ECN_ECNE:
+		case SCTP_CID_COOKIE_ECHO:
+		case SCTP_CID_ERROR:
 		case SCTP_CID_ECN_CWR:
-		case SCTP_CID_ASCONF:
 		case SCTP_CID_ASCONF_ACK:
+			one_packet = 1;
+			/* Fall throught */
+
+		case SCTP_CID_SACK:
+		case SCTP_CID_HEARTBEAT:
+		case SCTP_CID_SHUTDOWN:
+		case SCTP_CID_ECN_ECNE:
+		case SCTP_CID_ASCONF:
 		case SCTP_CID_FWD_TSN:
-			sctp_packet_transmit_chunk(packet, chunk);
+			status = sctp_packet_transmit_chunk(packet, chunk,
+							    one_packet);
+			if (status  != SCTP_XMIT_OK) {
+				/* put the chunk back */
+				list_add(&chunk->list, &q->control_chunk_list);
+			}
 			break;
 
 		default:
@@ -974,7 +989,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 					atomic_read(&chunk->skb->users) : -1);
 
 			/* Add the chunk to the packet.  */
-			status = sctp_packet_transmit_chunk(packet, chunk);
+			status = sctp_packet_transmit_chunk(packet, chunk, 0);
 
 			switch (status) {
 			case SCTP_XMIT_PMTU_FULL:
@@ -1239,7 +1254,6 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	 * Make sure the empty queue handler will get run later.
 	 */
 	q->empty = (list_empty(&q->out_chunk_list) &&
-		    list_empty(&q->control_chunk_list) &&
 		    list_empty(&q->retransmit));
 	if (!q->empty)
 		goto finish;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b083312c725..9732c797e8e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -664,7 +664,7 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
 				 struct sctp_association *asoc,
 				 struct sctp_sackhdr *sackh)
 {
-	int err;
+	int err = 0;
 
 	if (sctp_outq_sack(&asoc->outqueue, sackh)) {
 		/* There are no more TSNs awaiting SACK.  */
@@ -672,11 +672,6 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
 				 SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN),
 				 asoc->state, asoc->ep, asoc, NULL,
 				 GFP_ATOMIC);
-	} else {
-		/* Windows may have opened, so we need
-		 * to check if we have DATA to transmit
-		 */
-		err = sctp_outq_flush(&asoc->outqueue, 0);
 	}
 
 	return err;
@@ -1481,8 +1476,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			break;
 
 		case SCTP_CMD_DISCARD_PACKET:
-			/* We need to discard the whole packet.  */
+			/* We need to discard the whole packet.
+			 * Uncork the queue since there might be
+			 * responses pending
+			 */
 			chunk->pdiscard = 1;
+			if (asoc) {
+				sctp_outq_uncork(&asoc->outqueue);
+				local_cork = 0;
+			}
 			break;
 
 		case SCTP_CMD_RTO_PENDING:
@@ -1553,8 +1555,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 	}
 
 out:
-	if (local_cork)
-		sctp_outq_uncork(&asoc->outqueue);
+	/* If this is in response to a received chunk, wait until
+	 * we are done with the packet to open the queue so that we don't
+	 * send multiple packets in response to a single request.
+	 */
+	if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) {
+		if (chunk->end_of_packet || chunk->singleton)
+			sctp_outq_uncork(&asoc->outqueue);
+	} else if (local_cork)
+			sctp_outq_uncork(&asoc->outqueue);
 	return error;
 nomem:
 	error = -ENOMEM;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 0c9d5a6950f..b66a41d03c0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -795,8 +795,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
-
 	/* This will send the COOKIE ACK */
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 
@@ -883,7 +881,6 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
 	if (asoc->autoclose)
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
-	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
 
 	/* It may also notify its ULP about the successful
 	 * establishment of the association with a Communication Up
@@ -1781,7 +1778,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
 		goto nomem;
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
-	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
 
 	/* RFC 2960 5.1 Normal Establishment of an Association
 	 *
@@ -1898,12 +1894,13 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
 
 		}
 	}
-	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
 
 	repl = sctp_make_cookie_ack(new_asoc, chunk);
 	if (!repl)
 		goto nomem;
 
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+
 	if (ev)
 		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
 				SCTP_ULPEVENT(ev));
@@ -1911,9 +1908,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
 		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
 					SCTP_ULPEVENT(ai_ev));
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
-	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
-
 	return SCTP_DISPOSITION_CONSUME;
 
 nomem:
@@ -3970,9 +3964,6 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 		break;
 	case SCTP_CID_ACTION_DISCARD_ERR:
-		/* Discard the packet.  */
-		sctp_sf_pdiscard(ep, asoc, type, arg, commands);
-
 		/* Generate an ERROR chunk as response. */
 		hdr = unk_chunk->chunk_hdr;
 		err_chunk = sctp_make_op_error(asoc, unk_chunk,
@@ -3982,6 +3973,9 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
 					SCTP_CHUNK(err_chunk));
 		}
+
+		/* Discard the packet.  */
+		sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 		return SCTP_DISPOSITION_CONSUME;
 		break;
 	case SCTP_CID_ACTION_SKIP:
-- 
cgit v1.2.3-70-g09d2


From 0187bdfb05674147774ca79a79942537f3ad54bd Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 19 Jun 2008 16:15:47 -0700
Subject: net: Disable LRO on devices that are forwarding

Large Receive Offload (LRO) is only appropriate for packets that are
destined for the host, and should be disabled if received packets may be
forwarded.  It can also confuse the GSO on output.

Add dev_disable_lro() function which uses the appropriate ethtool ops to
disable LRO if enabled.

Add calls to dev_disable_lro() in br_add_if() and functions that enable
IPv4 and IPv6 forwarding.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/bridge/br_if.c        |  1 +
 net/core/dev.c            | 24 ++++++++++++++++++++++++
 net/ipv4/devinet.c        | 21 ++++++++++++++++-----
 net/ipv6/addrconf.c       |  6 ++++++
 5 files changed, 48 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 45dce2b58d4..1304ad2d710 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -886,6 +886,7 @@ extern struct net_device	*__dev_get_by_name(struct net *net, const char *name);
 extern int		dev_alloc_name(struct net_device *dev, const char *name);
 extern int		dev_open(struct net_device *dev);
 extern int		dev_close(struct net_device *dev);
+extern void		dev_disable_lro(struct net_device *dev);
 extern int		dev_queue_xmit(struct sk_buff *skb);
 extern int		register_netdevice(struct net_device *dev);
 extern void		unregister_netdevice(struct net_device *dev);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 143c954681b..832a561500d 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -387,6 +387,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+	dev_disable_lro(dev);
 	dev_set_promiscuity(dev, 1);
 
 	list_add_rcu(&p->list, &br->port_list);
diff --git a/net/core/dev.c b/net/core/dev.c
index a495f712d38..f6944ecd5b2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -90,6 +90,7 @@
 #include <linux/if_ether.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/ethtool.h>
 #include <linux/notifier.h>
 #include <linux/skbuff.h>
 #include <net/net_namespace.h>
@@ -1123,6 +1124,29 @@ int dev_close(struct net_device *dev)
 }
 
 
+/**
+ *	dev_disable_lro - disable Large Receive Offload on a device
+ *	@dev: device
+ *
+ *	Disable Large Receive Offload (LRO) on a net device.  Must be
+ *	called under RTNL.  This is needed if received packets may be
+ *	forwarded to another interface.
+ */
+void dev_disable_lro(struct net_device *dev)
+{
+	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
+	    dev->ethtool_ops->set_flags) {
+		u32 flags = dev->ethtool_ops->get_flags(dev);
+		if (flags & ETH_FLAG_LRO) {
+			flags &= ~ETH_FLAG_LRO;
+			dev->ethtool_ops->set_flags(dev, flags);
+		}
+	}
+	WARN_ON(dev->features & NETIF_F_LRO);
+}
+EXPORT_SYMBOL(dev_disable_lro);
+
+
 static int dev_boot_phase = 1;
 
 /*
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f8c0b0aea93..9de2514946c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -168,6 +168,8 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	in_dev->dev = dev;
 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
 		goto out_kfree;
+	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
+		dev_disable_lro(dev);
 	/* Reference in_dev->dev */
 	dev_hold(dev);
 	/* Account for reference dev->ip_ptr (below) */
@@ -1241,6 +1243,8 @@ static void inet_forward_change(struct net *net)
 	read_lock(&dev_base_lock);
 	for_each_netdev(net, dev) {
 		struct in_device *in_dev;
+		if (on)
+			dev_disable_lro(dev);
 		rcu_read_lock();
 		in_dev = __in_dev_get_rcu(dev);
 		if (in_dev)
@@ -1248,8 +1252,6 @@ static void inet_forward_change(struct net *net)
 		rcu_read_unlock();
 	}
 	read_unlock(&dev_base_lock);
-
-	rt_cache_flush(0);
 }
 
 static int devinet_conf_proc(ctl_table *ctl, int write,
@@ -1335,10 +1337,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 	if (write && *valp != val) {
 		struct net *net = ctl->extra2;
 
-		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
-			inet_forward_change(net);
-		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
+		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
+			rtnl_lock();
+			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
+				inet_forward_change(net);
+			} else if (*valp) {
+				struct ipv4_devconf *cnf = ctl->extra1;
+				struct in_device *idev =
+					container_of(cnf, struct in_device, cnf);
+				dev_disable_lro(idev->dev);
+			}
+			rtnl_unlock();
 			rt_cache_flush(0);
+		}
 	}
 
 	return ret;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9be6be3a7ff..84127d854cf 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -348,6 +348,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 		kfree(ndev);
 		return NULL;
 	}
+	if (ndev->cnf.forwarding)
+		dev_disable_lro(dev);
 	/* We refer to the device */
 	dev_hold(dev);
 
@@ -442,6 +444,8 @@ static void dev_forward_change(struct inet6_dev *idev)
 	if (!idev)
 		return;
 	dev = idev->dev;
+	if (idev->cnf.forwarding)
+		dev_disable_lro(dev);
 	if (dev && (dev->flags & IFF_MULTICAST)) {
 		if (idev->cnf.forwarding)
 			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
@@ -487,12 +491,14 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 	if (p == &net->ipv6.devconf_dflt->forwarding)
 		return;
 
+	rtnl_lock();
 	if (p == &net->ipv6.devconf_all->forwarding) {
 		__s32 newf = net->ipv6.devconf_all->forwarding;
 		net->ipv6.devconf_dflt->forwarding = newf;
 		addrconf_forward_change(net, newf);
 	} else if ((!*p) ^ (!old))
 		dev_forward_change((struct inet6_dev *)table->extra1);
+	rtnl_unlock();
 
 	if (*p)
 		rt6_purge_dflt_routers(net);
-- 
cgit v1.2.3-70-g09d2


From 4497b0763cb1afae463f5e144c28b5d806e28b60 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 19 Jun 2008 16:22:28 -0700
Subject: net: Discard and warn about LRO'd skbs received for forwarding

Add skb_warn_if_lro() to test whether an skb was received with LRO and
warn if so.

Change br_forward(), ip_forward() and ip6_forward() to call it) and
discard the skb if it returns true.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h  | 14 ++++++++++++++
 net/bridge/br_forward.c |  2 +-
 net/core/skbuff.c       |  8 ++++++++
 net/ipv4/ip_forward.c   |  3 +++
 net/ipv6/ip6_output.c   |  3 +++
 5 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 299ec4b3141..2220b9e2dab 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1702,6 +1702,20 @@ static inline int skb_is_gso_v6(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
+extern void __skb_warn_lro_forwarding(const struct sk_buff *skb);
+
+static inline bool skb_warn_if_lro(const struct sk_buff *skb)
+{
+	/* LRO sets gso_size but not gso_type, whereas if GSO is really
+	 * wanted then gso_type will be set. */
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	if (shinfo->gso_size != 0 && unlikely(shinfo->gso_type == 0)) {
+		__skb_warn_lro_forwarding(skb);
+		return true;
+	}
+	return false;
+}
+
 static inline void skb_forward_csum(struct sk_buff *skb)
 {
 	/* Unfortunately we don't support this one.  Any brave souls? */
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 512645727f5..bdd9ccea17c 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -89,7 +89,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 /* called with rcu_read_lock */
 void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
 {
-	if (should_deliver(to, skb)) {
+	if (!skb_warn_if_lro(skb) && should_deliver(to, skb)) {
 		__br_forward(to, skb);
 		return;
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3e18f8525e8..2df012be973 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2583,6 +2583,13 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
 	return true;
 }
 
+void __skb_warn_lro_forwarding(const struct sk_buff *skb)
+{
+	if (net_ratelimit())
+		pr_warning("%s: received packets cannot be forwarded"
+			   " while LRO is enabled\n", skb->dev->name);
+}
+
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(kfree_skb);
@@ -2616,6 +2623,7 @@ EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
 EXPORT_SYMBOL(skb_append_datato_frags);
+EXPORT_SYMBOL(__skb_warn_lro_forwarding);
 
 EXPORT_SYMBOL_GPL(skb_to_sgvec);
 EXPORT_SYMBOL_GPL(skb_cow_data);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 37d36a3f33c..da14725916d 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -56,6 +56,9 @@ int ip_forward(struct sk_buff *skb)
 	struct rtable *rt;	/* Route we use */
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
+	if (skb_warn_if_lro(skb))
+		goto drop;
+
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
 		goto drop;
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 40a2813a63d..fd7cd1bfe15 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -407,6 +407,9 @@ int ip6_forward(struct sk_buff *skb)
 	if (ipv6_devconf.forwarding == 0)
 		goto error;
 
+	if (skb_warn_if_lro(skb))
+		goto drop;
+
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 		goto drop;
-- 
cgit v1.2.3-70-g09d2


From 0f474d9bc59e23f1952e75d2f6edcaeb358eb32d Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 20 Jun 2008 10:34:47 -0700
Subject: sctp: Kill unused variable in sctp_assoc_bh_rcv()

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ff1dc5bd936..35b6a023a6d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1025,7 +1025,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 	struct sctp_chunk *chunk;
 	struct sock *sk;
 	struct sctp_inq *inqueue;
-	struct sctp_outq *outq;
 	int state;
 	sctp_subtype_t subtype;
 	int error = 0;
-- 
cgit v1.2.3-70-g09d2


From cddf63d99d0d145f18b293c3d0de4af7dab2a922 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 20 May 2008 19:16:06 +0200
Subject: irnet_ppp: BKL pushdown

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 net/irda/irnet/irnet.h     | 1 +
 net/irda/irnet/irnet_ppp.c | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index b001c361ad3..bccf4d0059f 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -241,6 +241,7 @@
 #include <linux/module.h>
 
 #include <linux/kernel.h>
+#include <linux/smp_lock.h>
 #include <linux/skbuff.h>
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index e0eab5927c4..e84a70dd346 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -479,6 +479,7 @@ dev_irnet_open(struct inode *	inode,
   ap = kzalloc(sizeof(*ap), GFP_KERNEL);
   DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n");
 
+  lock_kernel();
   /* initialize the irnet structure */
   ap->file = file;
 
@@ -500,6 +501,7 @@ dev_irnet_open(struct inode *	inode,
     {
       DERROR(FS_ERROR, "Can't setup IrDA link...\n");
       kfree(ap);
+      unlock_kernel();
       return err;
     }
 
@@ -510,6 +512,7 @@ dev_irnet_open(struct inode *	inode,
   file->private_data = ap;
 
   DEXIT(FS_TRACE, " - ap=0x%p\n", ap);
+  unlock_kernel();
   return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From bedbdd8bada194a690d2901801bf8451965086b3 Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.de>
Date: Tue, 10 Jun 2008 08:40:35 -0400
Subject: knfsd: Replace lock_kernel with a mutex for nfsd thread
 startup/shutdown locking.

This removes the BKL from the RPC service creation codepath. The BKL
really isn't adequate for this job since some of this info needs
protection across sleeps.

Also, add some comments to try and clarify how the locking should work
and to make it clear that the BKL isn't necessary as long as there is
adequate locking between tasks when touching the svc_serv fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsctl.c          | 37 +++++++++++++++++++++++--------------
 fs/nfsd/nfssvc.c          | 45 ++++++++++++++++++++++++++++++++-------------
 include/linux/nfsd/nfsd.h |  1 +
 net/sunrpc/svc.c          | 15 +++++++++------
 4 files changed, 65 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5ac00c4fee9..049d2a9c771 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -450,22 +450,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 	int i;
 	int rv;
 	int len;
-    	int npools = nfsd_nrpools();
+	int npools;
 	int *nthreads;
 
+	mutex_lock(&nfsd_mutex);
+	npools = nfsd_nrpools();
 	if (npools == 0) {
 		/*
 		 * NFS is shut down.  The admin can start it by
 		 * writing to the threads file but NOT the pool_threads
 		 * file, sorry.  Report zero threads.
 		 */
+		mutex_unlock(&nfsd_mutex);
 		strcpy(buf, "0\n");
 		return strlen(buf);
 	}
 
 	nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
+	rv = -ENOMEM;
 	if (nthreads == NULL)
-		return -ENOMEM;
+		goto out_free;
 
 	if (size > 0) {
 		for (i = 0; i < npools; i++) {
@@ -496,10 +500,12 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 		mesg += len;
 	}
 
+	mutex_unlock(&nfsd_mutex);
 	return (mesg-buf);
 
 out_free:
 	kfree(nthreads);
+	mutex_unlock(&nfsd_mutex);
 	return rv;
 }
 
@@ -566,14 +572,13 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
 	return len;
 }
 
-static ssize_t write_ports(struct file *file, char *buf, size_t size)
+static ssize_t __write_ports(struct file *file, char *buf, size_t size)
 {
 	if (size == 0) {
 		int len = 0;
-		lock_kernel();
+
 		if (nfsd_serv)
 			len = svc_xprt_names(nfsd_serv, buf, 0);
-		unlock_kernel();
 		return len;
 	}
 	/* Either a single 'fd' number is written, in which
@@ -603,9 +608,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
 			/* Decrease the count, but don't shutdown the
 			 * the service
 			 */
-			lock_kernel();
 			nfsd_serv->sv_nrthreads--;
-			unlock_kernel();
 		}
 		return err < 0 ? err : 0;
 	}
@@ -614,10 +617,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
 		int len = 0;
 		if (!toclose)
 			return -ENOMEM;
-		lock_kernel();
 		if (nfsd_serv)
 			len = svc_sock_names(buf, nfsd_serv, toclose);
-		unlock_kernel();
 		if (len >= 0)
 			lockd_down();
 		kfree(toclose);
@@ -655,7 +656,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
 		if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
 			if (port == 0)
 				return -EINVAL;
-			lock_kernel();
 			if (nfsd_serv) {
 				xprt = svc_find_xprt(nfsd_serv, transport,
 						     AF_UNSPEC, port);
@@ -666,13 +666,22 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
 				} else
 					err = -ENOTCONN;
 			}
-			unlock_kernel();
 			return err < 0 ? err : 0;
 		}
 	}
 	return -EINVAL;
 }
 
+static ssize_t write_ports(struct file *file, char *buf, size_t size)
+{
+	ssize_t rv;
+	mutex_lock(&nfsd_mutex);
+	rv = __write_ports(file, buf, size);
+	mutex_unlock(&nfsd_mutex);
+	return rv;
+}
+
+
 int nfsd_max_blksize;
 
 static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
@@ -691,13 +700,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 		if (bsize > NFSSVC_MAXBLKSIZE)
 			bsize = NFSSVC_MAXBLKSIZE;
 		bsize &= ~(1024-1);
-		lock_kernel();
+		mutex_lock(&nfsd_mutex);
 		if (nfsd_serv && nfsd_serv->sv_nrthreads) {
-			unlock_kernel();
+			mutex_unlock(&nfsd_mutex);
 			return -EBUSY;
 		}
 		nfsd_max_blksize = bsize;
-		unlock_kernel();
+		mutex_unlock(&nfsd_mutex);
 	}
 	return sprintf(buf, "%d\n", nfsd_max_blksize);
 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 941041f4b13..512bd04c6dd 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -53,11 +53,27 @@
 extern struct svc_program	nfsd_program;
 static void			nfsd(struct svc_rqst *rqstp);
 struct timeval			nfssvc_boot;
-       struct svc_serv 		*nfsd_serv;
 static atomic_t			nfsd_busy;
 static unsigned long		nfsd_last_call;
 static DEFINE_SPINLOCK(nfsd_call_lock);
 
+/*
+ * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
+ * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
+ * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
+ *
+ * If (out side the lock) nfsd_serv is non-NULL, then it must point to a
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
+ * of nfsd threads must exist and each must listed in ->sp_all_threads in each
+ * entry of ->sv_pools[].
+ *
+ * Transitions of the thread count between zero and non-zero are of particular
+ * interest since the svc_serv needs to be created and initialized at that
+ * point, or freed.
+ */
+DEFINE_MUTEX(nfsd_mutex);
+struct svc_serv 		*nfsd_serv;
+
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
 static struct svc_version *	nfsd_acl_version[] = {
@@ -190,13 +206,14 @@ void nfsd_reset_versions(void)
 	}
 }
 
+
 int nfsd_create_serv(void)
 {
 	int err = 0;
-	lock_kernel();
+
+	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 	if (nfsd_serv) {
 		svc_get(nfsd_serv);
-		unlock_kernel();
 		return 0;
 	}
 	if (nfsd_max_blksize == 0) {
@@ -223,7 +240,7 @@ int nfsd_create_serv(void)
 				      nfsd, SIG_NOCLEAN, THIS_MODULE);
 	if (nfsd_serv == NULL)
 		err = -ENOMEM;
-	unlock_kernel();
+
 	do_gettimeofday(&nfssvc_boot);		/* record boot time */
 	return err;
 }
@@ -282,6 +299,8 @@ int nfsd_set_nrthreads(int n, int *nthreads)
 	int tot = 0;
 	int err = 0;
 
+	WARN_ON(!mutex_is_locked(&nfsd_mutex));
+
 	if (nfsd_serv == NULL || n <= 0)
 		return 0;
 
@@ -316,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
 		nthreads[0] = 1;
 
 	/* apply the new numbers */
-	lock_kernel();
 	svc_get(nfsd_serv);
 	for (i = 0; i < n; i++) {
 		err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
@@ -325,7 +343,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
 			break;
 	}
 	svc_destroy(nfsd_serv);
-	unlock_kernel();
 
 	return err;
 }
@@ -334,8 +351,8 @@ int
 nfsd_svc(unsigned short port, int nrservs)
 {
 	int	error;
-	
-	lock_kernel();
+
+	mutex_lock(&nfsd_mutex);
 	dprintk("nfsd: creating service\n");
 	error = -EINVAL;
 	if (nrservs <= 0)
@@ -363,7 +380,7 @@ nfsd_svc(unsigned short port, int nrservs)
  failure:
 	svc_destroy(nfsd_serv);		/* Release server */
  out:
-	unlock_kernel();
+	mutex_unlock(&nfsd_mutex);
 	return error;
 }
 
@@ -399,7 +416,7 @@ nfsd(struct svc_rqst *rqstp)
 	sigset_t shutdown_mask, allowed_mask;
 
 	/* Lock module and set up kernel thread */
-	lock_kernel();
+	mutex_lock(&nfsd_mutex);
 	daemonize("nfsd");
 
 	/* After daemonize() this kernel thread shares current->fs
@@ -417,11 +434,13 @@ nfsd(struct svc_rqst *rqstp)
 	siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
 	siginitsetinv(&allowed_mask, ALLOWED_SIGS);
 
+
 	nfsdstats.th_cnt++;
 
 	rqstp->rq_task = current;
 
-	unlock_kernel();
+	mutex_unlock(&nfsd_mutex);
+
 
 	/*
 	 * We want less throttling in balance_dirty_pages() so that nfs to
@@ -477,7 +496,7 @@ nfsd(struct svc_rqst *rqstp)
 	/* Clear signals before calling svc_exit_thread() */
 	flush_signals(current);
 
-	lock_kernel();
+	mutex_lock(&nfsd_mutex);
 
 	nfsdstats.th_cnt --;
 
@@ -486,7 +505,7 @@ out:
 	svc_exit_thread(rqstp);
 
 	/* Release module */
-	unlock_kernel();
+	mutex_unlock(&nfsd_mutex);
 	module_put_and_exit(0);
 }
 
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 41d30c9c9de..88d85b96442 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -54,6 +54,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
 extern struct svc_program	nfsd_program;
 extern struct svc_version	nfsd_version2, nfsd_version3,
 				nfsd_version4;
+extern struct mutex		nfsd_mutex;
 extern struct svc_serv		*nfsd_serv;
 
 extern struct seq_operations nfs_exports_op;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 01c7e311b90..7bffaff2a3a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -461,7 +461,8 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 EXPORT_SYMBOL(svc_create_pooled);
 
 /*
- * Destroy an RPC service.  Should be called with the BKL held
+ * Destroy an RPC service. Should be called with appropriate locking to
+ * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
  */
 void
 svc_destroy(struct svc_serv *serv)
@@ -578,9 +579,10 @@ out_enomem:
 EXPORT_SYMBOL(svc_prepare_thread);
 
 /*
- * Create a thread in the given pool.  Caller must hold BKL.
- * On a NUMA or SMP machine, with a multi-pool serv, the thread
- * will be restricted to run on the cpus belonging to the pool.
+ * Create a thread in the given pool.  Caller must hold BKL or another lock to
+ * serialize access to the svc_serv struct. On a NUMA or SMP machine, with a
+ * multi-pool serv, the thread will be restricted to run on the cpus belonging
+ * to the pool.
  */
 static int
 __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
@@ -674,7 +676,7 @@ found_pool:
  * of threads the given number.  If `pool' is non-NULL, applies
  * only to threads in that pool, otherwise round-robins between
  * all pools.  Must be called with a svc_get() reference and
- * the BKL held.
+ * the BKL or another lock to protect access to svc_serv fields.
  *
  * Destroying threads relies on the service threads filling in
  * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
@@ -722,7 +724,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 EXPORT_SYMBOL(svc_set_num_threads);
 
 /*
- * Called from a server thread as it's exiting.  Caller must hold BKL.
+ * Called from a server thread as it's exiting. Caller must hold the BKL or
+ * the "service mutex", whichever is appropriate for the service.
  */
 void
 svc_exit_thread(struct svc_rqst *rqstp)
-- 
cgit v1.2.3-70-g09d2


From 9867d76ca16b3f455f9ca83861f4ce5c94a25928 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 10 Jun 2008 08:40:38 -0400
Subject: knfsd: convert knfsd to kthread API

This patch is rather large, but I couldn't figure out a way to break it
up that would remain bisectable. It does several things:

- change svc_thread_fn typedef to better match what kthread_create expects
- change svc_pool_map_set_cpumask to be more kthread friendly. Make it
  take a task arg and and get rid of the "oldmask"
- have svc_set_num_threads call kthread_create directly
- eliminate __svc_create_thread

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfssvc.c           |  45 ++++++++++++--------
 include/linux/sunrpc/svc.h |   2 +-
 net/sunrpc/svc.c           | 100 +++++++++++++++------------------------------
 3 files changed, 64 insertions(+), 83 deletions(-)

(limited to 'net')

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6339cb70a08..9e215681371 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -21,6 +21,7 @@
 #include <linux/smp_lock.h>
 #include <linux/freezer.h>
 #include <linux/fs_struct.h>
+#include <linux/kthread.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/stats.h>
@@ -46,7 +47,7 @@
 #define SHUTDOWN_SIGS	(sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
 
 extern struct svc_program	nfsd_program;
-static void			nfsd(struct svc_rqst *rqstp);
+static int			nfsd(void *vrqstp);
 struct timeval			nfssvc_boot;
 static atomic_t			nfsd_busy;
 static unsigned long		nfsd_last_call;
@@ -407,18 +408,19 @@ update_thread_usage(int busy_threads)
 /*
  * This is the NFS server kernel thread
  */
-static void
-nfsd(struct svc_rqst *rqstp)
+static int
+nfsd(void *vrqstp)
 {
+	struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
 	struct fs_struct *fsp;
-	int		err;
 	sigset_t shutdown_mask, allowed_mask;
+	int err, preverr = 0;
+	unsigned int signo;
 
 	/* Lock module and set up kernel thread */
 	mutex_lock(&nfsd_mutex);
-	daemonize("nfsd");
 
-	/* After daemonize() this kernel thread shares current->fs
+	/* At this point, the thread shares current->fs
 	 * with the init process. We need to create files with a
 	 * umask of 0 instead of init's umask. */
 	fsp = copy_fs_struct(current->fs);
@@ -433,14 +435,18 @@ nfsd(struct svc_rqst *rqstp)
 	siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
 	siginitsetinv(&allowed_mask, ALLOWED_SIGS);
 
+	/*
+	 * thread is spawned with all signals set to SIG_IGN, re-enable
+	 * the ones that matter
+	 */
+	for (signo = 1; signo <= _NSIG; signo++) {
+		if (!sigismember(&shutdown_mask, signo))
+			allow_signal(signo);
+	}
 
 	nfsdstats.th_cnt++;
-
-	rqstp->rq_task = current;
-
 	mutex_unlock(&nfsd_mutex);
 
-
 	/*
 	 * We want less throttling in balance_dirty_pages() so that nfs to
 	 * localhost doesn't cause nfsd to lock up due to all the client's
@@ -462,15 +468,25 @@ nfsd(struct svc_rqst *rqstp)
 		 */
 		while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
 			;
-		if (err < 0)
+		if (err == -EINTR)
 			break;
+		else if (err < 0) {
+			if (err != preverr) {
+				printk(KERN_WARNING "%s: unexpected error "
+					"from svc_recv (%d)\n", __func__, -err);
+				preverr = err;
+			}
+			schedule_timeout_uninterruptible(HZ);
+			continue;
+		}
+
 		update_thread_usage(atomic_read(&nfsd_busy));
 		atomic_inc(&nfsd_busy);
 
 		/* Lock the export hash tables for reading. */
 		exp_readlock();
 
-		/* Process request with signals blocked.  */
+		/* Process request with signals blocked. */
 		sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
 
 		svc_process(rqstp);
@@ -481,14 +497,10 @@ nfsd(struct svc_rqst *rqstp)
 		atomic_dec(&nfsd_busy);
 	}
 
-	if (err != -EINTR)
-		printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
-
 	/* Clear signals before calling svc_exit_thread() */
 	flush_signals(current);
 
 	mutex_lock(&nfsd_mutex);
-
 	nfsdstats.th_cnt --;
 
 out:
@@ -498,6 +510,7 @@ out:
 	/* Release module */
 	mutex_unlock(&nfsd_mutex);
 	module_put_and_exit(0);
+	return 0;
 }
 
 static __be32 map_new_errors(u32 vers, __be32 nfserr)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4b54c5fdcfd..011d6d8100d 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -22,7 +22,7 @@
 /*
  * This is the RPC server thread function prototype
  */
-typedef void		(*svc_thread_fn)(struct svc_rqst *);
+typedef int		(*svc_thread_fn)(void *);
 
 /*
  *
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 7bffaff2a3a..03a9f1a9e75 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/xdr.h>
@@ -291,15 +292,14 @@ svc_pool_map_put(void)
 
 
 /*
- * Set the current thread's cpus_allowed mask so that it
+ * Set the given thread's cpus_allowed mask so that it
  * will only run on cpus in the given pool.
- *
- * Returns 1 and fills in oldmask iff a cpumask was applied.
  */
-static inline int
-svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+static inline void
+svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
 {
 	struct svc_pool_map *m = &svc_pool_map;
+	unsigned int node = m->pool_to[pidx];
 
 	/*
 	 * The caller checks for sv_nrpools > 1, which
@@ -307,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
 	 */
 	BUG_ON(m->count == 0);
 
-	switch (m->mode)
-	{
-	default:
-		return 0;
+	switch (m->mode) {
 	case SVC_POOL_PERCPU:
 	{
-		unsigned int cpu = m->pool_to[pidx];
-
-		*oldmask = current->cpus_allowed;
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-		return 1;
+		set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
+		break;
 	}
 	case SVC_POOL_PERNODE:
 	{
-		unsigned int node = m->pool_to[pidx];
 		node_to_cpumask_ptr(nodecpumask, node);
-
-		*oldmask = current->cpus_allowed;
-		set_cpus_allowed_ptr(current, nodecpumask);
-		return 1;
+		set_cpus_allowed_ptr(task, nodecpumask);
+		break;
 	}
 	}
 }
@@ -578,47 +569,6 @@ out_enomem:
 }
 EXPORT_SYMBOL(svc_prepare_thread);
 
-/*
- * Create a thread in the given pool.  Caller must hold BKL or another lock to
- * serialize access to the svc_serv struct. On a NUMA or SMP machine, with a
- * multi-pool serv, the thread will be restricted to run on the cpus belonging
- * to the pool.
- */
-static int
-__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
-		    struct svc_pool *pool)
-{
-	struct svc_rqst	*rqstp;
-	int		error = -ENOMEM;
-	int		have_oldmask = 0;
-	cpumask_t	uninitialized_var(oldmask);
-
-	rqstp = svc_prepare_thread(serv, pool);
-	if (IS_ERR(rqstp)) {
-		error = PTR_ERR(rqstp);
-		goto out;
-	}
-
-	if (serv->sv_nrpools > 1)
-		have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
-
-	error = kernel_thread((int (*)(void *)) func, rqstp, 0);
-
-	if (have_oldmask)
-		set_cpus_allowed(current, oldmask);
-
-	if (error < 0)
-		goto out_thread;
-	svc_sock_update_bufs(serv);
-	error = 0;
-out:
-	return error;
-
-out_thread:
-	svc_exit_thread(rqstp);
-	goto out;
-}
-
 /*
  * Choose a pool in which to create a new thread, for svc_set_num_threads
  */
@@ -688,7 +638,9 @@ found_pool:
 int
 svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 {
-	struct task_struct *victim;
+	struct svc_rqst	*rqstp;
+	struct task_struct *task;
+	struct svc_pool *chosen_pool;
 	int error = 0;
 	unsigned int state = serv->sv_nrthreads-1;
 
@@ -704,18 +656,34 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 	/* create new threads */
 	while (nrservs > 0) {
 		nrservs--;
+		chosen_pool = choose_pool(serv, pool, &state);
+
+		rqstp = svc_prepare_thread(serv, chosen_pool);
+		if (IS_ERR(rqstp)) {
+			error = PTR_ERR(rqstp);
+			break;
+		}
+
 		__module_get(serv->sv_module);
-		error = __svc_create_thread(serv->sv_function, serv,
-					    choose_pool(serv, pool, &state));
-		if (error < 0) {
+		task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
+		if (IS_ERR(task)) {
+			error = PTR_ERR(task);
 			module_put(serv->sv_module);
+			svc_exit_thread(rqstp);
 			break;
 		}
+
+		rqstp->rq_task = task;
+		if (serv->sv_nrpools > 1)
+			svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
+
+		svc_sock_update_bufs(serv);
+		wake_up_process(task);
 	}
 	/* destroy old threads */
 	while (nrservs < 0 &&
-	       (victim = choose_victim(serv, pool, &state)) != NULL) {
-		send_sig(serv->sv_kill_signal, victim, 1);
+	       (task = choose_victim(serv, pool, &state)) != NULL) {
+		send_sig(serv->sv_kill_signal, task, 1);
 		nrservs++;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From a75c5d01e4235a7dd785548ac756f248b1b40107 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 10 Jun 2008 08:40:39 -0400
Subject: sunrpc: remove sv_kill_signal field from svc_serv struct

Since we no longer make any distinction between shutdown signals with
nfsd, then it becomes easier to just standardize on a particular signal
to use to bring it down (SIGINT, in this case).

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfssvc.c           | 3 +--
 include/linux/sunrpc/svc.h | 5 ++---
 net/sunrpc/svc.c           | 5 ++---
 3 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9e215681371..26c81149d49 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -236,8 +236,7 @@ int nfsd_create_serv(void)
 
 	atomic_set(&nfsd_busy, 0);
 	nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
-				      nfsd_last_thread, nfsd, SIGINT,
-				      THIS_MODULE);
+				      nfsd_last_thread, nfsd, THIS_MODULE);
 	if (nfsd_serv == NULL)
 		err = -ENOMEM;
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 011d6d8100d..dc69068d94c 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -80,7 +80,6 @@ struct svc_serv {
 	struct module *		sv_module;	/* optional module to count when
 						 * adding threads */
 	svc_thread_fn		sv_function;	/* main function for threads */
-	int			sv_kill_signal;	/* signal to kill threads */
 };
 
 /*
@@ -388,8 +387,8 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
 					struct svc_pool *pool);
 void		   svc_exit_thread(struct svc_rqst *);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
-			void (*shutdown)(struct svc_serv*),
-			svc_thread_fn, int sig, struct module *);
+			void (*shutdown)(struct svc_serv*), svc_thread_fn,
+			struct module *);
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 void		   svc_destroy(struct svc_serv *);
 int		   svc_process(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 03a9f1a9e75..5a32cb7c4bb 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -434,7 +434,7 @@ EXPORT_SYMBOL(svc_create);
 struct svc_serv *
 svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 		void (*shutdown)(struct svc_serv *serv),
-		  svc_thread_fn func, int sig, struct module *mod)
+		  svc_thread_fn func, struct module *mod)
 {
 	struct svc_serv *serv;
 	unsigned int npools = svc_pool_map_get();
@@ -443,7 +443,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 
 	if (serv != NULL) {
 		serv->sv_function = func;
-		serv->sv_kill_signal = sig;
 		serv->sv_module = mod;
 	}
 
@@ -683,7 +682,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 	/* destroy old threads */
 	while (nrservs < 0 &&
 	       (task = choose_victim(serv, pool, &state)) != NULL) {
-		send_sig(serv->sv_kill_signal, task, 1);
+		send_sig(SIGINT, task, 1);
 		nrservs++;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From d00953a53e9a2edbe005c1e596f1e96a8a293401 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 30 Apr 2008 12:45:53 -0400
Subject: gss_krb5: create a define for token header size and clean up ptr
 location

cleanup:
Document token header size with a #define instead of open-coding it.

Don't needlessly increment "ptr" past the beginning of the header
which makes the values passed to functions more understandable and
eliminates the need for extra "krb5_hdr" pointer.

Clean up some intersecting  white-space issues flagged by checkpatch.pl.

This leaves the checksum length hard-coded at 8 for DES.  A later patch
cleans that up.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/gss_krb5.h       |  3 +++
 net/sunrpc/auth_gss/gss_krb5_seal.c   | 26 +++++++++---------
 net/sunrpc/auth_gss/gss_krb5_unseal.c | 16 +++++------
 net/sunrpc/auth_gss/gss_krb5_wrap.c   | 50 +++++++++++++++++------------------
 4 files changed, 49 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index a10f1fb0bf7..e7bbdba474d 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -51,6 +51,9 @@ struct krb5_ctx {
 
 extern spinlock_t krb5_seq_lock;
 
+/* The length of the Kerberos GSS token header */
+#define GSS_KRB5_TOK_HDR_LEN	(16)
+
 #define KG_TOK_MIC_MSG    0x0101
 #define KG_TOK_WRAP_MSG   0x0201
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 5f1d36dfbcf..b8f42ef7178 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -78,7 +78,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
 	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
 	char			cksumdata[16];
 	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
-	unsigned char		*ptr, *krb5_hdr, *msg_start;
+	unsigned char		*ptr, *msg_start;
 	s32			now;
 	u32			seq_send;
 
@@ -87,36 +87,36 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
 
 	now = get_seconds();
 
-	token->len = g_token_size(&ctx->mech_used, 24);
+	token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
 
 	ptr = token->data;
-	g_make_token_header(&ctx->mech_used, 24, &ptr);
+	g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
 
-	*ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff);
-	*ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff);
+	/* ptr now at header described in rfc 1964, section 1.2.1: */
+	ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
+	ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
 
-	/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
-	krb5_hdr = ptr - 2;
-	msg_start = krb5_hdr + 24;
+	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
 
-	*(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
-	memset(krb5_hdr + 4, 0xff, 4);
+	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
+	memset(ptr + 4, 0xff, 4);
 
-	if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum))
+	if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
 		return GSS_S_FAILURE;
 
 	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
 			  md5cksum.data, md5cksum.len))
 		return GSS_S_FAILURE;
 
-	memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8);
+	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
 
 	spin_lock(&krb5_seq_lock);
 	seq_send = ctx->seq_send++;
 	spin_unlock(&krb5_seq_lock);
 
 	if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
-			      seq_send, krb5_hdr + 16, krb5_hdr + 8))
+			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
+			      ptr + 8))
 		return GSS_S_FAILURE;
 
 	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index d91a5d00480..066ec73c84d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -92,30 +92,30 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 					read_token->len))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
-	    (*ptr++ != ( KG_TOK_MIC_MSG    &0xff))   )
+	if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
+	    (ptr[1] !=  (KG_TOK_MIC_MSG & 0xff)))
 		return GSS_S_DEFECTIVE_TOKEN;
 
 	/* XXX sanity-check bodysize?? */
 
-	signalg = ptr[0] + (ptr[1] << 8);
+	signalg = ptr[2] + (ptr[3] << 8);
 	if (signalg != SGN_ALG_DES_MAC_MD5)
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	sealalg = ptr[2] + (ptr[3] << 8);
+	sealalg = ptr[4] + (ptr[5] << 8);
 	if (sealalg != SEAL_ALG_NONE)
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
+	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum))
+	if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
 		return GSS_S_FAILURE;
 
 	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
 		return GSS_S_FAILURE;
 
-	if (memcmp(md5cksum.data + 8, ptr + 14, 8))
+	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
 		return GSS_S_BAD_SIG;
 
 	/* it got through unscathed.  Make sure the context is unexpired */
@@ -127,7 +127,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 
 	/* do sequencing checks */
 
-	if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum))
+	if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
 		return GSS_S_FAILURE;
 
 	if ((ctx->initiate && direction != 0xff) ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index b00b1b42630..283cb25c623 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -122,7 +122,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 	char			cksumdata[16];
 	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
 	int			blocksize = 0, plainlen;
-	unsigned char		*ptr, *krb5_hdr, *msg_start;
+	unsigned char		*ptr, *msg_start;
 	s32			now;
 	int			headlen;
 	struct page		**tmp_pages;
@@ -149,26 +149,26 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 	buf->len += headlen;
 	BUG_ON((buf->len - offset - headlen) % blocksize);
 
-	g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr);
+	g_make_token_header(&kctx->mech_used,
+				GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
 
 
-	*ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff);
-	*ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff);
+	/* ptr now at header described in rfc 1964, section 1.2.1: */
+	ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
+	ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
 
-	/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
-	krb5_hdr = ptr - 2;
-	msg_start = krb5_hdr + 24;
+	msg_start = ptr + 24;
 
-	*(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
-	memset(krb5_hdr + 4, 0xff, 4);
-	*(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES);
+	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
+	memset(ptr + 4, 0xff, 4);
+	*(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
 
 	make_confounder(msg_start, blocksize);
 
 	/* XXXJBF: UGH!: */
 	tmp_pages = buf->pages;
 	buf->pages = pages;
-	if (make_checksum("md5", krb5_hdr, 8, buf,
+	if (make_checksum("md5", ptr, 8, buf,
 				offset + headlen - blocksize, &md5cksum))
 		return GSS_S_FAILURE;
 	buf->pages = tmp_pages;
@@ -176,7 +176,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
 			  md5cksum.data, md5cksum.len))
 		return GSS_S_FAILURE;
-	memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8);
+	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
 
 	spin_lock(&krb5_seq_lock);
 	seq_send = kctx->seq_send++;
@@ -185,7 +185,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 	/* XXX would probably be more efficient to compute checksum
 	 * and encrypt at the same time: */
 	if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
-			       seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+			       seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
 		return GSS_S_FAILURE;
 
 	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
@@ -219,38 +219,38 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 					buf->len - offset))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
-	    (*ptr++ !=  (KG_TOK_WRAP_MSG    &0xff))   )
+	if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
+	    (ptr[1] !=  (KG_TOK_WRAP_MSG & 0xff)))
 		return GSS_S_DEFECTIVE_TOKEN;
 
 	/* XXX sanity-check bodysize?? */
 
 	/* get the sign and seal algorithms */
 
-	signalg = ptr[0] + (ptr[1] << 8);
+	signalg = ptr[2] + (ptr[3] << 8);
 	if (signalg != SGN_ALG_DES_MAC_MD5)
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	sealalg = ptr[2] + (ptr[3] << 8);
+	sealalg = ptr[4] + (ptr[5] << 8);
 	if (sealalg != SEAL_ALG_DES)
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
+	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
 		return GSS_S_DEFECTIVE_TOKEN;
 
 	if (gss_decrypt_xdr_buf(kctx->enc, buf,
-			ptr + 22 - (unsigned char *)buf->head[0].iov_base))
+			ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if (make_checksum("md5", ptr - 2, 8, buf,
-		 ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
+	if (make_checksum("md5", ptr, 8, buf,
+		 ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
 		return GSS_S_FAILURE;
 
 	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
 			   md5cksum.data, md5cksum.len))
 		return GSS_S_FAILURE;
 
-	if (memcmp(md5cksum.data + 8, ptr + 14, 8))
+	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
 		return GSS_S_BAD_SIG;
 
 	/* it got through unscathed.  Make sure the context is unexpired */
@@ -262,8 +262,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 
 	/* do sequencing checks */
 
-	if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
-				    &seqnum))
+	if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
+				    &direction, &seqnum))
 		return GSS_S_BAD_SIG;
 
 	if ((kctx->initiate && direction != 0xff) ||
@@ -274,7 +274,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 	 * better to copy and encrypt at the same time. */
 
 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
-	data_start = ptr + 22 + blocksize;
+	data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
 	orig_start = buf->head[0].iov_base + offset;
 	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
 	memmove(orig_start, data_start, data_len);
-- 
cgit v1.2.3-70-g09d2


From db8add57898751b9c0ff93ead25f20d21da5ddd0 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 30 Apr 2008 12:45:58 -0400
Subject: gss_krb5: move gss_krb5_crypto into the krb5 module

The gss_krb5_crypto.o object belongs in the rpcsec_gss_krb5 module.
Also, there is no need to export symbols from gss_krb5_crypto.c

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/auth_gss/Makefile          |  4 ++--
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 10 ----------
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index f3431a7e33d..4de8bcf26fa 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -5,12 +5,12 @@
 obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
 
 auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
-	gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o
+	gss_mech_switch.o svcauth_gss.o
 
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
 rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
-	gss_krb5_seqnum.o gss_krb5_wrap.o
+	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o
 
 obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 1d52308ca32..c93fca20455 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -83,8 +83,6 @@ out:
 	return ret;
 }
 
-EXPORT_SYMBOL(krb5_encrypt);
-
 u32
 krb5_decrypt(
      struct crypto_blkcipher *tfm,
@@ -118,8 +116,6 @@ out:
 	return ret;
 }
 
-EXPORT_SYMBOL(krb5_decrypt);
-
 static int
 checksummer(struct scatterlist *sg, void *data)
 {
@@ -161,8 +157,6 @@ out:
 	return err ? GSS_S_FAILURE : 0;
 }
 
-EXPORT_SYMBOL(make_checksum);
-
 struct encryptor_desc {
 	u8 iv[8]; /* XXX hard-coded blocksize */
 	struct blkcipher_desc desc;
@@ -262,8 +256,6 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
 	return ret;
 }
 
-EXPORT_SYMBOL(gss_encrypt_xdr_buf);
-
 struct decryptor_desc {
 	u8 iv[8]; /* XXX hard-coded blocksize */
 	struct blkcipher_desc desc;
@@ -334,5 +326,3 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
 
 	return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
 }
-
-EXPORT_SYMBOL(gss_decrypt_xdr_buf);
-- 
cgit v1.2.3-70-g09d2


From 863a24882ed0a57ff25daaf39885f3a47b706e4b Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 30 Apr 2008 12:46:08 -0400
Subject: gss_krb5: Use random value to initialize confounder

Initialize the value used for the confounder to a random value
rather than starting from zero.
Allow for confounders of length 8 or 16 (which will be needed for AES).

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/auth_gss/gss_krb5_wrap.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 283cb25c623..ae8e69b59c4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -87,8 +87,8 @@ out:
 	return 0;
 }
 
-static inline void
-make_confounder(char *p, int blocksize)
+static void
+make_confounder(char *p, u32 conflen)
 {
 	static u64 i = 0;
 	u64 *q = (u64 *)p;
@@ -102,8 +102,22 @@ make_confounder(char *p, int blocksize)
 	 * uniqueness would mean worrying about atomicity and rollover, and I
 	 * don't care enough. */
 
-	BUG_ON(blocksize != 8);
-	*q = i++;
+	/* initialize to random value */
+	if (i == 0) {
+		i = random32();
+		i = (i << 32) | random32();
+	}
+
+	switch (conflen) {
+	case 16:
+		*q++ = i++;
+		/* fall through */
+	case 8:
+		*q++ = i++;
+		break;
+	default:
+		BUG();
+	}
 }
 
 /* Assumptions: the head and tail of inbuf are ours to play with.
-- 
cgit v1.2.3-70-g09d2


From 88a6f4ad76be425f47df7f892baf913bcd466fb3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 24 Jun 2008 13:30:45 -0700
Subject: netfilter: ip6table_mangle: don't reroute in LOCAL_IN

Rerouting should only happen in LOCAL_OUT, in INPUT its useless
since the packet has already chosen its final destination.

Noticed by Alexey Dobriyan <adobriyan@gmail.com>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6table_mangle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 27a5e8b48d9..f405cea21a8 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -129,7 +129,7 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = {
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 	{
-		.hook		= ip6t_local_hook,
+		.hook		= ip6t_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
 		.hooknum	= NF_INET_LOCAL_IN,
-- 
cgit v1.2.3-70-g09d2


From 59d393ad92f719d9ef36b96eae56d4817a7eeb10 Mon Sep 17 00:00:00 2001
From: Tony Vroon <tony@linx.net>
Date: Wed, 11 Jun 2008 16:23:56 -0400
Subject: mac80211: implement EU regulatory domain

Implement missing EU regulatory domain for mac80211. Based on the
information in IEEE 802.11-2007 (specifically pages 1142, 1143 & 1148)
and ETSI 301 893 (V1.4.1).
With thanks to Johannes Berg.

Signed-off-by: Tony Vroon <tony@linx.net>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 185488da246..855bff4b325 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -80,6 +80,23 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = {
 				     IEEE80211_CHAN_RADAR),
 };
 
+static const struct ieee80211_channel_range ieee80211_EU_channels[] = {
+	/* IEEE 802.11b/g, channels 1..13 */
+	RANGE_PWR(2412, 2472, 20, 6, 0),
+	/* IEEE 802.11a, channel 36*/
+	RANGE_PWR(5180, 5180, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN),
+	/* IEEE 802.11a, channel 40*/
+	RANGE_PWR(5200, 5200, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN),
+	/* IEEE 802.11a, channel 44*/
+	RANGE_PWR(5220, 5220, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN),
+	/* IEEE 802.11a, channels 48..64 */
+	RANGE_PWR(5240, 5320, 23, 6, IEEE80211_CHAN_NO_IBSS |
+				     IEEE80211_CHAN_RADAR),
+	/* IEEE 802.11a, channels 100..140 */
+	RANGE_PWR(5500, 5700, 30, 6, IEEE80211_CHAN_NO_IBSS |
+				     IEEE80211_CHAN_RADAR),
+};
+
 #define REGDOM(_code)							\
 	{								\
 		.code = __stringify(_code),				\
@@ -90,6 +107,7 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = {
 static const struct ieee80211_regdomain ieee80211_regdoms[] = {
 	REGDOM(US),
 	REGDOM(JP),
+	REGDOM(EU),
 };
 
 
-- 
cgit v1.2.3-70-g09d2


From 8691e5a8f691cc2a4fda0651e8d307aaba0e7d68 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 6 Jun 2008 11:18:06 +0200
Subject: smp_call_function: get rid of the unused nonatomic/retry argument

It's never used and the comments refer to nonatomic and retry
interchangably. So get rid of it.

Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/alpha/kernel/core_marvel.c      |  2 +-
 arch/alpha/kernel/smp.c              |  6 +++---
 arch/alpha/oprofile/common.c         |  6 +++---
 arch/arm/oprofile/op_model_mpcore.c  |  2 +-
 arch/arm/vfp/vfpmodule.c             |  2 +-
 arch/cris/arch-v32/kernel/smp.c      |  5 ++---
 arch/ia64/kernel/mca.c               |  2 +-
 arch/ia64/kernel/palinfo.c           |  2 +-
 arch/ia64/kernel/perfmon.c           |  2 +-
 arch/ia64/kernel/process.c           |  2 +-
 arch/ia64/kernel/smpboot.c           |  2 +-
 arch/ia64/kernel/uncached.c          |  5 ++---
 arch/ia64/sn/kernel/sn2/sn_hwperf.c  |  2 +-
 arch/m32r/kernel/smp.c               |  4 ++--
 arch/mips/kernel/smp.c               |  4 ++--
 arch/mips/mm/c-r4k.c                 | 18 +++++++++---------
 arch/mips/pmc-sierra/yosemite/prom.c |  2 +-
 arch/mips/sibyte/cfe/setup.c         |  2 +-
 arch/mips/sibyte/sb1250/prom.c       |  2 +-
 arch/powerpc/kernel/smp.c            |  2 +-
 arch/s390/appldata/appldata_base.c   |  4 ++--
 arch/s390/kernel/smp.c               | 16 ++++++----------
 arch/s390/kernel/time.c              |  4 ++--
 arch/sh/kernel/smp.c                 | 10 +++++-----
 arch/sparc64/kernel/smp.c            | 12 ++++--------
 arch/um/kernel/smp.c                 |  3 +--
 arch/x86/kernel/cpu/mtrr/main.c      |  4 ++--
 arch/x86/kernel/cpuid.c              |  2 +-
 arch/x86/kernel/ldt.c                |  2 +-
 arch/x86/kernel/nmi_32.c             |  2 +-
 arch/x86/kernel/nmi_64.c             |  2 +-
 arch/x86/kernel/smp.c                |  2 +-
 arch/x86/kernel/vsyscall_64.c        |  2 +-
 arch/x86/kvm/vmx.c                   |  2 +-
 arch/x86/kvm/x86.c                   |  2 +-
 arch/x86/lib/msr-on-cpu.c            |  8 ++++----
 arch/x86/mach-voyager/voyager_smp.c  |  2 +-
 arch/x86/xen/smp.c                   |  2 +-
 drivers/acpi/processor_idle.c        |  2 +-
 drivers/cpuidle/cpuidle.c            |  2 +-
 include/asm-alpha/smp.h              |  2 +-
 include/asm-sparc/smp.h              |  2 +-
 include/linux/smp.h                  |  8 ++++----
 kernel/smp.c                         |  6 ++----
 kernel/softirq.c                     |  2 +-
 kernel/time/tick-broadcast.c         |  2 +-
 net/core/flow.c                      |  2 +-
 net/iucv/iucv.c                      | 14 +++++++-------
 virt/kvm/kvm_main.c                  |  6 +++---
 49 files changed, 95 insertions(+), 108 deletions(-)

(limited to 'net')

diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index ced4aae8b80..04dcc5e5d4c 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -662,7 +662,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
 		if (smp_processor_id() != boot_cpuid)
 			smp_call_function_single(boot_cpuid,
 						 __marvel_access_rtc,
-						 &rtc_access, 1, 1);
+						 &rtc_access, 1);
 		else
 			__marvel_access_rtc(&rtc_access);
 #else
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 95c905be915..44114c8dbb2 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -710,7 +710,7 @@ flush_tlb_mm(struct mm_struct *mm)
 		}
 	}
 
-	if (smp_call_function(ipi_flush_tlb_mm, mm, 1, 1)) {
+	if (smp_call_function(ipi_flush_tlb_mm, mm, 1)) {
 		printk(KERN_CRIT "flush_tlb_mm: timed out\n");
 	}
 
@@ -763,7 +763,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 	data.mm = mm;
 	data.addr = addr;
 
-	if (smp_call_function(ipi_flush_tlb_page, &data, 1, 1)) {
+	if (smp_call_function(ipi_flush_tlb_page, &data, 1)) {
 		printk(KERN_CRIT "flush_tlb_page: timed out\n");
 	}
 
@@ -815,7 +815,7 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
 		}
 	}
 
-	if (smp_call_function(ipi_flush_icache_page, mm, 1, 1)) {
+	if (smp_call_function(ipi_flush_icache_page, mm, 1)) {
 		printk(KERN_CRIT "flush_icache_page: timed out\n");
 	}
 
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index 9fc0eeb4f0a..7c3d5ec6ec6 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -65,7 +65,7 @@ op_axp_setup(void)
 	model->reg_setup(&reg, ctr, &sys);
 
 	/* Configure the registers on all cpus.  */
-	(void)smp_call_function(model->cpu_setup, &reg, 0, 1);
+	(void)smp_call_function(model->cpu_setup, &reg, 1);
 	model->cpu_setup(&reg);
 	return 0;
 }
@@ -86,7 +86,7 @@ op_axp_cpu_start(void *dummy)
 static int
 op_axp_start(void)
 {
-	(void)smp_call_function(op_axp_cpu_start, NULL, 0, 1);
+	(void)smp_call_function(op_axp_cpu_start, NULL, 1);
 	op_axp_cpu_start(NULL);
 	return 0;
 }
@@ -101,7 +101,7 @@ op_axp_cpu_stop(void *dummy)
 static void
 op_axp_stop(void)
 {
-	(void)smp_call_function(op_axp_cpu_stop, NULL, 0, 1);
+	(void)smp_call_function(op_axp_cpu_stop, NULL, 1);
 	op_axp_cpu_stop(NULL);
 }
 
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 74fae604565..4458705021e 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -201,7 +201,7 @@ static int em_call_function(int (*fn)(void))
 	data.ret = 0;
 
 	preempt_disable();
-	smp_call_function(em_func, &data, 1, 1);
+	smp_call_function(em_func, &data, 1);
 	em_func(&data);
 	preempt_enable();
 
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 32455c633f1..c0d2c9bb952 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -352,7 +352,7 @@ static int __init vfp_init(void)
 	else if (vfpsid & FPSID_NODOUBLE) {
 		printk("no double precision support\n");
 	} else {
-		smp_call_function(vfp_enable, NULL, 1, 1);
+		smp_call_function(vfp_enable, NULL, 1);
 
 		VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT;  /* Extract the architecture version */
 		printk("implementor %02x architecture %d part %02x variant %x rev %x\n",
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index a9c3334e46c..952a24b2f5a 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -194,7 +194,7 @@ void stop_this_cpu(void* dummy)
 /* Other calls */
 void smp_send_stop(void)
 {
-	smp_call_function(stop_this_cpu, NULL, 1, 0);
+	smp_call_function(stop_this_cpu, NULL, 0);
 }
 
 int setup_profiling_timer(unsigned int multiplier)
@@ -316,8 +316,7 @@ int send_ipi(int vector, int wait, cpumask_t cpu_mask)
  * You must not call this function with disabled interrupts or from a
  * hardware interrupt handler or from a bottom half handler.
  */
-int smp_call_function(void (*func)(void *info), void *info,
-		      int nonatomic, int wait)
+int smp_call_function(void (*func)(void *info), void *info, int wait)
 {
 	cpumask_t cpu_mask = CPU_MASK_ALL;
 	struct call_data_struct data;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 705176b434b..9cd818cc700 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1881,7 +1881,7 @@ static int __cpuinit mca_cpu_callback(struct notifier_block *nfb,
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		smp_call_function_single(hotcpu, ia64_mca_cmc_vector_adjust,
-					 NULL, 1, 0);
+					 NULL, 0);
 		break;
 	}
 	return NOTIFY_OK;
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 9dc00f7fe10..e5c57f413ca 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -921,7 +921,7 @@ int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
 
 
 	/* will send IPI to other CPU and wait for completion of remote call */
-	if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) {
+	if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 1))) {
 		printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: "
 		       "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret);
 		return 0;
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 7714a97b010..9baa48255c1 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -1820,7 +1820,7 @@ pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
 	int ret;
 
 	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
-	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
+	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1);
 	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
 }
 #endif /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index a3a34b4eb03..fabaf08d9a6 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -286,7 +286,7 @@ void cpu_idle_wait(void)
 {
 	smp_mb();
 	/* kick all the CPUs so that they exit out of pm_idle */
-	smp_call_function(do_nothing, NULL, 0, 1);
+	smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index eaa1b6795a1..9d1d429c6c5 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -317,7 +317,7 @@ ia64_sync_itc (unsigned int master)
 
 	go[MASTER] = 1;
 
-	if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) {
+	if (smp_call_function_single(master, sync_master, NULL, 0) < 0) {
 		printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
 		return;
 	}
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index e77995a6e3e..8eff8c1d40a 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -123,8 +123,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
 	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
 	if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
 		atomic_set(&uc_pool->status, 0);
-		status = smp_call_function(uncached_ipi_visibility, uc_pool,
-					   0, 1);
+		status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
 		if (status || atomic_read(&uc_pool->status))
 			goto failed;
 	} else if (status != PAL_VISIBILITY_OK)
@@ -146,7 +145,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
 	if (status != PAL_STATUS_SUCCESS)
 		goto failed;
 	atomic_set(&uc_pool->status, 0);
-	status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 0, 1);
+	status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
 	if (status || atomic_read(&uc_pool->status))
 		goto failed;
 
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 8cc0c4753d8..636588e7e06 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -629,7 +629,7 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
 		if (use_ipi) {
 			/* use an interprocessor interrupt to call SAL */
 			smp_call_function_single(cpu, sn_hwperf_call_sal,
-				op_info, 1, 1);
+				op_info, 1);
 		}
 		else {
 			/* migrate the task before calling SAL */ 
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index 74eb7bcd5a4..7577f971ea4 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -212,7 +212,7 @@ void smp_flush_tlb_all(void)
 	local_irq_save(flags);
 	__flush_tlb_all();
 	local_irq_restore(flags);
-	smp_call_function(flush_tlb_all_ipi, NULL, 1, 1);
+	smp_call_function(flush_tlb_all_ipi, NULL, 1);
 	preempt_enable();
 }
 
@@ -505,7 +505,7 @@ void smp_invalidate_interrupt(void)
  *==========================================================================*/
 void smp_send_stop(void)
 {
-	smp_call_function(stop_this_cpu, NULL, 1, 0);
+	smp_call_function(stop_this_cpu, NULL, 0);
 }
 
 /*==========================================================================*
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index c75b26cb61d..7a9ae830be8 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -167,7 +167,7 @@ static void stop_this_cpu(void *dummy)
 
 void smp_send_stop(void)
 {
-	smp_call_function(stop_this_cpu, NULL, 1, 0);
+	smp_call_function(stop_this_cpu, NULL, 0);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -266,7 +266,7 @@ static void flush_tlb_mm_ipi(void *mm)
 static inline void smp_on_other_tlbs(void (*func) (void *info), void *info)
 {
 #ifndef CONFIG_MIPS_MT_SMTC
-	smp_call_function(func, info, 1, 1);
+	smp_call_function(func, info, 1);
 #endif
 }
 
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 27096751ddc..71df3390c07 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -43,12 +43,12 @@
  *    primary cache.
  */
 static inline void r4k_on_each_cpu(void (*func) (void *info), void *info,
-                                   int retry, int wait)
+                                   int wait)
 {
 	preempt_disable();
 
 #if !defined(CONFIG_MIPS_MT_SMP) && !defined(CONFIG_MIPS_MT_SMTC)
-	smp_call_function(func, info, retry, wait);
+	smp_call_function(func, info, wait);
 #endif
 	func(info);
 	preempt_enable();
@@ -350,7 +350,7 @@ static inline void local_r4k___flush_cache_all(void * args)
 
 static void r4k___flush_cache_all(void)
 {
-	r4k_on_each_cpu(local_r4k___flush_cache_all, NULL, 1, 1);
+	r4k_on_each_cpu(local_r4k___flush_cache_all, NULL, 1);
 }
 
 static inline int has_valid_asid(const struct mm_struct *mm)
@@ -397,7 +397,7 @@ static void r4k_flush_cache_range(struct vm_area_struct *vma,
 	int exec = vma->vm_flags & VM_EXEC;
 
 	if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc))
-		r4k_on_each_cpu(local_r4k_flush_cache_range, vma, 1, 1);
+		r4k_on_each_cpu(local_r4k_flush_cache_range, vma, 1);
 }
 
 static inline void local_r4k_flush_cache_mm(void * args)
@@ -429,7 +429,7 @@ static void r4k_flush_cache_mm(struct mm_struct *mm)
 	if (!cpu_has_dc_aliases)
 		return;
 
-	r4k_on_each_cpu(local_r4k_flush_cache_mm, mm, 1, 1);
+	r4k_on_each_cpu(local_r4k_flush_cache_mm, mm, 1);
 }
 
 struct flush_cache_page_args {
@@ -521,7 +521,7 @@ static void r4k_flush_cache_page(struct vm_area_struct *vma,
 	args.addr = addr;
 	args.pfn = pfn;
 
-	r4k_on_each_cpu(local_r4k_flush_cache_page, &args, 1, 1);
+	r4k_on_each_cpu(local_r4k_flush_cache_page, &args, 1);
 }
 
 static inline void local_r4k_flush_data_cache_page(void * addr)
@@ -535,7 +535,7 @@ static void r4k_flush_data_cache_page(unsigned long addr)
 		local_r4k_flush_data_cache_page((void *)addr);
 	else
 		r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr,
-			        1, 1);
+			        1);
 }
 
 struct flush_icache_range_args {
@@ -571,7 +571,7 @@ static void r4k_flush_icache_range(unsigned long start, unsigned long end)
 	args.start = start;
 	args.end = end;
 
-	r4k_on_each_cpu(local_r4k_flush_icache_range, &args, 1, 1);
+	r4k_on_each_cpu(local_r4k_flush_icache_range, &args, 1);
 	instruction_hazard();
 }
 
@@ -672,7 +672,7 @@ static void local_r4k_flush_cache_sigtramp(void * arg)
 
 static void r4k_flush_cache_sigtramp(unsigned long addr)
 {
-	r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr, 1, 1);
+	r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr, 1);
 }
 
 static void r4k_flush_icache_all(void)
diff --git a/arch/mips/pmc-sierra/yosemite/prom.c b/arch/mips/pmc-sierra/yosemite/prom.c
index 35dc435846a..cf4c868715a 100644
--- a/arch/mips/pmc-sierra/yosemite/prom.c
+++ b/arch/mips/pmc-sierra/yosemite/prom.c
@@ -64,7 +64,7 @@ static void prom_exit(void)
 #ifdef CONFIG_SMP
 	if (smp_processor_id())
 		/* CPU 1 */
-		smp_call_function(prom_cpu0_exit, NULL, 1, 1);
+		smp_call_function(prom_cpu0_exit, NULL, 1);
 #endif
 	prom_cpu0_exit(NULL);
 }
diff --git a/arch/mips/sibyte/cfe/setup.c b/arch/mips/sibyte/cfe/setup.c
index 33fce826f8b..fd9604d5555 100644
--- a/arch/mips/sibyte/cfe/setup.c
+++ b/arch/mips/sibyte/cfe/setup.c
@@ -74,7 +74,7 @@ static void __noreturn cfe_linux_exit(void *arg)
 		if (!reboot_smp) {
 			/* Get CPU 0 to do the cfe_exit */
 			reboot_smp = 1;
-			smp_call_function(cfe_linux_exit, arg, 1, 0);
+			smp_call_function(cfe_linux_exit, arg, 0);
 		}
 	} else {
 		printk("Passing control back to CFE...\n");
diff --git a/arch/mips/sibyte/sb1250/prom.c b/arch/mips/sibyte/sb1250/prom.c
index cf8f6b3de86..65b1af66b67 100644
--- a/arch/mips/sibyte/sb1250/prom.c
+++ b/arch/mips/sibyte/sb1250/prom.c
@@ -66,7 +66,7 @@ static void prom_linux_exit(void)
 {
 #ifdef CONFIG_SMP
 	if (smp_processor_id()) {
-		smp_call_function(prom_cpu0_exit, NULL, 1, 1);
+		smp_call_function(prom_cpu0_exit, NULL, 1);
 	}
 #endif
 	while(1);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 37a5ab410dc..5191b46a611 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -168,7 +168,7 @@ static void stop_this_cpu(void *dummy)
 
 void smp_send_stop(void)
 {
-	smp_call_function(stop_this_cpu, NULL, 0, 0);
+	smp_call_function(stop_this_cpu, NULL, 0);
 }
 
 extern struct gettimeofday_struct do_gtod;
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index ad40729bec3..837a3b3e775 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -209,7 +209,7 @@ __appldata_vtimer_setup(int cmd)
 			per_cpu(appldata_timer, i).expires = per_cpu_interval;
 			smp_call_function_single(i, add_virt_timer_periodic,
 						 &per_cpu(appldata_timer, i),
-						 0, 1);
+						 1);
 		}
 		appldata_timer_active = 1;
 		P_INFO("Monitoring timer started.\n");
@@ -236,7 +236,7 @@ __appldata_vtimer_setup(int cmd)
 			args.timer = &per_cpu(appldata_timer, i);
 			args.expires = per_cpu_interval;
 			smp_call_function_single(i, __appldata_mod_vtimer_wrap,
-						 &args, 0, 1);
+						 &args, 1);
 		}
 	}
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 5d4fa4b1c74..276b105fb2a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -109,7 +109,7 @@ static void do_call_function(void)
 }
 
 static void __smp_call_function_map(void (*func) (void *info), void *info,
-				    int nonatomic, int wait, cpumask_t map)
+				    int wait, cpumask_t map)
 {
 	struct call_data_struct data;
 	int cpu, local = 0;
@@ -162,7 +162,6 @@ out:
  * smp_call_function:
  * @func: the function to run; this must be fast and non-blocking
  * @info: an arbitrary pointer to pass to the function
- * @nonatomic: unused
  * @wait: if true, wait (atomically) until function has completed on other CPUs
  *
  * Run a function on all other CPUs.
@@ -170,15 +169,14 @@ out:
  * You must not call this function with disabled interrupts, from a
  * hardware interrupt handler or from a bottom half.
  */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
-		      int wait)
+int smp_call_function(void (*func) (void *info), void *info, int wait)
 {
 	cpumask_t map;
 
 	spin_lock(&call_lock);
 	map = cpu_online_map;
 	cpu_clear(smp_processor_id(), map);
-	__smp_call_function_map(func, info, nonatomic, wait, map);
+	__smp_call_function_map(func, info, wait, map);
 	spin_unlock(&call_lock);
 	return 0;
 }
@@ -189,7 +187,6 @@ EXPORT_SYMBOL(smp_call_function);
  * @cpu: the CPU where func should run
  * @func: the function to run; this must be fast and non-blocking
  * @info: an arbitrary pointer to pass to the function
- * @nonatomic: unused
  * @wait: if true, wait (atomically) until function has completed on other CPUs
  *
  * Run a function on one processor.
@@ -198,11 +195,10 @@ EXPORT_SYMBOL(smp_call_function);
  * hardware interrupt handler or from a bottom half.
  */
 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-			     int nonatomic, int wait)
+			     int wait)
 {
 	spin_lock(&call_lock);
-	__smp_call_function_map(func, info, nonatomic, wait,
-				cpumask_of_cpu(cpu));
+	__smp_call_function_map(func, info, wait, cpumask_of_cpu(cpu));
 	spin_unlock(&call_lock);
 	return 0;
 }
@@ -228,7 +224,7 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
 {
 	spin_lock(&call_lock);
 	cpu_clear(smp_processor_id(), mask);
-	__smp_call_function_map(func, info, 0, wait, mask);
+	__smp_call_function_map(func, info, wait, mask);
 	spin_unlock(&call_lock);
 	return 0;
 }
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 7aec676fefd..bf7bf2c2236 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -690,7 +690,7 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
 	 */
 	memset(&etr_sync, 0, sizeof(etr_sync));
 	preempt_disable();
-	smp_call_function(etr_sync_cpu_start, NULL, 0, 0);
+	smp_call_function(etr_sync_cpu_start, NULL, 0);
 	local_irq_disable();
 	etr_enable_sync_clock();
 
@@ -729,7 +729,7 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
 		rc = -EAGAIN;
 	}
 	local_irq_enable();
-	smp_call_function(etr_sync_cpu_end,NULL,0,0);
+	smp_call_function(etr_sync_cpu_end,NULL,0);
 	preempt_enable();
 	return rc;
 }
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 2ed8dceb297..71781ba2675 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -168,7 +168,7 @@ static void stop_this_cpu(void *unused)
 
 void smp_send_stop(void)
 {
-	smp_call_function(stop_this_cpu, 0, 1, 0);
+	smp_call_function(stop_this_cpu, 0, 0);
 }
 
 void arch_send_call_function_ipi(cpumask_t mask)
@@ -223,7 +223,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 	preempt_disable();
 
 	if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
-		smp_call_function(flush_tlb_mm_ipi, (void *)mm, 1, 1);
+		smp_call_function(flush_tlb_mm_ipi, (void *)mm, 1);
 	} else {
 		int i;
 		for (i = 0; i < num_online_cpus(); i++)
@@ -260,7 +260,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
 		fd.vma = vma;
 		fd.addr1 = start;
 		fd.addr2 = end;
-		smp_call_function(flush_tlb_range_ipi, (void *)&fd, 1, 1);
+		smp_call_function(flush_tlb_range_ipi, (void *)&fd, 1);
 	} else {
 		int i;
 		for (i = 0; i < num_online_cpus(); i++)
@@ -303,7 +303,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 
 		fd.vma = vma;
 		fd.addr1 = page;
-		smp_call_function(flush_tlb_page_ipi, (void *)&fd, 1, 1);
+		smp_call_function(flush_tlb_page_ipi, (void *)&fd, 1);
 	} else {
 		int i;
 		for (i = 0; i < num_online_cpus(); i++)
@@ -327,6 +327,6 @@ void flush_tlb_one(unsigned long asid, unsigned long vaddr)
 	fd.addr1 = asid;
 	fd.addr2 = vaddr;
 
-	smp_call_function(flush_tlb_one_ipi, (void *)&fd, 1, 1);
+	smp_call_function(flush_tlb_one_ipi, (void *)&fd, 1);
 	local_flush_tlb_one(asid, vaddr);
 }
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index b82d017a174..c099d96f123 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -807,7 +807,6 @@ extern unsigned long xcall_call_function;
  * smp_call_function(): Run a function on all other CPUs.
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
  * @wait: If true, wait (atomically) until function has completed on other CPUs.
  *
  * Returns 0 on success, else a negative status code. Does not return until
@@ -817,8 +816,7 @@ extern unsigned long xcall_call_function;
  * hardware interrupt handler or from a bottom half handler.
  */
 static int sparc64_smp_call_function_mask(void (*func)(void *info), void *info,
-					  int nonatomic, int wait,
-					  cpumask_t mask)
+					  int wait, cpumask_t mask)
 {
 	struct call_data_struct data;
 	int cpus;
@@ -853,11 +851,9 @@ out_unlock:
 	return 0;
 }
 
-int smp_call_function(void (*func)(void *info), void *info,
-		      int nonatomic, int wait)
+int smp_call_function(void (*func)(void *info), void *info, int wait)
 {
-	return sparc64_smp_call_function_mask(func, info, nonatomic, wait,
-						cpu_online_map);
+	return sparc64_smp_call_function_mask(func, info, wait, cpu_online_map);
 }
 
 void smp_call_function_client(int irq, struct pt_regs *regs)
@@ -894,7 +890,7 @@ static void tsb_sync(void *info)
 
 void smp_tsb_sync(struct mm_struct *mm)
 {
-	sparc64_smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+	sparc64_smp_call_function_mask(tsb_sync, mm, 1, mm->cpu_vm_mask);
 }
 
 extern unsigned long xcall_flush_tlb_mm;
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index e1062ec36d4..be2d50c3aa9 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -214,8 +214,7 @@ void smp_call_function_slave(int cpu)
 	atomic_inc(&scf_finished);
 }
 
-int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic,
-		      int wait)
+int smp_call_function(void (*_func)(void *info), void *_info, int wait)
 {
 	int cpus = num_online_cpus() - 1;
 	int i;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6a1e278d932..290652cefdd 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -222,7 +222,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
 	atomic_set(&data.gate,0);
 
 	/*  Start the ball rolling on other CPUs  */
-	if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
+	if (smp_call_function(ipi_handler, &data, 0) != 0)
 		panic("mtrr: timed out waiting for other CPUs\n");
 
 	local_irq_save(flags);
@@ -822,7 +822,7 @@ void mtrr_ap_init(void)
  */
 void mtrr_save_state(void)
 {
-	smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
+	smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
 }
 
 static int __init mtrr_init_finialize(void)
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index daff52a6224..336dd43c915 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -95,7 +95,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 	for (; count; count -= 16) {
 		cmd.eax = pos;
 		cmd.ecx = pos >> 32;
-		smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+		smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
 		if (copy_to_user(tmp, &cmd, 16))
 			return -EFAULT;
 		tmp += 16;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 0224c3637c7..cb0a6398c64 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 		load_LDT(pc);
 		mask = cpumask_of_cpu(smp_processor_id());
 		if (!cpus_equal(current->mm->cpu_vm_mask, mask))
-			smp_call_function(flush_ldt, NULL, 1, 1);
+			smp_call_function(flush_ldt, NULL, 1);
 		preempt_enable();
 #else
 		load_LDT(pc);
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 84160f74eeb..5562dab0bd2 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -87,7 +87,7 @@ int __init check_nmi_watchdog(void)
 
 #ifdef CONFIG_SMP
 	if (nmi_watchdog == NMI_LOCAL_APIC)
-		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
 #endif
 
 	for_each_possible_cpu(cpu)
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 5a29ded994f..2f1e4f503c9 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -96,7 +96,7 @@ int __init check_nmi_watchdog(void)
 
 #ifdef CONFIG_SMP
 	if (nmi_watchdog == NMI_LOCAL_APIC)
-		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
 #endif
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 575aa3d7248..56546e8a13a 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -164,7 +164,7 @@ static void native_smp_send_stop(void)
 	if (reboot_force)
 		return;
 
-	smp_call_function(stop_this_cpu, NULL, 0, 0);
+	smp_call_function(stop_this_cpu, NULL, 0);
 	local_irq_save(flags);
 	disable_local_APIC();
 	local_irq_restore(flags);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 61efa2f7d56..0a03d57f9b3 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -278,7 +278,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 {
 	long cpu = (long)arg;
 	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
 	return NOTIFY_DONE;
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 540e9517907..5534fe59b5f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -335,7 +335,7 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
 {
 	if (vmx->vcpu.cpu == -1)
 		return;
-	smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1);
+	smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
 	vmx->launched = 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63a77caa59f..0faa2546b1c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4044,6 +4044,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 	 * So need not to call smp_call_function_single() in that case.
 	 */
 	if (vcpu->guest_mode && vcpu->cpu != cpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
 	put_cpu();
 }
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
index 57d043fa893..d5a2b39f882 100644
--- a/arch/x86/lib/msr-on-cpu.c
+++ b/arch/x86/lib/msr-on-cpu.c
@@ -30,10 +30,10 @@ static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
 
 	rv.msr_no = msr_no;
 	if (safe) {
-		smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 0, 1);
+		smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
 		err = rv.err;
 	} else {
-		smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1);
+		smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
 	}
 	*l = rv.l;
 	*h = rv.h;
@@ -64,10 +64,10 @@ static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
 	rv.l = l;
 	rv.h = h;
 	if (safe) {
-		smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 0, 1);
+		smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
 		err = rv.err;
 	} else {
-		smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1);
+		smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
 	}
 
 	return err;
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index cb34407a993..04f596eab74 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1113,7 +1113,7 @@ int safe_smp_processor_id(void)
 /* broadcast a halt to all other CPUs */
 static void voyager_smp_send_stop(void)
 {
-	smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
+	smp_call_function(smp_stop_cpu_function, NULL, 1);
 }
 
 /* this function is triggered in time.c when a clock tick fires
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b3786e749b8..a1651d029ea 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -331,7 +331,7 @@ static void stop_self(void *v)
 
 void xen_smp_send_stop(void)
 {
-	smp_call_function(stop_self, NULL, 0, 0);
+	smp_call_function(stop_self, NULL, 0);
 }
 
 void xen_smp_send_reschedule(int cpu)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 556ee158519..4976e5db2b3 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1339,7 +1339,7 @@ static void smp_callback(void *v)
 static int acpi_processor_latency_notify(struct notifier_block *b,
 		unsigned long l, void *v)
 {
-	smp_call_function(smp_callback, NULL, 0, 1);
+	smp_call_function(smp_callback, NULL, 1);
 	return NOTIFY_OK;
 }
 
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 23554b676d6..5405769020a 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -340,7 +340,7 @@ static void smp_callback(void *v)
 static int cpuidle_latency_notify(struct notifier_block *b,
 		unsigned long l, void *v)
 {
-	smp_call_function(smp_callback, NULL, 0, 1);
+	smp_call_function(smp_callback, NULL, 1);
 	return NOTIFY_OK;
 }
 
diff --git a/include/asm-alpha/smp.h b/include/asm-alpha/smp.h
index 2f60a362d75..544c69af816 100644
--- a/include/asm-alpha/smp.h
+++ b/include/asm-alpha/smp.h
@@ -53,7 +53,7 @@ extern void arch_send_call_function_ipi(cpumask_t mask);
 #else /* CONFIG_SMP */
 
 #define hard_smp_processor_id()		0
-#define smp_call_function_on_cpu(func,info,retry,wait,cpu)    ({ 0; })
+#define smp_call_function_on_cpu(func,info,wait,cpu)    ({ 0; })
 
 #endif /* CONFIG_SMP */
 
diff --git a/include/asm-sparc/smp.h b/include/asm-sparc/smp.h
index e6d56159972..b61e74bea06 100644
--- a/include/asm-sparc/smp.h
+++ b/include/asm-sparc/smp.h
@@ -72,7 +72,7 @@ static inline void xc5(smpfunc_t func, unsigned long arg1, unsigned long arg2,
 			   unsigned long arg3, unsigned long arg4, unsigned long arg5)
 { smp_cross_call(func, arg1, arg2, arg3, arg4, arg5); }
 
-static inline int smp_call_function(void (*func)(void *info), void *info, int nonatomic, int wait)
+static inline int smp_call_function(void (*func)(void *info), void *info, int wait)
 {
 	xc1((smpfunc_t)func, (unsigned long)info);
 	return 0;
diff --git a/include/linux/smp.h b/include/linux/smp.h
index eac3e062250..338cad1b954 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -62,11 +62,11 @@ extern void smp_cpus_done(unsigned int max_cpus);
 /*
  * Call a function on all other processors
  */
-int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
+int smp_call_function(void(*func)(void *info), void *info, int wait);
 int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
 				int wait);
 int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
-				int retry, int wait);
+				int wait);
 void __smp_call_function_single(int cpuid, struct call_single_data *data);
 
 /*
@@ -119,7 +119,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
 {
 	return 0;
 }
-#define smp_call_function(func, info, retry, wait) \
+#define smp_call_function(func, info, wait) \
 			(up_smp_call_function(func, info))
 #define on_each_cpu(func,info,retry,wait)	\
 	({					\
@@ -131,7 +131,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
 static inline void smp_send_reschedule(int cpu) { }
 #define num_booting_cpus()			1
 #define smp_prepare_boot_cpu()			do {} while (0)
-#define smp_call_function_single(cpuid, func, info, retry, wait) \
+#define smp_call_function_single(cpuid, func, info, wait) \
 ({ \
 	WARN_ON(cpuid != 0);	\
 	local_irq_disable();	\
diff --git a/kernel/smp.c b/kernel/smp.c
index f77b75c027a..7e0432a4a0e 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -195,7 +195,6 @@ void generic_smp_call_function_single_interrupt(void)
  * smp_call_function_single - Run a function on a specific CPU
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
- * @retry: Unused
  * @wait: If true, wait until function has completed on other CPUs.
  *
  * Returns 0 on success, else a negative status code. Note that @wait
@@ -203,7 +202,7 @@ void generic_smp_call_function_single_interrupt(void)
  * we fall back to on-stack allocation.
  */
 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-			     int retry, int wait)
+			     int wait)
 {
 	struct call_single_data d;
 	unsigned long flags;
@@ -339,7 +338,6 @@ EXPORT_SYMBOL(smp_call_function_mask);
  * smp_call_function(): Run a function on all other CPUs.
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
- * @natomic: Unused
  * @wait: If true, wait (atomically) until function has completed on other CPUs.
  *
  * Returns 0 on success, else a negative status code.
@@ -351,7 +349,7 @@ EXPORT_SYMBOL(smp_call_function_mask);
  * You must not call this function with disabled interrupts or from a
  * hardware interrupt handler or from a bottom half handler.
  */
-int smp_call_function(void (*func)(void *), void *info, int natomic, int wait)
+int smp_call_function(void (*func)(void *), void *info, int wait)
 {
 	int ret;
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 36e06174004..d73afb4764e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -679,7 +679,7 @@ int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
 	int ret = 0;
 
 	preempt_disable();
-	ret = smp_call_function(func, info, retry, wait);
+	ret = smp_call_function(func, info, wait);
 	local_irq_disable();
 	func(info);
 	local_irq_enable();
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 57a1f02e5ec..75e718539dc 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -266,7 +266,7 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
 		       "offline CPU #%d\n", *oncpu);
 	else
 		smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
-					 &reason, 1, 1);
+					 &reason, 1);
 }
 
 /*
diff --git a/net/core/flow.c b/net/core/flow.c
index 19991175fde..5cf81052d04 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -298,7 +298,7 @@ void flow_cache_flush(void)
 	init_completion(&info.completion);
 
 	local_bh_disable();
-	smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
+	smp_call_function(flow_cache_flush_per_cpu, &info, 0);
 	flow_cache_flush_tasklet((unsigned long)&info);
 	local_bh_enable();
 
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 91897076213..94d5a45c3a5 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -480,7 +480,7 @@ static void iucv_setmask_mp(void)
 		if (cpu_isset(cpu, iucv_buffer_cpumask) &&
 		    !cpu_isset(cpu, iucv_irq_cpumask))
 			smp_call_function_single(cpu, iucv_allow_cpu,
-						 NULL, 0, 1);
+						 NULL, 1);
 	preempt_enable();
 }
 
@@ -498,7 +498,7 @@ static void iucv_setmask_up(void)
 	cpumask = iucv_irq_cpumask;
 	cpu_clear(first_cpu(iucv_irq_cpumask), cpumask);
 	for_each_cpu_mask(cpu, cpumask)
-		smp_call_function_single(cpu, iucv_block_cpu, NULL, 0, 1);
+		smp_call_function_single(cpu, iucv_block_cpu, NULL, 1);
 }
 
 /**
@@ -523,7 +523,7 @@ static int iucv_enable(void)
 	rc = -EIO;
 	preempt_disable();
 	for_each_online_cpu(cpu)
-		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1);
+		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
 	preempt_enable();
 	if (cpus_empty(iucv_buffer_cpumask))
 		/* No cpu could declare an iucv buffer. */
@@ -580,7 +580,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 	case CPU_ONLINE_FROZEN:
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1);
+		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
@@ -589,10 +589,10 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 		if (cpus_empty(cpumask))
 			/* Can't offline last IUCV enabled cpu. */
 			return NOTIFY_BAD;
-		smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 0, 1);
+		smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
 		if (cpus_empty(iucv_irq_cpumask))
 			smp_call_function_single(first_cpu(iucv_buffer_cpumask),
-						 iucv_allow_cpu, NULL, 0, 1);
+						 iucv_allow_cpu, NULL, 1);
 		break;
 	}
 	return NOTIFY_OK;
@@ -652,7 +652,7 @@ static void iucv_cleanup_queue(void)
 	 * pending interrupts force them to the work queue by calling
 	 * an empty function on all cpus.
 	 */
-	smp_call_function(__iucv_cleanup_queue, NULL, 0, 1);
+	smp_call_function(__iucv_cleanup_queue, NULL, 1);
 	spin_lock_irq(&iucv_queue_lock);
 	list_for_each_entry_safe(p, n, &iucv_task_queue, list) {
 		/* Remove stale work items from the task queue. */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2d29e260da3..ea1f595f8a8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1266,12 +1266,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 	case CPU_UP_CANCELED:
 		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
 		       cpu);
-		smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
+		smp_call_function_single(cpu, hardware_disable, NULL, 1);
 		break;
 	case CPU_ONLINE:
 		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
 		       cpu);
-		smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
+		smp_call_function_single(cpu, hardware_enable, NULL, 1);
 		break;
 	}
 	return NOTIFY_OK;
@@ -1474,7 +1474,7 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu,
 				kvm_arch_check_processor_compat,
-				&r, 0, 1);
+				&r, 1);
 		if (r < 0)
 			goto out_free_1;
 	}
-- 
cgit v1.2.3-70-g09d2


From 15c8b6c1aaaf1c4edd67e2f02e4d8e1bd1a51c0d Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 9 May 2008 09:39:44 +0200
Subject: on_each_cpu(): kill unused 'retry' parameter

It's not even passed on to smp_call_function() anymore, since that
was removed. So kill it.

Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/alpha/kernel/process.c            |  2 +-
 arch/alpha/kernel/smp.c                |  4 ++--
 arch/arm/kernel/smp.c                  |  6 +++---
 arch/ia64/kernel/mca.c                 |  4 ++--
 arch/ia64/kernel/perfmon.c             |  4 ++--
 arch/ia64/kernel/smp.c                 |  4 ++--
 arch/mips/kernel/irq-rm9000.c          |  4 ++--
 arch/mips/kernel/smp.c                 |  4 ++--
 arch/mips/oprofile/common.c            |  6 +++---
 arch/parisc/kernel/cache.c             |  6 +++---
 arch/parisc/kernel/smp.c               |  2 +-
 arch/parisc/mm/init.c                  |  2 +-
 arch/powerpc/kernel/rtas.c             |  2 +-
 arch/powerpc/kernel/tau_6xx.c          |  4 ++--
 arch/powerpc/kernel/time.c             |  2 +-
 arch/powerpc/mm/slice.c                |  2 +-
 arch/powerpc/oprofile/common.c         |  6 +++---
 arch/s390/kernel/smp.c                 |  6 +++---
 arch/s390/kernel/time.c                |  2 +-
 arch/sh/kernel/smp.c                   |  4 ++--
 arch/sparc64/mm/hugetlbpage.c          |  2 +-
 arch/x86/kernel/cpu/mcheck/mce_64.c    |  6 +++---
 arch/x86/kernel/cpu/mcheck/non-fatal.c |  2 +-
 arch/x86/kernel/cpu/perfctr-watchdog.c |  4 ++--
 arch/x86/kernel/io_apic_32.c           |  2 +-
 arch/x86/kernel/io_apic_64.c           |  2 +-
 arch/x86/kernel/nmi_32.c               |  4 ++--
 arch/x86/kernel/nmi_64.c               |  4 ++--
 arch/x86/kernel/tlb_32.c               |  2 +-
 arch/x86/kernel/tlb_64.c               |  2 +-
 arch/x86/kernel/vsyscall_64.c          |  2 +-
 arch/x86/kvm/vmx.c                     |  2 +-
 arch/x86/mach-voyager/voyager_smp.c    |  2 +-
 arch/x86/mm/pageattr.c                 |  4 ++--
 arch/x86/oprofile/nmi_int.c            | 10 +++++-----
 drivers/char/agp/generic.c             |  2 +-
 drivers/lguest/x86/core.c              |  4 ++--
 fs/buffer.c                            |  2 +-
 include/linux/smp.h                    |  4 ++--
 kernel/hrtimer.c                       |  2 +-
 kernel/profile.c                       |  6 +++---
 kernel/rcupdate.c                      |  2 +-
 kernel/softirq.c                       |  2 +-
 mm/page_alloc.c                        |  2 +-
 mm/slab.c                              |  4 ++--
 mm/slub.c                              |  2 +-
 net/iucv/iucv.c                        |  2 +-
 virt/kvm/kvm_main.c                    |  8 ++++----
 48 files changed, 84 insertions(+), 84 deletions(-)

(limited to 'net')

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 96ed82fd9ee..351407e07e7 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -160,7 +160,7 @@ common_shutdown(int mode, char *restart_cmd)
 	struct halt_info args;
 	args.mode = mode;
 	args.restart_cmd = restart_cmd;
-	on_each_cpu(common_shutdown_1, &args, 1, 0);
+	on_each_cpu(common_shutdown_1, &args, 0);
 }
 
 void
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 44114c8dbb2..83df541650f 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -657,7 +657,7 @@ void
 smp_imb(void)
 {
 	/* Must wait other processors to flush their icache before continue. */
-	if (on_each_cpu(ipi_imb, NULL, 1, 1))
+	if (on_each_cpu(ipi_imb, NULL, 1))
 		printk(KERN_CRIT "smp_imb: timed out\n");
 }
 EXPORT_SYMBOL(smp_imb);
@@ -673,7 +673,7 @@ flush_tlb_all(void)
 {
 	/* Although we don't have any data to pass, we do want to
 	   synchronize with the other processors.  */
-	if (on_each_cpu(ipi_flush_tlb_all, NULL, 1, 1)) {
+	if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) {
 		printk(KERN_CRIT "flush_tlb_all: timed out\n");
 	}
 }
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 6344466b211..5a7c09564d1 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -604,7 +604,7 @@ static inline void ipi_flush_tlb_kernel_range(void *arg)
 
 void flush_tlb_all(void)
 {
-	on_each_cpu(ipi_flush_tlb_all, NULL, 1, 1);
+	on_each_cpu(ipi_flush_tlb_all, NULL, 1);
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
@@ -631,7 +631,7 @@ void flush_tlb_kernel_page(unsigned long kaddr)
 
 	ta.ta_start = kaddr;
 
-	on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1, 1);
+	on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
 }
 
 void flush_tlb_range(struct vm_area_struct *vma,
@@ -654,5 +654,5 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 	ta.ta_start = start;
 	ta.ta_end = end;
 
-	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1, 1);
+	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
 }
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 9cd818cc700..7dd96c12717 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -707,7 +707,7 @@ ia64_mca_cmc_vector_enable (void *dummy)
 static void
 ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused)
 {
-	on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0);
+	on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0);
 }
 
 /*
@@ -719,7 +719,7 @@ ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused)
 static void
 ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused)
 {
-	on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0);
+	on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0);
 }
 
 /*
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9baa48255c1..19d4493c619 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6508,7 +6508,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
 	}
 
 	/* save the current system wide pmu states */
-	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
+	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
 	if (ret) {
 		DPRINT(("on_each_cpu() failed: %d\n", ret));
 		goto cleanup_reserve;
@@ -6553,7 +6553,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
 
 	pfm_alt_intr_handler = NULL;
 
-	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
+	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
 	if (ret) {
 		DPRINT(("on_each_cpu() failed: %d\n", ret));
 	}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 19152dcbf6e..3676468612b 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -285,7 +285,7 @@ smp_flush_tlb_cpumask(cpumask_t xcpumask)
 void
 smp_flush_tlb_all (void)
 {
-	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
 }
 
 void
@@ -308,7 +308,7 @@ smp_flush_tlb_mm (struct mm_struct *mm)
 	 * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
 	 * rather trivial.
 	 */
-	on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
+	on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
 }
 
 void arch_send_call_function_single_ipi(int cpu)
diff --git a/arch/mips/kernel/irq-rm9000.c b/arch/mips/kernel/irq-rm9000.c
index ed9febe63d7..b47e4615ec1 100644
--- a/arch/mips/kernel/irq-rm9000.c
+++ b/arch/mips/kernel/irq-rm9000.c
@@ -49,7 +49,7 @@ static void local_rm9k_perfcounter_irq_startup(void *args)
 
 static unsigned int rm9k_perfcounter_irq_startup(unsigned int irq)
 {
-	on_each_cpu(local_rm9k_perfcounter_irq_startup, (void *) irq, 0, 1);
+	on_each_cpu(local_rm9k_perfcounter_irq_startup, (void *) irq, 1);
 
 	return 0;
 }
@@ -66,7 +66,7 @@ static void local_rm9k_perfcounter_irq_shutdown(void *args)
 
 static void rm9k_perfcounter_irq_shutdown(unsigned int irq)
 {
-	on_each_cpu(local_rm9k_perfcounter_irq_shutdown, (void *) irq, 0, 1);
+	on_each_cpu(local_rm9k_perfcounter_irq_shutdown, (void *) irq, 1);
 }
 
 static struct irq_chip rm9k_irq_controller = {
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 7a9ae830be8..4410f172b8a 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -246,7 +246,7 @@ static void flush_tlb_all_ipi(void *info)
 
 void flush_tlb_all(void)
 {
-	on_each_cpu(flush_tlb_all_ipi, NULL, 1, 1);
+	on_each_cpu(flush_tlb_all_ipi, NULL, 1);
 }
 
 static void flush_tlb_mm_ipi(void *mm)
@@ -366,7 +366,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 		.addr2 = end,
 	};
 
-	on_each_cpu(flush_tlb_kernel_range_ipi, &fd, 1, 1);
+	on_each_cpu(flush_tlb_kernel_range_ipi, &fd, 1);
 }
 
 static void flush_tlb_page_ipi(void *info)
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index b5f6f71b27b..dd2fbd6645c 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -27,7 +27,7 @@ static int op_mips_setup(void)
 	model->reg_setup(ctr);
 
 	/* Configure the registers on all cpus.  */
-	on_each_cpu(model->cpu_setup, NULL, 0, 1);
+	on_each_cpu(model->cpu_setup, NULL, 1);
 
         return 0;
 }
@@ -58,7 +58,7 @@ static int op_mips_create_files(struct super_block * sb, struct dentry * root)
 
 static int op_mips_start(void)
 {
-	on_each_cpu(model->cpu_start, NULL, 0, 1);
+	on_each_cpu(model->cpu_start, NULL, 1);
 
 	return 0;
 }
@@ -66,7 +66,7 @@ static int op_mips_start(void)
 static void op_mips_stop(void)
 {
 	/* Disable performance monitoring for all counters.  */
-	on_each_cpu(model->cpu_stop, NULL, 0, 1);
+	on_each_cpu(model->cpu_stop, NULL, 1);
 }
 
 int __init oprofile_arch_init(struct oprofile_operations *ops)
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index e10d25d2d9c..5259d8c2067 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -51,12 +51,12 @@ static struct pdc_btlb_info btlb_info __read_mostly;
 void
 flush_data_cache(void)
 {
-	on_each_cpu(flush_data_cache_local, NULL, 1, 1);
+	on_each_cpu(flush_data_cache_local, NULL, 1);
 }
 void 
 flush_instruction_cache(void)
 {
-	on_each_cpu(flush_instruction_cache_local, NULL, 1, 1);
+	on_each_cpu(flush_instruction_cache_local, NULL, 1);
 }
 #endif
 
@@ -515,7 +515,7 @@ static void cacheflush_h_tmp_function(void *dummy)
 
 void flush_cache_all(void)
 {
-	on_each_cpu(cacheflush_h_tmp_function, NULL, 1, 1);
+	on_each_cpu(cacheflush_h_tmp_function, NULL, 1);
 }
 
 void flush_cache_mm(struct mm_struct *mm)
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 126105c76a4..d47f3975c9c 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -292,7 +292,7 @@ void arch_send_call_function_single_ipi(int cpu)
 void
 smp_flush_tlb_all(void)
 {
-	on_each_cpu(flush_tlb_all_local, NULL, 1, 1);
+	on_each_cpu(flush_tlb_all_local, NULL, 1);
 }
 
 /*
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index ce0da689a89..b4d6c8777ed 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -1053,7 +1053,7 @@ void flush_tlb_all(void)
 	    do_recycle++;
 	}
 	spin_unlock(&sid_lock);
-	on_each_cpu(flush_tlb_all_local, NULL, 1, 1);
+	on_each_cpu(flush_tlb_all_local, NULL, 1);
 	if (do_recycle) {
 	    spin_lock(&sid_lock);
 	    recycle_sids(recycle_ndirty,recycle_dirty_array);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 34843c31841..647f3e8677d 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -747,7 +747,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)
 	/* Call function on all CPUs.  One of us will make the
 	 * rtas call
 	 */
-	if (on_each_cpu(rtas_percpu_suspend_me, &data, 1, 0))
+	if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
 		data.error = -EINVAL;
 
 	wait_for_completion(&done);
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 368a4934f7e..c3a56d65c5a 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -192,7 +192,7 @@ static void tau_timeout_smp(unsigned long unused)
 
 	/* schedule ourselves to be run again */
 	mod_timer(&tau_timer, jiffies + shrink_timer) ;
-	on_each_cpu(tau_timeout, NULL, 1, 0);
+	on_each_cpu(tau_timeout, NULL, 0);
 }
 
 /*
@@ -234,7 +234,7 @@ int __init TAU_init(void)
 	tau_timer.expires = jiffies + shrink_timer;
 	add_timer(&tau_timer);
 
-	on_each_cpu(TAU_init_smp, NULL, 1, 0);
+	on_each_cpu(TAU_init_smp, NULL, 0);
 
 	printk("Thermal assist unit ");
 #ifdef CONFIG_TAU_INT
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 73401e83739..f1a38a6c1e2 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -322,7 +322,7 @@ void snapshot_timebases(void)
 {
 	if (!cpu_has_feature(CPU_FTR_PURR))
 		return;
-	on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1);
+	on_each_cpu(snapshot_tb_and_purr, NULL, 1);
 }
 
 /*
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index ad928edafb0..2bd12d965db 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -218,7 +218,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 	mb();
 
 	/* XXX this is sub-optimal but will do for now */
-	on_each_cpu(slice_flush_segments, mm, 0, 1);
+	on_each_cpu(slice_flush_segments, mm, 1);
 #ifdef CONFIG_SPU_BASE
 	spu_flush_all_slbs(mm);
 #endif
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 4908dc98f9c..17807acb05d 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -65,7 +65,7 @@ static int op_powerpc_setup(void)
 
 	/* Configure the registers on all cpus.	 If an error occurs on one
 	 * of the cpus, op_per_cpu_rc will be set to the error */
-	on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
+	on_each_cpu(op_powerpc_cpu_setup, NULL, 1);
 
 out:	if (op_per_cpu_rc) {
 		/* error on setup release the performance counter hardware */
@@ -100,7 +100,7 @@ static int op_powerpc_start(void)
 	if (model->global_start)
 		return model->global_start(ctr);
 	if (model->start) {
-		on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
+		on_each_cpu(op_powerpc_cpu_start, NULL, 1);
 		return op_per_cpu_rc;
 	}
 	return -EIO; /* No start function is defined for this
@@ -115,7 +115,7 @@ static inline void op_powerpc_cpu_stop(void *dummy)
 static void op_powerpc_stop(void)
 {
 	if (model->stop)
-		on_each_cpu(op_powerpc_cpu_stop, NULL, 0, 1);
+		on_each_cpu(op_powerpc_cpu_stop, NULL, 1);
         if (model->global_stop)
                 model->global_stop();
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 276b105fb2a..b6781030cfb 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -299,7 +299,7 @@ static void smp_ptlb_callback(void *info)
 
 void smp_ptlb_all(void)
 {
-	on_each_cpu(smp_ptlb_callback, NULL, 0, 1);
+	on_each_cpu(smp_ptlb_callback, NULL, 1);
 }
 EXPORT_SYMBOL(smp_ptlb_all);
 #endif /* ! CONFIG_64BIT */
@@ -347,7 +347,7 @@ void smp_ctl_set_bit(int cr, int bit)
 	memset(&parms.orvals, 0, sizeof(parms.orvals));
 	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
 	parms.orvals[cr] = 1 << bit;
-	on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
+	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
 }
 EXPORT_SYMBOL(smp_ctl_set_bit);
 
@@ -361,7 +361,7 @@ void smp_ctl_clear_bit(int cr, int bit)
 	memset(&parms.orvals, 0, sizeof(parms.orvals));
 	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
 	parms.andvals[cr] = ~(1L << bit);
-	on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
+	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
 }
 EXPORT_SYMBOL(smp_ctl_clear_bit);
 
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index bf7bf2c2236..6037ed2b747 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -909,7 +909,7 @@ static void etr_work_fn(struct work_struct *work)
 	if (!eacr.ea) {
 		/* Both ports offline. Reset everything. */
 		eacr.dp = eacr.es = eacr.sl = 0;
-		on_each_cpu(etr_disable_sync_clock, NULL, 0, 1);
+		on_each_cpu(etr_disable_sync_clock, NULL, 1);
 		del_timer_sync(&etr_timer);
 		etr_update_eacr(eacr);
 		set_bit(ETR_FLAG_EACCES, &etr_flags);
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 71781ba2675..60c50841143 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -197,7 +197,7 @@ static void flush_tlb_all_ipi(void *info)
 
 void flush_tlb_all(void)
 {
-	on_each_cpu(flush_tlb_all_ipi, 0, 1, 1);
+	on_each_cpu(flush_tlb_all_ipi, 0, 1);
 }
 
 static void flush_tlb_mm_ipi(void *mm)
@@ -284,7 +284,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 	fd.addr1 = start;
 	fd.addr2 = end;
-	on_each_cpu(flush_tlb_kernel_range_ipi, (void *)&fd, 1, 1);
+	on_each_cpu(flush_tlb_kernel_range_ipi, (void *)&fd, 1);
 }
 
 static void flush_tlb_page_ipi(void *info)
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 6cfab2e4d34..ebefd2a1437 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -344,7 +344,7 @@ void hugetlb_prefault_arch_hook(struct mm_struct *mm)
 			 * also executing in this address space.
 			 */
 			mm->context.sparc64_ctx_val = ctx;
-			on_each_cpu(context_reload, mm, 0, 0);
+			on_each_cpu(context_reload, mm, 0);
 		}
 		spin_unlock(&ctx_alloc_lock);
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index e07e8c068ae..43b7cb59491 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -363,7 +363,7 @@ static void mcheck_check_cpu(void *info)
 
 static void mcheck_timer(struct work_struct *work)
 {
-	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
+	on_each_cpu(mcheck_check_cpu, NULL, 1);
 
 	/*
 	 * Alert userspace if needed.  If we logged an MCE, reduce the
@@ -612,7 +612,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 	 * Collect entries that were still getting written before the
 	 * synchronize.
 	 */
-	on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
+	on_each_cpu(collect_tscs, cpu_tsc, 1);
 	for (i = next; i < MCE_LOG_LEN; i++) {
 		if (mcelog.entry[i].finished &&
 		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -737,7 +737,7 @@ static void mce_restart(void)
 	if (next_interval)
 		cancel_delayed_work(&mcheck_work);
 	/* Timer race is harmless here */
-	on_each_cpu(mce_init, NULL, 1, 1);
+	on_each_cpu(mce_init, NULL, 1);
 	next_interval = check_interval * HZ;
 	if (next_interval)
 		schedule_delayed_work(&mcheck_work,
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 00ccb6c14ec..cc1fccdd31e 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
 
 static void mce_work_fn(struct work_struct *work)
 {
-	on_each_cpu(mce_checkregs, NULL, 1, 1);
+	on_each_cpu(mce_checkregs, NULL, 1);
 	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
 }
 
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f9ae93adffe..58043f06d7e 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -180,7 +180,7 @@ void disable_lapic_nmi_watchdog(void)
 	if (atomic_read(&nmi_active) <= 0)
 		return;
 
-	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
 	wd_ops->unreserve();
 
 	BUG_ON(atomic_read(&nmi_active) != 0);
@@ -202,7 +202,7 @@ void enable_lapic_nmi_watchdog(void)
 		return;
 	}
 
-	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
 	touch_nmi_watchdog();
 }
 
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 4dc8600d9d2..720640ff36c 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1565,7 +1565,7 @@ void /*__init*/ print_local_APIC(void * dummy)
 
 void print_all_local_APICs (void)
 {
-	on_each_cpu(print_local_APIC, NULL, 1, 1);
+	on_each_cpu(print_local_APIC, NULL, 1);
 }
 
 void /*__init*/ print_PIC(void)
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index ef1a8dfcc52..4504c7f5001 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1146,7 +1146,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
 
 void print_all_local_APICs (void)
 {
-	on_each_cpu(print_local_APIC, NULL, 1, 1);
+	on_each_cpu(print_local_APIC, NULL, 1);
 }
 
 void __apicdebuginit print_PIC(void)
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 5562dab0bd2..11008e0857c 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -218,7 +218,7 @@ static void __acpi_nmi_enable(void *__unused)
 void acpi_nmi_enable(void)
 {
 	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+		on_each_cpu(__acpi_nmi_enable, NULL, 1);
 }
 
 static void __acpi_nmi_disable(void *__unused)
@@ -232,7 +232,7 @@ static void __acpi_nmi_disable(void *__unused)
 void acpi_nmi_disable(void)
 {
 	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+		on_each_cpu(__acpi_nmi_disable, NULL, 1);
 }
 
 void setup_apic_nmi_watchdog(void *unused)
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 2f1e4f503c9..bbdcb17b3df 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -225,7 +225,7 @@ static void __acpi_nmi_enable(void *__unused)
 void acpi_nmi_enable(void)
 {
 	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+		on_each_cpu(__acpi_nmi_enable, NULL, 1);
 }
 
 static void __acpi_nmi_disable(void *__unused)
@@ -239,7 +239,7 @@ static void __acpi_nmi_disable(void *__unused)
 void acpi_nmi_disable(void)
 {
 	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+		on_each_cpu(__acpi_nmi_disable, NULL, 1);
 }
 
 void setup_apic_nmi_watchdog(void *unused)
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 9bb2363851a..fec1ecedc9b 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -238,6 +238,6 @@ static void do_flush_tlb_all(void *info)
 
 void flush_tlb_all(void)
 {
-	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
 
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index a1f07d79320..184a367516d 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -270,5 +270,5 @@ static void do_flush_tlb_all(void *info)
 
 void flush_tlb_all(void)
 {
-	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0a03d57f9b3..0dcae19ed62 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -301,7 +301,7 @@ static int __init vsyscall_init(void)
 #ifdef CONFIG_SYSCTL
 	register_sysctl_table(kernel_root_table2);
 #endif
-	on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+	on_each_cpu(cpu_vsyscall_init, NULL, 1);
 	hotcpu_notifier(cpu_vsyscall_notifier, 0);
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5534fe59b5f..10ce6ee4c49 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2968,7 +2968,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	if (vmx->vmcs) {
-		on_each_cpu(__vcpu_clear, vmx, 0, 1);
+		on_each_cpu(__vcpu_clear, vmx, 1);
 		free_vmcs(vmx->vmcs);
 		vmx->vmcs = NULL;
 	}
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 04f596eab74..abea08459a7 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1072,7 +1072,7 @@ static void do_flush_tlb_all(void *info)
 /* flush the TLB of every active CPU in the system */
 void flush_tlb_all(void)
 {
-	on_each_cpu(do_flush_tlb_all, 0, 1, 1);
+	on_each_cpu(do_flush_tlb_all, 0, 1);
 }
 
 /* used to set up the trampoline for other CPUs when the memory manager
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 60bcb5b6a37..9b836ba9ded 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -106,7 +106,7 @@ static void cpa_flush_all(unsigned long cache)
 {
 	BUG_ON(irqs_disabled());
 
-	on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
+	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
 
 static void __cpa_flush_range(void *arg)
@@ -127,7 +127,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	BUG_ON(irqs_disabled());
 	WARN_ON(PAGE_ALIGN(start) != start);
 
-	on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+	on_each_cpu(__cpa_flush_range, NULL, 1);
 
 	if (!cache)
 		return;
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index cc48d3fde54..3238ad32ffd 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -218,8 +218,8 @@ static int nmi_setup(void)
 		}
 
 	}
-	on_each_cpu(nmi_save_registers, NULL, 0, 1);
-	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+	on_each_cpu(nmi_save_registers, NULL, 1);
+	on_each_cpu(nmi_cpu_setup, NULL, 1);
 	nmi_enabled = 1;
 	return 0;
 }
@@ -271,7 +271,7 @@ static void nmi_shutdown(void)
 {
 	struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
 	nmi_enabled = 0;
-	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
 	unregister_die_notifier(&profile_exceptions_nb);
 	model->shutdown(msrs);
 	free_msrs();
@@ -285,7 +285,7 @@ static void nmi_cpu_start(void *dummy)
 
 static int nmi_start(void)
 {
-	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+	on_each_cpu(nmi_cpu_start, NULL, 1);
 	return 0;
 }
 
@@ -297,7 +297,7 @@ static void nmi_cpu_stop(void *dummy)
 
 static void nmi_stop(void)
 {
-	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+	on_each_cpu(nmi_cpu_stop, NULL, 1);
 }
 
 struct op_counter_config counter_config[OP_MAX_COUNTER];
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 564daaa6c7d..eaa1a355bb3 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -1249,7 +1249,7 @@ static void ipi_handler(void *null)
 
 void global_cache_flush(void)
 {
-	if (on_each_cpu(ipi_handler, NULL, 1, 1) != 0)
+	if (on_each_cpu(ipi_handler, NULL, 1) != 0)
 		panic(PFX "timed out waiting for the other CPUs!\n");
 }
 EXPORT_SYMBOL(global_cache_flush);
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 2e554a4ab33..95dfda52b4f 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -478,7 +478,7 @@ void __init lguest_arch_host_init(void)
 		cpu_had_pge = 1;
 		/* adjust_pge is a helper function which sets or unsets the PGE
 		 * bit on its CPU, depending on the argument (0 == unset). */
-		on_each_cpu(adjust_pge, (void *)0, 0, 1);
+		on_each_cpu(adjust_pge, (void *)0, 1);
 		/* Turn off the feature in the global feature set. */
 		clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
 	}
@@ -493,7 +493,7 @@ void __exit lguest_arch_host_fini(void)
 	if (cpu_had_pge) {
 		set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
 		/* adjust_pge's argument "1" means set PGE. */
-		on_each_cpu(adjust_pge, (void *)1, 0, 1);
+		on_each_cpu(adjust_pge, (void *)1, 1);
 	}
 	put_online_cpus();
 }
diff --git a/fs/buffer.c b/fs/buffer.c
index a073f3f4f01..5c23ef560d0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1464,7 +1464,7 @@ static void invalidate_bh_lru(void *arg)
 	
 void invalidate_bh_lrus(void)
 {
-	on_each_cpu(invalidate_bh_lru, NULL, 1, 1);
+	on_each_cpu(invalidate_bh_lru, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
 
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 338cad1b954..55261101d09 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -89,7 +89,7 @@ static inline void init_call_single_data(void)
 /*
  * Call a function on all processors
  */
-int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait);
+int on_each_cpu(void (*func) (void *info), void *info, int wait);
 
 #define MSG_ALL_BUT_SELF	0x8000	/* Assume <32768 CPU's */
 #define MSG_ALL			0x8001
@@ -121,7 +121,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
 }
 #define smp_call_function(func, info, wait) \
 			(up_smp_call_function(func, info))
-#define on_each_cpu(func,info,retry,wait)	\
+#define on_each_cpu(func,info,wait)		\
 	({					\
 		local_irq_disable();		\
 		func(info);			\
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 421be5fe5cc..50e8616d795 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -623,7 +623,7 @@ static void retrigger_next_event(void *arg)
 void clock_was_set(void)
 {
 	/* Retrigger the CPU local events everywhere */
-	on_each_cpu(retrigger_next_event, NULL, 0, 1);
+	on_each_cpu(retrigger_next_event, NULL, 1);
 }
 
 /*
diff --git a/kernel/profile.c b/kernel/profile.c
index ae7ead82cbc..58926411eb2 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -252,7 +252,7 @@ static void profile_flip_buffers(void)
 	mutex_lock(&profile_flip_mutex);
 	j = per_cpu(cpu_profile_flip, get_cpu());
 	put_cpu();
-	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+	on_each_cpu(__profile_flip_buffers, NULL, 1);
 	for_each_online_cpu(cpu) {
 		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
 		for (i = 0; i < NR_PROFILE_HIT; ++i) {
@@ -275,7 +275,7 @@ static void profile_discard_flip_buffers(void)
 	mutex_lock(&profile_flip_mutex);
 	i = per_cpu(cpu_profile_flip, get_cpu());
 	put_cpu();
-	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+	on_each_cpu(__profile_flip_buffers, NULL, 1);
 	for_each_online_cpu(cpu) {
 		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
 		memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
@@ -558,7 +558,7 @@ static int __init create_hash_tables(void)
 out_cleanup:
 	prof_on = 0;
 	smp_mb();
-	on_each_cpu(profile_nop, NULL, 0, 1);
+	on_each_cpu(profile_nop, NULL, 1);
 	for_each_online_cpu(cpu) {
 		struct page *page;
 
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c09605f8d16..6addab5e6d8 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -127,7 +127,7 @@ void rcu_barrier(void)
 	 * until all the callbacks are queued.
 	 */
 	rcu_read_lock();
-	on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+	on_each_cpu(rcu_barrier_func, NULL, 1);
 	rcu_read_unlock();
 	wait_for_completion(&rcu_barrier_completion);
 	mutex_unlock(&rcu_barrier_mutex);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index d73afb4764e..c159fd09477 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -674,7 +674,7 @@ __init int spawn_ksoftirqd(void)
 /*
  * Call a function on all processors
  */
-int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
+int on_each_cpu(void (*func) (void *info), void *info, int wait)
 {
 	int ret = 0;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2f552955a02..53242344a77 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -918,7 +918,7 @@ void drain_local_pages(void *arg)
  */
 void drain_all_pages(void)
 {
-	on_each_cpu(drain_local_pages, NULL, 0, 1);
+	on_each_cpu(drain_local_pages, NULL, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
diff --git a/mm/slab.c b/mm/slab.c
index 046607f05f3..0772abb412b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2454,7 +2454,7 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 	struct kmem_list3 *l3;
 	int node;
 
-	on_each_cpu(do_drain, cachep, 1, 1);
+	on_each_cpu(do_drain, cachep, 1);
 	check_irq_on();
 	for_each_online_node(node) {
 		l3 = cachep->nodelists[node];
@@ -3939,7 +3939,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	}
 	new->cachep = cachep;
 
-	on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
+	on_each_cpu(do_ccupdate_local, (void *)new, 1);
 
 	check_irq_on();
 	cachep->batchcount = batchcount;
diff --git a/mm/slub.c b/mm/slub.c
index 0987d1cd943..44715eb70c0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1497,7 +1497,7 @@ static void flush_cpu_slab(void *d)
 static void flush_all(struct kmem_cache *s)
 {
 #ifdef CONFIG_SMP
-	on_each_cpu(flush_cpu_slab, s, 1, 1);
+	on_each_cpu(flush_cpu_slab, s, 1);
 #else
 	unsigned long flags;
 
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 94d5a45c3a5..a178e27e7b1 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -545,7 +545,7 @@ out:
  */
 static void iucv_disable(void)
 {
-	on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1);
+	on_each_cpu(iucv_retrieve_cpu, NULL, 1);
 	kfree(iucv_path_table);
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ea1f595f8a8..d4eae6af073 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1286,7 +1286,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		 * in vmx root mode.
 		 */
 		printk(KERN_INFO "kvm: exiting hardware virtualization\n");
-		on_each_cpu(hardware_disable, NULL, 0, 1);
+		on_each_cpu(hardware_disable, NULL, 1);
 	}
 	return NOTIFY_OK;
 }
@@ -1479,7 +1479,7 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 			goto out_free_1;
 	}
 
-	on_each_cpu(hardware_enable, NULL, 0, 1);
+	on_each_cpu(hardware_enable, NULL, 1);
 	r = register_cpu_notifier(&kvm_cpu_notifier);
 	if (r)
 		goto out_free_2;
@@ -1525,7 +1525,7 @@ out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_2:
-	on_each_cpu(hardware_disable, NULL, 0, 1);
+	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
 out_free_0:
@@ -1547,7 +1547,7 @@ void kvm_exit(void)
 	sysdev_class_unregister(&kvm_sysdev_class);
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
-	on_each_cpu(hardware_disable, NULL, 0, 1);
+	on_each_cpu(hardware_disable, NULL, 1);
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	kvm_exit_debug();
-- 
cgit v1.2.3-70-g09d2


From c8fcd905a59a535bff93a120ac44b09ce24e13e6 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:22:57 -0300
Subject: rfkill: fix minor typo in kernel doc

Fix a minor typo in an exported function documentation

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 4e10a95de83..f95081a4a02 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -412,7 +412,7 @@ int rfkill_register(struct rfkill *rfkill)
 EXPORT_SYMBOL(rfkill_register);
 
 /**
- * rfkill_unregister - Uegister a rfkill structure.
+ * rfkill_unregister - Unregister a rfkill structure.
  * @rfkill: rfkill structure to be unregistered
  *
  * This function should be called by the network driver during device
-- 
cgit v1.2.3-70-g09d2


From 28f089c18464810ec9e91ee10a89adbb02ad7765 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:22:58 -0300
Subject: rfkill: handle SW_RFKILL_ALL events

Teach rfkill-input how to handle SW_RFKILL_ALL events (new name for the
SW_RADIO event).

SW_RFKILL_ALL is an absolute enable-or-disable command that is tied to all
radios in a system.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill-input.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index e4b051dbed6..9d6c9255bf2 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -55,6 +55,22 @@ static void rfkill_task_handler(struct work_struct *work)
 	mutex_unlock(&task->mutex);
 }
 
+static void rfkill_schedule_set(struct rfkill_task *task,
+				enum rfkill_state desired_state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&task->lock, flags);
+
+	if (time_after(jiffies, task->last + msecs_to_jiffies(200))) {
+		task->desired_state = desired_state;
+		task->last = jiffies;
+		schedule_work(&task->work);
+	}
+
+	spin_unlock_irqrestore(&task->lock, flags);
+}
+
 static void rfkill_schedule_toggle(struct rfkill_task *task)
 {
 	unsigned long flags;
@@ -87,9 +103,9 @@ static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB);
 static DEFINE_RFKILL_TASK(rfkill_wimax, RFKILL_TYPE_WIMAX);
 
 static void rfkill_event(struct input_handle *handle, unsigned int type,
-			unsigned int code, int down)
+			unsigned int code, int data)
 {
-	if (type == EV_KEY && down == 1) {
+	if (type == EV_KEY && data == 1) {
 		switch (code) {
 		case KEY_WLAN:
 			rfkill_schedule_toggle(&rfkill_wlan);
@@ -106,6 +122,26 @@ static void rfkill_event(struct input_handle *handle, unsigned int type,
 		default:
 			break;
 		}
+	} else if (type == EV_SW) {
+		switch (code) {
+		case SW_RFKILL_ALL:
+			/* EVERY radio type. data != 0 means radios ON */
+			rfkill_schedule_set(&rfkill_wimax,
+					    (data)? RFKILL_STATE_ON:
+						    RFKILL_STATE_OFF);
+			rfkill_schedule_set(&rfkill_uwb,
+					    (data)? RFKILL_STATE_ON:
+						    RFKILL_STATE_OFF);
+			rfkill_schedule_set(&rfkill_bt,
+					    (data)? RFKILL_STATE_ON:
+						    RFKILL_STATE_OFF);
+			rfkill_schedule_set(&rfkill_wlan,
+					    (data)? RFKILL_STATE_ON:
+						    RFKILL_STATE_OFF);
+			break;
+		default:
+			break;
+		}
 	}
 }
 
@@ -168,6 +204,11 @@ static const struct input_device_id rfkill_ids[] = {
 		.evbit = { BIT_MASK(EV_KEY) },
 		.keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) },
 	},
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT,
+		.evbit = { BIT(EV_SW) },
+		.swbit = { [BIT_WORD(SW_RFKILL_ALL)] = BIT_MASK(SW_RFKILL_ALL) },
+	},
 	{ }
 };
 
-- 
cgit v1.2.3-70-g09d2


From e954b0b85b9e737564b8ad9738de5816747b5901 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:22:59 -0300
Subject: rfkill: add parameter to disable radios by default

Currently, radios are always enabled when their rfkill interface is
registered.  This is not optimal, the safest state for a radio is to be
offline unless the user turns it on.

Add a module parameter that causes all radios to be disabled when their
rfkill interface is registered.  The module default is not changed so
unless the parameter is used, radios will still be forced to their enabled
state when they are registered.

The new rfkill module parameter is called "default_state".

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index f95081a4a02..3edc585dcfa 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -39,6 +39,11 @@ MODULE_LICENSE("GPL");
 static LIST_HEAD(rfkill_list);	/* list of registered rf switches */
 static DEFINE_MUTEX(rfkill_mutex);
 
+static unsigned int rfkill_default_state = RFKILL_STATE_ON;
+module_param_named(default_state, rfkill_default_state, uint, 0444);
+MODULE_PARM_DESC(default_state,
+		 "Default initial state for all radio types, 0 = radio off");
+
 static enum rfkill_state rfkill_states[RFKILL_TYPE_MAX];
 
 
@@ -436,8 +441,12 @@ static int __init rfkill_init(void)
 	int error;
 	int i;
 
+	if (rfkill_default_state != RFKILL_STATE_OFF &&
+	    rfkill_default_state != RFKILL_STATE_ON)
+		return -EINVAL;
+
 	for (i = 0; i < ARRAY_SIZE(rfkill_states); i++)
-		rfkill_states[i] = RFKILL_STATE_ON;
+		rfkill_states[i] = rfkill_default_state;
 
 	error = class_register(&rfkill_class);
 	if (error) {
-- 
cgit v1.2.3-70-g09d2


From 801e49af4c1a9b988ba0d25de2b368c99c3bf2b3 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:23:00 -0300
Subject: rfkill: add read-write rfkill switch support

Currently, rfkill support for read/write rfkill switches is hacked through
a round-trip over the input layer and rfkill-input to let a driver sync
rfkill->state to hardware changes.

This is buggy and sub-optimal.  It causes real problems.  It is best to
think of the rfkill class as supporting only write-only switches at the
moment.

In order to implement the read/write functionality properly:

Add a get_state() hook that is called by the class every time it needs to
fetch the current state of the switch.  Add a call to this hook every time
the *current* state of the radio plays a role in a decision.

Also add a force_state() method that can be used to forcefully syncronize
the class' idea of the current state of the switch.  This allows for a
faster implementation of the read/write functionality, as a driver which
get events on switch changes can avoid the need for a get_state() hook.

If the get_state() hook is left as NULL, current behaviour is maintained,
so this change is fully backwards compatible with the current rfkill
drivers.

For hardware that issues events when the rfkill state changes, leave
get_state() NULL in the rfkill struct, set the initial state properly
before registering with the rfkill class, and use the force_state() method
in the driver to keep the rfkill interface up-to-date.

get_state() can be called by the class from atomic context. It must not
sleep.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h |  5 +++++
 net/rfkill/rfkill.c    | 53 +++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index ca89ae1b021..844e9611486 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -61,6 +61,8 @@ enum rfkill_state {
  * @data: Pointer to the RF button drivers private data which will be
  *	passed along when toggling radio state.
  * @toggle_radio(): Mandatory handler to control state of the radio.
+ * @get_state(): handler to read current radio state from hardware,
+ *      may be called from atomic context, should return 0 on success.
  * @led_trigger: A LED trigger for this button's LED.
  * @dev: Device structure integrating the switch into device tree.
  * @node: Used to place switch into list of all switches known to the
@@ -80,6 +82,7 @@ struct rfkill {
 
 	void *data;
 	int (*toggle_radio)(void *data, enum rfkill_state state);
+	int (*get_state)(void *data, enum rfkill_state *state);
 
 #ifdef CONFIG_RFKILL_LEDS
 	struct led_trigger led_trigger;
@@ -95,6 +98,8 @@ void rfkill_free(struct rfkill *rfkill);
 int rfkill_register(struct rfkill *rfkill);
 void rfkill_unregister(struct rfkill *rfkill);
 
+int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state);
+
 /**
  * rfkill_get_led_name - Get the LED trigger name for the button's LED.
  * This function might return a NULL pointer if registering of the
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 3edc585dcfa..4ae4486c77e 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -62,19 +62,39 @@ static void rfkill_led_trigger(struct rfkill *rfkill,
 #endif /* CONFIG_RFKILL_LEDS */
 }
 
+static void update_rfkill_state(struct rfkill *rfkill)
+{
+	enum rfkill_state newstate;
+
+	if (rfkill->get_state) {
+		mutex_lock(&rfkill->mutex);
+		if (!rfkill->get_state(rfkill->data, &newstate))
+			rfkill->state = newstate;
+		mutex_unlock(&rfkill->mutex);
+	}
+}
+
 static int rfkill_toggle_radio(struct rfkill *rfkill,
 				enum rfkill_state state)
 {
 	int retval = 0;
+	enum rfkill_state oldstate, newstate;
+
+	oldstate = rfkill->state;
+
+	if (rfkill->get_state &&
+	    !rfkill->get_state(rfkill->data, &newstate))
+		rfkill->state = newstate;
 
 	if (state != rfkill->state) {
 		retval = rfkill->toggle_radio(rfkill->data, state);
-		if (!retval) {
+		if (!retval)
 			rfkill->state = state;
-			rfkill_led_trigger(rfkill, state);
-		}
 	}
 
+	if (rfkill->state != oldstate)
+		rfkill_led_trigger(rfkill, rfkill->state);
+
 	return retval;
 }
 
@@ -105,6 +125,32 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
 }
 EXPORT_SYMBOL(rfkill_switch_all);
 
+/**
+ * rfkill_force_state - Force the internal rfkill radio state
+ * @rfkill: pointer to the rfkill class to modify.
+ * @state: the current radio state the class should be forced to.
+ *
+ * This function updates the internal state of the radio cached
+ * by the rfkill class.  It should be used when the driver gets
+ * a notification by the firmware/hardware of the current *real*
+ * state of the radio rfkill switch.
+ *
+ * It may not be called from an atomic context.
+ */
+int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state)
+{
+	if (state != RFKILL_STATE_OFF &&
+	    state != RFKILL_STATE_ON)
+		return -EINVAL;
+
+	mutex_lock(&rfkill->mutex);
+	rfkill->state = state;
+	mutex_unlock(&rfkill->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(rfkill_force_state);
+
 static ssize_t rfkill_name_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
@@ -147,6 +193,7 @@ static ssize_t rfkill_state_show(struct device *dev,
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 
+	update_rfkill_state(rfkill);
 	return sprintf(buf, "%d\n", rfkill->state);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 477576a073699783abb53ae14993d5d41c66301d Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:23:01 -0300
Subject: rfkill: add the WWAN radio type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unfortunately, instead of adding a generic Wireless WAN type, a technology-
specific type (WiMAX) was added.  That's useless for other WWAN devices,
such as EDGE, UMTS, X-RTT and other such radios.

Add a WWAN rfkill type for generic wireless WAN devices.  No keys are added
as most devices really want to use KEY_WLAN for WWAN control (in a cycle of
none, WLAN, WWAN, WLAN+WWAN) and need no specific keycode added.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Iñaky Pérez-González <inaky.perez-gonzalez@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h    | 2 ++
 net/rfkill/rfkill-input.c | 4 ++++
 net/rfkill/rfkill.c       | 3 +++
 3 files changed, 9 insertions(+)

(limited to 'net')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 844e9611486..c0cab7d3782 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -34,12 +34,14 @@
  * RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device.
  * RFKILL_TYPE_UWB: switch is on a ultra wideband device.
  * RFKILL_TYPE_WIMAX: switch is on a WiMAX device.
+ * RFKILL_TYPE_WWAN: switch is on a wireless WAN device.
  */
 enum rfkill_type {
 	RFKILL_TYPE_WLAN ,
 	RFKILL_TYPE_BLUETOOTH,
 	RFKILL_TYPE_UWB,
 	RFKILL_TYPE_WIMAX,
+	RFKILL_TYPE_WWAN,
 	RFKILL_TYPE_MAX,
 };
 
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index 9d6c9255bf2..29c13d308b3 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -101,6 +101,7 @@ static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN);
 static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH);
 static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB);
 static DEFINE_RFKILL_TASK(rfkill_wimax, RFKILL_TYPE_WIMAX);
+static DEFINE_RFKILL_TASK(rfkill_wwan, RFKILL_TYPE_WWAN);
 
 static void rfkill_event(struct input_handle *handle, unsigned int type,
 			unsigned int code, int data)
@@ -126,6 +127,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type,
 		switch (code) {
 		case SW_RFKILL_ALL:
 			/* EVERY radio type. data != 0 means radios ON */
+			rfkill_schedule_set(&rfkill_wwan,
+					    (data)? RFKILL_STATE_ON:
+						    RFKILL_STATE_OFF);
 			rfkill_schedule_set(&rfkill_wimax,
 					    (data)? RFKILL_STATE_ON:
 						    RFKILL_STATE_OFF);
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 4ae4486c77e..79f3bbb027f 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -180,6 +180,9 @@ static ssize_t rfkill_type_show(struct device *dev,
 	case RFKILL_TYPE_WIMAX:
 		type = "wimax";
 		break;
+	case RFKILL_TYPE_WWAN:
+		type = "wwan";
+		break;
 	default:
 		BUG();
 	}
-- 
cgit v1.2.3-70-g09d2


From 526324b61a9667ed9a71f0a8a8899cf675346c76 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:23:02 -0300
Subject: rfkill: rework suspend and resume handlers

The resume handler should reset the wireless transmitter rfkill
state to exactly what it was when the system was suspended.  Do it,
and do it using the normal routines for state change while at it.

The suspend handler should force-switch the transmitter to blocked
state, ignoring caches.  Do it.

Also take an opportunity shot to rfkill_remove_switch() and also
force the transmitter to blocked state there, bypassing caches.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 79f3bbb027f..fb566902030 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -75,24 +75,25 @@ static void update_rfkill_state(struct rfkill *rfkill)
 }
 
 static int rfkill_toggle_radio(struct rfkill *rfkill,
-				enum rfkill_state state)
+				enum rfkill_state state,
+				int force)
 {
 	int retval = 0;
 	enum rfkill_state oldstate, newstate;
 
 	oldstate = rfkill->state;
 
-	if (rfkill->get_state &&
+	if (rfkill->get_state && !force &&
 	    !rfkill->get_state(rfkill->data, &newstate))
 		rfkill->state = newstate;
 
-	if (state != rfkill->state) {
+	if (force || state != rfkill->state) {
 		retval = rfkill->toggle_radio(rfkill->data, state);
 		if (!retval)
 			rfkill->state = state;
 	}
 
-	if (rfkill->state != oldstate)
+	if (force || rfkill->state != oldstate)
 		rfkill_led_trigger(rfkill, rfkill->state);
 
 	return retval;
@@ -107,7 +108,6 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
  * a specific switch is claimed by userspace in which case it is
  * left alone.
  */
-
 void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
 {
 	struct rfkill *rfkill;
@@ -118,7 +118,7 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
 
 	list_for_each_entry(rfkill, &rfkill_list, node) {
 		if ((!rfkill->user_claim) && (rfkill->type == type))
-			rfkill_toggle_radio(rfkill, state);
+			rfkill_toggle_radio(rfkill, state, 0);
 	}
 
 	mutex_unlock(&rfkill_mutex);
@@ -214,7 +214,8 @@ static ssize_t rfkill_state_store(struct device *dev,
 	if (mutex_lock_interruptible(&rfkill->mutex))
 		return -ERESTARTSYS;
 	error = rfkill_toggle_radio(rfkill,
-			state ? RFKILL_STATE_ON : RFKILL_STATE_OFF);
+			state ? RFKILL_STATE_ON : RFKILL_STATE_OFF,
+			0);
 	mutex_unlock(&rfkill->mutex);
 
 	return error ? error : count;
@@ -255,7 +256,8 @@ static ssize_t rfkill_claim_store(struct device *dev,
 	if (rfkill->user_claim != claim) {
 		if (!claim)
 			rfkill_toggle_radio(rfkill,
-					    rfkill_states[rfkill->type]);
+					    rfkill_states[rfkill->type],
+					    0);
 		rfkill->user_claim = claim;
 	}
 
@@ -288,12 +290,11 @@ static int rfkill_suspend(struct device *dev, pm_message_t state)
 
 	if (dev->power.power_state.event != state.event) {
 		if (state.event & PM_EVENT_SLEEP) {
-			mutex_lock(&rfkill->mutex);
-
-			if (rfkill->state == RFKILL_STATE_ON)
-				rfkill->toggle_radio(rfkill->data,
-						     RFKILL_STATE_OFF);
+			/* Stop transmitter, keep state, no notifies */
+			update_rfkill_state(rfkill);
 
+			mutex_lock(&rfkill->mutex);
+			rfkill->toggle_radio(rfkill->data, RFKILL_STATE_OFF);
 			mutex_unlock(&rfkill->mutex);
 		}
 
@@ -310,8 +311,8 @@ static int rfkill_resume(struct device *dev)
 	if (dev->power.power_state.event != PM_EVENT_ON) {
 		mutex_lock(&rfkill->mutex);
 
-		if (rfkill->state == RFKILL_STATE_ON)
-			rfkill->toggle_radio(rfkill->data, RFKILL_STATE_ON);
+		/* restore radio state AND notify everybody */
+		rfkill_toggle_radio(rfkill, rfkill->state, 1);
 
 		mutex_unlock(&rfkill->mutex);
 	}
@@ -338,7 +339,7 @@ static int rfkill_add_switch(struct rfkill *rfkill)
 
 	mutex_lock(&rfkill_mutex);
 
-	error = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type]);
+	error = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type], 0);
 	if (!error)
 		list_add_tail(&rfkill->node, &rfkill_list);
 
@@ -351,7 +352,7 @@ static void rfkill_remove_switch(struct rfkill *rfkill)
 {
 	mutex_lock(&rfkill_mutex);
 	list_del_init(&rfkill->node);
-	rfkill_toggle_radio(rfkill, RFKILL_STATE_OFF);
+	rfkill_toggle_radio(rfkill, RFKILL_STATE_OFF, 1);
 	mutex_unlock(&rfkill_mutex);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 79399a8d1908f6a406e82d23c5a9937e1722ed3a Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:23:03 -0300
Subject: rfkill: add notifier chains support

Add a notifier chain for use by the rfkill class.  This notifier chain
signals the following events (more to be added when needed):

  1. rfkill: rfkill device state has changed

A pointer to the rfkill struct will be passed as a parameter.

The notifier message types have been added to include/linux/rfkill.h
instead of to include/linux/notifier.h in order to avoid the madness of
modifying a header used globally (and that triggers an almost full tree
rebuild every time it is touched) with information that is of interest only
to code that includes the rfkill.h header.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h |  7 +++++
 net/rfkill/rfkill.c    | 70 +++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 74 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index c0cab7d3782..98667becdee 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -117,4 +117,11 @@ static inline char *rfkill_get_led_name(struct rfkill *rfkill)
 #endif
 }
 
+/* rfkill notification chain */
+#define RFKILL_STATE_CHANGED		0x0001	/* state of a normal rfkill
+						   switch has changed */
+
+int register_rfkill_notifier(struct notifier_block *nb);
+int unregister_rfkill_notifier(struct notifier_block *nb);
+
 #endif /* RFKILL_H */
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index fb566902030..a561e350a70 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -46,6 +46,49 @@ MODULE_PARM_DESC(default_state,
 
 static enum rfkill_state rfkill_states[RFKILL_TYPE_MAX];
 
+static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list);
+
+
+/**
+ * register_rfkill_notifier - Add notifier to rfkill notifier chain
+ * @nb: pointer to the new entry to add to the chain
+ *
+ * See blocking_notifier_chain_register() for return value and further
+ * observations.
+ *
+ * Adds a notifier to the rfkill notifier chain.  The chain will be
+ * called with a pointer to the relevant rfkill structure as a parameter,
+ * refer to include/linux/rfkill.h for the possible events.
+ *
+ * Notifiers added to this chain are to always return NOTIFY_DONE.  This
+ * chain is a blocking notifier chain: notifiers can sleep.
+ *
+ * Calls to this chain may have been done through a workqueue.  One must
+ * assume unordered asynchronous behaviour, there is no way to know if
+ * actions related to the event that generated the notification have been
+ * carried out already.
+ */
+int register_rfkill_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&rfkill_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_rfkill_notifier);
+
+/**
+ * unregister_rfkill_notifier - remove notifier from rfkill notifier chain
+ * @nb: pointer to the entry to remove from the chain
+ *
+ * See blocking_notifier_chain_unregister() for return value and further
+ * observations.
+ *
+ * Removes a notifier from the rfkill notifier chain.
+ */
+int unregister_rfkill_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&rfkill_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_rfkill_notifier);
+
 
 static void rfkill_led_trigger(struct rfkill *rfkill,
 			       enum rfkill_state state)
@@ -62,14 +105,25 @@ static void rfkill_led_trigger(struct rfkill *rfkill,
 #endif /* CONFIG_RFKILL_LEDS */
 }
 
+static void notify_rfkill_state_change(struct rfkill *rfkill)
+{
+	blocking_notifier_call_chain(&rfkill_notifier_list,
+			RFKILL_STATE_CHANGED,
+			rfkill);
+}
+
 static void update_rfkill_state(struct rfkill *rfkill)
 {
-	enum rfkill_state newstate;
+	enum rfkill_state newstate, oldstate;
 
 	if (rfkill->get_state) {
 		mutex_lock(&rfkill->mutex);
-		if (!rfkill->get_state(rfkill->data, &newstate))
+		if (!rfkill->get_state(rfkill->data, &newstate)) {
+			oldstate = rfkill->state;
 			rfkill->state = newstate;
+			if (oldstate != newstate)
+				notify_rfkill_state_change(rfkill);
+		}
 		mutex_unlock(&rfkill->mutex);
 	}
 }
@@ -93,8 +147,10 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
 			rfkill->state = state;
 	}
 
-	if (force || rfkill->state != oldstate)
+	if (force || rfkill->state != oldstate) {
 		rfkill_led_trigger(rfkill, rfkill->state);
+		notify_rfkill_state_change(rfkill);
+	}
 
 	return retval;
 }
@@ -139,12 +195,20 @@ EXPORT_SYMBOL(rfkill_switch_all);
  */
 int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state)
 {
+	enum rfkill_state oldstate;
+
 	if (state != RFKILL_STATE_OFF &&
 	    state != RFKILL_STATE_ON)
 		return -EINVAL;
 
 	mutex_lock(&rfkill->mutex);
+
+	oldstate = rfkill->state;
 	rfkill->state = state;
+
+	if (state != oldstate)
+		notify_rfkill_state_change(rfkill);
+
 	mutex_unlock(&rfkill->mutex);
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 99c632e5a304e1f76350eb9e8b2493514de8b60c Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:23:04 -0300
Subject: rfkill: add type string helper

We will need access to the rfkill switch type in string format for more
than just sysfs.  Therefore, move it to a generic helper.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index a561e350a70..3c7773475ea 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -224,34 +224,31 @@ static ssize_t rfkill_name_show(struct device *dev,
 	return sprintf(buf, "%s\n", rfkill->name);
 }
 
-static ssize_t rfkill_type_show(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
+static const char *rfkill_get_type_str(enum rfkill_type type)
 {
-	struct rfkill *rfkill = to_rfkill(dev);
-	const char *type;
-
-	switch (rfkill->type) {
+	switch (type) {
 	case RFKILL_TYPE_WLAN:
-		type = "wlan";
-		break;
+		return "wlan";
 	case RFKILL_TYPE_BLUETOOTH:
-		type = "bluetooth";
-		break;
+		return "bluetooth";
 	case RFKILL_TYPE_UWB:
-		type = "ultrawideband";
-		break;
+		return "ultrawideband";
 	case RFKILL_TYPE_WIMAX:
-		type = "wimax";
-		break;
+		return "wimax";
 	case RFKILL_TYPE_WWAN:
-		type = "wwan";
-		break;
+		return "wwan";
 	default:
 		BUG();
 	}
+}
+
+static ssize_t rfkill_type_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
 
-	return sprintf(buf, "%s\n", type);
+	return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type));
 }
 
 static ssize_t rfkill_state_show(struct device *dev,
-- 
cgit v1.2.3-70-g09d2


From ffb67c34e436fb163c4067936ccec797354fa6c6 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:23:05 -0300
Subject: rfkill: add uevent notifications

Use the notification chains to also send uevents, so that userspace can be
notified of state changes of every rfkill switch.

Userspace should use these events for OSD/status report applications and
rfkill GUI frontends.  HAL might want to broadcast them over DBUS, for
example.  It might be also useful for userspace implementations of
rfkill-input, or to use HAL as the platform driver which promotes rfkill
switch change events into input events (to synchronize all other switches)
when necessary for platforms that lack a convenient platform-specific
kernel module to do it.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 3c7773475ea..dd1c3f18f31 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -386,12 +386,51 @@ static int rfkill_resume(struct device *dev)
 #define rfkill_resume NULL
 #endif
 
+static int rfkill_blocking_uevent_notifier(struct notifier_block *nb,
+					unsigned long eventid,
+					void *data)
+{
+	struct rfkill *rfkill = (struct rfkill *)data;
+
+	switch (eventid) {
+	case RFKILL_STATE_CHANGED:
+		kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block rfkill_blocking_uevent_nb = {
+	.notifier_call	= rfkill_blocking_uevent_notifier,
+	.priority	= 0,
+};
+
+static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	int error;
+
+	error = add_uevent_var(env, "RFKILL_NAME=%s", rfkill->name);
+	if (error)
+		return error;
+	error = add_uevent_var(env, "RFKILL_TYPE=%s",
+				rfkill_get_type_str(rfkill->type));
+	if (error)
+		return error;
+	error = add_uevent_var(env, "RFKILL_STATE=%d", rfkill->state);
+	return error;
+}
+
 static struct class rfkill_class = {
 	.name		= "rfkill",
 	.dev_release	= rfkill_release,
 	.dev_attrs	= rfkill_dev_attrs,
 	.suspend	= rfkill_suspend,
 	.resume		= rfkill_resume,
+	.dev_uevent	= rfkill_dev_uevent,
 };
 
 static int rfkill_add_switch(struct rfkill *rfkill)
@@ -566,11 +605,14 @@ static int __init rfkill_init(void)
 		return error;
 	}
 
+	register_rfkill_notifier(&rfkill_blocking_uevent_nb);
+
 	return 0;
 }
 
 static void __exit rfkill_exit(void)
 {
+	unregister_rfkill_notifier(&rfkill_blocking_uevent_nb);
 	class_unregister(&rfkill_class);
 }
 
-- 
cgit v1.2.3-70-g09d2


From fbc6af2f3c46df4722f5161d0ad20dd87cd7dfa9 Mon Sep 17 00:00:00 2001
From: Fabien Crespel <fabien@crespel.net>
Date: Mon, 23 Jun 2008 17:23:06 -0300
Subject: rfkill: drop current_state from tasks in rfkill-input

The whole current_state thing seems completely useless and a source of
problems in rfkill-input, since state comparison is already done in rfkill,
and rfkill-input is more than likely to become out of sync with the real
state.

Signed-off-by: Fabien Crespel <fabien@crespel.net>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill-input.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index 29c13d308b3..d285f9a9d82 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -30,27 +30,15 @@ struct rfkill_task {
 	spinlock_t lock; /* for accessing last and desired state */
 	unsigned long last; /* last schedule */
 	enum rfkill_state desired_state; /* on/off */
-	enum rfkill_state current_state; /* on/off */
 };
 
 static void rfkill_task_handler(struct work_struct *work)
 {
 	struct rfkill_task *task = container_of(work, struct rfkill_task, work);
-	enum rfkill_state state;
 
 	mutex_lock(&task->mutex);
 
-	/*
-	 * Use temp variable to fetch desired state to keep it
-	 * consistent even if rfkill_schedule_toggle() runs in
-	 * another thread or interrupts us.
-	 */
-	state = task->desired_state;
-
-	if (state != task->current_state) {
-		rfkill_switch_all(task->type, state);
-		task->current_state = state;
-	}
+	rfkill_switch_all(task->type, task->desired_state);
 
 	mutex_unlock(&task->mutex);
 }
@@ -94,7 +82,6 @@ static void rfkill_schedule_toggle(struct rfkill_task *task)
 		.mutex = __MUTEX_INITIALIZER(n.mutex),	\
 		.lock = __SPIN_LOCK_UNLOCKED(n.lock),	\
 		.desired_state = RFKILL_STATE_ON,	\
-		.current_state = RFKILL_STATE_ON,	\
 	}
 
 static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN);
-- 
cgit v1.2.3-70-g09d2


From 4081f00dc45abce6bdac352a6354c07ce15db45b Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:23:07 -0300
Subject: rfkill: do not allow userspace to override ALL RADIOS OFF

SW_RFKILL_ALL is the "emergency power-off all radios" input event.  It must
be handled, and must always do the same thing as far as the rfkill system
is concerned: all transmitters are to go *immediately* offline.

For safety, do NOT allow userspace to override EV_SW SW_RFKILL_ALL OFF.  As
long as rfkill-input is loaded, that event will *always* be processed, and
it will *always* force all rfkill switches to disable all wireless
transmitters, regardless of user_claim attribute or anything else.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill-input.c | 47 ++++++++++++++++++++++++++++++++---------------
 net/rfkill/rfkill-input.h |  1 +
 net/rfkill/rfkill.c       | 18 ++++++++++++++++++
 3 files changed, 51 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index d285f9a9d82..5d4c8b2446f 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -43,11 +43,26 @@ static void rfkill_task_handler(struct work_struct *work)
 	mutex_unlock(&task->mutex);
 }
 
+static void rfkill_task_epo_handler(struct work_struct *work)
+{
+	rfkill_epo();
+}
+
+static DECLARE_WORK(epo_work, rfkill_task_epo_handler);
+
+static void rfkill_schedule_epo(void)
+{
+	schedule_work(&epo_work);
+}
+
 static void rfkill_schedule_set(struct rfkill_task *task,
 				enum rfkill_state desired_state)
 {
 	unsigned long flags;
 
+	if (unlikely(work_pending(&epo_work)))
+		return;
+
 	spin_lock_irqsave(&task->lock, flags);
 
 	if (time_after(jiffies, task->last + msecs_to_jiffies(200))) {
@@ -63,6 +78,9 @@ static void rfkill_schedule_toggle(struct rfkill_task *task)
 {
 	unsigned long flags;
 
+	if (unlikely(work_pending(&epo_work)))
+		return;
+
 	spin_lock_irqsave(&task->lock, flags);
 
 	if (time_after(jiffies, task->last + msecs_to_jiffies(200))) {
@@ -114,21 +132,20 @@ static void rfkill_event(struct input_handle *handle, unsigned int type,
 		switch (code) {
 		case SW_RFKILL_ALL:
 			/* EVERY radio type. data != 0 means radios ON */
-			rfkill_schedule_set(&rfkill_wwan,
-					    (data)? RFKILL_STATE_ON:
-						    RFKILL_STATE_OFF);
-			rfkill_schedule_set(&rfkill_wimax,
-					    (data)? RFKILL_STATE_ON:
-						    RFKILL_STATE_OFF);
-			rfkill_schedule_set(&rfkill_uwb,
-					    (data)? RFKILL_STATE_ON:
-						    RFKILL_STATE_OFF);
-			rfkill_schedule_set(&rfkill_bt,
-					    (data)? RFKILL_STATE_ON:
-						    RFKILL_STATE_OFF);
-			rfkill_schedule_set(&rfkill_wlan,
-					    (data)? RFKILL_STATE_ON:
-						    RFKILL_STATE_OFF);
+			/* handle EPO (emergency power off) through shortcut */
+			if (data) {
+				rfkill_schedule_set(&rfkill_wwan,
+						    RFKILL_STATE_ON);
+				rfkill_schedule_set(&rfkill_wimax,
+						    RFKILL_STATE_ON);
+				rfkill_schedule_set(&rfkill_uwb,
+						    RFKILL_STATE_ON);
+				rfkill_schedule_set(&rfkill_bt,
+						    RFKILL_STATE_ON);
+				rfkill_schedule_set(&rfkill_wlan,
+						    RFKILL_STATE_ON);
+			} else
+				rfkill_schedule_epo();
 			break;
 		default:
 			break;
diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h
index 4dae5006fc7..f63d0504568 100644
--- a/net/rfkill/rfkill-input.h
+++ b/net/rfkill/rfkill-input.h
@@ -12,5 +12,6 @@
 #define __RFKILL_INPUT_H
 
 void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state);
+void rfkill_epo(void);
 
 #endif /* __RFKILL_INPUT_H */
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index dd1c3f18f31..7d07175c407 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -181,6 +181,24 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
 }
 EXPORT_SYMBOL(rfkill_switch_all);
 
+/**
+ * rfkill_epo - emergency power off all transmitters
+ *
+ * This kicks all rfkill devices to RFKILL_STATE_OFF, ignoring
+ * everything in its path but rfkill_mutex.
+ */
+void rfkill_epo(void)
+{
+	struct rfkill *rfkill;
+
+	mutex_lock(&rfkill_mutex);
+	list_for_each_entry(rfkill, &rfkill_list, node) {
+		rfkill_toggle_radio(rfkill, RFKILL_STATE_OFF, 1);
+	}
+	mutex_unlock(&rfkill_mutex);
+}
+EXPORT_SYMBOL_GPL(rfkill_epo);
+
 /**
  * rfkill_force_state - Force the internal rfkill radio state
  * @rfkill: pointer to the rfkill class to modify.
-- 
cgit v1.2.3-70-g09d2


From 5005657cbd0fd6f277f807c0612a6b6d4396a02c Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 23 Jun 2008 17:46:42 -0300
Subject: rfkill: rename the rfkill_state states and add block-locked state

The current naming of rfkill_state causes a lot of confusion: not only the
"kill" in rfkill suggests negative logic, but also the fact that rfkill cannot
turn anything on (it can just force something off or stop forcing something
off) is often forgotten.

Rename RFKILL_STATE_OFF to RFKILL_STATE_SOFT_BLOCKED (transmitter is blocked
and will not operate; state can be changed by a toggle_radio request), and
RFKILL_STATE_ON to RFKILL_STATE_UNBLOCKED (transmitter is not blocked, and may
operate).

Also, add a new third state, RFKILL_STATE_HARD_BLOCKED (transmitter is blocked
and will not operate; state cannot be changed through a toggle_radio request),
which is used by drivers to indicate a wireless transmiter was blocked by a
hardware rfkill line that accepts no overrides.

Keep the old names as #defines, but document them as deprecated.  This way,
drivers can be converted to the new names *and* verified to actually use rfkill
correctly one by one.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/rfkill.txt  | 56 +++++++++++++++++++++++++++++------
 include/linux/rfkill.h    | 32 ++++++++++++++++++--
 net/rfkill/rfkill-input.c | 29 +++++++++---------
 net/rfkill/rfkill.c       | 75 ++++++++++++++++++++++++++++++++++++++---------
 4 files changed, 152 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index cf230c1ad9e..5316cea95ce 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -60,9 +60,20 @@ The second option provides an rfkill input handler. This handler will listen to
 all rfkill key events and will toggle the radio accordingly.  With this option
 enabled userspace could either do nothing or simply perform monitoring tasks.
 
-When a rfkill switch is in the RFKILL_STATE_ON, the wireless transmitter (radio
-TX circuit for example) is *enabled*.  When the rfkill switch is in the
-RFKILL_STATE_OFF, the wireless transmitter is to be *blocked* from operating.
+When a rfkill switch is in the RFKILL_STATE_UNBLOCKED, the wireless transmitter
+(radio TX circuit for example) is *enabled*.  When the rfkill switch is in the
+RFKILL_STATE_SOFT_BLOCKED or RFKILL_STATE_HARD_BLOCKED, the wireless
+transmitter is to be *blocked* from operating.
+
+RFKILL_STATE_SOFT_BLOCKED indicates that a call to toggle_radio() can change
+that state.  RFKILL_STATE_HARD_BLOCKED indicates that a call to toggle_radio()
+will not be able to change the state and will return with a suitable error if
+attempts are made to set the state to RFKILL_STATE_UNBLOCKED.
+
+RFKILL_STATE_HARD_BLOCKED is used by drivers to signal that the device is
+locked in the BLOCKED state by a hardwire rfkill line (typically an input pin
+that, when active, forces the transmitter to be disabled) which the driver
+CANNOT override.
 
 Full rfkill functionality requires two different subsystems to cooperate: the
 input layer and the rfkill class.  The input layer issues *commands* to the
@@ -122,10 +133,10 @@ Userspace input handlers (uevents) or kernel input handlers (rfkill-input):
 	  action).
 	* rfkill-input implements EPO by handling EV_SW SW_RFKILL_ALL 0
 	  (power off all transmitters) in a special way: it ignores any
-	  overrides and local state cache and forces all transmitters to
-	  the OFF state (including those which are already supposed to be
-	  OFF).  Note that the opposite event (power on all transmitters)
-	  is handled normally.
+	  overrides and local state cache and forces all transmitters to the
+	  RFKILL_STATE_SOFT_BLOCKED state (including those which are already
+	  supposed to be BLOCKED).  Note that the opposite event (power on all
+	  transmitters) is handled normally.
 
 Userspace uevent handler or kernel platform-specific drivers hooked to the
 rfkill notifier chain:
@@ -284,6 +295,19 @@ You should:
 	  YOU CAN ACCESS state DIRECTLY)
 	- rfkill_register()
 
+The only way to set a device to the RFKILL_STATE_HARD_BLOCKED state is through
+a suitable return of get_state() or through rfkill_force_state().
+
+When a device is in the RFKILL_STATE_HARD_BLOCKED state, the only way to switch
+it to a different state is through a suitable return of get_state() or through
+rfkill_force_state().
+
+If toggle_radio() is called to set a device to state RFKILL_STATE_SOFT_BLOCKED
+when that device is already at the RFKILL_STATE_HARD_BLOCKED state, it should
+not return an error.  Instead, it should try to double-block the transmitter,
+so that its state will change from RFKILL_STATE_HARD_BLOCKED to
+RFKILL_STATE_SOFT_BLOCKED should the hardware blocking cease.
+
 Please refer to the source for more documentation.
 
 ===============================================================================
@@ -322,13 +346,27 @@ change by writing to the "state" attribute in order for anything to happen.
 
 Take particular care to implement EV_SW SW_RFKILL_ALL properly.  When that
 switch is set to OFF, *every* rfkill device *MUST* be immediately put into the
-OFF state, no questions asked.
+RFKILL_STATE_SOFT_BLOCKED state, no questions asked.
 
 The following sysfs entries will be created:
 
 	name: Name assigned by driver to this key (interface or driver name).
 	type: Name of the key type ("wlan", "bluetooth", etc).
-	state: Current state of the key. 1: On, 0: Off.
+	state: Current state of the transmitter
+		0: RFKILL_STATE_SOFT_BLOCKED
+			transmitter is forced off, but you can override it
+			by a write to the state attribute, or through input
+			events (if rfkill-input is loaded).
+		1: RFKILL_STATE_UNBLOCKED
+			transmiter is NOT forced off, and may operate if
+			all other conditions for such operation are met
+			(such as interface is up and configured, etc).
+		2: RFKILL_STATE_HARD_BLOCKED
+			transmitter is forced off by something outside of
+			the driver's control.
+
+			You cannot set a device to this state through
+			writes to the state attribute.
 	claim: 1: Userspace handles events, 0: Kernel handles events
 
 Both the "state" and "claim" entries are also writable. For the "state" entry
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 98667becdee..c5f6e54ec6a 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -46,16 +46,25 @@ enum rfkill_type {
 };
 
 enum rfkill_state {
-	RFKILL_STATE_OFF	= 0,	/* Radio output blocked */
-	RFKILL_STATE_ON		= 1,	/* Radio output active */
+	RFKILL_STATE_SOFT_BLOCKED = 0,	/* Radio output blocked */
+	RFKILL_STATE_UNBLOCKED    = 1,	/* Radio output allowed */
+	RFKILL_STATE_HARD_BLOCKED = 2,	/* Output blocked, non-overrideable */
 };
 
+/*
+ * These are DEPRECATED, drivers using them should be verified to
+ * comply with the rfkill usage guidelines in Documentation/rfkill.txt
+ * and then converted to use the new names for rfkill_state
+ */
+#define RFKILL_STATE_OFF RFKILL_STATE_SOFT_BLOCKED
+#define RFKILL_STATE_ON  RFKILL_STATE_UNBLOCKED
+
 /**
  * struct rfkill - rfkill control structure.
  * @name: Name of the switch.
  * @type: Radio type which the button controls, the value stored
  *	here should be a value from enum rfkill_type.
- * @state: State of the switch, "ON" means radio can operate.
+ * @state: State of the switch, "UNBLOCKED" means radio can operate.
  * @user_claim_unsupported: Whether the hardware supports exclusive
  *	RF-kill control by userspace. Set this before registering.
  * @user_claim: Set when the switch is controlled exlusively by userspace.
@@ -63,8 +72,12 @@ enum rfkill_state {
  * @data: Pointer to the RF button drivers private data which will be
  *	passed along when toggling radio state.
  * @toggle_radio(): Mandatory handler to control state of the radio.
+ *	only RFKILL_STATE_SOFT_BLOCKED and RFKILL_STATE_UNBLOCKED are
+ *	valid parameters.
  * @get_state(): handler to read current radio state from hardware,
  *      may be called from atomic context, should return 0 on success.
+ *      Either this handler OR judicious use of rfkill_force_state() is
+ *      MANDATORY for any driver capable of RFKILL_STATE_HARD_BLOCKED.
  * @led_trigger: A LED trigger for this button's LED.
  * @dev: Device structure integrating the switch into device tree.
  * @node: Used to place switch into list of all switches known to the
@@ -102,6 +115,19 @@ void rfkill_unregister(struct rfkill *rfkill);
 
 int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state);
 
+/**
+ * rfkill_state_complement - return complementar state
+ * @state: state to return the complement of
+ *
+ * Returns RFKILL_STATE_SOFT_BLOCKED if @state is RFKILL_STATE_UNBLOCKED,
+ * returns RFKILL_STATE_UNBLOCKED otherwise.
+ */
+static inline enum rfkill_state rfkill_state_complement(enum rfkill_state state)
+{
+	return (state == RFKILL_STATE_UNBLOCKED) ?
+		RFKILL_STATE_SOFT_BLOCKED : RFKILL_STATE_UNBLOCKED;
+}
+
 /**
  * rfkill_get_led_name - Get the LED trigger name for the button's LED.
  * This function might return a NULL pointer if registering of the
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index 5d4c8b2446f..8aa82273014 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -84,7 +84,8 @@ static void rfkill_schedule_toggle(struct rfkill_task *task)
 	spin_lock_irqsave(&task->lock, flags);
 
 	if (time_after(jiffies, task->last + msecs_to_jiffies(200))) {
-		task->desired_state = !task->desired_state;
+		task->desired_state =
+				rfkill_state_complement(task->desired_state);
 		task->last = jiffies;
 		schedule_work(&task->work);
 	}
@@ -92,14 +93,14 @@ static void rfkill_schedule_toggle(struct rfkill_task *task)
 	spin_unlock_irqrestore(&task->lock, flags);
 }
 
-#define DEFINE_RFKILL_TASK(n, t)			\
-	struct rfkill_task n = {			\
-		.work = __WORK_INITIALIZER(n.work,	\
-				rfkill_task_handler),	\
-		.type = t,				\
-		.mutex = __MUTEX_INITIALIZER(n.mutex),	\
-		.lock = __SPIN_LOCK_UNLOCKED(n.lock),	\
-		.desired_state = RFKILL_STATE_ON,	\
+#define DEFINE_RFKILL_TASK(n, t)				\
+	struct rfkill_task n = {				\
+		.work = __WORK_INITIALIZER(n.work,		\
+				rfkill_task_handler),		\
+		.type = t,					\
+		.mutex = __MUTEX_INITIALIZER(n.mutex),		\
+		.lock = __SPIN_LOCK_UNLOCKED(n.lock),		\
+		.desired_state = RFKILL_STATE_UNBLOCKED,	\
 	}
 
 static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN);
@@ -135,15 +136,15 @@ static void rfkill_event(struct input_handle *handle, unsigned int type,
 			/* handle EPO (emergency power off) through shortcut */
 			if (data) {
 				rfkill_schedule_set(&rfkill_wwan,
-						    RFKILL_STATE_ON);
+						    RFKILL_STATE_UNBLOCKED);
 				rfkill_schedule_set(&rfkill_wimax,
-						    RFKILL_STATE_ON);
+						    RFKILL_STATE_UNBLOCKED);
 				rfkill_schedule_set(&rfkill_uwb,
-						    RFKILL_STATE_ON);
+						    RFKILL_STATE_UNBLOCKED);
 				rfkill_schedule_set(&rfkill_bt,
-						    RFKILL_STATE_ON);
+						    RFKILL_STATE_UNBLOCKED);
 				rfkill_schedule_set(&rfkill_wlan,
-						    RFKILL_STATE_ON);
+						    RFKILL_STATE_UNBLOCKED);
 			} else
 				rfkill_schedule_epo();
 			break;
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 7d07175c407..ce0e23148cd 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -39,7 +39,7 @@ MODULE_LICENSE("GPL");
 static LIST_HEAD(rfkill_list);	/* list of registered rf switches */
 static DEFINE_MUTEX(rfkill_mutex);
 
-static unsigned int rfkill_default_state = RFKILL_STATE_ON;
+static unsigned int rfkill_default_state = RFKILL_STATE_UNBLOCKED;
 module_param_named(default_state, rfkill_default_state, uint, 0444);
 MODULE_PARM_DESC(default_state,
 		 "Default initial state for all radio types, 0 = radio off");
@@ -98,7 +98,7 @@ static void rfkill_led_trigger(struct rfkill *rfkill,
 
 	if (!led->name)
 		return;
-	if (state == RFKILL_STATE_OFF)
+	if (state != RFKILL_STATE_UNBLOCKED)
 		led_trigger_event(led, LED_OFF);
 	else
 		led_trigger_event(led, LED_FULL);
@@ -128,6 +128,28 @@ static void update_rfkill_state(struct rfkill *rfkill)
 	}
 }
 
+/**
+ * rfkill_toggle_radio - wrapper for toggle_radio hook
+ * calls toggle_radio taking into account a lot of "small"
+ * details.
+ * @rfkill: the rfkill struct to use
+ * @force: calls toggle_radio even if cache says it is not needed,
+ *	and also makes sure notifications of the state will be
+ *	sent even if it didn't change
+ * @state: the new state to call toggle_radio() with
+ *
+ * This wrappen protects and enforces the API for toggle_radio
+ * calls.  Note that @force cannot override a (possibly cached)
+ * state of RFKILL_STATE_HARD_BLOCKED.  Any device making use of
+ * RFKILL_STATE_HARD_BLOCKED implements either get_state() or
+ * rfkill_force_state(), so the cache either is bypassed or valid.
+ *
+ * Note that we do call toggle_radio for RFKILL_STATE_SOFT_BLOCKED
+ * even if the radio is in RFKILL_STATE_HARD_BLOCKED state, so as to
+ * give the driver a hint that it should double-BLOCK the transmitter.
+ *
+ * Caller must have aquired rfkill_mutex.
+ */
 static int rfkill_toggle_radio(struct rfkill *rfkill,
 				enum rfkill_state state,
 				int force)
@@ -141,9 +163,28 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
 	    !rfkill->get_state(rfkill->data, &newstate))
 		rfkill->state = newstate;
 
+	switch (state) {
+	case RFKILL_STATE_HARD_BLOCKED:
+		/* typically happens when refreshing hardware state,
+		 * such as on resume */
+		state = RFKILL_STATE_SOFT_BLOCKED;
+		break;
+	case RFKILL_STATE_UNBLOCKED:
+		/* force can't override this, only rfkill_force_state() can */
+		if (rfkill->state == RFKILL_STATE_HARD_BLOCKED)
+			return -EPERM;
+		break;
+	case RFKILL_STATE_SOFT_BLOCKED:
+		/* nothing to do, we want to give drivers the hint to double
+		 * BLOCK even a transmitter that is already in state
+		 * RFKILL_STATE_HARD_BLOCKED */
+		break;
+	}
+
 	if (force || state != rfkill->state) {
 		retval = rfkill->toggle_radio(rfkill->data, state);
-		if (!retval)
+		/* never allow a HARD->SOFT downgrade! */
+		if (!retval && rfkill->state != RFKILL_STATE_HARD_BLOCKED)
 			rfkill->state = state;
 	}
 
@@ -184,7 +225,7 @@ EXPORT_SYMBOL(rfkill_switch_all);
 /**
  * rfkill_epo - emergency power off all transmitters
  *
- * This kicks all rfkill devices to RFKILL_STATE_OFF, ignoring
+ * This kicks all rfkill devices to RFKILL_STATE_SOFT_BLOCKED, ignoring
  * everything in its path but rfkill_mutex.
  */
 void rfkill_epo(void)
@@ -193,7 +234,7 @@ void rfkill_epo(void)
 
 	mutex_lock(&rfkill_mutex);
 	list_for_each_entry(rfkill, &rfkill_list, node) {
-		rfkill_toggle_radio(rfkill, RFKILL_STATE_OFF, 1);
+		rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
 	}
 	mutex_unlock(&rfkill_mutex);
 }
@@ -215,8 +256,9 @@ int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state)
 {
 	enum rfkill_state oldstate;
 
-	if (state != RFKILL_STATE_OFF &&
-	    state != RFKILL_STATE_ON)
+	if (state != RFKILL_STATE_SOFT_BLOCKED &&
+	    state != RFKILL_STATE_UNBLOCKED &&
+	    state != RFKILL_STATE_HARD_BLOCKED)
 		return -EINVAL;
 
 	mutex_lock(&rfkill->mutex);
@@ -290,11 +332,14 @@ static ssize_t rfkill_state_store(struct device *dev,
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
+	/* RFKILL_STATE_HARD_BLOCKED is illegal here... */
+	if (state != RFKILL_STATE_UNBLOCKED &&
+	    state != RFKILL_STATE_SOFT_BLOCKED)
+		return -EINVAL;
+
 	if (mutex_lock_interruptible(&rfkill->mutex))
 		return -ERESTARTSYS;
-	error = rfkill_toggle_radio(rfkill,
-			state ? RFKILL_STATE_ON : RFKILL_STATE_OFF,
-			0);
+	error = rfkill_toggle_radio(rfkill, state, 0);
 	mutex_unlock(&rfkill->mutex);
 
 	return error ? error : count;
@@ -373,7 +418,8 @@ static int rfkill_suspend(struct device *dev, pm_message_t state)
 			update_rfkill_state(rfkill);
 
 			mutex_lock(&rfkill->mutex);
-			rfkill->toggle_radio(rfkill->data, RFKILL_STATE_OFF);
+			rfkill->toggle_radio(rfkill->data,
+						RFKILL_STATE_SOFT_BLOCKED);
 			mutex_unlock(&rfkill->mutex);
 		}
 
@@ -470,7 +516,7 @@ static void rfkill_remove_switch(struct rfkill *rfkill)
 {
 	mutex_lock(&rfkill_mutex);
 	list_del_init(&rfkill->node);
-	rfkill_toggle_radio(rfkill, RFKILL_STATE_OFF, 1);
+	rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
 	mutex_unlock(&rfkill_mutex);
 }
 
@@ -610,8 +656,9 @@ static int __init rfkill_init(void)
 	int error;
 	int i;
 
-	if (rfkill_default_state != RFKILL_STATE_OFF &&
-	    rfkill_default_state != RFKILL_STATE_ON)
+	/* RFKILL_STATE_HARD_BLOCKED is illegal here... */
+	if (rfkill_default_state != RFKILL_STATE_SOFT_BLOCKED &&
+	    rfkill_default_state != RFKILL_STATE_UNBLOCKED)
 		return -EINVAL;
 
 	for (i = 0; i < ARRAY_SIZE(rfkill_states); i++)
-- 
cgit v1.2.3-70-g09d2


From f2df38596a81b6c24f4586b0b4befeaebf3e02db Mon Sep 17 00:00:00 2001
From: Assaf Krauss <assaf.krauss@intel.com>
Date: Sun, 15 Jun 2008 18:23:29 +0300
Subject: mac80211: 11h Infrastructure - Parsing

This patch introduces parsing of 11h and 11d related elements from incoming
management frames.

Signed-off-by: Assaf Krauss <assaf.krauss@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 54 +++++++++++++++++++++++++++++++++++++++++++---
 net/mac80211/ieee80211_i.h |  9 ++++++++
 net/mac80211/mlme.c        | 19 ++++++++++++++++
 3 files changed, 79 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 2998e3b5f16..8546f09e462 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -469,6 +469,40 @@ struct ieee80211s_hdr {
 	u8 eaddr3[6];
 } __attribute__ ((packed));
 
+/**
+ * struct ieee80211_quiet_ie
+ *
+ * This structure refers to "Quiet information element"
+ */
+struct ieee80211_quiet_ie {
+	u8 count;
+	u8 period;
+	__le16 duration;
+	__le16 offset;
+} __attribute__ ((packed));
+
+/**
+ * struct ieee80211_msrment_ie
+ *
+ * This structure refers to "Measurement Request/Report information element"
+ */
+struct ieee80211_msrment_ie {
+	u8 token;
+	u8 mode;
+	u8 type;
+	u8 request[0];
+} __attribute__ ((packed));
+
+/**
+ * struct ieee80211_channel_sw_ie
+ *
+ * This structure refers to "Channel Switch Announcement information element"
+ */
+struct ieee80211_channel_sw_ie {
+	u8 mode;
+	u8 new_ch_num;
+	u8 count;
+} __attribute__ ((packed));
 
 struct ieee80211_mgmt {
 	__le16 frame_control;
@@ -544,10 +578,15 @@ struct ieee80211_mgmt {
 					u8 action_code;
 					u8 element_id;
 					u8 length;
-					u8 switch_mode;
-					u8 new_chan;
-					u8 switch_count;
+					struct ieee80211_channel_sw_ie sw_elem;
 				} __attribute__((packed)) chan_switch;
+				struct{
+					u8 action_code;
+					u8 dialog_token;
+					u8 element_id;
+					u8 length;
+					struct ieee80211_msrment_ie msr_elem;
+				} __attribute__((packed)) measurement;
 				struct{
 					u8 action_code;
 					u8 dialog_token;
@@ -875,6 +914,15 @@ enum ieee80211_category {
 	WLAN_CATEGORY_WMM = 17,
 };
 
+/* SPECTRUM_MGMT action code */
+enum ieee80211_spectrum_mgmt_actioncode {
+	WLAN_ACTION_SPCT_MSR_REQ = 0,
+	WLAN_ACTION_SPCT_MSR_RPRT = 1,
+	WLAN_ACTION_SPCT_TPC_REQ = 2,
+	WLAN_ACTION_SPCT_TPC_RPRT = 3,
+	WLAN_ACTION_SPCT_CHL_SWITCH = 4,
+};
+
 /* BACK action code */
 enum ieee80211_back_actioncode {
 	WLAN_ACTION_ADDBA_REQ = 0,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 80a9e7c07b4..af352c05c98 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -791,6 +791,10 @@ struct ieee802_11_elems {
 	u8 *preq;
 	u8 *prep;
 	u8 *perr;
+	u8 *ch_switch_elem;
+	u8 *country_elem;
+	u8 *pwr_constr_elem;
+	u8 *quiet_elem; 	/* first quite element */
 
 	/* length of them, respectively */
 	u8 ssid_len;
@@ -815,6 +819,11 @@ struct ieee802_11_elems {
 	u8 preq_len;
 	u8 prep_len;
 	u8 perr_len;
+	u8 ch_switch_elem_len;
+	u8 country_elem_len;
+	u8 pwr_constr_elem_len;
+	u8 quiet_elem_len;
+	u8 num_of_quiet_elem;	/* can be more the one */
 };
 
 static inline struct ieee80211_local *hw_to_local(
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e06d6450f21..32453561fe3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -204,6 +204,25 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
 			elems->perr = pos;
 			elems->perr_len = elen;
 			break;
+		case WLAN_EID_CHANNEL_SWITCH:
+			elems->ch_switch_elem = pos;
+			elems->ch_switch_elem_len = elen;
+			break;
+		case WLAN_EID_QUIET:
+			if (!elems->quiet_elem) {
+				elems->quiet_elem = pos;
+				elems->quiet_elem_len = elen;
+			}
+			elems->num_of_quiet_elem++;
+			break;
+		case WLAN_EID_COUNTRY:
+			elems->country_elem = pos;
+			elems->country_elem_len = elen;
+			break;
+		case WLAN_EID_PWR_CONSTRAINT:
+			elems->pwr_constr_elem = pos;
+			elems->pwr_constr_elem_len = elen;
+			break;
 		default:
 			break;
 		}
-- 
cgit v1.2.3-70-g09d2


From b662348662f9661f9259c7186c1bdb65620045f1 Mon Sep 17 00:00:00 2001
From: Assaf Krauss <assaf.krauss@intel.com>
Date: Mon, 16 Jun 2008 16:09:49 +0300
Subject: mac80211: 11h - Handling measurement request

This patch handles the 11h measurement request information element.
This is minimal requested implementation - refuse measurement.

Signed-off-by: Assaf Krauss <assaf.krauss@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 10 ++++++
 net/mac80211/mlme.c       | 78 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 8546f09e462..cffd6d0094f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -739,11 +739,21 @@ struct ieee80211_ht_addt_info {
 #define WLAN_CAPABILITY_SHORT_PREAMBLE	(1<<5)
 #define WLAN_CAPABILITY_PBCC		(1<<6)
 #define WLAN_CAPABILITY_CHANNEL_AGILITY	(1<<7)
+
 /* 802.11h */
 #define WLAN_CAPABILITY_SPECTRUM_MGMT	(1<<8)
 #define WLAN_CAPABILITY_QOS		(1<<9)
 #define WLAN_CAPABILITY_SHORT_SLOT_TIME	(1<<10)
 #define WLAN_CAPABILITY_DSSS_OFDM	(1<<13)
+/* measurement */
+#define IEEE80211_SPCT_MSR_RPRT_MODE_LATE	(1<<0)
+#define IEEE80211_SPCT_MSR_RPRT_MODE_INCAPABLE	(1<<1)
+#define IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED	(1<<2)
+
+#define IEEE80211_SPCT_MSR_RPRT_TYPE_BASIC	0
+#define IEEE80211_SPCT_MSR_RPRT_TYPE_CCA	1
+#define IEEE80211_SPCT_MSR_RPRT_TYPE_RPI	2
+
 
 /* 802.11g ERP information element */
 #define WLAN_ERP_NON_ERP_PRESENT (1<<0)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 32453561fe3..6d9a4facf9d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1720,6 +1720,71 @@ void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr)
 	}
 }
 
+static void ieee80211_send_refuse_measurement_request(struct net_device *dev,
+					struct ieee80211_msrment_ie *request_ie,
+					const u8 *da, const u8 *bssid,
+					u8 dialog_token)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *msr_report;
+
+	skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom +
+				sizeof(struct ieee80211_msrment_ie));
+
+	if (!skb) {
+		printk(KERN_ERR "%s: failed to allocate buffer for "
+				"measurement report frame\n", dev->name);
+		return;
+	}
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24);
+	memset(msr_report, 0, 24);
+	memcpy(msr_report->da, da, ETH_ALEN);
+	memcpy(msr_report->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(msr_report->bssid, bssid, ETH_ALEN);
+	msr_report->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+						IEEE80211_STYPE_ACTION);
+
+	skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement));
+	msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT;
+	msr_report->u.action.u.measurement.action_code =
+				WLAN_ACTION_SPCT_MSR_RPRT;
+	msr_report->u.action.u.measurement.dialog_token = dialog_token;
+
+	msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT;
+	msr_report->u.action.u.measurement.length =
+			sizeof(struct ieee80211_msrment_ie);
+
+	memset(&msr_report->u.action.u.measurement.msr_elem, 0,
+		sizeof(struct ieee80211_msrment_ie));
+	msr_report->u.action.u.measurement.msr_elem.token = request_ie->token;
+	msr_report->u.action.u.measurement.msr_elem.mode |=
+			IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
+	msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
+
+	ieee80211_sta_tx(dev, skb, 0);
+}
+
+static void ieee80211_sta_process_measurement_req(struct net_device *dev,
+						struct ieee80211_mgmt *mgmt,
+						size_t len)
+{
+	/*
+	 * Ignoring measurement request is spec violation.
+	 * Mandatory measurements must be reported optional
+	 * measurements might be refused or reported incapable
+	 * For now just refuse
+	 * TODO: Answer basic measurement as unmeasured
+	 */
+	ieee80211_send_refuse_measurement_request(dev,
+			&mgmt->u.action.u.measurement.msr_elem,
+			mgmt->sa, mgmt->bssid,
+			mgmt->u.action.u.measurement.dialog_token);
+}
+
+
 static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 				   struct ieee80211_if_sta *ifsta,
 				   struct ieee80211_mgmt *mgmt,
@@ -3044,11 +3109,24 @@ static void ieee80211_rx_mgmt_action(struct net_device *dev,
 				     struct ieee80211_rx_status *rx_status)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 
 	if (len < IEEE80211_MIN_ACTION_SIZE)
 		return;
 
 	switch (mgmt->u.action.category) {
+	case WLAN_CATEGORY_SPECTRUM_MGMT:
+		if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ)
+			break;
+		switch (mgmt->u.action.u.chan_switch.action_code) {
+		case WLAN_ACTION_SPCT_MSR_REQ:
+			if (len < (IEEE80211_MIN_ACTION_SIZE +
+				   sizeof(mgmt->u.action.u.measurement)))
+				break;
+			ieee80211_sta_process_measurement_req(dev, mgmt, len);
+			break;
+		}
+		break;
 	case WLAN_CATEGORY_BACK:
 		switch (mgmt->u.action.u.addba_req.action_code) {
 		case WLAN_ACTION_ADDBA_REQ:
-- 
cgit v1.2.3-70-g09d2


From 135a2110c55c71d7ccaf5ac66968b993347fe8e2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 16 Jun 2008 20:55:29 +0200
Subject: mac80211: remove shared key todo

Adding shared key authentication is not going to happen anyway.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6d9a4facf9d..75c4e9743fb 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1837,11 +1837,12 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 	       auth_transaction, status_code);
 
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
-		/* IEEE 802.11 standard does not require authentication in IBSS
+		/*
+		 * IEEE 802.11 standard does not require authentication in IBSS
 		 * networks and most implementations do not seem to use it.
 		 * However, try to reply to authentication attempts if someone
 		 * has actually implemented this.
-		 * TODO: Could implement shared key authentication. */
+		 */
 		if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) {
 			printk(KERN_DEBUG "%s: unexpected IBSS authentication "
 			       "frame (alg=%d transaction=%d)\n",
-- 
cgit v1.2.3-70-g09d2


From 5a9f7b047e81a73a1ce3e42ef87c28a61fd4df24 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 18 Jun 2008 14:58:09 +0200
Subject: mac80211: use separate spinlock for sta flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

David Ellingsworth posted a bug that was only noticable on UP/NO-PREEMPT
and Michael correctly analysed it to be a spin_lock_bh() section within
a spin_lock_irqsave() section. This adds a separate spinlock for the
sta_info flags to fix that issue and avoid having to take much care
about where the sta flag manipulation functions are called.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Reported-By: ﻿David Ellingsworth <david@identd.dyndns.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.c |  1 +
 net/mac80211/sta_info.h | 40 +++++++++++++++++++++++++++-------------
 2 files changed, 28 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index c24770cb02c..b3c733162fc 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -235,6 +235,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		return NULL;
 
 	spin_lock_init(&sta->lock);
+	spin_lock_init(&sta->flaglock);
 
 	memcpy(sta->addr, addr, ETH_ALEN);
 	sta->local = local;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 95753f860ac..fd228c198e3 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -164,6 +164,7 @@ struct sta_ampdu_mlme {
  * @aid: STA's unique AID (1..2007, 0 = not assigned yet),
  *	only used in AP (and IBSS?) mode
  * @flags: STA flags, see &enum ieee80211_sta_info_flags
+ * @flaglock: spinlock for flags accesses
  * @ps_tx_buf: buffer of frames to transmit to this station
  *	when it leaves power saving state
  * @tx_filtered: buffer of frames we already tried to transmit
@@ -186,6 +187,7 @@ struct sta_info {
 	struct rate_control_ref *rate_ctrl;
 	void *rate_ctrl_priv;
 	spinlock_t lock;
+	spinlock_t flaglock;
 	struct ieee80211_ht_info ht_info;
 	u64 supp_rates[IEEE80211_NUM_BANDS];
 	u8 addr[ETH_ALEN];
@@ -198,7 +200,10 @@ struct sta_info {
 	 */
 	u8 pin_status;
 
-	/* frequently updated information, locked with lock spinlock */
+	/*
+	 * frequently updated, locked with own spinlock (flaglock),
+	 * use the accessors defined below
+	 */
 	u32 flags;
 
 	/*
@@ -293,34 +298,41 @@ static inline enum plink_state sta_plink_state(struct sta_info *sta)
 
 static inline void set_sta_flags(struct sta_info *sta, const u32 flags)
 {
-	spin_lock_bh(&sta->lock);
+	unsigned long irqfl;
+
+	spin_lock_irqsave(&sta->flaglock, irqfl);
 	sta->flags |= flags;
-	spin_unlock_bh(&sta->lock);
+	spin_unlock_irqrestore(&sta->flaglock, irqfl);
 }
 
 static inline void clear_sta_flags(struct sta_info *sta, const u32 flags)
 {
-	spin_lock_bh(&sta->lock);
+	unsigned long irqfl;
+
+	spin_lock_irqsave(&sta->flaglock, irqfl);
 	sta->flags &= ~flags;
-	spin_unlock_bh(&sta->lock);
+	spin_unlock_irqrestore(&sta->flaglock, irqfl);
 }
 
 static inline void set_and_clear_sta_flags(struct sta_info *sta,
 					   const u32 set, const u32 clear)
 {
-	spin_lock_bh(&sta->lock);
+	unsigned long irqfl;
+
+	spin_lock_irqsave(&sta->flaglock, irqfl);
 	sta->flags |= set;
 	sta->flags &= ~clear;
-	spin_unlock_bh(&sta->lock);
+	spin_unlock_irqrestore(&sta->flaglock, irqfl);
 }
 
 static inline u32 test_sta_flags(struct sta_info *sta, const u32 flags)
 {
 	u32 ret;
+	unsigned long irqfl;
 
-	spin_lock_bh(&sta->lock);
+	spin_lock_irqsave(&sta->flaglock, irqfl);
 	ret = sta->flags & flags;
-	spin_unlock_bh(&sta->lock);
+	spin_unlock_irqrestore(&sta->flaglock, irqfl);
 
 	return ret;
 }
@@ -329,11 +341,12 @@ static inline u32 test_and_clear_sta_flags(struct sta_info *sta,
 					   const u32 flags)
 {
 	u32 ret;
+	unsigned long irqfl;
 
-	spin_lock_bh(&sta->lock);
+	spin_lock_irqsave(&sta->flaglock, irqfl);
 	ret = sta->flags & flags;
 	sta->flags &= ~flags;
-	spin_unlock_bh(&sta->lock);
+	spin_unlock_irqrestore(&sta->flaglock, irqfl);
 
 	return ret;
 }
@@ -341,10 +354,11 @@ static inline u32 test_and_clear_sta_flags(struct sta_info *sta,
 static inline u32 get_sta_flags(struct sta_info *sta)
 {
 	u32 ret;
+	unsigned long irqfl;
 
-	spin_lock_bh(&sta->lock);
+	spin_lock_irqsave(&sta->flaglock, irqfl);
 	ret = sta->flags;
-	spin_unlock_bh(&sta->lock);
+	spin_unlock_irqrestore(&sta->flaglock, irqfl);
 
 	return ret;
 }
-- 
cgit v1.2.3-70-g09d2


From 97b045d62bffae5a91a286b56ac51db0c4385687 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 20 Jun 2008 01:22:30 +0200
Subject: mac80211: add single function calling tx handlers

This modifies mac80211 to only have a single function calling the
TX handlers rather than them being invoked in multiple places.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 82 +++++++++++++++++++++++++++----------------------------
 1 file changed, 40 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ce06e791bf4..7a14a39ebd7 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1083,13 +1083,46 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 	return IEEE80211_TX_OK;
 }
 
+/*
+ * Invoke TX handlers, return 0 on success and non-zero if the
+ * frame was dropped or queued.
+ */
+static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+{
+	struct ieee80211_local *local = tx->local;
+	struct sk_buff *skb = tx->skb;
+	ieee80211_tx_handler *handler;
+	ieee80211_tx_result res = TX_DROP;
+	int i;
+
+	for (handler = ieee80211_tx_handlers; *handler != NULL; handler++) {
+		res = (*handler)(tx);
+		if (res != TX_CONTINUE)
+			break;
+	}
+
+	if (unlikely(res == TX_DROP)) {
+		I802_DEBUG_INC(local->tx_handlers_drop);
+		dev_kfree_skb(skb);
+		for (i = 0; i < tx->num_extra_frag; i++)
+			if (tx->extra_frag[i])
+				dev_kfree_skb(tx->extra_frag[i]);
+		kfree(tx->extra_frag);
+		return -1;
+	} else if (unlikely(res == TX_QUEUED)) {
+		I802_DEBUG_INC(local->tx_handlers_queued);
+		return -1;
+	}
+
+	return 0;
+}
+
 static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct sta_info *sta;
-	ieee80211_tx_handler *handler;
 	struct ieee80211_tx_data tx;
-	ieee80211_tx_result res = TX_DROP, res_prepare;
+	ieee80211_tx_result res_prepare;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int ret, i;
 	u16 queue;
@@ -1118,26 +1151,8 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb)
 	tx.channel = local->hw.conf.channel;
 	info->band = tx.channel->band;
 
-	for (handler = ieee80211_tx_handlers; *handler != NULL;
-	     handler++) {
-		res = (*handler)(&tx);
-		if (res != TX_CONTINUE)
-			break;
-	}
-
-	if (WARN_ON(tx.skb != skb))
-		goto drop;
-
-	if (unlikely(res == TX_DROP)) {
-		I802_DEBUG_INC(local->tx_handlers_drop);
-		goto drop;
-	}
-
-	if (unlikely(res == TX_QUEUED)) {
-		I802_DEBUG_INC(local->tx_handlers_queued);
-		rcu_read_unlock();
-		return 0;
-	}
+	if (invoke_tx_handlers(&tx))
+		goto out;
 
 	if (tx.extra_frag) {
 		for (i = 0; i < tx.num_extra_frag; i++) {
@@ -1198,6 +1213,7 @@ retry:
 		store->last_frag_rate_ctrl_probe =
 			!!(tx.flags & IEEE80211_TX_PROBE_LAST_FRAG);
 	}
+ out:
 	rcu_read_unlock();
 	return 0;
 
@@ -1948,9 +1964,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct sk_buff *skb = NULL;
 	struct sta_info *sta;
-	ieee80211_tx_handler *handler;
 	struct ieee80211_tx_data tx;
-	ieee80211_tx_result res = TX_DROP;
 	struct net_device *bdev;
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_if_ap *bss = NULL;
@@ -2001,25 +2015,9 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	tx.channel = local->hw.conf.channel;
 	info->band = tx.channel->band;
 
-	for (handler = ieee80211_tx_handlers; *handler != NULL; handler++) {
-		res = (*handler)(&tx);
-		if (res == TX_DROP || res == TX_QUEUED)
-			break;
-	}
-
-	if (WARN_ON(tx.skb != skb))
-		res = TX_DROP;
-
-	if (res == TX_DROP) {
-		I802_DEBUG_INC(local->tx_handlers_drop);
-		dev_kfree_skb(skb);
-		skb = NULL;
-	} else if (res == TX_QUEUED) {
-		I802_DEBUG_INC(local->tx_handlers_queued);
+	if (invoke_tx_handlers(&tx))
 		skb = NULL;
-	}
-
-out:
+ out:
 	rcu_read_unlock();
 
 	return skb;
-- 
cgit v1.2.3-70-g09d2


From 9ae705cfd390f9077eec856ea4dff374d166de33 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 20 Jun 2008 11:54:24 +0200
Subject: mac80211: rename TKIP debugging Kconfig symbol

... to MAC80211_TKIP_DEBUG rather than TKIP_DEBUG.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Kconfig |  2 +-
 net/mac80211/tkip.c  | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 590e00b2766..0d3661d9b6a 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -150,7 +150,7 @@ config MAC80211_LOWTX_FRAME_DUMP
 	  If unsure, say N and insert the debugging code
 	  you require into the driver you are debugging.
 
-config TKIP_DEBUG
+config MAC80211_TKIP_DEBUG
 	bool "TKIP debugging"
 	depends on MAC80211_DEBUG
 
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index e710243d82e..69980788998 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -167,7 +167,7 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 	tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
 	ctx = &key->u.tkip.tx;
 
-#ifdef CONFIG_TKIP_DEBUG
+#ifdef CONFIG_MAC80211_TKIP_DEBUG
 	printk(KERN_DEBUG "TKIP encrypt: iv16 = 0x%04x, iv32 = 0x%08x\n",
 			iv16, iv32);
 
@@ -177,7 +177,7 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 		printk(KERN_DEBUG "Wrap around of iv16 in the middle of a "
 			"fragmented packet\n");
 	}
-#endif /* CONFIG_TKIP_DEBUG */
+#endif
 
 	/* Update the p1k only when the iv16 in the packet wraps around, this
 	 * might occur after the wrap around of iv16 in the key in case of
@@ -240,7 +240,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 	keyid = pos[3];
 	iv32 = get_unaligned_le32(pos + 4);
 	pos += 8;
-#ifdef CONFIG_TKIP_DEBUG
+#ifdef CONFIG_MAC80211_TKIP_DEBUG
 	{
 		int i;
 		printk(KERN_DEBUG "TKIP decrypt: data(len=%zd)", payload_len);
@@ -250,7 +250,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 		printk(KERN_DEBUG "TKIP decrypt: iv16=%04x iv32=%08x\n",
 		       iv16, iv32);
 	}
-#endif /* CONFIG_TKIP_DEBUG */
+#endif
 
 	if (!(keyid & (1 << 5)))
 		return TKIP_DECRYPT_NO_EXT_IV;
@@ -262,14 +262,14 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 	    (iv32 < key->u.tkip.rx[queue].iv32 ||
 	     (iv32 == key->u.tkip.rx[queue].iv32 &&
 	      iv16 <= key->u.tkip.rx[queue].iv16))) {
-#ifdef CONFIG_TKIP_DEBUG
+#ifdef CONFIG_MAC80211_TKIP_DEBUG
 		DECLARE_MAC_BUF(mac);
 		printk(KERN_DEBUG "TKIP replay detected for RX frame from "
 		       "%s (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n",
 		       print_mac(mac, ta),
 		       iv32, iv16, key->u.tkip.rx[queue].iv32,
 		       key->u.tkip.rx[queue].iv16);
-#endif /* CONFIG_TKIP_DEBUG */
+#endif
 		return TKIP_DECRYPT_REPLAY;
 	}
 
@@ -283,7 +283,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 	    key->u.tkip.rx[queue].iv32 != iv32) {
 		/* IV16 wrapped around - perform TKIP phase 1 */
 		tkip_mixing_phase1(tk, &key->u.tkip.rx[queue], ta, iv32);
-#ifdef CONFIG_TKIP_DEBUG
+#ifdef CONFIG_MAC80211_TKIP_DEBUG
 		{
 			int i;
 			DECLARE_MAC_BUF(mac);
@@ -299,7 +299,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 				printk("%04x ", key->u.tkip.rx[queue].p1k[i]);
 			printk("\n");
 		}
-#endif /* CONFIG_TKIP_DEBUG */
+#endif
 		if (key->local->ops->update_tkip_key &&
 			key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
 			u8 bcast[ETH_ALEN] =
@@ -316,7 +316,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 	}
 
 	tkip_mixing_phase2(tk, &key->u.tkip.rx[queue], iv16, rc4key);
-#ifdef CONFIG_TKIP_DEBUG
+#ifdef CONFIG_MAC80211_TKIP_DEBUG
 	{
 		int i;
 		printk(KERN_DEBUG "TKIP decrypt: Phase2 rc4key=");
@@ -324,7 +324,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 			printk("%02x ", rc4key[i]);
 		printk("\n");
 	}
-#endif /* CONFIG_TKIP_DEBUG */
+#endif
 
 	res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12);
  done:
-- 
cgit v1.2.3-70-g09d2


From ffd7891dc909b3648e87f7cf8f84a6dc12fc1cc6 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Sat, 21 Jun 2008 10:02:46 -0400
Subject: mac80211: Let drivers have access to TKIP key offets for TX and RX
 MIC

Some drivers may want to to use the TKIP key offsets for TX and RX
MIC so lets move this out. Lets also clear up a bit how this is used
internally in mac80211.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  5 ++++-
 include/net/mac80211.h  |  7 ++++++-
 net/mac80211/key.h      | 37 ++++++++++++-------------------------
 net/mac80211/tkip.c     | 10 +++++-----
 net/mac80211/wpa.c      | 20 ++++++++++++++------
 5 files changed, 41 insertions(+), 38 deletions(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index aa8411e2a16..2be7c63bc0f 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -241,7 +241,10 @@ enum nl80211_attrs {
 	NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1
 };
 
-#define NL80211_MAX_SUPP_RATES	32
+#define NL80211_MAX_SUPP_RATES			32
+#define NL80211_TKIP_DATA_OFFSET_ENCR_KEY	0
+#define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY	16
+#define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY	24
 
 /**
  * enum nl80211_iftype - (virtual) interface types
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7ab4ff6159a..19f1e412a0f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -595,7 +595,12 @@ enum ieee80211_key_flags {
  * @flags: key flags, see &enum ieee80211_key_flags.
  * @keyidx: the key index (0-3)
  * @keylen: key material length
- * @key: key material
+ * @key: key material. For ALG_TKIP the key is encoded as a 256-bit (32 byte)
+ * 	data block:
+ * 	- Temporal Encryption Key (128 bits)
+ * 	- Temporal Authenticator Tx MIC Key (64 bits)
+ * 	- Temporal Authenticator Rx MIC Key (64 bits)
+ *
  */
 struct ieee80211_key_conf {
 	enum ieee80211_key_alg alg;
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index a0f774aafa4..425816e0996 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -16,31 +16,18 @@
 #include <linux/rcupdate.h>
 #include <net/mac80211.h>
 
-/* ALG_TKIP
- * struct ieee80211_key::key is encoded as a 256-bit (32 byte) data block:
- * Temporal Encryption Key (128 bits)
- * Temporal Authenticator Tx MIC Key (64 bits)
- * Temporal Authenticator Rx MIC Key (64 bits)
- */
-
-#define WEP_IV_LEN 4
-#define WEP_ICV_LEN 4
-
-#define ALG_TKIP_KEY_LEN 32
-/* Starting offsets for each key */
-#define ALG_TKIP_TEMP_ENCR_KEY 0
-#define ALG_TKIP_TEMP_AUTH_TX_MIC_KEY 16
-#define ALG_TKIP_TEMP_AUTH_RX_MIC_KEY 24
-#define TKIP_IV_LEN 8
-#define TKIP_ICV_LEN 4
-
-#define ALG_CCMP_KEY_LEN 16
-#define CCMP_HDR_LEN 8
-#define CCMP_MIC_LEN 8
-#define CCMP_TK_LEN 16
-#define CCMP_PN_LEN 6
-
-#define NUM_RX_DATA_QUEUES 17
+#define WEP_IV_LEN		4
+#define WEP_ICV_LEN		4
+#define ALG_TKIP_KEY_LEN	32
+#define ALG_CCMP_KEY_LEN	16
+#define CCMP_HDR_LEN		8
+#define CCMP_MIC_LEN		8
+#define CCMP_TK_LEN		16
+#define CCMP_PN_LEN		6
+#define TKIP_IV_LEN		8
+#define TKIP_ICV_LEN		4
+
+#define NUM_RX_DATA_QUEUES	17
 
 struct ieee80211_local;
 struct ieee80211_sub_if_data;
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 69980788998..995f7af3d25 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -164,7 +164,7 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 	iv16 = data[2] | (data[0] << 8);
 	iv32 = get_unaligned_le32(&data[4]);
 
-	tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
+	tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY];
 	ctx = &key->u.tkip.tx;
 
 #ifdef CONFIG_MAC80211_TKIP_DEBUG
@@ -205,7 +205,7 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
 {
 	u8 rc4key[16];
 	struct tkip_ctx *ctx = &key->u.tkip.tx;
-	const u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
+	const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY];
 
 	/* Calculate per-packet key */
 	if (ctx->iv16 == 0 || !ctx->initialized)
@@ -231,7 +231,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 	u32 iv16;
 	u8 rc4key[16], keyid, *pos = payload;
 	int res;
-	const u8 *tk = &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY];
+	const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY];
 
 	if (payload_len < 12)
 		return -1;
@@ -286,13 +286,13 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 #ifdef CONFIG_MAC80211_TKIP_DEBUG
 		{
 			int i;
+			u8 key_offset = NL80211_TKIP_DATA_OFFSET_ENCR_KEY;
 			DECLARE_MAC_BUF(mac);
 			printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=%s"
 			       " TK=", print_mac(mac, ta));
 			for (i = 0; i < 16; i++)
 				printk("%02x ",
-				       key->conf.key[
-						ALG_TKIP_TEMP_ENCR_KEY + i]);
+				       key->conf.key[key_offset + i]);
 			printk("\n");
 			printk(KERN_DEBUG "TKIP decrypt: P1K=");
 			for (i = 0; i < 5; i++)
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 345e10e9b31..f809761fbfb 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -49,7 +49,7 @@ static int ieee80211_get_hdr_info(const struct sk_buff *skb, u8 **sa, u8 **da,
 ieee80211_tx_result
 ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 {
-	u8 *data, *sa, *da, *key, *mic, qos_tid;
+	u8 *data, *sa, *da, *key, *mic, qos_tid, key_offset;
 	size_t data_len;
 	u16 fc;
 	struct sk_buff *skb = tx->skb;
@@ -88,8 +88,12 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 #else
 	authenticator = 1;
 #endif
-	key = &tx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY :
-				 ALG_TKIP_TEMP_AUTH_RX_MIC_KEY];
+	/* At this point we know we're using ALG_TKIP. To get the MIC key
+	 * we now will rely on the offset from the ieee80211_key_conf::key */
+	key_offset = authenticator ?
+		NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY :
+		NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY;
+	key = &tx->key->conf.key[key_offset];
 	mic = skb_put(skb, MICHAEL_MIC_LEN);
 	michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic);
 
@@ -100,7 +104,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 ieee80211_rx_result
 ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 {
-	u8 *data, *sa, *da, *key = NULL, qos_tid;
+	u8 *data, *sa, *da, *key = NULL, qos_tid, key_offset;
 	size_t data_len;
 	u16 fc;
 	u8 mic[MICHAEL_MIC_LEN];
@@ -131,8 +135,12 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 #else
 	authenticator = 1;
 #endif
-	key = &rx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY :
-				 ALG_TKIP_TEMP_AUTH_TX_MIC_KEY];
+	/* At this point we know we're using ALG_TKIP. To get the MIC key
+	 * we now will rely on the offset from the ieee80211_key_conf::key */
+	key_offset = authenticator ?
+		NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY :
+		NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY;
+	key = &rx->key->conf.key[key_offset];
 	michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic);
 	if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) {
 		if (!(rx->flags & IEEE80211_RX_RA_MATCH))
-- 
cgit v1.2.3-70-g09d2


From 70217d7f83a12e0a16c726951ddf8f7f0ba7d1a4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sun, 22 Jun 2008 16:45:23 -0700
Subject: mac80211: wep.c use new frame control helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wep.c | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index e7b6344c900..35b664d00e2 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -84,20 +84,17 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local,
 				struct sk_buff *skb,
 				struct ieee80211_key *key)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	u16 fc;
-	int hdrlen;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	unsigned int hdrlen;
 	u8 *newhdr;
 
-	fc = le16_to_cpu(hdr->frame_control);
-	fc |= IEEE80211_FCTL_PROTECTED;
-	hdr->frame_control = cpu_to_le16(fc);
+	hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
 
 	if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN ||
 		    skb_headroom(skb) < WEP_IV_LEN))
 		return NULL;
 
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	newhdr = skb_push(skb, WEP_IV_LEN);
 	memmove(newhdr, newhdr + WEP_IV_LEN, hdrlen);
 	ieee80211_wep_get_iv(local, key, newhdr + hdrlen);
@@ -109,12 +106,10 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local,
 				    struct sk_buff *skb,
 				    struct ieee80211_key *key)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	u16 fc;
-	int hdrlen;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	unsigned int hdrlen;
 
-	fc = le16_to_cpu(hdr->frame_control);
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen);
 	skb_pull(skb, WEP_IV_LEN);
 }
@@ -224,17 +219,15 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb,
 	u32 klen;
 	u8 *rc4key;
 	u8 keyidx;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	u16 fc;
-	int hdrlen;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	unsigned int hdrlen;
 	size_t len;
 	int ret = 0;
 
-	fc = le16_to_cpu(hdr->frame_control);
-	if (!(fc & IEEE80211_FCTL_PROTECTED))
+	if (!ieee80211_has_protected(hdr->frame_control))
 		return -1;
 
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
 	if (skb->len < 8 + hdrlen)
 		return -1;
@@ -281,17 +274,15 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb,
 
 u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	u16 fc;
-	int hdrlen;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	unsigned int hdrlen;
 	u8 *ivpos;
 	u32 iv;
 
-	fc = le16_to_cpu(hdr->frame_control);
-	if (!(fc & IEEE80211_FCTL_PROTECTED))
+	if (!ieee80211_has_protected(hdr->frame_control))
 		return NULL;
 
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	ivpos = skb->data + hdrlen;
 	iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2];
 
-- 
cgit v1.2.3-70-g09d2


From 065e9605f941b8bc4dbfa1f14ba98eb0da7e3fbe Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sun, 22 Jun 2008 16:45:27 -0700
Subject: mac80211: tx.c use new frame control helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 54 +++++++++++++++++++++++++-----------------------------
 1 file changed, 25 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7a14a39ebd7..bf3600a0477 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -52,9 +52,8 @@ static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdat
 static void ieee80211_dump_frame(const char *ifname, const char *title,
 				 const struct sk_buff *skb)
 {
-	const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	u16 fc;
-	int hdrlen;
+	const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	unsigned int hdrlen;
 	DECLARE_MAC_BUF(mac);
 
 	printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len);
@@ -63,13 +62,12 @@ static void ieee80211_dump_frame(const char *ifname, const char *title,
 		return;
 	}
 
-	fc = le16_to_cpu(hdr->frame_control);
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	if (hdrlen > skb->len)
 		hdrlen = skb->len;
 	if (hdrlen >= 4)
 		printk(" FC=0x%04x DUR=0x%04x",
-		       fc, le16_to_cpu(hdr->duration_id));
+		    le16_to_cpu(hdr->frame_control), le16_to_cpu(hdr->duration_id));
 	if (hdrlen >= 10)
 		printk(" A1=%s", print_mac(mac, hdr->addr1));
 	if (hdrlen >= 16)
@@ -281,7 +279,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
 
-	if (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)) >= 24)
+	if (ieee80211_hdrlen(hdr->frame_control) >= 24)
 		ieee80211_include_sequence(tx->sdata, hdr);
 
 	return TX_CONTINUE;
@@ -542,8 +540,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 static ieee80211_tx_result
 ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
-	u16 fc = le16_to_cpu(hdr->frame_control);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
 	u16 dur;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	struct ieee80211_supported_band *sband;
@@ -595,7 +592,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	/* Transmit data frames using short preambles if the driver supports
 	 * short preambles at the selected rate and short preambles are
 	 * available on the network at the current point in time. */
-	if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) &&
+	if (ieee80211_is_data(hdr->frame_control) &&
 	    (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
 	    tx->sdata->bss_conf.use_short_preamble &&
 	    (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) {
@@ -647,7 +644,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 static ieee80211_tx_result
 ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
 	size_t hdrlen, per_fragm, num_fragm, payload_len, left;
 	struct sk_buff **frags, *first, *frag;
 	int i;
@@ -670,7 +667,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 
 	first = tx->skb;
 
-	hdrlen = ieee80211_get_hdrlen(tx->fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	payload_len = first->len - hdrlen;
 	per_fragm = frag_threshold - hdrlen - FCS_LEN;
 	num_fragm = DIV_ROUND_UP(payload_len, per_fragm);
@@ -1395,7 +1392,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	struct ieee80211_tx_info *info;
 	struct ieee80211_sub_if_data *sdata;
 	int ret = 1, head_need;
-	u16 ethertype, hdrlen,  meshhdrlen = 0, fc;
+	u16 ethertype, hdrlen,  meshhdrlen = 0;
+	__le16 fc;
 	struct ieee80211_hdr hdr;
 	struct ieee80211s_hdr mesh_hdr;
 	const u8 *encaps_data;
@@ -1418,12 +1416,12 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	/* convert Ethernet header to proper 802.11 header (based on
 	 * operation mode) */
 	ethertype = (skb->data[12] << 8) | skb->data[13];
-	fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA;
+	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
 
 	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_AP:
 	case IEEE80211_IF_TYPE_VLAN:
-		fc |= IEEE80211_FCTL_FROMDS;
+		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
 		/* DA BSSID SA */
 		memcpy(hdr.addr1, skb->data, ETH_ALEN);
 		memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN);
@@ -1431,7 +1429,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		hdrlen = 24;
 		break;
 	case IEEE80211_IF_TYPE_WDS:
-		fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS;
+		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
 		/* RA TA DA SA */
 		memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN);
 		memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN);
@@ -1441,7 +1439,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		break;
 #ifdef CONFIG_MAC80211_MESH
 	case IEEE80211_IF_TYPE_MESH_POINT:
-		fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS;
+		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
 		/* RA TA DA SA */
 		if (is_multicast_ether_addr(skb->data))
 			memcpy(hdr.addr1, skb->data, ETH_ALEN);
@@ -1471,7 +1469,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		break;
 #endif
 	case IEEE80211_IF_TYPE_STA:
-		fc |= IEEE80211_FCTL_TODS;
+		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
 		/* BSSID SA DA */
 		memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN);
 		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
@@ -1506,7 +1504,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	/* receiver and we are QoS enabled, use a QoS type frame */
 	if (sta_flags & WLAN_STA_WME &&
 	    ieee80211_num_regular_queues(&local->hw) >= 4) {
-		fc |= IEEE80211_STYPE_QOS_DATA;
+		fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
 		hdrlen += 2;
 	}
 
@@ -1534,7 +1532,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		goto fail;
 	}
 
-	hdr.frame_control = cpu_to_le16(fc);
+	hdr.frame_control = fc;
 	hdr.duration_id = 0;
 	hdr.seq_ctrl = 0;
 
@@ -1603,7 +1601,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		h_pos += meshhdrlen;
 	}
 
-	if (fc & IEEE80211_STYPE_QOS_DATA) {
+	if (ieee80211_is_data_qos(fc)) {
 		__le16 *qos_control;
 
 		qos_control = (__le16*) skb_push(skb, 2);
@@ -1861,8 +1859,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 		mgmt = (struct ieee80211_mgmt *)
 			skb_put(skb, 24 + sizeof(mgmt->u.beacon));
 		memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
-		mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-						   IEEE80211_STYPE_BEACON);
+		mgmt->frame_control =
+		    cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON);
 		memset(mgmt->da, 0xff, ETH_ALEN);
 		memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 		/* BSSID is left zeroed, wildcard value */
@@ -1930,10 +1928,9 @@ void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 		       struct ieee80211_rts *rts)
 {
 	const struct ieee80211_hdr *hdr = frame;
-	u16 fctl;
 
-	fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS;
-	rts->frame_control = cpu_to_le16(fctl);
+	rts->frame_control =
+	    cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS);
 	rts->duration = ieee80211_rts_duration(hw, vif, frame_len,
 					       frame_txctl);
 	memcpy(rts->ra, hdr->addr1, sizeof(rts->ra));
@@ -1947,10 +1944,9 @@ void ieee80211_ctstoself_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			     struct ieee80211_cts *cts)
 {
 	const struct ieee80211_hdr *hdr = frame;
-	u16 fctl;
 
-	fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS;
-	cts->frame_control = cpu_to_le16(fctl);
+	cts->frame_control =
+	    cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS);
 	cts->duration = ieee80211_ctstoself_duration(hw, vif,
 						     frame_len, frame_txctl);
 	memcpy(cts->ra, hdr->addr1, sizeof(cts->ra));
-- 
cgit v1.2.3-70-g09d2


From 182503abf450d39417c2cc6a2c49b4731117d21b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sun, 22 Jun 2008 16:45:29 -0700
Subject: mac80211: rx.c use new frame control helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c32a0bcd53b..8962d1355f0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -61,7 +61,7 @@ static inline int should_drop_frame(struct ieee80211_rx_status *status,
 				    int present_fcs_len,
 				    int radiotap_len)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
 	if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
 		return 1;
@@ -2123,7 +2123,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
 	struct tid_ampdu_rx *tid_agg_rx;
 	u16 sc;
 	u16 mpdu_seq_num;
-	u8 ret = 0, *qc;
+	u8 ret = 0;
 	int tid;
 
 	sta = sta_info_get(local, hdr->addr2);
@@ -2135,8 +2135,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
 	if (!ieee80211_is_data_qos(hdr->frame_control))
 		goto end_reorder;
 
-	qc = ieee80211_get_qos_ctl(hdr);
-	tid = qc[0] & QOS_CONTROL_TID_MASK;
+	tid = *ieee80211_get_qos_ctl(hdr) & QOS_CONTROL_TID_MASK;
 
 	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL)
 		goto end_reorder;
-- 
cgit v1.2.3-70-g09d2


From 4e3996fe899651e00d3085110cc6e92f6a78ee3e Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sun, 22 Jun 2008 16:45:32 -0700
Subject: mac80211: mlme.c use new frame control helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 75c4e9743fb..7b4d4d46843 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3271,33 +3271,32 @@ ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
 		      struct ieee80211_rx_status *rx_status)
 {
 	struct ieee80211_mgmt *mgmt;
-	u16 fc;
+	__le16 fc;
 
 	if (skb->len < 2)
 		return RX_DROP_UNUSABLE;
 
 	mgmt = (struct ieee80211_mgmt *) skb->data;
-	fc = le16_to_cpu(mgmt->frame_control);
+	fc = mgmt->frame_control;
 
-	if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL)
+	if (ieee80211_is_ctl(fc))
 		return RX_CONTINUE;
 
 	if (skb->len < 24)
 		return RX_DROP_MONITOR;
 
-	if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT) {
-		if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP) {
-			ieee80211_rx_mgmt_probe_resp(dev, mgmt,
-						     skb->len, rx_status);
-			dev_kfree_skb(skb);
-			return RX_QUEUED;
-		} else if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) {
-			ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len,
-						 rx_status);
-			dev_kfree_skb(skb);
-			return RX_QUEUED;
-		}
+	if (ieee80211_is_probe_resp(fc)) {
+		ieee80211_rx_mgmt_probe_resp(dev, mgmt, skb->len, rx_status);
+		dev_kfree_skb(skb);
+		return RX_QUEUED;
 	}
+
+	if (ieee80211_is_beacon(fc)) {
+		ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, rx_status);
+		dev_kfree_skb(skb);
+		return RX_QUEUED;
+	}
+
 	return RX_CONTINUE;
 }
 
@@ -3875,7 +3874,7 @@ static void ieee80211_send_nullfunc(struct ieee80211_local *local,
 {
 	struct sk_buff *skb;
 	struct ieee80211_hdr *nullfunc;
-	u16 fc;
+	__le16 fc;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24);
 	if (!skb) {
@@ -3887,11 +3886,11 @@ static void ieee80211_send_nullfunc(struct ieee80211_local *local,
 
 	nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24);
 	memset(nullfunc, 0, 24);
-	fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
-	     IEEE80211_FCTL_TODS;
+	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
+			 IEEE80211_FCTL_TODS);
 	if (powersave)
-		fc |= IEEE80211_FCTL_PM;
-	nullfunc->frame_control = cpu_to_le16(fc);
+		fc |= cpu_to_le16(IEEE80211_FCTL_PM);
+	nullfunc->frame_control = fc;
 	memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN);
 	memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN);
-- 
cgit v1.2.3-70-g09d2


From fa6adfe9e625a6a843a1abed5f4e7a000c11952c Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 24 Jun 2008 13:37:58 +0300
Subject: mac80211: don't return -EINVAL upon iwconfig wlan0 rts auto

This patch avoids returning -EINVAL upon iwconfig wlan0 rts auto. If
rts->fixed is 0, then we should choose a default value instead of failing.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wext.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index f47d13bdf7f..3cbaf5301d0 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -721,6 +721,9 @@ static int ieee80211_ioctl_siwrts(struct net_device *dev,
 
 	if (rts->disabled)
 		local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
+	else if (!rts->fixed)
+		/* if the rts value is not fixed, then take default */
+		local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
 	else if (rts->value < 0 || rts->value > IEEE80211_MAX_RTS_THRESHOLD)
 		return -EINVAL;
 	else
-- 
cgit v1.2.3-70-g09d2


From b9fcc4f2987a757acb3af43aa31dc860bb957970 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 24 Jun 2008 13:37:59 +0300
Subject: mac80211: update the authentication method

This patch updates the authentication method upon giwencode ioctl.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wext.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 3cbaf5301d0..5e76ab120cd 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -952,6 +952,19 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev,
 	erq->length = sdata->keys[idx]->conf.keylen;
 	erq->flags |= IW_ENCODE_ENABLED;
 
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+		switch (ifsta->auth_alg) {
+		case WLAN_AUTH_OPEN:
+		case WLAN_AUTH_LEAP:
+			erq->flags |= IW_ENCODE_OPEN;
+			break;
+		case WLAN_AUTH_SHARED_KEY:
+			erq->flags |= IW_ENCODE_RESTRICTED;
+			break;
+		}
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From f37d08bddc5cb8de18e55f2b0a401b3eb6269af4 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Tue, 24 Jun 2008 15:50:17 +0300
Subject: mac80211: add phy information to giwname

This patch add phy information to giwname.

Quoting:
It's not useless, it's supposed to tell you about the protocol
capability of the device, like "IEEE 802.11b" or "IEEE 802.11abg"

Jean

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wext.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 5e76ab120cd..df0531c2814 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -135,7 +135,39 @@ static int ieee80211_ioctl_giwname(struct net_device *dev,
 				   struct iw_request_info *info,
 				   char *name, char *extra)
 {
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_supported_band *sband;
+	u8 is_ht = 0, is_a = 0, is_b = 0, is_g = 0;
+
+
+	sband = local->hw.wiphy->bands[IEEE80211_BAND_5GHZ];
+	if (sband) {
+		is_a = 1;
+		is_ht |= sband->ht_info.ht_supported;
+	}
+
+	sband = local->hw.wiphy->bands[IEEE80211_BAND_2GHZ];
+	if (sband) {
+		int i;
+		/* Check for mandatory rates */
+		for (i = 0; i < sband->n_bitrates; i++) {
+			if (sband->bitrates[i].bitrate == 10)
+				is_b = 1;
+			if (sband->bitrates[i].bitrate == 60)
+				is_g = 1;
+		}
+		is_ht |= sband->ht_info.ht_supported;
+	}
+
 	strcpy(name, "IEEE 802.11");
+	if (is_a)
+		strcat(name, "a");
+	if (is_b)
+		strcat(name, "b");
+	if (is_g)
+		strcat(name, "g");
+	if (is_ht)
+		strcat(name, "n");
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 59959a6150c8af737898e83f727e824dbed7b0fa Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Jun 2008 19:59:56 +0200
Subject: mac80211: make workqueue freezable

This patch makes the mac80211 workqueue freezable making it
interact a bit better with system suspend and not try to ping
the AP while the hardware is down.

This doesn't really help with implementing proper suspend in
any way but makes some bad things trigger less.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 5c5396edad3..b661ee5bb82 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1691,7 +1691,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	list_add_tail(&sdata->list, &local->interfaces);
 
 	name = wiphy_dev(local->hw.wiphy)->driver->name;
-	local->hw.workqueue = create_singlethread_workqueue(name);
+	local->hw.workqueue = create_freezeable_workqueue(name);
 	if (!local->hw.workqueue) {
 		result = -ENOMEM;
 		goto fail_workqueue;
-- 
cgit v1.2.3-70-g09d2


From 03f93c3d4c8aa9ed2e2b0a949ece658053527d71 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 25 Jun 2008 14:36:27 +0300
Subject: mac80211: fix tx fragmentation

This patch fixes TX fragmentation caused by
tx handlers reordering and 'tx info to cb' patches

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 68 ++++++++++++++++++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index bf3600a0477..52ab85c4341 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -85,8 +85,8 @@ static inline void ieee80211_dump_frame(const char *ifname, const char *title,
 }
 #endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */
 
-static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
-			      int next_frag_len)
+static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
+				 int next_frag_len)
 {
 	int rate, mrate, erp, dur, i;
 	struct ieee80211_rate *txrate;
@@ -138,7 +138,7 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 
 	/* data/mgmt */
 	if (0 /* FIX: data/mgmt during CFP */)
-		return 32768;
+		return cpu_to_le16(32768);
 
 	if (group_addr) /* Group address as the destination - no ACK */
 		return 0;
@@ -208,7 +208,7 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 				tx->sdata->bss_conf.use_short_preamble);
 	}
 
-	return dur;
+	return cpu_to_le16(dur);
 }
 
 static int inline is_ieee80211_device(struct net_device *dev,
@@ -541,7 +541,6 @@ static ieee80211_tx_result
 ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-	u16 dur;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	struct ieee80211_supported_band *sband;
 
@@ -599,14 +598,6 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 		info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
 	}
 
-	/* Setup duration field for the first fragment of the frame. Duration
-	 * for remaining fragments will be updated when they are being sent
-	 * to low-level driver in ieee80211_tx(). */
-	dur = ieee80211_duration(tx, is_multicast_ether_addr(hdr->addr1),
-				 (tx->flags & IEEE80211_TX_FRAGMENTED) ?
-				 tx->extra_frag[0]->len : 0);
-	hdr->duration_id = cpu_to_le16(dur);
-
 	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
 	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) {
 		struct ieee80211_rate *rate;
@@ -708,6 +699,8 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 		fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG));
 		copylen = left > per_fragm ? per_fragm : left;
 		memcpy(skb_put(frag, copylen), pos, copylen);
+		memcpy(frag->cb, first->cb, sizeof(frag->cb));
+		skb_copy_queue_mapping(frag, first);
 
 		pos += copylen;
 		left -= copylen;
@@ -751,6 +744,36 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
 	return TX_DROP;
 }
 
+static ieee80211_tx_result
+ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
+	int next_len, i;
+	int group_addr = is_multicast_ether_addr(hdr->addr1);
+
+	if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) {
+		hdr->duration_id = ieee80211_duration(tx, group_addr, 0);
+		return TX_CONTINUE;
+	}
+
+	hdr->duration_id = ieee80211_duration(tx, group_addr,
+					      tx->extra_frag[0]->len);
+
+	for (i = 0; i < tx->num_extra_frag; i++) {
+		if (i + 1 < tx->num_extra_frag) {
+			next_len = tx->extra_frag[i + 1]->len;
+		} else {
+			next_len = 0;
+			tx->rate_idx = tx->last_frag_rate_idx;
+		}
+
+		hdr = (struct ieee80211_hdr *)tx->extra_frag[i]->data;
+		hdr->duration_id = ieee80211_duration(tx, 0, next_len);
+	}
+
+	return TX_CONTINUE;
+}
+
 static ieee80211_tx_result
 ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 {
@@ -785,6 +808,7 @@ static ieee80211_tx_handler ieee80211_tx_handlers[] =
 	ieee80211_tx_h_fragment,
 	/* handlers after fragment must be aware of tx info fragmentation! */
 	ieee80211_tx_h_encrypt,
+	ieee80211_tx_h_calculate_duration,
 	ieee80211_tx_h_stats,
 	NULL
 };
@@ -1151,24 +1175,6 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb)
 	if (invoke_tx_handlers(&tx))
 		goto out;
 
-	if (tx.extra_frag) {
-		for (i = 0; i < tx.num_extra_frag; i++) {
-			int next_len, dur;
-			struct ieee80211_hdr *hdr =
-				(struct ieee80211_hdr *)
-				tx.extra_frag[i]->data;
-
-			if (i + 1 < tx.num_extra_frag) {
-				next_len = tx.extra_frag[i + 1]->len;
-			} else {
-				next_len = 0;
-				tx.rate_idx = tx.last_frag_rate_idx;
-			}
-			dur = ieee80211_duration(&tx, 0, next_len);
-			hdr->duration_id = cpu_to_le16(dur);
-		}
-	}
-
 retry:
 	ret = __ieee80211_tx(local, skb, &tx);
 	if (ret) {
-- 
cgit v1.2.3-70-g09d2


From 00eb7fe77eb455f807c396f9917f0f623d4c84bb Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 26 Jun 2008 12:13:46 +0300
Subject: mac80211: fix an oops in several failure paths in key allocation

This patch fixes an oops in several failure paths in key allocation. This
Oops occurs when freeing a key that has not been linked yet, so the
key->sdata is not set.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/key.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 150d66dbda9..220e83be3ef 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -380,6 +380,15 @@ void ieee80211_key_free(struct ieee80211_key *key)
 	if (!key)
 		return;
 
+	if (!key->sdata) {
+		/* The key has not been linked yet, simply free it
+		 * and don't Oops */
+		if (key->conf.alg == ALG_CCMP)
+			ieee80211_aes_key_free(key->u.ccmp.tfm);
+		kfree(key);
+		return;
+	}
+
 	spin_lock_irqsave(&key->sdata->local->key_lock, flags);
 	__ieee80211_key_free(key);
 	spin_unlock_irqrestore(&key->sdata->local->key_lock, flags);
-- 
cgit v1.2.3-70-g09d2


From 57413ebc4e0f1e471a3b4db4aff9a85c083d090e Mon Sep 17 00:00:00 2001
From: Miquel van Smoorenburg <miquels@cistron.nl>
Date: Fri, 27 Jun 2008 17:23:57 -0700
Subject: tcp: calculate tcp_mem based on low memory instead of all memory

The tcp_mem array which contains limits on the total amount of memory
used by TCP sockets is calculated based on nr_all_pages.  On a 32 bits
x86 system, we should base this on the number of lowmem pages.

Signed-off-by: Miquel van Smoorenburg <miquels@cistron.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fc54a48fde1..850825dc86e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -260,6 +260,8 @@
 #include <linux/socket.h>
 #include <linux/random.h>
 #include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/crypto.h>
@@ -2620,7 +2622,7 @@ __setup("thash_entries=", set_thash_entries);
 void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
-	unsigned long limit;
+	unsigned long nr_pages, limit;
 	int order, i, max_share;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -2689,8 +2691,9 @@ void __init tcp_init(void)
 	 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
 	 * memory, with a floor of 128 pages.
 	 */
-	limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
-	limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	nr_pages = totalram_pages - totalhigh_pages;
+	limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
 	limit = max(limit, 128UL);
 	sysctl_tcp_mem[0] = limit / 4 * 3;
 	sysctl_tcp_mem[1] = limit;
-- 
cgit v1.2.3-70-g09d2


From db43a282d3ec92ea45109c5551fff3dcc5afef02 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Fri, 27 Jun 2008 17:27:21 -0700
Subject: tcp: fix for splice receive when used with software LRO

If an skb has nr_frags set to zero but its frag_list is not empty (as
it can happen if software LRO is enabled), and a previous
tcp_read_sock has consumed the linear part of the skb, then
__skb_splice_bits:

(a) incorrectly reports an error and

(b) forgets to update the offset to account for the linear part

Any of the two problems will cause the subsequent __skb_splice_bits
call (the one that handles the frag_list skbs) to either skip data,
or, if the unadjusted offset is greater then the size of the next skb
in the frag_list, make tcp_splice_read loop forever.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1e556d31211..366621610e7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1292,12 +1292,14 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 {
 	unsigned int nr_pages = spd->nr_pages;
 	unsigned int poff, plen, len, toff, tlen;
-	int headlen, seg;
+	int headlen, seg, error = 0;
 
 	toff = *offset;
 	tlen = *total_len;
-	if (!tlen)
+	if (!tlen) {
+		error = 1;
 		goto err;
+	}
 
 	/*
 	 * if the offset is greater than the linear part, go directly to
@@ -1339,7 +1341,8 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 		 * just jump directly to update and return, no point
 		 * in going over fragments when the output is full.
 		 */
-		if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb))
+		error = spd_fill_page(spd, virt_to_page(p), plen, poff, skb);
+		if (error)
 			goto done;
 
 		tlen -= plen;
@@ -1369,7 +1372,8 @@ map_frag:
 		if (!plen)
 			break;
 
-		if (spd_fill_page(spd, f->page, plen, poff, skb))
+		error = spd_fill_page(spd, f->page, plen, poff, skb);
+		if (error)
 			break;
 
 		tlen -= plen;
@@ -1382,7 +1386,10 @@ done:
 		return 0;
 	}
 err:
-	return 1;
+	/* update the offset to reflect the linear part skip, if any */
+	if (!error)
+		*offset = toff;
+	return error;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From ec0d215f9420564fc8286dcf93d2d068bb53a07e Mon Sep 17 00:00:00 2001
From: Rainer Weikusat <rweikusat@mssgmbh.com>
Date: Fri, 27 Jun 2008 19:34:18 -0700
Subject: af_unix: fix 'poll for write'/connected DGRAM sockets

For n:1 'datagram connections' (eg /dev/log), the unix_dgram_sendmsg
routine implements a form of receiver-imposed flow control by
comparing the length of the receive queue of the 'peer socket' with
the max_ack_backlog value stored in the corresponding sock structure,
either blocking the thread which caused the send-routine to be called
or returning EAGAIN. This routine is used by both SOCK_DGRAM and
SOCK_SEQPACKET sockets. The poll-implementation for these socket types
is datagram_poll from core/datagram.c. A socket is deemed to be
writeable by this routine when the memory presently consumed by
datagrams owned by it is less than the configured socket send buffer
size. This is always wrong for PF_UNIX non-stream sockets connected to
server sockets dealing with (potentially) multiple clients if the
abovementioned receive queue is currently considered to be full.
'poll' will then return, indicating that the socket is writeable, but
a subsequent write result in EAGAIN, effectively causing an (usual)
application to 'poll for writeability by repeated send request with
O_NONBLOCK set' until it has consumed its time quantum.

The change below uses a suitably modified variant of the datagram_poll
routines for both type of PF_UNIX sockets, which tests if the
recv-queue of the peer a socket is connected to is presently
considered to be 'full' as part of the 'is this socket
writeable'-checking code. The socket being polled is additionally
put onto the peer_wait wait queue associated with its peer, because the
unix_dgram_recvmsg routine does a wake up on this queue after a
datagram was received and the 'other wakeup call' is done implicitly
as part of skb destruction, meaning, a process blocked in poll
because of a full peer receive queue could otherwise sleep forever
if no datagram owned by its socket was already sitting on this queue.
Among this change is a small (inline) helper routine named
'unix_recvq_full', which consolidates the actual testing code (in three
different places) into a single location.

Signed-off-by: Rainer Weikusat <rweikusat@mssgmbh.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 52 ++++++++++++++++++++++++----------------------------
 1 file changed, 24 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 657835f227d..783317dacd3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -487,8 +487,8 @@ static int unix_socketpair(struct socket *, struct socket *);
 static int unix_accept(struct socket *, struct socket *, int);
 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
-static unsigned int unix_datagram_poll(struct file *, struct socket *,
-				       poll_table *);
+static unsigned int unix_dgram_poll(struct file *, struct socket *,
+				    poll_table *);
 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 static int unix_shutdown(struct socket *, int);
 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
@@ -534,7 +534,7 @@ static const struct proto_ops unix_dgram_ops = {
 	.socketpair =	unix_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	unix_getname,
-	.poll =		unix_datagram_poll,
+	.poll =		unix_dgram_poll,
 	.ioctl =	unix_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	unix_shutdown,
@@ -555,7 +555,7 @@ static const struct proto_ops unix_seqpacket_ops = {
 	.socketpair =	unix_socketpair,
 	.accept =	unix_accept,
 	.getname =	unix_getname,
-	.poll =		unix_datagram_poll,
+	.poll =		unix_dgram_poll,
 	.ioctl =	unix_ioctl,
 	.listen =	unix_listen,
 	.shutdown =	unix_shutdown,
@@ -1994,29 +1994,13 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
 	return mask;
 }
 
-static unsigned int unix_datagram_poll(struct file *file, struct socket *sock,
-				       poll_table *wait)
+static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
+				    poll_table *wait)
 {
-	struct sock *sk = sock->sk, *peer;
-	unsigned int mask;
+	struct sock *sk = sock->sk, *other;
+	unsigned int mask, writable;
 
 	poll_wait(file, sk->sk_sleep, wait);
-
-	peer = unix_peer_get(sk);
-	if (peer) {
-		if (peer != sk) {
-			/*
-			 * Writability of a connected socket additionally
-			 * depends on the state of the receive queue of the
-			 * peer.
-			 */
-			poll_wait(file, &unix_sk(peer)->peer_wait, wait);
-		} else {
-			sock_put(peer);
-			peer = NULL;
-		}
-	}
-
 	mask = 0;
 
 	/* exceptional events? */
@@ -2042,14 +2026,26 @@ static unsigned int unix_datagram_poll(struct file *file, struct socket *sock,
 	}
 
 	/* writable? */
-	if (unix_writable(sk) && !(peer && unix_recvq_full(peer)))
+	writable = unix_writable(sk);
+	if (writable) {
+		other = unix_peer_get(sk);
+		if (other) {
+			if (unix_peer(other) != sk) {
+				poll_wait(file, &unix_sk(other)->peer_wait,
+					  wait);
+				if (unix_recvq_full(other))
+					writable = 0;
+			}
+
+			sock_put(other);
+		}
+	}
+
+	if (writable)
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
-	if (peer)
-		sock_put(peer);
-
 	return mask;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 5dbaec5dc6a4895db8bf9765a867418481ed7311 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Fri, 27 Jun 2008 19:35:16 -0700
Subject: netdevice: Fix typo of dev_unicast_add() comment

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index c421a1f8f0b..56b46579ff4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2973,7 +2973,7 @@ EXPORT_SYMBOL(dev_unicast_delete);
 /**
  *	dev_unicast_add		- add a secondary unicast address
  *	@dev: device
- *	@addr: address to delete
+ *	@addr: address to add
  *	@alen: length of @addr
  *
  *	Add a secondary unicast address to the device or increase
-- 
cgit v1.2.3-70-g09d2


From 01e123d79a23000f85c4cfb12a957908c0b2c3d8 Mon Sep 17 00:00:00 2001
From: WANG Cong <wcong@critical-links.com>
Date: Fri, 27 Jun 2008 19:51:35 -0700
Subject: pkt_sched: ERR_PTR() ususally encodes an negative errno, not
 positive.

Note, in the following patch, 'err' is initialized as:

int err = -ENOBUFS;

Signed-off-by: WANG Cong <wcong@critical-links.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d355e5e47fe..13afa721439 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -468,7 +468,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
 
 	return sch;
 errout:
-	return ERR_PTR(-err);
+	return ERR_PTR(err);
 }
 
 struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
-- 
cgit v1.2.3-70-g09d2


From ede16af4cdbd21fa15d4178beb7c6fcbcccd07e9 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 27 Jun 2008 19:54:05 -0700
Subject: pkt_sched: Remove CONFIG_NET_SCH_RR

Commit d62733c8e437fdb58325617c4b3331769ba82d70
([SCHED]: Qdisc changes and sch_rr added for multiqueue)
added a NET_SCH_RR option that was unused since the code
went unconditionally into sch_prio.

Reported-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/Kconfig | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'net')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 82adfe6447d..9437b27ff84 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -106,17 +106,6 @@ config NET_SCH_PRIO
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_prio.
 
-config NET_SCH_RR
-	tristate "Multi Band Round Robin Queuing (RR)"
-	select NET_SCH_PRIO
-	---help---
-	  Say Y here if you want to use an n-band round robin packet
-	  scheduler.
-
-	  The module uses sch_prio for its framework and is aliased as
-	  sch_rr, so it will load sch_prio, although it is referred
-	  to using sch_rr.
-
 config NET_SCH_RED
 	tristate "Random Early Detection (RED)"
 	---help---
-- 
cgit v1.2.3-70-g09d2


From 7be87351a1f6430426e88b4fcde353ab3330caff Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 27 Jun 2008 20:00:19 -0700
Subject: tcp: /proc/net/tcp rto,ato values not scaled properly (v2)

I found another case where we are sending information to userspace
in the wrong HZ scale.  This should have been fixed back in 2.5 :-(

This means an ABI change but as it stands there is no way for an application
like ss to get the right value.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 6 +++---
 net/ipv6/tcp_ipv6.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 12695be2c25..ffe869ac1bc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2291,7 +2291,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 	}
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
-			"%08X %5d %8d %lu %d %p %u %u %u %u %d%n",
+			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
 		i, src, srcp, dest, destp, sk->sk_state,
 		tp->write_seq - tp->snd_una,
 		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
@@ -2303,8 +2303,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 		icsk->icsk_probes_out,
 		sock_i_ino(sk),
 		atomic_read(&sk->sk_refcnt), sk,
-		icsk->icsk_rto,
-		icsk->icsk_ack.ato,
+		jiffies_to_clock_t(icsk->icsk_rto),
+		jiffies_to_clock_t(icsk->icsk_ack.ato),
 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		tp->snd_cwnd,
 		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cb46749d4c3..40ea9c36d24 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2036,7 +2036,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n",
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -2052,8 +2052,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		   icsk->icsk_probes_out,
 		   sock_i_ino(sp),
 		   atomic_read(&sp->sk_refcnt), sp,
-		   icsk->icsk_rto,
-		   icsk->icsk_ack.ato,
+		   jiffies_to_clock_t(icsk->icsk_rto),
+		   jiffies_to_clock_t(icsk->icsk_ack.ato),
 		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
 		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
 		   );
-- 
cgit v1.2.3-70-g09d2


From 10b595aff138961b520bfed51d664fd99980f6e9 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Fri, 27 Jun 2008 20:02:14 -0700
Subject: netlink: Fix some doc comments in net/netlink/attr.c

Fix some doc comments to match function and attribute names in
net/netlink/attr.c.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/attr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index 47bbf45ae5d..2d106cfe1d2 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -132,6 +132,7 @@ errout:
  * @maxtype: maximum attribute type to be expected
  * @head: head of attribute stream
  * @len: length of attribute stream
+ * @policy: validation policy
  *
  * Parses a stream of attributes and stores a pointer to each attribute in
  * the tb array accessable via the attribute type. Attributes with a type
@@ -194,7 +195,7 @@ struct nlattr *nla_find(struct nlattr *head, int len, int attrtype)
 /**
  * nla_strlcpy - Copy string attribute payload into a sized buffer
  * @dst: where to copy the string to
- * @src: attribute to copy the string from
+ * @nla: attribute to copy the string from
  * @dstsize: size of destination buffer
  *
  * Copies at most dstsize - 1 bytes into the destination buffer.
@@ -340,9 +341,9 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
 }
 
 /**
- * nla_reserve - reserve room for attribute without header
+ * nla_reserve_nohdr - reserve room for attribute without header
  * @skb: socket buffer to reserve room on
- * @len: length of attribute payload
+ * @attrlen: length of attribute payload
  *
  * Reserves room for attribute payload without a header.
  *
-- 
cgit v1.2.3-70-g09d2


From 9a375803feaadb6c34e0807bd9325885dcca5c00 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 27 Jun 2008 20:06:08 -0700
Subject: inet fragments: fix race between inet_frag_find and
 inet_frag_secret_rebuild

The problem is that while we work w/o the inet_frags.lock even
read-locked the secret rebuild timer may occur (on another CPU, since
BHs are still disabled in the inet_frag_find) and change the rnd seed
for ipv4/6 fragments.

It was caused by my patch fd9e63544cac30a34c951f0ec958038f0529e244
([INET]: Omit double hash calculations in xxx_frag_intern) late
in the 2.6.24 kernel, so this should probably be queued to -stable.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 16 +++++++++++-----
 net/ipv4/ip_fragment.c                  |  2 ++
 net/ipv6/netfilter/nf_conntrack_reasm.c |  3 ++-
 net/ipv6/reassembly.c                   |  2 ++
 4 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4ed429bd595..0546a0bc97e 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor);
 
 static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 		struct inet_frag_queue *qp_in, struct inet_frags *f,
-		unsigned int hash, void *arg)
+		void *arg)
 {
 	struct inet_frag_queue *qp;
 #ifdef CONFIG_SMP
 	struct hlist_node *n;
 #endif
+	unsigned int hash;
 
 	write_lock(&f->lock);
+	/*
+	 * While we stayed w/o the lock other CPU could update
+	 * the rnd seed, so we need to re-calculate the hash
+	 * chain. Fortunatelly the qp_in can be used to get one.
+	 */
+	hash = f->hashfn(qp_in);
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
 	 * such entry could be created on other cpu, while we
@@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 }
 
 static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
-		struct inet_frags *f, void *arg, unsigned int hash)
+		struct inet_frags *f, void *arg)
 {
 	struct inet_frag_queue *q;
 
@@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
 	if (q == NULL)
 		return NULL;
 
-	return inet_frag_intern(nf, q, f, hash, arg);
+	return inet_frag_intern(nf, q, f, arg);
 }
 
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
@@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	struct inet_frag_queue *q;
 	struct hlist_node *n;
 
-	read_lock(&f->lock);
 	hlist_for_each_entry(q, n, &f->hash[hash], list) {
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
@@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	}
 	read_unlock(&f->lock);
 
-	return inet_frag_create(nf, f, key, hash);
+	return inet_frag_create(nf, f, key);
 }
 EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cd6ce6ac635..37221f65915 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -229,6 +229,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 
 	arg.iph = iph;
 	arg.user = user;
+
+	read_lock(&ip4_frags.lock);
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
 	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index e65e26e210e..cf20bc4fd60 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -207,9 +207,10 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
 	arg.id = id;
 	arg.src = src;
 	arg.dst = dst;
+
+	read_lock_bh(&nf_frags.lock);
 	hash = ip6qhashfn(id, src, dst);
 
-	local_bh_disable();
 	q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
 	local_bh_enable();
 	if (q == NULL)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 798cabc7535..a60d7d12971 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -247,6 +247,8 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst,
 	arg.id = id;
 	arg.src = src;
 	arg.dst = dst;
+
+	read_lock(&ip6_frags.lock);
 	hash = ip6qhashfn(id, src, dst);
 
 	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
-- 
cgit v1.2.3-70-g09d2


From 251a4b320f2352598f84e4452ab538aa8064af52 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.co.il>
Date: Fri, 27 Jun 2008 20:09:00 -0700
Subject: net/inet_lro: remove setting skb->ip_summed when not LRO-able

When an SKB cannot be chained to a session, the current code attempts
to "restore" its ip_summed field from lro_mgr->ip_summed. However,
lro_mgr->ip_summed does not hold the original value; in fact, we'd
better not touch skb->ip_summed since it is not modified by the code
in the path leading to a failure to chain it.  Also use a cleaer
comment to the describe the ip_summed field of struct net_lro_mgr.

Issue raised by Or Gerlitz <ogerlitz@voltaire.com>

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_lro.h | 6 +++++-
 net/ipv4/inet_lro.c      | 3 +--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/inet_lro.h b/include/linux/inet_lro.h
index 80335b7d77c..c4335faebb6 100644
--- a/include/linux/inet_lro.h
+++ b/include/linux/inet_lro.h
@@ -84,7 +84,11 @@ struct net_lro_mgr {
 				    from received packets and eth protocol
 				    is still ETH_P_8021Q */
 
-	u32 ip_summed;      /* Set in non generated SKBs in page mode */
+	/*
+	 * Set for generated SKBs that are not added to
+	 * the frag list in fragmented mode
+	 */
+	u32 ip_summed;
 	u32 ip_summed_aggr; /* Set in aggregated SKBs: CHECKSUM_UNNECESSARY
 			     * or CHECKSUM_NONE */
 
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 4a4d49fca1f..cfd034a2b96 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -383,8 +383,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
 out2: /* send aggregated SKBs to stack */
 	lro_flush(lro_mgr, lro_desc);
 
-out:  /* Original SKB has to be posted to stack */
-	skb->ip_summed = lro_mgr->ip_summed;
+out:
 	return 1;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 59d88c00cafe5192b058abf4f3ce17c2e27d1c09 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 27 Jun 2008 20:12:32 -0700
Subject: netlabel: Fix a problem when dumping the default IPv6 static labels

There is a missing "!" in a conditional statement which is causing entries to
be skipped when dumping the default IPv6 static label entries.  This can be
demonstrated by running the following:

 # netlabelctl unlbl add default address:::1 \
                                 label:system_u:object_r:unlabeled_t:s0
 # netlabelctl -p unlbl list

... you will notice that the entry for the IPv6 localhost address is not
displayed but does exist (works correctly, causes collisions when attempting
to add duplicate entries, etc.).

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_unlabeled.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 0099da5b259..52b2611a6eb 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1534,7 +1534,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 		}
 	}
 	list_for_each_entry_rcu(addr6, &iface->addr6_list, list) {
-		if (addr6->valid || iter_addr6++ < skip_addr6)
+		if (!addr6->valid || iter_addr6++ < skip_addr6)
 			continue;
 		if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
 					   iface,
-- 
cgit v1.2.3-70-g09d2


From d420895efb259a78dda50f95289571faa6e10e41 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 27 Jun 2008 20:14:54 -0700
Subject: ipv6 route: Convert rt6_device_match() to use RT6_LOOKUP_F_xxx flags.

The commit 77d16f450ae0452d7d4b009f78debb1294fb435c ("[IPV6] ROUTE:
Unify RT6_F_xxx and RT6_SELECT_F_xxx flags") intended to pass various
routing lookup hints around RT6_LOOKUP_F_xxx flags, but conversion was
missing for rt6_device_match().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d1f3e19b06c..7ff687020fa 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -240,7 +240,7 @@ static inline int rt6_need_strict(struct in6_addr *daddr)
 static inline struct rt6_info *rt6_device_match(struct net *net,
 						    struct rt6_info *rt,
 						    int oif,
-						    int strict)
+						    int flags)
 {
 	struct rt6_info *local = NULL;
 	struct rt6_info *sprt;
@@ -253,7 +253,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 			if (dev->flags & IFF_LOOPBACK) {
 				if (sprt->rt6i_idev == NULL ||
 				    sprt->rt6i_idev->dev->ifindex != oif) {
-					if (strict && oif)
+					if (flags & RT6_LOOKUP_F_IFACE && oif)
 						continue;
 					if (local && (!oif ||
 						      local->rt6i_idev->dev->ifindex == oif))
@@ -266,7 +266,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 		if (local)
 			return local;
 
-		if (strict)
+		if (flags & RT6_LOOKUP_F_IFACE)
 			return net->ipv6.ip6_null_entry;
 	}
 	return rt;
-- 
cgit v1.2.3-70-g09d2


From 84ebe1cdae56707b9aa1b40ae5aa7d817ba745f5 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 30 Jun 2008 12:41:30 -0700
Subject: netfilter: nf_conntrack_tcp: fixing to check the lower bound of valid
 ACK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lost connections was reported by Thomas Bätzler (running 2.6.25 kernel) on
the netfilter mailing list (see the thread "Weird nat/conntrack Problem
with PASV FTP upload"). He provided tcpdump recordings which helped to
find a long lingering bug in conntrack.

In TCP connection tracking, checking the lower bound of valid ACK could
lead to mark valid packets as INVALID because:

 - We have got a "higher or equal" inequality, but the test checked
   the "higher" condition only; fixed.
 - If the packet contains a SACK option, it could occur that the ACK
   value was before the left edge of our (S)ACK "window": if a previous
   packet from the other party intersected the right edge of the window
   of the receiver, we could move forward the window parameters beyond
   accepting a valid ack. Therefore in this patch we check the rightmost
   SACK edge instead of the ACK value in the lower bound of valid (S)ACK
   test.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index ba94004fe32..271cd01d57a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -331,12 +331,13 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 
    I.   Upper bound for valid data:	seq <= sender.td_maxend
    II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
-   III.	Upper bound for valid ack:      sack <= receiver.td_end
-   IV.	Lower bound for valid ack:	ack >= receiver.td_end - MAXACKWINDOW
+   III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
+   IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW
 
-   where sack is the highest right edge of sack block found in the packet.
+   where sack is the highest right edge of sack block found in the packet
+   or ack in the case of packet without SACK option.
 
-   The upper bound limit for a valid ack is not ignored -
+   The upper bound limit for a valid (s)ack is not ignored -
    we doesn't have to deal with fragments.
 */
 
@@ -606,12 +607,12 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		 before(seq, sender->td_maxend + 1),
 		 after(end, sender->td_end - receiver->td_maxwin - 1),
 		 before(sack, receiver->td_end + 1),
-		 after(ack, receiver->td_end - MAXACKWINDOW(sender)));
+		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 
 	if (before(seq, sender->td_maxend + 1) &&
 	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
 	    before(sack, receiver->td_end + 1) &&
-	    after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
+	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 		/*
 		 * Take into account window scaling (RFC 1323).
 		 */
-- 
cgit v1.2.3-70-g09d2


From 23976efedd5ecb420b87455787c537eb4aed1981 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sat, 28 Jun 2008 02:50:13 +0300
Subject: mac80211: don't accept WEP keys other than WEP40 and WEP104

This patch makes mac80211 refuse a WEP key whose length is not WEP40 nor
WEP104.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 9 +++++++++
 net/mac80211/wext.c    | 7 +++++++
 2 files changed, 16 insertions(+)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index dae3f9ec115..bcd1623245c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -595,6 +595,15 @@ enum ieee80211_key_alg {
 	ALG_CCMP,
 };
 
+/**
+ * enum ieee80211_key_len - key length
+ * @WEP40: WEP 5 byte long key
+ * @WEP104: WEP 13 byte long key
+ */
+enum ieee80211_key_len {
+	LEN_WEP40 = 5,
+	LEN_WEP104 = 13,
+};
 
 /**
  * enum ieee80211_key_flags - key flags
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 6106cb79060..e8404212ad5 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -95,6 +95,13 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr,
 			}
 		}
 
+		if (alg == ALG_WEP &&
+			key_len != LEN_WEP40 && key_len != LEN_WEP104) {
+			ieee80211_key_free(key);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
 		ieee80211_key_link(key, sdata, sta);
 
 		if (set_tx_key || (!sta && !sdata->default_key && key))
-- 
cgit v1.2.3-70-g09d2


From 7b1e78d5052a92e95d851fbb0236a712264fe7e8 Mon Sep 17 00:00:00 2001
From: Yi Zhu <yi.zhu@intel.com>
Date: Wed, 18 Jun 2008 17:53:43 +0300
Subject: mac80211: add MAC80211_VERBOSE_SPECT_MGMT_DEBUG Kconfig option

The patch introduces MAC80211_VERBOSE_SPECT_MGMT_DEBUG Kconfig option to
suppress Spectrum Management 802.11h related debug logs.

Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Kconfig | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 0d3661d9b6a..594bc7319ca 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -177,3 +177,10 @@ config MAC80211_VERBOSE_MPL_DEBUG
 	---help---
 	  Say Y here to print out verbose mesh peer link
 	  debug messages.
+
+config MAC80211_VERBOSE_SPECT_MGMT_DEBUG
+	bool "Verbose Spectrum Management (IEEE 802.11h)debugging"
+	depends on MAC80211_DEBUG
+	---help---
+	  Say Y here to print out verbose Spectrum Management (IEEE 802.11h)
+	  debug messages.
-- 
cgit v1.2.3-70-g09d2


From 06ff47bc9595848b818ac79e7d8069337c6e58b1 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 18 Jun 2008 17:53:44 +0300
Subject: mac80211: add spectrum capabilities

This patch add spectrum capability and required information
elements to association request providing AP has requested it and
it is supported by the driver

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Assaf Krauss <assaf.krauss@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h |  5 +++++
 net/mac80211/mlme.c    | 24 ++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 02c79e6b309..a7044958c75 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -710,6 +710,10 @@ enum ieee80211_tkip_key_type {
  * @IEEE80211_HW_NOISE_DBM:
  *	Hardware can provide noise (radio interference) values in units dBm,
  *      decibel difference from one milliwatt.
+ *
+ * @IEEE80211_HW_SPECTRUM_MGMT:
+ * 	Hardware supports spectrum management defined in 802.11h
+ * 	Measurement, Channel Switch, Quieting, TPC
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE		= 1<<0,
@@ -721,6 +725,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SIGNAL_DB				= 1<<6,
 	IEEE80211_HW_SIGNAL_DBM				= 1<<7,
 	IEEE80211_HW_NOISE_DBM				= 1<<8,
+	IEEE80211_HW_SPECTRUM_MGMT			= 1<<9,
 };
 
 /**
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7b4d4d46843..c9f7c13d2e7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -747,6 +747,10 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		 * b-only mode) */
 		rates_len = ieee80211_compatible_rates(bss, sband, &rates);
 
+		if ((bss->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
+		    (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
+			capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
+
 		ieee80211_rx_bss_put(dev, bss);
 	} else {
 		rates = ~0;
@@ -814,6 +818,26 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		}
 	}
 
+	if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) {
+		/* 1. power capabilities */
+		pos = skb_put(skb, 4);
+		*pos++ = WLAN_EID_PWR_CAPABILITY;
+		*pos++ = 2;
+		*pos++ = 0; /* min tx power */
+		*pos++ = local->hw.conf.channel->max_power; /* max tx power */
+
+		/* 2. supported channels */
+		/* TODO: get this in reg domain format */
+		pos = skb_put(skb, 2 * sband->n_channels + 2);
+		*pos++ = WLAN_EID_SUPPORTED_CHANNELS;
+		*pos++ = 2 * sband->n_channels;
+		for (i = 0; i < sband->n_channels; i++) {
+			*pos++ = ieee80211_frequency_to_channel(
+					sband->channels[i].center_freq);
+			*pos++ = 1; /* one channel in the subband*/
+		}
+	}
+
 	if (ifsta->extra_ie) {
 		pos = skb_put(skb, ifsta->extra_ie_len);
 		memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len);
-- 
cgit v1.2.3-70-g09d2


From bf998f686430107fb8790ef6713f8e352a3deede Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 24 Jun 2008 13:38:00 +0300
Subject: mac80211: add last beacon time in scan list

This patch adds the interval between the scan results and the last time a
beacon was received in the result of the scan.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c9f7c13d2e7..cffef44cec0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4340,6 +4340,13 @@ ieee80211_sta_scan_result(struct net_device *dev,
 			current_ev = iwe_stream_add_point(info, current_ev,
 							  end_buf,
 							  &iwe, buf);
+			memset(&iwe, 0, sizeof(iwe));
+			iwe.cmd = IWEVCUSTOM;
+			sprintf(buf, " Last beacon: %dms ago",
+				jiffies_to_msecs(jiffies - bss->last_update));
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf, &iwe, buf);
 			kfree(buf);
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From e5f5e7339cd95d07937e6f8081b46fba86c742a7 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 26 Jun 2008 13:38:13 +0300
Subject: build algorithms into the mac80211 module

The old infrastructure was:
- the default algorithm is built into mac80211
- other algorithms get into their own modules

The implementation of this complicated scheme was horrible
(just look at net/mac80211/Makefile), and anyone adding a new
algorithm would most likely not get it right at his first attempt.

This patch therefore builds all enabled algorithms into the mac80211
module.

The user interface for the rate control algorithms changes as follows:
- first the user can choose which algorithms to enable (currently only
  MAC80211_RC_PID is available)
- if more than one algorithm is enabled (currently not possible since
  only one algorithm is present) the user then chooses the default one

Note:
- MAC80211_RC_PID is always enables for CONFIG_EMBEDDED=n

Technical changes:
- all selected algorithms get into the mac80211 module
- net/mac80211/Makefile can now become much less complicated
- support for rc80211_pid_algo.c being modular is no longer required
- this includes unexporting mesh_plink_broken

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Kconfig            | 31 +++++++++----------------------
 net/mac80211/Makefile           | 18 ++++--------------
 net/mac80211/mesh_pathtbl.c     |  1 -
 net/mac80211/rate.h             |  4 +---
 net/mac80211/rc80211_pid_algo.c | 10 ----------
 5 files changed, 14 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 594bc7319ca..661d3c29148 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -27,6 +27,14 @@ comment "QoS/HT support needs CONFIG_NETDEVICES_MULTIQUEUE"
 menu "Rate control algorithm selection"
 	depends on MAC80211 != n
 
+config MAC80211_RC_PID
+	bool "PID controller based rate control algorithm" if EMBEDDED
+	default y
+	---help---
+	  This option enables a TX rate control algorithm for
+	  mac80211 that uses a PID controller to select the TX
+	  rate.
+
 choice
 	prompt "Default rate control algorithm"
 	default MAC80211_RC_DEFAULT_PID
@@ -38,40 +46,19 @@ choice
 
 config MAC80211_RC_DEFAULT_PID
 	bool "PID controller based rate control algorithm"
-	select MAC80211_RC_PID
+	depends on MAC80211_RC_PID
 	---help---
 	  Select the PID controller based rate control as the
 	  default rate control algorithm. You should choose
 	  this unless you know what you are doing.
 
-config MAC80211_RC_DEFAULT_NONE
-	bool "No default algorithm"
-	depends on EMBEDDED
-	help
-	  Selecting this option will select no default algorithm
-	  and allow you to not build any. Do not choose this
-	  option unless you know your driver comes with another
-	  suitable algorithm.
 endchoice
 
-comment "Selecting 'y' for an algorithm will"
-comment "build the algorithm into mac80211."
-
 config MAC80211_RC_DEFAULT
 	string
 	default "pid" if MAC80211_RC_DEFAULT_PID
 	default ""
 
-config MAC80211_RC_PID
-	tristate "PID controller based rate control algorithm"
-	---help---
-	  This option enables a TX rate control algorithm for
-	  mac80211 that uses a PID controller to select the TX
-	  rate.
-
-	  Say Y or M unless you're sure you want to use a
-	  different rate control algorithm.
-
 endmenu
 
 config MAC80211_MESH
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 1d2a4e010e5..fa47438e338 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -1,13 +1,5 @@
 obj-$(CONFIG_MAC80211) += mac80211.o
 
-# objects for PID algorithm
-rc80211_pid-y := rc80211_pid_algo.o
-rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
-
-# build helper for PID algorithm
-rc-pid-y := $(rc80211_pid-y)
-rc-pid-m := rc80211_pid.o
-
 # mac80211 objects
 mac80211-y := \
 	main.o \
@@ -42,10 +34,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \
 	mesh_plink.o \
 	mesh_hwmp.o
 
+# objects for PID algorithm
+rc80211_pid-y := rc80211_pid_algo.o
+rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
 
-# Build rate control algorithm(s)
-CFLAGS_rc80211_pid_algo.o += -DRC80211_PID_COMPILE
-mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc-pid-$(CONFIG_MAC80211_RC_PID))
-
-# Modular rate algorithms are assigned to mac80211-m - make separate modules
-obj-m += $(mac80211-m)
+mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 947b13b4072..5f88a2e6ee5 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -262,7 +262,6 @@ void mesh_plink_broken(struct sta_info *sta)
 	}
 	rcu_read_unlock();
 }
-EXPORT_SYMBOL(mesh_plink_broken);
 
 /**
  * mesh_path_flush_by_nexthop - Deletes mesh paths if their next hop matches
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 0ed9c8a2f56..ede7ab56f65 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -162,9 +162,7 @@ void rate_control_deinitialize(struct ieee80211_local *local);
 
 
 /* Rate control algorithms */
-#if defined(RC80211_PID_COMPILE) || \
-	(defined(CONFIG_MAC80211_RC_PID) && \
-	 !defined(CONFIG_MAC80211_RC_PID_MODULE))
+#ifdef CONFIG_MAC80211_RC_PID
 extern int rc80211_pid_init(void);
 extern void rc80211_pid_exit(void);
 #else
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index e8945413e4a..62388f8e902 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -540,11 +540,6 @@ static struct rate_control_ops mac80211_rcpid = {
 #endif
 };
 
-MODULE_DESCRIPTION("PID controller based rate control algorithm");
-MODULE_AUTHOR("Stefano Brivio");
-MODULE_AUTHOR("Mattias Nissler");
-MODULE_LICENSE("GPL");
-
 int __init rc80211_pid_init(void)
 {
 	return ieee80211_rate_control_register(&mac80211_rcpid);
@@ -554,8 +549,3 @@ void rc80211_pid_exit(void)
 {
 	ieee80211_rate_control_unregister(&mac80211_rcpid);
 }
-
-#ifdef CONFIG_MAC80211_RC_PID_MODULE
-module_init(rc80211_pid_init);
-module_exit(rc80211_pid_exit);
-#endif
-- 
cgit v1.2.3-70-g09d2


From ae6a44e3afae0e813e9ab707a2317b08e21f4ab2 Mon Sep 17 00:00:00 2001
From: Ester Kummer <ester.kummer@intel.com>
Date: Fri, 27 Jun 2008 18:54:48 +0300
Subject: mac80211: removing duplicated parsing of information elements

This patch removes the duplicated parsing of information elements
in ieee80211_rx_bss_info and in ieee_rx_mgmt_beacon

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Ester Kummer <ester.kummer@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 188 +++++++++++++++++++++++++++-------------------------
 1 file changed, 97 insertions(+), 91 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cffef44cec0..1e4054b273d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2646,11 +2646,10 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 				  struct ieee80211_mgmt *mgmt,
 				  size_t len,
 				  struct ieee80211_rx_status *rx_status,
+				  struct ieee802_11_elems *elems,
 				  int beacon)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee802_11_elems elems;
-	size_t baselen;
 	int freq, clen;
 	struct ieee80211_sta_bss *bss;
 	struct sta_info *sta;
@@ -2669,29 +2668,24 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	       print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da));
 #endif
 
-	baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
-	if (baselen > len)
-		return;
-
 	beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
-	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
-	if (ieee80211_vif_is_mesh(&sdata->vif) && elems.mesh_id &&
-	    elems.mesh_config && mesh_matches_local(&elems, dev)) {
-		u64 rates = ieee80211_sta_get_rates(local, &elems,
+	if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id &&
+	    elems->mesh_config && mesh_matches_local(elems, dev)) {
+		u64 rates = ieee80211_sta_get_rates(local, elems,
 						rx_status->band);
 
 		mesh_neighbour_update(mgmt->sa, rates, dev,
-				      mesh_peer_accepts_plinks(&elems, dev));
+				      mesh_peer_accepts_plinks(elems, dev));
 	}
 
 	rcu_read_lock();
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
+	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates &&
 	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
 	    (sta = sta_info_get(local, mgmt->sa))) {
 		u64 prev_rates;
-		u64 supp_rates = ieee80211_sta_get_rates(local, &elems,
+		u64 supp_rates = ieee80211_sta_get_rates(local, elems,
 							rx_status->band);
 
 		prev_rates = sta->supp_rates[rx_status->band];
@@ -2716,8 +2710,8 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 
 	rcu_read_unlock();
 
-	if (elems.ds_params && elems.ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems.ds_params[0]);
+	if (elems->ds_params && elems->ds_params_len == 1)
+		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
 	else
 		freq = rx_status->freq;
 
@@ -2727,23 +2721,23 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 		return;
 
 #ifdef CONFIG_MAC80211_MESH
-	if (elems.mesh_config)
-		bss = ieee80211_rx_mesh_bss_get(dev, elems.mesh_id,
-				elems.mesh_id_len, elems.mesh_config, freq);
+	if (elems->mesh_config)
+		bss = ieee80211_rx_mesh_bss_get(dev, elems->mesh_id,
+				elems->mesh_id_len, elems->mesh_config, freq);
 	else
 #endif
 		bss = ieee80211_rx_bss_get(dev, mgmt->bssid, freq,
-					   elems.ssid, elems.ssid_len);
+					   elems->ssid, elems->ssid_len);
 	if (!bss) {
 #ifdef CONFIG_MAC80211_MESH
-		if (elems.mesh_config)
-			bss = ieee80211_rx_mesh_bss_add(dev, elems.mesh_id,
-				elems.mesh_id_len, elems.mesh_config,
-				elems.mesh_config_len, freq);
+		if (elems->mesh_config)
+			bss = ieee80211_rx_mesh_bss_add(dev, elems->mesh_id,
+				elems->mesh_id_len, elems->mesh_config,
+				elems->mesh_config_len, freq);
 		else
 #endif
 			bss = ieee80211_rx_bss_add(dev, mgmt->bssid, freq,
-						   elems.ssid, elems.ssid_len);
+						  elems->ssid, elems->ssid_len);
 		if (!bss)
 			return;
 	} else {
@@ -2756,43 +2750,43 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	}
 
 	/* save the ERP value so that it is available at association time */
-	if (elems.erp_info && elems.erp_info_len >= 1) {
-		bss->erp_value = elems.erp_info[0];
+	if (elems->erp_info && elems->erp_info_len >= 1) {
+		bss->erp_value = elems->erp_info[0];
 		bss->has_erp_value = 1;
 	}
 
-	if (elems.ht_cap_elem &&
-	     (!bss->ht_ie || bss->ht_ie_len != elems.ht_cap_elem_len ||
-	     memcmp(bss->ht_ie, elems.ht_cap_elem, elems.ht_cap_elem_len))) {
+	if (elems->ht_cap_elem &&
+	     (!bss->ht_ie || bss->ht_ie_len != elems->ht_cap_elem_len ||
+	     memcmp(bss->ht_ie, elems->ht_cap_elem, elems->ht_cap_elem_len))) {
 		kfree(bss->ht_ie);
-		bss->ht_ie = kmalloc(elems.ht_cap_elem_len + 2, GFP_ATOMIC);
+		bss->ht_ie = kmalloc(elems->ht_cap_elem_len + 2, GFP_ATOMIC);
 		if (bss->ht_ie) {
-			memcpy(bss->ht_ie, elems.ht_cap_elem - 2,
-				elems.ht_cap_elem_len + 2);
-			bss->ht_ie_len = elems.ht_cap_elem_len + 2;
+			memcpy(bss->ht_ie, elems->ht_cap_elem - 2,
+				elems->ht_cap_elem_len + 2);
+			bss->ht_ie_len = elems->ht_cap_elem_len + 2;
 		} else
 			bss->ht_ie_len = 0;
-	} else if (!elems.ht_cap_elem && bss->ht_ie) {
+	} else if (!elems->ht_cap_elem && bss->ht_ie) {
 		kfree(bss->ht_ie);
 		bss->ht_ie = NULL;
 		bss->ht_ie_len = 0;
 	}
 
-	if (elems.ht_info_elem &&
+	if (elems->ht_info_elem &&
 	     (!bss->ht_add_ie ||
-	     bss->ht_add_ie_len != elems.ht_info_elem_len ||
-	     memcmp(bss->ht_add_ie, elems.ht_info_elem,
-			elems.ht_info_elem_len))) {
+	     bss->ht_add_ie_len != elems->ht_info_elem_len ||
+	     memcmp(bss->ht_add_ie, elems->ht_info_elem,
+			elems->ht_info_elem_len))) {
 		kfree(bss->ht_add_ie);
 		bss->ht_add_ie =
-			kmalloc(elems.ht_info_elem_len + 2, GFP_ATOMIC);
+			kmalloc(elems->ht_info_elem_len + 2, GFP_ATOMIC);
 		if (bss->ht_add_ie) {
-			memcpy(bss->ht_add_ie, elems.ht_info_elem - 2,
-				elems.ht_info_elem_len + 2);
-			bss->ht_add_ie_len = elems.ht_info_elem_len + 2;
+			memcpy(bss->ht_add_ie, elems->ht_info_elem - 2,
+				elems->ht_info_elem_len + 2);
+			bss->ht_add_ie_len = elems->ht_info_elem_len + 2;
 		} else
 			bss->ht_add_ie_len = 0;
-	} else if (!elems.ht_info_elem && bss->ht_add_ie) {
+	} else if (!elems->ht_info_elem && bss->ht_add_ie) {
 		kfree(bss->ht_add_ie);
 		bss->ht_add_ie = NULL;
 		bss->ht_add_ie_len = 0;
@@ -2802,20 +2796,20 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
 
 	bss->supp_rates_len = 0;
-	if (elems.supp_rates) {
+	if (elems->supp_rates) {
 		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
-		if (clen > elems.supp_rates_len)
-			clen = elems.supp_rates_len;
-		memcpy(&bss->supp_rates[bss->supp_rates_len], elems.supp_rates,
+		if (clen > elems->supp_rates_len)
+			clen = elems->supp_rates_len;
+		memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates,
 		       clen);
 		bss->supp_rates_len += clen;
 	}
-	if (elems.ext_supp_rates) {
+	if (elems->ext_supp_rates) {
 		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
-		if (clen > elems.ext_supp_rates_len)
-			clen = elems.ext_supp_rates_len;
+		if (clen > elems->ext_supp_rates_len)
+			clen = elems->ext_supp_rates_len;
 		memcpy(&bss->supp_rates[bss->supp_rates_len],
-		       elems.ext_supp_rates, clen);
+		       elems->ext_supp_rates, clen);
 		bss->supp_rates_len += clen;
 	}
 
@@ -2839,33 +2833,33 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 		return;
 	}
 
-	if (elems.wpa &&
-	    (!bss->wpa_ie || bss->wpa_ie_len != elems.wpa_len ||
-	     memcmp(bss->wpa_ie, elems.wpa, elems.wpa_len))) {
+	if (elems->wpa &&
+	    (!bss->wpa_ie || bss->wpa_ie_len != elems->wpa_len ||
+	     memcmp(bss->wpa_ie, elems->wpa, elems->wpa_len))) {
 		kfree(bss->wpa_ie);
-		bss->wpa_ie = kmalloc(elems.wpa_len + 2, GFP_ATOMIC);
+		bss->wpa_ie = kmalloc(elems->wpa_len + 2, GFP_ATOMIC);
 		if (bss->wpa_ie) {
-			memcpy(bss->wpa_ie, elems.wpa - 2, elems.wpa_len + 2);
-			bss->wpa_ie_len = elems.wpa_len + 2;
+			memcpy(bss->wpa_ie, elems->wpa - 2, elems->wpa_len + 2);
+			bss->wpa_ie_len = elems->wpa_len + 2;
 		} else
 			bss->wpa_ie_len = 0;
-	} else if (!elems.wpa && bss->wpa_ie) {
+	} else if (!elems->wpa && bss->wpa_ie) {
 		kfree(bss->wpa_ie);
 		bss->wpa_ie = NULL;
 		bss->wpa_ie_len = 0;
 	}
 
-	if (elems.rsn &&
-	    (!bss->rsn_ie || bss->rsn_ie_len != elems.rsn_len ||
-	     memcmp(bss->rsn_ie, elems.rsn, elems.rsn_len))) {
+	if (elems->rsn &&
+	    (!bss->rsn_ie || bss->rsn_ie_len != elems->rsn_len ||
+	     memcmp(bss->rsn_ie, elems->rsn, elems->rsn_len))) {
 		kfree(bss->rsn_ie);
-		bss->rsn_ie = kmalloc(elems.rsn_len + 2, GFP_ATOMIC);
+		bss->rsn_ie = kmalloc(elems->rsn_len + 2, GFP_ATOMIC);
 		if (bss->rsn_ie) {
-			memcpy(bss->rsn_ie, elems.rsn - 2, elems.rsn_len + 2);
-			bss->rsn_ie_len = elems.rsn_len + 2;
+			memcpy(bss->rsn_ie, elems->rsn - 2, elems->rsn_len + 2);
+			bss->rsn_ie_len = elems->rsn_len + 2;
 		} else
 			bss->rsn_ie_len = 0;
-	} else if (!elems.rsn && bss->rsn_ie) {
+	} else if (!elems->rsn && bss->rsn_ie) {
 		kfree(bss->rsn_ie);
 		bss->rsn_ie = NULL;
 		bss->rsn_ie_len = 0;
@@ -2885,20 +2879,21 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	 * inclusion of the WMM Parameters in beacons, however, is optional.
 	 */
 
-	if (elems.wmm_param &&
-	    (!bss->wmm_ie || bss->wmm_ie_len != elems.wmm_param_len ||
-	     memcmp(bss->wmm_ie, elems.wmm_param, elems.wmm_param_len))) {
+	if (elems->wmm_param &&
+	    (!bss->wmm_ie || bss->wmm_ie_len != elems->wmm_param_len ||
+	     memcmp(bss->wmm_ie, elems->wmm_param, elems->wmm_param_len))) {
 		kfree(bss->wmm_ie);
-		bss->wmm_ie = kmalloc(elems.wmm_param_len + 2, GFP_ATOMIC);
+		bss->wmm_ie = kmalloc(elems->wmm_param_len + 2, GFP_ATOMIC);
 		if (bss->wmm_ie) {
-			memcpy(bss->wmm_ie, elems.wmm_param - 2,
-			       elems.wmm_param_len + 2);
-			bss->wmm_ie_len = elems.wmm_param_len + 2;
+			memcpy(bss->wmm_ie, elems->wmm_param - 2,
+			       elems->wmm_param_len + 2);
+			bss->wmm_ie_len = elems->wmm_param_len + 2;
 		} else
 			bss->wmm_ie_len = 0;
-	} else if (elems.wmm_info &&
-		    (!bss->wmm_ie || bss->wmm_ie_len != elems.wmm_info_len ||
-		     memcmp(bss->wmm_ie, elems.wmm_info, elems.wmm_info_len))) {
+	} else if (elems->wmm_info &&
+		    (!bss->wmm_ie || bss->wmm_ie_len != elems->wmm_info_len ||
+		     memcmp(bss->wmm_ie, elems->wmm_info,
+						elems->wmm_info_len))) {
 		 /* As for certain AP's Fifth bit is not set in WMM IE in
 		  * beacon frames.So while parsing the beacon frame the
 		  * wmm_info structure is used instead of wmm_param.
@@ -2908,14 +2903,14 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 		  * n-band association.
 		  */
 		kfree(bss->wmm_ie);
-		bss->wmm_ie = kmalloc(elems.wmm_info_len + 2, GFP_ATOMIC);
+		bss->wmm_ie = kmalloc(elems->wmm_info_len + 2, GFP_ATOMIC);
 		if (bss->wmm_ie) {
-			memcpy(bss->wmm_ie, elems.wmm_info - 2,
-			       elems.wmm_info_len + 2);
-			bss->wmm_ie_len = elems.wmm_info_len + 2;
+			memcpy(bss->wmm_ie, elems->wmm_info - 2,
+			       elems->wmm_info_len + 2);
+			bss->wmm_ie_len = elems->wmm_info_len + 2;
 		} else
 			bss->wmm_ie_len = 0;
-	} else if (!elems.wmm_param && !elems.wmm_info && bss->wmm_ie) {
+	} else if (!elems->wmm_param && !elems->wmm_info && bss->wmm_ie) {
 		kfree(bss->wmm_ie);
 		bss->wmm_ie = NULL;
 		bss->wmm_ie_len = 0;
@@ -2926,8 +2921,9 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	    !local->sta_sw_scanning && !local->sta_hw_scanning &&
 	    bss->capability & WLAN_CAPABILITY_IBSS &&
 	    bss->freq == local->oper_channel->center_freq &&
-	    elems.ssid_len == sdata->u.sta.ssid_len &&
-	    memcmp(elems.ssid, sdata->u.sta.ssid, sdata->u.sta.ssid_len) == 0) {
+	    elems->ssid_len == sdata->u.sta.ssid_len &&
+	    memcmp(elems->ssid, sdata->u.sta.ssid,
+				sdata->u.sta.ssid_len) == 0) {
 		if (rx_status->flag & RX_FLAG_TSFT) {
 			/* in order for correct IBSS merging we need mactime
 			 *
@@ -2986,7 +2982,17 @@ static void ieee80211_rx_mgmt_probe_resp(struct net_device *dev,
 					 size_t len,
 					 struct ieee80211_rx_status *rx_status)
 {
-	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 0);
+	size_t baselen;
+	struct ieee802_11_elems elems;
+
+	baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt;
+	if (baselen > len)
+		return;
+
+	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
+				&elems);
+
+	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, &elems, 0);
 }
 
 
@@ -3003,7 +3009,14 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 	struct ieee80211_conf *conf = &local->hw.conf;
 	u32 changed = 0;
 
-	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1);
+	/* Process beacon from the current BSS */
+	baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
+	if (baselen > len)
+		return;
+
+	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
+
+	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, &elems, 1);
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
@@ -3014,13 +3027,6 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 	    memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0)
 		return;
 
-	/* Process beacon from the current BSS */
-	baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
-	if (baselen > len)
-		return;
-
-	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
-
 	ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
 				 elems.wmm_param_len);
 
-- 
cgit v1.2.3-70-g09d2


From 5479d0e73958710b9a255337eaa2ba47eb492def Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Sat, 28 Jun 2008 03:15:03 +0300
Subject: mac80211: fix warning: unused variable invoke_tx_handlers

This patch fixes warning: unused variable in invoke_tx_handlers
when compiling without MAC80211_DEBUG option

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 52ab85c4341..8f1c574bc8f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1110,7 +1110,6 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
  */
 static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
 {
-	struct ieee80211_local *local = tx->local;
 	struct sk_buff *skb = tx->skb;
 	ieee80211_tx_handler *handler;
 	ieee80211_tx_result res = TX_DROP;
@@ -1123,7 +1122,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
 	}
 
 	if (unlikely(res == TX_DROP)) {
-		I802_DEBUG_INC(local->tx_handlers_drop);
+		I802_DEBUG_INC(tx->local->tx_handlers_drop);
 		dev_kfree_skb(skb);
 		for (i = 0; i < tx->num_extra_frag; i++)
 			if (tx->extra_frag[i])
@@ -1131,7 +1130,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
 		kfree(tx->extra_frag);
 		return -1;
 	} else if (unlikely(res == TX_QUEUED)) {
-		I802_DEBUG_INC(local->tx_handlers_queued);
+		I802_DEBUG_INC(tx->local->tx_handlers_queued);
 		return -1;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 4faeb86070c38c0df9f5a23c3f6acf5538492a33 Mon Sep 17 00:00:00 2001
From: Assaf Krauss <assaf.krauss@intel.com>
Date: Mon, 30 Jun 2008 17:23:16 +0800
Subject: mac80211: add beacon timestamp to beacon template in IBSS

This patch adds a beacon timestamp to the beacon template used in IBSS
mode. This way the underlying driver can update its TSF accordingly.
According the spec station should adopt the highest TSF from an incoming
beacons in the cell.

Signed-off-by: Assaf Krauss <assaf.krauss@intel.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1e4054b273d..0a310d09ab0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2503,6 +2503,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 		mgmt->u.beacon.beacon_int =
 			cpu_to_le16(local->hw.conf.beacon_int);
+		mgmt->u.beacon.timestamp = cpu_to_le64(bss->timestamp);
 		mgmt->u.beacon.capab_info = cpu_to_le16(bss->capability);
 
 		pos = skb_put(skb, 2 + ifsta->ssid_len);
-- 
cgit v1.2.3-70-g09d2


From 6dbf4bcac98bbc76ef425b3a2b4169f31199f6c7 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 1 Jul 2008 19:29:07 -0700
Subject: icmp: fix units for ratelimit

Convert the sysctl values for icmp ratelimit to use milliseconds instead
of jiffies which is based on kernel configured HZ.
Internal kernel jiffies are not a proper unit for any userspace API.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 10 ++++++----
 net/ipv4/sysctl_net_ipv4.c             |  3 ++-
 net/ipv6/icmp.c                        |  3 ++-
 3 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 17a6e46fbd4..71c7bea9716 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -548,8 +548,9 @@ icmp_echo_ignore_broadcasts - BOOLEAN
 icmp_ratelimit - INTEGER
 	Limit the maximal rates for sending ICMP packets whose type matches
 	icmp_ratemask (see below) to specific targets.
-	0 to disable any limiting, otherwise the maximal rate in jiffies(1)
-	Default: 100
+	0 to disable any limiting,
+	otherwise the minimal space between responses in milliseconds.
+	Default: 1000
 
 icmp_ratemask - INTEGER
 	Mask made of ICMP types for which rates are being limited.
@@ -1027,8 +1028,9 @@ max_addresses - INTEGER
 icmp/*:
 ratelimit - INTEGER
 	Limit the maximal rates for sending ICMPv6 packets.
-	0 to disable any limiting, otherwise the maximal rate in jiffies(1)
-	Default: 100
+	0 to disable any limiting,
+	otherwise the minimal space between responses in milliseconds.
+	Default: 1000
 
 
 IPv6 Update by:
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 90160700320..14ef202a225 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -793,7 +793,8 @@ static struct ctl_table ipv4_net_table[] = {
 		.data		= &init_net.ipv4.sysctl_icmp_ratelimit,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_ms_jiffies,
+		.strategy	= &sysctl_ms_jiffies
 	},
 	{
 		.ctl_name	= NET_IPV4_ICMP_RATEMASK,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 399d41f6543..abedf95fdf2 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -954,7 +954,8 @@ ctl_table ipv6_icmp_table_template[] = {
 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_ms_jiffies,
+		.strategy	= &sysctl_ms_jiffies
 	},
 	{ .ctl_name = 0 },
 };
-- 
cgit v1.2.3-70-g09d2


From ff31ab56c0e900235f653e375fc3b01ba2d8d6a3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 1 Jul 2008 19:52:38 -0700
Subject: net-sched: change tcf_destroy_chain() to clear start of filter list

Pass double tcf_proto pointers to tcf_destroy_chain() to make it
clear the start of the filter list for more consistency.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 2 +-
 net/mac80211/wme.c        | 3 +--
 net/sched/sch_api.c       | 6 +++---
 net/sched/sch_atm.c       | 5 ++---
 net/sched/sch_cbq.c       | 8 +++-----
 net/sched/sch_dsmark.c    | 2 +-
 net/sched/sch_hfsc.c      | 2 +-
 net/sched/sch_htb.c       | 4 ++--
 net/sched/sch_ingress.c   | 2 +-
 net/sched/sch_prio.c      | 2 +-
 net/sched/sch_sfq.c       | 2 +-
 11 files changed, 17 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ab502ec1c61..a87fc0312ed 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -178,7 +178,7 @@ extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops);
 extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
 				       struct Qdisc_ops *ops, u32 parentid);
 extern void tcf_destroy(struct tcf_proto *tp);
-extern void tcf_destroy_chain(struct tcf_proto *fl);
+extern void tcf_destroy_chain(struct tcf_proto **fl);
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 635b996c8c3..5d09e8698b5 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -323,8 +323,7 @@ static void wme_qdiscop_destroy(struct Qdisc* qd)
 	struct ieee80211_hw *hw = &local->hw;
 	int queue;
 
-	tcf_destroy_chain(q->filter_list);
-	q->filter_list = NULL;
+	tcf_destroy_chain(&q->filter_list);
 
 	for (queue=0; queue < hw->queues; queue++) {
 		skb_queue_purge(&q->requeued[queue]);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c40773cdbe4..10f01ad0438 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1252,12 +1252,12 @@ void tcf_destroy(struct tcf_proto *tp)
 	kfree(tp);
 }
 
-void tcf_destroy_chain(struct tcf_proto *fl)
+void tcf_destroy_chain(struct tcf_proto **fl)
 {
 	struct tcf_proto *tp;
 
-	while ((tp = fl) != NULL) {
-		fl = tp->next;
+	while ((tp = *fl) != NULL) {
+		*fl = tp->next;
 		tcf_destroy(tp);
 	}
 }
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 33527341638..8e5f70ba3a1 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -160,7 +160,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 	*prev = flow->next;
 	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
 	qdisc_destroy(flow->q);
-	tcf_destroy_chain(flow->filter_list);
+	tcf_destroy_chain(&flow->filter_list);
 	if (flow->sock) {
 		pr_debug("atm_tc_put: f_count %d\n",
 			file_count(flow->sock->file));
@@ -588,8 +588,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
 	/* races ? */
 	while ((flow = p->flows)) {
-		tcf_destroy_chain(flow->filter_list);
-		flow->filter_list = NULL;
+		tcf_destroy_chain(&flow->filter_list);
 		if (flow->ref > 1)
 			printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
 			       flow->ref);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 09969c1fbc0..2a3c97f7dc6 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1704,7 +1704,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 
 	BUG_TRAP(!cl->filters);
 
-	tcf_destroy_chain(cl->filter_list);
+	tcf_destroy_chain(&cl->filter_list);
 	qdisc_destroy(cl->q);
 	qdisc_put_rtab(cl->R_tab);
 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
@@ -1728,10 +1728,8 @@ cbq_destroy(struct Qdisc* sch)
 	 * be bound to classes which have been destroyed already. --TGR '04
 	 */
 	for (h = 0; h < 16; h++) {
-		for (cl = q->classes[h]; cl; cl = cl->next) {
-			tcf_destroy_chain(cl->filter_list);
-			cl->filter_list = NULL;
-		}
+		for (cl = q->classes[h]; cl; cl = cl->next)
+			tcf_destroy_chain(&cl->filter_list);
 	}
 	for (h = 0; h < 16; h++) {
 		struct cbq_class *next;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 64465bacbe7..c4c1317cd47 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -416,7 +416,7 @@ static void dsmark_destroy(struct Qdisc *sch)
 
 	pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
 
-	tcf_destroy_chain(p->filter_list);
+	tcf_destroy_chain(&p->filter_list);
 	qdisc_destroy(p->q);
 	kfree(p->mask);
 }
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index fdfaa3fcc16..eca83a3be29 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1123,7 +1123,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(cl->filter_list);
+	tcf_destroy_chain(&cl->filter_list);
 	qdisc_destroy(cl->qdisc);
 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
 	if (cl != &q->root)
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6807c97985a..3fb58f428f7 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1238,7 +1238,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 	qdisc_put_rtab(cl->rate);
 	qdisc_put_rtab(cl->ceil);
 
-	tcf_destroy_chain(cl->filter_list);
+	tcf_destroy_chain(&cl->filter_list);
 
 	while (!list_empty(&cl->children))
 		htb_destroy_class(sch, list_entry(cl->children.next,
@@ -1267,7 +1267,7 @@ static void htb_destroy(struct Qdisc *sch)
 	   and surprisingly it worked in 2.4. But it must precede it
 	   because filter need its target class alive to be able to call
 	   unbind_filter on it (without Oops). */
-	tcf_destroy_chain(q->filter_list);
+	tcf_destroy_chain(&q->filter_list);
 
 	while (!list_empty(&q->root))
 		htb_destroy_class(sch, list_entry(q->root.next,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 274b1ddb160..956c80ad596 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -104,7 +104,7 @@ static void ingress_destroy(struct Qdisc *sch)
 {
 	struct ingress_qdisc_data *p = qdisc_priv(sch);
 
-	tcf_destroy_chain(p->filter_list);
+	tcf_destroy_chain(&p->filter_list);
 }
 
 static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 4aa2b45dad0..5532f1031ab 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -219,7 +219,7 @@ prio_destroy(struct Qdisc* sch)
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(q->filter_list);
+	tcf_destroy_chain(&q->filter_list);
 	for (prio=0; prio<q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index f0463d757a9..6a97afbfb95 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -520,7 +520,7 @@ static void sfq_destroy(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(q->filter_list);
+	tcf_destroy_chain(&q->filter_list);
 	q->perturb_period = 0;
 	del_timer_sync(&q->perturb_timer);
 }
-- 
cgit v1.2.3-70-g09d2


From a4aebb83cf0da0363684f1c339f7e6149a3e74c1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 1 Jul 2008 19:53:09 -0700
Subject: net-sched: fix filter destruction in atm/hfsc qdisc destruction

Filters need to be destroyed before beginning to destroy classes
since the destination class needs to still be alive to unbind the
filter.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c  | 4 +++-
 net/sched/sch_hfsc.c | 4 ++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 8e5f70ba3a1..db0e23ae85f 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -586,9 +586,11 @@ static void atm_tc_destroy(struct Qdisc *sch)
 	struct atm_flow_data *flow;
 
 	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
+	for (flow = p->flows; flow; flow = flow->next)
+		tcf_destroy_chain(&flow->filter_list);
+
 	/* races ? */
 	while ((flow = p->flows)) {
-		tcf_destroy_chain(&flow->filter_list);
 		if (flow->ref > 1)
 			printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
 			       flow->ref);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index eca83a3be29..e817aa00441 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1540,6 +1540,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
 	struct hfsc_class *cl, *next;
 	unsigned int i;
 
+	for (i = 0; i < HFSC_HSIZE; i++) {
+		list_for_each_entry(cl, &q->clhash[i], hlist)
+			tcf_destroy_chain(&cl->filter_list);
+	}
 	for (i = 0; i < HFSC_HSIZE; i++) {
 		list_for_each_entry_safe(cl, next, &q->clhash[i], hlist)
 			hfsc_destroy_class(sch, cl);
-- 
cgit v1.2.3-70-g09d2


From 8487460720fd03a0f4ecd032f017b0a8468028da Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Tue, 1 Jul 2008 19:55:09 -0700
Subject: netlink: Unneeded local variable

We already have a variable, which has the same capability.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9b97f8006c9..349aba18955 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -886,7 +886,7 @@ retry:
 		return netlink_unicast_kernel(sk, skb);
 
 	if (sk_filter(sk, skb)) {
-		int err = skb->len;
+		err = skb->len;
 		kfree_skb(skb);
 		sock_put(sk);
 		return err;
-- 
cgit v1.2.3-70-g09d2


From 8fde8a076940969d32805b853efdce8b988d7dda Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Tue, 1 Jul 2008 19:55:40 -0700
Subject: net: Tyop of sk_filter() comment

Parameter "needlock" no long exists.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 4f8369729a4..df374435583 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -68,7 +68,6 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
  *	sk_filter - run a packet through a socket filter
  *	@sk: sock associated with &sk_buff
  *	@skb: buffer to filter
- *	@needlock: set to 1 if the sock is not locked by caller.
  *
  * Run the filter code and then cut skb->data to correct size returned by
  * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
-- 
cgit v1.2.3-70-g09d2


From 93b3cff9915322d6fa36bac0064714a7076230e4 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Tue, 1 Jul 2008 19:57:19 -0700
Subject: netdevice: Fix wrong string handle in kernel command line parsing

v1->v2: Use strlcpy() to ensure s[i].name be null-termination.

1. In netdev_boot_setup_add(), a long name will leak.
   ex. : dev=21,0x1234,0x1234,0x2345,eth123456789verylongname.........
2. In netdev_boot_setup_check(), mismatch will happen if s[i].name
   is a substring of dev->name.
   ex. : dev=...eth1 dev=...eth11

[ With feedback from Ben Hutchings. ]

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 56b46579ff4..fca23a3bf12 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -454,7 +454,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map)
 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 			memset(s[i].name, 0, sizeof(s[i].name));
-			strcpy(s[i].name, name);
+			strlcpy(s[i].name, name, IFNAMSIZ);
 			memcpy(&s[i].map, map, sizeof(s[i].map));
 			break;
 		}
@@ -479,7 +479,7 @@ int netdev_boot_setup_check(struct net_device *dev)
 
 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
-		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
+		    !strcmp(dev->name, s[i].name)) {
 			dev->irq 	= s[i].map.irq;
 			dev->base_addr 	= s[i].map.base_addr;
 			dev->mem_start 	= s[i].map.mem_start;
-- 
cgit v1.2.3-70-g09d2


From 2fe195cfe3e53c144d247b2768e37732e8eae4d8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 1 Jul 2008 19:59:37 -0700
Subject: net: fib_rules: fix error code for unsupported families

The errno code returned must be negative.

Fixes "RTNETLINK answers: Unknown error 18446744073709551519".

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index e3e9ab0f74e..277a2302eb3 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -226,7 +226,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	ops = lookup_rules_ops(net, frh->family);
 	if (ops == NULL) {
-		err = EAFNOSUPPORT;
+		err = -EAFNOSUPPORT;
 		goto errout;
 	}
 
@@ -365,7 +365,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	ops = lookup_rules_ops(net, frh->family);
 	if (ops == NULL) {
-		err = EAFNOSUPPORT;
+		err = -EAFNOSUPPORT;
 		goto errout;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From ecbed6a41900126e7b9509e12a8d0cc22176e3eb Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Tue, 1 Jul 2008 20:06:22 -0700
Subject: sctp: Mark GET_PEER|LOCAL_ADDR_OLD deprecated.

Socket options SCTP_GET_PEER_ADDR_OLD, SCTP_GET_PEER_ADDR_NUM_OLD,
SCTP_GET_LOCAL_ADDR_OLD, and SCTP_GET_PEER_LOCAL_ADDR_NUM_OLD
have been replaced by newer versions a since 2005.  It's time
to officially deprecate them and schedule them for removal.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/feature-removal-schedule.txt | 12 ++++++++++++
 net/sctp/socket.c                          | 12 ++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'net')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 5b3f31faed5..5378511a5f9 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -312,3 +312,15 @@ When:	2.6.26
 Why:	Implementation became generic; users should now include
 	linux/semaphore.h instead.
 Who:	Matthew Wilcox <willy@linux.intel.com>
+
+---------------------------
+
+What:	SCTP_GET_PEER_ADDRS_NUM_OLD, SCTP_GET_PEER_ADDRS_OLD,
+	SCTP_GET_LOCAL_ADDRS_NUM_OLD, SCTP_GET_LOCAL_ADDRS_OLD
+When: 	June 2009
+Why:    A newer version of the options have been introduced in 2005 that
+	removes the limitions of the old API.  The sctp library has been
+        converted to use these new options at the same time.  Any user
+	space app that directly uses the old options should convert to using
+	the new options.
+Who:	Vlad Yasevich <vladislav.yasevich@hp.com>
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 43460a1cb6d..df5572c39f0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4223,6 +4223,8 @@ static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len,
 	if (copy_from_user(&id, optval, sizeof(sctp_assoc_t)))
 		return -EFAULT;
 
+	printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_NUM_OLD "
+			    "socket option deprecated\n");
 	/* For UDP-style sockets, id specifies the association to query.  */
 	asoc = sctp_id2assoc(sk, id);
 	if (!asoc)
@@ -4262,6 +4264,9 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len,
 
 	if (getaddrs.addr_num <= 0) return -EINVAL;
 
+	printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_OLD "
+			    "socket option deprecated\n");
+
 	/* For UDP-style sockets, id specifies the association to query.  */
 	asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
 	if (!asoc)
@@ -4355,6 +4360,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
 	if (copy_from_user(&id, optval, sizeof(sctp_assoc_t)))
 		return -EFAULT;
 
+	printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_NUM_OLD "
+			    "socket option deprecated\n");
+
 	/*
 	 *  For UDP-style sockets, id specifies the association to query.
 	 *  If the id field is set to the value '0' then the locally bound
@@ -4515,6 +4523,10 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	if (getaddrs.addr_num <= 0 ||
 	    getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr)))
 		return -EINVAL;
+
+	printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_OLD "
+			    "socket option deprecated\n");
+
 	/*
 	 *  For UDP-style sockets, id specifies the association to query.
 	 *  If the id field is set to the value '0' then the locally bound
-- 
cgit v1.2.3-70-g09d2


From 0853ad66b14feb12acde7ac13b7c3b75770a0adc Mon Sep 17 00:00:00 2001
From: Santwona Behera <santwona.behera@sun.com>
Date: Wed, 2 Jul 2008 03:47:41 -0700
Subject: netdev: Add support for rx flow hash configuration, using ethtool.

Added new interfaces to ethtool to configure receive network flow
distribution across multiple rx rings using hashing.

Signed-off-by: Santwona Behera <santwona.behera@sun.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 33 +++++++++++++++++++++++++++++++++
 net/core/ethtool.c      | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

(limited to 'net')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index c8d21635786..8bb5e87df36 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -272,6 +272,12 @@ enum ethtool_flags {
 	ETH_FLAG_LRO		= (1 << 15),	/* LRO is enabled */
 };
 
+struct ethtool_rxnfc {
+	__u32		cmd;
+	__u32		flow_type;
+	__u64		data;
+};
+
 #ifdef __KERNEL__
 
 struct net_device;
@@ -396,6 +402,8 @@ struct ethtool_ops {
 	/* the following hooks are obsolete */
 	int	(*self_test_count)(struct net_device *);/* use get_sset_count */
 	int	(*get_stats_count)(struct net_device *);/* use get_sset_count */
+	int	(*get_rxhash)(struct net_device *, struct ethtool_rxnfc *);
+	int	(*set_rxhash)(struct net_device *, struct ethtool_rxnfc *);
 };
 #endif /* __KERNEL__ */
 
@@ -442,6 +450,9 @@ struct ethtool_ops {
 #define ETHTOOL_GPFLAGS		0x00000027 /* Get driver-private flags bitmap */
 #define ETHTOOL_SPFLAGS		0x00000028 /* Set driver-private flags bitmap */
 
+#define	ETHTOOL_GRXFH		0x00000029 /* Get RX flow hash configuration */
+#define	ETHTOOL_SRXFH		0x0000002a /* Set RX flow hash configuration */
+
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
 #define SPARC_ETH_SSET		ETHTOOL_SSET
@@ -528,4 +539,26 @@ struct ethtool_ops {
 #define WAKE_MAGIC		(1 << 5)
 #define WAKE_MAGICSECURE	(1 << 6) /* only meaningful if WAKE_MAGIC */
 
+/* L3-L4 network traffic flow types */
+#define	TCP_V4_FLOW	0x01
+#define	UDP_V4_FLOW	0x02
+#define	SCTP_V4_FLOW	0x03
+#define	AH_ESP_V4_FLOW	0x04
+#define	TCP_V6_FLOW	0x05
+#define	UDP_V6_FLOW	0x06
+#define	SCTP_V6_FLOW	0x07
+#define	AH_ESP_V6_FLOW	0x08
+
+/* L3-L4 network traffic flow hash options */
+#define	RXH_DEV_PORT	(1 << 0)
+#define	RXH_L2DA	(1 << 1)
+#define	RXH_VLAN	(1 << 2)
+#define	RXH_L3_PROTO	(1 << 3)
+#define	RXH_IP_SRC	(1 << 4)
+#define	RXH_IP_DST	(1 << 5)
+#define	RXH_L4_B_0_1	(1 << 6) /* src port in case of TCP/UDP/SCTP */
+#define	RXH_L4_B_2_3	(1 << 7) /* dst port in case of TCP/UDP/SCTP */
+#define	RXH_DISCARD	(1 << 31)
+
+
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0133b5ebd54..14ada537f89 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -209,6 +209,36 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
 	return 0;
 }
 
+static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_rxnfc cmd;
+
+	if (!dev->ethtool_ops->set_rxhash)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_rxhash(dev, &cmd);
+}
+
+static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_rxnfc info;
+
+	if (!dev->ethtool_ops->get_rxhash)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&info, useraddr, sizeof(info)))
+		return -EFAULT;
+
+	dev->ethtool_ops->get_rxhash(dev, &info);
+
+	if (copy_to_user(useraddr, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_regs regs;
@@ -826,6 +856,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GGSO:
 	case ETHTOOL_GFLAGS:
 	case ETHTOOL_GPFLAGS:
+	case ETHTOOL_GRXFH:
 		break;
 	default:
 		if (!capable(CAP_NET_ADMIN))
@@ -977,6 +1008,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc = ethtool_set_value(dev, useraddr,
 				       dev->ethtool_ops->set_priv_flags);
 		break;
+	case ETHTOOL_GRXFH:
+		rc = ethtool_get_rxhash(dev, useraddr);
+		break;
+	case ETHTOOL_SRXFH:
+		rc = ethtool_set_rxhash(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3-70-g09d2


From d9e8a70fa20dc3eaa00859a6eac0adfaef910c77 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 30 Jun 2008 15:10:44 +0200
Subject: mac80211: get rid of function pointers in TX path

This changes the TX path to no longer use function pointers for
TX handlers but rather invoke them directly. If debugging is
enabled, mark the TX handlers noinline because otherwise they
all get inlined into invoke_tx_handlers() which makes it harder
to see where a bug is.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  6 +++++
 net/mac80211/tx.c          | 63 +++++++++++++++++++++-------------------------
 2 files changed, 35 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index af352c05c98..46705ae7a84 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -954,4 +954,10 @@ int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
 				     struct ieee80211_hdr *hdr);
 
+#ifdef CONFIG_MAC80211_DEBUG
+#define debug_noinline noinline
+#else
+#define debug_noinline
+#endif
+
 #endif /* IEEE80211_I_H */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8f1c574bc8f..7b930d3c2fb 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -220,7 +220,7 @@ static int inline is_ieee80211_device(struct net_device *dev,
 
 /* tx handlers */
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -274,7 +274,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
@@ -432,7 +432,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
 {
 	if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED))
@@ -444,7 +444,7 @@ ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
 		return ieee80211_tx_h_multicast_ps_buf(tx);
 }
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_key *key;
@@ -493,7 +493,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 {
 	struct rate_selection rsel;
@@ -537,7 +537,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
@@ -632,7 +632,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
@@ -724,7 +724,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 	return TX_DROP;
 }
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
 {
 	if (!tx->key)
@@ -744,7 +744,7 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
 	return TX_DROP;
 }
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
@@ -774,7 +774,7 @@ ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
-static ieee80211_tx_result
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 {
 	int i;
@@ -795,24 +795,6 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 }
 
 
-typedef ieee80211_tx_result (*ieee80211_tx_handler)(struct ieee80211_tx_data *);
-static ieee80211_tx_handler ieee80211_tx_handlers[] =
-{
-	ieee80211_tx_h_check_assoc,
-	ieee80211_tx_h_sequence,
-	ieee80211_tx_h_ps_buf,
-	ieee80211_tx_h_select_key,
-	ieee80211_tx_h_michael_mic_add,
-	ieee80211_tx_h_rate_ctrl,
-	ieee80211_tx_h_misc,
-	ieee80211_tx_h_fragment,
-	/* handlers after fragment must be aware of tx info fragmentation! */
-	ieee80211_tx_h_encrypt,
-	ieee80211_tx_h_calculate_duration,
-	ieee80211_tx_h_stats,
-	NULL
-};
-
 /* actual transmit path */
 
 /*
@@ -1111,16 +1093,29 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
 {
 	struct sk_buff *skb = tx->skb;
-	ieee80211_tx_handler *handler;
 	ieee80211_tx_result res = TX_DROP;
 	int i;
 
-	for (handler = ieee80211_tx_handlers; *handler != NULL; handler++) {
-		res = (*handler)(tx);
-		if (res != TX_CONTINUE)
-			break;
-	}
+#define CALL_TXH(txh)		\
+	res = txh(tx);		\
+	if (res != TX_CONTINUE)	\
+		goto txh_done;
+
+	CALL_TXH(ieee80211_tx_h_check_assoc)
+	CALL_TXH(ieee80211_tx_h_sequence)
+	CALL_TXH(ieee80211_tx_h_ps_buf)
+	CALL_TXH(ieee80211_tx_h_select_key)
+	CALL_TXH(ieee80211_tx_h_michael_mic_add)
+	CALL_TXH(ieee80211_tx_h_rate_ctrl)
+	CALL_TXH(ieee80211_tx_h_misc)
+	CALL_TXH(ieee80211_tx_h_fragment)
+	/* handlers after fragment must be aware of tx info fragmentation! */
+	CALL_TXH(ieee80211_tx_h_encrypt)
+	CALL_TXH(ieee80211_tx_h_calculate_duration)
+	CALL_TXH(ieee80211_tx_h_stats)
+#undef CALL_TXH
 
+ txh_done:
 	if (unlikely(res == TX_DROP)) {
 		I802_DEBUG_INC(tx->local->tx_handlers_drop);
 		dev_kfree_skb(skb);
-- 
cgit v1.2.3-70-g09d2


From 49461622edf74cd1e1a1056cee3ca8dd90cd9556 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 30 Jun 2008 15:10:45 +0200
Subject: mac80211: get rid of function pointers in RX path

This changes the RX path to no longer use function pointers for
RX handlers but rather invoke them directly. If debugging is
enabled, mark the RX handlers noinline because otherwise they
all get inlined into ieee80211_invoke_rx_handlers() which makes
it harder to see where a bug is.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 97 +++++++++++++++++++++++++------------------------------
 1 file changed, 44 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8962d1355f0..289112777e9 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -386,7 +386,7 @@ static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx)
 
 /* rx handlers */
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_local *local = rx->local;
@@ -463,7 +463,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 }
 
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_hdr *hdr;
@@ -522,7 +522,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 }
 
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
@@ -710,7 +710,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 	return sent;
 }
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 {
 	struct sta_info *sta = rx->sta;
@@ -858,7 +858,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
 	return NULL;
 }
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_hdr *hdr;
@@ -974,7 +974,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	return RX_CONTINUE;
 }
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
@@ -1049,7 +1049,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 }
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx)
 {
 	u16 fc = rx->fc;
@@ -1367,7 +1367,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 	}
 }
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 {
 	struct net_device *dev = rx->dev;
@@ -1484,7 +1484,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 }
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 {
 	struct net_device *dev = rx->dev;
@@ -1515,7 +1515,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 }
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_local *local = rx->local;
@@ -1559,7 +1559,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
 	return RX_CONTINUE;
 }
 
-static ieee80211_rx_result
+static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_sub_if_data *sdata;
@@ -1732,66 +1732,57 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx)
 	dev_kfree_skb(skb);
 }
 
-typedef ieee80211_rx_result (*ieee80211_rx_handler)(struct ieee80211_rx_data *);
-static ieee80211_rx_handler ieee80211_rx_handlers[] =
-{
-	ieee80211_rx_h_passive_scan,
-	ieee80211_rx_h_check,
-	ieee80211_rx_h_decrypt,
-	ieee80211_rx_h_sta_process,
-	ieee80211_rx_h_defragment,
-	ieee80211_rx_h_ps_poll,
-	ieee80211_rx_h_michael_mic_verify,
-	/* this must be after decryption - so header is counted in MPDU mic
-	 * must be before pae and data, so QOS_DATA format frames
-	 * are not passed to user space by these functions
-	 */
-	ieee80211_rx_h_remove_qos_control,
-	ieee80211_rx_h_amsdu,
-	ieee80211_rx_h_data,
-	ieee80211_rx_h_ctrl,
-	ieee80211_rx_h_mgmt,
-	NULL
-};
 
 static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_rx_data *rx,
 					 struct sk_buff *skb)
 {
-	ieee80211_rx_handler *handler;
 	ieee80211_rx_result res = RX_DROP_MONITOR;
 
 	rx->skb = skb;
 	rx->sdata = sdata;
 	rx->dev = sdata->dev;
 
-	for (handler = ieee80211_rx_handlers; *handler != NULL; handler++) {
-		res = (*handler)(rx);
-
-		switch (res) {
-		case RX_CONTINUE:
-			continue;
-		case RX_DROP_UNUSABLE:
-		case RX_DROP_MONITOR:
-			I802_DEBUG_INC(sdata->local->rx_handlers_drop);
-			if (rx->sta)
-				rx->sta->rx_dropped++;
-			break;
-		case RX_QUEUED:
-			I802_DEBUG_INC(sdata->local->rx_handlers_queued);
-			break;
-		}
-		break;
-	}
-
+#define CALL_RXH(rxh)		\
+	res = rxh(rx);		\
+	if (res != RX_CONTINUE)	\
+		goto rxh_done;
+
+	CALL_RXH(ieee80211_rx_h_passive_scan)
+	CALL_RXH(ieee80211_rx_h_check)
+	CALL_RXH(ieee80211_rx_h_decrypt)
+	CALL_RXH(ieee80211_rx_h_sta_process)
+	CALL_RXH(ieee80211_rx_h_defragment)
+	CALL_RXH(ieee80211_rx_h_ps_poll)
+	CALL_RXH(ieee80211_rx_h_michael_mic_verify)
+	/* must be after MMIC verify so header is counted in MPDU mic */
+	CALL_RXH(ieee80211_rx_h_remove_qos_control)
+	CALL_RXH(ieee80211_rx_h_amsdu)
+	CALL_RXH(ieee80211_rx_h_data)
+	CALL_RXH(ieee80211_rx_h_ctrl)
+	CALL_RXH(ieee80211_rx_h_mgmt)
+
+#undef CALL_RXH
+
+ rxh_done:
 	switch (res) {
-	case RX_CONTINUE:
 	case RX_DROP_MONITOR:
+		I802_DEBUG_INC(sdata->local->rx_handlers_drop);
+		if (rx->sta)
+			rx->sta->rx_dropped++;
+		/* fall through */
+	case RX_CONTINUE:
 		ieee80211_rx_cooked_monitor(rx);
 		break;
 	case RX_DROP_UNUSABLE:
+		I802_DEBUG_INC(sdata->local->rx_handlers_drop);
+		if (rx->sta)
+			rx->sta->rx_dropped++;
 		dev_kfree_skb(rx->skb);
 		break;
+	case RX_QUEUED:
+		I802_DEBUG_INC(sdata->local->rx_handlers_queued);
+		break;
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From f4ea83dd743d3e1bec8fdf954ac911c6b12ae87a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 30 Jun 2008 15:10:46 +0200
Subject: mac80211: rework debug settings and make debugging safer

This patch reworks the mac80211 debug settings making them more focused
and adding help text for those that didn't have one. It also removes a
number of printks that can be triggered remotely and add no value, e.g.
"too short deauthentication frame received - ignoring".

If somebody really needs to debug that they should just add a monitor
interface and look at the frames in wireshark.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Kconfig       | 130 +++++++++++++++++++---------
 net/mac80211/ieee80211_i.h |   2 +-
 net/mac80211/iface.c       |   6 +-
 net/mac80211/main.c        |  35 ++++++--
 net/mac80211/mlme.c        | 206 +++++++++++----------------------------------
 net/mac80211/rx.c          | 106 +++--------------------
 net/mac80211/sta_info.c    |   2 +
 net/mac80211/tx.c          |   9 +-
 net/mac80211/wep.c         |  13 +--
 net/mac80211/wme.c         |   1 -
 net/mac80211/wpa.c         |  28 +-----
 11 files changed, 196 insertions(+), 342 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 661d3c29148..11a1e7fa195 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -88,10 +88,16 @@ config MAC80211_DEBUGFS
 
 	  Say N unless you know you need this.
 
+menuconfig MAC80211_DEBUG_MENU
+	bool "Select mac80211 debugging features"
+	depends on MAC80211
+	---help---
+	  This option collects various mac80211 debug settings.
+
 config MAC80211_DEBUG_PACKET_ALIGNMENT
 	bool "Enable packet alignment debugging"
-	depends on MAC80211
-	help
+	depends on MAC80211_DEBUG_MENU
+	---help---
 	  This option is recommended for driver authors and strongly
 	  discouraged for everybody else, it will trigger a warning
 	  when a driver hands mac80211 a buffer that is aligned in
@@ -100,33 +106,95 @@ config MAC80211_DEBUG_PACKET_ALIGNMENT
 
 	  Say N unless you're writing a mac80211 based driver.
 
-config MAC80211_DEBUG
-	bool "Enable debugging output"
-	depends on MAC80211
+config MAC80211_NOINLINE
+	bool "Do not inline TX/RX handlers"
+	depends on MAC80211_DEBUG_MENU
 	---help---
-	  This option will enable debug tracing output for the
-	  ieee80211 network stack.
+	  This option affects code generation in mac80211, when
+	  selected some functions are marked "noinline" to allow
+	  easier debugging of problems in the transmit and receive
+	  paths.
+
+	  This option increases code size a bit and inserts a lot
+	  of function calls in the code, but is otherwise safe to
+	  enable.
 
-	  If you are not trying to debug or develop the ieee80211
-	  subsystem, you most likely want to say N here.
+	  If unsure, say N unless you expect to be finding problems
+	  in mac80211.
+
+config MAC80211_VERBOSE_DEBUG
+	bool "Verbose debugging output"
+	depends on MAC80211_DEBUG_MENU
+	---help---
+	  Selecting this option causes mac80211 to print out
+	  many debugging messages. It should not be selected
+	  on production systems as some of the messages are
+	  remotely triggerable.
+
+	  Do not select this option.
 
 config MAC80211_HT_DEBUG
-	bool "Enable HT debugging output"
-	depends on MAC80211_DEBUG
+	bool "Verbose HT debugging"
+	depends on MAC80211_DEBUG_MENU
 	---help---
 	  This option enables 802.11n High Throughput features
 	  debug tracing output.
 
-	  If you are not trying to debug of develop the ieee80211
-	  subsystem, you most likely want to say N here.
+	  It should not be selected on production systems as some
+	  of the messages are remotely triggerable.
 
-config MAC80211_VERBOSE_DEBUG
-	bool "Verbose debugging output"
-	depends on MAC80211_DEBUG
+	  Do not select this option.
+
+config MAC80211_TKIP_DEBUG
+	bool "Verbose TKIP debugging"
+	depends on MAC80211_DEBUG_MENU
+	---help---
+	  Selecting this option causes mac80211 to print out
+	  very verbose TKIP debugging messages. It should not
+	  be selected on production systems as those messages
+	  are remotely triggerable.
+
+	  Do not select this option.
+
+config MAC80211_IBSS_DEBUG
+	bool "Verbose IBSS debugging"
+	depends on MAC80211_DEBUG_MENU
+	---help---
+	  Selecting this option causes mac80211 to print out
+	  very verbose IBSS debugging messages. It should not
+	  be selected on production systems as those messages
+	  are remotely triggerable.
+
+	  Do not select this option.
+
+config MAC80211_VERBOSE_PS_DEBUG
+	bool "Verbose powersave mode debugging"
+	depends on MAC80211_DEBUG_MENU
+	---help---
+	  Selecting this option causes mac80211 to print out very
+	  verbose power save mode debugging messages (when mac80211
+	  is an AP and has power saving stations.)
+	  It should not be selected on production systems as those
+	  messages are remotely triggerable.
+
+	  Do not select this option.
+
+config MAC80211_VERBOSE_MPL_DEBUG
+	bool "Verbose mesh peer link debugging"
+	depends on MAC80211_DEBUG_MENU
+	depends on MAC80211_MESH
+	---help---
+	  Selecting this option causes mac80211 to print out very
+	  verbose mesh peer link debugging messages (when mac80211
+	  is taking part in a mesh network).
+	  It should not be selected on production systems as those
+	  messages are remotely triggerable.
+
+	  Do not select this option.
 
 config MAC80211_LOWTX_FRAME_DUMP
 	bool "Debug frame dumping"
-	depends on MAC80211_DEBUG
+	depends on MAC80211_DEBUG_MENU
 	---help---
 	  Selecting this option will cause the stack to
 	  print a message for each frame that is handed
@@ -137,33 +205,17 @@ config MAC80211_LOWTX_FRAME_DUMP
 	  If unsure, say N and insert the debugging code
 	  you require into the driver you are debugging.
 
-config MAC80211_TKIP_DEBUG
-	bool "TKIP debugging"
-	depends on MAC80211_DEBUG
-
 config MAC80211_DEBUG_COUNTERS
 	bool "Extra statistics for TX/RX debugging"
 	depends on MAC80211_DEBUG
-
-config MAC80211_IBSS_DEBUG
-	bool "Support for IBSS testing"
-	depends on MAC80211_DEBUG
-	---help---
-	  Say Y here if you intend to debug the IBSS code.
-
-config MAC80211_VERBOSE_PS_DEBUG
-	bool "Verbose powersave mode debugging"
-	depends on MAC80211_DEBUG
+	depends on MAC80211_DEBUG_MENU
+	depends on MAC80211_DEBUGFS
 	---help---
-	  Say Y here to print out verbose powersave
-	  mode debug messages.
+	  Selecting this option causes mac80211 to keep additional
+	  and very verbose statistics about TX and RX handler use
+	  and show them in debugfs.
 
-config MAC80211_VERBOSE_MPL_DEBUG
-	bool "Verbose mesh peer link debugging"
-	depends on MAC80211_DEBUG && MAC80211_MESH
-	---help---
-	  Say Y here to print out verbose mesh peer link
-	  debug messages.
+	  If unsure, say N.
 
 config MAC80211_VERBOSE_SPECT_MGMT_DEBUG
 	bool "Verbose Spectrum Management (IEEE 802.11h)debugging"
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 46705ae7a84..f90da1bbec4 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -954,7 +954,7 @@ int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
 				     struct ieee80211_hdr *hdr);
 
-#ifdef CONFIG_MAC80211_DEBUG
+#ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
 #else
 #define debug_noinline
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 98447270238..eeb16926aa7 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -184,9 +184,9 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
 		sdata->u.mntr_flags = MONITOR_FLAG_CONTROL |
 				      MONITOR_FLAG_OTHER_BSS;
 		break;
-	default:
-		printk(KERN_WARNING "%s: %s: Unknown interface type 0x%x",
-		       dev->name, __func__, type);
+	case IEEE80211_IF_TYPE_INVALID:
+		BUG();
+		break;
 	}
 	ieee80211_debugfs_change_if_type(sdata, oldtype);
 }
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b661ee5bb82..f18cfd72787 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -151,9 +151,7 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
 	/* FIX: what would be proper limits for MTU?
 	 * This interface uses 802.3 frames. */
 	if (new_mtu < 256 ||
-		new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) {
-		printk(KERN_WARNING "%s: invalid MTU %d\n",
-		       dev->name, new_mtu);
+	    new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) {
 		return -EINVAL;
 	}
 
@@ -589,7 +587,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 
 	sta = sta_info_get(local, ra);
 	if (!sta) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Could not find the station\n");
+#endif
 		ret = -ENOENT;
 		goto exit;
 	}
@@ -617,9 +617,11 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	sta->ampdu_mlme.tid_tx[tid] =
 			kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
 	if (!sta->ampdu_mlme.tid_tx[tid]) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		if (net_ratelimit())
 			printk(KERN_ERR "allocate tx mlme to tid %d failed\n",
 					tid);
+#endif
 		ret = -ENOMEM;
 		goto err_unlock_sta;
 	}
@@ -689,7 +691,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
 				jiffies + ADDBA_RESP_INTERVAL;
 	add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
+#ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
+#endif
 	goto exit;
 
 err_unlock_queue:
@@ -771,8 +775,10 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	DECLARE_MAC_BUF(mac);
 
 	if (tid >= STA_TID_NUM) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
 				tid, STA_TID_NUM);
+#endif
 		return;
 	}
 
@@ -780,8 +786,10 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	sta = sta_info_get(local, ra);
 	if (!sta) {
 		rcu_read_unlock();
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Could not find station: %s\n",
 				print_mac(mac, ra));
+#endif
 		return;
 	}
 
@@ -789,8 +797,10 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	spin_lock_bh(&sta->lock);
 
 	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "addBA was not requested yet, state is %d\n",
 				*state);
+#endif
 		spin_unlock_bh(&sta->lock);
 		rcu_read_unlock();
 		return;
@@ -801,7 +811,9 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	*state |= HT_ADDBA_DRV_READY_MSK;
 
 	if (*state == HT_AGG_STATE_OPERATIONAL) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
+#endif
 		ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
 	}
 	spin_unlock_bh(&sta->lock);
@@ -818,8 +830,10 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	DECLARE_MAC_BUF(mac);
 
 	if (tid >= STA_TID_NUM) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
 				tid, STA_TID_NUM);
+#endif
 		return;
 	}
 
@@ -831,8 +845,10 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	rcu_read_lock();
 	sta = sta_info_get(local, ra);
 	if (!sta) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Could not find station: %s\n",
 				print_mac(mac, ra));
+#endif
 		rcu_read_unlock();
 		return;
 	}
@@ -842,7 +858,9 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	 * ieee80211_stop_tx_ba_session will let only
 	 * one stop call to pass through per sta/tid */
 	if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
+#endif
 		rcu_read_unlock();
 		return;
 	}
@@ -884,9 +902,11 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
 	struct sk_buff *skb = dev_alloc_skb(0);
 
 	if (unlikely(!skb)) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		if (net_ratelimit())
 			printk(KERN_WARNING "%s: Not enough memory, "
 			       "dropping start BA session", skb->dev->name);
+#endif
 		return;
 	}
 	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
@@ -907,9 +927,11 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
 	struct sk_buff *skb = dev_alloc_skb(0);
 
 	if (unlikely(!skb)) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		if (net_ratelimit())
 			printk(KERN_WARNING "%s: Not enough memory, "
 			       "dropping stop BA session", skb->dev->name);
+#endif
 		return;
 	}
 	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
@@ -1236,9 +1258,8 @@ static void ieee80211_tasklet_handler(unsigned long data)
 						 ra_tid->ra, ra_tid->tid);
 			dev_kfree_skb(skb);
 			break ;
-		default: /* should never get here! */
-			printk(KERN_ERR "%s: Unknown message type (%d)\n",
-			       wiphy_name(local->hw.wiphy), skb->pkt_type);
+		default:
+			WARN_ON(1);
 			dev_kfree_skb(skb);
 			break;
 		}
@@ -1365,12 +1386,14 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 		return;
 	}
 
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	if (net_ratelimit())
 		printk(KERN_DEBUG "%s: dropped TX filtered frame, "
 		       "queue_len=%d PS=%d @%lu\n",
 		       wiphy_name(local->hw.wiphy),
 		       skb_queue_len(&sta->tx_filtered),
 		       !!test_sta_flags(sta, WLAN_STA_PS), jiffies);
+#endif
 	dev_kfree_skb(skb);
 }
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0a310d09ab0..4a3bddd206d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -346,7 +346,7 @@ static void ieee80211_sta_wmm_params(struct net_device *dev,
 		params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4);
 		params.cw_min = ecw2cw(pos[1] & 0x0f);
 		params.txop = pos[2] | (pos[3] << 8);
-#ifdef CONFIG_MAC80211_DEBUG
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d "
 		       "cWmin=%d cWmax=%d txop=%d\n",
 		       dev->name, queue, aci, acm, params.aifs, params.cw_min,
@@ -371,6 +371,7 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata,
 	u32 changed = 0;
 
 	if (use_protection != bss_conf->use_cts_prot) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		if (net_ratelimit()) {
 			printk(KERN_DEBUG "%s: CTS protection %s (BSSID="
 			       "%s)\n",
@@ -378,11 +379,13 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata,
 			       use_protection ? "enabled" : "disabled",
 			       print_mac(mac, ifsta->bssid));
 		}
+#endif
 		bss_conf->use_cts_prot = use_protection;
 		changed |= BSS_CHANGED_ERP_CTS_PROT;
 	}
 
 	if (use_short_preamble != bss_conf->use_short_preamble) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		if (net_ratelimit()) {
 			printk(KERN_DEBUG "%s: switched to %s barker preamble"
 			       " (BSSID=%s)\n",
@@ -390,6 +393,7 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata,
 			       use_short_preamble ? "short" : "long",
 			       print_mac(mac, ifsta->bssid));
 		}
+#endif
 		bss_conf->use_short_preamble = use_short_preamble;
 		changed |= BSS_CHANGED_ERP_PREAMBLE;
 	}
@@ -1175,14 +1179,10 @@ static void ieee80211_auth_challenge(struct net_device *dev,
 	u8 *pos;
 	struct ieee802_11_elems elems;
 
-	printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name);
 	pos = mgmt->u.auth.variable;
 	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
-	if (!elems.challenge) {
-		printk(KERN_DEBUG "%s: no challenge IE in shared key auth "
-		       "frame\n", dev->name);
+	if (!elems.challenge)
 		return;
-	}
 	ieee80211_send_auth(dev, ifsta, 3, elems.challenge - 2,
 			    elems.challenge_len + 2, 1);
 }
@@ -1364,9 +1364,11 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
 	sta->ampdu_mlme.tid_rx[tid] =
 			kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC);
 	if (!sta->ampdu_mlme.tid_rx[tid]) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		if (net_ratelimit())
 			printk(KERN_ERR "allocate rx mlme to tid %d failed\n",
 					tid);
+#endif
 		goto end;
 	}
 	/* rx timer */
@@ -1382,9 +1384,11 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
 	tid_agg_rx->reorder_buf =
 		kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC);
 	if (!tid_agg_rx->reorder_buf) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		if (net_ratelimit())
 			printk(KERN_ERR "can not allocate reordering buffer "
 			       "to tid %d\n", tid);
+#endif
 		kfree(sta->ampdu_mlme.tid_rx[tid]);
 		goto end;
 	}
@@ -1451,8 +1455,6 @@ static void ieee80211_sta_process_addba_resp(struct net_device *dev,
 
 	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
 		spin_unlock_bh(&sta->lock);
-		printk(KERN_DEBUG "state not HT_ADDBA_REQUESTED_MSK:"
-			"%d\n", *state);
 		goto addba_resp_exit;
 	}
 
@@ -1471,22 +1473,14 @@ static void ieee80211_sta_process_addba_resp(struct net_device *dev,
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 	if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
 			== WLAN_STATUS_SUCCESS) {
-		if (*state & HT_ADDBA_RECEIVED_MSK)
-			printk(KERN_DEBUG "double addBA response\n");
-
 		*state |= HT_ADDBA_RECEIVED_MSK;
 		sta->ampdu_mlme.addba_req_num[tid] = 0;
 
-		if (*state == HT_AGG_STATE_OPERATIONAL) {
-			printk(KERN_DEBUG "Aggregation on for tid %d \n", tid);
+		if (*state == HT_AGG_STATE_OPERATIONAL)
 			ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
-		}
 
 		spin_unlock_bh(&sta->lock);
-		printk(KERN_DEBUG "recipient accepted agg: tid %d \n", tid);
 	} else {
-		printk(KERN_DEBUG "recipient rejected agg: tid %d \n", tid);
-
 		sta->ampdu_mlme.addba_req_num[tid]++;
 		/* this will allow the state check in stop_BA_session */
 		*state = HT_AGG_STATE_OPERATIONAL;
@@ -1585,7 +1579,7 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
 					ra, tid, NULL);
 	if (ret)
 		printk(KERN_DEBUG "HW problem - can not stop rx "
-				"aggergation for tid %d\n", tid);
+				"aggregation for tid %d\n", tid);
 
 	/* shutdown timer has not expired */
 	if (initiator != WLAN_BACK_TIMER)
@@ -1691,12 +1685,16 @@ void sta_addba_resp_timer_expired(unsigned long data)
 	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
 		spin_unlock_bh(&sta->lock);
 		*state = HT_AGG_STATE_IDLE;
+#ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "timer expired on tid %d but we are not "
 				"expecting addBA response there", tid);
+#endif
 		goto timer_expired_exit;
 	}
 
+#ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid);
+#endif
 
 	/* go through the state check in stop_BA_session */
 	*state = HT_AGG_STATE_OPERATIONAL;
@@ -1724,7 +1722,9 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
 	struct sta_info *sta = container_of(timer_to_id, struct sta_info,
 					 timer_to_tid[0]);
 
+#ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
+#endif
 	ieee80211_sta_stop_rx_ba_session(sta->sdata->dev, sta->addr,
 					 (u16)*ptid, WLAN_BACK_TIMER,
 					 WLAN_REASON_QSTA_TIMEOUT);
@@ -1819,47 +1819,24 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 	DECLARE_MAC_BUF(mac);
 
 	if (ifsta->state != IEEE80211_AUTHENTICATE &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
-		printk(KERN_DEBUG "%s: authentication frame received from "
-		       "%s, but not in authenticate state - ignored\n",
-		       dev->name, print_mac(mac, mgmt->sa));
+	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
 		return;
-	}
 
-	if (len < 24 + 6) {
-		printk(KERN_DEBUG "%s: too short (%zd) authentication frame "
-		       "received from %s - ignored\n",
-		       dev->name, len, print_mac(mac, mgmt->sa));
+	if (len < 24 + 6)
 		return;
-	}
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-	    memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) {
-		printk(KERN_DEBUG "%s: authentication frame received from "
-		       "unknown AP (SA=%s BSSID=%s) - "
-		       "ignored\n", dev->name, print_mac(mac, mgmt->sa),
-		       print_mac(mac, mgmt->bssid));
+	    memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0)
 		return;
-	}
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-	    memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) {
-		printk(KERN_DEBUG "%s: authentication frame received from "
-		       "unknown BSSID (SA=%s BSSID=%s) - "
-		       "ignored\n", dev->name, print_mac(mac, mgmt->sa),
-		       print_mac(mac, mgmt->bssid));
+	    memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0)
 		return;
-	}
 
 	auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
 	auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
 	status_code = le16_to_cpu(mgmt->u.auth.status_code);
 
-	printk(KERN_DEBUG "%s: RX authentication from %s (alg=%d "
-	       "transaction=%d status=%d)\n",
-	       dev->name, print_mac(mac, mgmt->sa), auth_alg,
-	       auth_transaction, status_code);
-
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		/*
 		 * IEEE 802.11 standard does not require authentication in IBSS
@@ -1867,26 +1844,16 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 		 * However, try to reply to authentication attempts if someone
 		 * has actually implemented this.
 		 */
-		if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) {
-			printk(KERN_DEBUG "%s: unexpected IBSS authentication "
-			       "frame (alg=%d transaction=%d)\n",
-			       dev->name, auth_alg, auth_transaction);
+		if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1)
 			return;
-		}
 		ieee80211_send_auth(dev, ifsta, 2, NULL, 0, 0);
 	}
 
 	if (auth_alg != ifsta->auth_alg ||
-	    auth_transaction != ifsta->auth_transaction) {
-		printk(KERN_DEBUG "%s: unexpected authentication frame "
-		       "(alg=%d transaction=%d)\n",
-		       dev->name, auth_alg, auth_transaction);
+	    auth_transaction != ifsta->auth_transaction)
 		return;
-	}
 
 	if (status_code != WLAN_STATUS_SUCCESS) {
-		printk(KERN_DEBUG "%s: AP denied authentication (auth_alg=%d "
-		       "code=%d)\n", dev->name, ifsta->auth_alg, status_code);
 		if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG) {
 			u8 algs[3];
 			const int num_algs = ARRAY_SIZE(algs);
@@ -1915,9 +1882,6 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 				    !ieee80211_sta_wep_configured(dev))
 					continue;
 				ifsta->auth_alg = algs[pos];
-				printk(KERN_DEBUG "%s: set auth_alg=%d for "
-				       "next try\n",
-				       dev->name, ifsta->auth_alg);
 				break;
 			}
 		}
@@ -1947,27 +1911,14 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev,
 	u16 reason_code;
 	DECLARE_MAC_BUF(mac);
 
-	if (len < 24 + 2) {
-		printk(KERN_DEBUG "%s: too short (%zd) deauthentication frame "
-		       "received from %s - ignored\n",
-		       dev->name, len, print_mac(mac, mgmt->sa));
+	if (len < 24 + 2)
 		return;
-	}
 
-	if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) {
-		printk(KERN_DEBUG "%s: deauthentication frame received from "
-		       "unknown AP (SA=%s BSSID=%s) - "
-		       "ignored\n", dev->name, print_mac(mac, mgmt->sa),
-		       print_mac(mac, mgmt->bssid));
+	if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN))
 		return;
-	}
 
 	reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
 
-	printk(KERN_DEBUG "%s: RX deauthentication from %s"
-	       " (reason=%d)\n",
-	       dev->name, print_mac(mac, mgmt->sa), reason_code);
-
 	if (ifsta->flags & IEEE80211_STA_AUTHENTICATED)
 		printk(KERN_DEBUG "%s: deauthenticated\n", dev->name);
 
@@ -1992,27 +1943,14 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev,
 	u16 reason_code;
 	DECLARE_MAC_BUF(mac);
 
-	if (len < 24 + 2) {
-		printk(KERN_DEBUG "%s: too short (%zd) disassociation frame "
-		       "received from %s - ignored\n",
-		       dev->name, len, print_mac(mac, mgmt->sa));
+	if (len < 24 + 2)
 		return;
-	}
 
-	if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) {
-		printk(KERN_DEBUG "%s: disassociation frame received from "
-		       "unknown AP (SA=%s BSSID=%s) - "
-		       "ignored\n", dev->name, print_mac(mac, mgmt->sa),
-		       print_mac(mac, mgmt->bssid));
+	if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN))
 		return;
-	}
 
 	reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
 
-	printk(KERN_DEBUG "%s: RX disassociation from %s"
-	       " (reason=%d)\n",
-	       dev->name, print_mac(mac, mgmt->sa), reason_code);
-
 	if (ifsta->flags & IEEE80211_STA_ASSOCIATED)
 		printk(KERN_DEBUG "%s: disassociated\n", dev->name);
 
@@ -2048,27 +1986,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	/* AssocResp and ReassocResp have identical structure, so process both
 	 * of them in this function. */
 
-	if (ifsta->state != IEEE80211_ASSOCIATE) {
-		printk(KERN_DEBUG "%s: association frame received from "
-		       "%s, but not in associate state - ignored\n",
-		       dev->name, print_mac(mac, mgmt->sa));
+	if (ifsta->state != IEEE80211_ASSOCIATE)
 		return;
-	}
 
-	if (len < 24 + 6) {
-		printk(KERN_DEBUG "%s: too short (%zd) association frame "
-		       "received from %s - ignored\n",
-		       dev->name, len, print_mac(mac, mgmt->sa));
+	if (len < 24 + 6)
 		return;
-	}
 
-	if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) {
-		printk(KERN_DEBUG "%s: association frame received from "
-		       "unknown AP (SA=%s BSSID=%s) - "
-		       "ignored\n", dev->name, print_mac(mac, mgmt->sa),
-		       print_mac(mac, mgmt->bssid));
+	if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0)
 		return;
-	}
 
 	capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
 	status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
@@ -2663,12 +2588,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	if (!beacon && memcmp(mgmt->da, dev->dev_addr, ETH_ALEN))
 		return; /* ignore ProbeResp to foreign address */
 
-#if 0
-	printk(KERN_DEBUG "%s: RX %s from %s to %s\n",
-	       dev->name, beacon ? "Beacon" : "Probe Response",
-	       print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da));
-#endif
-
 	beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id &&
@@ -2698,15 +2617,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 			sta->supp_rates[rx_status->band] =
 				sdata->u.sta.supp_rates_bits[rx_status->band];
 		}
-		if (sta->supp_rates[rx_status->band] != prev_rates) {
-			printk(KERN_DEBUG "%s: updated supp_rates set for "
-			       "%s based on beacon info (0x%llx & 0x%llx -> "
-			       "0x%llx)\n",
-			       dev->name, print_mac(mac, sta->addr),
-			       (unsigned long long) prev_rates,
-			       (unsigned long long) supp_rates,
-			       (unsigned long long) sta->supp_rates[rx_status->band]);
-		}
 	}
 
 	rcu_read_unlock();
@@ -2962,11 +2872,10 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 		if (beacon_timestamp > rx_timestamp) {
 #ifndef CONFIG_MAC80211_IBSS_DEBUG
-			if (net_ratelimit())
+			printk(KERN_DEBUG "%s: beacon TSF higher than "
+			       "local TSF - IBSS merge with BSSID %s\n",
+			       dev->name, print_mac(mac, mgmt->bssid));
 #endif
-				printk(KERN_DEBUG "%s: beacon TSF higher than "
-				       "local TSF - IBSS merge with BSSID %s\n",
-				       dev->name, print_mac(mac, mgmt->bssid));
 			ieee80211_sta_join_ibss(dev, &sdata->u.sta, bss);
 			ieee80211_ibss_add_sta(dev, NULL,
 					       mgmt->bssid, mgmt->sa,
@@ -3106,11 +3015,11 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
 	pos = mgmt->u.probe_req.variable;
 	if (pos[0] != WLAN_EID_SSID ||
 	    pos + 2 + pos[1] > end) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq "
-			       "from %s\n",
-			       dev->name, print_mac(mac, mgmt->sa));
-		}
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+		printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq "
+		       "from %s\n",
+		       dev->name, print_mac(mac, mgmt->sa));
+#endif
 		return;
 	}
 	if (pos[1] != 0 &&
@@ -3179,11 +3088,6 @@ static void ieee80211_rx_mgmt_action(struct net_device *dev,
 				break;
 			ieee80211_sta_process_delba(dev, mgmt, len);
 			break;
-		default:
-			if (net_ratelimit())
-			   printk(KERN_DEBUG "%s: Rx unknown A-MPDU action\n",
-					dev->name);
-			break;
 		}
 		break;
 	case PLINK_CATEGORY:
@@ -3194,11 +3098,6 @@ static void ieee80211_rx_mgmt_action(struct net_device *dev,
 		if (ieee80211_vif_is_mesh(&sdata->vif))
 			mesh_rx_path_sel_frame(dev, mgmt, len);
 		break;
-	default:
-		if (net_ratelimit())
-			printk(KERN_DEBUG "%s: Rx unknown action frame - "
-			"category=%d\n", dev->name, mgmt->u.action.category);
-		break;
 	}
 }
 
@@ -3234,11 +3133,6 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
 		skb_queue_tail(&ifsta->skb_queue, skb);
 		queue_work(local->hw.workqueue, &ifsta->work);
 		return;
-	default:
-		printk(KERN_DEBUG "%s: received unknown management frame - "
-		       "stype=%d\n", dev->name,
-		       (fc & IEEE80211_FCTL_STYPE) >> 4);
-		break;
 	}
 
  fail:
@@ -3367,8 +3261,10 @@ static void ieee80211_sta_expire(struct net_device *dev, unsigned long exp_time)
 	spin_lock_irqsave(&local->sta_lock, flags);
 	list_for_each_entry_safe(sta, tmp, &local->sta_list, list)
 		if (time_after(jiffies, sta->last_rx + exp_time)) {
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
 			printk(KERN_DEBUG "%s: expiring inactive STA %s\n",
 			       dev->name, print_mac(mac, sta->addr));
+#endif
 			__sta_info_unlink(&sta);
 			if (sta)
 				list_add(&sta->list, &tmp_list);
@@ -3451,13 +3347,10 @@ void ieee80211_sta_work(struct work_struct *work)
 	if (local->sta_sw_scanning || local->sta_hw_scanning)
 		return;
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) {
-		printk(KERN_DEBUG "%s: ieee80211_sta_work: non-STA interface "
-		       "(type=%d)\n", dev->name, sdata->vif.type);
+	if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA &&
+		    sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
+		    sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT))
 		return;
-	}
 	ifsta = &sdata->u.sta;
 
 	while ((skb = skb_dequeue(&ifsta->skb_queue)))
@@ -3511,8 +3404,7 @@ void ieee80211_sta_work(struct work_struct *work)
 		break;
 #endif
 	default:
-		printk(KERN_DEBUG "ieee80211_sta_work: Unknown state %d\n",
-		       ifsta->state);
+		WARN_ON(1);
 		break;
 	}
 
@@ -3547,8 +3439,6 @@ static void ieee80211_sta_reset_auth(struct net_device *dev,
 		ifsta->auth_alg = WLAN_AUTH_LEAP;
 	else
 		ifsta->auth_alg = WLAN_AUTH_OPEN;
-	printk(KERN_DEBUG "%s: Initial auth_alg=%d\n", dev->name,
-	       ifsta->auth_alg);
 	ifsta->auth_transaction = -1;
 	ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
 	ifsta->auth_tries = ifsta->assoc_tries = 0;
@@ -4474,8 +4364,10 @@ struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 	if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid))
 		return NULL;
 
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n",
 	       wiphy_name(local->hw.wiphy), print_mac(mac, addr), dev->name);
+#endif
 
 	sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
 	if (!sta)
@@ -4502,7 +4394,7 @@ int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason)
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 
-	printk(KERN_DEBUG "%s: deauthenticate(reason=%d)\n",
+	printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n",
 	       dev->name, reason);
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
@@ -4520,7 +4412,7 @@ int ieee80211_sta_disassociate(struct net_device *dev, u16 reason)
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 
-	printk(KERN_DEBUG "%s: disassociate(reason=%d)\n",
+	printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n",
 	       dev->name, reason);
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 289112777e9..6a88e8f9bff 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -613,11 +613,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 		rx->key->tx_rx_count++;
 		/* TODO: add threshold stuff again */
 	} else {
-#ifdef CONFIG_MAC80211_DEBUG
-		if (net_ratelimit())
-			printk(KERN_DEBUG "%s: RX protected frame,"
-			       " but have no key\n", rx->dev->name);
-#endif /* CONFIG_MAC80211_DEBUG */
 		return RX_DROP_MONITOR;
 	}
 
@@ -789,7 +784,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
 		sdata->fragment_next = 0;
 
 	if (!skb_queue_empty(&entry->skb_list)) {
-#ifdef CONFIG_MAC80211_DEBUG
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		struct ieee80211_hdr *hdr =
 			(struct ieee80211_hdr *) entry->skb_list.next->data;
 		DECLARE_MAC_BUF(mac);
@@ -801,7 +796,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
 		       jiffies - entry->first_frag_time, entry->seq,
 		       entry->last_frag, print_mac(mac, hdr->addr1),
 		       print_mac(mac2, hdr->addr2));
-#endif /* CONFIG_MAC80211_DEBUG */
+#endif
 		__skb_queue_purge(&entry->skb_list);
 	}
 
@@ -922,18 +917,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 				break;
 		}
 		rpn = rx->key->u.ccmp.rx_pn[rx->queue];
-		if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) {
-			if (net_ratelimit())
-				printk(KERN_DEBUG "%s: defrag: CCMP PN not "
-				       "sequential A2=%s"
-				       " PN=%02x%02x%02x%02x%02x%02x "
-				       "(expected %02x%02x%02x%02x%02x%02x)\n",
-				       rx->dev->name, print_mac(mac, hdr->addr2),
-				       rpn[0], rpn[1], rpn[2], rpn[3], rpn[4],
-				       rpn[5], pn[0], pn[1], pn[2], pn[3],
-				       pn[4], pn[5]);
+		if (memcmp(pn, rpn, CCMP_PN_LEN))
 			return RX_DROP_UNUSABLE;
-		}
 		memcpy(entry->last_pn, pn, CCMP_PN_LEN);
 	}
 
@@ -1037,7 +1022,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
 		 *	  have nothing buffered for it?
 		 */
 		printk(KERN_DEBUG "%s: STA %s sent PS Poll even "
-		       "though there is no buffered frames for it\n",
+		       "though there are no buffered frames for it\n",
 		       rx->dev->name, print_mac(mac, rx->sta->addr));
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 	}
@@ -1073,14 +1058,8 @@ static int
 ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx)
 {
 	if (unlikely(!rx->sta ||
-	    !test_sta_flags(rx->sta, WLAN_STA_AUTHORIZED))) {
-#ifdef CONFIG_MAC80211_DEBUG
-		if (net_ratelimit())
-			printk(KERN_DEBUG "%s: dropped frame "
-			       "(unauthorized port)\n", rx->dev->name);
-#endif /* CONFIG_MAC80211_DEBUG */
+	    !test_sta_flags(rx->sta, WLAN_STA_AUTHORIZED)))
 		return -EACCES;
-	}
 
 	return 0;
 }
@@ -1160,16 +1139,8 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 		memcpy(src, hdr->addr2, ETH_ALEN);
 
 		if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP &&
-			     sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) {
-			if (net_ratelimit())
-				printk(KERN_DEBUG "%s: dropped ToDS frame "
-				       "(BSSID=%s SA=%s DA=%s)\n",
-				       dev->name,
-				       print_mac(mac, hdr->addr1),
-				       print_mac(mac2, hdr->addr2),
-				       print_mac(mac3, hdr->addr3));
+			     sdata->vif.type != IEEE80211_IF_TYPE_VLAN))
 			return -1;
-		}
 		break;
 	case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
 		/* RA TA DA SA */
@@ -1177,17 +1148,8 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 		memcpy(src, hdr->addr4, ETH_ALEN);
 
 		 if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS &&
-			     sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)) {
-			 if (net_ratelimit())
-				 printk(KERN_DEBUG "%s: dropped FromDS&ToDS "
-				       "frame (RA=%s TA=%s DA=%s SA=%s)\n",
-				       rx->dev->name,
-				       print_mac(mac, hdr->addr1),
-				       print_mac(mac2, hdr->addr2),
-				       print_mac(mac3, hdr->addr3),
-				       print_mac(mac4, hdr->addr4));
+			     sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT))
 			return -1;
-		}
 		break;
 	case IEEE80211_FCTL_FROMDS:
 		/* DA BSSID SA */
@@ -1204,27 +1166,13 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 		memcpy(dst, hdr->addr1, ETH_ALEN);
 		memcpy(src, hdr->addr2, ETH_ALEN);
 
-		if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
-			if (net_ratelimit()) {
-				printk(KERN_DEBUG "%s: dropped IBSS frame "
-				       "(DA=%s SA=%s BSSID=%s)\n",
-				       dev->name,
-				       print_mac(mac, hdr->addr1),
-				       print_mac(mac2, hdr->addr2),
-				       print_mac(mac3, hdr->addr3));
-			}
+		if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
 			return -1;
-		}
 		break;
 	}
 
-	if (unlikely(skb->len - hdrlen < 8)) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "%s: RX too short data frame "
-			       "payload\n", dev->name);
-		}
+	if (unlikely(skb->len - hdrlen < 8))
 		return -1;
-	}
 
 	payload = skb->data + hdrlen;
 	ethertype = (payload[6] << 8) | payload[7];
@@ -1416,10 +1364,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 
 		padding = ((4 - subframe_len) & 0x3);
 		/* the last MSDU has no padding */
-		if (subframe_len > remaining) {
-			printk(KERN_DEBUG "%s: wrong buffer size\n", dev->name);
+		if (subframe_len > remaining)
 			return RX_DROP_UNUSABLE;
-		}
 
 		skb_pull(skb, sizeof(struct ethhdr));
 		/* if last subframe reuse skb */
@@ -1440,8 +1386,6 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 			eth = (struct ethhdr *) skb_pull(skb, ntohs(len) +
 							padding);
 			if (!eth) {
-				printk(KERN_DEBUG "%s: wrong buffer size\n",
-				       dev->name);
 				dev_kfree_skb(frame);
 				return RX_DROP_UNUSABLE;
 			}
@@ -1593,31 +1537,16 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
 	else
 		keyidx = -1;
 
-	if (net_ratelimit())
-		printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC "
-		       "failure from %s to %s keyidx=%d\n",
-		       dev->name, print_mac(mac, hdr->addr2),
-		       print_mac(mac2, hdr->addr1), keyidx);
-
 	if (!rx->sta) {
 		/*
 		 * Some hardware seem to generate incorrect Michael MIC
 		 * reports; ignore them to avoid triggering countermeasures.
 		 */
-		if (net_ratelimit())
-			printk(KERN_DEBUG "%s: ignored spurious Michael MIC "
-			       "error for unknown address %s\n",
-			       dev->name, print_mac(mac, hdr->addr2));
 		goto ignore;
 	}
 
-	if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) {
-		if (net_ratelimit())
-			printk(KERN_DEBUG "%s: ignored spurious Michael MIC "
-			       "error for a frame with no PROTECTED flag (src "
-			       "%s)\n", dev->name, print_mac(mac, hdr->addr2));
+	if (!(rx->fc & IEEE80211_FCTL_PROTECTED))
 		goto ignore;
-	}
 
 	if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) {
 		/*
@@ -1626,24 +1555,13 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
 		 * group keys and only the AP is sending real multicast
 		 * frames in the BSS.
 		 */
-		if (net_ratelimit())
-			printk(KERN_DEBUG "%s: ignored Michael MIC error for "
-			       "a frame with non-zero keyidx (%d)"
-			       " (src %s)\n", dev->name, keyidx,
-			       print_mac(mac, hdr->addr2));
 		goto ignore;
 	}
 
 	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA &&
 	    ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
-	     (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) {
-		if (net_ratelimit())
-			printk(KERN_DEBUG "%s: ignored spurious Michael MIC "
-			       "error for a frame that cannot be encrypted "
-			       "(fc=0x%04x) (src %s)\n",
-			       dev->name, rx->fc, print_mac(mac, hdr->addr2));
+	     (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))
 		goto ignore;
-	}
 
 	mac80211_ev_michael_mic_failure(rx->dev, keyidx, hdr);
  ignore:
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index b3c733162fc..d8a16b7f6a6 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -554,8 +554,10 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
 
 		sdata = sta->sdata;
 		local->total_ps_buffered--;
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 		printk(KERN_DEBUG "Buffered frame expired (STA "
 		       "%s)\n", print_mac(mac, sta->addr));
+#endif
 		dev_kfree_skb(skb);
 
 		if (skb_queue_empty(&sta->ps_tx_buf))
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7b930d3c2fb..9bd9faac3c3 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -327,8 +327,10 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 	rcu_read_unlock();
 
 	local->total_ps_buffered = total;
+#ifdef MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n",
 	       wiphy_name(local->hw.wiphy), purged);
+#endif
 }
 
 static ieee80211_tx_result
@@ -358,11 +360,13 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 			purge_old_ps_buffers(tx->local);
 		if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >=
 		    AP_MAX_BC_BUFFER) {
+#ifdef MAC80211_VERBOSE_PS_DEBUG
 			if (net_ratelimit()) {
 				printk(KERN_DEBUG "%s: BC TX buffer full - "
 				       "dropping the oldest frame\n",
 				       tx->dev->name);
 			}
+#endif
 			dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf));
 		} else
 			tx->local->total_ps_buffered++;
@@ -403,11 +407,13 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 			purge_old_ps_buffers(tx->local);
 		if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) {
 			struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf);
+#ifdef MAC80211_VERBOSE_PS_DEBUG
 			if (net_ratelimit()) {
 				printk(KERN_DEBUG "%s: STA %s TX "
 				       "buffer full - dropping oldest frame\n",
 				       tx->dev->name, print_mac(mac, sta->addr));
 			}
+#endif
 			dev_kfree_skb(old);
 		} else
 			tx->local->total_ps_buffered++;
@@ -713,7 +719,6 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 
  fail:
-	printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name);
 	if (frags) {
 		for (i = 0; i < num_fragm - 1; i++)
 			if (frags[i])
@@ -1404,8 +1409,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	if (unlikely(skb->len < ETH_HLEN)) {
-		printk(KERN_DEBUG "%s: short skb (len=%d)\n",
-		       dev->name, skb->len);
 		ret = 0;
 		goto fail;
 	}
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 35b664d00e2..872d2fcd1a5 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -253,11 +253,8 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb,
 
 	if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen,
 				       skb->data + hdrlen + WEP_IV_LEN,
-				       len)) {
-		if (net_ratelimit())
-			printk(KERN_DEBUG "WEP decrypt failed (ICV)\n");
+				       len))
 		ret = -1;
-	}
 
 	kfree(rc4key);
 
@@ -301,14 +298,8 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
 		return RX_CONTINUE;
 
 	if (!(rx->status->flag & RX_FLAG_DECRYPTED)) {
-		if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) {
-#ifdef CONFIG_MAC80211_DEBUG
-			if (net_ratelimit())
-				printk(KERN_DEBUG "%s: RX WEP frame, decrypt "
-				       "failed\n", rx->dev->name);
-#endif /* CONFIG_MAC80211_DEBUG */
+		if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key))
 			return RX_DROP_UNUSABLE;
-		}
 	} else if (!(rx->status->flag & RX_FLAG_IV_STRIPPED)) {
 		ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
 		/* remove ICV */
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index cfa8fbb0736..f23b5a4d4ac 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -680,7 +680,6 @@ void ieee80211_requeue(struct ieee80211_local *local, int queue)
 	if (!qdisc || !qdisc->dequeue)
 		return;
 
-	printk(KERN_DEBUG "requeue: qlen = %d\n", qdisc->q.qlen);
 	for (len = qdisc->q.qlen; len > 0; len--) {
 		skb = qdisc->dequeue(qdisc);
 		root_qd->q.qlen--;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index f809761fbfb..b414d5d92f3 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -146,9 +146,6 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 		if (!(rx->flags & IEEE80211_RX_RA_MATCH))
 			return RX_DROP_UNUSABLE;
 
-		printk(KERN_DEBUG "%s: invalid Michael MIC in data frame from "
-		       "%s\n", rx->dev->name, print_mac(mac, sa));
-
 		mac80211_ev_michael_mic_failure(rx->dev, rx->key->conf.keyidx,
 						(void *) skb->data);
 		return RX_DROP_UNUSABLE;
@@ -282,15 +279,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 					  hdr->addr1, hwaccel, rx->queue,
 					  &rx->tkip_iv32,
 					  &rx->tkip_iv16);
-	if (res != TKIP_DECRYPT_OK || wpa_test) {
-#ifdef CONFIG_MAC80211_DEBUG
-		if (net_ratelimit())
-			printk(KERN_DEBUG "%s: TKIP decrypt failed for RX "
-			       "frame from %s (res=%d)\n", rx->dev->name,
-			       print_mac(mac, rx->sta->addr), res);
-#endif /* CONFIG_MAC80211_DEBUG */
+	if (res != TKIP_DECRYPT_OK || wpa_test)
 		return RX_DROP_UNUSABLE;
-	}
 
 	/* Trim ICV */
 	skb_trim(skb, skb->len - TKIP_ICV_LEN);
@@ -512,16 +502,6 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 	(void) ccmp_hdr2pn(pn, skb->data + hdrlen);
 
 	if (memcmp(pn, key->u.ccmp.rx_pn[rx->queue], CCMP_PN_LEN) <= 0) {
-#ifdef CONFIG_MAC80211_DEBUG
-		u8 *ppn = key->u.ccmp.rx_pn[rx->queue];
-
-		printk(KERN_DEBUG "%s: CCMP replay detected for RX frame from "
-		       "%s (RX PN %02x%02x%02x%02x%02x%02x <= prev. PN "
-		       "%02x%02x%02x%02x%02x%02x)\n", rx->dev->name,
-		       print_mac(mac, rx->sta->addr),
-		       pn[0], pn[1], pn[2], pn[3], pn[4], pn[5],
-		       ppn[0], ppn[1], ppn[2], ppn[3], ppn[4], ppn[5]);
-#endif /* CONFIG_MAC80211_DEBUG */
 		key->u.ccmp.replays++;
 		return RX_DROP_UNUSABLE;
 	}
@@ -541,12 +521,6 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 			    skb->data + hdrlen + CCMP_HDR_LEN, data_len,
 			    skb->data + skb->len - CCMP_MIC_LEN,
 			    skb->data + hdrlen + CCMP_HDR_LEN)) {
-#ifdef CONFIG_MAC80211_DEBUG
-			if (net_ratelimit())
-				printk(KERN_DEBUG "%s: CCMP decrypt failed "
-				       "for RX frame from %s\n", rx->dev->name,
-				       print_mac(mac, rx->sta->addr));
-#endif /* CONFIG_MAC80211_DEBUG */
 			return RX_DROP_UNUSABLE;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From ab96dddbedf4bb8a7a0fe44012efc1d99598c36f Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 13:54:04 -0500
Subject: svcrdma: Add a type for keeping NFS RPC mapping

Create a new data structure to hold the remote client address space
to local server address space mapping.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          | 27 +++++++++++++++++++++++++++
 net/sunrpc/xprtrdma/svc_rdma.c           | 19 +++++++++++++++++++
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 26 ++++++++++++++++++++++++++
 3 files changed, 72 insertions(+)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 05eb4664d0d..bd8749cc808 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -86,6 +86,31 @@ struct svc_rdma_op_ctxt {
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
+/*
+ * NFS_ requests are mapped on the client side by the chunk lists in
+ * the RPCRDMA header. During the fetching of the RPC from the client
+ * and the writing of the reply to the client, the memory in the
+ * client and the memory in the server must be mapped as contiguous
+ * vaddr/len for access by the hardware. These data strucures keep
+ * these mappings.
+ *
+ * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
+ * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
+ * 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
+ * mapping of the reply.
+ */
+struct svc_rdma_chunk_sge {
+	int start;		/* sge no for this chunk */
+	int count;		/* sge count for this chunk */
+};
+struct svc_rdma_req_map {
+	unsigned long count;
+	union {
+		struct kvec sge[RPCSVC_MAXPAGES];
+		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
+	};
+};
+
 #define RDMACTXT_F_LAST_CTXT	2
 
 struct svcxprt_rdma {
@@ -173,6 +198,8 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
+extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
+extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
 extern struct svc_xprt_class svc_rdma_class;
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 88c0ca20bb1..171f2053e90 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -69,6 +69,9 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
+/* Temporary NFS request map cache */
+struct kmem_cache *svc_rdma_map_cachep;
+
 /*
  * This function implements reading and resetting an atomic_t stat
  * variable through read/write to a proc file. Any write to the file
@@ -241,6 +244,7 @@ void svc_rdma_cleanup(void)
 		svcrdma_table_header = NULL;
 	}
 	svc_unreg_xprt_class(&svc_rdma_class);
+	kmem_cache_destroy(svc_rdma_map_cachep);
 }
 
 int svc_rdma_init(void)
@@ -255,9 +259,24 @@ int svc_rdma_init(void)
 		svcrdma_table_header =
 			register_sysctl_table(svcrdma_root_table);
 
+	/* Create the temporary map cache */
+	svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
+						sizeof(struct svc_rdma_req_map),
+						0,
+						SLAB_HWCACHE_ALIGN,
+						NULL);
+	if (!svc_rdma_map_cachep) {
+		printk(KERN_INFO "Could not allocate map cache.\n");
+		goto err;
+	}
+
 	/* Register RDMA with the SVC transport switch */
 	svc_reg_xprt_class(&svc_rdma_class);
 	return 0;
+
+ err:
+	unregister_sysctl_table(svcrdma_table_header);
+	return -ENOMEM;
 }
 MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
 MODULE_DESCRIPTION("SVC RDMA Transport");
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e132509d1db..ae90758d8e9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -173,6 +173,32 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 	atomic_dec(&xprt->sc_ctxt_used);
 }
 
+/* Temporary NFS request map cache. Created in svc_rdma.c  */
+extern struct kmem_cache *svc_rdma_map_cachep;
+
+/*
+ * Temporary NFS req mappings are shared across all transport
+ * instances. These are short lived and should be bounded by the number
+ * of concurrent server threads * depth of the SQ.
+ */
+struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+{
+	struct svc_rdma_req_map *map;
+	while (1) {
+		map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
+		if (map)
+			break;
+		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
+	}
+	map->count = 0;
+	return map;
+}
+
+void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+{
+	kmem_cache_free(svc_rdma_map_cachep, map);
+}
+
 /* ib_cq event handler */
 static void cq_event_handler(struct ib_event *event, void *context)
 {
-- 
cgit v1.2.3-70-g09d2


From 34d16e42a6ab74a4a4389c061dfa3c6609e08fa0 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 2 Jul 2008 14:56:13 -0500
Subject: svcrdma: Use RPC reply map for RDMA_WRITE processing

Use the new svc_rdma_req_map data type for mapping the client side memory
to the server side memory. Move the DMA mapping to the context pointed to
by each WR individually so that it is unmapped after the WR completes.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 163 ++++++++++++++-----------------
 net/sunrpc/xprtrdma/svc_rdma_transport.c |   5 +-
 2 files changed, 80 insertions(+), 88 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index fb82b1b683f..bdc11a30e93 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -63,52 +63,44 @@
  * SGE[2..sge_count-2] data from xdr->pages[]
  * SGE[sge_count-1]    data from xdr->tail.
  *
+ * The max SGE we need is the length of the XDR / pagesize + one for
+ * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
+ * reserves a page for both the request and the reply header, and this
+ * array is only concerned with the reply we are assured that we have
+ * on extra page for the RPCRMDA header.
  */
-static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
-				 struct xdr_buf *xdr,
-				 struct ib_sge *sge,
-				 int *sge_count)
+static void xdr_to_sge(struct svcxprt_rdma *xprt,
+		       struct xdr_buf *xdr,
+		       struct svc_rdma_req_map *vec)
 {
-	/* Max we need is the length of the XDR / pagesize + one for
-	 * head + one for tail + one for RPCRDMA header
-	 */
 	int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
 	int sge_no;
-	u32 byte_count = xdr->len;
 	u32 sge_bytes;
 	u32 page_bytes;
-	int page_off;
+	u32 page_off;
 	int page_no;
 
+	BUG_ON(xdr->len !=
+	       (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
+
 	/* Skip the first sge, this is for the RPCRDMA header */
 	sge_no = 1;
 
 	/* Head SGE */
-	sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device,
-					     xdr->head[0].iov_base,
-					     xdr->head[0].iov_len,
-					     DMA_TO_DEVICE);
-	sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
-	byte_count -= sge_bytes;
-	sge[sge_no].length = sge_bytes;
-	sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
+	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
 	sge_no++;
 
 	/* pages SGE */
 	page_no = 0;
 	page_bytes = xdr->page_len;
 	page_off = xdr->page_base;
-	while (byte_count && page_bytes) {
-		sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off));
-		sge[sge_no].addr =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					xdr->pages[page_no], page_off,
-					sge_bytes, DMA_TO_DEVICE);
-		sge_bytes = min(sge_bytes, page_bytes);
-		byte_count -= sge_bytes;
+	while (page_bytes) {
+		vec->sge[sge_no].iov_base =
+			page_address(xdr->pages[page_no]) + page_off;
+		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
 		page_bytes -= sge_bytes;
-		sge[sge_no].length = sge_bytes;
-		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+		vec->sge[sge_no].iov_len = sge_bytes;
 
 		sge_no++;
 		page_no++;
@@ -116,36 +108,24 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
 	}
 
 	/* Tail SGE */
-	if (byte_count && xdr->tail[0].iov_len) {
-		sge[sge_no].addr =
-			ib_dma_map_single(xprt->sc_cm_id->device,
-					  xdr->tail[0].iov_base,
-					  xdr->tail[0].iov_len,
-					  DMA_TO_DEVICE);
-		sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
-		byte_count -= sge_bytes;
-		sge[sge_no].length = sge_bytes;
-		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+	if (xdr->tail[0].iov_len) {
+		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
+		vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
 		sge_no++;
 	}
 
 	BUG_ON(sge_no > sge_max);
-	BUG_ON(byte_count != 0);
-
-	*sge_count = sge_no;
-	return sge;
+	vec->count = sge_no;
 }
 
-
 /* Assumptions:
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
 static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 		      u32 rmr, u64 to,
 		      u32 xdr_off, int write_len,
-		      struct ib_sge *xdr_sge, int sge_count)
+		      struct svc_rdma_req_map *vec)
 {
-	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
 	struct ib_send_wr write_wr;
 	struct ib_sge *sge;
 	int xdr_sge_no;
@@ -154,25 +134,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 	int sge_off;
 	int bc;
 	struct svc_rdma_op_ctxt *ctxt;
-	int ret = 0;
 
-	BUG_ON(sge_count > RPCSVC_MAXPAGES);
+	BUG_ON(vec->count > RPCSVC_MAXPAGES);
 	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
-		"write_len=%d, xdr_sge=%p, sge_count=%d\n",
+		"write_len=%d, vec->sge=%p, vec->count=%lu\n",
 		rmr, (unsigned long long)to, xdr_off,
-		write_len, xdr_sge, sge_count);
+		write_len, vec->sge, vec->count);
 
 	ctxt = svc_rdma_get_context(xprt);
-	ctxt->count = 0;
-	tmp_sge_ctxt = svc_rdma_get_context(xprt);
-	sge = tmp_sge_ctxt->sge;
+	ctxt->direction = DMA_TO_DEVICE;
+	sge = ctxt->sge;
 
 	/* Find the SGE associated with xdr_off */
-	for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count;
+	for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
 	     xdr_sge_no++) {
-		if (xdr_sge[xdr_sge_no].length > bc)
+		if (vec->sge[xdr_sge_no].iov_len > bc)
 			break;
-		bc -= xdr_sge[xdr_sge_no].length;
+		bc -= vec->sge[xdr_sge_no].iov_len;
 	}
 
 	sge_off = bc;
@@ -180,21 +158,27 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 	sge_no = 0;
 
 	/* Copy the remaining SGE */
-	while (bc != 0 && xdr_sge_no < sge_count) {
-		sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off;
-		sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
+	while (bc != 0 && xdr_sge_no < vec->count) {
+		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
 		sge_bytes = min((size_t)bc,
-				(size_t)(xdr_sge[xdr_sge_no].length-sge_off));
+				(size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
 		sge[sge_no].length = sge_bytes;
-
+		sge[sge_no].addr =
+			ib_dma_map_single(xprt->sc_cm_id->device,
+					  (void *)
+					  vec->sge[xdr_sge_no].iov_base + sge_off,
+					  sge_bytes, DMA_TO_DEVICE);
+		if (dma_mapping_error(sge[sge_no].addr))
+			goto err;
 		sge_off = 0;
 		sge_no++;
+		ctxt->count++;
 		xdr_sge_no++;
 		bc -= sge_bytes;
 	}
 
 	BUG_ON(bc != 0);
-	BUG_ON(xdr_sge_no > sge_count);
+	BUG_ON(xdr_sge_no > vec->count);
 
 	/* Prepare WRITE WR */
 	memset(&write_wr, 0, sizeof write_wr);
@@ -209,21 +193,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 
 	/* Post It */
 	atomic_inc(&rdma_stat_write);
-	if (svc_rdma_send(xprt, &write_wr)) {
-		svc_rdma_put_context(ctxt, 1);
-		/* Fatal error, close transport */
-		ret = -EIO;
-	}
-	svc_rdma_put_context(tmp_sge_ctxt, 0);
-	return ret;
+	if (svc_rdma_send(xprt, &write_wr))
+		goto err;
+	return 0;
+ err:
+	svc_rdma_put_context(ctxt, 0);
+	/* Fatal error, close transport */
+	return -EIO;
 }
 
 static int send_write_chunks(struct svcxprt_rdma *xprt,
 			     struct rpcrdma_msg *rdma_argp,
 			     struct rpcrdma_msg *rdma_resp,
 			     struct svc_rqst *rqstp,
-			     struct ib_sge *sge,
-			     int sge_count)
+			     struct svc_rdma_req_map *vec)
 {
 	u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
 	int write_len;
@@ -269,8 +252,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
 					 rs_offset + chunk_off,
 					 xdr_off,
 					 this_write,
-					 sge,
-					 sge_count);
+					 vec);
 			if (ret) {
 				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
 					ret);
@@ -292,8 +274,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 			     struct rpcrdma_msg *rdma_argp,
 			     struct rpcrdma_msg *rdma_resp,
 			     struct svc_rqst *rqstp,
-			     struct ib_sge *sge,
-			     int sge_count)
+			     struct svc_rdma_req_map *vec)
 {
 	u32 xfer_len = rqstp->rq_res.len;
 	int write_len;
@@ -341,8 +322,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 					 rs_offset + chunk_off,
 					 xdr_off,
 					 this_write,
-					 sge,
-					 sge_count);
+					 vec);
 			if (ret) {
 				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
 					ret);
@@ -380,7 +360,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		      struct page *page,
 		      struct rpcrdma_msg *rdma_resp,
 		      struct svc_rdma_op_ctxt *ctxt,
-		      int sge_count,
+		      struct svc_rdma_req_map *vec,
 		      int byte_count)
 {
 	struct ib_send_wr send_wr;
@@ -413,10 +393,15 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
 
 	/* Determine how many of our SGE are to be transmitted */
-	for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) {
-		sge_bytes = min((size_t)ctxt->sge[sge_no].length,
-				(size_t)byte_count);
+	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
+		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
+		ctxt->sge[sge_no].addr =
+			ib_dma_map_single(rdma->sc_cm_id->device,
+					  vec->sge[sge_no].iov_base,
+					  sge_bytes, DMA_TO_DEVICE);
+		ctxt->sge[sge_no].length = sge_bytes;
+		ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
 	}
 	BUG_ON(byte_count != 0);
 
@@ -428,8 +413,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
 		ctxt->count++;
 		rqstp->rq_respages[page_no] = NULL;
+		/* If there are more pages than SGE, terminate SGE list */
+		if (page_no+1 >= sge_no)
+			ctxt->sge[page_no+1].length = 0;
 	}
-
 	BUG_ON(sge_no > rdma->sc_max_sge);
 	memset(&send_wr, 0, sizeof send_wr);
 	ctxt->wr_op = IB_WR_SEND;
@@ -473,20 +460,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	enum rpcrdma_proc reply_type;
 	int ret;
 	int inline_bytes;
-	struct ib_sge *sge;
-	int sge_count = 0;
 	struct page *res_page;
 	struct svc_rdma_op_ctxt *ctxt;
+	struct svc_rdma_req_map *vec;
 
 	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
 
 	/* Get the RDMA request header. */
 	rdma_argp = xdr_start(&rqstp->rq_arg);
 
-	/* Build an SGE for the XDR */
+	/* Build an req vec for the XDR */
 	ctxt = svc_rdma_get_context(rdma);
 	ctxt->direction = DMA_TO_DEVICE;
-	sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count);
+	vec = svc_rdma_get_req_map();
+	xdr_to_sge(rdma, &rqstp->rq_res, vec);
 
 	inline_bytes = rqstp->rq_res.len;
 
@@ -503,7 +490,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 
 	/* Send any write-chunk data and build resp write-list */
 	ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
-				rqstp, sge, sge_count);
+				rqstp, vec);
 	if (ret < 0) {
 		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
 		       ret);
@@ -513,7 +500,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 
 	/* Send any reply-list data and update resp reply-list */
 	ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
-				rqstp, sge, sge_count);
+				rqstp, vec);
 	if (ret < 0) {
 		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
 		       ret);
@@ -521,11 +508,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	}
 	inline_bytes -= ret;
 
-	ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count,
+	ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
 			 inline_bytes);
+	svc_rdma_put_req_map(vec);
 	dprintk("svcrdma: send_reply returns %d\n", ret);
 	return ret;
  error:
+	svc_rdma_put_req_map(vec);
 	svc_rdma_put_context(ctxt, 0);
 	put_page(res_page);
 	return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index ae90758d8e9..fc86338bcbb 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -387,10 +387,13 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 
 		switch (ctxt->wr_op) {
 		case IB_WR_SEND:
-		case IB_WR_RDMA_WRITE:
 			svc_rdma_put_context(ctxt, 1);
 			break;
 
+		case IB_WR_RDMA_WRITE:
+			svc_rdma_put_context(ctxt, 0);
+			break;
+
 		case IB_WR_RDMA_READ:
 			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
 				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-- 
cgit v1.2.3-70-g09d2


From f820c57ebf5493d4602cc00577c8b0fadd27a7b8 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Tue, 27 May 2008 17:03:14 -0500
Subject: svcrdma: Use reply and chunk map for RDMA_READ processing

Modify the RDMA_READ processing to use the reply and chunk list mapping data
types. Also add a special purpose 'hdr_count' field in in the context to hold
the header page count instead of overloading the SGE length field and
corrupting the DMA map length.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h         |  1 +
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 83 +++++++++++++++------------------
 2 files changed, 39 insertions(+), 45 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index bd8749cc808..fd5e8a1c17d 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -72,6 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
 	struct svc_rdma_op_ctxt *read_hdr;
+	int hdr_count;
 	struct list_head free_list;
 	struct xdr_buf arg;
 	struct list_head dto_q;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 06ab4841537..d25971b42a7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -112,11 +112,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
-struct chunk_sge {
-	int start;		/* sge no for this chunk */
-	int count;		/* sge count for this chunk */
-};
-
 /* Encode a read-chunk-list as an array of IB SGE
  *
  * Assumptions:
@@ -134,8 +129,8 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 			   struct svc_rqst *rqstp,
 			   struct svc_rdma_op_ctxt *head,
 			   struct rpcrdma_msg *rmsgp,
-			   struct ib_sge *sge,
-			   struct chunk_sge *ch_sge_ary,
+			   struct svc_rdma_req_map *rpl_map,
+			   struct svc_rdma_req_map *chl_map,
 			   int ch_count,
 			   int byte_count)
 {
@@ -156,22 +151,18 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 	head->arg.head[0] = rqstp->rq_arg.head[0];
 	head->arg.tail[0] = rqstp->rq_arg.tail[0];
 	head->arg.pages = &head->pages[head->count];
-	head->sge[0].length = head->count; /* save count of hdr pages */
+	head->hdr_count = head->count; /* save count of hdr pages */
 	head->arg.page_base = 0;
 	head->arg.page_len = ch_bytes;
 	head->arg.len = rqstp->rq_arg.len + ch_bytes;
 	head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
 	head->count++;
-	ch_sge_ary[0].start = 0;
+	chl_map->ch[0].start = 0;
 	while (byte_count) {
+		rpl_map->sge[sge_no].iov_base =
+			page_address(rqstp->rq_arg.pages[page_no]) + page_off;
 		sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
-		sge[sge_no].addr =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					rqstp->rq_arg.pages[page_no],
-					page_off, sge_bytes,
-					DMA_FROM_DEVICE);
-		sge[sge_no].length = sge_bytes;
-		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+		rpl_map->sge[sge_no].iov_len = sge_bytes;
 		/*
 		 * Don't bump head->count here because the same page
 		 * may be used by multiple SGE.
@@ -187,11 +178,11 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 		 * SGE, move to the next SGE
 		 */
 		if (ch_bytes == 0) {
-			ch_sge_ary[ch_no].count =
-				sge_no - ch_sge_ary[ch_no].start;
+			chl_map->ch[ch_no].count =
+				sge_no - chl_map->ch[ch_no].start;
 			ch_no++;
 			ch++;
-			ch_sge_ary[ch_no].start = sge_no;
+			chl_map->ch[ch_no].start = sge_no;
 			ch_bytes = ch->rc_target.rs_length;
 			/* If bytes remaining account for next chunk */
 			if (byte_count) {
@@ -220,18 +211,24 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 	return sge_no;
 }
 
-static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt,
-			      struct ib_sge *sge,
+static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
+			      struct svc_rdma_op_ctxt *ctxt,
+			      struct kvec *vec,
 			      u64 *sgl_offset,
 			      int count)
 {
 	int i;
 
 	ctxt->count = count;
+	ctxt->direction = DMA_FROM_DEVICE;
 	for (i = 0; i < count; i++) {
-		ctxt->sge[i].addr = sge[i].addr;
-		ctxt->sge[i].length = sge[i].length;
-		*sgl_offset = *sgl_offset + sge[i].length;
+		ctxt->sge[i].addr =
+			ib_dma_map_single(xprt->sc_cm_id->device,
+					  vec[i].iov_base, vec[i].iov_len,
+					  DMA_FROM_DEVICE);
+		ctxt->sge[i].length = vec[i].iov_len;
+		ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
+		*sgl_offset = *sgl_offset + vec[i].iov_len;
 	}
 }
 
@@ -282,34 +279,29 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	struct ib_send_wr read_wr;
 	int err = 0;
 	int ch_no;
-	struct ib_sge *sge;
 	int ch_count;
 	int byte_count;
 	int sge_count;
 	u64 sgl_offset;
 	struct rpcrdma_read_chunk *ch;
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
-	struct svc_rdma_op_ctxt *tmp_ch_ctxt;
-	struct chunk_sge *ch_sge_ary;
+	struct svc_rdma_req_map *rpl_map;
+	struct svc_rdma_req_map *chl_map;
 
 	/* If no read list is present, return 0 */
 	ch = svc_rdma_get_read_chunk(rmsgp);
 	if (!ch)
 		return 0;
 
-	/* Allocate temporary contexts to keep SGE */
-	BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge));
-	tmp_sge_ctxt = svc_rdma_get_context(xprt);
-	sge = tmp_sge_ctxt->sge;
-	tmp_ch_ctxt = svc_rdma_get_context(xprt);
-	ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
+	/* Allocate temporary reply and chunk maps */
+	rpl_map = svc_rdma_get_req_map();
+	chl_map = svc_rdma_get_req_map();
 
 	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
 	if (ch_count > RPCSVC_MAXPAGES)
 		return -EINVAL;
 	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
-				    sge, ch_sge_ary,
+				    rpl_map, chl_map,
 				    ch_count, byte_count);
 	sgl_offset = 0;
 	ch_no = 0;
@@ -331,14 +323,15 @@ next_sge:
 		read_wr.wr.rdma.remote_addr =
 			get_unaligned(&(ch->rc_target.rs_offset)) +
 			sgl_offset;
-		read_wr.sg_list = &sge[ch_sge_ary[ch_no].start];
+		read_wr.sg_list = ctxt->sge;
 		read_wr.num_sge =
-			rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count);
-		rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
+			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
+		rdma_set_ctxt_sge(xprt, ctxt,
+				  &rpl_map->sge[chl_map->ch[ch_no].start],
 				  &sgl_offset,
 				  read_wr.num_sge);
 		if (((ch+1)->rc_discrim == 0) &&
-		    (read_wr.num_sge == ch_sge_ary[ch_no].count)) {
+		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
 			/*
 			 * Mark the last RDMA_READ with a bit to
 			 * indicate all RPC data has been fetched from
@@ -358,9 +351,9 @@ next_sge:
 		}
 		atomic_inc(&rdma_stat_read);
 
-		if (read_wr.num_sge < ch_sge_ary[ch_no].count) {
-			ch_sge_ary[ch_no].count -= read_wr.num_sge;
-			ch_sge_ary[ch_no].start += read_wr.num_sge;
+		if (read_wr.num_sge < chl_map->ch[ch_no].count) {
+			chl_map->ch[ch_no].count -= read_wr.num_sge;
+			chl_map->ch[ch_no].start += read_wr.num_sge;
 			goto next_sge;
 		}
 		sgl_offset = 0;
@@ -368,8 +361,8 @@ next_sge:
 	}
 
  out:
-	svc_rdma_put_context(tmp_sge_ctxt, 0);
-	svc_rdma_put_context(tmp_ch_ctxt, 0);
+	svc_rdma_put_req_map(rpl_map);
+	svc_rdma_put_req_map(chl_map);
 
 	/* Detach arg pages. svc_recv will replenish them */
 	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
@@ -399,7 +392,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 		rqstp->rq_pages[page_no] = head->pages[page_no];
 	}
 	/* Point rq_arg.pages past header */
-	rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length];
+	rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
 	rqstp->rq_arg.page_len = head->arg.page_len;
 	rqstp->rq_arg.page_base = head->arg.page_base;
 
-- 
cgit v1.2.3-70-g09d2


From e6ab9143719ff76f0b95f0866c4d0f6c743ad2e0 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 12:08:48 -0500
Subject: svcrdma: Move the DMA unmap logic to the CQ handler

Separate DMA unmap from context destruction and perform DMA unmapping
in the SQ/RQ CQ reap functions. This is necessary to support software
based RDMA implementations that actually copy the data in their
ib_dma_unmap callback functions and architectures that don't have
cache coherent I/O busses.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fc86338bcbb..7e8ee66458e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -150,6 +150,18 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 	return ctxt;
 }
 
+static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
+{
+	struct svcxprt_rdma *xprt = ctxt->xprt;
+	int i;
+	for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
+		ib_dma_unmap_single(xprt->sc_cm_id->device,
+				    ctxt->sge[i].addr,
+				    ctxt->sge[i].length,
+				    ctxt->direction);
+	}
+}
+
 void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 {
 	struct svcxprt_rdma *xprt;
@@ -161,12 +173,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 		for (i = 0; i < ctxt->count; i++)
 			put_page(ctxt->pages[i]);
 
-	for (i = 0; i < ctxt->count; i++)
-		ib_dma_unmap_single(xprt->sc_cm_id->device,
-				    ctxt->sge[i].addr,
-				    ctxt->sge[i].length,
-				    ctxt->direction);
-
 	spin_lock_bh(&xprt->sc_ctxt_lock);
 	list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
 	spin_unlock_bh(&xprt->sc_ctxt_lock);
@@ -328,6 +334,7 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
 		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
 		ctxt->wc_status = wc.status;
 		ctxt->byte_len = wc.byte_len;
+		svc_rdma_unmap_dma(ctxt);
 		if (wc.status != IB_WC_SUCCESS) {
 			/* Close the transport */
 			dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
@@ -377,6 +384,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
 		xprt = ctxt->xprt;
 
+		svc_rdma_unmap_dma(ctxt);
 		if (wc.status != IB_WC_SUCCESS)
 			/* Close the transport */
 			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-- 
cgit v1.2.3-70-g09d2


From 87295b6c5c7fd7bbc0ce3e7f42d2adbbac7352b9 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 13:17:44 -0500
Subject: svcrdma: Add dma map count and WARN_ON

Add a dma map count in order to verify that all DMA mapping resources
have been freed when the transport is closed.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          | 1 +
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 1 +
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 3 +++
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 5 +++++
 4 files changed, 10 insertions(+)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index fd5e8a1c17d..ab93afc03c4 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -130,6 +130,7 @@ struct svcxprt_rdma {
 
 	struct ib_pd         *sc_pd;
 
+	atomic_t	     sc_dma_used;
 	atomic_t	     sc_ctxt_used;
 	struct list_head     sc_ctxt_free;
 	int		     sc_ctxt_cnt;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index d25971b42a7..b4b17f44cb2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -222,6 +222,7 @@ static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
 	ctxt->count = count;
 	ctxt->direction = DMA_FROM_DEVICE;
 	for (i = 0; i < count; i++) {
+		atomic_inc(&xprt->sc_dma_used);
 		ctxt->sge[i].addr =
 			ib_dma_map_single(xprt->sc_cm_id->device,
 					  vec[i].iov_base, vec[i].iov_len,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index bdc11a30e93..a19b22b452a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -163,6 +163,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 		sge_bytes = min((size_t)bc,
 				(size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
 		sge[sge_no].length = sge_bytes;
+		atomic_inc(&xprt->sc_dma_used);
 		sge[sge_no].addr =
 			ib_dma_map_single(xprt->sc_cm_id->device,
 					  (void *)
@@ -385,6 +386,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	ctxt->count = 1;
 
 	/* Prepare the SGE for the RPCRDMA Header */
+	atomic_inc(&rdma->sc_dma_used);
 	ctxt->sge[0].addr =
 		ib_dma_map_page(rdma->sc_cm_id->device,
 				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
@@ -396,6 +398,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
+		atomic_inc(&rdma->sc_dma_used);
 		ctxt->sge[sge_no].addr =
 			ib_dma_map_single(rdma->sc_cm_id->device,
 					  vec->sge[sge_no].iov_base,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 7e8ee66458e..6fddd588c03 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -155,6 +155,7 @@ static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 	struct svcxprt_rdma *xprt = ctxt->xprt;
 	int i;
 	for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
+		atomic_dec(&xprt->sc_dma_used);
 		ib_dma_unmap_single(xprt->sc_cm_id->device,
 				    ctxt->sge[i].addr,
 				    ctxt->sge[i].length,
@@ -519,6 +520,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	cma_xprt->sc_max_requests = svcrdma_max_requests;
 	cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
 	atomic_set(&cma_xprt->sc_sq_count, 0);
+	atomic_set(&cma_xprt->sc_ctxt_used, 0);
 
 	if (!listener) {
 		int reqs = cma_xprt->sc_max_requests;
@@ -569,6 +571,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 		BUG_ON(sge_no >= xprt->sc_max_sge);
 		page = svc_rdma_get_page();
 		ctxt->pages[sge_no] = page;
+		atomic_inc(&xprt->sc_dma_used);
 		pa = ib_dma_map_page(xprt->sc_cm_id->device,
 				     page, 0, PAGE_SIZE,
 				     DMA_FROM_DEVICE);
@@ -1049,6 +1052,7 @@ static void __svc_rdma_free(struct work_struct *work)
 
 	/* Warn if we leaked a resource or under-referenced */
 	WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
+	WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
 
 	/* Destroy the QP if present (not a listener) */
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1169,6 +1173,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
 
 	/* Prepare SGE for local address */
+	atomic_inc(&xprt->sc_dma_used);
 	sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
 				   p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
 	sge.lkey = xprt->sc_phys_mr->lkey;
-- 
cgit v1.2.3-70-g09d2


From 94dba4918d4570bfa98776e54a5fa527c848dc78 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 13:20:24 -0500
Subject: svcrdma: Remove unneeded spin locks from __svc_rdma_free

At the time __svc_rdma_free is called, we are guaranteed that all references
to this transport are gone. There is, therefore, no need to protect the
resource lists with a spin lock.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 6fddd588c03..7647789a1f6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1027,7 +1027,6 @@ static void __svc_rdma_free(struct work_struct *work)
 	 * cm_id because the device ptr is needed to unmap the dma in
 	 * svc_rdma_put_context.
 	 */
-	spin_lock_bh(&rdma->sc_read_complete_lock);
 	while (!list_empty(&rdma->sc_read_complete_q)) {
 		struct svc_rdma_op_ctxt *ctxt;
 		ctxt = list_entry(rdma->sc_read_complete_q.next,
@@ -1036,10 +1035,8 @@ static void __svc_rdma_free(struct work_struct *work)
 		list_del_init(&ctxt->dto_q);
 		svc_rdma_put_context(ctxt, 1);
 	}
-	spin_unlock_bh(&rdma->sc_read_complete_lock);
 
 	/* Destroy queued, but not processed recv completions */
-	spin_lock_bh(&rdma->sc_rq_dto_lock);
 	while (!list_empty(&rdma->sc_rq_dto_q)) {
 		struct svc_rdma_op_ctxt *ctxt;
 		ctxt = list_entry(rdma->sc_rq_dto_q.next,
@@ -1048,7 +1045,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		list_del_init(&ctxt->dto_q);
 		svc_rdma_put_context(ctxt, 1);
 	}
-	spin_unlock_bh(&rdma->sc_rq_dto_lock);
 
 	/* Warn if we leaked a resource or under-referenced */
 	WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
-- 
cgit v1.2.3-70-g09d2


From 36ef25e464dbc5820c22a9b2f619b787eda594df Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Mon, 19 May 2008 19:00:24 -0500
Subject: svcrdma: Limit ORD based on client's advertised IRD

When adapters have differing IRD limits, the RDMA transport will fail to
connect properly. The RDMA transport should use the client's advertised
inbound read limit when computing its outbound read limit. For iWARP
transports, there is currently no standard for exchanging IRD/ORD
during connection establishment so the 'responder_resources' field in the
connect event is the local device's limit. The RDMA transport can be
configured to use a smaller ORD by writing the desired number to the
/proc/sys/sunrpc/svc_rdma/max_outbound_read_requests file.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 7647789a1f6..80104f4999d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -606,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
  * will call the recvfrom method on the listen xprt which will accept the new
  * connection.
  */
-static void handle_connect_req(struct rdma_cm_id *new_cma_id)
+static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
 {
 	struct svcxprt_rdma *listen_xprt = new_cma_id->context;
 	struct svcxprt_rdma *newxprt;
@@ -623,6 +623,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
 	dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
 		newxprt, newxprt->sc_cm_id, listen_xprt);
 
+	/* Save client advertised inbound read limit for use later in accept. */
+	newxprt->sc_ord = client_ird;
+
 	/* Set the local and remote addresses in the transport */
 	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
 	svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
@@ -659,7 +662,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
 	case RDMA_CM_EVENT_CONNECT_REQUEST:
 		dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
 			"event=%d\n", cma_id, cma_id->context, event->event);
-		handle_connect_req(cma_id);
+		handle_connect_req(cma_id,
+				   event->param.conn.responder_resources);
 		break;
 
 	case RDMA_CM_EVENT_ESTABLISHED:
@@ -833,8 +837,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 				   (size_t)svcrdma_max_requests);
 	newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
 
-	newxprt->sc_ord =  min((size_t)devattr.max_qp_rd_atom,
-			       (size_t)svcrdma_ord);
+	/*
+	 * Limit ORD based on client limit, local device limit, and
+	 * configured svcrdma limit.
+	 */
+	newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
+	newxprt->sc_ord = min_t(size_t,	svcrdma_ord, newxprt->sc_ord);
 
 	newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
 	if (IS_ERR(newxprt->sc_pd)) {
-- 
cgit v1.2.3-70-g09d2


From 902a94e0889be1f9fcefc0e1b602b06136e01812 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 13:57:05 -0500
Subject: svcrdma: Add flush_scheduled_work to module exit function

Make certain all transports pending free are flushed from the wq
before unloading the module.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 171f2053e90..527acfd3828 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -239,6 +239,7 @@ static ctl_table svcrdma_root_table[] = {
 void svc_rdma_cleanup(void)
 {
 	dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
+	flush_scheduled_work();
 	if (svcrdma_table_header) {
 		unregister_sysctl_table(svcrdma_table_header);
 		svcrdma_table_header = NULL;
-- 
cgit v1.2.3-70-g09d2


From bf5927d84e70d522f234ca247b27d27c63878b93 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 14:05:54 -0500
Subject: svcrdma: Create a kmem cache for the WR contexts

Create a kmem cache to hold WR contexts. Next we will convert
the WR context get and put services to use this kmem cache.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 527acfd3828..87101177825 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -69,8 +69,9 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
-/* Temporary NFS request map cache */
+/* Temporary NFS request map and context caches */
 struct kmem_cache *svc_rdma_map_cachep;
+struct kmem_cache *svc_rdma_ctxt_cachep;
 
 /*
  * This function implements reading and resetting an atomic_t stat
@@ -246,6 +247,7 @@ void svc_rdma_cleanup(void)
 	}
 	svc_unreg_xprt_class(&svc_rdma_class);
 	kmem_cache_destroy(svc_rdma_map_cachep);
+	kmem_cache_destroy(svc_rdma_ctxt_cachep);
 }
 
 int svc_rdma_init(void)
@@ -268,14 +270,27 @@ int svc_rdma_init(void)
 						NULL);
 	if (!svc_rdma_map_cachep) {
 		printk(KERN_INFO "Could not allocate map cache.\n");
-		goto err;
+		goto err0;
+	}
+
+	/* Create the temporary context cache */
+	svc_rdma_ctxt_cachep =
+		kmem_cache_create("svc_rdma_ctxt_cache",
+				  sizeof(struct svc_rdma_op_ctxt),
+				  0,
+				  SLAB_HWCACHE_ALIGN,
+				  NULL);
+	if (!svc_rdma_ctxt_cachep) {
+		printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
+		goto err1;
 	}
 
 	/* Register RDMA with the SVC transport switch */
 	svc_reg_xprt_class(&svc_rdma_class);
 	return 0;
-
- err:
+ err1:
+	kmem_cache_destroy(svc_rdma_map_cachep);
+ err0:
 	unregister_sysctl_table(svcrdma_table_header);
 	return -ENOMEM;
 }
-- 
cgit v1.2.3-70-g09d2


From 8948896c9e098c6fd31a6a698a598a7cbd7fa40e Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 15:14:02 -0500
Subject: svcrdma: Change WR context get/put to use the kmem cache

Change the WR context pool to be shared across mount points. This
reduces the RDMA transport memory footprint significantly since
idle mounts don't consume WR context memory.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |   6 --
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 121 +++----------------------------
 2 files changed, 12 insertions(+), 115 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d8d74c4ab50..ef2e3a20bf3 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -73,7 +73,6 @@ extern atomic_t rdma_stat_sq_prod;
 struct svc_rdma_op_ctxt {
 	struct svc_rdma_op_ctxt *read_hdr;
 	int hdr_count;
-	struct list_head free_list;
 	struct xdr_buf arg;
 	struct list_head dto_q;
 	enum ib_wr_opcode wr_op;
@@ -131,11 +130,6 @@ struct svcxprt_rdma {
 
 	atomic_t	     sc_dma_used;
 	atomic_t	     sc_ctxt_used;
-	struct list_head     sc_ctxt_free;
-	int		     sc_ctxt_cnt;
-	int		     sc_ctxt_bump;
-	int		     sc_ctxt_max;
-	spinlock_t	     sc_ctxt_lock;
 	struct list_head     sc_rq_dto_q;
 	spinlock_t	     sc_rq_dto_lock;
 	struct ib_qp         *sc_qp;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 80104f4999d..19ddc382b77 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -84,69 +84,23 @@ struct svc_xprt_class svc_rdma_class = {
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
 };
 
-static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
-{
-	int target;
-	int at_least_one = 0;
-	struct svc_rdma_op_ctxt *ctxt;
-
-	target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump,
-		     xprt->sc_ctxt_max);
-
-	spin_lock_bh(&xprt->sc_ctxt_lock);
-	while (xprt->sc_ctxt_cnt < target) {
-		xprt->sc_ctxt_cnt++;
-		spin_unlock_bh(&xprt->sc_ctxt_lock);
-
-		ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
-
-		spin_lock_bh(&xprt->sc_ctxt_lock);
-		if (ctxt) {
-			at_least_one = 1;
-			INIT_LIST_HEAD(&ctxt->free_list);
-			list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
-		} else {
-			/* kmalloc failed...give up for now */
-			xprt->sc_ctxt_cnt--;
-			break;
-		}
-	}
-	spin_unlock_bh(&xprt->sc_ctxt_lock);
-	dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n",
-		xprt->sc_ctxt_max, xprt->sc_ctxt_cnt);
-	return at_least_one;
-}
+/* WR context cache. Created in svc_rdma.c  */
+extern struct kmem_cache *svc_rdma_ctxt_cachep;
 
 struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 {
 	struct svc_rdma_op_ctxt *ctxt;
 
 	while (1) {
-		spin_lock_bh(&xprt->sc_ctxt_lock);
-		if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
-			/* Try to bump my cache. */
-			spin_unlock_bh(&xprt->sc_ctxt_lock);
-
-			if (rdma_bump_context_cache(xprt))
-				continue;
-
-			printk(KERN_INFO "svcrdma: sleeping waiting for "
-			       "context memory on xprt=%p\n",
-			       xprt);
-			schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-			continue;
-		}
-		ctxt = list_entry(xprt->sc_ctxt_free.next,
-				  struct svc_rdma_op_ctxt,
-				  free_list);
-		list_del_init(&ctxt->free_list);
-		spin_unlock_bh(&xprt->sc_ctxt_lock);
-		ctxt->xprt = xprt;
-		INIT_LIST_HEAD(&ctxt->dto_q);
-		ctxt->count = 0;
-		atomic_inc(&xprt->sc_ctxt_used);
-		break;
+		ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
+		if (ctxt)
+			break;
+		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 	}
+	ctxt->xprt = xprt;
+	INIT_LIST_HEAD(&ctxt->dto_q);
+	ctxt->count = 0;
+	atomic_inc(&xprt->sc_ctxt_used);
 	return ctxt;
 }
 
@@ -174,9 +128,7 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 		for (i = 0; i < ctxt->count; i++)
 			put_page(ctxt->pages[i]);
 
-	spin_lock_bh(&xprt->sc_ctxt_lock);
-	list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
-	spin_unlock_bh(&xprt->sc_ctxt_lock);
+	kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
 	atomic_dec(&xprt->sc_ctxt_used);
 }
 
@@ -461,40 +413,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
 	tasklet_schedule(&dto_tasklet);
 }
 
-static void create_context_cache(struct svcxprt_rdma *xprt,
-				 int ctxt_count, int ctxt_bump, int ctxt_max)
-{
-	struct svc_rdma_op_ctxt *ctxt;
-	int i;
-
-	xprt->sc_ctxt_max = ctxt_max;
-	xprt->sc_ctxt_bump = ctxt_bump;
-	xprt->sc_ctxt_cnt = 0;
-	atomic_set(&xprt->sc_ctxt_used, 0);
-
-	INIT_LIST_HEAD(&xprt->sc_ctxt_free);
-	for (i = 0; i < ctxt_count; i++) {
-		ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
-		if (ctxt) {
-			INIT_LIST_HEAD(&ctxt->free_list);
-			list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
-			xprt->sc_ctxt_cnt++;
-		}
-	}
-}
-
-static void destroy_context_cache(struct svcxprt_rdma *xprt)
-{
-	while (!list_empty(&xprt->sc_ctxt_free)) {
-		struct svc_rdma_op_ctxt *ctxt;
-		ctxt = list_entry(xprt->sc_ctxt_free.next,
-				  struct svc_rdma_op_ctxt,
-				  free_list);
-		list_del_init(&ctxt->free_list);
-		kfree(ctxt);
-	}
-}
-
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 					     int listener)
 {
@@ -511,7 +429,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_read_complete_lock);
-	spin_lock_init(&cma_xprt->sc_ctxt_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
 
 	cma_xprt->sc_ord = svcrdma_ord;
@@ -522,20 +439,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	atomic_set(&cma_xprt->sc_sq_count, 0);
 	atomic_set(&cma_xprt->sc_ctxt_used, 0);
 
-	if (!listener) {
-		int reqs = cma_xprt->sc_max_requests;
-		create_context_cache(cma_xprt,
-				     reqs << 1, /* starting size */
-				     reqs,	/* bump amount */
-				     reqs +
-				     cma_xprt->sc_sq_depth +
-				     RPCRDMA_MAX_THREADS + 1); /* max */
-		if (list_empty(&cma_xprt->sc_ctxt_free)) {
-			kfree(cma_xprt);
-			return NULL;
-		}
-		clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
-	} else
+	if (listener)
 		set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
 
 	return cma_xprt;
@@ -1077,7 +981,6 @@ static void __svc_rdma_free(struct work_struct *work)
 	/* Destroy the CM ID */
 	rdma_destroy_id(rdma->sc_cm_id);
 
-	destroy_context_cache(rdma);
 	kfree(rdma);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 40b215e594b65a3488576c9d24b367548e18902a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 3 Jul 2008 01:05:41 -0700
Subject: tcp: de-bloat a bit with factoring NET_INC_STATS_BH out

There are some places in TCP that select one MIB index to
bump snmp statistics like this:

	if (<something>)
		NET_INC_STATS_BH(<some_id>);
	else if (<something_else>)
		NET_INC_STATS_BH(<some_other_id>);
	...
	else
		NET_INC_STATS_BH(<default_id>);

or in a more tricky but still similar way.

On the other hand, this NET_INC_STATS_BH is a camouflaged
increment of percpu variable, which is not that small.

Factoring those cases out de-bloats 235 bytes on non-preemptible
i386 config and drives parts of the code into 80 columns.

add/remove: 0/0 grow/shrink: 0/7 up/down: 0/-235 (-235)
function                                     old     new   delta
tcp_fastretrans_alert                       1437    1424     -13
tcp_dsack_set                                137     124     -13
tcp_xmit_retransmit_queue                    690     676     -14
tcp_try_undo_recovery                        283     265     -18
tcp_sacktag_write_queue                     1550    1515     -35
tcp_update_reordering                        162     106     -56
tcp_retransmit_timer                         990     904     -86

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  | 46 ++++++++++++++++++++++++++++++++--------------
 net/ipv4/tcp_output.c |  7 +++++--
 net/ipv4/tcp_timer.c  | 15 +++++++++------
 3 files changed, 46 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index de30e70ff25..d6ea970a151 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -947,17 +947,21 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	if (metric > tp->reordering) {
+		int mib_idx;
+
 		tp->reordering = min(TCP_MAX_REORDERING, metric);
 
 		/* This exciting event is worth to be remembered. 8) */
 		if (ts)
-			NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER);
+			mib_idx = LINUX_MIB_TCPTSREORDER;
 		else if (tcp_is_reno(tp))
-			NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER);
+			mib_idx = LINUX_MIB_TCPRENOREORDER;
 		else if (tcp_is_fack(tp))
-			NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER);
+			mib_idx = LINUX_MIB_TCPFACKREORDER;
 		else
-			NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
+			mib_idx = LINUX_MIB_TCPSACKREORDER;
+
+		NET_INC_STATS_BH(mib_idx);
 #if FASTRETRANS_DEBUG > 1
 		printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
 		       tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -1456,18 +1460,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 		if (!tcp_is_sackblock_valid(tp, dup_sack,
 					    sp[used_sacks].start_seq,
 					    sp[used_sacks].end_seq)) {
+			int mib_idx;
+
 			if (dup_sack) {
 				if (!tp->undo_marker)
-					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
+					mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
 				else
-					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD);
+					mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
 			} else {
 				/* Don't count olds caused by ACK reordering */
 				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
 				    !after(sp[used_sacks].end_seq, tp->snd_una))
 					continue;
-				NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
+				mib_idx = LINUX_MIB_TCPSACKDISCARD;
 			}
+
+			NET_INC_STATS_BH(mib_idx);
 			if (i == 0)
 				first_sack_index = -1;
 			continue;
@@ -2380,15 +2388,19 @@ static int tcp_try_undo_recovery(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_may_undo(tp)) {
+		int mib_idx;
+
 		/* Happy end! We did not retransmit anything
 		 * or our original transmission succeeded.
 		 */
 		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
 		tcp_undo_cwr(sk, 1);
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
-			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
+			mib_idx = LINUX_MIB_TCPLOSSUNDO;
 		else
-			NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);
+			mib_idx = LINUX_MIB_TCPFULLUNDO;
+
+		NET_INC_STATS_BH(mib_idx);
 		tp->undo_marker = 0;
 	}
 	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
@@ -2560,7 +2572,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
 	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
 				    (tcp_fackets_out(tp) > tp->reordering));
-	int fast_rexmit = 0;
+	int fast_rexmit = 0, mib_idx;
 
 	if (WARN_ON(!tp->packets_out && tp->sacked_out))
 		tp->sacked_out = 0;
@@ -2683,9 +2695,11 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 		/* Otherwise enter Recovery state */
 
 		if (tcp_is_reno(tp))
-			NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY);
+			mib_idx = LINUX_MIB_TCPRENORECOVERY;
 		else
-			NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY);
+			mib_idx = LINUX_MIB_TCPSACKRECOVERY;
+
+		NET_INC_STATS_BH(mib_idx);
 
 		tp->high_seq = tp->snd_nxt;
 		tp->prior_ssthresh = 0;
@@ -3700,10 +3714,14 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
 static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
 	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+		int mib_idx;
+
 		if (before(seq, tp->rcv_nxt))
-			NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT);
+			mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
 		else
-			NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT);
+			mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
+
+		NET_INC_STATS_BH(mib_idx);
 
 		tp->rx_opt.dsack = 1;
 		tp->duplicate_sack[0].start_seq = seq;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8f83ab43270..edef2afe905 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1985,14 +1985,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
 			if (sacked & TCPCB_LOST) {
 				if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
+					int mib_idx;
+
 					if (tcp_retransmit_skb(sk, skb)) {
 						tp->retransmit_skb_hint = NULL;
 						return;
 					}
 					if (icsk->icsk_ca_state != TCP_CA_Loss)
-						NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
+						mib_idx = LINUX_MIB_TCPFASTRETRANS;
 					else
-						NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
+						mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
+					NET_INC_STATS_BH(mib_idx);
 
 					if (skb == tcp_write_queue_head(sk))
 						inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3e358cbb124..6a480d1fd8f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -326,24 +326,27 @@ static void tcp_retransmit_timer(struct sock *sk)
 		goto out;
 
 	if (icsk->icsk_retransmits == 0) {
+		int mib_idx;
+
 		if (icsk->icsk_ca_state == TCP_CA_Disorder ||
 		    icsk->icsk_ca_state == TCP_CA_Recovery) {
 			if (tcp_is_sack(tp)) {
 				if (icsk->icsk_ca_state == TCP_CA_Recovery)
-					NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
+					mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
 				else
-					NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
+					mib_idx = LINUX_MIB_TCPSACKFAILURES;
 			} else {
 				if (icsk->icsk_ca_state == TCP_CA_Recovery)
-					NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
+					mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
 				else
-					NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
+					mib_idx = LINUX_MIB_TCPRENOFAILURES;
 			}
 		} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
-			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
+			mib_idx = LINUX_MIB_TCPLOSSFAILURES;
 		} else {
-			NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
+			mib_idx = LINUX_MIB_TCPTIMEOUTS;
 		}
+		NET_INC_STATS_BH(mib_idx);
 	}
 
 	if (tcp_use_frto(sk)) {
-- 
cgit v1.2.3-70-g09d2


From d68b82705a4a754e5773f412c6b8f1e65259bc8b Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 25 Jun 2008 16:26:47 +0900
Subject: ipv6: Do not assign non-valid address on interface.

Check the type of the address when adding a new one on interface.
- the unspecified address (::) is always disallowed (RFC4291 2.5.2)
- the loopback address is disallowed unless the interface is (one of)
  loopback (RFC4291 2.5.3).
- multicast addresses are disallowed.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/addrconf.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 84127d854cf..8b6875f0203 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -578,6 +578,13 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	struct rt6_info *rt;
 	int hash;
 	int err = 0;
+	int addr_type = ipv6_addr_type(addr);
+
+	if (addr_type == IPV6_ADDR_ANY ||
+	    addr_type & IPV6_ADDR_MULTICAST ||
+	    (!(idev->dev->flags & IFF_LOOPBACK) &&
+	     addr_type & IPV6_ADDR_LOOPBACK))
+		return ERR_PTR(-EADDRNOTAVAIL);
 
 	rcu_read_lock_bh();
 	if (idev->dead) {
-- 
cgit v1.2.3-70-g09d2


From f81b2e7d8cf8c6a52b7a5224c3b89cee5aeb6811 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 25 Jun 2008 16:55:26 +0900
Subject: ipv6: Do not forward packets with the unspecified source address.

RFC4291 2.5.2.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ip6_output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index fd7cd1bfe15..871bdec09ed 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -498,7 +498,8 @@ int ip6_forward(struct sk_buff *skb)
 		int addrtype = ipv6_addr_type(&hdr->saddr);
 
 		/* This check is security critical. */
-		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
+		if (addrtype == IPV6_ADDR_ANY ||
+		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 			goto error;
 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
-- 
cgit v1.2.3-70-g09d2


From 5ce83afaac956238c3c25f60a899c511e9d8cbf4 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 25 Jun 2008 16:58:17 +0900
Subject: ipv6: Assume the loopback address in link-local scope.

Handle interface property strictly when looking up a route
for the loopback address (RFC4291 2.5.3).

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 751e98f9b8b..dbad96c58ba 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -228,7 +228,7 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 static inline int rt6_need_strict(struct in6_addr *daddr)
 {
 	return (ipv6_addr_type(daddr) &
-		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 778d80be52699596bf70e0eb0761cf5e1e46088d Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 28 Jun 2008 14:17:11 +0900
Subject: ipv6: Add disable_ipv6 sysctl to disable IPv6 operaion on specific
 interface.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 Documentation/networking/ip-sysctl.txt |  4 ++++
 include/linux/ipv6.h                   |  2 ++
 net/ipv6/addrconf.c                    | 11 +++++++++++
 net/ipv6/ip6_input.c                   |  3 ++-
 net/ipv6/ip6_output.c                  |  7 +++++++
 5 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 71c7bea9716..dae980e8f1b 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1025,6 +1025,10 @@ max_addresses - INTEGER
 	autoconfigured addresses.
 	Default: 16
 
+disable_ipv6 - BOOLEAN
+	Disable IPv6 operation.
+	Default: FALSE (enable IPv6 operation)
+
 icmp/*:
 ratelimit - INTEGER
 	Limit the maximal rates for sending ICMPv6 packets.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cde056e0818..d9d7f9b69eb 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -163,6 +163,7 @@ struct ipv6_devconf {
 #ifdef CONFIG_IPV6_MROUTE
 	__s32		mc_forwarding;
 #endif
+	__s32		disable_ipv6;
 	void		*sysctl;
 };
 
@@ -194,6 +195,7 @@ enum {
 	DEVCONF_OPTIMISTIC_DAD,
 	DEVCONF_ACCEPT_SOURCE_ROUTE,
 	DEVCONF_MC_FORWARDING,
+	DEVCONF_DISABLE_IPV6,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8b6875f0203..8c5cff50bbe 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -183,6 +183,7 @@ struct ipv6_devconf ipv6_devconf __read_mostly = {
 #endif
 	.proxy_ndp		= 0,
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
+	.disable_ipv6		= 0,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -215,6 +216,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 #endif
 	.proxy_ndp		= 0,
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
+	.disable_ipv6		= 0,
 };
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -3657,6 +3659,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 #ifdef CONFIG_IPV6_MROUTE
 	array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
 #endif
+	array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -4215,6 +4218,14 @@ static struct addrconf_sysctl_table
 			.proc_handler	=	&proc_dointvec,
 		},
 #endif
+		{
+			.ctl_name	=	CTL_UNNUMBERED,
+			.procname	=	"disable_ipv6",
+			.data		=	&ipv6_devconf.disable_ipv6,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	&proc_dointvec,
+		},
 		{
 			.ctl_name	=	0,	/* sentinel */
 		}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 34e5a96623a..ea81c614dde 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -71,7 +71,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 
 	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES);
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
+	    !idev || unlikely(idev->cnf.disable_ipv6)) {
 		IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
 		rcu_read_unlock();
 		goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 871bdec09ed..0981c1ef305 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -173,6 +173,13 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 
 int ip6_output(struct sk_buff *skb)
 {
+	struct inet6_dev *idev = ip6_dst_idev(skb->dst);
+	if (unlikely(idev->cnf.disable_ipv6)) {
+		IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
+		kfree_skb(skb);
+		return 0;
+	}
+
 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 				dst_allfrag(skb->dst))
 		return ip6_fragment(skb, ip6_output2);
-- 
cgit v1.2.3-70-g09d2


From 1b34be74cbf18f5d58cc85c7c4afcd9f7d74accd Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 28 Jun 2008 14:18:38 +0900
Subject: ipv6 addrconf: add accept_dad sysctl to control DAD operation.

- If 0, disable DAD.
- If 1, perform DAD (default).
- If >1, perform DAD and disable IPv6 operation if DAD for MAC-based
  link-local address has been failed (RFC4862 5.4.5).

We do not follow RFC4862 by default.  Refer to the netdev thread entitled
"Linux IPv6 DAD not full conform to RFC 4862 ?"
	http://www.spinics.net/lists/netdev/msg52027.html

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 Documentation/networking/ip-sysctl.txt |  7 +++++++
 include/linux/ipv6.h                   |  2 ++
 net/ipv6/addrconf.c                    | 35 ++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

(limited to 'net')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index dae980e8f1b..72f6d52e52e 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1029,6 +1029,13 @@ disable_ipv6 - BOOLEAN
 	Disable IPv6 operation.
 	Default: FALSE (enable IPv6 operation)
 
+accept_dad - INTEGER
+	Whether to accept DAD (Duplicate Address Detection).
+	0: Disable DAD
+	1: Enable DAD (default)
+	2: Enable DAD, and disable IPv6 operation if MAC-based duplicate
+	   link-local address has been found.
+
 icmp/*:
 ratelimit - INTEGER
 	Limit the maximal rates for sending ICMPv6 packets.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index d9d7f9b69eb..391ad0843a4 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -164,6 +164,7 @@ struct ipv6_devconf {
 	__s32		mc_forwarding;
 #endif
 	__s32		disable_ipv6;
+	__s32		accept_dad;
 	void		*sysctl;
 };
 
@@ -196,6 +197,7 @@ enum {
 	DEVCONF_ACCEPT_SOURCE_ROUTE,
 	DEVCONF_MC_FORWARDING,
 	DEVCONF_DISABLE_IPV6,
+	DEVCONF_ACCEPT_DAD,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8c5cff50bbe..2ec73e62202 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -119,6 +119,7 @@ static void ipv6_regen_rndid(unsigned long data);
 static int desync_factor = MAX_DESYNC_FACTOR * HZ;
 #endif
 
+static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static int ipv6_count_addresses(struct inet6_dev *idev);
 
 /*
@@ -184,6 +185,7 @@ struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.proxy_ndp		= 0,
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 	.disable_ipv6		= 0,
+	.accept_dad		= 1,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -217,6 +219,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.proxy_ndp		= 0,
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 	.disable_ipv6		= 0,
+	.accept_dad		= 1,
 };
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -380,6 +383,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	 */
 	in6_dev_hold(ndev);
 
+	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
+		ndev->cnf.accept_dad = -1;
+
 #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
 	if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
 		printk(KERN_INFO
@@ -1421,6 +1427,20 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
 
 void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 {
+	struct inet6_dev *idev = ifp->idev;
+	if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
+		struct in6_addr addr;
+
+		addr.s6_addr32[0] = htonl(0xfe800000);
+		addr.s6_addr32[1] = 0;
+
+		if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
+		    ipv6_addr_equal(&ifp->addr, &addr)) {
+			/* DAD failed for link-local based on MAC address */
+			idev->cnf.disable_ipv6 = 1;
+		}
+	}
+
 	if (net_ratelimit())
 		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
 	addrconf_dad_stop(ifp);
@@ -2753,6 +2773,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 	spin_lock_bh(&ifp->lock);
 
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+	    idev->cnf.accept_dad < 1 ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
 	    ifp->flags & IFA_F_NODAD) {
 		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
@@ -2800,6 +2821,11 @@ static void addrconf_dad_timer(unsigned long data)
 		read_unlock_bh(&idev->lock);
 		goto out;
 	}
+	if (idev->cnf.accept_dad > 1 && idev->cnf.disable_ipv6) {
+		read_unlock_bh(&idev->lock);
+		addrconf_dad_failure(ifp);
+		return;
+	}
 	spin_lock_bh(&ifp->lock);
 	if (ifp->probes == 0) {
 		/*
@@ -3660,6 +3686,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
 #endif
 	array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
+	array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -4226,6 +4253,14 @@ static struct addrconf_sysctl_table
 			.mode		=	0644,
 			.proc_handler	=	&proc_dointvec,
 		},
+		{
+			.ctl_name	=	CTL_UNNUMBERED,
+			.procname	=	"accept_dad",
+			.data		=	&ipv6_devconf.accept_dad,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	&proc_dointvec,
+		},
 		{
 			.ctl_name	=	0,	/* sentinel */
 		}
-- 
cgit v1.2.3-70-g09d2


From dd3abc4ef52597ec8268274222574b2700ba3ded Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 2 Jul 2008 18:30:18 +0900
Subject: ipv6 route: Prefer outgoing interface with source address assigned.

Outgoing interface is selected by the route decision if unspecified.
Let's prefer routes via interface(s) with the address assigned if we
have multiple routes with same cost.
With help from Naohiro Ooiwa <nooiwa@miraclelinux.com>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/route.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dbad96c58ba..5d6c166dfbb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -237,15 +237,20 @@ static inline int rt6_need_strict(struct in6_addr *daddr)
 
 static inline struct rt6_info *rt6_device_match(struct net *net,
 						    struct rt6_info *rt,
+						    struct in6_addr *saddr,
 						    int oif,
 						    int flags)
 {
 	struct rt6_info *local = NULL;
 	struct rt6_info *sprt;
 
-	if (oif) {
-		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
-			struct net_device *dev = sprt->rt6i_dev;
+	if (!oif && ipv6_addr_any(saddr))
+		goto out;
+
+	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
+		struct net_device *dev = sprt->rt6i_dev;
+
+		if (oif) {
 			if (dev->ifindex == oif)
 				return sprt;
 			if (dev->flags & IFF_LOOPBACK) {
@@ -259,14 +264,21 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 				}
 				local = sprt;
 			}
+		} else {
+			if (ipv6_chk_addr(net, saddr, dev,
+					  flags & RT6_LOOKUP_F_IFACE))
+				return sprt;
 		}
+	}
 
+	if (oif) {
 		if (local)
 			return local;
 
 		if (flags & RT6_LOOKUP_F_IFACE)
 			return net->ipv6.ip6_null_entry;
 	}
+out:
 	return rt;
 }
 
@@ -539,7 +551,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 restart:
 	rt = fn->leaf;
-	rt = rt6_device_match(net, rt, fl->oif, flags);
+	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
 	BACKTRACK(net, &fl->fl6_src);
 out:
 	dst_use(&rt->u.dst, jiffies);
-- 
cgit v1.2.3-70-g09d2


From 623d1a1af77bd52a389c6eda5920e28eb2ee468b Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Thu, 3 Jul 2008 12:13:30 +0800
Subject: ipv6: Do cleanup for ip6_mr_init.

If do not do it, we will get following issues:
1. Leaving junks after inet6_init failing halfway.
2. Leaving proc and notifier junks after ipv6 modules unloading.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/mroute6.h |  3 ++-
 net/ipv6/af_inet6.c     | 11 ++++++++++-
 net/ipv6/ip6mr.c        | 38 +++++++++++++++++++++++++++++++++-----
 3 files changed, 45 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index e7989593142..4c4d6f57d5c 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -135,7 +135,8 @@ extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, int);
 extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
 extern int ip6_mr_input(struct sk_buff *skb);
 extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg);
-extern void ip6_mr_init(void);
+extern int ip6_mr_init(void);
+extern void ip6_mr_cleanup(void);
 
 struct mif_device
 {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3ce8d2f318c..6b39af1acb5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -953,7 +953,9 @@ static int __init inet6_init(void)
 	if (err)
 		goto icmp_fail;
 #ifdef CONFIG_IPV6_MROUTE
-	ip6_mr_init();
+	err = ip6_mr_init();
+	if (err)
+		goto ipmr_fail;
 #endif
 	err = ndisc_init();
 	if (err)
@@ -1057,6 +1059,10 @@ netfilter_fail:
 igmp_fail:
 	ndisc_cleanup();
 ndisc_fail:
+#ifdef CONFIG_IPV6_MROUTE
+	ip6_mr_cleanup();
+ipmr_fail:
+#endif
 	icmpv6_cleanup();
 icmp_fail:
 	unregister_pernet_subsys(&inet6_net_ops);
@@ -1111,6 +1117,9 @@ static void __exit inet6_exit(void)
 	ipv6_netfilter_fini();
 	igmp6_cleanup();
 	ndisc_cleanup();
+#ifdef CONFIG_IPV6_MROUTE
+	ip6_mr_cleanup();
+#endif
 	icmpv6_cleanup();
 	rawv6_exit();
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 90e763073dc..cfac26d674e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -948,23 +948,51 @@ static struct notifier_block ip6_mr_notifier = {
  *	Setup for IP multicast routing
  */
 
-void __init ip6_mr_init(void)
+int __init ip6_mr_init(void)
 {
+	int err;
+
 	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
 				       sizeof(struct mfc6_cache),
 				       0, SLAB_HWCACHE_ALIGN,
 				       NULL);
 	if (!mrt_cachep)
-		panic("cannot allocate ip6_mrt_cache");
+		return -ENOMEM;
 
 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
-	register_netdevice_notifier(&ip6_mr_notifier);
+	err = register_netdevice_notifier(&ip6_mr_notifier);
+	if (err)
+		goto reg_notif_fail;
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
+		goto proc_vif_fail;
+	if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
+				     0, &ip6mr_mfc_fops))
+		goto proc_cache_fail;
+#endif
+	return 0;
+reg_notif_fail:
+	kmem_cache_destroy(mrt_cachep);
 #ifdef CONFIG_PROC_FS
-	proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops);
-	proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops);
+proc_vif_fail:
+	unregister_netdevice_notifier(&ip6_mr_notifier);
+proc_cache_fail:
+	proc_net_remove(&init_net, "ip6_mr_vif");
 #endif
+	return err;
 }
 
+void ip6_mr_cleanup(void)
+{
+#ifdef CONFIG_PROC_FS
+	proc_net_remove(&init_net, "ip6_mr_cache");
+	proc_net_remove(&init_net, "ip6_mr_vif");
+#endif
+	unregister_netdevice_notifier(&ip6_mr_notifier);
+	del_timer(&ipmr_expire_timer);
+	kmem_cache_destroy(mrt_cachep);
+}
 
 static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 {
-- 
cgit v1.2.3-70-g09d2


From 03d2f897e9fb3218989baa2139a951ce7f5414bf Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Thu, 3 Jul 2008 12:13:36 +0800
Subject: ipv4: Do cleanup for ip_mr_init

Same as ip6_mr_init(), make ip_mr_init() return errno if fails.
But do not do error handling in inet_init(), just print a msg.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/igmp.h   |  1 -
 include/linux/mroute.h |  3 +--
 net/ipv4/af_inet.c     |  5 +++--
 net/ipv4/ipmr.c        | 28 ++++++++++++++++++++++++----
 4 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index f5a1a0db2e8..7bb3c095c15 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -228,7 +228,6 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 		struct group_filter __user *optval, int __user *optlen);
 extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif);
-extern void ip_mr_init(void);
 extern void ip_mc_init_dev(struct in_device *);
 extern void ip_mc_destroy_dev(struct in_device *);
 extern void ip_mc_up(struct in_device *);
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index de4decfa1bf..df8efd42bf8 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -147,8 +147,7 @@ static inline int ip_mroute_opt(int opt)
 extern int ip_mroute_setsockopt(struct sock *, int, char __user *, int);
 extern int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
 extern int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
-extern void ip_mr_init(void);
-
+extern int ip_mr_init(void);
 
 struct vif_device
 {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 42bd24b64b5..dc411335c14 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1479,14 +1479,15 @@ static int __init inet_init(void)
 	 *	Initialise the multicast router
 	 */
 #if defined(CONFIG_IP_MROUTE)
-	ip_mr_init();
+	if (ip_mr_init())
+		printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
 #endif
 	/*
 	 *	Initialise per-cpu ipv4 mibs
 	 */
 
 	if (init_ipv4_mibs())
-		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
+		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");
 
 	ipv4_proc_init();
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 300ab0c2919..438fab9c62a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1878,16 +1878,36 @@ static struct net_protocol pim_protocol = {
  *	Setup for IP multicast routing
  */
 
-void __init ip_mr_init(void)
+int __init ip_mr_init(void)
 {
+	int err;
+
 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
 				       sizeof(struct mfc_cache),
 				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 				       NULL);
+	if (!mrt_cachep)
+		return -ENOMEM;
+
 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
-	register_netdevice_notifier(&ip_mr_notifier);
+	err = register_netdevice_notifier(&ip_mr_notifier);
+	if (err)
+		goto reg_notif_fail;
 #ifdef CONFIG_PROC_FS
-	proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
-	proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops);
+	err = -ENOMEM;
+	if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
+		goto proc_vif_fail;
+	if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
+		goto proc_cache_fail;
 #endif
+	return 0;
+reg_notif_fail:
+	kmem_cache_destroy(mrt_cachep);
+#ifdef CONFIG_PROC_FS
+proc_vif_fail:
+	unregister_netdevice_notifier(&ip_mr_notifier);
+proc_cache_fail:
+	proc_net_remove(&init_net, "ip_mr_vif");
+#endif
+	return err;
 }
-- 
cgit v1.2.3-70-g09d2


From e0835f8fa56d2d308486f8a34cf1c4480cd27f4e Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 3 Jul 2008 16:51:22 +0900
Subject: ipv4,ipv6 mroute: Add some helper inline functions to remove ugly
 ifdefs.

ip{,v6}_mroute_{set,get}sockopt() should not matter by optimization but
it would be better not to depend on optimization semantically.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/mroute.h  | 27 +++++++++++++++++++++++++++
 include/linux/mroute6.h | 32 ++++++++++++++++++++++++++++++++
 net/ipv6/af_inet6.c     |  8 --------
 3 files changed, 59 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index df8efd42bf8..07112ee9293 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -144,10 +144,37 @@ static inline int ip_mroute_opt(int opt)
 }
 #endif
 
+#ifdef CONFIG_IP_MROUTE
 extern int ip_mroute_setsockopt(struct sock *, int, char __user *, int);
 extern int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
 extern int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
 extern int ip_mr_init(void);
+#else
+static inline
+int ip_mroute_setsockopt(struct sock *sock,
+			 int optname, char __user *optval, int optlen)
+{
+	return -ENOPROTOOPT;
+}
+
+static inline
+int ip_mroute_getsockopt(struct sock *sock,
+			 int optname, char __user *optval, int __user *optlen)
+{
+	return -ENOPROTOOPT;
+}
+
+static inline
+int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
+{
+	return -ENOIOCTLCMD;
+}
+
+static inline int ip_mr_init(void)
+{
+	return 0;
+}
+#endif
 
 struct vif_device
 {
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 4c4d6f57d5c..5cf50473a10 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -131,12 +131,44 @@ static inline int ip6_mroute_opt(int opt)
 
 struct sock;
 
+#ifdef CONFIG_IPV6_MROUTE
 extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, int);
 extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
 extern int ip6_mr_input(struct sk_buff *skb);
 extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg);
 extern int ip6_mr_init(void);
 extern void ip6_mr_cleanup(void);
+#else
+static inline
+int ip6_mroute_setsockopt(struct sock *sock,
+			  int optname, char __user *optval, int optlen)
+{
+	return -ENOPROTOOPT;
+}
+
+static inline
+int ip6_mroute_getsockopt(struct sock *sock,
+			  int optname, char __user *optval, int __user *optlen)
+{
+	return -ENOPROTOOPT;
+}
+
+static inline
+int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
+{
+	return -ENOIOCTLCMD;
+}
+
+static inline int ip6_mr_init(void)
+{
+	return 0;
+}
+
+static inline void ip6_mr_cleanup(void)
+{
+	return;
+}
+#endif
 
 struct mif_device
 {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 6b39af1acb5..3d828bc4b1c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -59,9 +59,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#ifdef CONFIG_IPV6_MROUTE
 #include <linux/mroute6.h>
-#endif
 
 MODULE_AUTHOR("Cast of dozens");
 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
@@ -952,11 +950,9 @@ static int __init inet6_init(void)
 	err = icmpv6_init();
 	if (err)
 		goto icmp_fail;
-#ifdef CONFIG_IPV6_MROUTE
 	err = ip6_mr_init();
 	if (err)
 		goto ipmr_fail;
-#endif
 	err = ndisc_init();
 	if (err)
 		goto ndisc_fail;
@@ -1059,10 +1055,8 @@ netfilter_fail:
 igmp_fail:
 	ndisc_cleanup();
 ndisc_fail:
-#ifdef CONFIG_IPV6_MROUTE
 	ip6_mr_cleanup();
 ipmr_fail:
-#endif
 	icmpv6_cleanup();
 icmp_fail:
 	unregister_pernet_subsys(&inet6_net_ops);
@@ -1117,9 +1111,7 @@ static void __exit inet6_exit(void)
 	ipv6_netfilter_fini();
 	igmp6_cleanup();
 	ndisc_cleanup();
-#ifdef CONFIG_IPV6_MROUTE
 	ip6_mr_cleanup();
-#endif
 	icmpv6_cleanup();
 	rawv6_exit();
 
-- 
cgit v1.2.3-70-g09d2


From 81b23b4a7acd9b37a269c62d02479d4f645dd20a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 3 Jul 2008 03:22:02 -0700
Subject: tcp: net/ipv4/tcp.c needs linux/scatterlist.h

alpha:

net/ipv4/tcp.c: In function 'tcp_calc_md5_hash':
net/ipv4/tcp.c:2479: error: implicit declaration of function 'sg_init_table'    net/ipv4/tcp.c:2482: error: implicit declaration of function 'sg_set_buf'
net/ipv4/tcp.c:2507: error: implicit declaration of function 'sg_mark_end'

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 850825dc86e..de53024664e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -255,6 +255,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/skbuff.h>
+#include <linux/scatterlist.h>
 #include <linux/splice.h>
 #include <linux/net.h>
 #include <linux/socket.h>
-- 
cgit v1.2.3-70-g09d2


From 374e7b59498ce0785b3727794b351221528a5159 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Thu, 3 Jul 2008 03:31:21 -0700
Subject: tcp: fix a size_t < 0 comparison in tcp_read_sock

<used> should be of type int (not size_t) since recv_actor can return
negative values and it is also used in a < 0 comparison.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index de53024664e..1d723de1868 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1209,7 +1209,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		return -ENOTCONN;
 	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
 		if (offset < skb->len) {
-			size_t used, len;
+			int used;
+			size_t len;
 
 			len = skb->len - offset;
 			/* Stop reading if we hit a patch of urgent data */
-- 
cgit v1.2.3-70-g09d2


From ab1b20467cd2214ad89a95d007047cd2a6b5bf5d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 3 Jul 2008 03:53:42 -0700
Subject: bridge: fix use-after-free in br_cleanup_bridges()

Unregistering a bridge device may cause virtual devices stacked on the
bridge, like vlan or macvlan devices, to be unregistered as well.
br_cleanup_bridges() uses for_each_netdev_safe() to iterate over all
devices during cleanup. This is not enough however, if one of the
additionally unregistered devices is next in the list to the bridge
device, it will get freed as well and the iteration continues on
the freed element.

Restart iteration after each bridge device removal from the beginning to
fix this, similar to what rtnl_link_unregister() does.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c2397f503b0..f38cc5317b8 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -442,12 +442,16 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 
 void __exit br_cleanup_bridges(void)
 {
-	struct net_device *dev, *nxt;
+	struct net_device *dev;
 
 	rtnl_lock();
-	for_each_netdev_safe(&init_net, dev, nxt)
-		if (dev->priv_flags & IFF_EBRIDGE)
+restart:
+	for_each_netdev(&init_net, dev) {
+		if (dev->priv_flags & IFF_EBRIDGE) {
 			del_br(dev->priv);
+			goto restart;
+		}
+	}
 	rtnl_unlock();
 
 }
-- 
cgit v1.2.3-70-g09d2


From b620754bfeb8b0e0c6622b03d5ee2f1af1d3082f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 3 Jul 2008 15:26:35 -0400
Subject: svcrpc: fix handling of garbage args

To return garbage_args, the accept_stat must be 0, and we must have a
verifier.  So we shouldn't be resetting the write pointer as we reject
the call.

Also, we must add the two placeholder words here regardless of success
of the unwrap, to ensure the output buffer is left in a consistent state
for svcauth_gss_release().

This fixes a BUG() in svcauth_gss.c:svcauth_gss_release().

Thanks to Aime Le Rouzic for bug report, debugging help, and testing.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Tested-by: Aime Le Rouzic <aime.le-rouzic@bull.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/sunrpc/auth_gss/svcauth_gss.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 5905d56737d..81ae3d62a0c 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1144,20 +1144,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 		case RPC_GSS_SVC_NONE:
 			break;
 		case RPC_GSS_SVC_INTEGRITY:
+			/* placeholders for length and seq. number: */
+			svc_putnl(resv, 0);
+			svc_putnl(resv, 0);
 			if (unwrap_integ_data(&rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto garbage_args;
+			break;
+		case RPC_GSS_SVC_PRIVACY:
 			/* placeholders for length and seq. number: */
 			svc_putnl(resv, 0);
 			svc_putnl(resv, 0);
-			break;
-		case RPC_GSS_SVC_PRIVACY:
 			if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto garbage_args;
-			/* placeholders for length and seq. number: */
-			svc_putnl(resv, 0);
-			svc_putnl(resv, 0);
 			break;
 		default:
 			goto auth_err;
@@ -1170,8 +1170,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 		goto out;
 	}
 garbage_args:
-	/* Restore write pointer to its original value: */
-	xdr_ressize_check(rqstp, reject_stat);
 	ret = SVC_GARBAGE;
 	goto out;
 auth_err:
-- 
cgit v1.2.3-70-g09d2


From 76e6ebfb40a2455c18234dcb0f9df37533215461 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 5 Jul 2008 19:00:44 -0700
Subject: netns: add namespace parameter to rt_cache_flush

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h     |  2 +-
 net/ipv4/arp.c          |  2 +-
 net/ipv4/devinet.c      |  8 +++++---
 net/ipv4/fib_frontend.c | 17 +++++++++--------
 net/ipv4/fib_hash.c     |  6 +++---
 net/ipv4/fib_rules.c    |  2 +-
 net/ipv4/fib_trie.c     |  6 +++---
 net/ipv4/route.c        |  8 ++++----
 8 files changed, 27 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/net/route.h b/include/net/route.h
index fc836ff824c..3140cc50085 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -111,7 +111,7 @@ struct in_device;
 extern int		ip_rt_init(void);
 extern void		ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
 				       __be32 src, struct net_device *dev);
-extern void		rt_cache_flush(int how);
+extern void		rt_cache_flush(struct net *net, int how);
 extern int		__ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
 extern int		ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
 extern int		ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 20c515a1be2..29df75a6bcc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1197,7 +1197,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&arp_tbl, dev);
-		rt_cache_flush(0);
+		rt_cache_flush(dev_net(dev), 0);
 		break;
 	default:
 		break;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9de2514946c..2e667e2f90d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1348,7 +1348,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 				dev_disable_lro(idev->dev);
 			}
 			rtnl_unlock();
-			rt_cache_flush(0);
+			rt_cache_flush(net, 0);
 		}
 	}
 
@@ -1362,9 +1362,10 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	int *valp = ctl->data;
 	int val = *valp;
 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	struct net *net = ctl->extra2;
 
 	if (write && *valp != val)
-		rt_cache_flush(0);
+		rt_cache_flush(net, 0);
 
 	return ret;
 }
@@ -1375,9 +1376,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
 {
 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
 				      newval, newlen);
+	struct net *net = table->extra2;
 
 	if (ret == 1)
-		rt_cache_flush(0);
+		rt_cache_flush(net, 0);
 
 	return ret;
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 5ad01d63f83..65c1503f8cc 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -144,7 +144,7 @@ static void fib_flush(struct net *net)
 	}
 
 	if (flushed)
-		rt_cache_flush(-1);
+		rt_cache_flush(net, -1);
 }
 
 /*
@@ -897,21 +897,22 @@ static void fib_disable_ip(struct net_device *dev, int force)
 {
 	if (fib_sync_down_dev(dev, force))
 		fib_flush(dev_net(dev));
-	rt_cache_flush(0);
+	rt_cache_flush(dev_net(dev), 0);
 	arp_ifdown(dev);
 }
 
 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
+	struct net_device *dev = ifa->ifa_dev->dev;
 
 	switch (event) {
 	case NETDEV_UP:
 		fib_add_ifaddr(ifa);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		fib_sync_up(ifa->ifa_dev->dev);
+		fib_sync_up(dev);
 #endif
-		rt_cache_flush(-1);
+		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa);
@@ -919,9 +920,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 			/* Last address was deleted from this interface.
 			   Disable IP.
 			 */
-			fib_disable_ip(ifa->ifa_dev->dev, 1);
+			fib_disable_ip(dev, 1);
 		} else {
-			rt_cache_flush(-1);
+			rt_cache_flush(dev_net(dev), -1);
 		}
 		break;
 	}
@@ -949,14 +950,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev);
 #endif
-		rt_cache_flush(-1);
+		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
 		fib_disable_ip(dev, 0);
 		break;
 	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGE:
-		rt_cache_flush(0);
+		rt_cache_flush(dev_net(dev), 0);
 		break;
 	}
 	return NOTIFY_DONE;
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index eeec4bf982b..c8cac6c7f88 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -472,7 +472,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 
 			fib_release_info(fi_drop);
 			if (state & FA_S_ACCESSED)
-				rt_cache_flush(-1);
+				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 			rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
 				  &cfg->fc_nlinfo, NLM_F_REPLACE);
 			return 0;
@@ -532,7 +532,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 
 	if (new_f)
 		fz->fz_nent++;
-	rt_cache_flush(-1);
+	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 
 	rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
@@ -614,7 +614,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
 		write_unlock_bh(&fib_hash_lock);
 
 		if (fa->fa_state & FA_S_ACCESSED)
-			rt_cache_flush(-1);
+			rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 		fn_free_alias(fa, f);
 		if (kill_fn) {
 			fn_free_node(f);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 1fb56876be5..bc05de41308 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -260,7 +260,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 
 static void fib4_rule_flush_cache(void)
 {
-	rt_cache_flush(-1);
+	rt_cache_flush(&init_net, -1);
 }
 
 static struct fib_rules_ops fib4_rules_ops_template = {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 394db9c941a..d16ae4623be 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1271,7 +1271,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 
 			fib_release_info(fi_drop);
 			if (state & FA_S_ACCESSED)
-				rt_cache_flush(-1);
+				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
 				tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
 
@@ -1316,7 +1316,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 	list_add_tail_rcu(&new_fa->fa_list,
 			  (fa ? &fa->fa_list : fa_head));
 
-	rt_cache_flush(-1);
+	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 succeeded:
@@ -1664,7 +1664,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 		trie_leaf_remove(t, l);
 
 	if (fa->fa_state & FA_S_ACCESSED)
-		rt_cache_flush(-1);
+		rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 
 	fib_release_info(fa->fa_info);
 	alias_free_mem_rcu(fa);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fe3a0223728..cedc366505b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -791,7 +791,7 @@ static void rt_cache_invalidate(void)
  * delay < 0  : invalidate cache (fast : entries will be deleted later)
  * delay >= 0 : invalidate & flush cache (can be long)
  */
-void rt_cache_flush(int delay)
+void rt_cache_flush(struct net *net, int delay)
 {
 	rt_cache_invalidate();
 	if (delay >= 0)
@@ -2825,7 +2825,7 @@ done:
 
 void ip_rt_multicast_event(struct in_device *in_dev)
 {
-	rt_cache_flush(0);
+	rt_cache_flush(dev_net(in_dev->dev), 0);
 }
 
 #ifdef CONFIG_SYSCTL
@@ -2837,7 +2837,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
 {
 	if (write) {
 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		rt_cache_flush(flush_delay);
+		rt_cache_flush(&init_net, flush_delay);
 		return 0;
 	}
 
@@ -2857,7 +2857,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 		return -EINVAL;
 	if (get_user(delay, (int __user *)newval))
 		return -EFAULT;
-	rt_cache_flush(delay);
+	rt_cache_flush(&init_net, delay);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From ae299fc051aa68ca6ef1807c37bb92d9b6ff817c Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 5 Jul 2008 19:01:28 -0700
Subject: net: add fib_rules_ops to flush_cache method

This is required to pass namespace context into rt_cache_flush called from
->flush_cache.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h | 2 +-
 net/core/fib_rules.c    | 2 +-
 net/decnet/dn_rules.c   | 2 +-
 net/ipv4/fib_rules.c    | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index a5c6ccc5bb1..c2bb5cae651 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -62,7 +62,7 @@ struct fib_rules_ops
 
 	/* Called after modifications to the rules set, must flush
 	 * the route cache if one exists. */
-	void			(*flush_cache)(void);
+	void			(*flush_cache)(struct fib_rules_ops *ops);
 
 	int			nlgroup;
 	const struct nla_policy	*policy;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index e3e9ab0f74e..1c2943a119f 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -69,7 +69,7 @@ static void rules_ops_put(struct fib_rules_ops *ops)
 static void flush_route_cache(struct fib_rules_ops *ops)
 {
 	if (ops->flush_cache)
-		ops->flush_cache();
+		ops->flush_cache(ops);
 }
 
 int fib_rules_register(struct fib_rules_ops *ops)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 5b7539b7fe0..14fbca55e90 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -229,7 +229,7 @@ static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops)
 	return 0;
 }
 
-static void dn_fib_rule_flush_cache(void)
+static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
 {
 	dn_rt_cache_flush(-1);
 }
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index bc05de41308..6080d712082 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -258,9 +258,9 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 	       + nla_total_size(4); /* flow */
 }
 
-static void fib4_rule_flush_cache(void)
+static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 {
-	rt_cache_flush(&init_net, -1);
+	rt_cache_flush(ops->fro_net, -1);
 }
 
 static struct fib_rules_ops fib4_rules_ops_template = {
-- 
cgit v1.2.3-70-g09d2


From 639e104facec20f64f2eb940851ae45e5f255e6b Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 5 Jul 2008 19:02:06 -0700
Subject: ipv4: remove static flush_delay variable

flush delay is used as an external storage for net.ipv4.route.flush sysctl
entry. It is write-only.

The ctl_table->data for this entry is used once. Fix this case to point
to the stack to remove global variable. Do this to avoid additional
variable on struct net in the next patch.

Possible race (as it was before) accessing this local variable is removed
using flush_mutex.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cedc366505b..790de32cd7d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2829,14 +2829,20 @@ void ip_rt_multicast_event(struct in_device *in_dev)
 }
 
 #ifdef CONFIG_SYSCTL
-static int flush_delay;
-
 static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
 					struct file *filp, void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
+		int flush_delay;
+		static DEFINE_MUTEX(flush_mutex);
+
+		mutex_lock(&flush_mutex);
+		ctl->data = &flush_delay;
 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+		ctl->data = NULL;
+		mutex_unlock(&flush_mutex);
+
 		rt_cache_flush(&init_net, flush_delay);
 		return 0;
 	}
@@ -2865,7 +2871,6 @@ ctl_table ipv4_route_table[] = {
 	{
 		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
 		.procname	= "flush",
-		.data		= &flush_delay,
 		.maxlen		= sizeof(int),
 		.mode		= 0200,
 		.proc_handler	= &ipv4_sysctl_rtcache_flush,
-- 
cgit v1.2.3-70-g09d2


From 39a23e75087ce815abbddbd565b9a2e567ac47da Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 5 Jul 2008 19:02:33 -0700
Subject: netns: register net.ipv4.route.flush in each namespace

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h |  1 +
 net/ipv4/route.c         | 79 ++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 70 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 6ef90b5fafb..a29adf1ba0f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -18,6 +18,7 @@ struct netns_ipv4 {
 	struct ctl_table_header	*forw_hdr;
 	struct ctl_table_header	*frags_hdr;
 	struct ctl_table_header	*ipv4_hdr;
+	struct ctl_table_header *route_hdr;
 #endif
 	struct ipv4_devconf	*devconf_all;
 	struct ipv4_devconf	*devconf_dflt;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 790de32cd7d..6fe799de9b9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2835,6 +2835,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
 {
 	if (write) {
 		int flush_delay;
+		struct net *net;
 		static DEFINE_MUTEX(flush_mutex);
 
 		mutex_lock(&flush_mutex);
@@ -2843,7 +2844,8 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
 		ctl->data = NULL;
 		mutex_unlock(&flush_mutex);
 
-		rt_cache_flush(&init_net, flush_delay);
+		net = (struct net *)ctl->extra1;
+		rt_cache_flush(net, flush_delay);
 		return 0;
 	}
 
@@ -2859,23 +2861,17 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 						size_t newlen)
 {
 	int delay;
+	struct net *net;
 	if (newlen != sizeof(int))
 		return -EINVAL;
 	if (get_user(delay, (int __user *)newval))
 		return -EFAULT;
-	rt_cache_flush(&init_net, delay);
+	net = (struct net *)table->extra1;
+	rt_cache_flush(net, delay);
 	return 0;
 }
 
 ctl_table ipv4_route_table[] = {
-	{
-		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
-		.procname	= "flush",
-		.maxlen		= sizeof(int),
-		.mode		= 0200,
-		.proc_handler	= &ipv4_sysctl_rtcache_flush,
-		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
-	},
 	{
 		.ctl_name	= NET_IPV4_ROUTE_GC_THRESH,
 		.procname	= "gc_thresh",
@@ -3014,6 +3010,66 @@ ctl_table ipv4_route_table[] = {
 	},
 	{ .ctl_name = 0 }
 };
+
+static __net_initdata struct ctl_path ipv4_route_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
+	{ },
+};
+
+
+static struct ctl_table ipv4_route_flush_table[] = {
+	{
+		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
+		.procname	= "flush",
+		.maxlen		= sizeof(int),
+		.mode		= 0200,
+		.proc_handler	= &ipv4_sysctl_rtcache_flush,
+		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
+	},
+	{ .ctl_name = 0 },
+};
+
+static __net_init int sysctl_route_net_init(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = ipv4_route_flush_table;
+	if (net != &init_net) {
+		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_dup;
+	}
+	tbl[0].extra1 = net;
+
+	net->ipv4.route_hdr =
+		register_net_sysctl_table(net, ipv4_route_path, tbl);
+	if (net->ipv4.route_hdr == NULL)
+		goto err_reg;
+	return 0;
+
+err_reg:
+	if (tbl != ipv4_route_flush_table)
+		kfree(tbl);
+err_dup:
+	return -ENOMEM;
+}
+
+static __net_exit void sysctl_route_net_exit(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = net->ipv4.route_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv4.route_hdr);
+	BUG_ON(tbl == ipv4_route_flush_table);
+	kfree(tbl);
+}
+
+static __net_initdata struct pernet_operations sysctl_route_ops = {
+	.init = sysctl_route_net_init,
+	.exit = sysctl_route_net_exit,
+};
 #endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -3090,6 +3146,9 @@ int __init ip_rt_init(void)
 #endif
 	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
 
+#ifdef CONFIG_SYSCTL
+	register_pernet_subsys(&sysctl_route_ops);
+#endif
 	return rc;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9f5e97e53675caeda48e9988122a30470f4d309d Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 5 Jul 2008 19:02:59 -0700
Subject: netns: make rt_secret_rebuild timer per namespace

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h |  2 ++
 net/ipv4/route.c         | 40 ++++++++++++++++++++++++++++++----------
 2 files changed, 32 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index a29adf1ba0f..356617f81be 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -46,5 +46,7 @@ struct netns_ipv4 {
 	int sysctl_icmp_ratelimit;
 	int sysctl_icmp_ratemask;
 	int sysctl_icmp_errors_use_inbound_ifaddr;
+
+	struct timer_list rt_secret_timer;
 };
 #endif
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6fe799de9b9..f99d9dbb772 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -132,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly	= 10 * 60 * HZ;
 
 static void rt_worker_func(struct work_struct *work);
 static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
-static struct timer_list rt_secret_timer;
 
 /*
  *	Interface to generic destination cache.
@@ -801,10 +800,11 @@ void rt_cache_flush(struct net *net, int delay)
 /*
  * We change rt_genid and let gc do the cleanup
  */
-static void rt_secret_rebuild(unsigned long dummy)
+static void rt_secret_rebuild(unsigned long __net)
 {
+	struct net *net = (struct net *)__net;
 	rt_cache_invalidate();
-	mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
+	mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
 /*
@@ -3072,6 +3072,31 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 };
 #endif
 
+
+static __net_init int rt_secret_timer_init(struct net *net)
+{
+	net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
+	net->ipv4.rt_secret_timer.data = (unsigned long)net;
+	init_timer_deferrable(&net->ipv4.rt_secret_timer);
+
+	net->ipv4.rt_secret_timer.expires =
+		jiffies + net_random() % ip_rt_secret_interval +
+		ip_rt_secret_interval;
+	add_timer(&net->ipv4.rt_secret_timer);
+	return 0;
+}
+
+static __net_exit void rt_secret_timer_exit(struct net *net)
+{
+	del_timer_sync(&net->ipv4.rt_secret_timer);
+}
+
+static __net_initdata struct pernet_operations rt_secret_timer_ops = {
+	.init = rt_secret_timer_init,
+	.exit = rt_secret_timer_exit,
+};
+
+
 #ifdef CONFIG_NET_CLS_ROUTE
 struct ip_rt_acct *ip_rt_acct __read_mostly;
 #endif /* CONFIG_NET_CLS_ROUTE */
@@ -3124,19 +3149,14 @@ int __init ip_rt_init(void)
 	devinet_init();
 	ip_fib_init();
 
-	rt_secret_timer.function = rt_secret_rebuild;
-	rt_secret_timer.data = 0;
-	init_timer_deferrable(&rt_secret_timer);
-
 	/* All the timers, started at system startup tend
 	   to synchronize. Perturb it a bit.
 	 */
 	schedule_delayed_work(&expires_work,
 		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
 
-	rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
-		ip_rt_secret_interval;
-	add_timer(&rt_secret_timer);
+	if (register_pernet_subsys(&rt_secret_timer_ops))
+		printk(KERN_ERR "Unable to setup rt_secret_timer\n");
 
 	if (ip_rt_proc_init())
 		printk(KERN_ERR "Unable to create route proc files\n");
-- 
cgit v1.2.3-70-g09d2


From 86c657f6b5bbf2f3ec2213eca528998134a9b344 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 5 Jul 2008 19:03:31 -0700
Subject: netns: add struct net parameter to rt_cache_invalidate

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f99d9dbb772..9725223ffe9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -778,7 +778,7 @@ static void rt_worker_func(struct work_struct *work)
  * many times (2^24) without giving recent rt_genid.
  * Jenkins hash is strong enough that litle changes of rt_genid are OK.
  */
-static void rt_cache_invalidate(void)
+static void rt_cache_invalidate(struct net *net)
 {
 	unsigned char shuffle;
 
@@ -792,7 +792,7 @@ static void rt_cache_invalidate(void)
  */
 void rt_cache_flush(struct net *net, int delay)
 {
-	rt_cache_invalidate();
+	rt_cache_invalidate(net);
 	if (delay >= 0)
 		rt_do_flush(!in_softirq());
 }
@@ -803,7 +803,7 @@ void rt_cache_flush(struct net *net, int delay)
 static void rt_secret_rebuild(unsigned long __net)
 {
 	struct net *net = (struct net *)__net;
-	rt_cache_invalidate();
+	rt_cache_invalidate(net);
 	mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
-- 
cgit v1.2.3-70-g09d2


From b00180defdeeac8e07e3dc02e53e7395d42bbd19 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 5 Jul 2008 19:04:09 -0700
Subject: ipv4: pass current value of rt_genid into rt_hash

Basically, there is no difference to atomic_read internally or pass it as
a parameter as rt_hash is inline.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 9725223ffe9..e4e37edbad6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -256,11 +256,12 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) \
 	(__raw_get_cpu_var(rt_cache_stat).field++)
 
-static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
+static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
+		int genid)
 {
 	return jhash_3words((__force u32)(__be32)(daddr),
 			    (__force u32)(__be32)(saddr),
-			    idx, atomic_read(&rt_genid))
+			    idx, genid)
 		& rt_hash_mask;
 }
 
@@ -1180,7 +1181,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 	for (i = 0; i < 2; i++) {
 		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+						atomic_read(&rt_genid));
 
 			rthp=&rt_hash_table[hash].chain;
 
@@ -1295,7 +1297,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
 			   rt->u.dst.expires) {
 			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
-						rt->fl.oif);
+						rt->fl.oif,
+						atomic_read(&rt_genid));
 #if RT_CACHE_DEBUG >= 1
 			printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
 					  NIPQUAD_FMT "/%02x dropped\n",
@@ -1444,7 +1447,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 	for (k = 0; k < 2; k++) {
 		for (i = 0; i < 2; i++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+						atomic_read(&rt_genid));
 
 			rcu_read_lock();
 			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1709,7 +1713,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	RT_CACHE_STAT_INC(in_slow_mc);
 
 	in_dev_put(in_dev);
-	hash = rt_hash(daddr, saddr, dev->ifindex);
+	hash = rt_hash(daddr, saddr, dev->ifindex, atomic_read(&rt_genid));
 	return rt_intern_hash(hash, rth, &skb->rtable);
 
 e_nobufs:
@@ -1870,7 +1874,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
 		return err;
 
 	/* put it into the cache */
-	hash = rt_hash(daddr, saddr, fl->iif);
+	hash = rt_hash(daddr, saddr, fl->iif, atomic_read(&rt_genid));
 	return rt_intern_hash(hash, rth, &skb->rtable);
 }
 
@@ -2026,7 +2030,7 @@ local_input:
 		rth->rt_flags 	&= ~RTCF_LOCAL;
 	}
 	rth->rt_type	= res.type;
-	hash = rt_hash(daddr, saddr, fl.iif);
+	hash = rt_hash(daddr, saddr, fl.iif, atomic_read(&rt_genid));
 	err = rt_intern_hash(hash, rth, &skb->rtable);
 	goto done;
 
@@ -2077,7 +2081,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	net = dev_net(dev);
 	tos &= IPTOS_RT_MASK;
-	hash = rt_hash(daddr, saddr, iif);
+	hash = rt_hash(daddr, saddr, iif, atomic_read(&rt_genid));
 
 	rcu_read_lock();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2266,7 +2270,8 @@ static int ip_mkroute_output(struct rtable **rp,
 	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
 	unsigned hash;
 	if (err == 0) {
-		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
+		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
+			       atomic_read(&rt_genid));
 		err = rt_intern_hash(hash, rth, rp);
 	}
 
@@ -2478,7 +2483,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	unsigned hash;
 	struct rtable *rth;
 
-	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
+	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif,
+		       atomic_read(&rt_genid));
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-- 
cgit v1.2.3-70-g09d2


From e84f84f276473dcc673f360e8ff3203148bdf0e2 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 5 Jul 2008 19:04:32 -0700
Subject: netns: place rt_genid into struct net

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h |  1 +
 net/ipv4/route.c         | 76 +++++++++++++++++++++++++++---------------------
 2 files changed, 44 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 356617f81be..a6ed83853dc 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -48,5 +48,6 @@ struct netns_ipv4 {
 	int sysctl_icmp_errors_use_inbound_ifaddr;
 
 	struct timer_list rt_secret_timer;
+	atomic_t rt_genid;
 };
 #endif
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e4e37edbad6..67c3ed772c2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -250,7 +250,6 @@ static inline void rt_hash_lock_init(void)
 static struct rt_hash_bucket 	*rt_hash_table __read_mostly;
 static unsigned			rt_hash_mask __read_mostly;
 static unsigned int		rt_hash_log  __read_mostly;
-static atomic_t			rt_genid __read_mostly;
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) \
@@ -265,6 +264,11 @@ static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
 		& rt_hash_mask;
 }
 
+static inline int rt_genid(struct net *net)
+{
+	return atomic_read(&net->ipv4.rt_genid);
+}
+
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
 	struct seq_net_private p;
@@ -334,7 +338,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 	struct rt_cache_iter_state *st = seq->private;
 	if (*pos)
 		return rt_cache_get_idx(seq, *pos - 1);
-	st->genid = atomic_read(&rt_genid);
+	st->genid = rt_genid(seq_file_net(seq));
 	return SEQ_START_TOKEN;
 }
 
@@ -681,6 +685,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
 	return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
 }
 
+static inline int rt_is_expired(struct rtable *rth)
+{
+	return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+}
+
 /*
  * Perform a full scan of hash table and free all entries.
  * Can be called by a softirq or a process.
@@ -736,7 +745,7 @@ static void rt_check_expire(void)
 			continue;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
-			if (rth->rt_genid != atomic_read(&rt_genid)) {
+			if (rt_is_expired(rth)) {
 				*rthp = rth->u.dst.rt_next;
 				rt_free(rth);
 				continue;
@@ -784,7 +793,7 @@ static void rt_cache_invalidate(struct net *net)
 	unsigned char shuffle;
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
-	atomic_add(shuffle + 1U, &rt_genid);
+	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
 }
 
 /*
@@ -881,7 +890,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
 			rthp = &rt_hash_table[k].chain;
 			spin_lock_bh(rt_hash_lock_addr(k));
 			while ((rth = *rthp) != NULL) {
-				if (rth->rt_genid == atomic_read(&rt_genid) &&
+				if (!rt_is_expired(rth) &&
 					!rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
 					rthp = &rth->u.dst.rt_next;
@@ -963,7 +972,7 @@ restart:
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
-		if (rth->rt_genid != atomic_read(&rt_genid)) {
+		if (rt_is_expired(rth)) {
 			*rthp = rth->u.dst.rt_next;
 			rt_free(rth);
 			continue;
@@ -1139,7 +1148,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	ip_rt_put(rt);
 	while ((aux = *rthp) != NULL) {
-		if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
+		if (aux == rt || rt_is_expired(aux)) {
 			*rthp = aux->u.dst.rt_next;
 			rt_free(aux);
 			continue;
@@ -1182,7 +1191,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 	for (i = 0; i < 2; i++) {
 		for (k = 0; k < 2; k++) {
 			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-						atomic_read(&rt_genid));
+						rt_genid(net));
 
 			rthp=&rt_hash_table[hash].chain;
 
@@ -1194,7 +1203,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				    rth->fl.fl4_src != skeys[i] ||
 				    rth->fl.oif != ikeys[k] ||
 				    rth->fl.iif != 0 ||
-				    rth->rt_genid != atomic_read(&rt_genid) ||
+				    rt_is_expired(rth) ||
 				    !net_eq(dev_net(rth->u.dst.dev), net)) {
 					rthp = &rth->u.dst.rt_next;
 					continue;
@@ -1233,7 +1242,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				rt->u.dst.neighbour	= NULL;
 				rt->u.dst.hh		= NULL;
 				rt->u.dst.xfrm		= NULL;
-				rt->rt_genid		= atomic_read(&rt_genid);
+				rt->rt_genid		= rt_genid(net);
 				rt->rt_flags		|= RTCF_REDIRECTED;
 
 				/* Gateway is different ... */
@@ -1298,7 +1307,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 			   rt->u.dst.expires) {
 			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
 						rt->fl.oif,
-						atomic_read(&rt_genid));
+						rt_genid(dev_net(dst->dev)));
 #if RT_CACHE_DEBUG >= 1
 			printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
 					  NIPQUAD_FMT "/%02x dropped\n",
@@ -1448,7 +1457,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 	for (k = 0; k < 2; k++) {
 		for (i = 0; i < 2; i++) {
 			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-						atomic_read(&rt_genid));
+						rt_genid(net));
 
 			rcu_read_lock();
 			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1463,7 +1472,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				    rth->fl.iif != 0 ||
 				    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
 				    !net_eq(dev_net(rth->u.dst.dev), net) ||
-				    rth->rt_genid != atomic_read(&rt_genid))
+				    !rt_is_expired(rth))
 					continue;
 
 				if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1698,7 +1707,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
-	rth->rt_genid	= atomic_read(&rt_genid);
+	rth->rt_genid	= rt_genid(dev_net(dev));
 	rth->rt_flags	= RTCF_MULTICAST;
 	rth->rt_type	= RTN_MULTICAST;
 	if (our) {
@@ -1713,7 +1722,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	RT_CACHE_STAT_INC(in_slow_mc);
 
 	in_dev_put(in_dev);
-	hash = rt_hash(daddr, saddr, dev->ifindex, atomic_read(&rt_genid));
+	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
 	return rt_intern_hash(hash, rth, &skb->rtable);
 
 e_nobufs:
@@ -1839,7 +1848,7 @@ static int __mkroute_input(struct sk_buff *skb,
 
 	rth->u.dst.input = ip_forward;
 	rth->u.dst.output = ip_output;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
 
 	rt_set_nexthop(rth, res, itag);
 
@@ -1874,7 +1883,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
 		return err;
 
 	/* put it into the cache */
-	hash = rt_hash(daddr, saddr, fl->iif, atomic_read(&rt_genid));
+	hash = rt_hash(daddr, saddr, fl->iif,
+		       rt_genid(dev_net(rth->u.dst.dev)));
 	return rt_intern_hash(hash, rth, &skb->rtable);
 }
 
@@ -2000,7 +2010,7 @@ local_input:
 		goto e_nobufs;
 
 	rth->u.dst.output= ip_rt_bug;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(net);
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
@@ -2030,7 +2040,7 @@ local_input:
 		rth->rt_flags 	&= ~RTCF_LOCAL;
 	}
 	rth->rt_type	= res.type;
-	hash = rt_hash(daddr, saddr, fl.iif, atomic_read(&rt_genid));
+	hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
 	err = rt_intern_hash(hash, rth, &skb->rtable);
 	goto done;
 
@@ -2081,7 +2091,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	net = dev_net(dev);
 	tos &= IPTOS_RT_MASK;
-	hash = rt_hash(daddr, saddr, iif, atomic_read(&rt_genid));
+	hash = rt_hash(daddr, saddr, iif, rt_genid(net));
 
 	rcu_read_lock();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2093,7 +2103,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		     (rth->fl.fl4_tos ^ tos)) == 0 &&
 		    rth->fl.mark == skb->mark &&
 		    net_eq(dev_net(rth->u.dst.dev), net) &&
-		    rth->rt_genid == atomic_read(&rt_genid)) {
+		    !rt_is_expired(rth)) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -2221,7 +2231,7 @@ static int __mkroute_output(struct rtable **result,
 	rth->rt_spec_dst= fl->fl4_src;
 
 	rth->u.dst.output=ip_output;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(dev_net(dev_out));
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2271,7 +2281,7 @@ static int ip_mkroute_output(struct rtable **rp,
 	unsigned hash;
 	if (err == 0) {
 		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
-			       atomic_read(&rt_genid));
+			       rt_genid(dev_net(dev_out)));
 		err = rt_intern_hash(hash, rth, rp);
 	}
 
@@ -2483,8 +2493,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	unsigned hash;
 	struct rtable *rth;
 
-	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif,
-		       atomic_read(&rt_genid));
+	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2497,7 +2506,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
 		    net_eq(dev_net(rth->u.dst.dev), net) &&
-		    rth->rt_genid == atomic_read(&rt_genid)) {
+		    !rt_is_expired(rth)) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
@@ -2528,7 +2537,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 };
 
 
-static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
+static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
 {
 	struct rtable *ort = *rp;
 	struct rtable *rt = (struct rtable *)
@@ -2552,7 +2561,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
 		rt->idev = ort->idev;
 		if (rt->idev)
 			in_dev_hold(rt->idev);
-		rt->rt_genid = atomic_read(&rt_genid);
+		rt->rt_genid = rt_genid(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
 		rt->rt_dst = ort->rt_dst;
@@ -2588,7 +2597,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
 		err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
 				    flags ? XFRM_LOOKUP_WAIT : 0);
 		if (err == -EREMOTE)
-			err = ipv4_dst_blackhole(rp, flp);
+			err = ipv4_dst_blackhole(net, rp, flp);
 
 		return err;
 	}
@@ -2807,7 +2816,7 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 		     rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
 			if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
 				continue;
-			if (rt->rt_genid != atomic_read(&rt_genid))
+			if (rt_is_expired(rt))
 				continue;
 			skb->dst = dst_clone(&rt->u.dst);
 			if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
@@ -3081,6 +3090,10 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 
 static __net_init int rt_secret_timer_init(struct net *net)
 {
+	atomic_set(&net->ipv4.rt_genid,
+			(int) ((num_physpages ^ (num_physpages>>8)) ^
+			(jiffies ^ (jiffies >> 7))));
+
 	net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
 	net->ipv4.rt_secret_timer.data = (unsigned long)net;
 	init_timer_deferrable(&net->ipv4.rt_secret_timer);
@@ -3121,9 +3134,6 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
-	atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
-			     (jiffies ^ (jiffies >> 7))));
-
 #ifdef CONFIG_NET_CLS_ROUTE
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
 	if (!ip_rt_acct)
-- 
cgit v1.2.3-70-g09d2


From 32cb5b4e035e3d7b52f1e9de87920645a00e5234 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 5 Jul 2008 19:06:12 -0700
Subject: netns: selective flush of rt_cache

dst cache is marked as expired on the per/namespace basis by previous
path. Right now we have to implement selective cache shrinking. This
procedure has been ported from older OpenVz codebase.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 67c3ed772c2..113cd2512ba 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -699,6 +699,7 @@ static void rt_do_flush(int process_context)
 {
 	unsigned int i;
 	struct rtable *rth, *next;
+	struct rtable * tail;
 
 	for (i = 0; i <= rt_hash_mask; i++) {
 		if (process_context && need_resched())
@@ -708,11 +709,39 @@ static void rt_do_flush(int process_context)
 			continue;
 
 		spin_lock_bh(rt_hash_lock_addr(i));
+#ifdef CONFIG_NET_NS
+		{
+		struct rtable ** prev, * p;
+
+		rth = rt_hash_table[i].chain;
+
+		/* defer releasing the head of the list after spin_unlock */
+		for (tail = rth; tail; tail = tail->u.dst.rt_next)
+			if (!rt_is_expired(tail))
+				break;
+		if (rth != tail)
+			rt_hash_table[i].chain = tail;
+
+		/* call rt_free on entries after the tail requiring flush */
+		prev = &rt_hash_table[i].chain;
+		for (p = *prev; p; p = next) {
+			next = p->u.dst.rt_next;
+			if (!rt_is_expired(p)) {
+				prev = &p->u.dst.rt_next;
+			} else {
+				*prev = next;
+				rt_free(p);
+			}
+		}
+		}
+#else
 		rth = rt_hash_table[i].chain;
 		rt_hash_table[i].chain = NULL;
+		tail = NULL;
+#endif
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth; rth = next) {
+		for (; rth != tail; rth = next) {
 			next = rth->u.dst.rt_next;
 			rt_free(rth);
 		}
-- 
cgit v1.2.3-70-g09d2


From 629ca23c331ec75ac87b016debbb3c4d2fe62650 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 5 Jul 2008 21:18:07 -0700
Subject: MIB: add struct net to UDP_INC_STATS_USER

Nothing special - all the places already have a struct sock
at hands, so use the sock_net() net.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h |  2 +-
 net/ipv4/udp.c    | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/udp.h b/include/net/udp.h
index 7a868485524..13319094b13 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -158,7 +158,7 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 /*
  * 	SNMP statistics for UDP and UDP-Lite
  */
-#define UDP_INC_STATS_USER(field, is_udplite)			       do {   \
+#define UDP_INC_STATS_USER(net, field, is_udplite)	      do { (void)net; \
 	if (is_udplite) SNMP_INC_STATS_USER(udplite_statistics, field);       \
 	else		SNMP_INC_STATS_USER(udp_statistics, field);  }  while(0)
 #define UDP_INC_STATS_BH(field, is_udplite) 			       do  {  \
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3bbf6fb6e4f..c97da5394d7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -526,7 +526,8 @@ out:
 	up->len = 0;
 	up->pending = 0;
 	if (!err)
-		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
+		UDP_INC_STATS_USER(sock_net(sk),
+				UDP_MIB_OUTDATAGRAMS, is_udplite);
 	return err;
 }
 
@@ -725,7 +726,8 @@ out:
 	 * seems like overkill.
 	 */
 	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
-		UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
+		UDP_INC_STATS_USER(sock_net(sk),
+				UDP_MIB_SNDBUFERRORS, is_udplite);
 	}
 	return err;
 
@@ -888,7 +890,8 @@ try_again:
 		goto out_free;
 
 	if (!peeked)
-		UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+		UDP_INC_STATS_USER(sock_net(sk),
+				UDP_MIB_INDATAGRAMS, is_udplite);
 
 	sock_recv_timestamp(msg, sk, skb);
 
@@ -917,7 +920,7 @@ out:
 csum_copy_err:
 	lock_sock(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
-		UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
+		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	release_sock(sk);
 
 	if (noblock)
-- 
cgit v1.2.3-70-g09d2


From 0283328e2360bbf3081e97f1b720dd4c4dbae111 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 5 Jul 2008 21:18:48 -0700
Subject: MIB: add struct net to UDP_INC_STATS_BH

Two special cases here - one is rxrpc - I put init_net there
explicitly, since we haven't touched this part yet. The second
place is in __udp4_lib_rcv - we already have a struct net there,
but I have to move its initialization above to make it ready
at the "drop" label.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h    |  6 +++---
 net/ipv4/udp.c       | 18 ++++++++++--------
 net/rxrpc/ar-input.c |  5 +++--
 3 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/udp.h b/include/net/udp.h
index 13319094b13..7b18cfb2fe0 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -161,7 +161,7 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 #define UDP_INC_STATS_USER(net, field, is_udplite)	      do { (void)net; \
 	if (is_udplite) SNMP_INC_STATS_USER(udplite_statistics, field);       \
 	else		SNMP_INC_STATS_USER(udp_statistics, field);  }  while(0)
-#define UDP_INC_STATS_BH(field, is_udplite) 			       do  {  \
+#define UDP_INC_STATS_BH(net, field, is_udplite) 	      do { (void)net; \
 	if (is_udplite) SNMP_INC_STATS_BH(udplite_statistics, field);         \
 	else		SNMP_INC_STATS_BH(udp_statistics, field);    }  while(0)
 
@@ -176,12 +176,12 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 #define UDPX_INC_STATS_BH(sk, field) \
 	do { \
 		if ((sk)->sk_family == AF_INET) \
-			UDP_INC_STATS_BH(field, 0); \
+			UDP_INC_STATS_BH(sock_net(sk), field, 0); \
 		else \
 			UDP6_INC_STATS_BH(field, 0); \
 	} while (0);
 #else
-#define UDPX_INC_STATS_BH(sk, field) UDP_INC_STATS_BH(field, 0)
+#define UDPX_INC_STATS_BH(sk, field) UDP_INC_STATS_BH(sock_net(sk), field, 0)
 #endif
 
 /* /proc */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c97da5394d7..7187121e922 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -991,7 +991,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 
 			ret = (*up->encap_rcv)(sk, skb);
 			if (ret <= 0) {
-				UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
+				UDP_INC_STATS_BH(sock_net(sk),
+						 UDP_MIB_INDATAGRAMS,
 						 is_udplite);
 				return -ret;
 			}
@@ -1044,7 +1045,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
 		if (rc == -ENOMEM) {
-			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
+			UDP_INC_STATS_BH(sock_net(sk),
+					UDP_MIB_RCVBUFERRORS, is_udplite);
 			atomic_inc(&sk->sk_drops);
 		}
 		goto drop;
@@ -1053,7 +1055,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	return 0;
 
 drop:
-	UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	kfree_skb(skb);
 	return -1;
 }
@@ -1161,7 +1163,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	struct rtable *rt = (struct rtable*)skb->dst;
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
-	struct net *net;
+	struct net *net = dev_net(skb->dev);
 
 	/*
 	 *  Validate the packet.
@@ -1183,7 +1185,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (udp4_csum_init(skb, uh, proto))
 		goto csum_error;
 
-	net = dev_net(skb->dev);
 	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
 		return __udp4_lib_mcast_deliver(net, skb, uh,
 				saddr, daddr, udptable);
@@ -1217,7 +1218,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (udp_lib_checksum_complete(skb))
 		goto csum_error;
 
-	UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+	UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 
 	/*
@@ -1251,7 +1252,7 @@ csum_error:
 		       ntohs(uh->dest),
 		       ulen);
 drop:
-	UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+	UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
 	return 0;
 }
@@ -1458,7 +1459,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		spin_lock_bh(&rcvq->lock);
 		while ((skb = skb_peek(rcvq)) != NULL &&
 		       udp_lib_checksum_complete(skb)) {
-			UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
+			UDP_INC_STATS_BH(sock_net(sk),
+					UDP_MIB_INERRORS, is_lite);
 			__skb_unlink(skb, rcvq);
 			kfree_skb(skb);
 		}
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index f8a699e9296..f98c8027e5c 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -21,6 +21,7 @@
 #include <net/af_rxrpc.h>
 #include <net/ip.h>
 #include <net/udp.h>
+#include <net/net_namespace.h>
 #include "ar-internal.h"
 
 unsigned long rxrpc_ack_timeout = 1;
@@ -708,12 +709,12 @@ void rxrpc_data_ready(struct sock *sk, int count)
 	if (skb_checksum_complete(skb)) {
 		rxrpc_free_skb(skb);
 		rxrpc_put_local(local);
-		UDP_INC_STATS_BH(UDP_MIB_INERRORS, 0);
+		UDP_INC_STATS_BH(&init_net, UDP_MIB_INERRORS, 0);
 		_leave(" [CSUM failed]");
 		return;
 	}
 
-	UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, 0);
+	UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0);
 
 	/* the socket buffer we have is owned by UDP, with UDP's data all over
 	 * it, but we really want our own */
-- 
cgit v1.2.3-70-g09d2


From 235b9f7ac53489011d32efeb89e12e308fdd2c64 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 5 Jul 2008 21:19:20 -0700
Subject: MIB: add struct net to UDP6_INC_STATS_USER

As simple as the patch #1 in this set.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h |  2 +-
 net/ipv6/udp.c    | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/udp.h b/include/net/udp.h
index 7b18cfb2fe0..bb5b9ec8563 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -168,7 +168,7 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 #define UDP6_INC_STATS_BH(field, is_udplite) 			      do  {  \
 	if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field);         \
 	else		SNMP_INC_STATS_BH(udp_stats_in6, field);    } while(0)
-#define UDP6_INC_STATS_USER(field, is_udplite)			       do {    \
+#define UDP6_INC_STATS_USER(net, field, is_udplite)	    do { (void)net;    \
 	if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field);         \
 	else		SNMP_INC_STATS_USER(udp_stats_in6, field);    } while(0)
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f91e1df0d25..833f715e4bf 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -166,7 +166,8 @@ try_again:
 		goto out_free;
 
 	if (!peeked)
-		UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+		UDP6_INC_STATS_USER(sock_net(sk),
+				UDP_MIB_INDATAGRAMS, is_udplite);
 
 	sock_recv_timestamp(msg, sk, skb);
 
@@ -213,7 +214,7 @@ out:
 csum_copy_err:
 	lock_sock(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
-		UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
+		UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	release_sock(sk);
 
 	if (flags & MSG_DONTWAIT)
@@ -591,7 +592,8 @@ out:
 	up->len = 0;
 	up->pending = 0;
 	if (!err)
-		UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
+		UDP6_INC_STATS_USER(sock_net(sk),
+				UDP_MIB_OUTDATAGRAMS, is_udplite);
 	return err;
 }
 
@@ -873,7 +875,8 @@ out:
 	 * seems like overkill.
 	 */
 	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
-		UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
+		UDP6_INC_STATS_USER(sock_net(sk),
+				UDP_MIB_SNDBUFERRORS, is_udplite);
 	}
 	return err;
 
-- 
cgit v1.2.3-70-g09d2


From ef28d1a20f9f18ebf1be15ef6f097a76f9a63499 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 5 Jul 2008 21:19:40 -0700
Subject: MIB: add struct net to UDP6_INC_STATS_BH

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h |  4 ++--
 net/ipv6/udp.c    | 13 +++++++------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/udp.h b/include/net/udp.h
index bb5b9ec8563..3e551592aa7 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -165,7 +165,7 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 	if (is_udplite) SNMP_INC_STATS_BH(udplite_statistics, field);         \
 	else		SNMP_INC_STATS_BH(udp_statistics, field);    }  while(0)
 
-#define UDP6_INC_STATS_BH(field, is_udplite) 			      do  {  \
+#define UDP6_INC_STATS_BH(net, field, is_udplite) 	    do { (void)net;  \
 	if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field);         \
 	else		SNMP_INC_STATS_BH(udp_stats_in6, field);    } while(0)
 #define UDP6_INC_STATS_USER(net, field, is_udplite)	    do { (void)net;    \
@@ -178,7 +178,7 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 		if ((sk)->sk_family == AF_INET) \
 			UDP_INC_STATS_BH(sock_net(sk), field, 0); \
 		else \
-			UDP6_INC_STATS_BH(field, 0); \
+			UDP6_INC_STATS_BH(sock_net(sk), field, 0); \
 	} while (0);
 #else
 #define UDPX_INC_STATS_BH(sk, field) UDP_INC_STATS_BH(sock_net(sk), field, 0)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 833f715e4bf..d1477b350f7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -299,7 +299,8 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
 		if (rc == -ENOMEM) {
-			UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
+			UDP6_INC_STATS_BH(sock_net(sk),
+					UDP_MIB_RCVBUFERRORS, is_udplite);
 			atomic_inc(&sk->sk_drops);
 		}
 		goto drop;
@@ -307,7 +308,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 
 	return 0;
 drop:
-	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	kfree_skb(skb);
 	return -1;
 }
@@ -439,7 +440,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	struct net_device *dev = skb->dev;
 	struct in6_addr *saddr, *daddr;
 	u32 ulen = 0;
-	struct net *net;
+	struct net *net = dev_net(skb->dev);
 
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 		goto short_packet;
@@ -474,7 +475,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (udp6_csum_init(skb, uh, proto))
 		goto discard;
 
-	net = dev_net(skb->dev);
 	/*
 	 *	Multicast receive code
 	 */
@@ -497,7 +497,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 
 		if (udp_lib_checksum_complete(skb))
 			goto discard;
-		UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+		UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
+				proto == IPPROTO_UDPLITE);
 
 		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
 
@@ -522,7 +523,7 @@ short_packet:
 		       ulen, skb->len);
 
 discard:
-	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From a19800d704177caaa5874baf5819307c5b7d5e4f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 21:25:39 -0700
Subject: net: Add STP demux layer

Add small STP demux layer for demuxing STP PDUs based on MAC address.
This is needed to run both GARP and STP in parallel (or even load the
modules) since both use LLC_SAP_BSPAN.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/stp.h |  14 ++++++++
 net/802/Kconfig   |   3 ++
 net/802/Makefile  |   1 +
 net/802/stp.c     | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/Kconfig       |   1 +
 5 files changed, 121 insertions(+)
 create mode 100644 include/net/stp.h
 create mode 100644 net/802/Kconfig
 create mode 100644 net/802/stp.c

(limited to 'net')

diff --git a/include/net/stp.h b/include/net/stp.h
new file mode 100644
index 00000000000..ad447f10541
--- /dev/null
+++ b/include/net/stp.h
@@ -0,0 +1,14 @@
+#ifndef _NET_STP_H
+#define _NET_STP_H
+
+struct stp_proto {
+	unsigned char	group_address[ETH_ALEN];
+	void		(*rcv)(const struct stp_proto *, struct sk_buff *,
+			       struct net_device *);
+	void		*data;
+};
+
+extern int stp_proto_register(const struct stp_proto *proto);
+extern void stp_proto_unregister(const struct stp_proto *proto);
+
+#endif /* _NET_STP_H */
diff --git a/net/802/Kconfig b/net/802/Kconfig
new file mode 100644
index 00000000000..01cb0943626
--- /dev/null
+++ b/net/802/Kconfig
@@ -0,0 +1,3 @@
+config STP
+	tristate
+	select LLC
diff --git a/net/802/Makefile b/net/802/Makefile
index 68569ffddea..c441d895ac2 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_FDDI)	+=                 fddi.o
 obj-$(CONFIG_HIPPI)	+=                 hippi.o
 obj-$(CONFIG_IPX)	+= p8022.o psnap.o p8023.o
 obj-$(CONFIG_ATALK)	+= p8022.o psnap.o
+obj-$(CONFIG_STP)	+= stp.o
diff --git a/net/802/stp.c b/net/802/stp.c
new file mode 100644
index 00000000000..0b7a24452d1
--- /dev/null
+++ b/net/802/stp.c
@@ -0,0 +1,102 @@
+/*
+ *	STP SAP demux
+ *
+ *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/llc.h>
+#include <net/llc.h>
+#include <net/llc_pdu.h>
+#include <net/stp.h>
+
+/* 01:80:c2:00:00:20 - 01:80:c2:00:00:2F */
+#define GARP_ADDR_MIN	0x20
+#define GARP_ADDR_MAX	0x2F
+#define GARP_ADDR_RANGE	(GARP_ADDR_MAX - GARP_ADDR_MIN)
+
+static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
+static const struct stp_proto *stp_proto __read_mostly;
+
+static struct llc_sap *sap __read_mostly;
+static unsigned int sap_registered;
+static DEFINE_MUTEX(stp_proto_mutex);
+
+/* Called under rcu_read_lock from LLC */
+static int stp_pdu_rcv(struct sk_buff *skb, struct net_device *dev,
+		       struct packet_type *pt, struct net_device *orig_dev)
+{
+	const struct ethhdr *eh = eth_hdr(skb);
+	const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+	const struct stp_proto *proto;
+
+	if (pdu->ssap != LLC_SAP_BSPAN ||
+	    pdu->dsap != LLC_SAP_BSPAN ||
+	    pdu->ctrl_1 != LLC_PDU_TYPE_U)
+		goto err;
+
+	if (eh->h_dest[5] >= GARP_ADDR_MIN && eh->h_dest[5] <= GARP_ADDR_MAX) {
+		proto = rcu_dereference(garp_protos[eh->h_dest[5] -
+						    GARP_ADDR_MIN]);
+		if (proto &&
+		    compare_ether_addr(eh->h_dest, proto->group_address))
+			goto err;
+	} else
+		proto = rcu_dereference(stp_proto);
+
+	if (!proto)
+		goto err;
+
+	proto->rcv(proto, skb, dev);
+	return 0;
+
+err:
+	kfree_skb(skb);
+	return 0;
+}
+
+int stp_proto_register(const struct stp_proto *proto)
+{
+	int err = 0;
+
+	mutex_lock(&stp_proto_mutex);
+	if (sap_registered++ == 0) {
+		sap = llc_sap_open(LLC_SAP_BSPAN, stp_pdu_rcv);
+		if (!sap) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+	if (is_zero_ether_addr(proto->group_address))
+		rcu_assign_pointer(stp_proto, proto);
+	else
+		rcu_assign_pointer(garp_protos[proto->group_address[5] -
+					       GARP_ADDR_MIN], proto);
+out:
+	mutex_unlock(&stp_proto_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(stp_proto_register);
+
+void stp_proto_unregister(const struct stp_proto *proto)
+{
+	mutex_lock(&stp_proto_mutex);
+	if (is_zero_ether_addr(proto->group_address))
+		rcu_assign_pointer(stp_proto, NULL);
+	else
+		rcu_assign_pointer(garp_protos[proto->group_address[5] -
+					       GARP_ADDR_MIN], NULL);
+	synchronize_rcu();
+
+	if (--sap_registered == 0)
+		llc_sap_put(sap);
+	mutex_unlock(&stp_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(stp_proto_unregister);
+
+MODULE_LICENSE("GPL");
diff --git a/net/Kconfig b/net/Kconfig
index acbf7c60e89..b9866875174 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -181,6 +181,7 @@ source "net/dccp/Kconfig"
 source "net/sctp/Kconfig"
 source "net/tipc/Kconfig"
 source "net/atm/Kconfig"
+source "net/802/Kconfig"
 source "net/bridge/Kconfig"
 source "net/8021q/Kconfig"
 source "net/decnet/Kconfig"
-- 
cgit v1.2.3-70-g09d2


From 7c85fbf0657f216557b0c9c4a2e4e07f37d8bb8c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 21:25:56 -0700
Subject: bridge: Use STP demux

Use the STP demux layer for receiving STP PDUs instead of directly
registering with LLC.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/Kconfig       |  1 +
 net/bridge/br.c          | 16 +++++++++-------
 net/bridge/br_private.h  |  5 +++--
 net/bridge/br_stp_bpdu.c | 12 +++---------
 4 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 12265aff709..e143ca67888 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -5,6 +5,7 @@
 config BRIDGE
 	tristate "802.1d Ethernet Bridging"
 	select LLC
+	select STP
 	---help---
 	  If you say Y here, then your Linux box will be able to act as an
 	  Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/bridge/br.c b/net/bridge/br.c
index cede010f4dd..573acdf6f9f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -18,21 +18,24 @@
 #include <linux/init.h>
 #include <linux/llc.h>
 #include <net/llc.h>
+#include <net/stp.h>
 
 #include "br_private.h"
 
 int (*br_should_route_hook)(struct sk_buff *skb);
 
-static struct llc_sap *br_stp_sap;
+static const struct stp_proto br_stp_proto = {
+	.rcv	= br_stp_rcv,
+};
 
 static int __init br_init(void)
 {
 	int err;
 
-	br_stp_sap = llc_sap_open(LLC_SAP_BSPAN, br_stp_rcv);
-	if (!br_stp_sap) {
+	err = stp_proto_register(&br_stp_proto);
+	if (err < 0) {
 		printk(KERN_ERR "bridge: can't register sap for STP\n");
-		return -EADDRINUSE;
+		return err;
 	}
 
 	err = br_fdb_init();
@@ -65,13 +68,13 @@ err_out2:
 err_out1:
 	br_fdb_fini();
 err_out:
-	llc_sap_put(br_stp_sap);
+	stp_proto_unregister(&br_stp_proto);
 	return err;
 }
 
 static void __exit br_deinit(void)
 {
-	rcu_assign_pointer(br_stp_sap->rcv_func, NULL);
+	stp_proto_unregister(&br_stp_proto);
 
 	br_netlink_fini();
 	unregister_netdevice_notifier(&br_device_notifier);
@@ -82,7 +85,6 @@ static void __exit br_deinit(void)
 	synchronize_net();
 
 	br_netfilter_fini();
-	llc_sap_put(br_stp_sap);
 	br_fdb_get_hook = NULL;
 	br_fdb_put_hook = NULL;
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8593c9f6a30..815ed38925b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -226,8 +226,9 @@ extern void br_stp_set_path_cost(struct net_bridge_port *p,
 extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id);
 
 /* br_stp_bpdu.c */
-extern int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
-		      struct packet_type *pt, struct net_device *orig_dev);
+struct stp_proto;
+extern void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
+		       struct net_device *dev);
 
 /* br_stp_timer.c */
 extern void br_stp_timer_init(struct net_bridge *br);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 9dc2de65696..99647617451 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -18,6 +18,7 @@
 #include <net/net_namespace.h>
 #include <net/llc.h>
 #include <net/llc_pdu.h>
+#include <net/stp.h>
 #include <asm/unaligned.h>
 
 #include "br_private.h"
@@ -131,10 +132,9 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
  *
  * NO locks, but rcu_read_lock (preempt_disabled)
  */
-int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
-	       struct packet_type *pt, struct net_device *orig_dev)
+void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
+		struct net_device *dev)
 {
-	const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	struct net_bridge_port *p = rcu_dereference(dev->br_port);
 	struct net_bridge *br;
@@ -146,11 +146,6 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!p)
 		goto err;
 
-	if (pdu->ssap != LLC_SAP_BSPAN
-	    || pdu->dsap != LLC_SAP_BSPAN
-	    || pdu->ctrl_1 != LLC_PDU_TYPE_U)
-		goto err;
-
 	if (!pskb_may_pull(skb, 4))
 		goto err;
 
@@ -224,5 +219,4 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
 	spin_unlock(&br->lock);
  err:
 	kfree_skb(skb);
-	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From eca9ebac651f774d8b10fce7c5d173c3c3d3394f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 21:26:13 -0700
Subject: net: Add GARP applicant-only participant

Add an implementation of the GARP (Generic Attribute Registration Protocol)
applicant-only participant. This will be used by the following patch to
add GVRP support to the VLAN code.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   2 +
 include/net/garp.h        | 127 ++++++++++
 net/802/Kconfig           |   4 +
 net/802/Makefile          |   1 +
 net/802/garp.c            | 633 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 767 insertions(+)
 create mode 100644 include/net/garp.h
 create mode 100644 net/802/garp.c

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 56dadb528f6..e009c6fbf5c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -740,6 +740,8 @@ struct net_device
 	struct net_bridge_port	*br_port;
 	/* macvlan */
 	struct macvlan_port	*macvlan_port;
+	/* GARP */
+	struct garp_port	*garp_port;
 
 	/* class/net/name entry */
 	struct device		dev;
diff --git a/include/net/garp.h b/include/net/garp.h
new file mode 100644
index 00000000000..73c772395f5
--- /dev/null
+++ b/include/net/garp.h
@@ -0,0 +1,127 @@
+#ifndef _NET_GARP_H
+#define _NET_GARP_H
+
+#include <net/stp.h>
+
+#define GARP_PROTOCOL_ID	0x1
+#define GARP_END_MARK		0x0
+
+struct garp_pdu_hdr {
+	__be16	protocol;
+};
+
+struct garp_msg_hdr {
+	u8	attrtype;
+};
+
+enum garp_attr_event {
+	GARP_LEAVE_ALL,
+	GARP_JOIN_EMPTY,
+	GARP_JOIN_IN,
+	GARP_LEAVE_EMPTY,
+	GARP_LEAVE_IN,
+	GARP_EMPTY,
+};
+
+struct garp_attr_hdr {
+	u8	len;
+	u8	event;
+	u8	data[];
+};
+
+struct garp_skb_cb {
+	u8	cur_type;
+};
+
+static inline struct garp_skb_cb *garp_cb(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct garp_skb_cb) >
+		     FIELD_SIZEOF(struct sk_buff, cb));
+	return (struct garp_skb_cb *)skb->cb;
+}
+
+enum garp_applicant_state {
+	GARP_APPLICANT_INVALID,
+	GARP_APPLICANT_VA,
+	GARP_APPLICANT_AA,
+	GARP_APPLICANT_QA,
+	GARP_APPLICANT_LA,
+	GARP_APPLICANT_VP,
+	GARP_APPLICANT_AP,
+	GARP_APPLICANT_QP,
+	GARP_APPLICANT_VO,
+	GARP_APPLICANT_AO,
+	GARP_APPLICANT_QO,
+	__GARP_APPLICANT_MAX
+};
+#define GARP_APPLICANT_MAX	(__GARP_APPLICANT_MAX - 1)
+
+enum garp_event {
+	GARP_EVENT_REQ_JOIN,
+	GARP_EVENT_REQ_LEAVE,
+	GARP_EVENT_R_JOIN_IN,
+	GARP_EVENT_R_JOIN_EMPTY,
+	GARP_EVENT_R_EMPTY,
+	GARP_EVENT_R_LEAVE_IN,
+	GARP_EVENT_R_LEAVE_EMPTY,
+	GARP_EVENT_TRANSMIT_PDU,
+	__GARP_EVENT_MAX
+};
+#define GARP_EVENT_MAX		(__GARP_EVENT_MAX - 1)
+
+enum garp_action {
+	GARP_ACTION_NONE,
+	GARP_ACTION_S_JOIN_IN,
+	GARP_ACTION_S_LEAVE_EMPTY,
+};
+
+struct garp_attr {
+	struct rb_node			node;
+	enum garp_applicant_state	state;
+	u8				type;
+	u8				dlen;
+	unsigned char			data[];
+};
+
+enum garp_applications {
+	__GARP_APPLICATION_MAX
+};
+#define GARP_APPLICATION_MAX	(__GARP_APPLICATION_MAX - 1)
+
+struct garp_application {
+	enum garp_applications	type;
+	unsigned int		maxattr;
+	struct stp_proto	proto;
+};
+
+struct garp_applicant {
+	struct garp_application	*app;
+	struct net_device	*dev;
+	struct timer_list	join_timer;
+
+	spinlock_t		lock;
+	struct sk_buff_head	queue;
+	struct sk_buff		*pdu;
+	struct rb_root		gid;
+};
+
+struct garp_port {
+	struct garp_applicant	*applicants[GARP_APPLICATION_MAX + 1];
+};
+
+extern int	garp_register_application(struct garp_application *app);
+extern void	garp_unregister_application(struct garp_application *app);
+
+extern int	garp_init_applicant(struct net_device *dev,
+				    struct garp_application *app);
+extern void	garp_uninit_applicant(struct net_device *dev,
+				      struct garp_application *app);
+
+extern int	garp_request_join(const struct net_device *dev,
+				  const struct garp_application *app,
+				  const void *data, u8 len, u8 type);
+extern void	garp_request_leave(const struct net_device *dev,
+				   const struct garp_application *app,
+				   const void *data, u8 len, u8 type);
+
+#endif /* _NET_GARP_H */
diff --git a/net/802/Kconfig b/net/802/Kconfig
index 01cb0943626..be33d27c8e6 100644
--- a/net/802/Kconfig
+++ b/net/802/Kconfig
@@ -1,3 +1,7 @@
 config STP
 	tristate
 	select LLC
+
+config GARP
+	tristate
+	select STP
diff --git a/net/802/Makefile b/net/802/Makefile
index c441d895ac2..7893d679910 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_HIPPI)	+=                 hippi.o
 obj-$(CONFIG_IPX)	+= p8022.o psnap.o p8023.o
 obj-$(CONFIG_ATALK)	+= p8022.o psnap.o
 obj-$(CONFIG_STP)	+= stp.o
+obj-$(CONFIG_GARP)	+= garp.o
diff --git a/net/802/garp.c b/net/802/garp.c
new file mode 100644
index 00000000000..3b78f7b74fd
--- /dev/null
+++ b/net/802/garp.c
@@ -0,0 +1,633 @@
+/*
+ *	IEEE 802.1D Generic Attribute Registration Protocol (GARP)
+ *
+ *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/llc.h>
+#include <net/llc.h>
+#include <net/llc_pdu.h>
+#include <net/garp.h>
+#include <asm/unaligned.h>
+
+static unsigned int garp_join_time __read_mostly = 200;
+module_param(garp_join_time, uint, 0644);
+MODULE_PARM_DESC(garp_join_time, "Join time in ms (default 200ms)");
+MODULE_LICENSE("GPL");
+
+static const struct garp_state_trans {
+	u8	state;
+	u8	action;
+} garp_applicant_state_table[GARP_APPLICANT_MAX + 1][GARP_EVENT_MAX + 1] = {
+	[GARP_APPLICANT_VA] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_AA,
+						    .action = GARP_ACTION_S_JOIN_IN },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_AA },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VA },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_VA },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_VA },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_LA },
+	},
+	[GARP_APPLICANT_AA] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_QA,
+						    .action = GARP_ACTION_S_JOIN_IN },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_QA },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VA },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_VA },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_VA },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_LA },
+	},
+	[GARP_APPLICANT_QA] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_QA },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VA },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_VA },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_LA },
+	},
+	[GARP_APPLICANT_LA] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_VO,
+						    .action = GARP_ACTION_S_LEAVE_EMPTY },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_LA },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_LA },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_LA },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_VA },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_INVALID },
+	},
+	[GARP_APPLICANT_VP] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_AA,
+						    .action = GARP_ACTION_S_JOIN_IN },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_AP },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_VO },
+	},
+	[GARP_APPLICANT_AP] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_QA,
+						    .action = GARP_ACTION_S_JOIN_IN },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_QP },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_AO },
+	},
+	[GARP_APPLICANT_QP] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_QP },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_QO },
+	},
+	[GARP_APPLICANT_VO] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_AO },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_VP },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_INVALID },
+	},
+	[GARP_APPLICANT_AO] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_QO },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_AP },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_INVALID },
+	},
+	[GARP_APPLICANT_QO] = {
+		[GARP_EVENT_TRANSMIT_PDU]	= { .state = GARP_APPLICANT_INVALID },
+		[GARP_EVENT_R_JOIN_IN]		= { .state = GARP_APPLICANT_QO },
+		[GARP_EVENT_R_JOIN_EMPTY]	= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_EMPTY]		= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_LEAVE_IN]		= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_R_LEAVE_EMPTY]	= { .state = GARP_APPLICANT_VO },
+		[GARP_EVENT_REQ_JOIN]		= { .state = GARP_APPLICANT_QP },
+		[GARP_EVENT_REQ_LEAVE]		= { .state = GARP_APPLICANT_INVALID },
+	},
+};
+
+static int garp_attr_cmp(const struct garp_attr *attr,
+			 const void *data, u8 len, u8 type)
+{
+	if (attr->type != type)
+		return attr->type - type;
+	if (attr->dlen != len)
+		return attr->dlen - len;
+	return memcmp(attr->data, data, len);
+}
+
+static struct garp_attr *garp_attr_lookup(const struct garp_applicant *app,
+					  const void *data, u8 len, u8 type)
+{
+	struct rb_node *parent = app->gid.rb_node;
+	struct garp_attr *attr;
+	int d;
+
+	while (parent) {
+		attr = rb_entry(parent, struct garp_attr, node);
+		d = garp_attr_cmp(attr, data, len, type);
+		if (d < 0)
+			parent = parent->rb_left;
+		else if (d > 0)
+			parent = parent->rb_right;
+		else
+			return attr;
+	}
+	return NULL;
+}
+
+static void garp_attr_insert(struct garp_applicant *app, struct garp_attr *new)
+{
+	struct rb_node *parent = NULL, **p = &app->gid.rb_node;
+	struct garp_attr *attr;
+	int d;
+
+	while (*p) {
+		parent = *p;
+		attr = rb_entry(parent, struct garp_attr, node);
+		d = garp_attr_cmp(attr, new->data, new->dlen, new->type);
+		if (d < 0)
+			p = &parent->rb_left;
+		else if (d > 0)
+			p = &parent->rb_right;
+	}
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, &app->gid);
+}
+
+static struct garp_attr *garp_attr_create(struct garp_applicant *app,
+					  const void *data, u8 len, u8 type)
+{
+	struct garp_attr *attr;
+
+	attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC);
+	if (!attr)
+		return attr;
+	attr->state = GARP_APPLICANT_VO;
+	attr->type  = type;
+	attr->dlen  = len;
+	memcpy(attr->data, data, len);
+	garp_attr_insert(app, attr);
+	return attr;
+}
+
+static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr)
+{
+	rb_erase(&attr->node, &app->gid);
+	kfree(attr);
+}
+
+static int garp_pdu_init(struct garp_applicant *app)
+{
+	struct sk_buff *skb;
+	struct garp_pdu_hdr *gp;
+
+#define LLC_RESERVE	sizeof(struct llc_pdu_un)
+	skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev),
+			GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	skb->dev = app->dev;
+	skb->protocol = htons(ETH_P_802_2);
+	skb_reserve(skb, LL_RESERVED_SPACE(app->dev) + LLC_RESERVE);
+
+	gp = (struct garp_pdu_hdr *)__skb_put(skb, sizeof(*gp));
+	put_unaligned(htons(GARP_PROTOCOL_ID), &gp->protocol);
+
+	app->pdu = skb;
+	return 0;
+}
+
+static int garp_pdu_append_end_mark(struct garp_applicant *app)
+{
+	if (skb_tailroom(app->pdu) < sizeof(u8))
+		return -1;
+	*(u8 *)__skb_put(app->pdu, sizeof(u8)) = GARP_END_MARK;
+	return 0;
+}
+
+static void garp_pdu_queue(struct garp_applicant *app)
+{
+	if (!app->pdu)
+		return;
+
+	garp_pdu_append_end_mark(app);
+	garp_pdu_append_end_mark(app);
+
+	llc_pdu_header_init(app->pdu, LLC_PDU_TYPE_U, LLC_SAP_BSPAN,
+			    LLC_SAP_BSPAN, LLC_PDU_CMD);
+	llc_pdu_init_as_ui_cmd(app->pdu);
+	llc_mac_hdr_init(app->pdu, app->dev->dev_addr,
+			 app->app->proto.group_address);
+
+	skb_queue_tail(&app->queue, app->pdu);
+	app->pdu = NULL;
+}
+
+static void garp_queue_xmit(struct garp_applicant *app)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&app->queue)))
+		dev_queue_xmit(skb);
+}
+
+static int garp_pdu_append_msg(struct garp_applicant *app, u8 attrtype)
+{
+	struct garp_msg_hdr *gm;
+
+	if (skb_tailroom(app->pdu) < sizeof(*gm))
+		return -1;
+	gm = (struct garp_msg_hdr *)__skb_put(app->pdu, sizeof(*gm));
+	gm->attrtype = attrtype;
+	garp_cb(app->pdu)->cur_type = attrtype;
+	return 0;
+}
+
+static int garp_pdu_append_attr(struct garp_applicant *app,
+				const struct garp_attr *attr,
+				enum garp_attr_event event)
+{
+	struct garp_attr_hdr *ga;
+	unsigned int len;
+	int err;
+again:
+	if (!app->pdu) {
+		err = garp_pdu_init(app);
+		if (err < 0)
+			return err;
+	}
+
+	if (garp_cb(app->pdu)->cur_type != attr->type) {
+		if (garp_cb(app->pdu)->cur_type &&
+		    garp_pdu_append_end_mark(app) < 0)
+			goto queue;
+		if (garp_pdu_append_msg(app, attr->type) < 0)
+			goto queue;
+	}
+
+	len = sizeof(*ga) + attr->dlen;
+	if (skb_tailroom(app->pdu) < len)
+		goto queue;
+	ga = (struct garp_attr_hdr *)__skb_put(app->pdu, len);
+	ga->len   = len;
+	ga->event = event;
+	memcpy(ga->data, attr->data, attr->dlen);
+	return 0;
+
+queue:
+	garp_pdu_queue(app);
+	goto again;
+}
+
+static void garp_attr_event(struct garp_applicant *app,
+			    struct garp_attr *attr, enum garp_event event)
+{
+	enum garp_applicant_state state;
+
+	state = garp_applicant_state_table[attr->state][event].state;
+	if (state == GARP_APPLICANT_INVALID)
+		return;
+
+	switch (garp_applicant_state_table[attr->state][event].action) {
+	case GARP_ACTION_NONE:
+		break;
+	case GARP_ACTION_S_JOIN_IN:
+		garp_pdu_append_attr(app, attr, GARP_JOIN_IN);
+		break;
+	case GARP_ACTION_S_LEAVE_EMPTY:
+		garp_pdu_append_attr(app, attr, GARP_LEAVE_EMPTY);
+		/* As a pure applicant, sending a leave message implies that
+		 * the attribute was unregistered and can be destroyed. */
+		garp_attr_destroy(app, attr);
+		return;
+	default:
+		WARN_ON(1);
+	}
+
+	attr->state = state;
+}
+
+int garp_request_join(const struct net_device *dev,
+		      const struct garp_application *appl,
+		      const void *data, u8 len, u8 type)
+{
+	struct garp_port *port = dev->garp_port;
+	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_attr *attr;
+
+	spin_lock_bh(&app->lock);
+	attr = garp_attr_create(app, data, len, type);
+	if (!attr) {
+		spin_unlock_bh(&app->lock);
+		return -ENOMEM;
+	}
+	garp_attr_event(app, attr, GARP_EVENT_REQ_JOIN);
+	spin_unlock_bh(&app->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(garp_request_join);
+
+void garp_request_leave(const struct net_device *dev,
+			const struct garp_application *appl,
+			const void *data, u8 len, u8 type)
+{
+	struct garp_port *port = dev->garp_port;
+	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_attr *attr;
+
+	spin_lock_bh(&app->lock);
+	attr = garp_attr_lookup(app, data, len, type);
+	if (!attr) {
+		spin_unlock_bh(&app->lock);
+		return;
+	}
+	garp_attr_event(app, attr, GARP_EVENT_REQ_LEAVE);
+	spin_unlock_bh(&app->lock);
+}
+EXPORT_SYMBOL_GPL(garp_request_leave);
+
+static void garp_gid_event(struct garp_applicant *app, enum garp_event event)
+{
+	struct rb_node *node, *next;
+	struct garp_attr *attr;
+
+	for (node = rb_first(&app->gid);
+	     next = node ? rb_next(node) : NULL, node != NULL;
+	     node = next) {
+		attr = rb_entry(node, struct garp_attr, node);
+		garp_attr_event(app, attr, event);
+	}
+}
+
+static void garp_join_timer_arm(struct garp_applicant *app)
+{
+	unsigned long delay;
+
+	delay = (u64)msecs_to_jiffies(garp_join_time) * net_random() >> 32;
+	mod_timer(&app->join_timer, jiffies + delay);
+}
+
+static void garp_join_timer(unsigned long data)
+{
+	struct garp_applicant *app = (struct garp_applicant *)data;
+
+	spin_lock(&app->lock);
+	garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
+	garp_pdu_queue(app);
+	spin_unlock(&app->lock);
+
+	garp_queue_xmit(app);
+	garp_join_timer_arm(app);
+}
+
+static int garp_pdu_parse_end_mark(struct sk_buff *skb)
+{
+	if (!pskb_may_pull(skb, sizeof(u8)))
+		return -1;
+	if (*skb->data == GARP_END_MARK) {
+		skb_pull(skb, sizeof(u8));
+		return -1;
+	}
+	return 0;
+}
+
+static int garp_pdu_parse_attr(struct garp_applicant *app, struct sk_buff *skb,
+			       u8 attrtype)
+{
+	const struct garp_attr_hdr *ga;
+	struct garp_attr *attr;
+	enum garp_event event;
+	unsigned int dlen;
+
+	if (!pskb_may_pull(skb, sizeof(*ga)))
+		return -1;
+	ga = (struct garp_attr_hdr *)skb->data;
+	if (ga->len < sizeof(*ga))
+		return -1;
+
+	if (!pskb_may_pull(skb, ga->len))
+		return -1;
+	skb_pull(skb, ga->len);
+	dlen = sizeof(*ga) - ga->len;
+
+	if (attrtype > app->app->maxattr)
+		return 0;
+
+	switch (ga->event) {
+	case GARP_LEAVE_ALL:
+		if (dlen != 0)
+			return -1;
+		garp_gid_event(app, GARP_EVENT_R_LEAVE_EMPTY);
+		return 0;
+	case GARP_JOIN_EMPTY:
+		event = GARP_EVENT_R_JOIN_EMPTY;
+		break;
+	case GARP_JOIN_IN:
+		event = GARP_EVENT_R_JOIN_IN;
+		break;
+	case GARP_LEAVE_EMPTY:
+		event = GARP_EVENT_R_LEAVE_EMPTY;
+		break;
+	case GARP_EMPTY:
+		event = GARP_EVENT_R_EMPTY;
+		break;
+	default:
+		return 0;
+	}
+
+	if (dlen == 0)
+		return -1;
+	attr = garp_attr_lookup(app, ga->data, dlen, attrtype);
+	if (attr == NULL)
+		return 0;
+	garp_attr_event(app, attr, event);
+	return 0;
+}
+
+static int garp_pdu_parse_msg(struct garp_applicant *app, struct sk_buff *skb)
+{
+	const struct garp_msg_hdr *gm;
+
+	if (!pskb_may_pull(skb, sizeof(*gm)))
+		return -1;
+	gm = (struct garp_msg_hdr *)skb->data;
+	if (gm->attrtype == 0)
+		return -1;
+	skb_pull(skb, sizeof(*gm));
+
+	while (skb->len > 0) {
+		if (garp_pdu_parse_attr(app, skb, gm->attrtype) < 0)
+			return -1;
+		if (garp_pdu_parse_end_mark(skb) < 0)
+			break;
+	}
+	return 0;
+}
+
+static void garp_pdu_rcv(const struct stp_proto *proto, struct sk_buff *skb,
+			 struct net_device *dev)
+{
+	struct garp_application *appl = proto->data;
+	struct garp_port *port;
+	struct garp_applicant *app;
+	const struct garp_pdu_hdr *gp;
+
+	port = rcu_dereference(dev->garp_port);
+	if (!port)
+		goto err;
+	app = rcu_dereference(port->applicants[appl->type]);
+	if (!app)
+		goto err;
+
+	if (!pskb_may_pull(skb, sizeof(*gp)))
+		goto err;
+	gp = (struct garp_pdu_hdr *)skb->data;
+	if (get_unaligned(&gp->protocol) != htons(GARP_PROTOCOL_ID))
+		goto err;
+	skb_pull(skb, sizeof(*gp));
+
+	spin_lock(&app->lock);
+	while (skb->len > 0) {
+		if (garp_pdu_parse_msg(app, skb) < 0)
+			break;
+		if (garp_pdu_parse_end_mark(skb) < 0)
+			break;
+	}
+	spin_unlock(&app->lock);
+err:
+	kfree_skb(skb);
+}
+
+static int garp_init_port(struct net_device *dev)
+{
+	struct garp_port *port;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+	rcu_assign_pointer(dev->garp_port, port);
+	return 0;
+}
+
+static void garp_release_port(struct net_device *dev)
+{
+	struct garp_port *port = dev->garp_port;
+	unsigned int i;
+
+	for (i = 0; i <= GARP_APPLICATION_MAX; i++) {
+		if (port->applicants[i])
+			return;
+	}
+	rcu_assign_pointer(dev->garp_port, NULL);
+	synchronize_rcu();
+	kfree(port);
+}
+
+int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
+{
+	struct garp_applicant *app;
+	int err;
+
+	ASSERT_RTNL();
+
+	if (!dev->garp_port) {
+		err = garp_init_port(dev);
+		if (err < 0)
+			goto err1;
+	}
+
+	err = -ENOMEM;
+	app = kzalloc(sizeof(*app), GFP_KERNEL);
+	if (!app)
+		goto err2;
+
+	err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0);
+	if (err < 0)
+		goto err3;
+
+	app->dev = dev;
+	app->app = appl;
+	app->gid = RB_ROOT;
+	spin_lock_init(&app->lock);
+	skb_queue_head_init(&app->queue);
+	rcu_assign_pointer(dev->garp_port->applicants[appl->type], app);
+	setup_timer(&app->join_timer, garp_join_timer, (unsigned long)app);
+	garp_join_timer_arm(app);
+	return 0;
+
+err3:
+	kfree(app);
+err2:
+	garp_release_port(dev);
+err1:
+	return err;
+}
+EXPORT_SYMBOL_GPL(garp_init_applicant);
+
+void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl)
+{
+	struct garp_port *port = dev->garp_port;
+	struct garp_applicant *app = port->applicants[appl->type];
+
+	ASSERT_RTNL();
+
+	rcu_assign_pointer(port->applicants[appl->type], NULL);
+	synchronize_rcu();
+
+	/* Delete timer and generate a final TRANSMIT_PDU event to flush out
+	 * all pending messages before the applicant is gone. */
+	del_timer_sync(&app->join_timer);
+	garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
+	garp_pdu_queue(app);
+	garp_queue_xmit(app);
+
+	dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0);
+	kfree(app);
+	garp_release_port(dev);
+}
+EXPORT_SYMBOL_GPL(garp_uninit_applicant);
+
+int garp_register_application(struct garp_application *appl)
+{
+	appl->proto.rcv = garp_pdu_rcv;
+	appl->proto.data = appl;
+	return stp_proto_register(&appl->proto);
+}
+EXPORT_SYMBOL_GPL(garp_register_application);
+
+void garp_unregister_application(struct garp_application *appl)
+{
+	stp_proto_unregister(&appl->proto);
+}
+EXPORT_SYMBOL_GPL(garp_unregister_application);
-- 
cgit v1.2.3-70-g09d2


From b3ce0325f2bf1bb737cb8fc7c349ce8fefdd9e40 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 21:26:27 -0700
Subject: vlan: Change vlan_dev_set_vlan_flag() to handle multiple flags at
 once

Change vlan_dev_set_vlan_flag() to handle multiple flags at once and
rename to vlan_dev_change_flags(). This allows to to use it from the
netlink interface, which in turn allows to handle necessary adjustments
when changing flags centrally.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c         |  6 +++---
 net/8021q/vlan.h         |  3 +--
 net/8021q/vlan_dev.c     | 20 +++++++++-----------
 net/8021q/vlan_netlink.c |  4 +---
 4 files changed, 14 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index ab2225da0ee..b591bfca1ab 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -591,9 +591,9 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			break;
-		err = vlan_dev_set_vlan_flag(dev,
-					     args.u.flag,
-					     args.vlan_qos);
+		err = vlan_dev_change_flags(dev,
+					    args.vlan_qos ? args.u.flag : 0,
+					    args.u.flag);
 		break;
 
 	case SET_VLAN_NAME_TYPE_CMD:
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5229a72c7ea..639e2544a80 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -28,8 +28,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, short vlan_prio);
 int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, short vlan_prio);
-int vlan_dev_set_vlan_flag(const struct net_device *dev,
-			   u32 flag, short flag_val);
+int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 5d055c242ed..76c665cdab6 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -507,18 +507,16 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 }
 
 /* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
-int vlan_dev_set_vlan_flag(const struct net_device *dev,
-			   u32 flag, short flag_val)
+int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 {
-	/* verify flag is supported */
-	if (flag == VLAN_FLAG_REORDER_HDR) {
-		if (flag_val)
-			vlan_dev_info(dev)->flags |= VLAN_FLAG_REORDER_HDR;
-		else
-			vlan_dev_info(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
-		return 0;
-	}
-	return -EINVAL;
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	u32 old_flags = vlan->flags;
+
+	if (mask & ~VLAN_FLAG_REORDER_HDR)
+		return -EINVAL;
+
+	vlan->flags = (old_flags & ~mask) | (flags & mask);
+	return 0;
 }
 
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index c93e69ec28e..fd7cb195d53 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -75,7 +75,6 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 static int vlan_changelink(struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[])
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct ifla_vlan_flags *flags;
 	struct ifla_vlan_qos_mapping *m;
 	struct nlattr *attr;
@@ -83,8 +82,7 @@ static int vlan_changelink(struct net_device *dev,
 
 	if (data[IFLA_VLAN_FLAGS]) {
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
-		vlan->flags = (vlan->flags & ~flags->mask) |
-			      (flags->flags & flags->mask);
+		vlan_dev_change_flags(dev, flags->flags, flags->mask);
 	}
 	if (data[IFLA_VLAN_INGRESS_QOS]) {
 		nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
-- 
cgit v1.2.3-70-g09d2


From ce305002e1c9b90c2c151ce18bab0b895dd55ae6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 21:26:41 -0700
Subject: vlan: Move device unregistration before lower dev cleanup

Move the unregister_netdevice() call for the VLAN device before cleanup
for the lower device. This is needed by GVRP so it can send a leave
message before the applicant on the lower device is cleaned up.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index b591bfca1ab..8cae2daeb1c 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -165,6 +165,8 @@ void unregister_vlan_dev(struct net_device *dev)
 
 	synchronize_net();
 
+	unregister_netdevice(dev);
+
 	/* If the group is now empty, kill off the group. */
 	if (grp->nr_vlans == 0) {
 		if (real_dev->features & NETIF_F_HW_VLAN_RX)
@@ -178,8 +180,6 @@ void unregister_vlan_dev(struct net_device *dev)
 
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
-
-	unregister_netdevice(dev);
 }
 
 static void vlan_transfer_operstate(const struct net_device *dev,
-- 
cgit v1.2.3-70-g09d2


From 70c03b49b80ba3634958acc31853771019c0ebd3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 21:26:57 -0700
Subject: vlan: Add GVRP support

Add GVRP support for dynamically registering VLANs with switches.

By default GVRP is disabled because we only support the applicant-only
participant model, which means it should not be enabled on vlans that
are members of a bridge. Since there is currently no way to cleanly
determine that, the user is responsible for enabling it.

The code is pretty small and low impact, its wrapped in a config
option though because it depends on the GARP implementation and
the STP core.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h  |  1 +
 include/net/garp.h       |  1 +
 net/8021q/Kconfig        | 10 ++++++++
 net/8021q/Makefile       |  9 +++----
 net/8021q/vlan.c         | 23 ++++++++++++++---
 net/8021q/vlan.h         | 16 ++++++++++++
 net/8021q/vlan_dev.c     | 18 +++++++++++--
 net/8021q/vlan_gvrp.c    | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/8021q/vlan_netlink.c |  3 ++-
 9 files changed, 134 insertions(+), 13 deletions(-)
 create mode 100644 net/8021q/vlan_gvrp.c

(limited to 'net')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 15ace02b7b2..5190452ac7d 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -402,6 +402,7 @@ enum vlan_ioctl_cmds {
 
 enum vlan_flags {
 	VLAN_FLAG_REORDER_HDR	= 0x1,
+	VLAN_FLAG_GVRP		= 0x2,
 };
 
 enum vlan_name_types {
diff --git a/include/net/garp.h b/include/net/garp.h
index 73c772395f5..825f172caba 100644
--- a/include/net/garp.h
+++ b/include/net/garp.h
@@ -84,6 +84,7 @@ struct garp_attr {
 };
 
 enum garp_applications {
+	GARP_APPLICATION_GVRP,
 	__GARP_APPLICATION_MAX
 };
 #define GARP_APPLICATION_MAX	(__GARP_APPLICATION_MAX - 1)
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index c4a382e450e..fa073a54963 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -17,3 +17,13 @@ config VLAN_8021Q
 	  will be called 8021q.
 
 	  If unsure, say N.
+
+config VLAN_8021Q_GVRP
+	bool "GVRP (GARP VLAN Registration Protocol) support"
+	depends on VLAN_8021Q
+	select GARP
+	help
+	  Select this to enable GVRP end-system support. GVRP is used for
+	  automatic propagation of registered VLANs to switches.
+
+	  If unsure, say N.
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 10ca7f486c3..3006e9ed7b0 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -4,9 +4,6 @@
 
 obj-$(CONFIG_VLAN_8021Q) += 8021q.o
 
-8021q-objs := vlan.o vlan_dev.o vlan_netlink.o
-
-ifeq ($(CONFIG_PROC_FS),y)
-8021q-objs += vlanproc.o
-endif
-
+8021q-y				:= vlan.o vlan_dev.o vlan_netlink.o
+8021q-$(CONFIG_VLAN_8021Q_GVRP)	+= vlan_gvrp.o
+8021q-$(CONFIG_PROC_FS)		+= vlanproc.o
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8cae2daeb1c..b529110c935 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -169,6 +169,8 @@ void unregister_vlan_dev(struct net_device *dev)
 
 	/* If the group is now empty, kill off the group. */
 	if (grp->nr_vlans == 0) {
+		vlan_gvrp_uninit_applicant(real_dev);
+
 		if (real_dev->features & NETIF_F_HW_VLAN_RX)
 			real_dev->vlan_rx_register(real_dev, NULL);
 
@@ -249,15 +251,18 @@ int register_vlan_dev(struct net_device *dev)
 		ngrp = grp = vlan_group_alloc(real_dev);
 		if (!grp)
 			return -ENOBUFS;
+		err = vlan_gvrp_init_applicant(real_dev);
+		if (err < 0)
+			goto out_free_group;
 	}
 
 	err = vlan_group_prealloc_vid(grp, vlan_id);
 	if (err < 0)
-		goto out_free_group;
+		goto out_uninit_applicant;
 
 	err = register_netdevice(dev);
 	if (err < 0)
-		goto out_free_group;
+		goto out_uninit_applicant;
 
 	/* Account for reference in struct vlan_dev_info */
 	dev_hold(real_dev);
@@ -278,6 +283,9 @@ int register_vlan_dev(struct net_device *dev)
 
 	return 0;
 
+out_uninit_applicant:
+	if (ngrp)
+		vlan_gvrp_uninit_applicant(real_dev);
 out_free_group:
 	if (ngrp)
 		vlan_group_free(ngrp);
@@ -713,14 +721,20 @@ static int __init vlan_proto_init(void)
 	if (err < 0)
 		goto err2;
 
-	err = vlan_netlink_init();
+	err = vlan_gvrp_init();
 	if (err < 0)
 		goto err3;
 
+	err = vlan_netlink_init();
+	if (err < 0)
+		goto err4;
+
 	dev_add_pack(&vlan_packet_type);
 	vlan_ioctl_set(vlan_ioctl_handler);
 	return 0;
 
+err4:
+	vlan_gvrp_uninit();
 err3:
 	unregister_netdevice_notifier(&vlan_notifier_block);
 err2:
@@ -745,8 +759,9 @@ static void __exit vlan_cleanup_module(void)
 		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
 
 	unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops);
-
 	synchronize_net();
+
+	vlan_gvrp_uninit();
 }
 
 module_init(vlan_proto_init);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 639e2544a80..097b2e04c92 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -37,6 +37,22 @@ void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
 void unregister_vlan_dev(struct net_device *dev);
 
+#ifdef CONFIG_VLAN_8021Q_GVRP
+extern int vlan_gvrp_request_join(const struct net_device *dev);
+extern void vlan_gvrp_request_leave(const struct net_device *dev);
+extern int vlan_gvrp_init_applicant(struct net_device *dev);
+extern void vlan_gvrp_uninit_applicant(struct net_device *dev);
+extern int vlan_gvrp_init(void);
+extern void vlan_gvrp_uninit(void);
+#else
+static inline int vlan_gvrp_request_join(const struct net_device *dev) { return 0; }
+static inline void vlan_gvrp_request_leave(const struct net_device *dev) {}
+static inline int vlan_gvrp_init_applicant(struct net_device *dev) { return 0; }
+static inline void vlan_gvrp_uninit_applicant(struct net_device *dev) {}
+static inline int vlan_gvrp_init(void) { return 0; }
+static inline void vlan_gvrp_uninit(void) {}
+#endif
+
 int vlan_netlink_init(void);
 void vlan_netlink_fini(void);
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 76c665cdab6..a0617bf7cec 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -512,10 +512,17 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	u32 old_flags = vlan->flags;
 
-	if (mask & ~VLAN_FLAG_REORDER_HDR)
+	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP))
 		return -EINVAL;
 
 	vlan->flags = (old_flags & ~mask) | (flags & mask);
+
+	if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_GVRP) {
+		if (vlan->flags & VLAN_FLAG_GVRP)
+			vlan_gvrp_request_join(dev);
+		else
+			vlan_gvrp_request_leave(dev);
+	}
 	return 0;
 }
 
@@ -550,12 +557,19 @@ static int vlan_dev_open(struct net_device *dev)
 	if (dev->flags & IFF_PROMISC)
 		dev_set_promiscuity(real_dev, 1);
 
+	if (vlan->flags & VLAN_FLAG_GVRP)
+		vlan_gvrp_request_join(dev);
+
 	return 0;
 }
 
 static int vlan_dev_stop(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct net_device *real_dev = vlan->real_dev;
+
+	if (vlan->flags & VLAN_FLAG_GVRP)
+		vlan_gvrp_request_leave(dev);
 
 	dev_mc_unsync(real_dev, dev);
 	dev_unicast_unsync(real_dev, dev);
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
new file mode 100644
index 00000000000..db978160836
--- /dev/null
+++ b/net/8021q/vlan_gvrp.c
@@ -0,0 +1,66 @@
+/*
+ * 	IEEE 802.1Q GARP VLAN Registration Protocol (GVRP)
+ *
+ * 	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/if_vlan.h>
+#include <net/garp.h>
+#include "vlan.h"
+
+#define GARP_GVRP_ADDRESS	{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 }
+
+enum gvrp_attributes {
+	GVRP_ATTR_INVALID,
+	GVRP_ATTR_VID,
+	__GVRP_ATTR_MAX
+};
+#define GVRP_ATTR_MAX	(__GVRP_ATTR_MAX - 1)
+
+static struct garp_application vlan_gvrp_app __read_mostly = {
+	.proto.group_address	= GARP_GVRP_ADDRESS,
+	.maxattr		= GVRP_ATTR_MAX,
+	.type			= GARP_APPLICATION_GVRP,
+};
+
+int vlan_gvrp_request_join(const struct net_device *dev)
+{
+	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	__be16 vid = htons(vlan->vlan_id);
+
+	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
+				 &vid, sizeof(vid), GVRP_ATTR_VID);
+}
+
+void vlan_gvrp_request_leave(const struct net_device *dev)
+{
+	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	__be16 vid = htons(vlan->vlan_id);
+
+	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
+			   &vid, sizeof(vid), GVRP_ATTR_VID);
+}
+
+int vlan_gvrp_init_applicant(struct net_device *dev)
+{
+	return garp_init_applicant(dev, &vlan_gvrp_app);
+}
+
+void vlan_gvrp_uninit_applicant(struct net_device *dev)
+{
+	garp_uninit_applicant(dev, &vlan_gvrp_app);
+}
+
+int __init vlan_gvrp_init(void)
+{
+	return garp_register_application(&vlan_gvrp_app);
+}
+
+void vlan_gvrp_uninit(void)
+{
+	garp_unregister_application(&vlan_gvrp_app);
+}
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index fd7cb195d53..e9c91dcecc9 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -59,7 +59,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	}
 	if (data[IFLA_VLAN_FLAGS]) {
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
-		if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR)
+		if ((flags->flags & flags->mask) &
+		    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP))
 			return -EINVAL;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 6fe1c7a5556807e9d7154a2d2fb938d8a9e47e5f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 23:21:31 -0700
Subject: net-sched: add dynamically sized qdisc class hash helpers

Currently all qdiscs which allow to create classes uses a fixed sized hash
table with size 16 to hash the classes. This causes a large bottleneck
when using thousands of classes and unbound filters.

Add helpers for dynamically sized class hashes to fix this. The following
patches will convert the qdiscs to use them.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  42 +++++++++++++++++++
 net/sched/sch_api.c       | 104 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 146 insertions(+)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a87fc0312ed..073f2580b83 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -167,6 +167,48 @@ extern void qdisc_unlock_tree(struct net_device *dev);
 extern struct Qdisc noop_qdisc;
 extern struct Qdisc_ops noop_qdisc_ops;
 
+struct Qdisc_class_common
+{
+	u32			classid;
+	struct hlist_node	hnode;
+};
+
+struct Qdisc_class_hash
+{
+	struct hlist_head	*hash;
+	unsigned int		hashsize;
+	unsigned int		hashmask;
+	unsigned int		hashelems;
+};
+
+static inline unsigned int qdisc_class_hash(u32 id, u32 mask)
+{
+	id ^= id >> 8;
+	id ^= id >> 4;
+	return id & mask;
+}
+
+static inline struct Qdisc_class_common *
+qdisc_class_find(struct Qdisc_class_hash *hash, u32 id)
+{
+	struct Qdisc_class_common *cl;
+	struct hlist_node *n;
+	unsigned int h;
+
+	h = qdisc_class_hash(id, hash->hashmask);
+	hlist_for_each_entry(cl, n, &hash->hash[h], hnode) {
+		if (cl->classid == id)
+			return cl;
+	}
+	return NULL;
+}
+
+extern int qdisc_class_hash_init(struct Qdisc_class_hash *);
+extern void qdisc_class_hash_insert(struct Qdisc_class_hash *, struct Qdisc_class_common *);
+extern void qdisc_class_hash_remove(struct Qdisc_class_hash *, struct Qdisc_class_common *);
+extern void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *);
+extern void qdisc_class_hash_destroy(struct Qdisc_class_hash *);
+
 extern void dev_init_scheduler(struct net_device *dev);
 extern void dev_shutdown(struct net_device *dev);
 extern void dev_activate(struct net_device *dev);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 10f01ad0438..e9ebc7af049 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -316,6 +316,110 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
+struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
+{
+	unsigned int size = n * sizeof(struct hlist_head), i;
+	struct hlist_head *h;
+
+	if (size <= PAGE_SIZE)
+		h = kmalloc(size, GFP_KERNEL);
+	else
+		h = (struct hlist_head *)
+			__get_free_pages(GFP_KERNEL, get_order(size));
+
+	if (h != NULL) {
+		for (i = 0; i < n; i++)
+			INIT_HLIST_HEAD(&h[i]);
+	}
+	return h;
+}
+
+static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
+{
+	unsigned int size = n * sizeof(struct hlist_head);
+
+	if (size <= PAGE_SIZE)
+		kfree(h);
+	else
+		free_pages((unsigned long)h, get_order(size));
+}
+
+void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
+{
+	struct Qdisc_class_common *cl;
+	struct hlist_node *n, *next;
+	struct hlist_head *nhash, *ohash;
+	unsigned int nsize, nmask, osize;
+	unsigned int i, h;
+
+	/* Rehash when load factor exceeds 0.75 */
+	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
+		return;
+	nsize = clhash->hashsize * 2;
+	nmask = nsize - 1;
+	nhash = qdisc_class_hash_alloc(nsize);
+	if (nhash == NULL)
+		return;
+
+	ohash = clhash->hash;
+	osize = clhash->hashsize;
+
+	sch_tree_lock(sch);
+	for (i = 0; i < osize; i++) {
+		hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
+			h = qdisc_class_hash(cl->classid, nmask);
+			hlist_add_head(&cl->hnode, &nhash[h]);
+		}
+	}
+	clhash->hash     = nhash;
+	clhash->hashsize = nsize;
+	clhash->hashmask = nmask;
+	sch_tree_unlock(sch);
+
+	qdisc_class_hash_free(ohash, osize);
+}
+EXPORT_SYMBOL(qdisc_class_hash_grow);
+
+int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
+{
+	unsigned int size = 4;
+
+	clhash->hash = qdisc_class_hash_alloc(size);
+	if (clhash->hash == NULL)
+		return -ENOMEM;
+	clhash->hashsize  = size;
+	clhash->hashmask  = size - 1;
+	clhash->hashelems = 0;
+	return 0;
+}
+EXPORT_SYMBOL(qdisc_class_hash_init);
+
+void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
+{
+	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
+}
+EXPORT_SYMBOL(qdisc_class_hash_destroy);
+
+void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
+			     struct Qdisc_class_common *cl)
+{
+	unsigned int h;
+
+	INIT_HLIST_NODE(&cl->hnode);
+	h = qdisc_class_hash(cl->classid, clhash->hashmask);
+	hlist_add_head(&cl->hnode, &clhash->hash[h]);
+	clhash->hashelems++;
+}
+EXPORT_SYMBOL(qdisc_class_hash_insert);
+
+void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
+			     struct Qdisc_class_common *cl)
+{
+	hlist_del(&cl->hnode);
+	clhash->hashelems--;
+}
+EXPORT_SYMBOL(qdisc_class_hash_remove);
+
 /* Allocate an unique handle from space managed by kernel */
 
 static u32 qdisc_alloc_handle(struct net_device *dev)
-- 
cgit v1.2.3-70-g09d2


From be0d39d52ce35554e856de7e9ea37ac1fa4a7f91 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 23:21:47 -0700
Subject: net-sched: sch_hfsc: use dynamic class hash helpers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hfsc.c | 81 ++++++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index e817aa00441..3a8267246a4 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -113,7 +113,7 @@ enum hfsc_class_flags
 
 struct hfsc_class
 {
-	u32		classid;	/* class id */
+	struct Qdisc_class_common cl_common;
 	unsigned int	refcnt;		/* usage count */
 
 	struct gnet_stats_basic bstats;
@@ -134,7 +134,6 @@ struct hfsc_class
 	struct rb_node vt_node;		/* parent's vt_tree member */
 	struct rb_root cf_tree;		/* active children sorted by cl_f */
 	struct rb_node cf_node;		/* parent's cf_heap member */
-	struct list_head hlist;		/* hash list member */
 	struct list_head dlist;		/* drop list member */
 
 	u64	cl_total;		/* total work in bytes */
@@ -177,13 +176,11 @@ struct hfsc_class
 	unsigned long	cl_nactive;	/* number of active children */
 };
 
-#define HFSC_HSIZE	16
-
 struct hfsc_sched
 {
 	u16	defcls;				/* default class id */
 	struct hfsc_class root;			/* root class */
-	struct list_head clhash[HFSC_HSIZE];	/* class hash */
+	struct Qdisc_class_hash clhash;		/* class hash */
 	struct rb_root eligible;		/* eligible tree */
 	struct list_head droplist;		/* active leaf class list (for
 						   dropping) */
@@ -933,26 +930,16 @@ hfsc_adjust_levels(struct hfsc_class *cl)
 	} while ((cl = cl->cl_parent) != NULL);
 }
 
-static inline unsigned int
-hfsc_hash(u32 h)
-{
-	h ^= h >> 8;
-	h ^= h >> 4;
-
-	return h & (HFSC_HSIZE - 1);
-}
-
 static inline struct hfsc_class *
 hfsc_find_class(u32 classid, struct Qdisc *sch)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
-	struct hfsc_class *cl;
+	struct Qdisc_class_common *clc;
 
-	list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) {
-		if (cl->classid == classid)
-			return cl;
-	}
-	return NULL;
+	clc = qdisc_class_find(&q->clhash, classid);
+	if (clc == NULL)
+		return NULL;
+	return container_of(clc, struct hfsc_class, cl_common);
 }
 
 static void
@@ -1032,7 +1019,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (cl != NULL) {
 		if (parentid) {
-			if (cl->cl_parent && cl->cl_parent->classid != parentid)
+			if (cl->cl_parent &&
+			    cl->cl_parent->cl_common.classid != parentid)
 				return -EINVAL;
 			if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
 				return -EINVAL;
@@ -1091,8 +1079,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (usc != NULL)
 		hfsc_change_usc(cl, usc, 0);
 
+	cl->cl_common.classid = classid;
 	cl->refcnt    = 1;
-	cl->classid   = classid;
 	cl->sched     = q;
 	cl->cl_parent = parent;
 	cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
@@ -1103,7 +1091,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	cl->cf_tree = RB_ROOT;
 
 	sch_tree_lock(sch);
-	list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]);
+	qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
 	list_add_tail(&cl->siblings, &parent->children);
 	if (parent->level == 0)
 		hfsc_purge_queue(sch, parent);
@@ -1111,6 +1099,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	cl->cl_pcvtoff = parent->cl_cvtoff;
 	sch_tree_unlock(sch);
 
+	qdisc_class_hash_grow(sch, &q->clhash);
+
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
 				  &sch->dev->queue_lock, tca[TCA_RATE]);
@@ -1145,7 +1135,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
 	hfsc_adjust_levels(cl->cl_parent);
 
 	hfsc_purge_queue(sch, cl);
-	list_del(&cl->hlist);
+	qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
 
 	if (--cl->refcnt == 0)
 		hfsc_destroy_class(sch, cl);
@@ -1212,7 +1202,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		return -EINVAL;
 	if (new == NULL) {
 		new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
-					cl->classid);
+					cl->cl_common.classid);
 		if (new == NULL)
 			new = &noop_qdisc;
 	}
@@ -1345,8 +1335,9 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
 	struct nlattr *nest;
 
-	tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
-	tcm->tcm_handle = cl->classid;
+	tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->cl_common.classid :
+					  TC_H_ROOT;
+	tcm->tcm_handle = cl->cl_common.classid;
 	if (cl->level == 0)
 		tcm->tcm_info = cl->qdisc->handle;
 
@@ -1390,14 +1381,16 @@ static void
 hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hlist_node *n;
 	struct hfsc_class *cl;
 	unsigned int i;
 
 	if (arg->stop)
 		return;
 
-	for (i = 0; i < HFSC_HSIZE; i++) {
-		list_for_each_entry(cl, &q->clhash[i], hlist) {
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i],
+				     cl_common.hnode) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
@@ -1433,21 +1426,22 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct tc_hfsc_qopt *qopt;
-	unsigned int i;
+	int err;
 
 	if (opt == NULL || nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
 	qopt = nla_data(opt);
 
 	q->defcls = qopt->defcls;
-	for (i = 0; i < HFSC_HSIZE; i++)
-		INIT_LIST_HEAD(&q->clhash[i]);
+	err = qdisc_class_hash_init(&q->clhash);
+	if (err < 0)
+		return err;
 	q->eligible = RB_ROOT;
 	INIT_LIST_HEAD(&q->droplist);
 	skb_queue_head_init(&q->requeue);
 
+	q->root.cl_common.classid = sch->handle;
 	q->root.refcnt  = 1;
-	q->root.classid = sch->handle;
 	q->root.sched   = q;
 	q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
 					  sch->handle);
@@ -1457,7 +1451,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	q->root.vt_tree = RB_ROOT;
 	q->root.cf_tree = RB_ROOT;
 
-	list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
+	qdisc_class_hash_insert(&q->clhash, &q->root.cl_common);
+	qdisc_class_hash_grow(sch, &q->clhash);
 
 	qdisc_watchdog_init(&q->watchdog, sch);
 
@@ -1520,10 +1515,11 @@ hfsc_reset_qdisc(struct Qdisc *sch)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct hfsc_class *cl;
+	struct hlist_node *n;
 	unsigned int i;
 
-	for (i = 0; i < HFSC_HSIZE; i++) {
-		list_for_each_entry(cl, &q->clhash[i], hlist)
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
 			hfsc_reset_class(cl);
 	}
 	__skb_queue_purge(&q->requeue);
@@ -1537,17 +1533,20 @@ static void
 hfsc_destroy_qdisc(struct Qdisc *sch)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
-	struct hfsc_class *cl, *next;
+	struct hlist_node *n, *next;
+	struct hfsc_class *cl;
 	unsigned int i;
 
-	for (i = 0; i < HFSC_HSIZE; i++) {
-		list_for_each_entry(cl, &q->clhash[i], hlist)
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
 			tcf_destroy_chain(&cl->filter_list);
 	}
-	for (i = 0; i < HFSC_HSIZE; i++) {
-		list_for_each_entry_safe(cl, next, &q->clhash[i], hlist)
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+					  cl_common.hnode)
 			hfsc_destroy_class(sch, cl);
 	}
+	qdisc_class_hash_destroy(&q->clhash);
 	__skb_queue_purge(&q->requeue);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
-- 
cgit v1.2.3-70-g09d2


From d77fea2eb9206833c7aa1b013044ddeb5225b92c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 23:22:05 -0700
Subject: net-sched: sch_cbq: use dynamic class hash helpers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c | 112 ++++++++++++++++++++++++++--------------------------
 1 file changed, 55 insertions(+), 57 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 2a3c97f7dc6..968b4c73c9c 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -73,11 +73,10 @@ struct cbq_sched_data;
 
 struct cbq_class
 {
-	struct cbq_class	*next;		/* hash table link */
+	struct Qdisc_class_common common;
 	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
 
 /* Parameters */
-	u32			classid;
 	unsigned char		priority;	/* class priority */
 	unsigned char		priority2;	/* priority to be used after overlimit */
 	unsigned char		ewma_log;	/* time constant for idle time calculation */
@@ -144,7 +143,7 @@ struct cbq_class
 
 struct cbq_sched_data
 {
-	struct cbq_class	*classes[16];		/* Hash table of all classes */
+	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
 	int			nclasses[TC_CBQ_MAXPRIO+1];
 	unsigned		quanta[TC_CBQ_MAXPRIO+1];
 
@@ -177,23 +176,15 @@ struct cbq_sched_data
 
 #define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)
 
-
-static __inline__ unsigned cbq_hash(u32 h)
-{
-	h ^= h>>8;
-	h ^= h>>4;
-	return h&0xF;
-}
-
 static __inline__ struct cbq_class *
 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 {
-	struct cbq_class *cl;
+	struct Qdisc_class_common *clc;
 
-	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
-		if (cl->classid == classid)
-			return cl;
-	return NULL;
+	clc = qdisc_class_find(&q->clhash, classid);
+	if (clc == NULL)
+		return NULL;
+	return container_of(clc, struct cbq_class, common);
 }
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -1071,13 +1062,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
 {
 	struct cbq_class *cl;
-	unsigned h;
+	struct hlist_node *n;
+	unsigned int h;
 
 	if (q->quanta[prio] == 0)
 		return;
 
-	for (h=0; h<16; h++) {
-		for (cl = q->classes[h]; cl; cl = cl->next) {
+	for (h = 0; h < q->clhash.hashsize; h++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
 			/* BUGGGG... Beware! This expression suffer of
 			   arithmetic overflows!
 			 */
@@ -1086,7 +1078,7 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
 					q->quanta[prio];
 			}
 			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
-				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
+				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
 				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
 			}
 		}
@@ -1114,10 +1106,12 @@ static void cbq_sync_defmap(struct cbq_class *cl)
 		if (split->defaults[i])
 			continue;
 
-		for (h=0; h<16; h++) {
+		for (h = 0; h < q->clhash.hashsize; h++) {
+			struct hlist_node *n;
 			struct cbq_class *c;
 
-			for (c = q->classes[h]; c; c = c->next) {
+			hlist_for_each_entry(c, n, &q->clhash.hash[h],
+					     common.hnode) {
 				if (c->split == split && c->level < level &&
 				    c->defmap&(1<<i)) {
 					split->defaults[i] = c;
@@ -1135,12 +1129,12 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
 	if (splitid == 0) {
 		if ((split = cl->split) == NULL)
 			return;
-		splitid = split->classid;
+		splitid = split->common.classid;
 	}
 
-	if (split == NULL || split->classid != splitid) {
+	if (split == NULL || split->common.classid != splitid) {
 		for (split = cl->tparent; split; split = split->tparent)
-			if (split->classid == splitid)
+			if (split->common.classid == splitid)
 				break;
 	}
 
@@ -1163,13 +1157,7 @@ static void cbq_unlink_class(struct cbq_class *this)
 	struct cbq_class *cl, **clp;
 	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 
-	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
-		if (cl == this) {
-			*clp = cl->next;
-			cl->next = NULL;
-			break;
-		}
-	}
+	qdisc_class_hash_remove(&q->clhash, &this->common);
 
 	if (this->tparent) {
 		clp=&this->sibling;
@@ -1195,12 +1183,10 @@ static void cbq_unlink_class(struct cbq_class *this)
 static void cbq_link_class(struct cbq_class *this)
 {
 	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
-	unsigned h = cbq_hash(this->classid);
 	struct cbq_class *parent = this->tparent;
 
 	this->sibling = this;
-	this->next = q->classes[h];
-	q->classes[h] = this;
+	qdisc_class_hash_insert(&q->clhash, &this->common);
 
 	if (parent == NULL)
 		return;
@@ -1242,6 +1228,7 @@ cbq_reset(struct Qdisc* sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl;
+	struct hlist_node *n;
 	int prio;
 	unsigned h;
 
@@ -1258,8 +1245,8 @@ cbq_reset(struct Qdisc* sch)
 	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
 		q->active[prio] = NULL;
 
-	for (h = 0; h < 16; h++) {
-		for (cl = q->classes[h]; cl; cl = cl->next) {
+	for (h = 0; h < q->clhash.hashsize; h++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
 			qdisc_reset(cl->q);
 
 			cl->next_alive = NULL;
@@ -1406,9 +1393,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
 		return -EINVAL;
 
+	err = qdisc_class_hash_init(&q->clhash);
+	if (err < 0)
+		goto put_rtab;
+
 	q->link.refcnt = 1;
 	q->link.sibling = &q->link;
-	q->link.classid = sch->handle;
+	q->link.common.classid = sch->handle;
 	q->link.qdisc = sch;
 	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
 					    sch->handle)))
@@ -1441,6 +1432,10 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 
 	cbq_addprio(q, &q->link);
 	return 0;
+
+put_rtab:
+	qdisc_put_rtab(q->link.R_tab);
+	return err;
 }
 
 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
@@ -1521,7 +1516,7 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 	struct tc_cbq_fopt opt;
 
 	if (cl->split || cl->defmap) {
-		opt.split = cl->split ? cl->split->classid : 0;
+		opt.split = cl->split ? cl->split->common.classid : 0;
 		opt.defmap = cl->defmap;
 		opt.defchange = ~0;
 		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
@@ -1602,10 +1597,10 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	struct nlattr *nest;
 
 	if (cl->tparent)
-		tcm->tcm_parent = cl->tparent->classid;
+		tcm->tcm_parent = cl->tparent->common.classid;
 	else
 		tcm->tcm_parent = TC_H_ROOT;
-	tcm->tcm_handle = cl->classid;
+	tcm->tcm_handle = cl->common.classid;
 	tcm->tcm_info = cl->q->handle;
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
@@ -1650,8 +1645,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 
 	if (cl) {
 		if (new == NULL) {
-			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
-						     cl->classid)) == NULL)
+			new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+						cl->common.classid);
+			if (new == NULL)
 				return -ENOBUFS;
 		} else {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1716,6 +1712,7 @@ static void
 cbq_destroy(struct Qdisc* sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct hlist_node *n, *next;
 	struct cbq_class *cl;
 	unsigned h;
 
@@ -1727,18 +1724,16 @@ cbq_destroy(struct Qdisc* sch)
 	 * classes from root to leafs which means that filters can still
 	 * be bound to classes which have been destroyed already. --TGR '04
 	 */
-	for (h = 0; h < 16; h++) {
-		for (cl = q->classes[h]; cl; cl = cl->next)
+	for (h = 0; h < q->clhash.hashsize; h++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
 			tcf_destroy_chain(&cl->filter_list);
 	}
-	for (h = 0; h < 16; h++) {
-		struct cbq_class *next;
-
-		for (cl = q->classes[h]; cl; cl = next) {
-			next = cl->next;
+	for (h = 0; h < q->clhash.hashsize; h++) {
+		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
+					  common.hnode)
 			cbq_destroy_class(sch, cl);
-		}
 	}
+	qdisc_class_hash_destroy(&q->clhash);
 }
 
 static void cbq_put(struct Qdisc *sch, unsigned long arg)
@@ -1781,7 +1776,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	if (cl) {
 		/* Check parent */
 		if (parentid) {
-			if (cl->tparent && cl->tparent->classid != parentid)
+			if (cl->tparent &&
+			    cl->tparent->common.classid != parentid)
 				return -EINVAL;
 			if (!cl->tparent && parentid != TC_H_ROOT)
 				return -EINVAL;
@@ -1883,7 +1879,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	cl->refcnt = 1;
 	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
 		cl->q = &noop_qdisc;
-	cl->classid = classid;
+	cl->common.classid = classid;
 	cl->tparent = parent;
 	cl->qdisc = sch;
 	cl->allot = parent->allot;
@@ -1916,6 +1912,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
 	sch_tree_unlock(sch);
 
+	qdisc_class_hash_grow(sch, &q->clhash);
+
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
 				  &sch->dev->queue_lock, tca[TCA_RATE]);
@@ -2008,15 +2006,15 @@ static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl;
+	struct hlist_node *n;
 	unsigned h;
 
 	if (arg->stop)
 		return;
 
-	for (h = 0; h < 16; h++) {
-		struct cbq_class *cl;
-
-		for (cl = q->classes[h]; cl; cl = cl->next) {
+	for (h = 0; h < q->clhash.hashsize; h++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
-- 
cgit v1.2.3-70-g09d2


From fbd8f1379aeeb3e44a59302a6b2850636130bb2a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 23:22:19 -0700
Subject: net-sched: sch_htb: move hash and sibling list removal to htb_delete

Hash list removal currently happens twice (once in htb_delete, once
in htb_destroy_class), which makes it harder to use the dynamically
sized class hash without adding special cases for HTB. The reason is
that qdisc destruction destroys classes in hierarchical order, which
is not necessary if filters are destroyed in a separate iteration
during qdisc destruction.

Adjust qdisc destruction to follow the same scheme as other hierarchical
qdiscs by first performing a filter destruction pass, then destroying
all classes in hash order.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 0284791169c..d01fe3a1a62 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1226,8 +1226,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
 
 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
-	struct htb_sched *q = qdisc_priv(sch);
-
 	if (!cl->level) {
 		BUG_TRAP(cl->un.leaf.q);
 		qdisc_destroy(cl->un.leaf.q);
@@ -1237,21 +1235,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 	qdisc_put_rtab(cl->ceil);
 
 	tcf_destroy_chain(&cl->filter_list);
-
-	while (!list_empty(&cl->children))
-		htb_destroy_class(sch, list_entry(cl->children.next,
-						  struct htb_class, sibling));
-
-	/* note: this delete may happen twice (see htb_delete) */
-	hlist_del_init(&cl->hlist);
-	list_del(&cl->sibling);
-
-	if (cl->prio_activity)
-		htb_deactivate(q, cl);
-
-	if (cl->cmode != HTB_CAN_SEND)
-		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
-
 	kfree(cl);
 }
 
@@ -1259,6 +1242,9 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 static void htb_destroy(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
+	struct hlist_node *n, *next;
+	struct htb_class *cl;
+	unsigned int i;
 
 	qdisc_watchdog_cancel(&q->watchdog);
 	/* This line used to be after htb_destroy_class call below
@@ -1267,10 +1253,14 @@ static void htb_destroy(struct Qdisc *sch)
 	   unbind_filter on it (without Oops). */
 	tcf_destroy_chain(&q->filter_list);
 
-	while (!list_empty(&q->root))
-		htb_destroy_class(sch, list_entry(q->root.next,
-						  struct htb_class, sibling));
-
+	for (i = 0; i < HTB_HSIZE; i++) {
+		hlist_for_each_entry(cl, n, q->hash + i, hlist)
+			tcf_destroy_chain(&cl->filter_list);
+	}
+	for (i = 0; i < HTB_HSIZE; i++) {
+		hlist_for_each_entry_safe(cl, n, next, q->hash + i, hlist)
+			htb_destroy_class(sch, cl);
+	}
 	__skb_queue_purge(&q->direct_queue);
 }
 
@@ -1302,12 +1292,16 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
 	}
 
-	/* delete from hash and active; remainder in destroy_class */
-	hlist_del_init(&cl->hlist);
+	/* delete from hash, sibling list and active */
+	hlist_del(&cl->hlist);
+	list_del(&cl->sibling);
 
 	if (cl->prio_activity)
 		htb_deactivate(q, cl);
 
+	if (cl->cmode != HTB_CAN_SEND)
+		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+
 	if (last_child)
 		htb_parent_to_leaf(q, cl, new_q);
 
-- 
cgit v1.2.3-70-g09d2


From f4c1f3e0c59be0e6566d9c00b1d8b204ffb861a2 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 23:22:35 -0700
Subject: net-sched: sch_htb: use dynamic class hash helpers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 100 ++++++++++++++++++++++------------------------------
 1 file changed, 42 insertions(+), 58 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d01fe3a1a62..e7461a17d71 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -51,7 +51,6 @@
     one less than their parent.
 */
 
-#define HTB_HSIZE 16		/* classid hash size */
 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
 #define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
 
@@ -72,8 +71,8 @@ enum htb_cmode {
 
 /* interior & leaf nodes; props specific to leaves are marked L: */
 struct htb_class {
+	struct Qdisc_class_common common;
 	/* general class parameters */
-	u32 classid;
 	struct gnet_stats_basic bstats;
 	struct gnet_stats_queue qstats;
 	struct gnet_stats_rate_est rate_est;
@@ -83,7 +82,6 @@ struct htb_class {
 	/* topology */
 	int level;		/* our level (see above) */
 	struct htb_class *parent;	/* parent class */
-	struct hlist_node hlist;	/* classid hash list item */
 	struct list_head sibling;	/* sibling list item */
 	struct list_head children;	/* children list */
 
@@ -141,7 +139,7 @@ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
 
 struct htb_sched {
 	struct list_head root;	/* root classes list */
-	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
+	struct Qdisc_class_hash clhash;
 	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
 	/* self list - roots of self generating tree */
@@ -176,32 +174,16 @@ struct htb_sched {
 	long direct_pkts;
 };
 
-/* compute hash of size HTB_HSIZE for given handle */
-static inline int htb_hash(u32 h)
-{
-#if HTB_HSIZE != 16
-#error "Declare new hash for your HTB_HSIZE"
-#endif
-	h ^= h >> 8;		/* stolen from cbq_hash */
-	h ^= h >> 4;
-	return h & 0xf;
-}
-
 /* find class in global hash table using given handle */
 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct hlist_node *p;
-	struct htb_class *cl;
+	struct Qdisc_class_common *clc;
 
-	if (TC_H_MAJ(handle) != sch->handle)
+	clc = qdisc_class_find(&q->clhash, handle);
+	if (clc == NULL)
 		return NULL;
-
-	hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
-		if (cl->classid == handle)
-			return cl;
-	}
-	return NULL;
+	return container_of(clc, struct htb_class, common);
 }
 
 /**
@@ -282,7 +264,7 @@ static void htb_add_to_id_tree(struct rb_root *root,
 		parent = *p;
 		c = rb_entry(parent, struct htb_class, node[prio]);
 
-		if (cl->classid > c->classid)
+		if (cl->common.classid > c->common.classid)
 			p = &parent->rb_right;
 		else
 			p = &parent->rb_left;
@@ -446,7 +428,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 				/* we are removing child which is pointed to from
 				   parent feed - forget the pointer but remember
 				   classid */
-				p->un.inner.last_ptr_id[prio] = cl->classid;
+				p->un.inner.last_ptr_id[prio] = cl->common.classid;
 				p->un.inner.ptr[prio] = NULL;
 			}
 
@@ -751,10 +733,10 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 	while (n) {
 		struct htb_class *cl =
 		    rb_entry(n, struct htb_class, node[prio]);
-		if (id == cl->classid)
+		if (id == cl->common.classid)
 			return n;
 
-		if (id > cl->classid) {
+		if (id > cl->common.classid) {
 			n = n->rb_right;
 		} else {
 			r = n;
@@ -864,7 +846,7 @@ next:
 		if (!cl->warned) {
 			printk(KERN_WARNING
 			       "htb: class %X isn't work conserving ?!\n",
-			       cl->classid);
+			       cl->common.classid);
 			cl->warned = 1;
 		}
 		q->nwc_hit++;
@@ -975,13 +957,12 @@ static unsigned int htb_drop(struct Qdisc *sch)
 static void htb_reset(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	int i;
-
-	for (i = 0; i < HTB_HSIZE; i++) {
-		struct hlist_node *p;
-		struct htb_class *cl;
+	struct htb_class *cl;
+	struct hlist_node *n;
+	unsigned int i;
 
-		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
 			if (cl->level)
 				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 			else {
@@ -1040,8 +1021,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	INIT_LIST_HEAD(&q->root);
-	for (i = 0; i < HTB_HSIZE; i++)
-		INIT_HLIST_HEAD(q->hash + i);
+	err = qdisc_class_hash_init(&q->clhash);
+	if (err < 0)
+		return err;
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
 		INIT_LIST_HEAD(q->drops + i);
 
@@ -1096,8 +1078,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	struct tc_htb_opt opt;
 
 	spin_lock_bh(&sch->dev->queue_lock);
-	tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
-	tcm->tcm_handle = cl->classid;
+	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
+	tcm->tcm_handle = cl->common.classid;
 	if (!cl->level && cl->un.leaf.q)
 		tcm->tcm_info = cl->un.leaf.q->handle;
 
@@ -1152,7 +1134,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	if (cl && !cl->level) {
 		if (new == NULL &&
 		    (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
-					     cl->classid))
+					     cl->common.classid))
 		    == NULL)
 			return -ENOBUFS;
 		sch_tree_lock(sch);
@@ -1253,14 +1235,16 @@ static void htb_destroy(struct Qdisc *sch)
 	   unbind_filter on it (without Oops). */
 	tcf_destroy_chain(&q->filter_list);
 
-	for (i = 0; i < HTB_HSIZE; i++) {
-		hlist_for_each_entry(cl, n, q->hash + i, hlist)
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
 			tcf_destroy_chain(&cl->filter_list);
 	}
-	for (i = 0; i < HTB_HSIZE; i++) {
-		hlist_for_each_entry_safe(cl, n, next, q->hash + i, hlist)
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+					  common.hnode)
 			htb_destroy_class(sch, cl);
 	}
+	qdisc_class_hash_destroy(&q->clhash);
 	__skb_queue_purge(&q->direct_queue);
 }
 
@@ -1280,7 +1264,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 
 	if (!cl->level && htb_parent_last_child(cl)) {
 		new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
-						cl->parent->classid);
+						cl->parent->common.classid);
 		last_child = 1;
 	}
 
@@ -1292,8 +1276,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
 	}
 
-	/* delete from hash, sibling list and active */
-	hlist_del(&cl->hlist);
+	/* delete from hash and active; remainder in destroy_class */
+	qdisc_class_hash_remove(&q->clhash, &cl->common);
 	list_del(&cl->sibling);
 
 	if (cl->prio_activity)
@@ -1390,7 +1374,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 				  tca[TCA_RATE] ? : &est.nla);
 		cl->refcnt = 1;
 		INIT_LIST_HEAD(&cl->sibling);
-		INIT_HLIST_NODE(&cl->hlist);
 		INIT_LIST_HEAD(&cl->children);
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 		RB_CLEAR_NODE(&cl->pq_node);
@@ -1425,7 +1408,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		/* leaf (we) needs elementary qdisc */
 		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
 
-		cl->classid = classid;
+		cl->common.classid = classid;
 		cl->parent = parent;
 
 		/* set class to be in HTB_CAN_SEND state */
@@ -1436,7 +1419,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		cl->cmode = HTB_CAN_SEND;
 
 		/* attach to the hash list and parent's family */
-		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
+		qdisc_class_hash_insert(&q->clhash, &cl->common);
 		list_add_tail(&cl->sibling,
 			      parent ? &parent->children : &q->root);
 	} else {
@@ -1454,13 +1437,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
 			printk(KERN_WARNING
 			       "HTB: quantum of class %X is small. Consider r2q change.\n",
-			       cl->classid);
+			       cl->common.classid);
 			cl->un.leaf.quantum = 1000;
 		}
 		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
 			printk(KERN_WARNING
 			       "HTB: quantum of class %X is big. Consider r2q change.\n",
-			       cl->classid);
+			       cl->common.classid);
 			cl->un.leaf.quantum = 200000;
 		}
 		if (hopt->quantum)
@@ -1483,6 +1466,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	cl->ceil = ctab;
 	sch_tree_unlock(sch);
 
+	qdisc_class_hash_grow(sch, &q->clhash);
+
 	*arg = (unsigned long)cl;
 	return 0;
 
@@ -1539,16 +1524,15 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	int i;
+	struct htb_class *cl;
+	struct hlist_node *n;
+	unsigned int i;
 
 	if (arg->stop)
 		return;
 
-	for (i = 0; i < HTB_HSIZE; i++) {
-		struct hlist_node *p;
-		struct htb_class *cl;
-
-		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
-- 
cgit v1.2.3-70-g09d2


From 4207759939c208f24d7e09596753e67b6bc188f9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 23:22:53 -0700
Subject: net-sched: sch_htb: remove child and sibling lists

Now that the qdisc isn't destroyed in hierarchical order anymore,
the only user of the child lists left is htb_parent_last_child().
This can be easily changed to use a counter of children to save
a few bytes.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index e7461a17d71..128a5ab2e37 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -81,9 +81,8 @@ struct htb_class {
 
 	/* topology */
 	int level;		/* our level (see above) */
+	unsigned int children;
 	struct htb_class *parent;	/* parent class */
-	struct list_head sibling;	/* sibling list item */
-	struct list_head children;	/* children list */
 
 	union {
 		struct htb_class_leaf {
@@ -138,7 +137,6 @@ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
 }
 
 struct htb_sched {
-	struct list_head root;	/* root classes list */
 	struct Qdisc_class_hash clhash;
 	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
@@ -1020,7 +1018,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 		return -EINVAL;
 	}
 
-	INIT_LIST_HEAD(&q->root);
 	err = qdisc_class_hash_init(&q->clhash);
 	if (err < 0)
 		return err;
@@ -1175,12 +1172,9 @@ static inline int htb_parent_last_child(struct htb_class *cl)
 	if (!cl->parent)
 		/* the root class */
 		return 0;
-
-	if (!(cl->parent->children.next == &cl->sibling &&
-		cl->parent->children.prev == &cl->sibling))
+	if (cl->parent->children > 1)
 		/* not the last child */
 		return 0;
-
 	return 1;
 }
 
@@ -1259,7 +1253,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 	// TODO: why don't allow to delete subtree ? references ? does
 	// tc subsys quarantee us that in htb_destroy it holds no class
 	// refs so that we can remove children safely there ?
-	if (!list_empty(&cl->children) || cl->filter_cnt)
+	if (cl->children || cl->filter_cnt)
 		return -EBUSY;
 
 	if (!cl->level && htb_parent_last_child(cl)) {
@@ -1278,7 +1272,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 
 	/* delete from hash and active; remainder in destroy_class */
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
-	list_del(&cl->sibling);
+	cl->parent->children--;
 
 	if (cl->prio_activity)
 		htb_deactivate(q, cl);
@@ -1373,8 +1367,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 				  &sch->dev->queue_lock,
 				  tca[TCA_RATE] ? : &est.nla);
 		cl->refcnt = 1;
-		INIT_LIST_HEAD(&cl->sibling);
-		INIT_LIST_HEAD(&cl->children);
+		cl->children = 0;
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 		RB_CLEAR_NODE(&cl->pq_node);
 
@@ -1420,8 +1413,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 		/* attach to the hash list and parent's family */
 		qdisc_class_hash_insert(&q->clhash, &cl->common);
-		list_add_tail(&cl->sibling,
-			      parent ? &parent->children : &q->root);
+		if (parent)
+			parent->children++;
 	} else {
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-- 
cgit v1.2.3-70-g09d2


From aee18a8cf28808b7302ef698d77fa73883e60f1b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 23:23:27 -0700
Subject: net-sched: sch_htb: remove write-only qdisc filter_cnt

The filter_cnt is supposed to count filter references to a class.
Since the qdisc can't be the target of a filter, it doesn't need
a filter_cnt. In fact the counter is never decreased since cls_api
considers a return value of zero a failure and doesn't unbind again.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 128a5ab2e37..ee8b4ffe110 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -159,7 +159,6 @@ struct htb_sched {
 
 	/* filters for qdisc itself */
 	struct tcf_proto *filter_list;
-	int filter_cnt;
 
 	int rate2quantum;	/* quant = rate / rate2quantum */
 	psched_time_t now;	/* cached dequeue time */
@@ -1484,7 +1483,6 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
 				     u32 classid)
 {
-	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = htb_find(classid, sch);
 
 	/*if (cl && !cl->level) return 0;
@@ -1498,20 +1496,15 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
 	 */
 	if (cl)
 		cl->filter_cnt++;
-	else
-		q->filter_cnt++;
 	return (unsigned long)cl;
 }
 
 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
 {
-	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
 
 	if (cl)
 		cl->filter_cnt--;
-	else
-		q->filter_cnt--;
 }
 
 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
-- 
cgit v1.2.3-70-g09d2


From 7f2d38eb7a42bea1c1df51bbdaa2ca0f0bdda07f Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Date: Sat, 5 Jul 2008 23:38:43 -0700
Subject: can: add sanity checks

Even though the CAN netlayer only deals with CAN netdevices, the
netlayer interface to the userspace and to the device layer should
perform some sanity checks.

This patch adds several sanity checks that mainly prevent userspace apps
to send broken content into the system that may be misinterpreted by
some other userspace application.

Signed-off-by: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Signed-off-by: Urs Thuermann <urs.thuermann@volkswagen.de>
Acked-by: Andre Naujoks <nautsch@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/af_can.c | 10 ++++++++++
 net/can/bcm.c    | 23 +++++++++++++++++++----
 net/can/raw.c    |  3 +++
 3 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7e8ca283645..484bbf6dd03 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -205,12 +205,19 @@ static int can_create(struct net *net, struct socket *sock, int protocol)
  *  -ENOBUFS on full driver queue (see net_xmit_errno())
  *  -ENOMEM when local loopback failed at calling skb_clone()
  *  -EPERM when trying to send on a non-CAN interface
+ *  -EINVAL when the skb->data does not contain a valid CAN frame
  */
 int can_send(struct sk_buff *skb, int loop)
 {
 	struct sk_buff *newskb = NULL;
+	struct can_frame *cf = (struct can_frame *)skb->data;
 	int err;
 
+	if (skb->len != sizeof(struct can_frame) || cf->can_dlc > 8) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
 	if (skb->dev->type != ARPHRD_CAN) {
 		kfree_skb(skb);
 		return -EPERM;
@@ -605,6 +612,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 		   struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct dev_rcv_lists *d;
+	struct can_frame *cf = (struct can_frame *)skb->data;
 	int matches;
 
 	if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) {
@@ -612,6 +620,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 		return 0;
 	}
 
+	BUG_ON(skb->len != sizeof(struct can_frame) || cf->can_dlc > 8);
+
 	/* update statistics */
 	can_stats.rx_frames++;
 	can_stats.rx_frames_delta++;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index d9a3a9d13be..72c2ce904f8 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -298,7 +298,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
 
 	if (head->nframes) {
 		/* can_frames starting here */
-		firstframe = (struct can_frame *) skb_tail_pointer(skb);
+		firstframe = (struct can_frame *)skb_tail_pointer(skb);
 
 		memcpy(skb_put(skb, datalen), frames, datalen);
 
@@ -826,6 +826,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		for (i = 0; i < msg_head->nframes; i++) {
 			err = memcpy_fromiovec((u8 *)&op->frames[i],
 					       msg->msg_iov, CFSIZ);
+
+			if (op->frames[i].can_dlc > 8)
+				err = -EINVAL;
+
 			if (err < 0)
 				return err;
 
@@ -858,6 +862,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		for (i = 0; i < msg_head->nframes; i++) {
 			err = memcpy_fromiovec((u8 *)&op->frames[i],
 					       msg->msg_iov, CFSIZ);
+
+			if (op->frames[i].can_dlc > 8)
+				err = -EINVAL;
+
 			if (err < 0) {
 				if (op->frames != &op->sframe)
 					kfree(op->frames);
@@ -1164,9 +1172,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 
 	skb->dev = dev;
 	skb->sk  = sk;
-	can_send(skb, 1); /* send with loopback */
+	err = can_send(skb, 1); /* send with loopback */
 	dev_put(dev);
 
+	if (err)
+		return err;
+
 	return CFSIZ + MHSIZ;
 }
 
@@ -1185,6 +1196,10 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (!bo->bound)
 		return -ENOTCONN;
 
+	/* check for valid message length from userspace */
+	if (size < MHSIZ || (size - MHSIZ) % CFSIZ)
+		return -EINVAL;
+
 	/* check for alternative ifindex for this bcm_op */
 
 	if (!ifindex && msg->msg_name) {
@@ -1259,8 +1274,8 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
 		break;
 
 	case TX_SEND:
-		/* we need at least one can_frame */
-		if (msg_head.nframes < 1)
+		/* we need exactly one can_frame behind the msg head */
+		if ((msg_head.nframes != 1) || (size != CFSIZ + MHSIZ))
 			ret = -EINVAL;
 		else
 			ret = bcm_tx_send(msg, ifindex, sk);
diff --git a/net/can/raw.c b/net/can/raw.c
index 69877b8e7e9..3e46ee36a1a 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -632,6 +632,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 	} else
 		ifindex = ro->ifindex;
 
+	if (size != sizeof(struct can_frame))
+		return -EINVAL;
+
 	dev = dev_get_by_index(&init_net, ifindex);
 	if (!dev)
 		return -ENXIO;
-- 
cgit v1.2.3-70-g09d2


From fb0305ce1b03f6ff17f84f2c63daccecb45f2805 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Jul 2008 23:40:21 -0700
Subject: net-sched: consolidate default fifo qdisc setup

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  4 ++++
 net/sched/sch_fifo.c    | 42 ++++++++++++++++++++++++++++++++++++++++++
 net/sched/sch_netem.c   | 24 +-----------------------
 net/sched/sch_red.c     | 33 +++------------------------------
 net/sched/sch_tbf.c     | 33 ++++-----------------------------
 5 files changed, 54 insertions(+), 82 deletions(-)

(limited to 'net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 46fb4d80c74..8e3a0c4e9d9 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -72,6 +72,10 @@ extern void qdisc_watchdog_cancel(struct qdisc_watchdog *wd);
 extern struct Qdisc_ops pfifo_qdisc_ops;
 extern struct Qdisc_ops bfifo_qdisc_ops;
 
+extern int fifo_set_limit(struct Qdisc *q, unsigned int limit);
+extern struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
+				      unsigned int limit);
+
 extern int register_qdisc(struct Qdisc_ops *qops);
 extern int unregister_qdisc(struct Qdisc_ops *qops);
 extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 95ed4822165..82d7d7bbbb1 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -107,3 +107,45 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.owner		=	THIS_MODULE,
 };
 EXPORT_SYMBOL(bfifo_qdisc_ops);
+
+/* Pass size change message down to embedded FIFO */
+int fifo_set_limit(struct Qdisc *q, unsigned int limit)
+{
+	struct nlattr *nla;
+	int ret = -ENOMEM;
+
+	/* Hack to avoid sending change message to non-FIFO */
+	if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
+		return 0;
+
+	nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
+	if (nla) {
+		nla->nla_type = RTM_NEWQDISC;
+		nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
+		((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
+
+		ret = q->ops->change(q, nla);
+		kfree(nla);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(fifo_set_limit);
+
+struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
+			       unsigned int limit)
+{
+	struct Qdisc *q;
+	int err = -ENOMEM;
+
+	q = qdisc_create_dflt(sch->dev, ops, TC_H_MAKE(sch->handle, 1));
+	if (q) {
+		err = fifo_set_limit(q, limit);
+		if (err < 0) {
+			qdisc_destroy(q);
+			q = NULL;
+		}
+	}
+
+	return q ? : ERR_PTR(err);
+}
+EXPORT_SYMBOL(fifo_create_dflt);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c9c649b26ea..24697667247 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -310,28 +310,6 @@ static void netem_reset(struct Qdisc *sch)
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
-/* Pass size change message down to embedded FIFO */
-static int set_fifo_limit(struct Qdisc *q, int limit)
-{
-	struct nlattr *nla;
-	int ret = -ENOMEM;
-
-	/* Hack to avoid sending change message to non-FIFO */
-	if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
-		return 0;
-
-	nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
-	if (nla) {
-		nla->nla_type = RTM_NEWQDISC;
-		nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
-		((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
-
-		ret = q->ops->change(q, nla);
-		kfree(nla);
-	}
-	return ret;
-}
-
 /*
  * Distribution data is a variable size payload containing
  * signed 16 bit values.
@@ -416,7 +394,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ret < 0)
 		return ret;
 
-	ret = set_fifo_limit(q->qdisc, qopt->limit);
+	ret = fifo_set_limit(q->qdisc, qopt->limit);
 	if (ret) {
 		pr_debug("netem: can't set fifo limit\n");
 		return ret;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 5c569853b9c..77098acf0ad 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -174,33 +174,6 @@ static void red_destroy(struct Qdisc *sch)
 	qdisc_destroy(q->qdisc);
 }
 
-static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
-{
-	struct Qdisc *q;
-	struct nlattr *nla;
-	int ret;
-
-	q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
-			      TC_H_MAKE(sch->handle, 1));
-	if (q) {
-		nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
-			      GFP_KERNEL);
-		if (nla) {
-			nla->nla_type = RTM_NEWQDISC;
-			nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
-			((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
-
-			ret = q->ops->change(q, nla);
-			kfree(nla);
-
-			if (ret == 0)
-				return q;
-		}
-		qdisc_destroy(q);
-	}
-	return NULL;
-}
-
 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
@@ -228,9 +201,9 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	ctl = nla_data(tb[TCA_RED_PARMS]);
 
 	if (ctl->limit > 0) {
-		child = red_create_dflt(sch, ctl->limit);
-		if (child == NULL)
-			return -ENOMEM;
+		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 	}
 
 	sch_tree_lock(sch);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 0b7d78f59d8..444c227fcb6 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -242,34 +242,6 @@ static void tbf_reset(struct Qdisc* sch)
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
-static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
-{
-	struct Qdisc *q;
-	struct nlattr *nla;
-	int ret;
-
-	q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
-			      TC_H_MAKE(sch->handle, 1));
-	if (q) {
-		nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
-			      GFP_KERNEL);
-		if (nla) {
-			nla->nla_type = RTM_NEWQDISC;
-			nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
-			((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
-
-			ret = q->ops->change(q, nla);
-			kfree(nla);
-
-			if (ret == 0)
-				return q;
-		}
-		qdisc_destroy(q);
-	}
-
-	return NULL;
-}
-
 static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
 	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
 	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
@@ -322,8 +294,11 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 		goto done;
 
 	if (qopt->limit > 0) {
-		if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL)
+		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
+		if (IS_ERR(child)) {
+			err = PTR_ERR(child);
 			goto done;
+		}
 	}
 
 	sch_tree_lock(sch);
-- 
cgit v1.2.3-70-g09d2


From 4b5a698ef423eebc37cfacc6d3376d6dffd5bf83 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 6 Jul 2008 15:49:08 -0700
Subject: net: fix dev_set_promiscuity() breakage

Commit dad9b335 (netdevice: Fix promiscuity and allmulti overflow) broke
dev_set_promiscuity() by returning on success without reprogramming the
device.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index bfa9a6a951d..75933932463 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2859,7 +2859,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
 	int err;
 
 	err = __dev_set_promiscuity(dev, inc);
-	if (!err)
+	if (err < 0)
 		return err;
 	if (dev->flags != old_flags)
 		dev_set_rx_mode(dev);
-- 
cgit v1.2.3-70-g09d2


From 8db9369ff92b1cd93566baadd8bd2992f025fdd0 Mon Sep 17 00:00:00 2001
From: Guy Cohen <guy.cohen@intel.com>
Date: Thu, 3 Jul 2008 19:56:13 +0300
Subject: mac80211: move netif_carrier_on to after
 ieee80211_bss_info_change_notify

Putting netif_carrier_on before configuring the driver/device with the
new association state may cause a race (tx frames may be sent before
configuration is done)

Signed-off-by: Guy Cohen <guy.cohen@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4d2b582dd05..a7018540ae8 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -547,15 +547,14 @@ static void ieee80211_set_associated(struct net_device *dev,
 			sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf;
 		}
 
-		netif_carrier_on(dev);
 		ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET;
 		memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN);
 		memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN);
 		ieee80211_sta_send_associnfo(dev, ifsta);
 	} else {
+		netif_carrier_off(dev);
 		ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid);
 		ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
-		netif_carrier_off(dev);
 		ieee80211_reset_erp_info(dev);
 
 		sdata->bss_conf.assoc_ht = 0;
@@ -569,6 +568,10 @@ static void ieee80211_set_associated(struct net_device *dev,
 
 	sdata->bss_conf.assoc = assoc;
 	ieee80211_bss_info_change_notify(sdata, changed);
+
+	if (assoc)
+		netif_carrier_on(dev);
+
 	wrqu.ap_addr.sa_family = ARPHRD_ETHER;
 	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
 }
-- 
cgit v1.2.3-70-g09d2


From ea0c925370b33baf168bb33782c613468c1aa119 Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Thu, 3 Jul 2008 19:02:44 +0200
Subject: mac80211: Only flush workqueue when last interface was removed

Currently the ieee80211_hw->workqueue is flushed each time
an interface is being removed. However most scheduled work
is not interface specific but device specific, for example things like
periodic work for link tuners.

This patch will move the flush_workqueue() call to directly behind
the call to ops->stop() to make sure the workqueue is only flushed
when all interfaces are gone and there really shouldn't be any scheduled
work in the drivers left.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 98c0b5e56ec..df0836ff1a2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -530,8 +530,6 @@ static int ieee80211_stop(struct net_device *dev)
 				local->sta_hw_scanning = 0;
 		}
 
-		flush_workqueue(local->hw.workqueue);
-
 		sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
 		kfree(sdata->u.sta.extra_ie);
 		sdata->u.sta.extra_ie = NULL;
@@ -555,6 +553,8 @@ static int ieee80211_stop(struct net_device *dev)
 
 		ieee80211_led_radio(local, 0);
 
+		flush_workqueue(local->hw.workqueue);
+
 		tasklet_disable(&local->tx_pending_tasklet);
 		tasklet_disable(&local->tasklet);
 	}
-- 
cgit v1.2.3-70-g09d2


From 6e43829bb69bf1d584a592075f1357590eb49b1a Mon Sep 17 00:00:00 2001
From: Vladimir Koutny <vlado@work.ksp.sk>
Date: Mon, 7 Jul 2008 14:23:01 +0200
Subject: mac80211: don't report selected IBSS when not found

Don't report a 'selected' IBSS in sta_find_ibss when none was found.

Signed-off-by: Vladimir Koutny <vlado@ksp.sk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a7018540ae8..b404537c0bc 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3614,8 +3614,10 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
 	spin_unlock_bh(&local->sta_bss_lock);
 
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
-	printk(KERN_DEBUG "   sta_find_ibss: selected %s current "
-	       "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid));
+	if (found)
+		printk(KERN_DEBUG "   sta_find_ibss: selected %s current "
+		       "%s\n", print_mac(mac, bssid),
+		       print_mac(mac2, ifsta->bssid));
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 	if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 &&
 	    (bss = ieee80211_rx_bss_get(dev, bssid,
-- 
cgit v1.2.3-70-g09d2


From 3888e9efc9bf05e60504d2a420be7a527ff43678 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Tue, 8 Jul 2008 02:28:39 -0700
Subject: sctp: Mark the tsn as received after all allocations finish

If we don't have the buffer space or memory allocations fail,
the data chunk is dropped, but TSN is still reported as received.
This introduced a data loss that can't be recovered.  We should
only mark TSNs are received after memory allocations finish.
The one exception is the invalid stream identifier, but that's
due to user error and is reported back to the user.

This was noticed by Michael Tuexen.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 9 +++------
 net/sctp/ulpevent.c     | 5 +++++
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 0c9d5a6950f..fcdb45d1071 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5899,12 +5899,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 		return SCTP_IERROR_NO_DATA;
 	}
 
-	/* If definately accepting the DATA chunk, record its TSN, otherwise
-	 * wait for renege processing.
-	 */
-	if (SCTP_CMD_CHUNK_ULP == deliver)
-		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
-
 	chunk->data_accepted = 1;
 
 	/* Note: Some chunks may get overcounted (if we drop) or overcounted
@@ -5924,6 +5918,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * and discard the DATA chunk.
 	 */
 	if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) {
+		/* Mark tsn as received even though we drop it */
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
+
 		err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM,
 					 &data_hdr->stream,
 					 sizeof(data_hdr->stream));
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index ce6cda6b699..a1f654aea26 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -710,6 +710,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	if (!skb)
 		goto fail;
 
+	/* Now that all memory allocations for this chunk succeeded, we
+	 * can mark it as received so the tsn_map is updated correctly.
+	 */
+	sctp_tsnmap_mark(&asoc->peer.tsn_map, ntohl(chunk->subh.data_hdr->tsn));
+
 	/* First calculate the padding, so we don't inadvertently
 	 * pass up the wrong length to the user.
 	 *
-- 
cgit v1.2.3-70-g09d2


From d2789312cc6d875462d1d248e07a8a9caf8a6ae3 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@parallels.com>
Date: Tue, 8 Jul 2008 02:34:52 -0700
Subject: netfilter: use correct namespace in ip6table_security

Signed-off-by: Alexey Dobriyan <adobriyan@parallels.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6table_security.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 063a3d9c3c6..a07abee3049 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -72,7 +72,7 @@ ip6t_local_in_hook(unsigned int hook,
 		   int (*okfn)(struct sk_buff *))
 {
 	return ip6t_do_table(skb, hook, in, out,
-			     init_net.ipv6.ip6table_security);
+			     nf_local_in_net(in, out)->ipv6.ip6table_security);
 }
 
 static unsigned int
@@ -83,7 +83,7 @@ ip6t_forward_hook(unsigned int hook,
 		  int (*okfn)(struct sk_buff *))
 {
 	return ip6t_do_table(skb, hook, in, out,
-			     init_net.ipv6.ip6table_security);
+			     nf_forward_net(in, out)->ipv6.ip6table_security);
 }
 
 static unsigned int
@@ -95,7 +95,7 @@ ip6t_local_out_hook(unsigned int hook,
 {
 	/* TBD: handle short packets via raw socket */
 	return ip6t_do_table(skb, hook, in, out,
-			     init_net.ipv6.ip6table_security);
+			     nf_local_out_net(in, out)->ipv6.ip6table_security);
 }
 
 static struct nf_hook_ops ip6t_ops[] __read_mostly = {
-- 
cgit v1.2.3-70-g09d2


From b11c16beb92112885edccc79e17d39c5d218f441 Mon Sep 17 00:00:00 2001
From: Russ Dill <Russ.Dill@gmail.com>
Date: Tue, 8 Jul 2008 02:35:27 -0700
Subject: netfilter: Get rid of refrences to no longer existant Fast NAT.

Get rid of refrences to no longer existant Fast NAT.

IP_ROUTE_NAT support was removed in August of 2004, but references to Fast
NAT were left in a couple of config options.

Signed-off-by: Russ Dill <Russ.Dill@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig | 3 +--
 net/netfilter/Kconfig      | 5 ++---
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 6e251402506..f23e60c93ef 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -213,8 +213,7 @@ config IP_NF_TARGET_NETMAP
 	help
 	  NETMAP is an implementation of static 1:1 NAT mapping of network
 	  addresses. It maps the network address part, while keeping the host
-	  address part intact. It is similar to Fast NAT, except that
-	  Netfilter's connection tracking doesn't work well with Fast NAT.
+	  address part intact.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index aa8d80c35e2..316c7af1d2b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -33,9 +33,8 @@ config NF_CONNTRACK
 	  into connections.
 
 	  This is required to do Masquerading or other kinds of Network
-	  Address Translation (except for Fast NAT).  It can also be used to
-	  enhance packet filtering (see `Connection state match support'
-	  below).
+	  Address Translation.  It can also be used to enhance packet
+	  filtering (see `Connection state match support' below).
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-- 
cgit v1.2.3-70-g09d2


From b891c5a831b13f74989dcbd7b39d04537b2a05d9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 8 Jul 2008 02:35:55 -0700
Subject: netfilter: nf_conntrack: add allocation flag to nf_conntrack_alloc

ctnetlink does not need to allocate the conntrack entries with GFP_ATOMIC
as its code is executed in user context.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack.h | 3 ++-
 net/netfilter/nf_conntrack_core.c    | 7 ++++---
 net/netfilter/nf_conntrack_netlink.c | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d77dec768dc..d5d76ec7abb 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -258,7 +258,8 @@ nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data);
 extern void nf_conntrack_free(struct nf_conn *ct);
 extern struct nf_conn *
 nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
-		   const struct nf_conntrack_tuple *repl);
+		   const struct nf_conntrack_tuple *repl,
+		   gfp_t gfp);
 
 /* It's confirmed if it is, or has been in the hash table. */
 static inline int nf_ct_is_confirmed(struct nf_conn *ct)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f27c99246a4..212a0888408 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -464,7 +464,8 @@ static noinline int early_drop(unsigned int hash)
 }
 
 struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
-				   const struct nf_conntrack_tuple *repl)
+				   const struct nf_conntrack_tuple *repl,
+				   gfp_t gfp)
 {
 	struct nf_conn *ct = NULL;
 
@@ -489,7 +490,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 		}
 	}
 
-	ct = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC);
+	ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp);
 	if (ct == NULL) {
 		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
 		atomic_dec(&nf_conntrack_count);
@@ -542,7 +543,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		return NULL;
 	}
 
-	ct = nf_conntrack_alloc(tuple, &repl_tuple);
+	ct = nf_conntrack_alloc(tuple, &repl_tuple, GFP_ATOMIC);
 	if (ct == NULL || IS_ERR(ct)) {
 		pr_debug("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)ct;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 63c4e1f299b..dd233393f69 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1128,7 +1128,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 	struct nf_conn_help *help;
 	struct nf_conntrack_helper *helper;
 
-	ct = nf_conntrack_alloc(otuple, rtuple);
+	ct = nf_conntrack_alloc(otuple, rtuple, GFP_KERNEL);
 	if (ct == NULL || IS_ERR(ct))
 		return -ENOMEM;
 
-- 
cgit v1.2.3-70-g09d2


From 43de9dfeaa30f7ed801dc1c38bdb63b1738bddcc Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 8 Jul 2008 02:36:18 -0700
Subject: netfilter: ip6table_filter in netns for real

One still needs to remove checks in nf_hook_slow() and nf_sockopt_find()
to test this, though.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6table_filter.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index f979e48b469..55a2c290bad 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,13 +61,25 @@ static struct xt_table packet_filter = {
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6t_hook(unsigned int hook,
-	 struct sk_buff *skb,
-	 const struct net_device *in,
-	 const struct net_device *out,
-	 int (*okfn)(struct sk_buff *))
+ip6t_local_in_hook(unsigned int hook,
+		   struct sk_buff *skb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	return ip6t_do_table(skb, hook, in, out,
+			     nf_local_in_net(in, out)->ipv6.ip6table_filter);
+}
+
+static unsigned int
+ip6t_forward_hook(unsigned int hook,
+		  struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter);
+	return ip6t_do_table(skb, hook, in, out,
+			     nf_forward_net(in, out)->ipv6.ip6table_filter);
 }
 
 static unsigned int
@@ -87,19 +99,20 @@ ip6t_local_out_hook(unsigned int hook,
 	}
 #endif
 
-	return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter);
+	return ip6t_do_table(skb, hook, in, out,
+			     nf_local_out_net(in, out)->ipv6.ip6table_filter);
 }
 
 static struct nf_hook_ops ip6t_ops[] __read_mostly = {
 	{
-		.hook		= ip6t_hook,
+		.hook		= ip6t_local_in_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
 		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP6_PRI_FILTER,
 	},
 	{
-		.hook		= ip6t_hook,
+		.hook		= ip6t_forward_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
 		.hooknum	= NF_INET_FORWARD,
-- 
cgit v1.2.3-70-g09d2


From 58de7862e61cb71251a25314d1b3d7260af1448a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 8 Jul 2008 02:37:07 -0700
Subject: netfilter: ebt_nflog: fix Kconfig typo

The help text should refer to nflog instead of ulog. Noticed by
Krzysztof Halasa <khc@pm.waw.pl>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 540df4106be..90947979499 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -230,7 +230,7 @@ config BRIDGE_EBT_NFLOG
 	  either the old LOG target, the old ULOG target or nfnetlink_log
 	  as backend.
 
-	  This option adds the ulog watcher, that you can use in any rule
+	  This option adds the nflog watcher, that you can use in any rule
 	  in any ebtables table.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
-- 
cgit v1.2.3-70-g09d2


From 4ad3f26162ece5aca3045fd45e15dd99acea4a0e Mon Sep 17 00:00:00 2001
From: Joonwoo Park <joonwpark81@gmail.com>
Date: Tue, 8 Jul 2008 02:38:56 -0700
Subject: netfilter: fix string extension for case insensitive pattern matching

The flag XT_STRING_FLAG_IGNORECASE indicates case insensitive string
matching. netfilter can find cmd.exe, Cmd.exe, cMd.exe and etc easily.

A new revision 1 was added, in the meantime invert of xt_string_info
was moved into flags as a flag. If revision is 1, The flag
XT_STRING_FLAG_INVERT indicates invert matching.

Signed-off-by: Joonwoo Park <joonwpark81@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/xt_string.h | 15 ++++++++++++++-
 net/netfilter/xt_string.c           | 38 +++++++++++++++++++++++++++++++++++--
 2 files changed, 50 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/xt_string.h b/include/linux/netfilter/xt_string.h
index bb21dd1aee2..8a6ba7bbef9 100644
--- a/include/linux/netfilter/xt_string.h
+++ b/include/linux/netfilter/xt_string.h
@@ -4,6 +4,11 @@
 #define XT_STRING_MAX_PATTERN_SIZE 128
 #define XT_STRING_MAX_ALGO_NAME_SIZE 16
 
+enum {
+	XT_STRING_FLAG_INVERT		= 0x01,
+	XT_STRING_FLAG_IGNORECASE	= 0x02
+};
+
 struct xt_string_info
 {
 	u_int16_t from_offset;
@@ -11,7 +16,15 @@ struct xt_string_info
 	char	  algo[XT_STRING_MAX_ALGO_NAME_SIZE];
 	char 	  pattern[XT_STRING_MAX_PATTERN_SIZE];
 	u_int8_t  patlen;
-	u_int8_t  invert;
+	union {
+		struct {
+			u_int8_t  invert;
+		} v0;
+
+		struct {
+			u_int8_t  flags;
+		} v1;
+	} u;
 
 	/* Used internally by the kernel */
 	struct ts_config __attribute__((aligned(8))) *config;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 72f694d947f..4903182a062 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -29,12 +29,16 @@ string_mt(const struct sk_buff *skb, const struct net_device *in,
 {
 	const struct xt_string_info *conf = matchinfo;
 	struct ts_state state;
+	int invert;
 
 	memset(&state, 0, sizeof(struct ts_state));
 
+	invert = (match->revision == 0 ? conf->u.v0.invert :
+				    conf->u.v1.flags & XT_STRING_FLAG_INVERT);
+
 	return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
 			     conf->to_offset, conf->config, &state)
-			     != UINT_MAX) ^ conf->invert;
+			     != UINT_MAX) ^ invert;
 }
 
 #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
@@ -46,6 +50,7 @@ string_mt_check(const char *tablename, const void *ip,
 {
 	struct xt_string_info *conf = matchinfo;
 	struct ts_config *ts_conf;
+	int flags = TS_AUTOLOAD;
 
 	/* Damn, can't handle this case properly with iptables... */
 	if (conf->from_offset > conf->to_offset)
@@ -54,8 +59,15 @@ string_mt_check(const char *tablename, const void *ip,
 		return false;
 	if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
 		return false;
+	if (match->revision == 1) {
+		if (conf->u.v1.flags &
+		    ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT))
+			return false;
+		if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE)
+			flags |= TS_IGNORECASE;
+	}
 	ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
-				     GFP_KERNEL, TS_AUTOLOAD);
+				     GFP_KERNEL, flags);
 	if (IS_ERR(ts_conf))
 		return false;
 
@@ -72,6 +84,17 @@ static void string_mt_destroy(const struct xt_match *match, void *matchinfo)
 static struct xt_match string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
+		.revision	= 0,
+		.family		= AF_INET,
+		.checkentry	= string_mt_check,
+		.match 		= string_mt,
+		.destroy 	= string_mt_destroy,
+		.matchsize	= sizeof(struct xt_string_info),
+		.me 		= THIS_MODULE
+	},
+	{
+		.name 		= "string",
+		.revision	= 1,
 		.family		= AF_INET,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
@@ -81,6 +104,17 @@ static struct xt_match string_mt_reg[] __read_mostly = {
 	},
 	{
 		.name 		= "string",
+		.revision	= 0,
+		.family		= AF_INET6,
+		.checkentry	= string_mt_check,
+		.match 		= string_mt,
+		.destroy 	= string_mt_destroy,
+		.matchsize	= sizeof(struct xt_string_info),
+		.me 		= THIS_MODULE
+	},
+	{
+		.name 		= "string",
+		.revision	= 1,
 		.family		= AF_INET6,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
-- 
cgit v1.2.3-70-g09d2


From 81c684d12ddc05bba4953e36e9cdd5939dde344b Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 8 Jul 2008 03:05:28 -0700
Subject: ipv4: remove flush_mutex from ipv4_sysctl_rtcache_flush

It is possible to avoid locking at all in ipv4_sysctl_rtcache_flush by
defining local ctl_table on the stack.

The patch is based on the suggestion from Eric W. Biederman.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 113cd2512ba..79c1e74263a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2873,22 +2873,20 @@ void ip_rt_multicast_event(struct in_device *in_dev)
 }
 
 #ifdef CONFIG_SYSCTL
-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 					struct file *filp, void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
 		int flush_delay;
+		ctl_table ctl;
 		struct net *net;
-		static DEFINE_MUTEX(flush_mutex);
 
-		mutex_lock(&flush_mutex);
-		ctl->data = &flush_delay;
-		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		ctl->data = NULL;
-		mutex_unlock(&flush_mutex);
+		memcpy(&ctl, __ctl, sizeof(ctl));
+		ctl.data = &flush_delay;
+		proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
 
-		net = (struct net *)ctl->extra1;
+		net = (struct net *)__ctl->extra1;
 		rt_cache_flush(net, flush_delay);
 		return 0;
 	}
-- 
cgit v1.2.3-70-g09d2


From 07035fc1bbf931a06e47583cddd2cea2907ac0db Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 8 Jul 2008 03:07:43 -0700
Subject: irda: Fix netlink error path return value

Fix an incorrect return value check of genlmsg_put() in irda_nl_get_mode().
genlmsg_put() does not use ERR_PTR() to encode return values, it just
returns NULL on error.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irnetlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index 9e1fb82e322..2f05ec1037a 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -101,8 +101,8 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info)
 
 	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
 			  &irda_nl_family, 0,  IRDA_NL_CMD_GET_MODE);
-	if (IS_ERR(hdr)) {
-		ret = PTR_ERR(hdr);
+	if (hdr == NULL) {
+		ret = -EMSGSIZE;
 		goto err_out;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From acc81e1465d29e0284008770cc4b8bc90bd93bd7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 8 Jul 2008 03:21:27 -0700
Subject: vlan: fix network_header/mac_header adjustments

Lennert Buytenhek points out that the VLAN code incorrectly adjusts
skb->network_header to point in the middle of the VLAN header and
additionally tries to adjust skb->mac_header without checking for
validity.

The network_header should not be touched at all since we're only
adding headers in front of it, mac_header adjustments are not
necessary at all.

Based on patch by Lennert Buytenhek <buytenh@wantstofly.org>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 2 --
 net/8021q/vlan_dev.c    | 1 -
 2 files changed, 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 5190452ac7d..8f5bf9b676a 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -279,8 +279,6 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, unsigned short
 	veth->h_vlan_TCI = htons(tag);
 
 	skb->protocol = htons(ETH_P_8021Q);
-	skb->mac_header -= VLAN_HLEN;
-	skb->network_header -= VLAN_HLEN;
 
 	return skb;
 }
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index a0617bf7cec..4a8525927c2 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -308,7 +308,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 			vhdr->h_vlan_encapsulated_proto = htons(len);
 
 		skb->protocol = htons(ETH_P_8021Q);
-		skb_reset_network_header(skb);
 	}
 
 	/* Before delegating work to the lower layer, enter our MAC-address */
-- 
cgit v1.2.3-70-g09d2


From 26a25239d7a660cc7162e2463b48b40d544364d0 Mon Sep 17 00:00:00 2001
From: Joonwoo Park <joonwpark81@gmail.com>
Date: Tue, 8 Jul 2008 03:22:16 -0700
Subject: vlan: Use is_vlan_dev()

Use simplified is_vlan_dev function.

Signed-off-by: Joonwoo Park <joonwpark81@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     | 3 +--
 net/8021q/vlan_dev.c | 2 +-
 net/8021q/vlanproc.c | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index b529110c935..8141e2dc510 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -570,8 +570,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 			goto out;
 
 		err = -EINVAL;
-		if (args.cmd != ADD_VLAN_CMD &&
-		    !(dev->priv_flags & IFF_802_1Q_VLAN))
+		if (args.cmd != ADD_VLAN_CMD && !is_vlan_dev(dev))
 			goto out;
 	}
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4a8525927c2..88f318a9601 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -694,7 +694,7 @@ static int vlan_dev_init(struct net_device *dev)
 		dev->hard_start_xmit = vlan_dev_hard_start_xmit;
 	}
 
-	if (real_dev->priv_flags & IFF_802_1Q_VLAN)
+	if (is_vlan_dev(real_dev))
 		subclass = 1;
 
 	lockdep_set_class_and_subclass(&dev->_xmit_lock,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 08b54b593d5..6073a888b6f 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -290,7 +290,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	static const char fmt[] = "%30s %12lu\n";
 	int i;
 
-	if (!(vlandev->priv_flags & IFF_802_1Q_VLAN))
+	if (!is_vlan_dev(vlandev))
 		return 0;
 
 	seq_printf(seq,
-- 
cgit v1.2.3-70-g09d2


From 75b8846acd11ad3fc736d4df3413fe946bbf367c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 8 Jul 2008 03:22:42 -0700
Subject: vlan: Add ethtool support

Add ethtool support for querying the device for offload settings.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 88f318a9601..722697d31e4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -28,6 +28,7 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/ethtool.h>
 #include <net/datalink.h>
 #include <net/p8022.h>
 #include <net/arp.h>
@@ -716,6 +717,22 @@ static void vlan_dev_uninit(struct net_device *dev)
 	}
 }
 
+static u32 vlan_ethtool_get_rx_csum(struct net_device *dev)
+{
+	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct net_device *real_dev = vlan->real_dev;
+
+	if (real_dev->ethtool_ops == NULL ||
+	    real_dev->ethtool_ops->get_rx_csum == NULL)
+		return 0;
+	return real_dev->ethtool_ops->get_rx_csum(real_dev);
+}
+
+static const struct ethtool_ops vlan_ethtool_ops = {
+	.get_link		= ethtool_op_get_link,
+	.get_rx_csum		= vlan_ethtool_get_rx_csum,
+};
+
 void vlan_setup(struct net_device *dev)
 {
 	ether_setup(dev);
@@ -734,6 +751,7 @@ void vlan_setup(struct net_device *dev)
 	dev->change_rx_flags	= vlan_dev_change_rx_flags;
 	dev->do_ioctl		= vlan_dev_ioctl;
 	dev->destructor		= free_netdev;
+	dev->ethtool_ops	= &vlan_ethtool_ops;
 
 	memset(dev->broadcast, 0, ETH_ALEN);
 }
-- 
cgit v1.2.3-70-g09d2


From 7750f403cbe56971336d575b354365190b4e3227 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 8 Jul 2008 03:23:36 -0700
Subject: vlan: uninline __vlan_hwaccel_rx

The function is huge and included at least once in every VLAN acceleration
capable driver. Uninline it; to avoid having drivers depend on the VLAN
module, the function is always built in statically when VLAN is enabled.

With all VLAN acceleration capable drivers that build on x86_64 enabled,
this results in:

   text    data     bss     dec     hex filename
6515227  854044  343968 7713239  75b1d7 vmlinux.inlined
6505637  854044  343968 7703649  758c61 vmlinux.uninlined
----------------------------------------------------------
  -9590

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 64 +++++++------------------------------------------
 net/8021q/Makefile      |  9 +++----
 net/8021q/vlan.h        |  8 +++++++
 net/8021q/vlan_core.c   | 48 +++++++++++++++++++++++++++++++++++++
 net/Makefile            |  4 +++-
 5 files changed, 72 insertions(+), 61 deletions(-)
 create mode 100644 net/8021q/vlan_core.c

(limited to 'net')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 8f5bf9b676a..594cd35b007 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -150,15 +150,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
 	return netdev_priv(dev);
 }
 
-/* inline functions */
-static inline __u32 vlan_get_ingress_priority(struct net_device *dev,
-					      unsigned short vlan_tag)
-{
-	struct vlan_dev_info *vip = vlan_dev_info(dev);
-
-	return vip->ingress_priority_map[(vlan_tag >> 13) & 0x7];
-}
-
 /* VLAN tx hw acceleration helpers. */
 struct vlan_skb_tx_cookie {
 	u32	magic;
@@ -171,56 +162,17 @@ struct vlan_skb_tx_cookie {
 	(VLAN_TX_SKB_CB(__skb)->magic == VLAN_TX_COOKIE_MAGIC)
 #define vlan_tx_tag_get(__skb)	(VLAN_TX_SKB_CB(__skb)->vlan_tag)
 
-/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
-static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
-				    struct vlan_group *grp,
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
+			     unsigned short vlan_tag, int polling);
+#else
+static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 				    unsigned short vlan_tag, int polling)
 {
-	struct net_device_stats *stats;
-
-	if (skb_bond_should_drop(skb)) {
-		dev_kfree_skb_any(skb);
-		return NET_RX_DROP;
-	}
-
-	skb->dev = vlan_group_get_device(grp, vlan_tag & VLAN_VID_MASK);
-	if (skb->dev == NULL) {
-		dev_kfree_skb_any(skb);
-
-		/* Not NET_RX_DROP, this is not being dropped
-		 * due to congestion.
-		 */
-		return 0;
-	}
-
-	skb->dev->last_rx = jiffies;
-
-	stats = &skb->dev->stats;
-	stats->rx_packets++;
-	stats->rx_bytes += skb->len;
-
-	skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tag);
-	switch (skb->pkt_type) {
-	case PACKET_BROADCAST:
-		break;
-
-	case PACKET_MULTICAST:
-		stats->multicast++;
-		break;
-
-	case PACKET_OTHERHOST:
-		/* Our lower layer thinks this is not local, let's make sure.
-		 * This allows the VLAN to have a different MAC than the underlying
-		 * device, and still route correctly.
-		 */
-		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-				       	skb->dev->dev_addr))
-			skb->pkt_type = PACKET_HOST;
-		break;
-	};
-
-	return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+	BUG();
+	return NET_XMIT_SUCCESS;
 }
+#endif
 
 static inline int vlan_hwaccel_rx(struct sk_buff *skb,
 				  struct vlan_group *grp,
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 3006e9ed7b0..9f4f174ead1 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -1,9 +1,10 @@
 #
 # Makefile for the Linux VLAN layer.
 #
+obj-$(subst m,y,$(CONFIG_VLAN_8021Q))	+= vlan_core.o
+obj-$(CONFIG_VLAN_8021Q)		+= 8021q.o
 
-obj-$(CONFIG_VLAN_8021Q) += 8021q.o
+8021q-y					:= vlan.o vlan_dev.o vlan_netlink.o
+8021q-$(CONFIG_VLAN_8021Q_GVRP)		+= vlan_gvrp.o
+8021q-$(CONFIG_PROC_FS)			+= vlanproc.o
 
-8021q-y				:= vlan.o vlan_dev.o vlan_netlink.o
-8021q-$(CONFIG_VLAN_8021Q_GVRP)	+= vlan_gvrp.o
-8021q-$(CONFIG_PROC_FS)		+= vlanproc.o
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 097b2e04c92..7cc1a97c42f 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -37,6 +37,14 @@ void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
 void unregister_vlan_dev(struct net_device *dev);
 
+static inline u32 vlan_get_ingress_priority(struct net_device *dev,
+					    unsigned short vlan_tag)
+{
+	struct vlan_dev_info *vip = vlan_dev_info(dev);
+
+	return vip->ingress_priority_map[(vlan_tag >> 13) & 0x7];
+}
+
 #ifdef CONFIG_VLAN_8021Q_GVRP
 extern int vlan_gvrp_request_join(const struct net_device *dev);
 extern void vlan_gvrp_request_leave(const struct net_device *dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
new file mode 100644
index 00000000000..85c94edb000
--- /dev/null
+++ b/net/8021q/vlan_core.c
@@ -0,0 +1,48 @@
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include "vlan.h"
+
+/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
+int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
+		      unsigned short vlan_tag, int polling)
+{
+	struct net_device_stats *stats;
+
+	if (skb_bond_should_drop(skb)) {
+		dev_kfree_skb_any(skb);
+		return NET_RX_DROP;
+	}
+
+	skb->dev = vlan_group_get_device(grp, vlan_tag & VLAN_VID_MASK);
+	if (skb->dev == NULL) {
+		dev_kfree_skb_any(skb);
+		/* Not NET_RX_DROP, this is not being dropped
+		 * due to congestion. */
+		return NET_RX_SUCCESS;
+	}
+	skb->dev->last_rx = jiffies;
+
+	stats = &skb->dev->stats;
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+
+	skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tag);
+	switch (skb->pkt_type) {
+	case PACKET_BROADCAST:
+		break;
+	case PACKET_MULTICAST:
+		stats->multicast++;
+		break;
+	case PACKET_OTHERHOST:
+		/* Our lower layer thinks this is not local, let's make sure.
+		 * This allows the VLAN to have a different MAC than the
+		 * underlying device, and still route correctly. */
+		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+					skb->dev->dev_addr))
+			skb->pkt_type = PACKET_HOST;
+		break;
+	};
+	return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+}
+EXPORT_SYMBOL(__vlan_hwaccel_rx);
diff --git a/net/Makefile b/net/Makefile
index b7a13643b54..4f43e7f874f 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -42,7 +42,9 @@ obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_DECNET)		+= decnet/
 obj-$(CONFIG_ECONET)		+= econet/
-obj-$(CONFIG_VLAN_8021Q)	+= 8021q/
+ifneq ($(CONFIG_VLAN_8021Q),)
+obj-y				+= 8021q/
+endif
 obj-$(CONFIG_IP_DCCP)		+= dccp/
 obj-$(CONFIG_IP_SCTP)		+= sctp/
 obj-y				+= wireless/
-- 
cgit v1.2.3-70-g09d2


From 22d1ba74bbafa96d3f425cc12714d3fe8675183f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 8 Jul 2008 03:23:57 -0700
Subject: vlan: move struct vlan_dev_info to private header

Hide struct vlan_dev_info from drivers to prevent them from growing
more creative ways to use it. Provide accessors for the two drivers
that currently use it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/cxgb3/l2t.c         |  2 +-
 drivers/s390/net/qeth_l3_main.c |  4 +--
 include/linux/if_vlan.h         | 56 +++++++++++------------------------------
 net/8021q/vlan.c                |  4 +--
 net/8021q/vlan.h                | 50 +++++++++++++++++++++++++++++++++++-
 net/8021q/vlan_core.c           | 12 +++++++++
 net/8021q/vlan_dev.c            |  5 ----
 7 files changed, 80 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/drivers/net/cxgb3/l2t.c b/drivers/net/cxgb3/l2t.c
index f510140885a..825e510bd9e 100644
--- a/drivers/net/cxgb3/l2t.c
+++ b/drivers/net/cxgb3/l2t.c
@@ -337,7 +337,7 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct neighbour *neigh,
 		atomic_set(&e->refcnt, 1);
 		neigh_replace(e, neigh);
 		if (neigh->dev->priv_flags & IFF_802_1Q_VLAN)
-			e->vlan = vlan_dev_info(neigh->dev)->vlan_id;
+			e->vlan = vlan_dev_vlan_id(neigh->dev);
 		else
 			e->vlan = VLAN_NONE;
 		spin_unlock(&e->lock);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 999552c83bb..85be40abdda 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2020,7 +2020,7 @@ static int qeth_l3_verify_vlan_dev(struct net_device *dev,
 		}
 	}
 
-	if (rc && !(netdev_priv(vlan_dev_info(dev)->real_dev) == (void *)card))
+	if (rc && !(netdev_priv(vlan_dev_real_dev(dev)) == (void *)card))
 		return 0;
 
 	return rc;
@@ -2056,7 +2056,7 @@ static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
 	if (rc == QETH_REAL_CARD)
 		card = netdev_priv(dev);
 	else if (rc == QETH_VLAN_CARD)
-		card = netdev_priv(vlan_dev_info(dev)->real_dev);
+		card = netdev_priv(vlan_dev_real_dev(dev));
 	if (card && card->options.layer2)
 		card = NULL;
 	QETH_DBF_TEXT_(TRACE, 4, "%d", rc);
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 594cd35b007..cb2e6b48088 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -109,47 +109,6 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
-struct vlan_priority_tci_mapping {
-	u32 priority;
-	unsigned short vlan_qos; /* This should be shifted when first set, so we only do it
-				  * at provisioning time.
-				  * ((skb->priority << 13) & 0xE000)
-				  */
-	struct vlan_priority_tci_mapping *next;
-};
-
-/* Holds information that makes sense if this device is a VLAN device. */
-struct vlan_dev_info {
-	/** This will be the mapping that correlates skb->priority to
-	 * 3 bits of VLAN QOS tags...
-	 */
-	unsigned int nr_ingress_mappings;
-	u32 ingress_priority_map[8];
-
-	unsigned int nr_egress_mappings;
-	struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */
-
-	unsigned short vlan_id;        /*  The VLAN Identifier for this interface. */
-	unsigned short flags;          /* (1 << 0) re_order_header   This option will cause the
-                                        *   VLAN code to move around the ethernet header on
-                                        *   ingress to make the skb look **exactly** like it
-                                        *   came in from an ethernet port.  This destroys some of
-                                        *   the VLAN information in the skb, but it fixes programs
-                                        *   like DHCP that use packet-filtering and don't understand
-                                        *   802.1Q
-                                        */
-	struct net_device *real_dev;    /* the underlying device/interface */
-	unsigned char real_dev_addr[ETH_ALEN];
-	struct proc_dir_entry *dent;    /* Holds the proc data */
-	unsigned long cnt_inc_headroom_on_tx; /* How many times did we have to grow the skb on TX. */
-	unsigned long cnt_encap_on_xmit;      /* How many times did we have to encapsulate the skb on TX. */
-};
-
-static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
-{
-	return netdev_priv(dev);
-}
-
 /* VLAN tx hw acceleration helpers. */
 struct vlan_skb_tx_cookie {
 	u32	magic;
@@ -163,9 +122,24 @@ struct vlan_skb_tx_cookie {
 #define vlan_tx_tag_get(__skb)	(VLAN_TX_SKB_CB(__skb)->vlan_tag)
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
+extern u16 vlan_dev_vlan_id(const struct net_device *dev);
+
 extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 			     unsigned short vlan_tag, int polling);
 #else
+static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
+{
+	BUG();
+	return NULL;
+}
+
+static inline u16 vlan_dev_vlan_id(const struct net_device *dev)
+{
+	BUG();
+	return 0;
+}
+
 static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 				    unsigned short vlan_tag, int polling)
 {
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8141e2dc510..7a2625d2f9a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -543,7 +543,6 @@ static struct notifier_block vlan_notifier_block __read_mostly = {
 static int vlan_ioctl_handler(struct net *net, void __user *arg)
 {
 	int err;
-	unsigned short vid = 0;
 	struct vlan_ioctl_args args;
 	struct net_device *dev = NULL;
 
@@ -644,8 +643,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 
 	case GET_VLAN_VID_CMD:
 		err = 0;
-		vlan_dev_get_vid(dev, &vid);
-		args.u.VID = vid;
+		args.u.VID = vlan_dev_vlan_id(dev);
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args)))
 		      err = -EFAULT;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 7cc1a97c42f..14c421e033f 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -3,6 +3,55 @@
 
 #include <linux/if_vlan.h>
 
+
+/**
+ *	struct vlan_priority_tci_mapping - vlan egress priority mappings
+ *	@priority: skb priority
+ *	@vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
+ *	@next: pointer to next struct
+ */
+struct vlan_priority_tci_mapping {
+	u32					priority;
+	unsigned short				vlan_qos;
+	struct vlan_priority_tci_mapping	*next;
+};
+
+/**
+ *	struct vlan_dev_info - VLAN private device data
+ *	@nr_ingress_mappings: number of ingress priority mappings
+ *	@ingress_priority_map: ingress priority mappings
+ *	@nr_egress_mappings: number of egress priority mappings
+ *	@egress_priority_map: hash of egress priority mappings
+ *	@vlan_id: VLAN identifier
+ *	@flags: device flags
+ *	@real_dev: underlying netdevice
+ *	@real_dev_addr: address of underlying netdevice
+ *	@dent: proc dir entry
+ *	@cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX
+ *	@cnt_encap_on_xmit: statistic - number of skb encapsulations on TX
+ */
+struct vlan_dev_info {
+	unsigned int				nr_ingress_mappings;
+	u32					ingress_priority_map[8];
+	unsigned int				nr_egress_mappings;
+	struct vlan_priority_tci_mapping	*egress_priority_map[16];
+
+	unsigned short				vlan_id;
+	unsigned short				flags;
+
+	struct net_device			*real_dev;
+	unsigned char				real_dev_addr[ETH_ALEN];
+
+	struct proc_dir_entry			*dent;
+	unsigned long				cnt_inc_headroom_on_tx;
+	unsigned long				cnt_encap_on_xmit;
+};
+
+static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
+{
+	return netdev_priv(dev);
+}
+
 #define VLAN_GRP_HASH_SHIFT	5
 #define VLAN_GRP_HASH_SIZE	(1 << VLAN_GRP_HASH_SHIFT)
 #define VLAN_GRP_HASH_MASK	(VLAN_GRP_HASH_SIZE - 1)
@@ -30,7 +79,6 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, short vlan_prio);
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
-void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
 
 int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
 void vlan_setup(struct net_device *dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 85c94edb000..f980b9154cc 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -46,3 +46,15 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 	return (polling ? netif_receive_skb(skb) : netif_rx(skb));
 }
 EXPORT_SYMBOL(__vlan_hwaccel_rx);
+
+struct net_device *vlan_dev_real_dev(const struct net_device *dev)
+{
+	return vlan_dev_info(dev)->real_dev;
+}
+EXPORT_SYMBOL_GPL(vlan_dev_real_dev);
+
+u16 vlan_dev_vlan_id(const struct net_device *dev)
+{
+	return vlan_dev_info(dev)->vlan_id;
+}
+EXPORT_SYMBOL_GPL(vlan_dev_vlan_id);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 722697d31e4..2aab294c574 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -531,11 +531,6 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
 	strncpy(result, vlan_dev_info(dev)->real_dev->name, 23);
 }
 
-void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
-{
-	*result = vlan_dev_info(dev)->vlan_id;
-}
-
 static int vlan_dev_open(struct net_device *dev)
 {
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
-- 
cgit v1.2.3-70-g09d2


From 9bb8582efb555521c7eec595ebd34e835ddc34b8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 8 Jul 2008 03:24:44 -0700
Subject: vlan: TCI related type and naming cleanups

The VLAN code contains multiple spots that use tag, id and tci as
identifiers for arguments and variables incorrectly and they actually
contain or are expected to contain something different. Additionally
types are used inconsistently (unsigned short vs u16) and identifiers
are sometimes capitalized.

- consistently use u16 for storing TCI, ID or QoS values
- consistently use vlan_id and vlan_tci for storing the respective values
- remove capitalization
- add kdoc comment to netif_hwaccel_{rx,receive_skb}

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 84 ++++++++++++++++++++++++++++---------------------
 net/8021q/vlan.c        | 34 ++++++++++----------
 net/8021q/vlan.h        | 19 ++++++-----
 net/8021q/vlan_core.c   |  6 ++--
 net/8021q/vlan_dev.c    | 47 ++++++++++++++-------------
 net/8021q/vlan_gvrp.c   |  8 ++---
 6 files changed, 103 insertions(+), 95 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 8e68b05b13d..d36515dae62 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -87,7 +87,7 @@ struct vlan_group {
 };
 
 static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
-						       unsigned int vlan_id)
+						       u16 vlan_id)
 {
 	struct net_device **array;
 	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
@@ -95,7 +95,7 @@ static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
 }
 
 static inline void vlan_group_set_device(struct vlan_group *vg,
-					 unsigned int vlan_id,
+					 u16 vlan_id,
 					 struct net_device *dev)
 {
 	struct net_device **array;
@@ -122,7 +122,7 @@ extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 
 extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-			     unsigned short vlan_tag, int polling);
+			     u16 vlan_tci, int polling);
 #else
 static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
@@ -137,39 +137,51 @@ static inline u16 vlan_dev_vlan_id(const struct net_device *dev)
 }
 
 static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-				    unsigned short vlan_tag, int polling)
+				    u16 vlan_tci, int polling)
 {
 	BUG();
 	return NET_XMIT_SUCCESS;
 }
 #endif
 
+/**
+ * vlan_hwaccel_rx - netif_rx wrapper for VLAN RX acceleration
+ * @skb: buffer
+ * @grp: vlan group
+ * @vlan_tci: VLAN TCI as received from the card
+ */
 static inline int vlan_hwaccel_rx(struct sk_buff *skb,
 				  struct vlan_group *grp,
-				  unsigned short vlan_tag)
+				  u16 vlan_tci)
 {
-	return __vlan_hwaccel_rx(skb, grp, vlan_tag, 0);
+	return __vlan_hwaccel_rx(skb, grp, vlan_tci, 0);
 }
 
+/**
+ * vlan_hwaccel_receive_skb - netif_receive_skb wrapper for VLAN RX acceleration
+ * @skb: buffer
+ * @grp: vlan group
+ * @vlan_tci: VLAN TCI as received from the card
+ */
 static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
 					   struct vlan_group *grp,
-					   unsigned short vlan_tag)
+					   u16 vlan_tci)
 {
-	return __vlan_hwaccel_rx(skb, grp, vlan_tag, 1);
+	return __vlan_hwaccel_rx(skb, grp, vlan_tci, 1);
 }
 
 /**
  * __vlan_put_tag - regular VLAN tag inserting
  * @skb: skbuff to tag
- * @tag: VLAN tag to insert
+ * @vlan_tci: VLAN TCI to insert
  *
  * Inserts the VLAN tag into @skb as part of the payload
  * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
- * 
+ *
  * Following the skb_unshare() example, in case of error, the calling function
  * doesn't have to worry about freeing the original skb.
  */
-static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, unsigned short tag)
+static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 {
 	struct vlan_ethhdr *veth;
 
@@ -197,8 +209,8 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, unsigned short
 	/* first, the ethernet type */
 	veth->h_vlan_proto = htons(ETH_P_8021Q);
 
-	/* now, the tag */
-	veth->h_vlan_TCI = htons(tag);
+	/* now, the TCI */
+	veth->h_vlan_TCI = htons(vlan_tci);
 
 	skb->protocol = htons(ETH_P_8021Q);
 
@@ -208,17 +220,18 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, unsigned short
 /**
  * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting
  * @skb: skbuff to tag
- * @tag: VLAN tag to insert
+ * @vlan_tci: VLAN TCI to insert
  *
- * Puts the VLAN tag in @skb->cb[] and lets the device do the rest
+ * Puts the VLAN TCI in @skb->cb[] and lets the device do the rest
  */
-static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, unsigned short tag)
+static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb,
+						     u16 vlan_tci)
 {
 	struct vlan_skb_tx_cookie *cookie;
 
 	cookie = VLAN_TX_SKB_CB(skb);
 	cookie->magic = VLAN_TX_COOKIE_MAGIC;
-	cookie->vlan_tag = tag;
+	cookie->vlan_tag = vlan_tci;
 
 	return skb;
 }
@@ -228,28 +241,28 @@ static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, unsign
 /**
  * vlan_put_tag - inserts VLAN tag according to device features
  * @skb: skbuff to tag
- * @tag: VLAN tag to insert
+ * @vlan_tci: VLAN TCI to insert
  *
  * Assumes skb->dev is the target that will xmit this frame.
  * Returns a VLAN tagged skb.
  */
-static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, unsigned short tag)
+static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 {
 	if (skb->dev->features & NETIF_F_HW_VLAN_TX) {
-		return __vlan_hwaccel_put_tag(skb, tag);
+		return __vlan_hwaccel_put_tag(skb, vlan_tci);
 	} else {
-		return __vlan_put_tag(skb, tag);
+		return __vlan_put_tag(skb, vlan_tci);
 	}
 }
 
 /**
  * __vlan_get_tag - get the VLAN ID that is part of the payload
  * @skb: skbuff to query
- * @tag: buffer to store vlaue
- * 
+ * @vlan_tci: buffer to store vlaue
+ *
  * Returns error if the skb is not of VLAN type
  */
-static inline int __vlan_get_tag(const struct sk_buff *skb, unsigned short *tag)
+static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
 {
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data;
 
@@ -257,29 +270,28 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, unsigned short *tag)
 		return -EINVAL;
 	}
 
-	*tag = ntohs(veth->h_vlan_TCI);
-
+	*vlan_tci = ntohs(veth->h_vlan_TCI);
 	return 0;
 }
 
 /**
  * __vlan_hwaccel_get_tag - get the VLAN ID that is in @skb->cb[]
  * @skb: skbuff to query
- * @tag: buffer to store vlaue
- * 
+ * @vlan_tci: buffer to store vlaue
+ *
  * Returns error if @skb->cb[] is not set correctly
  */
 static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
-					 unsigned short *tag)
+					 u16 *vlan_tci)
 {
 	struct vlan_skb_tx_cookie *cookie;
 
 	cookie = VLAN_TX_SKB_CB(skb);
 	if (cookie->magic == VLAN_TX_COOKIE_MAGIC) {
-		*tag = cookie->vlan_tag;
+		*vlan_tci = cookie->vlan_tag;
 		return 0;
 	} else {
-		*tag = 0;
+		*vlan_tci = 0;
 		return -EINVAL;
 	}
 }
@@ -289,16 +301,16 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
 /**
  * vlan_get_tag - get the VLAN ID from the skb
  * @skb: skbuff to query
- * @tag: buffer to store vlaue
- * 
+ * @vlan_tci: buffer to store vlaue
+ *
  * Returns error if the skb is not VLAN tagged
  */
-static inline int vlan_get_tag(const struct sk_buff *skb, unsigned short *tag)
+static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
 {
 	if (skb->dev->features & NETIF_F_HW_VLAN_TX) {
-		return __vlan_hwaccel_get_tag(skb, tag);
+		return __vlan_hwaccel_get_tag(skb, vlan_tci);
 	} else {
-		return __vlan_get_tag(skb, tag);
+		return __vlan_get_tag(skb, vlan_tci);
 	}
 }
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 7a2625d2f9a..68bdcf4a795 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -83,13 +83,12 @@ static struct vlan_group *__vlan_find_group(struct net_device *real_dev)
  *
  * Must be invoked with RCU read lock (no preempt)
  */
-struct net_device *__find_vlan_dev(struct net_device *real_dev,
-				   unsigned short VID)
+struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id)
 {
 	struct vlan_group *grp = __vlan_find_group(real_dev);
 
 	if (grp)
-		return vlan_group_get_device(grp, VID);
+		return vlan_group_get_device(grp, vlan_id);
 
 	return NULL;
 }
@@ -117,14 +116,14 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
 	return grp;
 }
 
-static int vlan_group_prealloc_vid(struct vlan_group *vg, int vid)
+static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 {
 	struct net_device **array;
 	unsigned int size;
 
 	ASSERT_RTNL();
 
-	array = vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	if (array != NULL)
 		return 0;
 
@@ -133,7 +132,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, int vid)
 	if (array == NULL)
 		return -ENOBUFS;
 
-	vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN] = array;
+	vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
 	return 0;
 }
 
@@ -147,7 +146,7 @@ void unregister_vlan_dev(struct net_device *dev)
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	struct vlan_group *grp;
-	unsigned short vlan_id = vlan->vlan_id;
+	u16 vlan_id = vlan->vlan_id;
 
 	ASSERT_RTNL();
 
@@ -205,7 +204,7 @@ static void vlan_transfer_operstate(const struct net_device *dev,
 	}
 }
 
-int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 {
 	char *name = real_dev->name;
 
@@ -242,7 +241,7 @@ int register_vlan_dev(struct net_device *dev)
 {
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev = vlan->real_dev;
-	unsigned short vlan_id = vlan->vlan_id;
+	u16 vlan_id = vlan->vlan_id;
 	struct vlan_group *grp, *ngrp = NULL;
 	int err;
 
@@ -295,8 +294,7 @@ out_free_group:
 /*  Attach a VLAN device to a mac address (ie Ethernet Card).
  *  Returns 0 if the device was created or a negative error code otherwise.
  */
-static int register_vlan_device(struct net_device *real_dev,
-				unsigned short VLAN_ID)
+static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 {
 	struct net_device *new_dev;
 	struct net *net = dev_net(real_dev);
@@ -304,10 +302,10 @@ static int register_vlan_device(struct net_device *real_dev,
 	char name[IFNAMSIZ];
 	int err;
 
-	if (VLAN_ID >= VLAN_VID_MASK)
+	if (vlan_id >= VLAN_VID_MASK)
 		return -ERANGE;
 
-	err = vlan_check_real_dev(real_dev, VLAN_ID);
+	err = vlan_check_real_dev(real_dev, vlan_id);
 	if (err < 0)
 		return err;
 
@@ -315,26 +313,26 @@ static int register_vlan_device(struct net_device *real_dev,
 	switch (vn->name_type) {
 	case VLAN_NAME_TYPE_RAW_PLUS_VID:
 		/* name will look like:	 eth1.0005 */
-		snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, VLAN_ID);
+		snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id);
 		break;
 	case VLAN_NAME_TYPE_PLUS_VID_NO_PAD:
 		/* Put our vlan.VID in the name.
 		 * Name will look like:	 vlan5
 		 */
-		snprintf(name, IFNAMSIZ, "vlan%i", VLAN_ID);
+		snprintf(name, IFNAMSIZ, "vlan%i", vlan_id);
 		break;
 	case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD:
 		/* Put our vlan.VID in the name.
 		 * Name will look like:	 eth0.5
 		 */
-		snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, VLAN_ID);
+		snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id);
 		break;
 	case VLAN_NAME_TYPE_PLUS_VID:
 		/* Put our vlan.VID in the name.
 		 * Name will look like:	 vlan0005
 		 */
 	default:
-		snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID);
+		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
 	}
 
 	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
@@ -349,7 +347,7 @@ static int register_vlan_device(struct net_device *real_dev,
 	 */
 	new_dev->mtu = real_dev->mtu;
 
-	vlan_dev_info(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
+	vlan_dev_info(new_dev)->vlan_id = vlan_id;
 	vlan_dev_info(new_dev)->real_dev = real_dev;
 	vlan_dev_info(new_dev)->dent = NULL;
 	vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 14c421e033f..a6603a4d917 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -12,7 +12,7 @@
  */
 struct vlan_priority_tci_mapping {
 	u32					priority;
-	unsigned short				vlan_qos;
+	u16					vlan_qos;
 	struct vlan_priority_tci_mapping	*next;
 };
 
@@ -36,8 +36,8 @@ struct vlan_dev_info {
 	unsigned int				nr_egress_mappings;
 	struct vlan_priority_tci_mapping	*egress_priority_map[16];
 
-	unsigned short				vlan_id;
-	unsigned short				flags;
+	u16					vlan_id;
+	u16					flags;
 
 	struct net_device			*real_dev;
 	unsigned char				real_dev_addr[ETH_ALEN];
@@ -67,30 +67,29 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
  *  Must be invoked with rcu_read_lock (ie preempt disabled)
  *  or with RTNL.
  */
-struct net_device *__find_vlan_dev(struct net_device *real_dev,
-				   unsigned short VID); /* vlan.c */
+struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id);
 
 /* found in vlan_dev.c */
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev);
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
-				   u32 skb_prio, short vlan_prio);
+				   u32 skb_prio, u16 vlan_prio);
 int vlan_dev_set_egress_priority(const struct net_device *dev,
-				 u32 skb_prio, short vlan_prio);
+				 u32 skb_prio, u16 vlan_prio);
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 
-int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
+int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
 void unregister_vlan_dev(struct net_device *dev);
 
 static inline u32 vlan_get_ingress_priority(struct net_device *dev,
-					    unsigned short vlan_tag)
+					    u16 vlan_tci)
 {
 	struct vlan_dev_info *vip = vlan_dev_info(dev);
 
-	return vip->ingress_priority_map[(vlan_tag >> 13) & 0x7];
+	return vip->ingress_priority_map[(vlan_tci >> 13) & 0x7];
 }
 
 #ifdef CONFIG_VLAN_8021Q_GVRP
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f980b9154cc..68df12d3664 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -5,7 +5,7 @@
 
 /* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
 int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-		      unsigned short vlan_tag, int polling)
+		      u16 vlan_tci, int polling)
 {
 	struct net_device_stats *stats;
 
@@ -14,7 +14,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 		return NET_RX_DROP;
 	}
 
-	skb->dev = vlan_group_get_device(grp, vlan_tag & VLAN_VID_MASK);
+	skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
 	if (skb->dev == NULL) {
 		dev_kfree_skb_any(skb);
 		/* Not NET_RX_DROP, this is not being dropped
@@ -27,7 +27,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 	stats->rx_packets++;
 	stats->rx_bytes += skb->len;
 
-	skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tag);
+	skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
 	switch (skb->pkt_type) {
 	case PACKET_BROADCAST:
 		break;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2aab294c574..2ccac6bea57 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -150,9 +150,9 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev)
 {
 	struct vlan_hdr *vhdr;
-	unsigned short vid;
 	struct net_device_stats *stats;
-	unsigned short vlan_TCI;
+	u16 vlan_id;
+	u16 vlan_tci;
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (skb == NULL)
@@ -162,14 +162,14 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		goto err_free;
 
 	vhdr = (struct vlan_hdr *)skb->data;
-	vlan_TCI = ntohs(vhdr->h_vlan_TCI);
-	vid = (vlan_TCI & VLAN_VID_MASK);
+	vlan_tci = ntohs(vhdr->h_vlan_TCI);
+	vlan_id = vlan_tci & VLAN_VID_MASK;
 
 	rcu_read_lock();
-	skb->dev = __find_vlan_dev(dev, vid);
+	skb->dev = __find_vlan_dev(dev, vlan_id);
 	if (!skb->dev) {
 		pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
-			 __func__, (unsigned int)vid, dev->name);
+			 __func__, vlan_id, dev->name);
 		goto err_unlock;
 	}
 
@@ -181,11 +181,10 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 
 	skb_pull_rcsum(skb, VLAN_HLEN);
 
-	skb->priority = vlan_get_ingress_priority(skb->dev,
-						  ntohs(vhdr->h_vlan_TCI));
+	skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
 
 	pr_debug("%s: priority: %u for TCI: %hu\n",
-		 __func__, skb->priority, ntohs(vhdr->h_vlan_TCI));
+		 __func__, skb->priority, vlan_tci);
 
 	switch (skb->pkt_type) {
 	case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
@@ -228,7 +227,7 @@ err_free:
 	return NET_RX_DROP;
 }
 
-static inline unsigned short
+static inline u16
 vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
 {
 	struct vlan_priority_tci_mapping *mp;
@@ -260,7 +259,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				unsigned int len)
 {
 	struct vlan_hdr *vhdr;
-	unsigned short veth_TCI = 0;
+	u16 vlan_tci = 0;
 	int rc = 0;
 	int build_vlan_header = 0;
 	struct net_device *vdev = dev;
@@ -292,10 +291,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		 * VLAN ID	 12 bits (low bits)
 		 *
 		 */
-		veth_TCI = vlan_dev_info(dev)->vlan_id;
-		veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
+		vlan_tci = vlan_dev_info(dev)->vlan_id;
+		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 
-		vhdr->h_vlan_TCI = htons(veth_TCI);
+		vhdr->h_vlan_TCI = htons(vlan_tci);
 
 		/*
 		 *  Set the protocol type. For a packet of type ETH_P_802_3 we
@@ -373,7 +372,7 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
 		vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
 		int orig_headroom = skb_headroom(skb);
-		unsigned short veth_TCI;
+		u16 vlan_tci;
 
 		/* This is not a VLAN frame...but we can fix that! */
 		vlan_dev_info(dev)->cnt_encap_on_xmit++;
@@ -386,10 +385,10 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		 * CFI		 1 bit
 		 * VLAN ID	 12 bits (low bits)
 		 */
-		veth_TCI = vlan_dev_info(dev)->vlan_id;
-		veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
+		vlan_tci = vlan_dev_info(dev)->vlan_id;
+		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 
-		skb = __vlan_put_tag(skb, veth_TCI);
+		skb = __vlan_put_tag(skb, vlan_tci);
 		if (!skb) {
 			stats->tx_dropped++;
 			return 0;
@@ -422,7 +421,7 @@ static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
 	struct net_device_stats *stats = &dev->stats;
-	unsigned short veth_TCI;
+	u16 vlan_tci;
 
 	/* Construct the second two bytes. This field looks something
 	 * like:
@@ -430,9 +429,9 @@ static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
 	 * CFI		 1 bit
 	 * VLAN ID	 12 bits (low bits)
 	 */
-	veth_TCI = vlan_dev_info(dev)->vlan_id;
-	veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
-	skb = __vlan_hwaccel_put_tag(skb, veth_TCI);
+	vlan_tci = vlan_dev_info(dev)->vlan_id;
+	vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+	skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
 
 	stats->tx_packets++;
 	stats->tx_bytes += skb->len;
@@ -457,7 +456,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 }
 
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
-				   u32 skb_prio, short vlan_prio)
+				   u32 skb_prio, u16 vlan_prio)
 {
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 
@@ -470,7 +469,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
 }
 
 int vlan_dev_set_egress_priority(const struct net_device *dev,
-				 u32 skb_prio, short vlan_prio)
+				 u32 skb_prio, u16 vlan_prio)
 {
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct vlan_priority_tci_mapping *mp = NULL;
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index db978160836..061ceceeef1 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -30,19 +30,19 @@ static struct garp_application vlan_gvrp_app __read_mostly = {
 int vlan_gvrp_request_join(const struct net_device *dev)
 {
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
-	__be16 vid = htons(vlan->vlan_id);
+	__be16 vlan_id = htons(vlan->vlan_id);
 
 	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
-				 &vid, sizeof(vid), GVRP_ATTR_VID);
+				 &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
 
 void vlan_gvrp_request_leave(const struct net_device *dev)
 {
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
-	__be16 vid = htons(vlan->vlan_id);
+	__be16 vlan_id = htons(vlan->vlan_id);
 
 	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
-			   &vid, sizeof(vid), GVRP_ATTR_VID);
+			   &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
 
 int vlan_gvrp_init_applicant(struct net_device *dev)
-- 
cgit v1.2.3-70-g09d2


From b2898a27809f54a33050a70d0eaa4a78194163a0 Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Tue, 1 Jul 2008 10:45:13 +0200
Subject: mac80211: Don't request encryption for probe response

Probe responses shouldn't be encrypted, and mac80211 doesn't
set the crypto key accordingly. However it didn't set the
IEEE80211_TX_CTL_DO_NOT_ENCRYPT flag which means drivers
could make an attempt to encrypt it, and causing a NULL
pointer dereference when accessing the provided hw_key field.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4a3bddd206d..86abdf96390 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2481,6 +2481,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 			control->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
 		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
 		control->flags |= IEEE80211_TX_CTL_NO_ACK;
+		control->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
 		control->control.retry_limit = 1;
 
 		ifsta->probe_resp = skb_copy(skb, GFP_ATOMIC);
-- 
cgit v1.2.3-70-g09d2


From 429a380571a6e6b8525b93161544eafc9b227e44 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Tue, 1 Jul 2008 14:16:03 +0300
Subject: mac80211: add block ack request capability

This patch adds block ack request capability

Signed-off-by: Ester Kummer <ester.kummer@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  4 ++++
 include/net/mac80211.h     |  3 +++
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/main.c        | 21 +++++++++++++++++++--
 net/mac80211/mlme.c        | 29 +++++++++++++++++++++++++++++
 5 files changed, 56 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index cffd6d0094f..aa603c3d76d 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -658,6 +658,10 @@ struct ieee80211_bar {
 	__le16 start_seq_num;
 } __attribute__((packed));
 
+/* 802.11 BAR control masks */
+#define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL     0x0000
+#define IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA  0x0004
+
 /**
  * struct ieee80211_ht_cap - HT capabilities
  *
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3a204acad90..0a5de3ef527 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -235,6 +235,8 @@ struct ieee80211_bss_conf {
  * @IEEE80211_TX_STAT_ACK: Frame was acknowledged
  * @IEEE80211_TX_STAT_AMPDU: The frame was aggregated, so status
  * 	is for the whole aggregation.
+ * @IEEE80211_TX_STAT_AMPDU_NO_BACK: no block ack was returned,
+ * 	so consider using block ack request (BAR).
  */
 enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
@@ -260,6 +262,7 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_STAT_TX_FILTERED		= BIT(20),
 	IEEE80211_TX_STAT_ACK			= BIT(21),
 	IEEE80211_TX_STAT_AMPDU			= BIT(22),
+	IEEE80211_TX_STAT_AMPDU_NO_BACK		= BIT(23),
 };
 
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f90da1bbec4..175cbdd36d7 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -904,6 +904,7 @@ void ieee80211_send_addba_request(struct net_device *dev, const u8 *da,
 				  u16 agg_size, u16 timeout);
 void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
 				u16 initiator, u16 reason_code);
+void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn);
 
 void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *da,
 				u16 tid, u16 initiator, u16 reason);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f18cfd72787..074f71a62a6 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1404,14 +1404,15 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	u16 frag, type;
+	__le16 fc;
 	struct ieee80211_tx_status_rtap_hdr *rthdr;
 	struct ieee80211_sub_if_data *sdata;
 	struct net_device *prev_dev = NULL;
+	struct sta_info *sta;
 
 	rcu_read_lock();
 
 	if (info->status.excessive_retries) {
-		struct sta_info *sta;
 		sta = sta_info_get(local, hdr->addr1);
 		if (sta) {
 			if (test_sta_flags(sta, WLAN_STA_PS)) {
@@ -1426,8 +1427,24 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 		}
 	}
 
+	fc = hdr->frame_control;
+
+	if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) &&
+	    (ieee80211_is_data_qos(fc))) {
+		u16 tid, ssn;
+		u8 *qc;
+		sta = sta_info_get(local, hdr->addr1);
+		if (sta) {
+			qc = ieee80211_get_qos_ctl(hdr);
+			tid = qc[0] & 0xf;
+			ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10)
+						& IEEE80211_SCTL_SEQ);
+			ieee80211_send_bar(sta->sdata->dev, hdr->addr1,
+					   tid, ssn);
+		}
+	}
+
 	if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) {
-		struct sta_info *sta;
 		sta = sta_info_get(local, hdr->addr1);
 		if (sta) {
 			ieee80211_handle_filtered_frame(local, sta, skb);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 86abdf96390..e080482b63c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1536,6 +1536,35 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
 	ieee80211_sta_tx(dev, skb, 0);
 }
 
+void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sk_buff *skb;
+	struct ieee80211_bar *bar;
+	u16 bar_control = 0;
+
+	skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom);
+	if (!skb) {
+		printk(KERN_ERR "%s: failed to allocate buffer for "
+			"bar frame\n", dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar));
+	memset(bar, 0, sizeof(*bar));
+	bar->frame_control = IEEE80211_FC(IEEE80211_FTYPE_CTL,
+					IEEE80211_STYPE_BACK_REQ);
+	memcpy(bar->ra, ra, ETH_ALEN);
+	memcpy(bar->ta, dev->dev_addr, ETH_ALEN);
+	bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL;
+	bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA;
+	bar_control |= (u16)(tid << 12);
+	bar->control = cpu_to_le16(bar_control);
+	bar->start_seq_num = cpu_to_le16(ssn);
+
+	ieee80211_sta_tx(dev, skb, 0);
+}
+
 void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
 					u16 initiator, u16 reason)
 {
-- 
cgit v1.2.3-70-g09d2


From d96a7bc0499d0332cecb0a1d7d7d0d44f9c8cc28 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Tue, 1 Jul 2008 14:29:20 +0300
Subject: mac80211: remove useless tid assignment for management and control
 frames

This patch removes useless tid assignment for management and control frames

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wme.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index ffe1af82fa4..a1a53a4f2b9 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -210,7 +210,6 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 			kfree_skb(skb);
 			err = NET_XMIT_DROP;
 	} else {
-		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
 		skb_set_queue_mapping(skb, queue);
 		qdisc = q->queues[queue];
 		err = qdisc->enqueue(skb, qdisc);
-- 
cgit v1.2.3-70-g09d2


From ebd74487d4b7a48ab8513ecfe3d321346d7c602e Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Tue, 1 Jul 2008 10:44:50 +0300
Subject: mac80211: fix warning: unused variable ifsta

This patch fixes warning unused variable ifsta
when compiling without CONFIG_MAC80211_VERBOSE_DEBUG

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e080482b63c..2a927089f4d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -366,8 +366,10 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata,
 					   bool use_short_preamble)
 {
 	struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	DECLARE_MAC_BUF(mac);
+#endif
 	u32 changed = 0;
 
 	if (use_protection != bss_conf->use_cts_prot) {
-- 
cgit v1.2.3-70-g09d2


From 4e887d5b2fb909f18d153f47d2f3fdc2c76bb83a Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 2 Jul 2008 10:53:57 +0300
Subject: mac80211: remove MAC80211_DEBUG from net/mac80211/Kconfig.

This patch removes MAC80211_DEBUG from /net/mac80211/Kconfig
(in MAC80211_DEBUG_COUNTERS config entry), and replaces
MAC80211_DEBUG_MENU instead of MAC80211_DEBUG
(in MAC80211_VERBOSE_SPECT_MGMT_DEBUG config entry).

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 11a1e7fa195..40f1add1775 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -207,7 +207,6 @@ config MAC80211_LOWTX_FRAME_DUMP
 
 config MAC80211_DEBUG_COUNTERS
 	bool "Extra statistics for TX/RX debugging"
-	depends on MAC80211_DEBUG
 	depends on MAC80211_DEBUG_MENU
 	depends on MAC80211_DEBUGFS
 	---help---
@@ -219,7 +218,7 @@ config MAC80211_DEBUG_COUNTERS
 
 config MAC80211_VERBOSE_SPECT_MGMT_DEBUG
 	bool "Verbose Spectrum Management (IEEE 802.11h)debugging"
-	depends on MAC80211_DEBUG
+	depends on MAC80211_DEBUG_MENU
 	---help---
 	  Say Y here to print out verbose Spectrum Management (IEEE 802.11h)
 	  debug messages.
-- 
cgit v1.2.3-70-g09d2


From 238f74a227fd7de8ea1bc66dcbbd36cf9920d1cb Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 2 Jul 2008 11:05:34 -0700
Subject: mac80211: move QOS control helpers into ieee80211.h

Also remove the WLAN_IS_QOS_DATA inline after removing the last
two users.  This starts moving away from using rx->fc to using
the header frame_control directly.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |  2 ++
 net/mac80211/rx.c         | 28 ++++++++++++++--------------
 net/mac80211/wme.c        |  4 ++--
 net/mac80211/wme.h        |  8 --------
 4 files changed, 18 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index aa603c3d76d..a1630ba0b87 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -99,6 +99,8 @@
 #define IEEE80211_MAX_SSID_LEN		32
 #define IEEE80211_MAX_MESH_ID_LEN	32
 #define IEEE80211_QOS_CTL_LEN		2
+#define IEEE80211_QOS_CTL_TID_MASK	0x000F
+#define IEEE80211_QOS_CTL_TAG1D_MASK	0x0007
 
 struct ieee80211_hdr {
 	__le16 frame_control;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6a88e8f9bff..a3a26e55727 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -321,20 +321,20 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 
 static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
 {
-	u8 *data = rx->skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
 	int tid;
 
 	/* does the frame have a qos control field? */
-	if (WLAN_FC_IS_QOS_DATA(rx->fc)) {
-		u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN;
+	if (ieee80211_is_data_qos(hdr->frame_control)) {
+		u8 *qc = ieee80211_get_qos_ctl(hdr);
 		/* frame has qos control */
-		tid = qc[0] & QOS_CONTROL_TID_MASK;
-		if (qc[0] & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)
+		tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+		if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)
 			rx->flags |= IEEE80211_RX_AMSDU;
 		else
 			rx->flags &= ~IEEE80211_RX_AMSDU;
 	} else {
-		if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) {
+		if (unlikely(ieee80211_is_mgmt(hdr->frame_control))) {
 			/* Separate TID for management frames */
 			tid = NUM_RX_DATA_QUEUES - 1;
 		} else {
@@ -1037,19 +1037,19 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx)
 {
-	u16 fc = rx->fc;
 	u8 *data = rx->skb->data;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)data;
 
-	if (!WLAN_FC_IS_QOS_DATA(fc))
+	if (!ieee80211_is_data_qos(hdr->frame_control))
 		return RX_CONTINUE;
 
 	/* remove the qos control field, update frame type and meta-data */
-	memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2);
-	hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2);
+	memmove(data + IEEE80211_QOS_CTL_LEN, data,
+		ieee80211_hdrlen(hdr->frame_control) - IEEE80211_QOS_CTL_LEN);
+	hdr = (struct ieee80211_hdr *)skb_pull(rx->skb, IEEE80211_QOS_CTL_LEN);
 	/* change frame type to non QOS */
-	rx->fc = fc &= ~IEEE80211_STYPE_QOS_DATA;
-	hdr->frame_control = cpu_to_le16(fc);
+	rx->fc &= ~IEEE80211_STYPE_QOS_DATA;
+	hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
 
 	return RX_CONTINUE;
 }
@@ -2044,7 +2044,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
 	if (!ieee80211_is_data_qos(hdr->frame_control))
 		goto end_reorder;
 
-	tid = *ieee80211_get_qos_ctl(hdr) & QOS_CONTROL_TID_MASK;
+	tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
 
 	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL)
 		goto end_reorder;
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index a1a53a4f2b9..5c666f7eda8 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -154,7 +154,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 		queue = skb_get_queue_mapping(skb);
 		rcu_read_lock();
 		sta = sta_info_get(local, hdr->addr1);
-		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
+		tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
 		if (sta) {
 			int ampdu_queue = sta->tid_to_tx_q[tid];
 			if ((ampdu_queue < QD_NUM(hw)) &&
@@ -181,7 +181,7 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 	if (ieee80211_is_data_qos(hdr->frame_control)) {
 		u8 *p = ieee80211_get_qos_ctl(hdr);
 		u8 ack_policy = 0;
-		tid = skb->priority & QOS_CONTROL_TAG1D_MASK;
+		tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
 		if (local->wifi_wme_noack_test)
 			ack_policy |= QOS_CONTROL_ACK_POLICY_NOACK <<
 					QOS_CONTROL_ACK_POLICY_SHIFT;
diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h
index bbdb5334481..1aca609eccf 100644
--- a/net/mac80211/wme.h
+++ b/net/mac80211/wme.h
@@ -19,18 +19,10 @@
 #define QOS_CONTROL_ACK_POLICY_NORMAL 0
 #define QOS_CONTROL_ACK_POLICY_NOACK 1
 
-#define QOS_CONTROL_TID_MASK 0x0f
 #define QOS_CONTROL_ACK_POLICY_SHIFT 5
 
-#define QOS_CONTROL_TAG1D_MASK 0x07
-
 extern const int ieee802_1d_to_ac[8];
 
-static inline int WLAN_FC_IS_QOS_DATA(u16 fc)
-{
-	return (fc & 0x8C) == 0x88;
-}
-
 #ifdef CONFIG_MAC80211_QOS
 void ieee80211_install_qdisc(struct net_device *dev);
 int ieee80211_qdisc_installed(struct net_device *dev);
-- 
cgit v1.2.3-70-g09d2


From 73e1f7c823252d671361cddde0b498bf2c0fe4e1 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 2 Jul 2008 11:05:34 -0700
Subject: mac80211: use symbolic defines in wpa.c

ETH_ALEN and IEEE80211_QOS_CTL_LEN

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wpa.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index b414d5d92f3..222001c47d8 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -38,7 +38,7 @@ static int ieee80211_get_hdr_info(const struct sk_buff *skb, u8 **sa, u8 **da,
 	*data_len = skb->len - hdrlen;
 
 	if (ieee80211_is_data_qos(fc))
-		*qos_tid = (*ieee80211_get_qos_ctl(hdr) & 0x0f) | 0x80;
+		*qos_tid = (*ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK) | 0x80;
 	else
 		*qos_tid = 0;
 
@@ -312,7 +312,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
 	data_len -= CCMP_HDR_LEN + (encrypted ? CCMP_MIC_LEN : 0);
 	if (qos_tid & 0x80) {
 		qos_included = 1;
-		qos_tid &= 0x0f;
+		qos_tid &= IEEE80211_QOS_CTL_TID_MASK;
 	} else
 		qos_included = 0;
 	/* First block, b_0 */
@@ -320,7 +320,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
 	b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */
 	/* Nonce: QoS Priority | A2 | PN */
 	b_0[1] = qos_tid;
-	memcpy(&b_0[2], hdr->addr2, 6);
+	memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
 	memcpy(&b_0[8], pn, CCMP_PN_LEN);
 	/* l(m) */
 	b_0[14] = (data_len >> 8) & 0xff;
@@ -332,7 +332,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
 
 	len_a = a4_included ? 28 : 22;
 	if (qos_included)
-		len_a += 2;
+		len_a += IEEE80211_QOS_CTL_LEN;
 
 	aad[0] = 0; /* (len_a >> 8) & 0xff; */
 	aad[1] = len_a & 0xff;
@@ -340,17 +340,17 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
 	aad[2] = fc_pos[0] & ~(BIT(4) | BIT(5) | BIT(6));
 	/* Retry, PwrMgt, MoreData; set Protected */
 	aad[3] = (fc_pos[1] & ~(BIT(3) | BIT(4) | BIT(5))) | BIT(6);
-	memcpy(&aad[4], &hdr->addr1, 18);
+	memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN);
 
 	/* Mask Seq#, leave Frag# */
 	aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f;
 	aad[23] = 0;
 	if (a4_included) {
-		memcpy(&aad[24], hdr->addr4, 6);
+		memcpy(&aad[24], hdr->addr4, ETH_ALEN);
 		aad[30] = 0;
 		aad[31] = 0;
 	} else
-		memset(&aad[24], 0, 8);
+		memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN);
 	if (qos_included) {
 		u8 *dpos = &aad[a4_included ? 30 : 24];
 
-- 
cgit v1.2.3-70-g09d2


From f14df8049f9c9f34164dd598772fecea83a394a2 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 2 Jul 2008 11:05:35 -0700
Subject: mac80211: remove one user of ieee80211_get_hdr_info

ccmp_special_blocks was only using it to calculate data_len,
calculate that directly.

Use unaligned helpers rather than masking/shifting.

Use symbolic constants for the masked frame_control, and do it directly
on a le16 value.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wpa.c | 78 ++++++++++++++++++++++++++----------------------------
 1 file changed, 37 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 222001c47d8..919e30ce298 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -11,6 +11,8 @@
 #include <linux/slab.h>
 #include <linux/skbuff.h>
 #include <linux/compiler.h>
+#include <linux/ieee80211.h>
+#include <asm/unaligned.h>
 #include <net/mac80211.h>
 
 #include "ieee80211_i.h"
@@ -296,67 +298,61 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
 				int encrypted)
 {
-	u16 fc;
-	int a4_included, qos_included;
-	u8 qos_tid, *fc_pos, *data, *sa, *da;
-	int len_a;
-	size_t data_len;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	__le16 mask_fc;
+	int a4_included;
+	u8 qos_tid;
+	u16 data_len, len_a;
+	unsigned int hdrlen;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
-	fc_pos = (u8 *) &hdr->frame_control;
-	fc = fc_pos[0] ^ (fc_pos[1] << 8);
-	a4_included = (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
-		(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS);
-
-	ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len);
-	data_len -= CCMP_HDR_LEN + (encrypted ? CCMP_MIC_LEN : 0);
-	if (qos_tid & 0x80) {
-		qos_included = 1;
-		qos_tid &= IEEE80211_QOS_CTL_TID_MASK;
-	} else
-		qos_included = 0;
-	/* First block, b_0 */
+	/*
+	 * Mask FC: zero subtype b4 b5 b6
+	 * Retry, PwrMgt, MoreData; set Protected
+	 */
+	mask_fc = hdr->frame_control;
+	mask_fc &= ~cpu_to_le16(0x0070 | IEEE80211_FCTL_RETRY |
+				IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA);
+	mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	len_a = hdrlen - 2;
+	a4_included = ieee80211_has_a4(hdr->frame_control);
 
+	if (ieee80211_is_data_qos(hdr->frame_control))
+		qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	else
+		qos_tid = 0;
+
+	data_len = skb->len - hdrlen - CCMP_HDR_LEN;
+	if (encrypted)
+		data_len -= CCMP_MIC_LEN;
+
+	/* First block, b_0 */
 	b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */
 	/* Nonce: QoS Priority | A2 | PN */
 	b_0[1] = qos_tid;
 	memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
 	memcpy(&b_0[8], pn, CCMP_PN_LEN);
 	/* l(m) */
-	b_0[14] = (data_len >> 8) & 0xff;
-	b_0[15] = data_len & 0xff;
-
+	put_unaligned_be16(data_len, &b_0[14]);
 
 	/* AAD (extra authenticate-only data) / masked 802.11 header
 	 * FC | A1 | A2 | A3 | SC | [A4] | [QC] */
-
-	len_a = a4_included ? 28 : 22;
-	if (qos_included)
-		len_a += IEEE80211_QOS_CTL_LEN;
-
-	aad[0] = 0; /* (len_a >> 8) & 0xff; */
-	aad[1] = len_a & 0xff;
-	/* Mask FC: zero subtype b4 b5 b6 */
-	aad[2] = fc_pos[0] & ~(BIT(4) | BIT(5) | BIT(6));
-	/* Retry, PwrMgt, MoreData; set Protected */
-	aad[3] = (fc_pos[1] & ~(BIT(3) | BIT(4) | BIT(5))) | BIT(6);
+	put_unaligned_be16(len_a, &aad[0]);
+	put_unaligned(mask_fc, (__le16 *)&aad[2]);
 	memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN);
 
 	/* Mask Seq#, leave Frag# */
 	aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f;
 	aad[23] = 0;
+
 	if (a4_included) {
 		memcpy(&aad[24], hdr->addr4, ETH_ALEN);
-		aad[30] = 0;
+		aad[30] = qos_tid;
 		aad[31] = 0;
-	} else
+	} else {
 		memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN);
-	if (qos_included) {
-		u8 *dpos = &aad[a4_included ? 30 : 24];
-
-		/* Mask QoS Control field */
-		dpos[0] = qos_tid;
-		dpos[1] = 0;
+		aad[24] = qos_tid;
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8e8862b79d2ce9177bfddd85b8328a86a25c69b2 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 2 Jul 2008 11:05:35 -0700
Subject: mac80211: remove ieee80211_get_hdr_info

Do the check for sufficient skb->len explicitly and pass a pointer
to the struct ieee80211_hdr directly to the michael_mic calculation.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/michael.c | 20 ++++++++++++++-----
 net/mac80211/michael.h |  2 +-
 net/mac80211/wpa.c     | 54 ++++++++++++++++++--------------------------------
 3 files changed, 35 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index 1fcdf38cf60..408649bd470 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -8,6 +8,7 @@
  */
 #include <linux/types.h>
 #include <linux/bitops.h>
+#include <linux/ieee80211.h>
 #include <asm/unaligned.h>
 
 #include "michael.h"
@@ -26,9 +27,18 @@ static void michael_block(struct michael_mic_ctx *mctx, u32 val)
 	mctx->l += mctx->r;
 }
 
-static void michael_mic_hdr(struct michael_mic_ctx *mctx,
-			const u8 *key, const u8 *da, const u8 *sa, u8 priority)
+static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key,
+			    struct ieee80211_hdr *hdr)
 {
+	u8 *da, *sa, tid;
+
+	da = ieee80211_get_DA(hdr);
+	sa = ieee80211_get_SA(hdr);
+	if (ieee80211_is_data_qos(hdr->frame_control))
+		tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	else
+		tid = 0;
+
 	mctx->l = get_unaligned_le32(key);
 	mctx->r = get_unaligned_le32(key + 4);
 
@@ -40,17 +50,17 @@ static void michael_mic_hdr(struct michael_mic_ctx *mctx,
 	michael_block(mctx, get_unaligned_le16(&da[4]) |
 			    (get_unaligned_le16(sa) << 16));
 	michael_block(mctx, get_unaligned_le32(&sa[2]));
-	michael_block(mctx, priority);
+	michael_block(mctx, tid);
 }
 
-void michael_mic(const u8 *key, const u8 *da, const u8 *sa, u8 priority,
+void michael_mic(const u8 *key, struct ieee80211_hdr *hdr,
 		 const u8 *data, size_t data_len, u8 *mic)
 {
 	u32 val;
 	size_t block, blocks, left;
 	struct michael_mic_ctx mctx;
 
-	michael_mic_hdr(&mctx, key, da, sa, priority);
+	michael_mic_hdr(&mctx, key, hdr);
 
 	/* Real data */
 	blocks = data_len / 4;
diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h
index 69b4501f13b..3b848dad958 100644
--- a/net/mac80211/michael.h
+++ b/net/mac80211/michael.h
@@ -18,7 +18,7 @@ struct michael_mic_ctx {
 	u32 l, r;
 };
 
-void michael_mic(const u8 *key, const u8 *da, const u8 *sa, u8 priority,
+void michael_mic(const u8 *key, struct ieee80211_hdr *hdr,
 		 const u8 *data, size_t data_len, u8 *mic);
 
 #endif /* MICHAEL_H */
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 919e30ce298..241c932d3b6 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -21,38 +21,13 @@
 #include "aes_ccm.h"
 #include "wpa.h"
 
-static int ieee80211_get_hdr_info(const struct sk_buff *skb, u8 **sa, u8 **da,
-				  u8 *qos_tid, u8 **data, size_t *data_len)
-{
-	struct ieee80211_hdr *hdr;
-	size_t hdrlen;
-	__le16 fc;
-
-	hdr = (struct ieee80211_hdr *)skb->data;
-	fc = hdr->frame_control;
-
-	hdrlen = ieee80211_hdrlen(fc);
-
-	*sa = ieee80211_get_SA(hdr);
-	*da = ieee80211_get_DA(hdr);
-
-	*data = skb->data + hdrlen;
-	*data_len = skb->len - hdrlen;
-
-	if (ieee80211_is_data_qos(fc))
-		*qos_tid = (*ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK) | 0x80;
-	else
-		*qos_tid = 0;
-
-	return skb->len < hdrlen ? -1 : 0;
-}
-
-
 ieee80211_tx_result
 ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 {
-	u8 *data, *sa, *da, *key, *mic, qos_tid, key_offset;
+	u8 *data, *key, *mic, key_offset;
 	size_t data_len;
+	unsigned int hdrlen;
+	struct ieee80211_hdr *hdr;
 	u16 fc;
 	struct sk_buff *skb = tx->skb;
 	int authenticator;
@@ -65,9 +40,14 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 	    !WLAN_FC_DATA_PRESENT(fc))
 		return TX_CONTINUE;
 
-	if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len))
+	hdr = (struct ieee80211_hdr *)skb->data;
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (skb->len < hdrlen)
 		return TX_DROP;
 
+	data = skb->data + hdrlen;
+	data_len = skb->len - hdrlen;
+
 	if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) &&
 	    !(tx->flags & IEEE80211_TX_FRAGMENTED) &&
 	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) &&
@@ -97,7 +77,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 		NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY;
 	key = &tx->key->conf.key[key_offset];
 	mic = skb_put(skb, MICHAEL_MIC_LEN);
-	michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic);
+	michael_mic(key, hdr, data, data_len, mic);
 
 	return TX_CONTINUE;
 }
@@ -106,8 +86,10 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 ieee80211_rx_result
 ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 {
-	u8 *data, *sa, *da, *key = NULL, qos_tid, key_offset;
+	u8 *data, *key = NULL, key_offset;
 	size_t data_len;
+	unsigned int hdrlen;
+	struct ieee80211_hdr *hdr;
 	u16 fc;
 	u8 mic[MICHAEL_MIC_LEN];
 	struct sk_buff *skb = rx->skb;
@@ -126,11 +108,13 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 	    !(rx->fc & IEEE80211_FCTL_PROTECTED) || !WLAN_FC_DATA_PRESENT(fc))
 		return RX_CONTINUE;
 
-	if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len)
-	    || data_len < MICHAEL_MIC_LEN)
+	hdr = (struct ieee80211_hdr *)skb->data;
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (skb->len < hdrlen + MICHAEL_MIC_LEN)
 		return RX_DROP_UNUSABLE;
 
-	data_len -= MICHAEL_MIC_LEN;
+	data = skb->data + hdrlen;
+	data_len = skb->len - hdrlen - MICHAEL_MIC_LEN;
 
 #if 0
 	authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */
@@ -143,7 +127,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 		NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY :
 		NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY;
 	key = &rx->key->conf.key[key_offset];
-	michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic);
+	michael_mic(key, hdr, data, data_len, mic);
 	if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) {
 		if (!(rx->flags & IEEE80211_RX_RA_MATCH))
 			return RX_DROP_UNUSABLE;
-- 
cgit v1.2.3-70-g09d2


From fc32f9243dc93e75f81457e95d9cb90ee6136d94 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 3 Jul 2008 01:27:13 +0300
Subject: mac80211: call bss_info_change only once upon disassociation

This patch removes call of ieee80211_bss_info_change_notify from within
ieee80211_reset_erp_info. This allows gathering all bss info changes
into one call to the driver in the disassociation flow.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/main.c        | 12 ++++++------
 net/mac80211/mlme.c        |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 175cbdd36d7..02a8753a4ec 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -893,7 +893,7 @@ int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason);
 int ieee80211_sta_disassociate(struct net_device *dev, u16 reason);
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 				      u32 changed);
-void ieee80211_reset_erp_info(struct net_device *dev);
+u32 ieee80211_reset_erp_info(struct net_device *dev);
 int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
 				   struct ieee80211_ht_info *ht_info);
 int ieee80211_ht_addt_info_ie_to_ht_bss_info(
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 074f71a62a6..cc756e93e6c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -182,10 +182,11 @@ static int ieee80211_open(struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata, *nsdata;
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
 	struct ieee80211_if_init_conf conf;
+	u32 changed = 0;
 	int res;
 	bool need_hw_reconfig = 0;
-	struct sta_info *sta;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
@@ -329,7 +330,8 @@ static int ieee80211_open(struct net_device *dev)
 			goto err_stop;
 
 		ieee80211_if_config(dev);
-		ieee80211_reset_erp_info(dev);
+		changed |= ieee80211_reset_erp_info(dev);
+		ieee80211_bss_info_change_notify(sdata, changed);
 		ieee80211_enable_keys(sdata);
 
 		if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
@@ -1190,15 +1192,13 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 					     changed);
 }
 
-void ieee80211_reset_erp_info(struct net_device *dev)
+u32 ieee80211_reset_erp_info(struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	sdata->bss_conf.use_cts_prot = 0;
 	sdata->bss_conf.use_short_preamble = 0;
-	ieee80211_bss_info_change_notify(sdata,
-					 BSS_CHANGED_ERP_CTS_PROT |
-					 BSS_CHANGED_ERP_PREAMBLE);
+	return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE;
 }
 
 void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2a927089f4d..37ea04f5bab 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -573,7 +573,7 @@ static void ieee80211_set_associated(struct net_device *dev,
 		ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid);
 		ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
 		netif_carrier_off(dev);
-		ieee80211_reset_erp_info(dev);
+		changed |= ieee80211_reset_erp_info(dev);
 
 		sdata->bss_conf.assoc_ht = 0;
 		sdata->bss_conf.ht_conf = NULL;
-- 
cgit v1.2.3-70-g09d2


From a7767f958a3b09a1bcd0ddcb21ef6f9a8ebd782c Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 2 Jul 2008 16:30:51 -0700
Subject: mac80211: remove trivial rx_data->fc users

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 107 ++++++++++++++++++++++++++----------------------------
 1 file changed, 51 insertions(+), 56 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a3a26e55727..fab443d717e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -352,9 +352,10 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
 static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx)
 {
 #ifdef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
 	int hdrlen;
 
-	if (!WLAN_FC_DATA_PRESENT(rx->fc))
+	if (!ieee80211_is_data_present(hdr->frame_control))
 		return;
 
 	/*
@@ -376,7 +377,7 @@ static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx)
 	 * header and the payload is not supported, the driver is required
 	 * to move the 802.11 header further back in that case.
 	 */
-	hdrlen = ieee80211_get_hdrlen(rx->fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	if (rx->flags & IEEE80211_RX_AMSDU)
 		hdrlen += ETH_HLEN;
 	WARN_ON_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3);
@@ -415,14 +416,11 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
 static ieee80211_rx_result
 ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 {
-	int hdrlen = ieee80211_get_hdrlen(rx->fc);
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
-
-#define msh_h_get(h, l) ((struct ieee80211s_hdr *) ((u8 *)h + l))
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
+	unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
-	if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) {
-		if (!((rx->fc & IEEE80211_FCTL_FROMDS) &&
-		      (rx->fc & IEEE80211_FCTL_TODS)))
+	if (ieee80211_is_data(hdr->frame_control)) {
+		if (!ieee80211_has_a4(hdr->frame_control))
 			return RX_DROP_MONITOR;
 		if (memcmp(hdr->addr4, rx->dev->dev_addr, ETH_ALEN) == 0)
 			return RX_DROP_MONITOR;
@@ -435,27 +433,30 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 	if (!rx->sta || sta_plink_state(rx->sta) != PLINK_ESTAB) {
 		struct ieee80211_mgmt *mgmt;
 
-		if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT)
+		if (!ieee80211_is_mgmt(hdr->frame_control))
 			return RX_DROP_MONITOR;
 
-		switch (rx->fc & IEEE80211_FCTL_STYPE) {
-		case IEEE80211_STYPE_ACTION:
+		if (ieee80211_is_action(hdr->frame_control)) {
 			mgmt = (struct ieee80211_mgmt *)hdr;
 			if (mgmt->u.action.category != PLINK_CATEGORY)
 				return RX_DROP_MONITOR;
-			/* fall through on else */
-		case IEEE80211_STYPE_PROBE_REQ:
-		case IEEE80211_STYPE_PROBE_RESP:
-		case IEEE80211_STYPE_BEACON:
 			return RX_CONTINUE;
-			break;
-		default:
-			return RX_DROP_MONITOR;
 		}
 
-	 } else if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
-		    is_multicast_ether_addr(hdr->addr1) &&
-		    mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->dev))
+		if (ieee80211_is_probe_req(hdr->frame_control) ||
+		    ieee80211_is_probe_resp(hdr->frame_control) ||
+		    ieee80211_is_beacon(hdr->frame_control))
+			return RX_CONTINUE;
+
+		return RX_DROP_MONITOR;
+
+	}
+
+#define msh_h_get(h, l) ((struct ieee80211s_hdr *) ((u8 *)h + l))
+
+	if (ieee80211_is_data(hdr->frame_control) &&
+	    is_multicast_ether_addr(hdr->addr1) &&
+	    mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->dev))
 		return RX_DROP_MONITOR;
 #undef msh_h_get
 
@@ -466,13 +467,11 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 {
-	struct ieee80211_hdr *hdr;
-
-	hdr = (struct ieee80211_hdr *) rx->skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
 
 	/* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */
 	if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) {
-		if (unlikely(rx->fc & IEEE80211_FCTL_RETRY &&
+		if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
 			     rx->sta->last_seq_ctrl[rx->queue] ==
 			     hdr->seq_ctrl)) {
 			if (rx->flags & IEEE80211_RX_RA_MATCH) {
@@ -501,15 +500,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 	if (ieee80211_vif_is_mesh(&rx->sdata->vif))
 		return ieee80211_rx_mesh_check(rx);
 
-	if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA ||
-		      ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL &&
-		       (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) &&
+	if (unlikely((ieee80211_is_data(hdr->frame_control) ||
+		      ieee80211_is_pspoll(hdr->frame_control)) &&
 		     rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
 		     (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) {
-		if ((!(rx->fc & IEEE80211_FCTL_FROMDS) &&
-		     !(rx->fc & IEEE80211_FCTL_TODS) &&
-		     (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)
-		    || !(rx->flags & IEEE80211_RX_RA_MATCH)) {
+		if ((!ieee80211_has_fromds(hdr->frame_control) &&
+		     !ieee80211_has_tods(hdr->frame_control) &&
+		     ieee80211_is_data(hdr->frame_control)) ||
+		    !(rx->flags & IEEE80211_RX_RA_MATCH)) {
 			/* Drop IBSS frames and frames for other hosts
 			 * silently. */
 			return RX_DROP_MONITOR;
@@ -525,7 +523,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
 	int keyidx;
 	int hdrlen;
 	ieee80211_rx_result result = RX_DROP_UNUSABLE;
@@ -557,7 +555,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	 * possible.
 	 */
 
-	if (!(rx->fc & IEEE80211_FCTL_PROTECTED))
+	if (!ieee80211_has_protected(hdr->frame_control))
 		return RX_CONTINUE;
 
 	/*
@@ -586,7 +584,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 		    (rx->status->flag & RX_FLAG_IV_STRIPPED))
 			return RX_CONTINUE;
 
-		hdrlen = ieee80211_get_hdrlen(rx->fc);
+		hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
 		if (rx->skb->len < 8 + hdrlen)
 			return RX_DROP_UNUSABLE; /* TODO: count this? */
@@ -618,7 +616,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 
 	/* Check for weak IVs if possible */
 	if (rx->sta && rx->key->conf.alg == ALG_WEP &&
-	    ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) &&
+	    ieee80211_is_data(hdr->frame_control) &&
 	    (!(rx->status->flag & RX_FLAG_IV_STRIPPED) ||
 	     !(rx->status->flag & RX_FLAG_DECRYPTED)) &&
 	    ieee80211_wep_is_weak_iv(rx->skb, rx->key))
@@ -710,7 +708,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 {
 	struct sta_info *sta = rx->sta;
 	struct net_device *dev = rx->dev;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
 
 	if (!sta)
 		return RX_CONTINUE;
@@ -744,21 +742,20 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->last_qual = rx->status->qual;
 	sta->last_noise = rx->status->noise;
 
-	if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) {
+	if (!ieee80211_has_morefrags(hdr->frame_control)) {
 		/* Change STA power saving mode only in the end of a frame
 		 * exchange sequence */
 		if (test_sta_flags(sta, WLAN_STA_PS) &&
-		    !(rx->fc & IEEE80211_FCTL_PM))
+		    !ieee80211_has_pm(hdr->frame_control))
 			rx->sent_ps_buffered += ap_sta_ps_end(dev, sta);
 		else if (!test_sta_flags(sta, WLAN_STA_PS) &&
-			 (rx->fc & IEEE80211_FCTL_PM))
+			 ieee80211_has_pm(hdr->frame_control))
 			ap_sta_ps_start(dev, sta);
 	}
 
 	/* Drop data::nullfunc frames silently, since they are used only to
 	 * control station power saving mode. */
-	if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
-	    (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC) {
+	if (ieee80211_is_nullfunc(hdr->frame_control)) {
 		I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc);
 		/* Update counter and free packet here to avoid counting this
 		 * as a dropped packed. */
@@ -1465,15 +1462,15 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
 	struct ieee80211_local *local = rx->local;
 	struct ieee80211_hw *hw = &local->hw;
 	struct sk_buff *skb = rx->skb;
-	struct ieee80211_bar *bar = (struct ieee80211_bar *) skb->data;
+	struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data;
 	struct tid_ampdu_rx *tid_agg_rx;
 	u16 start_seq_num;
 	u16 tid;
 
-	if (likely((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL))
+	if (likely(!ieee80211_is_ctl(bar->frame_control)))
 		return RX_CONTINUE;
 
-	if ((rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BACK_REQ) {
+	if (ieee80211_is_back_req(bar->frame_control)) {
 		if (!rx->sta)
 			return RX_CONTINUE;
 		tid = le16_to_cpu(bar->control) >> 12;
@@ -1527,11 +1524,12 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
 					    struct ieee80211_hdr *hdr,
 					    struct ieee80211_rx_data *rx)
 {
-	int keyidx, hdrlen;
+	int keyidx;
+	unsigned int hdrlen;
 	DECLARE_MAC_BUF(mac);
 	DECLARE_MAC_BUF(mac2);
 
-	hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	if (rx->skb->len >= hdrlen + 4)
 		keyidx = rx->skb->data[hdrlen + 3] >> 6;
 	else
@@ -1545,7 +1543,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
 		goto ignore;
 	}
 
-	if (!(rx->fc & IEEE80211_FCTL_PROTECTED))
+	if (!ieee80211_has_protected(hdr->frame_control))
 		goto ignore;
 
 	if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) {
@@ -1558,9 +1556,8 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
 		goto ignore;
 	}
 
-	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA &&
-	    ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
-	     (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))
+	if (!ieee80211_is_data(hdr->frame_control) &&
+	    !ieee80211_is_auth(hdr->frame_control))
 		goto ignore;
 
 	mac80211_ev_michael_mic_failure(rx->dev, keyidx, hdr);
@@ -1731,8 +1728,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 	case IEEE80211_IF_TYPE_IBSS:
 		if (!bssid)
 			return 0;
-		if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT &&
-		    (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) {
+		if (ieee80211_is_beacon(hdr->frame_control)) {
 			if (!rx->sta)
 				rx->sta = ieee80211_ibss_add_sta(sdata->dev,
 						rx->skb, bssid, hdr->addr2,
@@ -1783,8 +1779,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			return 0;
 		break;
 	case IEEE80211_IF_TYPE_WDS:
-		if (bssid ||
-		    (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
+		if (bssid || !ieee80211_is_data(hdr->frame_control))
 			return 0;
 		if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2))
 			return 0;
-- 
cgit v1.2.3-70-g09d2


From c34498b9e633baa3266af98106502633b6bc371b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 2 Jul 2008 16:30:52 -0700
Subject: mac80211: wpa.c remove rx/tx_data ->fc users

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wpa.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 241c932d3b6..ec2ae86d64a 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -28,19 +28,16 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 	size_t data_len;
 	unsigned int hdrlen;
 	struct ieee80211_hdr *hdr;
-	u16 fc;
 	struct sk_buff *skb = tx->skb;
 	int authenticator;
 	int wpa_test = 0;
 	int tail;
 
-	fc = tx->fc;
-
+	hdr = (struct ieee80211_hdr *)skb->data;
 	if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 ||
-	    !WLAN_FC_DATA_PRESENT(fc))
+	    !ieee80211_is_data_present(hdr->frame_control))
 		return TX_CONTINUE;
 
-	hdr = (struct ieee80211_hdr *)skb->data;
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	if (skb->len < hdrlen)
 		return TX_DROP;
@@ -90,25 +87,23 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 	size_t data_len;
 	unsigned int hdrlen;
 	struct ieee80211_hdr *hdr;
-	u16 fc;
 	u8 mic[MICHAEL_MIC_LEN];
 	struct sk_buff *skb = rx->skb;
 	int authenticator = 1, wpa_test = 0;
 	DECLARE_MAC_BUF(mac);
 
-	fc = rx->fc;
-
 	/*
 	 * No way to verify the MIC if the hardware stripped it
 	 */
 	if (rx->status->flag & RX_FLAG_MMIC_STRIPPED)
 		return RX_CONTINUE;
 
+	hdr = (struct ieee80211_hdr *)skb->data;
 	if (!rx->key || rx->key->conf.alg != ALG_TKIP ||
-	    !(rx->fc & IEEE80211_FCTL_PROTECTED) || !WLAN_FC_DATA_PRESENT(fc))
+	    !ieee80211_has_protected(hdr->frame_control) ||
+	    !ieee80211_is_data_present(hdr->frame_control))
 		return RX_CONTINUE;
 
-	hdr = (struct ieee80211_hdr *)skb->data;
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	if (skb->len < hdrlen + MICHAEL_MIC_LEN)
 		return RX_DROP_UNUSABLE;
@@ -239,7 +234,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
-	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
+	if (!ieee80211_is_data(hdr->frame_control))
 		return RX_CONTINUE;
 
 	if (!rx->sta || skb->len - hdrlen < 12)
@@ -458,7 +453,7 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx)
 ieee80211_rx_result
 ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
 	int hdrlen;
 	struct ieee80211_key *key = rx->key;
 	struct sk_buff *skb = rx->skb;
@@ -468,7 +463,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
-	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)
+	if (!ieee80211_is_data(hdr->frame_control))
 		return RX_CONTINUE;
 
 	data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN;
-- 
cgit v1.2.3-70-g09d2


From feccb466944cb1aeaabc701cfde6771f3be74919 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 2 Jul 2008 16:30:52 -0700
Subject: mac80211: pass scratch buffer directly, remove additional pointers

Recalculate the offset pointers in the ccmp calculations rather than
in the callers.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/aes_ccm.c | 13 ++++++++-----
 net/mac80211/aes_ccm.h |  4 ++--
 net/mac80211/wpa.c     | 26 ++++++++++----------------
 3 files changed, 20 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 4d4c2dfcf9a..e756ed93116 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -53,15 +53,17 @@ static inline void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *b_0, u8 *aad,
 
 
 void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
-			       u8 *b_0, u8 *aad, u8 *data, size_t data_len,
+			       u8 *data, size_t data_len,
 			       u8 *cdata, u8 *mic)
 {
 	int i, j, last_len, num_blocks;
-	u8 *pos, *cpos, *b, *s_0, *e;
+	u8 *pos, *cpos, *b, *s_0, *e, *b_0, *aad;
 
 	b = scratch;
 	s_0 = scratch + AES_BLOCK_LEN;
 	e = scratch + 2 * AES_BLOCK_LEN;
+	b_0 = scratch + 3 * AES_BLOCK_LEN;
+	aad = scratch + 4 * AES_BLOCK_LEN;
 
 	num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
 	last_len = data_len % AES_BLOCK_LEN;
@@ -92,15 +94,16 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
 
 
 int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
-			      u8 *b_0, u8 *aad, u8 *cdata, size_t data_len,
-			      u8 *mic, u8 *data)
+			      u8 *cdata, size_t data_len, u8 *mic, u8 *data)
 {
 	int i, j, last_len, num_blocks;
-	u8 *pos, *cpos, *b, *s_0, *a;
+	u8 *pos, *cpos, *b, *s_0, *a, *b_0, *aad;
 
 	b = scratch;
 	s_0 = scratch + AES_BLOCK_LEN;
 	a = scratch + 2 * AES_BLOCK_LEN;
+	b_0 = scratch + 3 * AES_BLOCK_LEN;
+	aad = scratch + 4 * AES_BLOCK_LEN;
 
 	num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
 	last_len = data_len % AES_BLOCK_LEN;
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index 8cd0f14aab4..6e7820ef344 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -16,10 +16,10 @@
 
 struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]);
 void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
-			       u8 *b_0, u8 *aad, u8 *data, size_t data_len,
+			       u8 *data, size_t data_len,
 			       u8 *cdata, u8 *mic);
 int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
-			      u8 *b_0, u8 *aad, u8 *cdata, size_t data_len,
+			      u8 *cdata, size_t data_len,
 			      u8 *mic, u8 *data);
 void ieee80211_aes_key_free(struct crypto_cipher *tfm);
 
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index ec2ae86d64a..2f33df0dccc 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -274,16 +274,20 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 }
 
 
-static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
+static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
 				int encrypted)
 {
 	__le16 mask_fc;
 	int a4_included;
 	u8 qos_tid;
+	u8 *b_0, *aad;
 	u16 data_len, len_a;
 	unsigned int hdrlen;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
+	b_0 = scratch + 3 * AES_BLOCK_LEN;
+	aad = scratch + 4 * AES_BLOCK_LEN;
+
 	/*
 	 * Mask FC: zero subtype b4 b5 b6
 	 * Retry, PwrMgt, MoreData; set Protected
@@ -367,7 +371,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	struct ieee80211_key *key = tx->key;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int hdrlen, len, tail;
-	u8 *pos, *pn, *b_0, *aad, *scratch;
+	u8 *pos, *pn;
 	int i;
 
 	info->control.icv_len = CCMP_MIC_LEN;
@@ -381,10 +385,6 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 		return 0;
 	}
 
-	scratch = key->u.ccmp.tx_crypto_buf;
-	b_0 = scratch + 3 * AES_BLOCK_LEN;
-	aad = scratch + 4 * AES_BLOCK_LEN;
-
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	len = skb->len - hdrlen;
 
@@ -420,8 +420,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	}
 
 	pos += CCMP_HDR_LEN;
-	ccmp_special_blocks(skb, pn, b_0, aad, 0);
-	ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, b_0, aad, pos, len,
+	ccmp_special_blocks(skb, pn, key->u.ccmp.tx_crypto_buf, 0);
+	ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, key->u.ccmp.tx_crypto_buf, pos, len,
 				  pos, skb_put(skb, CCMP_MIC_LEN));
 
 	return 0;
@@ -483,16 +483,10 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 
 	if (!(rx->status->flag & RX_FLAG_DECRYPTED)) {
 		/* hardware didn't decrypt/verify MIC */
-		u8 *scratch, *b_0, *aad;
-
-		scratch = key->u.ccmp.rx_crypto_buf;
-		b_0 = scratch + 3 * AES_BLOCK_LEN;
-		aad = scratch + 4 * AES_BLOCK_LEN;
-
-		ccmp_special_blocks(skb, pn, b_0, aad, 1);
+		ccmp_special_blocks(skb, pn, key->u.ccmp.rx_crypto_buf, 1);
 
 		if (ieee80211_aes_ccm_decrypt(
-			    key->u.ccmp.tfm, scratch, b_0, aad,
+			    key->u.ccmp.tfm, key->u.ccmp.rx_crypto_buf,
 			    skb->data + hdrlen + CCMP_HDR_LEN, data_len,
 			    skb->data + skb->len - CCMP_MIC_LEN,
 			    skb->data + hdrlen + CCMP_HDR_LEN)) {
-- 
cgit v1.2.3-70-g09d2


From 5fdae6b37e9346dbaea1e51ed6235ed0269d445d Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 2 Jul 2008 16:30:53 -0700
Subject: mac80211: aes_ccm.c remove crypto wrapper and extra args

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/aes_ccm.c | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index e756ed93116..a87cb3ba2df 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -16,31 +16,28 @@
 #include "key.h"
 #include "aes_ccm.h"
 
-
-static void ieee80211_aes_encrypt(struct crypto_cipher *tfm,
-				  const u8 pt[16], u8 ct[16])
-{
-	crypto_cipher_encrypt_one(tfm, ct, pt);
-}
-
-
-static inline void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *b_0, u8 *aad,
-				   u8 *b, u8 *s_0, u8 *a)
+static void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *scratch, u8 *a)
 {
 	int i;
+	u8 *b_0, *aad, *b, *s_0;
 
-	ieee80211_aes_encrypt(tfm, b_0, b);
+	b_0 = scratch + 3 * AES_BLOCK_LEN;
+	aad = scratch + 4 * AES_BLOCK_LEN;
+	b = scratch;
+	s_0 = scratch + AES_BLOCK_LEN;
+
+	crypto_cipher_encrypt_one(tfm, b, b_0);
 
 	/* Extra Authenticate-only data (always two AES blocks) */
 	for (i = 0; i < AES_BLOCK_LEN; i++)
 		aad[i] ^= b[i];
-	ieee80211_aes_encrypt(tfm, aad, b);
+	crypto_cipher_encrypt_one(tfm, b, aad);
 
 	aad += AES_BLOCK_LEN;
 
 	for (i = 0; i < AES_BLOCK_LEN; i++)
 		aad[i] ^= b[i];
-	ieee80211_aes_encrypt(tfm, aad, a);
+	crypto_cipher_encrypt_one(tfm, a, aad);
 
 	/* Mask out bits from auth-only-b_0 */
 	b_0[0] &= 0x07;
@@ -48,7 +45,7 @@ static inline void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *b_0, u8 *aad,
 	/* S_0 is used to encrypt T (= MIC) */
 	b_0[14] = 0;
 	b_0[15] = 0;
-	ieee80211_aes_encrypt(tfm, b_0, s_0);
+	crypto_cipher_encrypt_one(tfm, s_0, b_0);
 }
 
 
@@ -67,7 +64,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
 
 	num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
 	last_len = data_len % AES_BLOCK_LEN;
-	aes_ccm_prepare(tfm, b_0, aad, b, s_0, b);
+	aes_ccm_prepare(tfm, scratch, b);
 
 	/* Process payload blocks */
 	pos = data;
@@ -79,11 +76,11 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
 		/* Authentication followed by encryption */
 		for (i = 0; i < blen; i++)
 			b[i] ^= pos[i];
-		ieee80211_aes_encrypt(tfm, b, b);
+		crypto_cipher_encrypt_one(tfm, b, b);
 
 		b_0[14] = (j >> 8) & 0xff;
 		b_0[15] = j & 0xff;
-		ieee80211_aes_encrypt(tfm, b_0, e);
+		crypto_cipher_encrypt_one(tfm, e, b_0);
 		for (i = 0; i < blen; i++)
 			*cpos++ = *pos++ ^ e[i];
 	}
@@ -107,7 +104,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
 
 	num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
 	last_len = data_len % AES_BLOCK_LEN;
-	aes_ccm_prepare(tfm, b_0, aad, b, s_0, a);
+	aes_ccm_prepare(tfm, scratch, a);
 
 	/* Process payload blocks */
 	cpos = cdata;
@@ -119,13 +116,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
 		/* Decryption followed by authentication */
 		b_0[14] = (j >> 8) & 0xff;
 		b_0[15] = j & 0xff;
-		ieee80211_aes_encrypt(tfm, b_0, b);
+		crypto_cipher_encrypt_one(tfm, b, b_0);
 		for (i = 0; i < blen; i++) {
 			*pos = *cpos++ ^ b[i];
 			a[i] ^= *pos++;
 		}
-
-		ieee80211_aes_encrypt(tfm, a, a);
+		crypto_cipher_encrypt_one(tfm, a, a);
 	}
 
 	for (i = 0; i < CCMP_MIC_LEN; i++) {
-- 
cgit v1.2.3-70-g09d2


From e4abd4d49d6df6d5e94564c5e831c61ac722f6ec Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 3 Jul 2008 18:02:27 +0300
Subject: mac80211: add support for iwconfig wlanX frag auto

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wext.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 1babb979fe0..736c32e340f 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -800,6 +800,8 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev,
 
 	if (frag->disabled)
 		local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
+	else if (!frag->fixed)
+		local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
 	else if (frag->value < 256 ||
 		 frag->value > IEEE80211_MAX_FRAG_THRESHOLD)
 		return -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From 0f687e9aeb590e9581709379f47dd13ee9357258 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Thu, 3 Jul 2008 13:14:56 -0300
Subject: rfkill: some minor kernel-doc changes for rfkill_toggle_radio

Improve rfkill_toggle_radio's kernel-doc header a bit.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index ce0e23148cd..aa7039dfa19 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -130,17 +130,19 @@ static void update_rfkill_state(struct rfkill *rfkill)
 
 /**
  * rfkill_toggle_radio - wrapper for toggle_radio hook
- * calls toggle_radio taking into account a lot of "small"
- * details.
+ *
  * @rfkill: the rfkill struct to use
  * @force: calls toggle_radio even if cache says it is not needed,
  *	and also makes sure notifications of the state will be
  *	sent even if it didn't change
  * @state: the new state to call toggle_radio() with
  *
- * This wrappen protects and enforces the API for toggle_radio
- * calls.  Note that @force cannot override a (possibly cached)
- * state of RFKILL_STATE_HARD_BLOCKED.  Any device making use of
+ * Calls rfkill->toggle_radio, enforcing the API for toggle_radio
+ * calls and handling all the red tape such as issuing notifications
+ * if the call is successful.
+ *
+ * Note that @force cannot override a (possibly cached) state of
+ * RFKILL_STATE_HARD_BLOCKED.  Any device making use of
  * RFKILL_STATE_HARD_BLOCKED implements either get_state() or
  * rfkill_force_state(), so the cache either is bypassed or valid.
  *
-- 
cgit v1.2.3-70-g09d2


From fd4484af7c02b31bcb6090eeb0d85cf947719f2d Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Thu, 3 Jul 2008 13:14:57 -0300
Subject: rfkill: ignore errors from rfkill_toggle_radio in rfkill_add_switch

rfkill_add_switch() calls rfkill_toggle_radio() to set the state of a
recently registered rfkill class to the current global state [for that
rfkill->type].

The rfkill_toggle_radio() call is going to error out if the hardware is
RFKILL_STATE_HARD_BLOCKED, and the global state is RFKILL_STATE_UNBLOCKED.

That is a quite normal situation which I missed to account for.  As things
stand, the error return from rfkill_toggle_radio ends up causing
rfkill_register to bail out with an error (de-registering the new switch in
the process), which is Not Nice.

Change rfkill_add_switch() to not return errors because of a failed call to
rfkill_toggle_radio().  We can go back to returning errors again (if that's
indeed the right thing to do) if we define the exact error codes the
rfkill->toggle_radio callbacks are to return in each situation, so that we
can ignore the right ones only.

Bug reported by "kionez <kionez@anche.no>".

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: kionez <kionez@anche.no>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index aa7039dfa19..7a560b78509 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -501,17 +501,15 @@ static struct class rfkill_class = {
 
 static int rfkill_add_switch(struct rfkill *rfkill)
 {
-	int error;
-
 	mutex_lock(&rfkill_mutex);
 
-	error = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type], 0);
-	if (!error)
-		list_add_tail(&rfkill->node, &rfkill_list);
+	rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type], 0);
+
+	list_add_tail(&rfkill->node, &rfkill_list);
 
 	mutex_unlock(&rfkill_mutex);
 
-	return error;
+	return 0;
 }
 
 static void rfkill_remove_switch(struct rfkill *rfkill)
-- 
cgit v1.2.3-70-g09d2


From 6ef307bc561911c8cdda98ef3896b5982b602a43 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 3 Jul 2008 13:52:18 -0700
Subject: mac80211: fix lots of kernel-doc

Fix more than 50 kernel-doc warnings in ieee80211/mac80211 kernel-doc notation.
Fix a few typos also.

Note: Some fields are marked as TBD and need to have their description
corrected.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h  | 40 +++++++++++++++---------
 net/mac80211/sta_info.c |  1 +
 net/mac80211/sta_info.h | 81 ++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 86 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0a5de3ef527..656442c6b1c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -42,7 +42,7 @@
  * tasklet function.
  *
  * NOTE: If the driver opts to use the _irqsafe() functions, it may not also
- *	 use the non-irqsafe functions!
+ *	 use the non-IRQ-safe functions!
  */
 
 /**
@@ -85,7 +85,7 @@ enum ieee80211_notification_types {
  * struct ieee80211_ht_bss_info - describing BSS's HT characteristics
  *
  * This structure describes most essential parameters needed
- * to describe 802.11n HT characteristics in a BSS
+ * to describe 802.11n HT characteristics in a BSS.
  *
  * @primary_channel: channel number of primery channel
  * @bss_cap: 802.11n's general BSS capabilities (e.g. channel width)
@@ -201,9 +201,9 @@ struct ieee80211_bss_conf {
 };
 
 /**
- * enum mac80211_tx_flags - flags to transmission information/status
+ * enum mac80211_tx_control_flags - flags to describe transmission information/status
  *
- * These flags are used with the @flags member of &ieee80211_tx_info
+ * These flags are used with the @flags member of &ieee80211_tx_info.
  *
  * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame.
  * @IEEE80211_TX_CTL_DO_NOT_ENCRYPT: send this frame without encryption;
@@ -212,11 +212,12 @@ struct ieee80211_bss_conf {
  * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g.,
  *	for combined 802.11g / 802.11b networks)
  * @IEEE80211_TX_CTL_NO_ACK: tell the low level not to wait for an ack
- * @IEEE80211_TX_CTL_RATE_CTRL_PROBE
+ * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: TBD
  * @IEEE80211_TX_CTL_CLEAR_PS_FILT: clear powersave filter for destination
  *	station
- * @IEEE80211_TX_CTL_REQUEUE:
+ * @IEEE80211_TX_CTL_REQUEUE: TBD
  * @IEEE80211_TX_CTL_FIRST_FRAGMENT: this is a first fragment of the frame
+ * @IEEE80211_TX_CTL_SHORT_PREAMBLE: TBD
  * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the
  *	through set_retry_limit configured long retry value
  * @IEEE80211_TX_CTL_EAPOL_FRAME: internal to mac80211
@@ -230,6 +231,7 @@ struct ieee80211_bss_conf {
  * @IEEE80211_TX_CTL_40_MHZ_WIDTH: send this frame using 40 Mhz channel width
  * @IEEE80211_TX_CTL_DUP_DATA: duplicate data frame on both 20 Mhz channels
  * @IEEE80211_TX_CTL_SHORT_GI: send this frame using short guard interval
+ * @IEEE80211_TX_CTL_INJECTED: TBD
  * @IEEE80211_TX_STAT_TX_FILTERED: The frame was not transmitted
  *	because the destination STA was in powersave mode.
  * @IEEE80211_TX_STAT_ACK: Frame was acknowledged
@@ -280,6 +282,12 @@ enum mac80211_tx_control_flags {
  *  (3) TX status information - driver tells mac80211 what happened
  *
  * @flags: transmit info flags, defined above
+ * @band: TBD
+ * @tx_rate_idx: TBD
+ * @antenna_sel_tx: TBD
+ * @control: union for control data
+ * @status: union for status data
+ * @driver_data: array of driver_data pointers
  * @retry_count: number of retries
  * @excessive_retries: set to 1 if the frame was retried many times
  *	but not acknowledged
@@ -562,8 +570,8 @@ enum ieee80211_key_alg {
 
 /**
  * enum ieee80211_key_len - key length
- * @WEP40: WEP 5 byte long key
- * @WEP104: WEP 13 byte long key
+ * @LEN_WEP40: WEP 5-byte long key
+ * @LEN_WEP104: WEP 13-byte long key
  */
 enum ieee80211_key_len {
 	LEN_WEP40 = 5,
@@ -640,7 +648,7 @@ enum set_key_cmd {
  * enum sta_notify_cmd - sta notify command
  *
  * Used with the sta_notify() callback in &struct ieee80211_ops, this
- * indicates addition and removal of a station to station table
+ * indicates addition and removal of a station to station table.
  *
  * @STA_NOTIFY_ADD: a station was added to the station table
  * @STA_NOTIFY_REMOVE: a station being removed from the station table
@@ -1340,7 +1348,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw);
  *
  * This function frees everything that was allocated, including the
  * private data for the driver. You must call ieee80211_unregister_hw()
- * before calling this function
+ * before calling this function.
  *
  * @hw: the hardware to free
  */
@@ -1411,7 +1419,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw,
 			 struct sk_buff *skb);
 
 /**
- * ieee80211_tx_status_irqsafe - irq-safe transmit status callback
+ * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback
  *
  * Like ieee80211_tx_status() but can be called in IRQ context
  * (internally defers to a tasklet.)
@@ -1589,6 +1597,8 @@ unsigned int ieee80211_hdrlen(__le16 fc);
  * @keyconf: the parameter passed with the set key
  * @skb: the skb for which the key is needed
  * @rc4key: a buffer to which the key will be written
+ * @type: TBD
+ * @key: TBD
  */
 void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf,
 				struct sk_buff *skb,
@@ -1639,7 +1649,7 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw);
 void ieee80211_scan_completed(struct ieee80211_hw *hw);
 
 /**
- * ieee80211_iterate_active_interfaces- iterate active interfaces
+ * ieee80211_iterate_active_interfaces - iterate active interfaces
  *
  * This function iterates over the interfaces associated with a given
  * hardware that are currently active and calls the callback for them.
@@ -1706,7 +1716,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid);
  *
  * This function must be called by low level driver once it has
  * finished with preparations for the BA session.
- * This version of the function is irq safe.
+ * This version of the function is IRQ-safe.
  */
 void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra,
 				      u16 tid);
@@ -1746,7 +1756,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid);
  *
  * This function must be called by low level driver once it has
  * finished with preparations for the BA session tear down.
- * This version of the function is irq safe.
+ * This version of the function is IRQ-safe.
  */
 void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra,
 				     u16 tid);
@@ -1754,7 +1764,7 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra,
 /**
  * ieee80211_notify_mac - low level driver notification
  * @hw: pointer as obtained from ieee80211_alloc_hw().
- * @notification_types: enum ieee80211_notification_types
+ * @notif_type: enum ieee80211_notification_types
  *
  * This function must be called by low level driver to inform mac80211 of
  * low level driver status change or force mac80211 to re-assoc for low
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index d8a16b7f6a6..47d2c1bbfca 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -135,6 +135,7 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx,
 /**
  * __sta_info_free - internal STA free helper
  *
+ * @local: pointer to the global information
  * @sta: STA info to free
  *
  * This function must undo everything done by sta_info_alloc()
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index fd228c198e3..94311dcfe04 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -160,11 +160,21 @@ struct sta_ampdu_mlme {
  * @list: global linked list entry
  * @hnext: hash table linked list pointer
  * @local: pointer to the global information
+ * @sdata: TBD
+ * @key: TBD
+ * @rate_ctrl: TBD
+ * @rate_ctrl_priv: TBD
+ * @lock: used for locking all fields that require locking, see comments
+ *	in the header file.
+ * @flaglock: spinlock for flags accesses
+ * @ht_info: HT capabilities of this STA
+ * @supp_rates: Bitmap of supported rates (per band)
  * @addr: MAC address of this STA
  * @aid: STA's unique AID (1..2007, 0 = not assigned yet),
  *	only used in AP (and IBSS?) mode
+ * @listen_interval: TBD
+ * @pin_status: TBD
  * @flags: STA flags, see &enum ieee80211_sta_info_flags
- * @flaglock: spinlock for flags accesses
  * @ps_tx_buf: buffer of frames to transmit to this station
  *	when it leaves power saving state
  * @tx_filtered: buffer of frames we already tried to transmit
@@ -172,10 +182,41 @@ struct sta_ampdu_mlme {
  *	power saving state
  * @rx_packets: Number of MSDUs received from this STA
  * @rx_bytes: Number of bytes received from this STA
- * @supp_rates: Bitmap of supported rates (per band)
- * @ht_info: HT capabilities of this STA
- * @lock: used for locking all fields that require locking, see comments
- *	in the header file.
+ * @wep_weak_iv_count: TBD
+ * @last_rx: TBD
+ * @num_duplicates: number of duplicate frames received from this STA
+ * @rx_fragments: number of received MPDUs
+ * @rx_dropped: number of dropped MPDUs from this STA
+ * @last_signal: signal of last received frame from this STA
+ * @last_qual: qual of last received frame from this STA
+ * @last_noise: noise of last received frame from this STA
+ * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
+ * @wme_rx_queue: TBD
+ * @tx_filtered_count: TBD
+ * @tx_retry_failed: TBD
+ * @tx_retry_count: TBD
+ * @tx_num_consecutive_failures: TBD
+ * @tx_num_mpdu_ok: TBD
+ * @tx_num_mpdu_fail: TBD
+ * @fail_avg: moving percentage of failed MSDUs
+ * @tx_packets: number of RX/TX MSDUs
+ * @tx_bytes: TBD
+ * @tx_fragments: number of transmitted MPDUs
+ * @txrate_idx: TBD
+ * @last_txrate_idx: TBD
+ * @wme_tx_queue: TBD
+ * @ampdu_mlme: TBD
+ * @timer_to_tid: identity mapping to ID timers
+ * @tid_to_tx_q: map tid to tx queue
+ * @llid: Local link ID
+ * @plid: Peer link ID
+ * @reason: Cancel reason on PLINK_HOLDING state
+ * @plink_retries: Retries in establishment
+ * @ignore_plink_timer: TBD
+ * @plink_state plink_state: TBD
+ * @plink_timeout: TBD
+ * @plink_timer: TBD
+ * @debugfs: debug filesystem info
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -217,14 +258,12 @@ struct sta_info {
 	unsigned long rx_packets, rx_bytes;
 	unsigned long wep_weak_iv_count;
 	unsigned long last_rx;
-	unsigned long num_duplicates; /* number of duplicate frames received
-				       * from this STA */
-	unsigned long rx_fragments; /* number of received MPDUs */
-	unsigned long rx_dropped; /* number of dropped MPDUs from this STA */
-	int last_signal; /* signal of last received frame from this STA */
-	int last_qual;  /* qual of last received frame from this STA */
-	int last_noise; /* noise of last received frame from this STA */
-	/* last received seq/frag number from this STA (per RX queue) */
+	unsigned long num_duplicates;
+	unsigned long rx_fragments;
+	unsigned long rx_dropped;
+	int last_signal;
+	int last_qual;
+	int last_noise;
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
 #ifdef CONFIG_MAC80211_DEBUG_COUNTERS
 	unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES];
@@ -241,9 +280,9 @@ struct sta_info {
 	unsigned int fail_avg;
 
 	/* Updated from TX path only, no locking requirements */
-	unsigned long tx_packets; /* number of RX/TX MSDUs */
+	unsigned long tx_packets;
 	unsigned long tx_bytes;
-	unsigned long tx_fragments; /* number of transmitted MPDUs */
+	unsigned long tx_fragments;
 	int txrate_idx;
 	int last_txrate_idx;
 #ifdef CONFIG_MAC80211_DEBUG_COUNTERS
@@ -254,18 +293,18 @@ struct sta_info {
 	 * Aggregation information, locked with lock.
 	 */
 	struct sta_ampdu_mlme ampdu_mlme;
-	u8 timer_to_tid[STA_TID_NUM];	/* identity mapping to ID timers */
-	u8 tid_to_tx_q[STA_TID_NUM];	/* map tid to tx queue */
+	u8 timer_to_tid[STA_TID_NUM];
+	u8 tid_to_tx_q[STA_TID_NUM];
 
 #ifdef CONFIG_MAC80211_MESH
 	/*
 	 * Mesh peer link attributes
 	 * TODO: move to a sub-structure that is referenced with pointer?
 	 */
-	__le16 llid;		/* Local link ID */
-	__le16 plid;		/* Peer link ID */
-	__le16 reason;		/* Cancel reason on PLINK_HOLDING state */
-	u8 plink_retries;	/* Retries in establishment */
+	__le16 llid;
+	__le16 plid;
+	__le16 reason;
+	u8 plink_retries;
 	bool ignore_plink_timer;
 	enum plink_state plink_state;
 	u32 plink_timeout;
-- 
cgit v1.2.3-70-g09d2


From b46372710ab536c0967f76be5dc41341583d4a54 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 8 Jul 2008 14:02:19 +0200
Subject: net/wireless/nl80211.c: fix endless Netlink callback loop.

Although I only tested similar code (I don't use any of this wireless
code), the state maintainance between Netlink dump callback invocations
seems wrong here and should lead to an endless loop. There are also other
examples in the same file which might have the same problem. Perhaps someone
can actually test this (or refute my logic).

Take the simple example with only one element in the list (which should fit
into the message):

1. invocation:
  Start:
    idx = 0, start = 0
  Loop:
    condition (++idx < start) => (1 < 0) => false
    => no continue, fill one entry, exit loop, return skb->len > 0

2. invocation:
  Start:
    idx = 0, start = 1
  Loop:
    condition (++idx < start) => (1 < 1) => false
    => no continue, fill the same entry again, exit loop, return skb->len > 0

3. invocation:
  Same as 2. invocation, endless invocation of callback.

Also, iterations where the filling of an element fails should not be counted as
completed, so idx should not be incremented in this case.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/nl80211.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fb75f265b39..b7fefffd2d0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -199,12 +199,14 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
 
 	mutex_lock(&cfg80211_drv_mutex);
 	list_for_each_entry(dev, &cfg80211_drv_list, list) {
-		if (++idx < start)
+		if (++idx <= start)
 			continue;
 		if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid,
 				       cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				       dev) < 0)
+				       dev) < 0) {
+			idx--;
 			break;
+		}
 	}
 	mutex_unlock(&cfg80211_drv_mutex);
 
-- 
cgit v1.2.3-70-g09d2


From 0d3a34b48c87a374b37d7a21a60d257d076484f3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 7 Jul 2008 12:18:52 -0400
Subject: SUNRPC: Fix a double-free in rpcbind

It is wrong to be freeing up the rpcbind arguments if the call to
rpcb_call_async() fails, since they should already have been freed up by
rpcb_map_release().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 0517967a68b..21c698d7b77 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -365,18 +365,16 @@ void rpcb_getport_async(struct rpc_task *task)
 	rpc_release_client(rpcb_clnt);
 	if (IS_ERR(child)) {
 		status = -EIO;
+		/* rpcb_map_release() has freed the arguments */
 		dprintk("RPC: %5u %s: rpc_run_task failed\n",
 			task->tk_pid, __func__);
-		goto bailout;
+		goto bailout_nofree;
 	}
 	rpc_put_task(child);
 
 	task->tk_xprt->stat.bind_count++;
 	return;
 
-bailout:
-	kfree(map);
-	xprt_put(xprt);
 bailout_nofree:
 	rpcb_wake_rpcbind_waiters(xprt, status);
 bailout_nowake:
-- 
cgit v1.2.3-70-g09d2


From 803a9067e19714ea7b7da760fe92f0d53bfa6994 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 1 Jul 2008 15:20:55 -0400
Subject: SUNRPC: Fix an rpcbind breakage for the case of IPv6 lookups

Now that rpcb_next_version has been split into an IPv4 version and an IPv6
version, we Oops when rpcb_call_async attempts to look up the IPv6-specific
RPC procedure in rpcb_next_version.

Fix the Oops simply by having rpcb_getport_async pass the correct RPC
procedure as an argument.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 21c698d7b77..e6fb21b19b8 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -243,10 +243,10 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
 }
 EXPORT_SYMBOL_GPL(rpcb_getport_sync);
 
-static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version)
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
 {
 	struct rpc_message msg = {
-		.rpc_proc = rpcb_next_version[version].rpc_proc,
+		.rpc_proc = proc,
 		.rpc_argp = map,
 		.rpc_resp = &map->r_port,
 	};
@@ -271,6 +271,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
 void rpcb_getport_async(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
+	struct rpc_procinfo *proc;
 	u32 bind_version;
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_clnt	*rpcb_clnt;
@@ -280,7 +281,6 @@ void rpcb_getport_async(struct rpc_task *task)
 	struct sockaddr *sap = (struct sockaddr *)&addr;
 	size_t salen;
 	int status;
-	struct rpcb_info *info;
 
 	dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
 		task->tk_pid, __func__,
@@ -313,10 +313,12 @@ void rpcb_getport_async(struct rpc_task *task)
 	/* Don't ever use rpcbind v2 for AF_INET6 requests */
 	switch (sap->sa_family) {
 	case AF_INET:
-		info = rpcb_next_version;
+		proc = rpcb_next_version[xprt->bind_index].rpc_proc;
+		bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
 		break;
 	case AF_INET6:
-		info = rpcb_next_version6;
+		proc = rpcb_next_version6[xprt->bind_index].rpc_proc;
+		bind_version = rpcb_next_version6[xprt->bind_index].rpc_vers;
 		break;
 	default:
 		status = -EAFNOSUPPORT;
@@ -324,14 +326,13 @@ void rpcb_getport_async(struct rpc_task *task)
 				task->tk_pid, __func__);
 		goto bailout_nofree;
 	}
-	if (info[xprt->bind_index].rpc_proc == NULL) {
+	if (proc == NULL) {
 		xprt->bind_index = 0;
 		status = -EPFNOSUPPORT;
 		dprintk("RPC: %5u %s: no more getport versions available\n",
 			task->tk_pid, __func__);
 		goto bailout_nofree;
 	}
-	bind_version = info[xprt->bind_index].rpc_vers;
 
 	dprintk("RPC: %5u %s: trying rpcbind version %u\n",
 		task->tk_pid, __func__, bind_version);
@@ -361,7 +362,7 @@ void rpcb_getport_async(struct rpc_task *task)
 	map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
 	map->r_owner = RPCB_OWNER_STRING;	/* ignored for GETADDR */
 
-	child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index);
+	child = rpcb_call_async(rpcb_clnt, map, proc);
 	rpc_release_client(rpcb_clnt);
 	if (IS_ERR(child)) {
 		status = -EIO;
-- 
cgit v1.2.3-70-g09d2


From b2238566401f01eb796e75750213c7b0fce396b2 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@parallels.com>
Date: Tue, 8 Jul 2008 15:13:31 -0700
Subject: ipv6: fix race between ipv6_del_addr and DAD timer

Consider the following scenario:

ipv6_del_addr(ifp)
  ipv6_ifa_notify(RTM_DELADDR, ifp)
    ip6_del_rt(ifp->rt)

after returning from the ipv6_ifa_notify and enabling BH-s
back, but *before* calling the addrconf_del_timer the
ifp->timer fires and:

addrconf_dad_timer(ifp)
  addrconf_dad_completed(ifp)
    ipv6_ifa_notify(RTM_NEWADDR, ifp)
      ip6_ins_rt(ifp->rt)

then return back to the ipv6_del_addr and:

in6_ifa_put(ifp)
  inet6_ifa_finish_destroy(ifp)
    dst_release(&ifp->rt->u.dst)

After this we have an ifp->rt inserted into fib6 lists, but
queued for gc, which in turn can result in oopses in the
fib6_run_gc. Maybe some other nasty things, but we caught
only the oops in gc so far.

The solution is to disarm the ifp->timer before flushing the
rt from it.

Signed-off-by: Andrey Vagin <avagin@parallels.com>
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 147588f4c7c..ff61a5cdb0b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -749,12 +749,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	}
 	write_unlock_bh(&idev->lock);
 
+	addrconf_del_timer(ifp);
+
 	ipv6_ifa_notify(RTM_DELADDR, ifp);
 
 	atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
 
-	addrconf_del_timer(ifp);
-
 	/*
 	 * Purge or update corresponding prefix
 	 *
-- 
cgit v1.2.3-70-g09d2


From 11a100f844f6096787ab20e19f17d72abc957a8f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 8 Jul 2008 15:36:57 -0700
Subject: vlan: avoid header copying and linearisation where possible

- vlan_dev_reorder_header() is only called on the receive path after
  calling skb_share_check(). This means we can use skb_cow() since
  all we need is a writable header.

- vlan_dev_hard_header() includes a work-around for some apparently
  broken out of tree MPLS code. The hard_header functions can expect
  to always have a headroom of at least there own hard_header_len
  available, so the reallocation check is unnecessary.

- __vlan_put_tag() can use skb_cow_head() to avoid the skb_unshare()
  copy when the header is writable.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 18 +++---------------
 net/8021q/vlan_dev.c    | 34 +++++-----------------------------
 2 files changed, 8 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index d36515dae62..93f5d9b0e9f 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -185,22 +185,10 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 {
 	struct vlan_ethhdr *veth;
 
-	if (skb_headroom(skb) < VLAN_HLEN) {
-		struct sk_buff *sk_tmp = skb;
-		skb = skb_realloc_headroom(sk_tmp, VLAN_HLEN);
-		kfree_skb(sk_tmp);
-		if (!skb) {
-			printk(KERN_ERR "vlan: failed to realloc headroom\n");
-			return NULL;
-		}
-	} else {
-		skb = skb_unshare(skb, GFP_ATOMIC);
-		if (!skb) {
-			printk(KERN_ERR "vlan: failed to unshare skbuff\n");
-			return NULL;
-		}
+	if (skb_cow_head(skb, VLAN_HLEN) < 0) {
+		kfree_skb(skb);
+		return NULL;
 	}
-
 	veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN);
 
 	/* Move the mac addresses to the beginning of the new header. */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2ccac6bea57..b6e52c025fd 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -74,11 +74,8 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 {
 	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		if (skb_shared(skb) || skb_cloned(skb)) {
-			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
-			kfree_skb(skb);
-			skb = nskb;
-		}
+		if (skb_cow(skb, skb_headroom(skb)) < 0)
+			skb = NULL;
 		if (skb) {
 			/* Lifted from Gleb's VLAN code... */
 			memmove(skb->data - ETH_HLEN,
@@ -262,12 +259,14 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	u16 vlan_tci = 0;
 	int rc = 0;
 	int build_vlan_header = 0;
-	struct net_device *vdev = dev;
 
 	pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n",
 		 __func__, skb, type, len, vlan_dev_info(dev)->vlan_id,
 		 daddr);
 
+	if (WARN_ON(skb_headroom(skb) < dev->hard_header_len))
+		return -ENOSPC;
+
 	/* build vlan header only if re_order_header flag is NOT set.  This
 	 * fixes some programs that get confused when they see a VLAN device
 	 * sending a frame that is VLAN encoded (the consensus is that the VLAN
@@ -316,29 +315,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 
 	dev = vlan_dev_info(dev)->real_dev;
 
-	/* MPLS can send us skbuffs w/out enough space.	This check will grow
-	 * the skb if it doesn't have enough headroom. Not a beautiful solution,
-	 * so I'll tick a counter so that users can know it's happening...
-	 * If they care...
-	 */
-
-	/* NOTE: This may still break if the underlying device is not the final
-	 * device (and thus there are more headers to add...) It should work for
-	 * good-ole-ethernet though.
-	 */
-	if (skb_headroom(skb) < dev->hard_header_len) {
-		struct sk_buff *sk_tmp = skb;
-		skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len);
-		kfree_skb(sk_tmp);
-		if (skb == NULL) {
-			struct net_device_stats *stats = &vdev->stats;
-			stats->tx_dropped++;
-			return -ENOMEM;
-		}
-		vlan_dev_info(vdev)->cnt_inc_headroom_on_tx++;
-		pr_debug("%s: %s: had to grow skb\n", __func__, vdev->name);
-	}
-
 	if (build_vlan_header) {
 		/* Now make the underlying real hard header */
 		rc = dev_hard_header(skb, dev, ETH_P_8021Q, daddr, saddr,
-- 
cgit v1.2.3-70-g09d2


From e65d22e18038eed7307276e46810d884c402d57d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 16:46:01 -0700
Subject: pkt_sched: Remove comment reference to old style TX locking.

We haven't had netdev->tbusy in many years :)

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e9ebc7af049..69e918bb427 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -99,7 +99,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
    ---requeue
 
    requeues once dequeued packet. It is used for non-standard or
-   just buggy devices, which can defer output even if dev->tbusy=0.
+   just buggy devices, which can defer output even if netif_queue_stopped()=0.
 
    ---reset
 
-- 
cgit v1.2.3-70-g09d2


From bb949fbd1878973c3539d9aecff52f284482a937 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 16:55:56 -0700
Subject: netdev: Create netdev_queue abstraction.

A netdev_queue is an entity managed by a qdisc.

Currently there is one RX and one TX queue, and a netdev_queue merely
contains a backpointer to the net_device.

The Qdisc struct is augmented with a netdev_queue pointer as well.

Eventually the 'dev' Qdisc member will go away and we will have the
resulting hierarchy:

	net_device --> netdev_queue --> Qdisc

Also, qdisc_alloc() and qdisc_create_dflt() now take a netdev_queue
pointer argument.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++++++
 include/net/sch_generic.h |  6 +++++-
 net/core/dev.c            |  8 ++++++++
 net/mac80211/wme.c        |  6 ++++--
 net/sched/sch_api.c       | 12 +++++++-----
 net/sched/sch_atm.c       |  6 ++++--
 net/sched/sch_cbq.c       |  9 ++++++---
 net/sched/sch_dsmark.c    |  6 ++++--
 net/sched/sch_fifo.c      |  3 ++-
 net/sched/sch_generic.c   | 14 ++++++++++----
 net/sched/sch_hfsc.c      |  9 ++++++---
 net/sched/sch_htb.c       | 11 +++++++----
 net/sched/sch_netem.c     |  3 ++-
 net/sched/sch_prio.c      |  3 ++-
 14 files changed, 74 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e009c6fbf5c..515fd25bf0f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -448,6 +448,10 @@ static inline void napi_synchronize(const struct napi_struct *n)
 # define napi_synchronize(n)	barrier()
 #endif
 
+struct netdev_queue {
+	struct net_device	*dev;
+};
+
 /*
  *	The DEVICE structure.
  *	Actually, this whole structure is a big mistake.  It mixes I/O
@@ -624,6 +628,9 @@ struct net_device
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
+	struct netdev_queue	rx_queue;
+	struct netdev_queue	tx_queue;
+
 	/* ingress path synchronizer */
 	spinlock_t		ingress_lock;
 	struct Qdisc		*qdisc_ingress;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 073f2580b83..0ab53c575f8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -37,6 +37,7 @@ struct Qdisc
 	u32			parent;
 	atomic_t		refcnt;
 	struct sk_buff_head	q;
+	struct netdev_queue	*dev_queue;
 	struct net_device	*dev;
 	struct list_head	list;
 
@@ -216,8 +217,11 @@ extern void dev_deactivate(struct net_device *dev);
 extern void qdisc_reset(struct Qdisc *qdisc);
 extern void qdisc_destroy(struct Qdisc *qdisc);
 extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
-extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops);
+extern struct Qdisc *qdisc_alloc(struct net_device *dev,
+				 struct netdev_queue *dev_queue,
+				 struct Qdisc_ops *ops);
 extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
+				       struct netdev_queue *dev_queue,
 				       struct Qdisc_ops *ops, u32 parentid);
 extern void tcf_destroy(struct tcf_proto *tp);
 extern void tcf_destroy_chain(struct tcf_proto **fl);
diff --git a/net/core/dev.c b/net/core/dev.c
index 75933932463..9b281c906eb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4072,6 +4072,12 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
 	return &dev->stats;
 }
 
+static void netdev_init_queues(struct net_device *dev)
+{
+	dev->rx_queue.dev = dev;
+	dev->tx_queue.dev = dev;
+}
+
 /**
  *	alloc_netdev_mq - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
@@ -4124,6 +4130,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev->egress_subqueue_count = queue_count;
 	dev->gso_max_size = GSO_MAX_SIZE;
 
+	netdev_init_queues(dev);
+
 	dev->get_stats = internal_stats;
 	netpoll_netdev_init(dev);
 	setup(dev);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 5c666f7eda8..770f1c09b79 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -359,7 +359,8 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 	/* create child queues */
 	for (i = 0; i < QD_NUM(hw); i++) {
 		skb_queue_head_init(&q->requeued[i]);
-		q->queues[i] = qdisc_create_dflt(qd->dev, &pfifo_qdisc_ops,
+		q->queues[i] = qdisc_create_dflt(qd->dev, qd->dev_queue,
+						 &pfifo_qdisc_ops,
 						 qd->handle);
 		if (!q->queues[i]) {
 			q->queues[i] = &noop_qdisc;
@@ -575,7 +576,8 @@ void ieee80211_install_qdisc(struct net_device *dev)
 {
 	struct Qdisc *qdisc;
 
-	qdisc = qdisc_create_dflt(dev, &wme_qdisc_ops, TC_H_ROOT);
+	qdisc = qdisc_create_dflt(dev, &dev->tx_queue,
+				  &wme_qdisc_ops, TC_H_ROOT);
 	if (!qdisc) {
 		printk(KERN_ERR "%s: qdisc installation failed\n", dev->name);
 		return;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 69e918bb427..b86c98bd06a 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -552,8 +552,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
  */
 
 static struct Qdisc *
-qdisc_create(struct net_device *dev, u32 parent, u32 handle,
-	   struct nlattr **tca, int *errp)
+qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
+	     u32 parent, u32 handle, struct nlattr **tca, int *errp)
 {
 	int err;
 	struct nlattr *kind = tca[TCA_KIND];
@@ -593,7 +593,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
 	if (ops == NULL)
 		goto err_out;
 
-	sch = qdisc_alloc(dev, ops);
+	sch = qdisc_alloc(dev, dev_queue, ops);
 	if (IS_ERR(sch)) {
 		err = PTR_ERR(sch);
 		goto err_out2;
@@ -892,10 +892,12 @@ create_n_graft:
 	if (!(n->nlmsg_flags&NLM_F_CREATE))
 		return -ENOENT;
 	if (clid == TC_H_INGRESS)
-		q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
+		q = qdisc_create(dev, &dev->rx_queue,
+				 tcm->tcm_parent, tcm->tcm_parent,
 				 tca, &err);
 	else
-		q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
+		q = qdisc_create(dev, &dev->tx_queue,
+				 tcm->tcm_parent, tcm->tcm_handle,
 				 tca, &err);
 	if (q == NULL) {
 		if (err == -EAGAIN)
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index db0e23ae85f..3dddab531d5 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -296,7 +296,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		goto err_out;
 	}
 	flow->filter_list = NULL;
-	flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
+	flow->q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+				    &pfifo_qdisc_ops, classid);
 	if (!flow->q)
 		flow->q = &noop_qdisc;
 	pr_debug("atm_tc_change: qdisc %p\n", flow->q);
@@ -555,7 +556,8 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 
 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 	p->flows = &p->link;
-	p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
+	p->link.q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+				      &pfifo_qdisc_ops, sch->handle);
 	if (!p->link.q)
 		p->link.q = &noop_qdisc;
 	pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 968b4c73c9c..d360dcd0818 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1401,7 +1401,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->link.sibling = &q->link;
 	q->link.common.classid = sch->handle;
 	q->link.qdisc = sch;
-	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+	if (!(q->link.q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+					    &pfifo_qdisc_ops,
 					    sch->handle)))
 		q->link.q = &noop_qdisc;
 
@@ -1645,7 +1646,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 
 	if (cl) {
 		if (new == NULL) {
-			new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+			new = qdisc_create_dflt(sch->dev, sch->dev_queue,
+						&pfifo_qdisc_ops,
 						cl->common.classid);
 			if (new == NULL)
 				return -ENOBUFS;
@@ -1877,7 +1879,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	cl->R_tab = rtab;
 	rtab = NULL;
 	cl->refcnt = 1;
-	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
+	if (!(cl->q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+					&pfifo_qdisc_ops, classid)))
 		cl->q = &noop_qdisc;
 	cl->common.classid = classid;
 	cl->tparent = parent;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index c4c1317cd47..c955ba24e5c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -60,7 +60,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
 		sch, p, new, old);
 
 	if (new == NULL) {
-		new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+		new = qdisc_create_dflt(sch->dev, sch->dev_queue,
+					&pfifo_qdisc_ops,
 					sch->handle);
 		if (new == NULL)
 			new = &noop_qdisc;
@@ -390,7 +391,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 	p->default_index = default_index;
 	p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
 
-	p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
+	p->q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+				 &pfifo_qdisc_ops, sch->handle);
 	if (p->q == NULL)
 		p->q = &noop_qdisc;
 
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 82d7d7bbbb1..779eae85faf 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -137,7 +137,8 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
 	struct Qdisc *q;
 	int err = -ENOMEM;
 
-	q = qdisc_create_dflt(sch->dev, ops, TC_H_MAKE(sch->handle, 1));
+	q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+			      ops, TC_H_MAKE(sch->handle, 1));
 	if (q) {
 		err = fifo_set_limit(q, limit);
 		if (err < 0) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 13afa721439..d9708648089 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -440,7 +440,9 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.owner		=	THIS_MODULE,
 };
 
-struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
+struct Qdisc *qdisc_alloc(struct net_device *dev,
+			  struct netdev_queue *dev_queue,
+			  struct Qdisc_ops *ops)
 {
 	void *p;
 	struct Qdisc *sch;
@@ -462,6 +464,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
+	sch->dev_queue = dev_queue;
 	sch->dev = dev;
 	dev_hold(dev);
 	atomic_set(&sch->refcnt, 1);
@@ -471,12 +474,14 @@ errout:
 	return ERR_PTR(err);
 }
 
-struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
+struct Qdisc * qdisc_create_dflt(struct net_device *dev,
+				 struct netdev_queue *dev_queue,
+				 struct Qdisc_ops *ops,
 				 unsigned int parentid)
 {
 	struct Qdisc *sch;
 
-	sch = qdisc_alloc(dev, ops);
+	sch = qdisc_alloc(dev, dev_queue, ops);
 	if (IS_ERR(sch))
 		goto errout;
 	sch->stats_lock = &dev->queue_lock;
@@ -545,7 +550,8 @@ void dev_activate(struct net_device *dev)
 	if (dev->qdisc_sleeping == &noop_qdisc) {
 		struct Qdisc *qdisc;
 		if (dev->tx_queue_len) {
-			qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops,
+			qdisc = qdisc_create_dflt(dev, &dev->tx_queue,
+						  &pfifo_fast_ops,
 						  TC_H_ROOT);
 			if (qdisc == NULL) {
 				printk(KERN_INFO "%s: activation failed\n", dev->name);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3a8267246a4..5a22fec4ead 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1083,7 +1083,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	cl->refcnt    = 1;
 	cl->sched     = q;
 	cl->cl_parent = parent;
-	cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
+	cl->qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue,
+				      &pfifo_qdisc_ops, classid);
 	if (cl->qdisc == NULL)
 		cl->qdisc = &noop_qdisc;
 	INIT_LIST_HEAD(&cl->children);
@@ -1201,7 +1202,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	if (cl->level > 0)
 		return -EINVAL;
 	if (new == NULL) {
-		new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+		new = qdisc_create_dflt(sch->dev, sch->dev_queue,
+					&pfifo_qdisc_ops,
 					cl->cl_common.classid);
 		if (new == NULL)
 			new = &noop_qdisc;
@@ -1443,7 +1445,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	q->root.cl_common.classid = sch->handle;
 	q->root.refcnt  = 1;
 	q->root.sched   = q;
-	q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+	q->root.qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue,
+					  &pfifo_qdisc_ops,
 					  sch->handle);
 	if (q->root.qdisc == NULL)
 		q->root.qdisc = &noop_qdisc;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index ee8b4ffe110..956a67f66b9 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1129,7 +1129,8 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 
 	if (cl && !cl->level) {
 		if (new == NULL &&
-		    (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+		    (new = qdisc_create_dflt(sch->dev, sch->dev_queue,
+					     &pfifo_qdisc_ops,
 					     cl->common.classid))
 		    == NULL)
 			return -ENOBUFS;
@@ -1256,8 +1257,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 		return -EBUSY;
 
 	if (!cl->level && htb_parent_last_child(cl)) {
-		new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
-						cl->parent->common.classid);
+		new_q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+					  &pfifo_qdisc_ops,
+					  cl->parent->common.classid);
 		last_child = 1;
 	}
 
@@ -1376,7 +1378,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 		   so that can't be used inside of sch_tree_lock
 		   -- thanks to Karlis Peisenieks */
-		new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
+		new_q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+					  &pfifo_qdisc_ops, classid);
 		sch_tree_lock(sch);
 		if (parent && !parent->level) {
 			unsigned int qlen = parent->un.leaf.q->q.qlen;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 24697667247..aa7a04e32ae 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -536,7 +536,8 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 
 	qdisc_watchdog_init(&q->watchdog, sch);
 
-	q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
+	q->qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue,
+				     &tfifo_qdisc_ops,
 				     TC_H_MAKE(sch->handle, 1));
 	if (!q->qdisc) {
 		pr_debug("netem: qdisc create failed\n");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 5532f1031ab..ca58a039208 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -281,7 +281,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	for (i=0; i<q->bands; i++) {
 		if (q->queues[i] == &noop_qdisc) {
 			struct Qdisc *child;
-			child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+			child = qdisc_create_dflt(sch->dev, sch->dev_queue,
+						  &pfifo_qdisc_ops,
 						  TC_H_MAKE(sch->handle, i + 1));
 			if (child) {
 				sch_tree_lock(sch);
-- 
cgit v1.2.3-70-g09d2


From 5ce2d488fe039ddd86a638496cf704df86c74eeb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 17:06:30 -0700
Subject: pkt_sched: Remove 'dev' member of struct Qdisc.

It can be obtained via the netdev_queue.  So create a helper routine,
qdisc_dev(), to make the transformations nicer looking.

Now, qdisc_alloc() now no longer needs a net_device pointer argument.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 16 +++++++++-------
 net/mac80211/wme.c        | 28 ++++++++++++++--------------
 net/sched/cls_api.c       |  2 +-
 net/sched/cls_route.c     |  4 ++--
 net/sched/sch_api.c       | 10 +++++-----
 net/sched/sch_atm.c       |  4 ++--
 net/sched/sch_cbq.c       | 22 +++++++++++-----------
 net/sched/sch_dsmark.c    |  4 ++--
 net/sched/sch_fifo.c      |  6 +++---
 net/sched/sch_generic.c   | 12 +++++-------
 net/sched/sch_gred.c      |  2 +-
 net/sched/sch_hfsc.c      | 10 +++++-----
 net/sched/sch_htb.c       | 24 ++++++++++++------------
 net/sched/sch_netem.c     | 10 +++++-----
 net/sched/sch_prio.c      | 15 ++++++++-------
 net/sched/sch_sfq.c       |  4 ++--
 net/sched/sch_teql.c      | 12 ++++++------
 17 files changed, 93 insertions(+), 92 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 0ab53c575f8..66ec36d8ac9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -38,7 +38,6 @@ struct Qdisc
 	atomic_t		refcnt;
 	struct sk_buff_head	q;
 	struct netdev_queue	*dev_queue;
-	struct net_device	*dev;
 	struct list_head	list;
 
 	struct gnet_stats_basic	bstats;
@@ -156,14 +155,18 @@ struct tcf_proto
 	struct tcf_proto_ops	*ops;
 };
 
+static inline struct net_device *qdisc_dev(struct Qdisc *qdisc)
+{
+	return qdisc->dev_queue->dev;
+}
 
 extern void qdisc_lock_tree(struct net_device *dev);
 extern void qdisc_unlock_tree(struct net_device *dev);
 
-#define sch_tree_lock(q)	qdisc_lock_tree((q)->dev)
-#define sch_tree_unlock(q)	qdisc_unlock_tree((q)->dev)
-#define tcf_tree_lock(tp)	qdisc_lock_tree((tp)->q->dev)
-#define tcf_tree_unlock(tp)	qdisc_unlock_tree((tp)->q->dev)
+#define sch_tree_lock(q)	qdisc_lock_tree(qdisc_dev(q))
+#define sch_tree_unlock(q)	qdisc_unlock_tree(qdisc_dev(q))
+#define tcf_tree_lock(tp)	qdisc_lock_tree(qdisc_dev((tp)->q))
+#define tcf_tree_unlock(tp)	qdisc_unlock_tree(qdisc_dev((tp)->q))
 
 extern struct Qdisc noop_qdisc;
 extern struct Qdisc_ops noop_qdisc_ops;
@@ -217,8 +220,7 @@ extern void dev_deactivate(struct net_device *dev);
 extern void qdisc_reset(struct Qdisc *qdisc);
 extern void qdisc_destroy(struct Qdisc *qdisc);
 extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
-extern struct Qdisc *qdisc_alloc(struct net_device *dev,
-				 struct netdev_queue *dev_queue,
+extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 				 struct Qdisc_ops *ops);
 extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
 				       struct netdev_queue *dev_queue,
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 770f1c09b79..2fbc171130b 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -103,7 +103,7 @@ static inline int wme_downgrade_ac(struct sk_buff *skb)
  * negative return value indicates to drop the frame */
 static int classify80211(struct sk_buff *skb, struct Qdisc *qd)
 {
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 
 	if (!ieee80211_is_data(hdr->frame_control)) {
@@ -140,7 +140,7 @@ static int classify80211(struct sk_buff *skb, struct Qdisc *qd)
 
 static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 {
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -249,7 +249,7 @@ static int wme_qdiscop_requeue(struct sk_buff *skb, struct Qdisc* qd)
 static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc* qd)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct net_device *dev = qd->dev;
+	struct net_device *dev = qdisc_dev(qd);
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	struct sk_buff *skb;
@@ -286,7 +286,7 @@ static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc* qd)
 static void wme_qdiscop_reset(struct Qdisc* qd)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	int queue;
 
@@ -303,7 +303,7 @@ static void wme_qdiscop_reset(struct Qdisc* qd)
 static void wme_qdiscop_destroy(struct Qdisc* qd)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	int queue;
 
@@ -328,7 +328,7 @@ static int wme_qdiscop_tune(struct Qdisc *qd, struct nlattr *opt)
 static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct net_device *dev = qd->dev;
+	struct net_device *dev = qdisc_dev(qd);
 	struct ieee80211_local *local;
 	struct ieee80211_hw *hw;
 	int err = 0, i;
@@ -359,7 +359,7 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 	/* create child queues */
 	for (i = 0; i < QD_NUM(hw); i++) {
 		skb_queue_head_init(&q->requeued[i]);
-		q->queues[i] = qdisc_create_dflt(qd->dev, qd->dev_queue,
+		q->queues[i] = qdisc_create_dflt(qdisc_dev(qd), qd->dev_queue,
 						 &pfifo_qdisc_ops,
 						 qd->handle);
 		if (!q->queues[i]) {
@@ -386,7 +386,7 @@ static int wme_classop_graft(struct Qdisc *qd, unsigned long arg,
 			     struct Qdisc *new, struct Qdisc **old)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	unsigned long queue = arg - 1;
 
@@ -410,7 +410,7 @@ static struct Qdisc *
 wme_classop_leaf(struct Qdisc *qd, unsigned long arg)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	unsigned long queue = arg - 1;
 
@@ -423,7 +423,7 @@ wme_classop_leaf(struct Qdisc *qd, unsigned long arg)
 
 static unsigned long wme_classop_get(struct Qdisc *qd, u32 classid)
 {
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	unsigned long queue = TC_H_MIN(classid);
 
@@ -450,7 +450,7 @@ static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent,
 			      struct nlattr **tca, unsigned long *arg)
 {
 	unsigned long cl = *arg;
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 
 	if (cl - 1 > QD_NUM(hw))
@@ -467,7 +467,7 @@ static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent,
  * when we add WMM-SA support - TSPECs may be deleted here */
 static int wme_classop_delete(struct Qdisc *qd, unsigned long cl)
 {
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 
 	if (cl - 1 > QD_NUM(hw))
@@ -480,7 +480,7 @@ static int wme_classop_dump_class(struct Qdisc *qd, unsigned long cl,
 				  struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 
 	if (cl - 1 > QD_NUM(hw))
@@ -494,7 +494,7 @@ static int wme_classop_dump_class(struct Qdisc *qd, unsigned long cl,
 
 static void wme_classop_walk(struct Qdisc *qd, struct qdisc_walker *arg)
 {
-	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	int queue;
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9360fc81e8c..e2389f161e4 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -334,7 +334,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
 	tcm->tcm_family = AF_UNSPEC;
 	tcm->tcm__pad1 = 0;
 	tcm->tcm__pad1 = 0;
-	tcm->tcm_ifindex = tp->q->dev->ifindex;
+	tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
 	tcm->tcm_parent = tp->classid;
 	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
 	NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind);
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 784dcb870b9..5a16ca28aa3 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -302,7 +302,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 			*fp = f->next;
 			tcf_tree_unlock(tp);
 
-			route4_reset_fastmap(tp->q->dev, head, f->id);
+			route4_reset_fastmap(qdisc_dev(tp->q), head, f->id);
 			route4_delete_filter(tp, f);
 
 			/* Strip tree */
@@ -500,7 +500,7 @@ reinsert:
 	}
 	tcf_tree_unlock(tp);
 
-	route4_reset_fastmap(tp->q->dev, head, f->id);
+	route4_reset_fastmap(qdisc_dev(tp->q), head, f->id);
 	*arg = (unsigned long)f;
 	return 0;
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b86c98bd06a..1f893082a4f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -281,7 +281,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 {
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
 						 timer);
-	struct net_device *dev = wd->qdisc->dev;
+	struct net_device *dev = qdisc_dev(wd->qdisc);
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
 	smp_wmb();
@@ -493,7 +493,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
 			return;
 
-		sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
 		if (sch == NULL) {
 			WARN_ON(parentid != TC_H_ROOT);
 			return;
@@ -593,7 +593,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	if (ops == NULL)
 		goto err_out;
 
-	sch = qdisc_alloc(dev, dev_queue, ops);
+	sch = qdisc_alloc(dev_queue, ops);
 	if (IS_ERR(sch)) {
 		err = PTR_ERR(sch);
 		goto err_out2;
@@ -940,7 +940,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	tcm->tcm_family = AF_UNSPEC;
 	tcm->tcm__pad1 = 0;
 	tcm->tcm__pad2 = 0;
-	tcm->tcm_ifindex = q->dev->ifindex;
+	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 	tcm->tcm_parent = clid;
 	tcm->tcm_handle = q->handle;
 	tcm->tcm_info = atomic_read(&q->refcnt);
@@ -1186,7 +1186,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
 	tcm = NLMSG_DATA(nlh);
 	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm_ifindex = q->dev->ifindex;
+	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 	tcm->tcm_parent = q->handle;
 	tcm->tcm_handle = q->handle;
 	tcm->tcm_info = 0;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 3dddab531d5..0de757e3be4 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -296,7 +296,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		goto err_out;
 	}
 	flow->filter_list = NULL;
-	flow->q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+	flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 				    &pfifo_qdisc_ops, classid);
 	if (!flow->q)
 		flow->q = &noop_qdisc;
@@ -556,7 +556,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 
 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 	p->flows = &p->link;
-	p->link.q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+	p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 				      &pfifo_qdisc_ops, sch->handle);
 	if (!p->link.q)
 		p->link.q = &noop_qdisc;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d360dcd0818..9f2ace585fd 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -650,7 +650,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
+	netif_schedule(qdisc_dev(sch));
 	return HRTIMER_NORESTART;
 }
 
@@ -1077,9 +1077,9 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
 				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
 					q->quanta[prio];
 			}
-			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
+			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
 				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
-				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
+				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
 			}
 		}
 	}
@@ -1401,7 +1401,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->link.sibling = &q->link;
 	q->link.common.classid = sch->handle;
 	q->link.qdisc = sch;
-	if (!(q->link.q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+	if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 					    &pfifo_qdisc_ops,
 					    sch->handle)))
 		q->link.q = &noop_qdisc;
@@ -1411,7 +1411,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->link.cpriority = TC_CBQ_MAXPRIO-1;
 	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
 	q->link.overlimit = cbq_ovl_classic;
-	q->link.allot = psched_mtu(sch->dev);
+	q->link.allot = psched_mtu(qdisc_dev(sch));
 	q->link.quantum = q->link.allot;
 	q->link.weight = q->link.R_tab->rate.rate;
 
@@ -1646,7 +1646,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 
 	if (cl) {
 		if (new == NULL) {
-			new = qdisc_create_dflt(sch->dev, sch->dev_queue,
+			new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 						&pfifo_qdisc_ops,
 						cl->common.classid);
 			if (new == NULL)
@@ -1746,10 +1746,10 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg)
 #ifdef CONFIG_NET_CLS_ACT
 		struct cbq_sched_data *q = qdisc_priv(sch);
 
-		spin_lock_bh(&sch->dev->queue_lock);
+		spin_lock_bh(&qdisc_dev(sch)->queue_lock);
 		if (q->rx_class == cl)
 			q->rx_class = NULL;
-		spin_unlock_bh(&sch->dev->queue_lock);
+		spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
 #endif
 
 		cbq_destroy_class(sch, cl);
@@ -1828,7 +1828,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      &sch->dev->queue_lock,
+					      &qdisc_dev(sch)->queue_lock,
 					      tca[TCA_RATE]);
 		return 0;
 	}
@@ -1879,7 +1879,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	cl->R_tab = rtab;
 	rtab = NULL;
 	cl->refcnt = 1;
-	if (!(cl->q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+	if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 					&pfifo_qdisc_ops, classid)))
 		cl->q = &noop_qdisc;
 	cl->common.classid = classid;
@@ -1919,7 +1919,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &sch->dev->queue_lock, tca[TCA_RATE]);
+				  &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]);
 
 	*arg = (unsigned long)cl;
 	return 0;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index c955ba24e5c..3aafbd17393 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -60,7 +60,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
 		sch, p, new, old);
 
 	if (new == NULL) {
-		new = qdisc_create_dflt(sch->dev, sch->dev_queue,
+		new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 					&pfifo_qdisc_ops,
 					sch->handle);
 		if (new == NULL)
@@ -391,7 +391,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 	p->default_index = default_index;
 	p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
 
-	p->q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+	p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 				 &pfifo_qdisc_ops, sch->handle);
 	if (p->q == NULL)
 		p->q = &noop_qdisc;
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 779eae85faf..1d97fa42c90 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -48,10 +48,10 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
 	if (opt == NULL) {
-		u32 limit = sch->dev->tx_queue_len ? : 1;
+		u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
 
 		if (sch->ops == &bfifo_qdisc_ops)
-			limit *= sch->dev->mtu;
+			limit *= qdisc_dev(sch)->mtu;
 
 		q->limit = limit;
 	} else {
@@ -137,7 +137,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
 	struct Qdisc *q;
 	int err = -ENOMEM;
 
-	q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+	q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 			      ops, TC_H_MAKE(sch->handle, 1));
 	if (q) {
 		err = fifo_set_limit(q, limit);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d9708648089..b626a4f32b6 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -364,7 +364,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
 	struct sk_buff_head *list = prio2list(skb, qdisc);
 
-	if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
+	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
 		qdisc->q.qlen++;
 		return __qdisc_enqueue_tail(skb, qdisc, list);
 	}
@@ -440,8 +440,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.owner		=	THIS_MODULE,
 };
 
-struct Qdisc *qdisc_alloc(struct net_device *dev,
-			  struct netdev_queue *dev_queue,
+struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  struct Qdisc_ops *ops)
 {
 	void *p;
@@ -465,8 +464,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev,
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev_queue = dev_queue;
-	sch->dev = dev;
-	dev_hold(dev);
+	dev_hold(qdisc_dev(sch));
 	atomic_set(&sch->refcnt, 1);
 
 	return sch;
@@ -481,7 +479,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev,
 {
 	struct Qdisc *sch;
 
-	sch = qdisc_alloc(dev, dev_queue, ops);
+	sch = qdisc_alloc(dev_queue, ops);
 	if (IS_ERR(sch))
 		goto errout;
 	sch->stats_lock = &dev->queue_lock;
@@ -534,7 +532,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 		ops->destroy(qdisc);
 
 	module_put(ops->owner);
-	dev_put(qdisc->dev);
+	dev_put(qdisc_dev(qdisc));
 	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
 }
 EXPORT_SYMBOL(qdisc_destroy);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index c89fba56db5..39fa28511f0 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -164,7 +164,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 			 * if no default DP has been configured. This
 			 * allows for DP flows to be left untouched.
 			 */
-			if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len)
+			if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
 				return qdisc_enqueue_tail(skb, sch);
 			else
 				goto drop;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5a22fec4ead..333525422f4 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      &sch->dev->queue_lock,
+					      &qdisc_dev(sch)->queue_lock,
 					      tca[TCA_RATE]);
 		return 0;
 	}
@@ -1083,7 +1083,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	cl->refcnt    = 1;
 	cl->sched     = q;
 	cl->cl_parent = parent;
-	cl->qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue,
+	cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 				      &pfifo_qdisc_ops, classid);
 	if (cl->qdisc == NULL)
 		cl->qdisc = &noop_qdisc;
@@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &sch->dev->queue_lock, tca[TCA_RATE]);
+				  &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]);
 	*arg = (unsigned long)cl;
 	return 0;
 }
@@ -1202,7 +1202,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	if (cl->level > 0)
 		return -EINVAL;
 	if (new == NULL) {
-		new = qdisc_create_dflt(sch->dev, sch->dev_queue,
+		new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 					&pfifo_qdisc_ops,
 					cl->cl_common.classid);
 		if (new == NULL)
@@ -1445,7 +1445,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	q->root.cl_common.classid = sch->handle;
 	q->root.refcnt  = 1;
 	q->root.sched   = q;
-	q->root.qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue,
+	q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 					  &pfifo_qdisc_ops,
 					  sch->handle);
 	if (q->root.qdisc == NULL)
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 956a67f66b9..31f7d1536e6 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1026,7 +1026,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	qdisc_watchdog_init(&q->watchdog, sch);
 	skb_queue_head_init(&q->direct_queue);
 
-	q->direct_qlen = sch->dev->tx_queue_len;
+	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
 	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
 		q->direct_qlen = 2;
 
@@ -1043,7 +1043,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct nlattr *nest;
 	struct tc_htb_glob gopt;
 
-	spin_lock_bh(&sch->dev->queue_lock);
+	spin_lock_bh(&qdisc_dev(sch)->queue_lock);
 
 	gopt.direct_pkts = q->direct_pkts;
 	gopt.version = HTB_VER;
@@ -1057,11 +1057,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
 	nla_nest_end(skb, nest);
 
-	spin_unlock_bh(&sch->dev->queue_lock);
+	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
 	return skb->len;
 
 nla_put_failure:
-	spin_unlock_bh(&sch->dev->queue_lock);
+	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
 	nla_nest_cancel(skb, nest);
 	return -1;
 }
@@ -1073,7 +1073,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	struct nlattr *nest;
 	struct tc_htb_opt opt;
 
-	spin_lock_bh(&sch->dev->queue_lock);
+	spin_lock_bh(&qdisc_dev(sch)->queue_lock);
 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->common.classid;
 	if (!cl->level && cl->un.leaf.q)
@@ -1095,11 +1095,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 
 	nla_nest_end(skb, nest);
-	spin_unlock_bh(&sch->dev->queue_lock);
+	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
 	return skb->len;
 
 nla_put_failure:
-	spin_unlock_bh(&sch->dev->queue_lock);
+	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
 	nla_nest_cancel(skb, nest);
 	return -1;
 }
@@ -1129,7 +1129,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 
 	if (cl && !cl->level) {
 		if (new == NULL &&
-		    (new = qdisc_create_dflt(sch->dev, sch->dev_queue,
+		    (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 					     &pfifo_qdisc_ops,
 					     cl->common.classid))
 		    == NULL)
@@ -1257,7 +1257,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 		return -EBUSY;
 
 	if (!cl->level && htb_parent_last_child(cl)) {
-		new_q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 					  &pfifo_qdisc_ops,
 					  cl->parent->common.classid);
 		last_child = 1;
@@ -1365,7 +1365,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			goto failure;
 
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &sch->dev->queue_lock,
+				  &qdisc_dev(sch)->queue_lock,
 				  tca[TCA_RATE] ? : &est.nla);
 		cl->refcnt = 1;
 		cl->children = 0;
@@ -1378,7 +1378,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 		   so that can't be used inside of sch_tree_lock
 		   -- thanks to Karlis Peisenieks */
-		new_q = qdisc_create_dflt(sch->dev, sch->dev_queue,
+		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 					  &pfifo_qdisc_ops, classid);
 		sch_tree_lock(sch);
 		if (parent && !parent->level) {
@@ -1420,7 +1420,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	} else {
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      &sch->dev->queue_lock,
+					      &qdisc_dev(sch)->queue_lock,
 					      tca[TCA_RATE]);
 		sch_tree_lock(sch);
 	}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index aa7a04e32ae..79058296044 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -180,7 +180,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	 * skb will be queued.
 	 */
 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
-		struct Qdisc *rootq = sch->dev->qdisc;
+		struct Qdisc *rootq = qdisc_dev(sch)->qdisc;
 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 		q->duplicate = 0;
 
@@ -333,9 +333,9 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	for (i = 0; i < n; i++)
 		d->table[i] = data[i];
 
-	spin_lock_bh(&sch->dev->queue_lock);
+	spin_lock_bh(&qdisc_dev(sch)->queue_lock);
 	d = xchg(&q->delay_dist, d);
-	spin_unlock_bh(&sch->dev->queue_lock);
+	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
 
 	kfree(d);
 	return 0;
@@ -495,7 +495,7 @@ static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
 
 		q->limit = ctl->limit;
 	} else
-		q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
+		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
 
 	q->oldest = PSCHED_PASTPERFECT;
 	return 0;
@@ -536,7 +536,7 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 
 	qdisc_watchdog_init(&q->watchdog, sch);
 
-	q->qdisc = qdisc_create_dflt(sch->dev, sch->dev_queue,
+	q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 				     &tfifo_qdisc_ops,
 				     TC_H_MAKE(sch->handle, 1));
 	if (!q->qdisc) {
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index ca58a039208..39157f7bc04 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -136,7 +136,8 @@ prio_dequeue(struct Qdisc* sch)
 		 * pulling an skb.  This way we avoid excessive requeues
 		 * for slower queues.
 		 */
-		if (!__netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) {
+		if (!__netif_subqueue_stopped(qdisc_dev(sch),
+					      (q->mq ? prio : 0))) {
 			qdisc = q->queues[prio];
 			skb = qdisc->dequeue(qdisc);
 			if (skb) {
@@ -165,8 +166,8 @@ static struct sk_buff *rr_dequeue(struct Qdisc* sch)
 		 * for slower queues.  If the queue is stopped, try the
 		 * next queue.
 		 */
-		if (!__netif_subqueue_stopped(sch->dev,
-					    (q->mq ? q->curband : 0))) {
+		if (!__netif_subqueue_stopped(qdisc_dev(sch),
+					      (q->mq ? q->curband : 0))) {
 			qdisc = q->queues[q->curband];
 			skb = qdisc->dequeue(qdisc);
 			if (skb) {
@@ -249,10 +250,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	if (q->mq) {
 		if (sch->parent != TC_H_ROOT)
 			return -EINVAL;
-		if (netif_is_multiqueue(sch->dev)) {
+		if (netif_is_multiqueue(qdisc_dev(sch))) {
 			if (q->bands == 0)
-				q->bands = sch->dev->egress_subqueue_count;
-			else if (q->bands != sch->dev->egress_subqueue_count)
+				q->bands = qdisc_dev(sch)->egress_subqueue_count;
+			else if (q->bands != qdisc_dev(sch)->egress_subqueue_count)
 				return -EINVAL;
 		} else
 			return -EOPNOTSUPP;
@@ -281,7 +282,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	for (i=0; i<q->bands; i++) {
 		if (q->queues[i] == &noop_qdisc) {
 			struct Qdisc *child;
-			child = qdisc_create_dflt(sch->dev, sch->dev_queue,
+			child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 						  &pfifo_qdisc_ops,
 						  TC_H_MAKE(sch->handle, i + 1));
 			if (child) {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 6a97afbfb95..8458f630fac 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -461,7 +461,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 		return -EINVAL;
 
 	sch_tree_lock(sch);
-	q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
 	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
@@ -502,7 +502,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->max_depth = 0;
 	q->tail = SFQ_DEPTH;
 	if (opt == NULL) {
-		q->quantum = psched_mtu(sch->dev);
+		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->perturb_period = 0;
 		q->perturbation = net_random();
 	} else {
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 0444fd0f0d2..b3fc82623fc 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -78,7 +78,7 @@ struct teql_sched_data
 static int
 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 {
-	struct net_device *dev = sch->dev;
+	struct net_device *dev = qdisc_dev(sch);
 	struct teql_sched_data *q = qdisc_priv(sch);
 
 	if (q->q.qlen < dev->tx_queue_len) {
@@ -111,7 +111,7 @@ teql_dequeue(struct Qdisc* sch)
 
 	skb = __skb_dequeue(&dat->q);
 	if (skb == NULL) {
-		struct net_device *m = dat->m->dev->qdisc->dev;
+		struct net_device *m = qdisc_dev(dat->m->dev->qdisc);
 		if (m) {
 			dat->m->slaves = sch;
 			netif_wake_queue(m);
@@ -170,7 +170,7 @@ teql_destroy(struct Qdisc* sch)
 
 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 {
-	struct net_device *dev = sch->dev;
+	struct net_device *dev = qdisc_dev(sch);
 	struct teql_master *m = (struct teql_master*)sch->ops;
 	struct teql_sched_data *q = qdisc_priv(sch);
 
@@ -282,7 +282,7 @@ restart:
 		goto drop;
 
 	do {
-		struct net_device *slave = q->dev;
+		struct net_device *slave = qdisc_dev(q);
 
 		if (slave->qdisc_sleeping != q)
 			continue;
@@ -352,7 +352,7 @@ static int teql_master_open(struct net_device *dev)
 
 	q = m->slaves;
 	do {
-		struct net_device *slave = q->dev;
+		struct net_device *slave = qdisc_dev(q);
 
 		if (slave == NULL)
 			return -EUNATCH;
@@ -403,7 +403,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
 	q = m->slaves;
 	if (q) {
 		do {
-			if (new_mtu > q->dev->mtu)
+			if (new_mtu > qdisc_dev(q)->mtu)
 				return -EINVAL;
 		} while ((q=NEXT_SLAVE(q)) != m->slaves);
 	}
-- 
cgit v1.2.3-70-g09d2


From dc2b48475a0a36f8b3bbb2da60d3a006dc5c2c84 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 17:18:23 -0700
Subject: netdev: Move queue_lock into struct netdev_queue.

The lock is now an attribute of the device queue.

One thing to notice is that "suspicious" places
emerge which will need specific training about
multiple queue handling.  They are so marked with
explicit "netdev->rx_queue" and "netdev->tx_queue"
references.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ifb.c         |  8 ++++----
 include/linux/netdevice.h |  4 ++--
 net/core/dev.c            | 33 ++++++++++++++++++++++-----------
 net/mac80211/main.c       | 10 +++++-----
 net/mac80211/wme.c        |  2 +-
 net/sched/sch_api.c       |  2 +-
 net/sched/sch_cbq.c       |  8 ++++----
 net/sched/sch_generic.c   | 40 ++++++++++++++++++++--------------------
 net/sched/sch_hfsc.c      |  4 ++--
 net/sched/sch_htb.c       | 16 ++++++++--------
 net/sched/sch_netem.c     |  4 ++--
 net/sched/sch_teql.c      |  4 ++--
 12 files changed, 73 insertions(+), 62 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index af233b59153..bc3de272a82 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -229,12 +229,12 @@ module_param(numifbs, int, 0);
 MODULE_PARM_DESC(numifbs, "Number of ifb devices");
 
 /*
- * dev_ifb->queue_lock is usually taken after dev->ingress_lock,
+ * dev_ifb->tx_queue.lock is usually taken after dev->ingress_lock,
  * reversely to e.g. qdisc_lock_tree(). It should be safe until
- * ifb doesn't take dev->queue_lock with dev_ifb->ingress_lock.
+ * ifb doesn't take dev->tx_queue.lock with dev_ifb->ingress_lock.
  * But lockdep should know that ifb has different locks from dev.
  */
-static struct lock_class_key ifb_queue_lock_key;
+static struct lock_class_key ifb_tx_queue_lock_key;
 static struct lock_class_key ifb_ingress_lock_key;
 
 
@@ -258,7 +258,7 @@ static int __init ifb_init_one(int index)
 	if (err < 0)
 		goto err;
 
-	lockdep_set_class(&dev_ifb->queue_lock, &ifb_queue_lock_key);
+	lockdep_set_class(&dev_ifb->tx_queue.lock, &ifb_tx_queue_lock_key);
 	lockdep_set_class(&dev_ifb->ingress_lock, &ifb_ingress_lock_key);
 
 	return 0;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 515fd25bf0f..e835acacb47 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -449,6 +449,7 @@ static inline void napi_synchronize(const struct napi_struct *n)
 #endif
 
 struct netdev_queue {
+	spinlock_t		lock;
 	struct net_device	*dev;
 };
 
@@ -629,7 +630,7 @@ struct net_device
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
 	struct netdev_queue	rx_queue;
-	struct netdev_queue	tx_queue;
+	struct netdev_queue	tx_queue ____cacheline_aligned_in_smp;
 
 	/* ingress path synchronizer */
 	spinlock_t		ingress_lock;
@@ -639,7 +640,6 @@ struct net_device
  * Cache line mostly used on queue transmit path (qdisc)
  */
 	/* device queue lock */
-	spinlock_t		queue_lock ____cacheline_aligned_in_smp;
 	struct Qdisc		*qdisc;
 	struct Qdisc		*qdisc_sleeping;
 	struct list_head	qdisc_list;
diff --git a/net/core/dev.c b/net/core/dev.c
index 9b281c906eb..05011048b86 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1667,6 +1667,7 @@ out_kfree_skb:
 int dev_queue_xmit(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
+	struct netdev_queue *txq;
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
@@ -1699,14 +1700,15 @@ int dev_queue_xmit(struct sk_buff *skb)
 	}
 
 gso:
-	spin_lock_prefetch(&dev->queue_lock);
+	txq = &dev->tx_queue;
+	spin_lock_prefetch(&txq->lock);
 
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
 	rcu_read_lock_bh();
 
-	/* Updates of qdisc are serialized by queue_lock.
+	/* Updates of qdisc are serialized by queue->lock.
 	 * The struct Qdisc which is pointed to by qdisc is now a
 	 * rcu structure - it may be accessed without acquiring
 	 * a lock (but the structure may be stale.) The freeing of the
@@ -1714,7 +1716,7 @@ gso:
 	 * more references to it.
 	 *
 	 * If the qdisc has an enqueue function, we still need to
-	 * hold the queue_lock before calling it, since queue_lock
+	 * hold the queue->lock before calling it, since queue->lock
 	 * also serializes access to the device queue.
 	 */
 
@@ -1724,19 +1726,19 @@ gso:
 #endif
 	if (q->enqueue) {
 		/* Grab device queue */
-		spin_lock(&dev->queue_lock);
+		spin_lock(&txq->lock);
 		q = dev->qdisc;
 		if (q->enqueue) {
 			/* reset queue_mapping to zero */
 			skb_set_queue_mapping(skb, 0);
 			rc = q->enqueue(skb, q);
 			qdisc_run(dev);
-			spin_unlock(&dev->queue_lock);
+			spin_unlock(&txq->lock);
 
 			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
 			goto out;
 		}
-		spin_unlock(&dev->queue_lock);
+		spin_unlock(&txq->lock);
 	}
 
 	/* The device has no queue. Common case for software devices:
@@ -1919,14 +1921,17 @@ static void net_tx_action(struct softirq_action *h)
 
 		while (head) {
 			struct net_device *dev = head;
+			struct netdev_queue *txq;
 			head = head->next_sched;
 
+			txq = &dev->tx_queue;
+
 			smp_mb__before_clear_bit();
 			clear_bit(__LINK_STATE_SCHED, &dev->state);
 
-			if (spin_trylock(&dev->queue_lock)) {
+			if (spin_trylock(&txq->lock)) {
 				qdisc_run(dev);
-				spin_unlock(&dev->queue_lock);
+				spin_unlock(&txq->lock);
 			} else {
 				netif_schedule(dev);
 			}
@@ -3787,7 +3792,6 @@ int register_netdevice(struct net_device *dev)
 	BUG_ON(!dev_net(dev));
 	net = dev_net(dev);
 
-	spin_lock_init(&dev->queue_lock);
 	spin_lock_init(&dev->_xmit_lock);
 	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
 	dev->xmit_lock_owner = -1;
@@ -4072,10 +4076,17 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
 	return &dev->stats;
 }
 
+static void netdev_init_one_queue(struct net_device *dev,
+				  struct netdev_queue *queue)
+{
+	spin_lock_init(&queue->lock);
+	queue->dev = dev;
+}
+
 static void netdev_init_queues(struct net_device *dev)
 {
-	dev->rx_queue.dev = dev;
-	dev->tx_queue.dev = dev;
+	netdev_init_one_queue(dev, &dev->rx_queue);
+	netdev_init_one_queue(dev, &dev->tx_queue);
 }
 
 /**
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index cf477ad39da..12aeaf78ae7 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -636,7 +636,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 
 	/* ensure that TX flow won't interrupt us
 	 * until the end of the call to requeue function */
-	spin_lock_bh(&local->mdev->queue_lock);
+	spin_lock_bh(&local->mdev->tx_queue.lock);
 
 	/* create a new queue for this aggregation */
 	ret = ieee80211_ht_agg_queue_add(local, sta, tid);
@@ -675,7 +675,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 
 	/* Will put all the packets in the new SW queue */
 	ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
-	spin_unlock_bh(&local->mdev->queue_lock);
+	spin_unlock_bh(&local->mdev->tx_queue.lock);
 	spin_unlock_bh(&sta->lock);
 
 	/* send an addBA request */
@@ -701,7 +701,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 err_unlock_queue:
 	kfree(sta->ampdu_mlme.tid_tx[tid]);
 	sta->ampdu_mlme.tid_tx[tid] = NULL;
-	spin_unlock_bh(&local->mdev->queue_lock);
+	spin_unlock_bh(&local->mdev->tx_queue.lock);
 	ret = -EBUSY;
 err_unlock_sta:
 	spin_unlock_bh(&sta->lock);
@@ -875,10 +875,10 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 
 	/* avoid ordering issues: we are the only one that can modify
 	 * the content of the qdiscs */
-	spin_lock_bh(&local->mdev->queue_lock);
+	spin_lock_bh(&local->mdev->tx_queue.lock);
 	/* remove the queue for this aggregation */
 	ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
-	spin_unlock_bh(&local->mdev->queue_lock);
+	spin_unlock_bh(&local->mdev->tx_queue.lock);
 
 	/* we just requeued the all the frames that were in the removed
 	 * queue, and since we might miss a softirq we do netif_schedule.
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 2fbc171130b..59ed9cae66b 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -648,7 +648,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 }
 
 /**
- * the caller needs to hold local->mdev->queue_lock
+ * the caller needs to hold local->mdev->tx_queue.lock
  */
 void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 				   struct sta_info *sta, u16 tid,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1f893082a4f..2a1834f8c7d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -606,7 +606,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 		sch->stats_lock = &dev->ingress_lock;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
 	} else {
-		sch->stats_lock = &dev->queue_lock;
+		sch->stats_lock = &dev_queue->lock;
 		if (handle == 0) {
 			handle = qdisc_alloc_handle(dev);
 			err = -ENOMEM;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 9f2ace585fd..99ce3da2b0a 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1746,10 +1746,10 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg)
 #ifdef CONFIG_NET_CLS_ACT
 		struct cbq_sched_data *q = qdisc_priv(sch);
 
-		spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+		spin_lock_bh(&sch->dev_queue->lock);
 		if (q->rx_class == cl)
 			q->rx_class = NULL;
-		spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+		spin_unlock_bh(&sch->dev_queue->lock);
 #endif
 
 		cbq_destroy_class(sch, cl);
@@ -1828,7 +1828,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      &qdisc_dev(sch)->queue_lock,
+					      &sch->dev_queue->lock,
 					      tca[TCA_RATE]);
 		return 0;
 	}
@@ -1919,7 +1919,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]);
+				  &sch->dev_queue->lock, tca[TCA_RATE]);
 
 	*arg = (unsigned long)cl;
 	return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b626a4f32b6..ee8f9f78a09 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -29,31 +29,31 @@
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
- * dev->queue_lock spinlock.
+ * queue->lock spinlock.
  *
  * The idea is the following:
  * - enqueue, dequeue are serialized via top level device
- *   spinlock dev->queue_lock.
+ *   spinlock queue->lock.
  * - ingress filtering is serialized via top level device
  *   spinlock dev->ingress_lock.
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
 void qdisc_lock_tree(struct net_device *dev)
-	__acquires(dev->queue_lock)
+	__acquires(dev->tx_queue.lock)
 	__acquires(dev->ingress_lock)
 {
-	spin_lock_bh(&dev->queue_lock);
+	spin_lock_bh(&dev->tx_queue.lock);
 	spin_lock(&dev->ingress_lock);
 }
 EXPORT_SYMBOL(qdisc_lock_tree);
 
 void qdisc_unlock_tree(struct net_device *dev)
 	__releases(dev->ingress_lock)
-	__releases(dev->queue_lock)
+	__releases(dev->tx_queue.lock)
 {
 	spin_unlock(&dev->ingress_lock);
-	spin_unlock_bh(&dev->queue_lock);
+	spin_unlock_bh(&dev->tx_queue.lock);
 }
 EXPORT_SYMBOL(qdisc_unlock_tree);
 
@@ -118,15 +118,15 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 }
 
 /*
- * NOTE: Called under dev->queue_lock with locally disabled BH.
+ * NOTE: Called under queue->lock with locally disabled BH.
  *
  * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
- * device at a time. dev->queue_lock serializes queue accesses for
+ * device at a time. queue->lock serializes queue accesses for
  * this device AND dev->qdisc pointer itself.
  *
  *  netif_tx_lock serializes accesses to device driver.
  *
- *  dev->queue_lock and netif_tx_lock are mutually exclusive,
+ *  queue->lock and netif_tx_lock are mutually exclusive,
  *  if one is grabbed, another must be free.
  *
  * Note, that this procedure can be called by a watchdog timer
@@ -148,14 +148,14 @@ static inline int qdisc_restart(struct net_device *dev)
 
 
 	/* And release queue */
-	spin_unlock(&dev->queue_lock);
+	spin_unlock(&q->dev_queue->lock);
 
 	HARD_TX_LOCK(dev, smp_processor_id());
 	if (!netif_subqueue_stopped(dev, skb))
 		ret = dev_hard_start_xmit(skb, dev);
 	HARD_TX_UNLOCK(dev);
 
-	spin_lock(&dev->queue_lock);
+	spin_lock(&q->dev_queue->lock);
 	q = dev->qdisc;
 
 	switch (ret) {
@@ -482,7 +482,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev,
 	sch = qdisc_alloc(dev_queue, ops);
 	if (IS_ERR(sch))
 		goto errout;
-	sch->stats_lock = &dev->queue_lock;
+	sch->stats_lock = &dev_queue->lock;
 	sch->parent = parentid;
 
 	if (!ops->init || ops->init(sch, NULL) == 0)
@@ -494,7 +494,7 @@ errout:
 }
 EXPORT_SYMBOL(qdisc_create_dflt);
 
-/* Under dev->queue_lock and BH! */
+/* Under queue->lock and BH! */
 
 void qdisc_reset(struct Qdisc *qdisc)
 {
@@ -514,7 +514,7 @@ static void __qdisc_destroy(struct rcu_head *head)
 	kfree((char *) qdisc - qdisc->padded);
 }
 
-/* Under dev->queue_lock and BH! */
+/* Under queue->lock and BH! */
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
@@ -566,13 +566,13 @@ void dev_activate(struct net_device *dev)
 		/* Delay activation until next carrier-on event */
 		return;
 
-	spin_lock_bh(&dev->queue_lock);
+	spin_lock_bh(&dev->tx_queue.lock);
 	rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping);
 	if (dev->qdisc != &noqueue_qdisc) {
 		dev->trans_start = jiffies;
 		dev_watchdog_up(dev);
 	}
-	spin_unlock_bh(&dev->queue_lock);
+	spin_unlock_bh(&dev->tx_queue.lock);
 }
 
 void dev_deactivate(struct net_device *dev)
@@ -581,7 +581,7 @@ void dev_deactivate(struct net_device *dev)
 	struct sk_buff *skb;
 	int running;
 
-	spin_lock_bh(&dev->queue_lock);
+	spin_lock_bh(&dev->tx_queue.lock);
 	qdisc = dev->qdisc;
 	dev->qdisc = &noop_qdisc;
 
@@ -589,7 +589,7 @@ void dev_deactivate(struct net_device *dev)
 
 	skb = dev->gso_skb;
 	dev->gso_skb = NULL;
-	spin_unlock_bh(&dev->queue_lock);
+	spin_unlock_bh(&dev->tx_queue.lock);
 
 	kfree_skb(skb);
 
@@ -607,9 +607,9 @@ void dev_deactivate(struct net_device *dev)
 		 * Double-check inside queue lock to ensure that all effects
 		 * of the queue run are visible when we return.
 		 */
-		spin_lock_bh(&dev->queue_lock);
+		spin_lock_bh(&dev->tx_queue.lock);
 		running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
-		spin_unlock_bh(&dev->queue_lock);
+		spin_unlock_bh(&dev->tx_queue.lock);
 
 		/*
 		 * The running flag should never be set at this point because
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 333525422f4..997d520ca58 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      &qdisc_dev(sch)->queue_lock,
+					      &sch->dev_queue->lock,
 					      tca[TCA_RATE]);
 		return 0;
 	}
@@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &qdisc_dev(sch)->queue_lock, tca[TCA_RATE]);
+				  &sch->dev_queue->lock, tca[TCA_RATE]);
 	*arg = (unsigned long)cl;
 	return 0;
 }
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 31f7d1536e6..c8ca54cc26b 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1043,7 +1043,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct nlattr *nest;
 	struct tc_htb_glob gopt;
 
-	spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+	spin_lock_bh(&sch->dev_queue->lock);
 
 	gopt.direct_pkts = q->direct_pkts;
 	gopt.version = HTB_VER;
@@ -1057,11 +1057,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
 	nla_nest_end(skb, nest);
 
-	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+	spin_unlock_bh(&sch->dev_queue->lock);
 	return skb->len;
 
 nla_put_failure:
-	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+	spin_unlock_bh(&sch->dev_queue->lock);
 	nla_nest_cancel(skb, nest);
 	return -1;
 }
@@ -1073,7 +1073,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	struct nlattr *nest;
 	struct tc_htb_opt opt;
 
-	spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+	spin_lock_bh(&sch->dev_queue->lock);
 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->common.classid;
 	if (!cl->level && cl->un.leaf.q)
@@ -1095,11 +1095,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 
 	nla_nest_end(skb, nest);
-	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+	spin_unlock_bh(&sch->dev_queue->lock);
 	return skb->len;
 
 nla_put_failure:
-	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+	spin_unlock_bh(&sch->dev_queue->lock);
 	nla_nest_cancel(skb, nest);
 	return -1;
 }
@@ -1365,7 +1365,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			goto failure;
 
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &qdisc_dev(sch)->queue_lock,
+				  &sch->dev_queue->lock,
 				  tca[TCA_RATE] ? : &est.nla);
 		cl->refcnt = 1;
 		cl->children = 0;
@@ -1420,7 +1420,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	} else {
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      &qdisc_dev(sch)->queue_lock,
+					      &sch->dev_queue->lock,
 					      tca[TCA_RATE]);
 		sch_tree_lock(sch);
 	}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 79058296044..71b73c528f9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -333,9 +333,9 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	for (i = 0; i < n; i++)
 		d->table[i] = data[i];
 
-	spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+	spin_lock_bh(&sch->dev_queue->lock);
 	d = xchg(&q->delay_dist, d);
-	spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+	spin_unlock_bh(&sch->dev_queue->lock);
 
 	kfree(d);
 	return 0;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index b3fc82623fc..4f3054e8e1a 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -154,9 +154,9 @@ teql_destroy(struct Qdisc* sch)
 					master->slaves = NEXT_SLAVE(q);
 					if (q == master->slaves) {
 						master->slaves = NULL;
-						spin_lock_bh(&master->dev->queue_lock);
+						spin_lock_bh(&master->dev->tx_queue.lock);
 						qdisc_reset(master->dev->qdisc);
-						spin_unlock_bh(&master->dev->queue_lock);
+						spin_unlock_bh(&master->dev->tx_queue.lock);
 					}
 				}
 				skb_queue_purge(&dat->q);
-- 
cgit v1.2.3-70-g09d2


From 555353cfa1aee293de445bfa6de43276138ddd82 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 17:33:13 -0700
Subject: netdev: The ingress_lock member is no longer needed.

Every qdisc is assosciated with a queue, and in the case of ingress
qdiscs that will now be netdev->rx_queue so using that queue's lock is
the thing to do.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ifb.c         |  8 ++++----
 include/linux/netdevice.h |  2 --
 net/core/dev.c            | 12 +++++++-----
 net/sched/sch_api.c       |  3 +--
 net/sched/sch_generic.c   | 10 +++++-----
 5 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index bc3de272a82..ccbd6554f6e 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -229,13 +229,13 @@ module_param(numifbs, int, 0);
 MODULE_PARM_DESC(numifbs, "Number of ifb devices");
 
 /*
- * dev_ifb->tx_queue.lock is usually taken after dev->ingress_lock,
+ * dev_ifb->tx_queue.lock is usually taken after dev->rx_queue.lock,
  * reversely to e.g. qdisc_lock_tree(). It should be safe until
- * ifb doesn't take dev->tx_queue.lock with dev_ifb->ingress_lock.
+ * ifb doesn't take dev->tx_queue.lock with dev_ifb->rx_queue.lock.
  * But lockdep should know that ifb has different locks from dev.
  */
 static struct lock_class_key ifb_tx_queue_lock_key;
-static struct lock_class_key ifb_ingress_lock_key;
+static struct lock_class_key ifb_rx_queue_lock_key;
 
 
 static int __init ifb_init_one(int index)
@@ -259,7 +259,7 @@ static int __init ifb_init_one(int index)
 		goto err;
 
 	lockdep_set_class(&dev_ifb->tx_queue.lock, &ifb_tx_queue_lock_key);
-	lockdep_set_class(&dev_ifb->ingress_lock, &ifb_ingress_lock_key);
+	lockdep_set_class(&dev_ifb->rx_queue.lock, &ifb_rx_queue_lock_key);
 
 	return 0;
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e835acacb47..633a44c6fa5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -632,8 +632,6 @@ struct net_device
 	struct netdev_queue	rx_queue;
 	struct netdev_queue	tx_queue ____cacheline_aligned_in_smp;
 
-	/* ingress path synchronizer */
-	spinlock_t		ingress_lock;
 	struct Qdisc		*qdisc_ingress;
 
 /*
diff --git a/net/core/dev.c b/net/core/dev.c
index 05011048b86..2322fb69fd5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2014,10 +2014,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
  */
 static int ing_filter(struct sk_buff *skb)
 {
-	struct Qdisc *q;
 	struct net_device *dev = skb->dev;
-	int result = TC_ACT_OK;
 	u32 ttl = G_TC_RTTL(skb->tc_verd);
+	struct netdev_queue *rxq;
+	int result = TC_ACT_OK;
+	struct Qdisc *q;
 
 	if (MAX_RED_LOOP < ttl++) {
 		printk(KERN_WARNING
@@ -2029,10 +2030,12 @@ static int ing_filter(struct sk_buff *skb)
 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-	spin_lock(&dev->ingress_lock);
+	rxq = &dev->rx_queue;
+
+	spin_lock(&rxq->lock);
 	if ((q = dev->qdisc_ingress) != NULL)
 		result = q->enqueue(skb, q);
-	spin_unlock(&dev->ingress_lock);
+	spin_unlock(&rxq->lock);
 
 	return result;
 }
@@ -3795,7 +3798,6 @@ int register_netdevice(struct net_device *dev)
 	spin_lock_init(&dev->_xmit_lock);
 	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
 	dev->xmit_lock_owner = -1;
-	spin_lock_init(&dev->ingress_lock);
 
 	dev->iflink = -1;
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2a1834f8c7d..570cef2a9c5 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -601,12 +601,11 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 
 	sch->parent = parent;
 
+	sch->stats_lock = &dev_queue->lock;
 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
-		sch->stats_lock = &dev->ingress_lock;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
 	} else {
-		sch->stats_lock = &dev_queue->lock;
 		if (handle == 0) {
 			handle = qdisc_alloc_handle(dev);
 			err = -ENOMEM;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ee8f9f78a09..804d44b0034 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -35,24 +35,24 @@
  * - enqueue, dequeue are serialized via top level device
  *   spinlock queue->lock.
  * - ingress filtering is serialized via top level device
- *   spinlock dev->ingress_lock.
+ *   spinlock dev->rx_queue.lock.
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
 void qdisc_lock_tree(struct net_device *dev)
 	__acquires(dev->tx_queue.lock)
-	__acquires(dev->ingress_lock)
+	__acquires(dev->rx_queue.lock)
 {
 	spin_lock_bh(&dev->tx_queue.lock);
-	spin_lock(&dev->ingress_lock);
+	spin_lock(&dev->rx_queue.lock);
 }
 EXPORT_SYMBOL(qdisc_lock_tree);
 
 void qdisc_unlock_tree(struct net_device *dev)
-	__releases(dev->ingress_lock)
+	__releases(dev->rx_queue.lock)
 	__releases(dev->tx_queue.lock)
 {
-	spin_unlock(&dev->ingress_lock);
+	spin_unlock(&dev->rx_queue.lock);
 	spin_unlock_bh(&dev->tx_queue.lock);
 }
 EXPORT_SYMBOL(qdisc_unlock_tree);
-- 
cgit v1.2.3-70-g09d2


From b0e1e6462df3c5944010b3328a546d8fe5d932cd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 17:42:10 -0700
Subject: netdev: Move rest of qdisc state into struct netdev_queue

Now qdisc, qdisc_sleeping, and qdisc_list also live there.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_net.c    |  2 +-
 include/linux/netdevice.h      | 10 ++---
 include/net/irda/irda_device.h |  2 +-
 net/core/dev.c                 |  4 +-
 net/core/link_watch.c          |  8 +++-
 net/core/rtnetlink.c           |  6 ++-
 net/ipv6/addrconf.c            |  3 +-
 net/mac80211/wme.c             | 20 ++++++----
 net/sched/cls_api.c            |  7 +++-
 net/sched/sch_api.c            | 34 ++++++++++------
 net/sched/sch_generic.c        | 90 ++++++++++++++++++++++++++----------------
 net/sched/sch_netem.c          |  2 +-
 net/sched/sch_teql.c           | 14 ++++---
 13 files changed, 125 insertions(+), 77 deletions(-)

(limited to 'net')

diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index ef1a300068d..457bbd119f9 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -287,7 +287,7 @@ isdn_net_unbind_channel(isdn_net_local * lp)
 		   BEWARE! This chunk of code cannot be called from hardware
 		   interrupt handler. I hope it is true. --ANK
 		 */
-		qdisc_reset(lp->netdev->dev->qdisc);
+		qdisc_reset(lp->netdev->dev->tx_queue.qdisc);
 	}
 	lp->dialstate = 0;
 	dev->rx_netdev[isdn_dc2minor(lp->isdn_device, lp->isdn_channel)] = NULL;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 633a44c6fa5..df702a7b3db 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -451,6 +451,9 @@ static inline void napi_synchronize(const struct napi_struct *n)
 struct netdev_queue {
 	spinlock_t		lock;
 	struct net_device	*dev;
+	struct Qdisc		*qdisc;
+	struct Qdisc		*qdisc_sleeping;
+	struct list_head	qdisc_list;
 };
 
 /*
@@ -634,13 +637,6 @@ struct net_device
 
 	struct Qdisc		*qdisc_ingress;
 
-/*
- * Cache line mostly used on queue transmit path (qdisc)
- */
-	/* device queue lock */
-	struct Qdisc		*qdisc;
-	struct Qdisc		*qdisc_sleeping;
-	struct list_head	qdisc_list;
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 
 	/* Partially transmitted GSO packet. */
diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h
index f70e9b39eba..16fbf672e0b 100644
--- a/include/net/irda/irda_device.h
+++ b/include/net/irda/irda_device.h
@@ -223,7 +223,7 @@ int  irda_device_is_receiving(struct net_device *dev);
 /* Interface for internal use */
 static inline int irda_device_txqueue_empty(const struct net_device *dev)
 {
-	return skb_queue_empty(&dev->qdisc->q);
+	return skb_queue_empty(&dev->tx_queue.qdisc->q);
 }
 int  irda_device_set_raw_mode(struct net_device* self, int status);
 struct net_device *alloc_irdadev(int sizeof_priv);
diff --git a/net/core/dev.c b/net/core/dev.c
index 2322fb69fd5..ce79c28d739 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1720,14 +1720,14 @@ gso:
 	 * also serializes access to the device queue.
 	 */
 
-	q = rcu_dereference(dev->qdisc);
+	q = rcu_dereference(txq->qdisc);
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
 		/* Grab device queue */
 		spin_lock(&txq->lock);
-		q = dev->qdisc;
+		q = txq->qdisc;
 		if (q->enqueue) {
 			/* reset queue_mapping to zero */
 			skb_set_queue_mapping(skb, 0);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index a5e372b9ec4..50218218445 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -79,8 +79,10 @@ static void rfc2863_policy(struct net_device *dev)
 
 static int linkwatch_urgent_event(struct net_device *dev)
 {
+	struct netdev_queue *txq = &dev->tx_queue;
+
 	return netif_running(dev) && netif_carrier_ok(dev) &&
-	       dev->qdisc != dev->qdisc_sleeping;
+	       txq->qdisc != txq->qdisc_sleeping;
 }
 
 
@@ -181,7 +183,9 @@ static void __linkwatch_run_queue(int urgent_only)
 		rfc2863_policy(dev);
 		if (dev->flags & IFF_UP) {
 			if (netif_carrier_ok(dev)) {
-				WARN_ON(dev->qdisc_sleeping == &noop_qdisc);
+				struct netdev_queue *txq = &dev->tx_queue;
+
+				WARN_ON(txq->qdisc_sleeping == &noop_qdisc);
 				dev_activate(dev);
 			} else
 				dev_deactivate(dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6c8d7f0ea01..8ef9f1db610 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -605,6 +605,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags)
 {
+	struct netdev_queue *txq;
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
 	struct net_device_stats *stats;
@@ -635,8 +636,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (dev->master)
 		NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
 
-	if (dev->qdisc_sleeping)
-		NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
+	txq = &dev->tx_queue;
+	if (txq->qdisc_sleeping)
+		NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id);
 
 	if (1) {
 		struct rtnl_link_ifmap map = {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8572cb05fc2..5c84c798331 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -231,7 +231,8 @@ const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTER
 /* Check if a valid qdisc is available */
 static inline int addrconf_qdisc_ok(struct net_device *dev)
 {
-	return (dev->qdisc != &noop_qdisc);
+	struct netdev_queue *txq = &dev->tx_queue;
+	return (txq->qdisc != &noop_qdisc);
 }
 
 /* Check if a route is valid prefix route */
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 59ed9cae66b..6ae43a3c772 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -574,9 +574,10 @@ static struct Qdisc_ops wme_qdisc_ops __read_mostly =
 
 void ieee80211_install_qdisc(struct net_device *dev)
 {
+	struct netdev_queue *txq = &dev->tx_queue;
 	struct Qdisc *qdisc;
 
-	qdisc = qdisc_create_dflt(dev, &dev->tx_queue,
+	qdisc = qdisc_create_dflt(dev, txq,
 				  &wme_qdisc_ops, TC_H_ROOT);
 	if (!qdisc) {
 		printk(KERN_ERR "%s: qdisc installation failed\n", dev->name);
@@ -587,15 +588,17 @@ void ieee80211_install_qdisc(struct net_device *dev)
 	qdisc->handle = 0x80010000;
 
 	qdisc_lock_tree(dev);
-	list_add_tail(&qdisc->list, &dev->qdisc_list);
-	dev->qdisc_sleeping = qdisc;
+	list_add_tail(&qdisc->list, &txq->qdisc_list);
+	txq->qdisc_sleeping = qdisc;
 	qdisc_unlock_tree(dev);
 }
 
 
 int ieee80211_qdisc_installed(struct net_device *dev)
 {
-	return dev->qdisc_sleeping->ops == &wme_qdisc_ops;
+	struct netdev_queue *txq = &dev->tx_queue;
+
+	return txq->qdisc_sleeping->ops == &wme_qdisc_ops;
 }
 
 
@@ -614,8 +617,9 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 			struct sta_info *sta, u16 tid)
 {
 	int i;
+	struct netdev_queue *txq = &local->mdev->tx_queue;
 	struct ieee80211_sched_data *q =
-			qdisc_priv(local->mdev->qdisc_sleeping);
+			qdisc_priv(txq->qdisc_sleeping);
 	DECLARE_MAC_BUF(mac);
 
 	/* prepare the filter and save it for the SW queue
@@ -655,8 +659,9 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 				   u8 requeue)
 {
 	struct ieee80211_hw *hw = &local->hw;
+	struct netdev_queue *txq = &local->mdev->tx_queue;
 	struct ieee80211_sched_data *q =
-		qdisc_priv(local->mdev->qdisc_sleeping);
+		qdisc_priv(txq->qdisc_sleeping);
 	int agg_queue = sta->tid_to_tx_q[tid];
 
 	/* return the qdisc to the pool */
@@ -671,7 +676,8 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 
 void ieee80211_requeue(struct ieee80211_local *local, int queue)
 {
-	struct Qdisc *root_qd = local->mdev->qdisc_sleeping;
+	struct netdev_queue *txq = &local->mdev->tx_queue;
+	struct Qdisc *root_qd = txq->qdisc_sleeping;
 	struct ieee80211_sched_data *q = qdisc_priv(root_qd);
 	struct Qdisc *qdisc = q->queues[queue];
 	struct sk_buff *skb = NULL;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e2389f161e4..b483bbea611 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -166,7 +166,8 @@ replay:
 
 	/* Find qdisc */
 	if (!parent) {
-		q = dev->qdisc_sleeping;
+		struct netdev_queue *dev_queue = &dev->tx_queue;
+		q = dev_queue->qdisc_sleeping;
 		parent = q->handle;
 	} else {
 		q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
@@ -390,6 +391,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
+	struct netdev_queue *dev_queue;
 	int t;
 	int s_t;
 	struct net_device *dev;
@@ -408,8 +410,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
 		return skb->len;
 
+	dev_queue = &dev->tx_queue;
 	if (!tcm->tcm_parent)
-		q = dev->qdisc_sleeping;
+		q = dev_queue->qdisc_sleeping;
 	else
 		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
 	if (!q)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 570cef2a9c5..2313fa7c97b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -185,9 +185,10 @@ EXPORT_SYMBOL(unregister_qdisc);
 
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
+	struct netdev_queue *dev_queue = &dev->tx_queue;
 	struct Qdisc *q;
 
-	list_for_each_entry(q, &dev->qdisc_list, list) {
+	list_for_each_entry(q, &dev_queue->qdisc_list, list) {
 		if (q->handle == handle)
 			return q;
 	}
@@ -441,6 +442,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 static struct Qdisc *
 dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 {
+	struct netdev_queue *dev_queue;
 	struct Qdisc *oqdisc;
 
 	if (dev->flags & IFF_UP)
@@ -459,8 +461,8 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 		}
 
 	} else {
-
-		oqdisc = dev->qdisc_sleeping;
+		dev_queue = &dev->tx_queue;
+		oqdisc = dev_queue->qdisc_sleeping;
 
 		/* Prune old scheduler */
 		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
@@ -469,8 +471,8 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 		/* ... and graft new one */
 		if (qdisc == NULL)
 			qdisc = &noop_qdisc;
-		dev->qdisc_sleeping = qdisc;
-		dev->qdisc = &noop_qdisc;
+		dev_queue->qdisc_sleeping = qdisc;
+		dev_queue->qdisc = &noop_qdisc;
 	}
 
 	qdisc_unlock_tree(dev);
@@ -633,7 +635,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 			}
 		}
 		qdisc_lock_tree(dev);
-		list_add_tail(&sch->list, &dev->qdisc_list);
+		list_add_tail(&sch->list, &dev_queue->qdisc_list);
 		qdisc_unlock_tree(dev);
 
 		return sch;
@@ -740,7 +742,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 				q = dev->qdisc_ingress;
 			}
 		} else {
-			q = dev->qdisc_sleeping;
+			struct netdev_queue *dev_queue = &dev->tx_queue;
+			q = dev_queue->qdisc_sleeping;
 		}
 		if (!q)
 			return -ENOENT;
@@ -814,7 +817,8 @@ replay:
 				q = dev->qdisc_ingress;
 			}
 		} else {
-			q = dev->qdisc_sleeping;
+			struct netdev_queue *dev_queue = &dev->tx_queue;
+			q = dev_queue->qdisc_sleeping;
 		}
 
 		/* It may be default qdisc, ignore it */
@@ -1015,12 +1019,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 	read_lock(&dev_base_lock);
 	idx = 0;
 	for_each_netdev(&init_net, dev) {
+		struct netdev_queue *dev_queue;
 		if (idx < s_idx)
 			goto cont;
 		if (idx > s_idx)
 			s_q_idx = 0;
 		q_idx = 0;
-		list_for_each_entry(q, &dev->qdisc_list, list) {
+		dev_queue = &dev->tx_queue;
+		list_for_each_entry(q, &dev_queue->qdisc_list, list) {
 			if (q_idx < s_q_idx) {
 				q_idx++;
 				continue;
@@ -1054,6 +1060,7 @@ done:
 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
+	struct netdev_queue *dev_queue;
 	struct tcmsg *tcm = NLMSG_DATA(n);
 	struct nlattr *tca[TCA_MAX + 1];
 	struct net_device *dev;
@@ -1091,6 +1098,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	/* Step 1. Determine qdisc handle X:0 */
 
+	dev_queue = &dev->tx_queue;
 	if (pid != TC_H_ROOT) {
 		u32 qid1 = TC_H_MAJ(pid);
 
@@ -1101,7 +1109,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		} else if (qid1) {
 			qid = qid1;
 		} else if (qid == 0)
-			qid = dev->qdisc_sleeping->handle;
+			qid = dev_queue->qdisc_sleeping->handle;
 
 		/* Now qid is genuine qdisc handle consistent
 		   both with parent and child.
@@ -1112,7 +1120,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			pid = TC_H_MAKE(qid, pid);
 	} else {
 		if (qid == 0)
-			qid = dev->qdisc_sleeping->handle;
+			qid = dev_queue->qdisc_sleeping->handle;
 	}
 
 	/* OK. Locate qdisc */
@@ -1248,6 +1256,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
+	struct netdev_queue *dev_queue;
 	int t;
 	int s_t;
 	struct net_device *dev;
@@ -1266,7 +1275,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	s_t = cb->args[0];
 	t = 0;
 
-	list_for_each_entry(q, &dev->qdisc_list, list) {
+	dev_queue = &dev->tx_queue;
+	list_for_each_entry(q, &dev_queue->qdisc_list, list) {
 		if (t < s_t || !q->ops->cl_ops ||
 		    (tcm->tcm_parent &&
 		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 804d44b0034..3223e5ba76a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -122,7 +122,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
  *
  * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
  * device at a time. queue->lock serializes queue accesses for
- * this device AND dev->qdisc pointer itself.
+ * this device AND txq->qdisc pointer itself.
  *
  *  netif_tx_lock serializes accesses to device driver.
  *
@@ -138,7 +138,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
  */
 static inline int qdisc_restart(struct net_device *dev)
 {
-	struct Qdisc *q = dev->qdisc;
+	struct netdev_queue *txq = &dev->tx_queue;
+	struct Qdisc *q = txq->qdisc;
 	struct sk_buff *skb;
 	int ret = NETDEV_TX_BUSY;
 
@@ -148,15 +149,15 @@ static inline int qdisc_restart(struct net_device *dev)
 
 
 	/* And release queue */
-	spin_unlock(&q->dev_queue->lock);
+	spin_unlock(&txq->lock);
 
 	HARD_TX_LOCK(dev, smp_processor_id());
 	if (!netif_subqueue_stopped(dev, skb))
 		ret = dev_hard_start_xmit(skb, dev);
 	HARD_TX_UNLOCK(dev);
 
-	spin_lock(&q->dev_queue->lock);
-	q = dev->qdisc;
+	spin_lock(&txq->lock);
+	q = txq->qdisc;
 
 	switch (ret) {
 	case NETDEV_TX_OK:
@@ -207,9 +208,10 @@ void __qdisc_run(struct net_device *dev)
 static void dev_watchdog(unsigned long arg)
 {
 	struct net_device *dev = (struct net_device *)arg;
+	struct netdev_queue *txq = &dev->tx_queue;
 
 	netif_tx_lock(dev);
-	if (dev->qdisc != &noop_qdisc) {
+	if (txq->qdisc != &noop_qdisc) {
 		if (netif_device_present(dev) &&
 		    netif_running(dev) &&
 		    netif_carrier_ok(dev)) {
@@ -539,53 +541,63 @@ EXPORT_SYMBOL(qdisc_destroy);
 
 void dev_activate(struct net_device *dev)
 {
+	struct netdev_queue *txq = &dev->tx_queue;
+
 	/* No queueing discipline is attached to device;
 	   create default one i.e. pfifo_fast for devices,
 	   which need queueing and noqueue_qdisc for
 	   virtual interfaces
 	 */
 
-	if (dev->qdisc_sleeping == &noop_qdisc) {
+	if (txq->qdisc_sleeping == &noop_qdisc) {
 		struct Qdisc *qdisc;
 		if (dev->tx_queue_len) {
-			qdisc = qdisc_create_dflt(dev, &dev->tx_queue,
+			qdisc = qdisc_create_dflt(dev, txq,
 						  &pfifo_fast_ops,
 						  TC_H_ROOT);
 			if (qdisc == NULL) {
 				printk(KERN_INFO "%s: activation failed\n", dev->name);
 				return;
 			}
-			list_add_tail(&qdisc->list, &dev->qdisc_list);
+			list_add_tail(&qdisc->list, &txq->qdisc_list);
 		} else {
 			qdisc =  &noqueue_qdisc;
 		}
-		dev->qdisc_sleeping = qdisc;
+		txq->qdisc_sleeping = qdisc;
 	}
 
 	if (!netif_carrier_ok(dev))
 		/* Delay activation until next carrier-on event */
 		return;
 
-	spin_lock_bh(&dev->tx_queue.lock);
-	rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping);
-	if (dev->qdisc != &noqueue_qdisc) {
+	spin_lock_bh(&txq->lock);
+	rcu_assign_pointer(txq->qdisc, txq->qdisc_sleeping);
+	if (txq->qdisc != &noqueue_qdisc) {
 		dev->trans_start = jiffies;
 		dev_watchdog_up(dev);
 	}
-	spin_unlock_bh(&dev->tx_queue.lock);
+	spin_unlock_bh(&txq->lock);
+}
+
+static void dev_deactivate_queue(struct net_device *dev,
+				 struct netdev_queue *dev_queue,
+				 struct Qdisc *qdisc_default)
+{
+	struct Qdisc *qdisc = dev_queue->qdisc;
+
+	if (qdisc) {
+		dev_queue->qdisc = qdisc_default;
+		qdisc_reset(qdisc);
+	}
 }
 
 void dev_deactivate(struct net_device *dev)
 {
-	struct Qdisc *qdisc;
 	struct sk_buff *skb;
 	int running;
 
 	spin_lock_bh(&dev->tx_queue.lock);
-	qdisc = dev->qdisc;
-	dev->qdisc = &noop_qdisc;
-
-	qdisc_reset(qdisc);
+	dev_deactivate_queue(dev, &dev->tx_queue, &noop_qdisc);
 
 	skb = dev->gso_skb;
 	dev->gso_skb = NULL;
@@ -622,32 +634,44 @@ void dev_deactivate(struct net_device *dev)
 	} while (WARN_ON_ONCE(running));
 }
 
+static void dev_init_scheduler_queue(struct net_device *dev,
+				     struct netdev_queue *dev_queue,
+				     struct Qdisc *qdisc)
+{
+	dev_queue->qdisc = qdisc;
+	dev_queue->qdisc_sleeping = qdisc;
+	INIT_LIST_HEAD(&dev_queue->qdisc_list);
+}
+
 void dev_init_scheduler(struct net_device *dev)
 {
 	qdisc_lock_tree(dev);
-	dev->qdisc = &noop_qdisc;
-	dev->qdisc_sleeping = &noop_qdisc;
-	INIT_LIST_HEAD(&dev->qdisc_list);
+	dev_init_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc);
+	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);
 	qdisc_unlock_tree(dev);
 
 	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
 }
 
-void dev_shutdown(struct net_device *dev)
+static void dev_shutdown_scheduler_queue(struct net_device *dev,
+					 struct netdev_queue *dev_queue,
+					 struct Qdisc *qdisc_default)
 {
-	struct Qdisc *qdisc;
+	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+
+	if (qdisc) {
+		dev_queue->qdisc = qdisc_default;
+		dev_queue->qdisc_sleeping = qdisc_default;
 
-	qdisc_lock_tree(dev);
-	qdisc = dev->qdisc_sleeping;
-	dev->qdisc = &noop_qdisc;
-	dev->qdisc_sleeping = &noop_qdisc;
-	qdisc_destroy(qdisc);
-#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
-	if ((qdisc = dev->qdisc_ingress) != NULL) {
-		dev->qdisc_ingress = NULL;
 		qdisc_destroy(qdisc);
 	}
-#endif
+}
+
+void dev_shutdown(struct net_device *dev)
+{
+	qdisc_lock_tree(dev);
+	dev_shutdown_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc);
+	dev_shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
 	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
 	qdisc_unlock_tree(dev);
 }
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 71b73c528f9..4093f1eaaf6 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -180,7 +180,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	 * skb will be queued.
 	 */
 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
-		struct Qdisc *rootq = qdisc_dev(sch)->qdisc;
+		struct Qdisc *rootq = qdisc_dev(sch)->tx_queue.qdisc;
 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 		q->duplicate = 0;
 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 4f3054e8e1a..8ac05981be2 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -107,17 +107,19 @@ static struct sk_buff *
 teql_dequeue(struct Qdisc* sch)
 {
 	struct teql_sched_data *dat = qdisc_priv(sch);
+	struct netdev_queue *dat_queue;
 	struct sk_buff *skb;
 
 	skb = __skb_dequeue(&dat->q);
+	dat_queue = &dat->m->dev->tx_queue;
 	if (skb == NULL) {
-		struct net_device *m = qdisc_dev(dat->m->dev->qdisc);
+		struct net_device *m = qdisc_dev(dat_queue->qdisc);
 		if (m) {
 			dat->m->slaves = sch;
 			netif_wake_queue(m);
 		}
 	}
-	sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
+	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 	return skb;
 }
 
@@ -155,7 +157,7 @@ teql_destroy(struct Qdisc* sch)
 					if (q == master->slaves) {
 						master->slaves = NULL;
 						spin_lock_bh(&master->dev->tx_queue.lock);
-						qdisc_reset(master->dev->qdisc);
+						qdisc_reset(master->dev->tx_queue.qdisc);
 						spin_unlock_bh(&master->dev->tx_queue.lock);
 					}
 				}
@@ -216,7 +218,7 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 static int
 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
 {
-	struct teql_sched_data *q = qdisc_priv(dev->qdisc);
+	struct teql_sched_data *q = qdisc_priv(dev->tx_queue.qdisc);
 	struct neighbour *mn = skb->dst->neighbour;
 	struct neighbour *n = q->ncache;
 
@@ -252,7 +254,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
 static inline int teql_resolve(struct sk_buff *skb,
 			       struct sk_buff *skb_res, struct net_device *dev)
 {
-	if (dev->qdisc == &noop_qdisc)
+	if (dev->tx_queue.qdisc == &noop_qdisc)
 		return -ENODEV;
 
 	if (dev->header_ops == NULL ||
@@ -284,7 +286,7 @@ restart:
 	do {
 		struct net_device *slave = qdisc_dev(q);
 
-		if (slave->qdisc_sleeping != q)
+		if (slave->tx_queue.qdisc_sleeping != q)
 			continue;
 		if (netif_queue_stopped(slave) ||
 		    __netif_subqueue_stopped(slave, subq) ||
-- 
cgit v1.2.3-70-g09d2


From 816f3258e70db38d6d92c8d871377179fd69160f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 22:49:00 -0700
Subject: netdev: Kill qdisc_ingress, use netdev->rx_queue.qdisc instead.

Now that our qdisc management is bi-directional, per-queue, and fully
orthogonal, there is no reason to have a special ingress qdisc pointer
in struct net_device.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 ---
 net/core/dev.c            |  4 ++--
 net/sched/sch_api.c       | 11 ++++++-----
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index df702a7b3db..e7c49246fd8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -634,9 +634,6 @@ struct net_device
 
 	struct netdev_queue	rx_queue;
 	struct netdev_queue	tx_queue ____cacheline_aligned_in_smp;
-
-	struct Qdisc		*qdisc_ingress;
-
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 
 	/* Partially transmitted GSO packet. */
diff --git a/net/core/dev.c b/net/core/dev.c
index ce79c28d739..ab760a954d9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2033,7 +2033,7 @@ static int ing_filter(struct sk_buff *skb)
 	rxq = &dev->rx_queue;
 
 	spin_lock(&rxq->lock);
-	if ((q = dev->qdisc_ingress) != NULL)
+	if ((q = rxq->qdisc) != NULL)
 		result = q->enqueue(skb, q);
 	spin_unlock(&rxq->lock);
 
@@ -2044,7 +2044,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 					 struct packet_type **pt_prev,
 					 int *ret, struct net_device *orig_dev)
 {
-	if (!skb->dev->qdisc_ingress)
+	if (!skb->dev->rx_queue.qdisc)
 		goto out;
 
 	if (*pt_prev) {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2313fa7c97b..4003c280b69 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -450,14 +450,15 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 
 	qdisc_lock_tree(dev);
 	if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
-		oqdisc = dev->qdisc_ingress;
+		dev_queue = &dev->rx_queue;
+		oqdisc = dev_queue->qdisc;
 		/* Prune old scheduler */
 		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
 			/* delete */
 			qdisc_reset(oqdisc);
-			dev->qdisc_ingress = NULL;
+			dev_queue->qdisc = NULL;
 		} else {  /* new */
-			dev->qdisc_ingress = qdisc;
+			dev_queue->qdisc = qdisc;
 		}
 
 	} else {
@@ -739,7 +740,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
 			} else { /* ingress */
-				q = dev->qdisc_ingress;
+				q = dev->rx_queue.qdisc;
 			}
 		} else {
 			struct netdev_queue *dev_queue = &dev->tx_queue;
@@ -814,7 +815,7 @@ replay:
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
 			} else { /*ingress */
-				q = dev->qdisc_ingress;
+				q = dev->rx_queue.qdisc;
 			}
 		} else {
 			struct netdev_queue *dev_queue = &dev->tx_queue;
-- 
cgit v1.2.3-70-g09d2


From 68dfb42798e1eb2d42acbf872925cc75f1487d9b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 22:57:31 -0700
Subject: pkt_sched: Kill stats_lock member of struct Qdisc.

It is always equal to qdisc->dev_queue->lock

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 1 -
 net/sched/sch_api.c       | 9 ++++-----
 net/sched/sch_generic.c   | 1 -
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 66ec36d8ac9..ea71705e9c7 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -43,7 +43,6 @@ struct Qdisc
 	struct gnet_stats_basic	bstats;
 	struct gnet_stats_queue	qstats;
 	struct gnet_stats_rate_est	rate_est;
-	spinlock_t		*stats_lock;
 	struct rcu_head 	q_rcu;
 	int			(*reshape_fail)(struct sk_buff *skb,
 					struct Qdisc *q);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4003c280b69..e73bd68aa7a 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -604,7 +604,6 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 
 	sch->parent = parent;
 
-	sch->stats_lock = &dev_queue->lock;
 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
@@ -622,7 +621,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
 		if (tca[TCA_RATE]) {
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
-						sch->stats_lock,
+						&sch->dev_queue->lock,
 						tca[TCA_RATE]);
 			if (err) {
 				/*
@@ -664,7 +663,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 	}
 	if (tca[TCA_RATE])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
-			sch->stats_lock, tca[TCA_RATE]);
+				      &sch->dev_queue->lock, tca[TCA_RATE]);
 	return 0;
 }
 
@@ -954,7 +953,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	q->qstats.qlen = q->q.qlen;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-			TCA_XSTATS, q->stats_lock, &d) < 0)
+					 TCA_XSTATS, &q->dev_queue->lock, &d) < 0)
 		goto nla_put_failure;
 
 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -1203,7 +1202,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 		goto nla_put_failure;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-			TCA_XSTATS, q->stats_lock, &d) < 0)
+					 TCA_XSTATS, &q->dev_queue->lock, &d) < 0)
 		goto nla_put_failure;
 
 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3223e5ba76a..dda78ee314e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -484,7 +484,6 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev,
 	sch = qdisc_alloc(dev_queue, ops);
 	if (IS_ERR(sch))
 		goto errout;
-	sch->stats_lock = &dev_queue->lock;
 	sch->parent = parentid;
 
 	if (!ops->init || ops->init(sch, NULL) == 0)
-- 
cgit v1.2.3-70-g09d2


From 74d58a0c1d5b348a8d4ea9643b573a6ab455a3f3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 22:57:51 -0700
Subject: pkt_sched: Make netem queue agnostic.

It just wants the root qdisc given an arbitrary qdisc,
and that is simply qdisc->dev_queue->qdisc

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
---
 net/sched/sch_netem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 4093f1eaaf6..bc585f2089f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -180,7 +180,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	 * skb will be queued.
 	 */
 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
-		struct Qdisc *rootq = qdisc_dev(sch)->tx_queue.qdisc;
+		struct Qdisc *rootq = sch->dev_queue->qdisc;
 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 		q->duplicate = 0;
 
-- 
cgit v1.2.3-70-g09d2


From ee609cb36220d18c0cf476b066a5ab7e6f6d3a69 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 22:58:37 -0700
Subject: netdev: Move next_sched into struct netdev_queue.

We schedule queues, not the device, for output queue processing in BH.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  5 ++---
 net/core/dev.c            | 15 +++++++--------
 2 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e7c49246fd8..1379c822e51 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -454,6 +454,7 @@ struct netdev_queue {
 	struct Qdisc		*qdisc;
 	struct Qdisc		*qdisc_sleeping;
 	struct list_head	qdisc_list;
+	struct netdev_queue	*next_sched;
 };
 
 /*
@@ -545,8 +546,6 @@ struct net_device
 #define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
 #define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
 
-	struct net_device	*next_sched;
-
 	/* Interface index. Unique device identifier	*/
 	int			ifindex;
 	int			iflink;
@@ -940,7 +939,7 @@ static inline int unregister_gifconf(unsigned int family)
  */
 struct softnet_data
 {
-	struct net_device	*output_queue;
+	struct netdev_queue	*output_queue;
 	struct sk_buff_head	input_pkt_queue;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
diff --git a/net/core/dev.c b/net/core/dev.c
index ab760a954d9..d6b8d3c3e6e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1323,13 +1323,14 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 void __netif_schedule(struct net_device *dev)
 {
 	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+		struct netdev_queue *txq = &dev->tx_queue;
 		unsigned long flags;
 		struct softnet_data *sd;
 
 		local_irq_save(flags);
 		sd = &__get_cpu_var(softnet_data);
-		dev->next_sched = sd->output_queue;
-		sd->output_queue = dev;
+		txq->next_sched = sd->output_queue;
+		sd->output_queue = txq;
 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
 		local_irq_restore(flags);
 	}
@@ -1912,7 +1913,7 @@ static void net_tx_action(struct softirq_action *h)
 	}
 
 	if (sd->output_queue) {
-		struct net_device *head;
+		struct netdev_queue *head;
 
 		local_irq_disable();
 		head = sd->output_queue;
@@ -1920,12 +1921,10 @@ static void net_tx_action(struct softirq_action *h)
 		local_irq_enable();
 
 		while (head) {
-			struct net_device *dev = head;
-			struct netdev_queue *txq;
+			struct netdev_queue *txq = head;
+			struct net_device *dev = txq->dev;
 			head = head->next_sched;
 
-			txq = &dev->tx_queue;
-
 			smp_mb__before_clear_bit();
 			clear_bit(__LINK_STATE_SCHED, &dev->state);
 
@@ -4346,7 +4345,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 			    void *ocpu)
 {
 	struct sk_buff **list_skb;
-	struct net_device **list_net;
+	struct netdev_queue **list_net;
 	struct sk_buff *skb;
 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
 	struct softnet_data *sd, *oldsd;
-- 
cgit v1.2.3-70-g09d2


From 6fa9864b53f0680e432a2c431c2cf2055daa3a88 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 23:01:06 -0700
Subject: net: Clean up explicit ->tx_queue references in link watch.

First, we add a qdisc_tx_changing() helper which returns true if the
qdisc attachment is in transition.

Second, we remove an assertion warning which is of limited value and
is hard to express precisely in a multiqueue environment.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  8 ++++++++
 net/core/link_watch.c     | 15 +++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 8cfdaebbbab..bf8f7264a77 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -242,6 +242,14 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 	return (q->q.qlen == 0);
 }
 
+/* Are any of the TX qdiscs changing?  */
+static inline bool qdisc_tx_changing(struct net_device *dev)
+{
+	struct netdev_queue *txq = &dev->tx_queue;
+
+	return (txq->qdisc != txq->qdisc_sleeping);
+}
+
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 50218218445..bf8f7af699d 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -77,12 +77,10 @@ static void rfc2863_policy(struct net_device *dev)
 }
 
 
-static int linkwatch_urgent_event(struct net_device *dev)
+static bool linkwatch_urgent_event(struct net_device *dev)
 {
-	struct netdev_queue *txq = &dev->tx_queue;
-
 	return netif_running(dev) && netif_carrier_ok(dev) &&
-	       txq->qdisc != txq->qdisc_sleeping;
+		qdisc_tx_changing(dev);
 }
 
 
@@ -182,12 +180,9 @@ static void __linkwatch_run_queue(int urgent_only)
 
 		rfc2863_policy(dev);
 		if (dev->flags & IFF_UP) {
-			if (netif_carrier_ok(dev)) {
-				struct netdev_queue *txq = &dev->tx_queue;
-
-				WARN_ON(txq->qdisc_sleeping == &noop_qdisc);
+			if (netif_carrier_ok(dev))
 				dev_activate(dev);
-			} else
+			else
 				dev_deactivate(dev);
 
 			netdev_state_change(dev);
@@ -218,7 +213,7 @@ static void linkwatch_event(struct work_struct *dummy)
 
 void linkwatch_fire_event(struct net_device *dev)
 {
-	int urgent = linkwatch_urgent_event(dev);
+	bool urgent = linkwatch_urgent_event(dev);
 
 	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
 		dev_hold(dev);
-- 
cgit v1.2.3-70-g09d2


From 052979499c767268b912d25031ae524c451679d0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 23:01:27 -0700
Subject: pkt_sched: Add qdisc_tx_is_noop() helper and use in IPV6.

This indicates if the NOOP scheduler is what is active for TX on a
given device.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 8 ++++++++
 net/ipv6/addrconf.c       | 5 ++---
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index bf8f7264a77..5ba66b55557 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -250,6 +250,14 @@ static inline bool qdisc_tx_changing(struct net_device *dev)
 	return (txq->qdisc != txq->qdisc_sleeping);
 }
 
+/* Is the device using the noop qdisc?  */
+static inline bool qdisc_tx_is_noop(const struct net_device *dev)
+{
+	const struct netdev_queue *txq = &dev->tx_queue;
+
+	return (txq->qdisc == &noop_qdisc);
+}
+
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5c84c798331..30184e0dd74 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -229,10 +229,9 @@ const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_IN
 const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
 
 /* Check if a valid qdisc is available */
-static inline int addrconf_qdisc_ok(struct net_device *dev)
+static inline bool addrconf_qdisc_ok(const struct net_device *dev)
 {
-	struct netdev_queue *txq = &dev->tx_queue;
-	return (txq->qdisc != &noop_qdisc);
+	return !qdisc_tx_is_noop(dev);
 }
 
 /* Check if a route is valid prefix route */
-- 
cgit v1.2.3-70-g09d2


From c2aa288548a29d909ec875e81137fb0dbbb420b7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 23:01:52 -0700
Subject: mac80211: Decrease number of explicit ->tx_queue references.

Accomplish this by using local variables.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/main.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 12aeaf78ae7..2968baa66b9 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -570,6 +570,7 @@ static int ieee80211_stop(struct net_device *dev)
 int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct netdev_queue *txq;
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *sdata;
 	u16 start_seq_num = 0;
@@ -636,7 +637,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 
 	/* ensure that TX flow won't interrupt us
 	 * until the end of the call to requeue function */
-	spin_lock_bh(&local->mdev->tx_queue.lock);
+	txq = &local->mdev->tx_queue;
+	spin_lock_bh(&txq->lock);
 
 	/* create a new queue for this aggregation */
 	ret = ieee80211_ht_agg_queue_add(local, sta, tid);
@@ -675,7 +677,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 
 	/* Will put all the packets in the new SW queue */
 	ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
-	spin_unlock_bh(&local->mdev->tx_queue.lock);
+	spin_unlock_bh(&txq->lock);
 	spin_unlock_bh(&sta->lock);
 
 	/* send an addBA request */
@@ -701,7 +703,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 err_unlock_queue:
 	kfree(sta->ampdu_mlme.tid_tx[tid]);
 	sta->ampdu_mlme.tid_tx[tid] = NULL;
-	spin_unlock_bh(&local->mdev->tx_queue.lock);
+	spin_unlock_bh(&txq->lock);
 	ret = -EBUSY;
 err_unlock_sta:
 	spin_unlock_bh(&sta->lock);
@@ -826,6 +828,7 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
 void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct netdev_queue *txq;
 	struct sta_info *sta;
 	u8 *state;
 	int agg_queue;
@@ -875,10 +878,11 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 
 	/* avoid ordering issues: we are the only one that can modify
 	 * the content of the qdiscs */
-	spin_lock_bh(&local->mdev->tx_queue.lock);
+	txq = &local->mdev->tx_queue;
+	spin_lock_bh(&txq->lock);
 	/* remove the queue for this aggregation */
 	ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
-	spin_unlock_bh(&local->mdev->tx_queue.lock);
+	spin_unlock_bh(&txq->lock);
 
 	/* we just requeued the all the frames that were in the removed
 	 * queue, and since we might miss a softirq we do netif_schedule.
-- 
cgit v1.2.3-70-g09d2


From 970565bbad0c7b98db0d14131a69e5a0f4445d49 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 23:10:33 -0700
Subject: netdev: Move gso_skb into netdev_queue.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 +---
 net/sched/sch_generic.c   | 42 +++++++++++++++++++++++-------------------
 2 files changed, 24 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1379c822e51..aae6c6d153f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -452,6 +452,7 @@ struct netdev_queue {
 	spinlock_t		lock;
 	struct net_device	*dev;
 	struct Qdisc		*qdisc;
+	struct sk_buff		*gso_skb;
 	struct Qdisc		*qdisc_sleeping;
 	struct list_head	qdisc_list;
 	struct netdev_queue	*next_sched;
@@ -635,9 +636,6 @@ struct net_device
 	struct netdev_queue	tx_queue ____cacheline_aligned_in_smp;
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 
-	/* Partially transmitted GSO packet. */
-	struct sk_buff		*gso_skb;
-
 /*
  * One part is mostly used on xmit path (device)
  */
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index dda78ee314e..8247a406a40 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -63,10 +63,11 @@ static inline int qdisc_qlen(struct Qdisc *q)
 }
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
+				  struct netdev_queue *dev_queue,
 				  struct Qdisc *q)
 {
 	if (unlikely(skb->next))
-		dev->gso_skb = skb;
+		dev_queue->gso_skb = skb;
 	else
 		q->ops->requeue(skb, q);
 
@@ -75,12 +76,13 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
 }
 
 static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
+					      struct netdev_queue *dev_queue,
 					      struct Qdisc *q)
 {
 	struct sk_buff *skb;
 
-	if ((skb = dev->gso_skb))
-		dev->gso_skb = NULL;
+	if ((skb = dev_queue->gso_skb))
+		dev_queue->gso_skb = NULL;
 	else
 		skb = q->dequeue(q);
 
@@ -89,6 +91,7 @@ static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
 
 static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 					   struct net_device *dev,
+					   struct netdev_queue *dev_queue,
 					   struct Qdisc *q)
 {
 	int ret;
@@ -111,7 +114,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 		 * some time.
 		 */
 		__get_cpu_var(netdev_rx_stat).cpu_collision++;
-		ret = dev_requeue_skb(skb, dev, q);
+		ret = dev_requeue_skb(skb, dev, dev_queue, q);
 	}
 
 	return ret;
@@ -144,7 +147,7 @@ static inline int qdisc_restart(struct net_device *dev)
 	int ret = NETDEV_TX_BUSY;
 
 	/* Dequeue packet */
-	if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
+	if (unlikely((skb = dev_dequeue_skb(dev, txq, q)) == NULL))
 		return 0;
 
 
@@ -167,7 +170,7 @@ static inline int qdisc_restart(struct net_device *dev)
 
 	case NETDEV_TX_LOCKED:
 		/* Driver try lock failed */
-		ret = handle_dev_cpu_collision(skb, dev, q);
+		ret = handle_dev_cpu_collision(skb, dev, txq, q);
 		break;
 
 	default:
@@ -176,7 +179,7 @@ static inline int qdisc_restart(struct net_device *dev)
 			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
 			       dev->name, ret, q->q.qlen);
 
-		ret = dev_requeue_skb(skb, dev, q);
+		ret = dev_requeue_skb(skb, dev, txq, q);
 		break;
 	}
 
@@ -578,31 +581,32 @@ void dev_activate(struct net_device *dev)
 	spin_unlock_bh(&txq->lock);
 }
 
-static void dev_deactivate_queue(struct net_device *dev,
-				 struct netdev_queue *dev_queue,
+static void dev_deactivate_queue(struct netdev_queue *dev_queue,
 				 struct Qdisc *qdisc_default)
 {
-	struct Qdisc *qdisc = dev_queue->qdisc;
+	struct Qdisc *qdisc;
+	struct sk_buff *skb;
+
+	spin_lock_bh(&dev_queue->lock);
 
+	qdisc = dev_queue->qdisc;
 	if (qdisc) {
 		dev_queue->qdisc = qdisc_default;
 		qdisc_reset(qdisc);
 	}
+	skb = dev_queue->gso_skb;
+	dev_queue->gso_skb = NULL;
+
+	spin_unlock_bh(&dev_queue->lock);
+
+	kfree_skb(skb);
 }
 
 void dev_deactivate(struct net_device *dev)
 {
-	struct sk_buff *skb;
 	int running;
 
-	spin_lock_bh(&dev->tx_queue.lock);
-	dev_deactivate_queue(dev, &dev->tx_queue, &noop_qdisc);
-
-	skb = dev->gso_skb;
-	dev->gso_skb = NULL;
-	spin_unlock_bh(&dev->tx_queue.lock);
-
-	kfree_skb(skb);
+	dev_deactivate_queue(&dev->tx_queue, &noop_qdisc);
 
 	dev_watchdog_down(dev);
 
-- 
cgit v1.2.3-70-g09d2


From 86d804e10a37cd86f16bf72386c37e843a98a74b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 23:11:25 -0700
Subject: netdev: Make netif_schedule() routines work with netdev_queue
 objects.

Only plain netif_schedule() remains taking a net_device, mostly as a
compatability item while we transition the rest of these interfaces.

Everything else calls netif_schedule_queue() or __netif_schedule(),
both of which take a netdev_queue pointer.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 17 ++++++++++++-----
 net/core/dev.c            |  9 +++++----
 net/mac80211/main.c       |  4 ++--
 net/sched/sch_api.c       |  4 ++--
 net/sched/sch_cbq.c       |  2 +-
 net/sched/sch_generic.c   | 10 +++++-----
 6 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index aae6c6d153f..28aa8e77cee 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -952,12 +952,19 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data);
 
 #define HAVE_NETIF_QUEUE
 
-extern void __netif_schedule(struct net_device *dev);
+extern void __netif_schedule(struct netdev_queue *txq);
 
-static inline void netif_schedule(struct net_device *dev)
+static inline void netif_schedule_queue(struct netdev_queue *txq)
 {
+	struct net_device *dev = txq->dev;
+
 	if (!test_bit(__LINK_STATE_XOFF, &dev->state))
-		__netif_schedule(dev);
+		__netif_schedule(txq);
+}
+
+static inline void netif_schedule(struct net_device *dev)
+{
+	netif_schedule_queue(&dev->tx_queue);
 }
 
 /**
@@ -987,7 +994,7 @@ static inline void netif_wake_queue(struct net_device *dev)
 	}
 #endif
 	if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
-		__netif_schedule(dev);
+		__netif_schedule(&dev->tx_queue);
 }
 
 /**
@@ -1103,7 +1110,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 #endif
 	if (test_and_clear_bit(__LINK_STATE_XOFF,
 			       &dev->egress_subqueue[queue_index].state))
-		__netif_schedule(dev);
+		__netif_schedule(&dev->tx_queue);
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index d6b8d3c3e6e..0dc888ad421 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1320,12 +1320,13 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 }
 
 
-void __netif_schedule(struct net_device *dev)
+void __netif_schedule(struct netdev_queue *txq)
 {
+	struct net_device *dev = txq->dev;
+
 	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
-		struct netdev_queue *txq = &dev->tx_queue;
-		unsigned long flags;
 		struct softnet_data *sd;
+		unsigned long flags;
 
 		local_irq_save(flags);
 		sd = &__get_cpu_var(softnet_data);
@@ -1932,7 +1933,7 @@ static void net_tx_action(struct softirq_action *h)
 				qdisc_run(dev);
 				spin_unlock(&txq->lock);
 			} else {
-				netif_schedule(dev);
+				netif_schedule_queue(txq);
 			}
 		}
 	}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 2968baa66b9..1c4d3ba6b87 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -885,10 +885,10 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	spin_unlock_bh(&txq->lock);
 
 	/* we just requeued the all the frames that were in the removed
-	 * queue, and since we might miss a softirq we do netif_schedule.
+	 * queue, and since we might miss a softirq we do netif_schedule_queue.
 	 * ieee80211_wake_queue is not used here as this queue is not
 	 * necessarily stopped */
-	netif_schedule(local->mdev);
+	netif_schedule_queue(txq);
 	spin_lock_bh(&sta->lock);
 	*state = HT_AGG_STATE_IDLE;
 	sta->ampdu_mlme.addba_req_num[tid] = 0;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e73bd68aa7a..95873f8dd37 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -282,11 +282,11 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 {
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
 						 timer);
-	struct net_device *dev = qdisc_dev(wd->qdisc);
+	struct netdev_queue *txq = wd->qdisc->dev_queue;
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
 	smp_wmb();
-	netif_schedule(dev);
+	netif_schedule_queue(txq);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 99ce3da2b0a..4efc836cbf3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -650,7 +650,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(qdisc_dev(sch));
+	netif_schedule_queue(sch->dev_queue);
 	return HRTIMER_NORESTART;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8247a406a40..407dfdb142a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -62,7 +62,7 @@ static inline int qdisc_qlen(struct Qdisc *q)
 	return q->q.qlen;
 }
 
-static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
+static inline int dev_requeue_skb(struct sk_buff *skb,
 				  struct netdev_queue *dev_queue,
 				  struct Qdisc *q)
 {
@@ -71,7 +71,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
 	else
 		q->ops->requeue(skb, q);
 
-	netif_schedule(dev);
+	netif_schedule_queue(dev_queue);
 	return 0;
 }
 
@@ -114,7 +114,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 		 * some time.
 		 */
 		__get_cpu_var(netdev_rx_stat).cpu_collision++;
-		ret = dev_requeue_skb(skb, dev, dev_queue, q);
+		ret = dev_requeue_skb(skb, dev_queue, q);
 	}
 
 	return ret;
@@ -179,7 +179,7 @@ static inline int qdisc_restart(struct net_device *dev)
 			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
 			       dev->name, ret, q->q.qlen);
 
-		ret = dev_requeue_skb(skb, dev, txq, q);
+		ret = dev_requeue_skb(skb, txq, q);
 		break;
 	}
 
@@ -200,7 +200,7 @@ void __qdisc_run(struct net_device *dev)
 		 * 2. we've been doing it for too long.
 		 */
 		if (need_resched() || jiffies != start_time) {
-			netif_schedule(dev);
+			netif_schedule_queue(&dev->tx_queue);
 			break;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From eb6aafe3f843cb0e939546c03540a3b4911b6964 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 23:12:38 -0700
Subject: pkt_sched: Make qdisc_run take a netdev_queue.

This allows us to use this calling convention all the way down into
qdisc_restart().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  8 +++++---
 net/core/dev.c          |  4 ++--
 net/sched/sch_generic.c | 26 ++++++++++++++------------
 3 files changed, 21 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 8e3a0c4e9d9..2311d242bb3 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -84,13 +84,15 @@ extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 		struct nlattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 
-extern void __qdisc_run(struct net_device *dev);
+extern void __qdisc_run(struct netdev_queue *txq);
 
-static inline void qdisc_run(struct net_device *dev)
+static inline void qdisc_run(struct netdev_queue *txq)
 {
+	struct net_device *dev = txq->dev;
+
 	if (!netif_queue_stopped(dev) &&
 	    !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
-		__qdisc_run(dev);
+		__qdisc_run(txq);
 }
 
 extern int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
diff --git a/net/core/dev.c b/net/core/dev.c
index 0dc888ad421..0218b0b9be8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1734,7 +1734,7 @@ gso:
 			/* reset queue_mapping to zero */
 			skb_set_queue_mapping(skb, 0);
 			rc = q->enqueue(skb, q);
-			qdisc_run(dev);
+			qdisc_run(txq);
 			spin_unlock(&txq->lock);
 
 			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
@@ -1930,7 +1930,7 @@ static void net_tx_action(struct softirq_action *h)
 			clear_bit(__LINK_STATE_SCHED, &dev->state);
 
 			if (spin_trylock(&txq->lock)) {
-				qdisc_run(dev);
+				qdisc_run(txq);
 				spin_unlock(&txq->lock);
 			} else {
 				netif_schedule_queue(txq);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 407dfdb142a..fcc7533f0bc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -75,9 +75,8 @@ static inline int dev_requeue_skb(struct sk_buff *skb,
 	return 0;
 }
 
-static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
-					      struct netdev_queue *dev_queue,
-					      struct Qdisc *q)
+static inline struct sk_buff *dequeue_skb(struct netdev_queue *dev_queue,
+					  struct Qdisc *q)
 {
 	struct sk_buff *skb;
 
@@ -90,10 +89,10 @@ static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
 }
 
 static inline int handle_dev_cpu_collision(struct sk_buff *skb,
-					   struct net_device *dev,
 					   struct netdev_queue *dev_queue,
 					   struct Qdisc *q)
 {
+	struct net_device *dev = dev_queue->dev;
 	int ret;
 
 	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
@@ -139,21 +138,23 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
  *				>0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct net_device *dev)
+static inline int qdisc_restart(struct netdev_queue *txq)
 {
-	struct netdev_queue *txq = &dev->tx_queue;
 	struct Qdisc *q = txq->qdisc;
-	struct sk_buff *skb;
 	int ret = NETDEV_TX_BUSY;
+	struct net_device *dev;
+	struct sk_buff *skb;
 
 	/* Dequeue packet */
-	if (unlikely((skb = dev_dequeue_skb(dev, txq, q)) == NULL))
+	if (unlikely((skb = dequeue_skb(txq, q)) == NULL))
 		return 0;
 
 
 	/* And release queue */
 	spin_unlock(&txq->lock);
 
+	dev = txq->dev;
+
 	HARD_TX_LOCK(dev, smp_processor_id());
 	if (!netif_subqueue_stopped(dev, skb))
 		ret = dev_hard_start_xmit(skb, dev);
@@ -170,7 +171,7 @@ static inline int qdisc_restart(struct net_device *dev)
 
 	case NETDEV_TX_LOCKED:
 		/* Driver try lock failed */
-		ret = handle_dev_cpu_collision(skb, dev, txq, q);
+		ret = handle_dev_cpu_collision(skb, txq, q);
 		break;
 
 	default:
@@ -186,11 +187,12 @@ static inline int qdisc_restart(struct net_device *dev)
 	return ret;
 }
 
-void __qdisc_run(struct net_device *dev)
+void __qdisc_run(struct netdev_queue *txq)
 {
+	struct net_device *dev = txq->dev;
 	unsigned long start_time = jiffies;
 
-	while (qdisc_restart(dev)) {
+	while (qdisc_restart(txq)) {
 		if (netif_queue_stopped(dev))
 			break;
 
@@ -200,7 +202,7 @@ void __qdisc_run(struct net_device *dev)
 		 * 2. we've been doing it for too long.
 		 */
 		if (need_resched() || jiffies != start_time) {
-			netif_schedule_queue(&dev->tx_queue);
+			netif_schedule_queue(txq);
 			break;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From c773e847ea8f6812804e40f52399c6921a00eab1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 23:13:53 -0700
Subject: netdev: Move _xmit_lock and xmit_lock_owner into netdev_queue.

Accesses are mostly structured such that when there are multiple TX
queues the code transformations will be a little bit simpler.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c         | 13 ++++++-
 drivers/net/hamradio/bpqether.c         | 12 ++++++-
 drivers/net/macvlan.c                   | 14 +++++++-
 drivers/net/wireless/hostap/hostap_hw.c | 12 ++++++-
 include/linux/netdevice.h               | 62 +++++++++++++++++++++------------
 net/8021q/vlan_dev.c                    | 15 ++++++--
 net/core/dev.c                          | 28 ++++++++++-----
 net/netrom/af_netrom.c                  | 12 ++++++-
 net/rose/af_rose.c                      | 12 ++++++-
 net/sched/sch_generic.c                 |  9 +++--
 10 files changed, 145 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d57b65dc2c7..dc733d75a5e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5019,6 +5019,17 @@ static int bond_check_params(struct bond_params *params)
 
 static struct lock_class_key bonding_netdev_xmit_lock_key;
 
+static void bond_set_lockdep_class_one(struct netdev_queue *txq)
+{
+	lockdep_set_class(&txq->_xmit_lock,
+			  &bonding_netdev_xmit_lock_key);
+}
+
+static void bond_set_lockdep_class(struct net_device *dev)
+{
+	bond_set_lockdep_class_one(&dev->tx_queue);
+}
+
 /* Create a new bond based on the specified name and bonding parameters.
  * If name is NULL, obtain a suitable "bond%d" name for us.
  * Caller must NOT hold rtnl_lock; we need to release it here before we
@@ -5076,7 +5087,7 @@ int bond_create(char *name, struct bond_params *params)
 		goto out_bond;
 	}
 
-	lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key);
+	bond_set_lockdep_class(bond_dev);
 
 	netif_carrier_off(bond_dev);
 
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index 5f4b4c6c9f7..fb186b8c3d4 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -124,6 +124,16 @@ static LIST_HEAD(bpq_devices);
  */
 static struct lock_class_key bpq_netdev_xmit_lock_key;
 
+static void bpq_set_lockdep_class_one(struct netdev_queue *txq)
+{
+	lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key);
+}
+
+static void bpq_set_lockdep_class(struct net_device *dev)
+{
+	bpq_set_lockdep_class_one(&dev->tx_queue);
+}
+
 /* ------------------------------------------------------------------------ */
 
 
@@ -523,7 +533,7 @@ static int bpq_new_device(struct net_device *edev)
 	err = register_netdevice(ndev);
 	if (err)
 		goto error;
-	lockdep_set_class(&ndev->_xmit_lock, &bpq_netdev_xmit_lock_key);
+	bpq_set_lockdep_class(ndev);
 
 	/* List protected by RTNL */
 	list_add_rcu(&bpq->bpq_list, &bpq_devices);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index c36a03ae9bf..c02ceaa4a21 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -277,6 +277,17 @@ static struct lock_class_key macvlan_netdev_xmit_lock_key;
 #define MACVLAN_STATE_MASK \
 	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
 
+static void macvlan_set_lockdep_class_one(struct netdev_queue *txq)
+{
+	lockdep_set_class(&txq->_xmit_lock,
+			  &macvlan_netdev_xmit_lock_key);
+}
+
+static void macvlan_set_lockdep_class(struct net_device *dev)
+{
+	macvlan_set_lockdep_class_one(&dev->tx_queue);
+}
+
 static int macvlan_init(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
@@ -287,7 +298,8 @@ static int macvlan_init(struct net_device *dev)
 	dev->features 		= lowerdev->features & MACVLAN_FEATURES;
 	dev->iflink		= lowerdev->ifindex;
 
-	lockdep_set_class(&dev->_xmit_lock, &macvlan_netdev_xmit_lock_key);
+	macvlan_set_lockdep_class(dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index 09004a632ae..c1f4bb005d9 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -3102,6 +3102,16 @@ static void prism2_clear_set_tim_queue(local_info_t *local)
  */
 static struct lock_class_key hostap_netdev_xmit_lock_key;
 
+static void prism2_set_lockdep_class_one(struct netdev_queue *txq)
+{
+	lockdep_set_class(&txq->_xmit_lock,
+			  &hostap_netdev_xmit_lock_key);
+}
+
+static void prism2_set_lockdep_class(struct net_device *dev)
+{
+	prism2_set_lockdep_class_one(&dev->tx_queue);
+}
 
 static struct net_device *
 prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx,
@@ -3268,7 +3278,7 @@ while (0)
 	if (ret >= 0)
 		ret = register_netdevice(dev);
 
-	lockdep_set_class(&dev->_xmit_lock, &hostap_netdev_xmit_lock_key);
+	prism2_set_lockdep_class(dev);
 	rtnl_unlock();
 	if (ret < 0) {
 		printk(KERN_WARNING "%s: register netdevice failed!\n",
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 28aa8e77cee..c8d5f128858 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -453,6 +453,8 @@ struct netdev_queue {
 	struct net_device	*dev;
 	struct Qdisc		*qdisc;
 	struct sk_buff		*gso_skb;
+	spinlock_t		_xmit_lock;
+	int			xmit_lock_owner;
 	struct Qdisc		*qdisc_sleeping;
 	struct list_head	qdisc_list;
 	struct netdev_queue	*next_sched;
@@ -639,12 +641,6 @@ struct net_device
 /*
  * One part is mostly used on xmit path (device)
  */
-	/* hard_start_xmit synchronizer */
-	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
-	/* cpu id of processor entered to hard_start_xmit or -1,
-	   if nobody entered there.
-	 */
-	int			xmit_lock_owner;
 	void			*priv;	/* pointer to private data	*/
 	int			(*hard_start_xmit) (struct sk_buff *skb,
 						    struct net_device *dev);
@@ -1402,52 +1398,72 @@ static inline void netif_rx_complete(struct net_device *dev,
  *
  * Get network device transmit lock
  */
-static inline void __netif_tx_lock(struct net_device *dev, int cpu)
+static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 {
-	spin_lock(&dev->_xmit_lock);
-	dev->xmit_lock_owner = cpu;
+	spin_lock(&txq->_xmit_lock);
+	txq->xmit_lock_owner = cpu;
 }
 
 static inline void netif_tx_lock(struct net_device *dev)
 {
-	__netif_tx_lock(dev, smp_processor_id());
+	__netif_tx_lock(&dev->tx_queue, smp_processor_id());
+}
+
+static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
+{
+	spin_lock_bh(&txq->_xmit_lock);
+	txq->xmit_lock_owner = smp_processor_id();
 }
 
 static inline void netif_tx_lock_bh(struct net_device *dev)
 {
-	spin_lock_bh(&dev->_xmit_lock);
-	dev->xmit_lock_owner = smp_processor_id();
+	__netif_tx_lock_bh(&dev->tx_queue);
 }
 
-static inline int netif_tx_trylock(struct net_device *dev)
+static inline int __netif_tx_trylock(struct netdev_queue *txq)
 {
-	int ok = spin_trylock(&dev->_xmit_lock);
+	int ok = spin_trylock(&txq->_xmit_lock);
 	if (likely(ok))
-		dev->xmit_lock_owner = smp_processor_id();
+		txq->xmit_lock_owner = smp_processor_id();
 	return ok;
 }
 
+static inline int netif_tx_trylock(struct net_device *dev)
+{
+	return __netif_tx_trylock(&dev->tx_queue);
+}
+
+static inline void __netif_tx_unlock(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+	spin_unlock(&txq->_xmit_lock);
+}
+
 static inline void netif_tx_unlock(struct net_device *dev)
 {
-	dev->xmit_lock_owner = -1;
-	spin_unlock(&dev->_xmit_lock);
+	__netif_tx_unlock(&dev->tx_queue);
+}
+
+static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+	spin_unlock_bh(&txq->_xmit_lock);
 }
 
 static inline void netif_tx_unlock_bh(struct net_device *dev)
 {
-	dev->xmit_lock_owner = -1;
-	spin_unlock_bh(&dev->_xmit_lock);
+	__netif_tx_unlock_bh(&dev->tx_queue);
 }
 
-#define HARD_TX_LOCK(dev, cpu) {			\
+#define HARD_TX_LOCK(dev, txq, cpu) {			\
 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
-		__netif_tx_lock(dev, cpu);			\
+		__netif_tx_lock(txq, cpu);		\
 	}						\
 }
 
-#define HARD_TX_UNLOCK(dev) {				\
+#define HARD_TX_UNLOCK(dev, txq) {			\
 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
-		netif_tx_unlock(dev);			\
+		__netif_tx_unlock(txq);			\
 	}						\
 }
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b6e52c025fd..8efa399823e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -627,6 +627,18 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
  */
 static struct lock_class_key vlan_netdev_xmit_lock_key;
 
+static void vlan_dev_set_lockdep_one(struct netdev_queue *txq,
+				     int subclass)
+{
+	lockdep_set_class_and_subclass(&txq->_xmit_lock,
+				       &vlan_netdev_xmit_lock_key, subclass);
+}
+
+static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
+{
+	vlan_dev_set_lockdep_one(&dev->tx_queue, subclass);
+}
+
 static const struct header_ops vlan_header_ops = {
 	.create	 = vlan_dev_hard_header,
 	.rebuild = vlan_dev_rebuild_header,
@@ -668,8 +680,7 @@ static int vlan_dev_init(struct net_device *dev)
 	if (is_vlan_dev(real_dev))
 		subclass = 1;
 
-	lockdep_set_class_and_subclass(&dev->_xmit_lock,
-				&vlan_netdev_xmit_lock_key, subclass);
+	vlan_dev_set_lockdep_class(dev, subclass);
 	return 0;
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 0218b0b9be8..a29a359b15d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -258,7 +258,7 @@ DEFINE_PER_CPU(struct softnet_data, softnet_data);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 /*
- * register_netdevice() inits dev->_xmit_lock and sets lockdep class
+ * register_netdevice() inits txq->_xmit_lock and sets lockdep class
  * according to dev->type
  */
 static const unsigned short netdev_lock_type[] =
@@ -1758,19 +1758,19 @@ gso:
 	if (dev->flags & IFF_UP) {
 		int cpu = smp_processor_id(); /* ok because BHs are off */
 
-		if (dev->xmit_lock_owner != cpu) {
+		if (txq->xmit_lock_owner != cpu) {
 
-			HARD_TX_LOCK(dev, cpu);
+			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_queue_stopped(dev) &&
 			    !netif_subqueue_stopped(dev, skb)) {
 				rc = 0;
 				if (!dev_hard_start_xmit(skb, dev)) {
-					HARD_TX_UNLOCK(dev);
+					HARD_TX_UNLOCK(dev, txq);
 					goto out;
 				}
 			}
-			HARD_TX_UNLOCK(dev);
+			HARD_TX_UNLOCK(dev, txq);
 			if (net_ratelimit())
 				printk(KERN_CRIT "Virtual device %s asks to "
 				       "queue packet!\n", dev->name);
@@ -3761,6 +3761,20 @@ static void rollback_registered(struct net_device *dev)
 	dev_put(dev);
 }
 
+static void __netdev_init_queue_locks_one(struct netdev_queue *dev_queue,
+					  struct net_device *dev)
+{
+	spin_lock_init(&dev_queue->_xmit_lock);
+	netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type);
+	dev_queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queue_locks(struct net_device *dev)
+{
+	__netdev_init_queue_locks_one(&dev->tx_queue, dev);
+	__netdev_init_queue_locks_one(&dev->rx_queue, dev);
+}
+
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
@@ -3795,9 +3809,7 @@ int register_netdevice(struct net_device *dev)
 	BUG_ON(!dev_net(dev));
 	net = dev_net(dev);
 
-	spin_lock_init(&dev->_xmit_lock);
-	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
-	dev->xmit_lock_owner = -1;
+	netdev_init_queue_locks(dev);
 
 	dev->iflink = -1;
 
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 74884f4a625..819afc449e1 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -74,6 +74,16 @@ static const struct proto_ops nr_proto_ops;
  */
 static struct lock_class_key nr_netdev_xmit_lock_key;
 
+static void nr_set_lockdep_one(struct netdev_queue *txq)
+{
+	lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key);
+}
+
+static void nr_set_lockdep_key(struct net_device *dev)
+{
+	nr_set_lockdep_one(&dev->tx_queue);
+}
+
 /*
  *	Socket removal during an interrupt is now safe.
  */
@@ -1430,7 +1440,7 @@ static int __init nr_proto_init(void)
 			free_netdev(dev);
 			goto fail;
 		}
-		lockdep_set_class(&dev->_xmit_lock, &nr_netdev_xmit_lock_key);
+		nr_set_lockdep_key(dev);
 		dev_nr[i] = dev;
 	}
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 46461a69cd0..7dbbc089162 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -75,6 +75,16 @@ ax25_address rose_callsign;
  */
 static struct lock_class_key rose_netdev_xmit_lock_key;
 
+static void rose_set_lockdep_one(struct netdev_queue *txq)
+{
+	lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key);
+}
+
+static void rose_set_lockdep_key(struct net_device *dev)
+{
+	rose_set_lockdep_one(&dev->tx_queue);
+}
+
 /*
  *	Convert a ROSE address into text.
  */
@@ -1576,7 +1586,7 @@ static int __init rose_proto_init(void)
 			free_netdev(dev);
 			goto fail;
 		}
-		lockdep_set_class(&dev->_xmit_lock, &rose_netdev_xmit_lock_key);
+		rose_set_lockdep_key(dev);
 		dev_rose[i] = dev;
 	}
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index fcc7533f0bc..b6a36d39466 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -92,10 +92,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 					   struct netdev_queue *dev_queue,
 					   struct Qdisc *q)
 {
-	struct net_device *dev = dev_queue->dev;
 	int ret;
 
-	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
+	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
 		/*
 		 * Same CPU holding the lock. It may be a transient
 		 * configuration error, when hard_start_xmit() recurses. We
@@ -105,7 +104,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 		kfree_skb(skb);
 		if (net_ratelimit())
 			printk(KERN_WARNING "Dead loop on netdevice %s, "
-			       "fix it urgently!\n", dev->name);
+			       "fix it urgently!\n", dev_queue->dev->name);
 		ret = qdisc_qlen(q);
 	} else {
 		/*
@@ -155,10 +154,10 @@ static inline int qdisc_restart(struct netdev_queue *txq)
 
 	dev = txq->dev;
 
-	HARD_TX_LOCK(dev, smp_processor_id());
+	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_subqueue_stopped(dev, skb))
 		ret = dev_hard_start_xmit(skb, dev);
-	HARD_TX_UNLOCK(dev);
+	HARD_TX_UNLOCK(dev, txq);
 
 	spin_lock(&txq->lock);
 	q = txq->qdisc;
-- 
cgit v1.2.3-70-g09d2


From b19fa1fa91845234961c64dbd564671aa7c0fd27 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 23:14:24 -0700
Subject: net: Delete NETDEVICES_MULTIQUEUE kconfig option.

Multiple TX queue support is a core networking feature.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/multiqueue.txt | 79 +--------------------------------
 drivers/net/Kconfig                     |  8 ----
 drivers/net/cpmac.c                     | 14 ------
 drivers/net/ixgbe/ixgbe_ethtool.c       |  6 ---
 drivers/net/ixgbe/ixgbe_main.c          | 40 -----------------
 drivers/net/s2io.c                      | 47 ++++----------------
 include/linux/netdevice.h               | 14 ------
 include/linux/skbuff.h                  | 10 -----
 net/mac80211/Kconfig                    |  3 --
 9 files changed, 9 insertions(+), 212 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
index ea5a42e8f79..e6dc1ee9e8f 100644
--- a/Documentation/networking/multiqueue.txt
+++ b/Documentation/networking/multiqueue.txt
@@ -3,19 +3,11 @@
 		===========================================
 
 Section 1: Base driver requirements for implementing multiqueue support
-Section 2: Qdisc support for multiqueue devices
-Section 3: Brief howto using PRIO or RR for multiqueue devices
-
 
 Intro: Kernel support for multiqueue devices
 ---------------------------------------------------------
 
-Kernel support for multiqueue devices is only an API that is presented to the
-netdevice layer for base drivers to implement.  This feature is part of the
-core networking stack, and all network devices will be running on the
-multiqueue-aware stack.  If a base driver only has one queue, then these
-changes are transparent to that driver.
-
+Kernel support for multiqueue devices is always present.
 
 Section 1: Base driver requirements for implementing multiqueue support
 -----------------------------------------------------------------------
@@ -43,73 +35,4 @@ bitmap on device initialization.  Below is an example from e1000:
 		netdev->features |= NETIF_F_MULTI_QUEUE;
 #endif
 
-
-Section 2: Qdisc support for multiqueue devices
------------------------------------------------
-
-Currently two qdiscs support multiqueue devices.  A new round-robin qdisc,
-sch_rr, and sch_prio. The qdisc is responsible for classifying the skb's to
-bands and queues, and will store the queue mapping into skb->queue_mapping.
-Use this field in the base driver to determine which queue to send the skb
-to.
-
-sch_rr has been added for hardware that doesn't want scheduling policies from
-software, so it's a straight round-robin qdisc.  It uses the same syntax and
-classification priomap that sch_prio uses, so it should be intuitive to
-configure for people who've used sch_prio.
-
-In order to utilitize the multiqueue features of the qdiscs, the network
-device layer needs to enable multiple queue support.  This can be done by
-selecting NETDEVICES_MULTIQUEUE under Drivers.
-
-The PRIO qdisc naturally plugs into a multiqueue device.  If
-NETDEVICES_MULTIQUEUE is selected, then on qdisc load, the number of
-bands requested is compared to the number of queues on the hardware.  If they
-are equal, it sets a one-to-one mapping up between the queues and bands.  If
-they're not equal, it will not load the qdisc.  This is the same behavior
-for RR.  Once the association is made, any skb that is classified will have
-skb->queue_mapping set, which will allow the driver to properly queue skb's
-to multiple queues.
-
-
-Section 3: Brief howto using PRIO and RR for multiqueue devices
----------------------------------------------------------------
-
-The userspace command 'tc,' part of the iproute2 package, is used to configure
-qdiscs.  To add the PRIO qdisc to your network device, assuming the device is
-called eth0, run the following command:
-
-# tc qdisc add dev eth0 root handle 1: prio bands 4 multiqueue
-
-This will create 4 bands, 0 being highest priority, and associate those bands
-to the queues on your NIC.  Assuming eth0 has 4 Tx queues, the band mapping
-would look like:
-
-band 0 => queue 0
-band 1 => queue 1
-band 2 => queue 2
-band 3 => queue 3
-
-Traffic will begin flowing through each queue if your TOS values are assigning
-traffic across the various bands.  For example, ssh traffic will always try to
-go out band 0 based on TOS -> Linux priority conversion (realtime traffic),
-so it will be sent out queue 0.  ICMP traffic (pings) fall into the "normal"
-traffic classification, which is band 1.  Therefore pings will be send out
-queue 1 on the NIC.
-
-Note the use of the multiqueue keyword.  This is only in versions of iproute2
-that support multiqueue networking devices; if this is omitted when loading
-a qdisc onto a multiqueue device, the qdisc will load and operate the same
-if it were loaded onto a single-queue device (i.e. - sends all traffic to
-queue 0).
-
-Another alternative to multiqueue band allocation can be done by using the
-multiqueue option and specify 0 bands.  If this is the case, the qdisc will
-allocate the number of bands to equal the number of queues that the device
-reports, and bring the qdisc online.
-
-The behavior of tc filters remains the same, where it will override TOS priority
-classification.
-
-
 Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index ef733abc857..4675c1bd6fb 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -26,14 +26,6 @@ menuconfig NETDEVICES
 # that for each of the symbols.
 if NETDEVICES
 
-config NETDEVICES_MULTIQUEUE
-	bool "Netdevice multiple hardware queue support"
-	---help---
-	  Say Y here if you want to allow the network stack to use multiple
-	  hardware TX queues on an ethernet device.
-
-	  Most people will say N here.
-
 config IFB
 	tristate "Intermediate Functional Block support"
 	depends on NET_CLS_ACT
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index 7f3f62e1b11..d630e2a72f4 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -569,11 +569,7 @@ static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	len = max(skb->len, ETH_ZLEN);
 	queue = skb_get_queue_mapping(skb);
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	netif_stop_subqueue(dev, queue);
-#else
-	netif_stop_queue(dev);
-#endif
 
 	desc = &priv->desc_ring[queue];
 	if (unlikely(desc->dataflags & CPMAC_OWN)) {
@@ -626,24 +622,14 @@ static void cpmac_end_xmit(struct net_device *dev, int queue)
 
 		dev_kfree_skb_irq(desc->skb);
 		desc->skb = NULL;
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 		if (netif_subqueue_stopped(dev, queue))
 			netif_wake_subqueue(dev, queue);
-#else
-		if (netif_queue_stopped(dev))
-			netif_wake_queue(dev);
-#endif
 	} else {
 		if (netif_msg_tx_err(priv) && net_ratelimit())
 			printk(KERN_WARNING
 			       "%s: end_xmit: spurious interrupt\n", dev->name);
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 		if (netif_subqueue_stopped(dev, queue))
 			netif_wake_subqueue(dev, queue);
-#else
-		if (netif_queue_stopped(dev))
-			netif_wake_queue(dev);
-#endif
 	}
 }
 
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 12990b1fe7e..81b769093d2 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -252,21 +252,15 @@ static int ixgbe_set_tso(struct net_device *netdev, u32 data)
 		netdev->features |= NETIF_F_TSO;
 		netdev->features |= NETIF_F_TSO6;
 	} else {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 		struct ixgbe_adapter *adapter = netdev_priv(netdev);
 		int i;
-#endif
 		netif_stop_queue(netdev);
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 		for (i = 0; i < adapter->num_tx_queues; i++)
 			netif_stop_subqueue(netdev, i);
-#endif
 		netdev->features &= ~NETIF_F_TSO;
 		netdev->features &= ~NETIF_F_TSO6;
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 		for (i = 0; i < adapter->num_tx_queues; i++)
 			netif_start_subqueue(netdev, i);
-#endif
 		netif_start_queue(netdev);
 	}
 	return 0;
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index b37d618d8e2..10a1c8c5cda 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -266,28 +266,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_adapter *adapter,
 		 * sees the new next_to_clean.
 		 */
 		smp_mb();
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 		if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
 		    !test_bit(__IXGBE_DOWN, &adapter->state)) {
 			netif_wake_subqueue(netdev, tx_ring->queue_index);
 			adapter->restart_queue++;
 		}
-#else
-		if (netif_queue_stopped(netdev) &&
-		    !test_bit(__IXGBE_DOWN, &adapter->state)) {
-			netif_wake_queue(netdev);
-			adapter->restart_queue++;
-		}
-#endif
 	}
 
 	if (adapter->detect_tx_hung)
 		if (ixgbe_check_tx_hang(adapter, tx_ring, eop, eop_desc))
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 			netif_stop_subqueue(netdev, tx_ring->queue_index);
-#else
-			netif_stop_queue(netdev);
-#endif
 
 	if (total_tx_packets >= tx_ring->work_limit)
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, tx_ring->eims_value);
@@ -2192,11 +2180,7 @@ static void __devinit ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 		case (IXGBE_FLAG_RSS_ENABLED):
 			rss_m = 0xF;
 			nrq = rss_i;
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 			ntq = rss_i;
-#else
-			ntq = 1;
-#endif
 			break;
 		case 0:
 		default:
@@ -2370,10 +2354,8 @@ try_msi:
 	}
 
 out:
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	/* Notify the stack of the (possibly) reduced Tx Queue count. */
 	adapter->netdev->egress_subqueue_count = adapter->num_tx_queues;
-#endif
 
 	return err;
 }
@@ -2910,9 +2892,7 @@ static void ixgbe_watchdog(unsigned long data)
 	struct net_device *netdev = adapter->netdev;
 	bool link_up;
 	u32 link_speed = 0;
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	int i;
-#endif
 
 	adapter->hw.mac.ops.check_link(&adapter->hw, &(link_speed), &link_up);
 
@@ -2934,10 +2914,8 @@ static void ixgbe_watchdog(unsigned long data)
 
 			netif_carrier_on(netdev);
 			netif_wake_queue(netdev);
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 			for (i = 0; i < adapter->num_tx_queues; i++)
 				netif_wake_subqueue(netdev, i);
-#endif
 		} else {
 			/* Force detection of hung controller */
 			adapter->detect_tx_hung = true;
@@ -3264,11 +3242,7 @@ static int __ixgbe_maybe_stop_tx(struct net_device *netdev,
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	netif_stop_subqueue(netdev, tx_ring->queue_index);
-#else
-	netif_stop_queue(netdev);
-#endif
 	/* Herbert's original patch had:
 	 *  smp_mb__after_netif_stop_queue();
 	 * but since that doesn't exist yet, just open code it. */
@@ -3280,11 +3254,7 @@ static int __ixgbe_maybe_stop_tx(struct net_device *netdev,
 		return -EBUSY;
 
 	/* A reprieve! - use start_queue because it doesn't call schedule */
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	netif_wake_subqueue(netdev, tx_ring->queue_index);
-#else
-	netif_wake_queue(netdev);
-#endif
 	++adapter->restart_queue;
 	return 0;
 }
@@ -3312,9 +3282,7 @@ static int ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	unsigned int f;
 	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
 	len -= skb->data_len;
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	r_idx = (adapter->num_tx_queues - 1) & skb->queue_mapping;
-#endif
 	tx_ring = &adapter->tx_ring[r_idx];
 
 
@@ -3502,11 +3470,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);
 	pci_save_state(pdev);
 
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES);
-#else
-	netdev = alloc_etherdev(sizeof(struct ixgbe_adapter));
-#endif
 	if (!netdev) {
 		err = -ENOMEM;
 		goto err_alloc_etherdev;
@@ -3598,9 +3562,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	if (pci_using_dac)
 		netdev->features |= NETIF_F_HIGHDMA;
 
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	netdev->features |= NETIF_F_MULTI_QUEUE;
-#endif
 
 	/* make sure the EEPROM is good */
 	if (ixgbe_validate_eeprom_checksum(hw, NULL) < 0) {
@@ -3668,10 +3630,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 
 	netif_carrier_off(netdev);
 	netif_stop_queue(netdev);
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		netif_stop_subqueue(netdev, i);
-#endif
 
 	ixgbe_napi_add_all(adapter);
 
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index e7a3dbec674..51a91154125 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -546,13 +546,10 @@ static struct pci_driver s2io_driver = {
 static inline void s2io_stop_all_tx_queue(struct s2io_nic *sp)
 {
 	int i;
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	if (sp->config.multiq) {
 		for (i = 0; i < sp->config.tx_fifo_num; i++)
 			netif_stop_subqueue(sp->dev, i);
-	} else
-#endif
-	{
+	} else {
 		for (i = 0; i < sp->config.tx_fifo_num; i++)
 			sp->mac_control.fifos[i].queue_state = FIFO_QUEUE_STOP;
 		netif_stop_queue(sp->dev);
@@ -561,12 +558,9 @@ static inline void s2io_stop_all_tx_queue(struct s2io_nic *sp)
 
 static inline void s2io_stop_tx_queue(struct s2io_nic *sp, int fifo_no)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	if (sp->config.multiq)
 		netif_stop_subqueue(sp->dev, fifo_no);
-	else
-#endif
-	{
+	else {
 		sp->mac_control.fifos[fifo_no].queue_state =
 			FIFO_QUEUE_STOP;
 		netif_stop_queue(sp->dev);
@@ -576,13 +570,10 @@ static inline void s2io_stop_tx_queue(struct s2io_nic *sp, int fifo_no)
 static inline void s2io_start_all_tx_queue(struct s2io_nic *sp)
 {
 	int i;
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	if (sp->config.multiq) {
 		for (i = 0; i < sp->config.tx_fifo_num; i++)
 			netif_start_subqueue(sp->dev, i);
-	} else
-#endif
-	{
+	} else {
 		for (i = 0; i < sp->config.tx_fifo_num; i++)
 			sp->mac_control.fifos[i].queue_state = FIFO_QUEUE_START;
 		netif_start_queue(sp->dev);
@@ -591,12 +582,9 @@ static inline void s2io_start_all_tx_queue(struct s2io_nic *sp)
 
 static inline void s2io_start_tx_queue(struct s2io_nic *sp, int fifo_no)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	if (sp->config.multiq)
 		netif_start_subqueue(sp->dev, fifo_no);
-	else
-#endif
-	{
+	else {
 		sp->mac_control.fifos[fifo_no].queue_state =
 			FIFO_QUEUE_START;
 		netif_start_queue(sp->dev);
@@ -606,13 +594,10 @@ static inline void s2io_start_tx_queue(struct s2io_nic *sp, int fifo_no)
 static inline void s2io_wake_all_tx_queue(struct s2io_nic *sp)
 {
 	int i;
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	if (sp->config.multiq) {
 		for (i = 0; i < sp->config.tx_fifo_num; i++)
 			netif_wake_subqueue(sp->dev, i);
-	} else
-#endif
-	{
+	} else {
 		for (i = 0; i < sp->config.tx_fifo_num; i++)
 			sp->mac_control.fifos[i].queue_state = FIFO_QUEUE_START;
 		netif_wake_queue(sp->dev);
@@ -623,13 +608,10 @@ static inline void s2io_wake_tx_queue(
 	struct fifo_info *fifo, int cnt, u8 multiq)
 {
 
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	if (multiq) {
 		if (cnt && __netif_subqueue_stopped(fifo->dev, fifo->fifo_no))
 			netif_wake_subqueue(fifo->dev, fifo->fifo_no);
-	} else
-#endif
-	if (cnt && (fifo->queue_state == FIFO_QUEUE_STOP)) {
+	} else if (cnt && (fifo->queue_state == FIFO_QUEUE_STOP)) {
 		if (netif_queue_stopped(fifo->dev)) {
 			fifo->queue_state = FIFO_QUEUE_START;
 			netif_wake_queue(fifo->dev);
@@ -4189,15 +4171,12 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NETDEV_TX_LOCKED;
 	}
 
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	if (sp->config.multiq) {
 		if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
 			spin_unlock_irqrestore(&fifo->tx_lock, flags);
 			return NETDEV_TX_BUSY;
 		}
-	} else
-#endif
-	if (unlikely(fifo->queue_state == FIFO_QUEUE_STOP)) {
+	} else if (unlikely(fifo->queue_state == FIFO_QUEUE_STOP)) {
 		if (netif_queue_stopped(dev)) {
 			spin_unlock_irqrestore(&fifo->tx_lock, flags);
 			return NETDEV_TX_BUSY;
@@ -7633,12 +7612,6 @@ static int s2io_verify_parm(struct pci_dev *pdev, u8 *dev_intr_type,
 		DBG_PRINT(ERR_DBG, "tx fifos\n");
 	}
 
-#ifndef CONFIG_NETDEVICES_MULTIQUEUE
-	if (multiq) {
-		DBG_PRINT(ERR_DBG, "s2io: Multiqueue support not enabled\n");
-		multiq = 0;
-	}
-#endif
 	if (multiq)
 		*dev_multiq = multiq;
 
@@ -7783,12 +7756,10 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 		pci_disable_device(pdev);
 		return -ENODEV;
 	}
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	if (dev_multiq)
 		dev = alloc_etherdev_mq(sizeof(struct s2io_nic), tx_fifo_num);
 	else
-#endif
-	dev = alloc_etherdev(sizeof(struct s2io_nic));
+		dev = alloc_etherdev(sizeof(struct s2io_nic));
 	if (dev == NULL) {
 		DBG_PRINT(ERR_DBG, "Device allocation failed\n");
 		pci_disable_device(pdev);
@@ -7979,10 +7950,8 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 		dev->features |= NETIF_F_UFO;
 		dev->features |= NETIF_F_HW_CSUM;
 	}
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	if (config->multiq)
 		dev->features |= NETIF_F_MULTI_QUEUE;
-#endif
 	dev->tx_timeout = &s2io_tx_watchdog;
 	dev->watchdog_timeo = WATCH_DOG_TIMEOUT;
 	INIT_WORK(&sp->rst_timer_task, s2io_restart_nic);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c8d5f128858..e2d931f9b70 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1043,9 +1043,7 @@ static inline int netif_running(const struct net_device *dev)
  */
 static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
-#endif
 }
 
 /**
@@ -1057,13 +1055,11 @@ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
  */
 static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 #ifdef CONFIG_NETPOLL_TRAP
 	if (netpoll_trap())
 		return;
 #endif
 	set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
-#endif
 }
 
 /**
@@ -1076,12 +1072,8 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
 static inline int __netif_subqueue_stopped(const struct net_device *dev,
 					 u16 queue_index)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	return test_bit(__LINK_STATE_XOFF,
 			&dev->egress_subqueue[queue_index].state);
-#else
-	return 0;
-#endif
 }
 
 static inline int netif_subqueue_stopped(const struct net_device *dev,
@@ -1099,7 +1091,6 @@ static inline int netif_subqueue_stopped(const struct net_device *dev,
  */
 static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 #ifdef CONFIG_NETPOLL_TRAP
 	if (netpoll_trap())
 		return;
@@ -1107,7 +1098,6 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 	if (test_and_clear_bit(__LINK_STATE_XOFF,
 			       &dev->egress_subqueue[queue_index].state))
 		__netif_schedule(&dev->tx_queue);
-#endif
 }
 
 /**
@@ -1119,11 +1109,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
  */
 static inline int netif_is_multiqueue(const struct net_device *dev)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	return (!!(NETIF_F_MULTI_QUEUE & dev->features));
-#else
-	return 0;
-#endif
 }
 
 /* Use this variant when it is known for sure that it
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2220b9e2dab..8f10e3d08fd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -305,9 +305,7 @@ struct sk_buff {
 #endif
 
 	int			iif;
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	__u16			queue_mapping;
-#endif
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
 #ifdef CONFIG_NET_CLS_ACT
@@ -1671,25 +1669,17 @@ static inline void skb_init_secmark(struct sk_buff *skb)
 
 static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	skb->queue_mapping = queue_mapping;
-#endif
 }
 
 static inline u16 skb_get_queue_mapping(struct sk_buff *skb)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	return skb->queue_mapping;
-#else
-	return 0;
-#endif
 }
 
 static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
 {
-#ifdef CONFIG_NETDEVICES_MULTIQUEUE
 	to->queue_mapping = from->queue_mapping;
-#endif
 }
 
 static inline int skb_is_gso(const struct sk_buff *skb)
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 40f1add1775..d2038418e2b 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -15,14 +15,11 @@ config MAC80211_QOS
 	def_bool y
 	depends on MAC80211
 	depends on NET_SCHED
-	depends on NETDEVICES_MULTIQUEUE
 
 comment "QoS/HT support disabled"
 	depends on MAC80211 && !MAC80211_QOS
 comment "QoS/HT support needs CONFIG_NET_SCHED"
 	depends on MAC80211 && !NET_SCHED
-comment "QoS/HT support needs CONFIG_NETDEVICES_MULTIQUEUE"
-	depends on MAC80211 && !NETDEVICES_MULTIQUEUE
 
 menu "Rate control algorithm selection"
 	depends on MAC80211 != n
-- 
cgit v1.2.3-70-g09d2


From 79d16385c7f287a33ea771c4dbe60ae43f791b49 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Jul 2008 23:14:46 -0700
Subject: netdev: Move atomic queue state bits into netdev_queue.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 55 +++++++++++++++++++++++++++++++++--------------
 include/net/pkt_sched.h   |  2 +-
 net/sched/sch_generic.c   | 20 +++++++++--------
 3 files changed, 51 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e2d931f9b70..203c5504fe4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -281,14 +281,12 @@ struct header_ops {
 
 enum netdev_state_t
 {
-	__LINK_STATE_XOFF=0,
 	__LINK_STATE_START,
 	__LINK_STATE_PRESENT,
 	__LINK_STATE_SCHED,
 	__LINK_STATE_NOCARRIER,
 	__LINK_STATE_LINKWATCH_PENDING,
 	__LINK_STATE_DORMANT,
-	__LINK_STATE_QDISC_RUNNING,
 };
 
 
@@ -448,10 +446,17 @@ static inline void napi_synchronize(const struct napi_struct *n)
 # define napi_synchronize(n)	barrier()
 #endif
 
+enum netdev_queue_state_t
+{
+	__QUEUE_STATE_XOFF,
+	__QUEUE_STATE_QDISC_RUNNING,
+};
+
 struct netdev_queue {
 	spinlock_t		lock;
 	struct net_device	*dev;
 	struct Qdisc		*qdisc;
+	unsigned long		state;
 	struct sk_buff		*gso_skb;
 	spinlock_t		_xmit_lock;
 	int			xmit_lock_owner;
@@ -952,9 +957,7 @@ extern void __netif_schedule(struct netdev_queue *txq);
 
 static inline void netif_schedule_queue(struct netdev_queue *txq)
 {
-	struct net_device *dev = txq->dev;
-
-	if (!test_bit(__LINK_STATE_XOFF, &dev->state))
+	if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
 		__netif_schedule(txq);
 }
 
@@ -969,9 +972,14 @@ static inline void netif_schedule(struct net_device *dev)
  *
  *	Allow upper layers to call the device hard_start_xmit routine.
  */
+static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
+{
+	clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+}
+
 static inline void netif_start_queue(struct net_device *dev)
 {
-	clear_bit(__LINK_STATE_XOFF, &dev->state);
+	netif_tx_start_queue(&dev->tx_queue);
 }
 
 /**
@@ -981,16 +989,21 @@ static inline void netif_start_queue(struct net_device *dev)
  *	Allow upper layers to call the device hard_start_xmit routine.
  *	Used for flow control when transmit resources are available.
  */
-static inline void netif_wake_queue(struct net_device *dev)
+static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
 {
 #ifdef CONFIG_NETPOLL_TRAP
 	if (netpoll_trap()) {
-		clear_bit(__LINK_STATE_XOFF, &dev->state);
+		clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
 		return;
 	}
 #endif
-	if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
-		__netif_schedule(&dev->tx_queue);
+	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state))
+		__netif_schedule(dev_queue);
+}
+
+static inline void netif_wake_queue(struct net_device *dev)
+{
+	netif_tx_wake_queue(&dev->tx_queue);
 }
 
 /**
@@ -1000,9 +1013,14 @@ static inline void netif_wake_queue(struct net_device *dev)
  *	Stop upper layers calling the device hard_start_xmit routine.
  *	Used for flow control when transmit resources are unavailable.
  */
+static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
+{
+	set_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+}
+
 static inline void netif_stop_queue(struct net_device *dev)
 {
-	set_bit(__LINK_STATE_XOFF, &dev->state);
+	netif_tx_stop_queue(&dev->tx_queue);
 }
 
 /**
@@ -1011,9 +1029,14 @@ static inline void netif_stop_queue(struct net_device *dev)
  *
  *	Test if transmit queue on device is currently unable to send.
  */
+static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
+{
+	return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+}
+
 static inline int netif_queue_stopped(const struct net_device *dev)
 {
-	return test_bit(__LINK_STATE_XOFF, &dev->state);
+	return netif_tx_queue_stopped(&dev->tx_queue);
 }
 
 /**
@@ -1043,7 +1066,7 @@ static inline int netif_running(const struct net_device *dev)
  */
 static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
 {
-	clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+	clear_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
 }
 
 /**
@@ -1059,7 +1082,7 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
 	if (netpoll_trap())
 		return;
 #endif
-	set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+	set_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
 }
 
 /**
@@ -1072,7 +1095,7 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
 static inline int __netif_subqueue_stopped(const struct net_device *dev,
 					 u16 queue_index)
 {
-	return test_bit(__LINK_STATE_XOFF,
+	return test_bit(__QUEUE_STATE_XOFF,
 			&dev->egress_subqueue[queue_index].state);
 }
 
@@ -1095,7 +1118,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 	if (netpoll_trap())
 		return;
 #endif
-	if (test_and_clear_bit(__LINK_STATE_XOFF,
+	if (test_and_clear_bit(__QUEUE_STATE_XOFF,
 			       &dev->egress_subqueue[queue_index].state))
 		__netif_schedule(&dev->tx_queue);
 }
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 2311d242bb3..d58c1a5eb84 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -91,7 +91,7 @@ static inline void qdisc_run(struct netdev_queue *txq)
 	struct net_device *dev = txq->dev;
 
 	if (!netif_queue_stopped(dev) &&
-	    !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
+	    !test_and_set_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state))
 		__qdisc_run(txq);
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b6a36d39466..243de935b18 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -121,9 +121,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 /*
  * NOTE: Called under queue->lock with locally disabled BH.
  *
- * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
- * device at a time. queue->lock serializes queue accesses for
- * this device AND txq->qdisc pointer itself.
+ * __QUEUE_STATE_QDISC_RUNNING guarantees only one CPU can process
+ * this queue at a time. queue->lock serializes queue accesses for
+ * this queue AND txq->qdisc pointer itself.
  *
  *  netif_tx_lock serializes accesses to device driver.
  *
@@ -206,7 +206,7 @@ void __qdisc_run(struct netdev_queue *txq)
 		}
 	}
 
-	clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+	clear_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state);
 }
 
 static void dev_watchdog(unsigned long arg)
@@ -605,9 +605,10 @@ static void dev_deactivate_queue(struct netdev_queue *dev_queue,
 
 void dev_deactivate(struct net_device *dev)
 {
+	struct netdev_queue *dev_queue = &dev->tx_queue;
 	int running;
 
-	dev_deactivate_queue(&dev->tx_queue, &noop_qdisc);
+	dev_deactivate_queue(dev_queue, &noop_qdisc);
 
 	dev_watchdog_down(dev);
 
@@ -616,16 +617,17 @@ void dev_deactivate(struct net_device *dev)
 
 	/* Wait for outstanding qdisc_run calls. */
 	do {
-		while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
+		while (test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state))
 			yield();
 
 		/*
 		 * Double-check inside queue lock to ensure that all effects
 		 * of the queue run are visible when we return.
 		 */
-		spin_lock_bh(&dev->tx_queue.lock);
-		running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
-		spin_unlock_bh(&dev->tx_queue.lock);
+		spin_lock_bh(&dev_queue->lock);
+		running = test_bit(__QUEUE_STATE_QDISC_RUNNING,
+				   &dev_queue->state);
+		spin_unlock_bh(&dev_queue->lock);
 
 		/*
 		 * The running flag should never be set at this point because
-- 
cgit v1.2.3-70-g09d2


From 8b39f2b41033754e7ba669503d27268beb1b524a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 14 May 2008 19:48:25 -0700
Subject: SUNRPC: Ensure we exit early in case of an encode error

All errors from call_encode(), with exception of EAGAIN are fatal, so we
should immediately return instead of proceeding to xprt_transmit().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8945307556e..9503b4c177d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -890,7 +890,6 @@ call_encode(struct rpc_task *task)
 			task->tk_msg.rpc_argp);
 	if (task->tk_status == -ENOMEM) {
 		/* XXX: Is this sane? */
-		rpc_delay(task, 3*HZ);
 		task->tk_status = -EAGAIN;
 	}
 }
@@ -1048,8 +1047,14 @@ call_transmit(struct rpc_task *task)
 		BUG_ON(task->tk_rqstp->rq_bytes_sent != 0);
 		call_encode(task);
 		/* Did the encode result in an error condition? */
-		if (task->tk_status != 0)
+		if (task->tk_status != 0) {
+			/* Was the error nonfatal? */
+			if (task->tk_status == -EAGAIN)
+				rpc_delay(task, HZ >> 4);
+			else
+				rpc_exit(task, task->tk_status);
 			return;
+		}
 	}
 	xprt_transmit(task);
 	if (task->tk_status < 0)
-- 
cgit v1.2.3-70-g09d2


From b390c2b55c830eb3b64633fa8d8b8837e073e458 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 10 Jun 2008 18:30:11 -0400
Subject: SUNRPC: An ENOMEM error from call_encode is always fatal

The special 'ENOMEM' case that was previously flagged as non-fatal is
bogus: auth_gss always returns EAGAIN for non-fatal errors, and may in fact
return ENOMEM in the special case where xdr_buf_read_netobj runs out of
preallocated buffer space (invariably a _fatal_ error, since there is no
provision for preallocating larger buffers).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9503b4c177d..1af4f161cda 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -888,10 +888,6 @@ call_encode(struct rpc_task *task)
 
 	task->tk_status = rpcauth_wrap_req(task, encode, req, p,
 			task->tk_msg.rpc_argp);
-	if (task->tk_status == -ENOMEM) {
-		/* XXX: Is this sane? */
-		task->tk_status = -EAGAIN;
-	}
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 0f38b873aeaae698c3693748438547c8493165fb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 10 Jun 2008 18:31:01 -0400
Subject: SUNRPC: Use GFP_NOFS when allocating credentials

Since the credentials may be allocated during the call to rpc_new_task(),
which again may be called by a memory allocator...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/auth_gss.c        | 10 +++++-----
 net/sunrpc/auth_gss/gss_krb5_mech.c   |  4 ++--
 net/sunrpc/auth_gss/gss_spkm3_mech.c  |  4 ++--
 net/sunrpc/auth_gss/gss_spkm3_token.c |  2 +-
 net/sunrpc/auth_unix.c                |  2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index cc12d5f5d5d..bf7585b8054 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -146,7 +146,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
 	q = (const void *)((const char *)p + len);
 	if (unlikely(q > end || q < p))
 		return ERR_PTR(-EFAULT);
-	dest->data = kmemdup(p, len, GFP_KERNEL);
+	dest->data = kmemdup(p, len, GFP_NOFS);
 	if (unlikely(dest->data == NULL))
 		return ERR_PTR(-ENOMEM);
 	dest->len = len;
@@ -171,7 +171,7 @@ gss_alloc_context(void)
 {
 	struct gss_cl_ctx *ctx;
 
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	ctx = kzalloc(sizeof(*ctx), GFP_NOFS);
 	if (ctx != NULL) {
 		ctx->gc_proc = RPC_GSS_PROC_DATA;
 		ctx->gc_seq = 1;	/* NetApp 6.4R1 doesn't accept seq. no. 0 */
@@ -341,7 +341,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid)
 {
 	struct gss_upcall_msg *gss_msg;
 
-	gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL);
+	gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
 	if (gss_msg != NULL) {
 		INIT_LIST_HEAD(&gss_msg->list);
 		rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
@@ -503,7 +503,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	if (mlen > MSG_BUF_MAXSIZE)
 		goto out;
 	err = -ENOMEM;
-	buf = kmalloc(mlen, GFP_KERNEL);
+	buf = kmalloc(mlen, GFP_NOFS);
 	if (!buf)
 		goto out;
 
@@ -806,7 +806,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 	dprintk("RPC:       gss_create_cred for uid %d, flavor %d\n",
 		acred->uid, auth->au_flavor);
 
-	if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL)))
+	if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS)))
 		goto out_err;
 
 	rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 60c3dba545d..ef45eba2248 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -70,7 +70,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
 	q = (const void *)((const char *)p + len);
 	if (unlikely(q > end || q < p))
 		return ERR_PTR(-EFAULT);
-	res->data = kmemdup(p, len, GFP_KERNEL);
+	res->data = kmemdup(p, len, GFP_NOFS);
 	if (unlikely(res->data == NULL))
 		return ERR_PTR(-ENOMEM);
 	res->len = len;
@@ -131,7 +131,7 @@ gss_import_sec_context_kerberos(const void *p,
 	struct	krb5_ctx *ctx;
 	int tmp;
 
-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
+	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
 		goto out_err;
 
 	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 5deb4b6e451..035e1dd6af1 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -76,7 +76,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
 	q = (const void *)((const char *)p + len);
 	if (unlikely(q > end || q < p))
 		return ERR_PTR(-EFAULT);
-	res->data = kmemdup(p, len, GFP_KERNEL);
+	res->data = kmemdup(p, len, GFP_NOFS);
 	if (unlikely(res->data == NULL))
 		return ERR_PTR(-ENOMEM);
 	return q;
@@ -90,7 +90,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len,
 	struct	spkm3_ctx *ctx;
 	int	version;
 
-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
+	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
 		goto out_err;
 
 	p = simple_get_bytes(p, end, &version, sizeof(version));
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index 6cdd241ad26..3308157436d 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -90,7 +90,7 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
 int
 decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
 {
-	if (!(out->data = kzalloc(explen,GFP_KERNEL)))
+	if (!(out->data = kzalloc(explen,GFP_NOFS)))
 		return 0;
 	out->len = explen;
 	memcpy(out->data, in, enclen);
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 44920b90bdc..46b2647c5bd 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 	dprintk("RPC:       allocating UNIX cred for uid %d gid %d\n",
 			acred->uid, acred->gid);
 
-	if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
+	if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS)))
 		return ERR_PTR(-ENOMEM);
 
 	rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
-- 
cgit v1.2.3-70-g09d2


From 3748f1e447ac1dbf45f33ee7491a008a8bb5cdf0 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 21 May 2008 17:09:12 -0400
Subject: SUNRPC: Add a function to display the name of an RPC procedure

Improve debugging messages in call_start() and call_verify() by having
them show the RPC procedure name instead of the procedure number.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1af4f161cda..68ea6dddcf1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -690,6 +690,21 @@ rpc_restart_call(struct rpc_task *task)
 }
 EXPORT_SYMBOL_GPL(rpc_restart_call);
 
+#ifdef RPC_DEBUG
+static const char *rpc_proc_name(const struct rpc_task *task)
+{
+	const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+
+	if (proc) {
+		if (proc->p_name)
+			return proc->p_name;
+		else
+			return "NULL";
+	} else
+		return "no proc";
+}
+#endif
+
 /*
  * 0.  Initial state
  *
@@ -701,9 +716,9 @@ call_start(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 
-	dprintk("RPC: %5u call_start %s%d proc %d (%s)\n", task->tk_pid,
+	dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
 			clnt->cl_protname, clnt->cl_vers,
-			task->tk_msg.rpc_proc->p_proc,
+			rpc_proc_name(task),
 			(RPC_IS_ASYNC(task) ? "async" : "sync"));
 
 	/* Increment call count */
@@ -1432,10 +1447,10 @@ call_verify(struct rpc_task *task)
 		error = -EPROTONOSUPPORT;
 		goto out_err;
 	case RPC_PROC_UNAVAIL:
-		dprintk("RPC: %5u %s: proc %p unsupported by program %u, "
+		dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
 				"version %u on server %s\n",
 				task->tk_pid, __func__,
-				task->tk_msg.rpc_proc,
+				rpc_proc_name(task),
 				task->tk_client->cl_prog,
 				task->tk_client->cl_vers,
 				task->tk_client->cl_server);
-- 
cgit v1.2.3-70-g09d2


From b0e1c57ea00302c3ac541ffd37e7db07d13cd674 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 21 May 2008 17:09:19 -0400
Subject: SUNRPC: Rename "call_" functions that are no longer FSM states

The RPC client uses a finite state machine to move RPC tasks through each
step of an RPC request.  Each state is contained in a function in
net/sunrpc/clnt.c, and named call_foo.

Some of the functions named call_foo have changed over the past few years and
are no longer states in the FSM.  These include: call_encode, call_header,
and call_verify.  As a clean up, rename the functions that have changed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 68ea6dddcf1..ab8038db8ef 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -58,7 +58,6 @@ static void	call_start(struct rpc_task *task);
 static void	call_reserve(struct rpc_task *task);
 static void	call_reserveresult(struct rpc_task *task);
 static void	call_allocate(struct rpc_task *task);
-static void	call_encode(struct rpc_task *task);
 static void	call_decode(struct rpc_task *task);
 static void	call_bind(struct rpc_task *task);
 static void	call_bind_status(struct rpc_task *task);
@@ -70,9 +69,9 @@ static void	call_refreshresult(struct rpc_task *task);
 static void	call_timeout(struct rpc_task *task);
 static void	call_connect(struct rpc_task *task);
 static void	call_connect_status(struct rpc_task *task);
-static __be32 *	call_header(struct rpc_task *task);
-static __be32 *	call_verify(struct rpc_task *task);
 
+static __be32	*rpc_encode_header(struct rpc_task *task);
+static __be32	*rpc_verify_header(struct rpc_task *task);
 static int	rpc_ping(struct rpc_clnt *clnt, int flags);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
@@ -876,7 +875,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
  * 3.	Encode arguments of an RPC call
  */
 static void
-call_encode(struct rpc_task *task)
+rpc_xdr_encode(struct rpc_task *task)
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
 	kxdrproc_t	encode;
@@ -891,13 +890,14 @@ call_encode(struct rpc_task *task)
 			 (char *)req->rq_buffer + req->rq_callsize,
 			 req->rq_rcvsize);
 
-	/* Encode header and provided arguments */
-	encode = task->tk_msg.rpc_proc->p_encode;
-	if (!(p = call_header(task))) {
-		printk(KERN_INFO "RPC: call_header failed, exit EIO\n");
+	p = rpc_encode_header(task);
+	if (p == NULL) {
+		printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n");
 		rpc_exit(task, -EIO);
 		return;
 	}
+
+	encode = task->tk_msg.rpc_proc->p_encode;
 	if (encode == NULL)
 		return;
 
@@ -1056,7 +1056,7 @@ call_transmit(struct rpc_task *task)
 	/* Encode here so that rpcsec_gss can use correct sequence number. */
 	if (rpc_task_need_encode(task)) {
 		BUG_ON(task->tk_rqstp->rq_bytes_sent != 0);
-		call_encode(task);
+		rpc_xdr_encode(task);
 		/* Did the encode result in an error condition? */
 		if (task->tk_status != 0) {
 			/* Was the error nonfatal? */
@@ -1240,8 +1240,7 @@ call_decode(struct rpc_task *task)
 		goto out_retry;
 	}
 
-	/* Verify the RPC header */
-	p = call_verify(task);
+	p = rpc_verify_header(task);
 	if (IS_ERR(p)) {
 		if (p == ERR_PTR(-EAGAIN))
 			goto out_retry;
@@ -1259,7 +1258,7 @@ call_decode(struct rpc_task *task)
 	return;
 out_retry:
 	task->tk_status = 0;
-	/* Note: call_verify() may have freed the RPC slot */
+	/* Note: rpc_verify_header() may have freed the RPC slot */
 	if (task->tk_rqstp == req) {
 		req->rq_received = req->rq_rcv_buf.len = 0;
 		if (task->tk_client->cl_discrtry)
@@ -1306,11 +1305,8 @@ call_refreshresult(struct rpc_task *task)
 	return;
 }
 
-/*
- * Call header serialization
- */
 static __be32 *
-call_header(struct rpc_task *task)
+rpc_encode_header(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
@@ -1330,11 +1326,8 @@ call_header(struct rpc_task *task)
 	return p;
 }
 
-/*
- * Reply header verification
- */
 static __be32 *
-call_verify(struct rpc_task *task)
+rpc_verify_header(struct rpc_task *task)
 {
 	struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
 	int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
@@ -1408,7 +1401,7 @@ call_verify(struct rpc_task *task)
 			task->tk_action = call_bind;
 			goto out_retry;
 		case RPC_AUTH_TOOWEAK:
-			printk(KERN_NOTICE "call_verify: server %s requires stronger "
+			printk(KERN_NOTICE "RPC: server %s requires stronger "
 			       "authentication.\n", task->tk_client->cl_server);
 			break;
 		default:
-- 
cgit v1.2.3-70-g09d2


From 68a23ee94e3a06819f5a39d64f2e1f3131bab12d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 21 May 2008 17:09:26 -0400
Subject: SUNRPC: Don't display the rpc_show_tasks header if there are no tasks

Clean up: don't display the rpc_show_tasks column header unless there is at
least one task to display.  As far as I can tell, it is safe to let the
list_for_each_entry macro decide that each list is empty.

scripts/checkpatch.pl also wants a KERN_FOO at the start of any newly added
printk() calls, so this and subsequent patches will also add KERN_INFO.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ab8038db8ef..7dda32851ad 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1526,24 +1526,30 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
 EXPORT_SYMBOL_GPL(rpc_call_null);
 
 #ifdef RPC_DEBUG
+static void rpc_show_header(void)
+{
+	printk(KERN_INFO "-pid- proc flgs status -client- -prog- --rqstp- "
+		"-timeout -rpcwait -action- ---ops--\n");
+}
+
 void rpc_show_tasks(void)
 {
 	struct rpc_clnt *clnt;
 	struct rpc_task *t;
+	int header = 0;
 
 	spin_lock(&rpc_client_lock);
-	if (list_empty(&all_clients))
-		goto out;
-	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
-		"-rpcwait -action- ---ops--\n");
 	list_for_each_entry(clnt, &all_clients, cl_clients) {
-		if (list_empty(&clnt->cl_tasks))
-			continue;
 		spin_lock(&clnt->cl_lock);
 		list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
 			const char *rpc_waitq = "none";
 			int proc;
 
+			if (!header) {
+				rpc_show_header();
+				header++;
+			}
+
 			if (t->tk_msg.rpc_proc)
 				proc = t->tk_msg.rpc_proc->p_proc;
 			else
@@ -1563,7 +1569,6 @@ void rpc_show_tasks(void)
 		}
 		spin_unlock(&clnt->cl_lock);
 	}
-out:
 	spin_unlock(&rpc_client_lock);
 }
 #endif
-- 
cgit v1.2.3-70-g09d2


From 38e886e0c18975543938519254fc9bf0829c75a3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 21 May 2008 17:09:33 -0400
Subject: SUNRPC: Refactor rpc_show_tasks

Clean up: move the logic that displays each task to its own function.
This removes indentation and makes future changes easier.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7dda32851ad..7964a98c90e 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1532,40 +1532,42 @@ static void rpc_show_header(void)
 		"-timeout -rpcwait -action- ---ops--\n");
 }
 
+static void rpc_show_task(const struct rpc_clnt *clnt,
+			  const struct rpc_task *task)
+{
+	const char *rpc_waitq = "none";
+	int proc = -1;
+
+	if (task->tk_msg.rpc_proc)
+		proc = task->tk_msg.rpc_proc->p_proc;
+
+	if (RPC_IS_QUEUED(task))
+		rpc_waitq = rpc_qname(task->tk_waitqueue);
+
+	printk(KERN_INFO "%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
+		task->tk_pid, proc,
+		task->tk_flags, task->tk_status,
+		clnt, clnt->cl_prog,
+		task->tk_rqstp, task->tk_timeout,
+		rpc_waitq,
+		task->tk_action, task->tk_ops);
+}
+
 void rpc_show_tasks(void)
 {
 	struct rpc_clnt *clnt;
-	struct rpc_task *t;
+	struct rpc_task *task;
 	int header = 0;
 
 	spin_lock(&rpc_client_lock);
 	list_for_each_entry(clnt, &all_clients, cl_clients) {
 		spin_lock(&clnt->cl_lock);
-		list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
-			const char *rpc_waitq = "none";
-			int proc;
-
+		list_for_each_entry(task, &clnt->cl_tasks, tk_task) {
 			if (!header) {
 				rpc_show_header();
 				header++;
 			}
-
-			if (t->tk_msg.rpc_proc)
-				proc = t->tk_msg.rpc_proc->p_proc;
-			else
-				proc = -1;
-
-			if (RPC_IS_QUEUED(t))
-				rpc_waitq = rpc_qname(t->tk_waitqueue);
-
-			printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
-				t->tk_pid, proc,
-				t->tk_flags, t->tk_status,
-				t->tk_client,
-				(t->tk_client ? t->tk_client->cl_prog : 0),
-				t->tk_rqstp, t->tk_timeout,
-				rpc_waitq,
-				t->tk_action, t->tk_ops);
+			rpc_show_task(clnt, task);
 		}
 		spin_unlock(&clnt->cl_lock);
 	}
-- 
cgit v1.2.3-70-g09d2


From cb3997b5a0b21864368bd1bd1d0929f9304fb6d9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 21 May 2008 17:09:41 -0400
Subject: SUNRPC: Display some debugging information as text rather than
 numbers

In rpc_show_tasks(), display the program name, version number, procedure
name and tk_action as human-readable variable-length text fields rather
than columnar numbers.

Doing the symbol lookup here helps in cases where we have actual
debugging output from a kernel log, but don't have access to the kernel
image or RPC module that generated the output.

Sample output:

 -pid- flgs status -client- --rqstp- -timeout ---ops--
  5608 0001    -11 eeb42690 f6d93710        0 f8fa1764 nfsv3 WRITE a:call_transmit_status q:none
  5609 0001    -11 eeb42690 f6d937e0        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending
  5610 0001    -11 eeb42690 f6d93230        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending
  5611 0001    -11 eeb42690 f6d93300        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending
  5612 0001    -11 eeb42690 f6d93090        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending
  5613 0001    -11 eeb42690 f6d933d0        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending
  5614 0001    -11 eeb42690 f6d93cc0        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending
  5615 0001    -11 eeb42690 f6d93a50        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending
  5616 0001    -11 eeb42690 f6d93640        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending
  5617 0001    -11 eeb42690 f6d93b20        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending
  5618 0001    -11 eeb42690 f6d93160        0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7964a98c90e..0530eea37b5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -25,6 +25,7 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/kallsyms.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
@@ -1528,29 +1529,31 @@ EXPORT_SYMBOL_GPL(rpc_call_null);
 #ifdef RPC_DEBUG
 static void rpc_show_header(void)
 {
-	printk(KERN_INFO "-pid- proc flgs status -client- -prog- --rqstp- "
-		"-timeout -rpcwait -action- ---ops--\n");
+	printk(KERN_INFO "-pid- flgs status -client- --rqstp- "
+		"-timeout ---ops--\n");
 }
 
 static void rpc_show_task(const struct rpc_clnt *clnt,
 			  const struct rpc_task *task)
 {
 	const char *rpc_waitq = "none";
-	int proc = -1;
-
-	if (task->tk_msg.rpc_proc)
-		proc = task->tk_msg.rpc_proc->p_proc;
+	char *p, action[KSYM_SYMBOL_LEN];
 
 	if (RPC_IS_QUEUED(task))
 		rpc_waitq = rpc_qname(task->tk_waitqueue);
 
-	printk(KERN_INFO "%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
-		task->tk_pid, proc,
-		task->tk_flags, task->tk_status,
-		clnt, clnt->cl_prog,
-		task->tk_rqstp, task->tk_timeout,
-		rpc_waitq,
-		task->tk_action, task->tk_ops);
+	/* map tk_action pointer to a function name; then trim off
+	 * the "+0x0 [sunrpc]" */
+	sprint_symbol(action, (unsigned long)task->tk_action);
+	p = strchr(action, '+');
+	if (p)
+		*p = '\0';
+
+	printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n",
+		task->tk_pid, task->tk_flags, task->tk_status,
+		clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
+		clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
+		action, rpc_waitq);
 }
 
 void rpc_show_tasks(void)
-- 
cgit v1.2.3-70-g09d2


From cd983ef81b9d79573848dabf81277c7314220257 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 11 Jun 2008 17:56:13 -0400
Subject: SUNRPC: Remove obsolete messages during transport connect

Recent changes to the RPC client's transport connect logic make connect
status values ECONNREFUSED and ECONNRESET impossible.

Clean up xprt_connect_status() to account for these changes.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprt.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e1770f7ba0b..67996bd7fbf 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -690,7 +690,7 @@ static void xprt_connect_status(struct rpc_task *task)
 {
 	struct rpc_xprt	*xprt = task->tk_xprt;
 
-	if (task->tk_status >= 0) {
+	if (task->tk_status == 0) {
 		xprt->stat.connect_count++;
 		xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
 		dprintk("RPC: %5u xprt_connect_status: connection established\n",
@@ -699,12 +699,6 @@ static void xprt_connect_status(struct rpc_task *task)
 	}
 
 	switch (task->tk_status) {
-	case -ECONNREFUSED:
-	case -ECONNRESET:
-		dprintk("RPC: %5u xprt_connect_status: server %s refused "
-				"connection\n", task->tk_pid,
-				task->tk_client->cl_server);
-		break;
 	case -ENOTCONN:
 		dprintk("RPC: %5u xprt_connect_status: connection broken\n",
 				task->tk_pid);
-- 
cgit v1.2.3-70-g09d2


From b6b6152c46861dd914d0e6cea9c27df057d6e235 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <aglo@citi.umich.edu>
Date: Mon, 9 Jun 2008 16:51:31 -0400
Subject: rpc: bring back cl_chatty

The cl_chatty flag alows us to control whether a given rpc client leaves

	"server X not responding, timed out"

messages in the syslog.  Such messages make sense for ordinary nfs
clients (where an unresponsive server means applications on the
mountpoint are probably hanging), but not for the callback client (which
can fail more commonly, with the only result just of disabling some
optimizations).

Previously cl_chatty was removed, do to lack of users; reinstate it, and
use it for the nfsd's callback client.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfs4callback.c      |  2 +-
 include/linux/sunrpc/clnt.h |  4 +++-
 net/sunrpc/clnt.c           | 16 +++++++++++-----
 3 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4d4760e687c..702fa577aa6 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -381,7 +381,7 @@ static int do_probe_callback(void *data)
 		.program	= &cb_program,
 		.version	= nfs_cb_version[1]->number,
 		.authflavor	= RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
-		.flags		= (RPC_CLNT_CREATE_NOPING),
+		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
 	};
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6fff7f82ef1..764fd4c286e 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -42,7 +42,8 @@ struct rpc_clnt {
 
 	unsigned int		cl_softrtry : 1,/* soft timeouts */
 				cl_discrtry : 1,/* disconnect before retry */
-				cl_autobind : 1;/* use getport() */
+				cl_autobind : 1,/* use getport() */
+				cl_chatty   : 1;/* be verbose */
 
 	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */
 	const struct rpc_timeout *cl_timeout;	/* Timeout strategy */
@@ -114,6 +115,7 @@ struct rpc_create_args {
 #define RPC_CLNT_CREATE_NONPRIVPORT	(1UL << 3)
 #define RPC_CLNT_CREATE_NOPING		(1UL << 4)
 #define RPC_CLNT_CREATE_DISCRTRY	(1UL << 5)
+#define RPC_CLNT_CREATE_QUIET		(1UL << 6)
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0530eea37b5..09631f6e30e 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -324,6 +324,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 		clnt->cl_autobind = 1;
 	if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
 		clnt->cl_discrtry = 1;
+	if (!(args->flags & RPC_CLNT_CREATE_QUIET))
+		clnt->cl_chatty = 1;
 
 	return clnt;
 }
@@ -1149,7 +1151,8 @@ call_status(struct rpc_task *task)
 		rpc_exit(task, status);
 		break;
 	default:
-		printk("%s: RPC call returned error %d\n",
+		if (clnt->cl_chatty)
+			printk("%s: RPC call returned error %d\n",
 			       clnt->cl_protname, -status);
 		rpc_exit(task, status);
 	}
@@ -1174,7 +1177,8 @@ call_timeout(struct rpc_task *task)
 	task->tk_timeouts++;
 
 	if (RPC_IS_SOFT(task)) {
-		printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
+		if (clnt->cl_chatty)
+			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
 				clnt->cl_protname, clnt->cl_server);
 		rpc_exit(task, -EIO);
 		return;
@@ -1182,7 +1186,8 @@ call_timeout(struct rpc_task *task)
 
 	if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
 		task->tk_flags |= RPC_CALL_MAJORSEEN;
-		printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
+		if (clnt->cl_chatty)
+			printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
 			clnt->cl_protname, clnt->cl_server);
 	}
 	rpc_force_rebind(clnt);
@@ -1213,8 +1218,9 @@ call_decode(struct rpc_task *task)
 			task->tk_pid, task->tk_status);
 
 	if (task->tk_flags & RPC_CALL_MAJORSEEN) {
-		printk(KERN_NOTICE "%s: server %s OK\n",
-			clnt->cl_protname, clnt->cl_server);
+		if (clnt->cl_chatty)
+			printk(KERN_NOTICE "%s: server %s OK\n",
+				clnt->cl_protname, clnt->cl_server);
 		task->tk_flags &= ~RPC_CALL_MAJORSEEN;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 720b8f2d6f7de9e16f1217448cc7396e1604e175 Mon Sep 17 00:00:00 2001
From: "\\\\\\\"J. Bruce Fields\\\\\\" <bfields@citi.umich.edu>
Date: Mon, 9 Jun 2008 16:51:33 -0400
Subject: rpc: eliminate unused variable in auth_gss upcall code

Also, a minor comment grammar fix in the same file.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index bf7585b8054..ebfd630541f 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -272,7 +272,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
 	return NULL;
 }
 
-/* Try to add a upcall to the pipefs queue.
+/* Try to add an upcall to the pipefs queue.
  * If an upcall owned by our uid already exists, then we return a reference
  * to that upcall instead of adding the new upcall.
  */
@@ -493,7 +493,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
 	const void *p, *end;
 	void *buf;
-	struct rpc_clnt *clnt;
 	struct gss_upcall_msg *gss_msg;
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct gss_cl_ctx *ctx;
@@ -507,7 +506,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	if (!buf)
 		goto out;
 
-	clnt = RPC_I(inode)->private;
 	err = -EFAULT;
 	if (copy_from_user(buf, src, mlen))
 		goto err;
-- 
cgit v1.2.3-70-g09d2


From d25a03cf966f2cf9990dc0bf2a921a554919ea34 Mon Sep 17 00:00:00 2001
From: "\\\\\\\"J. Bruce Fields\\\\\\" <bfields@citi.umich.edu>
Date: Mon, 9 Jun 2008 16:51:34 -0400
Subject: rpc: remove some unused macros

There used to be a print_hexl() function that used isprint(), now gone.
I don't know why NFS_NGROUPS and CA_RUN_AS_MACHINE were here.

I also don't know why another #define that's actually used was marked
"unused".

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index ebfd630541f..834a83199bd 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -63,22 +63,11 @@ static const struct rpc_credops gss_nullops;
 # define RPCDBG_FACILITY	RPCDBG_AUTH
 #endif
 
-#define NFS_NGROUPS	16
-
-#define GSS_CRED_SLACK		1024		/* XXX: unused */
+#define GSS_CRED_SLACK		1024
 /* length of a krb5 verifier (48), plus data added before arguments when
  * using integrity (two 4-byte integers): */
 #define GSS_VERF_SLACK		100
 
-/* XXX this define must match the gssd define
-* as it is passed to gssd to signal the use of
-* machine creds should be part of the shared rpc interface */
-
-#define CA_RUN_AS_MACHINE  0x00000200
-
-/* dump the buffer in `emacs-hexl' style */
-#define isprint(c)      ((c > 0x1f) && (c < 0x7f))
-
 struct gss_auth {
 	struct kref kref;
 	struct rpc_auth rpc_auth;
-- 
cgit v1.2.3-70-g09d2


From a486aeda9b2b0d944aecce7871b3186379b898de Mon Sep 17 00:00:00 2001
From: "\\\\\\\"J. Bruce Fields\\\\\\" <bfields@citi.umich.edu>
Date: Mon, 9 Jun 2008 16:51:35 -0400
Subject: rpc: minor cleanup of scheduler callback code

Try to make the comment here a little more clear and concise.

Also, this macro definition seems unnecessary.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  1 -
 net/sunrpc/sched.c           | 14 +++++---------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index d1a5c8c1a0f..64981a2f1ca 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -135,7 +135,6 @@ struct rpc_task_setup {
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
 #define RPC_DO_ROOTOVERRIDE(t)	((t)->tk_flags & RPC_TASK_ROOTCREDS)
 #define RPC_ASSASSINATED(t)	((t)->tk_flags & RPC_TASK_KILLED)
-#define RPC_DO_CALLBACK(t)	((t)->tk_callback != NULL)
 #define RPC_IS_SOFT(t)		((t)->tk_flags & RPC_TASK_SOFT)
 
 #define RPC_TASK_RUNNING	0
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6eab9bf94ba..6288af05c20 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -626,19 +626,15 @@ static void __rpc_execute(struct rpc_task *task)
 		/*
 		 * Execute any pending callback.
 		 */
-		if (RPC_DO_CALLBACK(task)) {
-			/* Define a callback save pointer */
+		if (task->tk_callback) {
 			void (*save_callback)(struct rpc_task *);
 
 			/*
-			 * If a callback exists, save it, reset it,
-			 * call it.
-			 * The save is needed to stop from resetting
-			 * another callback set within the callback handler
-			 * - Dave
+			 * We set tk_callback to NULL before calling it,
+			 * in case it sets the tk_callback field itself:
 			 */
-			save_callback=task->tk_callback;
-			task->tk_callback=NULL;
+			save_callback = task->tk_callback;
+			task->tk_callback = NULL;
 			save_callback(task);
 		}
 
-- 
cgit v1.2.3-70-g09d2


From b22602a673b1743bba4b62bb404ffd3b269d2f09 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 6 Jun 2008 13:22:25 -0400
Subject: SUNRPC: Ensure all transports set rq_xtime consistently

The RPC client uses the rq_xtime field in each RPC request to determine the
round-trip time of the request.  Currently, the rq_xtime field is
initialized by each transport just before it starts enqueing a request to
be sent.  However, transports do not handle initializing this value
consistently; sometimes they don't initialize it at all.

To make the measurement of request round-trip time consistent for all
RPC client transport capabilities, pull rq_xtime initialization into the
RPC client's generic transport logic.  Now all transports will get a
standardized RTT measure automatically, from:

  xprt_transmit()

to

  xprt_complete_rqst()

This makes round-trip time calculation more accurate for the TCP transport.
The socket ->sendmsg() method can return "-EAGAIN" if the socket's output
buffer is full, so the TCP transport's ->send_request() method may call
the ->sendmsg() method repeatedly until it gets all of the request's bytes
queued in the socket's buffer.

Currently, the TCP transport sets the rq_xtime field every time through
that loop so the final value is the timestamp just before the *last* call
to the underlying socket's ->sendmsg() method.  After this patch, the
rq_xtime field contains a timestamp that reflects the time just before the
*first* call to ->sendmsg().

This is consequential under heavy workloads because large requests often
take multiple ->sendmsg() calls to get all the bytes of a request queued.
The TCP transport causes the request to sleep until the remote end of the
socket has received enough bytes to clear space in the socket's local
output buffer.  This delay can be quite significant.

The method introduced by this patch is a more accurate measure of RTT
for stream transports, since the server can cause enough back pressure
to delay (ie increase the latency of) requests from the client.

Additionally, this patch corrects the behavior of the RDMA transport, which
entirely neglected to initialize the rq_xtime field.  RPC performance
metrics for RDMA transports now display correct RPC request round trip
times.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Acked-by: Tom Talpey <thomas.talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprt.c     | 1 +
 net/sunrpc/xprtsock.c | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 67996bd7fbf..99a52aabe33 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -872,6 +872,7 @@ void xprt_transmit(struct rpc_task *task)
 		return;
 
 	req->rq_connect_cookie = xprt->connect_cookie;
+	req->rq_xtime = jiffies;
 	status = xprt->ops->send_request(task);
 	if (status == 0) {
 		dprintk("RPC: %5u xmit complete\n", task->tk_pid);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ddbe981ab51..4486c59c3ac 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -579,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task)
 				req->rq_svec->iov_base,
 				req->rq_svec->iov_len);
 
-	req->rq_xtime = jiffies;
 	status = xs_sendpages(transport->sock,
 			      xs_addr(xprt),
 			      xprt->addrlen, xdr,
@@ -671,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	 * to cope with writespace callbacks arriving _after_ we have
 	 * called sendmsg(). */
 	while (1) {
-		req->rq_xtime = jiffies;
 		status = xs_sendpages(transport->sock,
 					NULL, 0, xdr, req->rq_bytes_sent);
 
-- 
cgit v1.2.3-70-g09d2


From 877fcf103982e52a59a1035378b4c0b8c63fe004 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 25 Jun 2008 17:24:23 -0400
Subject: SUNRPC: More useful debugging output for rpcb client

Clean up dprintk's in rpcb client's XDR decoder functions.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index e6fb21b19b8..cf2b91613ac 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -438,7 +438,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
 			       unsigned short *portp)
 {
 	*portp = (unsigned short) ntohl(*p++);
-	dprintk("RPC:      rpcb_decode_getport result %u\n",
+	dprintk("RPC:       rpcb_decode_getport result %u\n",
 			*portp);
 	return 0;
 }
@@ -447,8 +447,8 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
 			   unsigned int *boolp)
 {
 	*boolp = (unsigned int) ntohl(*p++);
-	dprintk("RPC:      rpcb_decode_set result %u\n",
-			*boolp);
+	dprintk("RPC:       rpcb_decode_set: call %s\n",
+			(*boolp ? "succeeded" : "failed"));
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From fc200e794d723bc88c39859e8f096b717532f9c9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 25 Jun 2008 17:24:31 -0400
Subject: SUNRPC: Document some naked integers in rpcbind client

Clean up: Replace naked integers that represent rpcbind protocol versions.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 49 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index cf2b91613ac..b23a719aca3 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -32,6 +32,10 @@
 #define RPCBIND_PROGRAM		(100000u)
 #define RPCBIND_PORT		(111u)
 
+#define RPCBVERS_2		(2u)
+#define RPCBVERS_3		(3u)
+#define RPCBVERS_4		(4u)
+
 enum {
 	RPCBPROC_NULL,
 	RPCBPROC_SET,
@@ -82,7 +86,7 @@ static struct rpc_procinfo rpcb_procedures2[];
 static struct rpc_procinfo rpcb_procedures3[];
 
 struct rpcb_info {
-	int			rpc_vers;
+	u32			rpc_vers;
 	struct rpc_procinfo *	rpc_proc;
 };
 
@@ -177,7 +181,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 			prog, vers, prot, port);
 
 	rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
-				sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1);
+				sizeof(sin), XPRT_TRANSPORT_UDP, RPCBVERS_2, 1);
 	if (IS_ERR(rpcb_clnt))
 		return PTR_ERR(rpcb_clnt);
 
@@ -227,7 +231,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
 		__func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
 
 	rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
-				sizeof(*sin), prot, 2, 0);
+				sizeof(*sin), prot, RPCBVERS_2, 0);
 	if (IS_ERR(rpcb_clnt))
 		return PTR_ERR(rpcb_clnt);
 
@@ -588,35 +592,54 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 
 static struct rpcb_info rpcb_next_version[] = {
 #ifdef CONFIG_SUNRPC_BIND34
-	{ 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
-	{ 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
+	{
+		.rpc_vers	= RPCBVERS_4,
+		.rpc_proc	= &rpcb_procedures4[RPCBPROC_GETVERSADDR],
+	},
+	{
+		.rpc_vers	= RPCBVERS_3,
+		.rpc_proc	= &rpcb_procedures3[RPCBPROC_GETADDR],
+	},
 #endif
-	{ 2, &rpcb_procedures2[RPCBPROC_GETPORT] },
-	{ 0, NULL },
+	{
+		.rpc_vers	= RPCBVERS_2,
+		.rpc_proc	= &rpcb_procedures2[RPCBPROC_GETPORT],
+	},
+	{
+		.rpc_proc	= NULL,
+	},
 };
 
 static struct rpcb_info rpcb_next_version6[] = {
 #ifdef CONFIG_SUNRPC_BIND34
-	{ 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
-	{ 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
+	{
+		.rpc_vers	= RPCBVERS_4,
+		.rpc_proc	= &rpcb_procedures4[RPCBPROC_GETVERSADDR],
+	},
+	{
+		.rpc_vers	= RPCBVERS_3,
+		.rpc_proc	= &rpcb_procedures3[RPCBPROC_GETADDR],
+	},
 #endif
-	{ 0, NULL },
+	{
+		.rpc_proc	= NULL,
+	},
 };
 
 static struct rpc_version rpcb_version2 = {
-	.number		= 2,
+	.number		= RPCBVERS_2,
 	.nrprocs	= RPCB_HIGHPROC_2,
 	.procs		= rpcb_procedures2
 };
 
 static struct rpc_version rpcb_version3 = {
-	.number		= 3,
+	.number		= RPCBVERS_3,
 	.nrprocs	= RPCB_HIGHPROC_3,
 	.procs		= rpcb_procedures3
 };
 
 static struct rpc_version rpcb_version4 = {
-	.number		= 4,
+	.number		= RPCBVERS_4,
 	.nrprocs	= RPCB_HIGHPROC_4,
 	.procs		= rpcb_procedures4
 };
-- 
cgit v1.2.3-70-g09d2


From 6a774051573042cdeb57e81f77b36c25e5856739 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 25 Jun 2008 17:24:39 -0400
Subject: SUNRPC: Use rpcbind version 2 GETPORT

Clean up: Change the version 2 procedure name to GETPORT.  It's the same
procedure number as GETADDR, but version 2 implementations usually refer
to it as GETPORT.

This also now matches the procedure name used in the version 2 procedure
entry in the rpcb_next_version[] array, making it slightly less confusing.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index b23a719aca3..a70cc1e1179 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -575,7 +575,7 @@ out_err:
 static struct rpc_procinfo rpcb_procedures2[] = {
 	PROC(SET,		mapping,	set),
 	PROC(UNSET,		mapping,	set),
-	PROC(GETADDR,		mapping,	getport),
+	PROC(GETPORT,		mapping,	getport),
 };
 
 static struct rpc_procinfo rpcb_procedures3[] = {
-- 
cgit v1.2.3-70-g09d2


From 8842413aa4c3220ce9313791f99808fc149ca16d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 25 Jun 2008 17:24:47 -0400
Subject: SUNRPC: Use GETADDR for rpcbind version 4 queries

Some rpcbind servers that do support rpcbind version 4 do not support
the GETVERSADDR procedure.  Use GETADDR for querying rpcbind servers
via rpcbind version 4 instead of GETVERSADDR.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index a70cc1e1179..625ba72e624 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -587,6 +587,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 static struct rpc_procinfo rpcb_procedures4[] = {
 	PROC(SET,		mapping,	set),
 	PROC(UNSET,		mapping,	set),
+	PROC(GETADDR,		getaddr,	getaddr),
 	PROC(GETVERSADDR,	getaddr,	getaddr),
 };
 
@@ -594,7 +595,7 @@ static struct rpcb_info rpcb_next_version[] = {
 #ifdef CONFIG_SUNRPC_BIND34
 	{
 		.rpc_vers	= RPCBVERS_4,
-		.rpc_proc	= &rpcb_procedures4[RPCBPROC_GETVERSADDR],
+		.rpc_proc	= &rpcb_procedures4[RPCBPROC_GETADDR],
 	},
 	{
 		.rpc_vers	= RPCBVERS_3,
@@ -614,7 +615,7 @@ static struct rpcb_info rpcb_next_version6[] = {
 #ifdef CONFIG_SUNRPC_BIND34
 	{
 		.rpc_vers	= RPCBVERS_4,
-		.rpc_proc	= &rpcb_procedures4[RPCBPROC_GETVERSADDR],
+		.rpc_proc	= &rpcb_procedures4[RPCBPROC_GETADDR],
 	},
 	{
 		.rpc_vers	= RPCBVERS_3,
-- 
cgit v1.2.3-70-g09d2


From 40fef8a649e5344bfb6a67a7cc3def3e0dad6448 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 25 Jun 2008 17:24:54 -0400
Subject: SUNRPC: Use only rpcbind v2 for AF_INET requests

Some server vendors support the higher versions of rpcbind only for
AF_INET6.  The kernel doesn't need to use v3 or v4 for AF_INET anyway,
so change the kernel's rpcbind client to query AF_INET servers over
rpcbind v2 only.

This has a few interesting benefits:

1. If the rpcbind request is going over TCP, and the server doesn't
   support rpcbind versions 3 or 4, the client reduces by two the number
   of ephemeral ports left in TIME_WAIT for each rpcbind request.  This
   will help during NFS mount storms.

2. The rpcbind interaction with servers that don't support rpcbind
   versions 3 or 4 will use less network traffic.  Also helpful
   during mount storms.

3. We can eliminate the kernel build option that controls whether the
   kernel's rpcbind client uses rpcbind version 3 and 4 for AF_INET
   servers.  Less complicated kernel configuration...

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/Kconfig             | 21 ---------------------
 net/sunrpc/rpcb_clnt.c | 12 ------------
 2 files changed, 33 deletions(-)

(limited to 'net')

diff --git a/fs/Kconfig b/fs/Kconfig
index 1c16de9611e..0ce72dcd6b9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1799,27 +1799,6 @@ config SUNRPC_XPRT_RDMA
 
 	  If unsure, say N.
 
-config SUNRPC_BIND34
-	bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
-	depends on SUNRPC && EXPERIMENTAL
-	default n
-	help
-	  RPC requests over IPv6 networks require support for larger
-	  addresses when performing an RPC bind.  Sun added support for
-	  IPv6 addressing by creating two new versions of the rpcbind
-	  protocol (RFC 1833).
-
-	  This option enables support in the kernel RPC client for
-	  querying rpcbind servers via versions 3 and 4 of the rpcbind
-	  protocol.  The kernel automatically falls back to version 2
-	  if a remote rpcbind service does not support versions 3 or 4.
-	  By themselves, these new versions do not provide support for
-	  RPC over IPv6, but the new protocol versions are necessary to
-	  support it.
-
-	  If unsure, say N to get traditional behavior (version 2 rpcbind
-	  requests only).
-
 config RPCSEC_GSS_KRB5
 	tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
 	depends on SUNRPC && EXPERIMENTAL
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 625ba72e624..c62e446723a 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -592,16 +592,6 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 };
 
 static struct rpcb_info rpcb_next_version[] = {
-#ifdef CONFIG_SUNRPC_BIND34
-	{
-		.rpc_vers	= RPCBVERS_4,
-		.rpc_proc	= &rpcb_procedures4[RPCBPROC_GETADDR],
-	},
-	{
-		.rpc_vers	= RPCBVERS_3,
-		.rpc_proc	= &rpcb_procedures3[RPCBPROC_GETADDR],
-	},
-#endif
 	{
 		.rpc_vers	= RPCBVERS_2,
 		.rpc_proc	= &rpcb_procedures2[RPCBPROC_GETPORT],
@@ -612,7 +602,6 @@ static struct rpcb_info rpcb_next_version[] = {
 };
 
 static struct rpcb_info rpcb_next_version6[] = {
-#ifdef CONFIG_SUNRPC_BIND34
 	{
 		.rpc_vers	= RPCBVERS_4,
 		.rpc_proc	= &rpcb_procedures4[RPCBPROC_GETADDR],
@@ -621,7 +610,6 @@ static struct rpcb_info rpcb_next_version6[] = {
 		.rpc_vers	= RPCBVERS_3,
 		.rpc_proc	= &rpcb_procedures3[RPCBPROC_GETADDR],
 	},
-#endif
 	{
 		.rpc_proc	= NULL,
 	},
-- 
cgit v1.2.3-70-g09d2


From 381ba74af55e58bca4c01553835a360a9f6fbb07 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 7 Jul 2008 12:18:53 -0400
Subject: SUNRPC: Ensure our task is notified when an rpcbind call is done

If another task is busy in rpcb_getport_async number, it is more efficient
to have it wake us up when it has finished instead of arbitrarily sleeping
for 5 seconds.

Also ensure that rpcb_wake_rpcbind_waiters() is called regardless of
whether or not rpcb_getport_done() gets called.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c      |  8 +++-----
 net/sunrpc/rpcb_clnt.c | 38 ++++++++++++++++++++------------------
 2 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 09631f6e30e..76739e928d0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -942,11 +942,9 @@ call_bind_status(struct rpc_task *task)
 	}
 
 	switch (task->tk_status) {
-	case -EAGAIN:
-		dprintk("RPC: %5u rpcbind waiting for another request "
-				"to finish\n", task->tk_pid);
-		/* avoid busy-waiting here -- could be a network outage. */
-		rpc_delay(task, 5*HZ);
+	case -ENOMEM:
+		dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid);
+		rpc_delay(task, HZ >> 2);
 		goto retry_timeout;
 	case -EACCES:
 		dprintk("RPC: %5u remote rpcbind: RPC program/version "
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c62e446723a..24e93e0a0a2 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -68,6 +68,7 @@ enum {
 #define RPCB_MAXOWNERLEN	sizeof(RPCB_OWNER_STRING)
 
 static void			rpcb_getport_done(struct rpc_task *, void *);
+static void			rpcb_map_release(void *data);
 static struct rpc_program	rpcb_program;
 
 struct rpcbind_args {
@@ -80,6 +81,8 @@ struct rpcbind_args {
 	const char *		r_netid;
 	const char *		r_addr;
 	const char *		r_owner;
+
+	int			r_status;
 };
 
 static struct rpc_procinfo rpcb_procedures2[];
@@ -93,14 +96,6 @@ struct rpcb_info {
 static struct rpcb_info rpcb_next_version[];
 static struct rpcb_info rpcb_next_version6[];
 
-static void rpcb_map_release(void *data)
-{
-	struct rpcbind_args *map = data;
-
-	xprt_put(map->r_xprt);
-	kfree(map);
-}
-
 static const struct rpc_call_ops rpcb_getport_ops = {
 	.rpc_call_done		= rpcb_getport_done,
 	.rpc_release		= rpcb_map_release,
@@ -112,6 +107,15 @@ static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status)
 	rpc_wake_up_status(&xprt->binding, status);
 }
 
+static void rpcb_map_release(void *data)
+{
+	struct rpcbind_args *map = data;
+
+	rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status);
+	xprt_put(map->r_xprt);
+	kfree(map);
+}
+
 static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 				    size_t salen, int proto, u32 version,
 				    int privileged)
@@ -293,17 +297,16 @@ void rpcb_getport_async(struct rpc_task *task)
 	/* Autobind on cloned rpc clients is discouraged */
 	BUG_ON(clnt->cl_parent != clnt);
 
+	/* Put self on the wait queue to ensure we get notified if
+	 * some other task is already attempting to bind the port */
+	rpc_sleep_on(&xprt->binding, task, NULL);
+
 	if (xprt_test_and_set_binding(xprt)) {
-		status = -EAGAIN;	/* tell caller to check again */
 		dprintk("RPC: %5u %s: waiting for another binder\n",
 			task->tk_pid, __func__);
-		goto bailout_nowake;
+		return;
 	}
 
-	/* Put self on queue before sending rpcbind request, in case
-	 * rpcb_getport_done completes before we return from rpc_run_task */
-	rpc_sleep_on(&xprt->binding, task, NULL);
-
 	/* Someone else may have bound if we slept */
 	if (xprt_bound(xprt)) {
 		status = 0;
@@ -365,15 +368,15 @@ void rpcb_getport_async(struct rpc_task *task)
 	map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
 	map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
 	map->r_owner = RPCB_OWNER_STRING;	/* ignored for GETADDR */
+	map->r_status = -EIO;
 
 	child = rpcb_call_async(rpcb_clnt, map, proc);
 	rpc_release_client(rpcb_clnt);
 	if (IS_ERR(child)) {
-		status = -EIO;
 		/* rpcb_map_release() has freed the arguments */
 		dprintk("RPC: %5u %s: rpc_run_task failed\n",
 			task->tk_pid, __func__);
-		goto bailout_nofree;
+		return;
 	}
 	rpc_put_task(child);
 
@@ -382,7 +385,6 @@ void rpcb_getport_async(struct rpc_task *task)
 
 bailout_nofree:
 	rpcb_wake_rpcbind_waiters(xprt, status);
-bailout_nowake:
 	task->tk_status = status;
 }
 EXPORT_SYMBOL_GPL(rpcb_getport_async);
@@ -421,7 +423,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
 	dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n",
 			child->tk_pid, status, map->r_port);
 
-	rpcb_wake_rpcbind_waiters(xprt, status);
+	map->r_status = status;
 }
 
 static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
-- 
cgit v1.2.3-70-g09d2


From adeed48090fc370afa0db8d007748ee72a40b578 Mon Sep 17 00:00:00 2001
From: Mattias Nissler <mattias.nissler@gmx.de>
Date: Mon, 7 Jul 2008 23:08:19 +0200
Subject: rc80211_pid: Fix fast_start parameter handling

This removes the fast_start parameter from the rc_pid parameters
information and instead uses the parameter macro when initializing
the rc_pid state. Since the parameter is only used on initialization,
there is no point of making exporting it via debugfs. This also fixes
uninitialized memory references to the fast_start and norm_offset
parameters detected by the kmemcheck utility.  Thanks to Vegard Nossum
for reporting the bug.

Signed-off-by: Mattias Nissler <mattias.nissler@gmx.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rc80211_pid.h      |  5 -----
 net/mac80211/rc80211_pid_algo.c | 31 +++++++++++++------------------
 2 files changed, 13 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 04afc13ed82..4ea7b97d1af 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -141,7 +141,6 @@ struct rc_pid_events_file_info {
  *	rate behaviour values (lower means we should trust more what we learnt
  *	about behaviour of rates, higher means we should trust more the natural
  *	ordering of rates)
- * @fast_start: if Y, push high rates right after initialization
  */
 struct rc_pid_debugfs_entries {
 	struct dentry *dir;
@@ -154,7 +153,6 @@ struct rc_pid_debugfs_entries {
 	struct dentry *sharpen_factor;
 	struct dentry *sharpen_duration;
 	struct dentry *norm_offset;
-	struct dentry *fast_start;
 };
 
 void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
@@ -267,9 +265,6 @@ struct rc_pid_info {
 	/* Normalization offset. */
 	unsigned int norm_offset;
 
-	/* Fast starst parameter. */
-	unsigned int fast_start;
-
 	/* Rates information. */
 	struct rc_pid_rateinfo *rinfo;
 
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index a849b745bdb..bcd27c1d759 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -398,13 +398,25 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
 		return NULL;
 	}
 
+	pinfo->target = RC_PID_TARGET_PF;
+	pinfo->sampling_period = RC_PID_INTERVAL;
+	pinfo->coeff_p = RC_PID_COEFF_P;
+	pinfo->coeff_i = RC_PID_COEFF_I;
+	pinfo->coeff_d = RC_PID_COEFF_D;
+	pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT;
+	pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR;
+	pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION;
+	pinfo->norm_offset = RC_PID_NORM_OFFSET;
+	pinfo->rinfo = rinfo;
+	pinfo->oldrate = 0;
+
 	/* Sort the rates. This is optimized for the most common case (i.e.
 	 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
 	 * mapping too. */
 	for (i = 0; i < sband->n_bitrates; i++) {
 		rinfo[i].index = i;
 		rinfo[i].rev_index = i;
-		if (pinfo->fast_start)
+		if (RC_PID_FAST_START)
 			rinfo[i].diff = 0;
 		else
 			rinfo[i].diff = i * pinfo->norm_offset;
@@ -425,19 +437,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
 			break;
 	}
 
-	pinfo->target = RC_PID_TARGET_PF;
-	pinfo->sampling_period = RC_PID_INTERVAL;
-	pinfo->coeff_p = RC_PID_COEFF_P;
-	pinfo->coeff_i = RC_PID_COEFF_I;
-	pinfo->coeff_d = RC_PID_COEFF_D;
-	pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT;
-	pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR;
-	pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION;
-	pinfo->norm_offset = RC_PID_NORM_OFFSET;
-	pinfo->fast_start = RC_PID_FAST_START;
-	pinfo->rinfo = rinfo;
-	pinfo->oldrate = 0;
-
 #ifdef CONFIG_MAC80211_DEBUGFS
 	de = &pinfo->dentries;
 	de->dir = debugfs_create_dir("rc80211_pid",
@@ -465,9 +464,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
 	de->norm_offset = debugfs_create_u32("norm_offset",
 					     S_IRUSR | S_IWUSR, de->dir,
 					     &pinfo->norm_offset);
-	de->fast_start = debugfs_create_bool("fast_start",
-					     S_IRUSR | S_IWUSR, de->dir,
-					     &pinfo->fast_start);
 #endif
 
 	return pinfo;
@@ -479,7 +475,6 @@ static void rate_control_pid_free(void *priv)
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct rc_pid_debugfs_entries *de = &pinfo->dentries;
 
-	debugfs_remove(de->fast_start);
 	debugfs_remove(de->norm_offset);
 	debugfs_remove(de->sharpen_duration);
 	debugfs_remove(de->sharpen_factor);
-- 
cgit v1.2.3-70-g09d2


From 6b69fe0c73c0f5a8dacf8f889db3cc9adee53649 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 9 Jul 2008 15:06:12 -0700
Subject: netfilter: nf_conntrack_tcp: fix endless loop

When a conntrack entry is destroyed in process context and destruction
is interrupted by packet processing and the packet is an attempt to
reopen a closed connection, TCP conntrack tries to kill the old entry
itself and returns NF_REPEAT to pass the packet through the hook
again. This may lead to an endless loop: TCP conntrack repeatedly
finds the old entry, but can not kill it itself since destruction
is already in progress, but destruction in process context can not
complete since TCP conntrack is keeping the CPU busy.

Drop the packet in TCP conntrack if we can't kill the connection
ourselves to avoid this.

Reported by: hemao77@gmail.com [ Kernel bugzilla #11058 ]
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 271cd01d57a..dd28fb239a6 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -844,9 +844,15 @@ static int tcp_packet(struct nf_conn *ct,
 			/* Attempt to reopen a closed/aborted connection.
 			 * Delete this connection and look up again. */
 			write_unlock_bh(&tcp_lock);
-			if (del_timer(&ct->timeout))
+			/* Only repeat if we can actually remove the timer.
+			 * Destruction may already be in progress in process
+			 * context and we must give it a chance to terminate.
+			 */
+			if (del_timer(&ct->timeout)) {
 				ct->timeout.function((unsigned long)ct);
-			return -NF_REPEAT;
+				return -NF_REPEAT;
+			}
+			return -NF_DROP;
 		}
 		/* Fall through */
 	case TCP_CONNTRACK_IGNORE:
-- 
cgit v1.2.3-70-g09d2


From 252815b0cfe711001eff0327872209986b36d490 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 9 Jul 2008 15:06:45 -0700
Subject: netfilter: nf_nat_snmp_basic: fix a range check in NAT for SNMP

Fix a range check in netfilter IP NAT for SNMP to always use a big enough size
variable that the compiler won't moan about comparing it to ULONG_MAX/8 on a
64-bit platform.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_snmp_basic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 7750c97fde7..ffeaffc3fff 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -439,8 +439,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
 				     unsigned int *len)
 {
 	unsigned long subid;
-	unsigned int  size;
 	unsigned long *optr;
+	size_t size;
 
 	size = eoc - ctx->pointer + 1;
 
-- 
cgit v1.2.3-70-g09d2


From 3d8ea1fd7001f39b5cc0ad2ff51696292ea3cfbf Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Thu, 10 Jul 2008 16:51:32 -0700
Subject: tcp: correct kcalloc usage

kcalloc is supposed to be called with the count as its first argument and
the element size as the second.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_probe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 5ff0ce6e9d3..7ddc30f0744 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -224,7 +224,7 @@ static __init int tcpprobe_init(void)
 	if (bufsize < 0)
 		return -EINVAL;
 
-	tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL);
+	tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL);
 	if (!tcp_probe.log)
 		goto err0;
 
-- 
cgit v1.2.3-70-g09d2


From 2e655571c618434c24ac2ca989374fdd84470d6d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 10 Jul 2008 16:52:52 -0700
Subject: ipv4: fib_trie: Fix lookup error return

In commit a07f5f508a4d9728c8e57d7f66294bf5b254ff7f "[IPV4] fib_trie: style
cleanup", the changes to check_leaf() and fn_trie_lookup() were wrong - where
fn_trie_lookup() would previously return a negative error value from
check_leaf(), it now returns 0.

Now fn_trie_lookup() doesn't appear to care about plen, so we can revert
check_leaf() to returning the error value.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Tested-by: William Boughton <bill@boughton.de>
Acked-by: Stephen Heminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4b02d14e7ab..e1600ad8fb0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1359,17 +1359,17 @@ static int check_leaf(struct trie *t, struct leaf *l,
 			t->stats.semantic_match_miss++;
 #endif
 		if (err <= 0)
-			return plen;
+			return err;
 	}
 
-	return -1;
+	return 1;
 }
 
 static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
 			  struct fib_result *res)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
-	int plen, ret = 0;
+	int ret;
 	struct node *n;
 	struct tnode *pn;
 	int pos, bits;
@@ -1393,10 +1393,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
 
 	/* Just a leaf? */
 	if (IS_LEAF(n)) {
-		plen = check_leaf(t, (struct leaf *)n, key, flp, res);
-		if (plen < 0)
-			goto failed;
-		ret = 0;
+		ret = check_leaf(t, (struct leaf *)n, key, flp, res);
 		goto found;
 	}
 
@@ -1421,11 +1418,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
 		}
 
 		if (IS_LEAF(n)) {
-			plen = check_leaf(t, (struct leaf *)n, key, flp, res);
-			if (plen < 0)
+			ret = check_leaf(t, (struct leaf *)n, key, flp, res);
+			if (ret > 0)
 				goto backtrace;
-
-			ret = 0;
 			goto found;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From fe785bee05f08d37b34b7399d003b74199274ce4 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 10 Jul 2008 16:53:39 -0700
Subject: netlabel: netlink_unicast calls kfree_skb on error path by itself

So, no need to kfree_skb here on the error path. In this case we can
simply return.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_cipso_v4.c  |  7 +------
 net/netlabel/netlabel_mgmt.c      | 12 ++----------
 net/netlabel/netlabel_unlabeled.c |  6 +-----
 3 files changed, 4 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index fdc14a0d21a..9080c61b71a 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -584,12 +584,7 @@ list_start:
 	rcu_read_unlock();
 
 	genlmsg_end(ans_skb, data);
-
-	ret_val = genlmsg_reply(ans_skb, info);
-	if (ret_val != 0)
-		goto list_failure;
-
-	return 0;
+	return genlmsg_reply(ans_skb, info);
 
 list_retry:
 	/* XXX - this limit is a guesstimate */
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 22c19126780..44be5d5261f 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -386,11 +386,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
 	rcu_read_unlock();
 
 	genlmsg_end(ans_skb, data);
-
-	ret_val = genlmsg_reply(ans_skb, info);
-	if (ret_val != 0)
-		goto listdef_failure;
-	return 0;
+	return genlmsg_reply(ans_skb, info);
 
 listdef_failure_lock:
 	rcu_read_unlock();
@@ -501,11 +497,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
 		goto version_failure;
 
 	genlmsg_end(ans_skb, data);
-
-	ret_val = genlmsg_reply(ans_skb, info);
-	if (ret_val != 0)
-		goto version_failure;
-	return 0;
+	return genlmsg_reply(ans_skb, info);
 
 version_failure:
 	kfree_skb(ans_skb);
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 52b2611a6eb..56f80872924 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1107,11 +1107,7 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
 		goto list_failure;
 
 	genlmsg_end(ans_skb, data);
-
-	ret_val = genlmsg_reply(ans_skb, info);
-	if (ret_val != 0)
-		goto list_failure;
-	return 0;
+	return genlmsg_reply(ans_skb, info);
 
 list_failure:
 	kfree_skb(ans_skb);
-- 
cgit v1.2.3-70-g09d2


From 0ce28553cc018be5022f51e67c87997f7271534e Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@parallels.com>
Date: Thu, 10 Jul 2008 16:54:50 -0700
Subject: ipv6: missed namespace context in ipv6_rthdr_rcv

Signed-off-by: Denis V. Lunev <den@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/exthdrs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 3cd1c993d52..dcf94fdfb86 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -445,7 +445,7 @@ looped_back:
 			kfree_skb(skb);
 			return -1;
 		}
-		if (!ipv6_chk_home_addr(&init_net, addr)) {
+		if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INADDRERRORS);
 			kfree_skb(skb);
-- 
cgit v1.2.3-70-g09d2


From ccf9b3b83d0e56fbf20c00a08b15031ce13204a7 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 10 Jul 2008 16:55:37 -0700
Subject: xfrm: Add a XFRM_STATE_AF_UNSPEC flag to xfrm_usersa_info

Add a XFRM_STATE_AF_UNSPEC flag to handle the AF_UNSPEC behavior for
the selector family. Userspace applications can set this flag to leave
the selector family of the xfrm_state unspecified.  This can be used
to to handle inter family tunnels if the selector is not set from
userspace.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 1 +
 net/xfrm/xfrm_user.c | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 2ca6bae8872..fb0c215a305 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -339,6 +339,7 @@ struct xfrm_usersa_info {
 #define XFRM_STATE_NOPMTUDISC	4
 #define XFRM_STATE_WILDRECV	8
 #define XFRM_STATE_ICMP		16
+#define XFRM_STATE_AF_UNSPEC	32
 };
 
 struct xfrm_usersa_id {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b976d9ed10e..04c41504f84 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -277,9 +277,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
 	memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr));
 	x->props.flags = p->flags;
 
-	if (!x->sel.family)
+	if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC))
 		x->sel.family = p->family;
-
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 2013c7e35aeba39777f9b3eef8a70207b3931152 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 13 Jul 2008 11:51:40 +0100
Subject: dccp ccid-3: Fix error in loss detection

The TFRC loss detection code used the wrong loss condition (RFC 4340, 7.7.1):
 * the difference between sequence numbers s1 and s2 instead of
 * the number of packets missing between s1 and s2 (one less than the distance).

Since this condition appears in many places of the code, it has been put into a
separate function, dccp_loss_free().

Further changes:
----------------
 * tidied up incorrect typing (it was using `int' for u64/s64 types);
 * optimised conditional statements for common case of non-reordered packets;
 * rewrote comments/documentation to match the changes.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/lib/packet_history.c | 73 ++++++++++++++-----------------------
 net/dccp/dccp.h                     | 15 ++++++++
 2 files changed, 43 insertions(+), 45 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 20af1a69342..8b5c41ec7ee 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -215,22 +215,19 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
 	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
 	    s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
 	    s2 = DCCP_SKB_CB(skb)->dccpd_seq;
-	int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
-	   d12 = dccp_delta_seqno(s1, s2), d2;
 
-	if (d12 > 0) {			/* S1  <  S2 */
+	if (likely(dccp_delta_seqno(s1, s2) > 0)) {	/* S1  <  S2 */
 		h->loss_count = 2;
 		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2);
 		return;
 	}
 
 	/* S0  <  S2  <  S1 */
-	d2 = dccp_delta_seqno(s0, s2);
 
-	if (d2 == 1 || n2 >= d2) {	/* S2 is direct successor of S0 */
-		int d21 = -d12;
+	if (dccp_loss_free(s0, s2, n2)) {
+		u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
 
-		if (d21 == 1 || n1 >= d21) {
+		if (dccp_loss_free(s2, s1, n1)) {
 			/* hole is filled: S0, S2, and S1 are consecutive */
 			h->loss_count = 0;
 			h->loss_start = tfrc_rx_hist_index(h, 1);
@@ -238,9 +235,9 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
 			/* gap between S2 and S1: just update loss_prev */
 			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
 
-	} else {			/* hole between S0 and S2 */
+	} else {	/* gap between S0 and S2 */
 		/*
-		 * Reorder history to insert S2 between S0 and s1
+		 * Reorder history to insert S2 between S0 and S1
 		 */
 		tfrc_rx_hist_swap(h, 0, 3);
 		h->loss_start = tfrc_rx_hist_index(h, 3);
@@ -256,22 +253,18 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
 	    s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
 	    s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
 	    s3 = DCCP_SKB_CB(skb)->dccpd_seq;
-	int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
-	   d23 = dccp_delta_seqno(s2, s3), d13, d3, d31;
 
-	if (d23 > 0) {			/* S2  <  S3 */
+	if (likely(dccp_delta_seqno(s2, s3) > 0)) {	/* S2  <  S3 */
 		h->loss_count = 3;
 		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3);
 		return 1;
 	}
 
 	/* S3  <  S2 */
-	d13 = dccp_delta_seqno(s1, s3);
 
-	if (d13 > 0) {
+	if (dccp_delta_seqno(s1, s3) > 0) {		/* S1  <  S3  <  S2 */
 		/*
-		 * The sequence number order is S1, S3, S2
-		 * Reorder history to insert entry between S1 and S2
+		 * Reorder history to insert S3 between S1 and S2
 		 */
 		tfrc_rx_hist_swap(h, 2, 3);
 		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3);
@@ -280,17 +273,15 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
 	}
 
 	/* S0  <  S3  <  S1 */
-	d31 = -d13;
-	d3  = dccp_delta_seqno(s0, s3);
 
-	if (d3 == 1 || n3 >= d3) {	/* S3 is a successor of S0 */
+	if (dccp_loss_free(s0, s3, n3)) {
+		u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
 
-		if (d31 == 1 || n1 >= d31) {
+		if (dccp_loss_free(s3, s1, n1)) {
 			/* hole between S0 and S1 filled by S3 */
-			int  d2 = dccp_delta_seqno(s1, s2),
-			     n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
+			u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
 
-			if (d2 == 1 || n2 >= d2) {
+			if (dccp_loss_free(s1, s2, n2)) {
 				/* entire hole filled by S0, S3, S1, S2 */
 				h->loss_start = tfrc_rx_hist_index(h, 2);
 				h->loss_count = 0;
@@ -307,8 +298,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
 	}
 
 	/*
-	 * The remaining case: S3 is not a successor of S0.
-	 * Sequence order is S0, S3, S1, S2; reorder to insert between S0 and S1
+	 * The remaining case:  S0  <  S3  <  S1  <  S2;  gap between S0 and S3
+	 * Reorder history to insert S3 between S0 and S1.
 	 */
 	tfrc_rx_hist_swap(h, 0, 3);
 	h->loss_start = tfrc_rx_hist_index(h, 3);
@@ -318,33 +309,25 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
 	return 1;
 }
 
-/* return the signed modulo-2^48 sequence number distance from entry e1 to e2 */
-static s64 tfrc_rx_hist_delta_seqno(struct tfrc_rx_hist *h, u8 e1, u8 e2)
-{
-	DCCP_BUG_ON(e1 > h->loss_count || e2 > h->loss_count);
-
-	return dccp_delta_seqno(tfrc_rx_hist_entry(h, e1)->tfrchrx_seqno,
-				tfrc_rx_hist_entry(h, e2)->tfrchrx_seqno);
-}
-
 /* recycle RX history records to continue loss detection if necessary */
 static void __three_after_loss(struct tfrc_rx_hist *h)
 {
 	/*
-	 * The distance between S0 and S1 is always greater than 1 and the NDP
-	 * count of S1 is smaller than this distance. Otherwise there would
-	 * have been no loss. Hence it is only necessary to see whether there
-	 * are further missing data packets between S1/S2 and S2/S3.
+	 * At this stage we know already that there is a gap between S0 and S1
+	 * (since S0 was the highest sequence number received before detecting
+	 * the loss). To recycle the loss record, it is	thus only necessary to
+	 * check for other possible gaps between S1/S2 and between S2/S3.
 	 */
-	int d2 = tfrc_rx_hist_delta_seqno(h, 1, 2),
-	    d3 = tfrc_rx_hist_delta_seqno(h, 2, 3),
-	    n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
+	u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
+	    s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
+	    s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno;
+	u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
 	    n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp;
 
-	if (d2 == 1 || n2 >= d2) {	/* S2 is successor to S1 */
+	if (dccp_loss_free(s1, s2, n2)) {
 
-		if (d3 == 1 || n3 >= d3) {
-			/* S3 is successor of S2: entire hole is filled */
+		if (dccp_loss_free(s2, s3, n3)) {
+			/* no gap between S2 and S3: entire hole is filled */
 			h->loss_start = tfrc_rx_hist_index(h, 3);
 			h->loss_count = 0;
 		} else {
@@ -353,7 +336,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
 			h->loss_count = 1;
 		}
 
-	} else {			/* gap between S1 and S2 */
+	} else {	/* gap between S1 and S2 */
 		h->loss_start = tfrc_rx_hist_index(h, 1);
 		h->loss_count = 2;
 	}
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 1b2cea244e1..32617e0576c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -153,6 +153,21 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
 	return after48(seq1, seq2) ? seq1 : seq2;
 }
 
+/**
+ * dccp_loss_free  -  Evaluates condition for data loss from RFC 4340, 7.7.1
+ * @s1:	 start sequence number
+ * @s2:  end sequence number
+ * @ndp: NDP count on packet with sequence number @s2
+ * Returns true if the sequence range s1...s2 has no data loss.
+ */
+static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp)
+{
+	s64 delta = dccp_delta_seqno(s1, s2);
+
+	BUG_TRAP(delta >= 0);
+	return (u64)delta <= ndp + 1;
+}
+
 enum {
 	DCCP_MIB_NUM = 0,
 	DCCP_MIB_ACTIVEOPENS,			/* ActiveOpens */
-- 
cgit v1.2.3-70-g09d2


From 5b5d0e704880addfd979c262e6441f126708539c Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 13 Jul 2008 11:51:40 +0100
Subject: dccp: Upgrade NDP count from 3 to 6 bytes

RFC 4340, 7.7 specifies up to 6 bytes for the NDP Count option, whereas the code
is currently limited to up to 3 bytes. This seems to be a relict of an earlier
draft version and is brought up to date by the patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/linux/dccp.h                |  6 ++----
 net/dccp/ccids/ccid3.c              |  2 +-
 net/dccp/ccids/lib/packet_history.c |  6 +++---
 net/dccp/ccids/lib/packet_history.h |  6 +++---
 net/dccp/options.c                  | 14 ++++++++------
 5 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index aa0737019e3..6080449fbec 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -364,8 +364,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 /* FIXME: for now we're default to 1 but it should really be 0 */
 #define DCCPF_INITIAL_SEND_NDP_COUNT		1
 
-#define DCCP_NDP_LIMIT 0xFFFFFF
-
 /**
   * struct dccp_minisock - Minimal DCCP connection representation
   *
@@ -437,7 +435,7 @@ extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			      struct sk_buff *skb);
 
 struct dccp_options_received {
-	u32	dccpor_ndp; /* only 24 bits */
+	u64	dccpor_ndp:48;
 	u32	dccpor_timestamp;
 	u32	dccpor_timestamp_echo;
 	u32	dccpor_elapsed_time;
@@ -533,7 +531,7 @@ struct dccp_sock {
 	__u16				dccps_r_ack_ratio;
 	__u16				dccps_pcslen;
 	__u16				dccps_pcrlen;
-	unsigned long			dccps_ndp_count;
+	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
 	struct dccp_minisock		dccps_minisock;
 	struct dccp_ackvec		*dccps_hc_rx_ackvec;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index a1929f33d70..523db262c18 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -794,7 +794,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
-	const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
+	const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
 	const bool is_data_packet = dccp_data_packet(skb);
 
 	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 8b5c41ec7ee..712930564c3 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -153,7 +153,7 @@ void tfrc_rx_packet_history_exit(void)
 
 static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
 					       const struct sk_buff *skb,
-					       const u32 ndp)
+					       const u64 ndp)
 {
 	const struct dccp_hdr *dh = dccp_hdr(skb);
 
@@ -166,7 +166,7 @@ static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
 
 void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 			     const struct sk_buff *skb,
-			     const u32 ndp)
+			     const u64 ndp)
 {
 	struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
 
@@ -356,7 +356,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
  */
 int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 			struct tfrc_loss_hist *lh,
-			struct sk_buff *skb, u32 ndp,
+			struct sk_buff *skb, const u64 ndp,
 			u32 (*calc_first_li)(struct sock *), struct sock *sk)
 {
 	int is_new_loss = 0;
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index c7eeda49cb2..6976156cda3 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -64,7 +64,7 @@ struct tfrc_rx_hist_entry {
 	u64		 tfrchrx_seqno:48,
 			 tfrchrx_ccval:4,
 			 tfrchrx_type:4;
-	u32		 tfrchrx_ndp; /* In fact it is from 8 to 24 bits */
+	u64		 tfrchrx_ndp:48;
 	ktime_t		 tfrchrx_tstamp;
 };
 
@@ -145,14 +145,14 @@ static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
 }
 
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
-				    const struct sk_buff *skb, const u32 ndp);
+				    const struct sk_buff *skb, const u64 ndp);
 
 extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
 
 struct tfrc_loss_hist;
 extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 				struct tfrc_loss_hist *lh,
-				struct sk_buff *skb, u32 ndp,
+				struct sk_buff *skb, const u64 ndp,
 				u32 (*first_li)(struct sock *sk),
 				struct sock *sk);
 extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 43bc24e761d..dc7c158a2f4 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -124,12 +124,12 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 				mandatory = 1;
 			break;
 		case DCCPO_NDP_COUNT:
-			if (len > 3)
+			if (len > 6)
 				goto out_invalid_option;
 
 			opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
-			dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk),
-				      opt_recv->dccpor_ndp);
+			dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk),
+				      (unsigned long long)opt_recv->dccpor_ndp);
 			break;
 		case DCCPO_CHANGE_L:
 			/* fall through */
@@ -307,9 +307,11 @@ static void dccp_encode_value_var(const u32 value, unsigned char *to,
 		*to++ = (value & 0xFF);
 }
 
-static inline int dccp_ndp_len(const int ndp)
+static inline u8 dccp_ndp_len(const u64 ndp)
 {
-	return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3;
+	if (likely(ndp <= 0xFF))
+		return 1;
+	return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
 }
 
 int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
@@ -336,7 +338,7 @@ EXPORT_SYMBOL_GPL(dccp_insert_option);
 static int dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	int ndp = dp->dccps_ndp_count;
+	u64 ndp = dp->dccps_ndp_count;
 
 	if (dccp_non_data_packet(skb))
 		++dp->dccps_ndp_count;
-- 
cgit v1.2.3-70-g09d2


From b552c6231f19d50165bbf59e8b34d3f713ab5c01 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 13 Jul 2008 11:51:40 +0100
Subject: dccp ccid-3: Fix a loss detection bug

This fixes a bug in the logic of the TFRC loss detection:
 * new_loss_indicated() should not be called while a loss is pending;
 * but the code allows this;
 * thus, for two subsequent gaps in the sequence space, when loss_count
   has not yet reached NDUPACK=3, the loss_count is falsely reduced to 1.

To avoid further and similar problems, all loss handling and loss detection is
now done inside tfrc_rx_hist_handle_loss(), using an appropriate routine to
track new losses.

Further changes:
----------------
 * added a reminder that no RX history operations should be performed when
   rx_handle_loss() has identified a (new) loss, since the function takes
   care of packet reordering during loss detection;
 * made tfrc_rx_hist_loss_pending() bool (thanks to an earlier suggestion
   by Arnaldo);
 * removed unused functions.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c              | 12 +++++-------
 net/dccp/ccids/lib/packet_history.c | 24 ++++++++++++++++++++----
 net/dccp/ccids/lib/packet_history.h | 24 ++----------------------
 3 files changed, 27 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 523db262c18..f6756e0c9e6 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -825,18 +825,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/*
-	 * Handle pending losses and otherwise check for new loss
+	 * Perform loss detection and handle pending losses
 	 */
-	if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist) &&
-	    tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist,
-				&hcrx->ccid3hcrx_li_hist,
-				skb, ndp, ccid3_first_li, sk) ) {
+	if (tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist,
+				skb, ndp, ccid3_first_li, sk)) {
 		do_feedback = CCID3_FBACK_PARAM_CHANGE;
 		goto done_receiving;
 	}
 
-	if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
-		goto update_records;
+	if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist))
+		return; /* done receiving */
 
 	/*
 	 * Handle data packets: RTT sampling and monitoring p
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 712930564c3..6cc108afdc3 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -206,10 +206,21 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
  *
  * In the descriptions, `Si' refers to the sequence number of entry number i,
  * whose NDP count is `Ni' (lower case is used for variables).
- * Note: All __after_loss functions expect that a test against duplicates has
- *       been performed already: the seqno of the skb must not be less than the
- *       seqno of loss_prev; and it must not equal that of any valid hist_entry.
+ * Note: All __xxx_loss functions expect that a test against duplicates has been
+ *       performed already: the seqno of the skb must not be less than the seqno
+ *       of loss_prev; and it must not equal that of any valid history entry.
  */
+static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
+{
+	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
+	    s1 = DCCP_SKB_CB(skb)->dccpd_seq;
+
+	if (!dccp_loss_free(s0, s1, n1)) {	/* gap between S0 and S1 */
+		h->loss_count = 1;
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
+	}
+}
+
 static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
 {
 	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
@@ -353,6 +364,9 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
  *  Chooses action according to pending loss, updates LI database when a new
  *  loss was detected, and does required post-processing. Returns 1 when caller
  *  should send feedback, 0 otherwise.
+ *  Since it also takes care of reordering during loss detection and updates the
+ *  records accordingly, the caller should not perform any more RX history
+ *  operations when loss_count is greater than 0 after calling this function.
  */
 int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 			struct tfrc_loss_hist *lh,
@@ -361,7 +375,9 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 {
 	int is_new_loss = 0;
 
-	if (h->loss_count == 1) {
+	if (h->loss_count == 0) {
+		__do_track_loss(h, skb, ndp);
+	} else if (h->loss_count == 1) {
 		__one_after_loss(h, skb, ndp);
 	} else if (h->loss_count != 2) {
 		DCCP_BUG("invalid loss_count %d", h->loss_count);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 6976156cda3..461cc91cce8 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -118,30 +118,10 @@ static inline struct tfrc_rx_hist_entry *
 	return h->ring[h->loss_start];
 }
 
-/* initialise loss detection and disable RTT sampling */
-static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
-{
-	h->loss_count = 1;
-}
-
 /* indicate whether previously a packet was detected missing */
-static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
-{
-	return h->loss_count;
-}
-
-/* any data packets missing between last reception and skb ? */
-static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
-						  const struct sk_buff *skb,
-						  u32 ndp)
+static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
 {
-	int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
-				     DCCP_SKB_CB(skb)->dccpd_seq);
-
-	if (delta > 1 && ndp < delta)
-		tfrc_rx_hist_loss_indicated(h);
-
-	return tfrc_rx_hist_loss_pending(h);
+	return h->loss_count > 0;
 }
 
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
-- 
cgit v1.2.3-70-g09d2


From 2eeea7ba6b4b65ed27b7646a1bdea3b45973c861 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 13 Jul 2008 11:51:40 +0100
Subject: dccp ccid-3: Length of loss intervals

This corrects an error in the computation of the open loss interval I_0:
  * the interval length is (highest_seqno - start_seqno) + 1
  * and not (highest_seqno - start_seqno).

This condition was not fully clear in RFC 3448, but reflects the current
revision state of rfc3448bis and is also consistent with RFC 4340, 6.1.1.

Further changes:
----------------
 * variable renamed due to line length constraints;
 * explicit typecast to `s64' to avoid implicit signed/unsigned casting.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/lib/loss_interval.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 849e181e698..bcd6ac415bb 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -90,14 +90,14 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 {
 	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
 	u32 old_i_mean = lh->i_mean;
-	s64 length;
+	s64 len;
 
 	if (cur == NULL)			/* not initialised */
 		return 0;
 
-	length = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq);
+	len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
 
-	if (length - cur->li_length <= 0)	/* duplicate or reordered */
+	if (len - (s64)cur->li_length <= 0)	/* duplicate or reordered */
 		return 0;
 
 	if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
@@ -114,7 +114,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 	if (tfrc_lh_length(lh) == 1)		/* due to RFC 3448, 6.3.1 */
 		return 0;
 
-	cur->li_length = length;
+	cur->li_length = len;
 	tfrc_lh_calc_i_mean(lh);
 
 	return (lh->i_mean < old_i_mean);
@@ -159,7 +159,7 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
 	else {
 		cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno);
 		new->li_length = dccp_delta_seqno(new->li_seqno,
-				  tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno);
+				  tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno) + 1;
 		if (lh->counter > (2*LIH_SIZE))
 			lh->counter -= LIH_SIZE;
 
-- 
cgit v1.2.3-70-g09d2


From c2b4afd2f99a187ec3bbd6e2def186fbfb755929 Mon Sep 17 00:00:00 2001
From: Ursula Braun <braunu@de.ibm.com>
Date: Mon, 14 Jul 2008 09:59:29 +0200
Subject: [S390] Cleanup iucv printk messages.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Ursula Braun <braunu@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 net/iucv/af_iucv.c | 8 ++------
 net/iucv/iucv.c    | 9 ++-------
 2 files changed, 4 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 7b0038f45b1..bda71015885 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1135,8 +1135,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
 		if (this)
 			kfree_skb(this);
 	}
-	if (!this)
-		printk(KERN_ERR "AF_IUCV msg tag %u not found\n", msg->tag);
+	BUG_ON(!this);
 
 	if (sk->sk_state == IUCV_CLOSING) {
 		if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) {
@@ -1196,7 +1195,7 @@ static int __init afiucv_init(void)
 	}
 	cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
 	if (unlikely(err)) {
-		printk(KERN_ERR "AF_IUCV needs the VM userid\n");
+		WARN_ON(err);
 		err = -EPROTONOSUPPORT;
 		goto out;
 	}
@@ -1210,7 +1209,6 @@ static int __init afiucv_init(void)
 	err = sock_register(&iucv_sock_family_ops);
 	if (err)
 		goto out_proto;
-	printk(KERN_INFO "AF_IUCV lowlevel driver initialized\n");
 	return 0;
 
 out_proto:
@@ -1226,8 +1224,6 @@ static void __exit afiucv_exit(void)
 	sock_unregister(PF_IUCV);
 	proto_unregister(&iucv_proto);
 	iucv_unregister(&af_iucv_handler, 0);
-
-	printk(KERN_INFO "AF_IUCV lowlevel driver unloaded\n");
 }
 
 module_init(afiucv_init);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 91897076213..7f82b761621 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1559,16 +1559,11 @@ static void iucv_external_interrupt(u16 code)
 
 	p = iucv_irq_data[smp_processor_id()];
 	if (p->ippathid >= iucv_max_pathid) {
-		printk(KERN_WARNING "iucv_do_int: Got interrupt with "
-		       "pathid %d > max_connections (%ld)\n",
-		       p->ippathid, iucv_max_pathid - 1);
+		WARN_ON(p->ippathid >= iucv_max_pathid);
 		iucv_sever_pathid(p->ippathid, iucv_error_no_listener);
 		return;
 	}
-	if (p->iptype  < 0x01 || p->iptype > 0x09) {
-		printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n");
-		return;
-	}
+	BUG_ON(p->iptype  < 0x01 || p->iptype > 0x09);
 	work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC);
 	if (!work) {
 		printk(KERN_WARNING "iucv_external_interrupt: out of memory\n");
-- 
cgit v1.2.3-70-g09d2


From 79d554a6976a295aa9212172b218f29ca71c3b3d Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:44 +0200
Subject: [Bluetooth] Change retrieval of L2CAP features mask

Getting the remote L2CAP features mask is really important, but doing
this as less intrusive as possible is tricky. To play nice with older
systems and Bluetooth qualification testing, the features mask is now
only retrieved in two specific cases and only once per lifetime of an
ACL link.

When trying to establish a L2CAP connection and the remote features mask
is unknown, the L2CAP information request is sent when the ACL link goes
into connected state. This applies only to outgoing connections and also
only for the connection oriented channels.

The second case is when a connection request has been received. In this
case a connection response with the result pending and the information
request will be send. After receiving an information response or if the
timeout gets triggered, the normal connection setup process with security
setup will be initiated.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 166 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 118 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 6e180d25550..2e3abdfbd69 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -55,7 +55,7 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "2.9"
+#define VERSION "2.10"
 
 static u32 l2cap_feat_mask = 0x0000;
 
@@ -253,6 +253,21 @@ static void l2cap_chan_del(struct sock *sk, int err)
 		sk->sk_state_change(sk);
 }
 
+/* Service level security */
+static inline int l2cap_check_link_mode(struct sock *sk)
+{
+	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+
+	if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) ||
+				(l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE))
+		return hci_conn_encrypt(conn->hcon);
+
+	if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH)
+		return hci_conn_auth(conn->hcon);
+
+	return 1;
+}
+
 static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
 {
 	u8 id;
@@ -287,6 +302,34 @@ static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16
 	return hci_send_acl(conn->hcon, skb, 0);
 }
 
+static void l2cap_do_start(struct sock *sk)
+{
+	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+
+	if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) {
+		struct l2cap_conn_req req;
+		req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
+		req.psm  = l2cap_pi(sk)->psm;
+
+		l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+
+		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_REQ, sizeof(req), &req);
+	} else {
+		struct l2cap_info_req req;
+		req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
+
+		conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
+		conn->info_ident = l2cap_get_ident(conn);
+
+		mod_timer(&conn->info_timer, jiffies +
+					msecs_to_jiffies(L2CAP_INFO_TIMEOUT));
+
+		l2cap_send_cmd(conn, conn->info_ident,
+					L2CAP_INFO_REQ, sizeof(req), &req);
+	}
+}
+
 /* ---- L2CAP connections ---- */
 static void l2cap_conn_start(struct l2cap_conn *conn)
 {
@@ -301,16 +344,35 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 		bh_lock_sock(sk);
 
 		if (sk->sk_type != SOCK_SEQPACKET) {
-			l2cap_sock_clear_timer(sk);
-			sk->sk_state = BT_CONNECTED;
-			sk->sk_state_change(sk);
-		} else if (sk->sk_state == BT_CONNECT) {
+			bh_unlock_sock(sk);
+			continue;
+		}
+
+		if (sk->sk_state == BT_CONNECT) {
 			struct l2cap_conn_req req;
-			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
 			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			req.psm  = l2cap_pi(sk)->psm;
+
+			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_REQ, sizeof(req), &req);
+		} else if (sk->sk_state == BT_CONNECT2) {
+			struct l2cap_conn_rsp rsp;
+			rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
+			rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+
+			if (l2cap_check_link_mode(sk)) {
+				sk->sk_state = BT_CONFIG;
+				rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
+				rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
+			} else {
+				rsp.result = cpu_to_le16(L2CAP_CR_PEND);
+				rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND);
+			}
+
+			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 		}
 
 		bh_unlock_sock(sk);
@@ -321,22 +383,27 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 
 static void l2cap_conn_ready(struct l2cap_conn *conn)
 {
-	BT_DBG("conn %p", conn);
+	struct l2cap_chan_list *l = &conn->chan_list;
+	struct sock *sk;
 
-	if (conn->chan_list.head || !hlist_empty(&l2cap_sk_list.head)) {
-		struct l2cap_info_req req;
+	BT_DBG("conn %p", conn);
 
-		req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
+	read_lock(&l->lock);
 
-		conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
-		conn->info_ident = l2cap_get_ident(conn);
+	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
+		bh_lock_sock(sk);
 
-		mod_timer(&conn->info_timer,
-			jiffies + msecs_to_jiffies(L2CAP_INFO_TIMEOUT));
+		if (sk->sk_type != SOCK_SEQPACKET) {
+			l2cap_sock_clear_timer(sk);
+			sk->sk_state = BT_CONNECTED;
+			sk->sk_state_change(sk);
+		} else if (sk->sk_state == BT_CONNECT)
+			l2cap_do_start(sk);
 
-		l2cap_send_cmd(conn, conn->info_ident,
-					L2CAP_INFO_REQ, sizeof(req), &req);
+		bh_unlock_sock(sk);
 	}
+
+	read_unlock(&l->lock);
 }
 
 /* Notify sockets that we cannot guaranty reliability anymore */
@@ -729,22 +796,11 @@ static int l2cap_do_connect(struct sock *sk)
 	l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
 
 	if (hcon->state == BT_CONNECTED) {
-		if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)) {
-			l2cap_conn_ready(conn);
-			goto done;
-		}
-
-		if (sk->sk_type == SOCK_SEQPACKET) {
-			struct l2cap_conn_req req;
-			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
-			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
-			req.psm  = l2cap_pi(sk)->psm;
-			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
-					L2CAP_CONN_REQ, sizeof(req), &req);
-		} else {
+		if (sk->sk_type != SOCK_SEQPACKET) {
 			l2cap_sock_clear_timer(sk);
 			sk->sk_state = BT_CONNECTED;
-		}
+		} else
+			l2cap_do_start(sk);
 	}
 
 done:
@@ -1477,7 +1533,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
 	struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
 	struct l2cap_conn_rsp rsp;
 	struct sock *sk, *parent;
-	int result = 0, status = 0;
+	int result, status = 0;
 
 	u16 dcid = 0, scid = __le16_to_cpu(req->scid);
 	__le16 psm  = req->psm;
@@ -1526,25 +1582,24 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
 
 	l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
 
-	/* Service level security */
-	result = L2CAP_CR_PEND;
-	status = L2CAP_CS_AUTHEN_PEND;
-	sk->sk_state = BT_CONNECT2;
 	l2cap_pi(sk)->ident = cmd->ident;
 
-	if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) ||
-			(l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) {
-		if (!hci_conn_encrypt(conn->hcon))
-			goto done;
-	} else if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH) {
-		if (!hci_conn_auth(conn->hcon))
-			goto done;
+	if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) {
+		if (l2cap_check_link_mode(sk)) {
+			sk->sk_state = BT_CONFIG;
+			result = L2CAP_CR_SUCCESS;
+			status = L2CAP_CS_NO_INFO;
+		} else {
+			sk->sk_state = BT_CONNECT2;
+			result = L2CAP_CR_PEND;
+			status = L2CAP_CS_AUTHEN_PEND;
+		}
+	} else {
+		sk->sk_state = BT_CONNECT2;
+		result = L2CAP_CR_PEND;
+		status = L2CAP_CS_NO_INFO;
 	}
 
-	sk->sk_state = BT_CONFIG;
-	result = status = 0;
-
-done:
 	write_unlock_bh(&list->lock);
 
 response:
@@ -1556,6 +1611,21 @@ sendresp:
 	rsp.result = cpu_to_le16(result);
 	rsp.status = cpu_to_le16(status);
 	l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+
+	if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) {
+		struct l2cap_info_req info;
+		info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
+
+		conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
+		conn->info_ident = l2cap_get_ident(conn);
+
+		mod_timer(&conn->info_timer, jiffies +
+					msecs_to_jiffies(L2CAP_INFO_TIMEOUT));
+
+		l2cap_send_cmd(conn, conn->info_ident,
+					L2CAP_INFO_REQ, sizeof(info), &info);
+	}
+
 	return 0;
 }
 
@@ -1664,9 +1734,9 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 	}
 
 	if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) {
-		u8 req[64];
+		u8 buf[64];
 		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
-					l2cap_build_conf_req(sk, req), req);
+					l2cap_build_conf_req(sk, buf), buf);
 	}
 
 unlock:
-- 
cgit v1.2.3-70-g09d2


From 77db1980565626471a980f0d2d17299e4bd5e7a5 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:45 +0200
Subject: [Bluetooth] Enforce security for outgoing RFCOMM connections

Recent tests with various Bluetooth headsets have shown that some of
them don't enforce authentication and encryption when connecting. All
of them leave it up to the host stack to enforce it. Non of them should
allow unencrypted connections, but that is how it is. So in case the
link mode settings require authentication and/or encryption it will now
also be enforced on outgoing RFCOMM connections. Previously this was
only done for incoming connections.

This support has a small drawback from a protocol level point of view
since the host stack can't really tell with 100% certainty if a remote
side is already authenticated or not. So if both sides are configured
to enforce authentication it will be requested twice. Most Bluetooth
chips are caching this information and thus no extra authentication
procedure has to be triggered over-the-air, but it can happen.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/rfcomm.h |  1 +
 net/bluetooth/rfcomm/core.c    | 77 +++++++++++++++++++++++++-----------------
 net/bluetooth/rfcomm/sock.c    |  8 +++--
 3 files changed, 52 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 98ec7a32068..8c54ff37ad4 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -181,6 +181,7 @@ struct rfcomm_dlc {
 	u8            priority;
 	u8            v24_sig;
 	u8            mscex;
+	u8            out;
 
 	u32           link_mode;
 
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 0c2c93735e9..1f92f9ab495 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -53,7 +53,7 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "1.8"
+#define VERSION "1.9"
 
 static int disable_cfc = 0;
 static int channel_mtu = -1;
@@ -230,6 +230,21 @@ static int rfcomm_l2sock_create(struct socket **sock)
 	return err;
 }
 
+static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d)
+{
+	struct sock *sk = d->session->sock->sk;
+
+	if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) {
+		if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon))
+			return 1;
+	} else if (d->link_mode & RFCOMM_LM_AUTH) {
+		if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon))
+			return 1;
+	}
+
+	return 0;
+}
+
 /* ---- RFCOMM DLCs ---- */
 static void rfcomm_dlc_timeout(unsigned long arg)
 {
@@ -371,15 +386,23 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst,
 	d->addr     = __addr(s->initiator, dlci);
 	d->priority = 7;
 
-	d->state    = BT_CONFIG;
+	d->state = BT_CONFIG;
 	rfcomm_dlc_link(s, d);
 
+	d->out = 1;
+
 	d->mtu = s->mtu;
 	d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc;
 
-	if (s->state == BT_CONNECTED)
-		rfcomm_send_pn(s, 1, d);
+	if (s->state == BT_CONNECTED) {
+		if (rfcomm_check_link_mode(d))
+			set_bit(RFCOMM_AUTH_PENDING, &d->flags);
+		else
+			rfcomm_send_pn(s, 1, d);
+	}
+
 	rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT);
+
 	return 0;
 }
 
@@ -1146,21 +1169,6 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
 	return 0;
 }
 
-static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d)
-{
-	struct sock *sk = d->session->sock->sk;
-
-	if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) {
-		if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon))
-			return 1;
-	} else if (d->link_mode & RFCOMM_LM_AUTH) {
-		if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon))
-			return 1;
-	}
-
-	return 0;
-}
-
 static void rfcomm_dlc_accept(struct rfcomm_dlc *d)
 {
 	struct sock *sk = d->session->sock->sk;
@@ -1205,10 +1213,8 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci)
 			if (rfcomm_check_link_mode(d)) {
 				set_bit(RFCOMM_AUTH_PENDING, &d->flags);
 				rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
-				return 0;
-			}
-
-			rfcomm_dlc_accept(d);
+			} else
+				rfcomm_dlc_accept(d);
 		}
 		return 0;
 	}
@@ -1223,10 +1229,8 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci)
 		if (rfcomm_check_link_mode(d)) {
 			set_bit(RFCOMM_AUTH_PENDING, &d->flags);
 			rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
-			return 0;
-		}
-
-		rfcomm_dlc_accept(d);
+		} else
+			rfcomm_dlc_accept(d);
 	} else {
 		rfcomm_send_dm(s, dlci);
 	}
@@ -1636,7 +1640,11 @@ static void rfcomm_process_connect(struct rfcomm_session *s)
 		d = list_entry(p, struct rfcomm_dlc, list);
 		if (d->state == BT_CONFIG) {
 			d->mtu = s->mtu;
-			rfcomm_send_pn(s, 1, d);
+			if (rfcomm_check_link_mode(d)) {
+				set_bit(RFCOMM_AUTH_PENDING, &d->flags);
+				rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
+			} else
+				rfcomm_send_pn(s, 1, d);
 		}
 	}
 }
@@ -1709,7 +1717,11 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s)
 
 		if (test_and_clear_bit(RFCOMM_AUTH_ACCEPT, &d->flags)) {
 			rfcomm_dlc_clear_timer(d);
-			rfcomm_dlc_accept(d);
+			if (d->out) {
+				rfcomm_send_pn(s, 1, d);
+				rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT);
+			} else
+				rfcomm_dlc_accept(d);
 			if (d->link_mode & RFCOMM_LM_SECURE) {
 				struct sock *sk = s->sock->sk;
 				hci_conn_change_link_key(l2cap_pi(sk)->conn->hcon);
@@ -1717,7 +1729,10 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s)
 			continue;
 		} else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) {
 			rfcomm_dlc_clear_timer(d);
-			rfcomm_send_dm(s, d->dlci);
+			if (!d->out)
+				rfcomm_send_dm(s, d->dlci);
+			else
+				d->state = BT_CLOSED;
 			__rfcomm_dlc_close(d, ECONNREFUSED);
 			continue;
 		}
@@ -1726,7 +1741,7 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s)
 			continue;
 
 		if ((d->state == BT_CONNECTED || d->state == BT_DISCONN) &&
-				d->mscex == RFCOMM_MSCEX_OK)
+						d->mscex == RFCOMM_MSCEX_OK)
 			rfcomm_process_tx(d);
 	}
 }
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 5083adcbfae..cacb1ab51f9 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -309,13 +309,13 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int
 	sk->sk_destruct = rfcomm_sock_destruct;
 	sk->sk_sndtimeo = RFCOMM_CONN_TIMEOUT;
 
-	sk->sk_sndbuf   = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
-	sk->sk_rcvbuf   = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
+	sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
+	sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
 
 	sock_reset_flag(sk, SOCK_ZAPPED);
 
 	sk->sk_protocol = proto;
-	sk->sk_state	= BT_OPEN;
+	sk->sk_state    = BT_OPEN;
 
 	bt_sock_link(&rfcomm_sk_list, sk);
 
@@ -413,6 +413,8 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
 	bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr);
 	rfcomm_pi(sk)->channel = sa->rc_channel;
 
+	d->link_mode = rfcomm_pi(sk)->link_mode;
+
 	err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel);
 	if (!err)
 		err = bt_sock_wait_state(sk, BT_CONNECTED,
-- 
cgit v1.2.3-70-g09d2


From 9719f8afce34d3d04e884873a8a5e3483e30974c Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:45 +0200
Subject: [Bluetooth] Disconnect when encryption gets disabled

The Bluetooth specification allows to enable or disable the encryption
of an ACL link at any time by either the peer or the remote device. If
a L2CAP or RFCOMM connection requested an encrypted link, they will now
disconnect that link if the encryption gets disabled. Higher protocols
that don't care about encryption (like SDP) are not affected.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 10 +++++-----
 net/bluetooth/l2cap.c            | 13 ++++++++++++-
 net/bluetooth/rfcomm/core.c      |  8 ++++++++
 3 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ea13baa3851..c8255adee8f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -474,7 +474,7 @@ struct hci_proto {
 	int (*recv_acldata)	(struct hci_conn *conn, struct sk_buff *skb, __u16 flags);
 	int (*recv_scodata)	(struct hci_conn *conn, struct sk_buff *skb);
 	int (*auth_cfm)		(struct hci_conn *conn, __u8 status);
-	int (*encrypt_cfm)	(struct hci_conn *conn, __u8 status);
+	int (*encrypt_cfm)	(struct hci_conn *conn, __u8 status, __u8 encrypt);
 };
 
 static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type)
@@ -532,17 +532,17 @@ static inline void hci_proto_auth_cfm(struct hci_conn *conn, __u8 status)
 		hp->auth_cfm(conn, status);
 }
 
-static inline void hci_proto_encrypt_cfm(struct hci_conn *conn, __u8 status)
+static inline void hci_proto_encrypt_cfm(struct hci_conn *conn, __u8 status, __u8 encrypt)
 {
 	register struct hci_proto *hp;
 
 	hp = hci_proto[HCI_PROTO_L2CAP];
 	if (hp && hp->encrypt_cfm)
-		hp->encrypt_cfm(conn, status);
+		hp->encrypt_cfm(conn, status, encrypt);
 
 	hp = hci_proto[HCI_PROTO_SCO];
 	if (hp && hp->encrypt_cfm)
-		hp->encrypt_cfm(conn, status);
+		hp->encrypt_cfm(conn, status, encrypt);
 }
 
 int hci_register_proto(struct hci_proto *hproto);
@@ -579,7 +579,7 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status, __u8 encr
 {
 	struct list_head *p;
 
-	hci_proto_encrypt_cfm(conn, status);
+	hci_proto_encrypt_cfm(conn, status, encrypt);
 
 	read_lock_bh(&hci_cb_list_lock);
 	list_for_each(p, &hci_cb_list) {
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 2e3abdfbd69..252264062f5 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2197,7 +2197,7 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 	return 0;
 }
 
-static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
+static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 {
 	struct l2cap_chan_list *l;
 	struct l2cap_conn *conn = hcon->l2cap_data;
@@ -2215,8 +2215,19 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
 	read_lock(&l->lock);
 
 	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
+		struct l2cap_pinfo *pi = l2cap_pi(sk);
+
 		bh_lock_sock(sk);
 
+		if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) &&
+					(sk->sk_state == BT_CONNECTED ||
+						sk->sk_state == BT_CONFIG) &&
+						!status && encrypt == 0x00) {
+			__l2cap_sock_close(sk, ECONNREFUSED);
+			bh_unlock_sock(sk);
+			continue;
+		}
+
 		if (sk->sk_state != BT_CONNECT2) {
 			bh_unlock_sock(sk);
 			continue;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 1f92f9ab495..e7a6a03cea3 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2003,6 +2003,14 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
 	list_for_each_safe(p, n, &s->dlcs) {
 		d = list_entry(p, struct rfcomm_dlc, list);
 
+		if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) &&
+					(d->state == BT_CONNECTED ||
+						d->state == BT_CONFIG) &&
+						!status && encrypt == 0x00) {
+			__rfcomm_dlc_close(d, ECONNREFUSED);
+			continue;
+		}
+
 		if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags))
 			continue;
 
-- 
cgit v1.2.3-70-g09d2


From ae29319649b80ed9d28d7b4f164e3f5f75020fc8 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:45 +0200
Subject: [Bluetooth] Update authentication status after successful encryption

The authentication status is not communicated to both parties. This is
actually a flaw in the Bluetooth specification. Only the requesting side
really knows if the authentication was successful or not. This piece of
information is however needed on the other side to know if it has to
trigger the authentication procedure or not. Worst case is that both
sides will request authentication at different times, but this should
be avoided since it costs extra time when setting up a new connection.

For Bluetooth encryption it is required to authenticate the link first
and the encryption status is communicated to both sides. So when a link
is switched to encryption it is possible to update the authentication
status since it implies an authenticated link.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6aef8f24e58..0aba21a03b3 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -883,9 +883,11 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn) {
 		if (!ev->status) {
-			if (ev->encrypt)
+			if (ev->encrypt) {
+				/* Encryption implies authentication */
+				conn->link_mode |= HCI_LM_AUTH;
 				conn->link_mode |= HCI_LM_ENCRYPT;
-			else
+			} else
 				conn->link_mode &= ~HCI_LM_ENCRYPT;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From 9dc0a3afc08d6c20c284994dcd84531787d00ec2 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:46 +0200
Subject: [Bluetooth] Support the case when headset falls back to SCO link

When trying to establish an eSCO link between two devices then it can
happen that the remote device falls back to a SCO link. Currently this
case is not handled correctly and the message dispatching will break
since it is looking for eSCO packets. So in case the configured link
falls back to SCO overwrite the link type with the correct value.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0aba21a03b3..6bc5a0506c6 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1314,8 +1314,16 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
-	if (!conn)
-		goto unlock;
+	if (!conn) {
+		if (ev->link_type == ESCO_LINK)
+			goto unlock;
+
+		conn = hci_conn_hash_lookup_ba(hdev, ESCO_LINK, &ev->bdaddr);
+		if (!conn)
+			goto unlock;
+
+		conn->type = SCO_LINK;
+	}
 
 	if (!ev->status) {
 		conn->handle = __le16_to_cpu(ev->handle);
-- 
cgit v1.2.3-70-g09d2


From a8746417e864da1ed36dd2432a399fbeb843c2a0 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:46 +0200
Subject: [Bluetooth] Track connection packet type changes

The connection packet type can be changed after the connection has been
established and thus needs to be properly tracked to ensure that the
host stack has always correct and valid information about it.

On incoming connections the Bluetooth core switches the supported packet
types to the configured list for this controller. However the usefulness
of this feature has been questioned a lot. The general consent is that
every Bluetooth host stack should enable as many packet types as the
hardware actually supports and leave the decision to the link manager
software running on the Bluetooth chip.

When running on Bluetooth 2.0 or later hardware, don't change the packet
type for incoming connections anymore. This hardware likely supports
Enhanced Data Rate and thus leave it completely up to the link manager
to pick the best packet type.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |  9 +++++++++
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_conn.c         | 32 ++++++++++++++++++++++++--------
 net/bluetooth/hci_event.c        | 32 +++++++++++++++++++++++++-------
 4 files changed, 59 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index a8a9eb6af96..f1dc174abc2 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -137,6 +137,8 @@ enum {
 #define ESCO_EV4	0x0010
 #define ESCO_EV5	0x0020
 
+#define SCO_ESCO_MASK  (ESCO_HV1 | ESCO_HV2 | ESCO_HV3)
+
 /* ACL flags */
 #define ACL_CONT		0x01
 #define ACL_START		0x02
@@ -696,6 +698,13 @@ struct hci_ev_clock_offset {
 	__le16   clock_offset;
 } __attribute__ ((packed));
 
+#define HCI_EV_PKT_TYPE_CHANGE		0x1d
+struct hci_ev_pkt_type_change {
+	__u8     status;
+	__le16   handle;
+	__le16   pkt_type;
+} __attribute__ ((packed));
+
 #define HCI_EV_PSCAN_REP_MODE		0x20
 struct hci_ev_pscan_rep_mode {
 	bdaddr_t bdaddr;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index c8255adee8f..6424d63e339 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -162,6 +162,7 @@ struct hci_conn {
 	__u8		 dev_class[3];
 	__u8             features[8];
 	__u16            interval;
+	__u16            pkt_type;
 	__u16            link_policy;
 	__u32		 link_mode;
 	__u8             power_save;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f8880261da0..69c64ce054f 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -59,7 +59,8 @@ void hci_acl_connect(struct hci_conn *conn)
 	BT_DBG("%p", conn);
 
 	conn->state = BT_CONNECT;
-	conn->out   = 1;
+	conn->out = 1;
+
 	conn->link_mode = HCI_LM_MASTER;
 
 	conn->attempt++;
@@ -76,7 +77,7 @@ void hci_acl_connect(struct hci_conn *conn)
 		memcpy(conn->dev_class, ie->data.dev_class, 3);
 	}
 
-	cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
+	cp.pkt_type = cpu_to_le16(conn->pkt_type);
 	if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
 		cp.role_switch = 0x01;
 	else
@@ -122,7 +123,7 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
 	conn->out = 1;
 
 	cp.handle   = cpu_to_le16(handle);
-	cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+	cp.pkt_type = cpu_to_le16(conn->pkt_type);
 
 	hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp);
 }
@@ -138,7 +139,7 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
 	conn->out = 1;
 
 	cp.handle   = cpu_to_le16(handle);
-	cp.pkt_type = cpu_to_le16(hdev->esco_type);
+	cp.pkt_type = cpu_to_le16(conn->pkt_type);
 
 	cp.tx_bandwidth   = cpu_to_le32(0x00001f40);
 	cp.rx_bandwidth   = cpu_to_le32(0x00001f40);
@@ -199,13 +200,28 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 		return NULL;
 
 	bacpy(&conn->dst, dst);
-	conn->hdev   = hdev;
-	conn->type   = type;
-	conn->mode   = HCI_CM_ACTIVE;
-	conn->state  = BT_OPEN;
+	conn->hdev  = hdev;
+	conn->type  = type;
+	conn->mode  = HCI_CM_ACTIVE;
+	conn->state = BT_OPEN;
 
 	conn->power_save = 1;
 
+	switch (type) {
+	case ACL_LINK:
+		conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;
+		break;
+	case SCO_LINK:
+		if (lmp_esco_capable(hdev))
+			conn->pkt_type = hdev->esco_type & SCO_ESCO_MASK;
+		else
+			conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK;
+		break;
+	case ESCO_LINK:
+		conn->pkt_type = hdev->esco_type;
+		break;
+	}
+
 	skb_queue_head_init(&conn->data_q);
 
 	setup_timer(&conn->disc_timer, hci_conn_timeout, (unsigned long)conn);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6bc5a0506c6..d4d2dcc40fc 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -699,14 +699,12 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		}
 
 		/* Set packet type for incoming connection */
-		if (!conn->out) {
+		if (!conn->out && hdev->hci_ver < 3) {
 			struct hci_cp_change_conn_ptype cp;
 			cp.handle = ev->handle;
-			cp.pkt_type = (conn->type == ACL_LINK) ?
-				cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
-				cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
-
-			hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
+			cp.pkt_type = cpu_to_le16(conn->pkt_type);
+			hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE,
+							sizeof(cp), &cp);
 		} else {
 			/* Update disconnect timer */
 			hci_conn_hold(conn);
@@ -786,7 +784,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 			struct hci_cp_accept_sync_conn_req cp;
 
 			bacpy(&cp.bdaddr, &ev->bdaddr);
-			cp.pkt_type = cpu_to_le16(hdev->esco_type);
+			cp.pkt_type = cpu_to_le16(conn->pkt_type);
 
 			cp.tx_bandwidth   = cpu_to_le32(0x00001f40);
 			cp.rx_bandwidth   = cpu_to_le32(0x00001f40);
@@ -1237,6 +1235,22 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk
 	hci_dev_unlock(hdev);
 }
 
+static inline void hci_pkt_type_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_pkt_type_change *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+	if (conn && !ev->status)
+		conn->pkt_type = __le16_to_cpu(ev->pkt_type);
+
+	hci_dev_unlock(hdev);
+}
+
 static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_pscan_rep_mode *ev = (void *) skb->data;
@@ -1480,6 +1494,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_clock_offset_evt(hdev, skb);
 		break;
 
+	case HCI_EV_PKT_TYPE_CHANGE:
+		hci_pkt_type_change_evt(hdev, skb);
+		break;
+
 	case HCI_EV_PSCAN_REP_MODE:
 		hci_pscan_rep_mode_evt(hdev, skb);
 		break;
-- 
cgit v1.2.3-70-g09d2


From e4e8e37c42bdaaefcb84eeaef0dc1bc3f696f8f6 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:47 +0200
Subject: [Bluetooth] Make use of the default link policy settings

The Bluetooth specification supports the default link policy settings
on a per host controller basis. For every new connection the link
manager would then use these settings. It is better to use this instead
of bothering the controller on every connection setup to overwrite the
default settings.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 11 +++++++
 net/bluetooth/hci_conn.c    |  2 ++
 net/bluetooth/hci_core.c    | 35 ++++++++++++++-------
 net/bluetooth/hci_event.c   | 75 +++++++++++++++++++++++++++++++++++++--------
 4 files changed, 100 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index f1dc174abc2..efc8c555c18 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -404,6 +404,17 @@ struct hci_rp_write_link_policy {
 	__le16   handle;
 } __attribute__ ((packed));
 
+#define HCI_OP_READ_DEF_LINK_POLICY	0x080e
+struct hci_rp_read_def_link_policy {
+	__u8     status;
+	__le16   policy;
+} __attribute__ ((packed));
+
+#define HCI_OP_WRITE_DEF_LINK_POLICY	0x080f
+struct hci_cp_write_def_link_policy {
+	__le16   policy;
+} __attribute__ ((packed));
+
 #define HCI_OP_SNIFF_SUBRATE		0x0811
 struct hci_cp_sniff_subrate {
 	__le16   handle;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 69c64ce054f..6175ce841e9 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -65,6 +65,8 @@ void hci_acl_connect(struct hci_conn *conn)
 
 	conn->attempt++;
 
+	conn->link_policy = hdev->link_policy;
+
 	memset(&cp, 0, sizeof(cp));
 	bacpy(&cp.bdaddr, &conn->dst);
 	cp.pscan_rep_mode = 0x02;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index aec6929f5c1..69b2c1aac08 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -279,10 +279,20 @@ static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt)
 
 	BT_DBG("%s %x", hdev->name, encrypt);
 
-	/* Authentication */
+	/* Encryption */
 	hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
 }
 
+static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt)
+{
+	__le16 policy = cpu_to_le16(opt);
+
+	BT_DBG("%s %x", hdev->name, opt);
+
+	/* Default link policy */
+	hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
+}
+
 /* Get HCI device by index.
  * Device is held on return. */
 struct hci_dev *hci_dev_get(int index)
@@ -694,32 +704,35 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
 					msecs_to_jiffies(HCI_INIT_TIMEOUT));
 		break;
 
-	case HCISETPTYPE:
-		hdev->pkt_type = (__u16) dr.dev_opt;
-		break;
-
 	case HCISETLINKPOL:
-		hdev->link_policy = (__u16) dr.dev_opt;
+		err = hci_request(hdev, hci_linkpol_req, dr.dev_opt,
+					msecs_to_jiffies(HCI_INIT_TIMEOUT));
 		break;
 
 	case HCISETLINKMODE:
-		hdev->link_mode = ((__u16) dr.dev_opt) & (HCI_LM_MASTER | HCI_LM_ACCEPT);
+		hdev->link_mode = ((__u16) dr.dev_opt) &
+					(HCI_LM_MASTER | HCI_LM_ACCEPT);
+		break;
+
+	case HCISETPTYPE:
+		hdev->pkt_type = (__u16) dr.dev_opt;
 		break;
 
 	case HCISETACLMTU:
-		hdev->acl_mtu  = *((__u16 *)&dr.dev_opt + 1);
-		hdev->acl_pkts = *((__u16 *)&dr.dev_opt + 0);
+		hdev->acl_mtu  = *((__u16 *) &dr.dev_opt + 1);
+		hdev->acl_pkts = *((__u16 *) &dr.dev_opt + 0);
 		break;
 
 	case HCISETSCOMTU:
-		hdev->sco_mtu  = *((__u16 *)&dr.dev_opt + 1);
-		hdev->sco_pkts = *((__u16 *)&dr.dev_opt + 0);
+		hdev->sco_mtu  = *((__u16 *) &dr.dev_opt + 1);
+		hdev->sco_pkts = *((__u16 *) &dr.dev_opt + 0);
 		break;
 
 	default:
 		err = -EINVAL;
 		break;
 	}
+
 	hci_dev_put(hdev);
 	return err;
 }
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index d4d2dcc40fc..9af181a6165 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -110,6 +110,25 @@ static void hci_cc_role_discovery(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cc_read_link_policy(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_rp_read_link_policy *rp = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+	if (conn)
+		conn->link_policy = __le16_to_cpu(rp->policy);
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_rp_write_link_policy *rp = (void *) skb->data;
@@ -128,13 +147,41 @@ static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
-	if (conn) {
+	if (conn)
 		conn->link_policy = get_unaligned_le16(sent + 2);
-	}
 
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cc_read_def_link_policy(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_rp_read_def_link_policy *rp = (void *) skb->data;
+
+	BT_DBG("%s status 0x%x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	hdev->link_policy = __le16_to_cpu(rp->policy);
+}
+
+static void hci_cc_write_def_link_policy(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+	void *sent;
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_LINK_POLICY);
+	if (!sent)
+		return;
+
+	if (!status)
+		hdev->link_policy = get_unaligned_le16(sent);
+
+	hci_req_complete(hdev, status);
+}
+
 static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	__u8 status = *((__u8 *) skb->data);
@@ -347,8 +394,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
 		return;
 
 	hdev->hci_ver = rp->hci_ver;
-	hdev->hci_rev = btohs(rp->hci_rev);
-	hdev->manufacturer = btohs(rp->manufacturer);
+	hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
+	hdev->manufacturer = __le16_to_cpu(rp->manufacturer);
 
 	BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name,
 					hdev->manufacturer,
@@ -690,14 +737,6 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 			hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp);
 		}
 
-		/* Set link policy */
-		if (conn->type == ACL_LINK && hdev->link_policy) {
-			struct hci_cp_write_link_policy cp;
-			cp.handle = ev->handle;
-			cp.policy = cpu_to_le16(hdev->link_policy);
-			hci_send_cmd(hdev, HCI_OP_WRITE_LINK_POLICY, sizeof(cp), &cp);
-		}
-
 		/* Set packet type for incoming connection */
 		if (!conn->out && hdev->hci_ver < 3) {
 			struct hci_cp_change_conn_ptype cp;
@@ -974,10 +1013,22 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk
 		hci_cc_role_discovery(hdev, skb);
 		break;
 
+	case HCI_OP_READ_LINK_POLICY:
+		hci_cc_read_link_policy(hdev, skb);
+		break;
+
 	case HCI_OP_WRITE_LINK_POLICY:
 		hci_cc_write_link_policy(hdev, skb);
 		break;
 
+	case HCI_OP_READ_DEF_LINK_POLICY:
+		hci_cc_read_def_link_policy(hdev, skb);
+		break;
+
+	case HCI_OP_WRITE_DEF_LINK_POLICY:
+		hci_cc_write_def_link_policy(hdev, skb);
+		break;
+
 	case HCI_OP_RESET:
 		hci_cc_reset(hdev, skb);
 		break;
-- 
cgit v1.2.3-70-g09d2


From f383f2750af19fe6f820edf40d8729f9741c5b37 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:47 +0200
Subject: [Bluetooth] Some cleanups for HCI event handling

Some minor cosmetic cleanups to the HCI event handling to make the
code easier to read and understand.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 9af181a6165..bf3fbf9817b 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -198,12 +198,14 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb)
 
 	BT_DBG("%s status 0x%x", hdev->name, status);
 
+	if (status)
+		return;
+
 	sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LOCAL_NAME);
 	if (!sent)
 		return;
 
-	if (!status)
-		memcpy(hdev->dev_name, sent, 248);
+	memcpy(hdev->dev_name, sent, 248);
 }
 
 static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
@@ -313,12 +315,14 @@ static void hci_cc_write_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb)
 
 	BT_DBG("%s status 0x%x", hdev->name, status);
 
+	if (status)
+		return;
+
 	sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_CLASS_OF_DEV);
 	if (!sent)
 		return;
 
-	if (!status)
-		memcpy(hdev->dev_class, sent, 3);
+	memcpy(hdev->dev_class, sent, 3);
 }
 
 static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb)
@@ -333,7 +337,7 @@ static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb)
 
 	setting = __le16_to_cpu(rp->voice_setting);
 
-	if (hdev->voice_setting == setting )
+	if (hdev->voice_setting == setting)
 		return;
 
 	hdev->voice_setting = setting;
@@ -350,28 +354,31 @@ static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb)
 static void hci_cc_write_voice_setting(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	__u8 status = *((__u8 *) skb->data);
+	__u16 setting;
 	void *sent;
 
 	BT_DBG("%s status 0x%x", hdev->name, status);
 
+	if (status)
+		return;
+
 	sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_VOICE_SETTING);
 	if (!sent)
 		return;
 
-	if (!status) {
-		__u16 setting = get_unaligned_le16(sent);
+	setting = get_unaligned_le16(sent);
 
-		if (hdev->voice_setting != setting) {
-			hdev->voice_setting = setting;
+	if (hdev->voice_setting == setting)
+		return;
 
-			BT_DBG("%s voice setting 0x%04x", hdev->name, setting);
+	hdev->voice_setting = setting;
 
-			if (hdev->notify) {
-				tasklet_disable(&hdev->tx_task);
-				hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING);
-				tasklet_enable(&hdev->tx_task);
-			}
-		}
+	BT_DBG("%s voice setting 0x%04x", hdev->name, setting);
+
+	if (hdev->notify) {
+		tasklet_disable(&hdev->tx_task);
+		hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING);
+		tasklet_enable(&hdev->tx_task);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From c7bdd5026d28d178238bd794c61612602a54d55e Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:47 +0200
Subject: [Bluetooth] Update class of device value whenever possible

The class of device value can only be retrieved via inquiry or during
an incoming connection request. Outgoing connections can't ask for the
class of device. To compensate for this the value is stored and copied
via the inquiry cache, but currently only updated via inquiry. This
update should also happen during an incoming connection request.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index bf3fbf9817b..e47676128bb 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -796,10 +796,14 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 
 	if (mask & HCI_LM_ACCEPT) {
 		/* Connection accepted */
+		struct inquiry_entry *ie;
 		struct hci_conn *conn;
 
 		hci_dev_lock(hdev);
 
+		if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr)))
+			memcpy(ie->data.dev_class, ev->dev_class, 3);
+
 		conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
 		if (!conn) {
 			if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) {
-- 
cgit v1.2.3-70-g09d2


From 0493684ed2397e111574f343534d8e4ec440dfa5 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:48 +0200
Subject: [Bluetooth] Disable disconnect timer during Simple Pairing

During the Simple Pairing process the HCI disconnect timer must be
disabled. The way to do this is by holding a reference count of the
HCI connection. The Simple Pairing process on both sides starts with
an IO Capabilities Request and ends with Simple Pairing Complete.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 11 +++++++++++
 net/bluetooth/hci_event.c   | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index efc8c555c18..79629ff40e3 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -794,6 +794,17 @@ struct extended_inquiry_info {
 	__u8     data[240];
 } __attribute__ ((packed));
 
+#define HCI_EV_IO_CAPA_REQUEST		0x31
+struct hci_ev_io_capa_request {
+	bdaddr_t bdaddr;
+} __attribute__ ((packed));
+
+#define HCI_EV_SIMPLE_PAIR_COMPLETE	0x36
+struct hci_ev_simple_pair_complete {
+	__u8     status;
+	bdaddr_t bdaddr;
+} __attribute__ ((packed));
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index e47676128bb..7c9ac01cd8f 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1464,6 +1464,38 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct
 	hci_dev_unlock(hdev);
 }
 
+static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_io_capa_request *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (conn)
+		hci_conn_hold(conn);
+
+	hci_dev_unlock(hdev);
+}
+
+static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_simple_pair_complete *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (conn)
+		hci_conn_put(conn);
+
+	hci_dev_unlock(hdev);
+}
+
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_event_hdr *hdr = (void *) skb->data;
@@ -1588,6 +1620,14 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_extended_inquiry_result_evt(hdev, skb);
 		break;
 
+	case HCI_EV_IO_CAPA_REQUEST:
+		hci_io_capa_request_evt(hdev, skb);
+		break;
+
+	case HCI_EV_SIMPLE_PAIR_COMPLETE:
+		hci_simple_pair_complete_evt(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s event 0x%x", hdev->name, event);
 		break;
-- 
cgit v1.2.3-70-g09d2


From 333140b57fa7867bc92e5ee879b6ac4ef5e1d867 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:48 +0200
Subject: [Bluetooth] Track status of Simple Pairing mode

The Simple Pairing feature is optional and needs to be enabled by the
host stack first. The Linux kernel relies on the Bluetooth daemon to
either enable or disable it, but at any time it needs to know the
current state of the Simple Pairing mode. So track any changes made
by external entities and store the current mode in the HCI device
structure.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 11 +++++++++++
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_event.c        | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 49 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 79629ff40e3..6d0c04a81fc 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -514,6 +514,17 @@ struct hci_cp_host_buffer_size {
 	__le16   sco_max_pkt;
 } __attribute__ ((packed));
 
+#define HCI_OP_READ_SSP_MODE		0x0c55
+struct hci_rp_read_ssp_mode {
+	__u8     status;
+	__u8     mode;
+} __attribute__ ((packed));
+
+#define HCI_OP_WRITE_SSP_MODE		0x0c56
+struct hci_cp_write_ssp_mode {
+	__u8     mode;
+} __attribute__ ((packed));
+
 #define HCI_OP_READ_LOCAL_VERSION	0x1001
 struct hci_rp_read_local_version {
 	__u8     status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 6424d63e339..b85754e29a7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -75,6 +75,7 @@ struct hci_dev {
 	__u8		dev_class[3];
 	__u8		features[8];
 	__u8		commands[64];
+	__u8		ssp_mode;
 	__u8		hci_ver;
 	__u16		hci_rev;
 	__u16		manufacturer;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 7c9ac01cd8f..6077a651aac 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -391,6 +391,35 @@ static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_req_complete(hdev, status);
 }
 
+static void hci_cc_read_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_rp_read_ssp_mode *rp = (void *) skb->data;
+
+	BT_DBG("%s status 0x%x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	hdev->ssp_mode = rp->mode;
+}
+
+static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+	void *sent;
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	if (status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SSP_MODE);
+	if (!sent)
+		return;
+
+	hdev->ssp_mode = *((__u8 *) sent);
+}
+
 static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_rp_read_local_version *rp = (void *) skb->data;
@@ -1084,6 +1113,14 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk
 		hci_cc_host_buffer_size(hdev, skb);
 		break;
 
+	case HCI_OP_READ_SSP_MODE:
+		hci_cc_read_ssp_mode(hdev, skb);
+		break;
+
+	case HCI_OP_WRITE_SSP_MODE:
+		hci_cc_write_ssp_mode(hdev, skb);
+		break;
+
 	case HCI_OP_READ_LOCAL_VERSION:
 		hci_cc_read_local_version(hdev, skb);
 		break;
-- 
cgit v1.2.3-70-g09d2


From 41a96212b3b7b3cd59e8e8d33e6dabf0e21d9778 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:48 +0200
Subject: [Bluetooth] Track status of remote Simple Pairing mode

The Simple Pairing process can only be used if both sides have the
support enabled in the host stack. The current Bluetooth specification
has three ways to detect this support.

If an Extended Inquiry Result has been sent during inquiry then it
is safe to assume that Simple Pairing is enabled. It is not allowed
to enable Extended Inquiry without Simple Pairing. During the remote
name request phase a notification with the remote host supported
features will be sent to indicate Simple Pairing support. Also the
second page of the remote extended features can indicate support for
Simple Pairing.

For all three cases the value of remote Simple Pairing mode is stored
in the inquiry cache for later use.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |  6 ++++++
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_conn.c         | 14 ++++++++-----
 net/bluetooth/hci_event.c        | 43 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 6d0c04a81fc..5ac0a18db63 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -816,6 +816,12 @@ struct hci_ev_simple_pair_complete {
 	bdaddr_t bdaddr;
 } __attribute__ ((packed));
 
+#define HCI_EV_REMOTE_HOST_FEATURES	0x3d
+struct hci_ev_remote_host_features {
+	bdaddr_t bdaddr;
+	__u8     features[8];
+} __attribute__ ((packed));
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b85754e29a7..f73cc294570 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -40,6 +40,7 @@ struct inquiry_data {
 	__u8		dev_class[3];
 	__le16		clock_offset;
 	__s8		rssi;
+	__u8		ssp_mode;
 };
 
 struct inquiry_entry {
@@ -162,6 +163,7 @@ struct hci_conn {
 	__u8		 attempt;
 	__u8		 dev_class[3];
 	__u8             features[8];
+	__u8             ssp_mode;
 	__u16            interval;
 	__u16            pkt_type;
 	__u16            link_policy;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6175ce841e9..41351ba692e 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -71,12 +71,16 @@ void hci_acl_connect(struct hci_conn *conn)
 	bacpy(&cp.bdaddr, &conn->dst);
 	cp.pscan_rep_mode = 0x02;
 
-	if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)) &&
-			inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
-		cp.pscan_rep_mode = ie->data.pscan_rep_mode;
-		cp.pscan_mode     = ie->data.pscan_mode;
-		cp.clock_offset   = ie->data.clock_offset | cpu_to_le16(0x8000);
+	if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) {
+		if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
+			cp.pscan_rep_mode = ie->data.pscan_rep_mode;
+			cp.pscan_mode     = ie->data.pscan_mode;
+			cp.clock_offset   = ie->data.clock_offset |
+							cpu_to_le16(0x8000);
+		}
+
 		memcpy(conn->dev_class, ie->data.dev_class, 3);
+		conn->ssp_mode = ie->data.ssp_mode;
 	}
 
 	cp.pkt_type = cpu_to_le16(conn->pkt_type);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6077a651aac..c8fda7dc298 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -736,6 +736,7 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *
 		memcpy(data.dev_class, info->dev_class, 3);
 		data.clock_offset	= info->clock_offset;
 		data.rssi		= 0x00;
+		data.ssp_mode		= 0x00;
 		info++;
 		hci_inquiry_cache_update(hdev, &data);
 	}
@@ -1390,6 +1391,7 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct
 			memcpy(data.dev_class, info->dev_class, 3);
 			data.clock_offset	= info->clock_offset;
 			data.rssi		= info->rssi;
+			data.ssp_mode		= 0x00;
 			info++;
 			hci_inquiry_cache_update(hdev, &data);
 		}
@@ -1404,6 +1406,7 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct
 			memcpy(data.dev_class, info->dev_class, 3);
 			data.clock_offset	= info->clock_offset;
 			data.rssi		= info->rssi;
+			data.ssp_mode		= 0x00;
 			info++;
 			hci_inquiry_cache_update(hdev, &data);
 		}
@@ -1414,7 +1417,27 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct
 
 static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
+	struct hci_ev_remote_ext_features *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
 	BT_DBG("%s", hdev->name);
+
+	if (ev->status || ev->page != 0x01)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+	if (conn) {
+		struct inquiry_entry *ie;
+
+		if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)))
+			ie->data.ssp_mode = (ev->features[0] & 0x01);
+
+		conn->ssp_mode = (ev->features[0] & 0x01);
+	}
+
+	hci_dev_unlock(hdev);
 }
 
 static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1494,6 +1517,7 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct
 		memcpy(data.dev_class, info->dev_class, 3);
 		data.clock_offset       = info->clock_offset;
 		data.rssi               = info->rssi;
+		data.ssp_mode		= 0x01;
 		info++;
 		hci_inquiry_cache_update(hdev, &data);
 	}
@@ -1533,6 +1557,21 @@ static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_
 	hci_dev_unlock(hdev);
 }
 
+static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_remote_host_features *ev = (void *) skb->data;
+	struct inquiry_entry *ie;
+
+	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
+
+	if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr)))
+		ie->data.ssp_mode = (ev->features[0] & 0x01);
+
+	hci_dev_unlock(hdev);
+}
+
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_event_hdr *hdr = (void *) skb->data;
@@ -1665,6 +1704,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_simple_pair_complete_evt(hdev, skb);
 		break;
 
+	case HCI_EV_REMOTE_HOST_FEATURES:
+		hci_remote_host_features_evt(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s event 0x%x", hdev->name, event);
 		break;
-- 
cgit v1.2.3-70-g09d2


From a8bd28baf21b9ee6b8486666b771283e566c0d31 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:49 +0200
Subject: [Bluetooth] Export remote Simple Pairing mode via sysfs

Since the remote Simple Pairing mode is stored together with the
inquiry cache, it makes sense to show it together with the other
information.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_sysfs.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 84360c117d4..a18871e0158 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -113,11 +113,13 @@ static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *a
 		struct inquiry_data *data = &e->data;
 		bdaddr_t bdaddr;
 		baswap(&bdaddr, &data->bdaddr);
-		n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %u\n",
+		n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n",
 				batostr(&bdaddr),
-				data->pscan_rep_mode, data->pscan_period_mode, data->pscan_mode,
-				data->dev_class[2], data->dev_class[1], data->dev_class[0],
-				__le16_to_cpu(data->clock_offset), data->rssi, e->timestamp);
+				data->pscan_rep_mode, data->pscan_period_mode,
+				data->pscan_mode, data->dev_class[2],
+				data->dev_class[1], data->dev_class[0],
+				__le16_to_cpu(data->clock_offset),
+				data->rssi, data->ssp_mode, e->timestamp);
 	}
 
 	hci_dev_unlock_bh(hdev);
@@ -249,15 +251,28 @@ static ssize_t show_conn_address(struct device *dev, struct device_attribute *at
 	return sprintf(buf, "%s\n", batostr(&bdaddr));
 }
 
+static ssize_t show_conn_features(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hci_conn *conn = dev_get_drvdata(dev);
+
+	return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+				conn->features[0], conn->features[1],
+				conn->features[2], conn->features[3],
+				conn->features[4], conn->features[5],
+				conn->features[6], conn->features[7]);
+}
+
 #define CONN_ATTR(_name,_mode,_show,_store) \
 struct device_attribute conn_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
 static CONN_ATTR(type, S_IRUGO, show_conn_type, NULL);
 static CONN_ATTR(address, S_IRUGO, show_conn_address, NULL);
+static CONN_ATTR(features, S_IRUGO, show_conn_features, NULL);
 
 static struct device_attribute *conn_attrs[] = {
 	&conn_attr_type,
 	&conn_attr_address,
+	&conn_attr_features,
 	NULL
 };
 
-- 
cgit v1.2.3-70-g09d2


From 769be974d0c7b4fe1a52f9cdaad22259b60953f7 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:49 +0200
Subject: [Bluetooth] Use ACL config stage to retrieve remote features

The Bluetooth technology introduces new features on a regular basis
and for some of them it is important that the hardware on both sides
support them. For features like Simple Pairing it is important that
the host stacks on both sides have switched this feature on. To make
valid decisions, a config stage during ACL link establishment has been
introduced that retrieves remote features and if needed also the remote
extended features (known as remote host features) before signalling
this link as connected.

This change introduces full reference counting of incoming and outgoing
ACL links and the Bluetooth core will disconnect both if no owner of it
is present. To better handle interoperability during the pairing phase
the disconnect timeout for incoming connections has been increased to
10 seconds. This is five times more than for outgoing connections.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |   2 +
 include/net/bluetooth/hci_core.h |   3 +-
 net/bluetooth/hci_conn.c         |   2 +
 net/bluetooth/hci_core.c         |   7 ++-
 net/bluetooth/hci_event.c        | 127 +++++++++++++++++++++++++++++++++------
 5 files changed, 118 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 5ac0a18db63..55576e84882 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -180,6 +180,8 @@ enum {
 
 #define LMP_SNIFF_SUBR	0x02
 
+#define LMP_SIMPLE_PAIR	0x08
+
 /* Connection modes */
 #define HCI_CM_ACTIVE	0x0000
 #define HCI_CM_HOLD	0x0001
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index f73cc294570..28fbd0caa53 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -348,7 +348,7 @@ static inline void hci_conn_put(struct hci_conn *conn)
 			if (conn->state == BT_CONNECTED) {
 				timeo = msecs_to_jiffies(HCI_DISCONN_TIMEOUT);
 				if (!conn->out)
-					timeo *= 2;
+					timeo *= 5;
 			} else
 				timeo = msecs_to_jiffies(10);
 		} else
@@ -463,6 +463,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define lmp_sniff_capable(dev)     ((dev)->features[0] & LMP_SNIFF)
 #define lmp_sniffsubr_capable(dev) ((dev)->features[5] & LMP_SNIFF_SUBR)
 #define lmp_esco_capable(dev)      ((dev)->features[3] & LMP_ESCO)
+#define lmp_ssp_capable(dev)       ((dev)->features[6] & LMP_SIMPLE_PAIR)
 
 /* ----- HCI protocols ----- */
 struct hci_proto {
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 41351ba692e..6f22533e765 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -170,11 +170,13 @@ static void hci_conn_timeout(unsigned long arg)
 
 	switch (conn->state) {
 	case BT_CONNECT:
+	case BT_CONNECT2:
 		if (conn->type == ACL_LINK)
 			hci_acl_connect_cancel(conn);
 		else
 			hci_acl_disconn(conn, 0x13);
 		break;
+	case BT_CONFIG:
 	case BT_CONNECTED:
 		hci_acl_disconn(conn, 0x13);
 		break;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 69b2c1aac08..f5b21cb9369 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1283,9 +1283,12 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int
 		struct hci_conn *c;
 		c = list_entry(p, struct hci_conn, list);
 
-		if (c->type != type || c->state != BT_CONNECTED
-				|| skb_queue_empty(&c->data_q))
+		if (c->type != type || skb_queue_empty(&c->data_q))
 			continue;
+
+		if (c->state != BT_CONNECTED && c->state != BT_CONFIG)
+			continue;
+
 		num++;
 
 		if (c->sent < min) {
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index c8fda7dc298..e3e360c3c53 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -624,6 +624,62 @@ static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status)
 	BT_DBG("%s status 0x%x", hdev->name, status);
 }
 
+static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status)
+{
+	struct hci_cp_read_remote_features *cp;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	if (!status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_READ_REMOTE_FEATURES);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+	if (conn) {
+		if (conn->state == BT_CONFIG) {
+			conn->state = BT_CONNECTED;
+			hci_proto_connect_cfm(conn, status);
+			hci_conn_put(conn);
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
+static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status)
+{
+	struct hci_cp_read_remote_ext_features *cp;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	if (!status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_READ_REMOTE_EXT_FEATURES);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+	if (conn) {
+		if (conn->state == BT_CONFIG) {
+			conn->state = BT_CONNECTED;
+			hci_proto_connect_cfm(conn, status);
+			hci_conn_put(conn);
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status)
 {
 	struct hci_cp_setup_sync_conn *cp;
@@ -759,7 +815,12 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 
 	if (!ev->status) {
 		conn->handle = __le16_to_cpu(ev->handle);
-		conn->state  = BT_CONNECTED;
+
+		if (conn->type == ACL_LINK) {
+			conn->state = BT_CONFIG;
+			hci_conn_hold(conn);
+		} else
+			conn->state = BT_CONNECTED;
 
 		if (test_bit(HCI_AUTH, &hdev->flags))
 			conn->link_mode |= HCI_LM_AUTH;
@@ -771,7 +832,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		if (conn->type == ACL_LINK) {
 			struct hci_cp_read_remote_features cp;
 			cp.handle = ev->handle;
-			hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp);
+			hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES,
+							sizeof(cp), &cp);
 		}
 
 		/* Set packet type for incoming connection */
@@ -781,10 +843,6 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 			cp.pkt_type = cpu_to_le16(conn->pkt_type);
 			hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE,
 							sizeof(cp), &cp);
-		} else {
-			/* Update disconnect timer */
-			hci_conn_hold(conn);
-			hci_conn_put(conn);
 		}
 	} else
 		conn->state = BT_CLOSED;
@@ -804,9 +862,10 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		}
 	}
 
-	hci_proto_connect_cfm(conn, ev->status);
-	if (ev->status)
+	if (ev->status) {
+		hci_proto_connect_cfm(conn, ev->status);
 		hci_conn_del(conn);
+	}
 
 unlock:
 	hci_dev_unlock(hdev);
@@ -1006,14 +1065,29 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
-	if (ev->status)
-		return;
-
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
-	if (conn)
-		memcpy(conn->features, ev->features, 8);
+	if (conn) {
+		if (!ev->status)
+			memcpy(conn->features, ev->features, 8);
+
+		if (conn->state == BT_CONFIG) {
+			if (!ev->status && lmp_ssp_capable(hdev) &&
+						lmp_ssp_capable(conn)) {
+				struct hci_cp_read_remote_ext_features cp;
+				cp.handle = ev->handle;
+				cp.page = 0x01;
+				hci_send_cmd(hdev,
+					HCI_OP_READ_REMOTE_EXT_FEATURES,
+							sizeof(cp), &cp);
+			} else {
+				conn->state = BT_CONNECTED;
+				hci_proto_connect_cfm(conn, ev->status);
+				hci_conn_put(conn);
+			}
+		}
+	}
 
 	hci_dev_unlock(hdev);
 }
@@ -1180,6 +1254,14 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_cs_remote_name_req(hdev, ev->status);
 		break;
 
+	case HCI_OP_READ_REMOTE_FEATURES:
+		hci_cs_read_remote_features(hdev, ev->status);
+		break;
+
+	case HCI_OP_READ_REMOTE_EXT_FEATURES:
+		hci_cs_read_remote_ext_features(hdev, ev->status);
+		break;
+
 	case HCI_OP_SETUP_SYNC_CONN:
 		hci_cs_setup_sync_conn(hdev, ev->status);
 		break;
@@ -1422,19 +1504,24 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 
 	BT_DBG("%s", hdev->name);
 
-	if (ev->status || ev->page != 0x01)
-		return;
-
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn) {
-		struct inquiry_entry *ie;
+		if (!ev->status && ev->page == 0x01) {
+			struct inquiry_entry *ie;
 
-		if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)))
-			ie->data.ssp_mode = (ev->features[0] & 0x01);
+			if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)))
+				ie->data.ssp_mode = (ev->features[0] & 0x01);
 
-		conn->ssp_mode = (ev->features[0] & 0x01);
+			conn->ssp_mode = (ev->features[0] & 0x01);
+		}
+
+		if (conn->state == BT_CONFIG) {
+			conn->state = BT_CONNECTED;
+			hci_proto_connect_cfm(conn, ev->status);
+			hci_conn_put(conn);
+		}
 	}
 
 	hci_dev_unlock(hdev);
-- 
cgit v1.2.3-70-g09d2


From f8558555f31e177e2644f3c8116801c3e5c29974 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:49 +0200
Subject: [Bluetooth] Initiate authentication during connection establishment

With Bluetooth 2.1 and Simple Pairing the requirement is that any new
connection needs to be authenticated and that encryption has been
switched on before allowing L2CAP to use it. So make sure that all
the requirements are fulfilled and otherwise drop the connection with
a minimal disconnect timeout of 10 milliseconds.

This change only affects Bluetooth 2.1 devices and Simple Pairing
needs to be enabled locally and in the remote host stack. The previous
changes made sure that these information are discovered before any
kind of authentication and encryption is triggered.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 114 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 103 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index e3e360c3c53..64668e2656a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -619,6 +619,60 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status)
+{
+	struct hci_cp_auth_requested *cp;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	if (!status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_AUTH_REQUESTED);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+	if (conn) {
+		if (conn->state == BT_CONFIG) {
+			hci_proto_connect_cfm(conn, status);
+			hci_conn_put(conn);
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
+static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
+{
+	struct hci_cp_set_conn_encrypt *cp;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	if (!status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_SET_CONN_ENCRYPT);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+	if (conn) {
+		if (conn->state == BT_CONFIG) {
+			hci_proto_connect_cfm(conn, status);
+			hci_conn_put(conn);
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status)
 {
 	BT_DBG("%s status 0x%x", hdev->name, status);
@@ -643,7 +697,6 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status)
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
-			conn->state = BT_CONNECTED;
 			hci_proto_connect_cfm(conn, status);
 			hci_conn_put(conn);
 		}
@@ -671,7 +724,6 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status)
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
-			conn->state = BT_CONNECTED;
 			hci_proto_connect_cfm(conn, status);
 			hci_conn_put(conn);
 		}
@@ -982,15 +1034,29 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 
 		clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
 
-		hci_auth_cfm(conn, ev->status);
+		if (conn->state == BT_CONFIG) {
+			if (!ev->status && hdev->ssp_mode > 0 &&
+							conn->ssp_mode > 0) {
+				struct hci_cp_set_conn_encrypt cp;
+				cp.handle  = ev->handle;
+				cp.encrypt = 0x01;
+				hci_send_cmd(hdev, HCI_OP_SET_CONN_ENCRYPT,
+							sizeof(cp), &cp);
+			} else {
+				conn->state = BT_CONNECTED;
+				hci_proto_connect_cfm(conn, ev->status);
+				hci_conn_put(conn);
+			}
+		} else
+			hci_auth_cfm(conn, ev->status);
 
 		if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
 			if (!ev->status) {
 				struct hci_cp_set_conn_encrypt cp;
-				cp.handle  = cpu_to_le16(conn->handle);
-				cp.encrypt = 1;
-				hci_send_cmd(conn->hdev,
-					HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp);
+				cp.handle  = ev->handle;
+				cp.encrypt = 0x01;
+				hci_send_cmd(hdev, HCI_OP_SET_CONN_ENCRYPT,
+							sizeof(cp), &cp);
 			} else {
 				clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend);
 				hci_encrypt_cfm(conn, ev->status, 0x00);
@@ -1030,7 +1096,14 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *
 
 		clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend);
 
-		hci_encrypt_cfm(conn, ev->status, ev->encrypt);
+		if (conn->state == BT_CONFIG) {
+			if (!ev->status)
+				conn->state = BT_CONNECTED;
+
+			hci_proto_connect_cfm(conn, ev->status);
+			hci_conn_put(conn);
+		} else
+			hci_encrypt_cfm(conn, ev->status, ev->encrypt);
 	}
 
 	hci_dev_unlock(hdev);
@@ -1250,6 +1323,14 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_cs_add_sco(hdev, ev->status);
 		break;
 
+	case HCI_OP_AUTH_REQUESTED:
+		hci_cs_auth_requested(hdev, ev->status);
+		break;
+
+	case HCI_OP_SET_CONN_ENCRYPT:
+		hci_cs_set_conn_encrypt(hdev, ev->status);
+		break;
+
 	case HCI_OP_REMOTE_NAME_REQ:
 		hci_cs_remote_name_req(hdev, ev->status);
 		break;
@@ -1518,9 +1599,20 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 		}
 
 		if (conn->state == BT_CONFIG) {
-			conn->state = BT_CONNECTED;
-			hci_proto_connect_cfm(conn, ev->status);
-			hci_conn_put(conn);
+			if (!ev->status && hdev->ssp_mode > 0 &&
+							conn->ssp_mode > 0) {
+				if (conn->out) {
+					struct hci_cp_auth_requested cp;
+					cp.handle = ev->handle;
+					hci_send_cmd(hdev,
+						HCI_OP_AUTH_REQUESTED,
+							sizeof(cp), &cp);
+				}
+			} else {
+				conn->state = BT_CONNECTED;
+				hci_proto_connect_cfm(conn, ev->status);
+				hci_conn_put(conn);
+			}
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 40be492fe4fab829951681860c2bb26fa1d5fe4a Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:50 +0200
Subject: [Bluetooth] Export details about authentication requirements

With the Simple Pairing support, the authentication requirements are
an explicit setting during the bonding process. Track and enforce the
requirements and allow higher layers like L2CAP and RFCOMM to increase
them if needed.

This patch introduces a new IOCTL that allows to query the current
authentication requirements. It is also possible to detect Simple
Pairing support in the kernel this way.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 fs/compat_ioctl.c                |  1 +
 include/net/bluetooth/hci.h      | 18 ++++++++++++++----
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_conn.c         | 38 ++++++++++++++++++++++++++++++++++----
 net/bluetooth/hci_sock.c         | 18 +++++-------------
 net/bluetooth/l2cap.c            | 14 ++++++++++----
 net/bluetooth/rfcomm/core.c      |  3 ++-
 7 files changed, 68 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 97dba0d9234..d63d430d3b2 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2399,6 +2399,7 @@ COMPATIBLE_IOCTL(HCIGETDEVLIST)
 COMPATIBLE_IOCTL(HCIGETDEVINFO)
 COMPATIBLE_IOCTL(HCIGETCONNLIST)
 COMPATIBLE_IOCTL(HCIGETCONNINFO)
+COMPATIBLE_IOCTL(HCIGETAUTHINFO)
 COMPATIBLE_IOCTL(HCISETRAW)
 COMPATIBLE_IOCTL(HCISETSCAN)
 COMPATIBLE_IOCTL(HCISETAUTH)
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 55576e84882..3cc29491931 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -72,8 +72,6 @@ enum {
 	HCI_INQUIRY,
 
 	HCI_RAW,
-
-	HCI_SECMGR
 };
 
 /* HCI ioctl defines */
@@ -86,6 +84,7 @@ enum {
 #define HCIGETDEVINFO	_IOR('H', 211, int)
 #define HCIGETCONNLIST	_IOR('H', 212, int)
 #define HCIGETCONNINFO	_IOR('H', 213, int)
+#define HCIGETAUTHINFO	_IOR('H', 215, int)
 
 #define HCISETRAW	_IOW('H', 220, int)
 #define HCISETSCAN	_IOW('H', 221, int)
@@ -97,8 +96,6 @@ enum {
 #define HCISETACLMTU	_IOW('H', 227, int)
 #define HCISETSCOMTU	_IOW('H', 228, int)
 
-#define HCISETSECMGR	_IOW('H', 230, int)
-
 #define HCIINQUIRY	_IOR('H', 240, int)
 
 /* HCI timeouts */
@@ -203,6 +200,14 @@ enum {
 #define HCI_LM_RELIABLE	0x0010
 #define HCI_LM_SECURE	0x0020
 
+/* Authentication types */
+#define HCI_AT_NO_BONDING		0x00
+#define HCI_AT_NO_BONDING_MITM		0x01
+#define HCI_AT_DEDICATED_BONDING	0x02
+#define HCI_AT_DEDICATED_BONDING_MITM	0x03
+#define HCI_AT_GENERAL_BONDING		0x04
+#define HCI_AT_GENERAL_BONDING_MITM	0x05
+
 /* -----  HCI Commands ---- */
 #define HCI_OP_INQUIRY			0x0401
 struct hci_cp_inquiry {
@@ -1001,6 +1006,11 @@ struct hci_conn_info_req {
 	struct   hci_conn_info conn_info[0];
 };
 
+struct hci_auth_info_req {
+	bdaddr_t bdaddr;
+	__u8     type;
+};
+
 struct hci_inquiry_req {
 	__u16 dev_id;
 	__u16 flags;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 28fbd0caa53..cbf75109468 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -168,6 +168,7 @@ struct hci_conn {
 	__u16            pkt_type;
 	__u16            link_policy;
 	__u32		 link_mode;
+	__u8             auth_type;
 	__u8             power_save;
 	unsigned long	 pend;
 
@@ -422,6 +423,7 @@ int hci_get_dev_list(void __user *arg);
 int hci_get_dev_info(void __user *arg);
 int hci_get_conn_list(void __user *arg);
 int hci_get_conn_info(struct hci_dev *hdev, void __user *arg);
+int hci_get_auth_info(struct hci_dev *hdev, void __user *arg);
 int hci_inquiry(void __user *arg);
 
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6f22533e765..0d4b8aeb8e0 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -379,13 +379,21 @@ int hci_conn_auth(struct hci_conn *conn)
 {
 	BT_DBG("conn %p", conn);
 
+	if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) {
+		if (!(conn->auth_type & 0x01)) {
+			conn->auth_type = HCI_AT_GENERAL_BONDING_MITM;
+			conn->link_mode &= ~HCI_LM_AUTH;
+		}
+	}
+
 	if (conn->link_mode & HCI_LM_AUTH)
 		return 1;
 
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
 		struct hci_cp_auth_requested cp;
 		cp.handle = cpu_to_le16(conn->handle);
-		hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
+		hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
+							sizeof(cp), &cp);
 	}
 	return 0;
 }
@@ -397,7 +405,7 @@ int hci_conn_encrypt(struct hci_conn *conn)
 	BT_DBG("conn %p", conn);
 
 	if (conn->link_mode & HCI_LM_ENCRYPT)
-		return 1;
+		return hci_conn_auth(conn);
 
 	if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend))
 		return 0;
@@ -406,7 +414,8 @@ int hci_conn_encrypt(struct hci_conn *conn)
 		struct hci_cp_set_conn_encrypt cp;
 		cp.handle  = cpu_to_le16(conn->handle);
 		cp.encrypt = 1;
-		hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp);
+		hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT,
+							sizeof(cp), &cp);
 	}
 	return 0;
 }
@@ -420,7 +429,8 @@ int hci_conn_change_link_key(struct hci_conn *conn)
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
 		struct hci_cp_change_conn_link_key cp;
 		cp.handle = cpu_to_le16(conn->handle);
-		hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp);
+		hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY,
+							sizeof(cp), &cp);
 	}
 	return 0;
 }
@@ -624,3 +634,23 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg)
 
 	return copy_to_user(ptr, &ci, sizeof(ci)) ? -EFAULT : 0;
 }
+
+int hci_get_auth_info(struct hci_dev *hdev, void __user *arg)
+{
+	struct hci_auth_info_req req;
+	struct hci_conn *conn;
+
+	if (copy_from_user(&req, arg, sizeof(req)))
+		return -EFAULT;
+
+	hci_dev_lock_bh(hdev);
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &req.bdaddr);
+	if (conn)
+		req.type = conn->auth_type;
+	hci_dev_unlock_bh(hdev);
+
+	if (!conn)
+		return -ENOENT;
+
+	return copy_to_user(arg, &req, sizeof(req)) ? -EFAULT : 0;
+}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 747fabd735d..d62579b6795 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -193,19 +193,11 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign
 
 		return 0;
 
-	case HCISETSECMGR:
-		if (!capable(CAP_NET_ADMIN))
-			return -EACCES;
-
-		if (arg)
-			set_bit(HCI_SECMGR, &hdev->flags);
-		else
-			clear_bit(HCI_SECMGR, &hdev->flags);
-
-		return 0;
-
 	case HCIGETCONNINFO:
-		return hci_get_conn_info(hdev, (void __user *)arg);
+		return hci_get_conn_info(hdev, (void __user *) arg);
+
+	case HCIGETAUTHINFO:
+		return hci_get_auth_info(hdev, (void __user *) arg);
 
 	default:
 		if (hdev->ioctl)
@@ -217,7 +209,7 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign
 static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
-	void __user *argp = (void __user *)arg;
+	void __user *argp = (void __user *) arg;
 	int err;
 
 	BT_DBG("cmd %x arg %lx", cmd, arg);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 252264062f5..30ad59b717d 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2150,7 +2150,7 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason)
 static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 {
 	struct l2cap_chan_list *l;
-	struct l2cap_conn *conn = conn = hcon->l2cap_data;
+	struct l2cap_conn *conn = hcon->l2cap_data;
 	struct l2cap_conn_rsp rsp;
 	struct sock *sk;
 	int result;
@@ -2165,11 +2165,17 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 	read_lock(&l->lock);
 
 	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
+		struct l2cap_pinfo *pi = l2cap_pi(sk);
+
 		bh_lock_sock(sk);
 
-		if (sk->sk_state != BT_CONNECT2 ||
-				(l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) ||
-				(l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) {
+		if (sk->sk_state != BT_CONNECT2) {
+			bh_unlock_sock(sk);
+			continue;
+		}
+
+		if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) &&
+					!(hcon->link_mode & HCI_LM_ENCRYPT)) {
 			bh_unlock_sock(sk);
 			continue;
 		}
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index e7a6a03cea3..e56bcfc35a4 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1969,7 +1969,8 @@ static void rfcomm_auth_cfm(struct hci_conn *conn, u8 status)
 	list_for_each_safe(p, n, &s->dlcs) {
 		d = list_entry(p, struct rfcomm_dlc, list);
 
-		if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE))
+		if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) &&
+				!(conn->link_mode & HCI_LM_ENCRYPT) && !status)
 			continue;
 
 		if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags))
-- 
cgit v1.2.3-70-g09d2


From 3241ad820dbb172021e0268b5611031991431626 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:50 +0200
Subject: [Bluetooth] Add timestamp support to L2CAP, RFCOMM and SCO

Enable the common timestamp functionality that the network subsystem
provides for L2CAP, RFCOMM and SCO sockets. It is possible to either
use SO_TIMESTAMP or the IOCTLs to retrieve the timestamp of the
current packet.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |  1 +
 net/bluetooth/af_bluetooth.c      | 28 ++++++++++++++++++++++++++++
 net/bluetooth/l2cap.c             |  2 +-
 net/bluetooth/rfcomm/sock.c       | 15 +++++++++++----
 net/bluetooth/sco.c               |  2 +-
 5 files changed, 42 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 750648df13f..6f8418bf424 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -121,6 +121,7 @@ void bt_sock_link(struct bt_sock_list *l, struct sock *s);
 void bt_sock_unlink(struct bt_sock_list *l, struct sock *s);
 int  bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags);
 uint bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait);
+int  bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int  bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
 
 void bt_accept_enqueue(struct sock *parent, struct sock *sk);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index d366423c839..88afe25003d 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -36,6 +36,7 @@
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <net/sock.h>
+#include <asm/ioctls.h>
 
 #if defined(CONFIG_KMOD)
 #include <linux/kmod.h>
@@ -266,6 +267,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err == 0)
+		sock_recv_timestamp(msg, sk, skb);
 
 	skb_free_datagram(sk, skb);
 
@@ -329,6 +332,31 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w
 }
 EXPORT_SYMBOL(bt_sock_poll);
 
+int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg);
+
+	switch (cmd) {
+	case SIOCGSTAMP:
+		err = sock_get_timestamp(sk, (struct timeval __user *) arg);
+		break;
+
+	case SIOCGSTAMPNS:
+		err = sock_get_timestampns(sk, (struct timespec __user *) arg);
+		break;
+
+	default:
+		err = -ENOIOCTLCMD;
+		break;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(bt_sock_ioctl);
+
 int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
 {
 	DECLARE_WAITQUEUE(wait, current);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 30ad59b717d..4fcf24af759 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2388,9 +2388,9 @@ static const struct proto_ops l2cap_sock_ops = {
 	.sendmsg	= l2cap_sock_sendmsg,
 	.recvmsg	= bt_sock_recvmsg,
 	.poll		= bt_sock_poll,
+	.ioctl		= bt_sock_ioctl,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
-	.ioctl		= sock_no_ioctl,
 	.shutdown	= l2cap_sock_shutdown,
 	.setsockopt	= l2cap_sock_setsockopt,
 	.getsockopt	= l2cap_sock_getsockopt
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index cacb1ab51f9..c3ed076481d 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -690,6 +690,8 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied += chunk;
 		size   -= chunk;
 
+		sock_recv_timestamp(msg, sk, skb);
+
 		if (!(flags & MSG_PEEK)) {
 			atomic_sub(chunk, &sk->sk_rmem_alloc);
 
@@ -795,15 +797,20 @@ static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
 	struct sock *sk = sock->sk;
 	int err;
 
-	lock_sock(sk);
+	BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg);
 
+	err = bt_sock_ioctl(sock, cmd, arg);
+
+	if (err == -ENOIOCTLCMD) {
 #ifdef CONFIG_BT_RFCOMM_TTY
-	err = rfcomm_dev_ioctl(sk, cmd, (void __user *)arg);
+		lock_sock(sk);
+		err = rfcomm_dev_ioctl(sk, cmd, (void __user *) arg);
+		release_sock(sk);
 #else
-	err = -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
 #endif
+	}
 
-	release_sock(sk);
 	return err;
 }
 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index b0d487e2db2..1ad226c9788 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -921,7 +921,7 @@ static const struct proto_ops sco_sock_ops = {
 	.sendmsg	= sco_sock_sendmsg,
 	.recvmsg	= bt_sock_recvmsg,
 	.poll		= bt_sock_poll,
-	.ioctl		= sock_no_ioctl,
+	.ioctl		= bt_sock_ioctl,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
 	.shutdown	= sock_no_shutdown,
-- 
cgit v1.2.3-70-g09d2


From 43cbeee9f9b26300275e4e2d55ed1607f8c5f760 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:51 +0200
Subject: [Bluetooth] Add support for TIOCOUTQ and TIOCINQ ioctls

Almost every protocol family supports the TIOCOUTQ and TIOCINQ ioctls
and even Bluetooth could make use of them. When implementing audio
streaming and integration with GStreamer or PulseAudio they will allow
a better timing and synchronization.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/af_bluetooth.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 88afe25003d..77fe9b868fe 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -335,11 +335,34 @@ EXPORT_SYMBOL(bt_sock_poll);
 int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	long amount;
 	int err;
 
 	BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg);
 
 	switch (cmd) {
+	case TIOCOUTQ:
+		if (sk->sk_state == BT_LISTEN)
+			return -EINVAL;
+
+		amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		if (amount < 0)
+			amount = 0;
+		err = put_user(amount, (int __user *) arg);
+		break;
+
+	case TIOCINQ:
+		if (sk->sk_state == BT_LISTEN)
+			return -EINVAL;
+
+		lock_sock(sk);
+		skb = skb_peek(&sk->sk_receive_queue);
+		amount = skb ? skb->len : 0;
+		release_sock(sk);
+		err = put_user(amount, (int __user *) arg);
+		break;
+
 	case SIOCGSTAMP:
 		err = sock_get_timestamp(sk, (struct timeval __user *) arg);
 		break;
-- 
cgit v1.2.3-70-g09d2


From 7d0db0a373195385a2e0b19d1f5e4b186fdcffac Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:51 +0200
Subject: [Bluetooth] Use a more unique bus name for connections

When attaching Bluetooth low-level connections to the bus, the bus name
is constructed from the remote address since at that time the connection
handle is not assigned yet. This has worked so far, but also caused a
lot of troubles. It is better to postpone the creation of the sysfs
entry to the time when the connection actually has been established
and then use its connection handle as unique identifier.

This also fixes the case where two different adapters try to connect
to the same remote device.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c  | 8 +++++---
 net/bluetooth/hci_event.c | 7 +++++++
 net/bluetooth/hci_sysfs.c | 8 ++------
 3 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 0d4b8aeb8e0..ca8d05245ca 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -245,8 +245,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 	if (hdev->notify)
 		hdev->notify(hdev, HCI_NOTIFY_CONN_ADD);
 
-	hci_conn_add_sysfs(conn);
-
 	tasklet_enable(&hdev->tx_task);
 
 	return conn;
@@ -278,12 +276,14 @@ int hci_conn_del(struct hci_conn *conn)
 	}
 
 	tasklet_disable(&hdev->tx_task);
+
 	hci_conn_hash_del(hdev, conn);
 	if (hdev->notify)
 		hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
+
 	tasklet_enable(&hdev->tx_task);
+
 	skb_queue_purge(&conn->data_q);
-	hci_conn_del_sysfs(conn);
 
 	return 0;
 }
@@ -532,6 +532,8 @@ void hci_conn_hash_flush(struct hci_dev *hdev)
 
 		c->state = BT_CLOSED;
 
+		hci_conn_del_sysfs(c);
+
 		hci_proto_disconn_ind(c, 0x16);
 		hci_conn_del(c);
 	}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 64668e2656a..0e3db289f4b 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -874,6 +874,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		} else
 			conn->state = BT_CONNECTED;
 
+		hci_conn_add_sysfs(conn);
+
 		if (test_bit(HCI_AUTH, &hdev->flags))
 			conn->link_mode |= HCI_LM_AUTH;
 
@@ -1011,6 +1013,9 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn) {
 		conn->state = BT_CLOSED;
+
+		hci_conn_del_sysfs(conn);
+
 		hci_proto_disconn_ind(conn, ev->reason);
 		hci_conn_del(conn);
 	}
@@ -1643,6 +1648,8 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu
 	if (!ev->status) {
 		conn->handle = __le16_to_cpu(ev->handle);
 		conn->state  = BT_CONNECTED;
+
+		hci_conn_add_sysfs(conn);
 	} else
 		conn->state = BT_CLOSED;
 
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index a18871e0158..844ca5f1b2d 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -311,7 +311,6 @@ static void add_conn(struct work_struct *work)
 void hci_conn_add_sysfs(struct hci_conn *conn)
 {
 	struct hci_dev *hdev = conn->hdev;
-	bdaddr_t *ba = &conn->dst;
 
 	BT_DBG("conn %p", conn);
 
@@ -320,11 +319,8 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
 
 	conn->dev.release = bt_release;
 
-	snprintf(conn->dev.bus_id, BUS_ID_SIZE,
-			"%s%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X",
-			conn->type == ACL_LINK ? "acl" : "sco",
-			ba->b[5], ba->b[4], ba->b[3],
-			ba->b[2], ba->b[1], ba->b[0]);
+	snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d",
+					hdev->name, conn->handle);
 
 	dev_set_drvdata(&conn->dev, conn);
 
-- 
cgit v1.2.3-70-g09d2


From 78c6a1744fafc3a396f85c1970eb78839b55af74 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:51 +0200
Subject: [Bluetooth] Update Bluetooth core version number

With all the Bluetooth 2.1 changes and the support for Simple Pairing,
it is important to update the Bluetooth core version number.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/af_bluetooth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 77fe9b868fe..4e59df5f8e0 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -49,7 +49,7 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "2.11"
+#define VERSION "2.12"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
-- 
cgit v1.2.3-70-g09d2


From ca37bdd53b5af06d00e792f2415b93206aa2a541 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:52 +0200
Subject: [Bluetooth] Use non-canonical TTY by default for RFCOMM

While the RFCOMM TTY emulation can act like a real serial port, in
reality it is not used like this. So to not mess up stupid applications,
use the non-canonical mode by default.

Signed-off-by: Denis Kenzior <denis.kenzior@trolltech.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/rfcomm/tty.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index c9191871c1e..248802796e1 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -1123,6 +1123,7 @@ int rfcomm_init_ttys(void)
 	rfcomm_tty_driver->flags	= TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
 	rfcomm_tty_driver->init_termios	= tty_std_termios;
 	rfcomm_tty_driver->init_termios.c_cflag	= B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+	rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON;
 	tty_set_operations(rfcomm_tty_driver, &rfcomm_ops);
 
 	if (tty_register_driver(rfcomm_tty_driver)) {
-- 
cgit v1.2.3-70-g09d2


From 8b6b3da765af9600b5edd8e3e84a20523e975884 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:52 +0200
Subject: [Bluetooth] Store remote modem status for RFCOMM TTY

When switching a RFCOMM socket to a TTY, the remote modem status might
be needed later. Currently it is lost since the original configuration
is done via the socket interface. So store the modem status and reply
it when the socket has been converted to a TTY.

Signed-off-by: Denis Kenzior <denis.kenzior@trolltech.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/rfcomm.h | 1 +
 net/bluetooth/rfcomm/core.c    | 4 ++++
 net/bluetooth/rfcomm/tty.c     | 3 +++
 3 files changed, 8 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 8c54ff37ad4..4dc8d92a463 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -180,6 +180,7 @@ struct rfcomm_dlc {
 	u8            addr;
 	u8            priority;
 	u8            v24_sig;
+	u8            remote_v24_sig;
 	u8            mscex;
 	u8            out;
 
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index e56bcfc35a4..fcd2cafe70c 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1463,8 +1463,12 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb
 			clear_bit(RFCOMM_TX_THROTTLED, &d->flags);
 
 		rfcomm_dlc_lock(d);
+
+		d->remote_v24_sig = msc->v24_sig;
+
 		if (d->modem_status)
 			d->modem_status(d, msc->v24_sig);
+
 		rfcomm_dlc_unlock(d);
 
 		rfcomm_send_msc(s, 0, dlci, msc->v24_sig);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 248802796e1..8fcca08cef8 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -271,6 +271,9 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
 
 	dlc->owner = dev;
 	dev->dlc   = dlc;
+
+	rfcomm_dev_modem_status(dlc, dlc->remote_v24_sig);
+
 	rfcomm_dlc_unlock(dlc);
 
 	/* It's safe to call __module_get() here because socket already
-- 
cgit v1.2.3-70-g09d2


From a0c22f226502be6eab37a1d9bf6fb0fadf551376 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:52 +0200
Subject: [Bluetooth] Move pending packets from RFCOMM socket to TTY

When an incoming RFCOMM socket connection gets converted into a TTY,
it can happen that packets are lost. This mainly happens with the
Handsfree profile where the remote side starts sending data right
away. The problem is that these packets are in the socket receive
queue. So when creating the TTY make sure to copy all pending packets
from the socket receive queue to a private queue inside the TTY.

To make this actually work, the flow control on the newly created TTY
will be disabled and only enabled again when the TTY is opened by an
application. And right before that, the pending packets will be put
into the TTY flip buffer.

Signed-off-by: Denis Kenzior <denis.kenzior@trolltech.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/rfcomm/core.c |  2 +-
 net/bluetooth/rfcomm/tty.c  | 55 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index fcd2cafe70c..b6b3d9b4066 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -53,7 +53,7 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "1.9"
+#define VERSION "1.10"
 
 static int disable_cfc = 0;
 static int channel_mtu = -1;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 8fcca08cef8..ec22ebe0c2c 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -77,6 +77,8 @@ struct rfcomm_dev {
 	struct device		*tty_dev;
 
 	atomic_t 		wmem_alloc;
+
+	struct sk_buff_head	pending;
 };
 
 static LIST_HEAD(rfcomm_dev_list);
@@ -264,7 +266,25 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
 	init_waitqueue_head(&dev->wait);
 	tasklet_init(&dev->wakeup_task, rfcomm_tty_wakeup, (unsigned long) dev);
 
+	skb_queue_head_init(&dev->pending);
+
 	rfcomm_dlc_lock(dlc);
+
+	if (req->flags & (1 << RFCOMM_REUSE_DLC)) {
+		struct sock *sk = dlc->owner;
+		struct sk_buff *skb;
+
+		BUG_ON(!sk);
+
+		rfcomm_dlc_throttle(dlc);
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
+			skb_orphan(skb);
+			skb_queue_tail(&dev->pending, skb);
+			atomic_sub(skb->len, &sk->sk_rmem_alloc);
+		}
+	}
+
 	dlc->data_ready   = rfcomm_dev_data_ready;
 	dlc->state_change = rfcomm_dev_state_change;
 	dlc->modem_status = rfcomm_dev_modem_status;
@@ -542,11 +562,16 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
 	struct rfcomm_dev *dev = dlc->owner;
 	struct tty_struct *tty;
 
-	if (!dev || !(tty = dev->tty)) {
+	if (!dev) {
 		kfree_skb(skb);
 		return;
 	}
 
+	if (!(tty = dev->tty) || !skb_queue_empty(&dev->pending)) {
+		skb_queue_tail(&dev->pending, skb);
+		return;
+	}
+
 	BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len);
 
 	tty_insert_flip_string(tty, skb->data, skb->len);
@@ -630,6 +655,30 @@ static void rfcomm_tty_wakeup(unsigned long arg)
 #endif
 }
 
+static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev)
+{
+	struct tty_struct *tty = dev->tty;
+	struct sk_buff *skb;
+	int inserted = 0;
+
+	if (!tty)
+		return;
+
+	BT_DBG("dev %p tty %p", dev, tty);
+
+	rfcomm_dlc_lock(dev->dlc);
+
+	while ((skb = skb_dequeue(&dev->pending))) {
+		inserted += tty_insert_flip_string(tty, skb->data, skb->len);
+		kfree_skb(skb);
+	}
+
+	rfcomm_dlc_unlock(dev->dlc);
+
+	if (inserted > 0)
+		tty_flip_buffer_push(tty);
+}
+
 static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
 {
 	DECLARE_WAITQUEUE(wait, current);
@@ -694,6 +743,10 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
 	if (err == 0)
 		device_move(dev->tty_dev, rfcomm_get_device(dev));
 
+	rfcomm_tty_copy_pending(dev);
+
+	rfcomm_dlc_unthrottle(dev->dlc);
+
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From ec8dab36e0738d3059980d144e34f16a26bbda7d Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:53 +0200
Subject: [Bluetooth] Signal user-space for HIDP and BNEP socket errors

When using the HIDP or BNEP kernel support, the user-space needs to
know if the connection has been terminated for some reasons. Wake up
the application if that happens. Otherwise kernel and user-space are
no longer on the same page and weird behaviors can happen.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/bnep/core.c |  5 +++++
 net/bluetooth/hidp/core.c | 10 ++++++++++
 2 files changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index f85d94643aa..24e91eb7f64 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -507,6 +507,11 @@ static int bnep_session(void *arg)
 	/* Delete network device */
 	unregister_netdev(dev);
 
+	/* Wakeup user-space polling for socket errors */
+	s->sock->sk->sk_err = EUNATCH;
+
+	wake_up_interruptible(s->sock->sk->sk_sleep);
+
 	/* Release the socket */
 	fput(s->sock->file);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 519cdb920f9..96434d774c8 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -581,6 +581,12 @@ static int hidp_session(void *arg)
 		hid_free_device(session->hid);
 	}
 
+	/* Wakeup user-space polling for socket errors */
+	session->intr_sock->sk->sk_err = EUNATCH;
+	session->ctrl_sock->sk->sk_err = EUNATCH;
+
+	hidp_schedule(session);
+
 	fput(session->intr_sock->file);
 
 	wait_event_timeout(*(ctrl_sk->sk_sleep),
@@ -879,6 +885,10 @@ int hidp_del_connection(struct hidp_conndel_req *req)
 			skb_queue_purge(&session->ctrl_transmit);
 			skb_queue_purge(&session->intr_transmit);
 
+			/* Wakeup user-space polling for socket errors */
+			session->intr_sock->sk->sk_err = EUNATCH;
+			session->ctrl_sock->sk->sk_err = EUNATCH;
+
 			/* Kill session thread */
 			atomic_inc(&session->terminate);
 			hidp_schedule(session);
-- 
cgit v1.2.3-70-g09d2


From 7cb127d5b0e7af7a0afd23785722ca3edab4ceff Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:53 +0200
Subject: [Bluetooth] Add option to disable eSCO connection creation

It has been reported that some eSCO capable headsets are not able to
connect properly. The real reason for this is unclear at the moment. So
for easier testing add a module parameter to disable eSCO connection
creation.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/sco.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 1ad226c9788..8cda4987486 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -53,7 +53,9 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "0.5"
+#define VERSION "0.6"
+
+static int disable_esco = 0;
 
 static const struct proto_ops sco_sock_ops;
 
@@ -193,7 +195,10 @@ static int sco_connect(struct sock *sk)
 
 	err = -ENOMEM;
 
-	type = lmp_esco_capable(hdev) ? ESCO_LINK : SCO_LINK;
+	if (lmp_esco_capable(hdev) && !disable_esco)
+		type = ESCO_LINK;
+	else
+		type = SCO_LINK;
 
 	hcon = hci_connect(hdev, type, dst);
 	if (!hcon)
@@ -994,6 +999,9 @@ static void __exit sco_exit(void)
 module_init(sco_init);
 module_exit(sco_exit);
 
+module_param(disable_esco, bool, 0644);
+MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation");
+
 MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION);
 MODULE_VERSION(VERSION);
-- 
cgit v1.2.3-70-g09d2


From b1235d79611e78a07629b4cbe53291c9cffd1834 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 14 Jul 2008 20:13:54 +0200
Subject: [Bluetooth] Allow security for outgoing L2CAP connections

When requested the L2CAP layer will now enforce authentication and
encryption on outgoing connections. The usefulness of this feature
is kinda limited since it will not allow proper connection ownership
tracking until the authentication procedure has been finished. This
is a limitation of Bluetooth 2.0 and before and can only be fixed by
using Simple Pairing.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 172 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 108 insertions(+), 64 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 4fcf24af759..c1239852834 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -76,11 +76,21 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 static void l2cap_sock_timeout(unsigned long arg)
 {
 	struct sock *sk = (struct sock *) arg;
+	int reason;
 
 	BT_DBG("sock %p state %d", sk, sk->sk_state);
 
 	bh_lock_sock(sk);
-	__l2cap_sock_close(sk, ETIMEDOUT);
+
+	if (sk->sk_state == BT_CONNECT &&
+			(l2cap_pi(sk)->link_mode & (L2CAP_LM_AUTH |
+					L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)))
+		reason = ECONNREFUSED;
+	else
+		reason = ETIMEDOUT;
+
+	__l2cap_sock_close(sk, reason);
+
 	bh_unlock_sock(sk);
 
 	l2cap_sock_kill(sk);
@@ -240,7 +250,7 @@ static void l2cap_chan_del(struct sock *sk, int err)
 		hci_conn_put(conn->hcon);
 	}
 
-	sk->sk_state  = BT_CLOSED;
+	sk->sk_state = BT_CLOSED;
 	sock_set_flag(sk, SOCK_ZAPPED);
 
 	if (err)
@@ -307,14 +317,16 @@ static void l2cap_do_start(struct sock *sk)
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 
 	if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) {
-		struct l2cap_conn_req req;
-		req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
-		req.psm  = l2cap_pi(sk)->psm;
+		if (l2cap_check_link_mode(sk)) {
+			struct l2cap_conn_req req;
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
+			req.psm  = l2cap_pi(sk)->psm;
 
-		l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
 
-		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_REQ, sizeof(req), &req);
+		}
 	} else {
 		struct l2cap_info_req req;
 		req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
@@ -349,14 +361,16 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 		}
 
 		if (sk->sk_state == BT_CONNECT) {
-			struct l2cap_conn_req req;
-			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
-			req.psm  = l2cap_pi(sk)->psm;
+			if (l2cap_check_link_mode(sk)) {
+				struct l2cap_conn_req req;
+				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
+				req.psm  = l2cap_pi(sk)->psm;
 
-			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+				l2cap_pi(sk)->ident = l2cap_get_ident(conn);
 
-			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+				l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_REQ, sizeof(req), &req);
+			}
 		} else if (sk->sk_state == BT_CONNECT2) {
 			struct l2cap_conn_rsp rsp;
 			rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
@@ -455,7 +469,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 
 	conn->feat_mask = 0;
 
-	setup_timer(&conn->info_timer, l2cap_info_timeout, (unsigned long)conn);
+	setup_timer(&conn->info_timer, l2cap_info_timeout,
+						(unsigned long) conn);
 
 	spin_lock_init(&conn->lock);
 	rwlock_init(&conn->chan_list.lock);
@@ -567,7 +582,7 @@ static void l2cap_sock_cleanup_listen(struct sock *parent)
 	while ((sk = bt_accept_dequeue(parent, NULL)))
 		l2cap_sock_close(sk);
 
-	parent->sk_state  = BT_CLOSED;
+	parent->sk_state = BT_CLOSED;
 	sock_set_flag(parent, SOCK_ZAPPED);
 }
 
@@ -610,9 +625,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
 			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			l2cap_send_cmd(conn, l2cap_get_ident(conn),
 					L2CAP_DISCONN_REQ, sizeof(req), &req);
-		} else {
+		} else
 			l2cap_chan_del(sk, reason);
-		}
 		break;
 
 	case BT_CONNECT:
@@ -681,9 +695,9 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
 	sock_reset_flag(sk, SOCK_ZAPPED);
 
 	sk->sk_protocol = proto;
-	sk->sk_state    = BT_OPEN;
+	sk->sk_state = BT_OPEN;
 
-	setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long)sk);
+	setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk);
 
 	bt_sock_link(&l2cap_sk_list, sk);
 	return sk;
@@ -1201,7 +1215,8 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 		__l2cap_sock_close(sk, 0);
 
 		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
-			err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
+			err = bt_sock_wait_state(sk, BT_CLOSED,
+							sk->sk_lingertime);
 	}
 	release_sock(sk);
 	return err;
@@ -1245,6 +1260,11 @@ static void l2cap_chan_ready(struct sock *sk)
 		 */
 		parent->sk_data_ready(parent, 0);
 	}
+
+	if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) {
+		struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+		hci_conn_change_link_key(conn->hcon);
+	}
 }
 
 /* Copy frame to all raw sockets on that connection */
@@ -1778,7 +1798,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 
 	default:
 		sk->sk_state = BT_DISCONN;
-		sk->sk_err   = ECONNRESET;
+		sk->sk_err = ECONNRESET;
 		l2cap_sock_set_timer(sk, HZ * 5);
 		{
 			struct l2cap_disconn_req req;
@@ -2151,9 +2171,7 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 {
 	struct l2cap_chan_list *l;
 	struct l2cap_conn *conn = hcon->l2cap_data;
-	struct l2cap_conn_rsp rsp;
 	struct sock *sk;
-	int result;
 
 	if (!conn)
 		return 0;
@@ -2169,37 +2187,53 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 
 		bh_lock_sock(sk);
 
-		if (sk->sk_state != BT_CONNECT2) {
-			bh_unlock_sock(sk);
-			continue;
-		}
-
 		if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) &&
-					!(hcon->link_mode & HCI_LM_ENCRYPT)) {
+					!(hcon->link_mode & HCI_LM_ENCRYPT) &&
+								!status) {
 			bh_unlock_sock(sk);
 			continue;
 		}
 
-		if (!status) {
-			sk->sk_state = BT_CONFIG;
-			result = 0;
-		} else {
-			sk->sk_state = BT_DISCONN;
-			l2cap_sock_set_timer(sk, HZ/10);
-			result = L2CAP_CR_SEC_BLOCK;
-		}
+		if (sk->sk_state == BT_CONNECT) {
+			if (!status) {
+				struct l2cap_conn_req req;
+				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
+				req.psm  = l2cap_pi(sk)->psm;
+
+				l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+
+				l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_REQ, sizeof(req), &req);
+			} else {
+				l2cap_sock_clear_timer(sk);
+				l2cap_sock_set_timer(sk, HZ / 10);
+			}
+		} else if (sk->sk_state == BT_CONNECT2) {
+			struct l2cap_conn_rsp rsp;
+			__u16 result;
+
+			if (!status) {
+				sk->sk_state = BT_CONFIG;
+				result = L2CAP_CR_SUCCESS;
+			} else {
+				sk->sk_state = BT_DISCONN;
+				l2cap_sock_set_timer(sk, HZ / 10);
+				result = L2CAP_CR_SEC_BLOCK;
+			}
 
-		rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
-		rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
-		rsp.result = cpu_to_le16(result);
-		rsp.status = cpu_to_le16(0);
-		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
-				L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+			rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+			rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
+			rsp.result = cpu_to_le16(result);
+			rsp.status = cpu_to_le16(0);
+			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+		}
 
 		bh_unlock_sock(sk);
 	}
 
 	read_unlock(&l->lock);
+
 	return 0;
 }
 
@@ -2207,9 +2241,7 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 {
 	struct l2cap_chan_list *l;
 	struct l2cap_conn *conn = hcon->l2cap_data;
-	struct l2cap_conn_rsp rsp;
 	struct sock *sk;
-	int result;
 
 	if (!conn)
 		return 0;
@@ -2234,34 +2266,46 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 			continue;
 		}
 
-		if (sk->sk_state != BT_CONNECT2) {
-			bh_unlock_sock(sk);
-			continue;
-		}
+		if (sk->sk_state == BT_CONNECT) {
+			if (!status) {
+				struct l2cap_conn_req req;
+				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
+				req.psm  = l2cap_pi(sk)->psm;
 
-		if (!status) {
-			sk->sk_state = BT_CONFIG;
-			result = 0;
-		} else {
-			sk->sk_state = BT_DISCONN;
-			l2cap_sock_set_timer(sk, HZ/10);
-			result = L2CAP_CR_SEC_BLOCK;
-		}
+				l2cap_pi(sk)->ident = l2cap_get_ident(conn);
 
-		rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
-		rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
-		rsp.result = cpu_to_le16(result);
-		rsp.status = cpu_to_le16(0);
-		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
-				L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+				l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_REQ, sizeof(req), &req);
+			} else {
+				l2cap_sock_clear_timer(sk);
+				l2cap_sock_set_timer(sk, HZ / 10);
+			}
+		} else if (sk->sk_state == BT_CONNECT2) {
+			struct l2cap_conn_rsp rsp;
+			__u16 result;
 
-		if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)
-			hci_conn_change_link_key(hcon);
+			if (!status) {
+				sk->sk_state = BT_CONFIG;
+				result = L2CAP_CR_SUCCESS;
+			} else {
+				sk->sk_state = BT_DISCONN;
+				l2cap_sock_set_timer(sk, HZ / 10);
+				result = L2CAP_CR_SEC_BLOCK;
+			}
+
+			rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+			rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
+			rsp.result = cpu_to_le16(result);
+			rsp.status = cpu_to_le16(0);
+			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+		}
 
 		bh_unlock_sock(sk);
 	}
 
 	read_unlock(&l->lock);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 49292d56352a6ab90d04c3448dd8b6106dfef2d6 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Fri, 4 Jul 2008 10:49:31 +0200
Subject: mac80211: power management wext hooks

This patch implements the power management routines wireless extensions
for mac80211.
For now we only support switching PS mode between on and off.

Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h |  2 ++
 net/mac80211/wext.c    | 43 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 656442c6b1c..9672a04c4f7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -407,11 +407,13 @@ struct ieee80211_rx_status {
  * @IEEE80211_CONF_SHORT_SLOT_TIME: use 802.11g short slot time
  * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported)
  * @IEEE80211_CONF_SUPPORT_HT_MODE: use 802.11n HT capabilities (if supported)
+ * @IEEE80211_CONF_PS: Enable 802.11 power save mode
  */
 enum ieee80211_conf_flags {
 	IEEE80211_CONF_SHORT_SLOT_TIME	= (1<<0),
 	IEEE80211_CONF_RADIOTAP		= (1<<1),
 	IEEE80211_CONF_SUPPORT_HT_MODE	= (1<<2),
+	IEEE80211_CONF_PS		= (1<<3),
 };
 
 /**
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 736c32e340f..207971e9ad7 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -1009,6 +1009,45 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev,
 	return 0;
 }
 
+static int ieee80211_ioctl_siwpower(struct net_device *dev,
+				    struct iw_request_info *info,
+				    struct iw_param *wrq,
+				    char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_conf *conf = &local->hw.conf;
+
+	if (wrq->disabled) {
+		conf->flags &= ~IEEE80211_CONF_PS;
+		return ieee80211_hw_config(local);
+	}
+
+	switch (wrq->flags & IW_POWER_MODE) {
+	case IW_POWER_ON:       /* If not specified */
+	case IW_POWER_MODE:     /* If set all mask */
+	case IW_POWER_ALL_R:    /* If explicitely state all */
+		conf->flags |= IEEE80211_CONF_PS;
+		break;
+	default:                /* Otherwise we don't support it */
+		return -EINVAL;
+	}
+
+	return ieee80211_hw_config(local);
+}
+
+static int ieee80211_ioctl_giwpower(struct net_device *dev,
+				    struct iw_request_info *info,
+				    union iwreq_data *wrqu,
+				    char *extra)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_conf *conf = &local->hw.conf;
+
+	wrqu->power.disabled = !(conf->flags & IEEE80211_CONF_PS);
+
+	return 0;
+}
+
 static int ieee80211_ioctl_siwauth(struct net_device *dev,
 				   struct iw_request_info *info,
 				   struct iw_param *data, char *extra)
@@ -1211,8 +1250,8 @@ static const iw_handler ieee80211_handler[] =
 	(iw_handler) ieee80211_ioctl_giwretry,		/* SIOCGIWRETRY */
 	(iw_handler) ieee80211_ioctl_siwencode,		/* SIOCSIWENCODE */
 	(iw_handler) ieee80211_ioctl_giwencode,		/* SIOCGIWENCODE */
-	(iw_handler) NULL,				/* SIOCSIWPOWER */
-	(iw_handler) NULL,				/* SIOCGIWPOWER */
+	(iw_handler) ieee80211_ioctl_siwpower,		/* SIOCSIWPOWER */
+	(iw_handler) ieee80211_ioctl_giwpower,		/* SIOCGIWPOWER */
 	(iw_handler) NULL,				/* -- hole -- */
 	(iw_handler) NULL,				/* -- hole -- */
 	(iw_handler) ieee80211_ioctl_siwgenie,		/* SIOCSIWGENIE */
-- 
cgit v1.2.3-70-g09d2


From 3e122be089e6fb8d3f322416da4cdbb80ce12927 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 9 Jul 2008 14:40:34 +0200
Subject: mac80211: make master netdev handling sane

Currently, almost every interface type has a 'bss' pointer
pointing to BSS information. This BSS information, however,
is for a _local_ BSS, not for the BSS we joined, so having
it on a STA mode interface makes little sense, but now they
have it pointing to the master device, which is an AP mode
virtual interface. However, except for some bitrate control
data, this pointer is only used in AP/VLAN modes (for power
saving stations.)

Overall, it is not necessary to even have the master netdev
be a valid virtual interface, and it doesn't have to be on
the list of interfaces either.

This patch changes the master netdev to be special, it now
 - no longer is on the list of virtual interfaces, which
   lets me remove a lot of tests for that
 - no longer has sub_if_data attached, since that isn't used

Additionally, this patch changes some vlan/ap mode handling
that is related to these 'bss' pointers described above (but
in the VLAN case they actually make sense because there they
point to the AP they belong to); it also adds some debugging
code to IEEE80211_DEV_TO_SUB_IF to validate it is not called
on the master netdev any more.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c              |  2 +-
 net/mac80211/debugfs_netdev.c   | 34 ++++++++++++---
 net/mac80211/ieee80211_i.h      | 41 ++++++++++--------
 net/mac80211/iface.c            | 64 ++++++---------------------
 net/mac80211/main.c             | 96 ++++++++++++-----------------------------
 net/mac80211/mlme.c             | 56 ++++++++----------------
 net/mac80211/rc80211_pid_algo.c |  8 ++--
 net/mac80211/rx.c               | 15 +++----
 net/mac80211/sta_info.c         | 29 +++++++++----
 net/mac80211/tx.c               | 11 +++--
 net/mac80211/util.c             |  4 --
 net/mac80211/wext.c             | 10 ++---
 12 files changed, 153 insertions(+), 217 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 81087281b03..3c95cd9bf8e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -57,7 +57,7 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
 	if (itype == IEEE80211_IF_TYPE_INVALID)
 		return -EINVAL;
 
-	err = ieee80211_if_add(local->mdev, name, &dev, itype, params);
+	err = ieee80211_if_add(local, name, &dev, itype, params);
 	if (err || itype != IEEE80211_IF_TYPE_MNTR || !flags)
 		return err;
 
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index b2089b2da48..4aa6621f997 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -156,6 +156,8 @@ static const struct file_operations name##_ops = {			\
 
 /* common attributes */
 IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC);
+IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC);
+IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC);
 
 /* STA/IBSS attributes */
 IEEE80211_IF_FILE(state, u.sta.state, DEC);
@@ -191,8 +193,6 @@ __IEEE80211_IF_FILE(flags);
 IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
 IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
 IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC);
-IEEE80211_IF_FILE(force_unicast_rateidx, u.ap.force_unicast_rateidx, DEC);
-IEEE80211_IF_FILE(max_ratectrl_rateidx, u.ap.max_ratectrl_rateidx, DEC);
 
 static ssize_t ieee80211_if_fmt_num_buffered_multicast(
 	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
@@ -248,6 +248,9 @@ IEEE80211_IF_WFILE(min_discovery_timeout,
 static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted, sta);
+	DEBUGFS_ADD(force_unicast_rateidx, ap);
+	DEBUGFS_ADD(max_ratectrl_rateidx, ap);
+
 	DEBUGFS_ADD(state, sta);
 	DEBUGFS_ADD(bssid, sta);
 	DEBUGFS_ADD(prev_bssid, sta);
@@ -268,23 +271,29 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 static void add_ap_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted, ap);
+	DEBUGFS_ADD(force_unicast_rateidx, ap);
+	DEBUGFS_ADD(max_ratectrl_rateidx, ap);
+
 	DEBUGFS_ADD(num_sta_ps, ap);
 	DEBUGFS_ADD(dtim_count, ap);
 	DEBUGFS_ADD(num_beacons, ap);
-	DEBUGFS_ADD(force_unicast_rateidx, ap);
-	DEBUGFS_ADD(max_ratectrl_rateidx, ap);
 	DEBUGFS_ADD(num_buffered_multicast, ap);
 }
 
 static void add_wds_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted, wds);
+	DEBUGFS_ADD(force_unicast_rateidx, ap);
+	DEBUGFS_ADD(max_ratectrl_rateidx, ap);
+
 	DEBUGFS_ADD(peer, wds);
 }
 
 static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted, vlan);
+	DEBUGFS_ADD(force_unicast_rateidx, ap);
+	DEBUGFS_ADD(max_ratectrl_rateidx, ap);
 }
 
 static void add_monitor_files(struct ieee80211_sub_if_data *sdata)
@@ -372,6 +381,9 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
 static void del_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(drop_unencrypted, sta);
+	DEBUGFS_DEL(force_unicast_rateidx, ap);
+	DEBUGFS_DEL(max_ratectrl_rateidx, ap);
+
 	DEBUGFS_DEL(state, sta);
 	DEBUGFS_DEL(bssid, sta);
 	DEBUGFS_DEL(prev_bssid, sta);
@@ -392,23 +404,29 @@ static void del_sta_files(struct ieee80211_sub_if_data *sdata)
 static void del_ap_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(drop_unencrypted, ap);
+	DEBUGFS_DEL(force_unicast_rateidx, ap);
+	DEBUGFS_DEL(max_ratectrl_rateidx, ap);
+
 	DEBUGFS_DEL(num_sta_ps, ap);
 	DEBUGFS_DEL(dtim_count, ap);
 	DEBUGFS_DEL(num_beacons, ap);
-	DEBUGFS_DEL(force_unicast_rateidx, ap);
-	DEBUGFS_DEL(max_ratectrl_rateidx, ap);
 	DEBUGFS_DEL(num_buffered_multicast, ap);
 }
 
 static void del_wds_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(drop_unencrypted, wds);
+	DEBUGFS_DEL(force_unicast_rateidx, ap);
+	DEBUGFS_DEL(max_ratectrl_rateidx, ap);
+
 	DEBUGFS_DEL(peer, wds);
 }
 
 static void del_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(drop_unencrypted, vlan);
+	DEBUGFS_DEL(force_unicast_rateidx, ap);
+	DEBUGFS_DEL(max_ratectrl_rateidx, ap);
 }
 
 static void del_monitor_files(struct ieee80211_sub_if_data *sdata)
@@ -525,7 +543,7 @@ static int netdev_notify(struct notifier_block *nb,
 {
 	struct net_device *dev = ndev;
 	struct dentry *dir;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_sub_if_data *sdata;
 	char buf[10+IFNAMSIZ];
 
 	if (state != NETDEV_CHANGENAME)
@@ -537,6 +555,8 @@ static int netdev_notify(struct notifier_block *nb,
 	if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid)
 		return 0;
 
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	sprintf(buf, "netdev:%s", dev->name);
 	dir = sdata->debugfsdir;
 	if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf))
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 02a8753a4ec..1b1fc53dad3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -237,8 +237,6 @@ struct ieee80211_if_ap {
 	struct sk_buff_head ps_bc_buf;
 	atomic_t num_sta_ps; /* number of stations in PS mode */
 	int dtim_count;
-	int force_unicast_rateidx; /* forced TX rateidx for unicast frames */
-	int max_ratectrl_rateidx; /* max TX rateidx for rate control */
 	int num_beacons; /* number of TXed beacon frames for this BSS */
 };
 
@@ -248,7 +246,6 @@ struct ieee80211_if_wds {
 };
 
 struct ieee80211_if_vlan {
-	struct ieee80211_sub_if_data *ap;
 	struct list_head list;
 };
 
@@ -432,16 +429,18 @@ struct ieee80211_sub_if_data {
 	struct ieee80211_key *keys[NUM_DEFAULT_KEYS];
 	struct ieee80211_key *default_key;
 
+	/* BSS configuration for this interface. */
+	struct ieee80211_bss_conf bss_conf;
+
 	/*
-	 * BSS configuration for this interface.
-	 *
-	 * FIXME: I feel bad putting this here when we already have a
-	 *	  bss pointer, but the bss pointer is just wrong when
-	 *	  you have multiple virtual STA mode interfaces...
-	 *	  This needs to be fixed.
+	 * AP this belongs to: self in AP mode and
+	 * corresponding AP in VLAN mode, NULL for
+	 * all others (might be needed later in IBSS)
 	 */
-	struct ieee80211_bss_conf bss_conf;
-	struct ieee80211_if_ap *bss; /* BSS that this device belongs to */
+	struct ieee80211_if_ap *bss;
+
+	int force_unicast_rateidx; /* forced TX rateidx for unicast frames */
+	int max_ratectrl_rateidx; /* max TX rateidx for rate control */
 
 	union {
 		struct ieee80211_if_ap ap;
@@ -533,8 +532,6 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
 	return container_of(p, struct ieee80211_sub_if_data, vif);
 }
 
-#define IEEE80211_DEV_TO_SUB_IF(dev) netdev_priv(dev)
-
 enum {
 	IEEE80211_RX_MSG	= 1,
 	IEEE80211_TX_STATUS_MSG	= 2,
@@ -760,6 +757,16 @@ static inline int ieee80211_is_multiqueue(struct ieee80211_local *local)
 #endif
 }
 
+static inline struct ieee80211_sub_if_data *
+IEEE80211_DEV_TO_SUB_IF(struct net_device *dev)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	BUG_ON(!local || local->mdev == dev);
+
+	return netdev_priv(dev);
+}
+
 /* this struct represents 802.11n's RA/TID combination */
 struct ieee80211_ra_tid {
 	u8 ra[ETH_ALEN];
@@ -883,8 +890,8 @@ int ieee80211_sta_scan_results(struct net_device *dev,
 ieee80211_rx_result ieee80211_sta_rx_scan(
 	struct net_device *dev, struct sk_buff *skb,
 	struct ieee80211_rx_status *rx_status);
-void ieee80211_rx_bss_list_init(struct net_device *dev);
-void ieee80211_rx_bss_list_deinit(struct net_device *dev);
+void ieee80211_rx_bss_list_init(struct ieee80211_local *local);
+void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local);
 int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len);
 struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 					struct sk_buff *skb, u8 *bssid,
@@ -925,8 +932,8 @@ static inline void ieee80211_start_mesh(struct net_device *dev)
 {}
 #endif
 
-/* ieee80211_iface.c */
-int ieee80211_if_add(struct net_device *dev, const char *name,
+/* interface handling */
+int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		     struct net_device **new_dev, int type,
 		     struct vif_params *params);
 void ieee80211_if_set_type(struct net_device *dev, int type);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index eeb16926aa7..f2aefd4b863 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -27,6 +27,9 @@ void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata)
 		skb_queue_head_init(&sdata->fragments[i].skb_list);
 
 	INIT_LIST_HEAD(&sdata->key_list);
+
+	sdata->force_unicast_rateidx = -1;
+	sdata->max_ratectrl_rateidx = -1;
 }
 
 static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata)
@@ -38,12 +41,11 @@ static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata)
 }
 
 /* Must be called with rtnl lock held. */
-int ieee80211_if_add(struct net_device *dev, const char *name,
+int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		     struct net_device **new_dev, int type,
 		     struct vif_params *params)
 {
 	struct net_device *ndev;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sub_if_data *sdata = NULL;
 	int ret;
 
@@ -67,13 +69,10 @@ int ieee80211_if_add(struct net_device *dev, const char *name,
 		goto fail;
 
 	memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
-	ndev->base_addr = dev->base_addr;
-	ndev->irq = dev->irq;
-	ndev->mem_start = dev->mem_start;
-	ndev->mem_end = dev->mem_end;
 	SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
+	/* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */
+	sdata = netdev_priv(ndev);
 	ndev->ieee80211_ptr = &sdata->wdev;
 	sdata->wdev.wiphy = local->hw.wiphy;
 	sdata->vif.type = IEEE80211_IF_TYPE_AP;
@@ -116,14 +115,6 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	int oldtype = sdata->vif.type;
 
-	/*
-	 * We need to call this function on the master interface
-	 * which already has a hard_start_xmit routine assigned
-	 * which must not be changed.
-	 */
-	if (dev != sdata->local->mdev)
-		dev->hard_start_xmit = ieee80211_subif_start_xmit;
-
 	/*
 	 * Called even when register_netdevice fails, it would
 	 * oops if assigned before initialising the rest.
@@ -138,22 +129,16 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
 
 	switch (type) {
 	case IEEE80211_IF_TYPE_WDS:
-		/* nothing special */
-		break;
 	case IEEE80211_IF_TYPE_VLAN:
-		sdata->u.vlan.ap = NULL;
+		/* nothing special */
 		break;
 	case IEEE80211_IF_TYPE_AP:
-		sdata->u.ap.force_unicast_rateidx = -1;
-		sdata->u.ap.max_ratectrl_rateidx = -1;
 		skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
-		sdata->bss = &sdata->u.ap;
 		INIT_LIST_HEAD(&sdata->u.ap.vlans);
 		break;
 	case IEEE80211_IF_TYPE_MESH_POINT:
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS: {
-		struct ieee80211_sub_if_data *msdata;
 		struct ieee80211_if_sta *ifsta;
 
 		ifsta = &sdata->u.sta;
@@ -171,9 +156,6 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
 		if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4)
 			ifsta->flags |= IEEE80211_STA_WMM_ENABLED;
 
-		msdata = IEEE80211_DEV_TO_SUB_IF(sdata->local->mdev);
-		sdata->bss = &msdata->u.ap;
-
 		if (ieee80211_vif_is_mesh(&sdata->vif))
 			ieee80211_mesh_init_sdata(sdata);
 		break;
@@ -215,27 +197,8 @@ void ieee80211_if_reinit(struct net_device *dev)
 		WARN_ON(1);
 		break;
 	case IEEE80211_IF_TYPE_AP: {
-		/* Remove all virtual interfaces that use this BSS
-		 * as their sdata->bss */
-		struct ieee80211_sub_if_data *tsdata, *n;
 		struct beacon_data *beacon;
 
-		list_for_each_entry_safe(tsdata, n, &local->interfaces, list) {
-			if (tsdata != sdata && tsdata->bss == &sdata->u.ap) {
-				printk(KERN_DEBUG "%s: removing virtual "
-				       "interface %s because its BSS interface"
-				       " is being removed\n",
-				       sdata->dev->name, tsdata->dev->name);
-				list_del_rcu(&tsdata->list);
-				/*
-				 * We have lots of time and can afford
-				 * to sync for each interface
-				 */
-				synchronize_rcu();
-				__ieee80211_if_del(local, tsdata);
-			}
-		}
-
 		beacon = sdata->u.ap.beacon;
 		rcu_assign_pointer(sdata->u.ap.beacon, NULL);
 		synchronize_rcu();
@@ -249,6 +212,7 @@ void ieee80211_if_reinit(struct net_device *dev)
 		break;
 	}
 	case IEEE80211_IF_TYPE_WDS:
+	case IEEE80211_IF_TYPE_VLAN:
 		/* nothing to do */
 		break;
 	case IEEE80211_IF_TYPE_MESH_POINT:
@@ -269,9 +233,6 @@ void ieee80211_if_reinit(struct net_device *dev)
 	case IEEE80211_IF_TYPE_MNTR:
 		dev->type = ARPHRD_ETHER;
 		break;
-	case IEEE80211_IF_TYPE_VLAN:
-		sdata->u.vlan.ap = NULL;
-		break;
 	}
 
 	flushed = sta_info_flush(local, sdata);
@@ -289,8 +250,10 @@ void __ieee80211_if_del(struct ieee80211_local *local,
 
 	ieee80211_debugfs_remove_netdev(sdata);
 	unregister_netdevice(dev);
-	/* Except master interface, the net_device will be freed by
-	 * net_device->destructor (i. e. ieee80211_if_free). */
+	/*
+	 * The net_device will be freed by its destructor,
+	 * i.e. ieee80211_if_free.
+	 */
 }
 
 /* Must be called with rtnl lock held. */
@@ -303,8 +266,7 @@ int ieee80211_if_remove(struct net_device *dev, const char *name, int id)
 
 	list_for_each_entry_safe(sdata, n, &local->interfaces, list) {
 		if ((sdata->vif.type == id || id == -1) &&
-		    strcmp(name, sdata->dev->name) == 0 &&
-		    sdata->dev != local->mdev) {
+		    strcmp(name, sdata->dev->name) == 0) {
 			list_del_rcu(&sdata->list);
 			synchronize_rcu();
 			__ieee80211_if_del(local, sdata);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1c4d3ba6b87..863923964d2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -105,7 +105,7 @@ static int ieee80211_master_open(struct net_device *dev)
 
 	/* we hold the RTNL here so can safely walk the list */
 	list_for_each_entry(sdata, &local->interfaces, list) {
-		if (sdata->dev != dev && netif_running(sdata->dev)) {
+		if (netif_running(sdata->dev)) {
 			res = 0;
 			break;
 		}
@@ -126,7 +126,7 @@ static int ieee80211_master_stop(struct net_device *dev)
 
 	/* we hold the RTNL here so can safely walk the list */
 	list_for_each_entry(sdata, &local->interfaces, list)
-		if (sdata->dev != dev && netif_running(sdata->dev))
+		if (netif_running(sdata->dev))
 			dev_close(sdata->dev);
 
 	return 0;
@@ -194,7 +194,7 @@ static int ieee80211_open(struct net_device *dev)
 	list_for_each_entry(nsdata, &local->interfaces, list) {
 		struct net_device *ndev = nsdata->dev;
 
-		if (ndev != dev && ndev != local->mdev && netif_running(ndev)) {
+		if (ndev != dev && netif_running(ndev)) {
 			/*
 			 * Allow only a single IBSS interface to be up at any
 			 * time. This is restricted because beacon distribution
@@ -209,30 +209,6 @@ static int ieee80211_open(struct net_device *dev)
 			    nsdata->vif.type == IEEE80211_IF_TYPE_IBSS)
 				return -EBUSY;
 
-			/*
-			 * Disallow multiple IBSS/STA mode interfaces.
-			 *
-			 * This is a technical restriction, it is possible although
-			 * most likely not IEEE 802.11 compliant to have multiple
-			 * STAs with just a single hardware (the TSF timer will not
-			 * be adjusted properly.)
-			 *
-			 * However, because mac80211 uses the master device's BSS
-			 * information for each STA/IBSS interface, doing this will
-			 * currently corrupt that BSS information completely, unless,
-			 * a not very useful case, both STAs are associated to the
-			 * same BSS.
-			 *
-			 * To remove this restriction, the BSS information needs to
-			 * be embedded in the STA/IBSS mode sdata instead of using
-			 * the master device's BSS structure.
-			 */
-			if ((sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-			     sdata->vif.type == IEEE80211_IF_TYPE_IBSS) &&
-			    (nsdata->vif.type == IEEE80211_IF_TYPE_STA ||
-			     nsdata->vif.type == IEEE80211_IF_TYPE_IBSS))
-				return -EBUSY;
-
 			/*
 			 * The remaining checks are only performed for interfaces
 			 * with the same MAC address.
@@ -252,7 +228,7 @@ static int ieee80211_open(struct net_device *dev)
 			 */
 			if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN &&
 			    nsdata->vif.type == IEEE80211_IF_TYPE_AP)
-				sdata->u.vlan.ap = nsdata;
+				sdata->bss = &nsdata->u.ap;
 		}
 	}
 
@@ -262,10 +238,13 @@ static int ieee80211_open(struct net_device *dev)
 			return -ENOLINK;
 		break;
 	case IEEE80211_IF_TYPE_VLAN:
-		if (!sdata->u.vlan.ap)
+		if (!sdata->bss)
 			return -ENOLINK;
+		list_add(&sdata->u.vlan.list, &sdata->bss->vlans);
 		break;
 	case IEEE80211_IF_TYPE_AP:
+		sdata->bss = &sdata->u.ap;
+		break;
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_MNTR:
 	case IEEE80211_IF_TYPE_IBSS:
@@ -283,14 +262,13 @@ static int ieee80211_open(struct net_device *dev)
 		if (local->ops->start)
 			res = local->ops->start(local_to_hw(local));
 		if (res)
-			return res;
+			goto err_del_bss;
 		need_hw_reconfig = 1;
 		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
 	}
 
 	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_VLAN:
-		list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans);
 		/* no need to tell driver */
 		break;
 	case IEEE80211_IF_TYPE_MNTR:
@@ -404,6 +382,10 @@ static int ieee80211_open(struct net_device *dev)
  err_stop:
 	if (!local->open_count && local->ops->stop)
 		local->ops->stop(local_to_hw(local));
+ err_del_bss:
+	sdata->bss = NULL;
+	if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
+		list_del(&sdata->u.vlan.list);
 	return res;
 }
 
@@ -486,7 +468,6 @@ static int ieee80211_stop(struct net_device *dev)
 	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_VLAN:
 		list_del(&sdata->u.vlan.list);
-		sdata->u.vlan.ap = NULL;
 		/* no need to tell driver */
 		break;
 	case IEEE80211_IF_TYPE_MNTR:
@@ -549,6 +530,8 @@ static int ieee80211_stop(struct net_device *dev)
 		local->ops->remove_interface(local_to_hw(local), &conf);
 	}
 
+	sdata->bss = NULL;
+
 	if (local->open_count == 0) {
 		if (netif_running(local->mdev))
 			dev_close(local->mdev);
@@ -1659,7 +1642,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	int result;
 	enum ieee80211_band band;
 	struct net_device *mdev;
-	struct ieee80211_sub_if_data *sdata;
+	struct wireless_dev *mwdev;
 
 	/*
 	 * generic code guarantees at least one band,
@@ -1699,8 +1682,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	hw->ampdu_queues = 0;
 #endif
 
-	/* for now, mdev needs sub_if_data :/ */
-	mdev = alloc_netdev_mq(sizeof(struct ieee80211_sub_if_data),
+	mdev = alloc_netdev_mq(sizeof(struct wireless_dev),
 			       "wmaster%d", ether_setup,
 			       ieee80211_num_queues(hw));
 	if (!mdev)
@@ -1709,13 +1691,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (ieee80211_num_queues(hw) > 1)
 		mdev->features |= NETIF_F_MULTI_QUEUE;
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(mdev);
-	mdev->ieee80211_ptr = &sdata->wdev;
-	sdata->wdev.wiphy = local->hw.wiphy;
+	mwdev = netdev_priv(mdev);
+	mdev->ieee80211_ptr = mwdev;
+	mwdev->wiphy = local->hw.wiphy;
 
 	local->mdev = mdev;
 
-	ieee80211_rx_bss_list_init(mdev);
+	ieee80211_rx_bss_list_init(local);
 
 	mdev->hard_start_xmit = ieee80211_master_start_xmit;
 	mdev->open = ieee80211_master_open;
@@ -1724,16 +1706,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	mdev->header_ops = &ieee80211_header_ops;
 	mdev->set_multicast_list = ieee80211_master_set_multicast_list;
 
-	sdata->vif.type = IEEE80211_IF_TYPE_AP;
-	sdata->dev = mdev;
-	sdata->local = local;
-	sdata->u.ap.force_unicast_rateidx = -1;
-	sdata->u.ap.max_ratectrl_rateidx = -1;
-	ieee80211_if_sdata_init(sdata);
-
-	/* no RCU needed since we're still during init phase */
-	list_add_tail(&sdata->list, &local->interfaces);
-
 	name = wiphy_dev(local->hw.wiphy)->driver->name;
 	local->hw.workqueue = create_freezeable_workqueue(name);
 	if (!local->hw.workqueue) {
@@ -1779,9 +1751,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (result < 0)
 		goto fail_dev;
 
-	ieee80211_debugfs_add_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev));
-	ieee80211_if_set_type(local->mdev, IEEE80211_IF_TYPE_AP);
-
 	result = ieee80211_init_rate_ctrl_alg(local,
 					      hw->rate_control_algorithm);
 	if (result < 0) {
@@ -1801,7 +1770,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	ieee80211_install_qdisc(local->mdev);
 
 	/* add one default STA interface */
-	result = ieee80211_if_add(local->mdev, "wlan%d", NULL,
+	result = ieee80211_if_add(local, "wlan%d", NULL,
 				  IEEE80211_IF_TYPE_STA, NULL);
 	if (result)
 		printk(KERN_WARNING "%s: Failed to add default virtual iface\n",
@@ -1817,7 +1786,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 fail_wep:
 	rate_control_deinitialize(local);
 fail_rate:
-	ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev));
 	unregister_netdevice(local->mdev);
 	local->mdev = NULL;
 fail_dev:
@@ -1827,10 +1795,8 @@ fail_sta_info:
 	debugfs_hw_del(local);
 	destroy_workqueue(local->hw.workqueue);
 fail_workqueue:
-	if (local->mdev != NULL) {
-		ieee80211_if_free(local->mdev);
-		local->mdev = NULL;
-	}
+	if (local->mdev)
+		free_netdev(local->mdev);
 fail_mdev_alloc:
 	wiphy_unregister(local->hw.wiphy);
 	return result;
@@ -1858,24 +1824,19 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	 */
 
 	/*
-	 * First, we remove all non-master interfaces. Do this because they
-	 * may have bss pointer dependency on the master, and when we free
-	 * the master these would be freed as well, breaking our list
-	 * iteration completely.
+	 * First, we remove all virtual interfaces.
 	 */
 	list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
-		if (sdata->dev == local->mdev)
-			continue;
 		list_del(&sdata->list);
 		__ieee80211_if_del(local, sdata);
 	}
 
 	/* then, finally, remove the master interface */
-	__ieee80211_if_del(local, IEEE80211_DEV_TO_SUB_IF(local->mdev));
+	unregister_netdevice(local->mdev);
 
 	rtnl_unlock();
 
-	ieee80211_rx_bss_list_deinit(local->mdev);
+	ieee80211_rx_bss_list_deinit(local);
 	ieee80211_clear_tx_pending(local);
 	sta_info_stop(local);
 	rate_control_deinitialize(local);
@@ -1892,8 +1853,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	wiphy_unregister(local->hw.wiphy);
 	ieee80211_wep_free(local);
 	ieee80211_led_exit(local);
-	ieee80211_if_free(local->mdev);
-	local->mdev = NULL;
+	free_netdev(local->mdev);
 }
 EXPORT_SYMBOL(ieee80211_unregister_hw);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index dbc8cf454bc..64d710a88b8 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -78,7 +78,7 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
 static struct ieee80211_sta_bss *
 ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len);
-static void ieee80211_rx_bss_put(struct net_device *dev,
+static void ieee80211_rx_bss_put(struct ieee80211_local *local,
 				 struct ieee80211_sta_bss *bss);
 static int ieee80211_sta_find_ibss(struct net_device *dev,
 				   struct ieee80211_if_sta *ifsta);
@@ -554,7 +554,7 @@ static void ieee80211_set_associated(struct net_device *dev,
 
 			changed |= ieee80211_handle_bss_capability(sdata, bss);
 
-			ieee80211_rx_bss_put(dev, bss);
+			ieee80211_rx_bss_put(local, bss);
 		}
 
 		if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
@@ -760,7 +760,7 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		    (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
 			capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
 
-		ieee80211_rx_bss_put(dev, bss);
+		ieee80211_rx_bss_put(local, bss);
 	} else {
 		rates = ~0;
 		rates_len = sband->n_bitrates;
@@ -992,7 +992,7 @@ static int ieee80211_privacy_mismatch(struct net_device *dev,
 	wep_privacy = !!ieee80211_sta_wep_configured(dev);
 	privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED);
 
-	ieee80211_rx_bss_put(dev, bss);
+	ieee80211_rx_bss_put(local, bss);
 
 	if ((bss_privacy == wep_privacy) || (bss_privacy == privacy_invoked))
 		return 0;
@@ -2094,7 +2094,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 			sta->last_signal = bss->signal;
 			sta->last_qual = bss->qual;
 			sta->last_noise = bss->noise;
-			ieee80211_rx_bss_put(dev, bss);
+			ieee80211_rx_bss_put(local, bss);
 		}
 
 		err = sta_info_insert(sta);
@@ -2212,10 +2212,9 @@ static void __ieee80211_rx_bss_hash_add(struct net_device *dev,
 
 
 /* Caller must hold local->sta_bss_lock */
-static void __ieee80211_rx_bss_hash_del(struct net_device *dev,
+static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
 					struct ieee80211_sta_bss *bss)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *b, *prev = NULL;
 	b = local->sta_bss_hash[STA_HASH(bss->bssid)];
 	while (b) {
@@ -2367,39 +2366,35 @@ static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
 }
 
 
-static void ieee80211_rx_bss_put(struct net_device *dev,
+static void ieee80211_rx_bss_put(struct ieee80211_local *local,
 				 struct ieee80211_sta_bss *bss)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-
 	local_bh_disable();
 	if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) {
 		local_bh_enable();
 		return;
 	}
 
-	__ieee80211_rx_bss_hash_del(dev, bss);
+	__ieee80211_rx_bss_hash_del(local, bss);
 	list_del(&bss->list);
 	spin_unlock_bh(&local->sta_bss_lock);
 	ieee80211_rx_bss_free(bss);
 }
 
 
-void ieee80211_rx_bss_list_init(struct net_device *dev)
+void ieee80211_rx_bss_list_init(struct ieee80211_local *local)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	spin_lock_init(&local->sta_bss_lock);
 	INIT_LIST_HEAD(&local->sta_bss_list);
 }
 
 
-void ieee80211_rx_bss_list_deinit(struct net_device *dev)
+void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss, *tmp;
 
 	list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list)
-		ieee80211_rx_bss_put(dev, bss);
+		ieee80211_rx_bss_put(local, bss);
 }
 
 
@@ -2775,7 +2770,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	 */
 	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
 	    bss->probe_resp && beacon) {
-		ieee80211_rx_bss_put(dev, bss);
+		ieee80211_rx_bss_put(local, bss);
 		return;
 	}
 
@@ -2918,7 +2913,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 		}
 	}
 
-	ieee80211_rx_bss_put(dev, bss);
+	ieee80211_rx_bss_put(local, bss);
 }
 
 
@@ -3578,7 +3573,7 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
 					       selected->ssid_len);
 		ieee80211_sta_set_bssid(dev, selected->bssid);
 		ieee80211_sta_def_wmm_params(dev, selected, 0);
-		ieee80211_rx_bss_put(dev, selected);
+		ieee80211_rx_bss_put(local, selected);
 		ifsta->state = IEEE80211_AUTHENTICATE;
 		ieee80211_sta_reset_auth(dev, ifsta);
 		return 0;
@@ -3655,7 +3650,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
 	}
 
 	ret = ieee80211_sta_join_ibss(dev, ifsta, bss);
-	ieee80211_rx_bss_put(dev, bss);
+	ieee80211_rx_bss_put(local, bss);
 	return ret;
 }
 
@@ -3711,7 +3706,7 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
 		       " based on configured SSID\n",
 		       dev->name, print_mac(mac, bssid));
 		ret = ieee80211_sta_join_ibss(dev, ifsta, bss);
-		ieee80211_rx_bss_put(dev, bss);
+		ieee80211_rx_bss_put(local, bss);
 		return ret;
 	}
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
@@ -3907,11 +3902,6 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-
-		/* No need to wake the master device. */
-		if (sdata->dev == local->mdev)
-			continue;
-
 		/* Tell AP we're back */
 		if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
 		    sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)
@@ -4077,12 +4067,6 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-
-		/* Don't stop the master interface, otherwise we can't transmit
-		 * probes! */
-		if (sdata->dev == local->mdev)
-			continue;
-
 		netif_stop_queue(sdata->dev);
 		if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
 		    (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED))
@@ -4473,12 +4457,10 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw,
 	case IEEE80211_NOTIFY_RE_ASSOC:
 		rcu_read_lock();
 		list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+			if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+				continue;
 
-			if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
-				ieee80211_sta_req_auth(sdata->dev,
-						       &sdata->u.sta);
-			}
-
+			ieee80211_sta_req_auth(sdata->dev, &sdata->u.sta);
 		}
 		rcu_read_unlock();
 		break;
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 62388f8e902..ca636295e62 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -259,8 +259,8 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 
 	/* Don't update the state if we're not controlling the rate. */
 	sdata = sta->sdata;
-	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) {
-		sta->txrate_idx = sdata->bss->max_ratectrl_rateidx;
+	if (sdata->force_unicast_rateidx > -1) {
+		sta->txrate_idx = sdata->max_ratectrl_rateidx;
 		goto unlock;
 	}
 
@@ -337,8 +337,8 @@ static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
 
 	/* If a forced rate is in effect, select it. */
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
-		sta->txrate_idx = sdata->bss->force_unicast_rateidx;
+	if (sdata->force_unicast_rateidx > -1)
+		sta->txrate_idx = sdata->force_unicast_rateidx;
 
 	rateidx = sta->txrate_idx;
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fab443d717e..244ee2d50a5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -647,8 +647,7 @@ static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta)
 
 	sdata = sta->sdata;
 
-	if (sdata->bss)
-		atomic_inc(&sdata->bss->num_sta_ps);
+	atomic_inc(&sdata->bss->num_sta_ps);
 	set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL);
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n",
@@ -667,8 +666,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 
 	sdata = sta->sdata;
 
-	if (sdata->bss)
-		atomic_dec(&sdata->bss->num_sta_ps);
+	atomic_dec(&sdata->bss->num_sta_ps);
 
 	clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL);
 
@@ -742,7 +740,9 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->last_qual = rx->status->qual;
 	sta->last_noise = rx->status->noise;
 
-	if (!ieee80211_has_morefrags(hdr->frame_control)) {
+	if (!ieee80211_has_morefrags(hdr->frame_control) &&
+	    (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP ||
+	     rx->sdata->vif.type == IEEE80211_IF_TYPE_VLAN)) {
 		/* Change STA power saving mode only in the end of a frame
 		 * exchange sequence */
 		if (test_sta_flags(sta, WLAN_STA_PS) &&
@@ -1772,11 +1772,6 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 				return 0;
 			rx->flags &= ~IEEE80211_RX_RA_MATCH;
 		}
-		if (sdata->dev == sdata->local->mdev &&
-		    !(rx->flags & IEEE80211_RX_IN_SCAN))
-			/* do not receive anything via
-			 * master device when not scanning */
-			return 0;
 		break;
 	case IEEE80211_IF_TYPE_WDS:
 		if (bssid || !ieee80211_is_data(hdr->frame_control))
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 47d2c1bbfca..f2ba653b9d6 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -320,7 +320,9 @@ int sta_info_insert(struct sta_info *sta)
 	/* notify driver */
 	if (local->ops->sta_notify) {
 		if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
-			sdata = sdata->u.vlan.ap;
+			sdata = container_of(sdata->bss,
+					     struct ieee80211_sub_if_data,
+					     u.ap);
 
 		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
 				       STA_NOTIFY_ADD, sta->addr);
@@ -375,8 +377,10 @@ static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, u16 aid)
 static void __sta_info_set_tim_bit(struct ieee80211_if_ap *bss,
 				   struct sta_info *sta)
 {
-	if (bss)
-		__bss_tim_set(bss, sta->aid);
+	BUG_ON(!bss);
+
+	__bss_tim_set(bss, sta->aid);
+
 	if (sta->local->ops->set_tim) {
 		sta->local->tim_in_locked_section = true;
 		sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 1);
@@ -388,6 +392,8 @@ void sta_info_set_tim_bit(struct sta_info *sta)
 {
 	unsigned long flags;
 
+	BUG_ON(!sta->sdata->bss);
+
 	spin_lock_irqsave(&sta->local->sta_lock, flags);
 	__sta_info_set_tim_bit(sta->sdata->bss, sta);
 	spin_unlock_irqrestore(&sta->local->sta_lock, flags);
@@ -396,8 +402,10 @@ void sta_info_set_tim_bit(struct sta_info *sta)
 static void __sta_info_clear_tim_bit(struct ieee80211_if_ap *bss,
 				     struct sta_info *sta)
 {
-	if (bss)
-		__bss_tim_clear(bss, sta->aid);
+	BUG_ON(!bss);
+
+	__bss_tim_clear(bss, sta->aid);
+
 	if (sta->local->ops->set_tim) {
 		sta->local->tim_in_locked_section = true;
 		sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 0);
@@ -409,6 +417,8 @@ void sta_info_clear_tim_bit(struct sta_info *sta)
 {
 	unsigned long flags;
 
+	BUG_ON(!sta->sdata->bss);
+
 	spin_lock_irqsave(&sta->local->sta_lock, flags);
 	__sta_info_clear_tim_bit(sta->sdata->bss, sta);
 	spin_unlock_irqrestore(&sta->local->sta_lock, flags);
@@ -437,8 +447,9 @@ void __sta_info_unlink(struct sta_info **sta)
 	list_del(&(*sta)->list);
 
 	if (test_and_clear_sta_flags(*sta, WLAN_STA_PS)) {
-		if (sdata->bss)
-			atomic_dec(&sdata->bss->num_sta_ps);
+		BUG_ON(!sdata->bss);
+
+		atomic_dec(&sdata->bss->num_sta_ps);
 		__sta_info_clear_tim_bit(sdata->bss, *sta);
 	}
 
@@ -446,7 +457,9 @@ void __sta_info_unlink(struct sta_info **sta)
 
 	if (local->ops->sta_notify) {
 		if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
-			sdata = sdata->u.vlan.ap;
+			sdata = container_of(sdata->bss,
+					     struct ieee80211_sub_if_data,
+					     u.ap);
 
 		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
 				       STA_NOTIFY_REMOVE, (*sta)->addr);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9bd9faac3c3..a757dcc1208 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -303,8 +303,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		struct ieee80211_if_ap *ap;
-		if (sdata->dev == local->mdev ||
-		    sdata->vif.type != IEEE80211_IF_TYPE_AP)
+		if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
 			continue;
 		ap = &sdata->u.ap;
 		skb = skb_dequeue(&ap->ps_bc_buf);
@@ -346,8 +345,12 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 	 * This is done either by the hardware or us.
 	 */
 
-	/* not AP/IBSS or ordered frame */
-	if (!tx->sdata->bss || (tx->fc & IEEE80211_FCTL_ORDER))
+	/* powersaving STAs only in AP/VLAN mode */
+	if (!tx->sdata->bss)
+		return TX_CONTINUE;
+
+	/* no buffering for ordered frames */
+	if (tx->fc & IEEE80211_FCTL_ORDER)
 		return TX_CONTINUE;
 
 	/* no stations in PS mode */
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ce62b163b82..89ce4e07bd8 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -428,8 +428,6 @@ void ieee80211_iterate_active_interfaces(
 		case IEEE80211_IF_TYPE_MESH_POINT:
 			break;
 		}
-		if (sdata->dev == local->mdev)
-			continue;
 		if (netif_running(sdata->dev))
 			iterator(data, sdata->dev->dev_addr,
 				 &sdata->vif);
@@ -463,8 +461,6 @@ void ieee80211_iterate_active_interfaces_atomic(
 		case IEEE80211_IF_TYPE_MESH_POINT:
 			break;
 		}
-		if (sdata->dev == local->mdev)
-			continue;
 		if (netif_running(sdata->dev))
 			iterator(data, sdata->dev->dev_addr,
 				 &sdata->vif);
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 207971e9ad7..e8e4a6215e8 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -627,16 +627,14 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev,
 	struct ieee80211_supported_band *sband;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (!sdata->bss)
-		return -ENODEV;
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
 	/* target_rate = -1, rate->fixed = 0 means auto only, so use all rates
 	 * target_rate = X, rate->fixed = 1 means only rate X
 	 * target_rate = X, rate->fixed = 0 means all rates <= X */
-	sdata->bss->max_ratectrl_rateidx = -1;
-	sdata->bss->force_unicast_rateidx = -1;
+	sdata->max_ratectrl_rateidx = -1;
+	sdata->force_unicast_rateidx = -1;
 	if (rate->value < 0)
 		return 0;
 
@@ -645,9 +643,9 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev,
 		int this_rate = brate->bitrate;
 
 		if (target_rate == this_rate) {
-			sdata->bss->max_ratectrl_rateidx = i;
+			sdata->max_ratectrl_rateidx = i;
 			if (rate->fixed)
-				sdata->bss->force_unicast_rateidx = i;
+				sdata->force_unicast_rateidx = i;
 			err = 0;
 			break;
 		}
-- 
cgit v1.2.3-70-g09d2


From 75636525fbfa78fa33fd754c89785cfde750acd3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 9 Jul 2008 14:40:35 +0200
Subject: mac80211: revamp virtual interface handling

This patch revamps the virtual interface handling and makes the
code much easier to follow. Fewer functions, better names, less
spaghetti code.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c            |  24 +--
 net/mac80211/debugfs.c        |  15 +-
 net/mac80211/debugfs_netdev.c |  14 +-
 net/mac80211/debugfs_netdev.h |   5 -
 net/mac80211/ieee80211_i.h    |  21 +--
 net/mac80211/iface.c          | 337 +++++++++++++++++++-----------------------
 net/mac80211/main.c           |  18 +--
 net/mac80211/wext.c           |   3 +-
 8 files changed, 172 insertions(+), 265 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 3c95cd9bf8e..6aa49ad172a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -50,9 +50,6 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
 	struct ieee80211_sub_if_data *sdata;
 	int err;
 
-	if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED))
-		return -ENODEV;
-
 	itype = nl80211_type_to_mac80211_type(type);
 	if (itype == IEEE80211_IF_TYPE_INVALID)
 		return -EINVAL;
@@ -68,35 +65,26 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
 
 static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct net_device *dev;
-	char *name;
-
-	if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED))
-		return -ENODEV;
 
 	/* we're under RTNL */
 	dev = __dev_get_by_index(&init_net, ifindex);
 	if (!dev)
-		return 0;
+		return -ENODEV;
 
-	name = dev->name;
+	ieee80211_if_remove(dev);
 
-	return ieee80211_if_remove(local->mdev, name, -1);
+	return 0;
 }
 
 static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 				  enum nl80211_iftype type, u32 *flags,
 				  struct vif_params *params)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct net_device *dev;
 	enum ieee80211_if_types itype;
 	struct ieee80211_sub_if_data *sdata;
 
-	if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED))
-		return -ENODEV;
-
 	/* we're under RTNL */
 	dev = __dev_get_by_index(&init_net, ifindex);
 	if (!dev)
@@ -111,11 +99,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
-		return -EOPNOTSUPP;
-
-	ieee80211_if_reinit(dev);
-	ieee80211_if_set_type(dev, itype);
+	ieee80211_if_change_type(sdata, itype);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len)
 		ieee80211_if_sta_set_mesh_id(&sdata->u.sta,
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index d20d90eead1..ee509f1109e 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -70,16 +70,6 @@ DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s",
 
 /* statistics stuff */
 
-static inline int rtnl_lock_local(struct ieee80211_local *local)
-{
-	rtnl_lock();
-	if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) {
-		rtnl_unlock();
-		return -ENODEV;
-	}
-	return 0;
-}
-
 #define DEBUGFS_STATS_FILE(name, buflen, fmt, value...)			\
 	DEBUGFS_READONLY_FILE(stats_ ##name, buflen, fmt, ##value)
 
@@ -96,10 +86,7 @@ static ssize_t format_devstat_counter(struct ieee80211_local *local,
 	if (!local->ops->get_stats)
 		return -EOPNOTSUPP;
 
-	res = rtnl_lock_local(local);
-	if (res)
-		return res;
-
+	rtnl_lock();
 	res = local->ops->get_stats(local_to_hw(local), &stats);
 	rtnl_unlock();
 	if (!res)
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 4aa6621f997..475f89a8aee 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -476,12 +476,12 @@ static void del_mesh_config(struct ieee80211_sub_if_data *sdata)
 }
 #endif
 
-static void del_files(struct ieee80211_sub_if_data *sdata, int type)
+static void del_files(struct ieee80211_sub_if_data *sdata)
 {
 	if (!sdata->debugfsdir)
 		return;
 
-	switch (type) {
+	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_MESH_POINT:
 #ifdef CONFIG_MAC80211_MESH
 		del_mesh_stats(sdata);
@@ -521,22 +521,16 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
 	sprintf(buf, "netdev:%s", sdata->dev->name);
 	sdata->debugfsdir = debugfs_create_dir(buf,
 		sdata->local->hw.wiphy->debugfsdir);
+	add_files(sdata);
 }
 
 void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
 {
-	del_files(sdata, sdata->vif.type);
+	del_files(sdata);
 	debugfs_remove(sdata->debugfsdir);
 	sdata->debugfsdir = NULL;
 }
 
-void ieee80211_debugfs_change_if_type(struct ieee80211_sub_if_data *sdata,
-				      int oldtype)
-{
-	del_files(sdata, oldtype);
-	add_files(sdata);
-}
-
 static int netdev_notify(struct notifier_block *nb,
 			 unsigned long state,
 			 void *ndev)
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index a690071fde8..7af731f0b73 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -6,8 +6,6 @@
 #ifdef CONFIG_MAC80211_DEBUGFS
 void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata);
 void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
-void ieee80211_debugfs_change_if_type(struct ieee80211_sub_if_data *sdata,
-				     int oldtype);
 void ieee80211_debugfs_netdev_init(void);
 void ieee80211_debugfs_netdev_exit(void);
 #else
@@ -17,9 +15,6 @@ static inline void ieee80211_debugfs_add_netdev(
 static inline void ieee80211_debugfs_remove_netdev(
 	struct ieee80211_sub_if_data *sdata)
 {}
-static inline void ieee80211_debugfs_change_if_type(
-	struct ieee80211_sub_if_data *sdata, int oldtype)
-{}
 static inline void ieee80211_debugfs_netdev_init(void)
 {}
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 1b1fc53dad3..35bcdfef904 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -558,12 +558,6 @@ struct ieee80211_local {
 	bool tim_in_locked_section; /* see ieee80211_beacon_get() */
 	int tx_headroom; /* required headroom for hardware/radiotap */
 
-	enum {
-		IEEE80211_DEV_UNINITIALIZED = 0,
-		IEEE80211_DEV_REGISTERED,
-		IEEE80211_DEV_UNREGISTERED,
-	} reg_state;
-
 	/* Tasklet and skb queue to process calls from IRQ mode. All frames
 	 * added to skb_queue will be processed, but frames in
 	 * skb_queue_unreliable may be dropped if the total length of these
@@ -863,7 +857,6 @@ int ieee80211_hw_config(struct ieee80211_local *local);
 int ieee80211_if_config(struct net_device *dev);
 int ieee80211_if_config_beacon(struct net_device *dev);
 void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
-void ieee80211_if_setup(struct net_device *dev);
 u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 			struct ieee80211_ht_info *req_ht_cap,
 			struct ieee80211_ht_bss_info *req_bss_cap);
@@ -933,16 +926,14 @@ static inline void ieee80211_start_mesh(struct net_device *dev)
 #endif
 
 /* interface handling */
+void ieee80211_if_setup(struct net_device *dev);
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
-		     struct net_device **new_dev, int type,
+		     struct net_device **new_dev, enum ieee80211_if_types type,
 		     struct vif_params *params);
-void ieee80211_if_set_type(struct net_device *dev, int type);
-void ieee80211_if_reinit(struct net_device *dev);
-void __ieee80211_if_del(struct ieee80211_local *local,
-			struct ieee80211_sub_if_data *sdata);
-int ieee80211_if_remove(struct net_device *dev, const char *name, int id);
-void ieee80211_if_free(struct net_device *dev);
-void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata);
+void ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
+			      enum ieee80211_if_types type);
+void ieee80211_if_remove(struct net_device *dev);
+void ieee80211_remove_interfaces(struct ieee80211_local *local);
 
 /* tx handling */
 void ieee80211_clear_tx_pending(struct ieee80211_local *local);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f2aefd4b863..6cf121bebd7 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -2,6 +2,7 @@
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -17,39 +18,149 @@
 #include "debugfs_netdev.h"
 #include "mesh.h"
 
-void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata)
+/*
+ * Called when the netdev is removed or, by the code below, before
+ * the interface type changes.
+ */
+static void ieee80211_teardown_sdata(struct net_device *dev)
 {
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct beacon_data *beacon;
+	struct sk_buff *skb;
+	int flushed;
 	int i;
 
-	/* Default values for sub-interface parameters */
-	sdata->drop_unencrypted = 0;
+	ieee80211_debugfs_remove_netdev(sdata);
+
+	/* free extra data */
+	ieee80211_free_keys(sdata);
+
 	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
-		skb_queue_head_init(&sdata->fragments[i].skb_list);
+		__skb_queue_purge(&sdata->fragments[i].skb_list);
+	sdata->fragment_next = 0;
 
-	INIT_LIST_HEAD(&sdata->key_list);
+	switch (sdata->vif.type) {
+	case IEEE80211_IF_TYPE_AP:
+		beacon = sdata->u.ap.beacon;
+		rcu_assign_pointer(sdata->u.ap.beacon, NULL);
+		synchronize_rcu();
+		kfree(beacon);
 
-	sdata->force_unicast_rateidx = -1;
-	sdata->max_ratectrl_rateidx = -1;
+		while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
+			local->total_ps_buffered--;
+			dev_kfree_skb(skb);
+		}
+
+		break;
+	case IEEE80211_IF_TYPE_MESH_POINT:
+		/* Allow compiler to elide mesh_rmc_free call. */
+		if (ieee80211_vif_is_mesh(&sdata->vif))
+			mesh_rmc_free(dev);
+		/* fall through */
+	case IEEE80211_IF_TYPE_STA:
+	case IEEE80211_IF_TYPE_IBSS:
+		kfree(sdata->u.sta.extra_ie);
+		kfree(sdata->u.sta.assocreq_ies);
+		kfree(sdata->u.sta.assocresp_ies);
+		kfree_skb(sdata->u.sta.probe_resp);
+		break;
+	case IEEE80211_IF_TYPE_WDS:
+	case IEEE80211_IF_TYPE_VLAN:
+	case IEEE80211_IF_TYPE_MNTR:
+		break;
+	case IEEE80211_IF_TYPE_INVALID:
+		BUG();
+		break;
+	}
+
+	flushed = sta_info_flush(local, sdata);
+	WARN_ON(flushed);
 }
 
-static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata)
+/*
+ * Helper function to initialise an interface to a specific type.
+ */
+static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
+				  enum ieee80211_if_types type)
 {
-	int i;
+	struct ieee80211_if_sta *ifsta;
 
-	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
-		__skb_queue_purge(&sdata->fragments[i].skb_list);
+	/* clear type-dependent union */
+	memset(&sdata->u, 0, sizeof(sdata->u));
+
+	/* and set some type-dependent values */
+	sdata->vif.type = type;
+
+	/* only monitor differs */
+	sdata->dev->type = ARPHRD_ETHER;
+
+	switch (type) {
+	case IEEE80211_IF_TYPE_AP:
+		skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
+		INIT_LIST_HEAD(&sdata->u.ap.vlans);
+		break;
+	case IEEE80211_IF_TYPE_MESH_POINT:
+	case IEEE80211_IF_TYPE_STA:
+	case IEEE80211_IF_TYPE_IBSS:
+		ifsta = &sdata->u.sta;
+		INIT_WORK(&ifsta->work, ieee80211_sta_work);
+		setup_timer(&ifsta->timer, ieee80211_sta_timer,
+			    (unsigned long) sdata);
+		skb_queue_head_init(&ifsta->skb_queue);
+
+		ifsta->capab = WLAN_CAPABILITY_ESS;
+		ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN |
+			IEEE80211_AUTH_ALG_SHARED_KEY;
+		ifsta->flags |= IEEE80211_STA_CREATE_IBSS |
+			IEEE80211_STA_AUTO_BSSID_SEL |
+			IEEE80211_STA_AUTO_CHANNEL_SEL;
+		if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4)
+			ifsta->flags |= IEEE80211_STA_WMM_ENABLED;
+
+		if (ieee80211_vif_is_mesh(&sdata->vif))
+			ieee80211_mesh_init_sdata(sdata);
+		break;
+	case IEEE80211_IF_TYPE_MNTR:
+		sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP;
+		sdata->dev->hard_start_xmit = ieee80211_monitor_start_xmit;
+		sdata->u.mntr_flags = MONITOR_FLAG_CONTROL |
+				      MONITOR_FLAG_OTHER_BSS;
+		break;
+	case IEEE80211_IF_TYPE_WDS:
+	case IEEE80211_IF_TYPE_VLAN:
+		break;
+	case IEEE80211_IF_TYPE_INVALID:
+		BUG();
+		break;
+	}
+
+	ieee80211_debugfs_add_netdev(sdata);
+}
+
+void ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
+			      enum ieee80211_if_types type)
+{
+	/* Purge and reset type-dependent state. */
+	ieee80211_teardown_sdata(sdata->dev);
+	ieee80211_setup_sdata(sdata, type);
+
+	/* reset some values that shouldn't be kept across type changes */
+	sdata->basic_rates = 0;
+	sdata->drop_unencrypted = 0;
+	sdata->sequence = 0;
 }
 
-/* Must be called with rtnl lock held. */
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
-		     struct net_device **new_dev, int type,
+		     struct net_device **new_dev, enum ieee80211_if_types type,
 		     struct vif_params *params)
 {
 	struct net_device *ndev;
 	struct ieee80211_sub_if_data *sdata = NULL;
-	int ret;
+	int ret, i;
 
 	ASSERT_RTNL();
+
 	ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size,
 			    name, ieee80211_if_setup);
 	if (!ndev)
@@ -74,18 +185,28 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 	/* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */
 	sdata = netdev_priv(ndev);
 	ndev->ieee80211_ptr = &sdata->wdev;
+
+	/* initialise type-independent data */
 	sdata->wdev.wiphy = local->hw.wiphy;
-	sdata->vif.type = IEEE80211_IF_TYPE_AP;
-	sdata->dev = ndev;
 	sdata->local = local;
-	ieee80211_if_sdata_init(sdata);
+	sdata->dev = ndev;
+
+	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
+		skb_queue_head_init(&sdata->fragments[i].skb_list);
+
+	INIT_LIST_HEAD(&sdata->key_list);
+
+	sdata->force_unicast_rateidx = -1;
+	sdata->max_ratectrl_rateidx = -1;
+
+	/* setup type-dependent data */
+	ieee80211_setup_sdata(sdata, type);
 
 	ret = register_netdevice(ndev);
 	if (ret)
 		goto fail;
 
-	ieee80211_debugfs_add_netdev(sdata);
-	ieee80211_if_set_type(ndev, type);
+	ndev->uninit = ieee80211_teardown_sdata;
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) &&
 	    params && params->mesh_id_len)
@@ -93,11 +214,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 					     params->mesh_id_len,
 					     params->mesh_id);
 
-	/* we're under RTNL so all this is fine */
-	if (unlikely(local->reg_state == IEEE80211_DEV_UNREGISTERED)) {
-		__ieee80211_if_del(local, sdata);
-		return -ENODEV;
-	}
 	list_add_tail_rcu(&sdata->list, &local->interfaces);
 
 	if (new_dev)
@@ -105,181 +221,34 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 	return 0;
 
-fail:
+ fail:
 	free_netdev(ndev);
 	return ret;
 }
 
-void ieee80211_if_set_type(struct net_device *dev, int type)
+void ieee80211_if_remove(struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	int oldtype = sdata->vif.type;
-
-	/*
-	 * Called even when register_netdevice fails, it would
-	 * oops if assigned before initialising the rest.
-	 */
-	dev->uninit = ieee80211_if_reinit;
-
-	/* most have no BSS pointer */
-	sdata->bss = NULL;
-	sdata->vif.type = type;
-
-	sdata->basic_rates = 0;
-
-	switch (type) {
-	case IEEE80211_IF_TYPE_WDS:
-	case IEEE80211_IF_TYPE_VLAN:
-		/* nothing special */
-		break;
-	case IEEE80211_IF_TYPE_AP:
-		skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
-		INIT_LIST_HEAD(&sdata->u.ap.vlans);
-		break;
-	case IEEE80211_IF_TYPE_MESH_POINT:
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS: {
-		struct ieee80211_if_sta *ifsta;
-
-		ifsta = &sdata->u.sta;
-		INIT_WORK(&ifsta->work, ieee80211_sta_work);
-		setup_timer(&ifsta->timer, ieee80211_sta_timer,
-			    (unsigned long) sdata);
-		skb_queue_head_init(&ifsta->skb_queue);
-
-		ifsta->capab = WLAN_CAPABILITY_ESS;
-		ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN |
-			IEEE80211_AUTH_ALG_SHARED_KEY;
-		ifsta->flags |= IEEE80211_STA_CREATE_IBSS |
-			IEEE80211_STA_AUTO_BSSID_SEL |
-			IEEE80211_STA_AUTO_CHANNEL_SEL;
-		if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4)
-			ifsta->flags |= IEEE80211_STA_WMM_ENABLED;
-
-		if (ieee80211_vif_is_mesh(&sdata->vif))
-			ieee80211_mesh_init_sdata(sdata);
-		break;
-	}
-	case IEEE80211_IF_TYPE_MNTR:
-		dev->type = ARPHRD_IEEE80211_RADIOTAP;
-		dev->hard_start_xmit = ieee80211_monitor_start_xmit;
-		sdata->u.mntr_flags = MONITOR_FLAG_CONTROL |
-				      MONITOR_FLAG_OTHER_BSS;
-		break;
-	case IEEE80211_IF_TYPE_INVALID:
-		BUG();
-		break;
-	}
-	ieee80211_debugfs_change_if_type(sdata, oldtype);
-}
-
-/* Must be called with rtnl lock held. */
-void ieee80211_if_reinit(struct net_device *dev)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct sk_buff *skb;
-	int flushed;
 
 	ASSERT_RTNL();
 
-	ieee80211_free_keys(sdata);
-
-	ieee80211_if_sdata_deinit(sdata);
-
-	/* Need to handle mesh specially to allow eliding the function call */
-	if (ieee80211_vif_is_mesh(&sdata->vif))
-		mesh_rmc_free(dev);
-
-	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_INVALID:
-		/* cannot happen */
-		WARN_ON(1);
-		break;
-	case IEEE80211_IF_TYPE_AP: {
-		struct beacon_data *beacon;
-
-		beacon = sdata->u.ap.beacon;
-		rcu_assign_pointer(sdata->u.ap.beacon, NULL);
-		synchronize_rcu();
-		kfree(beacon);
-
-		while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
-			local->total_ps_buffered--;
-			dev_kfree_skb(skb);
-		}
-
-		break;
-	}
-	case IEEE80211_IF_TYPE_WDS:
-	case IEEE80211_IF_TYPE_VLAN:
-		/* nothing to do */
-		break;
-	case IEEE80211_IF_TYPE_MESH_POINT:
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
-		kfree(sdata->u.sta.extra_ie);
-		sdata->u.sta.extra_ie = NULL;
-		kfree(sdata->u.sta.assocreq_ies);
-		sdata->u.sta.assocreq_ies = NULL;
-		kfree(sdata->u.sta.assocresp_ies);
-		sdata->u.sta.assocresp_ies = NULL;
-		if (sdata->u.sta.probe_resp) {
-			dev_kfree_skb(sdata->u.sta.probe_resp);
-			sdata->u.sta.probe_resp = NULL;
-		}
-
-		break;
-	case IEEE80211_IF_TYPE_MNTR:
-		dev->type = ARPHRD_ETHER;
-		break;
-	}
-
-	flushed = sta_info_flush(local, sdata);
-	WARN_ON(flushed);
-
-	memset(&sdata->u, 0, sizeof(sdata->u));
-	ieee80211_if_sdata_init(sdata);
-}
-
-/* Must be called with rtnl lock held. */
-void __ieee80211_if_del(struct ieee80211_local *local,
-			struct ieee80211_sub_if_data *sdata)
-{
-	struct net_device *dev = sdata->dev;
-
-	ieee80211_debugfs_remove_netdev(sdata);
+	list_del_rcu(&sdata->list);
+	synchronize_rcu();
 	unregister_netdevice(dev);
-	/*
-	 * The net_device will be freed by its destructor,
-	 * i.e. ieee80211_if_free.
-	 */
 }
 
-/* Must be called with rtnl lock held. */
-int ieee80211_if_remove(struct net_device *dev, const char *name, int id)
+/*
+ * Remove all interfaces, may only be called at hardware unregistration
+ * time because it doesn't do RCU-safe list removals.
+ */
+void ieee80211_remove_interfaces(struct ieee80211_local *local)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata, *n;
+	struct ieee80211_sub_if_data *sdata, *tmp;
 
 	ASSERT_RTNL();
 
-	list_for_each_entry_safe(sdata, n, &local->interfaces, list) {
-		if ((sdata->vif.type == id || id == -1) &&
-		    strcmp(name, sdata->dev->name) == 0) {
-			list_del_rcu(&sdata->list);
-			synchronize_rcu();
-			__ieee80211_if_del(local, sdata);
-			return 0;
-		}
+	list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
+		list_del(&sdata->list);
+		unregister_netdevice(sdata->dev);
 	}
-	return -ENODEV;
-}
-
-void ieee80211_if_free(struct net_device *dev)
-{
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	ieee80211_if_sdata_deinit(sdata);
-	free_netdev(dev);
 }
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 863923964d2..0759ab2ca3f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -971,7 +971,6 @@ static const struct header_ops ieee80211_header_ops = {
 	.cache_update	= eth_header_cache_update,
 };
 
-/* Must not be called for mdev */
 void ieee80211_if_setup(struct net_device *dev)
 {
 	ether_setup(dev);
@@ -981,7 +980,7 @@ void ieee80211_if_setup(struct net_device *dev)
 	dev->change_mtu = ieee80211_change_mtu;
 	dev->open = ieee80211_open;
 	dev->stop = ieee80211_stop;
-	dev->destructor = ieee80211_if_free;
+	dev->destructor = free_netdev;
 }
 
 /* everything else */
@@ -1776,7 +1775,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		printk(KERN_WARNING "%s: Failed to add default virtual iface\n",
 		       wiphy_name(local->hw.wiphy));
 
-	local->reg_state = IEEE80211_DEV_REGISTERED;
 	rtnl_unlock();
 
 	ieee80211_led_init(local);
@@ -1806,30 +1804,20 @@ EXPORT_SYMBOL(ieee80211_register_hw);
 void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_sub_if_data *sdata, *tmp;
 
 	tasklet_kill(&local->tx_pending_tasklet);
 	tasklet_kill(&local->tasklet);
 
 	rtnl_lock();
 
-	BUG_ON(local->reg_state != IEEE80211_DEV_REGISTERED);
-
-	local->reg_state = IEEE80211_DEV_UNREGISTERED;
-
 	/*
 	 * At this point, interface list manipulations are fine
 	 * because the driver cannot be handing us frames any
 	 * more and the tasklet is killed.
 	 */
 
-	/*
-	 * First, we remove all virtual interfaces.
-	 */
-	list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
-		list_del(&sdata->list);
-		__ieee80211_if_del(local, sdata);
-	}
+	/* First, we remove all virtual interfaces. */
+	ieee80211_remove_interfaces(local);
 
 	/* then, finally, remove the master interface */
 	unregister_netdevice(local->mdev);
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index e8e4a6215e8..f2fdd334219 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -301,8 +301,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev,
 	if (netif_running(dev))
 		return -EBUSY;
 
-	ieee80211_if_reinit(dev);
-	ieee80211_if_set_type(dev, type);
+	ieee80211_if_change_type(sdata, type);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From f3947e2dfa3b18f375b7acd03b7ee2877d0751fc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 9 Jul 2008 14:40:36 +0200
Subject: mac80211: push interface checks down

This patch pushes the "netif_running()" and "same type as before"
checks down into ieee80211_if_change_type() to centralise the
logic instead of duplicating it for cfg80211 and wext.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c         |  8 ++++----
 net/mac80211/ieee80211_i.h |  4 ++--
 net/mac80211/iface.c       | 20 ++++++++++++++++++--
 net/mac80211/wext.c        |  9 +--------
 4 files changed, 25 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 6aa49ad172a..ea0301025c1 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -84,22 +84,22 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 	struct net_device *dev;
 	enum ieee80211_if_types itype;
 	struct ieee80211_sub_if_data *sdata;
+	int ret;
 
 	/* we're under RTNL */
 	dev = __dev_get_by_index(&init_net, ifindex);
 	if (!dev)
 		return -ENODEV;
 
-	if (netif_running(dev))
-		return -EBUSY;
-
 	itype = nl80211_type_to_mac80211_type(type);
 	if (itype == IEEE80211_IF_TYPE_INVALID)
 		return -EINVAL;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	ieee80211_if_change_type(sdata, itype);
+	ret = ieee80211_if_change_type(sdata, itype);
+	if (ret)
+		return ret;
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len)
 		ieee80211_if_sta_set_mesh_id(&sdata->u.sta,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 35bcdfef904..2146c0c436d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -930,8 +930,8 @@ void ieee80211_if_setup(struct net_device *dev);
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		     struct net_device **new_dev, enum ieee80211_if_types type,
 		     struct vif_params *params);
-void ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
-			      enum ieee80211_if_types type);
+int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
+			     enum ieee80211_if_types type);
 void ieee80211_if_remove(struct net_device *dev);
 void ieee80211_remove_interfaces(struct ieee80211_local *local);
 
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 6cf121bebd7..2e3adcb3ce2 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -138,9 +138,23 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	ieee80211_debugfs_add_netdev(sdata);
 }
 
-void ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
-			      enum ieee80211_if_types type)
+int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
+			     enum ieee80211_if_types type)
 {
+	ASSERT_RTNL();
+
+	if (type == sdata->vif.type)
+		return 0;
+
+	/*
+	 * We could, here, on changes between IBSS/STA/MESH modes,
+	 * invoke an MLME function instead that disassociates etc.
+	 * and goes into the requested mode.
+	 */
+
+	if (netif_running(sdata->dev))
+		return -EBUSY;
+
 	/* Purge and reset type-dependent state. */
 	ieee80211_teardown_sdata(sdata->dev);
 	ieee80211_setup_sdata(sdata, type);
@@ -149,6 +163,8 @@ void ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 	sdata->basic_rates = 0;
 	sdata->drop_unencrypted = 0;
 	sdata->sequence = 0;
+
+	return 0;
 }
 
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index f2fdd334219..c041db9556c 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -296,14 +296,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev,
 		return -EINVAL;
 	}
 
-	if (type == sdata->vif.type)
-		return 0;
-	if (netif_running(dev))
-		return -EBUSY;
-
-	ieee80211_if_change_type(sdata, type);
-
-	return 0;
+	return ieee80211_if_change_type(sdata, type);
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 9d139c810a2aa17365cc548d0cd2a189d8433c65 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 9 Jul 2008 14:40:37 +0200
Subject: mac80211: revamp beacon configuration

This patch changes mac80211's beacon configuration handling
to never pass skbs to the driver directly but rather always
require the driver to use ieee80211_beacon_get(). Additionally,
it introduces "change flags" on the config_interface() call
to enable drivers to figure out what is changing. Finally, it
removes the beacon_update() driver callback in favour of
having IBSS beacon delivered by ieee80211_beacon_get() as well.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/base.c           |  13 +++-
 drivers/net/wireless/b43/main.c             |  48 +++++-------
 drivers/net/wireless/b43legacy/main.c       |  45 +++++------
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  19 ++++-
 drivers/net/wireless/iwlwifi/iwl4965-base.c |  18 ++++-
 drivers/net/wireless/rt2x00/rt2400pci.c     |   2 +-
 drivers/net/wireless/rt2x00/rt2500pci.c     |   2 +-
 drivers/net/wireless/rt2x00/rt2500usb.c     |   2 +-
 drivers/net/wireless/rt2x00/rt2x00.h        |   2 +
 drivers/net/wireless/rt2x00/rt2x00dev.c     |  10 +--
 drivers/net/wireless/rt2x00/rt2x00mac.c     |  22 ++++--
 drivers/net/wireless/rt2x00/rt61pci.c       |   2 +-
 drivers/net/wireless/rt2x00/rt73usb.c       |   2 +-
 drivers/net/wireless/zd1211rw/zd_mac.c      |  22 ++----
 include/net/mac80211.h                      |  49 ++++--------
 net/mac80211/cfg.c                          |   4 +-
 net/mac80211/ieee80211_i.h                  |   3 +-
 net/mac80211/main.c                         |  59 +++++++--------
 net/mac80211/mlme.c                         | 112 +++++++++++-----------------
 net/mac80211/tx.c                           |  36 +++++----
 net/mac80211/wext.c                         |   2 +-
 21 files changed, 226 insertions(+), 248 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index a43e9b25169..217d506527a 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -207,7 +207,6 @@ static struct ieee80211_ops ath5k_hw_ops = {
 	.get_tx_stats 	= ath5k_get_tx_stats,
 	.get_tsf 	= ath5k_get_tsf,
 	.reset_tsf 	= ath5k_reset_tsf,
-	.beacon_update 	= ath5k_beacon_update,
 };
 
 /*
@@ -2785,6 +2784,18 @@ ath5k_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 		 * a clean way of letting us retrieve this yet. */
 		ath5k_hw_set_associd(ah, ah->ah_bssid, 0);
 	}
+
+	if (conf->changed & IEEE80211_IFCC_BEACON &&
+	    vif->type == IEEE80211_IF_TYPE_IBSS) {
+		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
+		if (!beacon) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+		/* call old handler for now */
+		ath5k_beacon_update(hw, beacon);
+	}
+
 	mutex_unlock(&sc->lock);
 
 	return ath5k_reset(hw);
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 9d2eb273b72..381dbd33dfc 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -1675,14 +1675,24 @@ static void b43_beacon_update_trigger_work(struct work_struct *work)
 
 /* Asynchronously update the packet templates in template RAM.
  * Locking: Requires wl->irq_lock to be locked. */
-static void b43_update_templates(struct b43_wl *wl, struct sk_buff *beacon)
+static void b43_update_templates(struct b43_wl *wl)
 {
+	struct sk_buff *beacon;
+
 	/* This is the top half of the ansynchronous beacon update.
 	 * The bottom half is the beacon IRQ.
 	 * Beacon update must be asynchronous to avoid sending an
 	 * invalid beacon. This can happen for example, if the firmware
 	 * transmits a beacon while we are updating it. */
 
+	/* We could modify the existing beacon and set the aid bit in
+	 * the TIM field, but that would probably require resizing and
+	 * moving of data within the beacon template.
+	 * Simply request a new beacon and let mac80211 do the hard work. */
+	beacon = ieee80211_beacon_get(wl->hw, wl->vif);
+	if (unlikely(!beacon))
+		return;
+
 	if (wl->current_beacon)
 		dev_kfree_skb_any(wl->current_beacon);
 	wl->current_beacon = beacon;
@@ -3645,10 +3655,14 @@ static int b43_op_config_interface(struct ieee80211_hw *hw,
 	if (b43_status(dev) >= B43_STAT_INITIALIZED) {
 		if (b43_is_mode(wl, IEEE80211_IF_TYPE_AP) ||
 		    b43_is_mode(wl, IEEE80211_IF_TYPE_MESH_POINT)) {
-			B43_WARN_ON(conf->type != wl->if_type);
-			b43_set_ssid(dev, conf->ssid, conf->ssid_len);
-			if (conf->beacon)
-				b43_update_templates(wl, conf->beacon);
+			B43_WARN_ON(vif->type != wl->if_type);
+			if (conf->changed & IEEE80211_IFCC_SSID)
+				b43_set_ssid(dev, conf->ssid, conf->ssid_len);
+			if (conf->changed & IEEE80211_IFCC_BEACON)
+				b43_update_templates(wl);
+		} else if (b43_is_mode(wl, IEEE80211_IF_TYPE_IBSS)) {
+			if (conf->changed & IEEE80211_IFCC_BEACON)
+				b43_update_templates(wl);
 		}
 		b43_write_mac_bssid_templates(dev);
 	}
@@ -4334,33 +4348,12 @@ out_unlock:
 }
 
 static int b43_op_beacon_set_tim(struct ieee80211_hw *hw, int aid, int set)
-{
-	struct b43_wl *wl = hw_to_b43_wl(hw);
-	struct sk_buff *beacon;
-	unsigned long flags;
-
-	/* We could modify the existing beacon and set the aid bit in
-	 * the TIM field, but that would probably require resizing and
-	 * moving of data within the beacon template.
-	 * Simply request a new beacon and let mac80211 do the hard work. */
-	beacon = ieee80211_beacon_get(hw, wl->vif);
-	if (unlikely(!beacon))
-		return -ENOMEM;
-	spin_lock_irqsave(&wl->irq_lock, flags);
-	b43_update_templates(wl, beacon);
-	spin_unlock_irqrestore(&wl->irq_lock, flags);
-
-	return 0;
-}
-
-static int b43_op_ibss_beacon_update(struct ieee80211_hw *hw,
-				     struct sk_buff *beacon)
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
 	unsigned long flags;
 
 	spin_lock_irqsave(&wl->irq_lock, flags);
-	b43_update_templates(wl, beacon);
+	b43_update_templates(wl);
 	spin_unlock_irqrestore(&wl->irq_lock, flags);
 
 	return 0;
@@ -4391,7 +4384,6 @@ static const struct ieee80211_ops b43_hw_ops = {
 	.stop			= b43_op_stop,
 	.set_retry_limit	= b43_op_set_retry_limit,
 	.set_tim		= b43_op_beacon_set_tim,
-	.beacon_update		= b43_op_ibss_beacon_update,
 	.sta_notify		= b43_op_sta_notify,
 };
 
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 069157eea05..a1b8bf3ee73 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -1138,14 +1138,22 @@ static void b43legacy_write_probe_resp_template(struct b43legacy_wldev *dev,
 
 /* Asynchronously update the packet templates in template RAM.
  * Locking: Requires wl->irq_lock to be locked. */
-static void b43legacy_update_templates(struct b43legacy_wl *wl,
-				       struct sk_buff *beacon)
+static void b43legacy_update_templates(struct b43legacy_wl *wl)
 {
+	struct sk_buff *beacon;
 	/* This is the top half of the ansynchronous beacon update. The bottom
 	 * half is the beacon IRQ. Beacon update must be asynchronous to avoid
 	 * sending an invalid beacon. This can happen for example, if the
 	 * firmware transmits a beacon while we are updating it. */
 
+	/* We could modify the existing beacon and set the aid bit in the TIM
+	 * field, but that would probably require resizing and moving of data
+	 * within the beacon template. Simply request a new beacon and let
+	 * mac80211 do the hard work. */
+	beacon = ieee80211_beacon_get(wl->hw, wl->vif);
+	if (unlikely(!beacon))
+		return;
+
 	if (wl->current_beacon)
 		dev_kfree_skb_any(wl->current_beacon);
 	wl->current_beacon = beacon;
@@ -2727,10 +2735,13 @@ static int b43legacy_op_config_interface(struct ieee80211_hw *hw,
 		memset(wl->bssid, 0, ETH_ALEN);
 	if (b43legacy_status(dev) >= B43legacy_STAT_INITIALIZED) {
 		if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_AP)) {
-			B43legacy_WARN_ON(conf->type != IEEE80211_IF_TYPE_AP);
+			B43legacy_WARN_ON(vif->type != IEEE80211_IF_TYPE_AP);
 			b43legacy_set_ssid(dev, conf->ssid, conf->ssid_len);
-			if (conf->beacon)
-				b43legacy_update_templates(wl, conf->beacon);
+			if (conf->changed & IEEE80211_IFCC_BEACON)
+				b43legacy_update_templates(wl);
+		} else if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_IBSS)) {
+			if (conf->changed & IEEE80211_IFCC_BEACON)
+				b43legacy_update_templates(wl);
 		}
 		b43legacy_write_mac_bssid_templates(dev);
 	}
@@ -3394,33 +3405,12 @@ out_unlock:
 
 static int b43legacy_op_beacon_set_tim(struct ieee80211_hw *hw,
 				       int aid, int set)
-{
-	struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw);
-	struct sk_buff *beacon;
-	unsigned long flags;
-
-	/* We could modify the existing beacon and set the aid bit in the TIM
-	 * field, but that would probably require resizing and moving of data
-	 * within the beacon template. Simply request a new beacon and let
-	 * mac80211 do the hard work. */
-	beacon = ieee80211_beacon_get(hw, wl->vif);
-	if (unlikely(!beacon))
-		return -ENOMEM;
-	spin_lock_irqsave(&wl->irq_lock, flags);
-	b43legacy_update_templates(wl, beacon);
-	spin_unlock_irqrestore(&wl->irq_lock, flags);
-
-	return 0;
-}
-
-static int b43legacy_op_ibss_beacon_update(struct ieee80211_hw *hw,
-					   struct sk_buff *beacon)
 {
 	struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw);
 	unsigned long flags;
 
 	spin_lock_irqsave(&wl->irq_lock, flags);
-	b43legacy_update_templates(wl, beacon);
+	b43legacy_update_templates(wl);
 	spin_unlock_irqrestore(&wl->irq_lock, flags);
 
 	return 0;
@@ -3440,7 +3430,6 @@ static const struct ieee80211_ops b43legacy_hw_ops = {
 	.stop			= b43legacy_op_stop,
 	.set_retry_limit	= b43legacy_op_set_retry_limit,
 	.set_tim		= b43legacy_op_beacon_set_tim,
-	.beacon_update		= b43legacy_op_ibss_beacon_update,
 };
 
 /* Hard-reset the chip. Do not call this directly.
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 1a7d18fea89..7d015f86ee8 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -6907,6 +6907,9 @@ static void iwl3945_config_ap(struct iwl3945_priv *priv)
 	 * clear sta table, add BCAST sta... */
 }
 
+/* temporary */
+static int iwl3945_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb);
+
 static int iwl3945_mac_config_interface(struct ieee80211_hw *hw,
 					struct ieee80211_vif *vif,
 				    struct ieee80211_if_conf *conf)
@@ -6924,10 +6927,21 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw,
 		return 0;
 	}
 
+	/* handle this temporarily here */
+	if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS &&
+	    conf->changed & IEEE80211_IFCC_BEACON) {
+		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
+		if (!beacon)
+			return -ENOMEM;
+		rc = iwl3945_mac_beacon_update(hw, beacon);
+		if (rc)
+			return rc;
+	}
+
 	/* XXX: this MUST use conf->mac_addr */
 
 	if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) &&
-	    (!conf->beacon || !conf->ssid_len)) {
+	    (!conf->ssid_len)) {
 		IWL_DEBUG_MAC80211
 		    ("Leaving in AP mode because HostAPD is not ready.\n");
 		return 0;
@@ -6959,7 +6973,7 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw,
 		if (priv->ibss_beacon)
 			dev_kfree_skb(priv->ibss_beacon);
 
-		priv->ibss_beacon = conf->beacon;
+		priv->ibss_beacon = ieee80211_beacon_get(hw, vif);
 	}
 
 	if (iwl3945_is_rfkill(priv))
@@ -7940,7 +7954,6 @@ static struct ieee80211_ops iwl3945_hw_ops = {
 	.conf_tx = iwl3945_mac_conf_tx,
 	.get_tsf = iwl3945_mac_get_tsf,
 	.reset_tsf = iwl3945_mac_reset_tsf,
-	.beacon_update = iwl3945_mac_beacon_update,
 	.hw_scan = iwl3945_mac_hw_scan
 };
 
diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c
index 7f65d9123b2..d6fe0ded59d 100644
--- a/drivers/net/wireless/iwlwifi/iwl4965-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c
@@ -2912,6 +2912,9 @@ static void iwl4965_config_ap(struct iwl_priv *priv)
 	 * clear sta table, add BCAST sta... */
 }
 
+/* temporary */
+static int iwl4965_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb);
+
 static int iwl4965_mac_config_interface(struct ieee80211_hw *hw,
 					struct ieee80211_vif *vif,
 				    struct ieee80211_if_conf *conf)
@@ -2929,8 +2932,18 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw,
 		return 0;
 	}
 
+	if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS &&
+	    conf->changed & IEEE80211_IFCC_BEACON) {
+		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
+		if (!beacon)
+			return -ENOMEM;
+		rc = iwl4965_mac_beacon_update(hw, beacon);
+		if (rc)
+			return rc;
+	}
+
 	if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) &&
-	    (!conf->beacon || !conf->ssid_len)) {
+	    (!conf->ssid_len)) {
 		IWL_DEBUG_MAC80211
 		    ("Leaving in AP mode because HostAPD is not ready.\n");
 		return 0;
@@ -2962,7 +2975,7 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw,
 		if (priv->ibss_beacon)
 			dev_kfree_skb(priv->ibss_beacon);
 
-		priv->ibss_beacon = conf->beacon;
+		priv->ibss_beacon = ieee80211_beacon_get(hw, vif);
 	}
 
 	if (iwl_is_rfkill(priv))
@@ -4090,7 +4103,6 @@ static struct ieee80211_ops iwl4965_hw_ops = {
 	.get_tx_stats = iwl4965_mac_get_tx_stats,
 	.conf_tx = iwl4965_mac_conf_tx,
 	.reset_tsf = iwl4965_mac_reset_tsf,
-	.beacon_update = iwl4965_mac_beacon_update,
 	.bss_info_changed = iwl4965_bss_info_changed,
 	.ampdu_action = iwl4965_mac_ampdu_action,
 	.hw_scan = iwl4965_mac_hw_scan
diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c
index ee953ca0c6a..89b874ca610 100644
--- a/drivers/net/wireless/rt2x00/rt2400pci.c
+++ b/drivers/net/wireless/rt2x00/rt2400pci.c
@@ -1581,7 +1581,6 @@ static const struct ieee80211_ops rt2400pci_mac80211_ops = {
 	.conf_tx		= rt2400pci_conf_tx,
 	.get_tx_stats		= rt2x00mac_get_tx_stats,
 	.get_tsf		= rt2400pci_get_tsf,
-	.beacon_update		= rt2400pci_beacon_update,
 	.tx_last_beacon		= rt2400pci_tx_last_beacon,
 };
 
@@ -1601,6 +1600,7 @@ static const struct rt2x00lib_ops rt2400pci_rt2x00_ops = {
 	.write_tx_data		= rt2x00pci_write_tx_data,
 	.kick_tx_queue		= rt2400pci_kick_tx_queue,
 	.fill_rxdone		= rt2400pci_fill_rxdone,
+	.beacon_update		= rt2400pci_beacon_update,
 	.config_filter		= rt2400pci_config_filter,
 	.config_intf		= rt2400pci_config_intf,
 	.config_erp		= rt2400pci_config_erp,
diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c
index 0423c251c78..a64bb18322e 100644
--- a/drivers/net/wireless/rt2x00/rt2500pci.c
+++ b/drivers/net/wireless/rt2x00/rt2500pci.c
@@ -1875,7 +1875,6 @@ static const struct ieee80211_ops rt2500pci_mac80211_ops = {
 	.conf_tx		= rt2x00mac_conf_tx,
 	.get_tx_stats		= rt2x00mac_get_tx_stats,
 	.get_tsf		= rt2500pci_get_tsf,
-	.beacon_update		= rt2500pci_beacon_update,
 	.tx_last_beacon		= rt2500pci_tx_last_beacon,
 };
 
@@ -1895,6 +1894,7 @@ static const struct rt2x00lib_ops rt2500pci_rt2x00_ops = {
 	.write_tx_data		= rt2x00pci_write_tx_data,
 	.kick_tx_queue		= rt2500pci_kick_tx_queue,
 	.fill_rxdone		= rt2500pci_fill_rxdone,
+	.beacon_update		= rt2500pci_beacon_update,
 	.config_filter		= rt2500pci_config_filter,
 	.config_intf		= rt2500pci_config_intf,
 	.config_erp		= rt2500pci_config_erp,
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index 0dd1cb537b9..8ce1726d750 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -1775,7 +1775,6 @@ static const struct ieee80211_ops rt2500usb_mac80211_ops = {
 	.bss_info_changed	= rt2x00mac_bss_info_changed,
 	.conf_tx		= rt2x00mac_conf_tx,
 	.get_tx_stats		= rt2x00mac_get_tx_stats,
-	.beacon_update		= rt2500usb_beacon_update,
 };
 
 static const struct rt2x00lib_ops rt2500usb_rt2x00_ops = {
@@ -1793,6 +1792,7 @@ static const struct rt2x00lib_ops rt2500usb_rt2x00_ops = {
 	.get_tx_data_len	= rt2500usb_get_tx_data_len,
 	.kick_tx_queue		= rt2500usb_kick_tx_queue,
 	.fill_rxdone		= rt2500usb_fill_rxdone,
+	.beacon_update		= rt2500usb_beacon_update,
 	.config_filter		= rt2500usb_config_filter,
 	.config_intf		= rt2500usb_config_intf,
 	.config_erp		= rt2500usb_config_erp,
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index c07d9ef383f..9ad3ce43e6c 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -534,6 +534,8 @@ struct rt2x00lib_ops {
 	/*
 	 * Configuration handlers.
 	 */
+	int (*beacon_update) (struct ieee80211_hw *hw, struct sk_buff *bcn);
+
 	void (*config_filter) (struct rt2x00_dev *rt2x00dev,
 			       const unsigned int filter_flags);
 	void (*config_intf) (struct rt2x00_dev *rt2x00dev,
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index b48c04e80a3..1b7f87799a7 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -409,7 +409,6 @@ static void rt2x00lib_intf_scheduled_iter(void *data, u8 *mac,
 {
 	struct rt2x00_dev *rt2x00dev = data;
 	struct rt2x00_intf *intf = vif_to_intf(vif);
-	struct sk_buff *skb;
 	struct ieee80211_bss_conf conf;
 	int delayed_flags;
 
@@ -436,10 +435,11 @@ static void rt2x00lib_intf_scheduled_iter(void *data, u8 *mac,
 		return;
 
 	if (delayed_flags & DELAYED_UPDATE_BEACON) {
-		skb = ieee80211_beacon_get(rt2x00dev->hw, vif);
-		if (skb &&
-		    rt2x00dev->ops->hw->beacon_update(rt2x00dev->hw, skb))
-			dev_kfree_skb(skb);
+		struct ieee80211_if_conf conf;
+		conf.bssid = conf.ssid = NULL;
+		conf.ssid_len = 0;
+		conf.changed = IEEE80211_IFCC_BEACON;
+		rt2x00dev->ops->hw->config_interface(rt2x00dev->hw, vif, &conf);
 	}
 
 	if (delayed_flags & DELAYED_CONFIG_ERP)
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 3a1fb6d47e5..84b51f49175 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -348,6 +348,7 @@ int rt2x00mac_config_interface(struct ieee80211_hw *hw,
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
 	struct rt2x00_intf *intf = vif_to_intf(vif);
+	struct sk_buff *beacon;
 	int status;
 
 	/*
@@ -363,8 +364,11 @@ int rt2x00mac_config_interface(struct ieee80211_hw *hw,
 	 * If the interface does not work in master mode,
 	 * then the bssid value in the interface structure
 	 * should now be set.
+	 *
+	 * conf->bssid can be NULL if coming from the internal
+	 * beacon update routine.
 	 */
-	if (conf->type != IEEE80211_IF_TYPE_AP)
+	if (conf->bssid && vif->type != IEEE80211_IF_TYPE_AP)
 		memcpy(&intf->bssid, conf->bssid, ETH_ALEN);
 
 	spin_unlock(&intf->lock);
@@ -375,17 +379,23 @@ int rt2x00mac_config_interface(struct ieee80211_hw *hw,
 	 * values as arguments we make keep access to rt2x00_intf thread safe
 	 * even without the lock.
 	 */
-	rt2x00lib_config_intf(rt2x00dev, intf, conf->type, NULL, conf->bssid);
+	rt2x00lib_config_intf(rt2x00dev, intf, vif->type, NULL, conf->bssid);
 
 	/*
-	 * We only need to initialize the beacon when master mode is enabled.
+	 * We only need to initialize the beacon when in master/ibss mode.
 	 */
-	if (conf->type != IEEE80211_IF_TYPE_AP || !conf->beacon)
+	if ((vif->type != IEEE80211_IF_TYPE_AP &&
+	     vif->type != IEEE80211_IF_TYPE_IBSS) ||
+	    !(conf->changed & IEEE80211_IFCC_BEACON))
 		return 0;
 
-	status = rt2x00dev->ops->hw->beacon_update(rt2x00dev->hw, conf->beacon);
+	beacon = ieee80211_beacon_get(rt2x00dev->hw, vif);
+	if (!beacon)
+		return -ENOMEM;
+
+	status = rt2x00dev->ops->lib->beacon_update(rt2x00dev->hw, beacon);
 	if (status)
-		dev_kfree_skb(conf->beacon);
+		dev_kfree_skb(beacon);
 
 	return status;
 }
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index bbf1048f640..852d193a11a 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -2427,7 +2427,6 @@ static const struct ieee80211_ops rt61pci_mac80211_ops = {
 	.conf_tx		= rt2x00mac_conf_tx,
 	.get_tx_stats		= rt2x00mac_get_tx_stats,
 	.get_tsf		= rt61pci_get_tsf,
-	.beacon_update		= rt61pci_beacon_update,
 };
 
 static const struct rt2x00lib_ops rt61pci_rt2x00_ops = {
@@ -2449,6 +2448,7 @@ static const struct rt2x00lib_ops rt61pci_rt2x00_ops = {
 	.write_tx_data		= rt2x00pci_write_tx_data,
 	.kick_tx_queue		= rt61pci_kick_tx_queue,
 	.fill_rxdone		= rt61pci_fill_rxdone,
+	.beacon_update		= rt61pci_beacon_update,
 	.config_filter		= rt61pci_config_filter,
 	.config_intf		= rt61pci_config_intf,
 	.config_erp		= rt61pci_config_erp,
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index 3ef318e098e..65720097242 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -2031,7 +2031,6 @@ static const struct ieee80211_ops rt73usb_mac80211_ops = {
 	.conf_tx		= rt2x00mac_conf_tx,
 	.get_tx_stats		= rt2x00mac_get_tx_stats,
 	.get_tsf		= rt73usb_get_tsf,
-	.beacon_update		= rt73usb_beacon_update,
 };
 
 static const struct rt2x00lib_ops rt73usb_rt2x00_ops = {
@@ -2052,6 +2051,7 @@ static const struct rt2x00lib_ops rt73usb_rt2x00_ops = {
 	.get_tx_data_len	= rt73usb_get_tx_data_len,
 	.kick_tx_queue		= rt73usb_kick_tx_queue,
 	.fill_rxdone		= rt73usb_fill_rxdone,
+	.beacon_update		= rt73usb_beacon_update,
 	.config_filter		= rt73usb_config_filter,
 	.config_intf		= rt73usb_config_intf,
 	.config_erp		= rt73usb_config_erp,
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 665f76af2fe..68e2749b2ce 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -727,15 +727,19 @@ static int zd_op_config_interface(struct ieee80211_hw *hw,
 	if (mac->type == IEEE80211_IF_TYPE_MESH_POINT ||
 	    mac->type == IEEE80211_IF_TYPE_IBSS) {
 		associated = true;
-		if (conf->beacon) {
-			r = zd_mac_config_beacon(hw, conf->beacon);
+		if (conf->changed & IEEE80211_IFCC_BEACON) {
+			struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
+
+			if (!beacon)
+				return -ENOMEM;
+			r = zd_mac_config_beacon(hw, beacon);
 			if (r < 0)
 				return r;
 			r = zd_set_beacon_interval(&mac->chip, BCN_MODE_IBSS |
 					hw->conf.beacon_int);
 			if (r < 0)
 				return r;
-			kfree_skb(conf->beacon);
+			kfree_skb(beacon);
 		}
 	} else
 		associated = is_valid_ether_addr(conf->bssid);
@@ -889,17 +893,6 @@ static void zd_op_bss_info_changed(struct ieee80211_hw *hw,
 	}
 }
 
-static int zd_op_beacon_update(struct ieee80211_hw *hw,
-			       struct sk_buff *skb)
-{
-	struct zd_mac *mac = zd_hw_mac(hw);
-	zd_mac_config_beacon(hw, skb);
-	kfree_skb(skb);
-	zd_set_beacon_interval(&mac->chip, BCN_MODE_IBSS |
-					hw->conf.beacon_int);
-	return 0;
-}
-
 static const struct ieee80211_ops zd_ops = {
 	.tx			= zd_op_tx,
 	.start			= zd_op_start,
@@ -910,7 +903,6 @@ static const struct ieee80211_ops zd_ops = {
 	.config_interface	= zd_op_config_interface,
 	.configure_filter	= zd_op_configure_filter,
 	.bss_info_changed	= zd_op_bss_info_changed,
-	.beacon_update		= zd_op_beacon_update,
 };
 
 struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf)
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9672a04c4f7..47c072eab42 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -528,34 +528,39 @@ struct ieee80211_if_init_conf {
 	void *mac_addr;
 };
 
+/**
+ * enum ieee80211_if_conf_change - interface config change flags
+ *
+ * @IEEE80211_IFCC_BSSID: The BSSID changed.
+ * @IEEE80211_IFCC_SSID: The SSID changed.
+ * @IEEE80211_IFCC_BEACON: The beacon for this interface changed
+ *	(currently AP and MESH only), use ieee80211_beacon_get().
+ */
+enum ieee80211_if_conf_change {
+	IEEE80211_IFCC_BSSID	= BIT(0),
+	IEEE80211_IFCC_SSID	= BIT(1),
+	IEEE80211_IFCC_BEACON	= BIT(2),
+};
+
 /**
  * struct ieee80211_if_conf - configuration of an interface
  *
- * @type: type of the interface. This is always the same as was specified in
- *	&struct ieee80211_if_init_conf. The type of an interface never changes
- *	during the life of the interface; this field is present only for
- *	convenience.
+ * @changed: parameters that have changed, see &enum ieee80211_if_conf_change.
  * @bssid: BSSID of the network we are associated to/creating.
  * @ssid: used (together with @ssid_len) by drivers for hardware that
  *	generate beacons independently. The pointer is valid only during the
  *	config_interface() call, so copy the value somewhere if you need
  *	it.
  * @ssid_len: length of the @ssid field.
- * @beacon: beacon template. Valid only if @host_gen_beacon_template in
- *	&struct ieee80211_hw is set. The driver is responsible of freeing
- *	the sk_buff.
- * @beacon_control: tx_control for the beacon template, this field is only
- *	valid when the @beacon field was set.
  *
  * This structure is passed to the config_interface() callback of
  * &struct ieee80211_hw.
  */
 struct ieee80211_if_conf {
-	int type;
+	u32 changed;
 	u8 *bssid;
 	u8 *ssid;
 	size_t ssid_len;
-	struct sk_buff *beacon;
 };
 
 /**
@@ -683,15 +688,6 @@ enum ieee80211_tkip_key_type {
  * any particular flags. There are some exceptions to this rule,
  * however, so you are advised to review these flags carefully.
  *
- * @IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE:
- *	The device only needs to be supplied with a beacon template.
- *	If you need the host to generate each beacon then don't use
- *	this flag and call ieee80211_beacon_get() when you need the
- *	next beacon frame. Note that if you set this flag, you must
- *	implement the set_tim() callback for powersave mode to work
- *	properly.
- *	This flag is only relevant for access-point mode.
- *
  * @IEEE80211_HW_RX_INCLUDES_FCS:
  *	Indicates that received frames passed to the stack include
  *	the FCS at the end.
@@ -1151,17 +1147,6 @@ enum ieee80211_ampdu_mlme_action {
  *	function is optional if the firmware/hardware takes full care of
  *	TSF synchronization.
  *
- * @beacon_update: Setup beacon data for IBSS beacons. Unlike access point,
- *	IBSS uses a fixed beacon frame which is configured using this
- *	function.
- *	If the driver returns success (0) from this callback, it owns
- *	the skb. That means the driver is responsible to kfree_skb() it.
- *	The control structure is not dynamically allocated. That means the
- *	driver does not own the pointer and if it needs it somewhere
- *	outside of the context of this function, it must copy it
- *	somewhere else.
- *	This handler is required only for IBSS mode.
- *
  * @tx_last_beacon: Determine whether the last IBSS beacon was sent by us.
  *	This is needed only for IBSS mode and the result of this function is
  *	used to determine whether to reply to Probe Requests.
@@ -1219,8 +1204,6 @@ struct ieee80211_ops {
 			    struct ieee80211_tx_queue_stats *stats);
 	u64 (*get_tsf)(struct ieee80211_hw *hw);
 	void (*reset_tsf)(struct ieee80211_hw *hw);
-	int (*beacon_update)(struct ieee80211_hw *hw,
-			     struct sk_buff *skb);
 	int (*tx_last_beacon)(struct ieee80211_hw *hw);
 	int (*ampdu_action)(struct ieee80211_hw *hw,
 			    enum ieee80211_ampdu_mlme_action action,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ea0301025c1..8e7ba0e62cf 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -469,7 +469,7 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
 
 	kfree(old);
 
-	return ieee80211_if_config_beacon(sdata->dev);
+	return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
 }
 
 static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
@@ -523,7 +523,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
 	synchronize_rcu();
 	kfree(old);
 
-	return ieee80211_if_config_beacon(dev);
+	return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
 }
 
 /* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2146c0c436d..934c3ef4f0b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -854,8 +854,7 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
 
 /* ieee80211.c */
 int ieee80211_hw_config(struct ieee80211_local *local);
-int ieee80211_if_config(struct net_device *dev);
-int ieee80211_if_config_beacon(struct net_device *dev);
+int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed);
 void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
 u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 			struct ieee80211_ht_info *req_ht_cap,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0759ab2ca3f..36859e79492 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -307,7 +307,8 @@ static int ieee80211_open(struct net_device *dev)
 		if (res)
 			goto err_stop;
 
-		ieee80211_if_config(dev);
+		if (ieee80211_vif_is_mesh(&sdata->vif))
+			ieee80211_start_mesh(sdata->dev);
 		changed |= ieee80211_reset_erp_info(dev);
 		ieee80211_bss_info_change_notify(sdata, changed);
 		ieee80211_enable_keys(sdata);
@@ -985,57 +986,47 @@ void ieee80211_if_setup(struct net_device *dev)
 
 /* everything else */
 
-static int __ieee80211_if_config(struct net_device *dev,
-				 struct sk_buff *beacon)
+int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_conf conf;
 
-	if (!local->ops->config_interface || !netif_running(dev))
+	if (WARN_ON(!netif_running(sdata->dev)))
+		return 0;
+
+	if (!local->ops->config_interface)
 		return 0;
 
 	memset(&conf, 0, sizeof(conf));
-	conf.type = sdata->vif.type;
+	conf.changed = changed;
+
 	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
 	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		conf.bssid = sdata->u.sta.bssid;
 		conf.ssid = sdata->u.sta.ssid;
 		conf.ssid_len = sdata->u.sta.ssid_len;
-	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
-		conf.beacon = beacon;
-		ieee80211_start_mesh(dev);
 	} else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
+		conf.bssid = sdata->dev->dev_addr;
 		conf.ssid = sdata->u.ap.ssid;
 		conf.ssid_len = sdata->u.ap.ssid_len;
-		conf.beacon = beacon;
+	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		u8 zero[ETH_ALEN] = { 0 };
+		conf.bssid = zero;
+		conf.ssid = zero;
+		conf.ssid_len = 0;
+	} else {
+		WARN_ON(1);
+		return -EINVAL;
 	}
-	return local->ops->config_interface(local_to_hw(local),
-					    &sdata->vif, &conf);
-}
 
-int ieee80211_if_config(struct net_device *dev)
-{
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT &&
-	    (local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE))
-		return ieee80211_if_config_beacon(dev);
-	return __ieee80211_if_config(dev, NULL);
-}
+	if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID)))
+		return -EINVAL;
 
-int ieee80211_if_config_beacon(struct net_device *dev)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct sk_buff *skb;
+	if (WARN_ON(!conf.ssid && (changed & IEEE80211_IFCC_SSID)))
+		return -EINVAL;
 
-	if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE))
-		return 0;
-	skb = ieee80211_beacon_get(local_to_hw(local), &sdata->vif);
-	if (!skb)
-		return -ENOMEM;
-	return __ieee80211_if_config(dev, skb);
+	return local->ops->config_interface(local_to_hw(local),
+					    &sdata->vif, &conf);
 }
 
 int ieee80211_hw_config(struct ieee80211_local *local)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 64d710a88b8..61d7f81bf45 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2406,8 +2406,6 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 	int res, rates, i, j;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	struct ieee80211_tx_info *control;
-	struct rate_selection ratesel;
 	u8 *pos;
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_supported_band *sband;
@@ -2425,7 +2423,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 		local->ops->reset_tsf(local_to_hw(local));
 	}
 	memcpy(ifsta->bssid, bss->bssid, ETH_ALEN);
-	res = ieee80211_if_config(dev);
+	res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
 	if (res)
 		return res;
 
@@ -2439,19 +2437,16 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 	if (res)
 		return res;
 
-	/* Set beacon template */
+	/* Build IBSS probe response */
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
-	do {
-		if (!skb)
-			break;
-
+	if (skb) {
 		skb_reserve(skb, local->hw.extra_tx_headroom);
 
 		mgmt = (struct ieee80211_mgmt *)
 			skb_put(skb, 24 + sizeof(mgmt->u.beacon));
 		memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
 		mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-						   IEEE80211_STYPE_BEACON);
+						   IEEE80211_STYPE_PROBE_RESP);
 		memset(mgmt->da, 0xff, ETH_ALEN);
 		memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
@@ -2495,61 +2490,22 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 			memcpy(pos, &bss->supp_rates[8], rates);
 		}
 
-		control = IEEE80211_SKB_CB(skb);
-
-		rate_control_get_rate(dev, sband, skb, &ratesel);
-		if (ratesel.rate_idx < 0) {
-			printk(KERN_DEBUG "%s: Failed to determine TX rate "
-			       "for IBSS beacon\n", dev->name);
-			break;
-		}
-		control->control.vif = &sdata->vif;
-		control->tx_rate_idx = ratesel.rate_idx;
-		if (sdata->bss_conf.use_short_preamble &&
-		    sband->bitrates[ratesel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
-			control->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
-		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
-		control->flags |= IEEE80211_TX_CTL_NO_ACK;
-		control->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
-		control->control.retry_limit = 1;
-
-		ifsta->probe_resp = skb_copy(skb, GFP_ATOMIC);
-		if (ifsta->probe_resp) {
-			mgmt = (struct ieee80211_mgmt *)
-				ifsta->probe_resp->data;
-			mgmt->frame_control =
-				IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					     IEEE80211_STYPE_PROBE_RESP);
-		} else {
-			printk(KERN_DEBUG "%s: Could not allocate ProbeResp "
-			       "template for IBSS\n", dev->name);
-		}
-
-		if (local->ops->beacon_update &&
-		    local->ops->beacon_update(local_to_hw(local), skb) == 0) {
-			printk(KERN_DEBUG "%s: Configured IBSS beacon "
-			       "template\n", dev->name);
-			skb = NULL;
-		}
-
-		rates = 0;
-		sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-		for (i = 0; i < bss->supp_rates_len; i++) {
-			int bitrate = (bss->supp_rates[i] & 0x7f) * 5;
-			for (j = 0; j < sband->n_bitrates; j++)
-				if (sband->bitrates[j].bitrate == bitrate)
-					rates |= BIT(j);
-		}
-		ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates;
+		ifsta->probe_resp = skb;
 
-		ieee80211_sta_def_wmm_params(dev, bss, 1);
-	} while (0);
+		ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
+	}
 
-	if (skb) {
-		printk(KERN_DEBUG "%s: Failed to configure IBSS beacon "
-		       "template\n", dev->name);
-		dev_kfree_skb(skb);
+	rates = 0;
+	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+	for (i = 0; i < bss->supp_rates_len; i++) {
+		int bitrate = (bss->supp_rates[i] & 0x7f) * 5;
+		for (j = 0; j < sband->n_bitrates; j++)
+			if (sband->bitrates[j].bitrate == bitrate)
+				rates |= BIT(j);
 	}
+	ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates;
+
+	ieee80211_sta_def_wmm_params(dev, bss, 1);
 
 	ifsta->state = IEEE80211_IBSS_JOINED;
 	mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
@@ -3333,7 +3289,7 @@ static void ieee80211_mesh_housekeeping(struct net_device *dev,
 
 	free_plinks = mesh_plink_availables(sdata);
 	if (free_plinks != sdata->u.sta.accepting_plinks)
-		ieee80211_if_config_beacon(dev);
+		ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
 
 	mod_timer(&ifsta->timer, jiffies +
 			IEEE80211_MESH_HOUSEKEEPING_INTERVAL);
@@ -3757,28 +3713,45 @@ int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta;
+	int res;
 
 	if (len > IEEE80211_MAX_SSID_LEN)
 		return -EINVAL;
 
 	ifsta = &sdata->u.sta;
 
-	if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0)
+	if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) {
+		memset(ifsta->ssid, 0, sizeof(ifsta->ssid));
+		memcpy(ifsta->ssid, ssid, len);
+		ifsta->ssid_len = len;
 		ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
-	memcpy(ifsta->ssid, ssid, len);
-	memset(ifsta->ssid + len, 0, IEEE80211_MAX_SSID_LEN - len);
-	ifsta->ssid_len = len;
+
+		res = 0;
+		/*
+		 * Hack! MLME code needs to be cleaned up to have different
+		 * entry points for configuration and internal selection change
+		 */
+		if (netif_running(sdata->dev))
+			res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID);
+		if (res) {
+			printk(KERN_DEBUG "%s: Failed to config new SSID to "
+			       "the low-level driver\n", dev->name);
+			return res;
+		}
+	}
 
 	if (len)
 		ifsta->flags |= IEEE80211_STA_SSID_SET;
 	else
 		ifsta->flags &= ~IEEE80211_STA_SSID_SET;
+
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
 	    !(ifsta->flags & IEEE80211_STA_BSSID_SET)) {
 		ifsta->ibss_join_req = jiffies;
 		ifsta->state = IEEE80211_IBSS_SEARCH;
 		return ieee80211_sta_find_ibss(dev, ifsta);
 	}
+
 	return 0;
 }
 
@@ -3804,7 +3777,12 @@ int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid)
 
 	if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) {
 		memcpy(ifsta->bssid, bssid, ETH_ALEN);
-		res = ieee80211_if_config(dev);
+		res = 0;
+		/*
+		 * Hack! See also ieee80211_sta_set_ssid.
+		 */
+		if (netif_running(sdata->dev))
+			res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
 		if (res) {
 			printk(KERN_DEBUG "%s: Failed to config new BSSID to "
 			       "the low-level driver\n", dev->name);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index a757dcc1208..8843416e146 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1788,17 +1788,17 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 				     struct ieee80211_vif *vif)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	struct ieee80211_tx_info *info;
 	struct net_device *bdev;
 	struct ieee80211_sub_if_data *sdata = NULL;
 	struct ieee80211_if_ap *ap = NULL;
+	struct ieee80211_if_sta *ifsta = NULL;
 	struct rate_selection rsel;
 	struct beacon_data *beacon;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_mgmt *mgmt;
 	int *num_beacons;
-	bool err = true;
 	enum ieee80211_band band = local->hw.conf.channel->band;
 	u8 *pos;
 
@@ -1852,9 +1852,24 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 				       beacon->tail, beacon->tail_len);
 
 			num_beacons = &ap->num_beacons;
+		} else
+			goto out;
+	} else if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+		struct ieee80211_hdr *hdr;
+		ifsta = &sdata->u.sta;
 
-			err = false;
-		}
+		if (!ifsta->probe_resp)
+			goto out;
+
+		skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC);
+		if (!skb)
+			goto out;
+
+		hdr = (struct ieee80211_hdr *) skb->data;
+		hdr->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+						  IEEE80211_STYPE_BEACON);
+
+		num_beacons = &ifsta->num_beacons;
 	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
 		/* headroom, head length, tail length and maximum TIM length */
 		skb = dev_alloc_skb(local->tx_headroom + 400);
@@ -1881,17 +1896,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 		mesh_mgmt_ies_add(skb, sdata->dev);
 
 		num_beacons = &sdata->u.sta.num_beacons;
-
-		err = false;
-	}
-
-	if (err) {
-#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-		if (net_ratelimit())
-			printk(KERN_DEBUG "no beacon data avail for %s\n",
-			       bdev->name);
-#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
-		skb = NULL;
+	} else {
+		WARN_ON(1);
 		goto out;
 	}
 
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index c041db9556c..34fa8ed1e78 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -444,7 +444,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
 		memset(sdata->u.ap.ssid + len, 0,
 		       IEEE80211_MAX_SSID_LEN - len);
 		sdata->u.ap.ssid_len = len;
-		return ieee80211_if_config(dev);
+		return ieee80211_if_config(sdata, IEEE80211_IFCC_SSID);
 	}
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3-70-g09d2


From 2560b6e2e4b87df211ea39b3b02498959b70b4e8 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 10 Jul 2008 00:47:19 +0300
Subject: mac80211: Fix ieee80211_rx_reorder_ampdu: ignore QoS null packets

This patch fixes the check at the entrance to ieee80211_rx_reorder_ampdu.
This check has been broken by 'mac80211: rx.c use new helpers'.

Letting QoS NULL packet in ieee80211_rx_reorder_ampdu led to packet loss in
RX.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 244ee2d50a5..ba332c9dc19 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2041,8 +2041,8 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
 
 	tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
 
-	/* null data frames are excluded */
-	if (unlikely(ieee80211_is_nullfunc(hdr->frame_control)))
+	/* qos null data frames are excluded */
+	if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC)))
 		goto end_reorder;
 
 	/* new un-ordered ampdu frame - process it */
-- 
cgit v1.2.3-70-g09d2


From 1411f9b531f0a910cd1c85a337737c1e6ffbae6a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 10 Jul 2008 10:11:02 +0200
Subject: mac80211: fix RX sequence number check

According to 802.11-2007, we are doing the wrong thing in the
sequence number checks when receiving frames. This fixes it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index ba332c9dc19..6d9ae67c27c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -334,13 +334,18 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
 		else
 			rx->flags &= ~IEEE80211_RX_AMSDU;
 	} else {
-		if (unlikely(ieee80211_is_mgmt(hdr->frame_control))) {
-			/* Separate TID for management frames */
-			tid = NUM_RX_DATA_QUEUES - 1;
-		} else {
-			/* no qos control present */
-			tid = 0; /* 802.1d - Best Effort */
-		}
+		/*
+		 * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"):
+		 *
+		 *	Sequence numbers for management frames, QoS data
+		 *	frames with a broadcast/multicast address in the
+		 *	Address 1 field, and all non-QoS data frames sent
+		 *	by QoS STAs are assigned using an additional single
+		 *	modulo-4096 counter, [...]
+		 *
+		 * We also use that counter for non-QoS STAs.
+		 */
+		tid = NUM_RX_DATA_QUEUES - 1;
 	}
 
 	rx->queue = tid;
-- 
cgit v1.2.3-70-g09d2


From 22bb1be4d271961846cd0889b0f8d671db773080 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 10 Jul 2008 11:16:47 +0200
Subject: wext: make sysfs bits optional and deprecate them

The /sys/class/net/*/wireless/ direcory is, as far as I know, not
used by anyone. Additionally, the same data is available via wext
ioctls. Hence the sysfs files are pretty much useless. This patch
makes them optional and schedules them for removal.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Jean Tourrilhes <jt@hpl.hp.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/feature-removal-schedule.txt | 10 ++++++++++
 net/core/net-sysfs.c                       |  4 ++--
 net/wireless/Kconfig                       | 11 +++++++++++
 3 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 8319c462c9f..db300e09c9a 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -333,3 +333,13 @@ Why:	This option was introduced just to allow older lm-sensors userspace
 	to keep working over the upgrade to 2.6.26. At the scheduled time of
 	removal fixed lm-sensors (2.x or 3.x) should be readily available.
 Who:	Rene Herman <rene.herman@gmail.com>
+
+---------------------------
+
+What:	Code that is now under CONFIG_WIRELESS_EXT_SYSFS
+	(in net/core/net-sysfs.c)
+When:	After the only user (hal) has seen a release with the patches
+	for enough time, probably some time in 2010.
+Why:	Over 1K .text/.data size reduction, data is available in other
+	ways (ioctls)
+Who:	Johannes Berg <johannes@sipsolutions.net>
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 3f794131921..c1f4e0d428c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -318,7 +318,7 @@ static struct attribute_group netstat_group = {
 	.attrs  = netstat_attrs,
 };
 
-#ifdef CONFIG_WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
 /* helper function that does all the locking etc for wireless stats */
 static ssize_t wireless_show(struct device *d, char *buf,
 			     ssize_t (*format)(const struct iw_statistics *,
@@ -459,7 +459,7 @@ int netdev_register_kobject(struct net_device *net)
 #ifdef CONFIG_SYSFS
 	*groups++ = &netstat_group;
 
-#ifdef CONFIG_WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
 	if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
 		*groups++ = &wireless_group;
 #endif
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 79270903bda..ab015c62d56 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -29,3 +29,14 @@ config WIRELESS_EXT
 
 	  Say N (if you can) unless you know you need wireless
 	  extensions for external modules.
+
+config WIRELESS_EXT_SYSFS
+	bool "Wireless extensions sysfs files"
+	default y
+	depends on WIRELESS_EXT && SYSFS
+	help
+	  This option enables the deprecated wireless statistics
+	  files in /sys/class/net/*/wireless/. The same information
+	  is available via the ioctls as well.
+
+	  Say Y if you have programs using it (we don't know of any).
-- 
cgit v1.2.3-70-g09d2


From f591fa5dbbbeaebd95c9c019b3a536a327fb79de Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 10 Jul 2008 11:21:26 +0200
Subject: mac80211: fix TX sequence numbers

This patch makes mac80211 assign proper sequence numbers to
QoS-data frames. It also removes the old sequence number code
because we noticed that only the driver or hardware can assign
sequence numbers to non-QoS-data and especially management
frames in a race-free manner because beacons aren't passed
through mac80211's TX path.

This patch also adds temporary code to the rt2x00 drivers to
not break them completely, that code will have to be reworked
for proper sequence numbers on beacons.

It also moves sequence number assignment down in the TX path
so no sequence numbers are assigned to frames that are dropped.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/b43/xmit.c         |  3 +-
 drivers/net/wireless/b43legacy/xmit.c   |  3 +-
 drivers/net/wireless/rt2x00/rt2x00.h    |  2 +
 drivers/net/wireless/rt2x00/rt2x00mac.c | 13 ++++++
 include/net/mac80211.h                  | 12 +++++
 net/mac80211/ieee80211_i.h              |  2 -
 net/mac80211/iface.c                    |  1 -
 net/mac80211/sta_info.h                 |  1 +
 net/mac80211/tx.c                       | 79 +++++++++++++++++++++------------
 9 files changed, 82 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index bf6f6c1ed4c..8d54502222a 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -317,7 +317,8 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 	/* MAC control */
 	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
 		mac_ctl |= B43_TXH_MAC_ACK;
-	if (!ieee80211_is_pspoll(fctl))
+	/* use hardware sequence counter as the non-TID counter */
+	if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ)
 		mac_ctl |= B43_TXH_MAC_HWSEQ;
 	if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
 		mac_ctl |= B43_TXH_MAC_STMSDU;
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index a3540787eb5..e969ed8d412 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -295,8 +295,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 	/* MAC control */
 	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
 		mac_ctl |= B43legacy_TX4_MAC_ACK;
-	if (!(((fctl & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) &&
-	      ((fctl & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)))
+	if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ)
 		mac_ctl |= B43legacy_TX4_MAC_HWSEQ;
 	if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
 		mac_ctl |= B43legacy_TX4_MAC_STMSDU;
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index f0d7e083d8f..9fab0df18c3 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -364,6 +364,8 @@ struct rt2x00_intf {
 #define DELAYED_UPDATE_BEACON		0x00000001
 #define DELAYED_CONFIG_ERP		0x00000002
 #define DELAYED_LED_ASSOC		0x00000004
+
+	u16 seqno;
 };
 
 static inline struct rt2x00_intf* vif_to_intf(struct ieee80211_vif *vif)
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 16b72d9ca1c..77af1df5d89 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -96,6 +96,7 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data;
 	enum data_queue_qid qid = skb_get_queue_mapping(skb);
+	struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
 	struct data_queue *queue;
 	u16 frame_control;
 
@@ -151,6 +152,18 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 		}
 	}
 
+	/*
+	 * XXX: This is as wrong as the old mac80211 code was,
+	 *	due to beacons not getting sequence numbers assigned
+	 *	properly.
+	 */
+	if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
+		if (tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
+			intf->seqno += 0x10;
+		ieee80211hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
+		ieee80211hdr->seq_ctrl |= cpu_to_le16(intf->seqno);
+	}
+
 	if (rt2x00queue_write_tx_frame(queue, skb)) {
 		ieee80211_stop_queue(rt2x00dev->hw, qid);
 		return NETDEV_TX_BUSY;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 47c072eab42..1dbd49fc557 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -239,6 +239,17 @@ struct ieee80211_bss_conf {
  * 	is for the whole aggregation.
  * @IEEE80211_TX_STAT_AMPDU_NO_BACK: no block ack was returned,
  * 	so consider using block ack request (BAR).
+ * @IEEE80211_TX_CTL_ASSIGN_SEQ: The driver has to assign a sequence
+ *	number to this frame, taking care of not overwriting the fragment
+ *	number and increasing the sequence number only when the
+ *	IEEE80211_TX_CTL_FIRST_FRAGMENT flags is set. mac80211 will properly
+ *	assign sequence numbers to QoS-data frames but cannot do so correctly
+ *	for non-QoS-data and management frames because beacons need them from
+ *	that counter as well and mac80211 cannot guarantee proper sequencing.
+ *	If this flag is set, the driver should instruct the hardware to
+ *	assign a sequence number to the frame or assign one itself. Cf. IEEE
+ *	802.11-2007 7.1.3.4.1 paragraph 3. This flag will always be set for
+ *	beacons always be clear for frames without a sequence number field.
  */
 enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
@@ -265,6 +276,7 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_STAT_ACK			= BIT(21),
 	IEEE80211_TX_STAT_AMPDU			= BIT(22),
 	IEEE80211_TX_STAT_AMPDU_NO_BACK		= BIT(23),
+	IEEE80211_TX_CTL_ASSIGN_SEQ		= BIT(24),
 };
 
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 934c3ef4f0b..cbea0154ee3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -419,8 +419,6 @@ struct ieee80211_sub_if_data {
 	 */
 	u64 basic_rates;
 
-	u16 sequence;
-
 	/* Fragment table for host-based reassembly */
 	struct ieee80211_fragment_entry	fragments[IEEE80211_FRAGMENT_MAX];
 	unsigned int fragment_next;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 2e3adcb3ce2..610ed1d9893 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -162,7 +162,6 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 	/* reset some values that shouldn't be kept across type changes */
 	sdata->basic_rates = 0;
 	sdata->drop_unencrypted = 0;
-	sdata->sequence = 0;
 
 	return 0;
 }
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 94311dcfe04..109db787ccb 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -285,6 +285,7 @@ struct sta_info {
 	unsigned long tx_fragments;
 	int txrate_idx;
 	int last_txrate_idx;
+	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 #ifdef CONFIG_MAC80211_DEBUG_COUNTERS
 	unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES];
 #endif
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8843416e146..0fbadd8b983 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -38,16 +38,6 @@
 
 /* misc utils */
 
-static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata,
-					      struct ieee80211_hdr *hdr)
-{
-	/* Set the sequence number for this frame. */
-	hdr->seq_ctrl = cpu_to_le16(sdata->sequence);
-
-	/* Increase the sequence number. */
-	sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ;
-}
-
 #ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP
 static void ieee80211_dump_frame(const char *ifname, const char *title,
 				 const struct sk_buff *skb)
@@ -274,17 +264,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
-static ieee80211_tx_result debug_noinline
-ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
-{
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-
-	if (ieee80211_hdrlen(hdr->frame_control) >= 24)
-		ieee80211_include_sequence(tx->sdata, hdr);
-
-	return TX_CONTINUE;
-}
-
 /* This function is called whenever the AP is about to exceed the maximum limit
  * of buffered frames for power saving STAs. This situation should not really
  * happen often during normal operation, so dropping the oldest buffered packet
@@ -641,6 +620,49 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	return TX_CONTINUE;
 }
 
+static ieee80211_tx_result debug_noinline
+ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
+	u16 *seq;
+	u8 *qc;
+	int tid;
+
+	/* only for injected frames */
+	if (unlikely(ieee80211_is_ctl(hdr->frame_control)))
+		return TX_CONTINUE;
+
+	if (ieee80211_hdrlen(hdr->frame_control) < 24)
+		return TX_CONTINUE;
+
+	if (!ieee80211_is_data_qos(hdr->frame_control)) {
+		info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+		return TX_CONTINUE;
+	}
+
+	/*
+	 * This should be true for injected/management frames only, for
+	 * management frames we have set the IEEE80211_TX_CTL_ASSIGN_SEQ
+	 * above since they are not QoS-data frames.
+	 */
+	if (!tx->sta)
+		return TX_CONTINUE;
+
+	/* include per-STA, per-TID sequence counter */
+
+	qc = ieee80211_get_qos_ctl(hdr);
+	tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+	seq = &tx->sta->tid_seq[tid];
+
+	hdr->seq_ctrl = cpu_to_le16(*seq);
+
+	/* Increase the sequence number. */
+	*seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ;
+
+	return TX_CONTINUE;
+}
+
 static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 {
@@ -1110,12 +1132,12 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
 		goto txh_done;
 
 	CALL_TXH(ieee80211_tx_h_check_assoc)
-	CALL_TXH(ieee80211_tx_h_sequence)
 	CALL_TXH(ieee80211_tx_h_ps_buf)
 	CALL_TXH(ieee80211_tx_h_select_key)
 	CALL_TXH(ieee80211_tx_h_michael_mic_add)
 	CALL_TXH(ieee80211_tx_h_rate_ctrl)
 	CALL_TXH(ieee80211_tx_h_misc)
+	CALL_TXH(ieee80211_tx_h_sequence)
 	CALL_TXH(ieee80211_tx_h_fragment)
 	/* handlers after fragment must be aware of tx info fragmentation! */
 	CALL_TXH(ieee80211_tx_h_encrypt)
@@ -1827,9 +1849,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 			memcpy(skb_put(skb, beacon->head_len), beacon->head,
 			       beacon->head_len);
 
-			ieee80211_include_sequence(sdata,
-					(struct ieee80211_hdr *)skb->data);
-
 			/*
 			 * Not very nice, but we want to allow the driver to call
 			 * ieee80211_beacon_get() as a response to the set_tim()
@@ -1919,14 +1938,18 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 
 	info->control.vif = vif;
 	info->tx_rate_idx = rsel.rate_idx;
+
+	info->flags |= IEEE80211_TX_CTL_NO_ACK;
+	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
+	info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
 	if (sdata->bss_conf.use_short_preamble &&
 	    sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
 		info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
+
 	info->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
-	info->flags |= IEEE80211_TX_CTL_NO_ACK;
-	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
 	info->control.retry_limit = 1;
-	info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
+
 	(*num_beacons)++;
 out:
 	rcu_read_unlock();
-- 
cgit v1.2.3-70-g09d2


From f434b2d111d9ff84ebdd0f11a7ae42c761453259 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 10 Jul 2008 11:22:31 +0200
Subject: mac80211: fix struct ieee80211_tx_queue_params

Multiple issues:
 - there are no "default" values needed
 - cw_min/cw_max can be larger than documented
 - restructure to decrease size
 - use get_unaligned_le16

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 10 +++++-----
 net/mac80211/mlme.c    |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1dbd49fc557..24a69f6075c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -115,17 +115,17 @@ enum ieee80211_max_queues {
  * The information provided in this structure is required for QoS
  * transmit queue configuration. Cf. IEEE 802.11 7.3.2.29.
  *
- * @aifs: arbitration interface space [0..255, -1: use default]
- * @cw_min: minimum contention window [will be a value of the form
- *	2^n-1 in the range 1..1023; 0: use default]
+ * @aifs: arbitration interface space [0..255]
+ * @cw_min: minimum contention window [a value of the form
+ *	2^n-1 in the range 1..32767]
  * @cw_max: maximum contention window [like @cw_min]
  * @txop: maximum burst time in units of 32 usecs, 0 meaning disabled
  */
 struct ieee80211_tx_queue_params {
-	s16 aifs;
+	u16 txop;
 	u16 cw_min;
 	u16 cw_max;
-	u16 txop;
+	u8 aifs;
 };
 
 /**
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 61d7f81bf45..a4bbc8d6d0e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -345,7 +345,7 @@ static void ieee80211_sta_wmm_params(struct net_device *dev,
 		params.aifs = pos[0] & 0x0f;
 		params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4);
 		params.cw_min = ecw2cw(pos[1] & 0x0f);
-		params.txop = pos[2] | (pos[3] << 8);
+		params.txop = get_unaligned_le16(pos + 2);
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d "
 		       "cWmin=%d cWmax=%d txop=%d\n",
-- 
cgit v1.2.3-70-g09d2


From 1e188637902eb4b62d325d3cc76b076724f3ec55 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 10 Jul 2008 17:54:14 +0300
Subject: mac80211: dont add a STA which is not in the same IBSS

This patch avoids adding STAs that don't belong to our IBSS
ieee80211_bssid_match matches also bcast address so also APs
were added

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a4bbc8d6d0e..8f51375317d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4360,7 +4360,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 		return NULL;
 	}
 
-	if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid))
+	if (compare_ether_addr(bssid, sdata->u.sta.bssid))
 		return NULL;
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-- 
cgit v1.2.3-70-g09d2


From 4c8894980010536915c4f5513ee180e3614aeca9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 14 Jul 2008 20:22:38 -0700
Subject: netfilter: Let nf_ct_kill() callers know if del_timer() returned
 true.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack.h | 20 ++++++++++----------
 net/netfilter/nf_conntrack_core.c    | 13 ++++++++-----
 2 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d5d76ec7abb..8f5b75734dd 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -223,23 +223,23 @@ static inline void nf_ct_refresh(struct nf_conn *ct,
 	__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
 }
 
-extern void __nf_ct_kill_acct(struct nf_conn *ct,
-				enum ip_conntrack_info ctinfo,
-				const struct sk_buff *skb,
-				int do_acct);
+extern bool __nf_ct_kill_acct(struct nf_conn *ct,
+			      enum ip_conntrack_info ctinfo,
+			      const struct sk_buff *skb,
+			      int do_acct);
 
 /* kill conntrack and do accounting */
-static inline void nf_ct_kill_acct(struct nf_conn *ct,
-				enum ip_conntrack_info ctinfo,
-				const struct sk_buff *skb)
+static inline bool nf_ct_kill_acct(struct nf_conn *ct,
+				   enum ip_conntrack_info ctinfo,
+				   const struct sk_buff *skb)
 {
-	__nf_ct_kill_acct(ct, ctinfo, skb, 1);
+	return __nf_ct_kill_acct(ct, ctinfo, skb, 1);
 }
 
 /* kill conntrack without accounting */
-static inline void nf_ct_kill(struct nf_conn *ct)
+static inline bool nf_ct_kill(struct nf_conn *ct)
 {
-	__nf_ct_kill_acct(ct, 0, NULL, 0);
+	return __nf_ct_kill_acct(ct, 0, NULL, 0);
 }
 
 /* These are for NAT.  Icky. */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 212a0888408..28d03e64200 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -848,10 +848,10 @@ acct:
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
-void __nf_ct_kill_acct(struct nf_conn *ct,
-		enum ip_conntrack_info ctinfo,
-		const struct sk_buff *skb,
-		int do_acct)
+bool __nf_ct_kill_acct(struct nf_conn *ct,
+		       enum ip_conntrack_info ctinfo,
+		       const struct sk_buff *skb,
+		       int do_acct)
 {
 #ifdef CONFIG_NF_CT_ACCT
 	if (do_acct) {
@@ -862,8 +862,11 @@ void __nf_ct_kill_acct(struct nf_conn *ct,
 		spin_unlock_bh(&nf_conntrack_lock);
 	}
 #endif
-	if (del_timer(&ct->timeout))
+	if (del_timer(&ct->timeout)) {
 		ct->timeout.function((unsigned long)ct);
+		return true;
+	}
+	return false;
 }
 EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
 
-- 
cgit v1.2.3-70-g09d2


From 72d9794f444734af56ef12833b496326643e2964 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 20:36:32 -0700
Subject: net-sched: cls_flow: add perturbation support

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_cls.h |  1 +
 net/sched/cls_flow.c    | 52 ++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 44 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 99efbed81fa..7cf7824df77 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -374,6 +374,7 @@ enum
 	TCA_FLOW_ACT,
 	TCA_FLOW_POLICE,
 	TCA_FLOW_EMATCHES,
+	TCA_FLOW_PERTURB,
 	__TCA_FLOW_MAX
 };
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 971b867e048..8f63a1a9401 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -36,6 +36,8 @@ struct flow_filter {
 	struct list_head	list;
 	struct tcf_exts		exts;
 	struct tcf_ematch_tree	ematches;
+	struct timer_list	perturb_timer;
+	u32			perturb_period;
 	u32			handle;
 
 	u32			nkeys;
@@ -47,11 +49,9 @@ struct flow_filter {
 	u32			addend;
 	u32			divisor;
 	u32			baseclass;
+	u32			hashrnd;
 };
 
-static u32 flow_hashrnd __read_mostly;
-static int flow_hashrnd_initted __read_mostly;
-
 static const struct tcf_ext_map flow_ext_map = {
 	.action	= TCA_FLOW_ACT,
 	.police	= TCA_FLOW_POLICE,
@@ -348,7 +348,7 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
 		}
 
 		if (f->mode == FLOW_MODE_HASH)
-			classid = jhash2(keys, f->nkeys, flow_hashrnd);
+			classid = jhash2(keys, f->nkeys, f->hashrnd);
 		else {
 			classid = keys[0];
 			classid = (classid & f->mask) ^ f->xor;
@@ -369,6 +369,15 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	return -1;
 }
 
+static void flow_perturbation(unsigned long arg)
+{
+	struct flow_filter *f = (struct flow_filter *)arg;
+
+	get_random_bytes(&f->hashrnd, 4);
+	if (f->perturb_period)
+		mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
+}
+
 static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
 	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
 	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
@@ -381,6 +390,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
 	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
 	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
 	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
+	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
 };
 
 static int flow_change(struct tcf_proto *tp, unsigned long base,
@@ -394,6 +404,7 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
 	struct tcf_exts e;
 	struct tcf_ematch_tree t;
 	unsigned int nkeys = 0;
+	unsigned int perturb_period = 0;
 	u32 baseclass = 0;
 	u32 keymask = 0;
 	u32 mode;
@@ -442,6 +453,14 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
 			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
 		if (mode != FLOW_MODE_HASH && nkeys > 1)
 			goto err2;
+
+		if (mode == FLOW_MODE_HASH)
+			perturb_period = f->perturb_period;
+		if (tb[TCA_FLOW_PERTURB]) {
+			if (mode != FLOW_MODE_HASH)
+				goto err2;
+			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
+		}
 	} else {
 		err = -EINVAL;
 		if (!handle)
@@ -455,6 +474,12 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
 		if (mode != FLOW_MODE_HASH && nkeys > 1)
 			goto err2;
 
+		if (tb[TCA_FLOW_PERTURB]) {
+			if (mode != FLOW_MODE_HASH)
+				goto err2;
+			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
+		}
+
 		if (TC_H_MAJ(baseclass) == 0)
 			baseclass = TC_H_MAKE(tp->q->handle, baseclass);
 		if (TC_H_MIN(baseclass) == 0)
@@ -467,6 +492,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
 
 		f->handle = handle;
 		f->mask	  = ~0U;
+
+		get_random_bytes(&f->hashrnd, 4);
+		f->perturb_timer.function = flow_perturbation;
+		f->perturb_timer.data = (unsigned long)f;
+		init_timer_deferrable(&f->perturb_timer);
 	}
 
 	tcf_exts_change(tp, &f->exts, &e);
@@ -495,6 +525,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
 	if (baseclass)
 		f->baseclass = baseclass;
 
+	f->perturb_period = perturb_period;
+	del_timer(&f->perturb_timer);
+	if (perturb_period)
+		mod_timer(&f->perturb_timer, jiffies + perturb_period);
+
 	if (*arg == 0)
 		list_add_tail(&f->list, &head->filters);
 
@@ -512,6 +547,7 @@ err1:
 
 static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
 {
+	del_timer_sync(&f->perturb_timer);
 	tcf_exts_destroy(tp, &f->exts);
 	tcf_em_tree_destroy(tp, &f->ematches);
 	kfree(f);
@@ -532,11 +568,6 @@ static int flow_init(struct tcf_proto *tp)
 {
 	struct flow_head *head;
 
-	if (!flow_hashrnd_initted) {
-		get_random_bytes(&flow_hashrnd, 4);
-		flow_hashrnd_initted = 1;
-	}
-
 	head = kzalloc(sizeof(*head), GFP_KERNEL);
 	if (head == NULL)
 		return -ENOBUFS;
@@ -605,6 +636,9 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh,
 	if (f->baseclass)
 		NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);
 
+	if (f->perturb_period)
+		NLA_PUT_U32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ);
+
 	if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
 		goto nla_put_failure;
 #ifdef CONFIG_NET_EMATCH
-- 
cgit v1.2.3-70-g09d2


From 2aeb0b88b3c7a0e3bef55e7ff0efffd5d971aa57 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 14 Jul 2008 20:49:46 -0700
Subject: af_packet: Check return of dev_set_promiscuity/allmulti

dev_set_promiscuity/allmulti might overflow.  Commit: "netdevice: Fix
promiscuity and allmulti overflow" in net-next makes
dev_set_promiscuity/allmulti return error number if overflow happened.

In af_packet, we check all positive increment for promiscuity and
allmulti to get error return.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index beca6402f1c..9f226916668 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1173,7 +1173,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 	return 0;
 }
 
-static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
+static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
+			 int what)
 {
 	switch (i->type) {
 	case PACKET_MR_MULTICAST:
@@ -1183,13 +1184,14 @@ static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int w
 			dev_mc_delete(dev, i->addr, i->alen, 0);
 		break;
 	case PACKET_MR_PROMISC:
-		dev_set_promiscuity(dev, what);
+		return dev_set_promiscuity(dev, what);
 		break;
 	case PACKET_MR_ALLMULTI:
-		dev_set_allmulti(dev, what);
+		return dev_set_allmulti(dev, what);
 		break;
 	default:;
 	}
+	return 0;
 }
 
 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
@@ -1243,7 +1245,11 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
 	i->count = 1;
 	i->next = po->mclist;
 	po->mclist = i;
-	packet_dev_mc(dev, i, +1);
+	err = packet_dev_mc(dev, i, 1);
+	if (err) {
+		po->mclist = i->next;
+		kfree(i);
+	}
 
 done:
 	rtnl_unlock();
-- 
cgit v1.2.3-70-g09d2


From bc3f9076f671f128c82022428992c30be57f22d5 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 14 Jul 2008 20:53:13 -0700
Subject: bridge: Check return of dev_set_promiscuity

dev_set_promiscuity/allmulti might overflow.
Commit: "netdevice: Fix promiscuity and allmulti overflow" in net-next makes
dev_set_promiscuity/allmulti return error number if overflow happened.

Here, we check the positive increment for promiscuity to get error return.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 497df086141..a072ea5ca6f 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -373,6 +373,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 
+	err = dev_set_promiscuity(dev, 1);
+	if (err)
+		goto put_back;
+
 	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
 				   SYSFS_BRIDGE_PORT_ATTR);
 	if (err)
@@ -388,7 +392,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	rcu_assign_pointer(dev->br_port, p);
 	dev_disable_lro(dev);
-	dev_set_promiscuity(dev, 1);
 
 	list_add_rcu(&p->list, &br->port_list);
 
@@ -412,12 +415,12 @@ err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
 	kobject_del(&p->kobj);
-	goto put_back;
 err0:
 	kobject_put(&p->kobj);
-
+	dev_set_promiscuity(dev, -1);
 put_back:
 	dev_put(dev);
+	kfree(p);
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 5ae7b444137143a4e067b80354171ab128eb1b2b Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 14 Jul 2008 20:54:23 -0700
Subject: ipv6: Check return of dev_set_allmulti

allmulti might overflow.
Commit: "netdevice: Fix promiscuity and allmulti overflow" in net-next makes
dev_set_promiscuity/allmulti return error number if overflow happened.

Here, we check the positive increment for allmulti to get error return.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index cfac26d674e..2dd832592a3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -595,6 +595,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 	int vifi = vifc->mif6c_mifi;
 	struct mif_device *v = &vif6_table[vifi];
 	struct net_device *dev;
+	int err;
 
 	/* Is vif busy ? */
 	if (MIF_EXISTS(vifi))
@@ -612,6 +613,11 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 		dev = ip6mr_reg_vif();
 		if (!dev)
 			return -ENOBUFS;
+		err = dev_set_allmulti(dev, 1);
+		if (err) {
+			unregister_netdevice(dev);
+			return err;
+		}
 		break;
 #endif
 	case 0:
@@ -619,13 +625,14 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 		if (!dev)
 			return -EADDRNOTAVAIL;
 		dev_put(dev);
+		err = dev_set_allmulti(dev, 1);
+		if (err)
+			return err;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	dev_set_allmulti(dev, 1);
-
 	/*
 	 *	Fill in the VIF structures
 	 */
-- 
cgit v1.2.3-70-g09d2


From 7af3db78a99f47b9ff40b8cb0bb08160ad6a3d6b Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 14 Jul 2008 20:54:54 -0700
Subject: ipv6: Fix using after dev_put()

Patrick McHardy pointed it out.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2dd832592a3..0b41aa2675f 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -443,6 +443,7 @@ static struct net_device *ip6mr_reg_vif(void)
 	if (dev_open(dev))
 		goto failure;
 
+	dev_hold(dev);
 	return dev;
 
 failure:
@@ -616,6 +617,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 		err = dev_set_allmulti(dev, 1);
 		if (err) {
 			unregister_netdevice(dev);
+			dev_put(dev);
 			return err;
 		}
 		break;
@@ -624,10 +626,11 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
 		if (!dev)
 			return -EADDRNOTAVAIL;
-		dev_put(dev);
 		err = dev_set_allmulti(dev, 1);
-		if (err)
+		if (err) {
+			dev_put(dev);
 			return err;
+		}
 		break;
 	default:
 		return -EINVAL;
@@ -651,7 +654,6 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 
 	/* And finish update writing critical data */
 	write_lock_bh(&mrt_lock);
-	dev_hold(dev);
 	v->dev = dev;
 #ifdef CONFIG_IPV6_PIMSM_V2
 	if (v->flags & MIFF_REGISTER)
-- 
cgit v1.2.3-70-g09d2


From d607032db0ccd7274bee348df3214f6f52b24816 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 14 Jul 2008 20:55:26 -0700
Subject: ipv4: Check return of dev_set_allmulti

allmulti might overflow.
Commit: "netdevice: Fix promiscuity and allmulti overflow" in net-next makes
dev_set_promiscuity/allmulti return error number if overflow happened.

Here, we check the positive increment for allmulti to get error return.

PS: For unwinding tunnel creating, we let ipip->ioctl() to handle it.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 438fab9c62a..2f4d8afd067 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -118,6 +118,31 @@ static struct timer_list ipmr_expire_timer;
 
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
+static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
+{
+	dev_close(dev);
+
+	dev = __dev_get_by_name(&init_net, "tunl0");
+	if (dev) {
+		struct ifreq ifr;
+		mm_segment_t	oldfs;
+		struct ip_tunnel_parm p;
+
+		memset(&p, 0, sizeof(p));
+		p.iph.daddr = v->vifc_rmt_addr.s_addr;
+		p.iph.saddr = v->vifc_lcl_addr.s_addr;
+		p.iph.version = 4;
+		p.iph.ihl = 5;
+		p.iph.protocol = IPPROTO_IPIP;
+		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
+		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL);
+		set_fs(oldfs);
+	}
+}
+
 static
 struct net_device *ipmr_new_tunnel(struct vifctl *v)
 {
@@ -389,6 +414,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 	struct vif_device *v = &vif_table[vifi];
 	struct net_device *dev;
 	struct in_device *in_dev;
+	int err;
 
 	/* Is vif busy ? */
 	if (VIF_EXISTS(vifi))
@@ -406,18 +432,31 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 		dev = ipmr_reg_vif();
 		if (!dev)
 			return -ENOBUFS;
+		err = dev_set_allmulti(dev, 1);
+		if (err) {
+			unregister_netdevice(dev);
+			return err;
+		}
 		break;
 #endif
 	case VIFF_TUNNEL:
 		dev = ipmr_new_tunnel(vifc);
 		if (!dev)
 			return -ENOBUFS;
+		err = dev_set_allmulti(dev, 1);
+		if (err) {
+			ipmr_del_tunnel(dev, vifc);
+			return err;
+		}
 		break;
 	case 0:
 		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
 		if (!dev)
 			return -EADDRNOTAVAIL;
 		dev_put(dev);
+		err = dev_set_allmulti(dev, 1);
+		if (err)
+			return err;
 		break;
 	default:
 		return -EINVAL;
@@ -426,7 +465,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
 		return -EADDRNOTAVAIL;
 	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
-	dev_set_allmulti(dev, +1);
 	ip_rt_multicast_event(in_dev);
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From 7dc00c82cbb0119cf4663f65bbaa2cc55f961db2 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 14 Jul 2008 20:56:34 -0700
Subject: ipv4: Fix ipmr unregister device oops

An oops happens during device unregister.

The following oops happened when I add two tunnels, which
use a same device, and then delete one tunnel.
Obviously deleting tunnel "A" causes device unregister, which
send a notification, and after receiving notification, ipmr do
unregister again for tunnel "B" which also use same device.
That is wrong.
After receiving notification, ipmr only needs to decrease reference
count and don't do duplicated unregister.
Fortunately, IPv6 side doesn't add tunnel in ip6mr, so it's clean.

This patch fixs:
- unregister device oops
- using after dev_put()

Here is the oops:
===
Jul 11 15:39:29 wangchen kernel: ------------[ cut here ]------------
Jul 11 15:39:29 wangchen kernel: kernel BUG at net/core/dev.c:3651!
Jul 11 15:39:29 wangchen kernel: invalid opcode: 0000 [#1]
Jul 11 15:39:29 wangchen kernel: Modules linked in: ipip tunnel4 nfsd lockd nfs_acl auth_rpcgss sunrpc exportfs ipv6 snd_pcm_oss snd_mixer_oss snd_seq snd_seq_device af_packet binfmt_misc button battery ac loop dm_mod usbhid ff_memless pcmcia firmware_class ohci1394 8139too mii ieee1394 yenta_socket rsrc_nonstatic pcmcia_core ide_cd_mod cdrom snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm i2c_i801 snd_timer snd i2c_core soundcore snd_page_alloc rng_core shpchp ehci_hcd uhci_hcd pci_hotplug intel_agp agpgart usbcore ext3 jbd ata_piix ahci libata dock edd fan thermal processor thermal_sys piix sd_mod scsi_mod ide_disk ide_core [last unloaded: freq_table]
Jul 11 15:39:29 wangchen kernel:
Jul 11 15:39:29 wangchen kernel: Pid: 4102, comm: mroute Not tainted (2.6.26-rc9-default #69)
Jul 11 15:39:29 wangchen kernel: EIP: 0060:[<c024636b>] EFLAGS: 00010202 CPU: 0
Jul 11 15:39:29 wangchen kernel: EIP is at rollback_registered+0x61/0xe3
Jul 11 15:39:29 wangchen kernel: EAX: 00000001 EBX: ecba6000 ECX: 00000000 EDX: ffffffff
Jul 11 15:39:29 wangchen kernel: ESI: 00000001 EDI: ecba6000 EBP: c03de2e8 ESP: ed8e7c3c
Jul 11 15:39:29 wangchen kernel:  DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
Jul 11 15:39:29 wangchen kernel: Process mroute (pid: 4102, ti=ed8e6000 task=ed41e830 task.ti=ed8e6000)
Jul 11 15:39:29 wangchen kernel: Stack: ecba6000 c024641c 00000028 c0284e1a 00000001 c03de2e8 ecba6000 eecff360
Jul 11 15:39:29 wangchen kernel:        c0284e4c c03536f4 fffffff8 00000000 c029a819 ecba6000 00000006 ecba6000
Jul 11 15:39:29 wangchen kernel:        00000000 ecba6000 c03de2c0 c012841b ffffffff 00000000 c024639f ecba6000
Jul 11 15:39:29 wangchen kernel: Call Trace:
Jul 11 15:39:29 wangchen kernel:  [<c024641c>] unregister_netdevice+0x2f/0x51
Jul 11 15:39:29 wangchen kernel:  [<c0284e1a>] vif_delete+0xaf/0xc3
Jul 11 15:39:29 wangchen kernel:  [<c0284e4c>] ipmr_device_event+0x1e/0x30
Jul 11 15:39:29 wangchen kernel:  [<c029a819>] notifier_call_chain+0x2a/0x47
Jul 11 15:39:29 wangchen kernel:  [<c012841b>] raw_notifier_call_chain+0x9/0xc
Jul 11 15:39:29 wangchen kernel:  [<c024639f>] rollback_registered+0x95/0xe3
Jul 11 15:39:29 wangchen kernel:  [<c024641c>] unregister_netdevice+0x2f/0x51
Jul 11 15:39:29 wangchen kernel:  [<c0284e1a>] vif_delete+0xaf/0xc3
Jul 11 15:39:29 wangchen kernel:  [<c0285eee>] ip_mroute_setsockopt+0x47a/0x801
Jul 11 15:39:29 wangchen kernel:  [<eea5a70c>] do_get_write_access+0x2df/0x313 [jbd]
Jul 11 15:39:29 wangchen kernel:  [<c01727c4>] __find_get_block_slow+0xda/0xe4
Jul 11 15:39:29 wangchen kernel:  [<c0172a7f>] __find_get_block+0xf8/0x122
Jul 11 15:39:29 wangchen kernel:  [<c0172a7f>] __find_get_block+0xf8/0x122
Jul 11 15:39:29 wangchen kernel:  [<eea5d563>] journal_cancel_revoke+0xda/0x110 [jbd]
Jul 11 15:39:29 wangchen kernel:  [<c0263501>] ip_setsockopt+0xa9/0x9ee
Jul 11 15:39:29 wangchen kernel:  [<eea5d563>] journal_cancel_revoke+0xda/0x110 [jbd]
Jul 11 15:39:29 wangchen kernel:  [<eea5a70c>] do_get_write_access+0x2df/0x313 [jbd]
Jul 11 15:39:29 wangchen kernel:  [<eea69287>] __ext3_get_inode_loc+0xcf/0x271 [ext3]
Jul 11 15:39:29 wangchen kernel:  [<eea743c7>] __ext3_journal_dirty_metadata+0x13/0x32 [ext3]
Jul 11 15:39:29 wangchen kernel:  [<c0116434>] __wake_up+0xf/0x15
Jul 11 15:39:29 wangchen kernel:  [<eea5a424>] journal_stop+0x1bd/0x1c6 [jbd]
Jul 11 15:39:29 wangchen kernel:  [<eea703a7>] __ext3_journal_stop+0x19/0x34 [ext3]
Jul 11 15:39:29 wangchen kernel:  [<c014291e>] get_page_from_freelist+0x94/0x369
Jul 11 15:39:29 wangchen kernel:  [<c01408f2>] filemap_fault+0x1ac/0x2fe
Jul 11 15:39:29 wangchen kernel:  [<c01a605e>] security_sk_alloc+0xd/0xf
Jul 11 15:39:29 wangchen kernel:  [<c023edea>] sk_prot_alloc+0x36/0x78
Jul 11 15:39:29 wangchen kernel:  [<c0240037>] sk_alloc+0x3a/0x40
Jul 11 15:39:29 wangchen kernel:  [<c0276062>] raw_hash_sk+0x46/0x4e
Jul 11 15:39:29 wangchen kernel:  [<c0166aff>] d_alloc+0x1b/0x157
Jul 11 15:39:29 wangchen kernel:  [<c023e4d1>] sock_common_setsockopt+0x12/0x16
Jul 11 15:39:29 wangchen kernel:  [<c023cb1e>] sys_setsockopt+0x6f/0x8e
Jul 11 15:39:29 wangchen kernel:  [<c023e105>] sys_socketcall+0x15c/0x19e
Jul 11 15:39:29 wangchen kernel:  [<c0103611>] sysenter_past_esp+0x6a/0x99
Jul 11 15:39:29 wangchen kernel:  [<c0290000>] unix_poll+0x69/0x78
Jul 11 15:39:29 wangchen kernel:  =======================
Jul 11 15:39:29 wangchen kernel: Code: 83 e0 01 00 00 85 c0 75 1f 53 53 68 12 81 31 c0 e8 3c 30 ed ff ba 3f 0e 00 00 b8 b9 7f 31 c0 83 c4 0c 5b e9 f5 26 ed ff 48 74 04 <0f> 0b eb fe 89 d8 e8 21 ff ff ff 89 d8 e8 62 ea ff ff c7 83 e0
Jul 11 15:39:29 wangchen kernel: EIP: [<c024636b>] rollback_registered+0x61/0xe3 SS:ESP 0068:ed8e7c3c
Jul 11 15:39:29 wangchen kernel: ---[ end trace c311acf85d169786 ]---
===

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2f4d8afd067..c9ab47b966b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -184,6 +184,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
 
 			if (dev_open(dev))
 				goto failure;
+			dev_hold(dev);
 		}
 	}
 	return dev;
@@ -250,6 +251,8 @@ static struct net_device *ipmr_reg_vif(void)
 	if (dev_open(dev))
 		goto failure;
 
+	dev_hold(dev);
+
 	return dev;
 
 failure:
@@ -264,9 +267,10 @@ failure:
 
 /*
  *	Delete a VIF entry
+ *	@notify: Set to 1, if the caller is a notifier_call
  */
 
-static int vif_delete(int vifi)
+static int vif_delete(int vifi, int notify)
 {
 	struct vif_device *v;
 	struct net_device *dev;
@@ -309,7 +313,7 @@ static int vif_delete(int vifi)
 		ip_rt_multicast_event(in_dev);
 	}
 
-	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
 		unregister_netdevice(dev);
 
 	dev_put(dev);
@@ -435,6 +439,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 		err = dev_set_allmulti(dev, 1);
 		if (err) {
 			unregister_netdevice(dev);
+			dev_put(dev);
 			return err;
 		}
 		break;
@@ -446,6 +451,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 		err = dev_set_allmulti(dev, 1);
 		if (err) {
 			ipmr_del_tunnel(dev, vifc);
+			dev_put(dev);
 			return err;
 		}
 		break;
@@ -453,10 +459,11 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
 		if (!dev)
 			return -EADDRNOTAVAIL;
-		dev_put(dev);
 		err = dev_set_allmulti(dev, 1);
-		if (err)
+		if (err) {
+			dev_put(dev);
 			return err;
+		}
 		break;
 	default:
 		return -EINVAL;
@@ -487,7 +494,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 
 	/* And finish update writing critical data */
 	write_lock_bh(&mrt_lock);
-	dev_hold(dev);
 	v->dev=dev;
 #ifdef CONFIG_IP_PIMSM
 	if (v->flags&VIFF_REGISTER)
@@ -834,7 +840,7 @@ static void mroute_clean_tables(struct sock *sk)
 	 */
 	for (i=0; i<maxvif; i++) {
 		if (!(vif_table[i].flags&VIFF_STATIC))
-			vif_delete(i);
+			vif_delete(i, 0);
 	}
 
 	/*
@@ -947,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
 		if (optname==MRT_ADD_VIF) {
 			ret = vif_add(&vif, sk==mroute_socket);
 		} else {
-			ret = vif_delete(vif.vifc_vifi);
+			ret = vif_delete(vif.vifc_vifi, 0);
 		}
 		rtnl_unlock();
 		return ret;
@@ -1126,7 +1132,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
 	v=&vif_table[0];
 	for (ct=0;ct<maxvif;ct++,v++) {
 		if (v->dev==dev)
-			vif_delete(ct);
+			vif_delete(ct, 1);
 	}
 	return NOTIFY_DONE;
 }
-- 
cgit v1.2.3-70-g09d2


From 89146504cbfeb120dd08ec7f9f8314c4986189b8 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 14 Jul 2008 20:59:03 -0700
Subject: 8021q: Check return of dev_set_promiscuity/allmulti

dev_set_promiscuity/allmulti might overflow.
Commit: "netdevice: Fix promiscuity and allmulti overflow" in net-next makes
dev_set_promiscuity/allmulti return error number if overflow happened.

Here, we check all positive increment for promiscuity and allmulti
to get error return.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8efa399823e..9efd3c67c1d 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -518,19 +518,35 @@ static int vlan_dev_open(struct net_device *dev)
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
 		err = dev_unicast_add(real_dev, dev->dev_addr, ETH_ALEN);
 		if (err < 0)
-			return err;
+			goto out;
 	}
-	memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN);
 
-	if (dev->flags & IFF_ALLMULTI)
-		dev_set_allmulti(real_dev, 1);
-	if (dev->flags & IFF_PROMISC)
-		dev_set_promiscuity(real_dev, 1);
+	if (dev->flags & IFF_ALLMULTI) {
+		err = dev_set_allmulti(real_dev, 1);
+		if (err < 0)
+			goto del_unicast;
+	}
+	if (dev->flags & IFF_PROMISC) {
+		err = dev_set_promiscuity(real_dev, 1);
+		if (err < 0)
+			goto clear_allmulti;
+	}
+
+	memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN);
 
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_join(dev);
 
 	return 0;
+
+clear_allmulti:
+	if (dev->flags & IFF_ALLMULTI)
+		dev_set_allmulti(real_dev, -1);
+del_unicast:
+	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
+		dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN);
+out:
+	return err;
 }
 
 static int vlan_dev_stop(struct net_device *dev)
-- 
cgit v1.2.3-70-g09d2


From 7197914c35b31a75cb6e85c7fc2ae93d0027c28e Mon Sep 17 00:00:00 2001
From: Johann Felix Soden <johfel@users.sourceforge.net>
Date: Mon, 14 Jul 2008 22:22:29 -0700
Subject: net: Remove references to wan-router.txt in Kconfigs

This patch removes references in drivers/net/wan/Kconfig and
net/wanrouter/Kconfig to Documentation/networking/wan-router.txt
which was removed in commit 99971e70fdc1862e120f3319fc0a4dba8c728acf
("[WANPIPE]: Forgotten bits of Sangoma drivers removal.").

Signed-off-by: Johann Felix Soden <johfel@users.sourceforge.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/Kconfig | 3 +--
 net/wanrouter/Kconfig   | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index d5140aed7b7..846be60e782 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -390,8 +390,7 @@ config WAN_ROUTER_DRIVERS
 
 	  Select driver your card and remember to say Y to "Wan Router."
 	  You will need the wan-tools package which is available from
-	  <ftp://ftp.sangoma.com/>. For more information read:
-	  <file:Documentation/networking/wan-router.txt>.
+	  <ftp://ftp.sangoma.com/>.
 
 	  Note that the answer to this question won't directly affect the
 	  kernel except for how subordinate drivers may be built:
diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig
index 1debe1cb054..61ceae0b956 100644
--- a/net/wanrouter/Kconfig
+++ b/net/wanrouter/Kconfig
@@ -20,8 +20,6 @@ config WAN_ROUTER
 	  wish to use your Linux box as a WAN router, say Y here and also to
 	  the WAN driver for your card, below.  You will then need the
 	  wan-tools package which is available from <ftp://ftp.sangoma.com/>.
-	  Read <file:Documentation/networking/wan-router.txt> for more
-	  information.
 
 	  To compile WAN routing support as a module, choose M here: the
 	  module will be called wanrouter.
-- 
cgit v1.2.3-70-g09d2


From 83aa2e964b9b04effa304aaf3c1090b46812a04b Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jul 2008 22:28:25 -0700
Subject: netlabel: return msg overflow error from netlbl_cipsov4_list faster

Currently, we are trying to place the information from the kernel to
1, 2, 3 and 4 pages sequentially. These pages are allocated via slab.
Though, from the slab point of view steps 3 and 4 are equivalent on
most architectures. So, lets skip 3 pages attempt.

By the way, should we switch from .doit to .dumpit interface here?
The amount of data seems quite big for me.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_cipso_v4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 9080c61b71a..0aec318bf0e 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -591,7 +591,7 @@ list_retry:
 	if (nlsze_mult < 4) {
 		rcu_read_unlock();
 		kfree_skb(ans_skb);
-		nlsze_mult++;
+		nlsze_mult *= 2;
 		goto list_start;
 	}
 list_failure_lock:
-- 
cgit v1.2.3-70-g09d2


From 0ea522416b658dedfc9d565b331624a55a6260ad Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 14 Jul 2008 22:42:19 -0700
Subject: tipc: Remove unneeded parameter to tipc_createport_raw()

This patch eliminates an unneeded parameter when creating a low-level
TIPC port object.  Instead of returning both the pointer to the port
structure and the port's reference ID, it now returns only the pointer
since the port structure contains the reference ID as one of its fields.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tipc/tipc_port.h | 13 +++----------
 net/tipc/port.c              | 28 ++++++++++------------------
 net/tipc/socket.c            | 15 +++++++--------
 3 files changed, 20 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h
index 9923e41a821..c54917cbfa4 100644
--- a/include/net/tipc/tipc_port.h
+++ b/include/net/tipc/tipc_port.h
@@ -2,7 +2,7 @@
  * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports
  * 
  * Copyright (c) 1994-2007, Ericsson AB
- * Copyright (c) 2005-2007, Wind River Systems
+ * Copyright (c) 2005-2008, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -75,17 +75,10 @@ struct tipc_port {
 };
 
 
-/**
- * tipc_createport_raw - create a native TIPC port and return it's reference
- *
- * Note: 'dispatcher' and 'wakeup' deliver a locked port.
- */
-
-u32 tipc_createport_raw(void *usr_handle,
+struct tipc_port *tipc_createport_raw(void *usr_handle,
 			u32 (*dispatcher)(struct tipc_port *, struct sk_buff *),
 			void (*wakeup)(struct tipc_port *),
-			const u32 importance,
-			struct tipc_port **tp_ptr);
+			const u32 importance);
 
 int tipc_reject_msg(struct sk_buff *buf, u32 err);
 
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 2e0cff408ff..ffba1e7f06d 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -2,7 +2,7 @@
  * net/tipc/port.c: TIPC port code
  *
  * Copyright (c) 1992-2007, Ericsson AB
- * Copyright (c) 2004-2007, Wind River Systems
+ * Copyright (c) 2004-2008, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -213,16 +213,13 @@ exit:
 /**
  * tipc_createport_raw - create a generic TIPC port
  *
- * Returns port reference, or 0 if unable to create it
- *
- * Note: The newly created port is returned in the locked state.
+ * Returns pointer to (locked) TIPC port, or NULL if unable to create it
  */
 
-u32 tipc_createport_raw(void *usr_handle,
+struct tipc_port *tipc_createport_raw(void *usr_handle,
 			u32 (*dispatcher)(struct tipc_port *, struct sk_buff *),
 			void (*wakeup)(struct tipc_port *),
-			const u32 importance,
-			struct tipc_port **tp_ptr)
+			const u32 importance)
 {
 	struct port *p_ptr;
 	struct tipc_msg *msg;
@@ -231,13 +228,13 @@ u32 tipc_createport_raw(void *usr_handle,
 	p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC);
 	if (!p_ptr) {
 		warn("Port creation failed, no memory\n");
-		return 0;
+		return NULL;
 	}
 	ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock);
 	if (!ref) {
 		warn("Port creation failed, reference table exhausted\n");
 		kfree(p_ptr);
-		return 0;
+		return NULL;
 	}
 
 	p_ptr->publ.usr_handle = usr_handle;
@@ -260,8 +257,7 @@ u32 tipc_createport_raw(void *usr_handle,
 	INIT_LIST_HEAD(&p_ptr->port_list);
 	list_add_tail(&p_ptr->port_list, &ports);
 	spin_unlock_bh(&tipc_port_list_lock);
-	*tp_ptr = &p_ptr->publ;
-	return ref;
+	return &(p_ptr->publ);
 }
 
 int tipc_deleteport(u32 ref)
@@ -1044,21 +1040,18 @@ int tipc_createport(u32 user_ref,
 {
 	struct user_port *up_ptr;
 	struct port *p_ptr;
-	struct tipc_port *tp_ptr;
-	u32 ref;
 
 	up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
 	if (!up_ptr) {
 		warn("Port creation failed, no memory\n");
 		return -ENOMEM;
 	}
-	ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup,
-				  importance, &tp_ptr);
-	if (ref == 0) {
+	p_ptr = (struct port *)tipc_createport_raw(NULL, port_dispatcher,
+						   port_wakeup, importance);
+	if (!p_ptr) {
 		kfree(up_ptr);
 		return -ENOMEM;
 	}
-	p_ptr = (struct port *)tp_ptr;
 
 	p_ptr->user_port = up_ptr;
 	up_ptr->user_ref = user_ref;
@@ -1074,7 +1067,6 @@ int tipc_createport(u32 user_ref,
 	INIT_LIST_HEAD(&up_ptr->uport_list);
 	tipc_reg_add_port(up_ptr);
 	*portref = p_ptr->publ.ref;
-	dbg(" tipc_createport: %x with ref %u\n", p_ptr, p_ptr->publ.ref);
 	tipc_port_unlock(p_ptr);
 	return TIPC_OK;
 }
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 38f48795b40..9c362c5759b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2,7 +2,7 @@
  * net/tipc/socket.c: TIPC socket API
  *
  * Copyright (c) 2001-2007, Ericsson AB
- * Copyright (c) 2004-2007, Wind River Systems
+ * Copyright (c) 2004-2008, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -189,7 +189,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol)
 	socket_state state;
 	struct sock *sk;
 	struct tipc_port *tp_ptr;
-	u32 portref;
 
 	/* Validate arguments */
 
@@ -225,9 +224,9 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol)
 
 	/* Allocate TIPC port for socket to use */
 
-	portref = tipc_createport_raw(sk, &dispatch, &wakeupdispatch,
-				      TIPC_LOW_IMPORTANCE, &tp_ptr);
-	if (unlikely(portref == 0)) {
+	tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch,
+				     TIPC_LOW_IMPORTANCE);
+	if (unlikely(!tp_ptr)) {
 		sk_free(sk);
 		return -ENOMEM;
 	}
@@ -240,14 +239,14 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol)
 	sock_init_data(sock, sk);
 	sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
 	sk->sk_backlog_rcv = backlog_rcv;
-	tipc_sk(sk)->p = tipc_get_port(portref);
+	tipc_sk(sk)->p = tp_ptr;
 
 	spin_unlock_bh(tp_ptr->lock);
 
 	if (sock->state == SS_READY) {
-		tipc_set_portunreturnable(portref, 1);
+		tipc_set_portunreturnable(tp_ptr->ref, 1);
 		if (sock->type == SOCK_DGRAM)
-			tipc_set_portunreliable(portref, 1);
+			tipc_set_portunreliable(tp_ptr->ref, 1);
 	}
 
 	atomic_inc(&tipc_user_count);
-- 
cgit v1.2.3-70-g09d2


From 8642bd9e04f51980b2b6293c66acf7e388c9a6e7 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 14 Jul 2008 22:42:51 -0700
Subject: tipc: Optimize pointer dereferencing when receiving stream data

This patch eliminates an unnecessary pointer dereference when
accessing a stream-based socket's receive queue.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9c362c5759b..ddcb2a753aa 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1133,7 +1133,7 @@ restart:
 	/* Loop around if more data is required */
 
 	if ((sz_copied < buf_len)    /* didn't get all requested data */
-	    && (!skb_queue_empty(&sock->sk->sk_receive_queue) ||
+	    && (!skb_queue_empty(&sk->sk_receive_queue) ||
 		(flags & MSG_WAITALL))
 				     /* ... and more is ready or required */
 	    && (!(flags & MSG_PEEK)) /* ... and aren't just peeking at data */
-- 
cgit v1.2.3-70-g09d2


From 2da59918e26837f305131cfac9c0f1b3b42bb8ae Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 14 Jul 2008 22:43:32 -0700
Subject: tipc: Fix race condition that could cause accept() to fail

This patch ensurs that accept() returns successfully even when
the newly created socket is immediately disconnected by its peer.
Previously, accept() would fail if it was unable to pass back
the optional address info for the socket's peer before the
socket became disconnected; TIPC now allows accept() to gather
peer address information after disconnection.  As a bonus, the
revised code accesses the socket's port more efficiently, without
the overhead incurred by a reference table lookup.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ddcb2a753aa..1848693ebb8 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -63,6 +63,7 @@
 struct tipc_sock {
 	struct sock sk;
 	struct tipc_port *p;
+	struct tipc_portid peer_name;
 };
 
 #define tipc_sk(sk) ((struct tipc_sock *)(sk))
@@ -377,27 +378,29 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
  * @sock: socket structure
  * @uaddr: area for returned socket address
  * @uaddr_len: area for returned length of socket address
- * @peer: 0 to obtain socket name, 1 to obtain peer socket name
+ * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
  *
  * Returns 0 on success, errno otherwise
  *
- * NOTE: This routine doesn't need to take the socket lock since it doesn't
- *       access any non-constant socket information.
+ * NOTE: This routine doesn't need to take the socket lock since it only
+ *       accesses socket information that is unchanging (or which changes in
+ * 	 a completely predictable manner).
  */
 
 static int get_name(struct socket *sock, struct sockaddr *uaddr,
 		    int *uaddr_len, int peer)
 {
 	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
-	u32 portref = tipc_sk_port(sock->sk)->ref;
-	u32 res;
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
 
 	if (peer) {
-		res = tipc_peer(portref, &addr->addr.id);
-		if (res)
-			return res;
+		if ((sock->state != SS_CONNECTED) &&
+			((peer != 2) || (sock->state != SS_DISCONNECTING)))
+			return -ENOTCONN;
+		addr->addr.id.ref = tsock->peer_name.ref;
+		addr->addr.id.node = tsock->peer_name.node;
 	} else {
-		tipc_ownidentity(portref, &addr->addr.id);
+		tipc_ownidentity(tsock->p->ref, &addr->addr.id);
 	}
 
 	*uaddr_len = sizeof(*addr);
@@ -766,18 +769,17 @@ exit:
 
 static int auto_connect(struct socket *sock, struct tipc_msg *msg)
 {
-	struct tipc_port *tport = tipc_sk_port(sock->sk);
-	struct tipc_portid peer;
+	struct tipc_sock *tsock = tipc_sk(sock->sk);
 
 	if (msg_errcode(msg)) {
 		sock->state = SS_DISCONNECTING;
 		return -ECONNREFUSED;
 	}
 
-	peer.ref = msg_origport(msg);
-	peer.node = msg_orignode(msg);
-	tipc_connect2port(tport->ref, &peer);
-	tipc_set_portimportance(tport->ref, msg_importance(msg));
+	tsock->peer_name.ref = msg_origport(msg);
+	tsock->peer_name.node = msg_orignode(msg);
+	tipc_connect2port(tsock->p->ref, &tsock->peer_name);
+	tipc_set_portimportance(tsock->p->ref, msg_importance(msg));
 	sock->state = SS_CONNECTED;
 	return 0;
 }
@@ -1529,9 +1531,9 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
 	res = tipc_create(sock_net(sock->sk), new_sock, 0);
 	if (!res) {
 		struct sock *new_sk = new_sock->sk;
-		struct tipc_port *new_tport = tipc_sk_port(new_sk);
+		struct tipc_sock *new_tsock = tipc_sk(new_sk);
+		struct tipc_port *new_tport = new_tsock->p;
 		u32 new_ref = new_tport->ref;
-		struct tipc_portid id;
 		struct tipc_msg *msg = buf_msg(buf);
 
 		lock_sock(new_sk);
@@ -1545,9 +1547,9 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
 
 		/* Connect new socket to it's peer */
 
-		id.ref = msg_origport(msg);
-		id.node = msg_orignode(msg);
-		tipc_connect2port(new_ref, &id);
+		new_tsock->peer_name.ref = msg_origport(msg);
+		new_tsock->peer_name.node = msg_orignode(msg);
+		tipc_connect2port(new_ref, &new_tsock->peer_name);
 		new_sock->state = SS_CONNECTED;
 
 		tipc_set_portimportance(new_ref, msg_importance(msg));
-- 
cgit v1.2.3-70-g09d2


From 0e35fd5e5264bb46d1febbe9cd9aa08421c21a96 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 14 Jul 2008 22:44:01 -0700
Subject: tipc: Eliminate improper use of TIPC_OK error code

This patch corrects many places where TIPC routines indicated
successful completion by returning TIPC_OK instead of 0.
(The TIPC_OK symbol has the value 0, but it should only be used
in contexts that deal with the error code field of a TIPC
message header.)

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c     | 10 +++++-----
 net/tipc/bearer.c    |  8 ++++----
 net/tipc/cluster.c   |  2 +-
 net/tipc/eth_media.c |  6 +++---
 net/tipc/link.c      | 18 +++++++++---------
 net/tipc/net.c       |  4 ++--
 net/tipc/port.c      | 34 +++++++++++++++++-----------------
 net/tipc/ref.c       |  2 +-
 net/tipc/user_reg.c  | 14 +++++++-------
 9 files changed, 49 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a5883b1452f..b1ff16aa4bd 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -611,7 +611,7 @@ swap:
 		bcbearer->bpairs[bp_index].secondary = p;
 update:
 		if (bcbearer->remains_new.count == 0)
-			return TIPC_OK;
+			return 0;
 
 		bcbearer->remains = bcbearer->remains_new;
 	}
@@ -620,7 +620,7 @@ update:
 
 	bcbearer->bearer.publ.blocked = 1;
 	bcl->stats.bearer_congs++;
-	return ~TIPC_OK;
+	return 1;
 }
 
 /**
@@ -756,7 +756,7 @@ int tipc_bclink_reset_stats(void)
 	spin_lock_bh(&bc_lock);
 	memset(&bcl->stats, 0, sizeof(bcl->stats));
 	spin_unlock_bh(&bc_lock);
-	return TIPC_OK;
+	return 0;
 }
 
 int tipc_bclink_set_queue_limits(u32 limit)
@@ -769,7 +769,7 @@ int tipc_bclink_set_queue_limits(u32 limit)
 	spin_lock_bh(&bc_lock);
 	tipc_link_set_queue_limits(bcl, limit);
 	spin_unlock_bh(&bc_lock);
-	return TIPC_OK;
+	return 0;
 }
 
 int tipc_bclink_init(void)
@@ -810,7 +810,7 @@ int tipc_bclink_init(void)
 		tipc_printbuf_init(&bcl->print_buf, pb, BCLINK_LOG_BUF_SIZE);
 	}
 
-	return TIPC_OK;
+	return 0;
 }
 
 void tipc_bclink_stop(void)
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 271a375b49b..6a9aba3edd0 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -370,7 +370,7 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest)
  */
 static int bearer_push(struct bearer *b_ptr)
 {
-	u32 res = TIPC_OK;
+	u32 res = 0;
 	struct link *ln, *tln;
 
 	if (b_ptr->publ.blocked)
@@ -607,7 +607,7 @@ int tipc_block_bearer(const char *name)
 	}
 	spin_unlock_bh(&b_ptr->publ.lock);
 	read_unlock_bh(&tipc_net_lock);
-	return TIPC_OK;
+	return 0;
 }
 
 /**
@@ -645,7 +645,7 @@ static int bearer_disable(const char *name)
 	}
 	spin_unlock_bh(&b_ptr->publ.lock);
 	memset(b_ptr, 0, sizeof(struct bearer));
-	return TIPC_OK;
+	return 0;
 }
 
 int tipc_disable_bearer(const char *name)
@@ -668,7 +668,7 @@ int tipc_bearer_init(void)
 	tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC);
 	media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC);
 	if (tipc_bearers && media_list) {
-		res = TIPC_OK;
+		res = 0;
 	} else {
 		kfree(tipc_bearers);
 		kfree(media_list);
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index bc1db474fe0..46ee6c58532 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -571,6 +571,6 @@ exit:
 int tipc_cltr_init(void)
 {
 	tipc_highest_allowed_slave = LOWEST_SLAVE + tipc_max_slaves;
-	return tipc_cltr_create(tipc_own_addr) ? TIPC_OK : -ENOMEM;
+	return tipc_cltr_create(tipc_own_addr) ? 0 : -ENOMEM;
 }
 
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 9cd35eec3e7..bc72fbc4f8b 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -82,7 +82,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
 				 dev->dev_addr, clone->len);
 		dev_queue_xmit(clone);
 	}
-	return TIPC_OK;
+	return 0;
 }
 
 /**
@@ -113,12 +113,12 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
 			if (likely(buf->len == size)) {
 				buf->next = NULL;
 				tipc_recv_msg(buf, eb_ptr->bearer);
-				return TIPC_OK;
+				return 0;
 			}
 		}
 	}
 	kfree_skb(buf);
-	return TIPC_OK;
+	return 0;
 }
 
 /**
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 9784a8e963b..d60113ba4b1 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1561,7 +1561,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
 			l_ptr->retransm_queue_head = mod(++r_q_head);
 			l_ptr->retransm_queue_size = --r_q_size;
 			l_ptr->stats.retransmitted++;
-			return TIPC_OK;
+			return 0;
 		} else {
 			l_ptr->stats.bearer_congs++;
 			msg_dbg(buf_msg(buf), "|>DEF-RETR>");
@@ -1580,7 +1580,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
 			l_ptr->unacked_window = 0;
 			buf_discard(buf);
 			l_ptr->proto_msg_queue = NULL;
-			return TIPC_OK;
+			return 0;
 		} else {
 			msg_dbg(buf_msg(buf), "|>DEF-PROT>");
 			l_ptr->stats.bearer_congs++;
@@ -1604,7 +1604,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
 					msg_set_type(msg, CLOSED_MSG);
 				msg_dbg(msg, ">PUSH-DATA>");
 				l_ptr->next_out = buf->next;
-				return TIPC_OK;
+				return 0;
 			} else {
 				msg_dbg(msg, "|PUSH-DATA|");
 				l_ptr->stats.bearer_congs++;
@@ -1628,8 +1628,8 @@ void tipc_link_push_queue(struct link *l_ptr)
 
 	do {
 		res = tipc_link_push_packet(l_ptr);
-	}
-	while (res == TIPC_OK);
+	} while (!res);
+
 	if (res == PUSH_FAILED)
 		tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
 }
@@ -2998,7 +2998,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space
 			link_set_supervision_props(l_ptr, new_value);
 			tipc_link_send_proto_msg(l_ptr, STATE_MSG,
 						 0, 0, new_value, 0, 0);
-			res = TIPC_OK;
+			res = 0;
 		}
 		break;
 	case TIPC_CMD_SET_LINK_PRI:
@@ -3007,14 +3007,14 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space
 			l_ptr->priority = new_value;
 			tipc_link_send_proto_msg(l_ptr, STATE_MSG,
 						 0, 0, 0, new_value, 0);
-			res = TIPC_OK;
+			res = 0;
 		}
 		break;
 	case TIPC_CMD_SET_LINK_WINDOW:
 		if ((new_value >= TIPC_MIN_LINK_WIN) &&
 		    (new_value <= TIPC_MAX_LINK_WIN)) {
 			tipc_link_set_queue_limits(l_ptr, new_value);
-			res = TIPC_OK;
+			res = 0;
 		}
 		break;
 	}
@@ -3230,7 +3230,7 @@ int link_control(const char *name, u32 op, u32 val)
 			if (op == TIPC_CMD_UNBLOCK_LINK) {
 				l_ptr->blocked = 0;
 			}
-			res = TIPC_OK;
+			res = 0;
 		}
 		tipc_node_unlock(node);
 	}
diff --git a/net/tipc/net.c b/net/tipc/net.c
index cc51fa48367..ec7b04fbdc4 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -165,7 +165,7 @@ static int net_init(void)
 	if (!tipc_net.zones) {
 		return -ENOMEM;
 	}
-	return TIPC_OK;
+	return 0;
 }
 
 static void net_stop(void)
@@ -295,7 +295,7 @@ int tipc_net_start(u32 addr)
 	info("Started in network mode\n");
 	info("Own node address %s, network identity %u\n",
 	     addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
-	return TIPC_OK;
+	return 0;
 }
 
 void tipc_net_stop(void)
diff --git a/net/tipc/port.c b/net/tipc/port.c
index ffba1e7f06d..e70d27ea657 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -291,7 +291,7 @@ int tipc_deleteport(u32 ref)
 	kfree(p_ptr);
 	dbg("Deleted port %u\n", ref);
 	tipc_net_route_msg(buf);
-	return TIPC_OK;
+	return 0;
 }
 
 /**
@@ -336,7 +336,7 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
 		return -EINVAL;
 	*isunreliable = port_unreliable(p_ptr);
 	tipc_port_unlock(p_ptr);
-	return TIPC_OK;
+	return 0;
 }
 
 int tipc_set_portunreliable(u32 ref, unsigned int isunreliable)
@@ -348,7 +348,7 @@ int tipc_set_portunreliable(u32 ref, unsigned int isunreliable)
 		return -EINVAL;
 	msg_set_src_droppable(&p_ptr->publ.phdr, (isunreliable != 0));
 	tipc_port_unlock(p_ptr);
-	return TIPC_OK;
+	return 0;
 }
 
 static int port_unreturnable(struct port *p_ptr)
@@ -365,7 +365,7 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
 		return -EINVAL;
 	*isunrejectable = port_unreturnable(p_ptr);
 	tipc_port_unlock(p_ptr);
-	return TIPC_OK;
+	return 0;
 }
 
 int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
@@ -377,7 +377,7 @@ int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
 		return -EINVAL;
 	msg_set_dest_droppable(&p_ptr->publ.phdr, (isunrejectable != 0));
 	tipc_port_unlock(p_ptr);
-	return TIPC_OK;
+	return 0;
 }
 
 /*
@@ -963,7 +963,7 @@ static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf)
 		tipc_k_signal((Handler)port_dispatcher_sigh, 0);
 	}
 	spin_unlock_bh(&queue_lock);
-	return TIPC_OK;
+	return 0;
 }
 
 /*
@@ -1068,14 +1068,14 @@ int tipc_createport(u32 user_ref,
 	tipc_reg_add_port(up_ptr);
 	*portref = p_ptr->publ.ref;
 	tipc_port_unlock(p_ptr);
-	return TIPC_OK;
+	return 0;
 }
 
 int tipc_ownidentity(u32 ref, struct tipc_portid *id)
 {
 	id->ref = ref;
 	id->node = tipc_own_addr;
-	return TIPC_OK;
+	return 0;
 }
 
 int tipc_portimportance(u32 ref, unsigned int *importance)
@@ -1087,7 +1087,7 @@ int tipc_portimportance(u32 ref, unsigned int *importance)
 		return -EINVAL;
 	*importance = (unsigned int)msg_importance(&p_ptr->publ.phdr);
 	tipc_port_unlock(p_ptr);
-	return TIPC_OK;
+	return 0;
 }
 
 int tipc_set_portimportance(u32 ref, unsigned int imp)
@@ -1102,7 +1102,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
 		return -EINVAL;
 	msg_set_importance(&p_ptr->publ.phdr, (u32)imp);
 	tipc_port_unlock(p_ptr);
-	return TIPC_OK;
+	return 0;
 }
 
 
@@ -1137,7 +1137,7 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
 		list_add(&publ->pport_list, &p_ptr->publications);
 		p_ptr->pub_count++;
 		p_ptr->publ.published = 1;
-		res = TIPC_OK;
+		res = 0;
 	}
 exit:
 	tipc_port_unlock(p_ptr);
@@ -1160,7 +1160,7 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
 			tipc_nametbl_withdraw(publ->type, publ->lower,
 					      publ->ref, publ->key);
 		}
-		res = TIPC_OK;
+		res = 0;
 	} else {
 		list_for_each_entry_safe(publ, tpubl,
 					 &p_ptr->publications, pport_list) {
@@ -1174,7 +1174,7 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
 				break;
 			tipc_nametbl_withdraw(publ->type, publ->lower,
 					      publ->ref, publ->key);
-			res = TIPC_OK;
+			res = 0;
 			break;
 		}
 	}
@@ -1218,7 +1218,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
 	tipc_nodesub_subscribe(&p_ptr->subscription,peer->node,
 			  (void *)(unsigned long)ref,
 			  (net_ev_handler)port_handle_node_down);
-	res = TIPC_OK;
+	res = 0;
 exit:
 	tipc_port_unlock(p_ptr);
 	p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref);
@@ -1240,7 +1240,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr)
 		/* let timer expire on it's own to avoid deadlock! */
 		tipc_nodesub_unsubscribe(
 			&((struct port *)tp_ptr)->subscription);
-		res = TIPC_OK;
+		res = 0;
 	} else {
 		res = -ENOTCONN;
 	}
@@ -1305,7 +1305,7 @@ int tipc_isconnected(u32 ref, int *isconnected)
 		return -EINVAL;
 	*isconnected = p_ptr->publ.connected;
 	tipc_port_unlock(p_ptr);
-	return TIPC_OK;
+	return 0;
 }
 
 int tipc_peer(u32 ref, struct tipc_portid *peer)
@@ -1319,7 +1319,7 @@ int tipc_peer(u32 ref, struct tipc_portid *peer)
 	if (p_ptr->publ.connected) {
 		peer->ref = port_peerport(p_ptr);
 		peer->node = port_peernode(p_ptr);
-		res = TIPC_OK;
+		res = 0;
 	} else
 		res = -ENOTCONN;
 	tipc_port_unlock(p_ptr);
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index a101de86824..414fc34b8be 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -123,7 +123,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start)
 	tipc_ref_table.index_mask = actual_size - 1;
 	tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask;
 
-	return TIPC_OK;
+	return 0;
 }
 
 /**
diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c
index 4146c40cd20..50692880316 100644
--- a/net/tipc/user_reg.c
+++ b/net/tipc/user_reg.c
@@ -91,7 +91,7 @@ static int reg_init(void)
 		}
 	}
 	spin_unlock_bh(&reg_lock);
-	return users ? TIPC_OK : -ENOMEM;
+	return users ? 0 : -ENOMEM;
 }
 
 /**
@@ -129,7 +129,7 @@ int tipc_reg_start(void)
 			tipc_k_signal((Handler)reg_callback,
 				      (unsigned long)&users[u]);
 	}
-	return TIPC_OK;
+	return 0;
 }
 
 /**
@@ -184,7 +184,7 @@ int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle)
 
 	if (cb && (tipc_mode != TIPC_NOT_RUNNING))
 		tipc_k_signal((Handler)reg_callback, (unsigned long)user_ptr);
-	return TIPC_OK;
+	return 0;
 }
 
 /**
@@ -230,7 +230,7 @@ int tipc_reg_add_port(struct user_port *up_ptr)
 	struct tipc_user *user_ptr;
 
 	if (up_ptr->user_ref == 0)
-		return TIPC_OK;
+		return 0;
 	if (up_ptr->user_ref > MAX_USERID)
 		return -EINVAL;
 	if ((tipc_mode == TIPC_NOT_RUNNING) || !users )
@@ -240,7 +240,7 @@ int tipc_reg_add_port(struct user_port *up_ptr)
 	user_ptr = &users[up_ptr->user_ref];
 	list_add(&up_ptr->uport_list, &user_ptr->ports);
 	spin_unlock_bh(&reg_lock);
-	return TIPC_OK;
+	return 0;
 }
 
 /**
@@ -250,7 +250,7 @@ int tipc_reg_add_port(struct user_port *up_ptr)
 int tipc_reg_remove_port(struct user_port *up_ptr)
 {
 	if (up_ptr->user_ref == 0)
-		return TIPC_OK;
+		return 0;
 	if (up_ptr->user_ref > MAX_USERID)
 		return -EINVAL;
 	if (!users )
@@ -259,6 +259,6 @@ int tipc_reg_remove_port(struct user_port *up_ptr)
 	spin_lock_bh(&reg_lock);
 	list_del_init(&up_ptr->uport_list);
 	spin_unlock_bh(&reg_lock);
-	return TIPC_OK;
+	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 08d2cf0f74b3ee5e773bb906043a0efe96ded229 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 14 Jul 2008 22:44:32 -0700
Subject: tipc: Fix bug in scope checking for multicast messages

This patch ensures that TIPC's multicast message name lookup
algorithm does individualized scope checking for each published
name it examines.  Previously, scope checking was only done for
the first name in a name table publication list, which could
result in incoming multicast messages being delivered to ports
publishing names with "node" scope, or not being delivered to
ports publishing names with "cluster" or "zone" scope.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 096f7bd240a..1e53b0c8e73 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -710,9 +710,11 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
 		if (sseq->lower > upper)
 			break;
 		publ = sseq->cluster_list;
-		if (publ && (publ->scope <= limit))
+		if (publ)
 			do {
-				if (publ->node == tipc_own_addr)
+				if (publ->scope > limit)
+					/* ignore out-of-scope publication */ ;
+				else if (publ->node == tipc_own_addr)
 					tipc_port_list_add(dports, publ->ref);
 				else
 					res = 1;
-- 
cgit v1.2.3-70-g09d2


From 1aad72d6cd518872c5f545320823bf7f4dafb026 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 14 Jul 2008 22:44:58 -0700
Subject: tipc: Add missing locks when inspecting node list & link list

This patch ensures that TIPC configuration commands that display info
about neighboring nodes and their links take the spinlocks that
protect the node list and link lists from changing while the lists
are being traversed.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 34e9a2bb7c1..ee952ad6021 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -600,12 +600,14 @@ u32 tipc_available_nodes(const u32 domain)
 	struct node *n_ptr;
 	u32 cnt = 0;
 
+	read_lock_bh(&tipc_net_lock);
 	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
 		if (!in_scope(domain, n_ptr->addr))
 			continue;
 		if (tipc_node_is_up(n_ptr))
 			cnt++;
 	}
+	read_unlock_bh(&tipc_net_lock);
 	return cnt;
 }
 
@@ -625,19 +627,26 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
 						   " (network address)");
 
-	if (!tipc_nodes)
+	read_lock_bh(&tipc_net_lock);
+	if (!tipc_nodes) {
+		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_none();
+	}
 
 	/* For now, get space for all other nodes
 	   (will need to modify this when slave nodes are supported */
 
 	payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1);
-	if (payload_size > 32768u)
+	if (payload_size > 32768u) {
+		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 						   " (too many nodes)");
+	}
 	buf = tipc_cfg_reply_alloc(payload_size);
-	if (!buf)
+	if (!buf) {
+		read_unlock_bh(&tipc_net_lock);
 		return NULL;
+	}
 
 	/* Add TLVs for all nodes in scope */
 
@@ -650,6 +659,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 				    &node_info, sizeof(node_info));
 	}
 
+	read_unlock_bh(&tipc_net_lock);
 	return buf;
 }
 
@@ -672,16 +682,22 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 	if (tipc_mode != TIPC_NET_MODE)
 		return tipc_cfg_reply_none();
 
+	read_lock_bh(&tipc_net_lock);
+
 	/* Get space for all unicast links + multicast link */
 
 	payload_size = TLV_SPACE(sizeof(link_info)) *
 		(tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1);
-	if (payload_size > 32768u)
+	if (payload_size > 32768u) {
+		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 						   " (too many links)");
+	}
 	buf = tipc_cfg_reply_alloc(payload_size);
-	if (!buf)
+	if (!buf) {
+		read_unlock_bh(&tipc_net_lock);
 		return NULL;
+	}
 
 	/* Add TLV for broadcast link */
 
@@ -697,6 +713,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 
 		if (!in_scope(domain, n_ptr->addr))
 			continue;
+		tipc_node_lock(n_ptr);
 		for (i = 0; i < MAX_BEARERS; i++) {
 			if (!n_ptr->links[i])
 				continue;
@@ -706,7 +723,9 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 			tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO,
 					    &link_info, sizeof(link_info));
 		}
+		tipc_node_unlock(n_ptr);
 	}
 
+	read_unlock_bh(&tipc_net_lock);
 	return buf;
 }
-- 
cgit v1.2.3-70-g09d2


From 968edbe1c82f1a50d80225ed7e410aba419e55bf Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 14 Jul 2008 22:45:33 -0700
Subject: tipc: Optimization to multicast name lookup algorithm

This patch simplifies and speeds up TIPC's algorithm for identifying
on-node and off-node destinations that overlap a multicast name
sequence range.  Rather than traversing the list of all known name
publications within the cluster, it now traverses the (potentially
much shorter) list of name publications made by the node itself, and
determines if any off-node destinations exist by comparing the sizes
of the two lists.  (Since the node list must be a subset of the
cluster list, a difference in sizes means that at least one off-node
destination must exist.)

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 43 ++++++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 1e53b0c8e73..cd72e22b132 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -2,7 +2,7 @@
  * net/tipc/name_table.c: TIPC name table code
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2004-2005, Wind River Systems
+ * Copyright (c) 2004-2008, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -52,9 +52,16 @@ static int tipc_nametbl_size = 1024;		/* must be a power of 2 */
  * struct sub_seq - container for all published instances of a name sequence
  * @lower: name sequence lower bound
  * @upper: name sequence upper bound
- * @node_list: circular list of matching publications with >= node scope
- * @cluster_list: circular list of matching publications with >= cluster scope
- * @zone_list: circular list of matching publications with >= zone scope
+ * @node_list: circular list of publications made by own node
+ * @cluster_list: circular list of publications made by own cluster
+ * @zone_list: circular list of publications made by own zone
+ * @node_list_size: number of entries in "node_list"
+ * @cluster_list_size: number of entries in "cluster_list"
+ * @zone_list_size: number of entries in "zone_list"
+ *
+ * Note: The zone list always contains at least one entry, since all
+ *       publications of the associated name sequence belong to it.
+ *       (The cluster and node lists may be empty.)
  */
 
 struct sub_seq {
@@ -63,6 +70,9 @@ struct sub_seq {
 	struct publication *node_list;
 	struct publication *cluster_list;
 	struct publication *zone_list;
+	u32 node_list_size;
+	u32 cluster_list_size;
+	u32 zone_list_size;
 };
 
 /**
@@ -317,6 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
 	dbg("inserting publ %p, node=0x%x publ->node=0x%x, subscr->node=%p\n",
 	    publ, node, publ->node, publ->subscr.node);
 
+	sseq->zone_list_size++;
 	if (!sseq->zone_list)
 		sseq->zone_list = publ->zone_list_next = publ;
 	else {
@@ -325,6 +336,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
 	}
 
 	if (in_own_cluster(node)) {
+		sseq->cluster_list_size++;
 		if (!sseq->cluster_list)
 			sseq->cluster_list = publ->cluster_list_next = publ;
 		else {
@@ -335,6 +347,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
 	}
 
 	if (node == tipc_own_addr) {
+		sseq->node_list_size++;
 		if (!sseq->node_list)
 			sseq->node_list = publ->node_list_next = publ;
 		else {
@@ -411,6 +424,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
 	} else {
 		sseq->zone_list = NULL;
 	}
+	sseq->zone_list_size--;
 
 	/* Remove publication from cluster scope list, if present */
 
@@ -439,6 +453,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
 		} else {
 			sseq->cluster_list = NULL;
 		}
+		sseq->cluster_list_size--;
 	}
 end_cluster:
 
@@ -469,6 +484,7 @@ end_cluster:
 		} else {
 			sseq->node_list = NULL;
 		}
+		sseq->node_list_size--;
 	}
 end_node:
 
@@ -709,17 +725,18 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
 
 		if (sseq->lower > upper)
 			break;
-		publ = sseq->cluster_list;
-		if (publ)
+
+		publ = sseq->node_list;
+		if (publ) {
 			do {
-				if (publ->scope > limit)
-					/* ignore out-of-scope publication */ ;
-				else if (publ->node == tipc_own_addr)
+				if (publ->scope <= limit)
 					tipc_port_list_add(dports, publ->ref);
-				else
-					res = 1;
-				publ = publ->cluster_list_next;
-			} while (publ != sseq->cluster_list);
+				publ = publ->node_list_next;
+			} while (publ != sseq->node_list);
+		}
+
+		if (sseq->cluster_list_size != sseq->node_list_size)
+			res = 1;
 	}
 
 	spin_unlock_bh(&seq->lock);
-- 
cgit v1.2.3-70-g09d2


From 6aa895b047720f71ec4eb11452f7c3ce8426941f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 22:49:06 -0700
Subject: vlan: Don't store VLAN tag in cb

Use a real skb member to store the skb to avoid clashes with qdiscs,
which are allowed to use the cb area themselves. As currently only real
devices that consume the skb set the NETIF_F_HW_VLAN_TX flag, no explicit
invalidation is neccessary.

The new member fills a hole on 64 bit, the skb layout changes from:

        __u32                      mark;                 /*   172     4 */
        sk_buff_data_t             transport_header;     /*   176     4 */
        sk_buff_data_t             network_header;       /*   180     4 */
        sk_buff_data_t             mac_header;           /*   184     4 */
        sk_buff_data_t             tail;                 /*   188     4 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        sk_buff_data_t             end;                  /*   192     4 */

        /* XXX 4 bytes hole, try to pack */

to

        __u32                      mark;                 /*   172     4 */
        __u16                      vlan_tci;             /*   176     2 */

        /* XXX 2 bytes hole, try to pack */

        sk_buff_data_t             transport_header;     /*   180     4 */
        sk_buff_data_t             network_header;       /*   184     4 */

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 31 +++++++------------------------
 include/linux/skbuff.h  |  3 +++
 net/core/skbuff.c       |  3 +++
 3 files changed, 13 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 93f5d9b0e9f..9e7b49b8062 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -105,17 +105,8 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
-/* VLAN tx hw acceleration helpers. */
-struct vlan_skb_tx_cookie {
-	u32	magic;
-	u32	vlan_tag;
-};
-
-#define VLAN_TX_COOKIE_MAGIC	0x564c414e	/* "VLAN" in ascii. */
-#define VLAN_TX_SKB_CB(__skb)	((struct vlan_skb_tx_cookie *)&((__skb)->cb[0]))
-#define vlan_tx_tag_present(__skb) \
-	(VLAN_TX_SKB_CB(__skb)->magic == VLAN_TX_COOKIE_MAGIC)
-#define vlan_tx_tag_get(__skb)	(VLAN_TX_SKB_CB(__skb)->vlan_tag)
+#define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci)
+#define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci)
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
@@ -210,17 +201,12 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
  * @skb: skbuff to tag
  * @vlan_tci: VLAN TCI to insert
  *
- * Puts the VLAN TCI in @skb->cb[] and lets the device do the rest
+ * Puts the VLAN TCI in @skb->vlan_tci and lets the device do the rest
  */
 static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb,
 						     u16 vlan_tci)
 {
-	struct vlan_skb_tx_cookie *cookie;
-
-	cookie = VLAN_TX_SKB_CB(skb);
-	cookie->magic = VLAN_TX_COOKIE_MAGIC;
-	cookie->vlan_tag = vlan_tci;
-
+	skb->vlan_tci = vlan_tci;
 	return skb;
 }
 
@@ -267,16 +253,13 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
  * @skb: skbuff to query
  * @vlan_tci: buffer to store vlaue
  *
- * Returns error if @skb->cb[] is not set correctly
+ * Returns error if @skb->vlan_tci is not set correctly
  */
 static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
 					 u16 *vlan_tci)
 {
-	struct vlan_skb_tx_cookie *cookie;
-
-	cookie = VLAN_TX_SKB_CB(skb);
-	if (cookie->magic == VLAN_TX_COOKIE_MAGIC) {
-		*vlan_tci = cookie->vlan_tag;
+	if (vlan_tx_tag_present(skb)) {
+		*vlan_tci = skb->vlan_tci;
 		return 0;
 	} else {
 		*vlan_tci = 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8f10e3d08fd..7ea44f6621f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -246,6 +246,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@dma_cookie: a cookie to one of several possible DMA operations
  *		done by skb DMA functions
  *	@secmark: security marking
+ *	@vlan_tci: vlan tag control information
  */
 
 struct sk_buff {
@@ -326,6 +327,8 @@ struct sk_buff {
 
 	__u32			mark;
 
+	__u16			vlan_tci;
+
 	sk_buff_data_t		transport_header;
 	sk_buff_data_t		network_header;
 	sk_buff_data_t		mac_header;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7c571560e9d..50a853f7cd8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -459,6 +459,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->tc_verd		= old->tc_verd;
 #endif
 #endif
+	new->vlan_tci		= old->vlan_tci;
+
 	skb_copy_secmark(new, old);
 }
 
@@ -2286,6 +2288,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		skb_copy_queue_mapping(nskb, skb);
 		nskb->priority = skb->priority;
 		nskb->protocol = skb->protocol;
+		nskb->vlan_tci = skb->vlan_tci;
 		nskb->dst = dst_clone(skb->dst);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		nskb->pkt_type = skb->pkt_type;
-- 
cgit v1.2.3-70-g09d2


From bc1d0411b804ad190cdadabac48a10067f17b9e6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 22:49:30 -0700
Subject: vlan: deliver packets received with VLAN acceleration to network taps

When VLAN header stripping is used, packets currently bypass packet
sockets (and other network taps) completely. For locally existing
VLANs, they appear directly on the VLAN device, for unknown VLANs
they are silently dropped.

Add a new function netif_nit_deliver() to deliver incoming packets
to all network interface taps and use it in __vlan_hwaccel_rx() to
make VLAN packets visible on the underlying device.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/8021q/vlan_core.c     |  4 ++++
 net/core/dev.c            | 27 +++++++++++++++++++++++++++
 3 files changed, 32 insertions(+)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b54ec16dfbd..ba5c4639ea9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1165,6 +1165,7 @@ extern int		netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
 #define HAVE_NETIF_RECEIVE_SKB 1
 extern int		netif_receive_skb(struct sk_buff *skb);
+extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
 extern int		dev_ethtool(struct net *net, struct ifreq *);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 68df12d3664..916061f681b 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -14,6 +14,9 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 		return NET_RX_DROP;
 	}
 
+	skb->vlan_tci = vlan_tci;
+	netif_nit_deliver(skb);
+
 	skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
 	if (skb->dev == NULL) {
 		dev_kfree_skb_any(skb);
@@ -22,6 +25,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 		return NET_RX_SUCCESS;
 	}
 	skb->dev->last_rx = jiffies;
+	skb->vlan_tci = 0;
 
 	stats = &skb->dev->stats;
 	stats->rx_packets++;
diff --git a/net/core/dev.c b/net/core/dev.c
index a29a359b15d..feaab4898a5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2068,6 +2068,33 @@ out:
 }
 #endif
 
+/*
+ * 	netif_nit_deliver - deliver received packets to network taps
+ * 	@skb: buffer
+ *
+ * 	This function is used to deliver incoming packets to network
+ * 	taps. It should be used when the normal netif_receive_skb path
+ * 	is bypassed, for example because of VLAN acceleration.
+ */
+void netif_nit_deliver(struct sk_buff *skb)
+{
+	struct packet_type *ptype;
+
+	if (list_empty(&ptype_all))
+		return;
+
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb->mac_len = skb->network_header - skb->mac_header;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, &ptype_all, list) {
+		if (!ptype->dev || ptype->dev == skb->dev)
+			deliver_skb(skb, ptype, skb->dev);
+	}
+	rcu_read_unlock();
+}
+
 /**
  *	netif_receive_skb - process receive buffer from network
  *	@skb: buffer to process
-- 
cgit v1.2.3-70-g09d2


From bbd6ef87c544d88c30e4b762b1b61ef267a7d279 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 22:50:15 -0700
Subject: packet: support extensible, 64 bit clean mmaped ring structure

The tpacket_hdr is not 64 bit clean due to use of an unsigned long
and can't be extended because the following struct sockaddr_ll needs
to be at a fixed offset.

Add support for a version 2 tpacket protocol that removes these
limitations.

Userspace can query the header size through a new getsockopt option
and change the protocol version through a setsockopt option. The
changes needed to switch to the new protocol version are:

1. replace struct tpacket_hdr by struct tpacket2_hdr
2. query header len and save
3. set protocol version to 2
 - set up ring as usual
4. for getting the sockaddr_ll, use (void *)hdr + TPACKET_ALIGN(hdrlen)
   instead of (void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))

Steps 2 and 4 can be omitted if the struct sockaddr_ll isn't needed.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h |  21 ++++++
 net/packet/af_packet.c    | 179 +++++++++++++++++++++++++++++++++++++---------
 2 files changed, 167 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index ad09609227f..d4d3c82448f 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -43,6 +43,8 @@ struct sockaddr_ll
 #define PACKET_COPY_THRESH		7
 #define PACKET_AUXDATA			8
 #define PACKET_ORIGDEV			9
+#define PACKET_VERSION			10
+#define PACKET_HDRLEN			11
 
 struct tpacket_stats
 {
@@ -79,6 +81,25 @@ struct tpacket_hdr
 #define TPACKET_ALIGN(x)	(((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
 #define TPACKET_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
 
+struct tpacket2_hdr
+{
+	__u32		tp_status;
+	__u32		tp_len;
+	__u32		tp_snaplen;
+	__u16		tp_mac;
+	__u16		tp_net;
+	__u32		tp_sec;
+	__u32		tp_nsec;
+};
+
+#define TPACKET2_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
+
+enum tpacket_versions
+{
+	TPACKET_V1,
+	TPACKET_V2,
+};
+
 /*
    Frame structure:
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9f226916668..4f059775d48 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -186,6 +186,8 @@ struct packet_sock {
 	unsigned int            pg_vec_order;
 	unsigned int		pg_vec_pages;
 	unsigned int		pg_vec_len;
+	enum tpacket_versions	tp_version;
+	unsigned int		tp_hdrlen;
 #endif
 };
 
@@ -201,14 +203,52 @@ struct packet_skb_cb {
 
 #ifdef CONFIG_PACKET_MMAP
 
-static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
+static void *packet_lookup_frame(struct packet_sock *po, unsigned int position,
+				 int status)
 {
 	unsigned int pg_vec_pos, frame_offset;
+	union {
+		struct tpacket_hdr *h1;
+		struct tpacket2_hdr *h2;
+		void *raw;
+	} h;
 
 	pg_vec_pos = position / po->frames_per_block;
 	frame_offset = position % po->frames_per_block;
 
-	return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
+	h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
+	switch (po->tp_version) {
+	case TPACKET_V1:
+		if (status != h.h1->tp_status ? TP_STATUS_USER :
+						TP_STATUS_KERNEL)
+			return NULL;
+		break;
+	case TPACKET_V2:
+		if (status != h.h2->tp_status ? TP_STATUS_USER :
+						TP_STATUS_KERNEL)
+			return NULL;
+		break;
+	}
+	return h.raw;
+}
+
+static void __packet_set_status(struct packet_sock *po, void *frame, int status)
+{
+	union {
+		struct tpacket_hdr *h1;
+		struct tpacket2_hdr *h2;
+		void *raw;
+	} h;
+
+	h.raw = frame;
+	switch (po->tp_version) {
+	case TPACKET_V1:
+		h.h1->tp_status = status;
+		break;
+	case TPACKET_V2:
+		h.h2->tp_status = status;
+		break;
+	}
 }
 #endif
 
@@ -551,14 +591,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	struct sock *sk;
 	struct packet_sock *po;
 	struct sockaddr_ll *sll;
-	struct tpacket_hdr *h;
+	union {
+		struct tpacket_hdr *h1;
+		struct tpacket2_hdr *h2;
+		void *raw;
+	} h;
 	u8 * skb_head = skb->data;
 	int skb_len = skb->len;
 	unsigned int snaplen, res;
 	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
-	unsigned short macoff, netoff;
+	unsigned short macoff, netoff, hdrlen;
 	struct sk_buff *copy_skb = NULL;
 	struct timeval tv;
+	struct timespec ts;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -590,10 +635,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 		snaplen = res;
 
 	if (sk->sk_type == SOCK_DGRAM) {
-		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
+		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16;
 	} else {
 		unsigned maclen = skb_network_offset(skb);
-		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
+		netoff = TPACKET_ALIGN(po->tp_hdrlen +
+				       (maclen < 16 ? 16 : maclen));
 		macoff = netoff - maclen;
 	}
 
@@ -616,9 +662,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	}
 
 	spin_lock(&sk->sk_receive_queue.lock);
-	h = packet_lookup_frame(po, po->head);
-
-	if (h->tp_status)
+	h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL);
+	if (!h.raw)
 		goto ring_is_full;
 	po->head = po->head != po->frame_max ? po->head+1 : 0;
 	po->stats.tp_packets++;
@@ -630,20 +675,40 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 		status &= ~TP_STATUS_LOSING;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
-	skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
+	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
 
-	h->tp_len = skb->len;
-	h->tp_snaplen = snaplen;
-	h->tp_mac = macoff;
-	h->tp_net = netoff;
-	if (skb->tstamp.tv64)
-		tv = ktime_to_timeval(skb->tstamp);
-	else
-		do_gettimeofday(&tv);
-	h->tp_sec = tv.tv_sec;
-	h->tp_usec = tv.tv_usec;
+	switch (po->tp_version) {
+	case TPACKET_V1:
+		h.h1->tp_len = skb->len;
+		h.h1->tp_snaplen = snaplen;
+		h.h1->tp_mac = macoff;
+		h.h1->tp_net = netoff;
+		if (skb->tstamp.tv64)
+			tv = ktime_to_timeval(skb->tstamp);
+		else
+			do_gettimeofday(&tv);
+		h.h1->tp_sec = tv.tv_sec;
+		h.h1->tp_usec = tv.tv_usec;
+		hdrlen = sizeof(*h.h1);
+		break;
+	case TPACKET_V2:
+		h.h2->tp_len = skb->len;
+		h.h2->tp_snaplen = snaplen;
+		h.h2->tp_mac = macoff;
+		h.h2->tp_net = netoff;
+		if (skb->tstamp.tv64)
+			ts = ktime_to_timespec(skb->tstamp);
+		else
+			getnstimeofday(&ts);
+		h.h2->tp_sec = ts.tv_sec;
+		h.h2->tp_nsec = ts.tv_nsec;
+		hdrlen = sizeof(*h.h2);
+		break;
+	default:
+		BUG();
+	}
 
-	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
+	sll = h.raw + TPACKET_ALIGN(hdrlen);
 	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
 	sll->sll_family = AF_PACKET;
 	sll->sll_hatype = dev->type;
@@ -654,14 +719,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	else
 		sll->sll_ifindex = dev->ifindex;
 
-	h->tp_status = status;
+	__packet_set_status(po, h.raw, status);
 	smp_mb();
 
 	{
 		struct page *p_start, *p_end;
-		u8 *h_end = (u8 *)h + macoff + snaplen - 1;
+		u8 *h_end = h.raw + macoff + snaplen - 1;
 
-		p_start = virt_to_page(h);
+		p_start = virt_to_page(h.raw);
 		p_end = virt_to_page(h_end);
 		while (p_start <= p_end) {
 			flush_dcache_page(p_start);
@@ -1362,6 +1427,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		pkt_sk(sk)->copy_thresh = val;
 		return 0;
 	}
+	case PACKET_VERSION:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (po->pg_vec)
+			return -EBUSY;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+		switch (val) {
+		case TPACKET_V1:
+		case TPACKET_V2:
+			po->tp_version = val;
+			return 0;
+		default:
+			return -EINVAL;
+		}
+	}
 #endif
 	case PACKET_AUXDATA:
 	{
@@ -1437,6 +1521,31 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 
 		data = &val;
 		break;
+#ifdef CONFIG_PACKET_MMAP
+	case PACKET_VERSION:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = po->tp_version;
+		data = &val;
+		break;
+	case PACKET_HDRLEN:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		if (copy_from_user(&val, optval, len))
+			return -EFAULT;
+		switch (val) {
+		case TPACKET_V1:
+			val = sizeof(struct tpacket_hdr);
+			break;
+		case TPACKET_V2:
+			val = sizeof(struct tpacket2_hdr);
+			break;
+		default:
+			return -EINVAL;
+		}
+		data = &val;
+		break;
+#endif
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1570,11 +1679,8 @@ static unsigned int packet_poll(struct file * file, struct socket *sock,
 	spin_lock_bh(&sk->sk_receive_queue.lock);
 	if (po->pg_vec) {
 		unsigned last = po->head ? po->head-1 : po->frame_max;
-		struct tpacket_hdr *h;
-
-		h = packet_lookup_frame(po, last);
 
-		if (h->tp_status)
+		if (packet_lookup_frame(po, last, TP_STATUS_USER))
 			mask |= POLLIN | POLLRDNORM;
 	}
 	spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -1669,11 +1775,20 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 		if (unlikely(po->pg_vec))
 			return -EBUSY;
 
+		switch (po->tp_version) {
+		case TPACKET_V1:
+			po->tp_hdrlen = TPACKET_HDRLEN;
+			break;
+		case TPACKET_V2:
+			po->tp_hdrlen = TPACKET2_HDRLEN;
+			break;
+		}
+
 		if (unlikely((int)req->tp_block_size <= 0))
 			return -EINVAL;
 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
 			return -EINVAL;
-		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
+		if (unlikely(req->tp_frame_size < po->tp_hdrlen))
 			return -EINVAL;
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			return -EINVAL;
@@ -1692,13 +1807,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 			goto out;
 
 		for (i = 0; i < req->tp_block_nr; i++) {
-			char *ptr = pg_vec[i];
-			struct tpacket_hdr *header;
+			void *ptr = pg_vec[i];
 			int k;
 
 			for (k = 0; k < po->frames_per_block; k++) {
-				header = (struct tpacket_hdr *) ptr;
-				header->tp_status = TP_STATUS_KERNEL;
+				__packet_set_status(po, ptr, TP_STATUS_KERNEL);
 				ptr += req->tp_frame_size;
 			}
 		}
-- 
cgit v1.2.3-70-g09d2


From 393e52e33c6c26ec7db290dab803bac1bed962d4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 22:50:39 -0700
Subject: packet: deliver VLAN TCI to userspace

Store the VLAN tag in the auxillary data/tpacket2_hdr so userspace can
properly deal with hardware VLAN tagging/stripping.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h | 2 ++
 net/packet/af_packet.c    | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index d4d3c82448f..a630295b255 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -59,6 +59,7 @@ struct tpacket_auxdata
 	__u32		tp_snaplen;
 	__u16		tp_mac;
 	__u16		tp_net;
+	__u16		tp_vlan_tci;
 };
 
 struct tpacket_hdr
@@ -90,6 +91,7 @@ struct tpacket2_hdr
 	__u16		tp_net;
 	__u32		tp_sec;
 	__u32		tp_nsec;
+	__u16		tp_vlan_tci;
 };
 
 #define TPACKET2_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4f059775d48..db792e02a37 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -702,6 +702,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 			getnstimeofday(&ts);
 		h.h2->tp_sec = ts.tv_sec;
 		h.h2->tp_nsec = ts.tv_nsec;
+		h.h2->tp_vlan_tci = skb->vlan_tci;
 		hdrlen = sizeof(*h.h2);
 		break;
 	default:
@@ -1172,6 +1173,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		aux.tp_snaplen = skb->len;
 		aux.tp_mac = 0;
 		aux.tp_net = skb_network_offset(skb);
+		aux.tp_vlan_tci = skb->vlan_tci;
 
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}
-- 
cgit v1.2.3-70-g09d2


From 19b9a4e256758a0c032c915eebe0a39b370ea133 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 22:51:01 -0700
Subject: vlan: ethtool ->get_flags support

Allow to query LRO settings of underlying device when VLAN RX
acceleration is used.

Suggested by Ben Hutchings <bhutchings@solarflare.com>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 9efd3c67c1d..29aa4cc2a26 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -725,9 +725,22 @@ static u32 vlan_ethtool_get_rx_csum(struct net_device *dev)
 	return real_dev->ethtool_ops->get_rx_csum(real_dev);
 }
 
+static u32 vlan_ethtool_get_flags(struct net_device *dev)
+{
+	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct net_device *real_dev = vlan->real_dev;
+
+	if (!(real_dev->features & NETIF_F_HW_VLAN_RX) ||
+	    real_dev->ethtool_ops == NULL ||
+	    real_dev->ethtool_ops->get_flags == NULL)
+		return 0;
+	return real_dev->ethtool_ops->get_flags(real_dev);
+}
+
 static const struct ethtool_ops vlan_ethtool_ops = {
 	.get_link		= ethtool_op_get_link,
 	.get_rx_csum		= vlan_ethtool_get_rx_csum,
+	.get_flags		= vlan_ethtool_get_flags,
 };
 
 void vlan_setup(struct net_device *dev)
-- 
cgit v1.2.3-70-g09d2


From 1349fe9a6bc580fb80e1a43a93b68c15d674ed0a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 22:51:19 -0700
Subject: vlan: clean up vlan_dev_hard_header()

Remove some debugging and excessive comments, merge the two
dev_hard_header calls into one.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 55 +++++++++-------------------------------------------
 1 file changed, 9 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 29aa4cc2a26..c6678605fc0 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -256,43 +256,18 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				unsigned int len)
 {
 	struct vlan_hdr *vhdr;
+	unsigned int vhdrlen = 0;
 	u16 vlan_tci = 0;
-	int rc = 0;
-	int build_vlan_header = 0;
-
-	pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n",
-		 __func__, skb, type, len, vlan_dev_info(dev)->vlan_id,
-		 daddr);
+	int rc;
 
 	if (WARN_ON(skb_headroom(skb) < dev->hard_header_len))
 		return -ENOSPC;
 
-	/* build vlan header only if re_order_header flag is NOT set.  This
-	 * fixes some programs that get confused when they see a VLAN device
-	 * sending a frame that is VLAN encoded (the consensus is that the VLAN
-	 * device should look completely like an Ethernet device when the
-	 * REORDER_HEADER flag is set)	The drawback to this is some extra
-	 * header shuffling in the hard_start_xmit.  Users can turn off this
-	 * REORDER behaviour with the vconfig tool.
-	 */
-	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR))
-		build_vlan_header = 1;
-
-	if (build_vlan_header) {
+	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
-		/* build the four bytes that make this a VLAN header. */
-
-		/* Now, construct the second two bytes. This field looks
-		 * something like:
-		 * usr_priority: 3 bits	 (high bits)
-		 * CFI		 1 bit
-		 * VLAN ID	 12 bits (low bits)
-		 *
-		 */
 		vlan_tci = vlan_dev_info(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
 		/*
@@ -300,37 +275,25 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		 *  put the length in here instead. It is up to the 802.2
 		 *  layer to carry protocol information.
 		 */
-
 		if (type != ETH_P_802_3)
 			vhdr->h_vlan_encapsulated_proto = htons(type);
 		else
 			vhdr->h_vlan_encapsulated_proto = htons(len);
 
 		skb->protocol = htons(ETH_P_8021Q);
+		type = ETH_P_8021Q;
+		vhdrlen = VLAN_HLEN;
 	}
 
 	/* Before delegating work to the lower layer, enter our MAC-address */
 	if (saddr == NULL)
 		saddr = dev->dev_addr;
 
+	/* Now make the underlying real hard header */
 	dev = vlan_dev_info(dev)->real_dev;
-
-	if (build_vlan_header) {
-		/* Now make the underlying real hard header */
-		rc = dev_hard_header(skb, dev, ETH_P_8021Q, daddr, saddr,
-				     len + VLAN_HLEN);
-		if (rc > 0)
-			rc += VLAN_HLEN;
-		else if (rc < 0)
-			rc -= VLAN_HLEN;
-	} else
-		/* If here, then we'll just make a normal looking ethernet
-		 * frame, but, the hard_start_xmit method will insert the tag
-		 * (it has to be able to do this for bridged and other skbs
-		 * that don't come down the protocol stack in an orderly manner.
-		 */
-		rc = dev_hard_header(skb, dev, type, daddr, saddr, len);
-
+	rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
+	if (rc > 0)
+		rc += vhdrlen;
 	return rc;
 }
 
-- 
cgit v1.2.3-70-g09d2


From d80aa31bbffc4bc8b5be36c57cbea128f52e1e1f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 22:51:39 -0700
Subject: vlan: clean up hard_start_xmit functions

Remove excessive comments and debugging, use NETDEV_TX codes,
remove some empty lines.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 41 ++++++-----------------------------------
 1 file changed, 6 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c6678605fc0..c3adba9501c 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -307,53 +307,31 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
-
 	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
-		vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		int orig_headroom = skb_headroom(skb);
+	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+		unsigned int orig_headroom = skb_headroom(skb);
 		u16 vlan_tci;
 
-		/* This is not a VLAN frame...but we can fix that! */
 		vlan_dev_info(dev)->cnt_encap_on_xmit++;
 
-		pr_debug("%s: proto to encap: 0x%hx\n",
-			 __func__, ntohs(veth->h_vlan_proto));
-		/* Construct the second two bytes. This field looks something
-		 * like:
-		 * usr_priority: 3 bits	 (high bits)
-		 * CFI		 1 bit
-		 * VLAN ID	 12 bits (low bits)
-		 */
 		vlan_tci = vlan_dev_info(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-
 		skb = __vlan_put_tag(skb, vlan_tci);
 		if (!skb) {
 			stats->tx_dropped++;
-			return 0;
+			return NETDEV_TX_OK;
 		}
 
 		if (orig_headroom < VLAN_HLEN)
 			vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
 	}
 
-	pr_debug("%s: about to send skb: %p to dev: %s\n",
-		__func__, skb, skb->dev->name);
-	pr_debug("  " MAC_FMT " " MAC_FMT " %4hx %4hx %4hx\n",
-		 veth->h_dest[0], veth->h_dest[1], veth->h_dest[2],
-		 veth->h_dest[3], veth->h_dest[4], veth->h_dest[5],
-		 veth->h_source[0], veth->h_source[1], veth->h_source[2],
-		 veth->h_source[3], veth->h_source[4], veth->h_source[5],
-		 veth->h_vlan_proto, veth->h_vlan_TCI,
-		 veth->h_vlan_encapsulated_proto);
-
-	stats->tx_packets++; /* for statics only */
+	stats->tx_packets++;
 	stats->tx_bytes += skb->len;
 
 	skb->dev = vlan_dev_info(dev)->real_dev;
 	dev_queue_xmit(skb);
-
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
@@ -362,12 +340,6 @@ static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
 	struct net_device_stats *stats = &dev->stats;
 	u16 vlan_tci;
 
-	/* Construct the second two bytes. This field looks something
-	 * like:
-	 * usr_priority: 3 bits	 (high bits)
-	 * CFI		 1 bit
-	 * VLAN ID	 12 bits (low bits)
-	 */
 	vlan_tci = vlan_dev_info(dev)->vlan_id;
 	vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 	skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
@@ -377,8 +349,7 @@ static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
 
 	skb->dev = vlan_dev_info(dev)->real_dev;
 	dev_queue_xmit(skb);
-
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
-- 
cgit v1.2.3-70-g09d2


From 61362766d769c934a9d12d5516323c544c161908 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Jul 2008 22:51:55 -0700
Subject: vlan: remove unnecessary include statements

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     |  8 ++------
 net/8021q/vlan_dev.c |  7 -------
 net/8021q/vlanproc.c | 11 ++---------
 3 files changed, 4 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 68bdcf4a795..9f04cab92f9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -18,21 +18,17 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-#include <asm/uaccess.h> /* for copy_from_user */
 #include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <net/datalink.h>
-#include <linux/mm.h>
-#include <linux/in.h>
 #include <linux/init.h>
-#include <net/p8022.h>
-#include <net/arp.h>
 #include <linux/rtnetlink.h>
 #include <linux/notifier.h>
+#include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <asm/uaccess.h>
 
 #include <linux/if_vlan.h>
 #include "vlan.h"
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c3adba9501c..6b985f23fd9 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -21,22 +21,15 @@
  */
 
 #include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/in.h>
-#include <linux/init.h>
-#include <asm/uaccess.h> /* for copy_from_user */
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
-#include <net/datalink.h>
-#include <net/p8022.h>
 #include <net/arp.h>
 
 #include "vlan.h"
 #include "vlanproc.h"
 #include <linux/if_vlan.h>
-#include <net/ip.h>
 
 /*
  *	Rebuild the Ethernet MAC header. This is called after an ARP
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 6073a888b6f..0feefa4e1a4 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -18,16 +18,9 @@
  *****************************************************************************/
 
 #include <linux/module.h>
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/errno.h>	/* return codes */
+#include <linux/errno.h>
 #include <linux/kernel.h>
-#include <linux/slab.h>		/* kmalloc(), kfree() */
-#include <linux/mm.h>
-#include <linux/string.h>	/* inline mem*, str* functions */
-#include <linux/init.h>		/* __initfunc et al. */
-#include <asm/byteorder.h>	/* htons(), etc. */
-#include <asm/uaccess.h>	/* copy_to_user */
-#include <asm/io.h>
+#include <linux/string.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/fs.h>
-- 
cgit v1.2.3-70-g09d2


From 0388b0042624714e6f8db8cc7994101a0a02d392 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jul 2008 23:00:43 -0700
Subject: icmp: add struct net argument to icmp_out_count

This routine deals with ICMP statistics, but doesn't have a
struct net at hands, so add one.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h   | 3 ++-
 net/ipv4/icmp.c      | 2 +-
 net/ipv4/ip_output.c | 3 ++-
 net/ipv4/raw.c       | 3 ++-
 4 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/icmp.h b/include/net/icmp.h
index dddb839ff4b..38ca2f39db7 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -44,12 +44,13 @@ DECLARE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
 struct dst_entry;
 struct net_proto_family;
 struct sk_buff;
+struct net;
 
 extern void	icmp_send(struct sk_buff *skb_in,  int type, int code, __be32 info);
 extern int	icmp_rcv(struct sk_buff *skb);
 extern int	icmp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 extern int	icmp_init(void);
-extern void	icmp_out_count(unsigned char type);
+extern void	icmp_out_count(struct net *net, unsigned char type);
 
 /* Move into dst.h ? */
 extern int 	xrlim_allow(struct dst_entry *dst, int timeout);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index aa7cf46853b..1ffe7add492 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -296,7 +296,7 @@ out:
 /*
  *	Maintain the counters used in the SNMP statistics for outgoing ICMP
  */
-void icmp_out_count(unsigned char type)
+void icmp_out_count(struct net *net, unsigned char type)
 {
 	ICMPMSGOUT_INC_STATS(type);
 	ICMP_INC_STATS(ICMP_MIB_OUTMSGS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f1278eecf56..f003186b93b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1211,6 +1211,7 @@ int ip_push_pending_frames(struct sock *sk)
 	struct sk_buff *skb, *tmp_skb;
 	struct sk_buff **tail_skb;
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	struct ip_options *opt = NULL;
 	struct rtable *rt = (struct rtable *)inet->cork.dst;
 	struct iphdr *iph;
@@ -1280,7 +1281,7 @@ int ip_push_pending_frames(struct sock *sk)
 	skb->dst = dst_clone(&rt->u.dst);
 
 	if (iph->protocol == IPPROTO_ICMP)
-		icmp_out_count(((struct icmphdr *)
+		icmp_out_count(net, ((struct icmphdr *)
 			skb_transport_header(skb))->type);
 
 	/* Netfilter gets whole the not fragmented skb. */
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 925fdf18cf9..7f39ea443ec 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -320,6 +320,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 			unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	struct iphdr *iph;
 	struct sk_buff *skb;
 	unsigned int iphlen;
@@ -368,7 +369,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
 	if (iph->protocol == IPPROTO_ICMP)
-		icmp_out_count(((struct icmphdr *)
+		icmp_out_count(net, ((struct icmphdr *)
 			skb_transport_header(skb))->type);
 
 	err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-- 
cgit v1.2.3-70-g09d2


From fd54d716b1f6a3551ec17a4bb34027727b2db09a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jul 2008 23:01:40 -0700
Subject: inet: toss struct net initialization around

Some places, that deal with ICMP statistics already have where
to get a struct net from, but use it directly, without declaring
a separate variable on the stack.

Since I will need this net soon, I declare a struct net on the
stack and use it in the existing places in a separate patch not
to spoil the future ones.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c     | 3 ++-
 net/ipv4/icmp.c     | 4 +---
 net/ipv4/tcp_ipv4.c | 3 ++-
 net/ipv4/udp.c      | 3 ++-
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 37d27bcb361..cc4f33461f2 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -205,13 +205,14 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	struct sock *sk;
 	__u64 seq;
 	int err;
+	struct net *net = dev_net(skb->dev);
 
 	if (skb->len < (iph->ihl << 2) + 8) {
 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
 		return;
 	}
 
-	sk = inet_lookup(dev_net(skb->dev), &dccp_hashinfo,
+	sk = inet_lookup(net, &dccp_hashinfo,
 			iph->daddr, dh->dccph_dport,
 			iph->saddr, dh->dccph_sport, inet_iif(skb));
 	if (sk == NULL) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1ffe7add492..56d6b943345 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -973,6 +973,7 @@ int icmp_rcv(struct sk_buff *skb)
 {
 	struct icmphdr *icmph;
 	struct rtable *rt = skb->rtable;
+	struct net *net = dev_net(rt->u.dst.dev);
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		int nh;
@@ -1027,9 +1028,6 @@ int icmp_rcv(struct sk_buff *skb)
 	 */
 
 	if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
-		struct net *net;
-
-		net = dev_net(rt->u.dst.dev);
 		/*
 		 *	RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
 		 *	  silently ignored (we let user decide with a sysctl).
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4300bcf2cea..ca41b77f3f3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -343,13 +343,14 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 	struct sock *sk;
 	__u32 seq;
 	int err;
+	struct net *net = dev_net(skb->dev);
 
 	if (skb->len < (iph->ihl << 2) + 8) {
 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
 		return;
 	}
 
-	sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest,
+	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 			iph->saddr, th->source, inet_iif(skb));
 	if (!sk) {
 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7187121e922..9342cfda3d0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -354,8 +354,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
 	struct sock *sk;
 	int harderr;
 	int err;
+	struct net *net = dev_net(skb->dev);
 
-	sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest,
+	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
 			iph->saddr, uh->source, skb->dev->ifindex, udptable);
 	if (sk == NULL) {
 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
-- 
cgit v1.2.3-70-g09d2


From 75c939bb4d6da790f758a2a3dcc7432f6d8778ee Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jul 2008 23:02:35 -0700
Subject: mib: add struct net to ICMP_INC_STATS

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h | 2 +-
 net/ipv4/icmp.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/icmp.h b/include/net/icmp.h
index e34e981fe74..c86fef8c45e 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -31,7 +31,7 @@ struct icmp_err {
 extern struct icmp_err icmp_err_convert[];
 DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
 DECLARE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
-#define ICMP_INC_STATS(field)		SNMP_INC_STATS(icmp_statistics, field)
+#define ICMP_INC_STATS(net, field)	SNMP_INC_STATS(icmp_statistics, field)
 #define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(field)	SNMP_INC_STATS(icmpmsg_statistics, field+256)
 #define ICMPMSGIN_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmpmsg_statistics, field)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 56d6b943345..e94de411cfa 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -299,7 +299,7 @@ out:
 void icmp_out_count(struct net *net, unsigned char type)
 {
 	ICMPMSGOUT_INC_STATS(type);
-	ICMP_INC_STATS(ICMP_MIB_OUTMSGS);
+	ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From dcfc23cac103b54dbc00a6f52f47656ad5c75844 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jul 2008 23:03:00 -0700
Subject: mib: add struct net to ICMP_INC_STATS_BH

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h  |  2 +-
 net/dccp/ipv4.c     |  4 ++--
 net/ipv4/icmp.c     | 10 +++++-----
 net/ipv4/tcp_ipv4.c |  4 ++--
 net/ipv4/udp.c      |  2 +-
 net/sctp/input.c    |  5 +++--
 6 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/icmp.h b/include/net/icmp.h
index c86fef8c45e..e2ae6409ff8 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -32,7 +32,7 @@ extern struct icmp_err icmp_err_convert[];
 DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
 DECLARE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
 #define ICMP_INC_STATS(net, field)	SNMP_INC_STATS(icmp_statistics, field)
-#define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmp_statistics, field)
+#define ICMP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH(icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(field)	SNMP_INC_STATS(icmpmsg_statistics, field+256)
 #define ICMPMSGIN_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmpmsg_statistics, field)
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index cc4f33461f2..2b03c47cab1 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -208,7 +208,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	struct net *net = dev_net(skb->dev);
 
 	if (skb->len < (iph->ihl << 2) + 8) {
-		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
 	}
 
@@ -216,7 +216,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 			iph->daddr, dh->dccph_dport,
 			iph->saddr, dh->dccph_sport, inet_iif(skb));
 	if (sk == NULL) {
-		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
 	}
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e94de411cfa..56a7bbc7950 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -763,7 +763,7 @@ static void icmp_unreach(struct sk_buff *skb)
 out:
 	return;
 out_err:
-	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 	goto out;
 }
 
@@ -803,7 +803,7 @@ static void icmp_redirect(struct sk_buff *skb)
 out:
 	return;
 out_err:
-	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
 	goto out;
 }
 
@@ -874,7 +874,7 @@ static void icmp_timestamp(struct sk_buff *skb)
 out:
 	return;
 out_err:
-	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS);
 	goto out;
 }
 
@@ -994,7 +994,7 @@ int icmp_rcv(struct sk_buff *skb)
 		skb_set_network_header(skb, nh);
 	}
 
-	ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
+	ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
@@ -1053,7 +1053,7 @@ drop:
 	kfree_skb(skb);
 	return 0;
 error:
-	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 	goto drop;
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ca41b77f3f3..0fefd440941 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -346,14 +346,14 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 	struct net *net = dev_net(skb->dev);
 
 	if (skb->len < (iph->ihl << 2) + 8) {
-		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
 	}
 
 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
 			iph->saddr, th->source, inet_iif(skb));
 	if (!sk) {
-		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
 	}
 	if (sk->sk_state == TCP_TIME_WAIT) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9342cfda3d0..fdd4faa1f12 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -359,7 +359,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
 	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
 			iph->saddr, uh->source, skb->dev->ifindex, udptable);
 	if (sk == NULL) {
-		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;	/* No socket for error */
 	}
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index d354a23972d..ed8834e7f14 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -61,6 +61,7 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 #include <net/sctp/checksum.h>
+#include <net/net_namespace.h>
 
 /* Forward declarations for internal helpers. */
 static int sctp_rcv_ootb(struct sk_buff *);
@@ -534,7 +535,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	int err;
 
 	if (skb->len < ihlen + 8) {
-		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
 		return;
 	}
 
@@ -548,7 +549,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	skb->network_header = saveip;
 	skb->transport_header = savesctp;
 	if (!sk) {
-		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
 		return;
 	}
 	/* Warning:  The sock lock is held.  Remember to call
-- 
cgit v1.2.3-70-g09d2


From 903fc1964e746b8d8e2971ea20c89b7aeab8bd9a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jul 2008 23:03:35 -0700
Subject: mib: add struct net to ICMPMSGOUT_INC_STATS

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h | 2 +-
 net/ipv4/icmp.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/icmp.h b/include/net/icmp.h
index e2ae6409ff8..ee9c8d4b7ba 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -33,7 +33,7 @@ DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
 DECLARE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
 #define ICMP_INC_STATS(net, field)	SNMP_INC_STATS(icmp_statistics, field)
 #define ICMP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH(icmp_statistics, field)
-#define ICMPMSGOUT_INC_STATS(field)	SNMP_INC_STATS(icmpmsg_statistics, field+256)
+#define ICMPMSGOUT_INC_STATS(net, field)	SNMP_INC_STATS(icmpmsg_statistics, field+256)
 #define ICMPMSGIN_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmpmsg_statistics, field)
 
 struct dst_entry;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 56a7bbc7950..6f4c65b898b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -298,7 +298,7 @@ out:
  */
 void icmp_out_count(struct net *net, unsigned char type)
 {
-	ICMPMSGOUT_INC_STATS(type);
+	ICMPMSGOUT_INC_STATS(net, type);
 	ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
 }
 
-- 
cgit v1.2.3-70-g09d2


From f66ac03d497c162c70cd0ccc802ce1777073cdf3 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 14 Jul 2008 23:04:00 -0700
Subject: mib: add struct net to ICMPMSGIN_INC_STATS_BH

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h | 2 +-
 net/ipv4/icmp.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/icmp.h b/include/net/icmp.h
index ee9c8d4b7ba..03b9972b875 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -34,7 +34,7 @@ DECLARE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
 #define ICMP_INC_STATS(net, field)	SNMP_INC_STATS(icmp_statistics, field)
 #define ICMP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH(icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(net, field)	SNMP_INC_STATS(icmpmsg_statistics, field+256)
-#define ICMPMSGIN_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmpmsg_statistics, field)
+#define ICMPMSGIN_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH(icmpmsg_statistics, field)
 
 struct dst_entry;
 struct net_proto_family;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 6f4c65b898b..ea60ad41008 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1012,7 +1012,7 @@ int icmp_rcv(struct sk_buff *skb)
 
 	icmph = icmp_hdr(skb);
 
-	ICMPMSGIN_INC_STATS_BH(icmph->type);
+	ICMPMSGIN_INC_STATS_BH(net, icmph->type);
 	/*
 	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
 	 *
-- 
cgit v1.2.3-70-g09d2


From f1f28aa3510ddb84c966bac65611bb866c77a092 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Jul 2008 00:08:33 -0700
Subject: netdev: Add addr_list_lock to struct net_device.

This will be used to protect the per-device unicast and multicast
address lists, as well as the callbacks into the drivers which
configure such state such as ->set_rx_mode() and ->set_multicast_list().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 net/core/dev.c            | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ba5c4639ea9..fd036521918 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -609,6 +609,7 @@ struct net_device
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
+	spinlock_t		addr_list_lock;
 	struct dev_addr_list	*uc_list;	/* Secondary unicast mac addresses */
 	int			uc_count;	/* Number of installed ucasts	*/
 	int			uc_promisc;
diff --git a/net/core/dev.c b/net/core/dev.c
index feaab4898a5..d933d1bfa6f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3836,6 +3836,7 @@ int register_netdevice(struct net_device *dev)
 	BUG_ON(!dev_net(dev));
 	net = dev_net(dev);
 
+	spin_lock_init(&dev->addr_list_lock);
 	netdev_init_queue_locks(dev);
 
 	dev->iflink = -1;
-- 
cgit v1.2.3-70-g09d2


From e308a5d806c852f56590ffdd3834d0df0cbed8d7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Jul 2008 00:13:44 -0700
Subject: netdev: Add netdev->addr_list_lock protection.

Add netif_addr_{lock,unlock}{,_bh}() helpers.

Use them to protect operations that operate on or read
the network device unicast and multicast address lists.

Also use them in cases where the code simply wants to
block calls into the driver's ->set_rx_mode() and
->set_multicast_list() methods.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |  2 ++
 drivers/media/dvb/dvb-core/dvb_net.c           |  2 ++
 drivers/net/bonding/bond_main.c                |  8 ++++++++
 drivers/net/forcedeth.c                        | 16 ++++++++++++++++
 drivers/net/hamradio/6pack.c                   |  2 ++
 drivers/net/hamradio/mkiss.c                   |  2 ++
 drivers/net/ibm_newemac/core.c                 |  4 ++++
 drivers/net/sfc/efx.c                          |  2 ++
 drivers/net/wireless/libertas/main.c           |  2 ++
 include/linux/netdevice.h                      | 20 ++++++++++++++++++++
 net/core/dev.c                                 | 14 ++++++++++++++
 net/core/dev_mcast.c                           | 12 ++++++++++++
 net/mac80211/main.c                            |  4 ++++
 net/mac80211/mlme.c                            |  4 ++++
 14 files changed, 94 insertions(+)

(limited to 'net')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 3f663fb852c..261ab715043 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -775,6 +775,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 
 	local_irq_save(flags);
 	netif_tx_lock(dev);
+	netif_addr_lock(dev);
 	spin_lock(&priv->lock);
 
 	/*
@@ -851,6 +852,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	}
 
 	spin_unlock(&priv->lock);
+	netif_addr_unlock(dev);
 	netif_tx_unlock(dev);
 	local_irq_restore(flags);
 
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index c2334aef414..809d18c663b 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -1134,6 +1134,7 @@ static void wq_set_multicast_list (struct work_struct *work)
 	dvb_net_feed_stop(dev);
 	priv->rx_mode = RX_MODE_UNI;
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 
 	if (dev->flags & IFF_PROMISC) {
 		dprintk("%s: promiscuous mode\n", dev->name);
@@ -1158,6 +1159,7 @@ static void wq_set_multicast_list (struct work_struct *work)
 		}
 	}
 
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 	dvb_net_feed_start(dev);
 }
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8ae7ff31321..ea71abd6f72 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1568,10 +1568,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		}
 
 		netif_tx_lock_bh(bond_dev);
+		netif_addr_lock(bond_dev);
 		/* upload master's mc_list to new slave */
 		for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
 			dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
 		}
+		netif_addr_unlock(bond_dev);
 		netif_tx_unlock_bh(bond_dev);
 	}
 
@@ -1937,7 +1939,9 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 
 		/* flush master's mc_list from slave */
 		netif_tx_lock_bh(bond_dev);
+		netif_addr_lock(bond_dev);
 		bond_mc_list_flush(bond_dev, slave_dev);
+		netif_addr_unlock(bond_dev);
 		netif_tx_unlock_bh(bond_dev);
 	}
 
@@ -2060,7 +2064,9 @@ static int bond_release_all(struct net_device *bond_dev)
 
 			/* flush master's mc_list from slave */
 			netif_tx_lock_bh(bond_dev);
+			netif_addr_lock(bond_dev);
 			bond_mc_list_flush(bond_dev, slave_dev);
+			netif_addr_unlock(bond_dev);
 			netif_tx_unlock_bh(bond_dev);
 		}
 
@@ -4674,7 +4680,9 @@ static void bond_free_all(void)
 
 		bond_work_cancel_all(bond);
 		netif_tx_lock_bh(bond_dev);
+		netif_addr_lock(bond_dev);
 		bond_mc_list_destroy(bond);
+		netif_addr_unlock(bond_dev);
 		netif_tx_unlock_bh(bond_dev);
 		/* Release the bonded slaves */
 		bond_release_all(bond_dev);
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 786d668c612..4ed89fa9ae4 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -2831,6 +2831,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		 */
 		nv_disable_irq(dev);
 		netif_tx_lock_bh(dev);
+		netif_addr_lock(dev);
 		spin_lock(&np->lock);
 		/* stop engines */
 		nv_stop_rxtx(dev);
@@ -2855,6 +2856,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		/* restart rx engine */
 		nv_start_rxtx(dev);
 		spin_unlock(&np->lock);
+		netif_addr_unlock(dev);
 		netif_tx_unlock_bh(dev);
 		nv_enable_irq(dev);
 	}
@@ -2891,6 +2893,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
 
 	if (netif_running(dev)) {
 		netif_tx_lock_bh(dev);
+		netif_addr_lock(dev);
 		spin_lock_irq(&np->lock);
 
 		/* stop rx engine */
@@ -2902,6 +2905,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
 		/* restart rx engine */
 		nv_start_rx(dev);
 		spin_unlock_irq(&np->lock);
+		netif_addr_unlock(dev);
 		netif_tx_unlock_bh(dev);
 	} else {
 		nv_copy_mac_to_hw(dev);
@@ -3971,6 +3975,7 @@ static void nv_do_nic_poll(unsigned long data)
 		printk(KERN_INFO "forcedeth: MAC in recoverable error state\n");
 		if (netif_running(dev)) {
 			netif_tx_lock_bh(dev);
+			netif_addr_lock(dev);
 			spin_lock(&np->lock);
 			/* stop engines */
 			nv_stop_rxtx(dev);
@@ -3995,6 +4000,7 @@ static void nv_do_nic_poll(unsigned long data)
 			/* restart rx engine */
 			nv_start_rxtx(dev);
 			spin_unlock(&np->lock);
+			netif_addr_unlock(dev);
 			netif_tx_unlock_bh(dev);
 		}
 	}
@@ -4202,6 +4208,7 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 
 		nv_disable_irq(dev);
 		netif_tx_lock_bh(dev);
+		netif_addr_lock(dev);
 		/* with plain spinlock lockdep complains */
 		spin_lock_irqsave(&np->lock, flags);
 		/* stop engines */
@@ -4215,6 +4222,7 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 		 */
 		nv_stop_rxtx(dev);
 		spin_unlock_irqrestore(&np->lock, flags);
+		netif_addr_unlock(dev);
 		netif_tx_unlock_bh(dev);
 	}
 
@@ -4360,10 +4368,12 @@ static int nv_nway_reset(struct net_device *dev)
 		if (netif_running(dev)) {
 			nv_disable_irq(dev);
 			netif_tx_lock_bh(dev);
+			netif_addr_lock(dev);
 			spin_lock(&np->lock);
 			/* stop engines */
 			nv_stop_rxtx(dev);
 			spin_unlock(&np->lock);
+			netif_addr_unlock(dev);
 			netif_tx_unlock_bh(dev);
 			printk(KERN_INFO "%s: link down.\n", dev->name);
 		}
@@ -4471,6 +4481,7 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
 	if (netif_running(dev)) {
 		nv_disable_irq(dev);
 		netif_tx_lock_bh(dev);
+		netif_addr_lock(dev);
 		spin_lock(&np->lock);
 		/* stop engines */
 		nv_stop_rxtx(dev);
@@ -4519,6 +4530,7 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
 		/* restart engines */
 		nv_start_rxtx(dev);
 		spin_unlock(&np->lock);
+		netif_addr_unlock(dev);
 		netif_tx_unlock_bh(dev);
 		nv_enable_irq(dev);
 	}
@@ -4556,10 +4568,12 @@ static int nv_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam*
 	if (netif_running(dev)) {
 		nv_disable_irq(dev);
 		netif_tx_lock_bh(dev);
+		netif_addr_lock(dev);
 		spin_lock(&np->lock);
 		/* stop engines */
 		nv_stop_rxtx(dev);
 		spin_unlock(&np->lock);
+		netif_addr_unlock(dev);
 		netif_tx_unlock_bh(dev);
 	}
 
@@ -4946,6 +4960,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
 			napi_disable(&np->napi);
 #endif
 			netif_tx_lock_bh(dev);
+			netif_addr_lock(dev);
 			spin_lock_irq(&np->lock);
 			nv_disable_hw_interrupts(dev, np->irqmask);
 			if (!(np->msi_flags & NV_MSI_X_ENABLED)) {
@@ -4959,6 +4974,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
 			/* drain rx queue */
 			nv_drain_rxtx(dev);
 			spin_unlock_irq(&np->lock);
+			netif_addr_unlock(dev);
 			netif_tx_unlock_bh(dev);
 		}
 
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 06ad9f302b5..ffc937f5d15 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -300,7 +300,9 @@ static int sp_set_mac_address(struct net_device *dev, void *addr)
 	struct sockaddr_ax25 *sa = addr;
 
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 	memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 
 	return 0;
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 65166035aca..b8740e6a5ce 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -356,7 +356,9 @@ static int ax_set_mac_address(struct net_device *dev, void *addr)
 	struct sockaddr_ax25 *sa = addr;
 
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 	memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 
 	return 0;
diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index babc79ad490..9ca57d36559 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -295,7 +295,9 @@ static void emac_rx_disable(struct emac_instance *dev)
 static inline void emac_netif_stop(struct emac_instance *dev)
 {
 	netif_tx_lock_bh(dev->ndev);
+	netif_addr_lock(dev->ndev);
 	dev->no_mcast = 1;
+	netif_addr_unlock(dev->ndev);
 	netif_tx_unlock_bh(dev->ndev);
 	dev->ndev->trans_start = jiffies;	/* prevent tx timeout */
 	mal_poll_disable(dev->mal, &dev->commac);
@@ -305,9 +307,11 @@ static inline void emac_netif_stop(struct emac_instance *dev)
 static inline void emac_netif_start(struct emac_instance *dev)
 {
 	netif_tx_lock_bh(dev->ndev);
+	netif_addr_lock(dev->ndev);
 	dev->no_mcast = 0;
 	if (dev->mcast_pending && netif_running(dev->ndev))
 		__emac_set_multicast_list(dev);
+	netif_addr_unlock(dev->ndev);
 	netif_tx_unlock_bh(dev->ndev);
 
 	netif_wake_queue(dev->ndev);
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 74265d8553b..e1257e556e4 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -697,6 +697,8 @@ static void efx_stop_port(struct efx_nic *efx)
 	/* Serialise against efx_set_multicast_list() */
 	if (efx_dev_registered(efx)) {
 		netif_tx_lock_bh(efx->net_dev);
+		netif_addr_lock(efx->net_dev);
+		netif_addr_unlock(efx->net_dev);
 		netif_tx_unlock_bh(efx->net_dev);
 	}
 }
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index abd6d9ed8f4..42e9b2771ea 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -594,6 +594,7 @@ static int lbs_add_mcast_addrs(struct cmd_ds_mac_multicast_adr *cmd,
 		return nr_addrs;
 
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 	for (mc_list = dev->mc_list; mc_list; mc_list = mc_list->next) {
 		if (mac_in_list(cmd->maclist, nr_addrs, mc_list->dmi_addr)) {
 			lbs_deb_net("mcast address %s:%s skipped\n", dev->name,
@@ -608,6 +609,7 @@ static int lbs_add_mcast_addrs(struct cmd_ds_mac_multicast_adr *cmd,
 			    print_mac(mac, mc_list->dmi_addr));
 		i++;
 	}
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 	if (mc_list)
 		return -EOVERFLOW;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fd036521918..570cf7affa7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1498,6 +1498,26 @@ static inline void netif_tx_disable(struct net_device *dev)
 	netif_tx_unlock_bh(dev);
 }
 
+static inline void netif_addr_lock(struct net_device *dev)
+{
+	spin_lock(&dev->addr_list_lock);
+}
+
+static inline void netif_addr_lock_bh(struct net_device *dev)
+{
+	spin_lock_bh(&dev->addr_list_lock);
+}
+
+static inline void netif_addr_unlock(struct net_device *dev)
+{
+	spin_unlock(&dev->addr_list_lock);
+}
+
+static inline void netif_addr_unlock_bh(struct net_device *dev)
+{
+	spin_unlock_bh(&dev->addr_list_lock);
+}
+
 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
 
 extern void		ether_setup(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index d933d1bfa6f..ef1502d71f2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2982,7 +2982,9 @@ void __dev_set_rx_mode(struct net_device *dev)
 void dev_set_rx_mode(struct net_device *dev)
 {
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 	__dev_set_rx_mode(dev);
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 }
 
@@ -3062,9 +3064,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
 	ASSERT_RTNL();
 
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
 	if (!err)
 		__dev_set_rx_mode(dev);
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 	return err;
 }
@@ -3088,9 +3092,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
 	ASSERT_RTNL();
 
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
 	if (!err)
 		__dev_set_rx_mode(dev);
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 	return err;
 }
@@ -3159,10 +3165,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
 	int err = 0;
 
 	netif_tx_lock_bh(to);
+	netif_addr_lock(to);
 	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
 			      &from->uc_list, &from->uc_count);
 	if (!err)
 		__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
 	netif_tx_unlock_bh(to);
 	return err;
 }
@@ -3180,13 +3188,17 @@ EXPORT_SYMBOL(dev_unicast_sync);
 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
 {
 	netif_tx_lock_bh(from);
+	netif_addr_lock(from);
 	netif_tx_lock_bh(to);
+	netif_addr_lock(to);
 
 	__dev_addr_unsync(&to->uc_list, &to->uc_count,
 			  &from->uc_list, &from->uc_count);
 	__dev_set_rx_mode(to);
 
+	netif_addr_unlock(to);
 	netif_tx_unlock_bh(to);
+	netif_addr_unlock(from);
 	netif_tx_unlock_bh(from);
 }
 EXPORT_SYMBOL(dev_unicast_unsync);
@@ -3208,6 +3220,7 @@ static void __dev_addr_discard(struct dev_addr_list **list)
 static void dev_addr_discard(struct net_device *dev)
 {
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 
 	__dev_addr_discard(&dev->uc_list);
 	dev->uc_count = 0;
@@ -3215,6 +3228,7 @@ static void dev_addr_discard(struct net_device *dev)
 	__dev_addr_discard(&dev->mc_list);
 	dev->mc_count = 0;
 
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 }
 
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index f8a3455f449..b6b2a129971 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -73,6 +73,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 	int err;
 
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 	err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
 				addr, alen, glbl);
 	if (!err) {
@@ -83,6 +84,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 
 		__dev_set_rx_mode(dev);
 	}
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 	return err;
 }
@@ -96,9 +98,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 	int err;
 
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 	err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
 	if (!err)
 		__dev_set_rx_mode(dev);
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 	return err;
 }
@@ -120,10 +124,12 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
 	int err = 0;
 
 	netif_tx_lock_bh(to);
+	netif_addr_lock(to);
 	err = __dev_addr_sync(&to->mc_list, &to->mc_count,
 			      &from->mc_list, &from->mc_count);
 	if (!err)
 		__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
 	netif_tx_unlock_bh(to);
 
 	return err;
@@ -144,13 +150,17 @@ EXPORT_SYMBOL(dev_mc_sync);
 void dev_mc_unsync(struct net_device *to, struct net_device *from)
 {
 	netif_tx_lock_bh(from);
+	netif_addr_lock(from);
 	netif_tx_lock_bh(to);
+	netif_addr_lock(to);
 
 	__dev_addr_unsync(&to->mc_list, &to->mc_count,
 			  &from->mc_list, &from->mc_count);
 	__dev_set_rx_mode(to);
 
+	netif_addr_unlock(to);
 	netif_tx_unlock_bh(to);
+	netif_addr_unlock(from);
 	netif_tx_unlock_bh(from);
 }
 EXPORT_SYMBOL(dev_mc_unsync);
@@ -165,6 +175,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 		return 0;
 
 	netif_tx_lock_bh(dev);
+	netif_addr_lock(dev);
 	for (m = dev->mc_list; m; m = m->next) {
 		int i;
 
@@ -176,6 +187,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 
 		seq_putc(seq, '\n');
 	}
+	netif_addr_unlock(dev);
 	netif_tx_unlock_bh(dev);
 	return 0;
 }
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 36859e79492..095b7d928d6 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -292,7 +292,9 @@ static int ieee80211_open(struct net_device *dev)
 			local->fif_other_bss++;
 
 		netif_tx_lock_bh(local->mdev);
+		netif_addr_lock(local->mdev);
 		ieee80211_configure_filter(local);
+		netif_addr_unlock(local->mdev);
 		netif_tx_unlock_bh(local->mdev);
 		break;
 	case IEEE80211_IF_TYPE_STA:
@@ -491,7 +493,9 @@ static int ieee80211_stop(struct net_device *dev)
 			local->fif_other_bss--;
 
 		netif_tx_lock_bh(local->mdev);
+		netif_addr_lock(local->mdev);
 		ieee80211_configure_filter(local);
+		netif_addr_unlock(local->mdev);
 		netif_tx_unlock_bh(local->mdev);
 		break;
 	case IEEE80211_IF_TYPE_MESH_POINT:
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 8f51375317d..1232ba25e1e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3869,6 +3869,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 
 
 	netif_tx_lock_bh(local->mdev);
+	netif_addr_lock(local->mdev);
 	local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC;
 	local->ops->configure_filter(local_to_hw(local),
 				     FIF_BCN_PRBRESP_PROMISC,
@@ -3876,6 +3877,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 				     local->mdev->mc_count,
 				     local->mdev->mc_list);
 
+	netif_addr_unlock(local->mdev);
 	netif_tx_unlock_bh(local->mdev);
 
 	rcu_read_lock();
@@ -4063,12 +4065,14 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 	local->scan_dev = dev;
 
 	netif_tx_lock_bh(local->mdev);
+	netif_addr_lock(local->mdev);
 	local->filter_flags |= FIF_BCN_PRBRESP_PROMISC;
 	local->ops->configure_filter(local_to_hw(local),
 				     FIF_BCN_PRBRESP_PROMISC,
 				     &local->filter_flags,
 				     local->mdev->mc_count,
 				     local->mdev->mc_list);
+	netif_addr_unlock(local->mdev);
 	netif_tx_unlock_bh(local->mdev);
 
 	/* TODO: start scan as soon as all nullfunc frames are ACKed */
-- 
cgit v1.2.3-70-g09d2


From b9e40857682ecfc5bcd0356a23ff409883ffb982 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Jul 2008 00:15:08 -0700
Subject: netdev: Do not use TX lock to protect address lists.

Now that we have a specific lock to protect the network
device unicast and multicast lists, remove extraneous
grabs of the TX lock in cases where the code only needs
address list protection.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |  2 --
 drivers/media/dvb/dvb-core/dvb_net.c           |  6 ++--
 drivers/net/bonding/bond_main.c                | 24 ++++++----------
 drivers/net/sfc/efx.c                          |  6 ++--
 drivers/net/wireless/libertas/main.c           |  6 ++--
 net/core/dev.c                                 | 38 ++++++++------------------
 net/core/dev_mcast.c                           | 32 +++++++---------------
 net/mac80211/main.c                            | 12 +++-----
 net/mac80211/mlme.c                            |  6 ++--
 9 files changed, 42 insertions(+), 90 deletions(-)

(limited to 'net')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 261ab715043..cd2fb955040 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -774,7 +774,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	ipoib_mcast_stop_thread(dev, 0);
 
 	local_irq_save(flags);
-	netif_tx_lock(dev);
 	netif_addr_lock(dev);
 	spin_lock(&priv->lock);
 
@@ -853,7 +852,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 
 	spin_unlock(&priv->lock);
 	netif_addr_unlock(dev);
-	netif_tx_unlock(dev);
 	local_irq_restore(flags);
 
 	/* We have to cancel outside of the spinlock */
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 809d18c663b..c2c033722a9 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -1133,8 +1133,7 @@ static void wq_set_multicast_list (struct work_struct *work)
 
 	dvb_net_feed_stop(dev);
 	priv->rx_mode = RX_MODE_UNI;
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 
 	if (dev->flags & IFF_PROMISC) {
 		dprintk("%s: promiscuous mode\n", dev->name);
@@ -1159,8 +1158,7 @@ static void wq_set_multicast_list (struct work_struct *work)
 		}
 	}
 
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	dvb_net_feed_start(dev);
 }
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ea71abd6f72..fd87dbe7999 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1567,14 +1567,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				goto err_close;
 		}
 
-		netif_tx_lock_bh(bond_dev);
-		netif_addr_lock(bond_dev);
+		netif_addr_lock_bh(bond_dev);
 		/* upload master's mc_list to new slave */
 		for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
 			dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
 		}
-		netif_addr_unlock(bond_dev);
-		netif_tx_unlock_bh(bond_dev);
+		netif_addr_unlock_bh(bond_dev);
 	}
 
 	if (bond->params.mode == BOND_MODE_8023AD) {
@@ -1938,11 +1936,9 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 		}
 
 		/* flush master's mc_list from slave */
-		netif_tx_lock_bh(bond_dev);
-		netif_addr_lock(bond_dev);
+		netif_addr_lock_bh(bond_dev);
 		bond_mc_list_flush(bond_dev, slave_dev);
-		netif_addr_unlock(bond_dev);
-		netif_tx_unlock_bh(bond_dev);
+		netif_addr_unlock_bh(bond_dev);
 	}
 
 	netdev_set_master(slave_dev, NULL);
@@ -2063,11 +2059,9 @@ static int bond_release_all(struct net_device *bond_dev)
 			}
 
 			/* flush master's mc_list from slave */
-			netif_tx_lock_bh(bond_dev);
-			netif_addr_lock(bond_dev);
+			netif_addr_lock_bh(bond_dev);
 			bond_mc_list_flush(bond_dev, slave_dev);
-			netif_addr_unlock(bond_dev);
-			netif_tx_unlock_bh(bond_dev);
+			netif_addr_unlock_bh(bond_dev);
 		}
 
 		netdev_set_master(slave_dev, NULL);
@@ -4679,11 +4673,9 @@ static void bond_free_all(void)
 		struct net_device *bond_dev = bond->dev;
 
 		bond_work_cancel_all(bond);
-		netif_tx_lock_bh(bond_dev);
-		netif_addr_lock(bond_dev);
+		netif_addr_lock_bh(bond_dev);
 		bond_mc_list_destroy(bond);
-		netif_addr_unlock(bond_dev);
-		netif_tx_unlock_bh(bond_dev);
+		netif_addr_unlock_bh(bond_dev);
 		/* Release the bonded slaves */
 		bond_release_all(bond_dev);
 		bond_destroy(bond);
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index e1257e556e4..7b2015f9e46 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -696,10 +696,8 @@ static void efx_stop_port(struct efx_nic *efx)
 
 	/* Serialise against efx_set_multicast_list() */
 	if (efx_dev_registered(efx)) {
-		netif_tx_lock_bh(efx->net_dev);
-		netif_addr_lock(efx->net_dev);
-		netif_addr_unlock(efx->net_dev);
-		netif_tx_unlock_bh(efx->net_dev);
+		netif_addr_lock_bh(efx->net_dev);
+		netif_addr_unlock_bh(efx->net_dev);
 	}
 }
 
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 42e9b2771ea..14d5d61cec4 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -593,8 +593,7 @@ static int lbs_add_mcast_addrs(struct cmd_ds_mac_multicast_adr *cmd,
 	if ((dev->flags & (IFF_UP|IFF_MULTICAST)) != (IFF_UP|IFF_MULTICAST))
 		return nr_addrs;
 
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 	for (mc_list = dev->mc_list; mc_list; mc_list = mc_list->next) {
 		if (mac_in_list(cmd->maclist, nr_addrs, mc_list->dmi_addr)) {
 			lbs_deb_net("mcast address %s:%s skipped\n", dev->name,
@@ -609,8 +608,7 @@ static int lbs_add_mcast_addrs(struct cmd_ds_mac_multicast_adr *cmd,
 			    print_mac(mac, mc_list->dmi_addr));
 		i++;
 	}
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	if (mc_list)
 		return -EOVERFLOW;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index ef1502d71f2..9b49f74a982 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2981,11 +2981,9 @@ void __dev_set_rx_mode(struct net_device *dev)
 
 void dev_set_rx_mode(struct net_device *dev)
 {
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 	__dev_set_rx_mode(dev);
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 }
 
 int __dev_addr_delete(struct dev_addr_list **list, int *count,
@@ -3063,13 +3061,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
 
 	ASSERT_RTNL();
 
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
 	if (!err)
 		__dev_set_rx_mode(dev);
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	return err;
 }
 EXPORT_SYMBOL(dev_unicast_delete);
@@ -3091,13 +3087,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
 
 	ASSERT_RTNL();
 
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
 	if (!err)
 		__dev_set_rx_mode(dev);
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	return err;
 }
 EXPORT_SYMBOL(dev_unicast_add);
@@ -3164,14 +3158,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
 {
 	int err = 0;
 
-	netif_tx_lock_bh(to);
-	netif_addr_lock(to);
+	netif_addr_lock_bh(to);
 	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
 			      &from->uc_list, &from->uc_count);
 	if (!err)
 		__dev_set_rx_mode(to);
-	netif_addr_unlock(to);
-	netif_tx_unlock_bh(to);
+	netif_addr_unlock_bh(to);
 	return err;
 }
 EXPORT_SYMBOL(dev_unicast_sync);
@@ -3187,9 +3179,7 @@ EXPORT_SYMBOL(dev_unicast_sync);
  */
 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
 {
-	netif_tx_lock_bh(from);
-	netif_addr_lock(from);
-	netif_tx_lock_bh(to);
+	netif_addr_lock_bh(from);
 	netif_addr_lock(to);
 
 	__dev_addr_unsync(&to->uc_list, &to->uc_count,
@@ -3197,9 +3187,7 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from)
 	__dev_set_rx_mode(to);
 
 	netif_addr_unlock(to);
-	netif_tx_unlock_bh(to);
-	netif_addr_unlock(from);
-	netif_tx_unlock_bh(from);
+	netif_addr_unlock_bh(from);
 }
 EXPORT_SYMBOL(dev_unicast_unsync);
 
@@ -3219,8 +3207,7 @@ static void __dev_addr_discard(struct dev_addr_list **list)
 
 static void dev_addr_discard(struct net_device *dev)
 {
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 
 	__dev_addr_discard(&dev->uc_list);
 	dev->uc_count = 0;
@@ -3228,8 +3215,7 @@ static void dev_addr_discard(struct net_device *dev)
 	__dev_addr_discard(&dev->mc_list);
 	dev->mc_count = 0;
 
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 }
 
 unsigned dev_get_flags(const struct net_device *dev)
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index b6b2a129971..5402b3b38e0 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -72,8 +72,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 {
 	int err;
 
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 	err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
 				addr, alen, glbl);
 	if (!err) {
@@ -84,8 +83,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 
 		__dev_set_rx_mode(dev);
 	}
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	return err;
 }
 
@@ -97,13 +95,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 {
 	int err;
 
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 	err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
 	if (!err)
 		__dev_set_rx_mode(dev);
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	return err;
 }
 
@@ -123,14 +119,12 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
 {
 	int err = 0;
 
-	netif_tx_lock_bh(to);
-	netif_addr_lock(to);
+	netif_addr_lock_bh(to);
 	err = __dev_addr_sync(&to->mc_list, &to->mc_count,
 			      &from->mc_list, &from->mc_count);
 	if (!err)
 		__dev_set_rx_mode(to);
-	netif_addr_unlock(to);
-	netif_tx_unlock_bh(to);
+	netif_addr_unlock_bh(to);
 
 	return err;
 }
@@ -149,9 +143,7 @@ EXPORT_SYMBOL(dev_mc_sync);
  */
 void dev_mc_unsync(struct net_device *to, struct net_device *from)
 {
-	netif_tx_lock_bh(from);
-	netif_addr_lock(from);
-	netif_tx_lock_bh(to);
+	netif_addr_lock_bh(from);
 	netif_addr_lock(to);
 
 	__dev_addr_unsync(&to->mc_list, &to->mc_count,
@@ -159,9 +151,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
 	__dev_set_rx_mode(to);
 
 	netif_addr_unlock(to);
-	netif_tx_unlock_bh(to);
-	netif_addr_unlock(from);
-	netif_tx_unlock_bh(from);
+	netif_addr_unlock_bh(from);
 }
 EXPORT_SYMBOL(dev_mc_unsync);
 
@@ -174,8 +164,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 	if (v == SEQ_START_TOKEN)
 		return 0;
 
-	netif_tx_lock_bh(dev);
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 	for (m = dev->mc_list; m; m = m->next) {
 		int i;
 
@@ -187,8 +176,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 
 		seq_putc(seq, '\n');
 	}
-	netif_addr_unlock(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	return 0;
 }
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 095b7d928d6..af0056e7e5b 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -291,11 +291,9 @@ static int ieee80211_open(struct net_device *dev)
 		if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
 			local->fif_other_bss++;
 
-		netif_tx_lock_bh(local->mdev);
-		netif_addr_lock(local->mdev);
+		netif_addr_lock_bh(local->mdev);
 		ieee80211_configure_filter(local);
-		netif_addr_unlock(local->mdev);
-		netif_tx_unlock_bh(local->mdev);
+		netif_addr_unlock_bh(local->mdev);
 		break;
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
@@ -492,11 +490,9 @@ static int ieee80211_stop(struct net_device *dev)
 		if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
 			local->fif_other_bss--;
 
-		netif_tx_lock_bh(local->mdev);
-		netif_addr_lock(local->mdev);
+		netif_addr_lock_bh(local->mdev);
 		ieee80211_configure_filter(local);
-		netif_addr_unlock(local->mdev);
-		netif_tx_unlock_bh(local->mdev);
+		netif_addr_unlock_bh(local->mdev);
 		break;
 	case IEEE80211_IF_TYPE_MESH_POINT:
 	case IEEE80211_IF_TYPE_STA:
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1232ba25e1e..d7c371e36bf 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4064,16 +4064,14 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 	local->scan_band = IEEE80211_BAND_2GHZ;
 	local->scan_dev = dev;
 
-	netif_tx_lock_bh(local->mdev);
-	netif_addr_lock(local->mdev);
+	netif_addr_lock_bh(local->mdev);
 	local->filter_flags |= FIF_BCN_PRBRESP_PROMISC;
 	local->ops->configure_filter(local_to_hw(local),
 				     FIF_BCN_PRBRESP_PROMISC,
 				     &local->filter_flags,
 				     local->mdev->mc_count,
 				     local->mdev->mc_list);
-	netif_addr_unlock(local->mdev);
-	netif_tx_unlock_bh(local->mdev);
+	netif_addr_unlock_bh(local->mdev);
 
 	/* TODO: start scan as soon as all nullfunc frames are ACKed */
 	queue_delayed_work(local->hw.workqueue, &local->scan_work,
-- 
cgit v1.2.3-70-g09d2


From 2870c43d1795bcb40b12bad6456f07ad8e64b3de Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Tue, 15 Jul 2008 00:49:11 -0700
Subject: net: refactor tcp splice receive path to improve readability

- move all of the details on offsets, lengths and buffers into a
single function instead of doing these operation from multiple places

- use a bottom up approach: try to avoid details in the high level
functions, introduce them gradually as we go deeper in the function
call stack

With helpful feedback from Jarek Poplawski.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Acked-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 153 ++++++++++++++++++++++--------------------------------
 1 file changed, 61 insertions(+), 92 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 50a853f7cd8..48a43727d12 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1282,114 +1282,83 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
 	return 0;
 }
 
-/*
- * Map linear and fragment data from the skb to spd. Returns number of
- * pages mapped.
- */
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-			     unsigned int *total_len,
-			     struct splice_pipe_desc *spd)
-{
-	unsigned int nr_pages = spd->nr_pages;
-	unsigned int poff, plen, len, toff, tlen;
-	int headlen, seg, error = 0;
-
-	toff = *offset;
-	tlen = *total_len;
-	if (!tlen) {
-		error = 1;
-		goto err;
+static inline void __segment_seek(struct page **page, unsigned int *poff,
+				  unsigned int *plen, unsigned int off)
+{
+	*poff += off;
+	*page += *poff / PAGE_SIZE;
+	*poff = *poff % PAGE_SIZE;
+	*plen -= off;
+}
+
+static inline int __splice_segment(struct page *page, unsigned int poff,
+				   unsigned int plen, unsigned int *off,
+				   unsigned int *len, struct sk_buff *skb,
+				   struct splice_pipe_desc *spd)
+{
+	if (!*len)
+		return 1;
+
+	/* skip this segment if already processed */
+	if (*off >= plen) {
+		*off -= plen;
+		return 0;
 	}
 
-	/*
-	 * if the offset is greater than the linear part, go directly to
-	 * the fragments.
-	 */
-	headlen = skb_headlen(skb);
-	if (toff >= headlen) {
-		toff -= headlen;
-		goto map_frag;
+	/* ignore any bits we already processed */
+	if (*off) {
+		__segment_seek(&page, &poff, &plen, *off);
+		*off = 0;
 	}
 
-	/*
-	 * first map the linear region into the pages/partial map, skipping
-	 * any potential initial offset.
-	 */
-	len = 0;
-	while (len < headlen) {
-		void *p = skb->data + len;
-
-		poff = (unsigned long) p & (PAGE_SIZE - 1);
-		plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
-		len += plen;
-
-		if (toff) {
-			if (plen <= toff) {
-				toff -= plen;
-				continue;
-			}
-			plen -= toff;
-			poff += toff;
-			toff = 0;
-		}
+	do {
+		unsigned int flen = min(*len, plen);
 
-		plen = min(plen, tlen);
-		if (!plen)
-			break;
+		/* the linear region may spread across several pages  */
+		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
-		/*
-		 * just jump directly to update and return, no point
-		 * in going over fragments when the output is full.
-		 */
-		error = spd_fill_page(spd, virt_to_page(p), plen, poff, skb);
-		if (error)
-			goto done;
+		if (spd_fill_page(spd, page, flen, poff, skb))
+			return 1;
 
-		tlen -= plen;
-	}
+		__segment_seek(&page, &poff, &plen, flen);
+		*len -= flen;
+
+	} while (*len && plen);
+
+	return 0;
+}
+
+/*
+ * Map linear and fragment data from the skb to spd. It reports failure if the
+ * pipe is full or if we already spliced the requested length.
+ */
+int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
+		      unsigned int *len,
+		      struct splice_pipe_desc *spd)
+{
+	int seg;
+
+	/*
+	 * map the linear part
+	 */
+	if (__splice_segment(virt_to_page(skb->data),
+			     (unsigned long) skb->data & (PAGE_SIZE - 1),
+			     skb_headlen(skb),
+			     offset, len, skb, spd))
+		return 1;
 
 	/*
 	 * then map the fragments
 	 */
-map_frag:
 	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
-		plen = f->size;
-		poff = f->page_offset;
-
-		if (toff) {
-			if (plen <= toff) {
-				toff -= plen;
-				continue;
-			}
-			plen -= toff;
-			poff += toff;
-			toff = 0;
-		}
-
-		plen = min(plen, tlen);
-		if (!plen)
-			break;
-
-		error = spd_fill_page(spd, f->page, plen, poff, skb);
-		if (error)
-			break;
-
-		tlen -= plen;
+		if (__splice_segment(f->page, f->page_offset, f->size,
+				     offset, len, skb, spd))
+			return 1;
 	}
 
-done:
-	if (spd->nr_pages - nr_pages) {
-		*offset = 0;
-		*total_len = tlen;
-		return 0;
-	}
-err:
-	/* update the offset to reflect the linear part skip, if any */
-	if (!error)
-		*offset = toff;
-	return error;
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From d0236f8f8223ad841f461ea3a635d452d194806b Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 15 Jul 2008 02:09:53 -0700
Subject: iucv: fix memory leak in cpu hotplug error path.

Fix memory leak in error path in CPU_UP_PREPARE notifier.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 531a206ce7a..d8e0635aace 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -567,8 +567,11 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 			return NOTIFY_BAD;
 		iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
 				     GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
-		if (!iucv_param[cpu])
+		if (!iucv_param[cpu]) {
+			kfree(iucv_irq_data[cpu]);
+			iucv_irq_data[cpu] = NULL;
 			return NOTIFY_BAD;
+		}
 		break;
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
-- 
cgit v1.2.3-70-g09d2


From 166b88d755f925139af7f7b75aa0a1b896ca0670 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jul 2008 16:03:26 -0400
Subject: SUNRPC: Use correct XDR encoding procedure for rpcbind SET/UNSET

The rpcbind versions 3 and 4 SET and UNSET procedures use the same
arguments as the GETADDR procedure.

While definitely a bug, this hasn't been a problem so far since the
kernel hasn't used version 3 or 4 SET and UNSET.  But this will change
in just a moment.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 24e93e0a0a2..0021fad464e 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -426,6 +426,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
 	map->r_status = status;
 }
 
+/*
+ * XDR functions for rpcbind
+ */
+
 static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
 			       struct rpcbind_args *rpcb)
 {
@@ -581,14 +585,14 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 };
 
 static struct rpc_procinfo rpcb_procedures3[] = {
-	PROC(SET,		mapping,	set),
-	PROC(UNSET,		mapping,	set),
+	PROC(SET,		getaddr,	set),
+	PROC(UNSET,		getaddr,	set),
 	PROC(GETADDR,		getaddr,	getaddr),
 };
 
 static struct rpc_procinfo rpcb_procedures4[] = {
-	PROC(SET,		mapping,	set),
-	PROC(UNSET,		mapping,	set),
+	PROC(SET,		getaddr,	set),
+	PROC(UNSET,		getaddr,	set),
 	PROC(GETADDR,		getaddr,	getaddr),
 	PROC(GETVERSADDR,	getaddr,	getaddr),
 };
-- 
cgit v1.2.3-70-g09d2


From cc5598b78fd320dd6d1f90c14491e08029f3c4f6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jul 2008 16:03:27 -0400
Subject: SUNRPC: Introduce a specific rpcb_create for contacting localhost

Add rpcb_create_local() for use by rpcb_register() and upcoming IPv6
registration functions.

Ensure any errors encountered by rpcb_create_local() are properly
reported.

We can also use a statically allocated constant loopback socket address
instead of one allocated on the stack and initialized every time the
function is called.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 0021fad464e..35c1ded1fc4 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -116,6 +116,29 @@ static void rpcb_map_release(void *data)
 	kfree(map);
 }
 
+static const struct sockaddr_in rpcb_inaddr_loopback = {
+	.sin_family		= AF_INET,
+	.sin_addr.s_addr	= htonl(INADDR_LOOPBACK),
+	.sin_port		= htons(RPCBIND_PORT),
+};
+
+static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
+					  size_t addrlen, u32 version)
+{
+	struct rpc_create_args args = {
+		.protocol	= XPRT_TRANSPORT_UDP,
+		.address	= addr,
+		.addrsize	= addrlen,
+		.servername	= "localhost",
+		.program	= &rpcb_program,
+		.version	= version,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= RPC_CLNT_CREATE_NOPING,
+	};
+
+	return rpc_create(&args);
+}
+
 static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 				    size_t salen, int proto, u32 version,
 				    int privileged)
@@ -161,10 +184,6 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
  */
 int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 {
-	struct sockaddr_in sin = {
-		.sin_family		= AF_INET,
-		.sin_addr.s_addr	= htonl(INADDR_LOOPBACK),
-	};
 	struct rpcbind_args map = {
 		.r_prog		= prog,
 		.r_vers		= vers,
@@ -184,14 +203,15 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 			"rpcbind\n", (port ? "" : "un"),
 			prog, vers, prot, port);
 
-	rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
-				sizeof(sin), XPRT_TRANSPORT_UDP, RPCBVERS_2, 1);
-	if (IS_ERR(rpcb_clnt))
-		return PTR_ERR(rpcb_clnt);
+	rpcb_clnt = rpcb_create_local((struct sockaddr *)&rpcb_inaddr_loopback,
+					sizeof(rpcb_inaddr_loopback),
+					RPCBVERS_2);
+	if (!IS_ERR(rpcb_clnt)) {
+		error = rpc_call_sync(rpcb_clnt, &msg, 0);
+		rpc_shutdown_client(rpcb_clnt);
+	} else
+		error = PTR_ERR(rpcb_clnt);
 
-	error = rpc_call_sync(rpcb_clnt, &msg, 0);
-
-	rpc_shutdown_client(rpcb_clnt);
 	if (error < 0)
 		printk(KERN_WARNING "RPC: failed to contact local rpcbind "
 				"server (errno %d).\n", -error);
-- 
cgit v1.2.3-70-g09d2


From 423d8b064771f5cd8b706a4839b18db9bb6c3c59 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jul 2008 16:03:28 -0400
Subject: SUNRPC: None of rpcb_create's callers wants a privileged source port

Clean up: Callers that required a privileged source port now use
rpcb_create_local(), so we can remove the @privileged argument from
rpcb_create().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 35c1ded1fc4..691bd216f46 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -140,8 +140,7 @@ static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
 }
 
 static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
-				    size_t salen, int proto, u32 version,
-				    int privileged)
+				    size_t salen, int proto, u32 version)
 {
 	struct rpc_create_args args = {
 		.protocol	= proto,
@@ -151,7 +150,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 		.program	= &rpcb_program,
 		.version	= version,
 		.authflavor	= RPC_AUTH_UNIX,
-		.flags		= RPC_CLNT_CREATE_NOPING,
+		.flags		= (RPC_CLNT_CREATE_NOPING |
+					RPC_CLNT_CREATE_NONPRIVPORT),
 	};
 
 	switch (srvaddr->sa_family) {
@@ -165,8 +165,6 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 		return NULL;
 	}
 
-	if (!privileged)
-		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 	return rpc_create(&args);
 }
 
@@ -255,7 +253,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
 		__func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
 
 	rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
-				sizeof(*sin), prot, RPCBVERS_2, 0);
+				sizeof(*sin), prot, RPCBVERS_2);
 	if (IS_ERR(rpcb_clnt))
 		return PTR_ERR(rpcb_clnt);
 
@@ -365,7 +363,7 @@ void rpcb_getport_async(struct rpc_task *task)
 		task->tk_pid, __func__, bind_version);
 
 	rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot,
-				bind_version, 0);
+				bind_version);
 	if (IS_ERR(rpcb_clnt)) {
 		status = PTR_ERR(rpcb_clnt);
 		dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
-- 
cgit v1.2.3-70-g09d2


From babe80eb4994dfdc97d5be19a68b5af66d667585 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jul 2008 16:03:29 -0400
Subject: SUNRPC: Refactor rpcb_register to make rpcbindv4 support easier

rpcbind version 4 registration will reuse part of rpcb_register, so just
split it out into a separate function now.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 48 ++++++++++++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 691bd216f46..8b75c306e66 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -168,6 +168,30 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 	return rpc_create(&args);
 }
 
+static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
+			      u32 version, struct rpc_message *msg,
+			      int *result)
+{
+	struct rpc_clnt *rpcb_clnt;
+	int error = 0;
+
+	*result = 0;
+
+	rpcb_clnt = rpcb_create_local(addr, addrlen, version);
+	if (!IS_ERR(rpcb_clnt)) {
+		error = rpc_call_sync(rpcb_clnt, msg, 0);
+		rpc_shutdown_client(rpcb_clnt);
+	} else
+		error = PTR_ERR(rpcb_clnt);
+
+	if (error < 0)
+		printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+				"server (errno %d).\n", -error);
+	dprintk("RPC:       registration status %d/%d\n", error, *result);
+
+	return error;
+}
+
 /**
  * rpcb_register - set or unset a port registration with the local rpcbind svc
  * @prog: RPC program number to bind
@@ -189,33 +213,21 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 		.r_port		= port,
 	};
 	struct rpc_message msg = {
-		.rpc_proc	= &rpcb_procedures2[port ?
-					RPCBPROC_SET : RPCBPROC_UNSET],
 		.rpc_argp	= &map,
 		.rpc_resp	= okay,
 	};
-	struct rpc_clnt *rpcb_clnt;
-	int error = 0;
 
 	dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "
 			"rpcbind\n", (port ? "" : "un"),
 			prog, vers, prot, port);
 
-	rpcb_clnt = rpcb_create_local((struct sockaddr *)&rpcb_inaddr_loopback,
-					sizeof(rpcb_inaddr_loopback),
-					RPCBVERS_2);
-	if (!IS_ERR(rpcb_clnt)) {
-		error = rpc_call_sync(rpcb_clnt, &msg, 0);
-		rpc_shutdown_client(rpcb_clnt);
-	} else
-		error = PTR_ERR(rpcb_clnt);
+	msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET];
+	if (port)
+		msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
 
-	if (error < 0)
-		printk(KERN_WARNING "RPC: failed to contact local rpcbind "
-				"server (errno %d).\n", -error);
-	dprintk("RPC:       registration status %d/%d\n", error, *okay);
-
-	return error;
+	return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
+					sizeof(rpcb_inaddr_loopback),
+					RPCBVERS_2, &msg, okay);
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From c2e1b09ff237c0a3687b9a804cc8bf489743cffc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jul 2008 16:03:30 -0400
Subject: SUNRPC: Support registering IPv6 interfaces with local rpcbind daemon

Introduce a new API to register RPC services on IPv6 interfaces to allow
the NFS server and lockd to advertise on IPv6 networks.

Unlike rpcb_register(), the new rpcb_v4_register() function uses rpcbind
protocol version 4 to contact the local rpcbind daemon.  The version 4
SET/UNSET procedures allow services to register address families besides
AF_INET, register at specific network interfaces, and register transport
protocols besides UDP and TCP.  All of this functionality is exposed via
the new rpcb_v4_register() kernel API.

A user-space rpcbind daemon implementation that supports version 4 of the
rpcbind protocol is required in order to make use of this new API.

Note that rpcbind version 3 is sufficient to support the new rpcbind
facilities listed above, but most extant implementations use version 4.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |   3 +
 net/sunrpc/rpcb_clnt.c      | 178 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 177 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 764fd4c286e..e5bfe01ee30 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -125,6 +125,9 @@ void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
 
 int		rpcb_register(u32, u32, int, unsigned short, int *);
+int		rpcb_v4_register(const u32 program, const u32 version,
+				 const struct sockaddr *address,
+				 const char *netid, int *result);
 int		rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
 void		rpcb_getport_async(struct rpc_task *);
 
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 8b75c306e66..24db2b4d12d 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -87,6 +87,7 @@ struct rpcbind_args {
 
 static struct rpc_procinfo rpcb_procedures2[];
 static struct rpc_procinfo rpcb_procedures3[];
+static struct rpc_procinfo rpcb_procedures4[];
 
 struct rpcb_info {
 	u32			rpc_vers;
@@ -122,6 +123,12 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
 	.sin_port		= htons(RPCBIND_PORT),
 };
 
+static const struct sockaddr_in6 rpcb_in6addr_loopback = {
+	.sin6_family		= AF_INET6,
+	.sin6_addr		= IN6ADDR_LOOPBACK_INIT,
+	.sin6_port		= htons(RPCBIND_PORT),
+};
+
 static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
 					  size_t addrlen, u32 version)
 {
@@ -196,13 +203,38 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
  * rpcb_register - set or unset a port registration with the local rpcbind svc
  * @prog: RPC program number to bind
  * @vers: RPC version number to bind
- * @prot: transport protocol to use to make this request
+ * @prot: transport protocol to register
  * @port: port value to register
- * @okay: result code
+ * @okay: OUT: result code
+ *
+ * RPC services invoke this function to advertise their contact
+ * information via the system's rpcbind daemon.  RPC services
+ * invoke this function once for each [program, version, transport]
+ * tuple they wish to advertise.
+ *
+ * Callers may also unregister RPC services that are no longer
+ * available by setting the passed-in port to zero.  This removes
+ * all registered transports for [program, version] from the local
+ * rpcbind database.
+ *
+ * Returns zero if the registration request was dispatched
+ * successfully and a reply was received.  The rpcbind daemon's
+ * boolean result code is stored in *okay.
  *
- * port == 0 means unregister, port != 0 means register.
+ * Returns an errno value and sets *result to zero if there was
+ * some problem that prevented the rpcbind request from being
+ * dispatched, or if the rpcbind daemon did not respond within
+ * the timeout.
  *
- * This routine supports only rpcbind version 2.
+ * This function uses rpcbind protocol version 2 to contact the
+ * local rpcbind daemon.
+ *
+ * Registration works over both AF_INET and AF_INET6, and services
+ * registered via this function are advertised as available for any
+ * address.  If the local rpcbind daemon is listening on AF_INET6,
+ * services registered via this function will be advertised on
+ * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6
+ * addresses).
  */
 int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 {
@@ -230,6 +262,144 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 					RPCBVERS_2, &msg, okay);
 }
 
+/*
+ * Fill in AF_INET family-specific arguments to register
+ */
+static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
+				struct rpc_message *msg)
+{
+	struct rpcbind_args *map = msg->rpc_argp;
+	unsigned short port = ntohs(address_to_register->sin_port);
+	char buf[32];
+
+	/* Construct AF_INET universal address */
+	snprintf(buf, sizeof(buf),
+			NIPQUAD_FMT".%u.%u",
+			NIPQUAD(address_to_register->sin_addr.s_addr),
+			port >> 8, port & 0xff);
+	map->r_addr = buf;
+
+	dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with "
+		"local rpcbind\n", (port ? "" : "un"),
+			map->r_prog, map->r_vers,
+			map->r_addr, map->r_netid);
+
+	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
+	if (port)
+		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+
+	return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
+					sizeof(rpcb_inaddr_loopback),
+					RPCBVERS_4, msg, msg->rpc_resp);
+}
+
+/*
+ * Fill in AF_INET6 family-specific arguments to register
+ */
+static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
+				struct rpc_message *msg)
+{
+	struct rpcbind_args *map = msg->rpc_argp;
+	unsigned short port = ntohs(address_to_register->sin6_port);
+	char buf[64];
+
+	/* Construct AF_INET6 universal address */
+	snprintf(buf, sizeof(buf),
+			NIP6_FMT".%u.%u",
+			NIP6(address_to_register->sin6_addr),
+			port >> 8, port & 0xff);
+	map->r_addr = buf;
+
+	dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with "
+		"local rpcbind\n", (port ? "" : "un"),
+			map->r_prog, map->r_vers,
+			map->r_addr, map->r_netid);
+
+	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
+	if (port)
+		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+
+	return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
+					sizeof(rpcb_in6addr_loopback),
+					RPCBVERS_4, msg, msg->rpc_resp);
+}
+
+/**
+ * rpcb_v4_register - set or unset a port registration with the local rpcbind
+ * @program: RPC program number of service to (un)register
+ * @version: RPC version number of service to (un)register
+ * @address: address family, IP address, and port to (un)register
+ * @netid: netid of transport protocol to (un)register
+ * @result: result code from rpcbind RPC call
+ *
+ * RPC services invoke this function to advertise their contact
+ * information via the system's rpcbind daemon.  RPC services
+ * invoke this function once for each [program, version, address,
+ * netid] tuple they wish to advertise.
+ *
+ * Callers may also unregister RPC services that are no longer
+ * available by setting the port number in the passed-in address
+ * to zero.  Callers pass a netid of "" to unregister all
+ * transport netids associated with [program, version, address].
+ *
+ * Returns zero if the registration request was dispatched
+ * successfully and a reply was received.  The rpcbind daemon's
+ * result code is stored in *result.
+ *
+ * Returns an errno value and sets *result to zero if there was
+ * some problem that prevented the rpcbind request from being
+ * dispatched, or if the rpcbind daemon did not respond within
+ * the timeout.
+ *
+ * This function uses rpcbind protocol version 4 to contact the
+ * local rpcbind daemon.  The local rpcbind daemon must support
+ * version 4 of the rpcbind protocol in order for these functions
+ * to register a service successfully.
+ *
+ * Supported netids include "udp" and "tcp" for UDP and TCP over
+ * IPv4, and "udp6" and "tcp6" for UDP and TCP over IPv6,
+ * respectively.
+ *
+ * The contents of @address determine the address family and the
+ * port to be registered.  The usual practice is to pass INADDR_ANY
+ * as the raw address, but specifying a non-zero address is also
+ * supported by this API if the caller wishes to advertise an RPC
+ * service on a specific network interface.
+ *
+ * Note that passing in INADDR_ANY does not create the same service
+ * registration as IN6ADDR_ANY.  The former advertises an RPC
+ * service on any IPv4 address, but not on IPv6.  The latter
+ * advertises the service on all IPv4 and IPv6 addresses.
+ */
+int rpcb_v4_register(const u32 program, const u32 version,
+		     const struct sockaddr *address, const char *netid,
+		     int *result)
+{
+	struct rpcbind_args map = {
+		.r_prog		= program,
+		.r_vers		= version,
+		.r_netid	= netid,
+		.r_owner	= RPCB_OWNER_STRING,
+	};
+	struct rpc_message msg = {
+		.rpc_argp	= &map,
+		.rpc_resp	= result,
+	};
+
+	*result = 0;
+
+	switch (address->sa_family) {
+	case AF_INET:
+		return rpcb_register_netid4((struct sockaddr_in *)address,
+					    &msg);
+	case AF_INET6:
+		return rpcb_register_netid6((struct sockaddr_in6 *)address,
+					    &msg);
+	}
+
+	return -EAFNOSUPPORT;
+}
+
 /**
  * rpcb_getport_sync - obtain the port for an RPC service on a given host
  * @sin: address of remote peer
-- 
cgit v1.2.3-70-g09d2


From a86dc496b764ebb1431677b38eab45310e5a2ad4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 13:37:09 -0400
Subject: SUNRPC: Remove the BKL from the callback functions

Push it into those callback functions that actually need it.

Note that all the NFS operations use their own locking, so don't need the
BKL. Ditto for the rpcbind client.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 6 ++++++
 fs/lockd/svc4proc.c | 2 ++
 fs/lockd/svclock.c  | 7 ++++++-
 fs/lockd/svcproc.c  | 2 ++
 net/sunrpc/sched.c  | 9 +--------
 5 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index fd7d4669776..1f6dc518505 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -224,7 +224,9 @@ void nlm_release_call(struct nlm_rqst *call)
 
 static void nlmclnt_rpc_release(void *data)
 {
+	lock_kernel();
 	nlm_release_call(data);
+	unlock_kernel();
 }
 
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -710,7 +712,9 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 die:
 	return;
  retry_rebind:
+	lock_kernel();
 	nlm_rebind_host(req->a_host);
+	unlock_kernel();
  retry_unlock:
 	rpc_restart_call(task);
 }
@@ -788,7 +792,9 @@ retry_cancel:
 	/* Don't ever retry more than 3 times */
 	if (req->a_retries++ >= NLMCLNT_MAX_RETRIES)
 		goto die;
+	lock_kernel();
 	nlm_rebind_host(req->a_host);
+	unlock_kernel();
 	rpc_restart_call(task);
 	rpc_delay(task, 30 * HZ);
 }
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 385437e3387..2e27176ff42 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -248,7 +248,9 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 
 static void nlm4svc_callback_release(void *data)
 {
+	lock_kernel();
 	nlm_release_call(data);
+	unlock_kernel();
 }
 
 static const struct rpc_call_ops nlm4svc_callback_ops = {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 81aca859bfd..56a08ab9a4c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -795,6 +795,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 
 	dprintk("lockd: GRANT_MSG RPC callback\n");
 
+	lock_kernel();
 	/* if the block is not on a list at this point then it has
 	 * been invalidated. Don't try to requeue it.
 	 *
@@ -804,7 +805,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	 * for nlm_blocked?
 	 */
 	if (list_empty(&block->b_list))
-		return;
+		goto out;
 
 	/* Technically, we should down the file semaphore here. Since we
 	 * move the block towards the head of the queue only, no harm
@@ -818,13 +819,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	}
 	nlmsvc_insert_block(block, timeout);
 	svc_wake_up(block->b_daemon);
+out:
+	unlock_kernel();
 }
 
 static void nlmsvc_grant_release(void *data)
 {
 	struct nlm_rqst		*call = data;
 
+	lock_kernel();
 	nlmsvc_release_block(call->a_block);
+	unlock_kernel();
 }
 
 static const struct rpc_call_ops nlmsvc_grant_ops = {
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 88379cc6e0b..ce6952b50a7 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -278,7 +278,9 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 
 static void nlmsvc_callback_release(void *data)
 {
+	lock_kernel();
 	nlm_release_call(data);
+	unlock_kernel();
 }
 
 static const struct rpc_call_ops nlmsvc_callback_ops = {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6288af05c20..385f427beda 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -576,9 +576,7 @@ EXPORT_SYMBOL_GPL(rpc_delay);
  */
 static void rpc_prepare_task(struct rpc_task *task)
 {
-	lock_kernel();
 	task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
-	unlock_kernel();
 }
 
 /*
@@ -588,9 +586,7 @@ void rpc_exit_task(struct rpc_task *task)
 {
 	task->tk_action = NULL;
 	if (task->tk_ops->rpc_call_done != NULL) {
-		lock_kernel();
 		task->tk_ops->rpc_call_done(task, task->tk_calldata);
-		unlock_kernel();
 		if (task->tk_action != NULL) {
 			WARN_ON(RPC_ASSASSINATED(task));
 			/* Always release the RPC slot and buffer memory */
@@ -602,11 +598,8 @@ EXPORT_SYMBOL_GPL(rpc_exit_task);
 
 void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
 {
-	if (ops->rpc_release != NULL) {
-		lock_kernel();
+	if (ops->rpc_release != NULL)
 		ops->rpc_release(calldata);
-		unlock_kernel();
-	}
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From d56400504a40a4aa197af629300d76544169e821 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Wed, 16 Jul 2008 11:13:35 +0000
Subject: ipvs: Initialize mcast addr at compile time

There's no need to do it at runtime, the values are constant.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_sync.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 2d4a86f7332..8c900dfe832 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -139,7 +139,11 @@ char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 
 /* multicast addr */
-static struct sockaddr_in mcast_addr;
+static struct sockaddr_in mcast_addr = {
+	.sin_family		= AF_INET,
+	.sin_port		= __constant_htons(IP_VS_SYNC_PORT),
+	.sin_addr.s_addr	= __constant_htonl(IP_VS_SYNC_GROUP),
+};
 
 
 static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
@@ -862,11 +866,6 @@ static int sync_thread(void *startup)
 	/* set the maximum length of sync message */
 	set_sync_mesg_maxlen(state);
 
-	/* set up multicast address */
-	mcast_addr.sin_family = AF_INET;
-	mcast_addr.sin_port = htons(IP_VS_SYNC_PORT);
-	mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP);
-
 	add_wait_queue(&sync_wait, &wait);
 
 	set_sync_pid(state, task_pid_nr(current));
-- 
cgit v1.2.3-70-g09d2


From e6dd731c75cba986a485924f908e6e05b088ea9e Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Wed, 16 Jul 2008 11:13:43 +0000
Subject: ipvs: Use ERR_PTR for returning errors from make_receive_sock() and
 make_send_sock()

The additional information we now return to the caller is currently not used,
but will be used to return errors to user space.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_sync.c | 46 +++++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 8c900dfe832..60b96823c9a 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -27,6 +27,7 @@
 #include <linux/in.h>
 #include <linux/igmp.h>                 /* for ip_mc_join_group */
 #include <linux/udp.h>
+#include <linux/err.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
@@ -576,14 +577,17 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
 static struct socket * make_send_sock(void)
 {
 	struct socket *sock;
+	int result;
 
 	/* First create a socket */
-	if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	if (result < 0) {
 		IP_VS_ERR("Error during creation of socket; terminating\n");
-		return NULL;
+		return ERR_PTR(result);
 	}
 
-	if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) {
+	result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+	if (result < 0) {
 		IP_VS_ERR("Error setting outbound mcast interface\n");
 		goto error;
 	}
@@ -591,14 +595,15 @@ static struct socket * make_send_sock(void)
 	set_mcast_loop(sock->sk, 0);
 	set_mcast_ttl(sock->sk, 1);
 
-	if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) {
+	result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+	if (result < 0) {
 		IP_VS_ERR("Error binding address of the mcast interface\n");
 		goto error;
 	}
 
-	if (sock->ops->connect(sock,
-			       (struct sockaddr*)&mcast_addr,
-			       sizeof(struct sockaddr), 0) < 0) {
+	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+			sizeof(struct sockaddr), 0);
+	if (result < 0) {
 		IP_VS_ERR("Error connecting to the multicast addr\n");
 		goto error;
 	}
@@ -607,7 +612,7 @@ static struct socket * make_send_sock(void)
 
   error:
 	sock_release(sock);
-	return NULL;
+	return ERR_PTR(result);
 }
 
 
@@ -617,27 +622,30 @@ static struct socket * make_send_sock(void)
 static struct socket * make_receive_sock(void)
 {
 	struct socket *sock;
+	int result;
 
 	/* First create a socket */
-	if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	if (result < 0) {
 		IP_VS_ERR("Error during creation of socket; terminating\n");
-		return NULL;
+		return ERR_PTR(result);
 	}
 
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = 1;
 
-	if (sock->ops->bind(sock,
-			    (struct sockaddr*)&mcast_addr,
-			    sizeof(struct sockaddr)) < 0) {
+	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+			sizeof(struct sockaddr));
+	if (result < 0) {
 		IP_VS_ERR("Error binding to the multicast addr\n");
 		goto error;
 	}
 
 	/* join the multicast group */
-	if (join_mcast_group(sock->sk,
-			     (struct in_addr*)&mcast_addr.sin_addr,
-			     ip_vs_backup_mcast_ifn) < 0) {
+	result = join_mcast_group(sock->sk,
+			(struct in_addr *) &mcast_addr.sin_addr,
+			ip_vs_backup_mcast_ifn);
+	if (result < 0) {
 		IP_VS_ERR("Error joining to the multicast group\n");
 		goto error;
 	}
@@ -646,7 +654,7 @@ static struct socket * make_receive_sock(void)
 
   error:
 	sock_release(sock);
-	return NULL;
+	return ERR_PTR(result);
 }
 
 
@@ -719,7 +727,7 @@ static void sync_master_loop(void)
 
 	/* create the sending multicast socket */
 	sock = make_send_sock();
-	if (!sock)
+	if (IS_ERR(sock))
 		return;
 
 	IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
@@ -772,7 +780,7 @@ static void sync_backup_loop(void)
 
 	/* create the receiving multicast socket */
 	sock = make_receive_sock();
-	if (!sock)
+	if (IS_ERR(sock))
 		goto out;
 
 	IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
-- 
cgit v1.2.3-70-g09d2


From 998e7a76804b7a273a0460c2cdd5a51fa9856717 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Wed, 16 Jul 2008 11:13:50 +0000
Subject: ipvs: Use kthread_run() instead of doing a double-fork via
 kernel_thread()

This also moves the setup code out of the daemons, so that we're able to
return proper error codes to user space. The current code will return success
to user space when the daemon is started with an invald mcast interface. With
these changes we get an appropriate "No such device" error.

We longer need our own completion to be sure the daemons are actually running,
because they no longer contain code that can fail and kthread_run() takes care
of the rest.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_sync.c | 369 +++++++++++++++++----------------------------
 1 file changed, 136 insertions(+), 233 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 60b96823c9a..550563a5660 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -28,10 +28,10 @@
 #include <linux/igmp.h>                 /* for ip_mc_join_group */
 #include <linux/udp.h>
 #include <linux/err.h>
+#include <linux/kthread.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>                /* for get_fs and set_fs */
 
 #include <net/ip_vs.h>
 
@@ -67,8 +67,8 @@ struct ip_vs_sync_conn_options {
 };
 
 struct ip_vs_sync_thread_data {
-	struct completion *startup;
-	int state;
+	struct socket *sock;
+	char *buf;
 };
 
 #define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
@@ -139,6 +139,10 @@ volatile int ip_vs_backup_syncid = 0;
 char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 
+/* sync daemon tasks */
+static struct task_struct *sync_master_thread;
+static struct task_struct *sync_backup_thread;
+
 /* multicast addr */
 static struct sockaddr_in mcast_addr = {
 	.sin_family		= AF_INET,
@@ -147,14 +151,7 @@ static struct sockaddr_in mcast_addr = {
 };
 
 
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
-{
-	spin_lock(&ip_vs_sync_lock);
-	list_add_tail(&sb->list, &ip_vs_sync_queue);
-	spin_unlock(&ip_vs_sync_lock);
-}
-
-static inline struct ip_vs_sync_buff * sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(void)
 {
 	struct ip_vs_sync_buff *sb;
 
@@ -198,6 +195,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
 	kfree(sb);
 }
 
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+{
+	spin_lock(&ip_vs_sync_lock);
+	if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+		list_add_tail(&sb->list, &ip_vs_sync_queue);
+	else
+		ip_vs_sync_buff_release(sb);
+	spin_unlock(&ip_vs_sync_lock);
+}
+
 /*
  *	Get the current sync buffer if it has been created for more
  *	than the specified time or the specified time is zero.
@@ -712,43 +719,28 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
 }
 
 
-static DECLARE_WAIT_QUEUE_HEAD(sync_wait);
-static pid_t sync_master_pid = 0;
-static pid_t sync_backup_pid = 0;
-
-static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
-static int stop_master_sync = 0;
-static int stop_backup_sync = 0;
-
-static void sync_master_loop(void)
+static int sync_thread_master(void *data)
 {
-	struct socket *sock;
+	struct ip_vs_sync_thread_data *tinfo = data;
 	struct ip_vs_sync_buff *sb;
 
-	/* create the sending multicast socket */
-	sock = make_send_sock();
-	if (IS_ERR(sock))
-		return;
-
 	IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
 		   "syncid = %d\n",
 		   ip_vs_master_mcast_ifn, ip_vs_master_syncid);
 
-	for (;;) {
-		while ((sb=sb_dequeue())) {
-			ip_vs_send_sync_msg(sock, sb->mesg);
+	while (!kthread_should_stop()) {
+		while ((sb = sb_dequeue())) {
+			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
 		}
 
 		/* check if entries stay in curr_sb for 2 seconds */
-		if ((sb = get_curr_sync_buff(2*HZ))) {
-			ip_vs_send_sync_msg(sock, sb->mesg);
+		sb = get_curr_sync_buff(2 * HZ);
+		if (sb) {
+			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
 		}
 
-		if (stop_master_sync)
-			break;
-
 		msleep_interruptible(1000);
 	}
 
@@ -763,262 +755,173 @@ static void sync_master_loop(void)
 	}
 
 	/* release the sending multicast socket */
-	sock_release(sock);
+	sock_release(tinfo->sock);
+	kfree(tinfo);
+
+	return 0;
 }
 
 
-static void sync_backup_loop(void)
+static int sync_thread_backup(void *data)
 {
-	struct socket *sock;
-	char *buf;
+	struct ip_vs_sync_thread_data *tinfo = data;
 	int len;
 
-	if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
-		IP_VS_ERR("sync_backup_loop: kmalloc error\n");
-		return;
-	}
-
-	/* create the receiving multicast socket */
-	sock = make_receive_sock();
-	if (IS_ERR(sock))
-		goto out;
-
 	IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
 		   "syncid = %d\n",
 		   ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
 
-	for (;;) {
-		/* do you have data now? */
-		while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) {
-			if ((len =
-			     ip_vs_receive(sock, buf,
-					   sync_recv_mesg_maxlen)) <= 0) {
+	while (!kthread_should_stop()) {
+		/* do we have data now? */
+		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
+			len = ip_vs_receive(tinfo->sock, tinfo->buf,
+					sync_recv_mesg_maxlen);
+			if (len <= 0) {
 				IP_VS_ERR("receiving message error\n");
 				break;
 			}
-			/* disable bottom half, because it accessed the data
+
+			/* disable bottom half, because it accesses the data
 			   shared by softirq while getting/creating conns */
 			local_bh_disable();
-			ip_vs_process_message(buf, len);
+			ip_vs_process_message(tinfo->buf, len);
 			local_bh_enable();
 		}
 
-		if (stop_backup_sync)
-			break;
-
 		msleep_interruptible(1000);
 	}
 
 	/* release the sending multicast socket */
-	sock_release(sock);
+	sock_release(tinfo->sock);
+	kfree(tinfo->buf);
+	kfree(tinfo);
 
-  out:
-	kfree(buf);
+	return 0;
 }
 
 
-static void set_sync_pid(int sync_state, pid_t sync_pid)
-{
-	if (sync_state == IP_VS_STATE_MASTER)
-		sync_master_pid = sync_pid;
-	else if (sync_state == IP_VS_STATE_BACKUP)
-		sync_backup_pid = sync_pid;
-}
-
-static void set_stop_sync(int sync_state, int set)
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 {
-	if (sync_state == IP_VS_STATE_MASTER)
-		stop_master_sync = set;
-	else if (sync_state == IP_VS_STATE_BACKUP)
-		stop_backup_sync = set;
-	else {
-		stop_master_sync = set;
-		stop_backup_sync = set;
-	}
-}
+	struct ip_vs_sync_thread_data *tinfo;
+	struct task_struct **realtask, *task;
+	struct socket *sock;
+	char *name, *buf = NULL;
+	int (*threadfn)(void *data);
+	int result = -ENOMEM;
 
-static int sync_thread(void *startup)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	mm_segment_t oldmm;
-	int state;
-	const char *name;
-	struct ip_vs_sync_thread_data *tinfo = startup;
+	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+		  sizeof(struct ip_vs_sync_conn));
 
-	/* increase the module use count */
-	ip_vs_use_count_inc();
+	if (state == IP_VS_STATE_MASTER) {
+		if (sync_master_thread)
+			return -EEXIST;
 
-	if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) {
-		state = IP_VS_STATE_MASTER;
+		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_master_mcast_ifn));
+		ip_vs_master_syncid = syncid;
+		realtask = &sync_master_thread;
 		name = "ipvs_syncmaster";
-	} else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) {
-		state = IP_VS_STATE_BACKUP;
+		threadfn = sync_thread_master;
+		sock = make_send_sock();
+	} else if (state == IP_VS_STATE_BACKUP) {
+		if (sync_backup_thread)
+			return -EEXIST;
+
+		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_backup_mcast_ifn));
+		ip_vs_backup_syncid = syncid;
+		realtask = &sync_backup_thread;
 		name = "ipvs_syncbackup";
+		threadfn = sync_thread_backup;
+		sock = make_receive_sock();
 	} else {
-		IP_VS_BUG();
-		ip_vs_use_count_dec();
 		return -EINVAL;
 	}
 
-	daemonize(name);
-
-	oldmm = get_fs();
-	set_fs(KERNEL_DS);
-
-	/* Block all signals */
-	spin_lock_irq(&current->sighand->siglock);
-	siginitsetinv(&current->blocked, 0);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	if (IS_ERR(sock)) {
+		result = PTR_ERR(sock);
+		goto out;
+	}
 
-	/* set the maximum length of sync message */
 	set_sync_mesg_maxlen(state);
+	if (state == IP_VS_STATE_BACKUP) {
+		buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+		if (!buf)
+			goto outsocket;
+	}
 
-	add_wait_queue(&sync_wait, &wait);
-
-	set_sync_pid(state, task_pid_nr(current));
-	complete(tinfo->startup);
-
-	/*
-	 * once we call the completion queue above, we should
-	 * null out that reference, since its allocated on the
-	 * stack of the creating kernel thread
-	 */
-	tinfo->startup = NULL;
-
-	/* processing master/backup loop here */
-	if (state == IP_VS_STATE_MASTER)
-		sync_master_loop();
-	else if (state == IP_VS_STATE_BACKUP)
-		sync_backup_loop();
-	else IP_VS_BUG();
-
-	remove_wait_queue(&sync_wait, &wait);
-
-	/* thread exits */
-
-	/*
-	 * If we weren't explicitly stopped, then we
-	 * exited in error, and should undo our state
-	 */
-	if ((!stop_master_sync) && (!stop_backup_sync))
-		ip_vs_sync_state -= tinfo->state;
+	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+	if (!tinfo)
+		goto outbuf;
 
-	set_sync_pid(state, 0);
-	IP_VS_INFO("sync thread stopped!\n");
+	tinfo->sock = sock;
+	tinfo->buf = buf;
 
-	set_fs(oldmm);
+	task = kthread_run(threadfn, tinfo, name);
+	if (IS_ERR(task)) {
+		result = PTR_ERR(task);
+		goto outtinfo;
+	}
 
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
+	/* mark as active */
+	*realtask = task;
+	ip_vs_sync_state |= state;
 
-	set_stop_sync(state, 0);
-	wake_up(&stop_sync_wait);
+	/* increase the module use count */
+	ip_vs_use_count_inc();
 
-	/*
-	 * we need to free the structure that was allocated
-	 * for us in start_sync_thread
-	 */
-	kfree(tinfo);
 	return 0;
-}
-
-
-static int fork_sync_thread(void *startup)
-{
-	pid_t pid;
-
-	/* fork the sync thread here, then the parent process of the
-	   sync thread is the init process after this thread exits. */
-  repeat:
-	if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
-		IP_VS_ERR("could not create sync_thread due to %d... "
-			  "retrying.\n", pid);
-		msleep_interruptible(1000);
-		goto repeat;
-	}
 
-	return 0;
+outtinfo:
+	kfree(tinfo);
+outbuf:
+	kfree(buf);
+outsocket:
+	sock_release(sock);
+out:
+	return result;
 }
 
 
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int stop_sync_thread(int state)
 {
-	DECLARE_COMPLETION_ONSTACK(startup);
-	pid_t pid;
-	struct ip_vs_sync_thread_data *tinfo;
-
-	if ((state == IP_VS_STATE_MASTER && sync_master_pid) ||
-	    (state == IP_VS_STATE_BACKUP && sync_backup_pid))
-		return -EEXIST;
-
-	/*
-	 * Note that tinfo will be freed in sync_thread on exit
-	 */
-	tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL);
-	if (!tinfo)
-		return -ENOMEM;
-
 	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
-	IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
-		  sizeof(struct ip_vs_sync_conn));
 
-	ip_vs_sync_state |= state;
 	if (state == IP_VS_STATE_MASTER) {
-		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_master_mcast_ifn));
-		ip_vs_master_syncid = syncid;
-	} else {
-		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_backup_mcast_ifn));
-		ip_vs_backup_syncid = syncid;
-	}
-
-	tinfo->state = state;
-	tinfo->startup = &startup;
+		if (!sync_master_thread)
+			return -ESRCH;
 
-  repeat:
-	if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) {
-		IP_VS_ERR("could not create fork_sync_thread due to %d... "
-			  "retrying.\n", pid);
-		msleep_interruptible(1000);
-		goto repeat;
-	}
+		IP_VS_INFO("stopping master sync thread %d ...\n",
+			   task_pid_nr(sync_master_thread));
 
-	wait_for_completion(&startup);
-
-	return 0;
-}
-
-
-int stop_sync_thread(int state)
-{
-	DECLARE_WAITQUEUE(wait, current);
+		/*
+		 * The lock synchronizes with sb_queue_tail(), so that we don't
+		 * add sync buffers to the queue, when we are already in
+		 * progress of stopping the master sync daemon.
+		 */
 
-	if ((state == IP_VS_STATE_MASTER && !sync_master_pid) ||
-	    (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
-		return -ESRCH;
+		spin_lock(&ip_vs_sync_lock);
+		ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
+		spin_unlock(&ip_vs_sync_lock);
+		kthread_stop(sync_master_thread);
+		sync_master_thread = NULL;
+	} else if (state == IP_VS_STATE_BACKUP) {
+		if (!sync_backup_thread)
+			return -ESRCH;
+
+		IP_VS_INFO("stopping backup sync thread %d ...\n",
+			   task_pid_nr(sync_backup_thread));
+
+		ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
+		kthread_stop(sync_backup_thread);
+		sync_backup_thread = NULL;
+	} else {
+		return -EINVAL;
+	}
 
-	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
-	IP_VS_INFO("stopping sync thread %d ...\n",
-		   (state == IP_VS_STATE_MASTER) ?
-		   sync_master_pid : sync_backup_pid);
-
-	__set_current_state(TASK_UNINTERRUPTIBLE);
-	add_wait_queue(&stop_sync_wait, &wait);
-	set_stop_sync(state, 1);
-	ip_vs_sync_state -= state;
-	wake_up(&sync_wait);
-	schedule();
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&stop_sync_wait, &wait);
-
-	/* Note: no need to reap the sync thread, because its parent
-	   process is the init process */
-
-	if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
-	    (state == IP_VS_STATE_BACKUP && stop_backup_sync))
-		IP_VS_BUG();
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From ba6fd85021dec97d58373d9aea4bea8fc24258be Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Wed, 16 Jul 2008 11:13:56 +0000
Subject: ipvs: Put backup thread on mcast socket wait queue

Instead of doing an endless loop with sleeping for one second, we now put the
backup thread onto the mcast socket wait queue and it gets woken up as soon as
we have data to process.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_sync.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 550563a5660..cf5ed758ea1 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -29,6 +29,7 @@
 #include <linux/udp.h>
 #include <linux/err.h>
 #include <linux/kthread.h>
+#include <linux/wait.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
@@ -772,6 +773,10 @@ static int sync_thread_backup(void *data)
 		   ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
 
 	while (!kthread_should_stop()) {
+		wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
+			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
+			 || kthread_should_stop());
+
 		/* do we have data now? */
 		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
 			len = ip_vs_receive(tinfo->sock, tinfo->buf,
@@ -787,8 +792,6 @@ static int sync_thread_backup(void *data)
 			ip_vs_process_message(tinfo->buf, len);
 			local_bh_enable();
 		}
-
-		msleep_interruptible(1000);
 	}
 
 	/* release the sending multicast socket */
-- 
cgit v1.2.3-70-g09d2


From 375c6bbabf210ded3151481ca1ac7f730903512b Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Wed, 16 Jul 2008 11:14:03 +0000
Subject: ipvs: Use schedule_timeout_interruptible() instead of
 msleep_interruptible()

So that kthread_stop() can wake up the thread and we don't have to wait one
second in the worst case for the daemon to actually stop.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_sync.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index cf5ed758ea1..45e9bd96c28 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -742,7 +742,7 @@ static int sync_thread_master(void *data)
 			ip_vs_sync_buff_release(sb);
 		}
 
-		msleep_interruptible(1000);
+		schedule_timeout_interruptible(HZ);
 	}
 
 	/* clean up the sync_buff queue */
-- 
cgit v1.2.3-70-g09d2


From 9d3a0de7dc3c1c4456db5ceb92c445bef0a47681 Mon Sep 17 00:00:00 2001
From: "Rumen G. Bogdanovski" <rumen@voicecho.com>
Date: Wed, 16 Jul 2008 20:04:23 -0700
Subject: ipvs: More reliable synchronization on connection close

This patch enhances the synchronization of the closing connections
between the master and the backup director. It prevents the closed
connections to expire with the 15 min timeout of the ESTABLISHED
state on the backup and makes them expire as they would do on the
master with much shorter timeouts.

Signed-off-by: Rumen G. Bogdanovski <rumen@voicecho.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index bcf6276ba4b..a7879eafc3b 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -991,7 +991,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	       == sysctl_ip_vs_sync_threshold[0])) ||
 	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
 	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
-	       (cp->state == IP_VS_TCP_S_CLOSE)))))
+	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
+	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
 		ip_vs_sync_conn(cp);
 	cp->old_state = cp->state;
 
-- 
cgit v1.2.3-70-g09d2


From 7b1c65faa27f5ade3915e4bbc9186b6e64d2d6ec Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 16 Jul 2008 20:12:30 -0700
Subject: net: make __skb_splice_bits static

net/core/skbuff.c:1335:5: warning: symbol '__skb_splice_bits' was not declared. Should it be static?

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 48a43727d12..e4115672b6c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1332,7 +1332,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
  * Map linear and fragment data from the skb to spd. It reports failure if the
  * pipe is full or if we already spliced the requested length.
  */
-int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
+static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 		      unsigned int *len,
 		      struct splice_pipe_desc *spd)
 {
-- 
cgit v1.2.3-70-g09d2


From 70efce27fc3d54271519244dc5e47da4ed711dd4 Mon Sep 17 00:00:00 2001
From: Will Newton <will.newton@gmail.com>
Date: Wed, 16 Jul 2008 20:13:43 -0700
Subject: net/ipv4/tcp.c: Fix use of PULLHUP instead of POLLHUP in comments.

Change PULLHUP to POLLHUP in tcp_poll comments and clean up another
comment for grammar and coding style.

Signed-off-by: Will Newton <will.newton@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 56a133c6145..631133ebb20 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -344,8 +344,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		return inet_csk_listen_poll(sk);
 
 	/* Socket is not locked. We are protected from async events
-	   by poll logic and correct handling of state changes
-	   made by another threads is impossible in any case.
+	 * by poll logic and correct handling of state changes
+	 * made by other threads is impossible in any case.
 	 */
 
 	mask = 0;
@@ -371,10 +371,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
 	 * if and only if shutdown has been made in both directions.
 	 * Actually, it is interesting to look how Solaris and DUX
-	 * solve this dilemma. I would prefer, if PULLHUP were maskable,
+	 * solve this dilemma. I would prefer, if POLLHUP were maskable,
 	 * then we could set it on SND_SHUTDOWN. BTW examples given
 	 * in Stevens' books assume exactly this behaviour, it explains
-	 * why PULLHUP is incompatible with POLLOUT.	--ANK
+	 * why POLLHUP is incompatible with POLLOUT.	--ANK
 	 *
 	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
 	 * blocking on fresh not-connected or disconnected socket. --ANK
-- 
cgit v1.2.3-70-g09d2


From 84a3aa000eacbaf841d745b07ef3a3280899056b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:19:08 -0700
Subject: ipv4: prepare net initialization for IP accounting

Some places, that deal with IP statistics already have where to
get a struct net from, but use it directly, without declaring
a separate variable on the stack.

So, save this net on the stack for future IP_XXX_STATS macros.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 3 ++-
 net/ipv4/ip_fragment.c          | 6 +++---
 net/ipv4/udp.c                  | 4 +++-
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5bbf0005151..8338e106665 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -338,9 +338,10 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 			    .uli_u = { .ports =
 				       { .sport = inet_sk(sk)->sport,
 					 .dport = ireq->rmt_port } } };
+	struct net *net = sock_net(sk);
 
 	security_req_classify_flow(req, &fl);
-	if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) {
+	if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fbd5804b5d8..e23be5b36e1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -187,8 +187,10 @@ static void ip_evictor(struct net *net)
 static void ip_expire(unsigned long arg)
 {
 	struct ipq *qp;
+	struct net *net;
 
 	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+	net = container_of(qp->q.net, struct net, ipv4.frags);
 
 	spin_lock(&qp->q.lock);
 
@@ -202,9 +204,7 @@ static void ip_expire(unsigned long arg)
 
 	if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
 		struct sk_buff *head = qp->q.fragments;
-		struct net *net;
 
-		net = container_of(qp->q.net, struct net, ipv4.frags);
 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
 		if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) {
 			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -570,9 +570,9 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	struct ipq *qp;
 	struct net *net;
 
+	net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
 
-	net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
 	/* Start by cleaning up the memory. */
 	if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
 		ip_evictor(net);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fdd4faa1f12..048ef57edc1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -656,8 +656,10 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				    .uli_u = { .ports =
 					       { .sport = inet->sport,
 						 .dport = dport } } };
+		struct net *net = sock_net(sk);
+
 		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
+		err = ip_route_output_flow(net, &rt, &fl, sk, 1);
 		if (err) {
 			if (err == -ENETUNREACH)
 				IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
-- 
cgit v1.2.3-70-g09d2


From 5e38e270444f2629de7a706b5a9ca1b333d14517 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:19:49 -0700
Subject: mib: add net to IP_INC_STATS

All the callers already have either the net itself, or the place
where to get it from.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h      |  2 +-
 net/ipv4/ip_forward.c |  2 +-
 net/ipv4/ip_output.c  | 30 +++++++++++++++---------------
 net/ipv4/raw.c        |  2 +-
 4 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 673ecdbe72f..b9aaa32e475 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -157,7 +157,7 @@ struct ipv4_config
 
 extern struct ipv4_config ipv4_config;
 DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
-#define IP_INC_STATS(field)		SNMP_INC_STATS(ip_statistics, field)
+#define IP_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(ip_statistics, field); } while (0)
 #define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ip_statistics, field)
 #define IP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(ip_statistics, field, val)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index da14725916d..7f78a5a7e1e 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -88,7 +88,7 @@ int ip_forward(struct sk_buff *skb)
 
 	if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
 		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
-		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(dst_mtu(&rt->u.dst)));
 		goto drop;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f003186b93b..465544f6281 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -182,9 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
 
 	if (rt->rt_type == RTN_MULTICAST)
-		IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS);
 	else if (rt->rt_type == RTN_BROADCAST)
-		IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
+		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS);
 
 	/* Be paranoid, rather than too clever. */
 	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
@@ -244,7 +244,7 @@ int ip_mc_output(struct sk_buff *skb)
 	/*
 	 *	If the indicated interface is up and running, send the packet.
 	 */
-	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
@@ -298,7 +298,7 @@ int ip_output(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dst->dev;
 
-	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
@@ -389,7 +389,7 @@ packet_routed:
 	return ip_local_out(skb);
 
 no_route:
-	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
 	return -EHOSTUNREACH;
 }
@@ -451,7 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	iph = ip_hdr(skb);
 
 	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
-		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(ip_skb_dst_mtu(skb)));
 		kfree_skb(skb);
@@ -542,7 +542,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 			err = output(skb);
 
 			if (!err)
-				IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+				IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
 			if (err || !frag)
 				break;
 
@@ -552,7 +552,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 		}
 
 		if (err == 0) {
-			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
+			IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
 			return 0;
 		}
 
@@ -561,7 +561,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 			kfree_skb(frag);
 			frag = skb;
 		}
-		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
 		return err;
 	}
 
@@ -673,15 +673,15 @@ slow_path:
 		if (err)
 			goto fail;
 
-		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
 	}
 	kfree_skb(skb);
-	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
+	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
 	return err;
 
 fail:
 	kfree_skb(skb);
-	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
 	return err;
 }
 
@@ -1047,7 +1047,7 @@ alloc_new_skb:
 
 error:
 	inet->cork.length -= length;
-	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
 	return err;
 }
 
@@ -1189,7 +1189,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 
 error:
 	inet->cork.length -= size;
-	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
 	return err;
 }
 
@@ -1298,7 +1298,7 @@ out:
 	return err;
 
 error:
-	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
 	goto out;
 }
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7f39ea443ec..cd975743bcd 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -385,7 +385,7 @@ error_fault:
 	err = -EFAULT;
 	kfree_skb(skb);
 error:
-	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 7c73a6faffae0bfae70639113aecf06af666e714 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:20:11 -0700
Subject: mib: add net to IP_INC_STATS_BH

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h                |  2 +-
 net/dccp/ipv4.c                 |  2 +-
 net/ipv4/datagram.c             |  2 +-
 net/ipv4/inet_connection_sock.c |  4 ++--
 net/ipv4/ip_forward.c           |  4 ++--
 net/ipv4/ip_fragment.c          | 17 ++++++++++-------
 net/ipv4/ip_input.c             | 30 ++++++++++++++++--------------
 net/ipv4/ipmr.c                 |  4 ++--
 net/ipv4/route.c                |  3 ++-
 net/ipv4/tcp_ipv4.c             |  2 +-
 net/ipv4/udp.c                  |  2 +-
 net/sctp/output.c               |  3 ++-
 12 files changed, 41 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index b9aaa32e475..9973ce08d8f 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -158,7 +158,7 @@ struct ipv4_config
 extern struct ipv4_config ipv4_config;
 DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
 #define IP_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(ip_statistics, field); } while (0)
-#define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ip_statistics, field)
+#define IP_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(ip_statistics, field); } while (0)
 #define IP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(ip_statistics, field, val)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_INC_STATS(field)		SNMP_INC_STATS(net_statistics, field)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2b03c47cab1..9f760a1e312 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -465,7 +465,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 
 	security_skb_classify_flow(skb, &fl);
 	if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
-		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+		IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
 
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 0c0c73f368c..5e6c5a0f3fd 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -52,7 +52,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			       inet->sport, usin->sin_port, sk, 1);
 	if (err) {
 		if (err == -ENETUNREACH)
-			IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 		return err;
 	}
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8338e106665..bb81c958b74 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -342,12 +342,12 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 
 	security_req_classify_flow(req, &fl);
 	if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
-		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+		IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
 		ip_rt_put(rt);
-		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+		IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
 	return &rt->u.dst;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 7f78a5a7e1e..450016b89a1 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,7 +42,7 @@ static int ip_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
-	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
@@ -123,7 +123,7 @@ sr_failed:
 
 too_many_hops:
 	/* Tell the sender its packet died... */
-	IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS);
 	icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
 drop:
 	kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e23be5b36e1..65364c06ada 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -199,8 +199,8 @@ static void ip_expire(unsigned long arg)
 
 	ipq_kill(qp);
 
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 
 	if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
 		struct sk_buff *head = qp->q.fragments;
@@ -261,7 +261,10 @@ static inline int ip_frag_too_far(struct ipq *qp)
 	rc = qp->q.fragments && (end - start) > max;
 
 	if (rc) {
-		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+		struct net *net;
+
+		net = container_of(qp->q.net, struct net, ipv4.frags);
+		IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 	}
 
 	return rc;
@@ -545,7 +548,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
 	return 0;
 
@@ -560,7 +563,7 @@ out_oversize:
 			"Oversized IP packet from " NIPQUAD_FMT ".\n",
 			NIPQUAD(qp->saddr));
 out_fail:
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS);
 	return err;
 }
 
@@ -571,7 +574,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	struct net *net;
 
 	net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
 
 	/* Start by cleaning up the memory. */
 	if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
@@ -590,7 +593,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 		return ret;
 	}
 
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
 	return -ENOMEM;
 }
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 7c26428ea67..043f640df4b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -230,16 +230,16 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 				protocol = -ret;
 				goto resubmit;
 			}
-			IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+			IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
 		} else {
 			if (!raw) {
 				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-					IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
+					IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS);
 					icmp_send(skb, ICMP_DEST_UNREACH,
 						  ICMP_PROT_UNREACH, 0);
 				}
 			} else
-				IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+				IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
 			kfree_skb(skb);
 		}
 	}
@@ -281,7 +281,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
 					      --ANK (980813)
 	*/
 	if (skb_cow(skb, skb_headroom(skb))) {
-		IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
 
@@ -290,7 +290,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
 	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
 
 	if (ip_options_compile(dev_net(dev), opt, skb)) {
-		IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
 		goto drop;
 	}
 
@@ -334,9 +334,11 @@ static int ip_rcv_finish(struct sk_buff *skb)
 					 skb->dev);
 		if (unlikely(err)) {
 			if (err == -EHOSTUNREACH)
-				IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+				IP_INC_STATS_BH(dev_net(skb->dev),
+						IPSTATS_MIB_INADDRERRORS);
 			else if (err == -ENETUNREACH)
-				IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
+				IP_INC_STATS_BH(dev_net(skb->dev),
+						IPSTATS_MIB_INNOROUTES);
 			goto drop;
 		}
 	}
@@ -357,9 +359,9 @@ static int ip_rcv_finish(struct sk_buff *skb)
 
 	rt = skb->rtable;
 	if (rt->rt_type == RTN_MULTICAST)
-		IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+		IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS);
 	else if (rt->rt_type == RTN_BROADCAST)
-		IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS);
+		IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS);
 
 	return dst_input(skb);
 
@@ -382,10 +384,10 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	if (skb->pkt_type == PACKET_OTHERHOST)
 		goto drop;
 
-	IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
+	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
-		IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
 		goto out;
 	}
 
@@ -418,7 +420,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 
 	len = ntohs(iph->tot_len);
 	if (skb->len < len) {
-		IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
 		goto drop;
 	} else if (len < (iph->ihl*4))
 		goto inhdr_error;
@@ -428,7 +430,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	 * Note this now means skb->len holds ntohs(iph->tot_len).
 	 */
 	if (pskb_trim_rcsum(skb, len)) {
-		IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
 
@@ -439,7 +441,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		       ip_rcv_finish);
 
 inhdr_error:
-	IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
 drop:
 	kfree_skb(skb);
 out:
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c9ab47b966b..033c712c3a5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1178,7 +1178,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
-	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
@@ -1241,7 +1241,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 		   to blackhole.
 		 */
 
-		IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
 		ip_rt_put(rt);
 		goto out_free;
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 79c1e74263a..e4ab0ac94f9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1429,7 +1429,8 @@ static int ip_error(struct sk_buff *skb)
 			break;
 		case ENETUNREACH:
 			code = ICMP_NET_UNREACH;
-			IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
+			IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+					IPSTATS_MIB_INNOROUTES);
 			break;
 		case EACCES:
 			code = ICMP_PKT_FILTERED;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0fefd440941..7797d528701 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -175,7 +175,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			       inet->sport, usin->sin_port, sk, 1);
 	if (tmp < 0) {
 		if (tmp == -ENETUNREACH)
-			IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 		return tmp;
 	}
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 048ef57edc1..1560d11ba6e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -662,7 +662,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		err = ip_route_output_flow(net, &rt, &fl, sk, 1);
 		if (err) {
 			if (err == -ENETUNREACH)
-				IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+				IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 			goto out;
 		}
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index abcd00dc05e..9a63a3fb901 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -50,6 +50,7 @@
 #include <linux/init.h>
 #include <net/inet_ecn.h>
 #include <net/icmp.h>
+#include <net/net_namespace.h>
 
 #ifndef TEST_FRAME
 #include <net/tcp.h>
@@ -595,7 +596,7 @@ out:
 	return err;
 no_route:
 	kfree_skb(nskb);
-	IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+	IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
 
 	/* FIXME: Returning the 'err' will effect all the associations
 	 * associated with a socket, although only one of the paths of the
-- 
cgit v1.2.3-70-g09d2


From c5346fe396f5e22bbfb3ec037c43891c3c57d3e6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:20:33 -0700
Subject: mib: add net to IP_ADD_STATS_BH

Very simple - only ip_evictor (fragments) requires such.
This patch ends up the IP_XXX_STATS patching.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       | 2 +-
 net/ipv4/ip_fragment.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 9973ce08d8f..93d0e093ff4 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -159,7 +159,7 @@ extern struct ipv4_config ipv4_config;
 DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
 #define IP_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(ip_statistics, field); } while (0)
 #define IP_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(ip_statistics, field); } while (0)
-#define IP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(ip_statistics, field, val)
+#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH(ip_statistics, field, val)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_INC_STATS(field)		SNMP_INC_STATS(net_statistics, field)
 #define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(net_statistics, field)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 65364c06ada..38d38f05801 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -178,7 +178,7 @@ static void ip_evictor(struct net *net)
 
 	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
 	if (evicted)
-		IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
+		IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From a86b1e3019455283a677c2485cfeda2dc36df3eb Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:20:58 -0700
Subject: inet: prepare struct net for TCP MIB accounting

This is the same as the first patch in the set, but preparing
the net for TCP_XXX_STATS - save the struct net on the stack
where required and possible.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 10 +++++++---
 net/ipv6/tcp_ipv6.c |  3 ++-
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7797d528701..db3bf9be076 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -544,6 +544,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
 #endif
+	struct net *net;
 
 	/* Never send a reset in response to a reset. */
 	if (th->rst)
@@ -594,7 +595,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 
-	ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb,
+	net = dev_net(skb->dst->dev);
+	ip_send_reply(net->ipv4.tcp_sock, skb,
 		      &arg, arg.iov[0].iov_len);
 
 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
@@ -619,6 +621,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			];
 	} rep;
 	struct ip_reply_arg arg;
+	struct net *net = dev_net(skb->dev);
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
@@ -668,7 +671,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 	if (oif)
 		arg.bound_dev_if = oif;
 
-	ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb,
+	ip_send_reply(net->ipv4.tcp_sock, skb,
 		      &arg, arg.iov[0].iov_len);
 
 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
@@ -1505,6 +1508,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	struct tcphdr *th;
 	struct sock *sk;
 	int ret;
+	struct net *net = dev_net(skb->dev);
 
 	if (skb->pkt_type != PACKET_HOST)
 		goto discard_it;
@@ -1539,7 +1543,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->flags	 = iph->tos;
 	TCP_SKB_CB(skb)->sacked	 = 0;
 
-	sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
+	sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr,
 			th->source, iph->daddr, th->dest, inet_iif(skb));
 	if (!sk)
 		goto no_tcp_socket;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 30dbab7cc3c..fc5f7166208 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1617,6 +1617,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	struct tcphdr *th;
 	struct sock *sk;
 	int ret;
+	struct net *net = dev_net(skb->dev);
 
 	if (skb->pkt_type != PACKET_HOST)
 		goto discard_it;
@@ -1648,7 +1649,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
 	TCP_SKB_CB(skb)->sacked = 0;
 
-	sk = __inet6_lookup(dev_net(skb->dev), &tcp_hashinfo,
+	sk = __inet6_lookup(net, &tcp_hashinfo,
 			&ipv6_hdr(skb)->saddr, th->source,
 			&ipv6_hdr(skb)->daddr, ntohs(th->dest),
 			inet6_iif(skb));
-- 
cgit v1.2.3-70-g09d2


From a9c19329eccdb145a08a4a2e969d7b40c54c9bcc Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:21:42 -0700
Subject: tcp: add net to tcp_mib_init

This one sets TCP MIBs after zeroing them, and thus requires
the net.

The existing single caller can use init_net (temporarily).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h  | 2 +-
 net/ipv4/af_inet.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 048f8de724c..b9d0ba618b6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1024,7 +1024,7 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
 
 #define TCP_CHECK_TIMER(sk) do { } while (0)
 
-static inline void tcp_mib_init(void)
+static inline void tcp_mib_init(struct net *net)
 {
 	/* See RFC 2012 */
 	TCP_ADD_STATS_USER(TCP_MIB_RTOALGORITHM, 1);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index dc411335c14..95a966dd191 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1363,7 +1363,7 @@ static int __init init_ipv4_mibs(void)
 			  sizeof(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 
-	tcp_mib_init();
+	tcp_mib_init(&init_net);
 
 	return 0;
 
-- 
cgit v1.2.3-70-g09d2


From 81cc8a75d944fa39fc333c2c329c8e8b3c62cada Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:22:04 -0700
Subject: mib: add net to TCP_INC_STATS

Fortunately (almost) all the TCP code has a sock to get the net from :)

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 +-
 net/ipv4/tcp.c        |  4 ++--
 net/ipv4/tcp_output.c | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b9d0ba618b6..15065157afc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -266,7 +266,7 @@ static inline int tcp_too_many_orphans(struct sock *sk, int num)
 extern struct proto tcp_prot;
 
 DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
-#define TCP_INC_STATS(field)		SNMP_INC_STATS(tcp_statistics, field)
+#define TCP_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(tcp_statistics, field); } while (0)
 #define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(tcp_statistics, field)
 #define TCP_DEC_STATS(field)		SNMP_DEC_STATS(tcp_statistics, field)
 #define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(tcp_statistics, field, val)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 631133ebb20..aa79198fb02 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1668,12 +1668,12 @@ void tcp_set_state(struct sock *sk, int state)
 	switch (state) {
 	case TCP_ESTABLISHED:
 		if (oldstate != TCP_ESTABLISHED)
-			TCP_INC_STATS(TCP_MIB_CURRESTAB);
+			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
 		break;
 
 	case TCP_CLOSE:
 		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
-			TCP_INC_STATS(TCP_MIB_ESTABRESETS);
+			TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
 
 		sk->sk_prot->unhash(sk);
 		if (inet_csk(sk)->icsk_bind_hash &&
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index edef2afe905..c3b0da82698 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -618,7 +618,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_event_data_sent(tp, skb, sk);
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
-		TCP_INC_STATS(TCP_MIB_OUTSEGS);
+		TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
 	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
 	if (likely(err <= 0))
@@ -1910,7 +1910,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 
 	if (err == 0) {
 		/* Update global TCP statistics. */
-		TCP_INC_STATS(TCP_MIB_RETRANSSEGS);
+		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
 
 		tp->total_retrans++;
 
@@ -2132,7 +2132,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
 
-	TCP_INC_STATS(TCP_MIB_OUTRSTS);
+	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
 }
 
 /* WARNING: This routine must only be called when we have already sent
@@ -2258,7 +2258,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			      );
 
 	th->doff = (tcp_header_size >> 2);
-	TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Okay, we have all we need - do the md5 hash if needed */
@@ -2367,7 +2367,7 @@ int tcp_connect(struct sock *sk)
 	 */
 	tp->snd_nxt = tp->write_seq;
 	tp->pushed_seq = tp->write_seq;
-	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
+	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-- 
cgit v1.2.3-70-g09d2


From 63231bddf6514778792d3784f63822473d250fc0 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:22:25 -0700
Subject: mib: add net to TCP_INC_STATS_BH

Same as before - the sock is always there to get the net from,
but there are also some places with the net already saved on
the stack.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h        |  2 +-
 net/ipv4/tcp.c           |  2 +-
 net/ipv4/tcp_input.c     |  6 +++---
 net/ipv4/tcp_ipv4.c      | 14 +++++++-------
 net/ipv4/tcp_minisocks.c |  4 ++--
 net/ipv6/tcp_ipv6.c      | 14 +++++++-------
 6 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 15065157afc..8a2cd076be8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -267,7 +267,7 @@ extern struct proto tcp_prot;
 
 DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 #define TCP_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(tcp_statistics, field); } while (0)
-#define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(tcp_statistics, field)
+#define TCP_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(tcp_statistics, field); } while (0)
 #define TCP_DEC_STATS(field)		SNMP_DEC_STATS(tcp_statistics, field)
 #define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(tcp_statistics, field, val)
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index aa79198fb02..f742f84026b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2663,7 +2663,7 @@ EXPORT_SYMBOL(__tcp_put_md5sig_pool);
 void tcp_done(struct sock *sk)
 {
 	if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
-		TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
 	tcp_set_state(sk, TCP_CLOSE);
 	tcp_clear_xmit_timers(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6ea970a151..01e8004911b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4795,7 +4795,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				tcp_data_snd_check(sk);
 				return 0;
 			} else { /* Header too small */
-				TCP_INC_STATS_BH(TCP_MIB_INERRS);
+				TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 				goto discard;
 			}
 		} else {
@@ -4934,7 +4934,7 @@ slow_path:
 	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
 	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
 		tcp_reset(sk);
 		return 1;
@@ -4957,7 +4957,7 @@ step5:
 	return 0;
 
 csum_error:
-	TCP_INC_STATS_BH(TCP_MIB_INERRS);
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 
 discard:
 	__kfree_skb(skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index db3bf9be076..e876312b950 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -599,8 +599,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	ip_send_reply(net->ipv4.tcp_sock, skb,
 		      &arg, arg.iov[0].iov_len);
 
-	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
-	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
+	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 }
 
 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
@@ -674,7 +674,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 	ip_send_reply(net->ipv4.tcp_sock, skb,
 		      &arg, arg.iov[0].iov_len);
 
-	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 }
 
 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1494,7 +1494,7 @@ discard:
 	return 0;
 
 csum_err:
-	TCP_INC_STATS_BH(TCP_MIB_INERRS);
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 	goto discard;
 }
 
@@ -1514,7 +1514,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 		goto discard_it;
 
 	/* Count it even if it's bad */
-	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
+	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
 
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		goto discard_it;
@@ -1590,7 +1590,7 @@ no_tcp_socket:
 
 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
 bad_packet:
-		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
 	} else {
 		tcp_v4_send_reset(NULL, skb);
 	}
@@ -1611,7 +1611,7 @@ do_time_wait:
 	}
 
 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
-		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
 		inet_twsk_put(inet_twsk(sk));
 		goto discard_it;
 	}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ea68a478fad..8b02b103996 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -480,7 +480,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->rx_opt.mss_clamp = req->mss;
 		TCP_ECN_openreq_child(newtp, req);
 
-		TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS);
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
 	}
 	return newsk;
 }
@@ -630,7 +630,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		 *	   "fourth, check the SYN bit"
 		 */
 		if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
-			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+			TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 			goto embryonic_reset;
 		}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fc5f7166208..3895d91ea9f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1004,8 +1004,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 
 		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
 			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
-			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
-			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
+			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 			return;
 		}
 	}
@@ -1089,7 +1089,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 	if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
 		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
 			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
-			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 			return;
 		}
 	}
@@ -1579,7 +1579,7 @@ discard:
 	kfree_skb(skb);
 	return 0;
 csum_err:
-	TCP_INC_STATS_BH(TCP_MIB_INERRS);
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 	goto discard;
 
 
@@ -1625,7 +1625,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	/*
 	 *	Count it even if it's bad.
 	 */
-	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
+	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
 
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		goto discard_it;
@@ -1697,7 +1697,7 @@ no_tcp_socket:
 
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 bad_packet:
-		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
 	} else {
 		tcp_v6_send_reset(NULL, skb);
 	}
@@ -1722,7 +1722,7 @@ do_time_wait:
 	}
 
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
-		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
 		inet_twsk_put(inet_twsk(sk));
 		goto discard_it;
 	}
-- 
cgit v1.2.3-70-g09d2


From 74688e487a407a33d42879957b478601aca616b8 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:22:46 -0700
Subject: mib: add net to TCP_DEC_STATS

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 net/ipv4/tcp.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8a2cd076be8..92d87691aa6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -268,7 +268,7 @@ extern struct proto tcp_prot;
 DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 #define TCP_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(tcp_statistics, field); } while (0)
 #define TCP_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(tcp_statistics, field); } while (0)
-#define TCP_DEC_STATS(field)		SNMP_DEC_STATS(tcp_statistics, field)
+#define TCP_DEC_STATS(net, field)	do { (void)net; SNMP_DEC_STATS(tcp_statistics, field); } while (0)
 #define TCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(tcp_statistics, field, val)
 
 extern void			tcp_v4_err(struct sk_buff *skb, u32);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f742f84026b..525dcf53415 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1682,7 +1682,7 @@ void tcp_set_state(struct sock *sk, int state)
 		/* fall through */
 	default:
 		if (oldstate==TCP_ESTABLISHED)
-			TCP_DEC_STATS(TCP_MIB_CURRESTAB);
+			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
 	}
 
 	/* Change state AFTER socket is unhashed to avoid closed
-- 
cgit v1.2.3-70-g09d2


From 5c52ba170f8167511bdb65b981f4582100c40675 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:28:10 -0700
Subject: sock: add net to prot->enter_memory_pressure callback

The tcp_enter_memory_pressure calls NET_INC_STATS, but doesn't
have where to get the net from.

I decided to add a sk argument, not the net itself, only to factor
all the required sock_net(sk) calls inside the enter_memory_pressure
callback itself.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h     | 4 ++--
 include/net/tcp.h      | 2 +-
 net/core/sock.c        | 2 +-
 net/decnet/af_decnet.c | 2 +-
 net/ipv4/tcp.c         | 4 ++--
 net/sctp/socket.c      | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 3f4897ab432..06c5259aff3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -565,7 +565,7 @@ struct proto {
 #endif
 
 	/* Memory pressure */
-	void			(*enter_memory_pressure)(void);
+	void			(*enter_memory_pressure)(struct sock *sk);
 	atomic_t		*memory_allocated;	/* Current allocated memory. */
 	atomic_t		*sockets_allocated;	/* Current number of sockets. */
 	/*
@@ -1210,7 +1210,7 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
 
 	page = alloc_pages(sk->sk_allocation, 0);
 	if (!page) {
-		sk->sk_prot->enter_memory_pressure();
+		sk->sk_prot->enter_memory_pressure(sk);
 		sk_stream_moderate_sndbuf(sk);
 	}
 	return page;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4d788181654..c25cb5278b9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -975,7 +975,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->rmt_port = tcp_hdr(skb)->source;
 }
 
-extern void tcp_enter_memory_pressure(void);
+extern void tcp_enter_memory_pressure(struct sock *sk);
 
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index 2c0ba52e530..10a64d57078 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1442,7 +1442,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	/* Under pressure. */
 	if (allocated > prot->sysctl_mem[1])
 		if (prot->enter_memory_pressure)
-			prot->enter_memory_pressure();
+			prot->enter_memory_pressure(sk);
 
 	/* Over hard limit. */
 	if (allocated > prot->sysctl_mem[2])
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 931bdf9cb75..61b7df577dd 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -451,7 +451,7 @@ static void dn_destruct(struct sock *sk)
 
 static int dn_memory_pressure;
 
-static void dn_enter_memory_pressure(void)
+static void dn_enter_memory_pressure(struct sock *sk)
 {
 	if (!dn_memory_pressure) {
 		dn_memory_pressure = 1;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 525dcf53415..bc8559a6f7e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -316,7 +316,7 @@ int tcp_memory_pressure __read_mostly;
 
 EXPORT_SYMBOL(tcp_memory_pressure);
 
-void tcp_enter_memory_pressure(void)
+void tcp_enter_memory_pressure(struct sock *sk)
 {
 	if (!tcp_memory_pressure) {
 		NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);
@@ -649,7 +649,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 		}
 		__kfree_skb(skb);
 	} else {
-		sk->sk_prot->enter_memory_pressure();
+		sk->sk_prot->enter_memory_pressure(sk);
 		sk_stream_moderate_sndbuf(sk);
 	}
 	return NULL;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index df5572c39f0..6aba01b0ce4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -116,7 +116,7 @@ static int sctp_memory_pressure;
 static atomic_t sctp_memory_allocated;
 static atomic_t sctp_sockets_allocated;
 
-static void sctp_enter_memory_pressure(void)
+static void sctp_enter_memory_pressure(struct sock *sk)
 {
 	sctp_memory_pressure = 1;
 }
-- 
cgit v1.2.3-70-g09d2


From ca12a1a443a51298afcca627ad0bcbd8ad1dcddc Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:28:42 -0700
Subject: inet: prepare net on the stack for NET accounting macros

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv6.c     | 3 ++-
 net/ipv4/arp.c      | 3 ++-
 net/ipv6/tcp_ipv6.c | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index eec3c471789..83cc9bbc620 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -93,8 +93,9 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct sock *sk;
 	int err;
 	__u64 seq;
+	struct net *net = dev_net(skb->dev);
 
-	sk = inet6_lookup(dev_net(skb->dev), &dccp_hashinfo,
+	sk = inet6_lookup(net, &dccp_hashinfo,
 			&hdr->daddr, dh->dccph_dport,
 			&hdr->saddr, dh->dccph_sport, inet6_iif(skb));
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 29df75a6bcc..aab98b8a994 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -421,8 +421,9 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 	struct rtable *rt;
 	int flag = 0;
 	/*unsigned long now; */
+	struct net *net = dev_net(dev);
 
-	if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0)
+	if (ip_route_output_key(net, &rt, &fl) < 0)
 		return 1;
 	if (rt->u.dst.dev != dev) {
 		NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3895d91ea9f..d58b83ac06f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -323,8 +323,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	int err;
 	struct tcp_sock *tp;
 	__u32 seq;
+	struct net *net = dev_net(skb->dev);
 
-	sk = inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &hdr->daddr,
+	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
 			th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
 
 	if (sk == NULL) {
-- 
cgit v1.2.3-70-g09d2


From 1ed834655a0d42ecd80ff051e681e2ea44747b6c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:29:51 -0700
Subject: tcp: replace tcp_sock argument with sock in some places

These places have a tcp_sock, but we'd prefer the sock itself to
get net from it. Fortunately, tcp_sk macro is just a type cast, so
this replace is really cheap.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 01e8004911b..f50d8433f04 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1169,10 +1169,11 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 		tp->lost_retrans_low = new_low_seq;
 }
 
-static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
+static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
 			   struct tcp_sack_block_wire *sp, int num_sacks,
 			   u32 prior_snd_una)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
 	u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
 	int dup_sack = 0;
@@ -1434,7 +1435,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 		tcp_highest_sack_reset(sk);
 	}
 
-	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
+	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
 					 num_sacks, prior_snd_una);
 	if (found_dup_sack)
 		flag |= FLAG_DSACKING_ACK;
@@ -3711,8 +3712,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
 	return 0;
 }
 
-static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 		int mib_idx;
 
@@ -3731,10 +3734,12 @@ static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
 	}
 }
 
-static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (!tp->rx_opt.dsack)
-		tcp_dsack_set(tp, seq, end_seq);
+		tcp_dsack_set(sk, seq, end_seq);
 	else
 		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
 }
@@ -3753,7 +3758,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
 				end_seq = tp->rcv_nxt;
-			tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq);
+			tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
 		}
 	}
 
@@ -3906,7 +3911,7 @@ static void tcp_ofo_queue(struct sock *sk)
 			__u32 dsack = dsack_high;
 			if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
 				dsack_high = TCP_SKB_CB(skb)->end_seq;
-			tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack);
+			tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
 		}
 
 		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
@@ -4035,7 +4040,7 @@ queue_and_out:
 	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
 		/* A retransmit, 2nd most common case.  Force an immediate ack. */
 		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
-		tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 
 out_of_window:
 		tcp_enter_quickack_mode(sk);
@@ -4057,7 +4062,7 @@ drop:
 			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
 			   TCP_SKB_CB(skb)->end_seq);
 
-		tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
+		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
 
 		/* If window is closed, drop tail of packet. But after
 		 * remembering D-SACK for its head made in previous line.
@@ -4122,12 +4127,12 @@ drop:
 			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
 				/* All the bits are present. Drop. */
 				__kfree_skb(skb);
-				tcp_dsack_set(tp, seq, end_seq);
+				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
 			}
 			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
 				/* Partial overlap. */
-				tcp_dsack_set(tp, seq,
+				tcp_dsack_set(sk, seq,
 					      TCP_SKB_CB(skb1)->end_seq);
 			} else {
 				skb1 = skb1->prev;
@@ -4140,12 +4145,12 @@ drop:
 		       (struct sk_buff *)&tp->out_of_order_queue &&
 		       after(end_seq, TCP_SKB_CB(skb1)->seq)) {
 			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+				tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
 						 end_seq);
 				break;
 			}
 			__skb_unlink(skb1, &tp->out_of_order_queue);
-			tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
 					 TCP_SKB_CB(skb1)->end_seq);
 			__kfree_skb(skb1);
 		}
-- 
cgit v1.2.3-70-g09d2


From 4e6734447dbc7a0a85e09616821c0782d9fb1141 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:30:14 -0700
Subject: mib: add net to NET_INC_STATS

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h      | 2 +-
 net/ipv4/tcp.c        | 2 +-
 net/ipv4/tcp_output.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 93d0e093ff4..b42a4349985 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -161,7 +161,7 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
 #define IP_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(ip_statistics, field); } while (0)
 #define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH(ip_statistics, field, val)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
-#define NET_INC_STATS(field)		SNMP_INC_STATS(net_statistics, field)
+#define NET_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(net_statistics, field); } while (0)
 #define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(net_statistics, field)
 #define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(net_statistics, field)
 #define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bc8559a6f7e..85f08291e92 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -319,7 +319,7 @@ EXPORT_SYMBOL(tcp_memory_pressure);
 void tcp_enter_memory_pressure(struct sock *sk)
 {
 	if (!tcp_memory_pressure) {
-		NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
 		tcp_memory_pressure = 1;
 	}
 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c3b0da82698..176f0702b8a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2119,7 +2119,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* NOTE: No TCP options attached and we never retransmit this. */
 	skb = alloc_skb(MAX_TCP_HEADER, priority);
 	if (!skb) {
-		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
 		return;
 	}
 
@@ -2130,7 +2130,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* Send it off. */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
-		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
 
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
 }
-- 
cgit v1.2.3-70-g09d2


From de0744af1fe2d0a3d428f6af0f2fe1f6179b1a9c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:31:16 -0700
Subject: mib: add net to NET_INC_STATS_BH

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h            |  2 +-
 include/net/tcp.h           |  2 +-
 net/dccp/ipv4.c             | 10 +++----
 net/dccp/ipv6.c             |  8 +++---
 net/dccp/timer.c            |  4 +--
 net/ipv4/arp.c              |  2 +-
 net/ipv4/inet_hashtables.c  |  4 +--
 net/ipv4/syncookies.c       |  6 ++---
 net/ipv4/tcp.c              |  6 +++--
 net/ipv4/tcp_input.c        | 65 +++++++++++++++++++++++----------------------
 net/ipv4/tcp_ipv4.c         | 12 ++++-----
 net/ipv4/tcp_minisocks.c    |  6 ++---
 net/ipv4/tcp_output.c       |  4 +--
 net/ipv4/tcp_timer.c        | 12 ++++-----
 net/ipv6/inet6_hashtables.c |  4 +--
 net/ipv6/syncookies.c       |  6 ++---
 net/ipv6/tcp_ipv6.c         | 10 +++----
 net/sctp/input.c            |  2 +-
 18 files changed, 84 insertions(+), 81 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index b42a4349985..21df167a8e5 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -162,7 +162,7 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
 #define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH(ip_statistics, field, val)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(net_statistics, field); } while (0)
-#define NET_INC_STATS_BH(field)		SNMP_INC_STATS_BH(net_statistics, field)
+#define NET_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(net_statistics, field); } while (0)
 #define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(net_statistics, field)
 #define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
 #define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c25cb5278b9..60e5be8b925 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -894,7 +894,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 
 			while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
 				sk->sk_backlog_rcv(sk, skb1);
-				NET_INC_STATS_BH(LINUX_MIB_TCPPREQUEUEDROPPED);
+				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED);
 			}
 
 			tp->ucopy.memory = 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 9f760a1e312..2622ace17c4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -230,7 +230,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	 * servers this needs to be solved differently.
 	 */
 	if (sock_owned_by_user(sk))
-		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
 	if (sk->sk_state == DCCP_CLOSED)
 		goto out;
@@ -239,7 +239,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	seq = dccp_hdr_seq(dh);
 	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
 	    !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
-		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
 
@@ -286,7 +286,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 		BUG_TRAP(!req->sk);
 
 		if (seq != dccp_rsk(req)->dreq_iss) {
-			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
 		/*
@@ -409,9 +409,9 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 	return newsk;
 
 exit_overflow:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
 exit:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	dst_release(dst);
 	return NULL;
 }
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 83cc9bbc620..b74e8b2cbe5 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -111,7 +111,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk))
-		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
 	if (sk->sk_state == DCCP_CLOSED)
 		goto out;
@@ -189,7 +189,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		BUG_TRAP(req->sk == NULL);
 
 		if (seq != dccp_rsk(req)->dreq_iss) {
-			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
 
@@ -630,9 +630,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	return newsk;
 
 out_overflow:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
 out:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	if (opt != NULL && opt != np->opt)
 		sock_kfree_s(sk, opt, opt->tot_len);
 	dst_release(dst);
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 8703a792b56..3608d5342ca 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -224,7 +224,7 @@ static void dccp_delack_timer(unsigned long data)
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
 		icsk->icsk_ack.blocked = 1;
-		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		sk_reset_timer(sk, &icsk->icsk_delack_timer,
 			       jiffies + TCP_DELACK_MIN);
 		goto out;
@@ -254,7 +254,7 @@ static void dccp_delack_timer(unsigned long data)
 			icsk->icsk_ack.ato = TCP_ATO_MIN;
 		}
 		dccp_send_ack(sk);
-		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
 	}
 out:
 	bh_unlock_sock(sk);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index aab98b8a994..b043eda60b0 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -426,7 +426,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 	if (ip_route_output_key(net, &rt, &fl) < 0)
 		return 1;
 	if (rt->u.dst.dev != dev) {
-		NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
+		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
 		flag = 1;
 	}
 	ip_rt_put(rt);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index eca5899729e..115f53722d2 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -312,11 +312,11 @@ unique:
 
 	if (twp) {
 		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 	} else if (tw) {
 		/* Silly. Should hash-dance instead... */
 		inet_twsk_deschedule(tw, death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 
 		inet_twsk_put(tw);
 	}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index fdde2ae07e2..51bc24d3b8a 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -173,7 +173,7 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 		;
 	*mssp = msstab[mssind] + 1;
 
-	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
 
 	return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
 				     th->source, th->dest, ntohl(th->seq),
@@ -269,11 +269,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
 	    (mss = cookie_check(skb, cookie)) == 0) {
-		NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
 		goto out;
 	}
 
-	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 85f08291e92..9e0e45c3780 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1871,7 +1871,8 @@ adjudge_to_death:
 		if (tp->linger2 < 0) {
 			tcp_set_state(sk, TCP_CLOSE);
 			tcp_send_active_reset(sk, GFP_ATOMIC);
-			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
+			NET_INC_STATS_BH(sock_net(sk),
+					LINUX_MIB_TCPABORTONLINGER);
 		} else {
 			const int tmo = tcp_fin_time(sk);
 
@@ -1893,7 +1894,8 @@ adjudge_to_death:
 				       "sockets\n");
 			tcp_set_state(sk, TCP_CLOSE);
 			tcp_send_active_reset(sk, GFP_ATOMIC);
-			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+			NET_INC_STATS_BH(sock_net(sk),
+					LINUX_MIB_TCPABORTONMEMORY);
 		}
 	}
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f50d8433f04..fac49a6e161 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -961,7 +961,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 		else
 			mib_idx = LINUX_MIB_TCPSACKREORDER;
 
-		NET_INC_STATS_BH(mib_idx);
+		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 #if FASTRETRANS_DEBUG > 1
 		printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
 		       tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -1157,7 +1157,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 				tp->lost_out += tcp_skb_pcount(skb);
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			}
-			NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
 		} else {
 			if (before(ack_seq, new_low_seq))
 				new_low_seq = ack_seq;
@@ -1181,7 +1181,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
 	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
 		dup_sack = 1;
 		tcp_dsack_seen(tp);
-		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
 	} else if (num_sacks > 1) {
 		u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
 		u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
@@ -1190,7 +1190,8 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
 		    !before(start_seq_0, start_seq_1)) {
 			dup_sack = 1;
 			tcp_dsack_seen(tp);
-			NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
+			NET_INC_STATS_BH(sock_net(sk),
+					LINUX_MIB_TCPDSACKOFORECV);
 		}
 	}
 
@@ -1476,7 +1477,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 				mib_idx = LINUX_MIB_TCPSACKDISCARD;
 			}
 
-			NET_INC_STATS_BH(mib_idx);
+			NET_INC_STATS_BH(sock_net(sk), mib_idx);
 			if (i == 0)
 				first_sack_index = -1;
 			continue;
@@ -1969,7 +1970,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
 {
 	if (flag & FLAG_SACK_RENEGING) {
 		struct inet_connection_sock *icsk = inet_csk(sk);
-		NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
 
 		tcp_enter_loss(sk, 1);
 		icsk->icsk_retransmits++;
@@ -2401,7 +2402,7 @@ static int tcp_try_undo_recovery(struct sock *sk)
 		else
 			mib_idx = LINUX_MIB_TCPFULLUNDO;
 
-		NET_INC_STATS_BH(mib_idx);
+		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 		tp->undo_marker = 0;
 	}
 	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
@@ -2424,7 +2425,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
 		DBGUNDO(sk, "D-SACK");
 		tcp_undo_cwr(sk, 1);
 		tp->undo_marker = 0;
-		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
 	}
 }
 
@@ -2447,7 +2448,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
 
 		DBGUNDO(sk, "Hoe");
 		tcp_undo_cwr(sk, 0);
-		NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
 
 		/* So... Do not make Hoe's retransmit yet.
 		 * If the first packet was delayed, the rest
@@ -2476,7 +2477,7 @@ static int tcp_try_undo_loss(struct sock *sk)
 		DBGUNDO(sk, "partial loss");
 		tp->lost_out = 0;
 		tcp_undo_cwr(sk, 1);
-		NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
 		inet_csk(sk)->icsk_retransmits = 0;
 		tp->undo_marker = 0;
 		if (tcp_is_sack(tp))
@@ -2595,7 +2596,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
 		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
-		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
 	}
 
 	/* D. Check consistency of the current state. */
@@ -2700,7 +2701,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 		else
 			mib_idx = LINUX_MIB_TCPSACKRECOVERY;
 
-		NET_INC_STATS_BH(mib_idx);
+		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 		tp->high_seq = tp->snd_nxt;
 		tp->prior_ssthresh = 0;
@@ -3211,7 +3212,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
 		}
 		tp->frto_counter = 0;
 		tp->undo_marker = 0;
-		NET_INC_STATS_BH(LINUX_MIB_TCPSPURIOUSRTOS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
 	}
 	return 0;
 }
@@ -3264,12 +3265,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 
 		tcp_ca_event(sk, CA_EVENT_FAST_ACK);
 
-		NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
 	} else {
 		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
 			flag |= FLAG_DATA;
 		else
-			NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
 
 		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
@@ -3724,7 +3725,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 		else
 			mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
 
-		NET_INC_STATS_BH(mib_idx);
+		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 		tp->rx_opt.dsack = 1;
 		tp->duplicate_sack[0].start_seq = seq;
@@ -3750,7 +3751,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
 
 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk);
 
 		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
@@ -4039,7 +4040,7 @@ queue_and_out:
 
 	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
 		/* A retransmit, 2nd most common case.  Force an immediate ack. */
-		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 
 out_of_window:
@@ -4181,7 +4182,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			struct sk_buff *next = skb->next;
 			__skb_unlink(skb, list);
 			__kfree_skb(skb);
-			NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
 			skb = next;
 			continue;
 		}
@@ -4249,7 +4250,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 				struct sk_buff *next = skb->next;
 				__skb_unlink(skb, list);
 				__kfree_skb(skb);
-				NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
+				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
 				skb = next;
 				if (skb == tail ||
 				    tcp_hdr(skb)->syn ||
@@ -4312,7 +4313,7 @@ static int tcp_prune_ofo_queue(struct sock *sk)
 	int res = 0;
 
 	if (!skb_queue_empty(&tp->out_of_order_queue)) {
-		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
 		__skb_queue_purge(&tp->out_of_order_queue);
 
 		/* Reset SACK state.  A conforming SACK implementation will
@@ -4341,7 +4342,7 @@ static int tcp_prune_queue(struct sock *sk)
 
 	SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
 
-	NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk);
@@ -4370,7 +4371,7 @@ static int tcp_prune_queue(struct sock *sk)
 	 * drop receive data on the floor.  It will get retransmitted
 	 * and hopefully then we'll have sufficient space.
 	 */
-	NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
 
 	/* Massive buffer overcommit. */
 	tp->pred_flags = 0;
@@ -4837,7 +4838,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 					__skb_pull(skb, tcp_header_len);
 					tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-					NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
+					NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
 				}
 				if (copied_early)
 					tcp_cleanup_rbuf(sk, skb->len);
@@ -4860,7 +4861,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				if ((int)skb->truesize > sk->sk_forward_alloc)
 					goto step5;
 
-				NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
+				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
 
 				/* Bulk data transfer: receiver */
 				__skb_pull(skb, tcp_header_len);
@@ -4904,7 +4905,7 @@ slow_path:
 	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
 	    tcp_paws_discard(sk, skb)) {
 		if (!th->rst) {
-			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
 			tcp_send_dupack(sk, skb);
 			goto discard;
 		}
@@ -4940,7 +4941,7 @@ slow_path:
 
 	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
-		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
 		tcp_reset(sk);
 		return 1;
 	}
@@ -4996,7 +4997,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
 			     tcp_time_stamp)) {
-			NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
 			goto reset_and_undo;
 		}
 
@@ -5280,7 +5281,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
 	    tcp_paws_discard(sk, skb)) {
 		if (!th->rst) {
-			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
 			tcp_send_dupack(sk, skb);
 			goto discard;
 		}
@@ -5309,7 +5310,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	 *	Check for a SYN in window.
 	 */
 	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
 		tcp_reset(sk);
 		return 1;
 	}
@@ -5391,7 +5392,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 					    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 					     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
 						tcp_done(sk);
-						NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
+						NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
 						return 1;
 					}
 
@@ -5451,7 +5452,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (sk->sk_shutdown & RCV_SHUTDOWN) {
 			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
-				NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
+				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
 				tcp_reset(sk);
 				return 1;
 			}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e876312b950..29adc668ad5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -366,7 +366,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 	 * servers this needs to be solved differently.
 	 */
 	if (sock_owned_by_user(sk))
-		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
@@ -375,7 +375,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 	seq = ntohl(th->seq);
 	if (sk->sk_state != TCP_LISTEN &&
 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
-		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
 
@@ -422,7 +422,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 		BUG_TRAP(!req->sk);
 
 		if (seq != tcp_rsk(req)->snt_isn) {
-			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
 
@@ -1251,7 +1251,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
 							TCP_PAWS_WINDOW) {
-				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
+				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
 				goto drop_and_release;
 			}
 		}
@@ -1365,9 +1365,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	return newsk;
 
 exit_overflow:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
 exit:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	dst_release(dst);
 	return NULL;
 }
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8b02b103996..204c4216266 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -244,7 +244,7 @@ kill:
 	}
 
 	if (paws_reject)
-		NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
 
 	if (!th->rst) {
 		/* In this case we must reset the TIMEWAIT timer.
@@ -611,7 +611,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		if (!(flg & TCP_FLAG_RST))
 			req->rsk_ops->send_ack(skb, req);
 		if (paws_reject)
-			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
 		return NULL;
 	}
 
@@ -695,7 +695,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		}
 
 	embryonic_reset:
-		NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
 		if (!(flg & TCP_FLAG_RST))
 			req->rsk_ops->send_reset(sk, skb);
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 176f0702b8a..36a19707f67 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1995,7 +1995,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 						mib_idx = LINUX_MIB_TCPFASTRETRANS;
 					else
 						mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
-					NET_INC_STATS_BH(mib_idx);
+					NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 					if (skb == tcp_write_queue_head(sk))
 						inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -2065,7 +2065,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 						  inet_csk(sk)->icsk_rto,
 						  TCP_RTO_MAX);
 
-		NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS);
 	}
 }
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6a480d1fd8f..328e0cf42b3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -48,7 +48,7 @@ static void tcp_write_err(struct sock *sk)
 	sk->sk_error_report(sk);
 
 	tcp_done(sk);
-	NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
 }
 
 /* Do not allow orphaned sockets to eat all our resources.
@@ -89,7 +89,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
 		if (do_reset)
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 		tcp_done(sk);
-		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
 		return 1;
 	}
 	return 0;
@@ -179,7 +179,7 @@ static void tcp_delack_timer(unsigned long data)
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
 		icsk->icsk_ack.blocked = 1;
-		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
 		goto out_unlock;
 	}
@@ -198,7 +198,7 @@ static void tcp_delack_timer(unsigned long data)
 	if (!skb_queue_empty(&tp->ucopy.prequeue)) {
 		struct sk_buff *skb;
 
-		NET_INC_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
 
 		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
 			sk->sk_backlog_rcv(sk, skb);
@@ -218,7 +218,7 @@ static void tcp_delack_timer(unsigned long data)
 			icsk->icsk_ack.ato      = TCP_ATO_MIN;
 		}
 		tcp_send_ack(sk);
-		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
 	}
 	TCP_CHECK_TIMER(sk);
 
@@ -346,7 +346,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 		} else {
 			mib_idx = LINUX_MIB_TCPTIMEOUTS;
 		}
-		NET_INC_STATS_BH(mib_idx);
+		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 	}
 
 	if (tcp_use_frto(sk)) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index a9cc8ab33a4..00a8a5f9380 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -210,11 +210,11 @@ unique:
 
 	if (twp != NULL) {
 		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED);
 	} else if (tw != NULL) {
 		/* Silly. Should hash-dance instead... */
 		inet_twsk_deschedule(tw, death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED);
 
 		inet_twsk_put(tw);
 	}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 3ecc1157994..6a68eeb7bbf 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -137,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 		;
 	*mssp = msstab[mssind] + 1;
 
-	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
 
 	return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
 				     th->dest, ntohl(th->seq),
@@ -177,11 +177,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
 		(mss = cookie_check(skb, cookie)) == 0) {
-		NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
 		goto out;
 	}
 
-	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d58b83ac06f..ca5b93a5c02 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -340,7 +340,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk))
-		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
@@ -349,7 +349,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	seq = ntohl(th->seq);
 	if (sk->sk_state != TCP_LISTEN &&
 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
-		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
 
@@ -424,7 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		BUG_TRAP(req->sk == NULL);
 
 		if (seq != tcp_rsk(req)->snt_isn) {
-			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
 
@@ -1449,9 +1449,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	return newsk;
 
 out_overflow:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
 out:
-	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	if (opt && opt != np->opt)
 		sock_kfree_s(sk, opt, opt->tot_len);
 	dst_release(dst);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ed8834e7f14..5ed93c05c23 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -486,7 +486,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
 	 * servers this needs to be solved differently.
 	 */
 	if (sock_owned_by_user(sk))
-		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+		NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS);
 
 	*app = asoc;
 	*tpp = transport;
-- 
cgit v1.2.3-70-g09d2


From 6f67c817fcfd94f5ca0f14b114b7fa25c0210c8b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:31:39 -0700
Subject: mib: add net to NET_INC_STATS_USER

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 2 +-
 net/ipv4/tcp.c   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 21df167a8e5..79d13192ecc 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -163,7 +163,7 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(net_statistics, field); } while (0)
 #define NET_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(net_statistics, field); } while (0)
-#define NET_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(net_statistics, field)
+#define NET_INC_STATS_USER(net, field) 	do { (void)net; SNMP_INC_STATS_USER(net_statistics, field); } while (0)
 #define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
 #define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9e0e45c3780..dec0b868111 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1153,7 +1153,7 @@ static void tcp_prequeue_process(struct sock *sk)
 	struct sk_buff *skb;
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED);
+	NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
 
 	/* RX process wants to run with disabled BHs, though it is not
 	 * necessary */
@@ -1793,13 +1793,13 @@ void tcp_close(struct sock *sk, long timeout)
 	 */
 	if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
-		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
+		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
 		tcp_set_state(sk, TCP_CLOSE);
 		tcp_send_active_reset(sk, GFP_KERNEL);
 	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
-		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
+		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
 	} else if (tcp_close_state(sk)) {
 		/* We FIN if the application ate all the data before
 		 * zapping the connection.
-- 
cgit v1.2.3-70-g09d2


From f2bf415cfed703de5ba94d25cdb160920c01fb00 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:32:25 -0700
Subject: mib: add net to NET_ADD_STATS_BH

This one is tricky.

The thing is that this macro is only used when killing tw buckets,
but since this killer is promiscuous wrt to which net each particular
tw belongs to, I have to use it only when NET_NS is off. When the net
namespaces are on, I use the INET_INC_STATS_BH for each bucket.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h              |  2 +-
 net/ipv4/inet_timewait_sock.c | 15 ++++++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 79d13192ecc..a8275b1338d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -164,7 +164,7 @@ DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(net_statistics, field); } while (0)
 #define NET_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(net_statistics, field); } while (0)
 #define NET_INC_STATS_USER(net, field) 	do { (void)net; SNMP_INC_STATS_USER(net_statistics, field); } while (0)
-#define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
+#define NET_ADD_STATS_BH(net, field, adnd) do { (void)net; SNMP_ADD_STATS_BH(net_statistics, field, adnd); } while (0)
 #define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
 
 extern unsigned long snmp_fold_field(void *mib[], int offt);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 06006a5ac8b..75c2def8f9a 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -160,6 +160,9 @@ rescan:
 		__inet_twsk_del_dead_node(tw);
 		spin_unlock(&twdr->death_lock);
 		__inet_twsk_kill(tw, twdr->hashinfo);
+#ifdef CONFIG_NET_NS
+		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
+#endif
 		inet_twsk_put(tw);
 		killed++;
 		spin_lock(&twdr->death_lock);
@@ -178,8 +181,9 @@ rescan:
 	}
 
 	twdr->tw_count -= killed;
-	NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
-
+#ifndef CONFIG_NET_NS
+	NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed);
+#endif
 	return ret;
 }
 
@@ -372,6 +376,9 @@ void inet_twdr_twcal_tick(unsigned long data)
 						       &twdr->twcal_row[slot]) {
 				__inet_twsk_del_dead_node(tw);
 				__inet_twsk_kill(tw, twdr->hashinfo);
+#ifdef CONFIG_NET_NS
+				NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
+#endif
 				inet_twsk_put(tw);
 				killed++;
 			}
@@ -395,7 +402,9 @@ void inet_twdr_twcal_tick(unsigned long data)
 out:
 	if ((twdr->tw_count -= killed) == 0)
 		del_timer(&twdr->tw_timer);
-	NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
+#ifndef CONFIG_NET_NS
+	NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed);
+#endif
 	spin_unlock(&twdr->death_lock);
 }
 
-- 
cgit v1.2.3-70-g09d2


From ed88098e25d77bef3b2ad8c9d8e2ebf454d9ccbf Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 16 Jul 2008 20:32:45 -0700
Subject: mib: add net to NET_ADD_STATS_USER

Done with NET_XXX_STATS macros :)

To be continued...

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 2 +-
 net/ipv4/tcp.c   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index a8275b1338d..02924fb4bdb 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -165,7 +165,7 @@ DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(net_statistics, field); } while (0)
 #define NET_INC_STATS_USER(net, field) 	do { (void)net; SNMP_INC_STATS_USER(net_statistics, field); } while (0)
 #define NET_ADD_STATS_BH(net, field, adnd) do { (void)net; SNMP_ADD_STATS_BH(net_statistics, field, adnd); } while (0)
-#define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
+#define NET_ADD_STATS_USER(net, field, adnd) do { (void)net; SNMP_ADD_STATS_USER(net_statistics, field, adnd); } while (0)
 
 extern unsigned long snmp_fold_field(void *mib[], int offt);
 extern int snmp_mib_init(void *ptr[2], size_t mibsize);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dec0b868111..eec8cf7c024 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1475,7 +1475,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			/* __ Restore normal policy in scheduler __ */
 
 			if ((chunk = len - tp->ucopy.len) != 0) {
-				NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+				NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
 				len -= chunk;
 				copied += chunk;
 			}
@@ -1486,7 +1486,7 @@ do_prequeue:
 				tcp_prequeue_process(sk);
 
 				if ((chunk = len - tp->ucopy.len) != 0) {
-					NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+					NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
 					len -= chunk;
 					copied += chunk;
 				}
@@ -1601,7 +1601,7 @@ skip_copy:
 			tcp_prequeue_process(sk);
 
 			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
-				NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+				NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
 				len -= chunk;
 				copied += chunk;
 			}
-- 
cgit v1.2.3-70-g09d2


From 9a6d276e85aa3d8f308fc5e8de6892daeb60ae5f Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 16 Jul 2008 20:50:49 -0700
Subject: core: add stat to track unresolved discards in neighbor cache

in __neigh_event_send, if we have a neighbour entry which is in
NUD_INCOMPLETE state, we enqueue any outbound frames to that neighbour
to the neighbours arp_queue, which is default capped to a length of 3
skbs.  If that queue exceeds its set length, it will drop an skb on
the queue to enqueue the newly arrived skb.  This results in a drop
for which we have no statistics incremented.  This patch adds an
unresolved_discards stat to /proc/net/stat/ndisc_cache to track these
lost frames.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 4 +++-
 net/core/neighbour.c    | 8 +++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index dc420fecafb..aa4b708654a 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -75,7 +75,7 @@ struct neigh_statistics
 	unsigned long destroys;		/* number of destroyed neighs */
 	unsigned long hash_grows;	/* number of hash resizes */
 
-	unsigned long res_failed;	/* nomber of failed resolutions */
+	unsigned long res_failed;	/* number of failed resolutions */
 
 	unsigned long lookups;		/* number of lookups */
 	unsigned long hits;		/* number of hits (among lookups) */
@@ -85,6 +85,8 @@ struct neigh_statistics
 
 	unsigned long periodic_gc_runs;	/* number of periodic GC runs */
 	unsigned long forced_gc_runs;	/* number of forced GC runs */
+
+	unsigned long unres_discards;	/* number of unresolved drops */
 };
 
 #define NEIGH_CACHE_STAT_INC(tbl, field)				\
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 65f01f71b3f..f62c8af85d3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -930,6 +930,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 				buff = neigh->arp_queue.next;
 				__skb_unlink(buff, &neigh->arp_queue);
 				kfree_skb(buff);
+				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
 			}
 			__skb_queue_tail(&neigh->arp_queue, skb);
 		}
@@ -2462,12 +2463,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
 	struct neigh_statistics *st = v;
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs\n");
+		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
 		return 0;
 	}
 
 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
-			"%08lx %08lx  %08lx %08lx\n",
+			"%08lx %08lx  %08lx %08lx %08lx\n",
 		   atomic_read(&tbl->entries),
 
 		   st->allocs,
@@ -2483,7 +2484,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
 		   st->rcv_probes_ucast,
 
 		   st->periodic_gc_runs,
-		   st->forced_gc_runs
+		   st->forced_gc_runs,
+		   st->unres_discards
 		   );
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 51ce7ec92187b22ab1598987bb5f9776f6e0ebad Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 16 Jul 2008 20:51:47 -0700
Subject: garp: retry sending JoinIn messages after allocation failures

Increase reliability by retrying to send JoinIn messages after memory
allocation failures on each TRANSMIT_PDU event until it succeeds.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/802/garp.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/802/garp.c b/net/802/garp.c
index 3b78f7b74fd..1dcb0660c49 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -323,7 +323,10 @@ static void garp_attr_event(struct garp_applicant *app,
 	case GARP_ACTION_NONE:
 		break;
 	case GARP_ACTION_S_JOIN_IN:
-		garp_pdu_append_attr(app, attr, GARP_JOIN_IN);
+		/* When appending the attribute fails, don't update state in
+		 * order to retry on next TRANSMIT_PDU event. */
+		if (garp_pdu_append_attr(app, attr, GARP_JOIN_IN) < 0)
+			return;
 		break;
 	case GARP_ACTION_S_LEAVE_EMPTY:
 		garp_pdu_append_attr(app, attr, GARP_LEAVE_EMPTY);
-- 
cgit v1.2.3-70-g09d2


From e8a0464cc950972824e2e128028ae3db666ec1ed Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 00:34:19 -0700
Subject: netdev: Allocate multiple queues for TX.

alloc_netdev_mq() now allocates an array of netdev_queue
structures for TX, based upon the queue_count argument.

Furthermore, all accesses to the TX queues are now vectored
through the netdev_get_tx_queue() and netdev_for_each_tx_queue()
interfaces.  This makes it easy to grep the tree for all
things that want to get to a TX queue of a net device.

Problem spots which are not really multiqueue aware yet, and
only work with one queue, can easily be spotted by grepping
for all netdev_get_tx_queue() calls that pass in a zero index.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c         |   6 +-
 drivers/net/hamradio/bpqether.c         |   6 +-
 drivers/net/ifb.c                       |  12 ++-
 drivers/net/macvlan.c                   |   6 +-
 drivers/net/wireless/hostap/hostap_hw.c |   6 +-
 include/linux/netdevice.h               |  69 ++++++++-----
 include/net/sch_generic.h               |  37 +++++--
 net/8021q/vlan_dev.c                    |  10 +-
 net/core/dev.c                          |  40 +++++--
 net/core/rtnetlink.c                    |   2 +-
 net/mac80211/main.c                     |   4 +-
 net/mac80211/wme.c                      |  12 +--
 net/netrom/af_netrom.c                  |   6 +-
 net/rose/af_rose.c                      |   6 +-
 net/sched/cls_api.c                     |   4 +-
 net/sched/sch_api.c                     |  32 ++++--
 net/sched/sch_generic.c                 | 178 +++++++++++++++++++++++---------
 net/sched/sch_teql.c                    |  21 ++--
 18 files changed, 320 insertions(+), 137 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index fd87dbe7999..9737c06045d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5042,7 +5042,9 @@ static int bond_check_params(struct bond_params *params)
 
 static struct lock_class_key bonding_netdev_xmit_lock_key;
 
-static void bond_set_lockdep_class_one(struct netdev_queue *txq)
+static void bond_set_lockdep_class_one(struct net_device *dev,
+				       struct netdev_queue *txq,
+				       void *_unused)
 {
 	lockdep_set_class(&txq->_xmit_lock,
 			  &bonding_netdev_xmit_lock_key);
@@ -5050,7 +5052,7 @@ static void bond_set_lockdep_class_one(struct netdev_queue *txq)
 
 static void bond_set_lockdep_class(struct net_device *dev)
 {
-	bond_set_lockdep_class_one(&dev->tx_queue);
+	netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL);
 }
 
 /* Create a new bond based on the specified name and bonding parameters.
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index fb186b8c3d4..b6500b2aacf 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -124,14 +124,16 @@ static LIST_HEAD(bpq_devices);
  */
 static struct lock_class_key bpq_netdev_xmit_lock_key;
 
-static void bpq_set_lockdep_class_one(struct netdev_queue *txq)
+static void bpq_set_lockdep_class_one(struct net_device *dev,
+				      struct netdev_queue *txq,
+				      void *_unused)
 {
 	lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key);
 }
 
 static void bpq_set_lockdep_class(struct net_device *dev)
 {
-	bpq_set_lockdep_class_one(&dev->tx_queue);
+	netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL);
 }
 
 /* ------------------------------------------------------------------------ */
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index ccbd6554f6e..897b05e79ed 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -229,14 +229,20 @@ module_param(numifbs, int, 0);
 MODULE_PARM_DESC(numifbs, "Number of ifb devices");
 
 /*
- * dev_ifb->tx_queue.lock is usually taken after dev->rx_queue.lock,
+ * dev_ifb's TX queue lock is usually taken after dev->rx_queue.lock,
  * reversely to e.g. qdisc_lock_tree(). It should be safe until
- * ifb doesn't take dev->tx_queue.lock with dev_ifb->rx_queue.lock.
+ * ifb doesn't take dev's TX queue lock with dev_ifb->rx_queue.lock.
  * But lockdep should know that ifb has different locks from dev.
  */
 static struct lock_class_key ifb_tx_queue_lock_key;
 static struct lock_class_key ifb_rx_queue_lock_key;
 
+static void set_tx_lockdep_key(struct net_device *dev,
+			       struct netdev_queue *txq,
+			       void *_unused)
+{
+	lockdep_set_class(&txq->lock, &ifb_tx_queue_lock_key);
+}
 
 static int __init ifb_init_one(int index)
 {
@@ -258,7 +264,7 @@ static int __init ifb_init_one(int index)
 	if (err < 0)
 		goto err;
 
-	lockdep_set_class(&dev_ifb->tx_queue.lock, &ifb_tx_queue_lock_key);
+	netdev_for_each_tx_queue(dev_ifb, set_tx_lockdep_key, NULL);
 	lockdep_set_class(&dev_ifb->rx_queue.lock, &ifb_rx_queue_lock_key);
 
 	return 0;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 980001c2cf9..72745ce588c 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -285,7 +285,9 @@ static struct lock_class_key macvlan_netdev_xmit_lock_key;
 #define MACVLAN_STATE_MASK \
 	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
 
-static void macvlan_set_lockdep_class_one(struct netdev_queue *txq)
+static void macvlan_set_lockdep_class_one(struct net_device *dev,
+					  struct netdev_queue *txq,
+					  void *_unused)
 {
 	lockdep_set_class(&txq->_xmit_lock,
 			  &macvlan_netdev_xmit_lock_key);
@@ -293,7 +295,7 @@ static void macvlan_set_lockdep_class_one(struct netdev_queue *txq)
 
 static void macvlan_set_lockdep_class(struct net_device *dev)
 {
-	macvlan_set_lockdep_class_one(&dev->tx_queue);
+	netdev_for_each_tx_queue(dev, macvlan_set_lockdep_class_one, NULL);
 }
 
 static int macvlan_init(struct net_device *dev)
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index c1f4bb005d9..13d5882f1f2 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -3102,7 +3102,9 @@ static void prism2_clear_set_tim_queue(local_info_t *local)
  */
 static struct lock_class_key hostap_netdev_xmit_lock_key;
 
-static void prism2_set_lockdep_class_one(struct netdev_queue *txq)
+static void prism2_set_lockdep_class_one(struct net_device *dev,
+					 struct netdev_queue *txq,
+					 void *_unused)
 {
 	lockdep_set_class(&txq->_xmit_lock,
 			  &hostap_netdev_xmit_lock_key);
@@ -3110,7 +3112,7 @@ static void prism2_set_lockdep_class_one(struct netdev_queue *txq)
 
 static void prism2_set_lockdep_class(struct net_device *dev)
 {
-	prism2_set_lockdep_class_one(&dev->tx_queue);
+	netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL);
 }
 
 static struct net_device *
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 570cf7affa7..f25d4f5a31b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -463,7 +463,7 @@ struct netdev_queue {
 	struct Qdisc		*qdisc_sleeping;
 	struct list_head	qdisc_list;
 	struct netdev_queue	*next_sched;
-};
+} ____cacheline_aligned_in_smp;
 
 /*
  *	The DEVICE structure.
@@ -641,7 +641,9 @@ struct net_device
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
 	struct netdev_queue	rx_queue;
-	struct netdev_queue	tx_queue ____cacheline_aligned_in_smp;
+
+	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
+	unsigned int		num_tx_queues;
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 
 /*
@@ -764,6 +766,25 @@ struct net_device
 #define	NETDEV_ALIGN		32
 #define	NETDEV_ALIGN_CONST	(NETDEV_ALIGN - 1)
 
+static inline
+struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
+					 unsigned int index)
+{
+	return &dev->_tx[index];
+}
+
+static inline void netdev_for_each_tx_queue(struct net_device *dev,
+					    void (*f)(struct net_device *,
+						      struct netdev_queue *,
+						      void *),
+					    void *arg)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++)
+		f(dev, &dev->_tx[i], arg);
+}
+
 /*
  * Net namespace inlines
  */
@@ -977,7 +998,7 @@ static inline void netif_schedule_queue(struct netdev_queue *txq)
 
 static inline void netif_schedule(struct net_device *dev)
 {
-	netif_schedule_queue(&dev->tx_queue);
+	netif_schedule_queue(netdev_get_tx_queue(dev, 0));
 }
 
 /**
@@ -993,7 +1014,7 @@ static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
 
 static inline void netif_start_queue(struct net_device *dev)
 {
-	netif_tx_start_queue(&dev->tx_queue);
+	netif_tx_start_queue(netdev_get_tx_queue(dev, 0));
 }
 
 /**
@@ -1017,7 +1038,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
 
 static inline void netif_wake_queue(struct net_device *dev)
 {
-	netif_tx_wake_queue(&dev->tx_queue);
+	netif_tx_wake_queue(netdev_get_tx_queue(dev, 0));
 }
 
 /**
@@ -1034,7 +1055,7 @@ static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
 
 static inline void netif_stop_queue(struct net_device *dev)
 {
-	netif_tx_stop_queue(&dev->tx_queue);
+	netif_tx_stop_queue(netdev_get_tx_queue(dev, 0));
 }
 
 /**
@@ -1050,7 +1071,7 @@ static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
 
 static inline int netif_queue_stopped(const struct net_device *dev)
 {
-	return netif_tx_queue_stopped(&dev->tx_queue);
+	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
 }
 
 /**
@@ -1134,7 +1155,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 #endif
 	if (test_and_clear_bit(__QUEUE_STATE_XOFF,
 			       &dev->egress_subqueue[queue_index].state))
-		__netif_schedule(&dev->tx_queue);
+		__netif_schedule(netdev_get_tx_queue(dev, 0));
 }
 
 /**
@@ -1430,18 +1451,19 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 
 static inline void netif_tx_lock(struct net_device *dev)
 {
-	__netif_tx_lock(&dev->tx_queue, smp_processor_id());
-}
+	int cpu = smp_processor_id();
+	unsigned int i;
 
-static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
-{
-	spin_lock_bh(&txq->_xmit_lock);
-	txq->xmit_lock_owner = smp_processor_id();
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		__netif_tx_lock(txq, cpu);
+	}
 }
 
 static inline void netif_tx_lock_bh(struct net_device *dev)
 {
-	__netif_tx_lock_bh(&dev->tx_queue);
+	local_bh_disable();
+	netif_tx_lock(dev);
 }
 
 static inline int __netif_tx_trylock(struct netdev_queue *txq)
@@ -1454,7 +1476,7 @@ static inline int __netif_tx_trylock(struct netdev_queue *txq)
 
 static inline int netif_tx_trylock(struct net_device *dev)
 {
-	return __netif_tx_trylock(&dev->tx_queue);
+	return __netif_tx_trylock(netdev_get_tx_queue(dev, 0));
 }
 
 static inline void __netif_tx_unlock(struct netdev_queue *txq)
@@ -1465,18 +1487,19 @@ static inline void __netif_tx_unlock(struct netdev_queue *txq)
 
 static inline void netif_tx_unlock(struct net_device *dev)
 {
-	__netif_tx_unlock(&dev->tx_queue);
-}
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		__netif_tx_unlock(txq);
+	}
 
-static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
-{
-	txq->xmit_lock_owner = -1;
-	spin_unlock_bh(&txq->_xmit_lock);
 }
 
 static inline void netif_tx_unlock_bh(struct net_device *dev)
 {
-	__netif_tx_unlock_bh(&dev->tx_queue);
+	netif_tx_unlock(dev);
+	local_bh_enable();
 }
 
 #define HARD_TX_LOCK(dev, txq, cpu) {			\
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5ba66b55557..b47f556c66f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -230,32 +230,47 @@ extern void tcf_destroy_chain(struct tcf_proto **fl);
 /* Reset all TX qdiscs of a device.  */
 static inline void qdisc_reset_all_tx(struct net_device *dev)
 {
-	qdisc_reset(dev->tx_queue.qdisc);
+	unsigned int i;
+	for (i = 0; i < dev->num_tx_queues; i++)
+		qdisc_reset(netdev_get_tx_queue(dev, i)->qdisc);
 }
 
 /* Are all TX queues of the device empty?  */
 static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 {
-	const struct netdev_queue *txq = &dev->tx_queue;
-	const struct Qdisc *q = txq->qdisc;
+	unsigned int i;
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		const struct Qdisc *q = txq->qdisc;
 
-	return (q->q.qlen == 0);
+		if (q->q.qlen)
+			return false;
+	}
+	return true;
 }
 
 /* Are any of the TX qdiscs changing?  */
 static inline bool qdisc_tx_changing(struct net_device *dev)
 {
-	struct netdev_queue *txq = &dev->tx_queue;
-
-	return (txq->qdisc != txq->qdisc_sleeping);
+	unsigned int i;
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		if (txq->qdisc != txq->qdisc_sleeping)
+			return true;
+	}
+	return false;
 }
 
-/* Is the device using the noop qdisc?  */
+/* Is the device using the noop qdisc on all queues?  */
 static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 {
-	const struct netdev_queue *txq = &dev->tx_queue;
-
-	return (txq->qdisc == &noop_qdisc);
+	unsigned int i;
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		if (txq->qdisc != &noop_qdisc)
+			return false;
+	}
+	return true;
 }
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 6b985f23fd9..f42bc2b26b8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -570,16 +570,18 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
  */
 static struct lock_class_key vlan_netdev_xmit_lock_key;
 
-static void vlan_dev_set_lockdep_one(struct netdev_queue *txq,
-				     int subclass)
+static void vlan_dev_set_lockdep_one(struct net_device *dev,
+				     struct netdev_queue *txq,
+				     void *_subclass)
 {
 	lockdep_set_class_and_subclass(&txq->_xmit_lock,
-				       &vlan_netdev_xmit_lock_key, subclass);
+				       &vlan_netdev_xmit_lock_key,
+				       *(int *)_subclass);
 }
 
 static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
 {
-	vlan_dev_set_lockdep_one(&dev->tx_queue, subclass);
+	netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass);
 }
 
 static const struct header_ops vlan_header_ops = {
diff --git a/net/core/dev.c b/net/core/dev.c
index 9b49f74a982..69378f25069 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1666,6 +1666,12 @@ out_kfree_skb:
  *          --BLG
  */
 
+static struct netdev_queue *dev_pick_tx(struct net_device *dev,
+					struct sk_buff *skb)
+{
+	return netdev_get_tx_queue(dev, 0);
+}
+
 int dev_queue_xmit(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -1702,7 +1708,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 	}
 
 gso:
-	txq = &dev->tx_queue;
+	txq = dev_pick_tx(dev, skb);
 	spin_lock_prefetch(&txq->lock);
 
 	/* Disable soft irqs for various locks below. Also
@@ -3788,8 +3794,9 @@ static void rollback_registered(struct net_device *dev)
 	dev_put(dev);
 }
 
-static void __netdev_init_queue_locks_one(struct netdev_queue *dev_queue,
-					  struct net_device *dev)
+static void __netdev_init_queue_locks_one(struct net_device *dev,
+					  struct netdev_queue *dev_queue,
+					  void *_unused)
 {
 	spin_lock_init(&dev_queue->_xmit_lock);
 	netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type);
@@ -3798,8 +3805,8 @@ static void __netdev_init_queue_locks_one(struct netdev_queue *dev_queue,
 
 static void netdev_init_queue_locks(struct net_device *dev)
 {
-	__netdev_init_queue_locks_one(&dev->tx_queue, dev);
-	__netdev_init_queue_locks_one(&dev->rx_queue, dev);
+	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
+	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
 }
 
 /**
@@ -4119,7 +4126,8 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
 }
 
 static void netdev_init_one_queue(struct net_device *dev,
-				  struct netdev_queue *queue)
+				  struct netdev_queue *queue,
+				  void *_unused)
 {
 	spin_lock_init(&queue->lock);
 	queue->dev = dev;
@@ -4127,8 +4135,8 @@ static void netdev_init_one_queue(struct net_device *dev,
 
 static void netdev_init_queues(struct net_device *dev)
 {
-	netdev_init_one_queue(dev, &dev->rx_queue);
-	netdev_init_one_queue(dev, &dev->tx_queue);
+	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
+	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
 }
 
 /**
@@ -4145,9 +4153,10 @@ static void netdev_init_queues(struct net_device *dev)
 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
-	void *p;
+	struct netdev_queue *tx;
 	struct net_device *dev;
 	int alloc_size;
+	void *p;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
@@ -4167,11 +4176,22 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
+	tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL);
+	if (!tx) {
+		printk(KERN_ERR "alloc_netdev: Unable to allocate "
+		       "tx qdiscs.\n");
+		kfree(p);
+		return NULL;
+	}
+
 	dev = (struct net_device *)
 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
 	dev->padded = (char *)dev - (char *)p;
 	dev_net_set(dev, &init_net);
 
+	dev->_tx = tx;
+	dev->num_tx_queues = queue_count;
+
 	if (sizeof_priv) {
 		dev->priv = ((char *)dev +
 			     ((sizeof(struct net_device) +
@@ -4205,6 +4225,8 @@ void free_netdev(struct net_device *dev)
 {
 	release_net(dev_net(dev));
 
+	kfree(dev->_tx);
+
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		kfree((char *)dev - dev->padded);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8ef9f1db610..71edb8b3634 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -636,7 +636,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (dev->master)
 		NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
 
-	txq = &dev->tx_queue;
+	txq = netdev_get_tx_queue(dev, 0);
 	if (txq->qdisc_sleeping)
 		NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id);
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index af0056e7e5b..b486e634f4f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -621,7 +621,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 
 	/* ensure that TX flow won't interrupt us
 	 * until the end of the call to requeue function */
-	txq = &local->mdev->tx_queue;
+	txq = netdev_get_tx_queue(local->mdev, 0);
 	spin_lock_bh(&txq->lock);
 
 	/* create a new queue for this aggregation */
@@ -862,7 +862,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 
 	/* avoid ordering issues: we are the only one that can modify
 	 * the content of the qdiscs */
-	txq = &local->mdev->tx_queue;
+	txq = netdev_get_tx_queue(local->mdev, 0);
 	spin_lock_bh(&txq->lock);
 	/* remove the queue for this aggregation */
 	ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 6ae43a3c772..f014cd38c2d 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -574,7 +574,7 @@ static struct Qdisc_ops wme_qdisc_ops __read_mostly =
 
 void ieee80211_install_qdisc(struct net_device *dev)
 {
-	struct netdev_queue *txq = &dev->tx_queue;
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 	struct Qdisc *qdisc;
 
 	qdisc = qdisc_create_dflt(dev, txq,
@@ -596,7 +596,7 @@ void ieee80211_install_qdisc(struct net_device *dev)
 
 int ieee80211_qdisc_installed(struct net_device *dev)
 {
-	struct netdev_queue *txq = &dev->tx_queue;
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 
 	return txq->qdisc_sleeping->ops == &wme_qdisc_ops;
 }
@@ -617,7 +617,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 			struct sta_info *sta, u16 tid)
 {
 	int i;
-	struct netdev_queue *txq = &local->mdev->tx_queue;
+	struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, 0);
 	struct ieee80211_sched_data *q =
 			qdisc_priv(txq->qdisc_sleeping);
 	DECLARE_MAC_BUF(mac);
@@ -652,14 +652,14 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 }
 
 /**
- * the caller needs to hold local->mdev->tx_queue.lock
+ * the caller needs to hold netdev_get_tx_queue(local->mdev, X)->lock
  */
 void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 				   struct sta_info *sta, u16 tid,
 				   u8 requeue)
 {
 	struct ieee80211_hw *hw = &local->hw;
-	struct netdev_queue *txq = &local->mdev->tx_queue;
+	struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, 0);
 	struct ieee80211_sched_data *q =
 		qdisc_priv(txq->qdisc_sleeping);
 	int agg_queue = sta->tid_to_tx_q[tid];
@@ -676,7 +676,7 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 
 void ieee80211_requeue(struct ieee80211_local *local, int queue)
 {
-	struct netdev_queue *txq = &local->mdev->tx_queue;
+	struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, 0);
 	struct Qdisc *root_qd = txq->qdisc_sleeping;
 	struct ieee80211_sched_data *q = qdisc_priv(root_qd);
 	struct Qdisc *qdisc = q->queues[queue];
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 819afc449e1..d41be0d66eb 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -74,14 +74,16 @@ static const struct proto_ops nr_proto_ops;
  */
 static struct lock_class_key nr_netdev_xmit_lock_key;
 
-static void nr_set_lockdep_one(struct netdev_queue *txq)
+static void nr_set_lockdep_one(struct net_device *dev,
+			       struct netdev_queue *txq,
+			       void *_unused)
 {
 	lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key);
 }
 
 static void nr_set_lockdep_key(struct net_device *dev)
 {
-	nr_set_lockdep_one(&dev->tx_queue);
+	netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL);
 }
 
 /*
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 7dbbc089162..f3a691f3490 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -75,14 +75,16 @@ ax25_address rose_callsign;
  */
 static struct lock_class_key rose_netdev_xmit_lock_key;
 
-static void rose_set_lockdep_one(struct netdev_queue *txq)
+static void rose_set_lockdep_one(struct net_device *dev,
+				 struct netdev_queue *txq,
+				 void *_unused)
 {
 	lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key);
 }
 
 static void rose_set_lockdep_key(struct net_device *dev)
 {
-	rose_set_lockdep_one(&dev->tx_queue);
+	netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL);
 }
 
 /*
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b483bbea611..d0b0a9b1439 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -166,7 +166,7 @@ replay:
 
 	/* Find qdisc */
 	if (!parent) {
-		struct netdev_queue *dev_queue = &dev->tx_queue;
+		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
 		q = dev_queue->qdisc_sleeping;
 		parent = q->handle;
 	} else {
@@ -410,7 +410,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
 		return skb->len;
 
-	dev_queue = &dev->tx_queue;
+	dev_queue = netdev_get_tx_queue(dev, 0);
 	if (!tcm->tcm_parent)
 		q = dev_queue->qdisc_sleeping;
 	else
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 95873f8dd37..830ccc544a1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -183,9 +183,8 @@ EXPORT_SYMBOL(unregister_qdisc);
    (root qdisc, all its children, children of children etc.)
  */
 
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+static struct Qdisc *__qdisc_lookup(struct netdev_queue *dev_queue, u32 handle)
 {
-	struct netdev_queue *dev_queue = &dev->tx_queue;
 	struct Qdisc *q;
 
 	list_for_each_entry(q, &dev_queue->qdisc_list, list) {
@@ -195,6 +194,19 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 	return NULL;
 }
 
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		struct Qdisc *q = __qdisc_lookup(txq, handle);
+		if (q)
+			return q;
+	}
+	return NULL;
+}
+
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 {
 	unsigned long cl;
@@ -462,7 +474,7 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 		}
 
 	} else {
-		dev_queue = &dev->tx_queue;
+		dev_queue = netdev_get_tx_queue(dev, 0);
 		oqdisc = dev_queue->qdisc_sleeping;
 
 		/* Prune old scheduler */
@@ -742,7 +754,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 				q = dev->rx_queue.qdisc;
 			}
 		} else {
-			struct netdev_queue *dev_queue = &dev->tx_queue;
+			struct netdev_queue *dev_queue;
+			dev_queue = netdev_get_tx_queue(dev, 0);
 			q = dev_queue->qdisc_sleeping;
 		}
 		if (!q)
@@ -817,7 +830,8 @@ replay:
 				q = dev->rx_queue.qdisc;
 			}
 		} else {
-			struct netdev_queue *dev_queue = &dev->tx_queue;
+			struct netdev_queue *dev_queue;
+			dev_queue = netdev_get_tx_queue(dev, 0);
 			q = dev_queue->qdisc_sleeping;
 		}
 
@@ -899,7 +913,7 @@ create_n_graft:
 				 tcm->tcm_parent, tcm->tcm_parent,
 				 tca, &err);
 	else
-		q = qdisc_create(dev, &dev->tx_queue,
+		q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
 				 tcm->tcm_parent, tcm->tcm_handle,
 				 tca, &err);
 	if (q == NULL) {
@@ -1025,7 +1039,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 		if (idx > s_idx)
 			s_q_idx = 0;
 		q_idx = 0;
-		dev_queue = &dev->tx_queue;
+		dev_queue = netdev_get_tx_queue(dev, 0);
 		list_for_each_entry(q, &dev_queue->qdisc_list, list) {
 			if (q_idx < s_q_idx) {
 				q_idx++;
@@ -1098,7 +1112,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	/* Step 1. Determine qdisc handle X:0 */
 
-	dev_queue = &dev->tx_queue;
+	dev_queue = netdev_get_tx_queue(dev, 0);
 	if (pid != TC_H_ROOT) {
 		u32 qid1 = TC_H_MAJ(pid);
 
@@ -1275,7 +1289,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	s_t = cb->args[0];
 	t = 0;
 
-	dev_queue = &dev->tx_queue;
+	dev_queue = netdev_get_tx_queue(dev, 0);
 	list_for_each_entry(q, &dev_queue->qdisc_list, list) {
 		if (t < s_t || !q->ops->cl_ops ||
 		    (tcm->tcm_parent &&
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 243de935b18..4e2b865cbba 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -40,20 +40,30 @@
  */
 
 void qdisc_lock_tree(struct net_device *dev)
-	__acquires(dev->tx_queue.lock)
 	__acquires(dev->rx_queue.lock)
 {
-	spin_lock_bh(&dev->tx_queue.lock);
+	unsigned int i;
+
+	local_bh_disable();
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		spin_lock(&txq->lock);
+	}
 	spin_lock(&dev->rx_queue.lock);
 }
 EXPORT_SYMBOL(qdisc_lock_tree);
 
 void qdisc_unlock_tree(struct net_device *dev)
 	__releases(dev->rx_queue.lock)
-	__releases(dev->tx_queue.lock)
 {
+	unsigned int i;
+
 	spin_unlock(&dev->rx_queue.lock);
-	spin_unlock_bh(&dev->tx_queue.lock);
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		spin_unlock(&txq->lock);
+	}
+	local_bh_enable();
 }
 EXPORT_SYMBOL(qdisc_unlock_tree);
 
@@ -212,22 +222,37 @@ void __qdisc_run(struct netdev_queue *txq)
 static void dev_watchdog(unsigned long arg)
 {
 	struct net_device *dev = (struct net_device *)arg;
-	struct netdev_queue *txq = &dev->tx_queue;
 
 	netif_tx_lock(dev);
-	if (txq->qdisc != &noop_qdisc) {
+	if (!qdisc_tx_is_noop(dev)) {
 		if (netif_device_present(dev) &&
 		    netif_running(dev) &&
 		    netif_carrier_ok(dev)) {
-			if (netif_queue_stopped(dev) &&
-			    time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) {
+			int some_queue_stopped = 0;
+			unsigned int i;
+
+			for (i = 0; i < dev->num_tx_queues; i++) {
+				struct netdev_queue *txq;
+
+				txq = netdev_get_tx_queue(dev, i);
+				if (netif_tx_queue_stopped(txq)) {
+					some_queue_stopped = 1;
+					break;
+				}
+			}
 
-				printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n",
+			if (some_queue_stopped &&
+			    time_after(jiffies, (dev->trans_start +
+						 dev->watchdog_timeo))) {
+				printk(KERN_INFO "NETDEV WATCHDOG: %s: "
+				       "transmit timed out\n",
 				       dev->name);
 				dev->tx_timeout(dev);
 				WARN_ON_ONCE(1);
 			}
-			if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo)))
+			if (!mod_timer(&dev->watchdog_timer,
+				       round_jiffies(jiffies +
+						     dev->watchdog_timeo)))
 				dev_hold(dev);
 		}
 	}
@@ -542,9 +567,55 @@ void qdisc_destroy(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_destroy);
 
+static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+		if (txq->qdisc_sleeping != &noop_qdisc)
+			return false;
+	}
+	return true;
+}
+
+static void attach_one_default_qdisc(struct net_device *dev,
+				     struct netdev_queue *dev_queue,
+				     void *_unused)
+{
+	struct Qdisc *qdisc;
+
+	if (dev->tx_queue_len) {
+		qdisc = qdisc_create_dflt(dev, dev_queue,
+					  &pfifo_fast_ops, TC_H_ROOT);
+		if (!qdisc) {
+			printk(KERN_INFO "%s: activation failed\n", dev->name);
+			return;
+		}
+		list_add_tail(&qdisc->list, &dev_queue->qdisc_list);
+	} else {
+		qdisc =  &noqueue_qdisc;
+	}
+	dev_queue->qdisc_sleeping = qdisc;
+}
+
+static void transition_one_qdisc(struct net_device *dev,
+				 struct netdev_queue *dev_queue,
+				 void *_need_watchdog)
+{
+	int *need_watchdog_p = _need_watchdog;
+
+	spin_lock_bh(&dev_queue->lock);
+	rcu_assign_pointer(dev_queue->qdisc, dev_queue->qdisc_sleeping);
+	if (dev_queue->qdisc != &noqueue_qdisc)
+		*need_watchdog_p = 1;
+	spin_unlock_bh(&dev_queue->lock);
+}
+
 void dev_activate(struct net_device *dev)
 {
-	struct netdev_queue *txq = &dev->tx_queue;
+	int need_watchdog;
 
 	/* No queueing discipline is attached to device;
 	   create default one i.e. pfifo_fast for devices,
@@ -552,39 +623,27 @@ void dev_activate(struct net_device *dev)
 	   virtual interfaces
 	 */
 
-	if (txq->qdisc_sleeping == &noop_qdisc) {
-		struct Qdisc *qdisc;
-		if (dev->tx_queue_len) {
-			qdisc = qdisc_create_dflt(dev, txq,
-						  &pfifo_fast_ops,
-						  TC_H_ROOT);
-			if (qdisc == NULL) {
-				printk(KERN_INFO "%s: activation failed\n", dev->name);
-				return;
-			}
-			list_add_tail(&qdisc->list, &txq->qdisc_list);
-		} else {
-			qdisc =  &noqueue_qdisc;
-		}
-		txq->qdisc_sleeping = qdisc;
-	}
+	if (dev_all_qdisc_sleeping_noop(dev))
+		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
 
 	if (!netif_carrier_ok(dev))
 		/* Delay activation until next carrier-on event */
 		return;
 
-	spin_lock_bh(&txq->lock);
-	rcu_assign_pointer(txq->qdisc, txq->qdisc_sleeping);
-	if (txq->qdisc != &noqueue_qdisc) {
+	need_watchdog = 0;
+	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
+
+	if (need_watchdog) {
 		dev->trans_start = jiffies;
 		dev_watchdog_up(dev);
 	}
-	spin_unlock_bh(&txq->lock);
 }
 
-static void dev_deactivate_queue(struct netdev_queue *dev_queue,
-				 struct Qdisc *qdisc_default)
+static void dev_deactivate_queue(struct net_device *dev,
+				 struct netdev_queue *dev_queue,
+				 void *_qdisc_default)
 {
+	struct Qdisc *qdisc_default = _qdisc_default;
 	struct Qdisc *qdisc;
 	struct sk_buff *skb;
 
@@ -603,12 +662,35 @@ static void dev_deactivate_queue(struct netdev_queue *dev_queue,
 	kfree_skb(skb);
 }
 
+static bool some_qdisc_is_running(struct net_device *dev, int lock)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *dev_queue;
+		int val;
+
+		dev_queue = netdev_get_tx_queue(dev, i);
+
+		if (lock)
+			spin_lock_bh(&dev_queue->lock);
+
+		val = test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state);
+
+		if (lock)
+			spin_unlock_bh(&dev_queue->lock);
+
+		if (val)
+			return true;
+	}
+	return false;
+}
+
 void dev_deactivate(struct net_device *dev)
 {
-	struct netdev_queue *dev_queue = &dev->tx_queue;
-	int running;
+	bool running;
 
-	dev_deactivate_queue(dev_queue, &noop_qdisc);
+	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
 
 	dev_watchdog_down(dev);
 
@@ -617,17 +699,14 @@ void dev_deactivate(struct net_device *dev)
 
 	/* Wait for outstanding qdisc_run calls. */
 	do {
-		while (test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state))
+		while (some_qdisc_is_running(dev, 0))
 			yield();
 
 		/*
 		 * Double-check inside queue lock to ensure that all effects
 		 * of the queue run are visible when we return.
 		 */
-		spin_lock_bh(&dev_queue->lock);
-		running = test_bit(__QUEUE_STATE_QDISC_RUNNING,
-				   &dev_queue->state);
-		spin_unlock_bh(&dev_queue->lock);
+		running = some_qdisc_is_running(dev, 1);
 
 		/*
 		 * The running flag should never be set at this point because
@@ -642,8 +721,10 @@ void dev_deactivate(struct net_device *dev)
 
 static void dev_init_scheduler_queue(struct net_device *dev,
 				     struct netdev_queue *dev_queue,
-				     struct Qdisc *qdisc)
+				     void *_qdisc)
 {
+	struct Qdisc *qdisc = _qdisc;
+
 	dev_queue->qdisc = qdisc;
 	dev_queue->qdisc_sleeping = qdisc;
 	INIT_LIST_HEAD(&dev_queue->qdisc_list);
@@ -652,18 +733,19 @@ static void dev_init_scheduler_queue(struct net_device *dev,
 void dev_init_scheduler(struct net_device *dev)
 {
 	qdisc_lock_tree(dev);
-	dev_init_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc);
+	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
 	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);
 	qdisc_unlock_tree(dev);
 
 	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
 }
 
-static void dev_shutdown_scheduler_queue(struct net_device *dev,
-					 struct netdev_queue *dev_queue,
-					 struct Qdisc *qdisc_default)
+static void shutdown_scheduler_queue(struct net_device *dev,
+				     struct netdev_queue *dev_queue,
+				     void *_qdisc_default)
 {
 	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+	struct Qdisc *qdisc_default = _qdisc_default;
 
 	if (qdisc) {
 		dev_queue->qdisc = qdisc_default;
@@ -676,8 +758,8 @@ static void dev_shutdown_scheduler_queue(struct net_device *dev,
 void dev_shutdown(struct net_device *dev)
 {
 	qdisc_lock_tree(dev);
-	dev_shutdown_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc);
-	dev_shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
+	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
+	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
 	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
 	qdisc_unlock_tree(dev);
 }
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 8ac05981be2..44a2c3451f4 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -111,7 +111,7 @@ teql_dequeue(struct Qdisc* sch)
 	struct sk_buff *skb;
 
 	skb = __skb_dequeue(&dat->q);
-	dat_queue = &dat->m->dev->tx_queue;
+	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 	if (skb == NULL) {
 		struct net_device *m = qdisc_dev(dat_queue->qdisc);
 		if (m) {
@@ -155,10 +155,13 @@ teql_destroy(struct Qdisc* sch)
 				if (q == master->slaves) {
 					master->slaves = NEXT_SLAVE(q);
 					if (q == master->slaves) {
+						struct netdev_queue *txq;
+
+						txq = netdev_get_tx_queue(master->dev, 0);
 						master->slaves = NULL;
-						spin_lock_bh(&master->dev->tx_queue.lock);
-						qdisc_reset(master->dev->tx_queue.qdisc);
-						spin_unlock_bh(&master->dev->tx_queue.lock);
+						spin_lock_bh(&txq->lock);
+						qdisc_reset(txq->qdisc);
+						spin_unlock_bh(&txq->lock);
 					}
 				}
 				skb_queue_purge(&dat->q);
@@ -218,7 +221,8 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 static int
 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
 {
-	struct teql_sched_data *q = qdisc_priv(dev->tx_queue.qdisc);
+	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
+	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
 	struct neighbour *mn = skb->dst->neighbour;
 	struct neighbour *n = q->ncache;
 
@@ -254,7 +258,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
 static inline int teql_resolve(struct sk_buff *skb,
 			       struct sk_buff *skb_res, struct net_device *dev)
 {
-	if (dev->tx_queue.qdisc == &noop_qdisc)
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
+	if (txq->qdisc == &noop_qdisc)
 		return -ENODEV;
 
 	if (dev->header_ops == NULL ||
@@ -285,8 +290,10 @@ restart:
 
 	do {
 		struct net_device *slave = qdisc_dev(q);
+		struct netdev_queue *slave_txq;
 
-		if (slave->tx_queue.qdisc_sleeping != q)
+		slave_txq = netdev_get_tx_queue(slave, 0);
+		if (slave_txq->qdisc_sleeping != q)
 			continue;
 		if (netif_queue_stopped(slave) ||
 		    __netif_subqueue_stopped(slave, subq) ||
-- 
cgit v1.2.3-70-g09d2


From 09e83b5d7d1878065e2453239b49b684cd0fe4e5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 01:52:12 -0700
Subject: netdev: Kill NETIF_F_MULTI_QUEUE.

There is no need for a feature bit for something that
can be tested by simply checking the TX queue count.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/multiqueue.txt | 11 -----------
 drivers/net/cpmac.c                     |  1 -
 drivers/net/igb/igb_main.c              |  2 --
 drivers/net/ixgbe/ixgbe_main.c          |  2 --
 drivers/net/s2io.c                      |  2 --
 include/linux/netdevice.h               |  4 +---
 net/mac80211/main.c                     |  3 ---
 7 files changed, 1 insertion(+), 24 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
index e6dc1ee9e8f..d391ea63114 100644
--- a/Documentation/networking/multiqueue.txt
+++ b/Documentation/networking/multiqueue.txt
@@ -24,15 +24,4 @@ netif_{start|stop|wake}_subqueue() functions to manage each queue while the
 device is still operational.  netdev->queue_lock is still used when the device
 comes online or when it's completely shut down (unregister_netdev(), etc.).
 
-Finally, the base driver should indicate that it is a multiqueue device.  The
-feature flag NETIF_F_MULTI_QUEUE should be added to the netdev->features
-bitmap on device initialization.  Below is an example from e1000:
-
-#ifdef CONFIG_E1000_MQ
-	if ( (adapter->hw.mac.type == e1000_82571) ||
-	     (adapter->hw.mac.type == e1000_82572) ||
-	     (adapter->hw.mac.type == e1000_80003es2lan))
-		netdev->features |= NETIF_F_MULTI_QUEUE;
-#endif
-
 Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index d630e2a72f4..7c7b54e4828 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -1165,7 +1165,6 @@ static int __devinit cpmac_probe(struct platform_device *pdev)
 	dev->set_multicast_list = cpmac_set_multicast_list;
 	dev->tx_timeout         = cpmac_tx_timeout;
 	dev->ethtool_ops        = &cpmac_ethtool_ops;
-	dev->features |= NETIF_F_MULTI_QUEUE;
 
 	netif_napi_add(dev, &priv->napi, cpmac_poll, 64);
 
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 64a150a16f3..471c194cd54 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -1155,8 +1155,6 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 	if (pci_using_dac)
 		netdev->features |= NETIF_F_HIGHDMA;
 
-	netdev->features |= NETIF_F_MULTI_QUEUE;
-
 	netdev->features |= NETIF_F_LLTX;
 	adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
 
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 7d8bf94d378..e6df9233f5e 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3566,8 +3566,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	if (pci_using_dac)
 		netdev->features |= NETIF_F_HIGHDMA;
 
-	netdev->features |= NETIF_F_MULTI_QUEUE;
-
 	/* make sure the EEPROM is good */
 	if (ixgbe_validate_eeprom_checksum(hw, NULL) < 0) {
 		dev_err(&pdev->dev, "The EEPROM Checksum Is Not Valid\n");
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 517425dcb77..5f0fcb04aff 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -7966,8 +7966,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 		dev->features |= NETIF_F_UFO;
 		dev->features |= NETIF_F_HW_CSUM;
 	}
-	if (config->multiq)
-		dev->features |= NETIF_F_MULTI_QUEUE;
 	dev->tx_timeout = &s2io_tx_watchdog;
 	dev->watchdog_timeo = WATCH_DOG_TIMEOUT;
 	INIT_WORK(&sp->rst_timer_task, s2io_restart_nic);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f25d4f5a31b..c02227b9dd7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -533,7 +533,6 @@ struct net_device
 #define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
 					/* do not use LLTX in new drivers */
 #define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
-#define NETIF_F_MULTI_QUEUE	16384	/* Has multiple TX/RX queues */
 #define NETIF_F_LRO		32768	/* large receive offload */
 
 	/* Segmentation offload features */
@@ -1163,11 +1162,10 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
  *	@dev: network device
  *
  * Check if device has multiple transmit queues
- * Always falls if NETDEVICE_MULTIQUEUE is not configured
  */
 static inline int netif_is_multiqueue(const struct net_device *dev)
 {
-	return (!!(NETIF_F_MULTI_QUEUE & dev->features));
+	return (dev->num_tx_queues > 1);
 }
 
 /* Use this variant when it is known for sure that it
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b486e634f4f..c74607eda1e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1678,9 +1678,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (!mdev)
 		goto fail_mdev_alloc;
 
-	if (ieee80211_num_queues(hw) > 1)
-		mdev->features |= NETIF_F_MULTI_QUEUE;
-
 	mwdev = netdev_priv(mdev);
 	mdev->ieee80211_ptr = mwdev;
 	mwdev->wiphy = local->hw.wiphy;
-- 
cgit v1.2.3-70-g09d2


From 1d8ae3fdeb001b8f534a6782c261aba6ec1779f5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Jul 2008 02:52:19 -0700
Subject: pkt_sched: Remove RR scheduler.

This actually fixes a bug added by the RR scheduler changes.  The
->bands and ->prio2band parameters were being set outside of the
sch_tree_lock() and thus could result in strange behavior and
inconsistencies.

It might be possible, in the new design (where there will be one qdisc
per device TX queue) to allow similar functionality via a TX hash
algorithm for RR but I really see no reason to export this aspect of
how these multiqueue cards actually implement the scheduling of the
the individual DMA TX rings and the single physical MAC/PHY port.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |   9 ---
 net/sched/sch_prio.c      | 136 ++++++----------------------------------------
 2 files changed, 16 insertions(+), 129 deletions(-)

(limited to 'net')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index dbb7ac37960..87f4e0fa8f2 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -103,15 +103,6 @@ struct tc_prio_qopt
 	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
 };
 
-enum
-{
-	TCA_PRIO_UNSPEC,
-	TCA_PRIO_MQ,
-	__TCA_PRIO_MAX
-};
-
-#define TCA_PRIO_MAX    (__TCA_PRIO_MAX - 1)
-
 /* TBF section */
 
 struct tc_tbf_qopt
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 39157f7bc04..536ca474dc6 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -24,11 +24,9 @@
 struct prio_sched_data
 {
 	int bands;
-	int curband; /* for round-robin */
 	struct tcf_proto *filter_list;
 	u8  prio2band[TC_PRIO_MAX+1];
 	struct Qdisc *queues[TCQ_PRIO_BANDS];
-	int mq;
 };
 
 
@@ -55,17 +53,14 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		if (!q->filter_list || err < 0) {
 			if (TC_H_MAJ(band))
 				band = 0;
-			band = q->prio2band[band&TC_PRIO_MAX];
-			goto out;
+			return q->queues[q->prio2band[band&TC_PRIO_MAX]];
 		}
 		band = res.classid;
 	}
 	band = TC_H_MIN(band) - 1;
 	if (band >= q->bands)
-		band = q->prio2band[0];
-out:
-	if (q->mq)
-		skb_set_queue_mapping(skb, band);
+		return q->queues[q->prio2band[0]];
+
 	return q->queues[band];
 }
 
@@ -123,68 +118,23 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 }
 
 
-static struct sk_buff *
-prio_dequeue(struct Qdisc* sch)
+static struct sk_buff *prio_dequeue(struct Qdisc* sch)
 {
-	struct sk_buff *skb;
 	struct prio_sched_data *q = qdisc_priv(sch);
 	int prio;
-	struct Qdisc *qdisc;
 
 	for (prio = 0; prio < q->bands; prio++) {
-		/* Check if the target subqueue is available before
-		 * pulling an skb.  This way we avoid excessive requeues
-		 * for slower queues.
-		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch),
-					      (q->mq ? prio : 0))) {
-			qdisc = q->queues[prio];
-			skb = qdisc->dequeue(qdisc);
-			if (skb) {
-				sch->q.qlen--;
-				return skb;
-			}
+		struct Qdisc *qdisc = q->queues[prio];
+		struct sk_buff *skb = qdisc->dequeue(qdisc);
+		if (skb) {
+			sch->q.qlen--;
+			return skb;
 		}
 	}
 	return NULL;
 
 }
 
-static struct sk_buff *rr_dequeue(struct Qdisc* sch)
-{
-	struct sk_buff *skb;
-	struct prio_sched_data *q = qdisc_priv(sch);
-	struct Qdisc *qdisc;
-	int bandcount;
-
-	/* Only take one pass through the queues.  If nothing is available,
-	 * return nothing.
-	 */
-	for (bandcount = 0; bandcount < q->bands; bandcount++) {
-		/* Check if the target subqueue is available before
-		 * pulling an skb.  This way we avoid excessive requeues
-		 * for slower queues.  If the queue is stopped, try the
-		 * next queue.
-		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch),
-					      (q->mq ? q->curband : 0))) {
-			qdisc = q->queues[q->curband];
-			skb = qdisc->dequeue(qdisc);
-			if (skb) {
-				sch->q.qlen--;
-				q->curband++;
-				if (q->curband >= q->bands)
-					q->curband = 0;
-				return skb;
-			}
-		}
-		q->curband++;
-		if (q->curband >= q->bands)
-			q->curband = 0;
-	}
-	return NULL;
-}
-
 static unsigned int prio_drop(struct Qdisc* sch)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
@@ -229,45 +179,22 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	struct tc_prio_qopt *qopt;
-	struct nlattr *tb[TCA_PRIO_MAX + 1];
-	int err;
 	int i;
 
-	err = nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt,
-				      sizeof(*qopt));
-	if (err < 0)
-		return err;
-
-	q->bands = qopt->bands;
-	/* If we're multiqueue, make sure the number of incoming bands
-	 * matches the number of queues on the device we're associating with.
-	 * If the number of bands requested is zero, then set q->bands to
-	 * dev->egress_subqueue_count.  Also, the root qdisc must be the
-	 * only one that is enabled for multiqueue, since it's the only one
-	 * that interacts with the underlying device.
-	 */
-	q->mq = nla_get_flag(tb[TCA_PRIO_MQ]);
-	if (q->mq) {
-		if (sch->parent != TC_H_ROOT)
-			return -EINVAL;
-		if (netif_is_multiqueue(qdisc_dev(sch))) {
-			if (q->bands == 0)
-				q->bands = qdisc_dev(sch)->egress_subqueue_count;
-			else if (q->bands != qdisc_dev(sch)->egress_subqueue_count)
-				return -EINVAL;
-		} else
-			return -EOPNOTSUPP;
-	}
+	if (nla_len(opt) < sizeof(*qopt))
+		return -EINVAL;
+	qopt = nla_data(opt);
 
-	if (q->bands > TCQ_PRIO_BANDS || q->bands < 2)
+	if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
 		return -EINVAL;
 
 	for (i=0; i<=TC_PRIO_MAX; i++) {
-		if (qopt->priomap[i] >= q->bands)
+		if (qopt->priomap[i] >= qopt->bands)
 			return -EINVAL;
 	}
 
 	sch_tree_lock(sch);
+	q->bands = qopt->bands;
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
 	for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
@@ -333,10 +260,6 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
 	if (nest == NULL)
 		goto nla_put_failure;
-	if (q->mq) {
-		if (nla_put_flag(skb, TCA_PRIO_MQ) < 0)
-			goto nla_put_failure;
-	}
 	nla_nest_compat_end(skb, nest);
 
 	return skb->len;
@@ -509,44 +432,17 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
 	.owner		=	THIS_MODULE,
 };
 
-static struct Qdisc_ops rr_qdisc_ops __read_mostly = {
-	.next		=	NULL,
-	.cl_ops		=	&prio_class_ops,
-	.id		=	"rr",
-	.priv_size	=	sizeof(struct prio_sched_data),
-	.enqueue	=	prio_enqueue,
-	.dequeue	=	rr_dequeue,
-	.requeue	=	prio_requeue,
-	.drop		=	prio_drop,
-	.init		=	prio_init,
-	.reset		=	prio_reset,
-	.destroy	=	prio_destroy,
-	.change		=	prio_tune,
-	.dump		=	prio_dump,
-	.owner		=	THIS_MODULE,
-};
-
 static int __init prio_module_init(void)
 {
-	int err;
-
-	err = register_qdisc(&prio_qdisc_ops);
-	if (err < 0)
-		return err;
-	err = register_qdisc(&rr_qdisc_ops);
-	if (err < 0)
-		unregister_qdisc(&prio_qdisc_ops);
-	return err;
+	return register_qdisc(&prio_qdisc_ops);
 }
 
 static void __exit prio_module_exit(void)
 {
 	unregister_qdisc(&prio_qdisc_ops);
-	unregister_qdisc(&rr_qdisc_ops);
 }
 
 module_init(prio_module_init)
 module_exit(prio_module_exit)
 
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("sch_rr");
-- 
cgit v1.2.3-70-g09d2


From 24344d2600108b9b79a60c0e4c43b3c499856d14 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Jul 2008 02:53:04 -0700
Subject: mac80211: Temporarily mark QoS support BROKEN.

We will undo this after a few changsets.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index d2038418e2b..10579def490 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -15,6 +15,7 @@ config MAC80211_QOS
 	def_bool y
 	depends on MAC80211
 	depends on NET_SCHED
+	depends on BROKEN
 
 comment "QoS/HT support disabled"
 	depends on MAC80211 && !MAC80211_QOS
-- 
cgit v1.2.3-70-g09d2


From fd2ea0a79faad824258af5dcec1927aa24d81c16 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 01:56:23 -0700
Subject: net: Use queue aware tests throughout.

This effectively "flips the switch" by making the core networking
and multiqueue-aware drivers use the new TX multiqueue structures.

Non-multiqueue drivers need no changes.  The interfaces they use such
as netif_stop_queue() degenerate into an operation on TX queue zero.
So everything "just works" for them.

Code that really wants to do "X" to all TX queues now invokes a
routine that does so, such as netif_tx_wake_all_queues(),
netif_tx_stop_all_queues(), etc.

pktgen and netpoll required a little bit more surgery than the others.

In particular the pktgen changes, whilst functional, could be largely
improved.  The initial check in pktgen_xmit() will sometimes check the
wrong queue, which is mostly harmless.  The thing to do is probably to
invoke fill_packet() earlier.

The bulk of the netpoll changes is to make the code operate solely on
the TX queue indicated by by the SKB queue mapping.

Setting of the SKB queue mapping is entirely confined inside of
net/core/dev.c:dev_pick_tx().  If we end up needing any kind of
special semantics (drops, for example) it will be implemented here.

Finally, we now have a "real_num_tx_queues" which is where the driver
indicates how many TX queues are actually active.

With IGB changes from Jeff Kirsher.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/cpmac.c               | 20 ++++------
 drivers/net/igb/igb_main.c        | 19 +++------
 drivers/net/ixgbe/ixgbe_ethtool.c | 10 +----
 drivers/net/ixgbe/ixgbe_main.c    | 15 +++----
 drivers/net/s2io.c                | 48 +++++++++--------------
 include/linux/netdevice.h         | 82 ++++++++++++++++++++++++++++++++++-----
 include/net/pkt_sched.h           |  4 +-
 net/core/dev.c                    | 28 ++++++-------
 net/core/netpoll.c                | 24 +++++++-----
 net/core/pktgen.c                 | 69 ++++++++++++++++++++------------
 net/sched/sch_generic.c           |  5 +--
 net/sched/sch_teql.c              |  6 +--
 12 files changed, 187 insertions(+), 143 deletions(-)

(limited to 'net')

diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index 7c7b54e4828..fbd4280c102 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -544,7 +544,7 @@ fatal_error:
 
 	spin_unlock(&priv->rx_lock);
 	netif_rx_complete(priv->dev, napi);
-	netif_stop_queue(priv->dev);
+	netif_tx_stop_all_queues(priv->dev);
 	napi_disable(&priv->napi);
 
 	atomic_inc(&priv->reset_pending);
@@ -750,9 +750,7 @@ static void cpmac_hw_error(struct work_struct *work)
 	barrier();
 	atomic_dec(&priv->reset_pending);
 
-	for (i = 0; i < CPMAC_QUEUES; i++)
-		netif_wake_subqueue(priv->dev, i);
-	netif_wake_queue(priv->dev);
+	netif_tx_wake_all_queues(priv->dev);
 	cpmac_write(priv->regs, CPMAC_MAC_INT_ENABLE, 3);
 }
 
@@ -781,7 +779,7 @@ static void cpmac_check_status(struct net_device *dev)
 				     dev->name, tx_code, tx_channel, macstatus);
 		}
 
-		netif_stop_queue(dev);
+		netif_tx_stop_all_queues(dev);
 		cpmac_hw_stop(dev);
 		if (schedule_work(&priv->reset_work))
 			atomic_inc(&priv->reset_pending);
@@ -842,9 +840,7 @@ static void cpmac_tx_timeout(struct net_device *dev)
 	barrier();
 	atomic_dec(&priv->reset_pending);
 
-	netif_wake_queue(priv->dev);
-	for (i = 0; i < CPMAC_QUEUES; i++)
-		netif_wake_subqueue(dev, i);
+	netif_tx_wake_all_queues(priv->dev);
 }
 
 static int cpmac_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -935,7 +931,7 @@ static void cpmac_adjust_link(struct net_device *dev)
 
 	spin_lock(&priv->lock);
 	if (priv->phy->link) {
-		netif_start_queue(dev);
+		netif_tx_start_all_queues(dev);
 		if (priv->phy->duplex != priv->oldduplex) {
 			new_state = 1;
 			priv->oldduplex = priv->phy->duplex;
@@ -949,10 +945,10 @@ static void cpmac_adjust_link(struct net_device *dev)
 		if (!priv->oldlink) {
 			new_state = 1;
 			priv->oldlink = 1;
-			netif_schedule(dev);
+			netif_tx_schedule_all(dev);
 		}
 	} else if (priv->oldlink) {
-		netif_stop_queue(dev);
+		netif_tx_stop_all_queues(dev);
 		new_state = 1;
 		priv->oldlink = 0;
 		priv->oldspeed = 0;
@@ -1072,7 +1068,7 @@ static int cpmac_stop(struct net_device *dev)
 	struct cpmac_priv *priv = netdev_priv(dev);
 	struct resource *mem;
 
-	netif_stop_queue(dev);
+	netif_tx_stop_all_queues(dev);
 
 	cancel_work_sync(&priv->reset_work);
 	napi_disable(&priv->napi);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 471c194cd54..81bba6983dd 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -533,7 +533,7 @@ msi_only:
 		adapter->flags |= IGB_FLAG_HAS_MSI;
 
 	/* Notify the stack of the (possibly) reduced Tx Queue count. */
-	adapter->netdev->egress_subqueue_count = adapter->num_tx_queues;
+	adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
 	return;
 }
 
@@ -821,9 +821,7 @@ void igb_down(struct igb_adapter *adapter)
 	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
 	/* flush and sleep below */
 
-	netif_stop_queue(netdev);
-	for (i = 0; i < adapter->num_tx_queues; i++)
-		netif_stop_subqueue(netdev, i);
+	netif_tx_stop_all_queues(netdev);
 
 	/* disable transmits in the hardware */
 	tctl = rd32(E1000_TCTL);
@@ -1266,9 +1264,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 
 	/* tell the stack to leave us alone until igb_open() is called */
 	netif_carrier_off(netdev);
-	netif_stop_queue(netdev);
-	for (i = 0; i < adapter->num_tx_queues; i++)
-		netif_stop_subqueue(netdev, i);
+	netif_tx_stop_all_queues(netdev);
 
 	strcpy(netdev->name, "eth%d");
 	err = register_netdev(netdev);
@@ -2315,7 +2311,6 @@ static void igb_watchdog_task(struct work_struct *work)
 	struct e1000_mac_info *mac = &adapter->hw.mac;
 	u32 link;
 	s32 ret_val;
-	int i;
 
 	if ((netif_carrier_ok(netdev)) &&
 	    (rd32(E1000_STATUS) & E1000_STATUS_LU))
@@ -2371,9 +2366,7 @@ static void igb_watchdog_task(struct work_struct *work)
 			}
 
 			netif_carrier_on(netdev);
-			netif_wake_queue(netdev);
-			for (i = 0; i < adapter->num_tx_queues; i++)
-				netif_wake_subqueue(netdev, i);
+			netif_tx_wake_all_queues(netdev);
 
 			if (!test_bit(__IGB_DOWN, &adapter->state))
 				mod_timer(&adapter->phy_info_timer,
@@ -2385,9 +2378,7 @@ static void igb_watchdog_task(struct work_struct *work)
 			adapter->link_duplex = 0;
 			dev_info(&adapter->pdev->dev, "NIC Link is Down\n");
 			netif_carrier_off(netdev);
-			netif_stop_queue(netdev);
-			for (i = 0; i < adapter->num_tx_queues; i++)
-				netif_stop_subqueue(netdev, i);
+			netif_tx_stop_all_queues(netdev);
 			if (!test_bit(__IGB_DOWN, &adapter->state))
 				mod_timer(&adapter->phy_info_timer,
 					  round_jiffies(jiffies + 2 * HZ));
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 81b769093d2..3efe5dda10a 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -252,16 +252,10 @@ static int ixgbe_set_tso(struct net_device *netdev, u32 data)
 		netdev->features |= NETIF_F_TSO;
 		netdev->features |= NETIF_F_TSO6;
 	} else {
-		struct ixgbe_adapter *adapter = netdev_priv(netdev);
-		int i;
-		netif_stop_queue(netdev);
-		for (i = 0; i < adapter->num_tx_queues; i++)
-			netif_stop_subqueue(netdev, i);
+		netif_tx_stop_all_queues(netdev);
 		netdev->features &= ~NETIF_F_TSO;
 		netdev->features &= ~NETIF_F_TSO6;
-		for (i = 0; i < adapter->num_tx_queues; i++)
-			netif_start_subqueue(netdev, i);
-		netif_start_queue(netdev);
+		netif_tx_start_all_queues(netdev);
 	}
 	return 0;
 }
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index e6df9233f5e..6af8fb5c4b5 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2013,7 +2013,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 	del_timer_sync(&adapter->watchdog_timer);
 
 	netif_carrier_off(netdev);
-	netif_stop_queue(netdev);
+	netif_tx_stop_all_queues(netdev);
 
 	if (!pci_channel_offline(adapter->pdev))
 		ixgbe_reset(adapter);
@@ -2359,7 +2359,7 @@ try_msi:
 
 out:
 	/* Notify the stack of the (possibly) reduced Tx Queue count. */
-	adapter->netdev->egress_subqueue_count = adapter->num_tx_queues;
+	adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
 
 	return err;
 }
@@ -2896,7 +2896,6 @@ static void ixgbe_watchdog(unsigned long data)
 	struct net_device *netdev = adapter->netdev;
 	bool link_up;
 	u32 link_speed = 0;
-	int i;
 
 	adapter->hw.mac.ops.check_link(&adapter->hw, &(link_speed), &link_up);
 
@@ -2917,9 +2916,7 @@ static void ixgbe_watchdog(unsigned long data)
 				 (FLOW_TX ? "TX" : "None"))));
 
 			netif_carrier_on(netdev);
-			netif_wake_queue(netdev);
-			for (i = 0; i < adapter->num_tx_queues; i++)
-				netif_wake_subqueue(netdev, i);
+			netif_tx_wake_all_queues(netdev);
 		} else {
 			/* Force detection of hung controller */
 			adapter->detect_tx_hung = true;
@@ -2928,7 +2925,7 @@ static void ixgbe_watchdog(unsigned long data)
 		if (netif_carrier_ok(netdev)) {
 			DPRINTK(LINK, INFO, "NIC Link is Down\n");
 			netif_carrier_off(netdev);
-			netif_stop_queue(netdev);
+			netif_tx_stop_all_queues(netdev);
 		}
 	}
 
@@ -3631,9 +3628,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	ixgbe_start_hw(hw);
 
 	netif_carrier_off(netdev);
-	netif_stop_queue(netdev);
-	for (i = 0; i < adapter->num_tx_queues; i++)
-		netif_stop_subqueue(netdev, i);
+	netif_tx_stop_all_queues(netdev);
 
 	ixgbe_napi_add_all(adapter);
 
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 5f0fcb04aff..9dae40ccf04 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -545,63 +545,53 @@ static struct pci_driver s2io_driver = {
 /* netqueue manipulation helper functions */
 static inline void s2io_stop_all_tx_queue(struct s2io_nic *sp)
 {
-	int i;
-	if (sp->config.multiq) {
-		for (i = 0; i < sp->config.tx_fifo_num; i++)
-			netif_stop_subqueue(sp->dev, i);
-	} else {
+	if (!sp->config.multiq) {
+		int i;
+
 		for (i = 0; i < sp->config.tx_fifo_num; i++)
 			sp->mac_control.fifos[i].queue_state = FIFO_QUEUE_STOP;
-		netif_stop_queue(sp->dev);
 	}
+	netif_tx_stop_all_queues(sp->dev);
 }
 
 static inline void s2io_stop_tx_queue(struct s2io_nic *sp, int fifo_no)
 {
-	if (sp->config.multiq)
-		netif_stop_subqueue(sp->dev, fifo_no);
-	else {
+	if (!sp->config.multiq)
 		sp->mac_control.fifos[fifo_no].queue_state =
 			FIFO_QUEUE_STOP;
-		netif_stop_queue(sp->dev);
-	}
+
+	netif_tx_stop_all_queues(sp->dev);
 }
 
 static inline void s2io_start_all_tx_queue(struct s2io_nic *sp)
 {
-	int i;
-	if (sp->config.multiq) {
-		for (i = 0; i < sp->config.tx_fifo_num; i++)
-			netif_start_subqueue(sp->dev, i);
-	} else {
+	if (!sp->config.multiq) {
+		int i;
+
 		for (i = 0; i < sp->config.tx_fifo_num; i++)
 			sp->mac_control.fifos[i].queue_state = FIFO_QUEUE_START;
-		netif_start_queue(sp->dev);
 	}
+	netif_tx_start_all_queues(sp->dev);
 }
 
 static inline void s2io_start_tx_queue(struct s2io_nic *sp, int fifo_no)
 {
-	if (sp->config.multiq)
-		netif_start_subqueue(sp->dev, fifo_no);
-	else {
+	if (!sp->config.multiq)
 		sp->mac_control.fifos[fifo_no].queue_state =
 			FIFO_QUEUE_START;
-		netif_start_queue(sp->dev);
-	}
+
+	netif_tx_start_all_queues(sp->dev);
 }
 
 static inline void s2io_wake_all_tx_queue(struct s2io_nic *sp)
 {
-	int i;
-	if (sp->config.multiq) {
-		for (i = 0; i < sp->config.tx_fifo_num; i++)
-			netif_wake_subqueue(sp->dev, i);
-	} else {
+	if (!sp->config.multiq) {
+		int i;
+
 		for (i = 0; i < sp->config.tx_fifo_num; i++)
 			sp->mac_control.fifos[i].queue_state = FIFO_QUEUE_START;
-		netif_wake_queue(sp->dev);
 	}
+	netif_tx_wake_all_queues(sp->dev);
 }
 
 static inline void s2io_wake_tx_queue(
@@ -8691,5 +8681,5 @@ static void s2io_io_resume(struct pci_dev *pdev)
 	}
 
 	netif_device_attach(netdev);
-	netif_wake_queue(netdev);
+	netif_tx_wake_all_queues(netdev);
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c02227b9dd7..b5c1e7df64f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -642,7 +642,13 @@ struct net_device
 	struct netdev_queue	rx_queue;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
+
+	/* Number of TX queues allocated at alloc_netdev_mq() time  */
 	unsigned int		num_tx_queues;
+
+	/* Number of TX queues currently active in device  */
+	unsigned int		real_num_tx_queues;
+
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 
 /*
@@ -1000,6 +1006,14 @@ static inline void netif_schedule(struct net_device *dev)
 	netif_schedule_queue(netdev_get_tx_queue(dev, 0));
 }
 
+static inline void netif_tx_schedule_all(struct net_device *dev)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++)
+		netif_schedule_queue(netdev_get_tx_queue(dev, i));
+}
+
 /**
  *	netif_start_queue - allow transmit
  *	@dev: network device
@@ -1016,6 +1030,16 @@ static inline void netif_start_queue(struct net_device *dev)
 	netif_tx_start_queue(netdev_get_tx_queue(dev, 0));
 }
 
+static inline void netif_tx_start_all_queues(struct net_device *dev)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		netif_tx_start_queue(txq);
+	}
+}
+
 /**
  *	netif_wake_queue - restart transmit
  *	@dev: network device
@@ -1040,6 +1064,16 @@ static inline void netif_wake_queue(struct net_device *dev)
 	netif_tx_wake_queue(netdev_get_tx_queue(dev, 0));
 }
 
+static inline void netif_tx_wake_all_queues(struct net_device *dev)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		netif_tx_wake_queue(txq);
+	}
+}
+
 /**
  *	netif_stop_queue - stop transmitted packets
  *	@dev: network device
@@ -1057,6 +1091,16 @@ static inline void netif_stop_queue(struct net_device *dev)
 	netif_tx_stop_queue(netdev_get_tx_queue(dev, 0));
 }
 
+static inline void netif_tx_stop_all_queues(struct net_device *dev)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		netif_tx_stop_queue(txq);
+	}
+}
+
 /**
  *	netif_queue_stopped - test if transmit queue is flowblocked
  *	@dev: network device
@@ -1100,7 +1144,8 @@ static inline int netif_running(const struct net_device *dev)
  */
 static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
 {
-	clear_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
+	clear_bit(__QUEUE_STATE_XOFF, &txq->state);
 }
 
 /**
@@ -1112,11 +1157,12 @@ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
  */
 static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
 {
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
 #ifdef CONFIG_NETPOLL_TRAP
 	if (netpoll_trap())
 		return;
 #endif
-	set_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+	set_bit(__QUEUE_STATE_XOFF, &txq->state);
 }
 
 /**
@@ -1129,8 +1175,8 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
 static inline int __netif_subqueue_stopped(const struct net_device *dev,
 					 u16 queue_index)
 {
-	return test_bit(__QUEUE_STATE_XOFF,
-			&dev->egress_subqueue[queue_index].state);
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
+	return test_bit(__QUEUE_STATE_XOFF, &txq->state);
 }
 
 static inline int netif_subqueue_stopped(const struct net_device *dev,
@@ -1148,13 +1194,13 @@ static inline int netif_subqueue_stopped(const struct net_device *dev,
  */
 static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 {
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
 #ifdef CONFIG_NETPOLL_TRAP
 	if (netpoll_trap())
 		return;
 #endif
-	if (test_and_clear_bit(__QUEUE_STATE_XOFF,
-			       &dev->egress_subqueue[queue_index].state))
-		__netif_schedule(netdev_get_tx_queue(dev, 0));
+	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state))
+		__netif_schedule(txq);
 }
 
 /**
@@ -1198,7 +1244,8 @@ extern int		dev_set_mtu(struct net_device *, int);
 extern int		dev_set_mac_address(struct net_device *,
 					    struct sockaddr *);
 extern int		dev_hard_start_xmit(struct sk_buff *skb,
-					    struct net_device *dev);
+					    struct net_device *dev,
+					    struct netdev_queue *txq);
 
 extern int		netdev_budget;
 
@@ -1447,6 +1494,12 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 	txq->xmit_lock_owner = cpu;
 }
 
+static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
+{
+	spin_lock_bh(&txq->_xmit_lock);
+	txq->xmit_lock_owner = smp_processor_id();
+}
+
 static inline void netif_tx_lock(struct net_device *dev)
 {
 	int cpu = smp_processor_id();
@@ -1483,6 +1536,12 @@ static inline void __netif_tx_unlock(struct netdev_queue *txq)
 	spin_unlock(&txq->_xmit_lock);
 }
 
+static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+	spin_unlock_bh(&txq->_xmit_lock);
+}
+
 static inline void netif_tx_unlock(struct net_device *dev)
 {
 	unsigned int i;
@@ -1514,8 +1573,13 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
 
 static inline void netif_tx_disable(struct net_device *dev)
 {
+	unsigned int i;
+
 	netif_tx_lock_bh(dev);
-	netif_stop_queue(dev);
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		netif_tx_stop_queue(txq);
+	}
 	netif_tx_unlock_bh(dev);
 }
 
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index d58c1a5eb84..cb952781560 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -88,9 +88,7 @@ extern void __qdisc_run(struct netdev_queue *txq);
 
 static inline void qdisc_run(struct netdev_queue *txq)
 {
-	struct net_device *dev = txq->dev;
-
-	if (!netif_queue_stopped(dev) &&
+	if (!netif_tx_queue_stopped(txq) &&
 	    !test_and_set_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state))
 		__qdisc_run(txq);
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 69378f25069..f027a1ac4fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1598,7 +1598,8 @@ static int dev_gso_segment(struct sk_buff *skb)
 	return 0;
 }
 
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+			struct netdev_queue *txq)
 {
 	if (likely(!skb->next)) {
 		if (!list_empty(&ptype_all))
@@ -1627,9 +1628,7 @@ gso:
 			skb->next = nskb;
 			return rc;
 		}
-		if (unlikely((netif_queue_stopped(dev) ||
-			     netif_subqueue_stopped(dev, skb)) &&
-			     skb->next))
+		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
 
@@ -1669,7 +1668,10 @@ out_kfree_skb:
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
-	return netdev_get_tx_queue(dev, 0);
+	u16 queue_index = 0;
+
+	skb_set_queue_mapping(skb, queue_index);
+	return netdev_get_tx_queue(dev, queue_index);
 }
 
 int dev_queue_xmit(struct sk_buff *skb)
@@ -1737,8 +1739,6 @@ gso:
 		spin_lock(&txq->lock);
 		q = txq->qdisc;
 		if (q->enqueue) {
-			/* reset queue_mapping to zero */
-			skb_set_queue_mapping(skb, 0);
 			rc = q->enqueue(skb, q);
 			qdisc_run(txq);
 			spin_unlock(&txq->lock);
@@ -1768,10 +1768,9 @@ gso:
 
 			HARD_TX_LOCK(dev, txq, cpu);
 
-			if (!netif_queue_stopped(dev) &&
-			    !netif_subqueue_stopped(dev, skb)) {
+			if (!netif_tx_queue_stopped(txq)) {
 				rc = 0;
-				if (!dev_hard_start_xmit(skb, dev)) {
+				if (!dev_hard_start_xmit(skb, dev, txq)) {
 					HARD_TX_UNLOCK(dev, txq);
 					goto out;
 				}
@@ -4160,8 +4159,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
-	alloc_size = sizeof(struct net_device) +
-		     sizeof(struct net_device_subqueue) * (queue_count - 1);
+	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
 		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
@@ -4191,16 +4189,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	dev->_tx = tx;
 	dev->num_tx_queues = queue_count;
+	dev->real_num_tx_queues = queue_count;
 
 	if (sizeof_priv) {
 		dev->priv = ((char *)dev +
-			     ((sizeof(struct net_device) +
-			       (sizeof(struct net_device_subqueue) *
-				(queue_count - 1)) + NETDEV_ALIGN_CONST)
+			     ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
 			      & ~NETDEV_ALIGN_CONST));
 	}
 
-	dev->egress_subqueue_count = queue_count;
 	dev->gso_max_size = GSO_MAX_SIZE;
 
 	netdev_init_queues(dev);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8fb134da034..c12720895ec 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,25 +58,27 @@ static void queue_process(struct work_struct *work)
 
 	while ((skb = skb_dequeue(&npinfo->txq))) {
 		struct net_device *dev = skb->dev;
+		struct netdev_queue *txq;
 
 		if (!netif_device_present(dev) || !netif_running(dev)) {
 			__kfree_skb(skb);
 			continue;
 		}
 
+		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
 		local_irq_save(flags);
-		netif_tx_lock(dev);
-		if ((netif_queue_stopped(dev) ||
-		     netif_subqueue_stopped(dev, skb)) ||
-		     dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+		__netif_tx_lock(txq, smp_processor_id());
+		if (netif_tx_queue_stopped(txq) ||
+		    dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
-			netif_tx_unlock(dev);
+			__netif_tx_unlock(txq);
 			local_irq_restore(flags);
 
 			schedule_delayed_work(&npinfo->tx_work, HZ/10);
 			return;
 		}
-		netif_tx_unlock(dev);
+		__netif_tx_unlock(txq);
 		local_irq_restore(flags);
 	}
 }
@@ -278,17 +280,19 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 	/* don't get messages out of order, and no recursion */
 	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
+		struct netdev_queue *txq;
 		unsigned long flags;
 
+		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
 		local_irq_save(flags);
 		/* try until next clock tick */
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
-			if (netif_tx_trylock(dev)) {
-				if (!netif_queue_stopped(dev) &&
-				    !netif_subqueue_stopped(dev, skb))
+			if (__netif_tx_trylock(txq)) {
+				if (!netif_tx_queue_stopped(txq))
 					status = dev->hard_start_xmit(skb, dev);
-				netif_tx_unlock(dev);
+				__netif_tx_unlock(txq);
 
 				if (status == NETDEV_TX_OK)
 					break;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fdf537707e5..906802db4ed 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2123,6 +2123,24 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
 	}
 }
 #endif
+static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
+{
+	if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
+		__u16 t;
+		if (pkt_dev->flags & F_QUEUE_MAP_RND) {
+			t = random32() %
+				(pkt_dev->queue_map_max -
+				 pkt_dev->queue_map_min + 1)
+				+ pkt_dev->queue_map_min;
+		} else {
+			t = pkt_dev->cur_queue_map + 1;
+			if (t > pkt_dev->queue_map_max)
+				t = pkt_dev->queue_map_min;
+		}
+		pkt_dev->cur_queue_map = t;
+	}
+}
+
 /* Increment/randomize headers according to flags and current values
  * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
  */
@@ -2325,19 +2343,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 		pkt_dev->cur_pkt_size = t;
 	}
 
-	if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
-		__u16 t;
-		if (pkt_dev->flags & F_QUEUE_MAP_RND) {
-			t = random32() %
-				(pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1)
-				+ pkt_dev->queue_map_min;
-		} else {
-			t = pkt_dev->cur_queue_map + 1;
-			if (t > pkt_dev->queue_map_max)
-				t = pkt_dev->queue_map_min;
-		}
-		pkt_dev->cur_queue_map = t;
-	}
+	set_cur_queue_map(pkt_dev);
 
 	pkt_dev->flows[flow].count++;
 }
@@ -2458,7 +2464,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	__be16 *vlan_encapsulated_proto = NULL;  /* packet type ID field (or len) for VLAN tag */
 	__be16 *svlan_tci = NULL;                /* Encapsulates priority and SVLAN ID */
 	__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
-
+	u16 queue_map;
 
 	if (pkt_dev->nr_labels)
 		protocol = htons(ETH_P_MPLS_UC);
@@ -2469,6 +2475,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
+	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
 
 	datalen = (odev->hard_header_len + 16) & ~0xf;
@@ -2507,7 +2514,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	skb->network_header = skb->tail;
 	skb->transport_header = skb->network_header + sizeof(struct iphdr);
 	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
-	skb_set_queue_mapping(skb, pkt_dev->cur_queue_map);
+	skb_set_queue_mapping(skb, queue_map);
 	iph = ip_hdr(skb);
 	udph = udp_hdr(skb);
 
@@ -2797,6 +2804,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	__be16 *vlan_encapsulated_proto = NULL;  /* packet type ID field (or len) for VLAN tag */
 	__be16 *svlan_tci = NULL;                /* Encapsulates priority and SVLAN ID */
 	__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
+	u16 queue_map;
 
 	if (pkt_dev->nr_labels)
 		protocol = htons(ETH_P_MPLS_UC);
@@ -2807,6 +2815,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
+	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
 
 	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 +
@@ -2844,7 +2853,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->network_header = skb->tail;
 	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
 	skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
-	skb_set_queue_mapping(skb, pkt_dev->cur_queue_map);
+	skb_set_queue_mapping(skb, queue_map);
 	iph = ipv6_hdr(skb);
 	udph = udp_hdr(skb);
 
@@ -3263,7 +3272,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
 static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 {
 	struct net_device *odev = NULL;
+	struct netdev_queue *txq;
 	__u64 idle_start = 0;
+	u16 queue_map;
 	int ret;
 
 	odev = pkt_dev->odev;
@@ -3285,9 +3296,15 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		}
 	}
 
-	if ((netif_queue_stopped(odev) ||
-	     (pkt_dev->skb &&
-	      netif_subqueue_stopped(odev, pkt_dev->skb))) ||
+	if (!pkt_dev->skb) {
+		set_cur_queue_map(pkt_dev);
+		queue_map = pkt_dev->cur_queue_map;
+	} else {
+		queue_map = skb_get_queue_mapping(pkt_dev->skb);
+	}
+
+	txq = netdev_get_tx_queue(odev, queue_map);
+	if (netif_tx_queue_stopped(txq) ||
 	    need_resched()) {
 		idle_start = getCurUs();
 
@@ -3303,8 +3320,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 		pkt_dev->idle_acc += getCurUs() - idle_start;
 
-		if (netif_queue_stopped(odev) ||
-		    netif_subqueue_stopped(odev, pkt_dev->skb)) {
+		if (netif_tx_queue_stopped(txq)) {
 			pkt_dev->next_tx_us = getCurUs();	/* TODO */
 			pkt_dev->next_tx_ns = 0;
 			goto out;	/* Try the next interface */
@@ -3331,9 +3347,12 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		}
 	}
 
-	netif_tx_lock_bh(odev);
-	if (!netif_queue_stopped(odev) &&
-	    !netif_subqueue_stopped(odev, pkt_dev->skb)) {
+	/* fill_packet() might have changed the queue */
+	queue_map = skb_get_queue_mapping(pkt_dev->skb);
+	txq = netdev_get_tx_queue(odev, queue_map);
+
+	__netif_tx_lock_bh(txq);
+	if (!netif_tx_queue_stopped(txq)) {
 
 		atomic_inc(&(pkt_dev->skb->users));
 	      retry_now:
@@ -3377,7 +3396,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		pkt_dev->next_tx_ns = 0;
 	}
 
-	netif_tx_unlock_bh(odev);
+	__netif_tx_unlock_bh(txq);
 
 	/* If pkt_dev->count is zero, then run forever */
 	if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4e2b865cbba..2f575b9017d 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -166,7 +166,7 @@ static inline int qdisc_restart(struct netdev_queue *txq)
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_subqueue_stopped(dev, skb))
-		ret = dev_hard_start_xmit(skb, dev);
+		ret = dev_hard_start_xmit(skb, dev, txq);
 	HARD_TX_UNLOCK(dev, txq);
 
 	spin_lock(&txq->lock);
@@ -198,11 +198,10 @@ static inline int qdisc_restart(struct netdev_queue *txq)
 
 void __qdisc_run(struct netdev_queue *txq)
 {
-	struct net_device *dev = txq->dev;
 	unsigned long start_time = jiffies;
 
 	while (qdisc_restart(txq)) {
-		if (netif_queue_stopped(dev))
+		if (netif_tx_queue_stopped(txq))
 			break;
 
 		/*
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 44a2c3451f4..ade3372221c 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -295,8 +295,7 @@ restart:
 		slave_txq = netdev_get_tx_queue(slave, 0);
 		if (slave_txq->qdisc_sleeping != q)
 			continue;
-		if (netif_queue_stopped(slave) ||
-		    __netif_subqueue_stopped(slave, subq) ||
+		if (__netif_subqueue_stopped(slave, subq) ||
 		    !netif_running(slave)) {
 			busy = 1;
 			continue;
@@ -305,8 +304,7 @@ restart:
 		switch (teql_resolve(skb, skb_res, slave)) {
 		case 0:
 			if (netif_tx_trylock(slave)) {
-				if (!netif_queue_stopped(slave) &&
-				    !__netif_subqueue_stopped(slave, subq) &&
+				if (!__netif_subqueue_stopped(slave, subq) &&
 				    slave->hard_start_xmit(skb, slave) == 0) {
 					netif_tx_unlock(slave);
 					master->slaves = NEXT_SLAVE(q);
-- 
cgit v1.2.3-70-g09d2


From eae792b722fef08dcf3aee88266ee7def9710757 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Jul 2008 03:03:33 -0700
Subject: netdev: Add netdev->select_queue() method.

Devices or device layers can set this to control the queue selection
performed by dev_pick_tx().

This function runs under RCU protection, which allows overriding
functions to have some way of synchronizing with things like dynamic
->real_num_tx_queues adjustments.

This makes the spinlock prefetch in dev_queue_xmit() a little bit
less effective, but that's the price right now for correctness.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 net/core/dev.c            | 9 ++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fdac1159253..9464e645296 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -724,6 +724,9 @@ struct net_device
 	void                    (*poll_controller)(struct net_device *dev);
 #endif
 
+	u16			(*select_queue)(struct net_device *dev,
+						struct sk_buff *skb);
+
 #ifdef CONFIG_NET_NS
 	/* Network namespace this network device is inside */
 	struct net		*nd_net;
diff --git a/net/core/dev.c b/net/core/dev.c
index f027a1ac4fb..7ca9564d2f4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1670,6 +1670,9 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 {
 	u16 queue_index = 0;
 
+	if (dev->select_queue)
+		queue_index = dev->select_queue(dev, skb);
+
 	skb_set_queue_mapping(skb, queue_index);
 	return netdev_get_tx_queue(dev, queue_index);
 }
@@ -1710,14 +1713,14 @@ int dev_queue_xmit(struct sk_buff *skb)
 	}
 
 gso:
-	txq = dev_pick_tx(dev, skb);
-	spin_lock_prefetch(&txq->lock);
-
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
 	rcu_read_lock_bh();
 
+	txq = dev_pick_tx(dev, skb);
+	spin_lock_prefetch(&txq->lock);
+
 	/* Updates of qdisc are serialized by queue->lock.
 	 * The struct Qdisc which is pointed to by qdisc is now a
 	 * rcu structure - it may be accessed without acquiring
-- 
cgit v1.2.3-70-g09d2


From 51cb6db0f5654f08a4a6bfa3888dc36a51c2df3e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Jul 2008 03:34:57 -0700
Subject: mac80211: Reimplement WME using ->select_queue().

The only behavior change is that we do not drop packets under any
circumstances.  If that is absolutely needed, we could easily add it
back.

With cleanups and help from Johannes Berg.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mac80211.h     |   8 -
 net/mac80211/Kconfig       |  11 -
 net/mac80211/Makefile      |   2 +-
 net/mac80211/ieee80211_i.h |  15 +-
 net/mac80211/main.c        |  61 ++---
 net/mac80211/util.c        |  14 +-
 net/mac80211/wme.c         | 602 ++++++++-------------------------------------
 net/mac80211/wme.h         |  35 +--
 8 files changed, 124 insertions(+), 624 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 24a69f6075c..4dd3d93e196 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -847,20 +847,12 @@ static inline void SET_IEEE80211_PERM_ADDR(struct ieee80211_hw *hw, u8 *addr)
 
 static inline int ieee80211_num_regular_queues(struct ieee80211_hw *hw)
 {
-#ifdef CONFIG_MAC80211_QOS
 	return hw->queues;
-#else
-	return 1;
-#endif
 }
 
 static inline int ieee80211_num_queues(struct ieee80211_hw *hw)
 {
-#ifdef CONFIG_MAC80211_QOS
 	return hw->queues + hw->ampdu_queues;
-#else
-	return 1;
-#endif
 }
 
 static inline struct ieee80211_rate *
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 10579def490..80d693392b0 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -11,17 +11,6 @@ config MAC80211
 	  This option enables the hardware independent IEEE 802.11
 	  networking stack.
 
-config MAC80211_QOS
-	def_bool y
-	depends on MAC80211
-	depends on NET_SCHED
-	depends on BROKEN
-
-comment "QoS/HT support disabled"
-	depends on MAC80211 && !MAC80211_QOS
-comment "QoS/HT support needs CONFIG_NET_SCHED"
-	depends on MAC80211 && !NET_SCHED
-
 menu "Rate control algorithm selection"
 	depends on MAC80211 != n
 
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index fa47438e338..a169b0201d6 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -18,10 +18,10 @@ mac80211-y := \
 	tx.o \
 	key.o \
 	util.o \
+	wme.o \
 	event.o
 
 mac80211-$(CONFIG_MAC80211_LEDS) += led.o
-mac80211-$(CONFIG_MAC80211_QOS) += wme.o
 mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
 	debugfs.o \
 	debugfs_sta.o \
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index cbea0154ee3..a4f9a832722 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -25,6 +25,7 @@
 #include <linux/etherdevice.h>
 #include <net/wireless.h>
 #include <net/iw_handler.h>
+#include <net/mac80211.h>
 #include "key.h"
 #include "sta_info.h"
 
@@ -537,6 +538,9 @@ enum {
 	IEEE80211_ADDBA_MSG	= 4,
 };
 
+/* maximum number of hardware queues we support. */
+#define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES)
+
 struct ieee80211_local {
 	/* embed the driver visible part.
 	 * don't cast (use the static inlines below), but we keep
@@ -545,6 +549,8 @@ struct ieee80211_local {
 
 	const struct ieee80211_ops *ops;
 
+	unsigned long queue_pool[BITS_TO_LONGS(QD_MAX_QUEUES)];
+
 	struct net_device *mdev; /* wmaster# - "master" 802.11 device */
 	int open_count;
 	int monitors, cooked_mntrs;
@@ -740,15 +746,6 @@ struct ieee80211_local {
 #endif
 };
 
-static inline int ieee80211_is_multiqueue(struct ieee80211_local *local)
-{
-#ifdef CONFIG_MAC80211_QOS
-	return netif_is_multiqueue(local->mdev);
-#else
-	return 0;
-#endif
-}
-
 static inline struct ieee80211_sub_if_data *
 IEEE80211_DEV_TO_SUB_IF(struct net_device *dev)
 {
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index c74607eda1e..f1a83d450ea 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -114,7 +114,7 @@ static int ieee80211_master_open(struct net_device *dev)
 	if (res)
 		return res;
 
-	netif_start_queue(local->mdev);
+	netif_tx_start_all_queues(local->mdev);
 
 	return 0;
 }
@@ -375,7 +375,7 @@ static int ieee80211_open(struct net_device *dev)
 		queue_work(local->hw.workqueue, &ifsta->work);
 	}
 
-	netif_start_queue(dev);
+	netif_tx_start_all_queues(dev);
 
 	return 0;
  err_del_interface:
@@ -400,7 +400,7 @@ static int ieee80211_stop(struct net_device *dev)
 	/*
 	 * Stop TX on this interface first.
 	 */
-	netif_stop_queue(dev);
+	netif_tx_stop_all_queues(dev);
 
 	/*
 	 * Now delete all active aggregation sessions.
@@ -554,7 +554,6 @@ static int ieee80211_stop(struct net_device *dev)
 int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct netdev_queue *txq;
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *sdata;
 	u16 start_seq_num = 0;
@@ -619,11 +618,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 			(unsigned long)&sta->timer_to_tid[tid];
 	init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
 
-	/* ensure that TX flow won't interrupt us
-	 * until the end of the call to requeue function */
-	txq = netdev_get_tx_queue(local->mdev, 0);
-	spin_lock_bh(&txq->lock);
-
 	/* create a new queue for this aggregation */
 	ret = ieee80211_ht_agg_queue_add(local, sta, tid);
 
@@ -650,7 +644,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 		/* No need to requeue the packets in the agg queue, since we
 		 * held the tx lock: no packet could be enqueued to the newly
 		 * allocated queue */
-		 ieee80211_ht_agg_queue_remove(local, sta, tid, 0);
+		ieee80211_ht_agg_queue_remove(local, sta, tid, 0);
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "BA request denied - HW unavailable for"
 					" tid %d\n", tid);
@@ -661,7 +655,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 
 	/* Will put all the packets in the new SW queue */
 	ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
-	spin_unlock_bh(&txq->lock);
 	spin_unlock_bh(&sta->lock);
 
 	/* send an addBA request */
@@ -687,7 +680,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 err_unlock_queue:
 	kfree(sta->ampdu_mlme.tid_tx[tid]);
 	sta->ampdu_mlme.tid_tx[tid] = NULL;
-	spin_unlock_bh(&txq->lock);
 	ret = -EBUSY;
 err_unlock_sta:
 	spin_unlock_bh(&sta->lock);
@@ -812,7 +804,6 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
 void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct netdev_queue *txq;
 	struct sta_info *sta;
 	u8 *state;
 	int agg_queue;
@@ -844,8 +835,9 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	state = &sta->ampdu_mlme.tid_state_tx[tid];
 
 	/* NOTE: no need to use sta->lock in this state check, as
-	 * ieee80211_stop_tx_ba_session will let only
-	 * one stop call to pass through per sta/tid */
+	 * ieee80211_stop_tx_ba_session will let only one stop call to
+	 * pass through per sta/tid
+	 */
 	if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
@@ -860,19 +852,14 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 
 	agg_queue = sta->tid_to_tx_q[tid];
 
-	/* avoid ordering issues: we are the only one that can modify
-	 * the content of the qdiscs */
-	txq = netdev_get_tx_queue(local->mdev, 0);
-	spin_lock_bh(&txq->lock);
-	/* remove the queue for this aggregation */
 	ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
-	spin_unlock_bh(&txq->lock);
 
-	/* we just requeued the all the frames that were in the removed
-	 * queue, and since we might miss a softirq we do netif_schedule_queue.
-	 * ieee80211_wake_queue is not used here as this queue is not
-	 * necessarily stopped */
-	netif_schedule_queue(txq);
+	/* We just requeued the all the frames that were in the
+	 * removed queue, and since we might miss a softirq we do
+	 * netif_schedule_queue.  ieee80211_wake_queue is not used
+	 * here as this queue is not necessarily stopped
+	 */
+	netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue));
 	spin_lock_bh(&sta->lock);
 	*state = HT_AGG_STATE_IDLE;
 	sta->ampdu_mlme.addba_req_num[tid] = 0;
@@ -1660,17 +1647,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	 * We use the number of queues for feature tests (QoS, HT) internally
 	 * so restrict them appropriately.
 	 */
-#ifdef CONFIG_MAC80211_QOS
 	if (hw->queues > IEEE80211_MAX_QUEUES)
 		hw->queues = IEEE80211_MAX_QUEUES;
 	if (hw->ampdu_queues > IEEE80211_MAX_AMPDU_QUEUES)
 		hw->ampdu_queues = IEEE80211_MAX_AMPDU_QUEUES;
 	if (hw->queues < 4)
 		hw->ampdu_queues = 0;
-#else
-	hw->queues = 1;
-	hw->ampdu_queues = 0;
-#endif
 
 	mdev = alloc_netdev_mq(sizeof(struct wireless_dev),
 			       "wmaster%d", ether_setup,
@@ -1754,7 +1736,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		goto fail_wep;
 	}
 
-	ieee80211_install_qdisc(local->mdev);
+	local->mdev->select_queue = ieee80211_select_queue;
 
 	/* add one default STA interface */
 	result = ieee80211_if_add(local, "wlan%d", NULL,
@@ -1852,23 +1834,11 @@ static int __init ieee80211_init(void)
 
 	ret = rc80211_pid_init();
 	if (ret)
-		goto out;
-
-	ret = ieee80211_wme_register();
-	if (ret) {
-		printk(KERN_DEBUG "ieee80211_init: failed to "
-		       "initialize WME (err=%d)\n", ret);
-		goto out_cleanup_pid;
-	}
+		return ret;
 
 	ieee80211_debugfs_netdev_init();
 
 	return 0;
-
- out_cleanup_pid:
-	rc80211_pid_exit();
- out:
-	return ret;
 }
 
 static void __exit ieee80211_exit(void)
@@ -1884,7 +1854,6 @@ static void __exit ieee80211_exit(void)
 	if (mesh_allocated)
 		ieee80211s_stop();
 
-	ieee80211_wme_unregister();
 	ieee80211_debugfs_netdev_exit();
 }
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 89ce4e07bd8..19f85e1b369 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -363,12 +363,7 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue)
 	if (test_bit(queue, local->queues_pending)) {
 		tasklet_schedule(&local->tx_pending_tasklet);
 	} else {
-		if (ieee80211_is_multiqueue(local)) {
-			netif_wake_subqueue(local->mdev, queue);
-		} else {
-			WARN_ON(queue != 0);
-			netif_wake_queue(local->mdev);
-		}
+		netif_wake_subqueue(local->mdev, queue);
 	}
 }
 EXPORT_SYMBOL(ieee80211_wake_queue);
@@ -377,12 +372,7 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	if (ieee80211_is_multiqueue(local)) {
-		netif_stop_subqueue(local->mdev, queue);
-	} else {
-		WARN_ON(queue != 0);
-		netif_stop_queue(local->mdev);
-	}
+	netif_stop_subqueue(local->mdev, queue);
 }
 EXPORT_SYMBOL(ieee80211_stop_queue);
 
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index f014cd38c2d..b21cfec4b6c 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -18,67 +18,42 @@
 #include "ieee80211_i.h"
 #include "wme.h"
 
-/* maximum number of hardware queues we support. */
-#define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES)
-/* current number of hardware queues we support. */
-#define QD_NUM(hw) ((hw)->queues + (hw)->ampdu_queues)
-
-/*
- * Default mapping in classifier to work with default
+/* Default mapping in classifier to work with default
  * queue setup.
  */
 const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 };
 
-struct ieee80211_sched_data
-{
-	unsigned long qdisc_pool[BITS_TO_LONGS(QD_MAX_QUEUES)];
-	struct tcf_proto *filter_list;
-	struct Qdisc *queues[QD_MAX_QUEUES];
-	struct sk_buff_head requeued[QD_MAX_QUEUES];
-};
-
 static const char llc_ip_hdr[8] = {0xAA, 0xAA, 0x3, 0, 0, 0, 0x08, 0};
 
-/* given a data frame determine the 802.1p/1d tag to use */
-static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd)
+/* Given a data frame determine the 802.1p/1d tag to use.  */
+static unsigned int classify_1d(struct sk_buff *skb)
 {
-	struct iphdr *ip;
-	int dscp;
-	int offset;
-
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct tcf_result res = { -1, 0 };
-
-	/* if there is a user set filter list, call out to that */
-	if (q->filter_list) {
-		tc_classify(skb, q->filter_list, &res);
-		if (res.class != -1)
-			return res.class;
-	}
+	unsigned int dscp;
 
 	/* skb->priority values from 256->263 are magic values to
-	 * directly indicate a specific 802.1d priority.
-	 * This is used to allow 802.1d priority to be passed directly in
-	 * from VLAN tags, etc. */
+	 * directly indicate a specific 802.1d priority.  This is used
+	 * to allow 802.1d priority to be passed directly in from VLAN
+	 * tags, etc.
+	 */
 	if (skb->priority >= 256 && skb->priority <= 263)
 		return skb->priority - 256;
 
-	/* check there is a valid IP header present */
-	offset = ieee80211_get_hdrlen_from_skb(skb);
-	if (skb->len < offset + sizeof(llc_ip_hdr) + sizeof(*ip) ||
-	    memcmp(skb->data + offset, llc_ip_hdr, sizeof(llc_ip_hdr)))
-		return 0;
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		dscp = ip_hdr(skb)->tos & 0xfc;
+		break;
 
-	ip = (struct iphdr *) (skb->data + offset + sizeof(llc_ip_hdr));
+	default:
+		return 0;
+	}
 
-	dscp = ip->tos & 0xfc;
 	if (dscp & 0x1c)
 		return 0;
 	return dscp >> 5;
 }
 
 
-static inline int wme_downgrade_ac(struct sk_buff *skb)
+static int wme_downgrade_ac(struct sk_buff *skb)
 {
 	switch (skb->priority) {
 	case 6:
@@ -99,11 +74,10 @@ static inline int wme_downgrade_ac(struct sk_buff *skb)
 }
 
 
-/* positive return value indicates which queue to use
- * negative return value indicates to drop the frame */
-static int classify80211(struct sk_buff *skb, struct Qdisc *qd)
+/* Indicate which queue to use.  */
+static u16 classify80211(struct sk_buff *skb, struct net_device *dev)
 {
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 
 	if (!ieee80211_is_data(hdr->frame_control)) {
@@ -123,13 +97,15 @@ static int classify80211(struct sk_buff *skb, struct Qdisc *qd)
 
 	/* use the data classifier to determine what 802.1d tag the
 	 * data frame has */
-	skb->priority = classify_1d(skb, qd);
+	skb->priority = classify_1d(skb);
 
 	/* in case we are a client verify acm is not set for this ac */
 	while (unlikely(local->wmm_acm & BIT(skb->priority))) {
 		if (wme_downgrade_ac(skb)) {
-			/* No AC with lower priority has acm=0, drop packet. */
-			return -1;
+			/* The old code would drop the packet in this
+			 * case.
+			 */
+			return 0;
 		}
 	}
 
@@ -137,28 +113,29 @@ static int classify80211(struct sk_buff *skb, struct Qdisc *qd)
 	return ieee802_1d_to_ac[skb->priority];
 }
 
-
-static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
+u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
 {
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct Qdisc *qdisc;
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct sta_info *sta;
-	int err, queue;
+	u16 queue;
 	u8 tid;
 
+	queue = classify80211(skb, dev);
+	if (unlikely(queue >= local->hw.queues))
+		queue = local->hw.queues - 1;
+
 	if (info->flags & IEEE80211_TX_CTL_REQUEUE) {
-		queue = skb_get_queue_mapping(skb);
 		rcu_read_lock();
 		sta = sta_info_get(local, hdr->addr1);
 		tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
 		if (sta) {
+			struct ieee80211_hw *hw = &local->hw;
 			int ampdu_queue = sta->tid_to_tx_q[tid];
-			if ((ampdu_queue < QD_NUM(hw)) &&
-			    test_bit(ampdu_queue, q->qdisc_pool)) {
+
+			if ((ampdu_queue < ieee80211_num_queues(hw)) &&
+			    test_bit(ampdu_queue, local->queue_pool)) {
 				queue = ampdu_queue;
 				info->flags |= IEEE80211_TX_CTL_AMPDU;
 			} else {
@@ -166,17 +143,12 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 			}
 		}
 		rcu_read_unlock();
-		skb_queue_tail(&q->requeued[queue], skb);
-		qd->q.qlen++;
-		return 0;
-	}
-
-	queue = classify80211(skb, qd);
 
-	if (unlikely(queue >= local->hw.queues))
-		queue = local->hw.queues - 1;
+		return queue;
+	}
 
-	/* now we know the 1d priority, fill in the QoS header if there is one
+	/* Now we know the 1d priority, fill in the QoS header if
+	 * there is one.
 	 */
 	if (ieee80211_is_data_qos(hdr->frame_control)) {
 		u8 *p = ieee80211_get_qos_ctl(hdr);
@@ -194,8 +166,10 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 		sta = sta_info_get(local, hdr->addr1);
 		if (sta) {
 			int ampdu_queue = sta->tid_to_tx_q[tid];
-			if ((ampdu_queue < QD_NUM(hw)) &&
-			    test_bit(ampdu_queue, q->qdisc_pool)) {
+			struct ieee80211_hw *hw = &local->hw;
+
+			if ((ampdu_queue < ieee80211_num_queues(hw)) &&
+			    test_bit(ampdu_queue, local->queue_pool)) {
 				queue = ampdu_queue;
 				info->flags |= IEEE80211_TX_CTL_AMPDU;
 			} else {
@@ -206,421 +180,13 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd)
 		rcu_read_unlock();
 	}
 
-	if (unlikely(queue < 0)) {
-			kfree_skb(skb);
-			err = NET_XMIT_DROP;
-	} else {
-		skb_set_queue_mapping(skb, queue);
-		qdisc = q->queues[queue];
-		err = qdisc->enqueue(skb, qdisc);
-		if (err == NET_XMIT_SUCCESS) {
-			qd->q.qlen++;
-			qd->bstats.bytes += skb->len;
-			qd->bstats.packets++;
-			return NET_XMIT_SUCCESS;
-		}
-	}
-	qd->qstats.drops++;
-	return err;
-}
-
-
-/* TODO: clean up the cases where master_hard_start_xmit
- * returns non 0 - it shouldn't ever do that. Once done we
- * can remove this function */
-static int wme_qdiscop_requeue(struct sk_buff *skb, struct Qdisc* qd)
-{
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct Qdisc *qdisc;
-	int err;
-
-	/* we recorded which queue to use earlier! */
-	qdisc = q->queues[skb_get_queue_mapping(skb)];
-
-	if ((err = qdisc->ops->requeue(skb, qdisc)) == 0) {
-		qd->q.qlen++;
-		return 0;
-	}
-	qd->qstats.drops++;
-	return err;
-}
-
-
-static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc* qd)
-{
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct net_device *dev = qdisc_dev(qd);
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-	struct sk_buff *skb;
-	struct Qdisc *qdisc;
-	int queue;
-
-	/* check all the h/w queues in numeric/priority order */
-	for (queue = 0; queue < QD_NUM(hw); queue++) {
-		/* see if there is room in this hardware queue */
-		if (__netif_subqueue_stopped(local->mdev, queue) ||
-		    !test_bit(queue, q->qdisc_pool))
-			continue;
-
-		/* there is space - try and get a frame */
-		skb = skb_dequeue(&q->requeued[queue]);
-		if (skb) {
-			qd->q.qlen--;
-			return skb;
-		}
-
-		qdisc = q->queues[queue];
-		skb = qdisc->dequeue(qdisc);
-		if (skb) {
-			qd->q.qlen--;
-			return skb;
-		}
-	}
-	/* returning a NULL here when all the h/w queues are full means we
-	 * never need to call netif_stop_queue in the driver */
-	return NULL;
-}
-
-
-static void wme_qdiscop_reset(struct Qdisc* qd)
-{
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-	int queue;
-
-	/* QUESTION: should we have some hardware flush functionality here? */
-
-	for (queue = 0; queue < QD_NUM(hw); queue++) {
-		skb_queue_purge(&q->requeued[queue]);
-		qdisc_reset(q->queues[queue]);
-	}
-	qd->q.qlen = 0;
-}
-
-
-static void wme_qdiscop_destroy(struct Qdisc* qd)
-{
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-	int queue;
-
-	tcf_destroy_chain(&q->filter_list);
-
-	for (queue = 0; queue < QD_NUM(hw); queue++) {
-		skb_queue_purge(&q->requeued[queue]);
-		qdisc_destroy(q->queues[queue]);
-		q->queues[queue] = &noop_qdisc;
-	}
-}
-
-
-/* called whenever parameters are updated on existing qdisc */
-static int wme_qdiscop_tune(struct Qdisc *qd, struct nlattr *opt)
-{
-	return 0;
-}
-
-
-/* called during initial creation of qdisc on device */
-static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
-{
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct net_device *dev = qdisc_dev(qd);
-	struct ieee80211_local *local;
-	struct ieee80211_hw *hw;
-	int err = 0, i;
-
-	/* check that device is a mac80211 device */
-	if (!dev->ieee80211_ptr ||
-	    dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid)
-		return -EINVAL;
-
-	local = wdev_priv(dev->ieee80211_ptr);
-	hw = &local->hw;
-
-	/* only allow on master dev */
-	if (dev != local->mdev)
-		return -EINVAL;
-
-	/* ensure that we are root qdisc */
-	if (qd->parent != TC_H_ROOT)
-		return -EINVAL;
-
-	if (qd->flags & TCQ_F_INGRESS)
-		return -EINVAL;
-
-	/* if options were passed in, set them */
-	if (opt)
-		err = wme_qdiscop_tune(qd, opt);
-
-	/* create child queues */
-	for (i = 0; i < QD_NUM(hw); i++) {
-		skb_queue_head_init(&q->requeued[i]);
-		q->queues[i] = qdisc_create_dflt(qdisc_dev(qd), qd->dev_queue,
-						 &pfifo_qdisc_ops,
-						 qd->handle);
-		if (!q->queues[i]) {
-			q->queues[i] = &noop_qdisc;
-			printk(KERN_ERR "%s child qdisc %i creation failed\n",
-			       dev->name, i);
-		}
-	}
-
-	/* non-aggregation queues: reserve/mark as used */
-	for (i = 0; i < local->hw.queues; i++)
-		set_bit(i, q->qdisc_pool);
-
-	return err;
-}
-
-static int wme_qdiscop_dump(struct Qdisc *qd, struct sk_buff *skb)
-{
-	return -1;
-}
-
-
-static int wme_classop_graft(struct Qdisc *qd, unsigned long arg,
-			     struct Qdisc *new, struct Qdisc **old)
-{
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-	unsigned long queue = arg - 1;
-
-	if (queue >= QD_NUM(hw))
-		return -EINVAL;
-
-	if (!new)
-		new = &noop_qdisc;
-
-	sch_tree_lock(qd);
-	*old = q->queues[queue];
-	q->queues[queue] = new;
-	qdisc_reset(*old);
-	sch_tree_unlock(qd);
-
-	return 0;
-}
-
-
-static struct Qdisc *
-wme_classop_leaf(struct Qdisc *qd, unsigned long arg)
-{
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-	unsigned long queue = arg - 1;
-
-	if (queue >= QD_NUM(hw))
-		return NULL;
-
-	return q->queues[queue];
-}
-
-
-static unsigned long wme_classop_get(struct Qdisc *qd, u32 classid)
-{
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-	unsigned long queue = TC_H_MIN(classid);
-
-	if (queue - 1 >= QD_NUM(hw))
-		return 0;
-
 	return queue;
 }
 
-
-static unsigned long wme_classop_bind(struct Qdisc *qd, unsigned long parent,
-				      u32 classid)
-{
-	return wme_classop_get(qd, classid);
-}
-
-
-static void wme_classop_put(struct Qdisc *q, unsigned long cl)
-{
-}
-
-
-static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent,
-			      struct nlattr **tca, unsigned long *arg)
-{
-	unsigned long cl = *arg;
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-
-	if (cl - 1 > QD_NUM(hw))
-		return -ENOENT;
-
-	/* TODO: put code to program hardware queue parameters here,
-	 * to allow programming from tc command line */
-
-	return 0;
-}
-
-
-/* we don't support deleting hardware queues
- * when we add WMM-SA support - TSPECs may be deleted here */
-static int wme_classop_delete(struct Qdisc *qd, unsigned long cl)
-{
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-
-	if (cl - 1 > QD_NUM(hw))
-		return -ENOENT;
-	return 0;
-}
-
-
-static int wme_classop_dump_class(struct Qdisc *qd, unsigned long cl,
-				  struct sk_buff *skb, struct tcmsg *tcm)
-{
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-
-	if (cl - 1 > QD_NUM(hw))
-		return -ENOENT;
-	tcm->tcm_handle = TC_H_MIN(cl);
-	tcm->tcm_parent = qd->handle;
-	tcm->tcm_info = q->queues[cl-1]->handle; /* do we need this? */
-	return 0;
-}
-
-
-static void wme_classop_walk(struct Qdisc *qd, struct qdisc_walker *arg)
-{
-	struct ieee80211_local *local = wdev_priv(qdisc_dev(qd)->ieee80211_ptr);
-	struct ieee80211_hw *hw = &local->hw;
-	int queue;
-
-	if (arg->stop)
-		return;
-
-	for (queue = 0; queue < QD_NUM(hw); queue++) {
-		if (arg->count < arg->skip) {
-			arg->count++;
-			continue;
-		}
-		/* we should return classids for our internal queues here
-		 * as well as the external ones */
-		if (arg->fn(qd, queue+1, arg) < 0) {
-			arg->stop = 1;
-			break;
-		}
-		arg->count++;
-	}
-}
-
-
-static struct tcf_proto ** wme_classop_find_tcf(struct Qdisc *qd,
-						unsigned long cl)
-{
-	struct ieee80211_sched_data *q = qdisc_priv(qd);
-
-	if (cl)
-		return NULL;
-
-	return &q->filter_list;
-}
-
-
-/* this qdisc is classful (i.e. has classes, some of which may have leaf qdiscs attached)
- * - these are the operations on the classes */
-static const struct Qdisc_class_ops class_ops =
-{
-	.graft = wme_classop_graft,
-	.leaf = wme_classop_leaf,
-
-	.get = wme_classop_get,
-	.put = wme_classop_put,
-	.change = wme_classop_change,
-	.delete = wme_classop_delete,
-	.walk = wme_classop_walk,
-
-	.tcf_chain = wme_classop_find_tcf,
-	.bind_tcf = wme_classop_bind,
-	.unbind_tcf = wme_classop_put,
-
-	.dump = wme_classop_dump_class,
-};
-
-
-/* queueing discipline operations */
-static struct Qdisc_ops wme_qdisc_ops __read_mostly =
-{
-	.next = NULL,
-	.cl_ops = &class_ops,
-	.id = "ieee80211",
-	.priv_size = sizeof(struct ieee80211_sched_data),
-
-	.enqueue = wme_qdiscop_enqueue,
-	.dequeue = wme_qdiscop_dequeue,
-	.requeue = wme_qdiscop_requeue,
-	.drop = NULL, /* drop not needed since we are always the root qdisc */
-
-	.init = wme_qdiscop_init,
-	.reset = wme_qdiscop_reset,
-	.destroy = wme_qdiscop_destroy,
-	.change = wme_qdiscop_tune,
-
-	.dump = wme_qdiscop_dump,
-};
-
-
-void ieee80211_install_qdisc(struct net_device *dev)
-{
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
-	struct Qdisc *qdisc;
-
-	qdisc = qdisc_create_dflt(dev, txq,
-				  &wme_qdisc_ops, TC_H_ROOT);
-	if (!qdisc) {
-		printk(KERN_ERR "%s: qdisc installation failed\n", dev->name);
-		return;
-	}
-
-	/* same handle as would be allocated by qdisc_alloc_handle() */
-	qdisc->handle = 0x80010000;
-
-	qdisc_lock_tree(dev);
-	list_add_tail(&qdisc->list, &txq->qdisc_list);
-	txq->qdisc_sleeping = qdisc;
-	qdisc_unlock_tree(dev);
-}
-
-
-int ieee80211_qdisc_installed(struct net_device *dev)
-{
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
-
-	return txq->qdisc_sleeping->ops == &wme_qdisc_ops;
-}
-
-
-int ieee80211_wme_register(void)
-{
-	return register_qdisc(&wme_qdisc_ops);
-}
-
-
-void ieee80211_wme_unregister(void)
-{
-	unregister_qdisc(&wme_qdisc_ops);
-}
-
 int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
-			struct sta_info *sta, u16 tid)
+			       struct sta_info *sta, u16 tid)
 {
 	int i;
-	struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, 0);
-	struct ieee80211_sched_data *q =
-			qdisc_priv(txq->qdisc_sleeping);
-	DECLARE_MAC_BUF(mac);
 
 	/* prepare the filter and save it for the SW queue
 	 * matching the received HW queue */
@@ -629,8 +195,8 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 		return -EPERM;
 
 	/* try to get a Qdisc from the pool */
-	for (i = local->hw.queues; i < QD_NUM(&local->hw); i++)
-		if (!test_and_set_bit(i, q->qdisc_pool)) {
+	for (i = local->hw.queues; i < ieee80211_num_queues(&local->hw); i++)
+		if (!test_and_set_bit(i, local->queue_pool)) {
 			ieee80211_stop_queue(local_to_hw(local), i);
 			sta->tid_to_tx_q[tid] = i;
 
@@ -639,11 +205,13 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 			 * on the previous queue
 			 * since HT is strict in order */
 #ifdef CONFIG_MAC80211_HT_DEBUG
-			if (net_ratelimit())
+			if (net_ratelimit()) {
+				DECLARE_MAC_BUF(mac);
 				printk(KERN_DEBUG "allocated aggregation queue"
 					" %d tid %d addr %s pool=0x%lX\n",
 					i, tid, print_mac(mac, sta->addr),
-					q->qdisc_pool[0]);
+					local->queue_pool[0]);
+			}
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 			return 0;
 		}
@@ -658,40 +226,68 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 				   struct sta_info *sta, u16 tid,
 				   u8 requeue)
 {
-	struct ieee80211_hw *hw = &local->hw;
-	struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, 0);
-	struct ieee80211_sched_data *q =
-		qdisc_priv(txq->qdisc_sleeping);
 	int agg_queue = sta->tid_to_tx_q[tid];
+	struct ieee80211_hw *hw = &local->hw;
 
 	/* return the qdisc to the pool */
-	clear_bit(agg_queue, q->qdisc_pool);
-	sta->tid_to_tx_q[tid] = QD_NUM(hw);
+	clear_bit(agg_queue, local->queue_pool);
+	sta->tid_to_tx_q[tid] = ieee80211_num_queues(hw);
 
-	if (requeue)
+	if (requeue) {
 		ieee80211_requeue(local, agg_queue);
-	else
-		q->queues[agg_queue]->ops->reset(q->queues[agg_queue]);
+	} else {
+		struct netdev_queue *txq;
+
+		txq = netdev_get_tx_queue(local->mdev, agg_queue);
+
+		spin_lock_bh(&txq->lock);
+		qdisc_reset(txq->qdisc);
+		spin_unlock_bh(&txq->lock);
+	}
 }
 
 void ieee80211_requeue(struct ieee80211_local *local, int queue)
 {
-	struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, 0);
-	struct Qdisc *root_qd = txq->qdisc_sleeping;
-	struct ieee80211_sched_data *q = qdisc_priv(root_qd);
-	struct Qdisc *qdisc = q->queues[queue];
-	struct sk_buff *skb = NULL;
+	struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, queue);
+	struct sk_buff_head list;
+	struct Qdisc *qdisc;
 	u32 len;
 
+	rcu_read_lock_bh();
+
+	qdisc = rcu_dereference(txq->qdisc);
 	if (!qdisc || !qdisc->dequeue)
-		return;
+		goto out_unlock;
+
+	skb_queue_head_init(&list);
 
+	spin_lock(&txq->lock);
 	for (len = qdisc->q.qlen; len > 0; len--) {
-		skb = qdisc->dequeue(qdisc);
-		root_qd->q.qlen--;
-		/* packet will be classified again and */
-		/* skb->packet_data->queue will be overridden if needed */
+		struct sk_buff *skb = qdisc->dequeue(qdisc);
+
 		if (skb)
-			wme_qdiscop_enqueue(skb, root_qd);
+			__skb_queue_tail(&list, skb);
+	}
+	spin_unlock(&txq->lock);
+
+	for (len = list.qlen; len > 0; len--) {
+		struct sk_buff *skb = __skb_dequeue(&list);
+		u16 new_queue;
+
+		BUG_ON(!skb);
+		new_queue = ieee80211_select_queue(local->mdev, skb);
+		skb_set_queue_mapping(skb, new_queue);
+
+		txq = netdev_get_tx_queue(local->mdev, new_queue);
+
+		spin_lock(&txq->lock);
+
+		qdisc = rcu_dereference(txq->qdisc);
+		qdisc->enqueue(skb, qdisc);
+
+		spin_unlock(&txq->lock);
 	}
+
+out_unlock:
+	rcu_read_unlock_bh();
 }
diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h
index 1aca609eccf..04de28c071a 100644
--- a/net/mac80211/wme.h
+++ b/net/mac80211/wme.h
@@ -23,45 +23,12 @@
 
 extern const int ieee802_1d_to_ac[8];
 
-#ifdef CONFIG_MAC80211_QOS
-void ieee80211_install_qdisc(struct net_device *dev);
-int ieee80211_qdisc_installed(struct net_device *dev);
+u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb);
 int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 			       struct sta_info *sta, u16 tid);
 void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 				   struct sta_info *sta, u16 tid,
 				   u8 requeue);
 void ieee80211_requeue(struct ieee80211_local *local, int queue);
-int ieee80211_wme_register(void);
-void ieee80211_wme_unregister(void);
-#else
-static inline void ieee80211_install_qdisc(struct net_device *dev)
-{
-}
-static inline int ieee80211_qdisc_installed(struct net_device *dev)
-{
-	return 0;
-}
-static inline int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
-					     struct sta_info *sta, u16 tid)
-{
-	return -EAGAIN;
-}
-static inline void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
-						 struct sta_info *sta, u16 tid,
-						 u8 requeue)
-{
-}
-static inline void ieee80211_requeue(struct ieee80211_local *local, int queue)
-{
-}
-static inline int ieee80211_wme_register(void)
-{
-	return 0;
-}
-static inline void ieee80211_wme_unregister(void)
-{
-}
-#endif /* CONFIG_NET_SCHED */
 
 #endif /* _WME_H */
-- 
cgit v1.2.3-70-g09d2


From 8f0f2223cc08a5ae9a77f40edfe02e8a9f1abd77 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Jul 2008 03:47:03 -0700
Subject: net: Implement simple sw TX hashing.

It just xor hashes over IPv4/IPv6 addresses and ports of transport.

The only assumption it makes is that skb_network_header() is set
correctly.

With bug fixes from Eric Dumazet.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7ca9564d2f4..467bfb32512 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -121,6 +121,9 @@
 #include <linux/ctype.h>
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
 
 #include "net-sysfs.h"
 
@@ -1665,6 +1668,53 @@ out_kfree_skb:
  *          --BLG
  */
 
+static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+{
+	u32 *addr, *ports, hash, ihl;
+	u8 ip_proto;
+	int alen;
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		ip_proto = ip_hdr(skb)->protocol;
+		addr = &ip_hdr(skb)->saddr;
+		ihl = ip_hdr(skb)->ihl;
+		alen = 2;
+		break;
+	case __constant_htons(ETH_P_IPV6):
+		ip_proto = ipv6_hdr(skb)->nexthdr;
+		addr = &ipv6_hdr(skb)->saddr.s6_addr32[0];
+		ihl = (40 >> 2);
+		alen = 8;
+		break;
+	default:
+		return 0;
+	}
+
+	ports = (u32 *) (skb_network_header(skb) + (ihl * 4));
+
+	hash = 0;
+	while (alen--)
+		hash ^= *addr++;
+
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_DCCP:
+	case IPPROTO_ESP:
+	case IPPROTO_AH:
+	case IPPROTO_SCTP:
+	case IPPROTO_UDPLITE:
+		hash ^= *ports;
+		break;
+
+	default:
+		break;
+	}
+
+	return hash % dev->real_num_tx_queues;
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
@@ -1672,6 +1722,8 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 
 	if (dev->select_queue)
 		queue_index = dev->select_queue(dev, skb);
+	else if (dev->real_num_tx_queues > 1)
+		queue_index = simple_tx_hash(dev, skb);
 
 	skb_set_queue_mapping(skb, queue_index);
 	return netdev_get_tx_queue(dev, queue_index);
-- 
cgit v1.2.3-70-g09d2


From d3b753db7c4f1f37a98b51974d484fda5d86dab5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Jul 2008 20:14:35 -0700
Subject: pkt_sched: Move gso_skb into Qdisc.

We liberate any dangling gso_skb during qdisc destruction.

It really only matters for the root qdisc.  But when qdiscs
can be shared by multiple netdev_queue objects, we can't
have the gso_skb in the netdev_queue any more.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 include/net/sch_generic.h |  1 +
 net/sched/sch_generic.c   | 19 +++++++++----------
 3 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 787fbfc5aeb..0883fcf2d16 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -449,7 +449,6 @@ struct netdev_queue {
 	struct net_device	*dev;
 	struct Qdisc		*qdisc;
 	unsigned long		state;
-	struct sk_buff		*gso_skb;
 	spinlock_t		_xmit_lock;
 	int			xmit_lock_owner;
 	struct Qdisc		*qdisc_sleeping;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b47f556c66f..b96c3d9e10a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -36,6 +36,7 @@ struct Qdisc
 	u32			handle;
 	u32			parent;
 	atomic_t		refcnt;
+	struct sk_buff		*gso_skb;
 	struct sk_buff_head	q;
 	struct netdev_queue	*dev_queue;
 	struct list_head	list;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2f575b9017d..2bd75befa06 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -77,7 +77,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb,
 				  struct Qdisc *q)
 {
 	if (unlikely(skb->next))
-		dev_queue->gso_skb = skb;
+		q->gso_skb = skb;
 	else
 		q->ops->requeue(skb, q);
 
@@ -85,13 +85,12 @@ static inline int dev_requeue_skb(struct sk_buff *skb,
 	return 0;
 }
 
-static inline struct sk_buff *dequeue_skb(struct netdev_queue *dev_queue,
-					  struct Qdisc *q)
+static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 {
 	struct sk_buff *skb;
 
-	if ((skb = dev_queue->gso_skb))
-		dev_queue->gso_skb = NULL;
+	if ((skb = q->gso_skb))
+		q->gso_skb = NULL;
 	else
 		skb = q->dequeue(q);
 
@@ -155,10 +154,9 @@ static inline int qdisc_restart(struct netdev_queue *txq)
 	struct sk_buff *skb;
 
 	/* Dequeue packet */
-	if (unlikely((skb = dequeue_skb(txq, q)) == NULL))
+	if (unlikely((skb = dequeue_skb(q)) == NULL))
 		return 0;
 
-
 	/* And release queue */
 	spin_unlock(&txq->lock);
 
@@ -643,8 +641,8 @@ static void dev_deactivate_queue(struct net_device *dev,
 				 void *_qdisc_default)
 {
 	struct Qdisc *qdisc_default = _qdisc_default;
+	struct sk_buff *skb = NULL;
 	struct Qdisc *qdisc;
-	struct sk_buff *skb;
 
 	spin_lock_bh(&dev_queue->lock);
 
@@ -652,9 +650,10 @@ static void dev_deactivate_queue(struct net_device *dev,
 	if (qdisc) {
 		dev_queue->qdisc = qdisc_default;
 		qdisc_reset(qdisc);
+
+		skb = qdisc->gso_skb;
+		qdisc->gso_skb = NULL;
 	}
-	skb = dev_queue->gso_skb;
-	dev_queue->gso_skb = NULL;
 
 	spin_unlock_bh(&dev_queue->lock);
 
-- 
cgit v1.2.3-70-g09d2


From e2627c8c2241bce45e368e150654d076b58a4595 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jul 2008 00:56:32 -0700
Subject: pkt_sched: Make QDISC_RUNNING a qdisc state.

Currently it is associated with a netdev_queue, but when we have
qdisc sharing that no longer makes any sense.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 include/net/pkt_sched.h   |  4 +++-
 include/net/sch_generic.h |  6 ++++++
 net/sched/sch_generic.c   | 18 ++++++++++--------
 4 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0883fcf2d16..9240a95793b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -441,7 +441,6 @@ static inline void napi_synchronize(const struct napi_struct *n)
 enum netdev_queue_state_t
 {
 	__QUEUE_STATE_XOFF,
-	__QUEUE_STATE_QDISC_RUNNING,
 };
 
 struct netdev_queue {
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index cb952781560..06a442d8518 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -88,8 +88,10 @@ extern void __qdisc_run(struct netdev_queue *txq);
 
 static inline void qdisc_run(struct netdev_queue *txq)
 {
+	struct Qdisc *q = txq->qdisc;
+
 	if (!netif_tx_queue_stopped(txq) &&
-	    !test_and_set_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state))
+	    !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
 		__qdisc_run(txq);
 }
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b96c3d9e10a..bc2a09da21b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,6 +23,11 @@ struct qdisc_rate_table
 	int		refcnt;
 };
 
+enum qdisc_state_t
+{
+	__QDISC_STATE_RUNNING,
+};
+
 struct Qdisc
 {
 	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
@@ -36,6 +41,7 @@ struct Qdisc
 	u32			handle;
 	u32			parent;
 	atomic_t		refcnt;
+	unsigned long		state;
 	struct sk_buff		*gso_skb;
 	struct sk_buff_head	q;
 	struct netdev_queue	*dev_queue;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2bd75befa06..ac208c2b2d1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -130,8 +130,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 /*
  * NOTE: Called under queue->lock with locally disabled BH.
  *
- * __QUEUE_STATE_QDISC_RUNNING guarantees only one CPU can process
- * this queue at a time. queue->lock serializes queue accesses for
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. queue->lock serializes queue accesses for
  * this queue AND txq->qdisc pointer itself.
  *
  *  netif_tx_lock serializes accesses to device driver.
@@ -146,9 +146,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
  *				>0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct netdev_queue *txq)
+static inline int qdisc_restart(struct netdev_queue *txq,
+				struct Qdisc *q)
 {
-	struct Qdisc *q = txq->qdisc;
 	int ret = NETDEV_TX_BUSY;
 	struct net_device *dev;
 	struct sk_buff *skb;
@@ -168,7 +168,6 @@ static inline int qdisc_restart(struct netdev_queue *txq)
 	HARD_TX_UNLOCK(dev, txq);
 
 	spin_lock(&txq->lock);
-	q = txq->qdisc;
 
 	switch (ret) {
 	case NETDEV_TX_OK:
@@ -197,8 +196,9 @@ static inline int qdisc_restart(struct netdev_queue *txq)
 void __qdisc_run(struct netdev_queue *txq)
 {
 	unsigned long start_time = jiffies;
+	struct Qdisc *q = txq->qdisc;
 
-	while (qdisc_restart(txq)) {
+	while (qdisc_restart(txq, q)) {
 		if (netif_tx_queue_stopped(txq))
 			break;
 
@@ -213,7 +213,7 @@ void __qdisc_run(struct netdev_queue *txq)
 		}
 	}
 
-	clear_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state);
+	clear_bit(__QDISC_STATE_RUNNING, &q->state);
 }
 
 static void dev_watchdog(unsigned long arg)
@@ -666,14 +666,16 @@ static bool some_qdisc_is_running(struct net_device *dev, int lock)
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *dev_queue;
+		struct Qdisc *q;
 		int val;
 
 		dev_queue = netdev_get_tx_queue(dev, i);
+		q = dev_queue->qdisc;
 
 		if (lock)
 			spin_lock_bh(&dev_queue->lock);
 
-		val = test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state);
+		val = test_bit(__QDISC_STATE_RUNNING, &q->state);
 
 		if (lock)
 			spin_unlock_bh(&dev_queue->lock);
-- 
cgit v1.2.3-70-g09d2


From 7698b4fcabcd790efc4f226bada1e7b5870653af Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jul 2008 01:42:40 -0700
Subject: pkt_sched: Add and use qdisc_root() and qdisc_root_lock().

When code wants to lock the qdisc tree state, the logic
operation it's doing is locking the top-level qdisc that
sits of the root of the netdev_queue.

Add qdisc_root_lock() to represent this and convert the
easiest cases.

In order for this to work out in all cases, we have to
hook up the noop_qdisc to a dummy netdev_queue.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 12 ++++++++++++
 net/sched/sch_api.c       |  8 ++++----
 net/sched/sch_cbq.c       |  9 +++++----
 net/sched/sch_generic.c   | 21 ++++++++++++++++-----
 net/sched/sch_hfsc.c      |  4 ++--
 net/sched/sch_htb.c       | 18 ++++++++++--------
 net/sched/sch_netem.c     |  9 ++++++---
 7 files changed, 55 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index bc2a09da21b..92417825d38 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -161,6 +161,18 @@ struct tcf_proto
 	struct tcf_proto_ops	*ops;
 };
 
+static inline struct Qdisc *qdisc_root(struct Qdisc *qdisc)
+{
+	return qdisc->dev_queue->qdisc;
+}
+
+static inline spinlock_t *qdisc_root_lock(struct Qdisc *qdisc)
+{
+	struct Qdisc *root = qdisc_root(qdisc);
+
+	return &root->dev_queue->lock;
+}
+
 static inline struct net_device *qdisc_dev(struct Qdisc *qdisc)
 {
 	return qdisc->dev_queue->dev;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 830ccc544a1..19c244a0083 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -633,7 +633,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
 		if (tca[TCA_RATE]) {
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
-						&sch->dev_queue->lock,
+						qdisc_root_lock(sch),
 						tca[TCA_RATE]);
 			if (err) {
 				/*
@@ -675,7 +675,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 	}
 	if (tca[TCA_RATE])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
-				      &sch->dev_queue->lock, tca[TCA_RATE]);
+				      qdisc_root_lock(sch), tca[TCA_RATE]);
 	return 0;
 }
 
@@ -967,7 +967,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	q->qstats.qlen = q->q.qlen;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-					 TCA_XSTATS, &q->dev_queue->lock, &d) < 0)
+					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
 		goto nla_put_failure;
 
 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -1216,7 +1216,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 		goto nla_put_failure;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-					 TCA_XSTATS, &q->dev_queue->lock, &d) < 0)
+					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
 		goto nla_put_failure;
 
 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4efc836cbf3..37ae653db68 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1744,12 +1744,13 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg)
 
 	if (--cl->refcnt == 0) {
 #ifdef CONFIG_NET_CLS_ACT
+		spinlock_t *root_lock = qdisc_root_lock(sch);
 		struct cbq_sched_data *q = qdisc_priv(sch);
 
-		spin_lock_bh(&sch->dev_queue->lock);
+		spin_lock_bh(root_lock);
 		if (q->rx_class == cl)
 			q->rx_class = NULL;
-		spin_unlock_bh(&sch->dev_queue->lock);
+		spin_unlock_bh(root_lock);
 #endif
 
 		cbq_destroy_class(sch, cl);
@@ -1828,7 +1829,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      &sch->dev_queue->lock,
+					      qdisc_root_lock(sch),
 					      tca[TCA_RATE]);
 		return 0;
 	}
@@ -1919,7 +1920,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &sch->dev_queue->lock, tca[TCA_RATE]);
+				  qdisc_root_lock(sch), tca[TCA_RATE]);
 
 	*arg = (unsigned long)cl;
 	return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ac208c2b2d1..739a8711ab3 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -151,14 +151,17 @@ static inline int qdisc_restart(struct netdev_queue *txq,
 {
 	int ret = NETDEV_TX_BUSY;
 	struct net_device *dev;
+	spinlock_t *root_lock;
 	struct sk_buff *skb;
 
 	/* Dequeue packet */
 	if (unlikely((skb = dequeue_skb(q)) == NULL))
 		return 0;
 
-	/* And release queue */
-	spin_unlock(&txq->lock);
+	root_lock = qdisc_root_lock(q);
+
+	/* And release qdisc */
+	spin_unlock(root_lock);
 
 	dev = txq->dev;
 
@@ -167,7 +170,7 @@ static inline int qdisc_restart(struct netdev_queue *txq,
 		ret = dev_hard_start_xmit(skb, dev, txq);
 	HARD_TX_UNLOCK(dev, txq);
 
-	spin_lock(&txq->lock);
+	spin_lock(root_lock);
 
 	switch (ret) {
 	case NETDEV_TX_OK:
@@ -345,12 +348,18 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 	.owner		=	THIS_MODULE,
 };
 
+static struct netdev_queue noop_netdev_queue = {
+	.lock		=	__SPIN_LOCK_UNLOCKED(noop_netdev_queue.lock),
+	.qdisc		=	&noop_qdisc,
+};
+
 struct Qdisc noop_qdisc = {
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noop_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
+	.dev_queue	=	&noop_netdev_queue,
 };
 EXPORT_SYMBOL(noop_qdisc);
 
@@ -666,19 +675,21 @@ static bool some_qdisc_is_running(struct net_device *dev, int lock)
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *dev_queue;
+		spinlock_t *root_lock;
 		struct Qdisc *q;
 		int val;
 
 		dev_queue = netdev_get_tx_queue(dev, i);
 		q = dev_queue->qdisc;
+		root_lock = qdisc_root_lock(q);
 
 		if (lock)
-			spin_lock_bh(&dev_queue->lock);
+			spin_lock_bh(root_lock);
 
 		val = test_bit(__QDISC_STATE_RUNNING, &q->state);
 
 		if (lock)
-			spin_unlock_bh(&dev_queue->lock);
+			spin_unlock_bh(root_lock);
 
 		if (val)
 			return true;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 997d520ca58..5090708ba38 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      &sch->dev_queue->lock,
+					      qdisc_root_lock(sch),
 					      tca[TCA_RATE]);
 		return 0;
 	}
@@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &sch->dev_queue->lock, tca[TCA_RATE]);
+				  qdisc_root_lock(sch), tca[TCA_RATE]);
 	*arg = (unsigned long)cl;
 	return 0;
 }
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c8ca54cc26b..ee48457eaa4 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1039,11 +1039,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
+	spinlock_t *root_lock = qdisc_root_lock(sch);
 	struct htb_sched *q = qdisc_priv(sch);
 	struct nlattr *nest;
 	struct tc_htb_glob gopt;
 
-	spin_lock_bh(&sch->dev_queue->lock);
+	spin_lock_bh(root_lock);
 
 	gopt.direct_pkts = q->direct_pkts;
 	gopt.version = HTB_VER;
@@ -1057,11 +1058,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
 	nla_nest_end(skb, nest);
 
-	spin_unlock_bh(&sch->dev_queue->lock);
+	spin_unlock_bh(root_lock);
 	return skb->len;
 
 nla_put_failure:
-	spin_unlock_bh(&sch->dev_queue->lock);
+	spin_unlock_bh(root_lock);
 	nla_nest_cancel(skb, nest);
 	return -1;
 }
@@ -1070,10 +1071,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 			  struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
+	spinlock_t *root_lock = qdisc_root_lock(sch);
 	struct nlattr *nest;
 	struct tc_htb_opt opt;
 
-	spin_lock_bh(&sch->dev_queue->lock);
+	spin_lock_bh(root_lock);
 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->common.classid;
 	if (!cl->level && cl->un.leaf.q)
@@ -1095,11 +1097,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 
 	nla_nest_end(skb, nest);
-	spin_unlock_bh(&sch->dev_queue->lock);
+	spin_unlock_bh(root_lock);
 	return skb->len;
 
 nla_put_failure:
-	spin_unlock_bh(&sch->dev_queue->lock);
+	spin_unlock_bh(root_lock);
 	nla_nest_cancel(skb, nest);
 	return -1;
 }
@@ -1365,7 +1367,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			goto failure;
 
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  &sch->dev_queue->lock,
+				  qdisc_root_lock(sch),
 				  tca[TCA_RATE] ? : &est.nla);
 		cl->refcnt = 1;
 		cl->children = 0;
@@ -1420,7 +1422,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	} else {
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      &sch->dev_queue->lock,
+					      qdisc_root_lock(sch),
 					      tca[TCA_RATE]);
 		sch_tree_lock(sch);
 	}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index bc585f2089f..c5ea40c9eb2 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -180,7 +180,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	 * skb will be queued.
 	 */
 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
-		struct Qdisc *rootq = sch->dev_queue->qdisc;
+		struct Qdisc *rootq = qdisc_root(sch);
 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 		q->duplicate = 0;
 
@@ -319,6 +319,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	unsigned long n = nla_len(attr)/sizeof(__s16);
 	const __s16 *data = nla_data(attr);
+	spinlock_t *root_lock;
 	struct disttable *d;
 	int i;
 
@@ -333,9 +334,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	for (i = 0; i < n; i++)
 		d->table[i] = data[i];
 
-	spin_lock_bh(&sch->dev_queue->lock);
+	root_lock = qdisc_root_lock(sch);
+
+	spin_lock_bh(root_lock);
 	d = xchg(&q->delay_dist, d);
-	spin_unlock_bh(&sch->dev_queue->lock);
+	spin_unlock_bh(root_lock);
 
 	kfree(d);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 37437bb2e1ae8af470dfcd5b4ff454110894ccaf Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jul 2008 02:15:04 -0700
Subject: pkt_sched: Schedule qdiscs instead of netdev_queue.

When we have shared qdiscs, packets come out of the qdiscs
for multiple transmit queues.

Therefore it doesn't make any sense to schedule the transmit
queue when logically we cannot know ahead of time the TX
queue of the SKB that the qdisc->dequeue() will give us.

Just for sanity I added a BUG check to make sure we never
get into a state where the noop_qdisc is scheduled.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 12 ++++-----
 include/net/pkt_sched.h   | 11 +++-----
 include/net/sch_generic.h |  2 ++
 net/core/dev.c            | 68 +++++++++++++++++++----------------------------
 net/sched/sch_api.c       |  3 +--
 net/sched/sch_cbq.c       |  2 +-
 net/sched/sch_generic.c   | 30 ++++++++++-----------
 7 files changed, 55 insertions(+), 73 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9240a95793b..1e839fa0143 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -275,7 +275,6 @@ enum netdev_state_t
 {
 	__LINK_STATE_START,
 	__LINK_STATE_PRESENT,
-	__LINK_STATE_SCHED,
 	__LINK_STATE_NOCARRIER,
 	__LINK_STATE_LINKWATCH_PENDING,
 	__LINK_STATE_DORMANT,
@@ -452,7 +451,6 @@ struct netdev_queue {
 	int			xmit_lock_owner;
 	struct Qdisc		*qdisc_sleeping;
 	struct list_head	qdisc_list;
-	struct netdev_queue	*next_sched;
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -969,7 +967,7 @@ static inline int unregister_gifconf(unsigned int family)
  */
 struct softnet_data
 {
-	struct netdev_queue	*output_queue;
+	struct Qdisc		*output_queue;
 	struct sk_buff_head	input_pkt_queue;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
@@ -984,12 +982,12 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data);
 
 #define HAVE_NETIF_QUEUE
 
-extern void __netif_schedule(struct netdev_queue *txq);
+extern void __netif_schedule(struct Qdisc *q);
 
 static inline void netif_schedule_queue(struct netdev_queue *txq)
 {
 	if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
-		__netif_schedule(txq);
+		__netif_schedule(txq->qdisc);
 }
 
 static inline void netif_tx_schedule_all(struct net_device *dev)
@@ -1042,7 +1040,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
 	}
 #endif
 	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state))
-		__netif_schedule(dev_queue);
+		__netif_schedule(dev_queue->qdisc);
 }
 
 static inline void netif_wake_queue(struct net_device *dev)
@@ -1186,7 +1184,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 		return;
 #endif
 	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state))
-		__netif_schedule(txq);
+		__netif_schedule(txq->qdisc);
 }
 
 /**
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 06a442d8518..e4e30052e4e 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -84,15 +84,12 @@ extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 		struct nlattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 
-extern void __qdisc_run(struct netdev_queue *txq);
+extern void __qdisc_run(struct Qdisc *q);
 
-static inline void qdisc_run(struct netdev_queue *txq)
+static inline void qdisc_run(struct Qdisc *q)
 {
-	struct Qdisc *q = txq->qdisc;
-
-	if (!netif_tx_queue_stopped(txq) &&
-	    !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
-		__qdisc_run(txq);
+	if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
+		__qdisc_run(q);
 }
 
 extern int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 92417825d38..3cc4b5cd8c6 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -26,6 +26,7 @@ struct qdisc_rate_table
 enum qdisc_state_t
 {
 	__QDISC_STATE_RUNNING,
+	__QDISC_STATE_SCHED,
 };
 
 struct Qdisc
@@ -45,6 +46,7 @@ struct Qdisc
 	struct sk_buff		*gso_skb;
 	struct sk_buff_head	q;
 	struct netdev_queue	*dev_queue;
+	struct Qdisc		*next_sched;
 	struct list_head	list;
 
 	struct gnet_stats_basic	bstats;
diff --git a/net/core/dev.c b/net/core/dev.c
index 467bfb32512..0b909b74f69 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1323,18 +1323,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 }
 
 
-void __netif_schedule(struct netdev_queue *txq)
+void __netif_schedule(struct Qdisc *q)
 {
-	struct net_device *dev = txq->dev;
+	BUG_ON(q == &noop_qdisc);
 
-	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
 		struct softnet_data *sd;
 		unsigned long flags;
 
 		local_irq_save(flags);
 		sd = &__get_cpu_var(softnet_data);
-		txq->next_sched = sd->output_queue;
-		sd->output_queue = txq;
+		q->next_sched = sd->output_queue;
+		sd->output_queue = q;
 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
 		local_irq_restore(flags);
 	}
@@ -1771,37 +1771,23 @@ gso:
 	rcu_read_lock_bh();
 
 	txq = dev_pick_tx(dev, skb);
-	spin_lock_prefetch(&txq->lock);
-
-	/* Updates of qdisc are serialized by queue->lock.
-	 * The struct Qdisc which is pointed to by qdisc is now a
-	 * rcu structure - it may be accessed without acquiring
-	 * a lock (but the structure may be stale.) The freeing of the
-	 * qdisc will be deferred until it's known that there are no
-	 * more references to it.
-	 *
-	 * If the qdisc has an enqueue function, we still need to
-	 * hold the queue->lock before calling it, since queue->lock
-	 * also serializes access to the device queue.
-	 */
-
 	q = rcu_dereference(txq->qdisc);
+
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
-		/* Grab device queue */
-		spin_lock(&txq->lock);
-		q = txq->qdisc;
-		if (q->enqueue) {
-			rc = q->enqueue(skb, q);
-			qdisc_run(txq);
-			spin_unlock(&txq->lock);
-
-			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
-			goto out;
-		}
-		spin_unlock(&txq->lock);
+		spinlock_t *root_lock = qdisc_root_lock(q);
+
+		spin_lock(root_lock);
+
+		rc = q->enqueue(skb, q);
+		qdisc_run(q);
+
+		spin_unlock(root_lock);
+
+		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+		goto out;
 	}
 
 	/* The device has no queue. Common case for software devices:
@@ -1974,7 +1960,7 @@ static void net_tx_action(struct softirq_action *h)
 	}
 
 	if (sd->output_queue) {
-		struct netdev_queue *head;
+		struct Qdisc *head;
 
 		local_irq_disable();
 		head = sd->output_queue;
@@ -1982,18 +1968,20 @@ static void net_tx_action(struct softirq_action *h)
 		local_irq_enable();
 
 		while (head) {
-			struct netdev_queue *txq = head;
-			struct net_device *dev = txq->dev;
+			struct Qdisc *q = head;
+			spinlock_t *root_lock;
+
 			head = head->next_sched;
 
 			smp_mb__before_clear_bit();
-			clear_bit(__LINK_STATE_SCHED, &dev->state);
+			clear_bit(__QDISC_STATE_SCHED, &q->state);
 
-			if (spin_trylock(&txq->lock)) {
-				qdisc_run(txq);
-				spin_unlock(&txq->lock);
+			root_lock = qdisc_root_lock(q);
+			if (spin_trylock(root_lock)) {
+				qdisc_run(q);
+				spin_unlock(root_lock);
 			} else {
-				netif_schedule_queue(txq);
+				__netif_schedule(q);
 			}
 		}
 	}
@@ -4459,7 +4447,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 			    void *ocpu)
 {
 	struct sk_buff **list_skb;
-	struct netdev_queue **list_net;
+	struct Qdisc **list_net;
 	struct sk_buff *skb;
 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
 	struct softnet_data *sd, *oldsd;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 19c244a0083..8e8c5becc34 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -294,11 +294,10 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 {
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
 						 timer);
-	struct netdev_queue *txq = wd->qdisc->dev_queue;
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
 	smp_wmb();
-	netif_schedule_queue(txq);
+	__netif_schedule(wd->qdisc);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 37ae653db68..a3953bbe2d7 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -650,7 +650,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule_queue(sch->dev_queue);
+	__netif_schedule(sch);
 	return HRTIMER_NORESTART;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 739a8711ab3..dd5c4e70abe 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -72,16 +72,14 @@ static inline int qdisc_qlen(struct Qdisc *q)
 	return q->q.qlen;
 }
 
-static inline int dev_requeue_skb(struct sk_buff *skb,
-				  struct netdev_queue *dev_queue,
-				  struct Qdisc *q)
+static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	if (unlikely(skb->next))
 		q->gso_skb = skb;
 	else
 		q->ops->requeue(skb, q);
 
-	netif_schedule_queue(dev_queue);
+	__netif_schedule(q);
 	return 0;
 }
 
@@ -121,7 +119,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 		 * some time.
 		 */
 		__get_cpu_var(netdev_rx_stat).cpu_collision++;
-		ret = dev_requeue_skb(skb, dev_queue, q);
+		ret = dev_requeue_skb(skb, q);
 	}
 
 	return ret;
@@ -146,9 +144,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
  *				>0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct netdev_queue *txq,
-				struct Qdisc *q)
+static inline int qdisc_restart(struct Qdisc *q)
 {
+	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
 	struct net_device *dev;
 	spinlock_t *root_lock;
@@ -163,7 +161,8 @@ static inline int qdisc_restart(struct netdev_queue *txq,
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	dev = txq->dev;
+	dev = qdisc_dev(q);
+	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_subqueue_stopped(dev, skb))
@@ -189,29 +188,28 @@ static inline int qdisc_restart(struct netdev_queue *txq,
 			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
 			       dev->name, ret, q->q.qlen);
 
-		ret = dev_requeue_skb(skb, txq, q);
+		ret = dev_requeue_skb(skb, q);
 		break;
 	}
 
+	if (ret && netif_tx_queue_stopped(txq))
+		ret = 0;
+
 	return ret;
 }
 
-void __qdisc_run(struct netdev_queue *txq)
+void __qdisc_run(struct Qdisc *q)
 {
 	unsigned long start_time = jiffies;
-	struct Qdisc *q = txq->qdisc;
-
-	while (qdisc_restart(txq, q)) {
-		if (netif_tx_queue_stopped(txq))
-			break;
 
+	while (qdisc_restart(q)) {
 		/*
 		 * Postpone processing if
 		 * 1. another process needs the CPU;
 		 * 2. we've been doing it for too long.
 		 */
 		if (need_resched() || jiffies != start_time) {
-			netif_schedule_queue(txq);
+			__netif_schedule(q);
 			break;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From 16361127ebed0fb8f9d7cc94c6e137eaf710f676 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jul 2008 02:23:17 -0700
Subject: pkt_sched: dev_init_scheduler() does not need to lock qdisc tree.

We are registering the device, there is no way anyone can get
at this object's qdiscs yet in any meaningful way.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index dd5c4e70abe..7e078c59319 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -741,10 +741,8 @@ static void dev_init_scheduler_queue(struct net_device *dev,
 
 void dev_init_scheduler(struct net_device *dev)
 {
-	qdisc_lock_tree(dev);
 	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
 	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);
-	qdisc_unlock_tree(dev);
 
 	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
 }
-- 
cgit v1.2.3-70-g09d2


From 8a34c5dc3a7c6431f1cd94c0904be81b296e08ca Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 00:47:45 -0700
Subject: pkt_sched: Perform bulk of qdisc destruction in RCU.

This allows less strict control of access to the qdisc attached to a
netdev_queue.  It is even allowed to enqueue into a qdisc which is
in the process of being destroyed.  The RCU handler will toss out
those packets.

We will need this to handle sharing of a qdisc amongst multiple
TX queues.  In such a setup the lock has to be shared, so will
be inside of the qdisc itself.  At which point the netdev_queue
lock cannot be used to hard synchronize access to the ->qdisc
pointer.

One operation we have to keep inside of qdisc_destroy() is the list
deletion.  It is the only piece of state visible after the RCU quiesce
period, so we have to undo it early and under the appropriate locking.

The operations in the RCU handler do not need any looking because the
qdisc tree is no longer visible to anything at that point.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7e078c59319..082db8abe70 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -545,6 +545,17 @@ EXPORT_SYMBOL(qdisc_reset);
 static void __qdisc_destroy(struct rcu_head *head)
 {
 	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
+	const struct Qdisc_ops  *ops = qdisc->ops;
+
+	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
+	if (ops->reset)
+		ops->reset(qdisc);
+	if (ops->destroy)
+		ops->destroy(qdisc);
+
+	module_put(ops->owner);
+	dev_put(qdisc_dev(qdisc));
+
 	kfree((char *) qdisc - qdisc->padded);
 }
 
@@ -552,21 +563,12 @@ static void __qdisc_destroy(struct rcu_head *head)
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
-	const struct Qdisc_ops  *ops = qdisc->ops;
-
 	if (qdisc->flags & TCQ_F_BUILTIN ||
 	    !atomic_dec_and_test(&qdisc->refcnt))
 		return;
 
 	list_del(&qdisc->list);
-	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
-	if (ops->reset)
-		ops->reset(qdisc);
-	if (ops->destroy)
-		ops->destroy(qdisc);
 
-	module_put(ops->owner);
-	dev_put(qdisc_dev(qdisc));
 	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
 }
 EXPORT_SYMBOL(qdisc_destroy);
-- 
cgit v1.2.3-70-g09d2


From 17715e62a5e5c7224e5f906a4b8f9e5084100118 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jul 2008 02:36:04 -0700
Subject: pkt_sched: Use per-queue locking in shutdown_scheduler_queue.

This eliminates another qdisc_lock_tree user.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 082db8abe70..efa418a1b34 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -757,18 +757,20 @@ static void shutdown_scheduler_queue(struct net_device *dev,
 	struct Qdisc *qdisc_default = _qdisc_default;
 
 	if (qdisc) {
+		spinlock_t *root_lock = qdisc_root_lock(qdisc);
+
 		dev_queue->qdisc = qdisc_default;
 		dev_queue->qdisc_sleeping = qdisc_default;
 
+		spin_lock(root_lock);
 		qdisc_destroy(qdisc);
+		spin_unlock(root_lock);
 	}
 }
 
 void dev_shutdown(struct net_device *dev)
 {
-	qdisc_lock_tree(dev);
 	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
 	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
 	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
-	qdisc_unlock_tree(dev);
 }
-- 
cgit v1.2.3-70-g09d2


From 55dbc640c31db373fa07eb1e3af9b8eadbdf80db Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jul 2008 02:40:45 -0700
Subject: pkt_sched: Remove qdisc_lock_tree usage in cls_api.c

It just wants the qdisc tree for the filter to be synchronized.
So just BH lock qdisc_root_lock(q) instead.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d0b0a9b1439..d2b6f54a626 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -120,6 +120,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
+	spinlock_t *root_lock;
 	struct tcmsg *t;
 	u32 protocol;
 	u32 prio;
@@ -204,6 +205,8 @@ replay:
 		}
 	}
 
+	root_lock = qdisc_root_lock(q);
+
 	if (tp == NULL) {
 		/* Proto-tcf does not exist, create new one */
 
@@ -263,10 +266,10 @@ replay:
 			goto errout;
 		}
 
-		qdisc_lock_tree(dev);
+		spin_lock_bh(root_lock);
 		tp->next = *back;
 		*back = tp;
-		qdisc_unlock_tree(dev);
+		spin_unlock_bh(root_lock);
 
 	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
 		goto errout;
@@ -275,9 +278,9 @@ replay:
 
 	if (fh == 0) {
 		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
-			qdisc_lock_tree(dev);
+			spin_lock_bh(root_lock);
 			*back = tp->next;
-			qdisc_unlock_tree(dev);
+			spin_lock_bh(root_lock);
 
 			tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
 			tcf_destroy(tp);
-- 
cgit v1.2.3-70-g09d2


From 15b458fa65cbba395724a99ab1b7d3785ca76c1c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jul 2008 02:42:51 -0700
Subject: pkt_sched: Kill qdisc_lock_tree usage in cls_route.c

It just wants the qdisc tree to be synchronized, so grabbing
qdisc_root_lock() is sufficient.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_route.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 5a16ca28aa3..481260a4f10 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -73,11 +73,13 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif)
 }
 
 static inline
-void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id)
+void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
 {
-	qdisc_lock_tree(dev);
+	spinlock_t *root_lock = qdisc_root_lock(q);
+
+	spin_lock_bh(root_lock);
 	memset(head->fastmap, 0, sizeof(head->fastmap));
-	qdisc_unlock_tree(dev);
+	spin_unlock_bh(root_lock);
 }
 
 static inline void
@@ -302,7 +304,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 			*fp = f->next;
 			tcf_tree_unlock(tp);
 
-			route4_reset_fastmap(qdisc_dev(tp->q), head, f->id);
+			route4_reset_fastmap(tp->q, head, f->id);
 			route4_delete_filter(tp, f);
 
 			/* Strip tree */
@@ -500,7 +502,7 @@ reinsert:
 	}
 	tcf_tree_unlock(tp);
 
-	route4_reset_fastmap(qdisc_dev(tp->q), head, f->id);
+	route4_reset_fastmap(tp->q, head, f->id);
 	*arg = (unsigned long)f;
 	return 0;
 
-- 
cgit v1.2.3-70-g09d2


From ead81cc5fc6d996db6afb20f211241612610a07a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 00:50:32 -0700
Subject: netdevice: Move qdisc_list back into net_device proper.

And give it it's own lock.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 ++-
 net/core/dev.c            |  2 ++
 net/sched/sch_api.c       | 31 +++++++------------------------
 net/sched/sch_generic.c   |  9 +++++++--
 4 files changed, 18 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1e839fa0143..3170bcef734 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -450,7 +450,6 @@ struct netdev_queue {
 	spinlock_t		_xmit_lock;
 	int			xmit_lock_owner;
 	struct Qdisc		*qdisc_sleeping;
-	struct list_head	qdisc_list;
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -638,6 +637,8 @@ struct net_device
 	unsigned int		real_num_tx_queues;
 
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
+	spinlock_t		qdisc_list_lock;
+	struct list_head	qdisc_list;
 
 /*
  * One part is mostly used on xmit path (device)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0b909b74f69..6741e344ac5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3886,6 +3886,8 @@ int register_netdevice(struct net_device *dev)
 	net = dev_net(dev);
 
 	spin_lock_init(&dev->addr_list_lock);
+	spin_lock_init(&dev->qdisc_list_lock);
+	INIT_LIST_HEAD(&dev->qdisc_list);
 	netdev_init_queue_locks(dev);
 
 	dev->iflink = -1;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 8e8c5becc34..6958fe7c9a7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -183,30 +183,17 @@ EXPORT_SYMBOL(unregister_qdisc);
    (root qdisc, all its children, children of children etc.)
  */
 
-static struct Qdisc *__qdisc_lookup(struct netdev_queue *dev_queue, u32 handle)
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
 	struct Qdisc *q;
 
-	list_for_each_entry(q, &dev_queue->qdisc_list, list) {
+	list_for_each_entry(q, &dev->qdisc_list, list) {
 		if (q->handle == handle)
 			return q;
 	}
 	return NULL;
 }
 
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
-{
-	unsigned int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		struct Qdisc *q = __qdisc_lookup(txq, handle);
-		if (q)
-			return q;
-	}
-	return NULL;
-}
-
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 {
 	unsigned long cl;
@@ -645,9 +632,9 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 				goto err_out3;
 			}
 		}
-		qdisc_lock_tree(dev);
-		list_add_tail(&sch->list, &dev_queue->qdisc_list);
-		qdisc_unlock_tree(dev);
+		spin_lock_bh(&dev->qdisc_list_lock);
+		list_add_tail(&sch->list, &dev->qdisc_list);
+		spin_unlock_bh(&dev->qdisc_list_lock);
 
 		return sch;
 	}
@@ -1032,14 +1019,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 	read_lock(&dev_base_lock);
 	idx = 0;
 	for_each_netdev(&init_net, dev) {
-		struct netdev_queue *dev_queue;
 		if (idx < s_idx)
 			goto cont;
 		if (idx > s_idx)
 			s_q_idx = 0;
 		q_idx = 0;
-		dev_queue = netdev_get_tx_queue(dev, 0);
-		list_for_each_entry(q, &dev_queue->qdisc_list, list) {
+		list_for_each_entry(q, &dev->qdisc_list, list) {
 			if (q_idx < s_q_idx) {
 				q_idx++;
 				continue;
@@ -1269,7 +1254,6 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	struct netdev_queue *dev_queue;
 	int t;
 	int s_t;
 	struct net_device *dev;
@@ -1288,8 +1272,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	s_t = cb->args[0];
 	t = 0;
 
-	dev_queue = netdev_get_tx_queue(dev, 0);
-	list_for_each_entry(q, &dev_queue->qdisc_list, list) {
+	list_for_each_entry(q, &dev->qdisc_list, list) {
 		if (t < s_t || !q->ops->cl_ops ||
 		    (tcm->tcm_parent &&
 		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index efa418a1b34..8cdf0b4a6a5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -563,11 +563,15 @@ static void __qdisc_destroy(struct rcu_head *head)
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
+	struct net_device *dev = qdisc_dev(qdisc);
+
 	if (qdisc->flags & TCQ_F_BUILTIN ||
 	    !atomic_dec_and_test(&qdisc->refcnt))
 		return;
 
+	spin_lock_bh(&dev->qdisc_list_lock);
 	list_del(&qdisc->list);
+	spin_unlock_bh(&dev->qdisc_list_lock);
 
 	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
 }
@@ -599,7 +603,9 @@ static void attach_one_default_qdisc(struct net_device *dev,
 			printk(KERN_INFO "%s: activation failed\n", dev->name);
 			return;
 		}
-		list_add_tail(&qdisc->list, &dev_queue->qdisc_list);
+		spin_lock_bh(&dev->qdisc_list_lock);
+		list_add_tail(&qdisc->list, &dev->qdisc_list);
+		spin_unlock_bh(&dev->qdisc_list_lock);
 	} else {
 		qdisc =  &noqueue_qdisc;
 	}
@@ -738,7 +744,6 @@ static void dev_init_scheduler_queue(struct net_device *dev,
 
 	dev_queue->qdisc = qdisc;
 	dev_queue->qdisc_sleeping = qdisc;
-	INIT_LIST_HEAD(&dev_queue->qdisc_list);
 }
 
 void dev_init_scheduler(struct net_device *dev)
-- 
cgit v1.2.3-70-g09d2


From 53049978df1d9ae55bf397c9879e6b33218352db Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jul 2008 03:00:19 -0700
Subject: pkt_sched: Make qdisc grafting locking more specific.

Lock the root of the qdisc being operated upon.

All explicit references to qdisc_tree_lock() are now gone.
The only remaining uses are via the sch_tree_{lock,unlock}()
and tcf_tree_{lock,unlock}() macros.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6958fe7c9a7..74924893ef7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -441,15 +441,29 @@ static struct Qdisc *
 dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 {
 	struct netdev_queue *dev_queue;
+	spinlock_t *root_lock;
 	struct Qdisc *oqdisc;
+	int ingress;
 
 	if (dev->flags & IFF_UP)
 		dev_deactivate(dev);
 
-	qdisc_lock_tree(dev);
-	if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
+	ingress = 0;
+	if (qdisc && qdisc->flags&TCQ_F_INGRESS)
+		ingress = 1;
+
+	if (ingress) {
 		dev_queue = &dev->rx_queue;
 		oqdisc = dev_queue->qdisc;
+	} else {
+		dev_queue = netdev_get_tx_queue(dev, 0);
+		oqdisc = dev_queue->qdisc_sleeping;
+	}
+
+	root_lock = qdisc_root_lock(oqdisc);
+	spin_lock_bh(root_lock);
+
+	if (ingress) {
 		/* Prune old scheduler */
 		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
 			/* delete */
@@ -460,9 +474,6 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 		}
 
 	} else {
-		dev_queue = netdev_get_tx_queue(dev, 0);
-		oqdisc = dev_queue->qdisc_sleeping;
-
 		/* Prune old scheduler */
 		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
 			qdisc_reset(oqdisc);
@@ -474,7 +485,7 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 		dev_queue->qdisc = &noop_qdisc;
 	}
 
-	qdisc_unlock_tree(dev);
+	spin_unlock_bh(root_lock);
 
 	if (dev->flags & IFF_UP)
 		dev_activate(dev);
@@ -765,10 +776,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
 			return err;
 		if (q) {
+			spinlock_t *root_lock = qdisc_root_lock(q);
+
 			qdisc_notify(skb, n, clid, q, NULL);
-			qdisc_lock_tree(dev);
+			spin_unlock_bh(root_lock);
 			qdisc_destroy(q);
-			qdisc_unlock_tree(dev);
+			spin_unlock_bh(root_lock);
 		}
 	} else {
 		qdisc_notify(skb, n, clid, NULL, q);
@@ -911,20 +924,24 @@ create_n_graft:
 graft:
 	if (1) {
 		struct Qdisc *old_q = NULL;
+		spinlock_t *root_lock;
+
 		err = qdisc_graft(dev, p, clid, q, &old_q);
 		if (err) {
 			if (q) {
-				qdisc_lock_tree(dev);
+				root_lock = qdisc_root_lock(q);
+				spin_lock_bh(root_lock);
 				qdisc_destroy(q);
-				qdisc_unlock_tree(dev);
+				spin_unlock_bh(root_lock);
 			}
 			return err;
 		}
 		qdisc_notify(skb, n, clid, old_q, q);
 		if (old_q) {
-			qdisc_lock_tree(dev);
+			root_lock = qdisc_root_lock(old_q);
+			spin_lock_bh(root_lock);
 			qdisc_destroy(old_q);
-			qdisc_unlock_tree(dev);
+			spin_unlock_bh(root_lock);
 		}
 	}
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From c7e4f3bbb4ba4e48ab3b529d5016e454cee1ccd6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jul 2008 03:22:39 -0700
Subject: pkt_sched: Kill qdisc_lock_tree and qdisc_unlock_tree.

No longer used.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  3 ---
 net/sched/sch_generic.c   | 36 +++---------------------------------
 2 files changed, 3 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 60e1e9f9e45..1eef8d0c999 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -180,9 +180,6 @@ static inline struct net_device *qdisc_dev(struct Qdisc *qdisc)
 	return qdisc->dev_queue->dev;
 }
 
-extern void qdisc_lock_tree(struct net_device *dev);
-extern void qdisc_unlock_tree(struct net_device *dev);
-
 static inline void sch_tree_lock(struct Qdisc *q)
 {
 	spin_lock_bh(qdisc_root_lock(q));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8cdf0b4a6a5..3d53e92ad9c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -29,44 +29,14 @@
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
- * queue->lock spinlock.
+ * qdisc_root_lock(qdisc) spinlock.
  *
  * The idea is the following:
- * - enqueue, dequeue are serialized via top level device
- *   spinlock queue->lock.
- * - ingress filtering is serialized via top level device
- *   spinlock dev->rx_queue.lock.
+ * - enqueue, dequeue are serialized via qdisc root lock
+ * - ingress filtering is also serialized via qdisc root lock
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
-void qdisc_lock_tree(struct net_device *dev)
-	__acquires(dev->rx_queue.lock)
-{
-	unsigned int i;
-
-	local_bh_disable();
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		spin_lock(&txq->lock);
-	}
-	spin_lock(&dev->rx_queue.lock);
-}
-EXPORT_SYMBOL(qdisc_lock_tree);
-
-void qdisc_unlock_tree(struct net_device *dev)
-	__releases(dev->rx_queue.lock)
-{
-	unsigned int i;
-
-	spin_unlock(&dev->rx_queue.lock);
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		spin_unlock(&txq->lock);
-	}
-	local_bh_enable();
-}
-EXPORT_SYMBOL(qdisc_unlock_tree);
-
 static inline int qdisc_qlen(struct Qdisc *q)
 {
 	return q->q.qlen;
-- 
cgit v1.2.3-70-g09d2


From 83874000929ed63aef30b44083a9f713135ff040 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 00:53:03 -0700
Subject: pkt_sched: Kill netdev_queue lock.

We can simply use the qdisc->q.lock for all of the
qdisc tree synchronization.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ifb.c         | 20 --------------------
 include/linux/netdevice.h |  1 -
 include/net/sch_generic.h |  7 ++++++-
 net/core/dev.c            |  9 +++++----
 net/mac80211/wme.c        | 19 ++++++++++++-------
 net/sched/sch_generic.c   | 32 +++++++++++++++-----------------
 net/sched/sch_teql.c      |  7 +++++--
 7 files changed, 43 insertions(+), 52 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 897b05e79ed..0960e69b2da 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -35,7 +35,6 @@
 #include <linux/moduleparam.h>
 #include <net/pkt_sched.h>
 #include <net/net_namespace.h>
-#include <linux/lockdep.h>
 
 #define TX_TIMEOUT  (2*HZ)
 
@@ -228,22 +227,6 @@ static struct rtnl_link_ops ifb_link_ops __read_mostly = {
 module_param(numifbs, int, 0);
 MODULE_PARM_DESC(numifbs, "Number of ifb devices");
 
-/*
- * dev_ifb's TX queue lock is usually taken after dev->rx_queue.lock,
- * reversely to e.g. qdisc_lock_tree(). It should be safe until
- * ifb doesn't take dev's TX queue lock with dev_ifb->rx_queue.lock.
- * But lockdep should know that ifb has different locks from dev.
- */
-static struct lock_class_key ifb_tx_queue_lock_key;
-static struct lock_class_key ifb_rx_queue_lock_key;
-
-static void set_tx_lockdep_key(struct net_device *dev,
-			       struct netdev_queue *txq,
-			       void *_unused)
-{
-	lockdep_set_class(&txq->lock, &ifb_tx_queue_lock_key);
-}
-
 static int __init ifb_init_one(int index)
 {
 	struct net_device *dev_ifb;
@@ -264,9 +247,6 @@ static int __init ifb_init_one(int index)
 	if (err < 0)
 		goto err;
 
-	netdev_for_each_tx_queue(dev_ifb, set_tx_lockdep_key, NULL);
-	lockdep_set_class(&dev_ifb->rx_queue.lock, &ifb_rx_queue_lock_key);
-
 	return 0;
 
 err:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3170bcef734..9c5a6885011 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -443,7 +443,6 @@ enum netdev_queue_state_t
 };
 
 struct netdev_queue {
-	spinlock_t		lock;
 	struct net_device	*dev;
 	struct Qdisc		*qdisc;
 	unsigned long		state;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1eef8d0c999..2902a42564f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -163,6 +163,11 @@ struct tcf_proto
 	struct tcf_proto_ops	*ops;
 };
 
+static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
+{
+	return &qdisc->q.lock;
+}
+
 static inline struct Qdisc *qdisc_root(struct Qdisc *qdisc)
 {
 	return qdisc->dev_queue->qdisc;
@@ -172,7 +177,7 @@ static inline spinlock_t *qdisc_root_lock(struct Qdisc *qdisc)
 {
 	struct Qdisc *root = qdisc_root(qdisc);
 
-	return &root->dev_queue->lock;
+	return qdisc_lock(root);
 }
 
 static inline struct net_device *qdisc_dev(struct Qdisc *qdisc)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6741e344ac5..32a13772c1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2080,10 +2080,12 @@ static int ing_filter(struct sk_buff *skb)
 
 	rxq = &dev->rx_queue;
 
-	spin_lock(&rxq->lock);
-	if ((q = rxq->qdisc) != NULL)
+	q = rxq->qdisc;
+	if (q) {
+		spin_lock(qdisc_lock(q));
 		result = q->enqueue(skb, q);
-	spin_unlock(&rxq->lock);
+		spin_unlock(qdisc_lock(q));
+	}
 
 	return result;
 }
@@ -4173,7 +4175,6 @@ static void netdev_init_one_queue(struct net_device *dev,
 				  struct netdev_queue *queue,
 				  void *_unused)
 {
-	spin_lock_init(&queue->lock);
 	queue->dev = dev;
 }
 
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index b21cfec4b6c..6e8099e7704 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -237,12 +237,14 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 		ieee80211_requeue(local, agg_queue);
 	} else {
 		struct netdev_queue *txq;
+		spinlock_t *root_lock;
 
 		txq = netdev_get_tx_queue(local->mdev, agg_queue);
+		root_lock = qdisc_root_lock(txq->qdisc);
 
-		spin_lock_bh(&txq->lock);
+		spin_lock_bh(root_lock);
 		qdisc_reset(txq->qdisc);
-		spin_unlock_bh(&txq->lock);
+		spin_unlock_bh(root_lock);
 	}
 }
 
@@ -250,6 +252,7 @@ void ieee80211_requeue(struct ieee80211_local *local, int queue)
 {
 	struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, queue);
 	struct sk_buff_head list;
+	spinlock_t *root_lock;
 	struct Qdisc *qdisc;
 	u32 len;
 
@@ -261,14 +264,15 @@ void ieee80211_requeue(struct ieee80211_local *local, int queue)
 
 	skb_queue_head_init(&list);
 
-	spin_lock(&txq->lock);
+	root_lock = qdisc_root_lock(qdisc);
+	spin_lock(root_lock);
 	for (len = qdisc->q.qlen; len > 0; len--) {
 		struct sk_buff *skb = qdisc->dequeue(qdisc);
 
 		if (skb)
 			__skb_queue_tail(&list, skb);
 	}
-	spin_unlock(&txq->lock);
+	spin_unlock(root_lock);
 
 	for (len = list.qlen; len > 0; len--) {
 		struct sk_buff *skb = __skb_dequeue(&list);
@@ -280,12 +284,13 @@ void ieee80211_requeue(struct ieee80211_local *local, int queue)
 
 		txq = netdev_get_tx_queue(local->mdev, new_queue);
 
-		spin_lock(&txq->lock);
 
 		qdisc = rcu_dereference(txq->qdisc);
-		qdisc->enqueue(skb, qdisc);
+		root_lock = qdisc_root_lock(qdisc);
 
-		spin_unlock(&txq->lock);
+		spin_lock(root_lock);
+		qdisc->enqueue(skb, qdisc);
+		spin_unlock(root_lock);
 	}
 
 out_unlock:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3d53e92ad9c..8fc580b3e17 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -96,15 +96,15 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 }
 
 /*
- * NOTE: Called under queue->lock with locally disabled BH.
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
  *
  * __QDISC_STATE_RUNNING guarantees only one CPU can process
- * this qdisc at a time. queue->lock serializes queue accesses for
- * this queue AND txq->qdisc pointer itself.
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
  *
  *  netif_tx_lock serializes accesses to device driver.
  *
- *  queue->lock and netif_tx_lock are mutually exclusive,
+ *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
  *  if one is grabbed, another must be free.
  *
  * Note, that this procedure can be called by a watchdog timer
@@ -317,7 +317,6 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 };
 
 static struct netdev_queue noop_netdev_queue = {
-	.lock		=	__SPIN_LOCK_UNLOCKED(noop_netdev_queue.lock),
 	.qdisc		=	&noop_qdisc,
 };
 
@@ -327,6 +326,7 @@ struct Qdisc noop_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noop_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
+	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
 };
 EXPORT_SYMBOL(noop_qdisc);
@@ -498,7 +498,7 @@ errout:
 }
 EXPORT_SYMBOL(qdisc_create_dflt);
 
-/* Under queue->lock and BH! */
+/* Under qdisc_root_lock(qdisc) and BH! */
 
 void qdisc_reset(struct Qdisc *qdisc)
 {
@@ -526,10 +526,12 @@ static void __qdisc_destroy(struct rcu_head *head)
 	module_put(ops->owner);
 	dev_put(qdisc_dev(qdisc));
 
+	kfree_skb(qdisc->gso_skb);
+
 	kfree((char *) qdisc - qdisc->padded);
 }
 
-/* Under queue->lock and BH! */
+/* Under qdisc_root_lock(qdisc) and BH! */
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
@@ -586,13 +588,12 @@ static void transition_one_qdisc(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
 				 void *_need_watchdog)
 {
+	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
 	int *need_watchdog_p = _need_watchdog;
 
-	spin_lock_bh(&dev_queue->lock);
-	rcu_assign_pointer(dev_queue->qdisc, dev_queue->qdisc_sleeping);
-	if (dev_queue->qdisc != &noqueue_qdisc)
+	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
+	if (new_qdisc != &noqueue_qdisc)
 		*need_watchdog_p = 1;
-	spin_unlock_bh(&dev_queue->lock);
 }
 
 void dev_activate(struct net_device *dev)
@@ -629,19 +630,16 @@ static void dev_deactivate_queue(struct net_device *dev,
 	struct sk_buff *skb = NULL;
 	struct Qdisc *qdisc;
 
-	spin_lock_bh(&dev_queue->lock);
-
 	qdisc = dev_queue->qdisc;
 	if (qdisc) {
+		spin_lock_bh(qdisc_lock(qdisc));
+
 		dev_queue->qdisc = qdisc_default;
 		qdisc_reset(qdisc);
 
-		skb = qdisc->gso_skb;
-		qdisc->gso_skb = NULL;
+		spin_unlock_bh(qdisc_lock(qdisc));
 	}
 
-	spin_unlock_bh(&dev_queue->lock);
-
 	kfree_skb(skb);
 }
 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index ade3372221c..8b0ff345f9d 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -156,12 +156,15 @@ teql_destroy(struct Qdisc* sch)
 					master->slaves = NEXT_SLAVE(q);
 					if (q == master->slaves) {
 						struct netdev_queue *txq;
+						spinlock_t *root_lock;
 
 						txq = netdev_get_tx_queue(master->dev, 0);
 						master->slaves = NULL;
-						spin_lock_bh(&txq->lock);
+
+						root_lock = qdisc_root_lock(txq->qdisc);
+						spin_lock_bh(root_lock);
 						qdisc_reset(txq->qdisc);
-						spin_unlock_bh(&txq->lock);
+						spin_unlock_bh(root_lock);
 					}
 				}
 				skb_queue_purge(&dat->q);
-- 
cgit v1.2.3-70-g09d2


From 99194cff398d056e5ee469647c294466c246c88a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 04:54:10 -0700
Subject: pkt_sched: Add multiqueue handling to qdisc_graft().

Move the destruction of the old queue into qdisc_graft().

When operating on a root qdisc (ie. "parent == NULL"), apply
the operation to all queues.  The caller has grabbed a single
implicit reference for this graft, therefore when we apply the
change to more than one queue we must grab additional qdisc
references.

Otherwise, we are operating on a class of a specific parent qdisc, and
therefore no multiqueue handling is necessary.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 101 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 59 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 74924893ef7..b3ef8307204 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -435,28 +435,22 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 	return i>0 ? autohandle : 0;
 }
 
-/* Attach toplevel qdisc to device dev */
+/* Attach toplevel qdisc to device queue. */
 
-static struct Qdisc *
-dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
+static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
+				     struct Qdisc *qdisc)
 {
-	struct netdev_queue *dev_queue;
 	spinlock_t *root_lock;
 	struct Qdisc *oqdisc;
 	int ingress;
 
-	if (dev->flags & IFF_UP)
-		dev_deactivate(dev);
-
 	ingress = 0;
 	if (qdisc && qdisc->flags&TCQ_F_INGRESS)
 		ingress = 1;
 
 	if (ingress) {
-		dev_queue = &dev->rx_queue;
 		oqdisc = dev_queue->qdisc;
 	} else {
-		dev_queue = netdev_get_tx_queue(dev, 0);
 		oqdisc = dev_queue->qdisc_sleeping;
 	}
 
@@ -487,9 +481,6 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 
 	spin_unlock_bh(root_lock);
 
-	if (dev->flags & IFF_UP)
-		dev_activate(dev);
-
 	return oqdisc;
 }
 
@@ -521,26 +512,66 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 }
 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
 
-/* Graft qdisc "new" to class "classid" of qdisc "parent" or
-   to device "dev".
+static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
+			       struct Qdisc *old, struct Qdisc *new)
+{
+	if (new || old)
+		qdisc_notify(skb, n, clid, old, new);
 
-   Old qdisc is not destroyed but returned in *old.
+	if (old) {
+		spin_lock_bh(&old->q.lock);
+		qdisc_destroy(old);
+		spin_unlock_bh(&old->q.lock);
+	}
+}
+
+/* Graft qdisc "new" to class "classid" of qdisc "parent" or
+ * to device "dev".
+ *
+ * When appropriate send a netlink notification using 'skb'
+ * and "n".
+ *
+ * On success, destroy old qdisc.
  */
 
 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
-		       u32 classid,
-		       struct Qdisc *new, struct Qdisc **old)
+		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
+		       struct Qdisc *new, struct Qdisc *old)
 {
+	struct Qdisc *q = old;
 	int err = 0;
-	struct Qdisc *q = *old;
-
 
 	if (parent == NULL) {
-		if (q && q->flags&TCQ_F_INGRESS) {
-			*old = dev_graft_qdisc(dev, q);
-		} else {
-			*old = dev_graft_qdisc(dev, new);
+		unsigned int i, num_q, ingress;
+
+		ingress = 0;
+		num_q = dev->num_tx_queues;
+		if (q && q->flags & TCQ_F_INGRESS) {
+			num_q = 1;
+			ingress = 1;
 		}
+
+		if (dev->flags & IFF_UP)
+			dev_deactivate(dev);
+
+		for (i = 0; i < num_q; i++) {
+			struct netdev_queue *dev_queue = &dev->rx_queue;
+
+			if (!ingress)
+				dev_queue = netdev_get_tx_queue(dev, i);
+
+			if (ingress) {
+				old = dev_graft_qdisc(dev_queue, q);
+			} else {
+				old = dev_graft_qdisc(dev_queue, new);
+				if (new && i > 0)
+					atomic_inc(&new->refcnt);
+			}
+			notify_and_destroy(skb, n, classid, old, new);
+		}
+
+		if (dev->flags & IFF_UP)
+			dev_activate(dev);
 	} else {
 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
 
@@ -549,10 +580,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		if (cops) {
 			unsigned long cl = cops->get(parent, classid);
 			if (cl) {
-				err = cops->graft(parent, cl, new, old);
+				err = cops->graft(parent, cl, new, &old);
 				cops->put(parent, cl);
 			}
 		}
+		if (!err)
+			notify_and_destroy(skb, n, classid, old, new);
 	}
 	return err;
 }
@@ -773,16 +806,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			return -EINVAL;
 		if (q->handle == 0)
 			return -ENOENT;
-		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
+		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
 			return err;
-		if (q) {
-			spinlock_t *root_lock = qdisc_root_lock(q);
-
-			qdisc_notify(skb, n, clid, q, NULL);
-			spin_unlock_bh(root_lock);
-			qdisc_destroy(q);
-			spin_unlock_bh(root_lock);
-		}
 	} else {
 		qdisc_notify(skb, n, clid, NULL, q);
 	}
@@ -923,10 +948,9 @@ create_n_graft:
 
 graft:
 	if (1) {
-		struct Qdisc *old_q = NULL;
 		spinlock_t *root_lock;
 
-		err = qdisc_graft(dev, p, clid, q, &old_q);
+		err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
 		if (err) {
 			if (q) {
 				root_lock = qdisc_root_lock(q);
@@ -936,13 +960,6 @@ graft:
 			}
 			return err;
 		}
-		qdisc_notify(skb, n, clid, old_q, q);
-		if (old_q) {
-			root_lock = qdisc_root_lock(old_q);
-			spin_lock_bh(root_lock);
-			qdisc_destroy(old_q);
-			spin_unlock_bh(root_lock);
-		}
 	}
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From a0c80b80e0fb48129e4e9d6a9ede914f9ff1850d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 01:46:06 -0700
Subject: pkt_sched: Make default qdisc nonshared-multiqueue safe.

Instead of 'pfifo_fast' we have just plain 'fifo_fast'.
No priority queues, just a straight FIFO.

This is necessary in order to legally have a seperate
qdisc per queue in multi-TX-queue setups, and thus get
full parallelization.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 99 +++++++++++--------------------------------------
 1 file changed, 22 insertions(+), 77 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8fc580b3e17..e244c462e6b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -349,99 +349,44 @@ static struct Qdisc noqueue_qdisc = {
 };
 
 
-static const u8 prio2band[TC_PRIO_MAX+1] =
-	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
-
-/* 3-band FIFO queue: old style, but should be a bit faster than
-   generic prio+fifo combination.
- */
-
-#define PFIFO_FAST_BANDS 3
-
-static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
-					     struct Qdisc *qdisc)
-{
-	struct sk_buff_head *list = qdisc_priv(qdisc);
-	return list + prio2band[skb->priority & TC_PRIO_MAX];
-}
-
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+static int fifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
-	struct sk_buff_head *list = prio2list(skb, qdisc);
+	struct sk_buff_head *list = &qdisc->q;
 
-	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
-		qdisc->q.qlen++;
+	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len)
 		return __qdisc_enqueue_tail(skb, qdisc, list);
-	}
 
 	return qdisc_drop(skb, qdisc);
 }
 
-static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
+static struct sk_buff *fifo_fast_dequeue(struct Qdisc* qdisc)
 {
-	int prio;
-	struct sk_buff_head *list = qdisc_priv(qdisc);
+	struct sk_buff_head *list = &qdisc->q;
 
-	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
-		if (!skb_queue_empty(list + prio)) {
-			qdisc->q.qlen--;
-			return __qdisc_dequeue_head(qdisc, list + prio);
-		}
-	}
+	if (!skb_queue_empty(list))
+		return __qdisc_dequeue_head(qdisc, list);
 
 	return NULL;
 }
 
-static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+static int fifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
-	qdisc->q.qlen++;
-	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
+	return __qdisc_requeue(skb, qdisc, &qdisc->q);
 }
 
-static void pfifo_fast_reset(struct Qdisc* qdisc)
+static void fifo_fast_reset(struct Qdisc* qdisc)
 {
-	int prio;
-	struct sk_buff_head *list = qdisc_priv(qdisc);
-
-	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
-		__qdisc_reset_queue(qdisc, list + prio);
-
+	__qdisc_reset_queue(qdisc, &qdisc->q);
 	qdisc->qstats.backlog = 0;
-	qdisc->q.qlen = 0;
 }
 
-static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
-{
-	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
-
-	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
-	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
-	return skb->len;
-
-nla_put_failure:
-	return -1;
-}
-
-static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
-{
-	int prio;
-	struct sk_buff_head *list = qdisc_priv(qdisc);
-
-	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
-		skb_queue_head_init(list + prio);
-
-	return 0;
-}
-
-static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
-	.id		=	"pfifo_fast",
-	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
-	.enqueue	=	pfifo_fast_enqueue,
-	.dequeue	=	pfifo_fast_dequeue,
-	.requeue	=	pfifo_fast_requeue,
-	.init		=	pfifo_fast_init,
-	.reset		=	pfifo_fast_reset,
-	.dump		=	pfifo_fast_dump,
+static struct Qdisc_ops fifo_fast_ops __read_mostly = {
+	.id		=	"fifo_fast",
+	.priv_size	=	0,
+	.enqueue	=	fifo_fast_enqueue,
+	.dequeue	=	fifo_fast_dequeue,
+	.requeue	=	fifo_fast_requeue,
+	.reset		=	fifo_fast_reset,
 	.owner		=	THIS_MODULE,
 };
 
@@ -570,7 +515,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
 
 	if (dev->tx_queue_len) {
 		qdisc = qdisc_create_dflt(dev, dev_queue,
-					  &pfifo_fast_ops, TC_H_ROOT);
+					  &fifo_fast_ops, TC_H_ROOT);
 		if (!qdisc) {
 			printk(KERN_INFO "%s: activation failed\n", dev->name);
 			return;
@@ -601,9 +546,9 @@ void dev_activate(struct net_device *dev)
 	int need_watchdog;
 
 	/* No queueing discipline is attached to device;
-	   create default one i.e. pfifo_fast for devices,
-	   which need queueing and noqueue_qdisc for
-	   virtual interfaces
+	 * create default one i.e. fifo_fast for devices,
+	 * which need queueing and noqueue_qdisc for
+	 * virtual interfaces.
 	 */
 
 	if (dev_all_qdisc_sleeping_noop(dev))
-- 
cgit v1.2.3-70-g09d2


From 9b4661bd6e5437508e0920608f3213c23212cd1b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:01:44 -0700
Subject: ipv4: add pernet mib operations

These ones are currently empty, but stuff from init_ipv4_mibs will
sequentially migrate there.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 95a966dd191..b4b77aa0795 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -110,6 +110,7 @@
 #include <net/ipip.h>
 #include <net/inet_common.h>
 #include <net/xfrm.h>
+#include <net/net_namespace.h>
 #ifdef CONFIG_IP_MROUTE
 #include <linux/mroute.h>
 #endif
@@ -1339,6 +1340,20 @@ static struct net_protocol icmp_protocol = {
 	.netns_ok =	1,
 };
 
+static __net_init int ipv4_mib_init_net(struct net *net)
+{
+	return 0;
+}
+
+static __net_exit void ipv4_mib_exit_net(struct net *net)
+{
+}
+
+static __net_initdata struct pernet_operations ipv4_mib_ops = {
+	.init = ipv4_mib_init_net,
+	.exit = ipv4_mib_exit_net,
+};
+
 static int __init init_ipv4_mibs(void)
 {
 	if (snmp_mib_init((void **)net_statistics,
@@ -1365,8 +1380,13 @@ static int __init init_ipv4_mibs(void)
 
 	tcp_mib_init(&init_net);
 
+	if (register_pernet_subsys(&ipv4_mib_ops))
+		goto err_net;
+
 	return 0;
 
+err_net:
+	snmp_mib_free((void **)udplite_statistics);
 err_udplite_mib:
 	snmp_mib_free((void **)udp_statistics);
 err_udp_mib:
-- 
cgit v1.2.3-70-g09d2


From 57ef42d59d1c1d79be59fc3c6380ae14234e38c3 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:02:08 -0700
Subject: mib: put tcp statistics on struct net

Proc temporary uses stats from init_net.

BTW, TCP_XXX_STATS are beautiful (w/o do { } while (0) facing) again :)

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/mib.h |  1 +
 include/net/tcp.h       |  9 ++++-----
 net/ipv4/af_inet.c      | 16 +++++++++-------
 net/ipv4/proc.c         |  4 ++--
 net/ipv4/tcp.c          |  3 ---
 5 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 9f4b31ed18c..8f96079bf0e 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -4,6 +4,7 @@
 #include <net/snmp.h>
 
 struct netns_mib {
+	DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 };
 
 #endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 60e5be8b925..92d7b551dc5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -265,11 +265,10 @@ static inline int tcp_too_many_orphans(struct sock *sk, int num)
 
 extern struct proto tcp_prot;
 
-DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
-#define TCP_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(tcp_statistics, field); } while (0)
-#define TCP_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(tcp_statistics, field); } while (0)
-#define TCP_DEC_STATS(net, field)	do { (void)net; SNMP_DEC_STATS(tcp_statistics, field); } while (0)
-#define TCP_ADD_STATS_USER(net, field, val) do { (void)net; SNMP_ADD_STATS_USER(tcp_statistics, field, val); } while (0)
+#define TCP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.tcp_statistics, field)
+#define TCP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
+#define TCP_DEC_STATS(net, field)	SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
+#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
 
 extern void			tcp_v4_err(struct sk_buff *skb, u32);
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b4b77aa0795..c1a3e986f8b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1342,11 +1342,20 @@ static struct net_protocol icmp_protocol = {
 
 static __net_init int ipv4_mib_init_net(struct net *net)
 {
+	if (snmp_mib_init((void **)net->mib.tcp_statistics,
+			  sizeof(struct tcp_mib)) < 0)
+		goto err_tcp_mib;
+
+	tcp_mib_init(net);
 	return 0;
+
+err_tcp_mib:
+	return -ENOMEM;
 }
 
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
+	snmp_mib_free((void **)net->mib.tcp_statistics);
 }
 
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
@@ -1368,9 +1377,6 @@ static int __init init_ipv4_mibs(void)
 	if (snmp_mib_init((void **)icmpmsg_statistics,
 			  sizeof(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
-	if (snmp_mib_init((void **)tcp_statistics,
-			  sizeof(struct tcp_mib)) < 0)
-		goto err_tcp_mib;
 	if (snmp_mib_init((void **)udp_statistics,
 			  sizeof(struct udp_mib)) < 0)
 		goto err_udp_mib;
@@ -1378,8 +1384,6 @@ static int __init init_ipv4_mibs(void)
 			  sizeof(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 
-	tcp_mib_init(&init_net);
-
 	if (register_pernet_subsys(&ipv4_mib_ops))
 		goto err_net;
 
@@ -1390,8 +1394,6 @@ err_net:
 err_udplite_mib:
 	snmp_mib_free((void **)udp_statistics);
 err_udp_mib:
-	snmp_mib_free((void **)tcp_statistics);
-err_tcp_mib:
 	snmp_mib_free((void **)icmpmsg_statistics);
 err_icmpmsg_mib:
 	snmp_mib_free((void **)icmp_statistics);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index eb5cee279c5..19e1d8e257d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -359,11 +359,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   snmp_fold_field((void **)tcp_statistics,
+				   snmp_fold_field((void **)init_net.mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   snmp_fold_field((void **)tcp_statistics,
+				   snmp_fold_field((void **)init_net.mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 	}
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eec8cf7c024..827e6132af5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -277,8 +277,6 @@
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
-DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
-
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -2802,4 +2800,3 @@ EXPORT_SYMBOL(tcp_splice_read);
 EXPORT_SYMBOL(tcp_sendpage);
 EXPORT_SYMBOL(tcp_setsockopt);
 EXPORT_SYMBOL(tcp_shutdown);
-EXPORT_SYMBOL(tcp_statistics);
-- 
cgit v1.2.3-70-g09d2


From a20f5799ca7ceb24d63c74b6fdad4b0c0ee91f4f Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:02:42 -0700
Subject: mib: put ip statistics on struct net

Similar to tcp one.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h        |  7 +++----
 include/net/netns/mib.h |  1 +
 net/ipv4/af_inet.c      | 11 ++++++-----
 net/ipv4/ip_input.c     |  8 --------
 net/ipv4/proc.c         |  4 ++--
 5 files changed, 12 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 02924fb4bdb..ff2535ada02 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -156,10 +156,9 @@ struct ipv4_config
 };
 
 extern struct ipv4_config ipv4_config;
-DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
-#define IP_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(ip_statistics, field); } while (0)
-#define IP_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(ip_statistics, field); } while (0)
-#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH(ip_statistics, field, val)
+#define IP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.ip_statistics, field)
+#define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.ip_statistics, field)
+#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH((net)->mib.ip_statistics, field, val)
 DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(net_statistics, field); } while (0)
 #define NET_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(net_statistics, field); } while (0)
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 8f96079bf0e..1094ebbf0b4 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -5,6 +5,7 @@
 
 struct netns_mib {
 	DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
+	DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics);
 };
 
 #endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c1a3e986f8b..3090a9307c4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1345,16 +1345,22 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 	if (snmp_mib_init((void **)net->mib.tcp_statistics,
 			  sizeof(struct tcp_mib)) < 0)
 		goto err_tcp_mib;
+	if (snmp_mib_init((void **)net->mib.ip_statistics,
+			  sizeof(struct ipstats_mib)) < 0)
+		goto err_ip_mib;
 
 	tcp_mib_init(net);
 	return 0;
 
+err_ip_mib:
+	snmp_mib_free((void **)net->mib.tcp_statistics);
 err_tcp_mib:
 	return -ENOMEM;
 }
 
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
+	snmp_mib_free((void **)net->mib.ip_statistics);
 	snmp_mib_free((void **)net->mib.tcp_statistics);
 }
 
@@ -1368,9 +1374,6 @@ static int __init init_ipv4_mibs(void)
 	if (snmp_mib_init((void **)net_statistics,
 			  sizeof(struct linux_mib)) < 0)
 		goto err_net_mib;
-	if (snmp_mib_init((void **)ip_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
-		goto err_ip_mib;
 	if (snmp_mib_init((void **)icmp_statistics,
 			  sizeof(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
@@ -1398,8 +1401,6 @@ err_udp_mib:
 err_icmpmsg_mib:
 	snmp_mib_free((void **)icmp_statistics);
 err_icmp_mib:
-	snmp_mib_free((void **)ip_statistics);
-err_ip_mib:
 	snmp_mib_free((void **)net_statistics);
 err_net_mib:
 	return -ENOMEM;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 043f640df4b..e0bed56c51f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -144,12 +144,6 @@
 #include <linux/mroute.h>
 #include <linux/netlink.h>
 
-/*
- *	SNMP management statistics
- */
-
-DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
-
 /*
  *	Process Router Attention IP option
  */
@@ -447,5 +441,3 @@ drop:
 out:
 	return NET_RX_DROP;
 }
-
-EXPORT_SYMBOL(ip_statistics);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 19e1d8e257d..2698bb2ce98 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -344,7 +344,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)ip_statistics,
+			   snmp_fold_field((void **)init_net.mib.ip_statistics,
 					   snmp4_ipstats_list[i].entry));
 
 	icmp_put(seq);	/* RFC 2011 compatibility */
@@ -431,7 +431,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)ip_statistics,
+			   snmp_fold_field((void **)init_net.mib.ip_statistics,
 					   snmp4_ipextstats_list[i].entry));
 
 	seq_putc(seq, '\n');
-- 
cgit v1.2.3-70-g09d2


From 61a7e26028b94805fd686a6dc9dbd9941f8f19b0 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:03:08 -0700
Subject: mib: put net statistics on struct net

Similar to ip and tcp ones :)

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h        | 11 +++++------
 include/net/netns/mib.h |  1 +
 net/ipv4/af_inet.c      | 14 ++++++--------
 net/ipv4/proc.c         |  2 +-
 4 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index ff2535ada02..b5862b97520 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -159,12 +159,11 @@ extern struct ipv4_config ipv4_config;
 #define IP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.ip_statistics, field)
 #define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.ip_statistics, field)
 #define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH((net)->mib.ip_statistics, field, val)
-DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
-#define NET_INC_STATS(net, field)	do { (void)net; SNMP_INC_STATS(net_statistics, field); } while (0)
-#define NET_INC_STATS_BH(net, field)	do { (void)net; SNMP_INC_STATS_BH(net_statistics, field); } while (0)
-#define NET_INC_STATS_USER(net, field) 	do { (void)net; SNMP_INC_STATS_USER(net_statistics, field); } while (0)
-#define NET_ADD_STATS_BH(net, field, adnd) do { (void)net; SNMP_ADD_STATS_BH(net_statistics, field, adnd); } while (0)
-#define NET_ADD_STATS_USER(net, field, adnd) do { (void)net; SNMP_ADD_STATS_USER(net_statistics, field, adnd); } while (0)
+#define NET_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.net_statistics, field)
+#define NET_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
+#define NET_INC_STATS_USER(net, field) 	SNMP_INC_STATS_USER((net)->mib.net_statistics, field)
+#define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
+#define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
 extern unsigned long snmp_fold_field(void *mib[], int offt);
 extern int snmp_mib_init(void *ptr[2], size_t mibsize);
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 1094ebbf0b4..b5b1115adfa 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -6,6 +6,7 @@
 struct netns_mib {
 	DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 	DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+	DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
 };
 
 #endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3090a9307c4..776ed3199b5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -115,8 +115,6 @@
 #include <linux/mroute.h>
 #endif
 
-DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
-
 extern void ip_mc_drop_socket(struct sock *sk);
 
 /* The inetsw table contains everything that inet_create needs to
@@ -1348,10 +1346,15 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 	if (snmp_mib_init((void **)net->mib.ip_statistics,
 			  sizeof(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
+	if (snmp_mib_init((void **)net->mib.net_statistics,
+			  sizeof(struct linux_mib)) < 0)
+		goto err_net_mib;
 
 	tcp_mib_init(net);
 	return 0;
 
+err_net_mib:
+	snmp_mib_free((void **)net->mib.ip_statistics);
 err_ip_mib:
 	snmp_mib_free((void **)net->mib.tcp_statistics);
 err_tcp_mib:
@@ -1360,6 +1363,7 @@ err_tcp_mib:
 
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
+	snmp_mib_free((void **)net->mib.net_statistics);
 	snmp_mib_free((void **)net->mib.ip_statistics);
 	snmp_mib_free((void **)net->mib.tcp_statistics);
 }
@@ -1371,9 +1375,6 @@ static __net_initdata struct pernet_operations ipv4_mib_ops = {
 
 static int __init init_ipv4_mibs(void)
 {
-	if (snmp_mib_init((void **)net_statistics,
-			  sizeof(struct linux_mib)) < 0)
-		goto err_net_mib;
 	if (snmp_mib_init((void **)icmp_statistics,
 			  sizeof(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
@@ -1401,8 +1402,6 @@ err_udp_mib:
 err_icmpmsg_mib:
 	snmp_mib_free((void **)icmp_statistics);
 err_icmp_mib:
-	snmp_mib_free((void **)net_statistics);
-err_net_mib:
 	return -ENOMEM;
 }
 
@@ -1582,5 +1581,4 @@ EXPORT_SYMBOL(inet_sock_destruct);
 EXPORT_SYMBOL(inet_stream_connect);
 EXPORT_SYMBOL(inet_stream_ops);
 EXPORT_SYMBOL(inet_unregister_protosw);
-EXPORT_SYMBOL(net_statistics);
 EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 2698bb2ce98..ef38b1dccfd 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -421,7 +421,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)net_statistics,
+			   snmp_fold_field((void **)init_net.mib.net_statistics,
 					   snmp4_net_list[i].entry));
 
 	seq_puts(seq, "\nIpExt:");
-- 
cgit v1.2.3-70-g09d2


From 2f275f91a438abd8eec5321798d66a4ffe6869fa Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:03:27 -0700
Subject: mib: put udp statistics on struct net

Similar to... ouch, I repeat myself.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/mib.h |  1 +
 include/net/udp.h       |  9 ++++-----
 net/ipv4/af_inet.c      | 11 ++++++-----
 net/ipv4/proc.c         |  2 +-
 net/ipv4/udp.c          |  3 ---
 5 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index b5b1115adfa..738c87ce183 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -7,6 +7,7 @@ struct netns_mib {
 	DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 	DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics);
 	DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
+	DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
 };
 
 #endif
diff --git a/include/net/udp.h b/include/net/udp.h
index 3e551592aa7..ba554415270 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -148,7 +148,6 @@ extern int 	udp_lib_setsockopt(struct sock *sk, int level, int optname,
 				   char __user *optval, int optlen,
 				   int (*push_pending_frames)(struct sock *));
 
-DECLARE_SNMP_STAT(struct udp_mib, udp_statistics);
 DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
 
 /* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */
@@ -158,12 +157,12 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 /*
  * 	SNMP statistics for UDP and UDP-Lite
  */
-#define UDP_INC_STATS_USER(net, field, is_udplite)	      do { (void)net; \
+#define UDP_INC_STATS_USER(net, field, is_udplite)	      do { \
 	if (is_udplite) SNMP_INC_STATS_USER(udplite_statistics, field);       \
-	else		SNMP_INC_STATS_USER(udp_statistics, field);  }  while(0)
-#define UDP_INC_STATS_BH(net, field, is_udplite) 	      do { (void)net; \
+	else		SNMP_INC_STATS_USER((net)->mib.udp_statistics, field);  }  while(0)
+#define UDP_INC_STATS_BH(net, field, is_udplite) 	      do { \
 	if (is_udplite) SNMP_INC_STATS_BH(udplite_statistics, field);         \
-	else		SNMP_INC_STATS_BH(udp_statistics, field);    }  while(0)
+	else		SNMP_INC_STATS_BH((net)->mib.udp_statistics, field);    }  while(0)
 
 #define UDP6_INC_STATS_BH(net, field, is_udplite) 	    do { (void)net;  \
 	if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field);         \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 776ed3199b5..1f418164ebf 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1349,10 +1349,15 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 	if (snmp_mib_init((void **)net->mib.net_statistics,
 			  sizeof(struct linux_mib)) < 0)
 		goto err_net_mib;
+	if (snmp_mib_init((void **)net->mib.udp_statistics,
+			  sizeof(struct udp_mib)) < 0)
+		goto err_udp_mib;
 
 	tcp_mib_init(net);
 	return 0;
 
+err_udp_mib:
+	snmp_mib_free((void **)net->mib.net_statistics);
 err_net_mib:
 	snmp_mib_free((void **)net->mib.ip_statistics);
 err_ip_mib:
@@ -1363,6 +1368,7 @@ err_tcp_mib:
 
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
+	snmp_mib_free((void **)net->mib.udp_statistics);
 	snmp_mib_free((void **)net->mib.net_statistics);
 	snmp_mib_free((void **)net->mib.ip_statistics);
 	snmp_mib_free((void **)net->mib.tcp_statistics);
@@ -1381,9 +1387,6 @@ static int __init init_ipv4_mibs(void)
 	if (snmp_mib_init((void **)icmpmsg_statistics,
 			  sizeof(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
-	if (snmp_mib_init((void **)udp_statistics,
-			  sizeof(struct udp_mib)) < 0)
-		goto err_udp_mib;
 	if (snmp_mib_init((void **)udplite_statistics,
 			  sizeof(struct udp_mib)) < 0)
 		goto err_udplite_mib;
@@ -1396,8 +1399,6 @@ static int __init init_ipv4_mibs(void)
 err_net:
 	snmp_mib_free((void **)udplite_statistics);
 err_udplite_mib:
-	snmp_mib_free((void **)udp_statistics);
-err_udp_mib:
 	snmp_mib_free((void **)icmpmsg_statistics);
 err_icmpmsg_mib:
 	snmp_mib_free((void **)icmp_statistics);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ef38b1dccfd..869085c8e43 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -374,7 +374,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)udp_statistics,
+			   snmp_fold_field((void **)init_net.mib.udp_statistics,
 					   snmp4_udp_list[i].entry));
 
 	/* the UDP and UDP-Lite MIBs are the same */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1560d11ba6e..a751770947a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -108,9 +108,6 @@
  *	Snmp MIB for the UDP layer
  */
 
-DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
-EXPORT_SYMBOL(udp_statistics);
-
 DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
 EXPORT_SYMBOL(udp_stats_in6);
 
-- 
cgit v1.2.3-70-g09d2


From 386019d3514b3ed9de8d0b05b67e638a7048375b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:03:45 -0700
Subject: mib: put udplite statistics on struct net

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/mib.h |  1 +
 include/net/udp.h       |  5 ++---
 net/ipv4/af_inet.c      | 11 ++++++-----
 net/ipv4/proc.c         |  2 +-
 net/ipv4/udplite.c      |  1 -
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 738c87ce183..23e699fdb5c 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -8,6 +8,7 @@ struct netns_mib {
 	DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics);
 	DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
 	DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
+	DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
 };
 
 #endif
diff --git a/include/net/udp.h b/include/net/udp.h
index ba554415270..addcdc67234 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -151,17 +151,16 @@ extern int 	udp_lib_setsockopt(struct sock *sk, int level, int optname,
 DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
 
 /* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */
-DECLARE_SNMP_STAT(struct udp_mib, udplite_statistics);
 DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 
 /*
  * 	SNMP statistics for UDP and UDP-Lite
  */
 #define UDP_INC_STATS_USER(net, field, is_udplite)	      do { \
-	if (is_udplite) SNMP_INC_STATS_USER(udplite_statistics, field);       \
+	if (is_udplite) SNMP_INC_STATS_USER((net)->mib.udplite_statistics, field);       \
 	else		SNMP_INC_STATS_USER((net)->mib.udp_statistics, field);  }  while(0)
 #define UDP_INC_STATS_BH(net, field, is_udplite) 	      do { \
-	if (is_udplite) SNMP_INC_STATS_BH(udplite_statistics, field);         \
+	if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_statistics, field);         \
 	else		SNMP_INC_STATS_BH((net)->mib.udp_statistics, field);    }  while(0)
 
 #define UDP6_INC_STATS_BH(net, field, is_udplite) 	    do { (void)net;  \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1f418164ebf..bf1f200c657 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1352,10 +1352,15 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 	if (snmp_mib_init((void **)net->mib.udp_statistics,
 			  sizeof(struct udp_mib)) < 0)
 		goto err_udp_mib;
+	if (snmp_mib_init((void **)net->mib.udplite_statistics,
+			  sizeof(struct udp_mib)) < 0)
+		goto err_udplite_mib;
 
 	tcp_mib_init(net);
 	return 0;
 
+err_udplite_mib:
+	snmp_mib_free((void **)net->mib.udp_statistics);
 err_udp_mib:
 	snmp_mib_free((void **)net->mib.net_statistics);
 err_net_mib:
@@ -1368,6 +1373,7 @@ err_tcp_mib:
 
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
+	snmp_mib_free((void **)net->mib.udplite_statistics);
 	snmp_mib_free((void **)net->mib.udp_statistics);
 	snmp_mib_free((void **)net->mib.net_statistics);
 	snmp_mib_free((void **)net->mib.ip_statistics);
@@ -1387,9 +1393,6 @@ static int __init init_ipv4_mibs(void)
 	if (snmp_mib_init((void **)icmpmsg_statistics,
 			  sizeof(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
-	if (snmp_mib_init((void **)udplite_statistics,
-			  sizeof(struct udp_mib)) < 0)
-		goto err_udplite_mib;
 
 	if (register_pernet_subsys(&ipv4_mib_ops))
 		goto err_net;
@@ -1397,8 +1400,6 @@ static int __init init_ipv4_mibs(void)
 	return 0;
 
 err_net:
-	snmp_mib_free((void **)udplite_statistics);
-err_udplite_mib:
 	snmp_mib_free((void **)icmpmsg_statistics);
 err_icmpmsg_mib:
 	snmp_mib_free((void **)icmp_statistics);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 869085c8e43..765418334b3 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -385,7 +385,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdpLite:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)udplite_statistics,
+			   snmp_fold_field((void **)init_net.mib.udplite_statistics,
 					   snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 4ad16b6d513..3c807964da9 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -11,7 +11,6 @@
  *		2 of the License, or (at your option) any later version.
  */
 #include "udp_impl.h"
-DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics)	__read_mostly;
 
 struct hlist_head 	udplite_hash[UDP_HTABLE_SIZE];
 
-- 
cgit v1.2.3-70-g09d2


From b60538a0d737609213e4b758881913498d3ff0b4 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:04:02 -0700
Subject: mib: put icmp statistics on struct net

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h      |  5 ++---
 include/net/netns/mib.h |  1 +
 net/ipv4/af_inet.c      | 11 ++++++-----
 net/ipv4/icmp.c         |  2 --
 net/ipv4/proc.c         |  8 ++++----
 5 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 03b9972b875..e14f2c03f42 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -29,10 +29,9 @@ struct icmp_err {
 };
 
 extern struct icmp_err icmp_err_convert[];
-DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
 DECLARE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
-#define ICMP_INC_STATS(net, field)	SNMP_INC_STATS(icmp_statistics, field)
-#define ICMP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH(icmp_statistics, field)
+#define ICMP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.icmp_statistics, field)
+#define ICMP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(net, field)	SNMP_INC_STATS(icmpmsg_statistics, field+256)
 #define ICMPMSGIN_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH(icmpmsg_statistics, field)
 
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 23e699fdb5c..03f5abdf58e 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -9,6 +9,7 @@ struct netns_mib {
 	DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
 	DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
 	DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
+	DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
 };
 
 #endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index bf1f200c657..5c72bb3adad 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1355,10 +1355,15 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 	if (snmp_mib_init((void **)net->mib.udplite_statistics,
 			  sizeof(struct udp_mib)) < 0)
 		goto err_udplite_mib;
+	if (snmp_mib_init((void **)net->mib.icmp_statistics,
+			  sizeof(struct icmp_mib)) < 0)
+		goto err_icmp_mib;
 
 	tcp_mib_init(net);
 	return 0;
 
+err_icmp_mib:
+	snmp_mib_free((void **)net->mib.udplite_statistics);
 err_udplite_mib:
 	snmp_mib_free((void **)net->mib.udp_statistics);
 err_udp_mib:
@@ -1373,6 +1378,7 @@ err_tcp_mib:
 
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
+	snmp_mib_free((void **)net->mib.icmp_statistics);
 	snmp_mib_free((void **)net->mib.udplite_statistics);
 	snmp_mib_free((void **)net->mib.udp_statistics);
 	snmp_mib_free((void **)net->mib.net_statistics);
@@ -1387,9 +1393,6 @@ static __net_initdata struct pernet_operations ipv4_mib_ops = {
 
 static int __init init_ipv4_mibs(void)
 {
-	if (snmp_mib_init((void **)icmp_statistics,
-			  sizeof(struct icmp_mib)) < 0)
-		goto err_icmp_mib;
 	if (snmp_mib_init((void **)icmpmsg_statistics,
 			  sizeof(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
@@ -1402,8 +1405,6 @@ static int __init init_ipv4_mibs(void)
 err_net:
 	snmp_mib_free((void **)icmpmsg_statistics);
 err_icmpmsg_mib:
-	snmp_mib_free((void **)icmp_statistics);
-err_icmp_mib:
 	return -ENOMEM;
 }
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ea60ad41008..33f958902d9 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -114,7 +114,6 @@ struct icmp_bxm {
 /*
  *	Statistics
  */
-DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly;
 DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly;
 
 /* An array of errno for error messages from dest unreach. */
@@ -1213,5 +1212,4 @@ int __init icmp_init(void)
 
 EXPORT_SYMBOL(icmp_err_convert);
 EXPORT_SYMBOL(icmp_send);
-EXPORT_SYMBOL(icmp_statistics);
 EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 765418334b3..6b43cce0558 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -311,15 +311,15 @@ static void icmp_put(struct seq_file *seq)
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
 	seq_printf(seq, "\nIcmp: %lu %lu",
-		snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INMSGS),
-		snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INERRORS));
+		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_INMSGS),
+		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_INERRORS));
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
 			snmp_fold_field((void **) icmpmsg_statistics,
 				icmpmibmap[i].index));
 	seq_printf(seq, " %lu %lu",
-		snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTMSGS),
-		snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTERRORS));
+		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_OUTERRORS));
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
 			snmp_fold_field((void **) icmpmsg_statistics,
-- 
cgit v1.2.3-70-g09d2


From 923c6586b0dc0a00df07a1608185437145a0c68b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:04:22 -0700
Subject: mib: put icmpmsg statistics on struct net

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h      |  5 ++---
 include/net/netns/mib.h |  1 +
 net/ipv4/af_inet.c      | 12 ++++++------
 net/ipv4/icmp.c         |  5 -----
 net/ipv4/proc.c         | 10 +++++-----
 5 files changed, 14 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/net/icmp.h b/include/net/icmp.h
index e14f2c03f42..dfa72d4e890 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -29,11 +29,10 @@ struct icmp_err {
 };
 
 extern struct icmp_err icmp_err_convert[];
-DECLARE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
 #define ICMP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.icmp_statistics, field)
 #define ICMP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
-#define ICMPMSGOUT_INC_STATS(net, field)	SNMP_INC_STATS(icmpmsg_statistics, field+256)
-#define ICMPMSGIN_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH(icmpmsg_statistics, field)
+#define ICMPMSGOUT_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.icmpmsg_statistics, field+256)
+#define ICMPMSGIN_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.icmpmsg_statistics, field)
 
 struct dst_entry;
 struct net_proto_family;
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 03f5abdf58e..44914760464 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -10,6 +10,7 @@ struct netns_mib {
 	DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
 	DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
 	DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+	DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
 };
 
 #endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5c72bb3adad..36ff6dc769c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1358,10 +1358,15 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 	if (snmp_mib_init((void **)net->mib.icmp_statistics,
 			  sizeof(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
+	if (snmp_mib_init((void **)net->mib.icmpmsg_statistics,
+			  sizeof(struct icmpmsg_mib)) < 0)
+		goto err_icmpmsg_mib;
 
 	tcp_mib_init(net);
 	return 0;
 
+err_icmpmsg_mib:
+	snmp_mib_free((void **)net->mib.icmp_statistics);
 err_icmp_mib:
 	snmp_mib_free((void **)net->mib.udplite_statistics);
 err_udplite_mib:
@@ -1378,6 +1383,7 @@ err_tcp_mib:
 
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
+	snmp_mib_free((void **)net->mib.icmpmsg_statistics);
 	snmp_mib_free((void **)net->mib.icmp_statistics);
 	snmp_mib_free((void **)net->mib.udplite_statistics);
 	snmp_mib_free((void **)net->mib.udp_statistics);
@@ -1393,18 +1399,12 @@ static __net_initdata struct pernet_operations ipv4_mib_ops = {
 
 static int __init init_ipv4_mibs(void)
 {
-	if (snmp_mib_init((void **)icmpmsg_statistics,
-			  sizeof(struct icmpmsg_mib)) < 0)
-		goto err_icmpmsg_mib;
-
 	if (register_pernet_subsys(&ipv4_mib_ops))
 		goto err_net;
 
 	return 0;
 
 err_net:
-	snmp_mib_free((void **)icmpmsg_statistics);
-err_icmpmsg_mib:
 	return -ENOMEM;
 }
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 33f958902d9..860558633b2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -111,11 +111,6 @@ struct icmp_bxm {
 	unsigned char  optbuf[40];
 };
 
-/*
- *	Statistics
- */
-DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly;
-
 /* An array of errno for error messages from dest unreach. */
 /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6b43cce0558..e11144b0134 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -270,7 +270,7 @@ static void icmpmsg_put(struct seq_file *seq)
 	count = 0;
 	for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
 
-		if (snmp_fold_field((void **) icmpmsg_statistics, i))
+		if (snmp_fold_field((void **) init_net.mib.icmpmsg_statistics, i))
 			out[count++] = i;
 		if (count < PERLINE)
 			continue;
@@ -282,7 +282,7 @@ static void icmpmsg_put(struct seq_file *seq)
 		seq_printf(seq, "\nIcmpMsg: ");
 		for (j = 0; j < PERLINE; ++j)
 			seq_printf(seq, " %lu",
-				snmp_fold_field((void **) icmpmsg_statistics,
+				snmp_fold_field((void **) init_net.mib.icmpmsg_statistics,
 				out[j]));
 		seq_putc(seq, '\n');
 	}
@@ -294,7 +294,7 @@ static void icmpmsg_put(struct seq_file *seq)
 		seq_printf(seq, "\nIcmpMsg:");
 		for (j = 0; j < count; ++j)
 			seq_printf(seq, " %lu", snmp_fold_field((void **)
-				icmpmsg_statistics, out[j]));
+				init_net.mib.icmpmsg_statistics, out[j]));
 	}
 
 #undef PERLINE
@@ -315,14 +315,14 @@ static void icmp_put(struct seq_file *seq)
 		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_INERRORS));
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			snmp_fold_field((void **) icmpmsg_statistics,
+			snmp_fold_field((void **) init_net.mib.icmpmsg_statistics,
 				icmpmibmap[i].index));
 	seq_printf(seq, " %lu %lu",
 		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_OUTMSGS),
 		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_OUTERRORS));
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			snmp_fold_field((void **) icmpmsg_statistics,
+			snmp_fold_field((void **) init_net.mib.icmpmsg_statistics,
 				icmpmibmap[i].index | 0x100));
 }
 
-- 
cgit v1.2.3-70-g09d2


From d89cbbb1e69a3bfea38038fb058bd51013902ef3 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:04:51 -0700
Subject: ipv4: clean the init_ipv4_mibs error paths

After moving all the stuff outside this function it looks
a bit ugly - make it look better.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 36ff6dc769c..dd919d84285 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1399,13 +1399,7 @@ static __net_initdata struct pernet_operations ipv4_mib_ops = {
 
 static int __init init_ipv4_mibs(void)
 {
-	if (register_pernet_subsys(&ipv4_mib_ops))
-		goto err_net;
-
-	return 0;
-
-err_net:
-	return -ENOMEM;
+	return register_pernet_subsys(&ipv4_mib_ops);
 }
 
 static int ipv4_proc_init(void);
-- 
cgit v1.2.3-70-g09d2


From 7b7a9dfdf6ccda647f54ea5fa3bd0ac17a189eeb Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:05:17 -0700
Subject: proc: create /proc/net/netstat file in each net

Now all the shown in it statistics is netnsizated, time to
show it in appropriate net.

The appropriate net init/exit ops already exist - they make
the sockstat file per net - so just extend them.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/proc.c | 47 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e11144b0134..df8c4affde0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -413,6 +413,7 @@ static const struct file_operations snmp_seq_fops = {
 static int netstat_seq_show(struct seq_file *seq, void *v)
 {
 	int i;
+	struct net *net = seq->private;
 
 	seq_puts(seq, "TcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
@@ -421,7 +422,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)init_net.mib.net_statistics,
+			   snmp_fold_field((void **)net->mib.net_statistics,
 					   snmp4_net_list[i].entry));
 
 	seq_puts(seq, "\nIpExt:");
@@ -431,7 +432,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)init_net.mib.ip_statistics,
+			   snmp_fold_field((void **)net->mib.ip_statistics,
 					   snmp4_ipextstats_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -440,7 +441,32 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 
 static int netstat_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, netstat_seq_show, NULL);
+	int err;
+	struct net *net;
+
+	err = -ENXIO;
+	net = get_proc_net(inode);
+	if (net == NULL)
+		goto err_net;
+
+	err = single_open(file, netstat_seq_show, net);
+	if (err < 0)
+		goto err_open;
+
+	return 0;
+
+err_open:
+	put_net(net);
+err_net:
+	return err;
+}
+
+static int netstat_seq_release(struct inode *inode, struct file *file)
+{
+	struct net *net = ((struct seq_file *)file->private_data)->private;
+
+	put_net(net);
+	return single_release(inode, file);
 }
 
 static const struct file_operations netstat_seq_fops = {
@@ -448,18 +474,26 @@ static const struct file_operations netstat_seq_fops = {
 	.open	 = netstat_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = single_release,
+	.release = netstat_seq_release,
 };
 
 static __net_init int ip_proc_init_net(struct net *net)
 {
 	if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops))
 		return -ENOMEM;
+	if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops))
+		goto out_netstat;
+
 	return 0;
+
+out_netstat:
+	proc_net_remove(net, "sockstat");
+	return -ENOMEM;
 }
 
 static __net_exit void ip_proc_exit_net(struct net *net)
 {
+	proc_net_remove(net, "netstat");
 	proc_net_remove(net, "sockstat");
 }
 
@@ -475,16 +509,11 @@ int __init ip_misc_proc_init(void)
 	if (register_pernet_subsys(&ip_proc_ops))
 		goto out_pernet;
 
-	if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
-		goto out_netstat;
-
 	if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
 		goto out_snmp;
 out:
 	return rc;
 out_snmp:
-	proc_net_remove(&init_net, "netstat");
-out_netstat:
 	unregister_pernet_subsys(&ip_proc_ops);
 out_pernet:
 	rc = -ENOMEM;
-- 
cgit v1.2.3-70-g09d2


From 229bf0cbaa054f1502ab4ee219f05985d2c838d1 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:06:04 -0700
Subject: proc: create /proc/net/snmp file in each net

All the statistics shown in this file have been made per-net already.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/proc.c | 70 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 50 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index df8c4affde0..367b81f6f57 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -266,11 +266,12 @@ static void icmpmsg_put(struct seq_file *seq)
 
 	int j, i, count;
 	static int out[PERLINE];
+	struct net *net = seq->private;
 
 	count = 0;
 	for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
 
-		if (snmp_fold_field((void **) init_net.mib.icmpmsg_statistics, i))
+		if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i))
 			out[count++] = i;
 		if (count < PERLINE)
 			continue;
@@ -282,7 +283,7 @@ static void icmpmsg_put(struct seq_file *seq)
 		seq_printf(seq, "\nIcmpMsg: ");
 		for (j = 0; j < PERLINE; ++j)
 			seq_printf(seq, " %lu",
-				snmp_fold_field((void **) init_net.mib.icmpmsg_statistics,
+				snmp_fold_field((void **) net->mib.icmpmsg_statistics,
 				out[j]));
 		seq_putc(seq, '\n');
 	}
@@ -294,7 +295,7 @@ static void icmpmsg_put(struct seq_file *seq)
 		seq_printf(seq, "\nIcmpMsg:");
 		for (j = 0; j < count; ++j)
 			seq_printf(seq, " %lu", snmp_fold_field((void **)
-				init_net.mib.icmpmsg_statistics, out[j]));
+				net->mib.icmpmsg_statistics, out[j]));
 	}
 
 #undef PERLINE
@@ -303,6 +304,7 @@ static void icmpmsg_put(struct seq_file *seq)
 static void icmp_put(struct seq_file *seq)
 {
 	int i;
+	struct net *net = seq->private;
 
 	seq_puts(seq, "\nIcmp: InMsgs InErrors");
 	for (i=0; icmpmibmap[i].name != NULL; i++)
@@ -311,18 +313,18 @@ static void icmp_put(struct seq_file *seq)
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
 	seq_printf(seq, "\nIcmp: %lu %lu",
-		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_INMSGS),
-		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_INERRORS));
+		snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
+		snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			snmp_fold_field((void **) init_net.mib.icmpmsg_statistics,
+			snmp_fold_field((void **) net->mib.icmpmsg_statistics,
 				icmpmibmap[i].index));
 	seq_printf(seq, " %lu %lu",
-		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_OUTMSGS),
-		snmp_fold_field((void **) init_net.mib.icmp_statistics, ICMP_MIB_OUTERRORS));
+		snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+		snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			snmp_fold_field((void **) init_net.mib.icmpmsg_statistics,
+			snmp_fold_field((void **) net->mib.icmpmsg_statistics,
 				icmpmibmap[i].index | 0x100));
 }
 
@@ -332,6 +334,7 @@ static void icmp_put(struct seq_file *seq)
 static int snmp_seq_show(struct seq_file *seq, void *v)
 {
 	int i;
+	struct net *net = seq->private;
 
 	seq_puts(seq, "Ip: Forwarding DefaultTTL");
 
@@ -344,7 +347,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)init_net.mib.ip_statistics,
+			   snmp_fold_field((void **)net->mib.ip_statistics,
 					   snmp4_ipstats_list[i].entry));
 
 	icmp_put(seq);	/* RFC 2011 compatibility */
@@ -359,11 +362,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   snmp_fold_field((void **)init_net.mib.tcp_statistics,
+				   snmp_fold_field((void **)net->mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   snmp_fold_field((void **)init_net.mib.tcp_statistics,
+				   snmp_fold_field((void **)net->mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 	}
 
@@ -374,7 +377,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)init_net.mib.udp_statistics,
+			   snmp_fold_field((void **)net->mib.udp_statistics,
 					   snmp4_udp_list[i].entry));
 
 	/* the UDP and UDP-Lite MIBs are the same */
@@ -385,7 +388,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdpLite:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void **)init_net.mib.udplite_statistics,
+			   snmp_fold_field((void **)net->mib.udplite_statistics,
 					   snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -394,7 +397,32 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 
 static int snmp_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, snmp_seq_show, NULL);
+	int err;
+	struct net *net;
+
+	err = -ENXIO;
+	net = get_proc_net(inode);
+	if (net == NULL)
+		goto err_net;
+
+	err = single_open(file, snmp_seq_show, net);
+	if (err < 0)
+		goto err_open;
+
+	return 0;
+
+err_open:
+	put_net(net);
+err_net:
+	return err;
+}
+
+static int snmp_seq_release(struct inode *inode, struct file *file)
+{
+	struct net *net = ((struct seq_file *)file->private_data)->private;
+
+	put_net(net);
+	return single_release(inode, file);
 }
 
 static const struct file_operations snmp_seq_fops = {
@@ -402,7 +430,7 @@ static const struct file_operations snmp_seq_fops = {
 	.open	 = snmp_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = single_release,
+	.release = snmp_seq_release,
 };
 
 
@@ -483,9 +511,13 @@ static __net_init int ip_proc_init_net(struct net *net)
 		return -ENOMEM;
 	if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops))
 		goto out_netstat;
+	if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops))
+		goto out_snmp;
 
 	return 0;
 
+out_snmp:
+	proc_net_remove(net, "netstat");
 out_netstat:
 	proc_net_remove(net, "sockstat");
 	return -ENOMEM;
@@ -493,6 +525,7 @@ out_netstat:
 
 static __net_exit void ip_proc_exit_net(struct net *net)
 {
+	proc_net_remove(net, "snmp");
 	proc_net_remove(net, "netstat");
 	proc_net_remove(net, "sockstat");
 }
@@ -509,12 +542,9 @@ int __init ip_misc_proc_init(void)
 	if (register_pernet_subsys(&ip_proc_ops))
 		goto out_pernet;
 
-	if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
-		goto out_snmp;
 out:
 	return rc;
-out_snmp:
-	unregister_pernet_subsys(&ip_proc_ops);
+
 out_pernet:
 	rc = -ENOMEM;
 	goto out;
-- 
cgit v1.2.3-70-g09d2


From 8e3461d01bdbc3dbf993448ed9ad4acaaeb6495d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:06:26 -0700
Subject: proc: show per-net ip_devconf.forwarding in /proc/net/snmp

This one has become per-net long ago, but the appropriate file
is per-net only now.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 367b81f6f57..120e1f7461d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -342,7 +342,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
-		   IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2,
+		   IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
 		   sysctl_ip_default_ttl);
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
-- 
cgit v1.2.3-70-g09d2


From 60bdde95807e982a824be9cfdd35055cc721a88a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:06:50 -0700
Subject: proc: clean the ip_misc_proc_init and ip_proc_init_net error paths

After all this stuff is moved outside, this function can look better.

Besides, I tuned the error path in ip_proc_init_net to make it have
only 2 exit points, not 3.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/proc.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 120e1f7461d..554390431a4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -508,7 +508,7 @@ static const struct file_operations netstat_seq_fops = {
 static __net_init int ip_proc_init_net(struct net *net)
 {
 	if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops))
-		return -ENOMEM;
+		goto out_sockstat;
 	if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops))
 		goto out_netstat;
 	if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops))
@@ -520,6 +520,7 @@ out_snmp:
 	proc_net_remove(net, "netstat");
 out_netstat:
 	proc_net_remove(net, "sockstat");
+out_sockstat:
 	return -ENOMEM;
 }
 
@@ -537,16 +538,6 @@ static __net_initdata struct pernet_operations ip_proc_ops = {
 
 int __init ip_misc_proc_init(void)
 {
-	int rc = 0;
-
-	if (register_pernet_subsys(&ip_proc_ops))
-		goto out_pernet;
-
-out:
-	return rc;
-
-out_pernet:
-	rc = -ENOMEM;
-	goto out;
+	return register_pernet_subsys(&ip_proc_ops);
 }
 
-- 
cgit v1.2.3-70-g09d2


From de05c557b24c7dffc6d392e3db120cf11c9f6ae7 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:07:21 -0700
Subject: proc: consolidate per-net single_open callers

There are already 7 of them - time to kill some duplicate code.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/proc_net.c           | 24 +++++++++++++++++++
 include/linux/seq_file_net.h |  2 ++
 net/ipv4/fib_trie.c          | 13 +---------
 net/ipv4/proc.c              | 57 +++-----------------------------------------
 net/ipv6/proc.c              | 19 +--------------
 net/ipv6/route.c             | 26 ++------------------
 6 files changed, 33 insertions(+), 108 deletions(-)

(limited to 'net')

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 83f357b30d7..ab232ad2e6f 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -51,6 +51,30 @@ int seq_open_net(struct inode *ino, struct file *f,
 }
 EXPORT_SYMBOL_GPL(seq_open_net);
 
+int single_open_net(struct inode *inode, struct file *file,
+		int (*show)(struct seq_file *, void *))
+{
+	int err;
+	struct net *net;
+
+	err = -ENXIO;
+	net = get_proc_net(inode);
+	if (net == NULL)
+		goto err_net;
+
+	err = single_open(file, show, net);
+	if (err < 0)
+		goto err_open;
+
+	return 0;
+
+err_open:
+	put_net(net);
+err_net:
+	return err;
+}
+EXPORT_SYMBOL_GPL(single_open_net);
+
 int seq_release_net(struct inode *ino, struct file *f)
 {
 	struct seq_file *seq;
diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h
index 4ac52542a56..87dcc0ecf6e 100644
--- a/include/linux/seq_file_net.h
+++ b/include/linux/seq_file_net.h
@@ -14,6 +14,8 @@ struct seq_net_private {
 
 int seq_open_net(struct inode *, struct file *,
 		 const struct seq_operations *, int);
+int single_open_net(struct inode *, struct file *file,
+		int (*show)(struct seq_file *, void *));
 int seq_release_net(struct inode *, struct file *);
 static inline struct net *seq_file_net(struct seq_file *seq)
 {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f155a66d6eb..6009df238ed 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2251,18 +2251,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 
 static int fib_triestat_seq_open(struct inode *inode, struct file *file)
 {
-	int err;
-	struct net *net;
-
-	net = get_proc_net(inode);
-	if (net == NULL)
-		return -ENXIO;
-	err = single_open(file, fib_triestat_seq_show, net);
-	if (err < 0) {
-		put_net(net);
-		return err;
-	}
-	return 0;
+	return single_open_net(inode, file, fib_triestat_seq_show);
 }
 
 static int fib_triestat_seq_release(struct inode *ino, struct file *f)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 554390431a4..daf5d3c80ce 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -71,24 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 
 static int sockstat_seq_open(struct inode *inode, struct file *file)
 {
-	int err;
-	struct net *net;
-
-	err = -ENXIO;
-	net = get_proc_net(inode);
-	if (net == NULL)
-		goto err_net;
-
-	err = single_open(file, sockstat_seq_show, net);
-	if (err < 0)
-		goto err_open;
-
-	return 0;
-
-err_open:
-	put_net(net);
-err_net:
-	return err;
+	return single_open_net(inode, file, sockstat_seq_show);
 }
 
 static int sockstat_seq_release(struct inode *inode, struct file *file)
@@ -397,24 +380,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 
 static int snmp_seq_open(struct inode *inode, struct file *file)
 {
-	int err;
-	struct net *net;
-
-	err = -ENXIO;
-	net = get_proc_net(inode);
-	if (net == NULL)
-		goto err_net;
-
-	err = single_open(file, snmp_seq_show, net);
-	if (err < 0)
-		goto err_open;
-
-	return 0;
-
-err_open:
-	put_net(net);
-err_net:
-	return err;
+	return single_open_net(inode, file, snmp_seq_show);
 }
 
 static int snmp_seq_release(struct inode *inode, struct file *file)
@@ -469,24 +435,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 
 static int netstat_seq_open(struct inode *inode, struct file *file)
 {
-	int err;
-	struct net *net;
-
-	err = -ENXIO;
-	net = get_proc_net(inode);
-	if (net == NULL)
-		goto err_net;
-
-	err = single_open(file, netstat_seq_show, net);
-	if (err < 0)
-		goto err_open;
-
-	return 0;
-
-err_open:
-	put_net(net);
-err_net:
-	return err;
+	return single_open_net(inode, file, netstat_seq_show);
 }
 
 static int netstat_seq_release(struct inode *inode, struct file *file)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index cbc7e514d3e..29c5a79444c 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -183,24 +183,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
 
 static int sockstat6_seq_open(struct inode *inode, struct file *file)
 {
-	int err;
-	struct net *net;
-
-	err = -ENXIO;
-	net = get_proc_net(inode);
-	if (net == NULL)
-		goto err_net;
-
-	err = single_open(file, sockstat6_seq_show, net);
-	if (err < 0)
-		goto err_open;
-
-	return 0;
-
-err_open:
-	put_net(net);
-err_net:
-	return err;
+	return single_open_net(inode, file, sockstat6_seq_show);
 }
 
 static int sockstat6_seq_release(struct inode *inode, struct file *file)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5d6c166dfbb..fb7ff8f0c6d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2416,18 +2416,7 @@ static int ipv6_route_show(struct seq_file *m, void *v)
 
 static int ipv6_route_open(struct inode *inode, struct file *file)
 {
-	int err;
-	struct net *net = get_proc_net(inode);
-	if (!net)
-		return -ENXIO;
-
-	err = single_open(file, ipv6_route_show, net);
-	if (err < 0) {
-		put_net(net);
-		return err;
-	}
-
-	return 0;
+	return single_open_net(inode, file, ipv6_route_show);
 }
 
 static int ipv6_route_release(struct inode *inode, struct file *file)
@@ -2463,18 +2452,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
 
 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
 {
-	int err;
-	struct net *net = get_proc_net(inode);
-	if (!net)
-		return -ENXIO;
-
-	err = single_open(file, rt6_stats_seq_show, net);
-	if (err < 0) {
-		put_net(net);
-		return err;
-	}
-
-	return 0;
+	return single_open_net(inode, file, rt6_stats_seq_show);
 }
 
 static int rt6_stats_seq_release(struct inode *inode, struct file *file)
-- 
cgit v1.2.3-70-g09d2


From b6fcbdb4f283f7ba67cec3cda6be23da8e959031 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 18 Jul 2008 04:07:44 -0700
Subject: proc: consolidate per-net single-release callers

They are symmetrical to single_open ones :)

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/proc_net.c           |  8 ++++++++
 include/linux/seq_file_net.h |  1 +
 net/ipv4/fib_trie.c          |  9 +--------
 net/ipv4/proc.c              | 30 +++---------------------------
 net/ipv6/proc.c              | 10 +---------
 net/ipv6/route.c             | 20 ++------------------
 6 files changed, 16 insertions(+), 62 deletions(-)

(limited to 'net')

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index ab232ad2e6f..b224a28e0c1 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -87,6 +87,14 @@ int seq_release_net(struct inode *ino, struct file *f)
 }
 EXPORT_SYMBOL_GPL(seq_release_net);
 
+int single_release_net(struct inode *ino, struct file *f)
+{
+	struct seq_file *seq = f->private_data;
+	put_net(seq->private);
+	return single_release(ino, f);
+}
+EXPORT_SYMBOL_GPL(single_release_net);
+
 static struct net *get_proc_task_net(struct inode *dir)
 {
 	struct task_struct *task;
diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h
index 87dcc0ecf6e..32c89bbe24a 100644
--- a/include/linux/seq_file_net.h
+++ b/include/linux/seq_file_net.h
@@ -17,6 +17,7 @@ int seq_open_net(struct inode *, struct file *,
 int single_open_net(struct inode *, struct file *file,
 		int (*show)(struct seq_file *, void *));
 int seq_release_net(struct inode *, struct file *);
+int single_release_net(struct inode *, struct file *);
 static inline struct net *seq_file_net(struct seq_file *seq)
 {
 #ifdef CONFIG_NET_NS
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6009df238ed..5cb72786a8a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2254,19 +2254,12 @@ static int fib_triestat_seq_open(struct inode *inode, struct file *file)
 	return single_open_net(inode, file, fib_triestat_seq_show);
 }
 
-static int fib_triestat_seq_release(struct inode *ino, struct file *f)
-{
-	struct seq_file *seq = f->private_data;
-	put_net(seq->private);
-	return single_release(ino, f);
-}
-
 static const struct file_operations fib_triestat_fops = {
 	.owner	= THIS_MODULE,
 	.open	= fib_triestat_seq_open,
 	.read	= seq_read,
 	.llseek	= seq_lseek,
-	.release = fib_triestat_seq_release,
+	.release = single_release_net,
 };
 
 static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index daf5d3c80ce..834356ea99d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -74,20 +74,12 @@ static int sockstat_seq_open(struct inode *inode, struct file *file)
 	return single_open_net(inode, file, sockstat_seq_show);
 }
 
-static int sockstat_seq_release(struct inode *inode, struct file *file)
-{
-	struct net *net = ((struct seq_file *)file->private_data)->private;
-
-	put_net(net);
-	return single_release(inode, file);
-}
-
 static const struct file_operations sockstat_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = sockstat_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = sockstat_seq_release,
+	.release = single_release_net,
 };
 
 /* snmp items */
@@ -383,20 +375,12 @@ static int snmp_seq_open(struct inode *inode, struct file *file)
 	return single_open_net(inode, file, snmp_seq_show);
 }
 
-static int snmp_seq_release(struct inode *inode, struct file *file)
-{
-	struct net *net = ((struct seq_file *)file->private_data)->private;
-
-	put_net(net);
-	return single_release(inode, file);
-}
-
 static const struct file_operations snmp_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = snmp_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = snmp_seq_release,
+	.release = single_release_net,
 };
 
 
@@ -438,20 +422,12 @@ static int netstat_seq_open(struct inode *inode, struct file *file)
 	return single_open_net(inode, file, netstat_seq_show);
 }
 
-static int netstat_seq_release(struct inode *inode, struct file *file)
-{
-	struct net *net = ((struct seq_file *)file->private_data)->private;
-
-	put_net(net);
-	return single_release(inode, file);
-}
-
 static const struct file_operations netstat_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = netstat_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = netstat_seq_release,
+	.release = single_release_net,
 };
 
 static __net_init int ip_proc_init_net(struct net *net)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 29c5a79444c..70940b3654a 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -186,20 +186,12 @@ static int sockstat6_seq_open(struct inode *inode, struct file *file)
 	return single_open_net(inode, file, sockstat6_seq_show);
 }
 
-static int sockstat6_seq_release(struct inode *inode, struct file *file)
-{
-	struct net *net = ((struct seq_file *)file->private_data)->private;
-
-	put_net(net);
-	return single_release(inode, file);
-}
-
 static const struct file_operations sockstat6_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = sockstat6_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = sockstat6_seq_release,
+	.release = single_release_net,
 };
 
 static int snmp6_seq_open(struct inode *inode, struct file *file)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fb7ff8f0c6d..cb8a51271b6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2419,20 +2419,12 @@ static int ipv6_route_open(struct inode *inode, struct file *file)
 	return single_open_net(inode, file, ipv6_route_show);
 }
 
-static int ipv6_route_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct net *net = seq->private;
-	put_net(net);
-	return single_release(inode, file);
-}
-
 static const struct file_operations ipv6_route_proc_fops = {
 	.owner		= THIS_MODULE,
 	.open		= ipv6_route_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= ipv6_route_release,
+	.release	= single_release_net,
 };
 
 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
@@ -2455,20 +2447,12 @@ static int rt6_stats_seq_open(struct inode *inode, struct file *file)
 	return single_open_net(inode, file, rt6_stats_seq_show);
 }
 
-static int rt6_stats_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct net *net = (struct net *)seq->private;
-	put_net(net);
-	return single_release(inode, file);
-}
-
 static const struct file_operations rt6_stats_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = rt6_stats_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = rt6_stats_seq_release,
+	.release = single_release_net,
 };
 #endif	/* CONFIG_PROC_FS */
 
-- 
cgit v1.2.3-70-g09d2


From 65c011845316d3c1381f478ca0d8265c43b3b039 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 15 Jul 2008 14:14:30 -0700
Subject: cpumask: Replace cpumask_of_cpu with cpumask_of_cpu_ptr

  * This patch replaces the dangerous lvalue version of cpumask_of_cpu
    with new cpumask_of_cpu_ptr macros.  These are patterned after the
    node_to_cpumask_ptr macros.

    In general terms, if there is a cpumask_of_cpu_map[] then a pointer to
    the cpumask_of_cpu_map[cpu] entry is used.  The cpumask_of_cpu_map
    is provided when there is a large NR_CPUS count, reducing
    greatly the amount of code generated and stack space used for
    cpumask_of_cpu().  The pointer to the cpumask_t value is needed for
    calling set_cpus_allowed_ptr() to reduce the amount of stack space
    needed to pass the cpumask_t value.

    If there isn't a cpumask_of_cpu_map[], then a temporary variable is
    declared and filled in with value from cpumask_of_cpu(cpu) as well as
    a pointer variable pointing to this temporary variable.  Afterwards,
    the pointer is used to reference the cpumask value.  The compiler
    will optimize out the extra dereference through the pointer as well
    as the stack space used for the pointer, resulting in identical code.

    A good example of the orthogonal usages is in net/sunrpc/svc.c:

	case SVC_POOL_PERCPU:
	{
		unsigned int cpu = m->pool_to[pidx];
		cpumask_of_cpu_ptr(cpumask, cpu);

		*oldmask = current->cpus_allowed;
		set_cpus_allowed_ptr(current, cpumask);
		return 1;
	}
	case SVC_POOL_PERNODE:
	{
		unsigned int node = m->pool_to[pidx];
		node_to_cpumask_ptr(nodecpumask, node);

		*oldmask = current->cpus_allowed;
		set_cpus_allowed_ptr(current, nodecpumask);
		return 1;
	}

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/cstate.c                    |  3 ++-
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c       | 10 +++++---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c        | 15 +++++++----
 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c |  9 ++++---
 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c      |  3 ++-
 arch/x86/kernel/cpu/intel_cacheinfo.c            |  3 ++-
 arch/x86/kernel/microcode.c                      | 13 +++++++---
 arch/x86/kernel/reboot.c                         | 14 +++++++----
 drivers/acpi/processor_throttling.c              | 11 +++++---
 drivers/firmware/dcdbas.c                        |  3 ++-
 include/linux/cpumask.h                          | 32 ++++++++++++++++++++----
 kernel/stop_machine.c                            |  3 ++-
 kernel/trace/trace_sysprof.c                     |  4 ++-
 net/sunrpc/svc.c                                 |  3 ++-
 14 files changed, 91 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index c2502eb9aa8..9220cf46aa1 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -73,6 +73,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	cpumask_t saved_mask;
+	cpumask_of_cpu_ptr(new_mask, cpu);
 	int retval;
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int edx_part;
@@ -91,7 +92,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 
 	/* Make sure we are running on right CPU */
 	saved_mask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	retval = set_cpus_allowed_ptr(current, new_mask);
 	if (retval)
 		return -1;
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index dd097b83583..ff2fff56f0a 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -200,10 +200,12 @@ static void drv_read(struct drv_cmd *cmd)
 static void drv_write(struct drv_cmd *cmd)
 {
 	cpumask_t saved_mask = current->cpus_allowed;
+	cpumask_of_cpu_ptr_declare(cpu_mask);
 	unsigned int i;
 
 	for_each_cpu_mask_nr(i, cmd->mask) {
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
+		cpumask_of_cpu_ptr_next(cpu_mask, i);
+		set_cpus_allowed_ptr(current, cpu_mask);
 		do_drv_write(cmd);
 	}
 
@@ -267,11 +269,12 @@ static unsigned int get_measured_perf(unsigned int cpu)
 	} aperf_cur, mperf_cur;
 
 	cpumask_t saved_mask;
+	cpumask_of_cpu_ptr(cpu_mask, cpu);
 	unsigned int perf_percent;
 	unsigned int retval;
 
 	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, cpu_mask);
 	if (get_cpu() != cpu) {
 		/* We were not able to run on requested processor */
 		put_cpu();
@@ -337,6 +340,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
+	cpumask_of_cpu_ptr(cpu_mask, cpu);
 	struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
 	unsigned int freq;
 	unsigned int cached_freq;
@@ -349,7 +353,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	}
 
 	cached_freq = data->freq_table[data->acpi_data->state].frequency;
-	freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
+	freq = extract_freq(get_cur_val(cpu_mask), data);
 	if (freq != cached_freq) {
 		/*
 		 * The dreaded BIOS frequency change behind our back.
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index c45ca6d4dce..53c7b693697 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -479,11 +479,12 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
 static int check_supported_cpu(unsigned int cpu)
 {
 	cpumask_t oldmask;
+	cpumask_of_cpu_ptr(cpu_mask, cpu);
 	u32 eax, ebx, ecx, edx;
 	unsigned int rc = 0;
 
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, cpu_mask);
 
 	if (smp_processor_id() != cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
@@ -1016,6 +1017,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
 {
 	cpumask_t oldmask;
+	cpumask_of_cpu_ptr(cpu_mask, pol->cpu);
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 	u32 checkfid;
 	u32 checkvid;
@@ -1030,7 +1032,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
 
 	/* only run on specific CPU from here on */
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
+	set_cpus_allowed_ptr(current, cpu_mask);
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1105,6 +1107,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask;
+	cpumask_of_cpu_ptr_declare(newmask);
 	int rc;
 
 	if (!cpu_online(pol->cpu))
@@ -1156,7 +1159,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 
 	/* only run on specific CPU from here on */
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
+	cpumask_of_cpu_ptr_next(newmask, pol->cpu);
+	set_cpus_allowed_ptr(current, newmask);
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1178,7 +1182,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	set_cpus_allowed_ptr(current, &oldmask);
 
 	if (cpu_family == CPU_HW_PSTATE)
-		pol->cpus = cpumask_of_cpu(pol->cpu);
+		pol->cpus = *newmask;
 	else
 		pol->cpus = per_cpu(cpu_core_map, pol->cpu);
 	data->available_cores = &(pol->cpus);
@@ -1244,6 +1248,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask = current->cpus_allowed;
+	cpumask_of_cpu_ptr(newmask, cpu);
 	unsigned int khz = 0;
 	unsigned int first;
 
@@ -1253,7 +1258,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
 	if (!data)
 		return -EINVAL;
 
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, newmask);
 	if (smp_processor_id() != cpu) {
 		printk(KERN_ERR PFX
 			"limiting to CPU %d failed in powernowk8_get\n", cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 8b0dd6f2a1a..fd561bb26c6 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -313,9 +313,10 @@ static unsigned int get_cur_freq(unsigned int cpu)
 	unsigned l, h;
 	unsigned clock_freq;
 	cpumask_t saved_mask;
+	cpumask_of_cpu_ptr(new_mask, cpu);
 
 	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, new_mask);
 	if (smp_processor_id() != cpu)
 		return 0;
 
@@ -554,9 +555,11 @@ static int centrino_target (struct cpufreq_policy *policy,
 		 */
 
 		if (!cpus_empty(covered_cpus)) {
+			cpumask_of_cpu_ptr_declare(new_mask);
+
 			for_each_cpu_mask_nr(j, covered_cpus) {
-				set_cpus_allowed_ptr(current,
-						     &cpumask_of_cpu(j));
+				cpumask_of_cpu_ptr_next(new_mask, j);
+				set_cpus_allowed_ptr(current, new_mask);
 				wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
 			}
 		}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 191f7263c61..2f3728dc24f 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -244,7 +244,8 @@ static unsigned int _speedstep_get(const cpumask_t *cpus)
 
 static unsigned int speedstep_get(unsigned int cpu)
 {
-	return _speedstep_get(&cpumask_of_cpu(cpu));
+	cpumask_of_cpu_ptr(newmask, cpu);
+	return _speedstep_get(newmask);
 }
 
 /**
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index a7b0f8f1736..e4b8d189d7e 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -516,6 +516,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 	unsigned long		j;
 	int			retval;
 	cpumask_t		oldmask;
+	cpumask_of_cpu_ptr(newmask, cpu);
 
 	if (num_cache_leaves == 0)
 		return -ENOENT;
@@ -526,7 +527,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 		return -ENOMEM;
 
 	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	retval = set_cpus_allowed_ptr(current, newmask);
 	if (retval)
 		goto out;
 
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 56b933119a0..58520169e35 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -388,6 +388,7 @@ static int do_microcode_update (void)
 	void *new_mc = NULL;
 	int cpu;
 	cpumask_t old;
+	cpumask_of_cpu_ptr_declare(newmask);
 
 	old = current->cpus_allowed;
 
@@ -404,7 +405,8 @@ static int do_microcode_update (void)
 
 			if (!uci->valid)
 				continue;
-			set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+			cpumask_of_cpu_ptr_next(newmask, cpu);
+			set_cpus_allowed_ptr(current, newmask);
 			error = get_maching_microcode(new_mc, cpu);
 			if (error < 0)
 				goto out;
@@ -574,6 +576,7 @@ static int apply_microcode_check_cpu(int cpu)
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	cpumask_t old;
+	cpumask_of_cpu_ptr(newmask, cpu);
 	unsigned int val[2];
 	int err = 0;
 
@@ -582,7 +585,7 @@ static int apply_microcode_check_cpu(int cpu)
 		return 0;
 
 	old = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, newmask);
 
 	/* Check if the microcode we have in memory matches the CPU */
 	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
@@ -620,11 +623,12 @@ static int apply_microcode_check_cpu(int cpu)
 static void microcode_init_cpu(int cpu, int resume)
 {
 	cpumask_t old;
+	cpumask_of_cpu_ptr(newmask, cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	old = current->cpus_allowed;
 
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	set_cpus_allowed_ptr(current, newmask);
 	mutex_lock(&microcode_mutex);
 	collect_cpu_info(cpu);
 	if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
@@ -656,11 +660,12 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
 		return -EINVAL;
 	if (val == 1) {
 		cpumask_t old;
+		cpumask_of_cpu_ptr(newmask, cpu);
 
 		old = current->cpus_allowed;
 
 		get_online_cpus();
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+		set_cpus_allowed_ptr(current, newmask);
 
 		mutex_lock(&microcode_mutex);
 		if (uci->valid)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f8a62160e15..214bbdfc851 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -403,24 +403,28 @@ void native_machine_shutdown(void)
 {
 	/* Stop the cpus and apics */
 #ifdef CONFIG_SMP
-	int reboot_cpu_id;
 
 	/* The boot cpu is always logical cpu 0 */
-	reboot_cpu_id = 0;
+	int reboot_cpu_id = 0;
+	cpumask_of_cpu_ptr(newmask, reboot_cpu_id);
 
 #ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
 	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
-		cpu_online(reboot_cpu))
+		cpu_online(reboot_cpu)) {
 		reboot_cpu_id = reboot_cpu;
+		cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
+	}
 #endif
 
 	/* Make certain the cpu I'm about to reboot on is online */
-	if (!cpu_online(reboot_cpu_id))
+	if (!cpu_online(reboot_cpu_id)) {
 		reboot_cpu_id = smp_processor_id();
+		cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
+	}
 
 	/* Make certain I only run on the appropriate processor */
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
+	set_cpus_allowed_ptr(current, newmask);
 
 	/* O.K Now that I'm on the appropriate processor,
 	 * stop all of the others.
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index a56fc6c4394..a2c3f9cfa54 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -827,6 +827,7 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr)
 static int acpi_processor_get_throttling(struct acpi_processor *pr)
 {
 	cpumask_t saved_mask;
+	cpumask_of_cpu_ptr_declare(new_mask);
 	int ret;
 
 	if (!pr)
@@ -838,7 +839,8 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr)
 	 * Migrate task to the cpu pointed by pr.
 	 */
 	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
+	cpumask_of_cpu_ptr_next(new_mask, pr->id);
+	set_cpus_allowed_ptr(current, new_mask);
 	ret = pr->throttling.acpi_processor_get_throttling(pr);
 	/* restore the previous state */
 	set_cpus_allowed_ptr(current, &saved_mask);
@@ -987,6 +989,7 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr,
 int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 {
 	cpumask_t saved_mask;
+	cpumask_of_cpu_ptr_declare(new_mask);
 	int ret = 0;
 	unsigned int i;
 	struct acpi_processor *match_pr;
@@ -1025,7 +1028,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 	 * it can be called only for the cpu pointed by pr.
 	 */
 	if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) {
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
+		cpumask_of_cpu_ptr_next(new_mask, pr->id);
+		set_cpus_allowed_ptr(current, new_mask);
 		ret = p_throttling->acpi_processor_set_throttling(pr,
 						t_state.target_state);
 	} else {
@@ -1056,7 +1060,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 				continue;
 			}
 			t_state.cpu = i;
-			set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
+			cpumask_of_cpu_ptr_next(new_mask, i);
+			set_cpus_allowed_ptr(current, new_mask);
 			ret = match_pr->throttling.
 				acpi_processor_set_throttling(
 				match_pr, t_state.target_state);
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 25918f7dfd0..0b624e927a6 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -254,6 +254,7 @@ static ssize_t host_control_on_shutdown_store(struct device *dev,
 static int smi_request(struct smi_cmd *smi_cmd)
 {
 	cpumask_t old_mask;
+	cpumask_of_cpu_ptr(new_mask, 0);
 	int ret = 0;
 
 	if (smi_cmd->magic != SMI_CMD_MAGIC) {
@@ -264,7 +265,7 @@ static int smi_request(struct smi_cmd *smi_cmd)
 
 	/* SMI requires CPU 0 */
 	old_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(0));
+	set_cpus_allowed_ptr(current, new_mask);
 	if (smp_processor_id() != 0) {
 		dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
 			__func__);
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 80226e77614..2dbd9a287e7 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -62,6 +62,15 @@
  * int next_cpu_nr(cpu, mask)		Next cpu past 'cpu', or nr_cpu_ids
  *
  * cpumask_t cpumask_of_cpu(cpu)	Return cpumask with bit 'cpu' set
+ *ifdef CONFIG_HAS_CPUMASK_OF_CPU
+ * cpumask_of_cpu_ptr_declare(v)	Declares cpumask_t *v
+ * cpumask_of_cpu_ptr_next(v, cpu)	Sets v = &cpumask_of_cpu_map[cpu]
+ * cpumask_of_cpu_ptr(v, cpu)		Combines above two operations
+ *else
+ * cpumask_of_cpu_ptr_declare(v)	Declares cpumask_t _v and *v = &_v
+ * cpumask_of_cpu_ptr_next(v, cpu)	Sets _v = cpumask_of_cpu(cpu)
+ * cpumask_of_cpu_ptr(v, cpu)		Combines above two operations
+ *endif
  * CPU_MASK_ALL				Initializer - all bits set
  * CPU_MASK_NONE			Initializer - no bits set
  * unsigned long *cpus_addr(mask)	Array of unsigned long's in mask
@@ -236,11 +245,16 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 
 #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
 extern cpumask_t *cpumask_of_cpu_map;
-#define cpumask_of_cpu(cpu)    (cpumask_of_cpu_map[cpu])
-
+#define cpumask_of_cpu(cpu)	(cpumask_of_cpu_map[cpu])
+#define	cpumask_of_cpu_ptr(v, cpu)					\
+		const cpumask_t *v = &cpumask_of_cpu(cpu)
+#define	cpumask_of_cpu_ptr_declare(v)					\
+		const cpumask_t *v
+#define cpumask_of_cpu_ptr_next(v, cpu)					\
+					v = &cpumask_of_cpu(cpu)
 #else
 #define cpumask_of_cpu(cpu)						\
-(*({									\
+({									\
 	typeof(_unused_cpumask_arg_) m;					\
 	if (sizeof(m) == sizeof(unsigned long)) {			\
 		m.bits[0] = 1UL<<(cpu);					\
@@ -248,8 +262,16 @@ extern cpumask_t *cpumask_of_cpu_map;
 		cpus_clear(m);						\
 		cpu_set((cpu), m);					\
 	}								\
-	&m;								\
-}))
+	m;								\
+})
+#define	cpumask_of_cpu_ptr(v, cpu) 					\
+		cpumask_t _##v = cpumask_of_cpu(cpu);			\
+		const cpumask_t *v = &_##v
+#define	cpumask_of_cpu_ptr_declare(v)					\
+		cpumask_t _##v;						\
+		const cpumask_t *v = &_##v
+#define cpumask_of_cpu_ptr_next(v, cpu)					\
+					_##v = cpumask_of_cpu(cpu)
 #endif
 
 #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ba9b2054ecb..738b411ff2d 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -33,8 +33,9 @@ static int stopmachine(void *cpu)
 {
 	int irqs_disabled = 0;
 	int prepared = 0;
+	cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
 
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
+	set_cpus_allowed_ptr(current, cpumask);
 
 	/* Ack: we are alive */
 	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 2301e1e7c60..63528086337 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -213,7 +213,9 @@ static void start_stack_timers(void)
 	int cpu;
 
 	for_each_online_cpu(cpu) {
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+		cpumask_of_cpu_ptr(new_mask, cpu);
+
+		set_cpus_allowed_ptr(current, new_mask);
 		start_stack_timer(cpu);
 	}
 	set_cpus_allowed_ptr(current, &saved_mask);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d43cf8ddff6..083d1268813 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -314,9 +314,10 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
 	case SVC_POOL_PERCPU:
 	{
 		unsigned int cpu = m->pool_to[pidx];
+		cpumask_of_cpu_ptr(cpumask, cpu);
 
 		*oldmask = current->cpus_allowed;
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+		set_cpus_allowed_ptr(current, cpumask);
 		return 1;
 	}
 	case SVC_POOL_PERNODE:
-- 
cgit v1.2.3-70-g09d2


From 8913336a7e8d56e984109a3137d6c0e3362596a4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 18 Jul 2008 18:05:19 -0700
Subject: packet: add PACKET_RESERVE sockopt

Add new sockopt to reserve some headroom in the mmaped ring frames in
front of the packet payload. This can be used f.i. when the VLAN header
needs to be (re)constructed to avoid moving the entire payload.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h |  1 +
 net/packet/af_packet.c    | 29 ++++++++++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index a630295b255..18db0668065 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -45,6 +45,7 @@ struct sockaddr_ll
 #define PACKET_ORIGDEV			9
 #define PACKET_VERSION			10
 #define PACKET_HDRLEN			11
+#define PACKET_RESERVE			12
 
 struct tpacket_stats
 {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index db792e02a37..de73bcb5235 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -188,6 +188,7 @@ struct packet_sock {
 	unsigned int		pg_vec_len;
 	enum tpacket_versions	tp_version;
 	unsigned int		tp_hdrlen;
+	unsigned int		tp_reserve;
 #endif
 };
 
@@ -635,11 +636,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 		snaplen = res;
 
 	if (sk->sk_type == SOCK_DGRAM) {
-		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16;
+		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
+				  po->tp_reserve;
 	} else {
 		unsigned maclen = skb_network_offset(skb);
 		netoff = TPACKET_ALIGN(po->tp_hdrlen +
-				       (maclen < 16 ? 16 : maclen));
+				       (maclen < 16 ? 16 : maclen)) +
+			po->tp_reserve;
 		macoff = netoff - maclen;
 	}
 
@@ -1448,6 +1451,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 			return -EINVAL;
 		}
 	}
+	case PACKET_RESERVE:
+	{
+		unsigned int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (po->pg_vec)
+			return -EBUSY;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+		po->tp_reserve = val;
+		return 0;
+	}
 #endif
 	case PACKET_AUXDATA:
 	{
@@ -1547,6 +1563,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		}
 		data = &val;
 		break;
+	case PACKET_RESERVE:
+		if (len > sizeof(unsigned int))
+			len = sizeof(unsigned int);
+		val = po->tp_reserve;
+		data = &val;
+		break;
 #endif
 	default:
 		return -ENOPROTOOPT;
@@ -1790,7 +1812,8 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 			return -EINVAL;
 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
 			return -EINVAL;
-		if (unlikely(req->tp_frame_size < po->tp_hdrlen))
+		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
+						  po->tp_reserve))
 			return -EINVAL;
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			return -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From 72b25a913ed9b1ab49c7022adaf3f271a65ea219 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 18 Jul 2008 20:54:17 -0700
Subject: pkt_sched: Get rid of u32_list.

The u32_list is just an indirect way of maintaining a reference
to a U32 node on a per-qdisc basis.

Just add an explicit node pointer for u32 to struct Qdisc an do
away with this global list.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  2 ++
 net/sched/cls_u32.c       | 18 +++---------------
 2 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 0a158ff4de1..8a44386b35c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -56,6 +56,8 @@ struct Qdisc
 	int			(*reshape_fail)(struct sk_buff *skb,
 					struct Qdisc *q);
 
+	void			*u32_node;
+
 	/* This field is deprecated, but it is still used by CBQ
 	 * and it will live until better solution will be invented.
 	 */
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4d755444c44..527db2559dd 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -75,7 +75,6 @@ struct tc_u_hnode
 
 struct tc_u_common
 {
-	struct tc_u_common	*next;
 	struct tc_u_hnode	*hlist;
 	struct Qdisc		*q;
 	int			refcnt;
@@ -87,8 +86,6 @@ static const struct tcf_ext_map u32_ext_map = {
 	.police = TCA_U32_POLICE
 };
 
-static struct tc_u_common *u32_list;
-
 static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift)
 {
 	unsigned h = ntohl(key & sel->hmask)>>fshift;
@@ -287,9 +284,7 @@ static int u32_init(struct tcf_proto *tp)
 	struct tc_u_hnode *root_ht;
 	struct tc_u_common *tp_c;
 
-	for (tp_c = u32_list; tp_c; tp_c = tp_c->next)
-		if (tp_c->q == tp->q)
-			break;
+	tp_c = tp->q->u32_node;
 
 	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
 	if (root_ht == NULL)
@@ -307,8 +302,7 @@ static int u32_init(struct tcf_proto *tp)
 			return -ENOBUFS;
 		}
 		tp_c->q = tp->q;
-		tp_c->next = u32_list;
-		u32_list = tp_c;
+		tp->q->u32_node = tp_c;
 	}
 
 	tp_c->refcnt++;
@@ -402,14 +396,8 @@ static void u32_destroy(struct tcf_proto *tp)
 
 	if (--tp_c->refcnt == 0) {
 		struct tc_u_hnode *ht;
-		struct tc_u_common **tp_cp;
 
-		for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) {
-			if (*tp_cp == tp_c) {
-				*tp_cp = tp_c->next;
-				break;
-			}
-		}
+		tp->q->u32_node = NULL;
 
 		for (ht = tp_c->hlist; ht; ht = ht->next) {
 			ht->refcnt--;
-- 
cgit v1.2.3-70-g09d2


From 3072367300aa8c779e3a14ee8e89de079e90f3ad Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 18 Jul 2008 22:50:15 -0700
Subject: pkt_sched: Manage qdisc list inside of root qdisc.

Idea is from Patrick McHardy.

Instead of managing the list of qdiscs on the device level, manage it
in the root qdisc of a netdev_queue.  This solves all kinds of
visibility issues during qdisc destruction.

The way to iterate over all qdiscs of a netdev_queue is to visit
the netdev_queue->qdisc, and then traverse it's list.

The only special case is to ignore builting qdiscs at the root when
dumping or doing a qdisc_lookup().  That was not needed previously
because builtin qdiscs were not added to the device's qdisc_list.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   2 -
 net/core/dev.c            |   2 -
 net/sched/sch_api.c       | 175 ++++++++++++++++++++++++++++++++++------------
 net/sched/sch_generic.c   |  10 +--
 4 files changed, 133 insertions(+), 56 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9c5a6885011..812bcd8b436 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -636,8 +636,6 @@ struct net_device
 	unsigned int		real_num_tx_queues;
 
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
-	spinlock_t		qdisc_list_lock;
-	struct list_head	qdisc_list;
 
 /*
  * One part is mostly used on xmit path (device)
diff --git a/net/core/dev.c b/net/core/dev.c
index e54acde839d..065b9817e20 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3888,8 +3888,6 @@ int register_netdevice(struct net_device *dev)
 	net = dev_net(dev);
 
 	spin_lock_init(&dev->addr_list_lock);
-	spin_lock_init(&dev->qdisc_list_lock);
-	INIT_LIST_HEAD(&dev->qdisc_list);
 	netdev_init_queue_locks(dev);
 
 	dev->iflink = -1;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b3ef8307204..fb43731c986 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -185,11 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc);
 
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
-	struct Qdisc *q;
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		struct Qdisc *q, *txq_root = txq->qdisc;
 
-	list_for_each_entry(q, &dev->qdisc_list, list) {
-		if (q->handle == handle)
-			return q;
+		if (!(txq_root->flags & TCQ_F_BUILTIN) &&
+		    txq_root->handle == handle)
+			return txq_root;
+
+		list_for_each_entry(q, &txq_root->list, list) {
+			if (q->handle == handle)
+				return q;
+		}
 	}
 	return NULL;
 }
@@ -676,9 +685,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 				goto err_out3;
 			}
 		}
-		spin_lock_bh(&dev->qdisc_list_lock);
-		list_add_tail(&sch->list, &dev->qdisc_list);
-		spin_unlock_bh(&dev->qdisc_list_lock);
+		if (parent)
+			list_add_tail(&sch->list, &dev_queue->qdisc->list);
 
 		return sch;
 	}
@@ -1037,13 +1045,57 @@ err_out:
 	return -EINVAL;
 }
 
+static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+{
+	return (q->flags & TCQ_F_BUILTIN) ? true : false;
+}
+
+static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
+			      struct netlink_callback *cb,
+			      int *q_idx_p, int s_q_idx)
+{
+	int ret = 0, q_idx = *q_idx_p;
+	struct Qdisc *q;
+
+	if (!root)
+		return 0;
+
+	q = root;
+	if (q_idx < s_q_idx) {
+		q_idx++;
+	} else {
+		if (!tc_qdisc_dump_ignore(q) &&
+		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+			goto done;
+		q_idx++;
+	}
+	list_for_each_entry(q, &root->list, list) {
+		if (q_idx < s_q_idx) {
+			q_idx++;
+			continue;
+		}
+		if (!tc_qdisc_dump_ignore(q) && 
+		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+			goto done;
+		q_idx++;
+	}
+
+out:
+	*q_idx_p = q_idx;
+	return ret;
+done:
+	ret = -1;
+	goto out;
+}
+
 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
 	int idx, q_idx;
 	int s_idx, s_q_idx;
 	struct net_device *dev;
-	struct Qdisc *q;
 
 	if (net != &init_net)
 		return 0;
@@ -1053,21 +1105,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 	read_lock(&dev_base_lock);
 	idx = 0;
 	for_each_netdev(&init_net, dev) {
+		struct netdev_queue *dev_queue;
+
 		if (idx < s_idx)
 			goto cont;
 		if (idx > s_idx)
 			s_q_idx = 0;
 		q_idx = 0;
-		list_for_each_entry(q, &dev->qdisc_list, list) {
-			if (q_idx < s_q_idx) {
-				q_idx++;
-				continue;
-			}
-			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
-				goto done;
-			q_idx++;
-		}
+
+		dev_queue = netdev_get_tx_queue(dev, 0);
+		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+			goto done;
+
+		dev_queue = &dev->rx_queue;
+		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+			goto done;
+
 cont:
 		idx++;
 	}
@@ -1285,15 +1338,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
 }
 
+static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
+				struct tcmsg *tcm, struct netlink_callback *cb,
+				int *t_p, int s_t)
+{
+	struct qdisc_dump_args arg;
+
+	if (tc_qdisc_dump_ignore(q) ||
+	    *t_p < s_t || !q->ops->cl_ops ||
+	    (tcm->tcm_parent &&
+	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
+		(*t_p)++;
+		return 0;
+	}
+	if (*t_p > s_t)
+		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+	arg.w.fn = qdisc_class_dump;
+	arg.skb = skb;
+	arg.cb = cb;
+	arg.w.stop  = 0;
+	arg.w.skip = cb->args[1];
+	arg.w.count = 0;
+	q->ops->cl_ops->walk(q, &arg.w);
+	cb->args[1] = arg.w.count;
+	if (arg.w.stop)
+		return -1;
+	(*t_p)++;
+	return 0;
+}
+
+static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
+			       struct tcmsg *tcm, struct netlink_callback *cb,
+			       int *t_p, int s_t)
+{
+	struct Qdisc *q;
+
+	if (!root)
+		return 0;
+
+	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
+		return -1;
+
+	list_for_each_entry(q, &root->list, list) {
+		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
 	struct net *net = sock_net(skb->sk);
-	int t;
-	int s_t;
+	struct netdev_queue *dev_queue;
 	struct net_device *dev;
-	struct Qdisc *q;
-	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
-	struct qdisc_dump_args arg;
+	int t, s_t;
 
 	if (net != &init_net)
 		return 0;
@@ -1306,28 +1406,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	s_t = cb->args[0];
 	t = 0;
 
-	list_for_each_entry(q, &dev->qdisc_list, list) {
-		if (t < s_t || !q->ops->cl_ops ||
-		    (tcm->tcm_parent &&
-		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
-			t++;
-			continue;
-		}
-		if (t > s_t)
-			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
-		arg.w.fn = qdisc_class_dump;
-		arg.skb = skb;
-		arg.cb = cb;
-		arg.w.stop  = 0;
-		arg.w.skip = cb->args[1];
-		arg.w.count = 0;
-		q->ops->cl_ops->walk(q, &arg.w);
-		cb->args[1] = arg.w.count;
-		if (arg.w.stop)
-			break;
-		t++;
-	}
+	dev_queue = netdev_get_tx_queue(dev, 0);
+	if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
+		goto done;
+
+	dev_queue = &dev->rx_queue;
+	if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
+		goto done;
 
+done:
 	cb->args[0] = t;
 
 	dev_put(dev);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e244c462e6b..14cc443d049 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head)
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
-	struct net_device *dev = qdisc_dev(qdisc);
-
 	if (qdisc->flags & TCQ_F_BUILTIN ||
 	    !atomic_dec_and_test(&qdisc->refcnt))
 		return;
 
-	spin_lock_bh(&dev->qdisc_list_lock);
-	list_del(&qdisc->list);
-	spin_unlock_bh(&dev->qdisc_list_lock);
+	if (qdisc->parent)
+		list_del(&qdisc->list);
 
 	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
 }
@@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev,
 			printk(KERN_INFO "%s: activation failed\n", dev->name);
 			return;
 		}
-		spin_lock_bh(&dev->qdisc_list_lock);
-		list_add_tail(&qdisc->list, &dev->qdisc_list);
-		spin_unlock_bh(&dev->qdisc_list_lock);
 	} else {
 		qdisc =  &noqueue_qdisc;
 	}
-- 
cgit v1.2.3-70-g09d2


From 30ee42be00b7a50929a73cb617f70b1d3219eb69 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 18 Jul 2008 23:00:11 -0700
Subject: pkt_sched: Fix noqueue_qdisc initialization.

Like noop_qdisc, it needs a dummy backpointer and
explicit qdisc->q.lock initialization.

Based upon a report by Stephen Hemminger.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 14cc443d049..522a41a9f90 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -340,12 +340,19 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 	.owner		=	THIS_MODULE,
 };
 
+static struct Qdisc noqueue_qdisc;
+static struct netdev_queue noqueue_netdev_queue = {
+	.qdisc		=	&noqueue_qdisc,
+};
+
 static struct Qdisc noqueue_qdisc = {
 	.enqueue	=	NULL,
 	.dequeue	=	noop_dequeue,
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noqueue_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
+	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
+	.dev_queue	=	&noqueue_netdev_queue,
 };
 
 
-- 
cgit v1.2.3-70-g09d2


From c1e20f7c8b9ccbafc9ea78f2b406738728ce6b81 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 18 Jul 2008 23:02:15 -0700
Subject: tcp: RTT metrics scaling

Some of the metrics (RTT, RTTVAR and RTAX_RTO_MIN) are stored in
kernel units (jiffies) and this leaks out through the netlink API to
user space where the units for jiffies are unknown.

This patches changes the kernel to convert to/from milliseconds. This
changes the ABI, but milliseconds seemed like the most natural unit
for these parameters.  Values available via syscall in
/proc/net/rt_cache and netlink will be in milliseconds.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h    | 12 ++++++++++++
 net/ipv4/tcp_input.c | 31 ++++++++++++++++++-------------
 2 files changed, 30 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 002500e631f..c5c318a628f 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -128,6 +128,18 @@ static inline u32 dst_mtu(const struct dst_entry *dst)
 	return mtu;
 }
 
+/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
+static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
+{
+	return msecs_to_jiffies(dst_metric(dst, metric));
+}
+
+static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric,
+				      unsigned long rtt)
+{
+	dst->metrics[metric-1] = jiffies_to_msecs(rtt);
+}
+
 static inline u32
 dst_allfrag(const struct dst_entry *dst)
 {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fac49a6e161..88810bc0137 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -602,7 +602,7 @@ static u32 tcp_rto_min(struct sock *sk)
 	u32 rto_min = TCP_RTO_MIN;
 
 	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
-		rto_min = dst_metric(dst, RTAX_RTO_MIN);
+		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
 	return rto_min;
 }
 
@@ -729,6 +729,7 @@ void tcp_update_metrics(struct sock *sk)
 	if (dst && (dst->flags & DST_HOST)) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 		int m;
+		unsigned long rtt;
 
 		if (icsk->icsk_backoff || !tp->srtt) {
 			/* This session failed to estimate rtt. Why?
@@ -740,7 +741,8 @@ void tcp_update_metrics(struct sock *sk)
 			return;
 		}
 
-		m = dst_metric(dst, RTAX_RTT) - tp->srtt;
+		rtt = dst_metric_rtt(dst, RTAX_RTT);
+		m = rtt - tp->srtt;
 
 		/* If newly calculated rtt larger than stored one,
 		 * store new one. Otherwise, use EWMA. Remember,
@@ -748,12 +750,13 @@ void tcp_update_metrics(struct sock *sk)
 		 */
 		if (!(dst_metric_locked(dst, RTAX_RTT))) {
 			if (m <= 0)
-				dst->metrics[RTAX_RTT - 1] = tp->srtt;
+				set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
 			else
-				dst->metrics[RTAX_RTT - 1] -= (m >> 3);
+				set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
 		}
 
 		if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
+			unsigned long var;
 			if (m < 0)
 				m = -m;
 
@@ -762,11 +765,13 @@ void tcp_update_metrics(struct sock *sk)
 			if (m < tp->mdev)
 				m = tp->mdev;
 
-			if (m >= dst_metric(dst, RTAX_RTTVAR))
-				dst->metrics[RTAX_RTTVAR - 1] = m;
+			var = dst_metric_rtt(dst, RTAX_RTTVAR);
+			if (m >= var)
+				var = m;
 			else
-				dst->metrics[RTAX_RTTVAR-1] -=
-					(dst_metric(dst, RTAX_RTTVAR) - m)>>2;
+				var -= (var - m) >> 2;
+
+			set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
 		}
 
 		if (tp->snd_ssthresh >= 0xFFFF) {
@@ -897,7 +902,7 @@ static void tcp_init_metrics(struct sock *sk)
 	if (dst_metric(dst, RTAX_RTT) == 0)
 		goto reset;
 
-	if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
+	if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
 		goto reset;
 
 	/* Initial rtt is determined from SYN,SYN-ACK.
@@ -914,12 +919,12 @@ static void tcp_init_metrics(struct sock *sk)
 	 * to low value, and then abruptly stops to do it and starts to delay
 	 * ACKs, wait for troubles.
 	 */
-	if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
-		tp->srtt = dst_metric(dst, RTAX_RTT);
+	if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
+		tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
 		tp->rtt_seq = tp->snd_nxt;
 	}
-	if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
-		tp->mdev = dst_metric(dst, RTAX_RTTVAR);
+	if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
+		tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 	}
 	tcp_set_rto(sk);
-- 
cgit v1.2.3-70-g09d2


From c4e85f82edcd6027cfe67331a2e00741b009756b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 18 Jul 2008 23:03:44 -0700
Subject: sctp: Don't abort initialization when CONFIG_PROC_FS=n

This puts CONFIG_PROC_FS defines around the proc init/exit functions
and also avoids compiling proc.c if procfs is not supported.
Also make SCTP_DBG_OBJCNT depend on procfs.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/Kconfig    |  4 ++--
 net/sctp/Makefile   |  4 ++--
 net/sctp/protocol.c | 11 ++++++++++-
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 0b79f869c4e..58b3e882a18 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -47,11 +47,11 @@ config SCTP_DBG_MSG
 
 config SCTP_DBG_OBJCNT
 	bool "SCTP: Debug object counts"
+	depends on PROC_FS
 	help
 	  If you say Y, this will enable debugging support for counting the 
 	  type of objects that are currently allocated.  This is useful for 
-	  identifying memory leaks.   If the /proc filesystem is enabled this 
-	  debug information can be viewed by 
+	  identifying memory leaks. This debug information can be viewed by
 	  'cat /proc/net/sctp/sctp_dbg_objcnt'
 
 	  If unsure, say N
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index f5356b9d5ee..6b794734380 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -9,10 +9,10 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	  transport.o chunk.o sm_make_chunk.o ulpevent.o \
 	  inqueue.o outqueue.o ulpqueue.o command.o \
 	  tsnmap.o bind_addr.o socket.o primitive.o \
-	  output.o input.o debug.o ssnmap.o proc.o \
-	  auth.o
+	  output.o input.o debug.o ssnmap.o auth.o
 
 sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
+sctp-$(CONFIG_PROC_FS) += proc.o
 sctp-$(CONFIG_SYSCTL) += sysctl.o
 
 sctp-$(subst m,y,$(CONFIG_IPV6))	+= ipv6.o
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 98c6a882016..dd811a8456a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -64,9 +64,12 @@
 
 /* Global data structures. */
 struct sctp_globals sctp_globals __read_mostly;
-struct proc_dir_entry	*proc_net_sctp;
 DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
 
+#ifdef CONFIG_PROC_FS
+struct proc_dir_entry	*proc_net_sctp;
+#endif
+
 struct idr sctp_assocs_id;
 DEFINE_SPINLOCK(sctp_assocs_id_lock);
 
@@ -97,6 +100,7 @@ struct sock *sctp_get_ctl_sock(void)
 /* Set up the proc fs entry for the SCTP protocol. */
 static __init int sctp_proc_init(void)
 {
+#ifdef CONFIG_PROC_FS
 	if (!proc_net_sctp) {
 		struct proc_dir_entry *ent;
 		ent = proc_mkdir("sctp", init_net.proc_net);
@@ -131,6 +135,9 @@ out_snmp_proc_init:
 	}
 out_nomem:
 	return -ENOMEM;
+#else
+	return 0;
+#endif /* CONFIG_PROC_FS */
 }
 
 /* Clean up the proc fs entry for the SCTP protocol.
@@ -139,6 +146,7 @@ out_nomem:
  */
 static void sctp_proc_exit(void)
 {
+#ifdef CONFIG_PROC_FS
 	sctp_snmp_proc_exit();
 	sctp_eps_proc_exit();
 	sctp_assocs_proc_exit();
@@ -148,6 +156,7 @@ static void sctp_proc_exit(void)
 		proc_net_sctp = NULL;
 		remove_proc_entry("sctp", init_net.proc_net);
 	}
+#endif
 }
 
 /* Private helper to extract ipv4 address and stash them in
-- 
cgit v1.2.3-70-g09d2


From 6d0ccbac688207ca0616ab5094932af4db4747b3 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 18 Jul 2008 23:04:39 -0700
Subject: sctp: Prevent uninitialized memory access

valgrind reports uninizialized memory accesses when running
sctp inside the network simulation cradle simulator:

 Conditional jump or move depends on uninitialised value(s)
    at 0x570E34A: sctp_assoc_sync_pmtu (associola.c:1324)
    by 0x57427DA: sctp_packet_transmit (output.c:403)
    by 0x5710EFF: sctp_outq_flush (outqueue.c:824)
    by 0x5710B88: sctp_outq_uncork (outqueue.c:701)
    by 0x5745262: sctp_cmd_interpreter (sm_sideeffect.c:1548)
    by 0x57444B7: sctp_side_effects (sm_sideeffect.c:976)
    by 0x5744460: sctp_do_sm (sm_sideeffect.c:945)
    by 0x572157D: sctp_primitive_ASSOCIATE (primitive.c:94)
    by 0x5725C04: __sctp_connect (socket.c:1094)
    by 0x57297DC: sctp_connect (socket.c:3297)

 Conditional jump or move depends on uninitialised value(s)
    at 0x575D3A5: mod_timer (timer.c:630)
    by 0x5752B78: sctp_cmd_hb_timers_start (sm_sideeffect.c:555)
    by 0x5754133: sctp_cmd_interpreter (sm_sideeffect.c:1448)
    by 0x5753607: sctp_side_effects (sm_sideeffect.c:976)
    by 0x57535B0: sctp_do_sm (sm_sideeffect.c:945)
    by 0x571E9AE: sctp_endpoint_bh_rcv (endpointola.c:474)
    by 0x573347F: sctp_inq_push (inqueue.c:104)
    by 0x572EF93: sctp_rcv (input.c:256)
    by 0x5689623: ip_local_deliver_finish (ip_input.c:230)
    by 0x5689759: ip_local_deliver (ip_input.c:268)
    by 0x5689CAC: ip_rcv_finish (dst.h:246)

#1 is due to "if (t->pmtu_pending)".
8a4794914f9cf2681235ec2311e189fe307c28c7 "[SCTP] Flag a pmtu change request"
suggests it should be initialized to 0.

#2 is the heartbeat timer 'expires' value, which is uninizialised, but
test by mod_timer().
T3_rtx_timer seems to be affected by the same problem, so initialize it, too.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 1 +
 net/sctp/transport.c | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 35b6a023a6d..ec2a0a33fd7 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -653,6 +653,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 
 	SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to "
 			  "%d\n", asoc, asoc->pathmtu);
+	peer->pmtu_pending = 0;
 
 	asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu);
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 3f34f61221e..e745c118f23 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -100,6 +100,9 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	INIT_LIST_HEAD(&peer->send_ready);
 	INIT_LIST_HEAD(&peer->transports);
 
+	peer->T3_rtx_timer.expires = 0;
+	peer->hb_timer.expires = 0;
+
 	setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
 			(unsigned long)peer);
 	setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
-- 
cgit v1.2.3-70-g09d2


From 7dab83de50c7b2b7ceac695a0b56fa6c0f95b0bc Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 18 Jul 2008 23:05:40 -0700
Subject: sctp: Support ipv6only AF_INET6 sockets.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  2 ++
 net/sctp/bind_addr.c       | 37 +++++++++++++++++++++++++++++++++++++
 net/sctp/ipv6.c            | 20 ++++++++++++++++----
 net/sctp/protocol.c        |  7 +++++++
 net/sctp/sm_make_chunk.c   |  7 ++++++-
 net/sctp/socket.c          | 30 +++++++++++++++++++++++++-----
 6 files changed, 93 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 82116e84ee3..70eb64a7e1a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1211,6 +1211,8 @@ int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
 int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *);
 int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *,
 			 struct sctp_sock *);
+int sctp_bind_addr_conflict(struct sctp_bind_addr *, const union sctp_addr *,
+			 struct sctp_sock *, struct sctp_sock *);
 int sctp_bind_addr_state(const struct sctp_bind_addr *bp,
 			 const union sctp_addr *addr);
 union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr	*bp,
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 80e6df06967..f62bc246893 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -348,6 +348,43 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
 	return match;
 }
 
+/* Does the address 'addr' conflict with any addresses in
+ * the bp.
+ */
+int sctp_bind_addr_conflict(struct sctp_bind_addr *bp,
+			    const union sctp_addr *addr,
+			    struct sctp_sock *bp_sp,
+			    struct sctp_sock *addr_sp)
+{
+	struct sctp_sockaddr_entry *laddr;
+	int conflict = 0;
+	struct sctp_sock *sp;
+
+	/* Pick the IPv6 socket as the basis of comparison
+	 * since it's usually a superset of the IPv4.
+	 * If there is no IPv6 socket, then default to bind_addr.
+	 */
+	if (sctp_opt2sk(bp_sp)->sk_family == AF_INET6)
+		sp = bp_sp;
+	else if (sctp_opt2sk(addr_sp)->sk_family == AF_INET6)
+		sp = addr_sp;
+	else
+		sp = bp_sp;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+		if (!laddr->valid)
+			continue;
+
+		conflict = sp->pf->cmp_addr(&laddr->a, addr, sp);
+		if (conflict)
+			break;
+	}
+	rcu_read_unlock();
+
+	return conflict;
+}
+
 /* Get the state of the entry in the bind_addr_list */
 int sctp_bind_addr_state(const struct sctp_bind_addr *bp,
 			 const union sctp_addr *addr)
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a2f4d4d5159..a238d6834b3 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -818,7 +818,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
 		return 1;
 	/* v4-mapped-v6 addresses */
 	case AF_INET:
-		if (!__ipv6_only_sock(sctp_opt2sk(sp)) && sp->v4mapped)
+		if (!__ipv6_only_sock(sctp_opt2sk(sp)))
 			return 1;
 	default:
 		return 0;
@@ -840,6 +840,11 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
 
 	if (!af1 || !af2)
 		return 0;
+
+	/* If the socket is IPv6 only, v4 addrs will not match */
+	if (__ipv6_only_sock(sctp_opt2sk(opt)) && af1 != af2)
+		return 0;
+
 	/* Today, wildcard AF_INET/AF_INET6. */
 	if (sctp_is_any(addr1) || sctp_is_any(addr2))
 		return 1;
@@ -876,7 +881,11 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
 				return 0;
 			}
 			dev_put(dev);
+		} else if (type == IPV6_ADDR_MAPPED) {
+			if (!opt->v4mapped)
+				return 0;
 		}
+
 		af = opt->pf->af;
 	}
 	return af->available(addr, opt);
@@ -919,9 +928,12 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
 static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
 				      __be16 *types)
 {
-	types[0] = SCTP_PARAM_IPV4_ADDRESS;
-	types[1] = SCTP_PARAM_IPV6_ADDRESS;
-	return 2;
+	types[0] = SCTP_PARAM_IPV6_ADDRESS;
+	if (!opt || !ipv6_only_sock(sctp_opt2sk(opt))) {
+		types[1] = SCTP_PARAM_IPV4_ADDRESS;
+		return 2;
+	}
+	return 1;
 }
 
 static const struct proto_ops inet6_seqpacket_ops = {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index dd811a8456a..cdd881142f2 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -381,6 +381,10 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
 			      struct sctp_sock *sp,
 			      const struct sk_buff *skb)
 {
+	/* IPv4 addresses not allowed */
+	if (sp && ipv6_only_sock(sctp_opt2sk(sp)))
+		return 0;
+
 	/* Is this a non-unicast address or a unusable SCTP address? */
 	if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr))
 		return 0;
@@ -404,6 +408,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
 	   !sysctl_ip_nonlocal_bind)
 		return 0;
 
+	if (ipv6_only_sock(sctp_opt2sk(sp)))
+		return 0;
+
 	return 1;
 }
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index bbc7107c86c..e8ca4e54981 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2364,8 +2364,13 @@ static int sctp_process_param(struct sctp_association *asoc,
 	case SCTP_PARAM_IPV6_ADDRESS:
 		if (PF_INET6 != asoc->base.sk->sk_family)
 			break;
-		/* Fall through. */
+		goto do_addr_param;
+
 	case SCTP_PARAM_IPV4_ADDRESS:
+		/* v4 addresses are not allowed on v6-only socket */
+		if (ipv6_only_sock(asoc->base.sk))
+			break;
+do_addr_param:
 		af = sctp_get_af_specific(param_type2af(param.p->type));
 		af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0);
 		scope = sctp_scope(peer_addr);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6aba01b0ce4..a0e879bb202 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -308,9 +308,16 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
 	if (len < sizeof (struct sockaddr))
 		return NULL;
 
-	/* Does this PF support this AF? */
-	if (!opt->pf->af_supported(addr->sa.sa_family, opt))
-		return NULL;
+	/* V4 mapped address are really of AF_INET family */
+	if (addr->sa.sa_family == AF_INET6 &&
+	    ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
+		if (!opt->pf->af_supported(AF_INET, opt))
+			return NULL;
+	} else {
+		/* Does this PF support this AF? */
+		if (!opt->pf->af_supported(addr->sa.sa_family, opt))
+			return NULL;
+	}
 
 	/* If we get this far, af is valid. */
 	af = sctp_get_af_specific(addr->sa.sa_family);
@@ -4395,6 +4402,11 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
 				    (AF_INET6 == addr->a.sa.sa_family))
 					continue;
 
+				if ((PF_INET6 == sk->sk_family) &&
+				    inet_v6_ipv6only(sk) &&
+				    (AF_INET == addr->a.sa.sa_family))
+					continue;
+
 				cnt++;
 			}
 			rcu_read_unlock();
@@ -4435,6 +4447,10 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
 		if ((PF_INET == sk->sk_family) &&
 		    (AF_INET6 == addr->a.sa.sa_family))
 			continue;
+		if ((PF_INET6 == sk->sk_family) &&
+		    inet_v6_ipv6only(sk) &&
+		    (AF_INET == addr->a.sa.sa_family))
+			continue;
 		memcpy(&temp, &addr->a, sizeof(temp));
 		if (!temp.v4.sin_port)
 			temp.v4.sin_port = htons(port);
@@ -4470,6 +4486,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
 		if ((PF_INET == sk->sk_family) &&
 		    (AF_INET6 == addr->a.sa.sa_family))
 			continue;
+		if ((PF_INET6 == sk->sk_family) &&
+		    inet_v6_ipv6only(sk) &&
+		    (AF_INET == addr->a.sa.sa_family))
+			continue;
 		memcpy(&temp, &addr->a, sizeof(temp));
 		if (!temp.v4.sin_port)
 			temp.v4.sin_port = htons(port);
@@ -5568,8 +5588,8 @@ pp_found:
 			    sk2->sk_state != SCTP_SS_LISTENING)
 				continue;
 
-			if (sctp_bind_addr_match(&ep2->base.bind_addr, addr,
-						 sctp_sk(sk))) {
+			if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr,
+						 sctp_sk(sk2), sctp_sk(sk))) {
 				ret = (long)sk2;
 				goto fail_unlock;
 			}
-- 
cgit v1.2.3-70-g09d2


From 23b29ed80bd7184398317a111dc488605cb66c7f Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 18 Jul 2008 23:06:07 -0700
Subject: sctp: Do not leak memory on multiple listen() calls

SCTP permits multiple listen call and on subsequent calls
we leak he memory allocated for the crypto transforms.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a0e879bb202..fd7ed9d46a4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5773,7 +5773,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 		goto out;
 
 	/* Allocate HMAC for generating cookie. */
-	if (sctp_hmac_alg) {
+	if (!sctp_sk(sk)->hmac && sctp_hmac_alg) {
 		tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
 		if (IS_ERR(tfm)) {
 			if (net_ratelimit()) {
@@ -5801,7 +5801,8 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 		goto cleanup;
 
 	/* Store away the transform reference. */
-	sctp_sk(sk)->hmac = tfm;
+	if (!sctp_sk(sk)->hmac)
+		sctp_sk(sk)->hmac = tfm;
 out:
 	sctp_release_sock(sk);
 	return err;
-- 
cgit v1.2.3-70-g09d2


From 4e54064e0a13b7a7d4a481123c1783f770538e30 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 18 Jul 2008 23:06:32 -0700
Subject: sctp: Allow only 1 listening socket with SO_REUSEADDR

When multiple socket bind to the same port with SO_REUSEADDR,
only 1 can be listining.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fd7ed9d46a4..79bece16aed 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -377,18 +377,19 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
 	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
+	/* See if the address matches any of the addresses we may have
+	 * already bound before checking against other endpoints.
+	 */
+	if (sctp_bind_addr_match(bp, addr, sp))
+		return -EINVAL;
+
 	/* Make sure we are allowed to bind here.
 	 * The function sctp_get_port_local() does duplicate address
 	 * detection.
 	 */
 	addr->v4.sin_port = htons(snum);
 	if ((ret = sctp_get_port_local(sk, addr))) {
-		if (ret == (long) sk) {
-			/* This endpoint has a conflicting address. */
-			return -EINVAL;
-		} else {
-			return -EADDRINUSE;
-		}
+		return -EADDRINUSE;
 	}
 
 	/* Refresh ephemeral port.  */
@@ -5584,8 +5585,9 @@ pp_found:
 			struct sctp_endpoint *ep2;
 			ep2 = sctp_sk(sk2)->ep;
 
-			if (reuse && sk2->sk_reuse &&
-			    sk2->sk_state != SCTP_SS_LISTENING)
+			if (sk == sk2 ||
+			    (reuse && sk2->sk_reuse &&
+			     sk2->sk_state != SCTP_SS_LISTENING))
 				continue;
 
 			if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr,
@@ -5702,8 +5704,13 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
 	if (!ep->base.bind_addr.port) {
 		if (sctp_autobind(sk))
 			return -EAGAIN;
-	} else
+	} else {
+		if (sctp_get_port(sk, inet_sk(sk)->num)) {
+			sk->sk_state = SCTP_SS_CLOSED;
+			return -EADDRINUSE;
+		}
 		sctp_sk(sk)->bind_hash->fastreuse = 0;
+	}
 
 	sctp_hash_endpoint(ep);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 336d3262df71fcd2661180bb35d5ea41b4cbca58 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 18 Jul 2008 23:07:09 -0700
Subject: sctp: remove unnecessary byteshifting, calculate directly in
 big-endian

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/checksum.h            | 23 ++++++++++++++---------
 net/ipv4/netfilter/nf_nat_proto_sctp.c |  4 ++--
 net/sctp/input.c                       |  4 ++--
 net/sctp/output.c                      |  4 ++--
 4 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index ba75c67cb99..b799fb21519 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -46,9 +46,14 @@
 #include <net/sctp/sctp.h>
 #include <linux/crc32c.h>
 
-static inline __u32 sctp_start_cksum(__u8 *buffer, __u16 length)
+static inline __be32 sctp_crc32c(__be32 crc, u8 *buffer, u16 length)
 {
-	__u32 crc = ~(__u32) 0;
+	return (__force __be32)crc32c((__force u32)crc, buffer, length);
+}
+
+static inline __be32 sctp_start_cksum(__u8 *buffer, __u16 length)
+{
+	__be32 crc = ~cpu_to_be32(0);
 	__u8  zero[sizeof(__u32)] = {0};
 
 	/* Optimize this routine to be SCTP specific, knowing how
@@ -56,23 +61,23 @@ static inline __u32 sctp_start_cksum(__u8 *buffer, __u16 length)
 	 */
 
 	/* Calculate CRC up to the checksum. */
-	crc = crc32c(crc, buffer, sizeof(struct sctphdr) - sizeof(__u32));
+	crc = sctp_crc32c(crc, buffer, sizeof(struct sctphdr) - sizeof(__u32));
 
 	/* Skip checksum field of the header. */
-	crc = crc32c(crc, zero, sizeof(__u32));
+	crc = sctp_crc32c(crc, zero, sizeof(__u32));
 
 	/* Calculate the rest of the CRC. */
-	crc = crc32c(crc, &buffer[sizeof(struct sctphdr)],
+	crc = sctp_crc32c(crc, &buffer[sizeof(struct sctphdr)],
 			    length - sizeof(struct sctphdr));
 	return crc;
 }
 
-static inline __u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32)
+static inline __be32 sctp_update_cksum(__u8 *buffer, __u16 length, __be32 crc32)
 {
-	return crc32c(crc32, buffer, length);
+	return sctp_crc32c(crc32, buffer, length);
 }
 
-static inline __u32 sctp_end_cksum(__u32 crc32)
+static inline __be32 sctp_end_cksum(__be32 crc32)
 {
-	return ntohl(~crc32);
+	return ~crc32;
 }
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 82e4c0e286b..65e470bc612 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 	sctp_sctphdr_t *hdr;
 	unsigned int hdroff = iphdroff + iph->ihl*4;
 	__be32 oldip, newip;
-	u32 crc32;
+	__be32 crc32;
 
 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
 		return false;
@@ -61,7 +61,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 		crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb),
 					  crc32);
 	crc32 = sctp_end_cksum(crc32);
-	hdr->checksum = htonl(crc32);
+	hdr->checksum = crc32;
 
 	return true;
 }
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 5ed93c05c23..a49fa80b57b 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -83,8 +83,8 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
 {
 	struct sk_buff *list = skb_shinfo(skb)->frag_list;
 	struct sctphdr *sh = sctp_hdr(skb);
-	__u32 cmp = ntohl(sh->checksum);
-	__u32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
+	__be32 cmp = sh->checksum;
+	__be32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
 
 	for (; list; list = list->next)
 		val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9a63a3fb901..45684646b1d 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -365,7 +365,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	struct sctp_transport *tp = packet->transport;
 	struct sctp_association *asoc = tp->asoc;
 	struct sctphdr *sh;
-	__u32 crc32 = 0;
+	__be32 crc32 = __constant_cpu_to_be32(0);
 	struct sk_buff *nskb;
 	struct sctp_chunk *chunk, *tmp;
 	struct sock *sk;
@@ -538,7 +538,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	/* 3) Put the resultant value into the checksum field in the
 	 *    common header, and leave the rest of the bits unchanged.
 	 */
-	sh->checksum = htonl(crc32);
+	sh->checksum = crc32;
 
 	/* IP layer ECN support
 	 * From RFC 2481
-- 
cgit v1.2.3-70-g09d2


From 845525a642c1c9e1335c33a274d4273906ee58eb Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 18 Jul 2008 23:08:21 -0700
Subject: sctp: Update sctp global memory limit allocations.

Update sctp global memory limit allocations to be the same as TCP.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index cdd881142f2..ed9acffe810 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -52,6 +52,8 @@
 #include <linux/inetdevice.h>
 #include <linux/seq_file.h>
 #include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/ip.h>
@@ -1080,6 +1082,7 @@ SCTP_STATIC __init int sctp_init(void)
 	int status = -EINVAL;
 	unsigned long goal;
 	unsigned long limit;
+	unsigned long nr_pages;
 	int max_share;
 	int order;
 
@@ -1175,8 +1178,9 @@ SCTP_STATIC __init int sctp_init(void)
 	 * Note this initalizes the data in sctpv6_prot too
 	 * Unabashedly stolen from tcp_init
 	 */
-	limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
-	limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	nr_pages = totalram_pages - totalhigh_pages;
+	limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
 	limit = max(limit, 128UL);
 	sysctl_sctp_mem[0] = limit / 4 * 3;
 	sysctl_sctp_mem[1] = limit;
@@ -1186,7 +1190,7 @@ SCTP_STATIC __init int sctp_init(void)
 	limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7);
 	max_share = min(4UL*1024*1024, limit);
 
-	sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */
+	sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
 	sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
 	sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
 
-- 
cgit v1.2.3-70-g09d2


From 49a72dfb8814c2d65bd9f8c9c6daf6395a1ec58d Mon Sep 17 00:00:00 2001
From: Adam Langley <agl@imperialviolet.org>
Date: Sat, 19 Jul 2008 00:01:42 -0700
Subject: tcp: Fix MD5 signatures for non-linear skbs

Currently, the MD5 code assumes that the SKBs are linear and, in the case
that they aren't, happily goes off and hashes off the end of the SKB and
into random memory.

Reported by Stephen Hemminger in [1]. Advice thanks to Stephen and Evgeniy
Polyakov. Also includes a couple of missed route_caps from Stephen's patch
in [2].

[1] http://marc.info/?l=linux-netdev&m=121445989106145&w=2
[2] http://marc.info/?l=linux-netdev&m=121459157816964&w=2

Signed-off-by: Adam Langley <agl@imperialviolet.org>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  29 +++++------
 net/ipv4/tcp.c        | 127 ++++++++++++++++++++-------------------------
 net/ipv4/tcp_ipv4.c   | 140 ++++++++++++++++++++++++++++++--------------------
 net/ipv4/tcp_output.c |  14 ++---
 net/ipv6/tcp_ipv6.c   | 127 +++++++++++++++++++++++++++++----------------
 5 files changed, 242 insertions(+), 195 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 92d7b551dc5..31f5bbfc59b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1112,20 +1112,12 @@ struct tcp_md5sig_pool {
 #define TCP_MD5SIG_MAXKEYS	(~(u32)0)	/* really?! */
 
 /* - functions */
-extern int			tcp_calc_md5_hash(char *md5_hash,
-						  struct tcp_md5sig_key *key,
-						  int bplen,
-						  struct tcphdr *th,
-						  unsigned int tcplen,
-						  struct tcp_md5sig_pool *hp);
-
-extern int			tcp_v4_calc_md5_hash(char *md5_hash,
-						     struct tcp_md5sig_key *key,
-						     struct sock *sk,
-						     struct dst_entry *dst,
-						     struct request_sock *req,
-						     struct tcphdr *th,
-						     unsigned int tcplen);
+extern int			tcp_v4_md5_hash_skb(char *md5_hash,
+						    struct tcp_md5sig_key *key,
+						    struct sock *sk,
+						    struct request_sock *req,
+						    struct sk_buff *skb);
+
 extern struct tcp_md5sig_key	*tcp_v4_md5_lookup(struct sock *sk,
 						   struct sock *addr_sk);
 
@@ -1152,6 +1144,11 @@ extern void			tcp_free_md5sig_pool(void);
 
 extern struct tcp_md5sig_pool	*__tcp_get_md5sig_pool(int cpu);
 extern void			__tcp_put_md5sig_pool(void);
+extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
+extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
+				 unsigned header_len);
+extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
+			    struct tcp_md5sig_key *key);
 
 static inline
 struct tcp_md5sig_pool		*tcp_get_md5sig_pool(void)
@@ -1381,10 +1378,8 @@ struct tcp_sock_af_ops {
 	int			(*calc_md5_hash) (char *location,
 						  struct tcp_md5sig_key *md5,
 						  struct sock *sk,
-						  struct dst_entry *dst,
 						  struct request_sock *req,
-						  struct tcphdr *th,
-						  unsigned int len);
+						  struct sk_buff *skb);
 	int			(*md5_add) (struct sock *sk,
 					    struct sock *addr_sk,
 					    u8 *newkey,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 827e6132af5..0b491bf03db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2465,76 +2465,6 @@ static unsigned long tcp_md5sig_users;
 static struct tcp_md5sig_pool **tcp_md5sig_pool;
 static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
 
-int tcp_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-		      int bplen,
-		      struct tcphdr *th, unsigned int tcplen,
-		      struct tcp_md5sig_pool *hp)
-{
-	struct scatterlist sg[4];
-	__u16 data_len;
-	int block = 0;
-	__sum16 cksum;
-	struct hash_desc *desc = &hp->md5_desc;
-	int err;
-	unsigned int nbytes = 0;
-
-	sg_init_table(sg, 4);
-
-	/* 1. The TCP pseudo-header */
-	sg_set_buf(&sg[block++], &hp->md5_blk, bplen);
-	nbytes += bplen;
-
-	/* 2. The TCP header, excluding options, and assuming a
-	 * checksum of zero
-	 */
-	cksum = th->check;
-	th->check = 0;
-	sg_set_buf(&sg[block++], th, sizeof(*th));
-	nbytes += sizeof(*th);
-
-	/* 3. The TCP segment data (if any) */
-	data_len = tcplen - (th->doff << 2);
-	if (data_len > 0) {
-		u8 *data = (u8 *)th + (th->doff << 2);
-		sg_set_buf(&sg[block++], data, data_len);
-		nbytes += data_len;
-	}
-
-	/* 4. an independently-specified key or password, known to both
-	 * TCPs and presumably connection-specific
-	 */
-	sg_set_buf(&sg[block++], key->key, key->keylen);
-	nbytes += key->keylen;
-
-	sg_mark_end(&sg[block - 1]);
-
-	/* Now store the hash into the packet */
-	err = crypto_hash_init(desc);
-	if (err) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "%s(): hash_init failed\n", __func__);
-		return -1;
-	}
-	err = crypto_hash_update(desc, sg, nbytes);
-	if (err) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "%s(): hash_update failed\n", __func__);
-		return -1;
-	}
-	err = crypto_hash_final(desc, md5_hash);
-	if (err) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "%s(): hash_final failed\n", __func__);
-		return -1;
-	}
-
-	/* Reset header */
-	th->check = cksum;
-
-	return 0;
-}
-EXPORT_SYMBOL(tcp_calc_md5_hash);
-
 static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
 {
 	int cpu;
@@ -2658,6 +2588,63 @@ void __tcp_put_md5sig_pool(void)
 }
 
 EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+
+int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
+			struct tcphdr *th)
+{
+	struct scatterlist sg;
+	int err;
+
+	__sum16 old_checksum = th->check;
+	th->check = 0;
+	/* options aren't included in the hash */
+	sg_init_one(&sg, th, sizeof(struct tcphdr));
+	err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
+	th->check = old_checksum;
+	return err;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_header);
+
+int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
+			  struct sk_buff *skb, unsigned header_len)
+{
+	struct scatterlist sg;
+	const struct tcphdr *tp = tcp_hdr(skb);
+	struct hash_desc *desc = &hp->md5_desc;
+	unsigned i;
+	const unsigned head_data_len = skb_headlen(skb) > header_len ?
+				       skb_headlen(skb) - header_len : 0;
+	const struct skb_shared_info *shi = skb_shinfo(skb);
+
+	sg_init_table(&sg, 1);
+
+	sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
+	if (crypto_hash_update(desc, &sg, head_data_len))
+		return 1;
+
+	for (i = 0; i < shi->nr_frags; ++i) {
+		const struct skb_frag_struct *f = &shi->frags[i];
+		sg_set_page(&sg, f->page, f->size, f->page_offset);
+		if (crypto_hash_update(desc, &sg, f->size))
+			return 1;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_skb_data);
+
+int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
+{
+	struct scatterlist sg;
+
+	sg_init_one(&sg, key->key, key->keylen);
+	return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_key);
+
 #endif
 
 void tcp_done(struct sock *sk)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 29adc668ad5..5400d75ff17 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -87,9 +87,8 @@ int sysctl_tcp_low_latency __read_mostly;
 #ifdef CONFIG_TCP_MD5SIG
 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
 						   __be32 addr);
-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-				   __be32 saddr, __be32 daddr,
-				   struct tcphdr *th, unsigned int tcplen);
+static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+			       __be32 daddr, __be32 saddr, struct tcphdr *th);
 #else
 static inline
 struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
@@ -583,11 +582,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 		rep.th.doff = arg.iov[0].iov_len / 4;
 
-		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
-					key,
-					ip_hdr(skb)->daddr,
-					ip_hdr(skb)->saddr,
-					&rep.th, arg.iov[0].iov_len);
+		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
+				     key, ip_hdr(skb)->daddr,
+				     ip_hdr(skb)->saddr, &rep.th);
 	}
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -657,11 +654,9 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 		rep.th.doff = arg.iov[0].iov_len/4;
 
-		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
-					key,
-					ip_hdr(skb)->daddr,
-					ip_hdr(skb)->saddr,
-					&rep.th, arg.iov[0].iov_len);
+		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
+				    key, ip_hdr(skb)->daddr,
+				    ip_hdr(skb)->saddr, &rep.th);
 	}
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -989,28 +984,16 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 				 newkey, cmd.tcpm_keylen);
 }
 
-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-				   __be32 saddr, __be32 daddr,
-				   struct tcphdr *th,
-				   unsigned int tcplen)
+static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
+					__be32 daddr, __be32 saddr, int nbytes)
 {
-	struct tcp_md5sig_pool *hp;
 	struct tcp4_pseudohdr *bp;
-	int err;
-
-	/*
-	 * Okay, so RFC2385 is turned on for this connection,
-	 * so we need to generate the MD5 hash for the packet now.
-	 */
-
-	hp = tcp_get_md5sig_pool();
-	if (!hp)
-		goto clear_hash_noput;
+	struct scatterlist sg;
 
 	bp = &hp->md5_blk.ip4;
 
 	/*
-	 * The TCP pseudo-header (in the order: source IP address,
+	 * 1. the TCP pseudo-header (in the order: source IP address,
 	 * destination IP address, zero-padded protocol number, and
 	 * segment length)
 	 */
@@ -1018,48 +1001,95 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
 	bp->daddr = daddr;
 	bp->pad = 0;
 	bp->protocol = IPPROTO_TCP;
-	bp->len = htons(tcplen);
+	bp->len = cpu_to_be16(nbytes);
 
-	err = tcp_calc_md5_hash(md5_hash, key, sizeof(*bp),
-				th, tcplen, hp);
-	if (err)
+	sg_init_one(&sg, bp, sizeof(*bp));
+	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+}
+
+static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+			       __be32 daddr, __be32 saddr, struct tcphdr *th)
+{
+	struct tcp_md5sig_pool *hp;
+	struct hash_desc *desc;
+
+	hp = tcp_get_md5sig_pool();
+	if (!hp)
+		goto clear_hash_noput;
+	desc = &hp->md5_desc;
+
+	if (crypto_hash_init(desc))
+		goto clear_hash;
+	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
+		goto clear_hash;
+	if (tcp_md5_hash_header(hp, th))
+		goto clear_hash;
+	if (tcp_md5_hash_key(hp, key))
+		goto clear_hash;
+	if (crypto_hash_final(desc, md5_hash))
 		goto clear_hash;
 
-	/* Free up the crypto pool */
 	tcp_put_md5sig_pool();
-out:
 	return 0;
+
 clear_hash:
 	tcp_put_md5sig_pool();
 clear_hash_noput:
 	memset(md5_hash, 0, 16);
-	goto out;
+	return 1;
 }
 
-int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-			 struct sock *sk,
-			 struct dst_entry *dst,
-			 struct request_sock *req,
-			 struct tcphdr *th,
-			 unsigned int tcplen)
+int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+			struct sock *sk, struct request_sock *req,
+			struct sk_buff *skb)
 {
+	struct tcp_md5sig_pool *hp;
+	struct hash_desc *desc;
+	struct tcphdr *th = tcp_hdr(skb);
 	__be32 saddr, daddr;
 
 	if (sk) {
 		saddr = inet_sk(sk)->saddr;
 		daddr = inet_sk(sk)->daddr;
+	} else if (req) {
+		saddr = inet_rsk(req)->loc_addr;
+		daddr = inet_rsk(req)->rmt_addr;
 	} else {
-		struct rtable *rt = (struct rtable *)dst;
-		BUG_ON(!rt);
-		saddr = rt->rt_src;
-		daddr = rt->rt_dst;
+		const struct iphdr *iph = ip_hdr(skb);
+		saddr = iph->saddr;
+		daddr = iph->daddr;
 	}
-	return tcp_v4_do_calc_md5_hash(md5_hash, key,
-				       saddr, daddr,
-				       th, tcplen);
+
+	hp = tcp_get_md5sig_pool();
+	if (!hp)
+		goto clear_hash_noput;
+	desc = &hp->md5_desc;
+
+	if (crypto_hash_init(desc))
+		goto clear_hash;
+
+	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
+		goto clear_hash;
+	if (tcp_md5_hash_header(hp, th))
+		goto clear_hash;
+	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+		goto clear_hash;
+	if (tcp_md5_hash_key(hp, key))
+		goto clear_hash;
+	if (crypto_hash_final(desc, md5_hash))
+		goto clear_hash;
+
+	tcp_put_md5sig_pool();
+	return 0;
+
+clear_hash:
+	tcp_put_md5sig_pool();
+clear_hash_noput:
+	memset(md5_hash, 0, 16);
+	return 1;
 }
 
-EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
+EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
 
 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 {
@@ -1104,10 +1134,9 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 	/* Okay, so this is hash_expected and hash_location -
 	 * so we need to calculate the checksum.
 	 */
-	genhash = tcp_v4_do_calc_md5_hash(newhash,
-					  hash_expected,
-					  iph->saddr, iph->daddr,
-					  th, skb->len);
+	genhash = tcp_v4_md5_hash_skb(newhash,
+				      hash_expected,
+				      NULL, NULL, skb);
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 		if (net_ratelimit()) {
@@ -1356,6 +1385,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		if (newkey != NULL)
 			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
 					  newkey, key->keylen);
+		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 	}
 #endif
 
@@ -1719,7 +1749,7 @@ struct inet_connection_sock_af_ops ipv4_specific = {
 #ifdef CONFIG_TCP_MD5SIG
 static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
 	.md5_lookup		= tcp_v4_md5_lookup,
-	.calc_md5_hash		= tcp_v4_calc_md5_hash,
+	.calc_md5_hash		= tcp_v4_md5_hash_skb,
 	.md5_add		= tcp_v4_md5_add_func,
 	.md5_parse		= tcp_v4_parse_md5_keys,
 };
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 36a19707f67..958ff486165 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -540,8 +540,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	 * room for it.
 	 */
 	md5 = tp->af_specific->md5_lookup(sk, sk);
-	if (md5)
+	if (md5) {
 		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
+		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+	}
 #endif
 
 	skb_push(skb, tcp_header_size);
@@ -602,10 +604,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	/* Calculate the MD5 hash, as we have all we need now */
 	if (md5) {
 		tp->af_specific->calc_md5_hash(md5_hash_location,
-					       md5,
-					       sk, NULL, NULL,
-					       tcp_hdr(skb),
-					       skb->len);
+					       md5, sk, NULL, skb);
 	}
 #endif
 
@@ -2264,10 +2263,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	/* Okay, we have all we need - do the md5 hash if needed */
 	if (md5) {
 		tp->af_specific->calc_md5_hash(md5_hash_location,
-					       md5,
-					       NULL, dst, req,
-					       tcp_hdr(skb),
-					       skb->len);
+					       md5, NULL, req, skb);
 	}
 #endif
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ca5b93a5c02..ae45f983501 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -736,64 +736,105 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
 	return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
 }
 
-static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-				   struct in6_addr *saddr,
-				   struct in6_addr *daddr,
-				   struct tcphdr *th, unsigned int tcplen)
+static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
+					struct in6_addr *daddr,
+					struct in6_addr *saddr, int nbytes)
 {
-	struct tcp_md5sig_pool *hp;
 	struct tcp6_pseudohdr *bp;
-	int err;
-
-	hp = tcp_get_md5sig_pool();
-	if (!hp) {
-		printk(KERN_WARNING "%s(): hash pool not found...\n", __func__);
-		goto clear_hash_noput;
-	}
+	struct scatterlist sg;
 
 	bp = &hp->md5_blk.ip6;
-
 	/* 1. TCP pseudo-header (RFC2460) */
 	ipv6_addr_copy(&bp->saddr, saddr);
 	ipv6_addr_copy(&bp->daddr, daddr);
-	bp->len = htonl(tcplen);
-	bp->protocol = htonl(IPPROTO_TCP);
+	bp->protocol = cpu_to_be32(IPPROTO_TCP);
+	bp->len = cpu_to_be16(nbytes);
 
-	err = tcp_calc_md5_hash(md5_hash, key, sizeof(*bp),
-				th, tcplen, hp);
+	sg_init_one(&sg, bp, sizeof(*bp));
+	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+}
 
-	if (err)
+static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+			       struct in6_addr *daddr, struct in6_addr *saddr,
+			       struct tcphdr *th)
+{
+	struct tcp_md5sig_pool *hp;
+	struct hash_desc *desc;
+
+	hp = tcp_get_md5sig_pool();
+	if (!hp)
+		goto clear_hash_noput;
+	desc = &hp->md5_desc;
+
+	if (crypto_hash_init(desc))
+		goto clear_hash;
+	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
+		goto clear_hash;
+	if (tcp_md5_hash_header(hp, th))
+		goto clear_hash;
+	if (tcp_md5_hash_key(hp, key))
+		goto clear_hash;
+	if (crypto_hash_final(desc, md5_hash))
 		goto clear_hash;
 
-	/* Free up the crypto pool */
 	tcp_put_md5sig_pool();
-out:
 	return 0;
+
 clear_hash:
 	tcp_put_md5sig_pool();
 clear_hash_noput:
 	memset(md5_hash, 0, 16);
-	goto out;
+	return 1;
 }
 
-static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-				struct sock *sk,
-				struct dst_entry *dst,
-				struct request_sock *req,
-				struct tcphdr *th, unsigned int tcplen)
+static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+			       struct sock *sk, struct request_sock *req,
+			       struct sk_buff *skb)
 {
 	struct in6_addr *saddr, *daddr;
+	struct tcp_md5sig_pool *hp;
+	struct hash_desc *desc;
+	struct tcphdr *th = tcp_hdr(skb);
 
 	if (sk) {
 		saddr = &inet6_sk(sk)->saddr;
 		daddr = &inet6_sk(sk)->daddr;
-	} else {
+	} else if (req) {
 		saddr = &inet6_rsk(req)->loc_addr;
 		daddr = &inet6_rsk(req)->rmt_addr;
+	} else {
+		struct ipv6hdr *ip6h = ipv6_hdr(skb);
+		saddr = &ip6h->saddr;
+		daddr = &ip6h->daddr;
 	}
-	return tcp_v6_do_calc_md5_hash(md5_hash, key,
-				       saddr, daddr,
-				       th, tcplen);
+
+	hp = tcp_get_md5sig_pool();
+	if (!hp)
+		goto clear_hash_noput;
+	desc = &hp->md5_desc;
+
+	if (crypto_hash_init(desc))
+		goto clear_hash;
+
+	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
+		goto clear_hash;
+	if (tcp_md5_hash_header(hp, th))
+		goto clear_hash;
+	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+		goto clear_hash;
+	if (tcp_md5_hash_key(hp, key))
+		goto clear_hash;
+	if (crypto_hash_final(desc, md5_hash))
+		goto clear_hash;
+
+	tcp_put_md5sig_pool();
+	return 0;
+
+clear_hash:
+	tcp_put_md5sig_pool();
+clear_hash_noput:
+	memset(md5_hash, 0, 16);
+	return 1;
 }
 
 static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
@@ -834,10 +875,10 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* check the signature */
-	genhash = tcp_v6_do_calc_md5_hash(newhash,
-					  hash_expected,
-					  &ip6h->saddr, &ip6h->daddr,
-					  th, skb->len);
+	genhash = tcp_v6_md5_hash_skb(newhash,
+				      hash_expected,
+				      NULL, NULL, skb);
+
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 		if (net_ratelimit()) {
 			printk(KERN_INFO "MD5 Hash %s for "
@@ -974,10 +1015,9 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_MD5SIG << 8) |
 			       TCPOLEN_MD5SIG);
-		tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
-					&ipv6_hdr(skb)->daddr,
-					&ipv6_hdr(skb)->saddr,
-					t1, tot_len);
+		tcp_v6_md5_hash_hdr((__u8 *)&opt[1], key,
+				    &ipv6_hdr(skb)->daddr,
+				    &ipv6_hdr(skb)->saddr, t1);
 	}
 #endif
 
@@ -1064,10 +1104,9 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 	if (key) {
 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
-		tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
-					&ipv6_hdr(skb)->daddr,
-					&ipv6_hdr(skb)->saddr,
-					t1, tot_len);
+		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
+				    &ipv6_hdr(skb)->daddr,
+				    &ipv6_hdr(skb)->saddr, t1);
 	}
 #endif
 
@@ -1783,7 +1822,7 @@ static struct inet_connection_sock_af_ops ipv6_specific = {
 #ifdef CONFIG_TCP_MD5SIG
 static struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
 	.md5_lookup	=	tcp_v6_md5_lookup,
-	.calc_md5_hash	=	tcp_v6_calc_md5_hash,
+	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
 	.md5_add	=	tcp_v6_md5_add_func,
 	.md5_parse	=	tcp_v6_parse_md5_keys,
 };
@@ -1815,7 +1854,7 @@ static struct inet_connection_sock_af_ops ipv6_mapped = {
 #ifdef CONFIG_TCP_MD5SIG
 static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
 	.md5_lookup	=	tcp_v4_md5_lookup,
-	.calc_md5_hash	=	tcp_v4_calc_md5_hash,
+	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
 	.md5_add	=	tcp_v6_md5_add_func,
 	.md5_parse	=	tcp_v6_parse_md5_keys,
 };
-- 
cgit v1.2.3-70-g09d2


From 33ad798c924b4a1afad3593f2796d465040aadd5 Mon Sep 17 00:00:00 2001
From: Adam Langley <agl@imperialviolet.org>
Date: Sat, 19 Jul 2008 00:04:31 -0700
Subject: tcp: options clean up

This should fix the following bugs:
  * Connections with MD5 signatures produce invalid packets whenever SACK
    options are included
  * MD5 signatures are counted twice in the MSS calculations

Behaviour changes:
  * A SYN with MD5 + SACK + TS elicits a SYNACK with MD5 + SACK

    This is because we can't fit any SACK blocks in a packet with MD5 + TS
    options. There was discussion about disabling SACK rather than TS in
    order to fit in better with old, buggy kernels, but that was deemed to
    be unnecessary.

  * SYNs with MD5 don't include a TS option

    See above.

Additionally, it removes a bunch of duplicated logic for calculating options,
which should help avoid these sort of issues in the future.

Signed-off-by: Adam Langley <agl@imperialviolet.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |   2 +
 net/ipv4/tcp_output.c | 432 +++++++++++++++++++++++++++-----------------------
 2 files changed, 238 insertions(+), 196 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 31f5bbfc59b..8983386356a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -50,6 +50,7 @@ extern atomic_t tcp_orphan_count;
 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
+#define MAX_TCP_OPTION_SPACE 40
 
 /* 
  * Never offer a window over 32767 without using window scaling. Some
@@ -184,6 +185,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_BASE_ALIGNED	4
 #define TCPOLEN_SACK_PERBLOCK		8
 #define TCPOLEN_MD5SIG_ALIGNED		20
+#define TCPOLEN_MSS_ALIGNED		4
 
 /* Flags in tp->nonagle */
 #define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 958ff486165..1fa683c0ba9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -345,28 +345,82 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 	TCP_SKB_CB(skb)->end_seq = seq;
 }
 
-static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
-					 __u32 tstamp, __u8 **md5_hash)
-{
-	if (tp->rx_opt.tstamp_ok) {
+#define OPTION_SACK_ADVERTISE	(1 << 0)
+#define OPTION_TS		(1 << 1)
+#define OPTION_MD5		(1 << 2)
+
+struct tcp_out_options {
+	u8 options;		/* bit field of OPTION_* */
+	u8 ws;			/* window scale, 0 to disable */
+	u8 num_sack_blocks;	/* number of SACK blocks to include */
+	u16 mss;		/* 0 to disable */
+	__u32 tsval, tsecr;	/* need to include OPTION_TS */
+};
+
+static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+			      const struct tcp_out_options *opts,
+			      __u8 **md5_hash) {
+	if (unlikely(OPTION_MD5 & opts->options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_TIMESTAMP << 8) |
-			       TCPOLEN_TIMESTAMP);
-		*ptr++ = htonl(tstamp);
-		*ptr++ = htonl(tp->rx_opt.ts_recent);
+			       (TCPOPT_MD5SIG << 8) |
+			       TCPOLEN_MD5SIG);
+		*md5_hash = (__u8 *)ptr;
+		ptr += 4;
+	} else {
+		*md5_hash = NULL;
 	}
-	if (tp->rx_opt.eff_sacks) {
-		struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
+
+	if (likely(OPTION_TS & opts->options)) {
+		if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
+			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
+				       (TCPOLEN_SACK_PERM << 16) |
+				       (TCPOPT_TIMESTAMP << 8) |
+				       TCPOLEN_TIMESTAMP);
+		} else {
+			*ptr++ = htonl((TCPOPT_NOP << 24) |
+				       (TCPOPT_NOP << 16) |
+				       (TCPOPT_TIMESTAMP << 8) |
+				       TCPOLEN_TIMESTAMP);
+		}
+		*ptr++ = htonl(opts->tsval);
+		*ptr++ = htonl(opts->tsecr);
+	}
+
+	if (unlikely(opts->mss)) {
+		*ptr++ = htonl((TCPOPT_MSS << 24) |
+			       (TCPOLEN_MSS << 16) |
+			       opts->mss);
+	}
+
+	if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
+		     !(OPTION_TS & opts->options))) {
+		*ptr++ = htonl((TCPOPT_NOP << 24) |
+			       (TCPOPT_NOP << 16) |
+			       (TCPOPT_SACK_PERM << 8) |
+			       TCPOLEN_SACK_PERM);
+	}
+
+	if (unlikely(opts->ws)) {
+		*ptr++ = htonl((TCPOPT_NOP << 24) |
+			       (TCPOPT_WINDOW << 16) |
+			       (TCPOLEN_WINDOW << 8) |
+			       opts->ws);
+	}
+
+	if (unlikely(opts->num_sack_blocks)) {
+		struct tcp_sack_block *sp = tp->rx_opt.dsack ?
+			tp->duplicate_sack : tp->selective_acks;
 		int this_sack;
 
 		*ptr++ = htonl((TCPOPT_NOP  << 24) |
 			       (TCPOPT_NOP  << 16) |
 			       (TCPOPT_SACK <<  8) |
-			       (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
+			       (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
 						     TCPOLEN_SACK_PERBLOCK)));
 
-		for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+		for (this_sack = 0; this_sack < opts->num_sack_blocks;
+		     ++this_sack) {
 			*ptr++ = htonl(sp[this_sack].start_seq);
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
@@ -376,81 +430,137 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
 			tp->rx_opt.eff_sacks--;
 		}
 	}
+}
+
+static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
+				struct tcp_out_options *opts,
+				struct tcp_md5sig_key **md5) {
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned size = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
-	if (md5_hash) {
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_MD5SIG << 8) |
-			       TCPOLEN_MD5SIG);
-		*md5_hash = (__u8 *)ptr;
+	*md5 = tp->af_specific->md5_lookup(sk, sk);
+	if (*md5) {
+		opts->options |= OPTION_MD5;
+		size += TCPOLEN_MD5SIG_ALIGNED;
 	}
+#else
+	*md5 = NULL;
 #endif
+
+	/* We always get an MSS option.  The option bytes which will be seen in
+	 * normal data packets should timestamps be used, must be in the MSS
+	 * advertised.  But we subtract them from tp->mss_cache so that
+	 * calculations in tcp_sendmsg are simpler etc.  So account for this
+	 * fact here if necessary.  If we don't do this correctly, as a
+	 * receiver we won't recognize data packets as being full sized when we
+	 * should, and thus we won't abide by the delayed ACK rules correctly.
+	 * SACKs don't matter, we never delay an ACK when we have any of those
+	 * going out.  */
+	opts->mss = tcp_advertise_mss(sk);
+	size += TCPOLEN_MSS_ALIGNED;
+
+	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
+		opts->options |= OPTION_TS;
+		opts->tsval = TCP_SKB_CB(skb)->when;
+		opts->tsecr = tp->rx_opt.ts_recent;
+		size += TCPOLEN_TSTAMP_ALIGNED;
+	}
+	if (likely(sysctl_tcp_window_scaling)) {
+		opts->ws = tp->rx_opt.rcv_wscale;
+		size += TCPOLEN_WSCALE_ALIGNED;
+	}
+	if (likely(sysctl_tcp_sack)) {
+		opts->options |= OPTION_SACK_ADVERTISE;
+		if (unlikely(!OPTION_TS & opts->options))
+			size += TCPOLEN_SACKPERM_ALIGNED;
+	}
+
+	return size;
 }
 
-/* Construct a tcp options header for a SYN or SYN_ACK packet.
- * If this is every changed make sure to change the definition of
- * MAX_SYN_SIZE to match the new maximum number of options that you
- * can generate.
- *
- * Note - that with the RFC2385 TCP option, we make room for the
- * 16 byte MD5 hash. This will be filled in later, so the pointer for the
- * location to be filled is passed back up.
- */
-static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
-				  int offer_wscale, int wscale, __u32 tstamp,
-				  __u32 ts_recent, __u8 **md5_hash)
-{
-	/* We always get an MSS option.
-	 * The option bytes which will be seen in normal data
-	 * packets should timestamps be used, must be in the MSS
-	 * advertised.  But we subtract them from tp->mss_cache so
-	 * that calculations in tcp_sendmsg are simpler etc.
-	 * So account for this fact here if necessary.  If we
-	 * don't do this correctly, as a receiver we won't
-	 * recognize data packets as being full sized when we
-	 * should, and thus we won't abide by the delayed ACK
-	 * rules correctly.
-	 * SACKs don't matter, we never delay an ACK when we
-	 * have any of those going out.
-	 */
-	*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
-	if (ts) {
-		if (sack)
-			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
-				       (TCPOLEN_SACK_PERM << 16) |
-				       (TCPOPT_TIMESTAMP << 8) |
-				       TCPOLEN_TIMESTAMP);
-		else
-			*ptr++ = htonl((TCPOPT_NOP << 24) |
-				       (TCPOPT_NOP << 16) |
-				       (TCPOPT_TIMESTAMP << 8) |
-				       TCPOLEN_TIMESTAMP);
-		*ptr++ = htonl(tstamp);		/* TSVAL */
-		*ptr++ = htonl(ts_recent);	/* TSECR */
-	} else if (sack)
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_SACK_PERM << 8) |
-			       TCPOLEN_SACK_PERM);
-	if (offer_wscale)
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_WINDOW << 16) |
-			       (TCPOLEN_WINDOW << 8) |
-			       (wscale));
+static unsigned tcp_synack_options(struct sock *sk,
+				   struct request_sock *req,
+				   unsigned mss, struct sk_buff *skb,
+				   struct tcp_out_options *opts,
+				   struct tcp_md5sig_key **md5) {
+	unsigned size = 0;
+	struct inet_request_sock *ireq = inet_rsk(req);
+	char doing_ts;
+
 #ifdef CONFIG_TCP_MD5SIG
-	/*
-	 * If MD5 is enabled, then we set the option, and include the size
-	 * (always 18). The actual MD5 hash is added just before the
-	 * packet is sent.
-	 */
-	if (md5_hash) {
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_MD5SIG << 8) |
-			       TCPOLEN_MD5SIG);
-		*md5_hash = (__u8 *)ptr;
+	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
+	if (*md5) {
+		opts->options |= OPTION_MD5;
+		size += TCPOLEN_MD5SIG_ALIGNED;
 	}
+#else
+	*md5 = NULL;
 #endif
+
+	/* we can't fit any SACK blocks in a packet with MD5 + TS
+	   options. There was discussion about disabling SACK rather than TS in
+	   order to fit in better with old, buggy kernels, but that was deemed
+	   to be unnecessary. */
+	doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok);
+
+	opts->mss = mss;
+	size += TCPOLEN_MSS_ALIGNED;
+
+	if (likely(ireq->wscale_ok)) {
+		opts->ws = ireq->rcv_wscale;
+		size += TCPOLEN_WSCALE_ALIGNED;
+	}
+	if (likely(doing_ts)) {
+		opts->options |= OPTION_TS;
+		opts->tsval = TCP_SKB_CB(skb)->when;
+		opts->tsecr = req->ts_recent;
+		size += TCPOLEN_TSTAMP_ALIGNED;
+	}
+	if (likely(ireq->sack_ok)) {
+		opts->options |= OPTION_SACK_ADVERTISE;
+		if (unlikely(!doing_ts))
+			size += TCPOLEN_SACKPERM_ALIGNED;
+	}
+
+	return size;
+}
+
+static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
+					struct tcp_out_options *opts,
+					struct tcp_md5sig_key **md5) {
+	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned size = 0;
+
+#ifdef CONFIG_TCP_MD5SIG
+	*md5 = tp->af_specific->md5_lookup(sk, sk);
+	if (unlikely(*md5)) {
+		opts->options |= OPTION_MD5;
+		size += TCPOLEN_MD5SIG_ALIGNED;
+	}
+#else
+	*md5 = NULL;
+#endif
+
+	if (likely(tp->rx_opt.tstamp_ok)) {
+		opts->options |= OPTION_TS;
+		opts->tsval = tcb ? tcb->when : 0;
+		opts->tsecr = tp->rx_opt.ts_recent;
+		size += TCPOLEN_TSTAMP_ALIGNED;
+	}
+
+	if (unlikely(tp->rx_opt.eff_sacks)) {
+		const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
+		opts->num_sack_blocks =
+			min_t(unsigned, tp->rx_opt.eff_sacks,
+			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
+			      TCPOLEN_SACK_PERBLOCK);
+		size += TCPOLEN_SACK_BASE_ALIGNED +
+			opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+	}
+
+	return size;
 }
 
 /* This routine actually transmits TCP packets queued in by
@@ -471,13 +581,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	struct inet_sock *inet;
 	struct tcp_sock *tp;
 	struct tcp_skb_cb *tcb;
-	int tcp_header_size;
-#ifdef CONFIG_TCP_MD5SIG
+	struct tcp_out_options opts;
+	unsigned tcp_options_size, tcp_header_size;
 	struct tcp_md5sig_key *md5;
 	__u8 *md5_hash_location;
-#endif
 	struct tcphdr *th;
-	int sysctl_flags;
 	int err;
 
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
@@ -500,52 +608,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	inet = inet_sk(sk);
 	tp = tcp_sk(sk);
 	tcb = TCP_SKB_CB(skb);
-	tcp_header_size = tp->tcp_header_len;
-
-#define SYSCTL_FLAG_TSTAMPS	0x1
-#define SYSCTL_FLAG_WSCALE	0x2
-#define SYSCTL_FLAG_SACK	0x4
+	memset(&opts, 0, sizeof(opts));
 
-	sysctl_flags = 0;
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
-		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-		if (sysctl_tcp_timestamps) {
-			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
-			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
-		}
-		if (sysctl_tcp_window_scaling) {
-			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
-			sysctl_flags |= SYSCTL_FLAG_WSCALE;
-		}
-		if (sysctl_tcp_sack) {
-			sysctl_flags |= SYSCTL_FLAG_SACK;
-			if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
-				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
-		}
-	} else if (unlikely(tp->rx_opt.eff_sacks)) {
-		/* A SACK is 2 pad bytes, a 2 byte header, plus
-		 * 2 32-bit sequence numbers for each SACK block.
-		 */
-		tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
-				    (tp->rx_opt.eff_sacks *
-				     TCPOLEN_SACK_PERBLOCK));
-	}
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN))
+		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
+	else
+		tcp_options_size = tcp_established_options(sk, skb, &opts,
+							   &md5);
+	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
 	if (tcp_packets_in_flight(tp) == 0)
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
-#ifdef CONFIG_TCP_MD5SIG
-	/*
-	 * Are we doing MD5 on this segment? If so - make
-	 * room for it.
-	 */
-	md5 = tp->af_specific->md5_lookup(sk, sk);
-	if (md5) {
-		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
-		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-	}
-#endif
-
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
 	skb_set_owner_w(skb, sk);
@@ -576,33 +650,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		th->urg			= 1;
 	}
 
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
-		tcp_syn_build_options((__be32 *)(th + 1),
-				      tcp_advertise_mss(sk),
-				      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
-				      (sysctl_flags & SYSCTL_FLAG_SACK),
-				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
-				      tp->rx_opt.rcv_wscale,
-				      tcb->when,
-				      tp->rx_opt.ts_recent,
-
-#ifdef CONFIG_TCP_MD5SIG
-				      md5 ? &md5_hash_location :
-#endif
-				      NULL);
-	} else {
-		tcp_build_and_update_options((__be32 *)(th + 1),
-					     tp, tcb->when,
-#ifdef CONFIG_TCP_MD5SIG
-					     md5 ? &md5_hash_location :
-#endif
-					     NULL);
+	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
 		TCP_ECN_send(sk, skb, tcp_header_size);
-	}
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Calculate the MD5 hash, as we have all we need now */
 	if (md5) {
+		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 		tp->af_specific->calc_md5_hash(md5_hash_location,
 					       md5, sk, NULL, skb);
 	}
@@ -626,10 +681,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	tcp_enter_cwr(sk, 1);
 
 	return net_xmit_eval(err);
-
-#undef SYSCTL_FLAG_TSTAMPS
-#undef SYSCTL_FLAG_WSCALE
-#undef SYSCTL_FLAG_SACK
 }
 
 /* This routine just queue's the buffer
@@ -970,6 +1021,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 	u32 mss_now;
 	u16 xmit_size_goal;
 	int doing_tso = 0;
+	unsigned header_len;
+	struct tcp_out_options opts;
+	struct tcp_md5sig_key *md5;
 
 	mss_now = tp->mss_cache;
 
@@ -982,14 +1036,16 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 			mss_now = tcp_sync_mss(sk, mtu);
 	}
 
-	if (tp->rx_opt.eff_sacks)
-		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
-			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
-
-#ifdef CONFIG_TCP_MD5SIG
-	if (tp->af_specific->md5_lookup(sk, sk))
-		mss_now -= TCPOLEN_MD5SIG_ALIGNED;
-#endif
+	header_len = tcp_established_options(sk, NULL, &opts, &md5) +
+		     sizeof(struct tcphdr);
+	/* The mss_cache is sized based on tp->tcp_header_len, which assumes
+	 * some common options. If this is an odd packet (because we have SACK
+	 * blocks etc) then our calculated header_len will be different, and
+	 * we have to adjust mss_now correspondingly */
+	if (header_len != tp->tcp_header_len) {
+		int delta = (int) header_len - tp->tcp_header_len;
+		mss_now -= delta;
+	}
 
 	xmit_size_goal = mss_now;
 
@@ -2179,11 +2235,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcphdr *th;
 	int tcp_header_size;
+	struct tcp_out_options opts;
 	struct sk_buff *skb;
-#ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *md5;
 	__u8 *md5_hash_location;
-#endif
 
 	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
 	if (skb == NULL)
@@ -2194,18 +2249,27 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	skb->dst = dst_clone(dst);
 
-	tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
-			   (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
-			   (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
-			   /* SACK_PERM is in the place of NOP NOP of TS */
-			   ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
+	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
+		__u8 rcv_wscale;
+		/* Set this up on the first call only */
+		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+		/* tcp_full_space because it is guaranteed to be the first packet */
+		tcp_select_initial_window(tcp_full_space(sk),
+			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+			&req->rcv_wnd,
+			&req->window_clamp,
+			ireq->wscale_ok,
+			&rcv_wscale);
+		ireq->rcv_wscale = rcv_wscale;
+	}
+
+	memset(&opts, 0, sizeof(opts));
+	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	tcp_header_size = tcp_synack_options(sk, req,
+					     dst_metric(dst, RTAX_ADVMSS),
+					     skb, &opts, &md5) +
+			  sizeof(struct tcphdr);
 
-#ifdef CONFIG_TCP_MD5SIG
-	/* Are we doing MD5 on this segment? If so - make room for it */
-	md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
-	if (md5)
-		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
-#endif
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
 
@@ -2223,19 +2287,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
-	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
-		__u8 rcv_wscale;
-		/* Set this up on the first call only */
-		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
-		/* tcp_full_space because it is guaranteed to be the first packet */
-		tcp_select_initial_window(tcp_full_space(sk),
-			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
-			&req->rcv_wnd,
-			&req->window_clamp,
-			ireq->wscale_ok,
-			&rcv_wscale);
-		ireq->rcv_wscale = rcv_wscale;
-	}
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(min(req->rcv_wnd, 65535U));
@@ -2244,18 +2295,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
 	else
 #endif
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
-			      ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
-			      TCP_SKB_CB(skb)->when,
-			      req->ts_recent,
-			      (
-#ifdef CONFIG_TCP_MD5SIG
-			       md5 ? &md5_hash_location :
-#endif
-			       NULL)
-			      );
-
+	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
 	th->doff = (tcp_header_size >> 2);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
-- 
cgit v1.2.3-70-g09d2


From 4389dded7767d24290463f2a8302ba3253ebdd56 Mon Sep 17 00:00:00 2001
From: Adam Langley <agl@imperialviolet.org>
Date: Sat, 19 Jul 2008 00:07:02 -0700
Subject: tcp: Remove redundant checks when setting eff_sacks

Remove redundant checks when setting eff_sacks and make the number of SACKs a
compile time constant. Now that the options code knows how many SACK blocks can
fit in the header, we don't need to have the SACK code guessing at it.

Signed-off-by: Adam Langley <agl@imperialviolet.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h  |  6 ++++++
 net/ipv4/tcp_input.c | 25 ++++++++++---------------
 2 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 07e79bdb9cd..2e2557388e3 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -224,6 +224,12 @@ struct tcp_options_received {
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
+/* This is the max number of SACKS that we'll generate and process. It's safe
+ * to increse this, although since:
+ *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
+ * only four options will fit in a standard TCP header */
+#define TCP_NUM_SACKS 4
+
 struct tcp_request_sock {
 	struct inet_request_sock 	req;
 #ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 88810bc0137..1f5e6049883 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1423,10 +1423,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 	unsigned char *ptr = (skb_transport_header(ack_skb) +
 			      TCP_SKB_CB(ack_skb)->sacked);
 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
-	struct tcp_sack_block sp[4];
+	struct tcp_sack_block sp[TCP_NUM_SACKS];
 	struct tcp_sack_block *cache;
 	struct sk_buff *skb;
-	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3;
+	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
 	int used_sacks;
 	int reord = tp->packets_out;
 	int flag = 0;
@@ -3735,8 +3735,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 		tp->rx_opt.dsack = 1;
 		tp->duplicate_sack[0].start_seq = seq;
 		tp->duplicate_sack[0].end_seq = end_seq;
-		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1,
-					   4 - tp->rx_opt.tstamp_ok);
+		tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1;
 	}
 }
 
@@ -3791,9 +3790,8 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 			 * Decrease num_sacks.
 			 */
 			tp->rx_opt.num_sacks--;
-			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
-						   tp->rx_opt.dsack,
-						   4 - tp->rx_opt.tstamp_ok);
+			tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
+					       tp->rx_opt.dsack;
 			for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
 				sp[i] = sp[i + 1];
 			continue;
@@ -3843,7 +3841,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	 *
 	 * If the sack array is full, forget about the last one.
 	 */
-	if (this_sack >= 4) {
+	if (this_sack >= TCP_NUM_SACKS) {
 		this_sack--;
 		tp->rx_opt.num_sacks--;
 		sp--;
@@ -3856,8 +3854,7 @@ new_sack:
 	sp->start_seq = seq;
 	sp->end_seq = end_seq;
 	tp->rx_opt.num_sacks++;
-	tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack,
-				   4 - tp->rx_opt.tstamp_ok);
+	tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
 }
 
 /* RCV.NXT advances, some SACKs should be eaten. */
@@ -3894,9 +3891,8 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	}
 	if (num_sacks != tp->rx_opt.num_sacks) {
 		tp->rx_opt.num_sacks = num_sacks;
-		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
-					   tp->rx_opt.dsack,
-					   4 - tp->rx_opt.tstamp_ok);
+		tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
+				       tp->rx_opt.dsack;
 	}
 }
 
@@ -3975,8 +3971,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	if (tp->rx_opt.dsack) {
 		tp->rx_opt.dsack = 0;
-		tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
-					     4 - tp->rx_opt.tstamp_ok);
+		tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
 	}
 
 	/*  Queue data for delivery to the user.
-- 
cgit v1.2.3-70-g09d2


From bdccc4ca13a639d759206c5b21ed73f8a813eaba Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Sat, 19 Jul 2008 00:15:13 -0700
Subject: tcp: fix kernel panic with listening_get_next

# BUG: unable to handle kernel NULL pointer dereference at
0000000000000038
IP: [<ffffffff821ed01e>] listening_get_next+0x50/0x1b3
PGD 11e4b9067 PUD 11d16c067 PMD 0
Oops: 0000 [1] SMP
last sysfs file: /sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map
CPU 3
Modules linked in: bridge ipv6 button battery ac loop dm_mod tg3 ext3
jbd edd fan thermal processor thermal_sys hwmon sg sata_svw libata dock
serverworks sd_mod scsi_mod ide_disk ide_core [last unloaded: freq_table]
Pid: 3368, comm: slpd Not tainted 2.6.26-rc2-mm1-lxc4 #1
RIP: 0010:[<ffffffff821ed01e>] [<ffffffff821ed01e>]
listening_get_next+0x50/0x1b3
RSP: 0018:ffff81011e1fbe18 EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff8100be0ad3c0 RCX: ffff8100619f50c0
RDX: ffffffff82475be0 RSI: ffff81011d9ae6c0 RDI: ffff8100be0ad508
RBP: ffff81011f4f1240 R08: 00000000ffffffff R09: ffff8101185b6780
R10: 000000000000002d R11: ffffffff820fdbfa R12: ffff8100be0ad3c8
R13: ffff8100be0ad6a0 R14: ffff8100be0ad3c0 R15: ffffffff825b8ce0
FS: 00007f6a0ebd16d0(0000) GS:ffff81011f424540(0000)
knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000038 CR3: 000000011dc20000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process slpd (pid: 3368, threadinfo ffff81011e1fa000, task
ffff81011f4b8660)
Stack: 00000000000002ee ffff81011f5a57c0 ffff81011f4f1240
ffff81011e1fbe90
0000000000001000 0000000000000000 00007fff16bf2590 ffffffff821ed9c8
ffff81011f5a57c0 ffff81011d9ae6c0 000000000000041a ffffffff820b0abd
Call Trace:
[<ffffffff821ed9c8>] ? tcp_seq_next+0x34/0x7e
[<ffffffff820b0abd>] ? seq_read+0x1aa/0x29d
[<ffffffff820d21b4>] ? proc_reg_read+0x73/0x8e
[<ffffffff8209769c>] ? vfs_read+0xaa/0x152
[<ffffffff82097a7d>] ? sys_read+0x45/0x6e
[<ffffffff8200bd2b>] ? system_call_after_swapgs+0x7b/0x80


Code: 31 a9 25 00 e9 b5 00 00 00 ff 45 20 83 7d 0c 01 75 79 4c 8b 75 10
48 8b 0e eb 1d 48 8b 51 20 0f b7 45 08 39 02 75 0e 48 8b 41 28 <4c> 39
78 38 0f 84 93 00 00 00 48 8b 09 48 85 c9 75 de 8b 55 1c
RIP [<ffffffff821ed01e>] listening_get_next+0x50/0x1b3
RSP <ffff81011e1fbe18>
CR2: 0000000000000038

This kernel panic appears with CONFIG_NET_NS=y.

How to reproduce ?

    On the buggy host (host A)
       * ip addr add 1.2.3.4/24 dev eth0

    On a remote host (host B)
       * ip addr add 1.2.3.5/24 dev eth0
       * iptables -A INPUT -p tcp -s 1.2.3.4 -j DROP
       * ssh 1.2.3.4

    On host A:
       * netstat -ta or cat /proc/net/tcp

This bug happens when reading /proc/net/tcp[6] when there is a req_sock
at the SYN_RECV state.

When a SYN is received the minisock is created and the sk field is set to
NULL. In the listening_get_next function, we try to look at the field
req->sk->sk_net.

When looking at how to fix this bug, I noticed that is useless to do
the check for the minisock belonging to the namespace. A minisock belongs
to a listen point and this one is per namespace, so when browsing the
minisock they are always per namespace.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5400d75ff17..a82df630756 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1892,8 +1892,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 		req = req->dl_next;
 		while (1) {
 			while (req) {
-				if (req->rsk_ops->family == st->family &&
-				    net_eq(sock_net(req->sk), net)) {
+				if (req->rsk_ops->family == st->family) {
 					cur = req;
 					goto out;
 				}
-- 
cgit v1.2.3-70-g09d2


From 725a8ff04a5dc473cd9d8eb7817ca96fc36c7789 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 19 Jul 2008 00:28:58 -0700
Subject: ipv6: remove unused parameter from ip6_ra_control

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h       | 4 +---
 net/ipv6/ipv6_sockglue.c | 7 ++-----
 net/ipv6/raw.c           | 2 +-
 3 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c2222ee74d6..6b7982d3dda 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -227,9 +227,7 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl)
 		atomic_dec(&fl->users);
 }
 
-extern int 			ip6_ra_control(struct sock *sk, int sel,
-					       void (*destructor)(struct sock *));
-
+extern int 			ip6_ra_control(struct sock *sk, int sel);
 
 extern int			ipv6_parse_hopopts(struct sk_buff *skb);
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 030c0c956f9..8c6ea07f4d5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -59,7 +59,7 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
 struct ip6_ra_chain *ip6_ra_chain;
 DEFINE_RWLOCK(ip6_ra_lock);
 
-int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
+int ip6_ra_control(struct sock *sk, int sel)
 {
 	struct ip6_ra_chain *ra, *new_ra, **rap;
 
@@ -81,8 +81,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
 			*rap = ra->next;
 			write_unlock_bh(&ip6_ra_lock);
 
-			if (ra->destructor)
-				ra->destructor(sk);
 			sock_put(sk);
 			kfree(ra);
 			return 0;
@@ -94,7 +92,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
 	}
 	new_ra->sk = sk;
 	new_ra->sel = sel;
-	new_ra->destructor = destructor;
 	new_ra->next = ra;
 	*rap = new_ra;
 	sock_hold(sk);
@@ -632,7 +629,7 @@ done:
 	case IPV6_ROUTER_ALERT:
 		if (optlen < sizeof(int))
 			goto e_inval;
-		retv = ip6_ra_control(sk, val, NULL);
+		retv = ip6_ra_control(sk, val);
 		break;
 	case IPV6_MTU_DISCOVER:
 		if (optlen < sizeof(int))
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 34cfb3f41c2..01d47674f7e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1157,7 +1157,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 static void rawv6_close(struct sock *sk, long timeout)
 {
 	if (inet_sk(sk)->num == IPPROTO_RAW)
-		ip6_ra_control(sk, -1, NULL);
+		ip6_ra_control(sk, -1);
 	ip6mr_sk_done(sk);
 	sk_common_release(sk);
 }
-- 
cgit v1.2.3-70-g09d2


From 721499e8931c5732202481ae24f2dfbf9910f129 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 19 Jul 2008 22:34:43 -0700
Subject: netns: Use net_eq() to compare net-namespaces for optimization.

Without CONFIG_NET_NS, namespace is always &init_net.
Compiler will be able to omit namespace comparisons with this patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/aarp.c                |  4 ++--
 net/appletalk/ddp.c                 |  6 +++---
 net/atm/clip.c                      |  2 +-
 net/atm/mpc.c                       |  2 +-
 net/ax25/af_ax25.c                  |  2 +-
 net/ax25/ax25_in.c                  |  2 +-
 net/bridge/br_notify.c              |  2 +-
 net/bridge/br_stp_bpdu.c            |  2 +-
 net/can/af_can.c                    |  4 ++--
 net/can/bcm.c                       |  2 +-
 net/can/raw.c                       |  2 +-
 net/core/pktgen.c                   |  2 +-
 net/decnet/af_decnet.c              |  2 +-
 net/decnet/dn_route.c               |  2 +-
 net/econet/af_econet.c              |  4 ++--
 net/ipv4/igmp.c                     | 26 +++++++++++++-------------
 net/ipv4/ipconfig.c                 |  4 ++--
 net/ipv4/ipmr.c                     |  2 +-
 net/ipv4/netfilter/ip_queue.c       |  2 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c |  2 +-
 net/ipv6/ip6mr.c                    |  2 +-
 net/ipv6/netfilter/ip6_queue.c      |  2 +-
 net/ipv6/proc.c                     |  2 +-
 net/ipx/af_ipx.c                    |  4 ++--
 net/irda/irlap_frame.c              |  2 +-
 net/llc/llc_input.c                 |  2 +-
 net/netfilter/nf_sockopt.c          |  2 +-
 net/netfilter/nfnetlink_queue.c     |  2 +-
 net/netlabel/netlabel_unlabeled.c   |  2 +-
 net/netrom/af_netrom.c              |  2 +-
 net/packet/af_packet.c              |  2 +-
 net/rose/af_rose.c                  |  2 +-
 net/sctp/protocol.c                 |  2 +-
 net/tipc/eth_media.c                |  4 ++--
 net/wireless/wext.c                 |  2 +-
 net/x25/af_x25.c                    |  2 +-
 net/x25/x25_dev.c                   |  2 +-
 net/xfrm/xfrm_policy.c              |  2 +-
 38 files changed, 58 insertions(+), 58 deletions(-)

(limited to 'net')

diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 25aa37ce943..b25c1e909d1 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -333,7 +333,7 @@ static int aarp_device_event(struct notifier_block *this, unsigned long event,
 	struct net_device *dev = ptr;
 	int ct;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (event == NETDEV_DOWN) {
@@ -716,7 +716,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct atalk_addr sa, *ma, da;
 	struct atalk_iface *ifa;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto out0;
 
 	/* We only do Ethernet SNAP AARP. */
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 44cd42f7786..07b5b82c5ea 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -648,7 +648,7 @@ static int ddp_device_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (event == NETDEV_DOWN)
@@ -1405,7 +1405,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 	int origlen;
 	__u16 len_hops;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto freeit;
 
 	/* Don't mangle buffer if shared */
@@ -1493,7 +1493,7 @@ freeit:
 static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		     struct packet_type *pt, struct net_device *orig_dev)
 {
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto freeit;
 
 	/* Expand any short form frames */
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 6f8223ebf55..5b5b96344ce 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -612,7 +612,7 @@ static int clip_device_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = arg;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (event == NETDEV_UNREGISTER) {
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 9db332e7a6c..4fccaa1e07b 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -964,7 +964,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo
 
 	dev = (struct net_device *)dev_ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (dev->name == NULL || strncmp(dev->name, "lec", 3))
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 97eaa23ad9e..01c83e2a4c1 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -116,7 +116,7 @@ static int ax25_device_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = (struct net_device *)ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	/* Reject non AX.25 devices */
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 33790a8efbc..4a5ba978a80 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -451,7 +451,7 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev,
 	skb->sk = NULL;		/* Initially we don't know who it's for */
 	skb->destructor = NULL;	/* Who initializes this, dammit?! */
 
-	if (dev_net(dev) != &init_net) {
+	if (!net_eq(dev_net(dev), &init_net)) {
 		kfree_skb(skb);
 		return 0;
 	}
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 88d8ec7b314..76340bdd052 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -35,7 +35,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	struct net_bridge_port *p = dev->br_port;
 	struct net_bridge *br;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	/* not a port of a bridge */
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 99647617451..8b200f96f72 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -140,7 +140,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
 	struct net_bridge *br;
 	const unsigned char *buf;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto err;
 
 	if (!p)
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 484bbf6dd03..8035fbf526a 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -615,7 +615,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct can_frame *cf = (struct can_frame *)skb->data;
 	int matches;
 
-	if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) {
+	if (dev->type != ARPHRD_CAN || !net_eq(dev_net(dev), &init_net)) {
 		kfree_skb(skb);
 		return 0;
 	}
@@ -728,7 +728,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
 	struct net_device *dev = (struct net_device *)data;
 	struct dev_rcv_lists *d;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (dev->type != ARPHRD_CAN)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 72c2ce904f8..d0dd382001e 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1303,7 +1303,7 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
 	struct bcm_op *op;
 	int notify_enodev = 0;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (dev->type != ARPHRD_CAN)
diff --git a/net/can/raw.c b/net/can/raw.c
index 3e46ee36a1a..6e0663faaf9 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -210,7 +210,7 @@ static int raw_notifier(struct notifier_block *nb,
 	struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
 	struct sock *sk = &ro->sk;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (dev->type != ARPHRD_CAN)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 906802db4ed..c7d484f7e1c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1875,7 +1875,7 @@ static int pktgen_device_event(struct notifier_block *unused,
 {
 	struct net_device *dev = ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	/* It is OK that we do not hold the group lock right now,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 61b7df577dd..3c23ab33dbc 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2089,7 +2089,7 @@ static int dn_device_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = (struct net_device *)ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	switch(event) {
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index f50e88bf266..821bd1cdec0 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -580,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 	struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr;
 	unsigned char padlen = 0;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto dump_it;
 
 	if (dn == NULL)
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index d35127bb84e..8789d2bb1b0 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -1062,7 +1062,7 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	struct sock *sk;
 	struct ec_device *edev = dev->ec_ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto drop;
 
 	if (skb->pkt_type == PACKET_OTHERHOST)
@@ -1119,7 +1119,7 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void
 	struct net_device *dev = (struct net_device *)data;
 	struct ec_device *edev;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	switch (msg) {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 68e84a933e9..6203ece5360 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1196,7 +1196,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	for (im=in_dev->mc_list; im; im=im->next) {
@@ -1278,7 +1278,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
@@ -1308,7 +1308,7 @@ void ip_mc_down(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	for (i=in_dev->mc_list; i; i=i->next)
@@ -1331,7 +1331,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
 {
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	in_dev->mc_tomb = NULL;
@@ -1357,7 +1357,7 @@ void ip_mc_up(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
@@ -1376,7 +1376,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	/* Deactivate timers */
@@ -1760,7 +1760,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -1831,7 +1831,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	u32 ifindex;
 	int ret = -EADDRNOTAVAIL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -1879,7 +1879,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -2015,7 +2015,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	    msf->imsf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -2098,7 +2098,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -2163,7 +2163,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -2250,7 +2250,7 @@ void ip_mc_drop_socket(struct sock *sk)
 	if (inet->mc_list == NULL)
 		return;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return;
 
 	rtnl_lock();
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b88aa9afa42..42065fff46c 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -432,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	unsigned char *sha, *tha;		/* s for "source", t for "target" */
 	struct ic_device *d;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto drop;
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -852,7 +852,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 	struct ic_device *d;
 	int len, ext_len;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto drop;
 
 	/* Perform verifications before taking the lock.  */
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 033c712c3a5..c519b8d30ee 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1124,7 +1124,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
 	struct vif_device *v;
 	int ct;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (event != NETDEV_UNREGISTER)
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index aa33a4a7a71..432ce9d1c11 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -477,7 +477,7 @@ ipq_rcv_dev_event(struct notifier_block *this,
 {
 	struct net_device *dev = ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	/* Drop any packets associated with the downed device */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 84c26dd27d8..0841aefaa50 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -120,7 +120,7 @@ static int masq_device_event(struct notifier_block *this,
 {
 	const struct net_device *dev = ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (event == NETDEV_DOWN) {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0b41aa2675f..095bc453ff4 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -935,7 +935,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 	struct mif_device *v;
 	int ct;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (event != NETDEV_UNREGISTER)
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 1b8815f6153..5859c046cbc 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -480,7 +480,7 @@ ipq_rcv_dev_event(struct notifier_block *this,
 {
 	struct net_device *dev = ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	/* Drop any packets associated with the downed device */
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 70940b3654a..f82f6074cf8 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -214,7 +214,7 @@ int snmp6_register_dev(struct inet6_dev *idev)
 	if (!idev || !idev->dev)
 		return -EINVAL;
 
-	if (dev_net(idev->dev) != &init_net)
+	if (!net_eq(dev_net(idev->dev), &init_net))
 		return 0;
 
 	if (!proc_net_devsnmp6)
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 81ae8735f5e..b6e70f92e7f 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -335,7 +335,7 @@ static int ipxitf_device_event(struct notifier_block *notifier,
 	struct net_device *dev = ptr;
 	struct ipx_interface *i, *tmp;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (event != NETDEV_DOWN && event != NETDEV_UP)
@@ -1636,7 +1636,7 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty
 	u16 ipx_pktsize;
 	int rc = 0;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto drop;
 
 	/* Not ours */
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 90894534f3c..f17b65af9c9 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -1326,7 +1326,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
 	int command;
 	__u8 control;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto out;
 
 	/* FIXME: should we get our own field? */
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index 1c45f172991..57ad974e4d9 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -150,7 +150,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 	int (*rcv)(struct sk_buff *, struct net_device *,
 		   struct packet_type *, struct net_device *);
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto drop;
 
 	/*
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index 69d699f95f4..01489681fa9 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -65,7 +65,7 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf,
 {
 	struct nf_sockopt_ops *ops;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return ERR_PTR(-ENOPROTOOPT);
 
 	if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 04e9c965f8c..8c860112ce0 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -555,7 +555,7 @@ nfqnl_rcv_dev_event(struct notifier_block *this,
 {
 	struct net_device *dev = ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	/* Drop any packets associated with the downed device */
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 56f80872924..921c118ead8 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -954,7 +954,7 @@ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this,
 	struct net_device *dev = ptr;
 	struct netlbl_unlhsh_iface *iface = NULL;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	/* XXX - should this be a check for NETDEV_DOWN or _UNREGISTER? */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index d41be0d66eb..fccc250f95f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -118,7 +118,7 @@ static int nr_device_event(struct notifier_block *this, unsigned long event, voi
 {
 	struct net_device *dev = (struct net_device *)ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (event != NETDEV_DOWN)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index de73bcb5235..d56cae112dc 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1677,7 +1677,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
 		case SIOCGIFDSTADDR:
 		case SIOCSIFDSTADDR:
 		case SIOCSIFFLAGS:
-			if (sock_net(sk) != &init_net)
+			if (!net_eq(sock_net(sk), &init_net))
 				return -ENOIOCTLCMD;
 			return inet_dgram_ops.ioctl(sock, cmd, arg);
 #endif
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index f3a691f3490..dbc963b4f5f 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -209,7 +209,7 @@ static int rose_device_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = (struct net_device *)ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (event != NETDEV_DOWN)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index ed9acffe810..a6e0818bcff 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -668,7 +668,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
 	struct sctp_sockaddr_entry *temp;
 	int found = 0;
 
-	if (dev_net(ifa->ifa_dev->dev) != &init_net)
+	if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net))
 		return NOTIFY_DONE;
 
 	switch (ev) {
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index bc72fbc4f8b..fe43ef7dd7e 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -101,7 +101,7 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
 	struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
 	u32 size;
 
-	if (dev_net(dev) != &init_net) {
+	if (!net_eq(dev_net(dev), &init_net)) {
 		kfree_skb(buf);
 		return 0;
 	}
@@ -198,7 +198,7 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt,
 	struct eth_bearer *eb_ptr = &eth_bearers[0];
 	struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	while ((eb_ptr->dev != dev)) {
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 273a8435999..df5b3886c36 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -1299,7 +1299,7 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len)
 	struct sk_buff *skb;
 	int err;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return;
 
 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 7b1c6ef0455..9fc5b023d11 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -191,7 +191,7 @@ static int x25_device_event(struct notifier_block *this, unsigned long event,
 	struct net_device *dev = ptr;
 	struct x25_neigh *nb;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	if (dev->type == ARPHRD_X25
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 3ff206c0ae9..3e1efe53464 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -95,7 +95,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
 	struct sk_buff *nskb;
 	struct x25_neigh *nb;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		goto drop;
 
 	nskb = skb_copy(skb, GFP_ATOMIC);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cae9fd81554..841b32a2e68 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2360,7 +2360,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
 {
 	struct net_device *dev = ptr;
 
-	if (dev_net(dev) != &init_net)
+	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
 	switch (event) {
-- 
cgit v1.2.3-70-g09d2


From 53b7997fd5c62408d10b9aafb38974ce90fd2356 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 19 Jul 2008 22:35:03 -0700
Subject: ipv6 netns: Make several "global" sysctl variables namespace aware.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c      |  8 +++++---
 net/ipv6/anycast.c       |  2 +-
 net/ipv6/exthdrs.c       |  2 +-
 net/ipv6/ip6_input.c     |  2 +-
 net/ipv6/ip6_output.c    |  4 ++--
 net/ipv6/ipv6_sockglue.c |  2 +-
 net/ipv6/mcast.c         |  2 +-
 net/ipv6/ndisc.c         | 11 +++++++----
 net/ipv6/route.c         |  4 ++--
 9 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 30184e0dd74..580ae506c39 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1863,6 +1863,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 		struct inet6_ifaddr * ifp;
 		struct in6_addr addr;
 		int create = 0, update_lft = 0;
+		struct net *net = dev_net(dev);
 
 		if (pinfo->prefix_len == 64) {
 			memcpy(&addr, &pinfo->prefix, 8);
@@ -1881,7 +1882,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 
 ok:
 
-		ifp = ipv6_get_ifaddr(dev_net(dev), &addr, dev, 1);
+		ifp = ipv6_get_ifaddr(net, &addr, dev, 1);
 
 		if (ifp == NULL && valid_lft) {
 			int max_addresses = in6_dev->cnf.max_addresses;
@@ -1889,7 +1890,7 @@ ok:
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 			if (in6_dev->cnf.optimistic_dad &&
-			    !ipv6_devconf.forwarding)
+			    !net->ipv6.devconf_all->forwarding)
 				addr_flags = IFA_F_OPTIMISTIC;
 #endif
 
@@ -2314,11 +2315,12 @@ static void init_loopback(struct net_device *dev)
 static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
 {
 	struct inet6_ifaddr * ifp;
+	struct net *net = dev_net(idev->dev);
 	u32 addr_flags = IFA_F_PERMANENT;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	if (idev->cnf.optimistic_dad &&
-	    !ipv6_devconf.forwarding)
+	    !net->ipv6.devconf_all->forwarding)
 		addr_flags |= IFA_F_OPTIMISTIC;
 #endif
 
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e1b29fabdf..8336cd81cb4 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -60,7 +60,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 	struct inet6_dev *idev;
 	struct ipv6_ac_socklist *pac;
 	struct net *net = sock_net(sk);
-	int	ishost = !ipv6_devconf.forwarding;
+	int	ishost = !net->ipv6.devconf_all->forwarding;
 	int	err = 0;
 
 	if (!capable(CAP_NET_ADMIN))
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 9f1084b4c0e..837c830d6d8 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -319,7 +319,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	int n, i;
 	struct ipv6_rt_hdr *hdr;
 	struct rt0_hdr *rthdr;
-	int accept_source_route = ipv6_devconf.accept_source_route;
+	int accept_source_route = dev_net(skb->dev)->ipv6.devconf_all->accept_source_route;
 
 	idev = in6_dev_get(skb->dev);
 	if (idev) {
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index ea81c614dde..7e14cccd056 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -249,7 +249,7 @@ int ip6_mc_input(struct sk_buff *skb)
 	/*
 	 *      IPv6 multicast router mode is now supported ;)
 	 */
-	if (ipv6_devconf.mc_forwarding &&
+	if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
 	    likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
 		/*
 		 * Okay, we try to forward - split and duplicate
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0981c1ef305..6407c64ea4a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -411,7 +411,7 @@ int ip6_forward(struct sk_buff *skb)
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct net *net = dev_net(dst->dev);
 
-	if (ipv6_devconf.forwarding == 0)
+	if (net->ipv6.devconf_all->forwarding == 0)
 		goto error;
 
 	if (skb_warn_if_lro(skb))
@@ -458,7 +458,7 @@ int ip6_forward(struct sk_buff *skb)
 	}
 
 	/* XXX: idev->cnf.proxy_ndp? */
-	if (ipv6_devconf.proxy_ndp &&
+	if (net->ipv6.devconf_all->proxy_ndp &&
 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 		int proxied = ip6_forward_proxy_check(skb);
 		if (proxied > 0)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 8c6ea07f4d5..ea33b26512c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1038,7 +1038,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 			dst_release(dst);
 		}
 		if (val < 0)
-			val = ipv6_devconf.hop_limit;
+			val = sock_net(sk)->ipv6.devconf_all->hop_limit;
 		break;
 	}
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bd2fe4cfafa..1b285371124 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -151,7 +151,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 #define IGMP6_UNSOLICITED_IVAL	(10*HZ)
 #define MLD_QRV_DEFAULT		2
 
-#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \
+#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \
 		(idev)->cnf.force_mld_version == 1 || \
 		((idev)->mc_v1_seen && \
 		time_before(jiffies, (idev)->mc_v1_seen)))
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 282fdb31f8e..beb48e3f038 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -784,15 +784,17 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 
 		idev = ifp->idev;
 	} else {
+		struct net *net = dev_net(dev);
+
 		idev = in6_dev_get(dev);
 		if (!idev) {
 			/* XXX: count this drop? */
 			return;
 		}
 
-		if (ipv6_chk_acast_addr(dev_net(dev), dev, &msg->target) ||
+		if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
 		    (idev->cnf.forwarding &&
-		     (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
+		     (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
 		     (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
 			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
 			    skb->pkt_type != PACKET_HOST &&
@@ -921,6 +923,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 
 	if (neigh) {
 		u8 old_flags = neigh->flags;
+		struct net *net = dev_net(dev);
 
 		if (neigh->nud_state & NUD_FAILED)
 			goto out;
@@ -931,8 +934,8 @@ static void ndisc_recv_na(struct sk_buff *skb)
 		 * has already sent a NA to us.
 		 */
 		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
-		    ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
-		    pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) {
+		    net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
+		    pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
 			/* XXX: idev->cnf.prixy_ndp */
 			goto out;
 		}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cb8a51271b6..615b328de25 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -676,7 +676,7 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 	int strict = 0;
 	int attempts = 3;
 	int err;
-	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
+	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 
 	strict |= flags & RT6_LOOKUP_F_IFACE;
 
@@ -1058,7 +1058,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
 			hoplimit = idev->cnf.hop_limit;
 			in6_dev_put(idev);
 		} else
-			hoplimit = ipv6_devconf.hop_limit;
+			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
 	}
 	return hoplimit;
 }
-- 
cgit v1.2.3-70-g09d2


From 230b183921ecbaa5fedc0d35ad6ba7bb64b6e06a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 19 Jul 2008 22:35:47 -0700
Subject: net: Use standard structures for generic socket address structures.

Use sockaddr_storage{} for generic socket address storage
and ensures proper alignment.
Use sockaddr{} for pointers to omit several casts.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h |  6 ++--
 include/net/compat.h   |  2 +-
 net/compat.c           |  2 +-
 net/core/iovec.c       |  2 +-
 net/socket.c           | 82 ++++++++++++++++++++++++++++----------------------
 5 files changed, 52 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index bd2b30a74e7..950af631e7f 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -306,10 +306,10 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata,
 					  int offset, 
 					  unsigned int len, __wsum *csump);
 
-extern int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode);
+extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode);
 extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len);
-extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen);
-extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr);
+extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, int __user *ulen);
+extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
 
 #endif
diff --git a/include/net/compat.h b/include/net/compat.h
index 164cb682e22..5bbf8bf9efe 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -32,7 +32,7 @@ extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *);
 #endif /* defined(CONFIG_COMPAT) */
 
 extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *);
-extern int verify_compat_iovec(struct msghdr *, struct iovec *, char *, int);
+extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int);
 extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned);
 extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned);
 extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *);
diff --git a/net/compat.c b/net/compat.c
index c823f6f290c..6e1b03b5193 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -75,7 +75,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg)
 
 /* I've named the args so it is easy to tell whose space the pointers are in. */
 int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
-		   char *kern_address, int mode)
+		   struct sockaddr *kern_address, int mode)
 {
 	int tot_len;
 
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 755c37fdaee..4c9c0121c9d 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -36,7 +36,7 @@
  *	in any case.
  */
 
-int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
+int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
 {
 	int size, err, ct;
 
diff --git a/net/socket.c b/net/socket.c
index 81fe8251304..1ba57d88898 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -180,9 +180,9 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0;
  *	invalid addresses -EFAULT is returned. On a success 0 is returned.
  */
 
-int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
+int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
 {
-	if (ulen < 0 || ulen > MAX_SOCK_ADDR)
+	if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
 		return -EINVAL;
 	if (ulen == 0)
 		return 0;
@@ -208,7 +208,7 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
  *	specified. Zero is returned for a success.
  */
 
-int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
+int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
 		      int __user *ulen)
 {
 	int err;
@@ -219,7 +219,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
 		return err;
 	if (len > klen)
 		len = klen;
-	if (len < 0 || len > MAX_SOCK_ADDR)
+	if (len < 0 || len > sizeof(struct sockaddr_storage))
 		return -EINVAL;
 	if (len) {
 		if (audit_sockaddr(klen, kaddr))
@@ -1342,20 +1342,20 @@ out_fd:
 asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
 {
 	struct socket *sock;
-	char address[MAX_SOCK_ADDR];
+	struct sockaddr_storage address;
 	int err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (sock) {
-		err = move_addr_to_kernel(umyaddr, addrlen, address);
+		err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
 		if (err >= 0) {
 			err = security_socket_bind(sock,
-						   (struct sockaddr *)address,
+						   (struct sockaddr *)&address,
 						   addrlen);
 			if (!err)
 				err = sock->ops->bind(sock,
 						      (struct sockaddr *)
-						      address, addrlen);
+						      &address, addrlen);
 		}
 		fput_light(sock->file, fput_needed);
 	}
@@ -1407,7 +1407,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	struct socket *sock, *newsock;
 	struct file *newfile;
 	int err, len, newfd, fput_needed;
-	char address[MAX_SOCK_ADDR];
+	struct sockaddr_storage address;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
@@ -1446,13 +1446,13 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 		goto out_fd;
 
 	if (upeer_sockaddr) {
-		if (newsock->ops->getname(newsock, (struct sockaddr *)address,
+		if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
 					  &len, 2) < 0) {
 			err = -ECONNABORTED;
 			goto out_fd;
 		}
-		err = move_addr_to_user(address, len, upeer_sockaddr,
-					upeer_addrlen);
+		err = move_addr_to_user((struct sockaddr *)&address,
+					len, upeer_sockaddr, upeer_addrlen);
 		if (err < 0)
 			goto out_fd;
 	}
@@ -1495,22 +1495,22 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
 			    int addrlen)
 {
 	struct socket *sock;
-	char address[MAX_SOCK_ADDR];
+	struct sockaddr_storage address;
 	int err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
-	err = move_addr_to_kernel(uservaddr, addrlen, address);
+	err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
 	if (err < 0)
 		goto out_put;
 
 	err =
-	    security_socket_connect(sock, (struct sockaddr *)address, addrlen);
+	    security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
 	if (err)
 		goto out_put;
 
-	err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
+	err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
 				 sock->file->f_flags);
 out_put:
 	fput_light(sock->file, fput_needed);
@@ -1527,7 +1527,7 @@ asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
 				int __user *usockaddr_len)
 {
 	struct socket *sock;
-	char address[MAX_SOCK_ADDR];
+	struct sockaddr_storage address;
 	int len, err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
@@ -1538,10 +1538,10 @@ asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
 	if (err)
 		goto out_put;
 
-	err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
+	err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
 	if (err)
 		goto out_put;
-	err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
+	err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
 
 out_put:
 	fput_light(sock->file, fput_needed);
@@ -1558,7 +1558,7 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
 				int __user *usockaddr_len)
 {
 	struct socket *sock;
-	char address[MAX_SOCK_ADDR];
+	struct sockaddr_storage address;
 	int len, err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
@@ -1570,10 +1570,10 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
 		}
 
 		err =
-		    sock->ops->getname(sock, (struct sockaddr *)address, &len,
+		    sock->ops->getname(sock, (struct sockaddr *)&address, &len,
 				       1);
 		if (!err)
-			err = move_addr_to_user(address, len, usockaddr,
+			err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
 						usockaddr_len);
 		fput_light(sock->file, fput_needed);
 	}
@@ -1591,7 +1591,7 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
 			   int addr_len)
 {
 	struct socket *sock;
-	char address[MAX_SOCK_ADDR];
+	struct sockaddr_storage address;
 	int err;
 	struct msghdr msg;
 	struct iovec iov;
@@ -1610,10 +1610,10 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
 	msg.msg_controllen = 0;
 	msg.msg_namelen = 0;
 	if (addr) {
-		err = move_addr_to_kernel(addr, addr_len, address);
+		err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
 		if (err < 0)
 			goto out_put;
-		msg.msg_name = address;
+		msg.msg_name = (struct sockaddr *)&address;
 		msg.msg_namelen = addr_len;
 	}
 	if (sock->file->f_flags & O_NONBLOCK)
@@ -1649,7 +1649,7 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
 	struct socket *sock;
 	struct iovec iov;
 	struct msghdr msg;
-	char address[MAX_SOCK_ADDR];
+	struct sockaddr_storage address;
 	int err, err2;
 	int fput_needed;
 
@@ -1663,14 +1663,15 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
 	msg.msg_iov = &iov;
 	iov.iov_len = size;
 	iov.iov_base = ubuf;
-	msg.msg_name = address;
-	msg.msg_namelen = MAX_SOCK_ADDR;
+	msg.msg_name = (struct sockaddr *)&address;
+	msg.msg_namelen = sizeof(address);
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	err = sock_recvmsg(sock, &msg, size, flags);
 
 	if (err >= 0 && addr != NULL) {
-		err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
+		err2 = move_addr_to_user((struct sockaddr *)&address,
+					 msg.msg_namelen, addr, addr_len);
 		if (err2 < 0)
 			err = err2;
 	}
@@ -1790,7 +1791,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 	struct compat_msghdr __user *msg_compat =
 	    (struct compat_msghdr __user *)msg;
 	struct socket *sock;
-	char address[MAX_SOCK_ADDR];
+	struct sockaddr_storage address;
 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
 	    __attribute__ ((aligned(sizeof(__kernel_size_t))));
@@ -1828,9 +1829,13 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 
 	/* This will also move the address data into kernel space */
 	if (MSG_CMSG_COMPAT & flags) {
-		err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
+		err = verify_compat_iovec(&msg_sys, iov,
+					  (struct sockaddr *)&address,
+					  VERIFY_READ);
 	} else
-		err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
+		err = verify_iovec(&msg_sys, iov,
+				   (struct sockaddr *)&address,
+				   VERIFY_READ);
 	if (err < 0)
 		goto out_freeiov;
 	total_len = err;
@@ -1901,7 +1906,7 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
 	int fput_needed;
 
 	/* kernel mode address */
-	char addr[MAX_SOCK_ADDR];
+	struct sockaddr_storage addr;
 
 	/* user mode address pointers */
 	struct sockaddr __user *uaddr;
@@ -1939,9 +1944,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
 	uaddr = (__force void __user *)msg_sys.msg_name;
 	uaddr_len = COMPAT_NAMELEN(msg);
 	if (MSG_CMSG_COMPAT & flags) {
-		err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
+		err = verify_compat_iovec(&msg_sys, iov,
+					  (struct sockaddr *)&addr,
+					  VERIFY_WRITE);
 	} else
-		err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
+		err = verify_iovec(&msg_sys, iov,
+				   (struct sockaddr *)&addr,
+				   VERIFY_WRITE);
 	if (err < 0)
 		goto out_freeiov;
 	total_len = err;
@@ -1957,7 +1966,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
 	len = err;
 
 	if (uaddr != NULL) {
-		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
+		err = move_addr_to_user((struct sockaddr *)&addr,
+					msg_sys.msg_namelen, uaddr,
 					uaddr_len);
 		if (err < 0)
 			goto out_freeiov;
-- 
cgit v1.2.3-70-g09d2


From a6ffb404dc03f806a257faaab831a6cb55c0b790 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 19 Jul 2008 22:36:07 -0700
Subject: ipv6 mcast: Omit redundant address family checks in ip6_mc_source().

The caller has alredy checked for them.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1b285371124..e7c03bcc278 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -367,10 +367,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	int pmclocked = 0;
 	int err;
 
-	if (pgsr->gsr_group.ss_family != AF_INET6 ||
-	    pgsr->gsr_source.ss_family != AF_INET6)
-		return -EINVAL;
-
 	source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr;
 	group = &((struct sockaddr_in6 *)&pgsr->gsr_group)->sin6_addr;
 
-- 
cgit v1.2.3-70-g09d2


From 5f86173bdf15981ca49d0434f638b68f70a35644 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Sun, 20 Jul 2008 00:08:04 -0700
Subject: net_sched: Add qdisc_enqueue wrapper

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 10 ++++++++++
 net/core/dev.c            |  4 ++--
 net/mac80211/wme.c        |  2 +-
 net/sched/sch_atm.c       |  2 +-
 net/sched/sch_cbq.c       |  5 +++--
 net/sched/sch_dsmark.c    |  2 +-
 net/sched/sch_hfsc.c      |  2 +-
 net/sched/sch_htb.c       |  3 +--
 net/sched/sch_netem.c     | 20 ++++++++++++--------
 net/sched/sch_prio.c      |  3 ++-
 net/sched/sch_red.c       |  2 +-
 net/sched/sch_tbf.c       |  3 ++-
 12 files changed, 37 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 8a44386b35c..f396dff335a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -306,6 +306,16 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 	return true;
 }
 
+static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	return sch->enqueue(skb, sch);
+}
+
+static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
+{
+	return qdisc_enqueue(skb, sch);
+}
+
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 065b9817e20..2eed17bcb2d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1781,7 +1781,7 @@ gso:
 
 		spin_lock(root_lock);
 
-		rc = q->enqueue(skb, q);
+		rc = qdisc_enqueue_root(skb, q);
 		qdisc_run(q);
 
 		spin_unlock(root_lock);
@@ -2083,7 +2083,7 @@ static int ing_filter(struct sk_buff *skb)
 	q = rxq->qdisc;
 	if (q) {
 		spin_lock(qdisc_lock(q));
-		result = q->enqueue(skb, q);
+		result = qdisc_enqueue_root(skb, q);
 		spin_unlock(qdisc_lock(q));
 	}
 
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 6e8099e7704..07edda0b8a5 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -289,7 +289,7 @@ void ieee80211_requeue(struct ieee80211_local *local, int queue)
 		root_lock = qdisc_root_lock(qdisc);
 
 		spin_lock(root_lock);
-		qdisc->enqueue(skb, qdisc);
+		qdisc_enqueue_root(skb, qdisc);
 		spin_unlock(root_lock);
 	}
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 0de757e3be4..68ed35e2a76 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -429,7 +429,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 #endif
 	}
 
-	ret = flow->q->enqueue(skb, flow->q);
+	ret = qdisc_enqueue(skb, flow->q);
 	if (ret != 0) {
 drop: __maybe_unused
 		sch->qstats.drops++;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index a3953bbe2d7..1afe3eece62 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -387,7 +387,8 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 #ifdef CONFIG_NET_CLS_ACT
 	cl->q->__parent = sch;
 #endif
-	if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
+	ret = qdisc_enqueue(skb, cl->q);
+	if (ret == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
 		sch->bstats.packets++;
 		sch->bstats.bytes+=len;
@@ -671,7 +672,7 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 		q->rx_class = cl;
 		cl->q->__parent = sch;
 
-		if (cl->q->enqueue(skb, cl->q) == 0) {
+		if (qdisc_enqueue(skb, cl->q) == 0) {
 			sch->q.qlen++;
 			sch->bstats.packets++;
 			sch->bstats.bytes+=len;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 3aafbd17393..44d347e831c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -252,7 +252,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 	}
 
-	err = p->q->enqueue(skb, p->q);
+	err = qdisc_enqueue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
 		sch->qstats.drops++;
 		return err;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5090708ba38..fd61ed6ee1e 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1586,7 +1586,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	len = skb->len;
-	err = cl->qdisc->enqueue(skb, cl->qdisc);
+	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		cl->qstats.drops++;
 		sch->qstats.drops++;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index ee48457eaa4..72b5a946178 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -572,8 +572,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
-		   NET_XMIT_SUCCESS) {
+	} else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
 		sch->qstats.drops++;
 		cl->qstats.drops++;
 		return NET_XMIT_DROP;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c5ea40c9eb2..13c4821e42b 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -82,6 +82,12 @@ struct netem_skb_cb {
 	psched_time_t	time_to_send;
 };
 
+static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct netem_skb_cb));
+	return (struct netem_skb_cb *)skb->cb;
+}
+
 /* init_crandom - initialize correlated random number generator
  * Use entropy source for initial seed.
  */
@@ -184,7 +190,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 		q->duplicate = 0;
 
-		rootq->enqueue(skb2, rootq);
+		qdisc_enqueue_root(skb2, rootq);
 		q->duplicate = dupsave;
 	}
 
@@ -205,7 +211,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
 	}
 
-	cb = (struct netem_skb_cb *)skb->cb;
+	cb = netem_skb_cb(skb);
 	if (q->gap == 0 		/* not doing reordering */
 	    || q->counter < q->gap 	/* inside last reordering gap */
 	    || q->reorder < get_crandom(&q->reorder_cor)) {
@@ -218,7 +224,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		now = psched_get_time();
 		cb->time_to_send = now + delay;
 		++q->counter;
-		ret = q->qdisc->enqueue(skb, q->qdisc);
+		ret = qdisc_enqueue(skb, q->qdisc);
 	} else {
 		/*
 		 * Do re-ordering by putting one out of N packets at the front
@@ -277,8 +283,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 
 	skb = q->qdisc->dequeue(q->qdisc);
 	if (skb) {
-		const struct netem_skb_cb *cb
-			= (const struct netem_skb_cb *)skb->cb;
+		const struct netem_skb_cb *cb = netem_skb_cb(skb);
 		psched_time_t now = psched_get_time();
 
 		/* if more time remaining? */
@@ -457,7 +462,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 	struct sk_buff_head *list = &sch->q;
-	psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
+	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
 	struct sk_buff *skb;
 
 	if (likely(skb_queue_len(list) < q->limit)) {
@@ -468,8 +473,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 		}
 
 		skb_queue_reverse_walk(list, skb) {
-			const struct netem_skb_cb *cb
-				= (const struct netem_skb_cb *)skb->cb;
+			const struct netem_skb_cb *cb = netem_skb_cb(skb);
 
 			if (tnext >= cb->time_to_send)
 				break;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 536ca474dc6..d29c2f87fc0 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -81,7 +81,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 #endif
 
-	if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) {
+	ret = qdisc_enqueue(skb, qdisc);
+	if (ret == NET_XMIT_SUCCESS) {
 		sch->bstats.bytes += skb->len;
 		sch->bstats.packets++;
 		sch->q.qlen++;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 77098acf0ad..b48a391bc12 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -92,7 +92,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 			break;
 	}
 
-	ret = child->enqueue(skb, child);
+	ret = qdisc_enqueue(skb, child);
 	if (likely(ret == NET_XMIT_SUCCESS)) {
 		sch->bstats.bytes += skb->len;
 		sch->bstats.packets++;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 444c227fcb6..7d705b86dae 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -133,7 +133,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		return NET_XMIT_DROP;
 	}
 
-	if ((ret = q->qdisc->enqueue(skb, q->qdisc)) != 0) {
+	ret = qdisc_enqueue(skb, q->qdisc);
+	if (ret != 0) {
 		sch->qstats.drops++;
 		return ret;
 	}
-- 
cgit v1.2.3-70-g09d2


From 0abf77e55a2459aa9905be4b226e4729d5b4f0cb Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Sun, 20 Jul 2008 00:08:27 -0700
Subject: net_sched: Add accessor function for packet length for qdiscs

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 17 +++++++++++------
 net/sched/act_gact.c      |  2 +-
 net/sched/act_ipt.c       |  2 +-
 net/sched/act_mirred.c    |  4 ++--
 net/sched/act_nat.c       |  2 +-
 net/sched/act_pedit.c     |  2 +-
 net/sched/act_police.c    |  8 ++++----
 net/sched/act_simple.c    |  2 +-
 net/sched/sch_atm.c       |  4 ++--
 net/sched/sch_cbq.c       | 14 ++++++--------
 net/sched/sch_dsmark.c    |  2 +-
 net/sched/sch_fifo.c      |  2 +-
 net/sched/sch_gred.c      | 12 ++++++------
 net/sched/sch_hfsc.c      | 14 ++++++--------
 net/sched/sch_htb.c       |  9 +++++----
 net/sched/sch_ingress.c   |  2 +-
 net/sched/sch_netem.c     |  6 +++---
 net/sched/sch_prio.c      |  2 +-
 net/sched/sch_red.c       |  2 +-
 net/sched/sch_sfq.c       | 16 ++++++++--------
 net/sched/sch_tbf.c       |  6 +++---
 net/sched/sch_teql.c      |  6 +++---
 22 files changed, 69 insertions(+), 67 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f396dff335a..8229520e088 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -306,6 +306,11 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 	return true;
 }
 
+static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
+{
+	return skb->len;
+}
+
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	return sch->enqueue(skb, sch);
@@ -320,8 +325,8 @@ static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
 	__skb_queue_tail(list, skb);
-	sch->qstats.backlog += skb->len;
-	sch->bstats.bytes += skb->len;
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
 
 	return NET_XMIT_SUCCESS;
@@ -338,7 +343,7 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,
 	struct sk_buff *skb = __skb_dequeue(list);
 
 	if (likely(skb != NULL))
-		sch->qstats.backlog -= skb->len;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	return skb;
 }
@@ -354,7 +359,7 @@ static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch,
 	struct sk_buff *skb = __skb_dequeue_tail(list);
 
 	if (likely(skb != NULL))
-		sch->qstats.backlog -= skb->len;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	return skb;
 }
@@ -368,7 +373,7 @@ static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch,
 				  struct sk_buff_head *list)
 {
 	__skb_queue_head(list, skb);
-	sch->qstats.backlog += skb->len;
+	sch->qstats.backlog += qdisc_pkt_len(skb);
 	sch->qstats.requeues++;
 
 	return NET_XMIT_SUCCESS;
@@ -401,7 +406,7 @@ static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch,
 	struct sk_buff *skb = __qdisc_dequeue_tail(sch, list);
 
 	if (likely(skb != NULL)) {
-		unsigned int len = skb->len;
+		unsigned int len = qdisc_pkt_len(skb);
 		kfree_skb(skb);
 		return len;
 	}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 422872c4f14..ac04289da5d 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -139,7 +139,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 #else
 	action = gact->tcf_action;
 #endif
-	gact->tcf_bstats.bytes += skb->len;
+	gact->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	gact->tcf_bstats.packets++;
 	if (action == TC_ACT_SHOT)
 		gact->tcf_qstats.drops++;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index da696fd3e34..d1263b3c96c 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -205,7 +205,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 	spin_lock(&ipt->tcf_lock);
 
 	ipt->tcf_tm.lastuse = jiffies;
-	ipt->tcf_bstats.bytes += skb->len;
+	ipt->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	ipt->tcf_bstats.packets++;
 
 	/* yes, we have to worry about both in and out dev
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1aff005d95c..70341c020b6 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -164,7 +164,7 @@ bad_mirred:
 		if (skb2 != NULL)
 			kfree_skb(skb2);
 		m->tcf_qstats.overlimits++;
-		m->tcf_bstats.bytes += skb->len;
+		m->tcf_bstats.bytes += qdisc_pkt_len(skb);
 		m->tcf_bstats.packets++;
 		spin_unlock(&m->tcf_lock);
 		/* should we be asking for packet to be dropped?
@@ -184,7 +184,7 @@ bad_mirred:
 		goto bad_mirred;
 	}
 
-	m->tcf_bstats.bytes += skb2->len;
+	m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
 	m->tcf_bstats.packets++;
 	if (!(at & AT_EGRESS))
 		if (m->tcfm_ok_push)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 0a3c8339767..7b39ed485bc 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -124,7 +124,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 	egress = p->flags & TCA_NAT_FLAG_EGRESS;
 	action = p->tcf_action;
 
-	p->tcf_bstats.bytes += skb->len;
+	p->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	p->tcf_bstats.packets++;
 
 	spin_unlock(&p->tcf_lock);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3cc4cb9e500..d5f4e340486 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -182,7 +182,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 bad:
 	p->tcf_qstats.overlimits++;
 done:
-	p->tcf_bstats.bytes += skb->len;
+	p->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	p->tcf_bstats.packets++;
 	spin_unlock(&p->tcf_lock);
 	return p->tcf_action;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0898120bbcc..32c3f9d9fb7 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -272,7 +272,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 
 	spin_lock(&police->tcf_lock);
 
-	police->tcf_bstats.bytes += skb->len;
+	police->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	police->tcf_bstats.packets++;
 
 	if (police->tcfp_ewma_rate &&
@@ -282,7 +282,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 		return police->tcf_action;
 	}
 
-	if (skb->len <= police->tcfp_mtu) {
+	if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
 		if (police->tcfp_R_tab == NULL) {
 			spin_unlock(&police->tcf_lock);
 			return police->tcfp_result;
@@ -295,12 +295,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 			ptoks = toks + police->tcfp_ptoks;
 			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
 				ptoks = (long)L2T_P(police, police->tcfp_mtu);
-			ptoks -= L2T_P(police, skb->len);
+			ptoks -= L2T_P(police, qdisc_pkt_len(skb));
 		}
 		toks += police->tcfp_toks;
 		if (toks > (long)police->tcfp_burst)
 			toks = police->tcfp_burst;
-		toks -= L2T(police, skb->len);
+		toks -= L2T(police, qdisc_pkt_len(skb));
 		if ((toks|ptoks) >= 0) {
 			police->tcfp_t_c = now;
 			police->tcfp_toks = toks;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 1d421d059ca..e7851ce92cf 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -41,7 +41,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 
 	spin_lock(&d->tcf_lock);
 	d->tcf_tm.lastuse = jiffies;
-	d->tcf_bstats.bytes += skb->len;
+	d->tcf_bstats.bytes += qdisc_pkt_len(skb);
 	d->tcf_bstats.packets++;
 
 	/* print policy string followed by _ then packet count
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 68ed35e2a76..04faa835be1 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -437,9 +437,9 @@ drop: __maybe_unused
 			flow->qstats.drops++;
 		return ret;
 	}
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
-	flow->bstats.bytes += skb->len;
+	flow->bstats.bytes += qdisc_pkt_len(skb);
 	flow->bstats.packets++;
 	/*
 	 * Okay, this may seem weird. We pretend we've dropped the packet if
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 1afe3eece62..f1d2f8ec8b4 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -370,7 +370,6 @@ static int
 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	int len = skb->len;
 	int uninitialized_var(ret);
 	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 
@@ -391,7 +390,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (ret == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
 		sch->bstats.packets++;
-		sch->bstats.bytes+=len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		cbq_mark_toplevel(q, cl);
 		if (!cl->next_alive)
 			cbq_activate_class(cl);
@@ -658,7 +657,6 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 #ifdef CONFIG_NET_CLS_ACT
 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 {
-	int len = skb->len;
 	struct Qdisc *sch = child->__parent;
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = q->rx_class;
@@ -675,7 +673,7 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 		if (qdisc_enqueue(skb, cl->q) == 0) {
 			sch->q.qlen++;
 			sch->bstats.packets++;
-			sch->bstats.bytes+=len;
+			sch->bstats.bytes += qdisc_pkt_len(skb);
 			if (!cl->next_alive)
 				cbq_activate_class(cl);
 			return 0;
@@ -881,7 +879,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 			if (skb == NULL)
 				goto skip_class;
 
-			cl->deficit -= skb->len;
+			cl->deficit -= qdisc_pkt_len(skb);
 			q->tx_class = cl;
 			q->tx_borrowed = borrow;
 			if (borrow != cl) {
@@ -889,11 +887,11 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 				borrow->xstats.borrows++;
 				cl->xstats.borrows++;
 #else
-				borrow->xstats.borrows += skb->len;
-				cl->xstats.borrows += skb->len;
+				borrow->xstats.borrows += qdisc_pkt_len(skb);
+				cl->xstats.borrows += qdisc_pkt_len(skb);
 #endif
 			}
-			q->tx_len = skb->len;
+			q->tx_len = qdisc_pkt_len(skb);
 
 			if (cl->deficit <= 0) {
 				q->active[prio] = cl;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 44d347e831c..a935676987e 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -258,7 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 	}
 
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
 	sch->q.qlen++;
 
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 1d97fa42c90..23d258bfe8a 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -27,7 +27,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 
-	if (likely(sch->qstats.backlog + skb->len <= q->limit))
+	if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_reshape_fail(skb, sch);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 39fa28511f0..c1ad6b8de10 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -188,7 +188,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	}
 
 	q->packetsin++;
-	q->bytesin += skb->len;
+	q->bytesin += qdisc_pkt_len(skb);
 
 	if (gred_wred_mode(t))
 		gred_load_wred_set(t, q);
@@ -226,8 +226,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 			break;
 	}
 
-	if (q->backlog + skb->len <= q->limit) {
-		q->backlog += skb->len;
+	if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
+		q->backlog += qdisc_pkt_len(skb);
 		return qdisc_enqueue_tail(skb, sch);
 	}
 
@@ -254,7 +254,7 @@ static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	} else {
 		if (red_is_idling(&q->parms))
 			red_end_of_idle_period(&q->parms);
-		q->backlog += skb->len;
+		q->backlog += qdisc_pkt_len(skb);
 	}
 
 	return qdisc_requeue(skb, sch);
@@ -277,7 +277,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 				       "VQ 0x%x after dequeue, screwing up "
 				       "backlog.\n", tc_index_to_dp(skb));
 		} else {
-			q->backlog -= skb->len;
+			q->backlog -= qdisc_pkt_len(skb);
 
 			if (!q->backlog && !gred_wred_mode(t))
 				red_start_of_idle_period(&q->parms);
@@ -299,7 +299,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
 
 	skb = qdisc_dequeue_tail(sch);
 	if (skb) {
-		unsigned int len = skb->len;
+		unsigned int len = qdisc_pkt_len(skb);
 		struct gred_sched_data *q;
 		u16 dp = tc_index_to_dp(skb);
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index fd61ed6ee1e..0ae7d19dcba 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,7 +895,7 @@ qdisc_peek_len(struct Qdisc *sch)
 			printk("qdisc_peek_len: non work-conserving qdisc ?\n");
 		return 0;
 	}
-	len = skb->len;
+	len = qdisc_pkt_len(skb);
 	if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
 		if (net_ratelimit())
 			printk("qdisc_peek_len: failed to requeue\n");
@@ -1574,7 +1574,6 @@ static int
 hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct hfsc_class *cl;
-	unsigned int len;
 	int err;
 
 	cl = hfsc_classify(skb, sch, &err);
@@ -1585,7 +1584,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 	}
 
-	len = skb->len;
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		cl->qstats.drops++;
@@ -1594,12 +1592,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	if (cl->qdisc->q.qlen == 1)
-		set_active(cl, len);
+		set_active(cl, qdisc_pkt_len(skb));
 
 	cl->bstats.packets++;
-	cl->bstats.bytes += len;
+	cl->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
-	sch->bstats.bytes += len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -1649,9 +1647,9 @@ hfsc_dequeue(struct Qdisc *sch)
 		return NULL;
 	}
 
-	update_vf(cl, skb->len, cur_time);
+	update_vf(cl, qdisc_pkt_len(skb), cur_time);
 	if (realtime)
-		cl->cl_cumul += skb->len;
+		cl->cl_cumul += qdisc_pkt_len(skb);
 
 	if (cl->qdisc->q.qlen != 0) {
 		if (cl->cl_flags & HFSC_RSC) {
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 72b5a946178..30c999c61b0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -579,13 +579,13 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	} else {
 		cl->bstats.packets +=
 			skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
-		cl->bstats.bytes += skb->len;
+		cl->bstats.bytes += qdisc_pkt_len(skb);
 		htb_activate(q, cl);
 	}
 
 	sch->q.qlen++;
 	sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -642,7 +642,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 			     int level, struct sk_buff *skb)
 {
-	int bytes = skb->len;
+	int bytes = qdisc_pkt_len(skb);
 	long toks, diff;
 	enum htb_cmode old_mode;
 
@@ -855,7 +855,8 @@ next:
 	} while (cl != start);
 
 	if (likely(skb != NULL)) {
-		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
+		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
+		if (cl->un.leaf.deficit[level] < 0) {
 			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
 			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 					  ptr[0]) + prio);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 956c80ad596..4a2b7737435 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -77,7 +77,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	result = tc_classify(skb, p->filter_list, &res);
 
 	sch->bstats.packets++;
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	switch (result) {
 	case TC_ACT_SHOT:
 		result = TC_ACT_SHOT;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 13c4821e42b..ae49be00022 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -237,7 +237,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
 		sch->q.qlen++;
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 	} else
 		sch->qstats.drops++;
@@ -481,8 +481,8 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 
 		__skb_queue_after(list, skb, nskb);
 
-		sch->qstats.backlog += nskb->len;
-		sch->bstats.bytes += nskb->len;
+		sch->qstats.backlog += qdisc_pkt_len(nskb);
+		sch->bstats.bytes += qdisc_pkt_len(nskb);
 		sch->bstats.packets++;
 
 		return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index d29c2f87fc0..f849243eb09 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -83,7 +83,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	ret = qdisc_enqueue(skb, qdisc);
 	if (ret == NET_XMIT_SUCCESS) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index b48a391bc12..3f2d1d7f3bb 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -94,7 +94,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	ret = qdisc_enqueue(skb, child);
 	if (likely(ret == NET_XMIT_SUCCESS)) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
 	} else {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8458f630fac..8589da66656 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -245,7 +245,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 	if (d > 1) {
 		sfq_index x = q->dep[d + SFQ_DEPTH].next;
 		skb = q->qs[x].prev;
-		len = skb->len;
+		len = qdisc_pkt_len(skb);
 		__skb_unlink(skb, &q->qs[x]);
 		kfree_skb(skb);
 		sfq_dec(q, x);
@@ -261,7 +261,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 		q->next[q->tail] = q->next[d];
 		q->allot[q->next[d]] += q->quantum;
 		skb = q->qs[d].prev;
-		len = skb->len;
+		len = qdisc_pkt_len(skb);
 		__skb_unlink(skb, &q->qs[d]);
 		kfree_skb(skb);
 		sfq_dec(q, d);
@@ -305,7 +305,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (q->qs[x].qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
-	sch->qstats.backlog += skb->len;
+	sch->qstats.backlog += qdisc_pkt_len(skb);
 	__skb_queue_tail(&q->qs[x], skb);
 	sfq_inc(q, x);
 	if (q->qs[x].qlen == 1) {		/* The flow is new */
@@ -320,7 +320,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 	}
 	if (++sch->q.qlen <= q->limit) {
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		return 0;
 	}
@@ -352,7 +352,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 		q->hash[x] = hash;
 	}
 
-	sch->qstats.backlog += skb->len;
+	sch->qstats.backlog += qdisc_pkt_len(skb);
 	__skb_queue_head(&q->qs[x], skb);
 	/* If selected queue has length q->limit+1, this means that
 	 * all another queues are empty and we do simple tail drop.
@@ -363,7 +363,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 		skb = q->qs[x].prev;
 		__skb_unlink(skb, &q->qs[x]);
 		sch->qstats.drops++;
-		sch->qstats.backlog -= skb->len;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
 		kfree_skb(skb);
 		return NET_XMIT_CN;
 	}
@@ -411,7 +411,7 @@ sfq_dequeue(struct Qdisc *sch)
 	skb = __skb_dequeue(&q->qs[a]);
 	sfq_dec(q, a);
 	sch->q.qlen--;
-	sch->qstats.backlog -= skb->len;
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	/* Is the slot empty? */
 	if (q->qs[a].qlen == 0) {
@@ -423,7 +423,7 @@ sfq_dequeue(struct Qdisc *sch)
 		}
 		q->next[q->tail] = a;
 		q->allot[a] += q->quantum;
-	} else if ((q->allot[a] -= skb->len) <= 0) {
+	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
 		q->tail = a;
 		a = q->next[a];
 		q->allot[a] += q->quantum;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 7d705b86dae..b296672f763 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -123,7 +123,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	int ret;
 
-	if (skb->len > q->max_size) {
+	if (qdisc_pkt_len(skb) > q->max_size) {
 		sch->qstats.drops++;
 #ifdef CONFIG_NET_CLS_ACT
 		if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
@@ -140,7 +140,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	}
 
 	sch->q.qlen++;
-	sch->bstats.bytes += skb->len;
+	sch->bstats.bytes += qdisc_pkt_len(skb);
 	sch->bstats.packets++;
 	return 0;
 }
@@ -181,7 +181,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 		psched_time_t now;
 		long toks;
 		long ptoks = 0;
-		unsigned int len = skb->len;
+		unsigned int len = qdisc_pkt_len(skb);
 
 		now = psched_get_time();
 		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 8b0ff345f9d..537223642b6 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -83,7 +83,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	if (q->q.qlen < dev->tx_queue_len) {
 		__skb_queue_tail(&q->q, skb);
-		sch->bstats.bytes += skb->len;
+		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		return 0;
 	}
@@ -278,7 +278,6 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct Qdisc *start, *q;
 	int busy;
 	int nores;
-	int len = skb->len;
 	int subq = skb_get_queue_mapping(skb);
 	struct sk_buff *skb_res = NULL;
 
@@ -313,7 +312,8 @@ restart:
 					master->slaves = NEXT_SLAVE(q);
 					netif_wake_queue(dev);
 					master->stats.tx_packets++;
-					master->stats.tx_bytes += len;
+					master->stats.tx_bytes +=
+						qdisc_pkt_len(skb);
 					return 0;
 				}
 				netif_tx_unlock(slave);
-- 
cgit v1.2.3-70-g09d2


From 175f9c1bba9b825d22b142d183c9e175488b260c Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Sun, 20 Jul 2008 00:08:47 -0700
Subject: net_sched: Add size table for qdiscs

Add size table functions for qdiscs and calculate packet size in
qdisc_enqueue().

Based on patch by Patrick McHardy
 http://marc.info/?l=linux-netdev&m=115201979221729&w=2

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  20 ++++++
 include/linux/rtnetlink.h |   1 +
 include/net/pkt_sched.h   |   1 +
 include/net/sch_generic.h |  25 +++++++-
 net/sched/sch_api.c       | 151 +++++++++++++++++++++++++++++++++++++++++++++-
 net/sched/sch_generic.c   |   1 +
 net/sched/sch_netem.c     |   5 +-
 7 files changed, 199 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 87f4e0fa8f2..e5de421ac7b 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -85,6 +85,26 @@ struct tc_ratespec
 
 #define TC_RTAB_SIZE	1024
 
+struct tc_sizespec {
+	unsigned char	cell_log;
+	unsigned char	size_log;
+	short		cell_align;
+	int		overhead;
+	unsigned int	linklayer;
+	unsigned int	mpu;
+	unsigned int	mtu;
+	unsigned int	tsize;
+};
+
+enum {
+	TCA_STAB_UNSPEC,
+	TCA_STAB_BASE,
+	TCA_STAB_DATA,
+	__TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
 /* FIFO section */
 
 struct tc_fifo_qopt
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index b358c704d10..f4d386c191f 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -482,6 +482,7 @@ enum
 	TCA_RATE,
 	TCA_FCNT,
 	TCA_STATS2,
+	TCA_STAB,
 	__TCA_MAX
 };
 
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index e4e30052e4e..6affcfaa123 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -83,6 +83,7 @@ extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
 extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 		struct nlattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
+extern void qdisc_put_stab(struct qdisc_size_table *tab);
 
 extern void __qdisc_run(struct Qdisc *q);
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 8229520e088..db9ad655eb8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -29,6 +29,13 @@ enum qdisc_state_t
 	__QDISC_STATE_SCHED,
 };
 
+struct qdisc_size_table {
+	struct list_head	list;
+	struct tc_sizespec	szopts;
+	int			refcnt;
+	u16			data[];
+};
+
 struct Qdisc
 {
 	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
@@ -39,6 +46,7 @@ struct Qdisc
 #define TCQ_F_INGRESS	4
 	int			padded;
 	struct Qdisc_ops	*ops;
+	struct qdisc_size_table	*stab;
 	u32			handle;
 	u32			parent;
 	atomic_t		refcnt;
@@ -165,6 +173,16 @@ struct tcf_proto
 	struct tcf_proto_ops	*ops;
 };
 
+struct qdisc_skb_cb {
+	unsigned int		pkt_len;
+	char			data[];
+};
+
+static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
+{
+	return (struct qdisc_skb_cb *)skb->cb;
+}
+
 static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
 {
 	return &qdisc->q.lock;
@@ -257,6 +275,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
 				       struct netdev_queue *dev_queue,
 				       struct Qdisc_ops *ops, u32 parentid);
+extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
+				   struct qdisc_size_table *stab);
 extern void tcf_destroy(struct tcf_proto *tp);
 extern void tcf_destroy_chain(struct tcf_proto **fl);
 
@@ -308,16 +328,19 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 
 static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
 {
-	return skb->len;
+	return qdisc_skb_cb(skb)->pkt_len;
 }
 
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
+	if (sch->stab)
+		qdisc_calculate_pkt_len(skb, sch->stab);
 	return sch->enqueue(skb, sch);
 }
 
 static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 {
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
 	return qdisc_enqueue(skb, sch);
 }
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fb43731c986..5219d5f9d75 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -286,6 +286,129 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 }
 EXPORT_SYMBOL(qdisc_put_rtab);
 
+static LIST_HEAD(qdisc_stab_list);
+static DEFINE_SPINLOCK(qdisc_stab_lock);
+
+static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
+	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
+	[TCA_STAB_DATA] = { .type = NLA_BINARY },
+};
+
+static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
+{
+	struct nlattr *tb[TCA_STAB_MAX + 1];
+	struct qdisc_size_table *stab;
+	struct tc_sizespec *s;
+	unsigned int tsize = 0;
+	u16 *tab = NULL;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
+	if (err < 0)
+		return ERR_PTR(err);
+	if (!tb[TCA_STAB_BASE])
+		return ERR_PTR(-EINVAL);
+
+	s = nla_data(tb[TCA_STAB_BASE]);
+
+	if (s->tsize > 0) {
+		if (!tb[TCA_STAB_DATA])
+			return ERR_PTR(-EINVAL);
+		tab = nla_data(tb[TCA_STAB_DATA]);
+		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
+	}
+
+	if (!s || tsize != s->tsize || (!tab && tsize > 0))
+		return ERR_PTR(-EINVAL);
+
+	spin_lock(&qdisc_stab_lock);
+
+	list_for_each_entry(stab, &qdisc_stab_list, list) {
+		if (memcmp(&stab->szopts, s, sizeof(*s)))
+			continue;
+		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
+			continue;
+		stab->refcnt++;
+		spin_unlock(&qdisc_stab_lock);
+		return stab;
+	}
+
+	spin_unlock(&qdisc_stab_lock);
+
+	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
+	if (!stab)
+		return ERR_PTR(-ENOMEM);
+
+	stab->refcnt = 1;
+	stab->szopts = *s;
+	if (tsize > 0)
+		memcpy(stab->data, tab, tsize * sizeof(u16));
+
+	spin_lock(&qdisc_stab_lock);
+	list_add_tail(&stab->list, &qdisc_stab_list);
+	spin_unlock(&qdisc_stab_lock);
+
+	return stab;
+}
+
+void qdisc_put_stab(struct qdisc_size_table *tab)
+{
+	if (!tab)
+		return;
+
+	spin_lock(&qdisc_stab_lock);
+
+	if (--tab->refcnt == 0) {
+		list_del(&tab->list);
+		kfree(tab);
+	}
+
+	spin_unlock(&qdisc_stab_lock);
+}
+EXPORT_SYMBOL(qdisc_put_stab);
+
+static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_STAB);
+	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
+	nla_nest_end(skb, nest);
+
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+}
+
+void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	int pkt_len, slot;
+
+	pkt_len = skb->len + stab->szopts.overhead;
+	if (unlikely(!stab->szopts.tsize))
+		goto out;
+
+	slot = pkt_len + stab->szopts.cell_align;
+	if (unlikely(slot < 0))
+		slot = 0;
+
+	slot >>= stab->szopts.cell_log;
+	if (likely(slot < stab->szopts.tsize))
+		pkt_len = stab->data[slot];
+	else
+		pkt_len = stab->data[stab->szopts.tsize - 1] *
+				(slot / stab->szopts.tsize) +
+				stab->data[slot % stab->szopts.tsize];
+
+	pkt_len <<= stab->szopts.size_log;
+out:
+	if (unlikely(pkt_len < 1))
+		pkt_len = 1;
+	qdisc_skb_cb(skb)->pkt_len = pkt_len;
+}
+EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+
 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 {
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
@@ -613,6 +736,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	struct nlattr *kind = tca[TCA_KIND];
 	struct Qdisc *sch;
 	struct Qdisc_ops *ops;
+	struct qdisc_size_table *stab;
 
 	ops = qdisc_lookup_ops(kind);
 #ifdef CONFIG_KMOD
@@ -670,6 +794,14 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	sch->handle = handle;
 
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
+		if (tca[TCA_STAB]) {
+			stab = qdisc_get_stab(tca[TCA_STAB]);
+			if (IS_ERR(stab)) {
+				err = PTR_ERR(stab);
+				goto err_out3;
+			}
+			sch->stab = stab;
+		}
 		if (tca[TCA_RATE]) {
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
 						qdisc_root_lock(sch),
@@ -691,6 +823,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 		return sch;
 	}
 err_out3:
+	qdisc_put_stab(sch->stab);
 	dev_put(dev);
 	kfree((char *) sch - sch->padded);
 err_out2:
@@ -702,15 +835,26 @@ err_out:
 
 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 {
-	if (tca[TCA_OPTIONS]) {
-		int err;
+	struct qdisc_size_table *stab = NULL;
+	int err = 0;
 
+	if (tca[TCA_OPTIONS]) {
 		if (sch->ops->change == NULL)
 			return -EINVAL;
 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
 		if (err)
 			return err;
 	}
+
+	if (tca[TCA_STAB]) {
+		stab = qdisc_get_stab(tca[TCA_STAB]);
+		if (IS_ERR(stab))
+			return PTR_ERR(stab);
+	}
+
+	qdisc_put_stab(sch->stab);
+	sch->stab = stab;
+
 	if (tca[TCA_RATE])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
 				      qdisc_root_lock(sch), tca[TCA_RATE]);
@@ -994,6 +1138,9 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 		goto nla_put_failure;
 	q->qstats.qlen = q->q.qlen;
 
+	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+		goto nla_put_failure;
+
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
 					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
 		goto nla_put_failure;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 522a41a9f90..27a51f04db4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -469,6 +469,7 @@ static void __qdisc_destroy(struct rcu_head *head)
 	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
 	const struct Qdisc_ops  *ops = qdisc->ops;
 
+	qdisc_put_stab(qdisc->stab);
 	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
 	if (ops->reset)
 		ops->reset(qdisc);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index ae49be00022..a5908570067 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -84,8 +84,9 @@ struct netem_skb_cb {
 
 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct netem_skb_cb));
-	return (struct netem_skb_cb *)skb->cb;
+	BUILD_BUG_ON(sizeof(skb->cb) <
+		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
+	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
 /* init_crandom - initialize correlated random number generator
-- 
cgit v1.2.3-70-g09d2


From fb65a7c091529bfffb1262515252c0d0f6241c5c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 20 Jul 2008 10:18:44 -0700
Subject: iucv: Fix bad merging.

Noticed by Stephen Rothwell.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'net')

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index a598c738484..265b1b289a3 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -523,12 +523,8 @@ static int iucv_enable(void)
 	rc = -EIO;
 	get_online_cpus();
 	for_each_online_cpu(cpu)
-<<<<<<< HEAD:net/iucv/iucv.c
-		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1);
-=======
 		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
 	preempt_enable();
->>>>>>> 5b664cb235e97afbf34db9c4d77f08ebd725335e:net/iucv/iucv.c
 	if (cpus_empty(iucv_buffer_cpumask))
 		/* No cpu could declare an iucv buffer. */
 		goto out_path;
@@ -551,13 +547,7 @@ out:
  */
 static void iucv_disable(void)
 {
-<<<<<<< HEAD:net/iucv/iucv.c
-	get_online_cpus();
-	on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1);
-	put_online_cpus();
-=======
 	on_each_cpu(iucv_retrieve_cpu, NULL, 1);
->>>>>>> 5b664cb235e97afbf34db9c4d77f08ebd725335e:net/iucv/iucv.c
 	kfree(iucv_path_table);
 }
 
-- 
cgit v1.2.3-70-g09d2


From a352def21a642133758b868c71bee12ab34ad5c5 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Wed, 16 Jul 2008 21:53:12 +0100
Subject: tty: Ldisc revamp

Move the line disciplines towards a conventional ->ops arrangement.  For
the moment the actual 'tty_ldisc' struct in the tty is kept as part of
the tty struct but this can then be changed if it turns out that when it
all settles down we want to refcount ldiscs separately to the tty.

Pull the ldisc code out of /proc and put it with our ldisc code.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/bluetooth/hci_ldisc.c      |   6 +-
 drivers/char/cyclades.c            |   3 +-
 drivers/char/epca.c                |   4 +-
 drivers/char/ip2/i2lib.c           |   4 +-
 drivers/char/ip2/ip2main.c         |   7 +-
 drivers/char/n_hdlc.c              |   6 +-
 drivers/char/n_r3964.c             |   2 +-
 drivers/char/n_tty.c               |   2 +-
 drivers/char/pcmcia/synclink_cs.c  |   4 +-
 drivers/char/pty.c                 |  10 +-
 drivers/char/selection.c           |   3 +-
 drivers/char/synclink.c            |   4 +-
 drivers/char/synclink_gt.c         |   4 +-
 drivers/char/synclinkmp.c          |   4 +-
 drivers/char/tty_io.c              | 336 +++++++++++++++++++++++--------------
 drivers/char/tty_ioctl.c           |  16 +-
 drivers/input/serio/serport.c      |   2 +-
 drivers/isdn/capi/capi.c           |   4 +-
 drivers/isdn/gigaset/ser-gigaset.c |   2 +-
 drivers/net/hamradio/6pack.c       |   2 +-
 drivers/net/hamradio/mkiss.c       |   2 +-
 drivers/net/irda/irtty-sir.c       |   2 +-
 drivers/net/ppp_async.c            |   2 +-
 drivers/net/ppp_synctty.c          |   2 +-
 drivers/net/slip.c                 |   2 +-
 drivers/net/wan/pc300_tty.c        |   4 +-
 drivers/net/wan/x25_asy.c          |   2 +-
 fs/proc/proc_tty.c                 |  48 ------
 include/linux/tty.h                |   9 +-
 include/linux/tty_ldisc.h          |   7 +-
 net/bluetooth/rfcomm/tty.c         |  13 +-
 net/irda/ircomm/ircomm_tty.c       |  14 +-
 32 files changed, 286 insertions(+), 246 deletions(-)

(limited to 'net')

diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index e5cd856a2fe..69df187d74c 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -282,8 +282,8 @@ static int hci_uart_tty_open(struct tty_struct *tty)
 	/* FIXME: why is this needed. Note don't use ldisc_ref here as the
 	   open path is before the ldisc is referencable */
 
-	if (tty->ldisc.flush_buffer)
-		tty->ldisc.flush_buffer(tty);
+	if (tty->ldisc.ops->flush_buffer)
+		tty->ldisc.ops->flush_buffer(tty);
 	tty_driver_flush_buffer(tty);
 
 	return 0;
@@ -514,7 +514,7 @@ static unsigned int hci_uart_tty_poll(struct tty_struct *tty,
 
 static int __init hci_uart_init(void)
 {
-	static struct tty_ldisc hci_uart_ldisc;
+	static struct tty_ldisc_ops hci_uart_ldisc;
 	int err;
 
 	BT_INFO("HCI UART driver ver %s", VERSION);
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 6bff9d87dc5..a957dbcc5a4 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -5246,7 +5246,8 @@ cyclades_get_proc_info(char *buf, char **start, off_t offset, int length,
 					HZ, info->idle_stats.recv_bytes,
 					(cur_jifs - info->idle_stats.recv_idle)/
 					HZ, info->idle_stats.overruns,
-					(long)info->tty->ldisc.num);
+					/* FIXME: double check locking */
+					(long)info->tty->ldisc.ops->num);
 			else
 				size = sprintf(buf + len, "%3d %8lu %10lu %8lu "
 					"%10lu %8lu %9lu %6ld\n",
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 60a4df7dac1..aa8e19f44b4 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -2262,8 +2262,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file,
 			tty_wait_until_sent(tty, 0);
 		} else {
 			/* ldisc lock already held in ioctl */
-			if (tty->ldisc.flush_buffer)
-				tty->ldisc.flush_buffer(tty);
+			if (tty->ldisc.ops->flush_buffer)
+				tty->ldisc.ops->flush_buffer(tty);
 		}
 		unlock_kernel();
 		/* Fall Thru */
diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c
index 938879cc7bc..0061e18aff6 100644
--- a/drivers/char/ip2/i2lib.c
+++ b/drivers/char/ip2/i2lib.c
@@ -868,11 +868,11 @@ i2Input(i2ChanStrPtr pCh)
 		amountToMove = count;
 	}
 	// Move the first block
-	pCh->pTTY->ldisc.receive_buf( pCh->pTTY, 
+	pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
 		 &(pCh->Ibuf[stripIndex]), NULL, amountToMove );
 	// If we needed to wrap, do the second data move
 	if (count > amountToMove) {
-		pCh->pTTY->ldisc.receive_buf( pCh->pTTY, 
+		pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
 		 pCh->Ibuf, NULL, count - amountToMove );
 	}
 	// Bump and wrap the stripIndex all at once by the amount of data read. This
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index 9a2394cda94..5dc74404058 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -1289,11 +1289,12 @@ static void do_input(struct work_struct *work)
 // code duplicated from n_tty (ldisc)
 static inline void  isig(int sig, struct tty_struct *tty, int flush)
 {
+	/* FIXME: This is completely bogus */
 	if (tty->pgrp)
 		kill_pgrp(tty->pgrp, sig, 1);
 	if (flush || !L_NOFLSH(tty)) {
-		if ( tty->ldisc.flush_buffer )  
-			tty->ldisc.flush_buffer(tty);
+		if ( tty->ldisc.ops->flush_buffer )  
+			tty->ldisc.ops->flush_buffer(tty);
 		i2InputFlush( tty->driver_data );
 	}
 }
@@ -1342,7 +1343,7 @@ static void do_status(struct work_struct *work)
 		}
 		tmp = pCh->pTTY->real_raw;
 		pCh->pTTY->real_raw = 0;
-		pCh->pTTY->ldisc.receive_buf( pCh->pTTY, &brkc, &brkf, 1 );
+		pCh->pTTY->ldisc->ops.receive_buf( pCh->pTTY, &brkc, &brkf, 1 );
 		pCh->pTTY->real_raw = tmp;
 	}
 #endif /* NEVER_HAPPENS_AS_SETUP_XXX */
diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c
index a35bfd7ee80..ed4e03333ab 100644
--- a/drivers/char/n_hdlc.c
+++ b/drivers/char/n_hdlc.c
@@ -199,7 +199,7 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty);
 #define tty2n_hdlc(tty)	((struct n_hdlc *) ((tty)->disc_data))
 #define n_hdlc2tty(n_hdlc)	((n_hdlc)->tty)
 
-static struct tty_ldisc n_hdlc_ldisc = {
+static struct tty_ldisc_ops n_hdlc_ldisc = {
 	.owner		= THIS_MODULE,
 	.magic		= TTY_LDISC_MAGIC,
 	.name		= "hdlc",
@@ -342,8 +342,8 @@ static int n_hdlc_tty_open (struct tty_struct *tty)
 #endif
 	
 	/* Flush any pending characters in the driver and discipline. */
-	if (tty->ldisc.flush_buffer)
-		tty->ldisc.flush_buffer(tty);
+	if (tty->ldisc.ops->flush_buffer)
+		tty->ldisc.ops->flush_buffer(tty);
 
 	tty_driver_flush_buffer(tty);
 		
diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c
index 90216906233..ae377aa473b 100644
--- a/drivers/char/n_r3964.c
+++ b/drivers/char/n_r3964.c
@@ -143,7 +143,7 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file,
 static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp,
 		char *fp, int count);
 
-static struct tty_ldisc tty_ldisc_N_R3964 = {
+static struct tty_ldisc_ops tty_ldisc_N_R3964 = {
 	.owner = THIS_MODULE,
 	.magic = TTY_LDISC_MAGIC,
 	.name = "R3964",
diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index 8096389b0dc..708c2b1dbe5 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -1573,7 +1573,7 @@ static unsigned int normal_poll(struct tty_struct *tty, struct file *file,
 	return mask;
 }
 
-struct tty_ldisc tty_ldisc_N_TTY = {
+struct tty_ldisc_ops tty_ldisc_N_TTY = {
 	.magic           = TTY_LDISC_MAGIC,
 	.name            = "n_tty",
 	.open            = n_tty_open,
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 1dd0e992c83..95743e2682f 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -514,8 +514,8 @@ static void ldisc_receive_buf(struct tty_struct *tty,
 		return;
 	ld = tty_ldisc_ref(tty);
 	if (ld) {
-		if (ld->receive_buf)
-			ld->receive_buf(tty, data, flags, count);
+		if (ld->ops->receive_buf)
+			ld->ops->receive_buf(tty, data, flags, count);
 		tty_ldisc_deref(ld);
 	}
 }
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 0a05c038ae6..76b27932d22 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -111,7 +111,7 @@ static int pty_write(struct tty_struct * tty, const unsigned char *buf, int coun
 	c = to->receive_room;
 	if (c > count)
 		c = count;
-	to->ldisc.receive_buf(to, buf, NULL, c);
+	to->ldisc.ops->receive_buf(to, buf, NULL, c);
 	
 	return c;
 }
@@ -149,11 +149,11 @@ static int pty_chars_in_buffer(struct tty_struct *tty)
 	int count;
 
 	/* We should get the line discipline lock for "tty->link" */
-	if (!to || !to->ldisc.chars_in_buffer)
+	if (!to || !to->ldisc.ops->chars_in_buffer)
 		return 0;
 
 	/* The ldisc must report 0 if no characters available to be read */
-	count = to->ldisc.chars_in_buffer(to);
+	count = to->ldisc.ops->chars_in_buffer(to);
 
 	if (tty->driver->subtype == PTY_TYPE_SLAVE) return count;
 
@@ -186,8 +186,8 @@ static void pty_flush_buffer(struct tty_struct *tty)
 	if (!to)
 		return;
 	
-	if (to->ldisc.flush_buffer)
-		to->ldisc.flush_buffer(to);
+	if (to->ldisc.ops->flush_buffer)
+		to->ldisc.ops->flush_buffer(to);
 	
 	if (to->packet) {
 		spin_lock_irqsave(&tty->ctrl_lock, flags);
diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index d63f5ccc29e..2978a49a172 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -327,7 +327,8 @@ int paste_selection(struct tty_struct *tty)
 		}
 		count = sel_buffer_lth - pasted;
 		count = min(count, tty->receive_room);
-		tty->ldisc.receive_buf(tty, sel_buffer + pasted, NULL, count);
+		tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted,
+								NULL, count);
 		pasted += count;
 	}
 	remove_wait_queue(&vc->paste_wait, &wait);
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index ac5080df256..5e4b2e638d0 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -975,8 +975,8 @@ static void ldisc_receive_buf(struct tty_struct *tty,
 		return;
 	ld = tty_ldisc_ref(tty);
 	if (ld) {
-		if (ld->receive_buf)
-			ld->receive_buf(tty, data, flags, count);
+		if (ld->ops->receive_buf)
+			ld->ops->receive_buf(tty, data, flags, count);
 		tty_ldisc_deref(ld);
 	}
 }
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 55c1653be00..e473778cd6f 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -641,8 +641,8 @@ static void ldisc_receive_buf(struct tty_struct *tty,
 		return;
 	ld = tty_ldisc_ref(tty);
 	if (ld) {
-		if (ld->receive_buf)
-			ld->receive_buf(tty, data, flags, count);
+		if (ld->ops->receive_buf)
+			ld->ops->receive_buf(tty, data, flags, count);
 		tty_ldisc_deref(ld);
 	}
 }
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index bec54866e0b..5341b5aaf8b 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -712,8 +712,8 @@ static void ldisc_receive_buf(struct tty_struct *tty,
 		return;
 	ld = tty_ldisc_ref(tty);
 	if (ld) {
-		if (ld->receive_buf)
-			ld->receive_buf(tty, data, flags, count);
+		if (ld->ops->receive_buf)
+			ld->ops->receive_buf(tty, data, flags, count);
 		tty_ldisc_deref(ld);
 	}
 }
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 047a17339f8..54c4ada460e 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -95,8 +95,9 @@
 #include <linux/wait.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
+#include <linux/seq_file.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/system.h>
 
 #include <linux/kbd_kern.h>
@@ -682,7 +683,7 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
 static DEFINE_SPINLOCK(tty_ldisc_lock);
 static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait);
 /* Line disc dispatch table */
-static struct tty_ldisc tty_ldiscs[NR_LDISCS];
+static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS];
 
 /**
  *	tty_register_ldisc	-	install a line discipline
@@ -697,7 +698,7 @@ static struct tty_ldisc tty_ldiscs[NR_LDISCS];
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
+int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc)
 {
 	unsigned long flags;
 	int ret = 0;
@@ -706,10 +707,9 @@ int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
 		return -EINVAL;
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	tty_ldiscs[disc] = *new_ldisc;
-	tty_ldiscs[disc].num = disc;
-	tty_ldiscs[disc].flags |= LDISC_FLAG_DEFINED;
-	tty_ldiscs[disc].refcount = 0;
+	tty_ldiscs[disc] = new_ldisc;
+	new_ldisc->num = disc;
+	new_ldisc->refcount = 0;
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 
 	return ret;
@@ -737,19 +737,56 @@ int tty_unregister_ldisc(int disc)
 		return -EINVAL;
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	if (tty_ldiscs[disc].refcount)
+	if (tty_ldiscs[disc]->refcount)
 		ret = -EBUSY;
 	else
-		tty_ldiscs[disc].flags &= ~LDISC_FLAG_DEFINED;
+		tty_ldiscs[disc] = NULL;
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 
 	return ret;
 }
 EXPORT_SYMBOL(tty_unregister_ldisc);
 
+
+/**
+ *	tty_ldisc_try_get	-	try and reference an ldisc
+ *	@disc: ldisc number
+ *	@ld: tty ldisc structure to complete
+ *
+ *	Attempt to open and lock a line discipline into place. Return
+ *	the line discipline refcounted and assigned in ld. On an error
+ *	report the error code back
+ */
+
+static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
+{
+	unsigned long flags;
+	struct tty_ldisc_ops *ldops;
+	int err = -EINVAL;
+	
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	ld->ops = NULL;
+	ldops = tty_ldiscs[disc];
+	/* Check the entry is defined */
+	if (ldops) {
+		/* If the module is being unloaded we can't use it */
+		if (!try_module_get(ldops->owner))
+			err = -EAGAIN;
+		else {
+			/* lock it */
+			ldops->refcount++;
+			ld->ops = ldops;
+			err = 0;
+		}
+	}
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	return err;
+}
+
 /**
  *	tty_ldisc_get		-	take a reference to an ldisc
  *	@disc: ldisc number
+ *	@ld: tty line discipline structure to use
  *
  *	Takes a reference to a line discipline. Deals with refcounts and
  *	module locking counts. Returns NULL if the discipline is not available.
@@ -760,32 +797,20 @@ EXPORT_SYMBOL(tty_unregister_ldisc);
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-struct tty_ldisc *tty_ldisc_get(int disc)
+static int tty_ldisc_get(int disc, struct tty_ldisc *ld)
 {
-	unsigned long flags;
-	struct tty_ldisc *ld;
+	int err;
 
 	if (disc < N_TTY || disc >= NR_LDISCS)
-		return NULL;
-
-	spin_lock_irqsave(&tty_ldisc_lock, flags);
-
-	ld = &tty_ldiscs[disc];
-	/* Check the entry is defined */
-	if (ld->flags & LDISC_FLAG_DEFINED) {
-		/* If the module is being unloaded we can't use it */
-		if (!try_module_get(ld->owner))
-			ld = NULL;
-		else /* lock it */
-			ld->refcount++;
-	} else
-		ld = NULL;
-	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-	return ld;
+		return -EINVAL;
+	err = tty_ldisc_try_get(disc, ld);
+	if (err == -EAGAIN) {
+		request_module("tty-ldisc-%d", disc);
+		err = tty_ldisc_try_get(disc, ld);
+	}
+	return err;
 }
 
-EXPORT_SYMBOL_GPL(tty_ldisc_get);
-
 /**
  *	tty_ldisc_put		-	drop ldisc reference
  *	@disc: ldisc number
@@ -797,22 +822,67 @@ EXPORT_SYMBOL_GPL(tty_ldisc_get);
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-void tty_ldisc_put(int disc)
+static void tty_ldisc_put(struct tty_ldisc_ops *ld)
 {
-	struct tty_ldisc *ld;
 	unsigned long flags;
+	int disc = ld->num;
 
 	BUG_ON(disc < N_TTY || disc >= NR_LDISCS);
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	ld = &tty_ldiscs[disc];
+	ld = tty_ldiscs[disc];
 	BUG_ON(ld->refcount == 0);
 	ld->refcount--;
 	module_put(ld->owner);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 }
 
-EXPORT_SYMBOL_GPL(tty_ldisc_put);
+static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
+{
+	return (*pos < NR_LDISCS) ? pos : NULL;
+}
+
+static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return (*pos < NR_LDISCS) ? pos : NULL;
+}
+
+static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
+{
+}
+
+static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
+{
+	int i = *(loff_t *)v;
+	struct tty_ldisc ld;
+	
+	if (tty_ldisc_get(i, &ld) < 0)
+		return 0;
+	seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i);
+	tty_ldisc_put(ld.ops);
+	return 0;
+}
+
+static const struct seq_operations tty_ldiscs_seq_ops = {
+	.start	= tty_ldiscs_seq_start,
+	.next	= tty_ldiscs_seq_next,
+	.stop	= tty_ldiscs_seq_stop,
+	.show	= tty_ldiscs_seq_show,
+};
+
+static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &tty_ldiscs_seq_ops);
+}
+
+const struct file_operations tty_ldiscs_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= proc_tty_ldiscs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
 
 /**
  *	tty_ldisc_assign	-	set ldisc on a tty
@@ -829,8 +899,8 @@ EXPORT_SYMBOL_GPL(tty_ldisc_put);
 
 static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
 {
+	ld->refcount = 0;
 	tty->ldisc = *ld;
-	tty->ldisc.refcount = 0;
 }
 
 /**
@@ -953,6 +1023,41 @@ static void tty_ldisc_enable(struct tty_struct *tty)
 	wake_up(&tty_ldisc_wait);
 }
 
+/**
+ *	tty_ldisc_restore	-	helper for tty ldisc change
+ *	@tty: tty to recover
+ *	@old: previous ldisc
+ *
+ *	Restore the previous line discipline or N_TTY when a line discipline
+ *	change fails due to an open error
+ */
+
+static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
+{
+	char buf[64];
+	struct tty_ldisc new_ldisc;
+
+	/* There is an outstanding reference here so this is safe */
+	tty_ldisc_get(old->ops->num, old);
+	tty_ldisc_assign(tty, old);
+	tty_set_termios_ldisc(tty, old->ops->num);
+	if (old->ops->open && (old->ops->open(tty) < 0)) {
+		tty_ldisc_put(old->ops);
+		/* This driver is always present */
+		if (tty_ldisc_get(N_TTY, &new_ldisc) < 0)
+			panic("n_tty: get");
+		tty_ldisc_assign(tty, &new_ldisc);
+		tty_set_termios_ldisc(tty, N_TTY);
+		if (new_ldisc.ops->open) {
+			int r = new_ldisc.ops->open(tty);
+				if (r < 0)
+				panic("Couldn't open N_TTY ldisc for "
+				      "%s --- error %d.",
+				      tty_name(tty, buf), r);
+		}
+	}
+}
+
 /**
  *	tty_set_ldisc		-	set line discipline
  *	@tty: the terminal to set
@@ -967,28 +1072,18 @@ static void tty_ldisc_enable(struct tty_struct *tty)
 
 static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 {
-	int retval = 0;
-	struct tty_ldisc o_ldisc;
-	char buf[64];
+	int retval;
+	struct tty_ldisc o_ldisc, new_ldisc;
 	int work;
 	unsigned long flags;
-	struct tty_ldisc *ld;
 	struct tty_struct *o_tty;
 
-	if ((ldisc < N_TTY) || (ldisc >= NR_LDISCS))
-		return -EINVAL;
-
 restart:
-
-	ld = tty_ldisc_get(ldisc);
-	/* Eduardo Blanco <ejbs@cs.cs.com.uy> */
-	/* Cyrus Durgin <cider@speakeasy.org> */
-	if (ld == NULL) {
-		request_module("tty-ldisc-%d", ldisc);
-		ld = tty_ldisc_get(ldisc);
-	}
-	if (ld == NULL)
-		return -EINVAL;
+	/* This is a bit ugly for now but means we can break the 'ldisc
+	   is part of the tty struct' assumption later */
+	retval = tty_ldisc_get(ldisc, &new_ldisc);
+	if (retval)
+		return retval;
 
 	/*
 	 *	Problem: What do we do if this blocks ?
@@ -996,8 +1091,8 @@ restart:
 
 	tty_wait_until_sent(tty, 0);
 
-	if (tty->ldisc.num == ldisc) {
-		tty_ldisc_put(ldisc);
+	if (tty->ldisc.ops->num == ldisc) {
+		tty_ldisc_put(new_ldisc.ops);
 		return 0;
 	}
 
@@ -1024,7 +1119,7 @@ restart:
 			/* Free the new ldisc we grabbed. Must drop the lock
 			   first. */
 			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(ldisc);
+			tty_ldisc_put(o_ldisc.ops);
 			/*
 			 * There are several reasons we may be busy, including
 			 * random momentary I/O traffic. We must therefore
@@ -1038,7 +1133,7 @@ restart:
 		}
 		if (o_tty && o_tty->ldisc.refcount) {
 			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(ldisc);
+			tty_ldisc_put(o_tty->ldisc.ops);
 			if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0)
 				return -ERESTARTSYS;
 			goto restart;
@@ -1049,8 +1144,9 @@ restart:
 	 *	another ldisc change
 	 */
 	if (!test_bit(TTY_LDISC, &tty->flags)) {
+		struct tty_ldisc *ld;
 		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		tty_ldisc_put(ldisc);
+		tty_ldisc_put(new_ldisc.ops);
 		ld = tty_ldisc_ref_wait(tty);
 		tty_ldisc_deref(ld);
 		goto restart;
@@ -1060,7 +1156,7 @@ restart:
 	if (o_tty)
 		clear_bit(TTY_LDISC, &o_tty->flags);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-
+	
 	/*
 	 *	From this point on we know nobody has an ldisc
 	 *	usage reference, nor can they obtain one until
@@ -1070,45 +1166,30 @@ restart:
 	work = cancel_delayed_work(&tty->buf.work);
 	/*
 	 * Wait for ->hangup_work and ->buf.work handlers to terminate
+	 * MUST NOT hold locks here.
 	 */
 	flush_scheduled_work();
 	/* Shutdown the current discipline. */
-	if (tty->ldisc.close)
-		(tty->ldisc.close)(tty);
+	if (o_ldisc.ops->close)
+		(o_ldisc.ops->close)(tty);
 
 	/* Now set up the new line discipline. */
-	tty_ldisc_assign(tty, ld);
+	tty_ldisc_assign(tty, &new_ldisc);
 	tty_set_termios_ldisc(tty, ldisc);
-	if (tty->ldisc.open)
-		retval = (tty->ldisc.open)(tty);
+	if (new_ldisc.ops->open)
+		retval = (new_ldisc.ops->open)(tty);
 	if (retval < 0) {
-		tty_ldisc_put(ldisc);
-		/* There is an outstanding reference here so this is safe */
-		tty_ldisc_assign(tty, tty_ldisc_get(o_ldisc.num));
-		tty_set_termios_ldisc(tty, tty->ldisc.num);
-		if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) {
-			tty_ldisc_put(o_ldisc.num);
-			/* This driver is always present */
-			tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
-			tty_set_termios_ldisc(tty, N_TTY);
-			if (tty->ldisc.open) {
-				int r = tty->ldisc.open(tty);
-
-				if (r < 0)
-					panic("Couldn't open N_TTY ldisc for "
-					      "%s --- error %d.",
-					      tty_name(tty, buf), r);
-			}
-		}
+		tty_ldisc_put(new_ldisc.ops);
+		tty_ldisc_restore(tty, &o_ldisc);
 	}
 	/* At this point we hold a reference to the new ldisc and a
 	   a reference to the old ldisc. If we ended up flipping back
 	   to the existing ldisc we have two references to it */
 
-	if (tty->ldisc.num != o_ldisc.num && tty->ops->set_ldisc)
+	if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc)
 		tty->ops->set_ldisc(tty);
 
-	tty_ldisc_put(o_ldisc.num);
+	tty_ldisc_put(o_ldisc.ops);
 
 	/*
 	 *	Allow ldisc referencing to occur as soon as the driver
@@ -1335,8 +1416,8 @@ void tty_wakeup(struct tty_struct *tty)
 	if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) {
 		ld = tty_ldisc_ref(tty);
 		if (ld) {
-			if (ld->write_wakeup)
-				ld->write_wakeup(tty);
+			if (ld->ops->write_wakeup)
+				ld->ops->write_wakeup(tty);
 			tty_ldisc_deref(ld);
 		}
 	}
@@ -1357,8 +1438,8 @@ void tty_ldisc_flush(struct tty_struct *tty)
 {
 	struct tty_ldisc *ld = tty_ldisc_ref(tty);
 	if (ld) {
-		if (ld->flush_buffer)
-			ld->flush_buffer(tty);
+		if (ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
 		tty_ldisc_deref(ld);
 	}
 	tty_buffer_flush(tty);
@@ -1386,7 +1467,7 @@ static void tty_reset_termios(struct tty_struct *tty)
  *	do_tty_hangup		-	actual handler for hangup events
  *	@work: tty device
  *
- *	This can be called by the "eventd" kernel thread.  That is process
+k *	This can be called by the "eventd" kernel thread.  That is process
  *	synchronous but doesn't hold any locks, so we need to make sure we
  *	have the appropriate locks for what we're doing.
  *
@@ -1449,14 +1530,14 @@ static void do_tty_hangup(struct work_struct *work)
 	ld = tty_ldisc_ref(tty);
 	if (ld != NULL) {
 		/* We may have no line discipline at this point */
-		if (ld->flush_buffer)
-			ld->flush_buffer(tty);
+		if (ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
 		tty_driver_flush_buffer(tty);
 		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
-		    ld->write_wakeup)
-			ld->write_wakeup(tty);
-		if (ld->hangup)
-			ld->hangup(tty);
+		    ld->ops->write_wakeup)
+			ld->ops->write_wakeup(tty);
+		if (ld->ops->hangup)
+			ld->ops->hangup(tty);
 	}
 	/*
 	 * FIXME: Once we trust the LDISC code better we can wait here for
@@ -1825,8 +1906,8 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
 	/* We want to wait for the line discipline to sort out in this
 	   situation */
 	ld = tty_ldisc_ref_wait(tty);
-	if (ld->read)
-		i = (ld->read)(tty, file, buf, count);
+	if (ld->ops->read)
+		i = (ld->ops->read)(tty, file, buf, count);
 	else
 		i = -EIO;
 	tty_ldisc_deref(ld);
@@ -1978,10 +2059,10 @@ static ssize_t tty_write(struct file *file, const char __user *buf,
 		printk(KERN_ERR "tty driver %s lacks a write_room method.\n",
 			tty->driver->name);
 	ld = tty_ldisc_ref_wait(tty);
-	if (!ld->write)
+	if (!ld->ops->write)
 		ret = -EIO;
 	else
-		ret = do_tty_write(ld->write, tty, file, buf, count);
+		ret = do_tty_write(ld->ops->write, tty, file, buf, count);
 	tty_ldisc_deref(ld);
 	return ret;
 }
@@ -2076,6 +2157,7 @@ static int init_dev(struct tty_driver *driver, int idx,
 	struct ktermios *tp, **tp_loc, *o_tp, **o_tp_loc;
 	struct ktermios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
 	int retval = 0;
+	struct tty_ldisc *ld;
 
 	/* check whether we're reopening an existing tty */
 	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
@@ -2224,17 +2306,19 @@ static int init_dev(struct tty_driver *driver, int idx,
 	 * If we fail here just call release_tty to clean up.  No need
 	 * to decrement the use counts, as release_tty doesn't care.
 	 */
+	 
+	ld = &tty->ldisc;
 
-	if (tty->ldisc.open) {
-		retval = (tty->ldisc.open)(tty);
+	if (ld->ops->open) {
+		retval = (ld->ops->open)(tty);
 		if (retval)
 			goto release_mem_out;
 	}
-	if (o_tty && o_tty->ldisc.open) {
-		retval = (o_tty->ldisc.open)(o_tty);
+	if (o_tty && o_tty->ldisc.ops->open) {
+		retval = (o_tty->ldisc.ops->open)(o_tty);
 		if (retval) {
-			if (tty->ldisc.close)
-				(tty->ldisc.close)(tty);
+			if (ld->ops->close)
+				(ld->ops->close)(tty);
 			goto release_mem_out;
 		}
 		tty_ldisc_enable(o_tty);
@@ -2378,6 +2462,7 @@ static void release_tty(struct tty_struct *tty, int idx)
 static void release_dev(struct file *filp)
 {
 	struct tty_struct *tty, *o_tty;
+	struct tty_ldisc ld;
 	int	pty_master, tty_closing, o_tty_closing, do_sleep;
 	int	devpts;
 	int	idx;
@@ -2611,26 +2696,27 @@ static void release_dev(struct file *filp)
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 	/*
 	 * Shutdown the current line discipline, and reset it to N_TTY.
-	 * N.B. why reset ldisc when we're releasing the memory??
 	 *
 	 * FIXME: this MUST get fixed for the new reflocking
 	 */
-	if (tty->ldisc.close)
-		(tty->ldisc.close)(tty);
-	tty_ldisc_put(tty->ldisc.num);
+	if (tty->ldisc.ops->close)
+		(tty->ldisc.ops->close)(tty);
+	tty_ldisc_put(tty->ldisc.ops);
 
 	/*
 	 *	Switch the line discipline back
 	 */
-	tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
+	WARN_ON(tty_ldisc_get(N_TTY, &ld));
+	tty_ldisc_assign(tty, &ld);
 	tty_set_termios_ldisc(tty, N_TTY);
 	if (o_tty) {
 		/* FIXME: could o_tty be in setldisc here ? */
 		clear_bit(TTY_LDISC, &o_tty->flags);
-		if (o_tty->ldisc.close)
-			(o_tty->ldisc.close)(o_tty);
-		tty_ldisc_put(o_tty->ldisc.num);
-		tty_ldisc_assign(o_tty, tty_ldisc_get(N_TTY));
+		if (o_tty->ldisc.ops->close)
+			(o_tty->ldisc.ops->close)(o_tty);
+		tty_ldisc_put(o_tty->ldisc.ops);
+		WARN_ON(tty_ldisc_get(N_TTY, &ld));
+		tty_ldisc_assign(o_tty, &ld);
 		tty_set_termios_ldisc(o_tty, N_TTY);
 	}
 	/*
@@ -2899,8 +2985,8 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait)
 		return 0;
 
 	ld = tty_ldisc_ref_wait(tty);
-	if (ld->poll)
-		ret = (ld->poll)(tty, filp, wait);
+	if (ld->ops->poll)
+		ret = (ld->ops->poll)(tty, filp, wait);
 	tty_ldisc_deref(ld);
 	return ret;
 }
@@ -2974,7 +3060,7 @@ static int tiocsti(struct tty_struct *tty, char __user *p)
 	if (get_user(ch, p))
 		return -EFAULT;
 	ld = tty_ldisc_ref_wait(tty);
-	ld->receive_buf(tty, &ch, &mbz, 1);
+	ld->ops->receive_buf(tty, &ch, &mbz, 1);
 	tty_ldisc_deref(ld);
 	return 0;
 }
@@ -3528,7 +3614,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case TIOCGSID:
 		return tiocgsid(tty, real_tty, p);
 	case TIOCGETD:
-		return put_user(tty->ldisc.num, (int __user *)p);
+		return put_user(tty->ldisc.ops->num, (int __user *)p);
 	case TIOCSETD:
 		return tiocsetd(tty, p);
 #ifdef CONFIG_VT
@@ -3581,8 +3667,8 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 	ld = tty_ldisc_ref_wait(tty);
 	retval = -EINVAL;
-	if (ld->ioctl) {
-		retval = ld->ioctl(tty, file, cmd, arg);
+	if (ld->ops->ioctl) {
+		retval = ld->ops->ioctl(tty, file, cmd, arg);
 		if (retval == -ENOIOCTLCMD)
 			retval = -EINVAL;
 	}
@@ -3609,8 +3695,8 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
 	}
 
 	ld = tty_ldisc_ref_wait(tty);
-	if (ld->compat_ioctl)
-		retval = ld->compat_ioctl(tty, file, cmd, arg);
+	if (ld->ops->compat_ioctl)
+		retval = ld->ops->compat_ioctl(tty, file, cmd, arg);
 	tty_ldisc_deref(ld);
 
 	return retval;
@@ -3782,7 +3868,8 @@ static void flush_to_ldisc(struct work_struct *work)
 			flag_buf = head->flag_buf_ptr + head->read;
 			head->read += count;
 			spin_unlock_irqrestore(&tty->buf.lock, flags);
-			disc->receive_buf(tty, char_buf, flag_buf, count);
+			disc->ops->receive_buf(tty, char_buf,
+							flag_buf, count);
 			spin_lock_irqsave(&tty->buf.lock, flags);
 		}
 		/* Restore the queue head */
@@ -3843,9 +3930,12 @@ EXPORT_SYMBOL(tty_flip_buffer_push);
 
 static void initialize_tty_struct(struct tty_struct *tty)
 {
+	struct tty_ldisc ld;
 	memset(tty, 0, sizeof(struct tty_struct));
 	tty->magic = TTY_MAGIC;
-	tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
+	if (tty_ldisc_get(N_TTY, &ld) < 0)
+		panic("n_tty: init_tty");
+	tty_ldisc_assign(tty, &ld);
 	tty->session = NULL;
 	tty->pgrp = NULL;
 	tty->overrun_time = jiffies;
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index 8f81139d619..ea9fc5d03b9 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -491,8 +491,8 @@ static void change_termios(struct tty_struct *tty, struct ktermios *new_termios)
 
 	ld = tty_ldisc_ref(tty);
 	if (ld != NULL) {
-		if (ld->set_termios)
-			(ld->set_termios)(tty, &old_termios);
+		if (ld->ops->set_termios)
+			(ld->ops->set_termios)(tty, &old_termios);
 		tty_ldisc_deref(ld);
 	}
 	mutex_unlock(&tty->termios_mutex);
@@ -552,8 +552,8 @@ static int set_termios(struct tty_struct *tty, void __user *arg, int opt)
 	ld = tty_ldisc_ref(tty);
 
 	if (ld != NULL) {
-		if ((opt & TERMIOS_FLUSH) && ld->flush_buffer)
-			ld->flush_buffer(tty);
+		if ((opt & TERMIOS_FLUSH) && ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
 		tty_ldisc_deref(ld);
 	}
 
@@ -959,12 +959,12 @@ int tty_perform_flush(struct tty_struct *tty, unsigned long arg)
 	ld = tty_ldisc_ref(tty);
 	switch (arg) {
 	case TCIFLUSH:
-		if (ld && ld->flush_buffer)
-			ld->flush_buffer(tty);
+		if (ld && ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
 		break;
 	case TCIOFLUSH:
-		if (ld && ld->flush_buffer)
-			ld->flush_buffer(tty);
+		if (ld && ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
 		/* fall through */
 	case TCOFLUSH:
 		tty_driver_flush_buffer(tty);
diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c
index 7ff71ba7b7c..b9694b6445d 100644
--- a/drivers/input/serio/serport.c
+++ b/drivers/input/serio/serport.c
@@ -216,7 +216,7 @@ static void serport_ldisc_write_wakeup(struct tty_struct * tty)
  * The line discipline structure.
  */
 
-static struct tty_ldisc serport_ldisc = {
+static struct tty_ldisc_ops serport_ldisc = {
 	.owner =	THIS_MODULE,
 	.name =		"input",
 	.open =		serport_ldisc_open,
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 2095153582f..8a35029caca 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -466,7 +466,7 @@ static int handle_recv_skb(struct capiminor *mp, struct sk_buff *skb)
 	ld = tty_ldisc_ref(mp->tty);
 	if (ld == NULL)
 		return -1;
-	if (ld->receive_buf == NULL) {
+	if (ld->ops->receive_buf == NULL) {
 #if defined(_DEBUG_DATAFLOW) || defined(_DEBUG_TTYFUNCS)
 		printk(KERN_DEBUG "capi: ldisc has no receive_buf function\n");
 #endif
@@ -501,7 +501,7 @@ static int handle_recv_skb(struct capiminor *mp, struct sk_buff *skb)
 	printk(KERN_DEBUG "capi: DATA_B3_RESP %u len=%d => ldisc\n",
 				datahandle, skb->len);
 #endif
-	ld->receive_buf(mp->tty, skb->data, NULL, skb->len);
+	ld->ops->receive_buf(mp->tty, skb->data, NULL, skb->len);
 	kfree_skb(skb);
 	tty_ldisc_deref(ld);
 	return 0;
diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c
index 45d1ee93cd3..5e89fa17781 100644
--- a/drivers/isdn/gigaset/ser-gigaset.c
+++ b/drivers/isdn/gigaset/ser-gigaset.c
@@ -766,7 +766,7 @@ gigaset_tty_wakeup(struct tty_struct *tty)
 	cs_put(cs);
 }
 
-static struct tty_ldisc gigaset_ldisc = {
+static struct tty_ldisc_ops gigaset_ldisc = {
 	.owner		= THIS_MODULE,
 	.magic		= TTY_LDISC_MAGIC,
 	.name		= "ser_gigaset",
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 9d5721287d6..19dd0a61749 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -783,7 +783,7 @@ static int sixpack_ioctl(struct tty_struct *tty, struct file *file,
 	return err;
 }
 
-static struct tty_ldisc sp_ldisc = {
+static struct tty_ldisc_ops sp_ldisc = {
 	.owner		= THIS_MODULE,
 	.magic		= TTY_LDISC_MAGIC,
 	.name		= "6pack",
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 65166035aca..c6ca47599fd 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -969,7 +969,7 @@ out:
 	mkiss_put(ax);
 }
 
-static struct tty_ldisc ax_ldisc = {
+static struct tty_ldisc_ops ax_ldisc = {
 	.owner		= THIS_MODULE,
 	.magic		= TTY_LDISC_MAGIC,
 	.name		= "mkiss",
diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c
index e6f40b7f904..9e33196f945 100644
--- a/drivers/net/irda/irtty-sir.c
+++ b/drivers/net/irda/irtty-sir.c
@@ -533,7 +533,7 @@ static void irtty_close(struct tty_struct *tty)
 
 /* ------------------------------------------------------- */
 
-static struct tty_ldisc irda_ldisc = {
+static struct tty_ldisc_ops irda_ldisc = {
 	.magic		= TTY_LDISC_MAGIC,
  	.name		= "irda",
 	.flags		= 0,
diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c
index f1a52def124..451bdb57d6f 100644
--- a/drivers/net/ppp_async.c
+++ b/drivers/net/ppp_async.c
@@ -378,7 +378,7 @@ ppp_asynctty_wakeup(struct tty_struct *tty)
 }
 
 
-static struct tty_ldisc ppp_ldisc = {
+static struct tty_ldisc_ops ppp_ldisc = {
 	.owner  = THIS_MODULE,
 	.magic	= TTY_LDISC_MAGIC,
 	.name	= "ppp",
diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c
index b8f0369a71e..801d8f99d47 100644
--- a/drivers/net/ppp_synctty.c
+++ b/drivers/net/ppp_synctty.c
@@ -418,7 +418,7 @@ ppp_sync_wakeup(struct tty_struct *tty)
 }
 
 
-static struct tty_ldisc ppp_sync_ldisc = {
+static struct tty_ldisc_ops ppp_sync_ldisc = {
 	.owner	= THIS_MODULE,
 	.magic	= TTY_LDISC_MAGIC,
 	.name	= "pppsync",
diff --git a/drivers/net/slip.c b/drivers/net/slip.c
index 84af68fdb6c..1d58991d395 100644
--- a/drivers/net/slip.c
+++ b/drivers/net/slip.c
@@ -1301,7 +1301,7 @@ static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 #endif
 /* VSV changes end */
 
-static struct tty_ldisc	sl_ldisc = {
+static struct tty_ldisc_ops sl_ldisc = {
 	.owner 		= THIS_MODULE,
 	.magic 		= TTY_LDISC_MAGIC,
 	.name 		= "slip",
diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c
index e03eef2f228..c2c10c63226 100644
--- a/drivers/net/wan/pc300_tty.c
+++ b/drivers/net/wan/pc300_tty.c
@@ -688,9 +688,9 @@ static void cpc_tty_rx_work(struct work_struct *work)
 				if (cpc_tty->tty) {
 					ld = tty_ldisc_ref(cpc_tty->tty);
 					if (ld) {
-						if (ld->receive_buf) {
+						if (ld->ops->receive_buf) {
 							CPC_TTY_DBG("%s: call line disc. receive_buf\n",cpc_tty->name);
-							ld->receive_buf(cpc_tty->tty, (char *)(buf->data), &flags, buf->size);
+							ld->ops->receive_buf(cpc_tty->tty, (char *)(buf->data), &flags, buf->size);
 						}
 						tty_ldisc_deref(ld);
 					}
diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index 069f8bb0a99..2a6c7a60756 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -754,7 +754,7 @@ static void x25_asy_setup(struct net_device *dev)
 	dev->flags		= IFF_NOARP;
 }
 
-static struct tty_ldisc x25_ldisc = {
+static struct tty_ldisc_ops x25_ldisc = {
 	.owner		= THIS_MODULE,
 	.magic		= TTY_LDISC_MAGIC,
 	.name		= "X.25",
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 21f490f5d65..d153946d6d1 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -136,54 +136,6 @@ static const struct file_operations proc_tty_drivers_operations = {
 	.release	= seq_release,
 };
 
-static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
-{
-	return (*pos < NR_LDISCS) ? pos : NULL;
-}
-
-static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return (*pos < NR_LDISCS) ? pos : NULL;
-}
-
-static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
-{
-}
-
-static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
-{
-	int i = *(loff_t *)v;
-	struct tty_ldisc *ld;
-	
-	ld = tty_ldisc_get(i);
-	if (ld == NULL)
-		return 0;
-	seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
-	tty_ldisc_put(i);
-	return 0;
-}
-
-static const struct seq_operations tty_ldiscs_seq_ops = {
-	.start	= tty_ldiscs_seq_start,
-	.next	= tty_ldiscs_seq_next,
-	.stop	= tty_ldiscs_seq_stop,
-	.show	= tty_ldiscs_seq_show,
-};
-
-static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &tty_ldiscs_seq_ops);
-}
-
-static const struct file_operations tty_ldiscs_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_tty_ldiscs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 /*
  * This function is called by tty_register_driver() to handle
  * registering the driver's /proc handler into /proc/tty/driver/<foo>
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 324a3b231d4..013711ea738 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -185,6 +185,7 @@ struct tty_struct {
 	struct tty_driver *driver;
 	const struct tty_operations *ops;
 	int index;
+	/* The ldisc objects are protected by tty_ldisc_lock at the moment */
 	struct tty_ldisc ldisc;
 	struct mutex termios_mutex;
 	spinlock_t ctrl_lock;
@@ -289,7 +290,7 @@ extern void tty_wait_until_sent(struct tty_struct * tty, long timeout);
 extern int tty_check_change(struct tty_struct * tty);
 extern void stop_tty(struct tty_struct * tty);
 extern void start_tty(struct tty_struct * tty);
-extern int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc);
+extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
 extern int tty_register_driver(struct tty_driver *driver);
 extern int tty_unregister_driver(struct tty_driver *driver);
@@ -330,9 +331,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b);
 extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *);
 extern void tty_ldisc_deref(struct tty_ldisc *);
 extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
-
-extern struct tty_ldisc *tty_ldisc_get(int);
-extern void tty_ldisc_put(int);
+extern const struct file_operations tty_ldiscs_proc_fops;
 
 extern void tty_wakeup(struct tty_struct *tty);
 extern void tty_ldisc_flush(struct tty_struct *tty);
@@ -354,7 +353,7 @@ extern int tty_write_lock(struct tty_struct *tty, int ndelay);
 
 
 /* n_tty.c */
-extern struct tty_ldisc tty_ldisc_N_TTY;
+extern struct tty_ldisc_ops tty_ldisc_N_TTY;
 
 /* tty_audit.c */
 #ifdef CONFIG_AUDIT
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index 6226504d910..40f38d89677 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -104,7 +104,7 @@
 #include <linux/fs.h>
 #include <linux/wait.h>
 
-struct tty_ldisc {
+struct tty_ldisc_ops {
 	int	magic;
 	char	*name;
 	int	num;
@@ -142,6 +142,11 @@ struct tty_ldisc {
 	int refcount;
 };
 
+struct tty_ldisc {
+	struct tty_ldisc_ops *ops;
+	int refcount;
+};
+
 #define TTY_LDISC_MAGIC	0x5403
 
 #define LDISC_FLAG_DEFINED	0x00000001
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index c9191871c1e..0a387f2eb7a 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -617,14 +617,7 @@ static void rfcomm_tty_wakeup(unsigned long arg)
 		return;
 
 	BT_DBG("dev %p tty %p", dev, tty);
-
-	if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && tty->ldisc.write_wakeup)
-		(tty->ldisc.write_wakeup)(tty);
-
-	wake_up_interruptible(&tty->write_wait);
-#ifdef SERIAL_HAVE_POLL_WAIT
-	wake_up_interruptible(&tty->poll_wait);
-#endif
+	tty_wakeup(tty);
 }
 
 static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
@@ -1005,9 +998,7 @@ static void rfcomm_tty_flush_buffer(struct tty_struct *tty)
 		return;
 
 	skb_queue_purge(&dev->dlc->tx_queue);
-
-	if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && tty->ldisc.write_wakeup)
-		tty->ldisc.write_wakeup(tty);
+	tty_wakeup(tty);
 }
 
 static void rfcomm_tty_send_xchar(struct tty_struct *tty, char ch)
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 76c3057d017..e4e2caeb9d8 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -650,12 +650,7 @@ static void ircomm_tty_do_softint(struct work_struct *work)
 	}
 
 	/* Check if user (still) wants to be waken up */
-	if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
-	    tty->ldisc.write_wakeup)
-	{
-		(tty->ldisc.write_wakeup)(tty);
-	}
-	wake_up_interruptible(&tty->write_wait);
+	tty_wakeup(tty);
 }
 
 /*
@@ -1141,6 +1136,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
 				      struct sk_buff *skb)
 {
 	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+	struct tty_ldisc *ld;
 
 	IRDA_DEBUG(2, "%s()\n", __func__ );
 
@@ -1173,7 +1169,11 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
 	 * involve the flip buffers, since we are not running in an interrupt
 	 * handler
 	 */
-	self->tty->ldisc.receive_buf(self->tty, skb->data, NULL, skb->len);
+
+	ld = tty_ldisc_ref(self->tty);
+	if (ld)
+		ld->ops->receive_buf(self->tty, skb->data, NULL, skb->len);
+	tty_ldisc_deref(ld);
 
 	/* No need to kfree_skb - see ircomm_ttp_data_indication() */
 
-- 
cgit v1.2.3-70-g09d2


From 3a682fbd732d3d27bec722a923952b0938e8a404 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 20 Jul 2008 18:13:01 -0700
Subject: pkt_sched: Fix build with NET_SCHED disabled.

The stab bits can't be referenced uniless the full
packet scheduler layer is enabled.

Reported by Stephen Rothwell.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 2 ++
 net/sched/sch_generic.c   | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index db9ad655eb8..b5f40d7ef72 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -333,8 +333,10 @@ static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
 
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
+#ifdef CONFIG_NET_SCHED
 	if (sch->stab)
 		qdisc_calculate_pkt_len(skb, sch->stab);
+#endif
 	return sch->enqueue(skb, sch);
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 27a51f04db4..0ddf69286f9 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -469,7 +469,9 @@ static void __qdisc_destroy(struct rcu_head *head)
 	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
 	const struct Qdisc_ops  *ops = qdisc->ops;
 
+#ifdef CONFIG_NET_SCHED
 	qdisc_put_stab(qdisc->stab);
+#endif
 	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
 	if (ops->reset)
 		ops->reset(qdisc);
-- 
cgit v1.2.3-70-g09d2


From 702beb87d6b4e08cca394b210679e5d7c2ac9383 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sun, 20 Jul 2008 18:17:02 -0700
Subject: ipv6: Fix warning in addrconf code.

Reported by Linus.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/ipv6/addrconf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 580ae506c39..9f4fcce6379 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2315,12 +2315,11 @@ static void init_loopback(struct net_device *dev)
 static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
 {
 	struct inet6_ifaddr * ifp;
-	struct net *net = dev_net(idev->dev);
 	u32 addr_flags = IFA_F_PERMANENT;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	if (idev->cnf.optimistic_dad &&
-	    !net->ipv6.devconf_all->forwarding)
+	    !dev_net(idev->dev)->ipv6.devconf_all->forwarding)
 		addr_flags |= IFA_F_OPTIMISTIC;
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From c3ee84163e5bc0dc2e1ccf1d3fc412debca73bab Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Mon, 21 Jul 2008 09:18:07 -0700
Subject: pkt_sched: Remove unused variable skb in dev_deactivate_queue
 function.

Removed unused variable 'skb' in the dev_deactivate_queue function

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0ddf69286f9..09dead33580 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -576,7 +576,6 @@ static void dev_deactivate_queue(struct net_device *dev,
 				 void *_qdisc_default)
 {
 	struct Qdisc *qdisc_default = _qdisc_default;
-	struct sk_buff *skb = NULL;
 	struct Qdisc *qdisc;
 
 	qdisc = dev_queue->qdisc;
@@ -588,8 +587,6 @@ static void dev_deactivate_queue(struct net_device *dev,
 
 		spin_unlock_bh(qdisc_lock(qdisc));
 	}
-
-	kfree_skb(skb);
 }
 
 static bool some_qdisc_is_running(struct net_device *dev, int lock)
-- 
cgit v1.2.3-70-g09d2


From b6b2fed1f4802b8fcc9d7548a8f785225d38f9a3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 21 Jul 2008 09:48:06 -0700
Subject: net: Improve simple_tx_hash().

Based upon feedback from Eric Dumazet and Andi Kleen.

Cure several deficiencies in simple_tx_hash() by using
jhash + reciprocol multiply.

1) Eliminates expensive modulus operation.

2) Makes hash less attackable by using random seed.

3) Eliminates endianness hash distribution issues.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 2eed17bcb2d..7e2d5274333 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -124,6 +124,8 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/in.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
 
 #include "net-sysfs.h"
 
@@ -1668,34 +1670,37 @@ out_kfree_skb:
  *          --BLG
  */
 
+static u32 simple_tx_hashrnd;
+static int simple_tx_hashrnd_initialized = 0;
+
 static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 {
-	u32 *addr, *ports, hash, ihl;
+	u32 addr1, addr2, ports;
+	u32 hash, ihl;
 	u8 ip_proto;
-	int alen;
+
+	if (unlikely(!simple_tx_hashrnd_initialized)) {
+		get_random_bytes(&simple_tx_hashrnd, 4);
+		simple_tx_hashrnd_initialized = 1;
+	}
 
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
 		ip_proto = ip_hdr(skb)->protocol;
-		addr = &ip_hdr(skb)->saddr;
+		addr1 = ip_hdr(skb)->saddr;
+		addr2 = ip_hdr(skb)->daddr;
 		ihl = ip_hdr(skb)->ihl;
-		alen = 2;
 		break;
 	case __constant_htons(ETH_P_IPV6):
 		ip_proto = ipv6_hdr(skb)->nexthdr;
-		addr = &ipv6_hdr(skb)->saddr.s6_addr32[0];
+		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
+		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
 		ihl = (40 >> 2);
-		alen = 8;
 		break;
 	default:
 		return 0;
 	}
 
-	ports = (u32 *) (skb_network_header(skb) + (ihl * 4));
-
-	hash = 0;
-	while (alen--)
-		hash ^= *addr++;
 
 	switch (ip_proto) {
 	case IPPROTO_TCP:
@@ -1705,14 +1710,17 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 	case IPPROTO_AH:
 	case IPPROTO_SCTP:
 	case IPPROTO_UDPLITE:
-		hash ^= *ports;
+		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
 		break;
 
 	default:
+		ports = 0;
 		break;
 	}
 
-	return hash % dev->real_num_tx_queues;
+	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+
+	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
 }
 
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
-- 
cgit v1.2.3-70-g09d2


From 867d79fb9a4d5929ad8335c896fcfe11c3b2ef14 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 21 Jul 2008 09:54:18 -0700
Subject: net: In __netif_schedule() use WARN_ON instead of BUG_ON

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7e2d5274333..cbc34c0db37 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1327,7 +1327,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 void __netif_schedule(struct Qdisc *q)
 {
-	BUG_ON(q == &noop_qdisc);
+	if (WARN_ON_ONCE(q == &noop_qdisc))
+		return;
 
 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
 		struct softnet_data *sd;
-- 
cgit v1.2.3-70-g09d2


From d3678b463df73f5060d7420915080e19baeb379b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 21 Jul 2008 09:56:13 -0700
Subject: Revert "pkt_sched: Make default qdisc nonshared-multiqueue safe."

This reverts commit a0c80b80e0fb48129e4e9d6a9ede914f9ff1850d.

After discussions with Jamal and Herbert on netdev, we should
provide at least minimal prioritization at the qdisc level
even in multiqueue situations.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 99 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 77 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 09dead33580..cb625b4d6da 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -356,44 +356,99 @@ static struct Qdisc noqueue_qdisc = {
 };
 
 
-static int fifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+static const u8 prio2band[TC_PRIO_MAX+1] =
+	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
+
+/* 3-band FIFO queue: old style, but should be a bit faster than
+   generic prio+fifo combination.
+ */
+
+#define PFIFO_FAST_BANDS 3
+
+static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
+					     struct Qdisc *qdisc)
+{
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+	return list + prio2band[skb->priority & TC_PRIO_MAX];
+}
+
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
-	struct sk_buff_head *list = &qdisc->q;
+	struct sk_buff_head *list = prio2list(skb, qdisc);
 
-	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len)
+	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
+		qdisc->q.qlen++;
 		return __qdisc_enqueue_tail(skb, qdisc, list);
+	}
 
 	return qdisc_drop(skb, qdisc);
 }
 
-static struct sk_buff *fifo_fast_dequeue(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
 {
-	struct sk_buff_head *list = &qdisc->q;
+	int prio;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
 
-	if (!skb_queue_empty(list))
-		return __qdisc_dequeue_head(qdisc, list);
+	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+		if (!skb_queue_empty(list + prio)) {
+			qdisc->q.qlen--;
+			return __qdisc_dequeue_head(qdisc, list + prio);
+		}
+	}
 
 	return NULL;
 }
 
-static int fifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
-	return __qdisc_requeue(skb, qdisc, &qdisc->q);
+	qdisc->q.qlen++;
+	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
 }
 
-static void fifo_fast_reset(struct Qdisc* qdisc)
+static void pfifo_fast_reset(struct Qdisc* qdisc)
 {
-	__qdisc_reset_queue(qdisc, &qdisc->q);
+	int prio;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
+		__qdisc_reset_queue(qdisc, list + prio);
+
 	qdisc->qstats.backlog = 0;
+	qdisc->q.qlen = 0;
 }
 
-static struct Qdisc_ops fifo_fast_ops __read_mostly = {
-	.id		=	"fifo_fast",
-	.priv_size	=	0,
-	.enqueue	=	fifo_fast_enqueue,
-	.dequeue	=	fifo_fast_dequeue,
-	.requeue	=	fifo_fast_requeue,
-	.reset		=	fifo_fast_reset,
+static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
+{
+	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
+
+	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+}
+
+static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
+{
+	int prio;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
+		skb_queue_head_init(list + prio);
+
+	return 0;
+}
+
+static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
+	.id		=	"pfifo_fast",
+	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
+	.enqueue	=	pfifo_fast_enqueue,
+	.dequeue	=	pfifo_fast_dequeue,
+	.requeue	=	pfifo_fast_requeue,
+	.init		=	pfifo_fast_init,
+	.reset		=	pfifo_fast_reset,
+	.dump		=	pfifo_fast_dump,
 	.owner		=	THIS_MODULE,
 };
 
@@ -522,7 +577,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
 
 	if (dev->tx_queue_len) {
 		qdisc = qdisc_create_dflt(dev, dev_queue,
-					  &fifo_fast_ops, TC_H_ROOT);
+					  &pfifo_fast_ops, TC_H_ROOT);
 		if (!qdisc) {
 			printk(KERN_INFO "%s: activation failed\n", dev->name);
 			return;
@@ -550,9 +605,9 @@ void dev_activate(struct net_device *dev)
 	int need_watchdog;
 
 	/* No queueing discipline is attached to device;
-	 * create default one i.e. fifo_fast for devices,
-	 * which need queueing and noqueue_qdisc for
-	 * virtual interfaces.
+	   create default one i.e. pfifo_fast for devices,
+	   which need queueing and noqueue_qdisc for
+	   virtual interfaces
 	 */
 
 	if (dev_all_qdisc_sleeping_noop(dev))
-- 
cgit v1.2.3-70-g09d2


From 0dbff689c2f299e8f63911247925f2728d087688 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 21 Jul 2008 10:00:51 -0700
Subject: netfilter: nf_nat_core: eliminate useless find_appropriate_src for
 IP_NAT_RANGE_PROTO_RANDOM

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index d2a887fc8d9..6c6a3cba8d5 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -240,12 +240,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   This is only required for source (ie. NAT/masq) mappings.
 	   So far, we don't do local source mappings, so multiple
 	   manips not an issue.  */
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == IP_NAT_MANIP_SRC &&
+	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
 		if (find_appropriate_src(orig_tuple, tuple, range)) {
 			pr_debug("get_unique_tuple: Found current src map\n");
-			if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
-				if (!nf_nat_used_tuple(tuple, ct))
-					return;
+			if (!nf_nat_used_tuple(tuple, ct))
+				return;
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 584015727a3b88b46602b20077b46cd04f8b4ab3 Mon Sep 17 00:00:00 2001
From: Krzysztof Piotr Oledzki <ole@ans.pl>
Date: Mon, 21 Jul 2008 10:01:34 -0700
Subject: netfilter: accounting rework: ct_extend + 64bit counters (v4)

Initially netfilter has had 64bit counters for conntrack-based accounting, but
it was changed in 2.6.14 to save memory. Unfortunately in-kernel 64bit counters are
still required, for example for "connbytes" extension. However, 64bit counters
waste a lot of memory and it was not possible to enable/disable it runtime.

This patch:
 - reimplements accounting with respect to the extension infrastructure,
 - makes one global version of seq_print_acct() instead of two seq_print_counters(),
 - makes it possible to enable it at boot time (for CONFIG_SYSCTL/CONFIG_SYSFS=n),
 - makes it possible to enable/disable it at runtime by sysctl or sysfs,
 - extends counters from 32bit to 64bit,
 - renames ip_conntrack_counter -> nf_conn_counter,
 - enables accounting code unconditionally (no longer depends on CONFIG_NF_CT_ACCT),
 - set initial accounting enable state based on CONFIG_NF_CT_ACCT
 - removes buggy IPCT_COUNTER_FILLING event handling.

If accounting is enabled newly created connections get additional acct extend.
Old connections are not changed as it is not possible to add a ct_extend area
to confirmed conntrack. Accounting is performed for all connections with
acct extend regardless of a current state of "net.netfilter.nf_conntrack_acct".

Signed-off-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/feature-removal-schedule.txt         |  10 ++
 Documentation/kernel-parameters.txt                |   7 ++
 include/linux/netfilter/nf_conntrack_common.h      |   8 +-
 include/linux/netfilter/nfnetlink_conntrack.h      |   8 +-
 include/net/netfilter/nf_conntrack.h               |   6 --
 include/net/netfilter/nf_conntrack_acct.h          |  51 ++++++++++
 include/net/netfilter/nf_conntrack_extend.h        |   2 +
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c   |  18 +---
 net/netfilter/Kconfig                              |   9 ++
 net/netfilter/Makefile                             |   2 +-
 net/netfilter/nf_conntrack_acct.c                  | 104 +++++++++++++++++++++
 net/netfilter/nf_conntrack_core.c                  |  39 +++++---
 net/netfilter/nf_conntrack_netlink.c               |  44 +++++----
 net/netfilter/nf_conntrack_standalone.c            |  18 +---
 net/netfilter/xt_connbytes.c                       |   8 +-
 15 files changed, 248 insertions(+), 86 deletions(-)
 create mode 100644 include/net/netfilter/nf_conntrack_acct.h
 create mode 100644 net/netfilter/nf_conntrack_acct.c

(limited to 'net')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 86334b6f823..9f73587219e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -336,3 +336,13 @@ When:	After the only user (hal) has seen a release with the patches
 Why:	Over 1K .text/.data size reduction, data is available in other
 	ways (ioctls)
 Who:	Johannes Berg <johannes@sipsolutions.net>
+
+---------------------------
+
+What: CONFIG_NF_CT_ACCT
+When: 2.6.29
+Why:  Accounting can now be enabled/disabled without kernel recompilation.
+      Currently used only to set a default value for a feature that is also
+      controlled by a kernel/module/sysfs/sysctl parameter.
+Who:  Krzysztof Piotr Oledzki <ole@ans.pl>
+
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 09ad7450647..e4ef2758440 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1279,6 +1279,13 @@ and is between 256 and 4096 characters. It is defined in the file
 			This usage is only documented in each driver source
 			file if at all.
 
+	nf_conntrack.acct=
+			[NETFILTER] Enable connection tracking flow accounting
+			0 to disable accounting
+			1 to enable accounting
+			Default value depends on CONFIG_NF_CT_ACCT that is
+			going to be removed in 2.6.29.
+
 	nfsaddrs=	[NFS]
 			See Documentation/filesystems/nfsroot.txt.
 
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index bad1eb760f6..885cbe28226 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -122,7 +122,7 @@ enum ip_conntrack_events
 	IPCT_NATINFO_BIT = 10,
 	IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
 
-	/* Counter highest bit has been set */
+	/* Counter highest bit has been set, unused */
 	IPCT_COUNTER_FILLING_BIT = 11,
 	IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
 
@@ -145,12 +145,6 @@ enum ip_conntrack_expect_events {
 };
 
 #ifdef __KERNEL__
-struct ip_conntrack_counter
-{
-	u_int32_t packets;
-	u_int32_t bytes;
-};
-
 struct ip_conntrack_stat
 {
 	unsigned int searched;
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 759bc043dc6..c19595c8930 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -115,10 +115,10 @@ enum ctattr_protoinfo_sctp {
 
 enum ctattr_counters {
 	CTA_COUNTERS_UNSPEC,
-	CTA_COUNTERS_PACKETS,		/* old 64bit counters */
-	CTA_COUNTERS_BYTES,		/* old 64bit counters */
-	CTA_COUNTERS32_PACKETS,
-	CTA_COUNTERS32_BYTES,
+	CTA_COUNTERS_PACKETS,		/* 64bit counters */
+	CTA_COUNTERS_BYTES,		/* 64bit counters */
+	CTA_COUNTERS32_PACKETS,		/* old 32bit counters, unused */
+	CTA_COUNTERS32_BYTES,		/* old 32bit counters, unused */
 	__CTA_COUNTERS_MAX
 };
 #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 8f5b75734dd..0741ad592da 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -88,7 +88,6 @@ struct nf_conn_help {
 	u8 expecting[NF_CT_MAX_EXPECT_CLASSES];
 };
 
-
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
@@ -111,11 +110,6 @@ struct nf_conn
 	/* Timer function; drops refcnt when it goes off. */
 	struct timer_list timeout;
 
-#ifdef CONFIG_NF_CT_ACCT
-	/* Accounting Information (same cache line as other written members) */
-	struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
-#endif
-
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	u_int32_t mark;
 #endif
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
new file mode 100644
index 00000000000..5d5ae55d54c
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -0,0 +1,51 @@
+/*
+ * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _NF_CONNTRACK_ACCT_H
+#define _NF_CONNTRACK_ACCT_H
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+struct nf_conn_counter {
+	u_int64_t packets;
+	u_int64_t bytes;
+};
+
+extern int nf_ct_acct;
+
+static inline
+struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
+{
+	return nf_ct_ext_find(ct, NF_CT_EXT_ACCT);
+}
+
+static inline
+struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+	struct nf_conn_counter *acct;
+
+	if (!nf_ct_acct)
+		return NULL;
+
+	acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp);
+	if (!acct)
+		pr_debug("failed to add accounting extension area");
+
+
+	return acct;
+};
+
+extern unsigned int
+seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir);
+
+extern int nf_conntrack_acct_init(void);
+extern void nf_conntrack_acct_fini(void);
+
+#endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index f80c0ed6d87..da8ee52613a 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -7,11 +7,13 @@ enum nf_ct_ext_id
 {
 	NF_CT_EXT_HELPER,
 	NF_CT_EXT_NAT,
+	NF_CT_EXT_ACCT,
 	NF_CT_EXT_NUM,
 };
 
 #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
 #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
+#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 40a46d48249..3a020720e40 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -18,19 +18,7 @@
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
-
-#ifdef CONFIG_NF_CT_ACCT
-static unsigned int
-seq_print_counters(struct seq_file *s,
-		   const struct ip_conntrack_counter *counter)
-{
-	return seq_printf(s, "packets=%llu bytes=%llu ",
-			  (unsigned long long)counter->packets,
-			  (unsigned long long)counter->bytes);
-}
-#else
-#define seq_print_counters(x, y)	0
-#endif
+#include <net/netfilter/nf_conntrack_acct.h>
 
 struct ct_iter_state {
 	unsigned int bucket;
@@ -127,7 +115,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 			l3proto, l4proto))
 		return -ENOSPC;
 
-	if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL]))
+	if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
 		return -ENOSPC;
 
 	if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
@@ -138,7 +126,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 			l3proto, l4proto))
 		return -ENOSPC;
 
-	if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY]))
+	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
 		return -ENOSPC;
 
 	if (test_bit(IPS_ASSURED_BIT, &ct->status))
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 316c7af1d2b..ee898e74808 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -49,6 +49,15 @@ config NF_CT_ACCT
 	  Those counters can be used for flow-based accounting or the
 	  `connbytes' match.
 
+	  Please note that currently this option only sets a default state.
+	  You may change it at boot time with nf_conntrack.acct=0/1 kernel
+	  paramater or by loading the nf_conntrack module with acct=0/1.
+
+	  You may also disable/enable it on a running system with:
+	   sysctl net.netfilter.nf_conntrack_acct=0/1
+
+	  This option will be removed in 2.6.29.
+
 	  If unsure, say `N'.
 
 config NF_CONNTRACK_MARK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5c4b183f642..3bd2cc556ae 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,6 @@
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
 
-nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o
+nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
new file mode 100644
index 00000000000..59bd8b903a1
--- /dev/null
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -0,0 +1,104 @@
+/* Accouting handling for netfilter. */
+
+/*
+ * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/netfilter.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+
+#ifdef CONFIG_NF_CT_ACCT
+#define NF_CT_ACCT_DEFAULT 1
+#else
+#define NF_CT_ACCT_DEFAULT 0
+#endif
+
+int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
+EXPORT_SYMBOL_GPL(nf_ct_acct);
+
+module_param_named(acct, nf_ct_acct, bool, 0644);
+MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *acct_sysctl_header;
+static struct ctl_table acct_sysctl_table[] = {
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nf_conntrack_acct",
+		.data		= &nf_ct_acct,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{}
+};
+#endif /* CONFIG_SYSCTL */
+
+unsigned int
+seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
+{
+	struct nf_conn_counter *acct;
+
+	acct = nf_conn_acct_find(ct);
+	if (!acct)
+		return 0;
+
+	return seq_printf(s, "packets=%llu bytes=%llu ",
+			  (unsigned long long)acct[dir].packets,
+			  (unsigned long long)acct[dir].bytes);
+};
+EXPORT_SYMBOL_GPL(seq_print_acct);
+
+static struct nf_ct_ext_type acct_extend __read_mostly = {
+	.len	= sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]),
+	.align	= __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]),
+	.id	= NF_CT_EXT_ACCT,
+};
+
+int nf_conntrack_acct_init(void)
+{
+	int ret;
+
+#ifdef CONFIG_NF_CT_ACCT
+	printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
+	printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
+	printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
+#endif
+
+	ret = nf_ct_extend_register(&acct_extend);
+	if (ret < 0) {
+		printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
+		return ret;
+	}
+
+#ifdef CONFIG_SYSCTL
+	acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
+				acct_sysctl_table);
+
+	if (!acct_sysctl_header) {
+		nf_ct_extend_unregister(&acct_extend);
+
+		printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
+		return -ENOMEM;
+	}
+#endif
+
+	return 0;
+}
+
+void nf_conntrack_acct_fini(void)
+{
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(acct_sysctl_header);
+#endif
+	nf_ct_extend_unregister(&acct_extend);
+}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 28d03e64200..c519d090bdb 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -37,6 +37,7 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_acct.h>
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
@@ -555,6 +556,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		return NULL;
 	}
 
+	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+
 	spin_lock_bh(&nf_conntrack_lock);
 	exp = nf_ct_find_expectation(tuple);
 	if (exp) {
@@ -828,17 +831,16 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 	}
 
 acct:
-#ifdef CONFIG_NF_CT_ACCT
 	if (do_acct) {
-		ct->counters[CTINFO2DIR(ctinfo)].packets++;
-		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
-			skb->len - skb_network_offset(skb);
+		struct nf_conn_counter *acct;
 
-		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
-		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
-			event |= IPCT_COUNTER_FILLING;
+		acct = nf_conn_acct_find(ct);
+		if (acct) {
+			acct[CTINFO2DIR(ctinfo)].packets++;
+			acct[CTINFO2DIR(ctinfo)].bytes +=
+				skb->len - skb_network_offset(skb);
+		}
 	}
-#endif
 
 	spin_unlock_bh(&nf_conntrack_lock);
 
@@ -853,15 +855,19 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       int do_acct)
 {
-#ifdef CONFIG_NF_CT_ACCT
 	if (do_acct) {
+		struct nf_conn_counter *acct;
+
 		spin_lock_bh(&nf_conntrack_lock);
-		ct->counters[CTINFO2DIR(ctinfo)].packets++;
-		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
-			skb->len - skb_network_offset(skb);
+		acct = nf_conn_acct_find(ct);
+		if (acct) {
+			acct[CTINFO2DIR(ctinfo)].packets++;
+			acct[CTINFO2DIR(ctinfo)].bytes +=
+				skb->len - skb_network_offset(skb);
+		}
 		spin_unlock_bh(&nf_conntrack_lock);
 	}
-#endif
+
 	if (del_timer(&ct->timeout)) {
 		ct->timeout.function((unsigned long)ct);
 		return true;
@@ -1029,6 +1035,7 @@ void nf_conntrack_cleanup(void)
 	nf_conntrack_proto_fini();
 	nf_conntrack_helper_fini();
 	nf_conntrack_expect_fini();
+	nf_conntrack_acct_fini();
 }
 
 struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
@@ -1168,6 +1175,10 @@ int __init nf_conntrack_init(void)
 	if (ret < 0)
 		goto out_fini_expect;
 
+	ret = nf_conntrack_acct_init();
+	if (ret < 0)
+		goto out_fini_helper;
+
 	/* For use by REJECT target */
 	rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
 	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
@@ -1180,6 +1191,8 @@ int __init nf_conntrack_init(void)
 
 	return ret;
 
+out_fini_helper:
+	nf_conntrack_helper_fini();
 out_fini_expect:
 	nf_conntrack_expect_fini();
 out_fini_proto:
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 95a7967731f..105a616c5c7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -37,6 +37,7 @@
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_acct.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_protocol.h>
@@ -206,22 +207,26 @@ nla_put_failure:
 	return -1;
 }
 
-#ifdef CONFIG_NF_CT_ACCT
 static int
 ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
 			enum ip_conntrack_dir dir)
 {
 	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
 	struct nlattr *nest_count;
+	const struct nf_conn_counter *acct;
+
+	acct = nf_conn_acct_find(ct);
+	if (!acct)
+		return 0;
 
 	nest_count = nla_nest_start(skb, type | NLA_F_NESTED);
 	if (!nest_count)
 		goto nla_put_failure;
 
-	NLA_PUT_BE32(skb, CTA_COUNTERS32_PACKETS,
-		     htonl(ct->counters[dir].packets));
-	NLA_PUT_BE32(skb, CTA_COUNTERS32_BYTES,
-		     htonl(ct->counters[dir].bytes));
+	NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS,
+		     cpu_to_be64(acct[dir].packets));
+	NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES,
+		     cpu_to_be64(acct[dir].bytes));
 
 	nla_nest_end(skb, nest_count);
 
@@ -230,9 +235,6 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
 nla_put_failure:
 	return -1;
 }
-#else
-#define ctnetlink_dump_counters(a, b, c) (0)
-#endif
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
 static inline int
@@ -501,11 +503,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 			goto nla_put_failure;
 #endif
 
-		if (events & IPCT_COUNTER_FILLING &&
-		    (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-		     ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
-			goto nla_put_failure;
-
 		if (events & IPCT_RELATED &&
 		    ctnetlink_dump_master(skb, ct) < 0)
 			goto nla_put_failure;
@@ -576,11 +573,15 @@ restart:
 				cb->args[1] = (unsigned long)ct;
 				goto out;
 			}
-#ifdef CONFIG_NF_CT_ACCT
+
 			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
-						IPCTNL_MSG_CT_GET_CTRZERO)
-				memset(&ct->counters, 0, sizeof(ct->counters));
-#endif
+						IPCTNL_MSG_CT_GET_CTRZERO) {
+				struct nf_conn_counter *acct;
+
+				acct = nf_conn_acct_find(ct);
+				if (acct)
+					memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
+			}
 		}
 		if (cb->args[1]) {
 			cb->args[1] = 0;
@@ -832,14 +833,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	u_int8_t u3 = nfmsg->nfgen_family;
 	int err = 0;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-#ifndef CONFIG_NF_CT_ACCT
-		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
-			return -ENOTSUPP;
-#endif
+	if (nlh->nlmsg_flags & NLM_F_DUMP)
 		return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
 					  ctnetlink_done);
-	}
 
 	if (cda[CTA_TUPLE_ORIG])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
@@ -1152,6 +1148,8 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 			goto err;
 	}
 
+	nf_ct_acct_ext_add(ct, GFP_KERNEL);
+
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
 		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 46ea542d0df..869ef9349d0 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -25,6 +25,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
 
 MODULE_LICENSE("GPL");
 
@@ -38,19 +39,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 }
 EXPORT_SYMBOL_GPL(print_tuple);
 
-#ifdef CONFIG_NF_CT_ACCT
-static unsigned int
-seq_print_counters(struct seq_file *s,
-		   const struct ip_conntrack_counter *counter)
-{
-	return seq_printf(s, "packets=%llu bytes=%llu ",
-			  (unsigned long long)counter->packets,
-			  (unsigned long long)counter->bytes);
-}
-#else
-#define seq_print_counters(x, y)	0
-#endif
-
 struct ct_iter_state {
 	unsigned int bucket;
 };
@@ -146,7 +134,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 			l3proto, l4proto))
 		return -ENOSPC;
 
-	if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL]))
+	if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
 		return -ENOSPC;
 
 	if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
@@ -157,7 +145,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 			l3proto, l4proto))
 		return -ENOSPC;
 
-	if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY]))
+	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
 		return -ENOSPC;
 
 	if (test_bit(IPS_ASSURED_BIT, &ct->status))
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index d7e8983cd37..3e39c4fe193 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -8,6 +8,7 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_connbytes.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_acct.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -27,12 +28,15 @@ connbytes_mt(const struct sk_buff *skb, const struct net_device *in,
 	u_int64_t what = 0;	/* initialize to make gcc happy */
 	u_int64_t bytes = 0;
 	u_int64_t pkts = 0;
-	const struct ip_conntrack_counter *counters;
+	const struct nf_conn_counter *counters;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct)
 		return false;
-	counters = ct->counters;
+
+	counters = nf_conn_acct_find(ct);
+	if (!counters)
+		return false;
 
 	switch (sinfo->what) {
 	case XT_CONNBYTES_PKTS:
-- 
cgit v1.2.3-70-g09d2


From 280763c053fee297d95b474f2c145990670371e6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 21 Jul 2008 10:02:12 -0700
Subject: netfilter: xt_time: fix time's time_mt()'s use of do_div()

Fix netfilter xt_time's time_mt()'s use of do_div() on an s64 by using
div_s64() instead.

This was introduced by patch ee4411a1b1e0b679c99686629b5eab5a072ce49f
("[NETFILTER]: x_tables: add xt_time match").

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_time.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index ed76baab473..9f328593287 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -173,7 +173,7 @@ time_mt(const struct sk_buff *skb, const struct net_device *in,
 		__net_timestamp((struct sk_buff *)skb);
 
 	stamp = ktime_to_ns(skb->tstamp);
-	do_div(stamp, NSEC_PER_SEC);
+	stamp = div_s64(stamp, NSEC_PER_SEC);
 
 	if (info->flags & XT_TIME_LOCAL_TZ)
 		/* Adjust for local timezone */
-- 
cgit v1.2.3-70-g09d2


From 72961ecf84d67d6359a1b30f9b2a8427f13e1e71 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Mon, 21 Jul 2008 10:02:35 -0700
Subject: netfilter: nfnetlink_log: send complete hardware header

This patch adds some fields to NFLOG to be able to send the complete
hardware header with all necessary informations.
It sends to userspace:
 * the type of hardware link
 * the lenght of hardware header
 * the hardware header

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink_log.h | 3 +++
 net/netfilter/nfnetlink_log.c           | 8 ++++++++
 2 files changed, 11 insertions(+)

(limited to 'net')

diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h
index a8572133292..f661731f3cb 100644
--- a/include/linux/netfilter/nfnetlink_log.h
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -48,6 +48,9 @@ enum nfulnl_attr_type {
 	NFULA_SEQ,			/* instance-local sequence number */
 	NFULA_SEQ_GLOBAL,		/* global sequence number */
 	NFULA_GID,			/* group id of socket */
+	NFULA_HWTYPE,			/* hardware type */
+	NFULA_HWHEADER,			/* hardware header */
+	NFULA_HWLEN,			/* hardware header length */
 
 	__NFULA_MAX
 };
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b8173af8c24..9a35b57ab76 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -453,6 +453,14 @@ __build_packet_message(struct nfulnl_instance *inst,
 		}
 	}
 
+	if (indev && skb_mac_header_was_set(skb)) {
+		NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type));
+		NLA_PUT_BE16(inst->skb, NFULA_HWLEN,
+			     htons(skb->dev->hard_header_len));
+		NLA_PUT(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len,
+			skb_mac_header(skb));
+	}
+
 	if (skb->tstamp.tv64) {
 		struct nfulnl_msg_packet_timestamp ts;
 		struct timeval tv = ktime_to_timeval(skb->tstamp);
-- 
cgit v1.2.3-70-g09d2


From db1a75bdcc1766dc7e1fae9201ae287dcbcb6c66 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 21 Jul 2008 10:02:59 -0700
Subject: netfilter: xt_TCPMSS: collapse tcpmss_reverse_mtu{4,6} into one
 function

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_TCPMSS.c | 42 +++++++++++++-----------------------------
 1 file changed, 13 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 217e2b68632..beb5094703c 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -147,17 +147,21 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	return TCPOLEN_MSS;
 }
 
-static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph)
+static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
+				    unsigned int family)
 {
-	struct flowi fl = {
-		.fl4_dst = iph->saddr,
-	};
+	struct flowi fl = {};
 	const struct nf_afinfo *ai;
 	struct rtable *rt = NULL;
 	u_int32_t mtu     = ~0U;
 
+	if (family == PF_INET)
+		fl.fl4_dst = ip_hdr(skb)->saddr;
+	else
+		fl.fl6_dst = ipv6_hdr(skb)->saddr;
+
 	rcu_read_lock();
-	ai = nf_get_afinfo(AF_INET);
+	ai = nf_get_afinfo(family);
 	if (ai != NULL)
 		ai->route((struct dst_entry **)&rt, &fl);
 	rcu_read_unlock();
@@ -178,7 +182,8 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
 	__be16 newlen;
 	int ret;
 
-	ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu4(iph),
+	ret = tcpmss_mangle_packet(skb, targinfo,
+				   tcpmss_reverse_mtu(skb, PF_INET),
 				   iph->ihl * 4,
 				   sizeof(*iph) + sizeof(struct tcphdr));
 	if (ret < 0)
@@ -193,28 +198,6 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
 }
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-static u_int32_t tcpmss_reverse_mtu6(const struct ipv6hdr *iph)
-{
-	struct flowi fl = {
-		.fl6_dst = iph->saddr,
-	};
-	const struct nf_afinfo *ai;
-	struct rtable *rt = NULL;
-	u_int32_t mtu     = ~0U;
-
-	rcu_read_lock();
-	ai = nf_get_afinfo(AF_INET6);
-	if (ai != NULL)
-		ai->route((struct dst_entry **)&rt, &fl);
-	rcu_read_unlock();
-
-	if (rt != NULL) {
-		mtu = dst_mtu(&rt->u.dst);
-		dst_release(&rt->u.dst);
-	}
-	return mtu;
-}
-
 static unsigned int
 tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
            const struct net_device *out, unsigned int hooknum,
@@ -229,7 +212,8 @@ tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
 	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
 	if (tcphoff < 0)
 		return NF_DROP;
-	ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu6(ipv6h),
+	ret = tcpmss_mangle_packet(skb, targinfo,
+				   tcpmss_reverse_mtu(skb, PF_INET6),
 				   tcphoff,
 				   sizeof(*ipv6h) + sizeof(struct tcphdr));
 	if (ret < 0)
-- 
cgit v1.2.3-70-g09d2


From c71529e42ce39c167dc53430cb8f3d5634af77df Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Jul 2008 10:03:23 -0700
Subject: netfilter: nf_nat_sip: c= is optional for session

According to RFC2327, the connection information is optional
in the session description since it can be specified in the
media description instead.

My provider does exactly that and does not provide any connection
information in the session description.  As a result the new
kernel drops all invite responses.

This patch makes it optional as documented.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_sip.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 4334d5cabc5..14544320c54 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -318,11 +318,11 @@ static int mangle_content_len(struct sk_buff *skb,
 			     buffer, buflen);
 }
 
-static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
-				  unsigned int dataoff, unsigned int *datalen,
-				  enum sdp_header_types type,
-				  enum sdp_header_types term,
-				  char *buffer, int buflen)
+static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
+			     unsigned int dataoff, unsigned int *datalen,
+			     enum sdp_header_types type,
+			     enum sdp_header_types term,
+			     char *buffer, int buflen)
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
@@ -330,9 +330,9 @@ static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
 
 	if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term,
 				  &matchoff, &matchlen) <= 0)
-		return 0;
+		return -ENOENT;
 	return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
-			     buffer, buflen);
+			     buffer, buflen) ? 0 : -EINVAL;
 }
 
 static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
@@ -346,8 +346,8 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
 	unsigned int buflen;
 
 	buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip));
-	if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term,
-			       buffer, buflen))
+	if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term,
+			      buffer, buflen))
 		return 0;
 
 	return mangle_content_len(skb, dptr, datalen);
@@ -381,15 +381,27 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
 
 	/* Mangle session description owner and contact addresses */
 	buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip));
-	if (!mangle_sdp_packet(skb, dptr, dataoff, datalen,
+	if (mangle_sdp_packet(skb, dptr, dataoff, datalen,
 			       SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
 			       buffer, buflen))
 		return 0;
 
-	if (!mangle_sdp_packet(skb, dptr, dataoff, datalen,
-			       SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
-			       buffer, buflen))
+	switch (mangle_sdp_packet(skb, dptr, dataoff, datalen,
+				  SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
+				  buffer, buflen)) {
+	case 0:
+	/*
+	 * RFC 2327:
+	 *
+	 * Session description
+	 *
+	 * c=* (connection information - not required if included in all media)
+	 */
+	case -ENOENT:
+		break;
+	default:
 		return 0;
+	}
 
 	return mangle_content_len(skb, dptr, datalen);
 }
-- 
cgit v1.2.3-70-g09d2


From 5547cd0ae8b46db9a084505239294eed9b8c8e2d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Jul 2008 10:03:49 -0700
Subject: netfilter: nf_conntrack_sctp: fix sparse warnings

Introduced by a258860e (netfilter: ctnetlink: add full support for SCTP to ctnetlink):

net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: incorrect type in argument 1 (different base types)
net/netfilter/nf_conntrack_proto_sctp.c:483:2:    expected unsigned int [unsigned] [usertype] x
net/netfilter/nf_conntrack_proto_sctp.c:483:2:    got restricted unsigned int const <noident>
net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:483:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: incorrect type in argument 1 (different base types)
net/netfilter/nf_conntrack_proto_sctp.c:487:2:    expected unsigned int [unsigned] [usertype] x
net/netfilter/nf_conntrack_proto_sctp.c:487:2:    got restricted unsigned int const <noident>
net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:487:2: warning: cast from restricted type
net/netfilter/nf_conntrack_proto_sctp.c:532:42: warning: incorrect type in assignment (different base types)
net/netfilter/nf_conntrack_proto_sctp.c:532:42:    expected restricted unsigned int <noident>
net/netfilter/nf_conntrack_proto_sctp.c:532:42:    got unsigned int
net/netfilter/nf_conntrack_proto_sctp.c:534:39: warning: incorrect type in assignment (different base types)
net/netfilter/nf_conntrack_proto_sctp.c:534:39:    expected restricted unsigned int <noident>
net/netfilter/nf_conntrack_proto_sctp.c:534:39:    got unsigned int

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 41183a4d2d6..30aa5b94a77 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -482,11 +482,11 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
 
 	NLA_PUT_BE32(skb,
 		     CTA_PROTOINFO_SCTP_VTAG_ORIGINAL,
-		     htonl(ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]));
+		     ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]);
 
 	NLA_PUT_BE32(skb,
 		     CTA_PROTOINFO_SCTP_VTAG_REPLY,
-		     htonl(ct->proto.sctp.vtag[IP_CT_DIR_REPLY]));
+		     ct->proto.sctp.vtag[IP_CT_DIR_REPLY]);
 
 	read_unlock_bh(&sctp_lock);
 
@@ -530,9 +530,9 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
 	write_lock_bh(&sctp_lock);
 	ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
 	ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
-		ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]));
+		nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
 	ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
-		ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]));
+		nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
 	write_unlock_bh(&sctp_lock);
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 847499ce71bdcc8fc542062df6ebed3e596608dd Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jul 2008 13:21:35 -0700
Subject: ipv6: use timer pending

This fixes the bridge reference count problem and cleanups ipv6 FIB
timer management.  Don't use expires field, because it is not a proper
way to test, instead use timer_pending().

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4de2b9efcac..944095cf5e3 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -661,17 +661,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
 static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
 {
-	if (net->ipv6.ip6_fib_timer->expires == 0 &&
+	if (!timer_pending(net->ipv6.ip6_fib_timer) &&
 	    (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
-		mod_timer(net->ipv6.ip6_fib_timer, jiffies +
-			  net->ipv6.sysctl.ip6_rt_gc_interval);
+		mod_timer(net->ipv6.ip6_fib_timer,
+			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
 void fib6_force_start_gc(struct net *net)
 {
-	if (net->ipv6.ip6_fib_timer->expires == 0)
-		mod_timer(net->ipv6.ip6_fib_timer, jiffies +
-			  net->ipv6.sysctl.ip6_rt_gc_interval);
+	if (!timer_pending(net->ipv6.ip6_fib_timer))
+		mod_timer(net->ipv6.ip6_fib_timer,
+			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 7943986ca1138ac99597b1aa4dc893012dcfdc08 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jul 2008 13:28:44 -0700
Subject: net: use kcalloc in netdev_queue alloc

Minor nit, use size_t for allocation size and kcalloc to allocate
an array. Probably makes no actual code difference.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index cbc34c0db37..1698b399898 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4207,7 +4207,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 {
 	struct netdev_queue *tx;
 	struct net_device *dev;
-	int alloc_size;
+	size_t alloc_size;
 	void *p;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -4227,7 +4227,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-	tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL);
+	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
 	if (!tx) {
 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
 		       "tx qdiscs.\n");
-- 
cgit v1.2.3-70-g09d2


From 6579e57b31d79d31d9b806e41ba48774e73257dc Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 21 Jul 2008 13:31:48 -0700
Subject: net: Print the module name as part of the watchdog message

As suggested by Dave:

This patch adds a function to get the driver name from a struct net_device,
and consequently uses this in the watchdog timeout handler to print as
part of the message.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 20 ++++++++++++++++++++
 net/sched/sch_generic.c   |  6 +++---
 3 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 812bcd8b436..f5ea445f89f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1645,6 +1645,8 @@ extern void dev_seq_stop(struct seq_file *seq, void *v);
 extern int netdev_class_create_file(struct class_attribute *class_attr);
 extern void netdev_class_remove_file(struct class_attribute *class_attr);
 
+extern char *netdev_drivername(struct net_device *dev, char *buffer, int len);
+
 extern void linkwatch_run_queue(void);
 
 extern int netdev_compute_features(unsigned long all, unsigned long one);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1698b399898..ad5598d2bb3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4686,6 +4686,26 @@ err_name:
 	return -ENOMEM;
 }
 
+char *netdev_drivername(struct net_device *dev, char *buffer, int len)
+{
+	struct device_driver *driver;
+	struct device *parent;
+
+	if (len <= 0 || !buffer)
+		return buffer;
+	buffer[0] = 0;
+
+	parent = dev->dev.parent;
+
+	if (!parent)
+		return buffer;
+
+	driver = parent->driver;
+	if (driver && driver->name)
+		strlcpy(buffer, driver->name, len);
+	return buffer;
+}
+
 static void __net_exit netdev_exit(struct net *net)
 {
 	kfree(net->dev_name_head);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cb625b4d6da..4ac7e3a8c25 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -212,9 +212,9 @@ static void dev_watchdog(unsigned long arg)
 			if (some_queue_stopped &&
 			    time_after(jiffies, (dev->trans_start +
 						 dev->watchdog_timeo))) {
-				printk(KERN_INFO "NETDEV WATCHDOG: %s: "
-				       "transmit timed out\n",
-				       dev->name);
+				char drivername[64];
+				printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
+				       dev->name, netdev_drivername(dev, drivername, 64));
 				dev->tx_timeout(dev);
 				WARN_ON_ONCE(1);
 			}
-- 
cgit v1.2.3-70-g09d2


From 47112e25da41d9059626033986dc3353e101f815 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 21 Jul 2008 13:35:08 -0700
Subject: udplite: Protection against coverage value wrap-around

This patch clamps the cscov setsockopt values to a maximum of 0xFFFF.

Setsockopt values greater than 0xffff can cause an unwanted
wrap-around.  Further, IPv6 jumbograms are not supported (RFC 3838,
3.5), so that values greater than 0xffff are not even useful.

Further changes: fixed a typo in the documentation.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/udplite.txt | 2 +-
 net/ipv4/udp.c                       | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt
index 3870f280280..855d8da57a2 100644
--- a/Documentation/networking/udplite.txt
+++ b/Documentation/networking/udplite.txt
@@ -148,7 +148,7 @@
         getsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, &value, ...);
 
   is meaningless (as in TCP). Packets with a zero checksum field are
-  illegal (cf. RFC 3828, sec. 3.1) will be silently discarded.
+  illegal (cf. RFC 3828, sec. 3.1) and will be silently discarded.
 
   4) Fragmentation
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a751770947a..383d17359d0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1325,6 +1325,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
 			val = 8;
+		else if (val > USHORT_MAX)
+			val = USHORT_MAX;
 		up->pcslen = val;
 		up->pcflag |= UDPLITE_SEND_CC;
 		break;
@@ -1337,6 +1339,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Avoid silly minimal values.       */
 			val = 8;
+		else if (val > USHORT_MAX)
+			val = USHORT_MAX;
 		up->pcrlen = val;
 		up->pcflag |= UDPLITE_RECV_CC;
 		break;
-- 
cgit v1.2.3-70-g09d2


From b32d13102d39ed411d152a7ffcc5f66d5b3b1b49 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 21 Jul 2008 18:45:34 -0700
Subject: tcp: Fix bitmask test in tcp_syn_options()

As reported by Alexey Dobriyan:

	  CHECK   net/ipv4/tcp_output.c
	net/ipv4/tcp_output.c:475:7: warning: dubious: !x & y

And sparse is damn right!

	if (unlikely(!OPTION_TS & opts->options))
		    ^^^
		size += TCPOLEN_SACKPERM_ALIGNED;

OPTION_TS is (1 << 1), so condition will never trigger.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1fa683c0ba9..a00532de2a8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -472,7 +472,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	}
 	if (likely(sysctl_tcp_sack)) {
 		opts->options |= OPTION_SACK_ADVERTISE;
-		if (unlikely(!OPTION_TS & opts->options))
+		if (unlikely(!(OPTION_TS & opts->options)))
 			size += TCPOLEN_SACKPERM_ALIGNED;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 16be63fd1670000b96b76cb55b6f1bead21b4c4b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 27 May 2008 11:50:16 -0700
Subject: bluetooth: remove improper bluetooth class symlinks.

Don't create symlinks in a class to a device that is not owned by the
class.  If the bluetooth subsystem really wants to point to all of the
devices it controls, it needs to create real devices, not fake symlinks.

Cc: Maxim Krasnyansky <maxk@qualcomm.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 net/bluetooth/hci_sysfs.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 844ca5f1b2d..c85bf8f678d 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -398,10 +398,6 @@ int hci_register_sysfs(struct hci_dev *hdev)
 		if (device_create_file(dev, bt_attrs[i]) < 0)
 			BT_ERR("Failed to create device attribute");
 
-	if (sysfs_create_link(&bt_class->subsys.kobj,
-				&dev->kobj, kobject_name(&dev->kobj)) < 0)
-		BT_ERR("Failed to create class symlink");
-
 	return 0;
 }
 
@@ -409,9 +405,6 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
 {
 	BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
 
-	sysfs_remove_link(&bt_class->subsys.kobj,
-					kobject_name(&hdev->dev.kobj));
-
 	device_del(&hdev->dev);
 }
 
-- 
cgit v1.2.3-70-g09d2


From d29f749e252bcdbfe7a75a58f0ee92da16f127c0 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Tue, 22 Jul 2008 14:09:06 -0700
Subject: net: Fix build failure with 'make mandocs'.

The function header comments have to go with the functions
they are documenting, or things go horribly wrong when we
try to process them with the docbook tools.

Warning(include/linux/netdevice.h:1006): No description found for parameter 'dev_queue'
Warning(include/linux/netdevice.h:1033): No description found for parameter 'dev_queue'
Warning(include/linux/netdevice.h:1067): No description found for parameter 'dev_queue'
Warning(include/linux/netdevice.h:1093): No description found for parameter 'dev_queue'
Warning(include/linux/netdevice.h:1474): No description found for parameter 'txq'
Error(net/core/dev.c:1674): cannot understand prototype: 'u32 simple_tx_hashrnd; '

Signed-off-by: Dave Jones <davej@redhat.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 58 +++++++++++++++++++++++------------------------
 net/core/dev.c            | 51 ++++++++++++++++++++---------------------
 2 files changed, 54 insertions(+), 55 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f5ea445f89f..b4d056ceab9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -996,17 +996,17 @@ static inline void netif_tx_schedule_all(struct net_device *dev)
 		netif_schedule_queue(netdev_get_tx_queue(dev, i));
 }
 
+static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
+{
+	clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+}
+
 /**
  *	netif_start_queue - allow transmit
  *	@dev: network device
  *
  *	Allow upper layers to call the device hard_start_xmit routine.
  */
-static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
-{
-	clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
-}
-
 static inline void netif_start_queue(struct net_device *dev)
 {
 	netif_tx_start_queue(netdev_get_tx_queue(dev, 0));
@@ -1022,13 +1022,6 @@ static inline void netif_tx_start_all_queues(struct net_device *dev)
 	}
 }
 
-/**
- *	netif_wake_queue - restart transmit
- *	@dev: network device
- *
- *	Allow upper layers to call the device hard_start_xmit routine.
- *	Used for flow control when transmit resources are available.
- */
 static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
 {
 #ifdef CONFIG_NETPOLL_TRAP
@@ -1041,6 +1034,13 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
 		__netif_schedule(dev_queue->qdisc);
 }
 
+/**
+ *	netif_wake_queue - restart transmit
+ *	@dev: network device
+ *
+ *	Allow upper layers to call the device hard_start_xmit routine.
+ *	Used for flow control when transmit resources are available.
+ */
 static inline void netif_wake_queue(struct net_device *dev)
 {
 	netif_tx_wake_queue(netdev_get_tx_queue(dev, 0));
@@ -1056,6 +1056,11 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
 	}
 }
 
+static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
+{
+	set_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+}
+
 /**
  *	netif_stop_queue - stop transmitted packets
  *	@dev: network device
@@ -1063,11 +1068,6 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
  *	Stop upper layers calling the device hard_start_xmit routine.
  *	Used for flow control when transmit resources are unavailable.
  */
-static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
-{
-	set_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
-}
-
 static inline void netif_stop_queue(struct net_device *dev)
 {
 	netif_tx_stop_queue(netdev_get_tx_queue(dev, 0));
@@ -1083,17 +1083,17 @@ static inline void netif_tx_stop_all_queues(struct net_device *dev)
 	}
 }
 
+static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
+{
+	return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+}
+
 /**
  *	netif_queue_stopped - test if transmit queue is flowblocked
  *	@dev: network device
  *
  *	Test if transmit queue on device is currently unable to send.
  */
-static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
-{
-	return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
-}
-
 static inline int netif_queue_stopped(const struct net_device *dev)
 {
 	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
@@ -1463,13 +1463,6 @@ static inline void netif_rx_complete(struct net_device *dev,
 	local_irq_restore(flags);
 }
 
-/**
- *	netif_tx_lock - grab network device transmit lock
- *	@dev: network device
- *	@cpu: cpu number of lock owner
- *
- * Get network device transmit lock
- */
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 {
 	spin_lock(&txq->_xmit_lock);
@@ -1482,6 +1475,13 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 	txq->xmit_lock_owner = smp_processor_id();
 }
 
+/**
+ *	netif_tx_lock - grab network device transmit lock
+ *	@dev: network device
+ *	@cpu: cpu number of lock owner
+ *
+ * Get network device transmit lock
+ */
 static inline void netif_tx_lock(struct net_device *dev)
 {
 	int cpu = smp_processor_id();
diff --git a/net/core/dev.c b/net/core/dev.c
index ad5598d2bb3..65eea83613e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1645,32 +1645,6 @@ out_kfree_skb:
 	return 0;
 }
 
-/**
- *	dev_queue_xmit - transmit a buffer
- *	@skb: buffer to transmit
- *
- *	Queue a buffer for transmission to a network device. The caller must
- *	have set the device and priority and built the buffer before calling
- *	this function. The function can be called from an interrupt.
- *
- *	A negative errno code is returned on a failure. A success does not
- *	guarantee the frame will be transmitted as it may be dropped due
- *	to congestion or traffic shaping.
- *
- * -----------------------------------------------------------------------------------
- *      I notice this method can also return errors from the queue disciplines,
- *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
- *      be positive.
- *
- *      Regardless of the return value, the skb is consumed, so it is currently
- *      difficult to retry a send to this method.  (You can bump the ref count
- *      before sending to hold a reference for retry if you are careful.)
- *
- *      When calling this method, interrupts MUST be enabled.  This is because
- *      the BH enable code must have IRQs enabled so that it will not deadlock.
- *          --BLG
- */
-
 static u32 simple_tx_hashrnd;
 static int simple_tx_hashrnd_initialized = 0;
 
@@ -1738,6 +1712,31 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	return netdev_get_tx_queue(dev, queue_index);
 }
 
+/**
+ *	dev_queue_xmit - transmit a buffer
+ *	@skb: buffer to transmit
+ *
+ *	Queue a buffer for transmission to a network device. The caller must
+ *	have set the device and priority and built the buffer before calling
+ *	this function. The function can be called from an interrupt.
+ *
+ *	A negative errno code is returned on a failure. A success does not
+ *	guarantee the frame will be transmitted as it may be dropped due
+ *	to congestion or traffic shaping.
+ *
+ * -----------------------------------------------------------------------------------
+ *      I notice this method can also return errors from the queue disciplines,
+ *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
+ *      be positive.
+ *
+ *      Regardless of the return value, the skb is consumed, so it is currently
+ *      difficult to retry a send to this method.  (You can bump the ref count
+ *      before sending to hold a reference for retry if you are careful.)
+ *
+ *      When calling this method, interrupts MUST be enabled.  This is because
+ *      the BH enable code must have IRQs enabled so that it will not deadlock.
+ *          --BLG
+ */
 int dev_queue_xmit(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
-- 
cgit v1.2.3-70-g09d2


From cf508b1211dbe576778ff445ea1b4b0bcfa5c4ea Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 22 Jul 2008 14:16:42 -0700
Subject: netdev: Handle ->addr_list_lock just like ->_xmit_lock for lockdep.

The new address list lock needs to handle the same device layering
issues that the _xmit_lock one does.

This integrates work done by Patrick McHardy.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c         |  3 +++
 drivers/net/hamradio/bpqether.c         |  2 ++
 drivers/net/macvlan.c                   |  3 +++
 drivers/net/wireless/hostap/hostap_hw.c |  3 +++
 net/8021q/vlan_dev.c                    |  4 ++++
 net/core/dev.c                          | 27 +++++++++++++++++++++------
 net/netrom/af_netrom.c                  |  2 ++
 net/rose/af_rose.c                      |  2 ++
 8 files changed, 40 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 9737c06045d..a641eeaa2a2 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5041,6 +5041,7 @@ static int bond_check_params(struct bond_params *params)
 }
 
 static struct lock_class_key bonding_netdev_xmit_lock_key;
+static struct lock_class_key bonding_netdev_addr_lock_key;
 
 static void bond_set_lockdep_class_one(struct net_device *dev,
 				       struct netdev_queue *txq,
@@ -5052,6 +5053,8 @@ static void bond_set_lockdep_class_one(struct net_device *dev,
 
 static void bond_set_lockdep_class(struct net_device *dev)
 {
+	lockdep_set_class(&dev->addr_list_lock,
+			  &bonding_netdev_addr_lock_key);
 	netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL);
 }
 
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index b6500b2aacf..58f4b1d7bf1 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -123,6 +123,7 @@ static LIST_HEAD(bpq_devices);
  * off into a separate class since they always nest.
  */
 static struct lock_class_key bpq_netdev_xmit_lock_key;
+static struct lock_class_key bpq_netdev_addr_lock_key;
 
 static void bpq_set_lockdep_class_one(struct net_device *dev,
 				      struct netdev_queue *txq,
@@ -133,6 +134,7 @@ static void bpq_set_lockdep_class_one(struct net_device *dev,
 
 static void bpq_set_lockdep_class(struct net_device *dev)
 {
+	lockdep_set_class(&dev->addr_list_lock, &bpq_netdev_addr_lock_key);
 	netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL);
 }
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index efbc15567dd..42394505bb5 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -276,6 +276,7 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu)
  * separate class since they always nest.
  */
 static struct lock_class_key macvlan_netdev_xmit_lock_key;
+static struct lock_class_key macvlan_netdev_addr_lock_key;
 
 #define MACVLAN_FEATURES \
 	(NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
@@ -295,6 +296,8 @@ static void macvlan_set_lockdep_class_one(struct net_device *dev,
 
 static void macvlan_set_lockdep_class(struct net_device *dev)
 {
+	lockdep_set_class(&dev->addr_list_lock,
+			  &macvlan_netdev_addr_lock_key);
 	netdev_for_each_tx_queue(dev, macvlan_set_lockdep_class_one, NULL);
 }
 
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index 13d5882f1f2..3153fe9d7ce 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -3101,6 +3101,7 @@ static void prism2_clear_set_tim_queue(local_info_t *local)
  * This is a natural nesting, which needs a split lock type.
  */
 static struct lock_class_key hostap_netdev_xmit_lock_key;
+static struct lock_class_key hostap_netdev_addr_lock_key;
 
 static void prism2_set_lockdep_class_one(struct net_device *dev,
 					 struct netdev_queue *txq,
@@ -3112,6 +3113,8 @@ static void prism2_set_lockdep_class_one(struct net_device *dev,
 
 static void prism2_set_lockdep_class(struct net_device *dev)
 {
+	lockdep_set_class(&dev->addr_list_lock,
+			  &hostap_netdev_addr_lock_key);
 	netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL);
 }
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f42bc2b26b8..4bf014e51f8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -569,6 +569,7 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
  * separate class since they always nest.
  */
 static struct lock_class_key vlan_netdev_xmit_lock_key;
+static struct lock_class_key vlan_netdev_addr_lock_key;
 
 static void vlan_dev_set_lockdep_one(struct net_device *dev,
 				     struct netdev_queue *txq,
@@ -581,6 +582,9 @@ static void vlan_dev_set_lockdep_one(struct net_device *dev,
 
 static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
 {
+	lockdep_set_class_and_subclass(&dev->addr_list_lock,
+				       &vlan_netdev_addr_lock_key,
+				       subclass);
 	netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass);
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 65eea83613e..6bf217da9d8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -261,7 +261,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
 
 DEFINE_PER_CPU(struct softnet_data, softnet_data);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#ifdef CONFIG_LOCKDEP
 /*
  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
  * according to dev->type
@@ -301,6 +301,7 @@ static const char *netdev_lock_name[] =
 	 "_xmit_NONE"};
 
 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 
 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 {
@@ -313,8 +314,8 @@ static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 	return ARRAY_SIZE(netdev_lock_type) - 1;
 }
 
-static inline void netdev_set_lockdep_class(spinlock_t *lock,
-					    unsigned short dev_type)
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+						 unsigned short dev_type)
 {
 	int i;
 
@@ -322,9 +323,22 @@ static inline void netdev_set_lockdep_class(spinlock_t *lock,
 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 				   netdev_lock_name[i]);
 }
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+	int i;
+
+	i = netdev_lock_pos(dev->type);
+	lockdep_set_class_and_name(&dev->addr_list_lock,
+				   &netdev_addr_lock_key[i],
+				   netdev_lock_name[i]);
+}
 #else
-static inline void netdev_set_lockdep_class(spinlock_t *lock,
-					    unsigned short dev_type)
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+						 unsigned short dev_type)
+{
+}
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 {
 }
 #endif
@@ -3851,7 +3865,7 @@ static void __netdev_init_queue_locks_one(struct net_device *dev,
 					  void *_unused)
 {
 	spin_lock_init(&dev_queue->_xmit_lock);
-	netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type);
+	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
 	dev_queue->xmit_lock_owner = -1;
 }
 
@@ -3896,6 +3910,7 @@ int register_netdevice(struct net_device *dev)
 	net = dev_net(dev);
 
 	spin_lock_init(&dev->addr_list_lock);
+	netdev_set_addr_lockdep_class(dev);
 	netdev_init_queue_locks(dev);
 
 	dev->iflink = -1;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index fccc250f95f..532e4faa29f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -73,6 +73,7 @@ static const struct proto_ops nr_proto_ops;
  * separate class since they always nest.
  */
 static struct lock_class_key nr_netdev_xmit_lock_key;
+static struct lock_class_key nr_netdev_addr_lock_key;
 
 static void nr_set_lockdep_one(struct net_device *dev,
 			       struct netdev_queue *txq,
@@ -83,6 +84,7 @@ static void nr_set_lockdep_one(struct net_device *dev,
 
 static void nr_set_lockdep_key(struct net_device *dev)
 {
+	lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key);
 	netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL);
 }
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index dbc963b4f5f..a7f1ce11bc2 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -74,6 +74,7 @@ ax25_address rose_callsign;
  * separate class since they always nest.
  */
 static struct lock_class_key rose_netdev_xmit_lock_key;
+static struct lock_class_key rose_netdev_addr_lock_key;
 
 static void rose_set_lockdep_one(struct net_device *dev,
 				 struct netdev_queue *txq,
@@ -84,6 +85,7 @@ static void rose_set_lockdep_one(struct net_device *dev,
 
 static void rose_set_lockdep_key(struct net_device *dev)
 {
+	lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key);
 	netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL);
 }
 
-- 
cgit v1.2.3-70-g09d2


From a94f779f9d82eb2d758a8715eaae5df98e8dcb21 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 22 Jul 2008 14:20:11 -0700
Subject: pkt_sched: make qdisc_class_hash_alloc() static

This patch makes the needlessly global qdisc_class_hash_alloc() static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 5219d5f9d75..b0601642e22 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -447,7 +447,7 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
-struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
+static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
 {
 	unsigned int size = n * sizeof(struct hlist_head), i;
 	struct hlist_head *h;
-- 
cgit v1.2.3-70-g09d2


From abd0b198ea699578c3c3476d646c91842e19dbd2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 22 Jul 2008 14:20:45 -0700
Subject: sctp: make sctp_outq_flush() static

sctp_outq_flush() can now become static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 1 -
 net/sctp/outqueue.c        | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 70eb64a7e1a..535a18f57a1 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1161,7 +1161,6 @@ void sctp_outq_init(struct sctp_association *, struct sctp_outq *);
 void sctp_outq_teardown(struct sctp_outq *);
 void sctp_outq_free(struct sctp_outq*);
 int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk);
-int sctp_outq_flush(struct sctp_outq *, int);
 int sctp_outq_sack(struct sctp_outq *, struct sctp_sackhdr *);
 int sctp_outq_is_empty(const struct sctp_outq *);
 void sctp_outq_restart(struct sctp_outq *);
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 70ead8dc348..4328ad5439c 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -71,6 +71,8 @@ static void sctp_mark_missing(struct sctp_outq *q,
 
 static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
 
+static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout);
+
 /* Add data to the front of the queue. */
 static inline void sctp_outq_head_data(struct sctp_outq *q,
 					struct sctp_chunk *ch)
@@ -712,7 +714,7 @@ int sctp_outq_uncork(struct sctp_outq *q)
  * locking concerns must be made.  Today we use the sock lock to protect
  * this function.
  */
-int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
+static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 {
 	struct sctp_packet *packet;
 	struct sctp_packet singleton;
-- 
cgit v1.2.3-70-g09d2


From 4d6971e909e904be60218739fc961188471fc4f4 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 22 Jul 2008 14:21:30 -0700
Subject: sctp: remove sctp_assoc_proc_exit()

Commit 20c2c1fd6c842caf70dcb1d94b9d58861949fd3d
(sctp: add sctp/remaddr table to complete RFC remote address table OID)
added an unused sctp_assoc_proc_exit() function that seems to have been
unintentionally created when copying the assocs code.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/proc.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 5dd89831ece..f268910620b 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -519,8 +519,3 @@ int __init sctp_remaddr_proc_init(void)
 
 	return 0;
 }
-
-void sctp_assoc_proc_exit(void)
-{
-	remove_proc_entry("remaddr", proc_net_sctp);
-}
-- 
cgit v1.2.3-70-g09d2


From 888c848ed34bd5f8cb56567624c0d951ab35174e Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 22 Jul 2008 14:21:58 -0700
Subject: ipv6: make struct ipv6_devconf static

struct ipv6_devconf can now become static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h | 2 --
 net/ipv6/addrconf.c    | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index db66c792774..c8effa4b1fe 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -193,8 +193,6 @@ struct inet6_dev
 	struct rcu_head		rcu;
 };
 
-extern struct ipv6_devconf ipv6_devconf;
-
 static inline void ipv6_eth_mc_map(struct in6_addr *addr, char *buf)
 {
 	/*
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9f4fcce6379..74d543d504a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -153,7 +153,7 @@ static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
 
-struct ipv6_devconf ipv6_devconf __read_mostly = {
+static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
-- 
cgit v1.2.3-70-g09d2


From 417f28bb340725544c36b35465444d2fd57232b8 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 22 Jul 2008 14:33:45 -0700
Subject: netns: dont alloc ipv6 fib timer list

FIB timer list is a trivial size structure, avoid indirection and just
put it in existing ns.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h |  2 +-
 net/ipv6/ip6_fib.c       | 40 +++++++++++++---------------------------
 2 files changed, 14 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5bacd838e88..2932721180c 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -39,7 +39,7 @@ struct netns_ipv6 {
 #endif
 	struct rt6_info         *ip6_null_entry;
 	struct rt6_statistics   *rt6_stats;
-	struct timer_list       *ip6_fib_timer;
+	struct timer_list       ip6_fib_timer;
 	struct hlist_head       *fib_table_hash;
 	struct fib6_table       *fib6_main_tbl;
 	struct dst_ops		*ip6_dst_ops;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 944095cf5e3..e0922975c41 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -661,16 +661,16 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
 static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
 {
-	if (!timer_pending(net->ipv6.ip6_fib_timer) &&
+	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
 	    (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
-		mod_timer(net->ipv6.ip6_fib_timer,
+		mod_timer(&net->ipv6.ip6_fib_timer,
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
 void fib6_force_start_gc(struct net *net)
 {
-	if (!timer_pending(net->ipv6.ip6_fib_timer))
-		mod_timer(net->ipv6.ip6_fib_timer,
+	if (!timer_pending(&net->ipv6.ip6_fib_timer))
+		mod_timer(&net->ipv6.ip6_fib_timer,
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
@@ -1449,7 +1449,7 @@ void fib6_run_gc(unsigned long expires, struct net *net)
 	} else {
 		local_bh_disable();
 		if (!spin_trylock(&fib6_gc_lock)) {
-			mod_timer(net->ipv6.ip6_fib_timer, jiffies + HZ);
+			mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
 			local_bh_enable();
 			return;
 		}
@@ -1462,12 +1462,10 @@ void fib6_run_gc(unsigned long expires, struct net *net)
 	fib6_clean_all(net, fib6_age, 0, NULL);
 
 	if (gc_args.more)
-		mod_timer(net->ipv6.ip6_fib_timer, jiffies +
+		mod_timer(&net->ipv6.ip6_fib_timer, jiffies +
 			  net->ipv6.sysctl.ip6_rt_gc_interval);
-	else {
-		del_timer(net->ipv6.ip6_fib_timer);
-		net->ipv6.ip6_fib_timer->expires = 0;
-	}
+	else
+		del_timer(&net->ipv6.ip6_fib_timer);
 	spin_unlock_bh(&fib6_gc_lock);
 }
 
@@ -1478,16 +1476,7 @@ static void fib6_gc_timer_cb(unsigned long arg)
 
 static int fib6_net_init(struct net *net)
 {
-	int ret;
-	struct timer_list *timer;
-
-	ret = -ENOMEM;
-	timer = kzalloc(sizeof(*timer), GFP_KERNEL);
-	if (!timer)
-		goto out;
-
-	setup_timer(timer, fib6_gc_timer_cb, (unsigned long)net);
-	net->ipv6.ip6_fib_timer = timer;
+	setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
 
 	net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
 	if (!net->ipv6.rt6_stats)
@@ -1521,9 +1510,7 @@ static int fib6_net_init(struct net *net)
 #endif
 	fib6_tables_init(net);
 
-	ret = 0;
-out:
-	return ret;
+	return 0;
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 out_fib6_main_tbl:
@@ -1534,15 +1521,14 @@ out_fib_table_hash:
 out_rt6_stats:
 	kfree(net->ipv6.rt6_stats);
 out_timer:
-	kfree(timer);
-	goto out;
+	return -ENOMEM;
  }
 
 static void fib6_net_exit(struct net *net)
 {
 	rt6_ifdown(net, NULL);
-	del_timer_sync(net->ipv6.ip6_fib_timer);
-	kfree(net->ipv6.ip6_fib_timer);
+	del_timer_sync(&net->ipv6.ip6_fib_timer);
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	kfree(net->ipv6.fib6_local_tbl);
 #endif
-- 
cgit v1.2.3-70-g09d2


From c8a4522245e9931a53a98d5160bb4c00d3f73921 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 22 Jul 2008 14:34:09 -0700
Subject: ipv6: use round_jiffies

This timer normally happens once a minute, there is no need to cause an
early wakeup for it, so align it to next second boundary to safe power.
It can't be deferred because then it could take too long on cleanup or DoS.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e0922975c41..03e23d058ec 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1462,8 +1462,9 @@ void fib6_run_gc(unsigned long expires, struct net *net)
 	fib6_clean_all(net, fib6_age, 0, NULL);
 
 	if (gc_args.more)
-		mod_timer(&net->ipv6.ip6_fib_timer, jiffies +
-			  net->ipv6.sysctl.ip6_rt_gc_interval);
+		mod_timer(&net->ipv6.ip6_fib_timer,
+			  round_jiffies(jiffies
+					+ net->ipv6.sysctl.ip6_rt_gc_interval));
 	else
 		del_timer(&net->ipv6.ip6_fib_timer);
 	spin_unlock_bh(&fib6_gc_lock);
-- 
cgit v1.2.3-70-g09d2


From a76d7345a3f92bb8352f200e7b2e380dddcd7e36 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 22 Jul 2008 14:34:35 -0700
Subject: ipv6: use spin_trylock_bh

Now there is spin_trylock_bh, use it rather than open coding.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 03e23d058ec..0ec7f2b636f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1447,10 +1447,8 @@ void fib6_run_gc(unsigned long expires, struct net *net)
 		gc_args.timeout = expires ? (int)expires :
 			net->ipv6.sysctl.ip6_rt_gc_interval;
 	} else {
-		local_bh_disable();
-		if (!spin_trylock(&fib6_gc_lock)) {
+		if (!spin_trylock_bh(&fib6_gc_lock)) {
 			mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
-			local_bh_enable();
 			return;
 		}
 		gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
-- 
cgit v1.2.3-70-g09d2


From 75307c0fe7fcb3b52a92fe32384fc33f50622654 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 22 Jul 2008 14:35:07 -0700
Subject: ipv6: use kcalloc

Th fib_table_hash is an array, so use kcalloc.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 0ec7f2b636f..c72fd2461ca 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1481,9 +1481,9 @@ static int fib6_net_init(struct net *net)
 	if (!net->ipv6.rt6_stats)
 		goto out_timer;
 
-	net->ipv6.fib_table_hash =
-		kzalloc(sizeof(*net->ipv6.fib_table_hash)*FIB_TABLE_HASHSZ,
-			GFP_KERNEL);
+	net->ipv6.fib_table_hash = kcalloc(FIB_TABLE_HASHSZ,
+					   sizeof(*net->ipv6.fib_table_hash),
+					   GFP_KERNEL);
 	if (!net->ipv6.fib_table_hash)
 		goto out_rt6_stats;
 
-- 
cgit v1.2.3-70-g09d2


From 3d0f24a74e7957593a5622eb5c04ed6860dd8391 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 22 Jul 2008 14:35:50 -0700
Subject: ipv6: icmp6_dst_gc return change

Change icmp6_dst_gc to return the one value the caller cares about rather
than using call by reference.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |  2 +-
 net/ipv6/ip6_fib.c      |  3 +--
 net/ipv6/route.c        | 10 ++++------
 3 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9313491e3da..2f8b3c06a10 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -68,7 +68,7 @@ extern struct rt6_info		*rt6_lookup(struct net *net,
 extern struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 					 struct neighbour *neigh,
 					 const struct in6_addr *addr);
-extern int icmp6_dst_gc(int *more);
+extern int icmp6_dst_gc(void);
 
 extern void fib6_force_start_gc(struct net *net);
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index c72fd2461ca..08ea2de28d6 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1453,9 +1453,8 @@ void fib6_run_gc(unsigned long expires, struct net *net)
 		}
 		gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
 	}
-	gc_args.more = 0;
 
-	icmp6_dst_gc(&gc_args.more);
+	gc_args.more = icmp6_dst_gc();
 
 	fib6_clean_all(net, fib6_age, 0, NULL);
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 615b328de25..86540b24b27 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -978,13 +978,12 @@ out:
 	return &rt->u.dst;
 }
 
-int icmp6_dst_gc(int *more)
+int icmp6_dst_gc(void)
 {
 	struct dst_entry *dst, *next, **pprev;
-	int freed;
+	int more = 0;
 
 	next = NULL;
-	freed = 0;
 
 	spin_lock_bh(&icmp6_dst_lock);
 	pprev = &icmp6_dst_gc_list;
@@ -993,16 +992,15 @@ int icmp6_dst_gc(int *more)
 		if (!atomic_read(&dst->__refcnt)) {
 			*pprev = dst->next;
 			dst_free(dst);
-			freed++;
 		} else {
 			pprev = &dst->next;
-			(*more)++;
+			++more;
 		}
 	}
 
 	spin_unlock_bh(&icmp6_dst_lock);
 
-	return freed;
+	return more;
 }
 
 static int ip6_dst_gc(struct dst_ops *ops)
-- 
cgit v1.2.3-70-g09d2


From 16a37acaaf4aaa631ba3f83710ed6cdb1a597520 Mon Sep 17 00:00:00 2001
From: Maciej Sosnowski <maciej.sosnowski@intel.com>
Date: Tue, 22 Jul 2008 17:30:57 -0700
Subject: I/OAT: tcp_dma_copybreak default value dependent on I/OAT version

I/OAT DMA performance tuning showed different optimal values of
tcp_dma_copybreak for different I/OAT versions (4096 for 1.2 and 2048
for 2.0).  This patch lets ioatdma driver set tcp_dma_copybreak value
according to these results.

[dan.j.williams@intel.com: remove some ifdefs]
Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/ioat_dma.c |  2 ++
 drivers/dma/ioatdma.h  | 15 +++++++++++++++
 net/core/user_dma.c    |  1 +
 3 files changed, 18 insertions(+)

(limited to 'net')

diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index da572968a7d..ece5a0e3a33 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -1581,6 +1581,8 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_self_test;
 
+	ioat_set_tcp_copy_break(device);
+
 	dma_async_device_register(&device->common);
 
 	INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
index c6ec933f989..685adb62aa5 100644
--- a/drivers/dma/ioatdma.h
+++ b/drivers/dma/ioatdma.h
@@ -27,6 +27,7 @@
 #include <linux/dmapool.h>
 #include <linux/cache.h>
 #include <linux/pci_ids.h>
+#include <net/tcp.h>
 
 #define IOAT_DMA_VERSION  "2.18"
 
@@ -129,6 +130,20 @@ struct ioat_desc_sw {
 	struct dma_async_tx_descriptor async_tx;
 };
 
+static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
+{
+	#ifdef CONFIG_NET_DMA
+	switch (dev->version) {
+	case IOAT_VER_1_2:
+		sysctl_tcp_dma_copybreak = 4096;
+		break;
+	case IOAT_VER_2_0:
+		sysctl_tcp_dma_copybreak = 2048;
+		break;
+	}
+	#endif
+}
+
 #if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
 struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
 				      void __iomem *iobase);
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 0ad1cd57bc3..de760504f6f 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -34,6 +34,7 @@
 #define NET_DMA_DEFAULT_COPYBREAK 4096
 
 int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
+EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);
 
 /**
  *	dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
-- 
cgit v1.2.3-70-g09d2


From a24e202e3ffdbd1da45ceab8e60824720c6ab7fd Mon Sep 17 00:00:00 2001
From: Krzysztof Hałasa <khc@pm.waw.pl>
Date: Tue, 1 Jul 2008 20:18:49 +0200
Subject: Remove dead code from wanmain.c, CONFIG_WANPIPE_MULTPPP doesn't exist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>
---
 net/wanrouter/wanmain.c | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'net')

diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index b210a88d096..de4cc975b46 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -567,9 +567,6 @@ static int wanrouter_device_new_if(struct wan_device *wandev,
 {
 	wanif_conf_t *cnf;
 	struct net_device *dev = NULL;
-#ifdef CONFIG_WANPIPE_MULTPPP
-	struct ppp_device *pppdev=NULL;
-#endif
 	int err;
 
 	if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL))
@@ -588,25 +585,10 @@ static int wanrouter_device_new_if(struct wan_device *wandev,
 		goto out;
 
 	if (cnf->config_id == WANCONFIG_MPPP) {
-#ifdef CONFIG_WANPIPE_MULTPPP
-		pppdev = kzalloc(sizeof(struct ppp_device), GFP_KERNEL);
-		err = -ENOBUFS;
-		if (pppdev == NULL)
-			goto out;
-		pppdev->dev = kzalloc(sizeof(struct net_device), GFP_KERNEL);
-		if (pppdev->dev == NULL) {
-			kfree(pppdev);
-			err = -ENOBUFS;
-			goto out;
-		}
-		err = wandev->new_if(wandev, (struct net_device *)pppdev, cnf);
-		dev = pppdev->dev;
-#else
 		printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n",
 				wandev->name);
 		err = -EPROTONOSUPPORT;
 		goto out;
-#endif
 	} else {
 		dev = kzalloc(sizeof(struct net_device), GFP_KERNEL);
 		err = -ENOBUFS;
@@ -661,17 +643,9 @@ static int wanrouter_device_new_if(struct wan_device *wandev,
 	kfree(dev->priv);
 	dev->priv = NULL;
 
-#ifdef CONFIG_WANPIPE_MULTPPP
-	if (cnf->config_id == WANCONFIG_MPPP)
-		kfree(pppdev);
-	else
-		kfree(dev);
-#else
 	/* Sync PPP is disabled */
 	if (cnf->config_id != WANCONFIG_MPPP)
 		kfree(dev);
-#endif
-
 out:
 	kfree(cnf);
 	return err;
-- 
cgit v1.2.3-70-g09d2


From a8817d2f6d59b0caeacf6071f56a83164b474a32 Mon Sep 17 00:00:00 2001
From: Krzysztof Hałasa <khc@pm.waw.pl>
Date: Tue, 1 Jul 2008 20:24:10 +0200
Subject: wanmain.c doesn't need syncppp.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>
---
 net/wanrouter/wanmain.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index de4cc975b46..7f07152bc10 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -57,7 +57,6 @@
 #include <linux/vmalloc.h>	/* vmalloc, vfree */
 #include <asm/uaccess.h>        /* copy_to/from_user */
 #include <linux/init.h>         /* __initfunc et al. */
-#include <net/syncppp.h>
 
 #define KMEM_SAFETYZONE 8
 
-- 
cgit v1.2.3-70-g09d2


From 5b3ab1dbd401b36ba2f9bfee2d2dae252fd62cd8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 23 Jul 2008 14:01:29 -0700
Subject: netdev: Remove warning from __netif_schedule().

It isn't helping anything and we aren't going to be able to change all
the drivers that do queue wakeups in strange situations.

Just letting a noop_qdisc get scheduled will work because when
qdisc_run() executes via net_tx_work() it will simply find no packets
pending when it makes the ->dequeue() call in qdisc_restart.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 6bf217da9d8..ccf97f9f37e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1341,9 +1341,6 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 void __netif_schedule(struct Qdisc *q)
 {
-	if (WARN_ON_ONCE(q == &noop_qdisc))
-		return;
-
 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
 		struct softnet_data *sd;
 		unsigned long flags;
-- 
cgit v1.2.3-70-g09d2


From b4942af65028c5eb516fdd9053020ccb2ee186ce Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver@hartkopp.net>
Date: Wed, 23 Jul 2008 14:06:04 -0700
Subject: net: Update entry in af_family_clock_key_strings

In the merge phase of the CAN subsystem the
af_family_clock_key_strings[] have been added to sock.c in commit
443aef0eddfa44c158d1b94ebb431a70638fcab4
(lockdep: fixup sk_callback_lock annotation). This trivial patch adds
the missing name for address family 29 (AF_CAN).

Signed-off-by: Oliver Hartkopp <oliver@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 10a64d57078..91f8bbc9352 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -180,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
   "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
-  "clock-27"       , "clock-28"          , "clock-29"          ,
+  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
   "clock-AF_RXRPC" , "clock-AF_MAX"
 };
-- 
cgit v1.2.3-70-g09d2


From 4b53fb67e385b856a991d402096379dab462170a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 23 Jul 2008 16:38:45 -0700
Subject: tcp: Clear probes_out more aggressively in tcp_ack().

This is based upon an excellent bug report from Eric Dumazet.

tcp_ack() should clear ->icsk_probes_out even if there are packets
outstanding.  Otherwise if we get a sequence of ACKs while we do have
packets outstanding over and over again, we'll never clear the
probes_out value and eventually think the connection is too sick and
we'll reset it.

This appears to be some "optimization" added to tcp_ack() in the 2.4.x
timeframe.  In 2.2.x, probes_out is pretty much always cleared by
tcp_ack().

Here is Eric's original report:

----------------------------------------
Apparently, we can in some situations reset TCP connections in a couple of seconds when some frames are lost.

In order to reproduce the problem, please try the following program on linux-2.6.25.*

Setup some iptables rules to allow two frames per second sent on loopback interface to tcp destination port 12000

iptables -N SLOWLO
iptables -A SLOWLO -m hashlimit --hashlimit 2 --hashlimit-burst 1 --hashlimit-mode dstip --hashlimit-name slow2 -j ACCEPT
iptables -A SLOWLO -j DROP

iptables -A OUTPUT -o lo -p tcp --dport 12000 -j SLOWLO

Then run the attached program and see the output :

# ./loop
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,1)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,3)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,5)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,7)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,9)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,200ms,11)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,201ms,13)
State      Recv-Q Send-Q                                  Local Address:Port                                    Peer Address:Port
ESTAB      0      40                                          127.0.0.1:54455                                      127.0.0.1:12000  timer:(persist,188ms,15)
write(): Connection timed out
wrote 890 bytes but was interrupted after 9 seconds
ESTAB      0      0                 127.0.0.1:12000            127.0.0.1:54455
Exiting read() because no data available (4000 ms timeout).
read 860 bytes

While this tcp session makes progress (sending frames with 50 bytes of payload, every 500ms), linux tcp stack decides to reset it, when tcp_retries 2 is reached (default value : 15)

tcpdump :

15:30:28.856695 IP 127.0.0.1.56554 > 127.0.0.1.12000: S 33788768:33788768(0) win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
15:30:28.856711 IP 127.0.0.1.12000 > 127.0.0.1.56554: S 33899253:33899253(0) ack 33788769 win 32792 <mss 16396,nop,nop,sackOK,nop,wscale 7>
15:30:29.356947 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 1:61(60) ack 1 win 257
15:30:29.356966 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 61 win 257
15:30:29.866415 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 61:111(50) ack 1 win 257
15:30:29.866427 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 111 win 257
15:30:30.366516 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 111:161(50) ack 1 win 257
15:30:30.366527 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 161 win 257
15:30:30.876196 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 161:211(50) ack 1 win 257
15:30:30.876207 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 211 win 257
15:30:31.376282 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 211:261(50) ack 1 win 257
15:30:31.376290 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 261 win 257
15:30:31.885619 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 261:311(50) ack 1 win 257
15:30:31.885631 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 311 win 257
15:30:32.385705 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 311:361(50) ack 1 win 257
15:30:32.385715 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 361 win 257
15:30:32.895249 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 361:411(50) ack 1 win 257
15:30:32.895266 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 411 win 257
15:30:33.395341 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 411:461(50) ack 1 win 257
15:30:33.395351 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 461 win 257
15:30:33.918085 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 461:511(50) ack 1 win 257
15:30:33.918096 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 511 win 257
15:30:34.418163 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 511:561(50) ack 1 win 257
15:30:34.418172 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 561 win 257
15:30:34.927685 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 561:611(50) ack 1 win 257
15:30:34.927698 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 611 win 257
15:30:35.427757 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 611:661(50) ack 1 win 257
15:30:35.427766 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 661 win 257
15:30:35.937359 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 661:711(50) ack 1 win 257
15:30:35.937376 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 711 win 257
15:30:36.437451 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 711:761(50) ack 1 win 257
15:30:36.437464 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 761 win 257
15:30:36.947022 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 761:811(50) ack 1 win 257
15:30:36.947039 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 811 win 257
15:30:37.447135 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 811:861(50) ack 1 win 257
15:30:37.447203 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 861 win 257
15:30:41.448171 IP 127.0.0.1.12000 > 127.0.0.1.56554: F 1:1(0) ack 861 win 257
15:30:41.448189 IP 127.0.0.1.56554 > 127.0.0.1.12000: R 33789629:33789629(0) win 0

Source of program :

/*
 * small producer/consumer program.
 * setup a listener on 127.0.0.1:12000
 * Forks a child
 *   child connect to 127.0.0.1, and sends 10 bytes on this tcp socket every 100 ms
 * Father accepts connection, and read all data
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <stdio.h>
#include <time.h>
#include <sys/poll.h>

int port = 12000;
char buffer[4096];
int main(int argc, char *argv[])
{
        int lfd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in socket_address;
        time_t t0, t1;
        int on = 1, sfd, res;
        unsigned long total = 0;
        socklen_t alen = sizeof(socket_address);
        pid_t pid;

        time(&t0);
        socket_address.sin_family = AF_INET;
        socket_address.sin_port = htons(port);
        socket_address.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

        if (lfd == -1) {
                perror("socket()");
                return 1;
        }
        setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int));
        if (bind(lfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) {
                perror("bind");
                close(lfd);
                return 1;
        }
        if (listen(lfd, 1) == -1) {
                perror("listen()");
                close(lfd);
                return 1;
        }
        pid = fork();
        if (pid == 0) {
                int i, cfd = socket(AF_INET, SOCK_STREAM, 0);
                close(lfd);
                if (connect(cfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) {
                        perror("connect()");
                        return 1;
                        }
                for (i = 0 ; ;) {
                        res = write(cfd, "blablabla\n", 10);
                        if (res > 0) total += res;
                        else if (res == -1) {
                                perror("write()");
                                break;
                        } else break;
                        usleep(100000);
                        if (++i == 10) {
                                system("ss -on dst 127.0.0.1:12000");
                                i = 0;
                        }
                }
                time(&t1);
                fprintf(stderr, "wrote %lu bytes but was interrupted after %g seconds\n", total, difftime(t1, t0));
                system("ss -on | grep 127.0.0.1:12000");
                close(cfd);
                return 0;
        }
        sfd = accept(lfd, (struct sockaddr *)&socket_address, &alen);
        if (sfd == -1) {
                perror("accept");
                return 1;
        }
        close(lfd);
        while (1) {
                struct pollfd pfd[1];
                pfd[0].fd = sfd;
                pfd[0].events = POLLIN;
                if (poll(pfd, 1, 4000) == 0) {
                        fprintf(stderr, "Exiting read() because no data available (4000 ms timeout).\n");
                        break;
                }
                res = read(sfd, buffer, sizeof(buffer));
                if (res > 0) total += res;
                else if (res == 0) break;
                else perror("read()");
        }
        fprintf(stderr, "read %lu bytes\n", total);
        close(sfd);
        return 0;
}
----------------------------------------

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1f5e6049883..75efd244f2a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3292,6 +3292,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	 * log. Something worked...
 	 */
 	sk->sk_err_soft = 0;
+	icsk->icsk_probes_out = 0;
 	tp->rcv_tstamp = tcp_time_stamp;
 	prior_packets = tp->packets_out;
 	if (!prior_packets)
@@ -3324,8 +3325,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	return 1;
 
 no_queue:
-	icsk->icsk_probes_out = 0;
-
 	/* If this ack opens up a zero window, clear backoff.  It was
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
-- 
cgit v1.2.3-70-g09d2


From 70eed75d76635ba7350651b9bd96529a306ec67a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 23 Jul 2008 16:42:42 -0700
Subject: netfilter: make security table depend on NETFILTER_ADVANCED

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig | 2 +-
 net/ipv6/netfilter/Kconfig | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f23e60c93ef..90eb7cb47e7 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -369,7 +369,7 @@ config IP_NF_SECURITY
 	tristate "Security table"
 	depends on IP_NF_IPTABLES
 	depends on SECURITY
-	default m if NETFILTER_ADVANCED=n
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `security' table to iptables, for use
 	  with Mandatory Access Control (MAC) policy.
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 689dec899c5..0cfcce7b18d 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -213,7 +213,7 @@ config IP6_NF_SECURITY
        tristate "Security table"
        depends on IP6_NF_IPTABLES
        depends on SECURITY
-       default m if NETFILTER_ADVANCED=n
+       depends on NETFILTER_ADVANCED
        help
          This option adds a `security' table to iptables, for use
          with Mandatory Access Control (MAC) policy.
-- 
cgit v1.2.3-70-g09d2


From f867e6af94239a04ec23aeec2fcda5aa58e41db7 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 23 Jul 2008 21:34:27 -0700
Subject: pkt_sched: sch_sfq: dump a real number of flows

Dump the "flows" number according to the number of active flows
instead of repeating the "limit".

Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8589da66656..73f53844ce9 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -536,7 +536,14 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	opt.limit = q->limit;
 	opt.divisor = SFQ_HASH_DIVISOR;
-	opt.flows = q->limit;
+	opt.flows = 0;
+	if (q->tail != SFQ_DEPTH) {
+		unsigned int i;
+
+		for (i = 0; i < SFQ_HASH_DIVISOR; i++)
+			if (q->ht[i] != SFQ_DEPTH)
+				opt.flows++;
+	}
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
-- 
cgit v1.2.3-70-g09d2


From a677a039be7243357d93502bff2b40850c942e2d Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:17 -0700
Subject: flag parameters: socket and socketpair

This patch adds support for flag values which are ORed to the type passwd
to socket and socketpair.  The additional code is minimal.  The flag
values in this implementation can and must match the O_* flags.  This
avoids overhead in the conversion.

The internal functions sock_alloc_fd and sock_map_fd get a new parameters
and all callers are changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>

#define PORT 57392

/* For Linux these must be the same.  */
#define SOCK_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd;
  fd = socket (PF_INET, SOCK_STREAM, 0);
  if (fd == -1)
    {
      puts ("socket(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("socket(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
  if (fd == -1)
    {
      puts ("socket(SOCK_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  int fds[2];
  if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
    {
      puts ("socketpair(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      coe = fcntl (fds[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (coe & FD_CLOEXEC)
        {
          printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
    {
      puts ("socketpair(SOCK_CLOEXEC) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      coe = fcntl (fds[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((coe & FD_CLOEXEC) == 0)
        {
          printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-mips/socket.h |  7 +++++++
 include/linux/net.h       |  9 ++++++++-
 net/9p/trans_fd.c         |  2 +-
 net/sctp/socket.c         |  2 +-
 net/socket.c              | 28 ++++++++++++++++++++--------
 5 files changed, 37 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h
index 63f60254d30..facc2d7a87c 100644
--- a/include/asm-mips/socket.h
+++ b/include/asm-mips/socket.h
@@ -102,6 +102,13 @@ enum sock_type {
 };
 
 #define SOCK_MAX (SOCK_PACKET + 1)
+/* Mask which covers at least up to SOCK_MASK-1.  The
+ *  * remaining bits are used as flags. */
+#define SOCK_TYPE_MASK 0xf
+
+/* Flags for socket, socketpair, paccept */
+#define SOCK_CLOEXEC	O_CLOEXEC
+#define SOCK_NONBLOCK	O_NONBLOCK
 
 #define ARCH_HAS_SOCKET_TYPES 1
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 150a48c68d5..8b5383c45b4 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -20,6 +20,7 @@
 
 #include <linux/wait.h>
 #include <linux/socket.h>
+#include <linux/fcntl.h>	/* For O_CLOEXEC */
 #include <asm/socket.h>
 
 struct poll_table_struct;
@@ -94,6 +95,12 @@ enum sock_type {
 };
 
 #define SOCK_MAX (SOCK_PACKET + 1)
+/* Mask which covers at least up to SOCK_MASK-1.  The
+ * remaining bits are used as flags. */
+#define SOCK_TYPE_MASK 0xf
+
+/* Flags for socket, socketpair, paccept */
+#define SOCK_CLOEXEC	O_CLOEXEC
 
 #endif /* ARCH_HAS_SOCKET_TYPES */
 
@@ -208,7 +215,7 @@ extern int   	     sock_sendmsg(struct socket *sock, struct msghdr *msg,
 				  size_t len);
 extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
 				  size_t size, int flags);
-extern int 	     sock_map_fd(struct socket *sock);
+extern int 	     sock_map_fd(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 4507f744f44..cdf137af7ad 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1285,7 +1285,7 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
 	int fd, ret;
 
 	csocket->sk->sk_allocation = GFP_NOIO;
-	fd = sock_map_fd(csocket);
+	fd = sock_map_fd(csocket, 0);
 	if (fd < 0) {
 		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n");
 		return fd;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 79bece16aed..dbb79adf8f3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3910,7 +3910,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
 		goto out;
 
 	/* Map the socket to an unused fd that can be returned to the user.  */
-	retval = sock_map_fd(newsock);
+	retval = sock_map_fd(newsock, 0);
 	if (retval < 0) {
 		sock_release(newsock);
 		goto out;
diff --git a/net/socket.c b/net/socket.c
index 1ba57d88898..64601f90035 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -349,11 +349,11 @@ static struct dentry_operations sockfs_dentry_operations = {
  *	but we take care of internal coherence yet.
  */
 
-static int sock_alloc_fd(struct file **filep)
+static int sock_alloc_fd(struct file **filep, int flags)
 {
 	int fd;
 
-	fd = get_unused_fd();
+	fd = get_unused_fd_flags(flags);
 	if (likely(fd >= 0)) {
 		struct file *file = get_empty_filp();
 
@@ -396,10 +396,10 @@ static int sock_attach_fd(struct socket *sock, struct file *file)
 	return 0;
 }
 
-int sock_map_fd(struct socket *sock)
+int sock_map_fd(struct socket *sock, int flags)
 {
 	struct file *newfile;
-	int fd = sock_alloc_fd(&newfile);
+	int fd = sock_alloc_fd(&newfile, flags);
 
 	if (likely(fd >= 0)) {
 		int err = sock_attach_fd(sock, newfile);
@@ -1218,12 +1218,18 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 {
 	int retval;
 	struct socket *sock;
+	int flags;
+
+	flags = type & ~SOCK_TYPE_MASK;
+	if (flags & ~SOCK_CLOEXEC)
+		return -EINVAL;
+	type &= SOCK_TYPE_MASK;
 
 	retval = sock_create(family, type, protocol, &sock);
 	if (retval < 0)
 		goto out;
 
-	retval = sock_map_fd(sock);
+	retval = sock_map_fd(sock, flags & O_CLOEXEC);
 	if (retval < 0)
 		goto out_release;
 
@@ -1246,6 +1252,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 	struct socket *sock1, *sock2;
 	int fd1, fd2, err;
 	struct file *newfile1, *newfile2;
+	int flags;
+
+	flags = type & ~SOCK_TYPE_MASK;
+	if (flags & ~SOCK_CLOEXEC)
+		return -EINVAL;
+	type &= SOCK_TYPE_MASK;
 
 	/*
 	 * Obtain the first socket and check if the underlying protocol
@@ -1264,13 +1276,13 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 	if (err < 0)
 		goto out_release_both;
 
-	fd1 = sock_alloc_fd(&newfile1);
+	fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
 	if (unlikely(fd1 < 0)) {
 		err = fd1;
 		goto out_release_both;
 	}
 
-	fd2 = sock_alloc_fd(&newfile2);
+	fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
 	if (unlikely(fd2 < 0)) {
 		err = fd2;
 		put_filp(newfile1);
@@ -1426,7 +1438,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	 */
 	__module_get(newsock->ops->owner);
 
-	newfd = sock_alloc_fd(&newfile);
+	newfd = sock_alloc_fd(&newfile, 0);
 	if (unlikely(newfd < 0)) {
 		err = newfd;
 		sock_release(newsock);
-- 
cgit v1.2.3-70-g09d2


From aaca0bdca573f3f51ea03139f9c7289541e7bca3 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:20 -0700
Subject: flag parameters: paccept

This patch is by far the most complex in the series.  It adds a new syscall
paccept.  This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:

- a signal mask
- a flags value

The flags parameter can be used to set flag like SOCK_CLOEXEC.  This is
imlpemented here as well.  Some people argued that this is a property which
should be inherited from the file desriptor for the server but this is against
POSIX.  Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc).  So an interface change in inevitable.

The flag value is the same as for socket and socketpair.  I think diverging
here will only create confusion.  Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.

The signal mask is handled as for pselect etc.  The mask is temporarily
installed for the thread and removed before the call returns.  I modeled the
code after pselect.  If there is a problem it's likely also in pselect.

For architectures which use socketcall I maintained this interface instead of
adding a system call.  The symmetry shouldn't be broken.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

#ifndef __NR_paccept
# ifdef __x86_64__
#  define __NR_paccept 288
# elif defined __i386__
#  define SYS_PACCEPT 18
#  define USE_SOCKETCALL 1
# else
#  error "need __NR_paccept"
# endif
#endif

#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
  ({ long args[6] = { \
       (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
     syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
  syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif

#define PORT 57392

#define SOCK_CLOEXEC O_CLOEXEC

static pthread_barrier_t b;

static void *
tf (void *arg)
{
  pthread_barrier_wait (&b);
  int s = socket (AF_INET, SOCK_STREAM, 0);
  struct sockaddr_in sin;
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);

  pthread_barrier_wait (&b);
  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  pthread_barrier_wait (&b);

  pthread_barrier_wait (&b);
  sleep (2);
  pthread_kill ((pthread_t) arg, SIGUSR1);

  return NULL;
}

static void
handler (int s)
{
}

int
main (void)
{
  pthread_barrier_init (&b, NULL, 2);

  struct sockaddr_in sin;
  pthread_t th;
  if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
    {
      puts ("pthread_create failed");
      return 1;
    }

  int s = socket (AF_INET, SOCK_STREAM, 0);
  int reuse = 1;
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  int s2 = paccept (s, NULL, 0, NULL, 0);
  if (s2 < 0)
    {
      puts ("paccept(0) failed");
      return 1;
    }

  int coe = fcntl (s2, F_GETFD);
  if (coe & FD_CLOEXEC)
    {
      puts ("paccept(0) set close-on-exec-flag");
      return 1;
    }
  close (s2);

  pthread_barrier_wait (&b);

  s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
  if (s2 < 0)
    {
      puts ("paccept(SOCK_CLOEXEC) failed");
      return 1;
    }

  coe = fcntl (s2, F_GETFD);
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (s2);

  pthread_barrier_wait (&b);

  struct sigaction sa;
  sa.sa_handler = handler;
  sa.sa_flags = 0;
  sigemptyset (&sa.sa_mask);
  sigaction (SIGUSR1, &sa, NULL);

  sigset_t ss;
  pthread_sigmask (SIG_SETMASK, NULL, &ss);
  sigaddset (&ss, SIGUSR1);
  pthread_sigmask (SIG_SETMASK, &ss, NULL);

  sigdelset (&ss, SIGUSR1);
  alarm (4);
  pthread_barrier_wait (&b);

  errno = 0 ;
  s2 = paccept (s, NULL, 0, &ss, 0);
  if (s2 != -1 || errno != EINTR)
    {
      puts ("paccept did not fail with EINTR");
      return 1;
    }

  close (s);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-alpha/socket.h  |  5 +++
 include/asm-parisc/socket.h |  5 +++
 include/asm-x86/unistd_64.h |  2 ++
 include/linux/net.h         |  3 ++
 include/linux/syscalls.h    |  2 ++
 kernel/sys_ni.c             |  1 +
 net/compat.c                | 52 ++++++++++++++++++++++++++---
 net/socket.c                | 81 ++++++++++++++++++++++++++++++++++++++++-----
 8 files changed, 139 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h
index 08c97931992..a1057c2d95e 100644
--- a/include/asm-alpha/socket.h
+++ b/include/asm-alpha/socket.h
@@ -62,4 +62,9 @@
 
 #define SO_MARK			36
 
+/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK	0x40000000
+
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h
index 69a7a0d30b0..fba402c95ac 100644
--- a/include/asm-parisc/socket.h
+++ b/include/asm-parisc/socket.h
@@ -54,4 +54,9 @@
 
 #define SO_MARK			0x401f
 
+/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK   0x40000000
+
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index 9c1a4a3470d..e323994a370 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -639,6 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime			287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
+#define __NR_paccept				288
+__SYSCALL(__NR_paccept, sys_paccept)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/net.h b/include/linux/net.h
index 8b5383c45b4..3a9b06d4d0f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -47,6 +47,7 @@ struct net;
 #define SYS_GETSOCKOPT	15		/* sys_getsockopt(2)		*/
 #define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
 #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
+#define SYS_PACCEPT	18		/* sys_paccept(2)		*/
 
 typedef enum {
 	SS_FREE = 0,			/* not allocated		*/
@@ -219,6 +220,8 @@ extern int 	     sock_map_fd(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
+extern long	     do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			       int __user *upeer_addrlen, int flags);
 
 #define net_random()		random32()
 #define net_srandom(seed)	srandom32((__force u32)seed)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4394dadff81..2a2a40af6b2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -409,6 +409,8 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname,
 asmlinkage long sys_bind(int, struct sockaddr __user *, int);
 asmlinkage long sys_connect(int, struct sockaddr __user *, int);
 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
+asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
+			    const sigset_t *, size_t, int);
 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0fea0ee12da..2f0b8a2e600 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,6 +31,7 @@ cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
 cond_syscall(sys_listen);
 cond_syscall(sys_accept);
+cond_syscall(sys_paccept);
 cond_syscall(sys_connect);
 cond_syscall(sys_getsockname);
 cond_syscall(sys_getpeername);
diff --git a/net/compat.c b/net/compat.c
index 6e1b03b5193..67fb6a3834a 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -722,9 +722,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
 
 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
-static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
+				AL(6)};
 #undef AL
 
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -737,13 +738,52 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns
 	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
+asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
+				   int __user *upeer_addrlen,
+				   const compat_sigset_t __user *sigmask,
+				   compat_size_t sigsetsize, int flags)
+{
+	compat_sigset_t ss32;
+	sigset_t ksigmask, sigsaved;
+	int ret;
+
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &ss32);
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
+
+	if (ret == -ERESTARTNOHAND) {
+		/*
+		 * Don't restore the signal mask yet. Let do_signal() deliver
+		 * the signal on the way back to userspace, before the signal
+		 * mask is restored.
+		 */
+		if (sigmask) {
+			memcpy(&current->saved_sigmask, &sigsaved,
+			       sizeof(sigsaved));
+			set_restore_sigmask();
+		}
+	} else if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 {
 	int ret;
 	u32 a[6];
 	u32 a0, a1;
 
-	if (call < SYS_SOCKET || call > SYS_RECVMSG)
+	if (call < SYS_SOCKET || call > SYS_PACCEPT)
 		return -EINVAL;
 	if (copy_from_user(a, args, nas[call]))
 		return -EFAULT;
@@ -764,7 +804,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 		ret = sys_listen(a0, a1);
 		break;
 	case SYS_ACCEPT:
-		ret = sys_accept(a0, compat_ptr(a1), compat_ptr(a[2]));
+		ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
 		break;
 	case SYS_GETSOCKNAME:
 		ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
@@ -804,6 +844,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	case SYS_RECVMSG:
 		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
 		break;
+	case SYS_PACCEPT:
+		ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
+					 compat_ptr(a[3]), a[4], a[5]);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/net/socket.c b/net/socket.c
index 64601f90035..a0ce8ad7225 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -63,6 +63,7 @@
 #include <linux/file.h>
 #include <linux/net.h>
 #include <linux/interrupt.h>
+#include <linux/thread_info.h>
 #include <linux/rcupdate.h>
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
@@ -1225,6 +1226,9 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
+	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
 	retval = sock_create(family, type, protocol, &sock);
 	if (retval < 0)
 		goto out;
@@ -1259,6 +1263,9 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
+	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
 	/*
 	 * Obtain the first socket and check if the underlying protocol
 	 * supports the socketpair call.
@@ -1413,14 +1420,20 @@ asmlinkage long sys_listen(int fd, int backlog)
  *	clean when we restucture accept also.
  */
 
-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-			   int __user *upeer_addrlen)
+long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+	       int __user *upeer_addrlen, int flags)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
 	int err, len, newfd, fput_needed;
 	struct sockaddr_storage address;
 
+	if (flags & ~SOCK_CLOEXEC)
+		return -EINVAL;
+
+	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
@@ -1438,7 +1451,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	 */
 	__module_get(newsock->ops->owner);
 
-	newfd = sock_alloc_fd(&newfile, 0);
+	newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
 	if (unlikely(newfd < 0)) {
 		err = newfd;
 		sock_release(newsock);
@@ -1491,6 +1504,50 @@ out_fd:
 	goto out_put;
 }
 
+asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen,
+			    const sigset_t __user *sigmask,
+			    size_t sigsetsize, int flags)
+{
+	sigset_t ksigmask, sigsaved;
+	int ret;
+
+	if (sigmask) {
+		/* XXX: Don't preclude handling different sized sigset_t's.  */
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+        }
+
+	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
+
+	if (ret < 0 && signal_pending(current)) {
+		/*
+		 * Don't restore the signal mask yet. Let do_signal() deliver
+		 * the signal on the way back to userspace, before the signal
+		 * mask is restored.
+		 */
+		if (sigmask) {
+			memcpy(&current->saved_sigmask, &sigsaved,
+			       sizeof(sigsaved));
+			set_restore_sigmask();
+		}
+	} else if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
+asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			   int __user *upeer_addrlen)
+{
+	return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
+}
+
 /*
  *	Attempt to connect to a socket with the server address.  The address
  *	is in user space so we verify it is OK and move it to kernel space.
@@ -2011,10 +2068,11 @@ out:
 
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
-static const unsigned char nargs[18]={
+static const unsigned char nargs[19]={
 	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
+	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
+	AL(6)
 };
 
 #undef AL
@@ -2033,7 +2091,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	unsigned long a0, a1;
 	int err;
 
-	if (call < 1 || call > SYS_RECVMSG)
+	if (call < 1 || call > SYS_PACCEPT)
 		return -EINVAL;
 
 	/* copy_from_user should be SMP safe. */
@@ -2062,8 +2120,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 		break;
 	case SYS_ACCEPT:
 		err =
-		    sys_accept(a0, (struct sockaddr __user *)a1,
-			       (int __user *)a[2]);
+		    do_accept(a0, (struct sockaddr __user *)a1,
+			      (int __user *)a[2], 0);
 		break;
 	case SYS_GETSOCKNAME:
 		err =
@@ -2110,6 +2168,13 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	case SYS_RECVMSG:
 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
+	case SYS_PACCEPT:
+		err =
+		    sys_paccept(a0, (struct sockaddr __user *)a1,
+			        (int __user *)a[2],
+				(const sigset_t __user *) a[3],
+				a[4], a[5]);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
cgit v1.2.3-70-g09d2


From c019bbc612f6633ede7ed67725cbf68de45ae8a4 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:21 -0700
Subject: flag parameters: paccept w/out set_restore_sigmask

Some platforms do not have support to restore the signal mask in the
return path from a syscall.  For those platforms syscalls like pselect are
not defined at all.  This is, I think, not a good choice for paccept()
since paccept() adds more value on top of accept() than just the signal
mask handling.

Therefore this patch defines a scaled down version of the sys_paccept
function for those platforms.  It returns -EINVAL in case the signal mask
is non-NULL but behaves the same otherwise.

Note that I explicitly included <linux/thread_info.h>.  I saw that it is
currently included but indirectly two levels down.  There is too much risk
in relying on this.  The header might change and then suddenly the
function definition would change without anyone immediately noticing.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/net.h |  3 +++
 net/socket.c        | 17 +++++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'net')

diff --git a/include/linux/net.h b/include/linux/net.h
index 3a9b06d4d0f..39a23af059b 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -102,6 +102,9 @@ enum sock_type {
 
 /* Flags for socket, socketpair, paccept */
 #define SOCK_CLOEXEC	O_CLOEXEC
+#ifndef SOCK_NONBLOCK
+#define SOCK_NONBLOCK	O_NONBLOCK
+#endif
 
 #endif /* ARCH_HAS_SOCKET_TYPES */
 
diff --git a/net/socket.c b/net/socket.c
index a0ce8ad7225..d163adff95b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -69,6 +69,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
+#include <linux/thread_info.h>
 #include <linux/wanrouter.h>
 #include <linux/if_bridge.h>
 #include <linux/if_frad.h>
@@ -1504,6 +1505,7 @@ out_fd:
 	goto out_put;
 }
 
+#ifdef HAVE_SET_RESTORE_SIGMASK
 asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 			    int __user *upeer_addrlen,
 			    const sigset_t __user *sigmask,
@@ -1541,6 +1543,21 @@ asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 
 	return ret;
 }
+#else
+asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen,
+			    const sigset_t __user *sigmask,
+			    size_t sigsetsize, int flags)
+{
+	/* The platform does not support restoring the signal mask in the
+	 * return path.  So we do not allow using paccept() with a signal
+	 * mask.  */
+	if (sigmask)
+		return -EINVAL;
+
+	return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
+}
+#endif
 
 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 			   int __user *upeer_addrlen)
-- 
cgit v1.2.3-70-g09d2


From 77d2720059618b9b6e827a8b73831eb6c6fad63c Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:35 -0700
Subject: flag parameters: NONBLOCK in socket and socketpair

This patch introduces support for the SOCK_NONBLOCK flag in socket,
socketpair, and  paccept.  To do this the internal function sock_attach_fd
gets an additional parameter which it uses to set the appropriate flag for
the file descriptor.

Given that in modern, scalable programs almost all socket connections are
non-blocking and the minimal additional cost for the new functionality
I see no reason not to add this code.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

#ifndef __NR_paccept
# ifdef __x86_64__
#  define __NR_paccept 288
# elif defined __i386__
#  define SYS_PACCEPT 18
#  define USE_SOCKETCALL 1
# else
#  error "need __NR_paccept"
# endif
#endif

#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
  ({ long args[6] = { \
       (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
     syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
  syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif

#define PORT 57392

#define SOCK_NONBLOCK O_NONBLOCK

static pthread_barrier_t b;

static void *
tf (void *arg)
{
  pthread_barrier_wait (&b);
  int s = socket (AF_INET, SOCK_STREAM, 0);
  struct sockaddr_in sin;
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  pthread_barrier_wait (&b);

  pthread_barrier_wait (&b);
  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  pthread_barrier_wait (&b);

  return NULL;
}

int
main (void)
{
  int fd;
  fd = socket (PF_INET, SOCK_STREAM, 0);
  if (fd == -1)
    {
      puts ("socket(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("socket(0) set non-blocking mode");
      return 1;
    }
  close (fd);

  fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
  if (fd == -1)
    {
      puts ("socket(SOCK_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (fd);

  int fds[2];
  if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
    {
      puts ("socketpair(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (fl & O_NONBLOCK)
        {
          printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
    {
      puts ("socketpair(SOCK_NONBLOCK) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((fl & O_NONBLOCK) == 0)
        {
          printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  pthread_barrier_init (&b, NULL, 2);

  struct sockaddr_in sin;
  pthread_t th;
  if (pthread_create (&th, NULL, tf, NULL) != 0)
    {
      puts ("pthread_create failed");
      return 1;
    }

  int s = socket (AF_INET, SOCK_STREAM, 0);
  int reuse = 1;
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  int s2 = paccept (s, NULL, 0, NULL, 0);
  if (s2 < 0)
    {
      puts ("paccept(0) failed");
      return 1;
    }

  fl = fcntl (s2, F_GETFL);
  if (fl & O_NONBLOCK)
    {
      puts ("paccept(0) set non-blocking mode");
      return 1;
    }
  close (s2);
  close (s);

  pthread_barrier_wait (&b);

  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
  if (s2 < 0)
    {
      puts ("paccept(SOCK_NONBLOCK) failed");
      return 1;
    }

  fl = fcntl (s2, F_GETFL);
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (s2);
  close (s);

  pthread_barrier_wait (&b);
  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/net.h |  2 +-
 net/socket.c        | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/linux/net.h b/include/linux/net.h
index 39a23af059b..2f999fbb188 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -20,7 +20,7 @@
 
 #include <linux/wait.h>
 #include <linux/socket.h>
-#include <linux/fcntl.h>	/* For O_CLOEXEC */
+#include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
 #include <asm/socket.h>
 
 struct poll_table_struct;
diff --git a/net/socket.c b/net/socket.c
index d163adff95b..31105f9048a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -369,7 +369,7 @@ static int sock_alloc_fd(struct file **filep, int flags)
 	return fd;
 }
 
-static int sock_attach_fd(struct socket *sock, struct file *file)
+static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
 {
 	struct dentry *dentry;
 	struct qstr name = { .name = "" };
@@ -391,7 +391,7 @@ static int sock_attach_fd(struct socket *sock, struct file *file)
 	init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
-	file->f_flags = O_RDWR;
+	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->f_pos = 0;
 	file->private_data = sock;
 
@@ -404,7 +404,7 @@ int sock_map_fd(struct socket *sock, int flags)
 	int fd = sock_alloc_fd(&newfile, flags);
 
 	if (likely(fd >= 0)) {
-		int err = sock_attach_fd(sock, newfile);
+		int err = sock_attach_fd(sock, newfile, flags);
 
 		if (unlikely(err < 0)) {
 			put_filp(newfile);
@@ -1223,7 +1223,7 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 	int flags;
 
 	flags = type & ~SOCK_TYPE_MASK;
-	if (flags & ~SOCK_CLOEXEC)
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
@@ -1234,7 +1234,7 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 	if (retval < 0)
 		goto out;
 
-	retval = sock_map_fd(sock, flags & O_CLOEXEC);
+	retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
 	if (retval < 0)
 		goto out_release;
 
@@ -1260,7 +1260,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 	int flags;
 
 	flags = type & ~SOCK_TYPE_MASK;
-	if (flags & ~SOCK_CLOEXEC)
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
@@ -1298,12 +1298,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 		goto out_release_both;
 	}
 
-	err = sock_attach_fd(sock1, newfile1);
+	err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
 	if (unlikely(err < 0)) {
 		goto out_fd2;
 	}
 
-	err = sock_attach_fd(sock2, newfile2);
+	err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
 	if (unlikely(err < 0)) {
 		fput(newfile1);
 		goto out_fd1;
@@ -1429,7 +1429,7 @@ long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	int err, len, newfd, fput_needed;
 	struct sockaddr_storage address;
 
-	if (flags & ~SOCK_CLOEXEC)
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 
 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
@@ -1459,7 +1459,7 @@ long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 		goto out_put;
 	}
 
-	err = sock_attach_fd(newsock, newfile);
+	err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
 	if (err < 0)
 		goto out_fd_simple;
 
-- 
cgit v1.2.3-70-g09d2


From e38b36f325153eaadd1c2a7abc5762079233e540 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:42 -0700
Subject: flag parameters: check magic constants

This patch adds test that ensure the boundary conditions for the various
constants introduced in the previous patches is met.  No code is generated.

[akpm@linux-foundation.org: fix alpha]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventfd.c      | 4 ++++
 fs/eventpoll.c    | 3 +++
 fs/inotify_user.c | 4 ++++
 fs/signalfd.c     | 4 ++++
 fs/timerfd.c      | 4 ++++
 net/socket.c      | 6 ++++++
 6 files changed, 25 insertions(+)

(limited to 'net')

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 3ed4466177a..08bf558d040 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -203,6 +203,10 @@ asmlinkage long sys_eventfd2(unsigned int count, int flags)
 	int fd;
 	struct eventfd_ctx *ctx;
 
+	/* Check the EFD_* constants for consistency.  */
+	BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
+
 	if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
 		return -EINVAL;
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3fd4014f3c5..2fdad420404 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1051,6 +1051,9 @@ asmlinkage long sys_epoll_create2(int size, int flags)
 	int error, fd = -1;
 	struct eventpoll *ep;
 
+	/* Check the EPOLL_* constant for consistency.  */
+	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
+
 	if (flags & ~EPOLL_CLOEXEC)
 		return -EINVAL;
 
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index dc7e1f61974..fe79c25d95d 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -574,6 +574,10 @@ asmlinkage long sys_inotify_init1(int flags)
 	struct file *filp;
 	int fd, ret;
 
+	/* Check the IN_* constants for consistency.  */
+	BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
+
 	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
 		return -EINVAL;
 
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 5441a4bca77..9c39bc7f843 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -211,6 +211,10 @@ asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
 	sigset_t sigmask;
 	struct signalfd_ctx *ctx;
 
+	/* Check the SFD_* constants for consistency.  */
+	BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+
 	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
 		return -EINVAL;
 
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 75d44efe346..c502c60e4f5 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,6 +184,10 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	int ufd;
 	struct timerfd_ctx *ctx;
 
+	/* Check the TFD_* constants for consistency.  */
+	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
+
 	if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
 		return -EINVAL;
 	if (clockid != CLOCK_MONOTONIC &&
diff --git a/net/socket.c b/net/socket.c
index 31105f9048a..1310a82cbba 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1222,6 +1222,12 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 	struct socket *sock;
 	int flags;
 
+	/* Check the SOCK_* constants for consistency.  */
+	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
+	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
+	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
+
 	flags = type & ~SOCK_TYPE_MASK;
 	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From cffe1c5d7a5a1e54f7c2c6d0510f651a965bccc3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 25 Jul 2008 01:25:04 -0700
Subject: pkt_sched: Fix locking in shutdown_scheduler_queue()

Qdisc locks need to be held with BH disabled.

Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4ac7e3a8c25..43abd4d27ea 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -736,9 +736,9 @@ static void shutdown_scheduler_queue(struct net_device *dev,
 		dev_queue->qdisc = qdisc_default;
 		dev_queue->qdisc_sleeping = qdisc_default;
 
-		spin_lock(root_lock);
+		spin_lock_bh(root_lock);
 		qdisc_destroy(qdisc);
-		spin_unlock(root_lock);
+		spin_unlock_bh(root_lock);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 6fccab671f2f0a24b799f29a4ec878f62d34656c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Jul 2008 02:54:40 -0700
Subject: ipsec: ipcomp - Merge IPComp implementations

This patch merges the IPv4/IPv6 IPComp implementations since most
of the code is identical.  As a result future enhancements will no
longer need to be duplicated.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipcomp.h   |   6 +
 net/ipv4/Kconfig       |   4 +-
 net/ipv4/ipcomp.c      | 315 +-------------------------------------------
 net/ipv6/Kconfig       |   4 +-
 net/ipv6/ipcomp6.c     | 298 +----------------------------------------
 net/xfrm/Kconfig       |   6 +
 net/xfrm/Makefile      |   1 +
 net/xfrm/xfrm_ipcomp.c | 349 +++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 377 insertions(+), 606 deletions(-)
 create mode 100644 net/xfrm/xfrm_ipcomp.c

(limited to 'net')

diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index 330b74e813a..2a1092abaa0 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -14,6 +14,12 @@ struct ipcomp_data {
 
 struct ip_comp_hdr;
 struct sk_buff;
+struct xfrm_state;
+
+int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb);
+int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb);
+void ipcomp_destroy(struct xfrm_state *x);
+int ipcomp_init_state(struct xfrm_state *x);
 
 static inline struct ip_comp_hdr *ip_comp_hdr(const struct sk_buff *skb)
 {
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 4670683b468..591ea23639c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -356,10 +356,8 @@ config INET_ESP
 
 config INET_IPCOMP
 	tristate "IP: IPComp transformation"
-	select XFRM
 	select INET_XFRM_TUNNEL
-	select CRYPTO
-	select CRYPTO_DEFLATE
+	select XFRM_IPCOMP
 	---help---
 	  Support for IP Payload Compression Protocol (IPComp) (RFC3173),
 	  typically needed for IPsec.
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index a75807b971b..a42b64d040c 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -14,153 +14,14 @@
  *   - Adaptive compression.
  */
 #include <linux/module.h>
-#include <linux/crypto.h>
 #include <linux/err.h>
-#include <linux/pfkeyv2.h>
-#include <linux/percpu.h>
-#include <linux/smp.h>
-#include <linux/list.h>
-#include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
-#include <linux/mutex.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/icmp.h>
 #include <net/ipcomp.h>
 #include <net/protocol.h>
-
-struct ipcomp_tfms {
-	struct list_head list;
-	struct crypto_comp **tfms;
-	int users;
-};
-
-static DEFINE_MUTEX(ipcomp_resource_mutex);
-static void **ipcomp_scratches;
-static int ipcomp_scratch_users;
-static LIST_HEAD(ipcomp_tfms_list);
-
-static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct ipcomp_data *ipcd = x->data;
-	const int plen = skb->len;
-	int dlen = IPCOMP_SCRATCH_SIZE;
-	const u8 *start = skb->data;
-	const int cpu = get_cpu();
-	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
-
-	if (err)
-		goto out;
-
-	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
-	if (err)
-		goto out;
-
-	skb->truesize += dlen - plen;
-	__skb_put(skb, dlen - plen);
-	skb_copy_to_linear_data(skb, scratch, dlen);
-out:
-	put_cpu();
-	return err;
-}
-
-static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int nexthdr;
-	int err = -ENOMEM;
-	struct ip_comp_hdr *ipch;
-
-	if (skb_linearize_cow(skb))
-		goto out;
-
-	skb->ip_summed = CHECKSUM_NONE;
-
-	/* Remove ipcomp header and decompress original payload */
-	ipch = (void *)skb->data;
-	nexthdr = ipch->nexthdr;
-
-	skb->transport_header = skb->network_header + sizeof(*ipch);
-	__skb_pull(skb, sizeof(*ipch));
-	err = ipcomp_decompress(x, skb);
-	if (err)
-		goto out;
-
-	err = nexthdr;
-
-out:
-	return err;
-}
-
-static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
-{
-	struct ipcomp_data *ipcd = x->data;
-	const int plen = skb->len;
-	int dlen = IPCOMP_SCRATCH_SIZE;
-	u8 *start = skb->data;
-	const int cpu = get_cpu();
-	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-	int err;
-
-	local_bh_disable();
-	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
-	local_bh_enable();
-	if (err)
-		goto out;
-
-	if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) {
-		err = -EMSGSIZE;
-		goto out;
-	}
-
-	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
-	put_cpu();
-
-	pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
-	return 0;
-
-out:
-	put_cpu();
-	return err;
-}
-
-static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err;
-	struct ip_comp_hdr *ipch;
-	struct ipcomp_data *ipcd = x->data;
-
-	if (skb->len < ipcd->threshold) {
-		/* Don't bother compressing */
-		goto out_ok;
-	}
-
-	if (skb_linearize_cow(skb))
-		goto out_ok;
-
-	err = ipcomp_compress(x, skb);
-
-	if (err) {
-		goto out_ok;
-	}
-
-	/* Install ipcomp header, convert into ipcomp datagram. */
-	ipch = ip_comp_hdr(skb);
-	ipch->nexthdr = *skb_mac_header(skb);
-	ipch->flags = 0;
-	ipch->cpi = htons((u16 )ntohl(x->id.spi));
-	*skb_mac_header(skb) = IPPROTO_COMP;
-out_ok:
-	skb_push(skb, -skb_network_offset(skb));
-	return 0;
-}
+#include <net/sock.h>
 
 static void ipcomp4_err(struct sk_buff *skb, u32 info)
 {
@@ -241,156 +102,12 @@ out:
 	return err;
 }
 
-static void ipcomp_free_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (--ipcomp_scratch_users)
-		return;
-
-	scratches = ipcomp_scratches;
-	if (!scratches)
-		return;
-
-	for_each_possible_cpu(i)
-		vfree(*per_cpu_ptr(scratches, i));
-
-	free_percpu(scratches);
-}
-
-static void **ipcomp_alloc_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (ipcomp_scratch_users++)
-		return ipcomp_scratches;
-
-	scratches = alloc_percpu(void *);
-	if (!scratches)
-		return NULL;
-
-	ipcomp_scratches = scratches;
-
-	for_each_possible_cpu(i) {
-		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
-		if (!scratch)
-			return NULL;
-		*per_cpu_ptr(scratches, i) = scratch;
-	}
-
-	return scratches;
-}
-
-static void ipcomp_free_tfms(struct crypto_comp **tfms)
-{
-	struct ipcomp_tfms *pos;
-	int cpu;
-
-	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
-		if (pos->tfms == tfms)
-			break;
-	}
-
-	BUG_TRAP(pos);
-
-	if (--pos->users)
-		return;
-
-	list_del(&pos->list);
-	kfree(pos);
-
-	if (!tfms)
-		return;
-
-	for_each_possible_cpu(cpu) {
-		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
-		crypto_free_comp(tfm);
-	}
-	free_percpu(tfms);
-}
-
-static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
-{
-	struct ipcomp_tfms *pos;
-	struct crypto_comp **tfms;
-	int cpu;
-
-	/* This can be any valid CPU ID so we don't need locking. */
-	cpu = raw_smp_processor_id();
-
-	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
-		struct crypto_comp *tfm;
-
-		tfms = pos->tfms;
-		tfm = *per_cpu_ptr(tfms, cpu);
-
-		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
-			pos->users++;
-			return tfms;
-		}
-	}
-
-	pos = kmalloc(sizeof(*pos), GFP_KERNEL);
-	if (!pos)
-		return NULL;
-
-	pos->users = 1;
-	INIT_LIST_HEAD(&pos->list);
-	list_add(&pos->list, &ipcomp_tfms_list);
-
-	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
-	if (!tfms)
-		goto error;
-
-	for_each_possible_cpu(cpu) {
-		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
-							    CRYPTO_ALG_ASYNC);
-		if (IS_ERR(tfm))
-			goto error;
-		*per_cpu_ptr(tfms, cpu) = tfm;
-	}
-
-	return tfms;
-
-error:
-	ipcomp_free_tfms(tfms);
-	return NULL;
-}
-
-static void ipcomp_free_data(struct ipcomp_data *ipcd)
-{
-	if (ipcd->tfms)
-		ipcomp_free_tfms(ipcd->tfms);
-	ipcomp_free_scratches();
-}
-
-static void ipcomp_destroy(struct xfrm_state *x)
-{
-	struct ipcomp_data *ipcd = x->data;
-	if (!ipcd)
-		return;
-	xfrm_state_delete_tunnel(x);
-	mutex_lock(&ipcomp_resource_mutex);
-	ipcomp_free_data(ipcd);
-	mutex_unlock(&ipcomp_resource_mutex);
-	kfree(ipcd);
-}
-
-static int ipcomp_init_state(struct xfrm_state *x)
+static int ipcomp4_init_state(struct xfrm_state *x)
 {
 	int err;
 	struct ipcomp_data *ipcd;
 	struct xfrm_algo_desc *calg_desc;
 
-	err = -EINVAL;
-	if (!x->calg)
-		goto out;
-
-	if (x->encap)
-		goto out;
-
 	x->props.header_len = 0;
 	switch (x->props.mode) {
 	case XFRM_MODE_TRANSPORT:
@@ -402,40 +119,22 @@ static int ipcomp_init_state(struct xfrm_state *x)
 		goto out;
 	}
 
-	err = -ENOMEM;
-	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
-	if (!ipcd)
+	err = ipcomp_init_state(x);
+	if (err)
 		goto out;
 
-	mutex_lock(&ipcomp_resource_mutex);
-	if (!ipcomp_alloc_scratches())
-		goto error;
-
-	ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name);
-	if (!ipcd->tfms)
-		goto error;
-	mutex_unlock(&ipcomp_resource_mutex);
-
 	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
 	}
 
-	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
-	BUG_ON(!calg_desc);
-	ipcd->threshold = calg_desc->uinfo.comp.threshold;
-	x->data = ipcd;
 	err = 0;
 out:
 	return err;
 
 error_tunnel:
-	mutex_lock(&ipcomp_resource_mutex);
-error:
-	ipcomp_free_data(ipcd);
-	mutex_unlock(&ipcomp_resource_mutex);
-	kfree(ipcd);
+	ipcomp_destroy(x);
 	goto out;
 }
 
@@ -443,7 +142,7 @@ static const struct xfrm_type ipcomp_type = {
 	.description	= "IPCOMP4",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_COMP,
-	.init_state	= ipcomp_init_state,
+	.init_state	= ipcomp4_init_state,
 	.destructor	= ipcomp_destroy,
 	.input		= ipcomp_input,
 	.output		= ipcomp_output
@@ -481,7 +180,7 @@ module_init(ipcomp4_init);
 module_exit(ipcomp4_fini);
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp/IPv4) - RFC3173");
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
 
 MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 42814a2ec9d..ec992159b5f 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -96,10 +96,8 @@ config INET6_ESP
 
 config INET6_IPCOMP
 	tristate "IPv6: IPComp transformation"
-	select XFRM
 	select INET6_XFRM_TUNNEL
-	select CRYPTO
-	select CRYPTO_DEFLATE
+	select XFRM_IPCOMP
 	---help---
 	  Support for IP Payload Compression Protocol (IPComp) (RFC3173),
 	  typically needed for IPsec.
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index ee6de425ce6..0cfcea42153 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -50,125 +50,6 @@
 #include <linux/icmpv6.h>
 #include <linux/mutex.h>
 
-struct ipcomp6_tfms {
-	struct list_head list;
-	struct crypto_comp **tfms;
-	int users;
-};
-
-static DEFINE_MUTEX(ipcomp6_resource_mutex);
-static void **ipcomp6_scratches;
-static int ipcomp6_scratch_users;
-static LIST_HEAD(ipcomp6_tfms_list);
-
-static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int nexthdr;
-	int err = -ENOMEM;
-	struct ip_comp_hdr *ipch;
-	int plen, dlen;
-	struct ipcomp_data *ipcd = x->data;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
-
-	if (skb_linearize_cow(skb))
-		goto out;
-
-	skb->ip_summed = CHECKSUM_NONE;
-
-	/* Remove ipcomp header and decompress original payload */
-	ipch = (void *)skb->data;
-	nexthdr = ipch->nexthdr;
-
-	skb->transport_header = skb->network_header + sizeof(*ipch);
-	__skb_pull(skb, sizeof(*ipch));
-
-	/* decompression */
-	plen = skb->len;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
-	if (err)
-		goto out_put_cpu;
-
-	if (dlen < (plen + sizeof(*ipch))) {
-		err = -EINVAL;
-		goto out_put_cpu;
-	}
-
-	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
-	if (err) {
-		goto out_put_cpu;
-	}
-
-	skb->truesize += dlen - plen;
-	__skb_put(skb, dlen - plen);
-	skb_copy_to_linear_data(skb, scratch, dlen);
-	err = nexthdr;
-
-out_put_cpu:
-	put_cpu();
-out:
-	return err;
-}
-
-static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err;
-	struct ip_comp_hdr *ipch;
-	struct ipcomp_data *ipcd = x->data;
-	int plen, dlen;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
-
-	/* check whether datagram len is larger than threshold */
-	if (skb->len < ipcd->threshold) {
-		goto out_ok;
-	}
-
-	if (skb_linearize_cow(skb))
-		goto out_ok;
-
-	/* compression */
-	plen = skb->len;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	local_bh_disable();
-	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
-	local_bh_enable();
-	if (err || (dlen + sizeof(*ipch)) >= plen) {
-		put_cpu();
-		goto out_ok;
-	}
-	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
-	put_cpu();
-	pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
-
-	/* insert ipcomp header and replace datagram */
-	ipch = ip_comp_hdr(skb);
-	ipch->nexthdr = *skb_mac_header(skb);
-	ipch->flags = 0;
-	ipch->cpi = htons((u16 )ntohl(x->id.spi));
-	*skb_mac_header(skb) = IPPROTO_COMP;
-
-out_ok:
-	skb_push(skb, -skb_network_offset(skb));
-
-	return 0;
-}
-
 static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 				int type, int code, int offset, __be32 info)
 {
@@ -251,161 +132,12 @@ out:
 	return err;
 }
 
-static void ipcomp6_free_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (--ipcomp6_scratch_users)
-		return;
-
-	scratches = ipcomp6_scratches;
-	if (!scratches)
-		return;
-
-	for_each_possible_cpu(i) {
-		void *scratch = *per_cpu_ptr(scratches, i);
-
-		vfree(scratch);
-	}
-
-	free_percpu(scratches);
-}
-
-static void **ipcomp6_alloc_scratches(void)
-{
-	int i;
-	void **scratches;
-
-	if (ipcomp6_scratch_users++)
-		return ipcomp6_scratches;
-
-	scratches = alloc_percpu(void *);
-	if (!scratches)
-		return NULL;
-
-	ipcomp6_scratches = scratches;
-
-	for_each_possible_cpu(i) {
-		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
-		if (!scratch)
-			return NULL;
-		*per_cpu_ptr(scratches, i) = scratch;
-	}
-
-	return scratches;
-}
-
-static void ipcomp6_free_tfms(struct crypto_comp **tfms)
-{
-	struct ipcomp6_tfms *pos;
-	int cpu;
-
-	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
-		if (pos->tfms == tfms)
-			break;
-	}
-
-	BUG_TRAP(pos);
-
-	if (--pos->users)
-		return;
-
-	list_del(&pos->list);
-	kfree(pos);
-
-	if (!tfms)
-		return;
-
-	for_each_possible_cpu(cpu) {
-		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
-		crypto_free_comp(tfm);
-	}
-	free_percpu(tfms);
-}
-
-static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name)
-{
-	struct ipcomp6_tfms *pos;
-	struct crypto_comp **tfms;
-	int cpu;
-
-	/* This can be any valid CPU ID so we don't need locking. */
-	cpu = raw_smp_processor_id();
-
-	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
-		struct crypto_comp *tfm;
-
-		tfms = pos->tfms;
-		tfm = *per_cpu_ptr(tfms, cpu);
-
-		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
-			pos->users++;
-			return tfms;
-		}
-	}
-
-	pos = kmalloc(sizeof(*pos), GFP_KERNEL);
-	if (!pos)
-		return NULL;
-
-	pos->users = 1;
-	INIT_LIST_HEAD(&pos->list);
-	list_add(&pos->list, &ipcomp6_tfms_list);
-
-	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
-	if (!tfms)
-		goto error;
-
-	for_each_possible_cpu(cpu) {
-		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
-							    CRYPTO_ALG_ASYNC);
-		if (IS_ERR(tfm))
-			goto error;
-		*per_cpu_ptr(tfms, cpu) = tfm;
-	}
-
-	return tfms;
-
-error:
-	ipcomp6_free_tfms(tfms);
-	return NULL;
-}
-
-static void ipcomp6_free_data(struct ipcomp_data *ipcd)
-{
-	if (ipcd->tfms)
-		ipcomp6_free_tfms(ipcd->tfms);
-	ipcomp6_free_scratches();
-}
-
-static void ipcomp6_destroy(struct xfrm_state *x)
-{
-	struct ipcomp_data *ipcd = x->data;
-	if (!ipcd)
-		return;
-	xfrm_state_delete_tunnel(x);
-	mutex_lock(&ipcomp6_resource_mutex);
-	ipcomp6_free_data(ipcd);
-	mutex_unlock(&ipcomp6_resource_mutex);
-	kfree(ipcd);
-
-	xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
-}
-
 static int ipcomp6_init_state(struct xfrm_state *x)
 {
 	int err;
 	struct ipcomp_data *ipcd;
 	struct xfrm_algo_desc *calg_desc;
 
-	err = -EINVAL;
-	if (!x->calg)
-		goto out;
-
-	if (x->encap)
-		goto out;
-
 	x->props.header_len = 0;
 	switch (x->props.mode) {
 	case XFRM_MODE_TRANSPORT:
@@ -417,39 +149,21 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 		goto out;
 	}
 
-	err = -ENOMEM;
-	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
-	if (!ipcd)
+	err = ipcomp_init_state(x);
+	if (err)
 		goto out;
 
-	mutex_lock(&ipcomp6_resource_mutex);
-	if (!ipcomp6_alloc_scratches())
-		goto error;
-
-	ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name);
-	if (!ipcd->tfms)
-		goto error;
-	mutex_unlock(&ipcomp6_resource_mutex);
-
 	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp6_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
 	}
 
-	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
-	BUG_ON(!calg_desc);
-	ipcd->threshold = calg_desc->uinfo.comp.threshold;
-	x->data = ipcd;
 	err = 0;
 out:
 	return err;
 error_tunnel:
-	mutex_lock(&ipcomp6_resource_mutex);
-error:
-	ipcomp6_free_data(ipcd);
-	mutex_unlock(&ipcomp6_resource_mutex);
-	kfree(ipcd);
+	ipcomp_destroy(x);
 
 	goto out;
 }
@@ -460,9 +174,9 @@ static const struct xfrm_type ipcomp6_type =
 	.owner		= THIS_MODULE,
 	.proto		= IPPROTO_COMP,
 	.init_state	= ipcomp6_init_state,
-	.destructor	= ipcomp6_destroy,
-	.input		= ipcomp6_input,
-	.output		= ipcomp6_output,
+	.destructor	= ipcomp_destroy,
+	.input		= ipcomp_input,
+	.output		= ipcomp_output,
 	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 9201ef8ad90..6d081674515 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -46,6 +46,12 @@ config XFRM_STATISTICS
 
 	  If unsure, say N.
 
+config XFRM_IPCOMP
+	tristate
+	select XFRM
+	select CRYPTO
+	select CRYPTO_DEFLATE
+
 config NET_KEY
 	tristate "PF_KEY sockets"
 	select XFRM
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 332cfb0ff56..0f439a72cca 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -6,4 +6,5 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
 		      xfrm_input.o xfrm_output.o xfrm_algo.o
 obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
+obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
 
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
new file mode 100644
index 00000000000..b51e804fbba
--- /dev/null
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -0,0 +1,349 @@
+/*
+ * IP Payload Compression Protocol (IPComp) - RFC3173.
+ *
+ * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2003-2008 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Todo:
+ *   - Tunable compression parameters.
+ *   - Compression stats.
+ *   - Adaptive compression.
+ */
+
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rtnetlink.h>
+#include <linux/smp.h>
+#include <linux/vmalloc.h>
+#include <net/ip.h>
+#include <net/ipcomp.h>
+#include <net/xfrm.h>
+
+struct ipcomp_tfms {
+	struct list_head list;
+	struct crypto_comp **tfms;
+	int users;
+};
+
+static DEFINE_MUTEX(ipcomp_resource_mutex);
+static void **ipcomp_scratches;
+static int ipcomp_scratch_users;
+static LIST_HEAD(ipcomp_tfms_list);
+
+static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipcomp_data *ipcd = x->data;
+	const int plen = skb->len;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	const u8 *start = skb->data;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
+
+	if (err)
+		goto out;
+
+	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
+	if (err)
+		goto out;
+
+	skb->truesize += dlen - plen;
+	__skb_put(skb, dlen - plen);
+	skb_copy_to_linear_data(skb, scratch, dlen);
+out:
+	put_cpu();
+	return err;
+}
+
+int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int nexthdr;
+	int err = -ENOMEM;
+	struct ip_comp_hdr *ipch;
+
+	if (skb_linearize_cow(skb))
+		goto out;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* Remove ipcomp header and decompress original payload */
+	ipch = (void *)skb->data;
+	nexthdr = ipch->nexthdr;
+
+	skb->transport_header = skb->network_header + sizeof(*ipch);
+	__skb_pull(skb, sizeof(*ipch));
+	err = ipcomp_decompress(x, skb);
+	if (err)
+		goto out;
+
+	err = nexthdr;
+
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(ipcomp_input);
+
+static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipcomp_data *ipcd = x->data;
+	const int plen = skb->len;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	u8 *start = skb->data;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err;
+
+	local_bh_disable();
+	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+	local_bh_enable();
+	if (err)
+		goto out;
+
+	if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
+	put_cpu();
+
+	pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
+	return 0;
+
+out:
+	put_cpu();
+	return err;
+}
+
+int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+	struct ip_comp_hdr *ipch;
+	struct ipcomp_data *ipcd = x->data;
+
+	if (skb->len < ipcd->threshold) {
+		/* Don't bother compressing */
+		goto out_ok;
+	}
+
+	if (skb_linearize_cow(skb))
+		goto out_ok;
+
+	err = ipcomp_compress(x, skb);
+
+	if (err) {
+		goto out_ok;
+	}
+
+	/* Install ipcomp header, convert into ipcomp datagram. */
+	ipch = ip_comp_hdr(skb);
+	ipch->nexthdr = *skb_mac_header(skb);
+	ipch->flags = 0;
+	ipch->cpi = htons((u16 )ntohl(x->id.spi));
+	*skb_mac_header(skb) = IPPROTO_COMP;
+out_ok:
+	skb_push(skb, -skb_network_offset(skb));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipcomp_output);
+
+static void ipcomp_free_scratches(void)
+{
+	int i;
+	void **scratches;
+
+	if (--ipcomp_scratch_users)
+		return;
+
+	scratches = ipcomp_scratches;
+	if (!scratches)
+		return;
+
+	for_each_possible_cpu(i)
+		vfree(*per_cpu_ptr(scratches, i));
+
+	free_percpu(scratches);
+}
+
+static void **ipcomp_alloc_scratches(void)
+{
+	int i;
+	void **scratches;
+
+	if (ipcomp_scratch_users++)
+		return ipcomp_scratches;
+
+	scratches = alloc_percpu(void *);
+	if (!scratches)
+		return NULL;
+
+	ipcomp_scratches = scratches;
+
+	for_each_possible_cpu(i) {
+		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
+		if (!scratch)
+			return NULL;
+		*per_cpu_ptr(scratches, i) = scratch;
+	}
+
+	return scratches;
+}
+
+static void ipcomp_free_tfms(struct crypto_comp **tfms)
+{
+	struct ipcomp_tfms *pos;
+	int cpu;
+
+	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
+		if (pos->tfms == tfms)
+			break;
+	}
+
+	BUG_TRAP(pos);
+
+	if (--pos->users)
+		return;
+
+	list_del(&pos->list);
+	kfree(pos);
+
+	if (!tfms)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+		crypto_free_comp(tfm);
+	}
+	free_percpu(tfms);
+}
+
+static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
+{
+	struct ipcomp_tfms *pos;
+	struct crypto_comp **tfms;
+	int cpu;
+
+	/* This can be any valid CPU ID so we don't need locking. */
+	cpu = raw_smp_processor_id();
+
+	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
+		struct crypto_comp *tfm;
+
+		tfms = pos->tfms;
+		tfm = *per_cpu_ptr(tfms, cpu);
+
+		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
+			pos->users++;
+			return tfms;
+		}
+	}
+
+	pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+	if (!pos)
+		return NULL;
+
+	pos->users = 1;
+	INIT_LIST_HEAD(&pos->list);
+	list_add(&pos->list, &ipcomp_tfms_list);
+
+	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
+	if (!tfms)
+		goto error;
+
+	for_each_possible_cpu(cpu) {
+		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+							    CRYPTO_ALG_ASYNC);
+		if (IS_ERR(tfm))
+			goto error;
+		*per_cpu_ptr(tfms, cpu) = tfm;
+	}
+
+	return tfms;
+
+error:
+	ipcomp_free_tfms(tfms);
+	return NULL;
+}
+
+static void ipcomp_free_data(struct ipcomp_data *ipcd)
+{
+	if (ipcd->tfms)
+		ipcomp_free_tfms(ipcd->tfms);
+	ipcomp_free_scratches();
+}
+
+void ipcomp_destroy(struct xfrm_state *x)
+{
+	struct ipcomp_data *ipcd = x->data;
+	if (!ipcd)
+		return;
+	xfrm_state_delete_tunnel(x);
+	mutex_lock(&ipcomp_resource_mutex);
+	ipcomp_free_data(ipcd);
+	mutex_unlock(&ipcomp_resource_mutex);
+	kfree(ipcd);
+}
+EXPORT_SYMBOL_GPL(ipcomp_destroy);
+
+int ipcomp_init_state(struct xfrm_state *x)
+{
+	int err;
+	struct ipcomp_data *ipcd;
+	struct xfrm_algo_desc *calg_desc;
+
+	err = -EINVAL;
+	if (!x->calg)
+		goto out;
+
+	if (x->encap)
+		goto out;
+
+	err = -ENOMEM;
+	ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
+	if (!ipcd)
+		goto out;
+
+	mutex_lock(&ipcomp_resource_mutex);
+	if (!ipcomp_alloc_scratches())
+		goto error;
+
+	ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name);
+	if (!ipcd->tfms)
+		goto error;
+	mutex_unlock(&ipcomp_resource_mutex);
+
+	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
+	BUG_ON(!calg_desc);
+	ipcd->threshold = calg_desc->uinfo.comp.threshold;
+	x->data = ipcd;
+	err = 0;
+out:
+	return err;
+
+error:
+	ipcomp_free_data(ipcd);
+	mutex_unlock(&ipcomp_resource_mutex);
+	kfree(ipcd);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(ipcomp_init_state);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-- 
cgit v1.2.3-70-g09d2


From 7d7e5a60c62e88cb8782760bb6c4d3bd1577a6c6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Jul 2008 02:55:33 -0700
Subject: ipsec: ipcomp - Decompress into frags if necessary

When decompressing extremely large packets allocating them through
kmalloc is prone to failure.  Therefore it's better to use page
frags instead.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_ipcomp.c | 48 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 42 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index b51e804fbba..800f669083f 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -17,6 +17,7 @@
 
 #include <linux/crypto.h>
 #include <linux/err.h>
+#include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -49,6 +50,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
 	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
 	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
+	int len;
 
 	if (err)
 		goto out;
@@ -58,13 +60,47 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 	}
 
-	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
-	if (err)
-		goto out;
+	len = dlen - plen;
+	if (len > skb_tailroom(skb))
+		len = skb_tailroom(skb);
+
+	skb->truesize += len;
+	__skb_put(skb, len);
+
+	len += plen;
+	skb_copy_to_linear_data(skb, scratch, len);
+
+	while ((scratch += len, dlen -= len) > 0) {
+		skb_frag_t *frag;
+
+		err = -EMSGSIZE;
+		if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS))
+			goto out;
+
+		frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags;
+		frag->page = alloc_page(GFP_ATOMIC);
+
+		err = -ENOMEM;
+		if (!frag->page)
+			goto out;
+
+		len = PAGE_SIZE;
+		if (dlen < len)
+			len = dlen;
+
+		memcpy(page_address(frag->page), scratch, len);
+
+		frag->page_offset = 0;
+		frag->size = len;
+		skb->truesize += len;
+		skb->data_len += len;
+		skb->len += len;
+
+		skb_shinfo(skb)->nr_frags++;
+	}
+
+	err = 0;
 
-	skb->truesize += dlen - plen;
-	__skb_put(skb, dlen - plen);
-	skb_copy_to_linear_data(skb, scratch, dlen);
 out:
 	put_cpu();
 	return err;
-- 
cgit v1.2.3-70-g09d2


From 696adfe84c11c571a1e0863460ff0ec142b4e5a9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 25 Jul 2008 01:45:34 -0700
Subject: list_for_each_rcu must die: networking

All uses of list_for_each_rcu() can be profitably replaced by the
easier-to-use list_for_each_entry_rcu().  This patch makes this change for
networking, in preparation for removing the list_for_each_rcu() API
entirely.

Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/802/psnap.c     | 4 +---
 net/ipv4/af_inet.c  | 9 +++------
 net/ipv6/af_inet6.c | 9 +++------
 3 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/802/psnap.c b/net/802/psnap.c
index ea464393144..b3cfe5a14fc 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -31,11 +31,9 @@ static struct llc_sap *snap_sap;
  */
 static struct datalink_proto *find_snap_client(unsigned char *desc)
 {
-	struct list_head *entry;
 	struct datalink_proto *proto = NULL, *p;
 
-	list_for_each_rcu(entry, &snap_list) {
-		p = list_entry(entry, struct datalink_proto, node);
+	list_for_each_entry_rcu(p, &snap_list, node) {
 		if (!memcmp(p->type, desc, 5)) {
 			proto = p;
 			break;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index dd919d84285..f440a9f5492 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -264,7 +264,6 @@ static inline int inet_netns_ok(struct net *net, int protocol)
 static int inet_create(struct net *net, struct socket *sock, int protocol)
 {
 	struct sock *sk;
-	struct list_head *p;
 	struct inet_protosw *answer;
 	struct inet_sock *inet;
 	struct proto *answer_prot;
@@ -281,13 +280,12 @@ static int inet_create(struct net *net, struct socket *sock, int protocol)
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
-	answer = NULL;
 lookup_protocol:
 	err = -ESOCKTNOSUPPORT;
 	rcu_read_lock();
-	list_for_each_rcu(p, &inetsw[sock->type]) {
-		answer = list_entry(p, struct inet_protosw, list);
+	list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
 
+		err = 0;
 		/* Check the non-wild match. */
 		if (protocol == answer->protocol) {
 			if (protocol != IPPROTO_IP)
@@ -302,10 +300,9 @@ lookup_protocol:
 				break;
 		}
 		err = -EPROTONOSUPPORT;
-		answer = NULL;
 	}
 
-	if (unlikely(answer == NULL)) {
+	if (unlikely(err)) {
 		if (try_loading_module < 2) {
 			rcu_read_unlock();
 			/*
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3d828bc4b1c..60461ad7fa6 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -83,7 +83,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol)
 	struct inet_sock *inet;
 	struct ipv6_pinfo *np;
 	struct sock *sk;
-	struct list_head *p;
 	struct inet_protosw *answer;
 	struct proto *answer_prot;
 	unsigned char answer_flags;
@@ -97,13 +96,12 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol)
 		build_ehash_secret();
 
 	/* Look for the requested type/protocol pair. */
-	answer = NULL;
 lookup_protocol:
 	err = -ESOCKTNOSUPPORT;
 	rcu_read_lock();
-	list_for_each_rcu(p, &inetsw6[sock->type]) {
-		answer = list_entry(p, struct inet_protosw, list);
+	list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
 
+		err = 0;
 		/* Check the non-wild match. */
 		if (protocol == answer->protocol) {
 			if (protocol != IPPROTO_IP)
@@ -118,10 +116,9 @@ lookup_protocol:
 				break;
 		}
 		err = -EPROTONOSUPPORT;
-		answer = NULL;
 	}
 
-	if (!answer) {
+	if (err) {
 		if (try_loading_module < 2) {
 			rcu_read_unlock();
 			/*
-- 
cgit v1.2.3-70-g09d2


From 717115e1a5856b57af0f71e1df7149108294fc10 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Fri, 25 Jul 2008 01:45:58 -0700
Subject: printk ratelimiting rewrite

All ratelimit user use same jiffies and burst params, so some messages
(callbacks) will be lost.

For example:
a call printk_ratelimit(5 * HZ, 1)
b call printk_ratelimit(5 * HZ, 1) before the 5*HZ timeout of a, then b will
will be supressed.

- rewrite __ratelimit, and use a ratelimit_state as parameter.  Thanks for
  hints from andrew.

- Add WARN_ON_RATELIMIT, update rcupreempt.h

- remove __printk_ratelimit

- use __ratelimit in net_ratelimit

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h  |  3 +++
 include/linux/kernel.h     |  8 ++-----
 include/linux/net.h        |  3 +--
 include/linux/ratelimit.h  | 27 +++++++++++++++++++++++
 include/linux/rcupreempt.h |  9 ++++++--
 kernel/printk.c            | 17 +++-----------
 kernel/sysctl.c            |  4 ++--
 lib/ratelimit.c            | 55 +++++++++++++++++++++++++---------------------
 net/core/sysctl_net_core.c |  4 ++--
 net/core/utils.c           |  5 ++---
 10 files changed, 79 insertions(+), 56 deletions(-)
 create mode 100644 include/linux/ratelimit.h

(limited to 'net')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index a346e744e77..a3f738cffdb 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -97,6 +97,9 @@ extern void warn_slowpath(const char *file, const int line,
 	unlikely(__ret_warn_once);				\
 })
 
+#define WARN_ON_RATELIMIT(condition, state)			\
+		WARN_ON((condition) && __ratelimit(state))
+
 #ifdef CONFIG_SMP
 # define WARN_ON_SMP(x)			WARN_ON(x)
 #else
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5c4b1251e11..fdbbf72ca2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -15,6 +15,7 @@
 #include <linux/bitops.h>
 #include <linux/log2.h>
 #include <linux/typecheck.h>
+#include <linux/ratelimit.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
@@ -189,11 +190,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2))) __cold;
 
-extern int printk_ratelimit_jiffies;
-extern int printk_ratelimit_burst;
+extern struct ratelimit_state printk_ratelimit_state;
 extern int printk_ratelimit(void);
-extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst);
-extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
 #else
@@ -204,8 +202,6 @@ static inline int printk(const char *s, ...)
 	__attribute__ ((format (printf, 1, 2)));
 static inline int __cold printk(const char *s, ...) { return 0; }
 static inline int printk_ratelimit(void) { return 0; }
-static inline int __printk_ratelimit(int ratelimit_jiffies, \
-				     int ratelimit_burst) { return 0; }
 static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
 					  unsigned int interval_msec)	\
 		{ return false; }
diff --git a/include/linux/net.h b/include/linux/net.h
index 2f999fbb188..4a9a30f2d68 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -351,8 +351,7 @@ static const struct proto_ops name##_ops = {			\
 
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
-extern int net_msg_cost;
-extern int net_msg_burst;
+extern struct ratelimit_state net_ratelimit_state;
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
new file mode 100644
index 00000000000..18a5b9ba9d4
--- /dev/null
+++ b/include/linux/ratelimit.h
@@ -0,0 +1,27 @@
+#ifndef _LINUX_RATELIMIT_H
+#define _LINUX_RATELIMIT_H
+#include <linux/param.h>
+
+#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
+#define DEFAULT_RATELIMIT_BURST 10
+
+struct ratelimit_state {
+	int interval;
+	int burst;
+	int printed;
+	int missed;
+	unsigned long begin;
+};
+
+#define DEFINE_RATELIMIT_STATE(name, interval, burst)		\
+		struct ratelimit_state name = {interval, burst,}
+
+extern int __ratelimit(struct ratelimit_state *rs);
+
+static inline int ratelimit(void)
+{
+	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+					DEFAULT_RATELIMIT_BURST);
+	return __ratelimit(&rs);
+}
+#endif
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index f04b64eca63..0967f03b070 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -115,16 +115,21 @@ DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
 
 static inline void rcu_enter_nohz(void)
 {
+	static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
 	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
 	__get_cpu_var(rcu_dyntick_sched).dynticks++;
-	WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1);
+	WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs);
 }
 
 static inline void rcu_exit_nohz(void)
 {
+	static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
 	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 	__get_cpu_var(rcu_dyntick_sched).dynticks++;
-	WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1));
+	WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
+				&rs);
 }
 
 #else /* CONFIG_NO_HZ */
diff --git a/kernel/printk.c b/kernel/printk.c
index 3f7a2a94583..a7f7559c5f6 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1308,6 +1308,8 @@ void tty_write_message(struct tty_struct *tty, char *msg)
 }
 
 #if defined CONFIG_PRINTK
+
+DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
 /*
  * printk rate limiting, lifted from the networking subsystem.
  *
@@ -1315,22 +1317,9 @@ void tty_write_message(struct tty_struct *tty, char *msg)
  * every printk_ratelimit_jiffies to make a denial-of-service
  * attack impossible.
  */
-int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
-{
-	return __ratelimit(ratelimit_jiffies, ratelimit_burst);
-}
-EXPORT_SYMBOL(__printk_ratelimit);
-
-/* minimum time in jiffies between messages */
-int printk_ratelimit_jiffies = 5 * HZ;
-
-/* number of messages we send before ratelimiting */
-int printk_ratelimit_burst = 10;
-
 int printk_ratelimit(void)
 {
-	return __printk_ratelimit(printk_ratelimit_jiffies,
-				printk_ratelimit_burst);
+	return __ratelimit(&printk_ratelimit_state);
 }
 EXPORT_SYMBOL(printk_ratelimit);
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a8299d1fe5..35a50db9b6c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -624,7 +624,7 @@ static struct ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_PRINTK_RATELIMIT,
 		.procname	= "printk_ratelimit",
-		.data		= &printk_ratelimit_jiffies,
+		.data		= &printk_ratelimit_state.interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -633,7 +633,7 @@ static struct ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
 		.procname	= "printk_ratelimit_burst",
-		.data		= &printk_ratelimit_burst,
+		.data		= &printk_ratelimit_state.burst,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 485e3040dcd..35136671b21 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -3,6 +3,9 @@
  *
  * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
  *
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ *
  * This file is released under the GPLv2.
  *
  */
@@ -11,41 +14,43 @@
 #include <linux/jiffies.h>
 #include <linux/module.h>
 
+static DEFINE_SPINLOCK(ratelimit_lock);
+static unsigned long flags;
+
 /*
  * __ratelimit - rate limiting
- * @ratelimit_jiffies: minimum time in jiffies between two callbacks
- * @ratelimit_burst: number of callbacks we do before ratelimiting
+ * @rs: ratelimit_state data
  *
- * This enforces a rate limit: not more than @ratelimit_burst callbacks
- * in every ratelimit_jiffies
+ * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
+ * in every @rs->ratelimit_jiffies
  */
-int __ratelimit(int ratelimit_jiffies, int ratelimit_burst)
+int __ratelimit(struct ratelimit_state *rs)
 {
-	static DEFINE_SPINLOCK(ratelimit_lock);
-	static unsigned toks = 10 * 5 * HZ;
-	static unsigned long last_msg;
-	static int missed;
-	unsigned long flags;
-	unsigned long now = jiffies;
+	if (!rs->interval)
+		return 1;
 
 	spin_lock_irqsave(&ratelimit_lock, flags);
-	toks += now - last_msg;
-	last_msg = now;
-	if (toks > (ratelimit_burst * ratelimit_jiffies))
-		toks = ratelimit_burst * ratelimit_jiffies;
-	if (toks >= ratelimit_jiffies) {
-		int lost = missed;
+	if (!rs->begin)
+		rs->begin = jiffies;
 
-		missed = 0;
-		toks -= ratelimit_jiffies;
-		spin_unlock_irqrestore(&ratelimit_lock, flags);
-		if (lost)
-			printk(KERN_WARNING "%s: %d messages suppressed\n",
-				__func__, lost);
-		return 1;
+	if (time_is_before_jiffies(rs->begin + rs->interval)) {
+		if (rs->missed)
+			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
+				__func__, rs->missed);
+		rs->begin = 0;
+		rs->printed = 0;
+		rs->missed = 0;
 	}
-	missed++;
+	if (rs->burst && rs->burst > rs->printed)
+		goto print;
+
+	rs->missed++;
 	spin_unlock_irqrestore(&ratelimit_lock, flags);
 	return 0;
+
+print:
+	rs->printed++;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
 }
 EXPORT_SYMBOL(__ratelimit);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a570e2af22c..f686467ff12 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -67,7 +67,7 @@ static struct ctl_table net_core_table[] = {
 	{
 		.ctl_name	= NET_CORE_MSG_COST,
 		.procname	= "message_cost",
-		.data		= &net_msg_cost,
+		.data		= &net_ratelimit_state.interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = {
 	{
 		.ctl_name	= NET_CORE_MSG_BURST,
 		.procname	= "message_burst",
-		.data		= &net_msg_burst,
+		.data		= &net_ratelimit_state.burst,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/net/core/utils.c b/net/core/utils.c
index 8031eb59054..72e0ebe964a 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -31,17 +31,16 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-int net_msg_cost __read_mostly = 5*HZ;
-int net_msg_burst __read_mostly = 10;
 int net_msg_warn __read_mostly = 1;
 EXPORT_SYMBOL(net_msg_warn);
 
+DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
 /*
  * All net warning printk()s should be guarded by this function.
  */
 int net_ratelimit(void)
 {
-	return __printk_ratelimit(net_msg_cost, net_msg_burst);
+	return __ratelimit(&net_ratelimit_state);
 }
 EXPORT_SYMBOL(net_ratelimit);
 
-- 
cgit v1.2.3-70-g09d2


From 4ecb90090c84210a8bd2a9d7a5906e616735873c Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 25 Jul 2008 01:48:32 -0700
Subject: sysctl: allow override of /proc/sys/net with CAP_NET_ADMIN

Extend the permission check for networking sysctl's to allow modification
when current process has CAP_NET_ADMIN capability and is not root.  This
version uses the until now unused permissions hook to override the mode
value for /proc/sys/net if accessed by a user with capabilities.

Found while working with Quagga.  It is impossible to turn forwarding
on/off through the command interface because Quagga uses secure coding
practice of dropping privledges during initialization and only raising via
capabilities when necessary.  Since the dameon has reset real/effective
uid after initialization, all attempts to access /proc/sys/net variables
will fail.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morgan <morgan@kernel.org>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/sysctl_net.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'net')

diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 007c1a6708e..63ada437fc2 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -35,8 +35,22 @@ net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
 	return &namespaces->net_ns->sysctl_table_headers;
 }
 
+/* Return standard mode bits for table entry. */
+static int net_ctl_permissions(struct ctl_table_root *root,
+			       struct nsproxy *nsproxy,
+			       struct ctl_table *table)
+{
+	/* Allow network administrator to have same access as root. */
+	if (capable(CAP_NET_ADMIN)) {
+		int mode = (table->mode >> 6) & 7;
+		return (mode << 6) | (mode << 3) | mode;
+	}
+	return table->mode;
+}
+
 static struct ctl_table_root net_sysctl_root = {
 	.lookup = net_ctl_header_lookup,
+	.permissions = net_ctl_permissions,
 };
 
 static LIST_HEAD(net_sysctl_ro_tables);
-- 
cgit v1.2.3-70-g09d2


From 547b792cac0a038b9dbf958d3c120df3740b5572 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Fri, 25 Jul 2008 21:43:18 -0700
Subject: net: convert BUG_TRAP to generic WARN_ON
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes legacy reinvent-the-wheel type thing. The generic
machinery integrates much better to automated debugging aids
such as kerneloops.org (and others), and is unambiguous due to
better naming. Non-intuively BUG_TRAP() is actually equal to
WARN_ON() rather than BUG_ON() though some might actually be
promoted to BUG_ON() but I left that to future.

I could make at least one BUILD_BUG_ON conversion.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h              |  5 +++--
 net/appletalk/ddp.c                     |  4 ++--
 net/core/datagram.c                     |  8 ++++----
 net/core/dev.c                          | 10 +++++-----
 net/core/request_sock.c                 |  2 +-
 net/core/skbuff.c                       | 20 ++++++++++----------
 net/core/stream.c                       |  6 +++---
 net/core/user_dma.c                     |  5 ++---
 net/dccp/dccp.h                         |  2 +-
 net/dccp/input.c                        |  2 +-
 net/dccp/ipv4.c                         |  2 +-
 net/dccp/ipv6.c                         |  2 +-
 net/dccp/proto.c                        |  4 ++--
 net/dccp/timer.c                        |  2 +-
 net/ipv4/af_inet.c                      | 14 +++++++-------
 net/ipv4/devinet.c                      |  6 +++---
 net/ipv4/inet_connection_sock.c         | 18 +++++++++---------
 net/ipv4/inet_fragment.c                |  4 ++--
 net/ipv4/inet_hashtables.c              |  8 ++++----
 net/ipv4/inet_timewait_sock.c           |  2 +-
 net/ipv4/ip_fragment.c                  |  4 ++--
 net/ipv4/ip_output.c                    |  2 +-
 net/ipv4/tcp.c                          | 12 ++++++------
 net/ipv4/tcp_input.c                    | 20 ++++++++++----------
 net/ipv4/tcp_ipv4.c                     |  2 +-
 net/ipv4/tcp_timer.c                    |  2 +-
 net/ipv6/addrconf.c                     | 11 +++++++----
 net/ipv6/af_inet6.c                     |  2 +-
 net/ipv6/inet6_connection_sock.c        |  2 +-
 net/ipv6/inet6_hashtables.c             |  4 ++--
 net/ipv6/ip6_fib.c                      | 31 ++++++++++++++++---------------
 net/ipv6/ip6_output.c                   |  2 +-
 net/ipv6/mip6.c                         |  8 ++++----
 net/ipv6/netfilter/nf_conntrack_reasm.c |  4 ++--
 net/ipv6/reassembly.c                   |  4 ++--
 net/ipv6/tcp_ipv6.c                     |  2 +-
 net/key/af_key.c                        |  4 ++--
 net/netlink/af_netlink.c                |  7 ++++---
 net/packet/af_packet.c                  |  4 ++--
 net/rxrpc/af_rxrpc.c                    |  6 +++---
 net/sched/act_api.c                     |  2 +-
 net/sched/act_police.c                  |  2 +-
 net/sched/cls_u32.c                     | 10 +++++-----
 net/sched/sch_cbq.c                     |  4 ++--
 net/sched/sch_generic.c                 |  2 +-
 net/sched/sch_htb.c                     | 16 ++++++++--------
 net/sctp/associola.c                    |  2 +-
 net/unix/af_unix.c                      |  8 ++++----
 net/xfrm/xfrm_algo.c                    |  4 ++--
 net/xfrm/xfrm_ipcomp.c                  |  3 +--
 net/xfrm/xfrm_state.c                   |  2 +-
 51 files changed, 159 insertions(+), 155 deletions(-)

(limited to 'net')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 0c96e7bed5d..8d6e991ef4d 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <linux/bug.h>
 
 #include <net/sock.h>
 
@@ -170,7 +171,7 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue
 {
 	struct request_sock *req = queue->rskq_accept_head;
 
-	BUG_TRAP(req != NULL);
+	WARN_ON(req == NULL);
 
 	queue->rskq_accept_head = req->dl_next;
 	if (queue->rskq_accept_head == NULL)
@@ -185,7 +186,7 @@ static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queu
 	struct request_sock *req = reqsk_queue_remove(queue);
 	struct sock *child = req->sk;
 
-	BUG_TRAP(child != NULL);
+	WARN_ON(child == NULL);
 
 	sk_acceptq_removed(parent);
 	__reqsk_free(req);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 07b5b82c5ea..0c850427a85 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -959,7 +959,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
@@ -986,7 +986,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
 		for (; list; list = list->next) {
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			if ((copy = end - offset) > 0) {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 8a28fc93b72..dd61dcad601 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -285,7 +285,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
@@ -315,7 +315,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 		for (; list; list = list->next) {
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			if ((copy = end - offset) > 0) {
@@ -366,7 +366,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
@@ -402,7 +402,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 		for (; list; list=list->next) {
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			if ((copy = end - offset) > 0) {
diff --git a/net/core/dev.c b/net/core/dev.c
index ccf97f9f37e..c6f9c83745e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1973,7 +1973,7 @@ static void net_tx_action(struct softirq_action *h)
 			struct sk_buff *skb = clist;
 			clist = clist->next;
 
-			BUG_TRAP(!atomic_read(&skb->users));
+			WARN_ON(atomic_read(&skb->users));
 			__kfree_skb(skb);
 		}
 	}
@@ -3847,7 +3847,7 @@ static void rollback_registered(struct net_device *dev)
 		dev->uninit(dev);
 
 	/* Notifier chain MUST detach us from master device. */
-	BUG_TRAP(!dev->master);
+	WARN_ON(dev->master);
 
 	/* Remove entries from kobject tree */
 	netdev_unregister_kobject(dev);
@@ -4169,9 +4169,9 @@ void netdev_run_todo(void)
 
 		/* paranoia */
 		BUG_ON(atomic_read(&dev->refcnt));
-		BUG_TRAP(!dev->ip_ptr);
-		BUG_TRAP(!dev->ip6_ptr);
-		BUG_TRAP(!dev->dn_ptr);
+		WARN_ON(dev->ip_ptr);
+		WARN_ON(dev->ip6_ptr);
+		WARN_ON(dev->dn_ptr);
 
 		if (dev->destructor)
 			dev->destructor(dev);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 2d3035d3abd..7552495aff7 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -123,7 +123,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 		}
 	}
 
-	BUG_TRAP(lopt->qlen == 0);
+	WARN_ON(lopt->qlen != 0);
 	if (lopt_size > PAGE_SIZE)
 		vfree(lopt);
 	else
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e4115672b6c..4e0c9227418 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1200,7 +1200,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
@@ -1229,7 +1229,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 		for (; list; list = list->next) {
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			if ((copy = end - offset) > 0) {
@@ -1475,7 +1475,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + frag->size;
 		if ((copy = end - offset) > 0) {
@@ -1503,7 +1503,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 		for (; list; list = list->next) {
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			if ((copy = end - offset) > 0) {
@@ -1552,7 +1552,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
@@ -1581,7 +1581,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 		for (; list; list = list->next) {
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			if ((copy = end - offset) > 0) {
@@ -1629,7 +1629,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
@@ -1662,7 +1662,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 			__wsum csum2;
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			if ((copy = end - offset) > 0) {
@@ -2373,7 +2373,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
@@ -2397,7 +2397,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 		for (; list; list = list->next) {
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			if ((copy = end - offset) > 0) {
diff --git a/net/core/stream.c b/net/core/stream.c
index 4a0ad152c9c..a6b3437ff08 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -192,13 +192,13 @@ void sk_stream_kill_queues(struct sock *sk)
 	__skb_queue_purge(&sk->sk_error_queue);
 
 	/* Next, the write queue. */
-	BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
+	WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
 
 	/* Account for returned memory. */
 	sk_mem_reclaim(sk);
 
-	BUG_TRAP(!sk->sk_wmem_queued);
-	BUG_TRAP(!sk->sk_forward_alloc);
+	WARN_ON(sk->sk_wmem_queued);
+	WARN_ON(sk->sk_forward_alloc);
 
 	/* It is _impossible_ for the backlog to contain anything
 	 * when we get here.  All user references to this socket
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index c77aff9c6eb..53c6b67b287 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -27,7 +27,6 @@
 
 #include <linux/dmaengine.h>
 #include <linux/socket.h>
-#include <linux/rtnetlink.h> /* for BUG_TRAP */
 #include <net/tcp.h>
 #include <net/netdma.h>
 
@@ -71,7 +70,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		copy = end - offset;
@@ -100,7 +99,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
 		for (; list; list = list->next) {
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			copy = end - offset;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 32617e0576c..743d85fcd65 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -164,7 +164,7 @@ static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp)
 {
 	s64 delta = dccp_delta_seqno(s1, s2);
 
-	BUG_TRAP(delta >= 0);
+	WARN_ON(delta < 0);
 	return (u64)delta <= ndp + 1;
 }
 
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 08392ed86c2..df2f110df94 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -413,7 +413,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 
 		/* Stop the REQUEST timer */
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
-		BUG_TRAP(sk->sk_send_head != NULL);
+		WARN_ON(sk->sk_send_head == NULL);
 		__kfree_skb(sk->sk_send_head);
 		sk->sk_send_head = NULL;
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2622ace17c4..a835b88237c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -283,7 +283,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 		 * ICMPs are not backlogged, hence we cannot get an established
 		 * socket here.
 		 */
-		BUG_TRAP(!req->sk);
+		WARN_ON(req->sk);
 
 		if (seq != dccp_rsk(req)->dreq_iss) {
 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index b74e8b2cbe5..da509127e00 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -186,7 +186,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		 * ICMPs are not backlogged, hence we cannot get an established
 		 * socket here.
 		 */
-		BUG_TRAP(req->sk == NULL);
+		WARN_ON(req->sk != NULL);
 
 		if (seq != dccp_rsk(req)->dreq_iss) {
 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a0b56009611..b622d974485 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -327,7 +327,7 @@ int dccp_disconnect(struct sock *sk, int flags)
 	inet_csk_delack_init(sk);
 	__sk_dst_reset(sk);
 
-	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
+	WARN_ON(inet->num && !icsk->icsk_bind_hash);
 
 	sk->sk_error_report(sk);
 	return err;
@@ -981,7 +981,7 @@ adjudge_to_death:
 	 */
 	local_bh_disable();
 	bh_lock_sock(sk);
-	BUG_TRAP(!sock_owned_by_user(sk));
+	WARN_ON(sock_owned_by_user(sk));
 
 	/* Have we already been destroyed by a softirq or backlog? */
 	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3608d5342ca..6a5b961b6f5 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -106,7 +106,7 @@ static void dccp_retransmit_timer(struct sock *sk)
 	 *	-- Acks     in client-PARTOPEN state (sec. 8.1.5)
 	 *	-- CloseReq in server-CLOSEREQ state (sec. 8.3)
 	 *	-- Close    in   node-CLOSING  state (sec. 8.3)                */
-	BUG_TRAP(sk->sk_send_head != NULL);
+	WARN_ON(sk->sk_send_head == NULL);
 
 	/*
 	 * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index dd919d84285..a107f49eea4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -148,10 +148,10 @@ void inet_sock_destruct(struct sock *sk)
 		return;
 	}
 
-	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
-	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
-	BUG_TRAP(!sk->sk_wmem_queued);
-	BUG_TRAP(!sk->sk_forward_alloc);
+	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
+	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(sk->sk_wmem_queued);
+	WARN_ON(sk->sk_forward_alloc);
 
 	kfree(inet->opt);
 	dst_release(sk->sk_dst_cache);
@@ -341,7 +341,7 @@ lookup_protocol:
 	answer_flags = answer->flags;
 	rcu_read_unlock();
 
-	BUG_TRAP(answer_prot->slab != NULL);
+	WARN_ON(answer_prot->slab == NULL);
 
 	err = -ENOBUFS;
 	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
@@ -661,8 +661,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	lock_sock(sk2);
 
-	BUG_TRAP((1 << sk2->sk_state) &
-		 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE));
+	WARN_ON(!((1 << sk2->sk_state) &
+		  (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
 
 	sock_graft(sk2, newsock);
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2e667e2f90d..91d3d96805d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -138,8 +138,8 @@ void in_dev_finish_destroy(struct in_device *idev)
 {
 	struct net_device *dev = idev->dev;
 
-	BUG_TRAP(!idev->ifa_list);
-	BUG_TRAP(!idev->mc_list);
+	WARN_ON(idev->ifa_list);
+	WARN_ON(idev->mc_list);
 #ifdef NET_REFCNT_DEBUG
 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
 	       idev, dev ? dev->name : "NIL");
@@ -399,7 +399,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 	}
 	ipv4_devconf_setall(in_dev);
 	if (ifa->ifa_dev != in_dev) {
-		BUG_TRAP(!ifa->ifa_dev);
+		WARN_ON(ifa->ifa_dev);
 		in_dev_hold(in_dev);
 		ifa->ifa_dev = in_dev;
 	}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index bb81c958b74..0c1ae68ee84 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -167,7 +167,7 @@ tb_not_found:
 success:
 	if (!inet_csk(sk)->icsk_bind_hash)
 		inet_bind_hash(sk, tb, snum);
-	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
+	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
 	ret = 0;
 
 fail_unlock:
@@ -260,7 +260,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 	}
 
 	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
-	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
+	WARN_ON(newsk->sk_state == TCP_SYN_RECV);
 out:
 	release_sock(sk);
 	return newsk;
@@ -386,7 +386,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
 		    ireq->rmt_addr == raddr &&
 		    ireq->loc_addr == laddr &&
 		    AF_INET_FAMILY(req->rsk_ops->family)) {
-			BUG_TRAP(!req->sk);
+			WARN_ON(req->sk);
 			*prevp = prev;
 			break;
 		}
@@ -539,14 +539,14 @@ EXPORT_SYMBOL_GPL(inet_csk_clone);
  */
 void inet_csk_destroy_sock(struct sock *sk)
 {
-	BUG_TRAP(sk->sk_state == TCP_CLOSE);
-	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
+	WARN_ON(sk->sk_state != TCP_CLOSE);
+	WARN_ON(!sock_flag(sk, SOCK_DEAD));
 
 	/* It cannot be in hash table! */
-	BUG_TRAP(sk_unhashed(sk));
+	WARN_ON(!sk_unhashed(sk));
 
 	/* If it has not 0 inet_sk(sk)->num, it must be bound */
-	BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
+	WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash);
 
 	sk->sk_prot->destroy(sk);
 
@@ -629,7 +629,7 @@ void inet_csk_listen_stop(struct sock *sk)
 
 		local_bh_disable();
 		bh_lock_sock(child);
-		BUG_TRAP(!sock_owned_by_user(child));
+		WARN_ON(sock_owned_by_user(child));
 		sock_hold(child);
 
 		sk->sk_prot->disconnect(child, O_NONBLOCK);
@@ -647,7 +647,7 @@ void inet_csk_listen_stop(struct sock *sk)
 		sk_acceptq_removed(sk);
 		__reqsk_free(req);
 	}
-	BUG_TRAP(!sk->sk_ack_backlog);
+	WARN_ON(sk->sk_ack_backlog);
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 0546a0bc97e..6c52e08f786 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -134,8 +134,8 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 	struct sk_buff *fp;
 	struct netns_frags *nf;
 
-	BUG_TRAP(q->last_in & INET_FRAG_COMPLETE);
-	BUG_TRAP(del_timer(&q->timer) == 0);
+	WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
+	WARN_ON(del_timer(&q->timer) != 0);
 
 	/* Release all fragment data. */
 	fp = q->fragments;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 115f53722d2..44981906fb9 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -305,7 +305,7 @@ unique:
 	inet->num = lport;
 	inet->sport = htons(lport);
 	sk->sk_hash = hash;
-	BUG_TRAP(sk_unhashed(sk));
+	WARN_ON(!sk_unhashed(sk));
 	__sk_add_node(sk, &head->chain);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	write_unlock(lock);
@@ -342,7 +342,7 @@ void __inet_hash_nolisten(struct sock *sk)
 	rwlock_t *lock;
 	struct inet_ehash_bucket *head;
 
-	BUG_TRAP(sk_unhashed(sk));
+	WARN_ON(!sk_unhashed(sk));
 
 	sk->sk_hash = inet_sk_ehashfn(sk);
 	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
@@ -367,7 +367,7 @@ static void __inet_hash(struct sock *sk)
 		return;
 	}
 
-	BUG_TRAP(sk_unhashed(sk));
+	WARN_ON(!sk_unhashed(sk));
 	list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 	lock = &hashinfo->lhash_lock;
 
@@ -450,7 +450,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			 */
 			inet_bind_bucket_for_each(tb, node, &head->chain) {
 				if (tb->ib_net == net && tb->port == port) {
-					BUG_TRAP(!hlist_empty(&tb->owners));
+					WARN_ON(hlist_empty(&tb->owners));
 					if (tb->fastreuse >= 0)
 						goto next_port;
 					if (!check_established(death_row, sk,
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 75c2def8f9a..d985bd613d2 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -86,7 +86,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 			hashinfo->bhash_size)];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
-	BUG_TRAP(icsk->icsk_bind_hash);
+	WARN_ON(!icsk->icsk_bind_hash);
 	inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
 	spin_unlock(&bhead->lock);
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 38d38f05801..2152d222b95 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -488,8 +488,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		qp->q.fragments = head;
 	}
 
-	BUG_TRAP(head != NULL);
-	BUG_TRAP(FRAG_CB(head)->offset == 0);
+	WARN_ON(head == NULL);
+	WARN_ON(FRAG_CB(head)->offset != 0);
 
 	/* Allocate a new buffer for the datagram. */
 	ihlen = ip_hdrlen(head);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 465544f6281..d533a89e08d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -118,7 +118,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 	__skb_pull(newskb, skb_network_offset(newskb));
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
-	BUG_TRAP(newskb->dst);
+	WARN_ON(!newskb->dst);
 	netif_rx(newskb);
 	return 0;
 }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0b491bf03db..1ab341e5d3e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1096,7 +1096,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 #if TCP_DEBUG
 	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
-	BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
+	WARN_ON(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
 #endif
 
 	if (inet_csk_ack_scheduled(sk)) {
@@ -1358,7 +1358,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				goto found_ok_skb;
 			if (tcp_hdr(skb)->fin)
 				goto found_fin_ok;
-			BUG_TRAP(flags & MSG_PEEK);
+			WARN_ON(!(flags & MSG_PEEK));
 			skb = skb->next;
 		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);
 
@@ -1421,8 +1421,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 			tp->ucopy.len = len;
 
-			BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
-				 (flags & (MSG_PEEK | MSG_TRUNC)));
+			WARN_ON(tp->copied_seq != tp->rcv_nxt &&
+				!(flags & (MSG_PEEK | MSG_TRUNC)));
 
 			/* Ugly... If prequeue is not empty, we have to
 			 * process it before releasing socket, otherwise
@@ -1844,7 +1844,7 @@ adjudge_to_death:
 	 */
 	local_bh_disable();
 	bh_lock_sock(sk);
-	BUG_TRAP(!sock_owned_by_user(sk));
+	WARN_ON(sock_owned_by_user(sk));
 
 	/* Have we already been destroyed by a softirq or backlog? */
 	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
@@ -1973,7 +1973,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
 
-	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
+	WARN_ON(inet->num && !icsk->icsk_bind_hash);
 
 	sk->sk_error_report(sk);
 	return err;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 75efd244f2a..67ccce2a96b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1629,10 +1629,10 @@ advance_sp:
 out:
 
 #if FASTRETRANS_DEBUG > 0
-	BUG_TRAP((int)tp->sacked_out >= 0);
-	BUG_TRAP((int)tp->lost_out >= 0);
-	BUG_TRAP((int)tp->retrans_out >= 0);
-	BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
+	WARN_ON((int)tp->sacked_out < 0);
+	WARN_ON((int)tp->lost_out < 0);
+	WARN_ON((int)tp->retrans_out < 0);
+	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
 	return flag;
 }
@@ -2181,7 +2181,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 	int err;
 	unsigned int mss;
 
-	BUG_TRAP(packets <= tp->packets_out);
+	WARN_ON(packets > tp->packets_out);
 	if (tp->lost_skb_hint) {
 		skb = tp->lost_skb_hint;
 		cnt = tp->lost_cnt_hint;
@@ -2610,7 +2610,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	/* E. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
-		BUG_TRAP(tp->retrans_out == 0);
+		WARN_ON(tp->retrans_out != 0);
 		tp->retrans_stamp = 0;
 	} else if (!before(tp->snd_una, tp->high_seq)) {
 		switch (icsk->icsk_ca_state) {
@@ -2972,9 +2972,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 	}
 
 #if FASTRETRANS_DEBUG > 0
-	BUG_TRAP((int)tp->sacked_out >= 0);
-	BUG_TRAP((int)tp->lost_out >= 0);
-	BUG_TRAP((int)tp->retrans_out >= 0);
+	WARN_ON((int)tp->sacked_out < 0);
+	WARN_ON((int)tp->lost_out < 0);
+	WARN_ON((int)tp->retrans_out < 0);
 	if (!tp->packets_out && tcp_is_sack(tp)) {
 		icsk = inet_csk(sk);
 		if (tp->lost_out) {
@@ -3877,7 +3877,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 			int i;
 
 			/* RCV.NXT must cover all the block! */
-			BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq));
+			WARN_ON(before(tp->rcv_nxt, sp->end_seq));
 
 			/* Zap this SACK, by moving forward any other SACKS. */
 			for (i=this_sack+1; i < num_sacks; i++)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a82df630756..a2b06d0cc26 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -418,7 +418,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 		/* ICMPs are not backlogged, hence we cannot get
 		   an established socket here.
 		 */
-		BUG_TRAP(!req->sk);
+		WARN_ON(req->sk);
 
 		if (seq != tcp_rsk(req)->snt_isn) {
 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 328e0cf42b3..5ab6ba19c3c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -287,7 +287,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 	if (!tp->packets_out)
 		goto out;
 
-	BUG_TRAP(!tcp_write_queue_empty(sk));
+	WARN_ON(tcp_write_queue_empty(sk));
 
 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 74d543d504a..a7842c54f58 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -313,8 +313,10 @@ static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
 void in6_dev_finish_destroy(struct inet6_dev *idev)
 {
 	struct net_device *dev = idev->dev;
-	BUG_TRAP(idev->addr_list==NULL);
-	BUG_TRAP(idev->mc_list==NULL);
+
+	WARN_ON(idev->addr_list != NULL);
+	WARN_ON(idev->mc_list != NULL);
+
 #ifdef NET_REFCNT_DEBUG
 	printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
 #endif
@@ -517,8 +519,9 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 
 void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 {
-	BUG_TRAP(ifp->if_next==NULL);
-	BUG_TRAP(ifp->lst_next==NULL);
+	WARN_ON(ifp->if_next != NULL);
+	WARN_ON(ifp->lst_next != NULL);
+
 #ifdef NET_REFCNT_DEBUG
 	printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
 #endif
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3d828bc4b1c..0843c4d6218 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -153,7 +153,7 @@ lookup_protocol:
 	answer_flags = answer->flags;
 	rcu_read_unlock();
 
-	BUG_TRAP(answer_prot->slab != NULL);
+	WARN_ON(answer_prot->slab == NULL);
 
 	err = -ENOBUFS;
 	sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 87801cc1b2f..16d43f20b32 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -98,7 +98,7 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
 		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
 		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
 		    (!treq->iif || treq->iif == iif)) {
-			BUG_TRAP(req->sk == NULL);
+			WARN_ON(req->sk != NULL);
 			*prevp = prev;
 			return req;
 		}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 00a8a5f9380..1646a565825 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -28,7 +28,7 @@ void __inet6_hash(struct sock *sk)
 	struct hlist_head *list;
 	rwlock_t *lock;
 
-	BUG_TRAP(sk_unhashed(sk));
+	WARN_ON(!sk_unhashed(sk));
 
 	if (sk->sk_state == TCP_LISTEN) {
 		list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -202,7 +202,7 @@ unique:
 	 * in hash table socket with a funny identity. */
 	inet->num = lport;
 	inet->sport = htons(lport);
-	BUG_TRAP(sk_unhashed(sk));
+	WARN_ON(!sk_unhashed(sk));
 	__sk_add_node(sk, &head->chain);
 	sk->sk_hash = hash;
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 08ea2de28d6..52dddc25d3e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -287,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
 			w->leaf = rt;
 			return 1;
 		}
-		BUG_TRAP(res!=0);
+		WARN_ON(res == 0);
 	}
 	w->leaf = NULL;
 	return 0;
@@ -778,7 +778,7 @@ out:
 			pn->leaf = fib6_find_prefix(info->nl_net, pn);
 #if RT6_DEBUG >= 2
 			if (!pn->leaf) {
-				BUG_TRAP(pn->leaf != NULL);
+				WARN_ON(pn->leaf == NULL);
 				pn->leaf = info->nl_net->ipv6.ip6_null_entry;
 			}
 #endif
@@ -942,7 +942,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
 
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src_len) {
-		BUG_TRAP(saddr!=NULL);
+		WARN_ON(saddr == NULL);
 		if (fn && fn->subtree)
 			fn = fib6_locate_1(fn->subtree, saddr, src_len,
 					   offsetof(struct rt6_info, rt6i_src));
@@ -996,9 +996,9 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
 		iter++;
 
-		BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
-		BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
-		BUG_TRAP(fn->leaf==NULL);
+		WARN_ON(fn->fn_flags & RTN_RTINFO);
+		WARN_ON(fn->fn_flags & RTN_TL_ROOT);
+		WARN_ON(fn->leaf != NULL);
 
 		children = 0;
 		child = NULL;
@@ -1014,7 +1014,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 			fn->leaf = fib6_find_prefix(net, fn);
 #if RT6_DEBUG >= 2
 			if (fn->leaf==NULL) {
-				BUG_TRAP(fn->leaf);
+				WARN_ON(!fn->leaf);
 				fn->leaf = net->ipv6.ip6_null_entry;
 			}
 #endif
@@ -1025,16 +1025,17 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 		pn = fn->parent;
 #ifdef CONFIG_IPV6_SUBTREES
 		if (FIB6_SUBTREE(pn) == fn) {
-			BUG_TRAP(fn->fn_flags&RTN_ROOT);
+			WARN_ON(!(fn->fn_flags & RTN_ROOT));
 			FIB6_SUBTREE(pn) = NULL;
 			nstate = FWS_L;
 		} else {
-			BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
+			WARN_ON(fn->fn_flags & RTN_ROOT);
 #endif
 			if (pn->right == fn) pn->right = child;
 			else if (pn->left == fn) pn->left = child;
 #if RT6_DEBUG >= 2
-			else BUG_TRAP(0);
+			else
+				WARN_ON(1);
 #endif
 			if (child)
 				child->parent = pn;
@@ -1154,14 +1155,14 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 
 #if RT6_DEBUG >= 2
 	if (rt->u.dst.obsolete>0) {
-		BUG_TRAP(fn==NULL);
+		WARN_ON(fn != NULL);
 		return -ENOENT;
 	}
 #endif
 	if (fn == NULL || rt == net->ipv6.ip6_null_entry)
 		return -ENOENT;
 
-	BUG_TRAP(fn->fn_flags&RTN_RTINFO);
+	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
 
 	if (!(rt->rt6i_flags&RTF_CACHE)) {
 		struct fib6_node *pn = fn;
@@ -1266,7 +1267,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
 			w->node = pn;
 #ifdef CONFIG_IPV6_SUBTREES
 			if (FIB6_SUBTREE(pn) == fn) {
-				BUG_TRAP(fn->fn_flags&RTN_ROOT);
+				WARN_ON(!(fn->fn_flags & RTN_ROOT));
 				w->state = FWS_L;
 				continue;
 			}
@@ -1281,7 +1282,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
 				continue;
 			}
 #if RT6_DEBUG >= 2
-			BUG_TRAP(0);
+			WARN_ON(1);
 #endif
 		}
 	}
@@ -1323,7 +1324,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 			}
 			return 0;
 		}
-		BUG_TRAP(res==0);
+		WARN_ON(res != 0);
 	}
 	w->leaf = rt;
 	return 0;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6407c64ea4a..6811901e6b1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -116,7 +116,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 	__skb_pull(newskb, skb_network_offset(newskb));
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
-	BUG_TRAP(newskb->dst);
+	WARN_ON(!newskb->dst);
 
 	netif_rx(newskb);
 	return 0;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index ad1cc5bbf97..31295c8f619 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -164,8 +164,8 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
 			calc_padlen(sizeof(*dstopt), 6));
 
 	hao->type = IPV6_TLV_HAO;
+	BUILD_BUG_ON(sizeof(*hao) != 18);
 	hao->length = sizeof(*hao) - 2;
-	BUG_TRAP(hao->length == 16);
 
 	len = ((char *)hao - (char *)dstopt) + sizeof(*hao);
 
@@ -174,7 +174,7 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
 	memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));
 	spin_unlock_bh(&x->lock);
 
-	BUG_TRAP(len == x->props.header_len);
+	WARN_ON(len != x->props.header_len);
 	dstopt->hdrlen = (x->props.header_len >> 3) - 1;
 
 	return 0;
@@ -317,7 +317,7 @@ static int mip6_destopt_init_state(struct xfrm_state *x)
 	x->props.header_len = sizeof(struct ipv6_destopt_hdr) +
 		calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) +
 		sizeof(struct ipv6_destopt_hao);
-	BUG_TRAP(x->props.header_len == 24);
+	WARN_ON(x->props.header_len != 24);
 
 	return 0;
 }
@@ -380,7 +380,7 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
 	rt2->rt_hdr.segments_left = 1;
 	memset(&rt2->reserved, 0, sizeof(rt2->reserved));
 
-	BUG_TRAP(rt2->rt_hdr.hdrlen == 2);
+	WARN_ON(rt2->rt_hdr.hdrlen != 2);
 
 	memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
 	spin_lock_bh(&x->lock);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index cf20bc4fd60..52d06dd4b81 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -416,8 +416,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	fq_kill(fq);
 
-	BUG_TRAP(head != NULL);
-	BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
+	WARN_ON(head == NULL);
+	WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
 
 	/* Unfragmented part is taken from the first segment. */
 	payload_len = ((head->data - skb_network_header(head)) -
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6ab957ec2dd..89184b576e2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -473,8 +473,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		fq->q.fragments = head;
 	}
 
-	BUG_TRAP(head != NULL);
-	BUG_TRAP(FRAG6_CB(head)->offset == 0);
+	WARN_ON(head == NULL);
+	WARN_ON(FRAG6_CB(head)->offset != 0);
 
 	/* Unfragmented part is taken from the first segment. */
 	payload_len = ((head->data - skb_network_header(head)) -
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ae45f983501..cff778b23a7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -421,7 +421,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		/* ICMPs are not backlogged, hence we cannot get
 		 * an established socket here.
 		 */
-		BUG_TRAP(req->sk == NULL);
+		WARN_ON(req->sk != NULL);
 
 		if (seq != tcp_rsk(req)->snt_isn) {
 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f0fc46c8038..d628df97e02 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -96,8 +96,8 @@ static void pfkey_sock_destruct(struct sock *sk)
 		return;
 	}
 
-	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
-	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
+	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 
 	atomic_dec(&pfkey_socks_nr);
 }
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 98bfe277eab..b0eacc0007c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -158,9 +158,10 @@ static void netlink_sock_destruct(struct sock *sk)
 		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
 		return;
 	}
-	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
-	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
-	BUG_TRAP(!nlk_sk(sk)->groups);
+
+	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
+	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(nlk_sk(sk)->groups);
 }
 
 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d56cae112dc..c718e7e3f7d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -260,8 +260,8 @@ static inline struct packet_sock *pkt_sk(struct sock *sk)
 
 static void packet_sock_destruct(struct sock *sk)
 {
-	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
-	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
+	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		printk("Attempt to release alive packet socket: %p\n", sk);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 4b2682feeed..32e489118be 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -660,9 +660,9 @@ static void rxrpc_sock_destructor(struct sock *sk)
 
 	rxrpc_purge_queue(&sk->sk_receive_queue);
 
-	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
-	BUG_TRAP(sk_unhashed(sk));
-	BUG_TRAP(!sk->sk_socket);
+	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(!sk_unhashed(sk));
+	WARN_ON(sk->sk_socket);
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		printk("Attempt to release alive rxrpc socket: %p\n", sk);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 74e662cbb2c..d308c19aa3f 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -41,7 +41,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 			return;
 		}
 	}
-	BUG_TRAP(0);
+	WARN_ON(1);
 }
 EXPORT_SYMBOL(tcf_hash_destroy);
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 32c3f9d9fb7..38015b49394 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -116,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p)
 			return;
 		}
 	}
-	BUG_TRAP(0);
+	WARN_ON(1);
 }
 
 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 527db2559dd..246f9065ce3 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -345,7 +345,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
 			}
 		}
 	}
-	BUG_TRAP(0);
+	WARN_ON(1);
 	return 0;
 }
 
@@ -368,7 +368,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode **hn;
 
-	BUG_TRAP(!ht->refcnt);
+	WARN_ON(ht->refcnt);
 
 	u32_clear_hnode(tp, ht);
 
@@ -380,7 +380,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 		}
 	}
 
-	BUG_TRAP(0);
+	WARN_ON(1);
 	return -ENOENT;
 }
 
@@ -389,7 +389,7 @@ static void u32_destroy(struct tcf_proto *tp)
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
 
-	BUG_TRAP(root_ht != NULL);
+	WARN_ON(root_ht == NULL);
 
 	if (root_ht && --root_ht->refcnt == 0)
 		u32_destroy_hnode(tp, root_ht);
@@ -407,7 +407,7 @@ static void u32_destroy(struct tcf_proto *tp)
 		while ((ht = tp_c->hlist) != NULL) {
 			tp_c->hlist = ht->next;
 
-			BUG_TRAP(ht->refcnt == 0);
+			WARN_ON(ht->refcnt != 0);
 
 			kfree(ht);
 		}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index f1d2f8ec8b4..14954bf4a68 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1175,7 +1175,7 @@ static void cbq_unlink_class(struct cbq_class *this)
 				this->tparent->children = NULL;
 		}
 	} else {
-		BUG_TRAP(this->sibling == this);
+		WARN_ON(this->sibling != this);
 	}
 }
 
@@ -1699,7 +1699,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 
-	BUG_TRAP(!cl->filters);
+	WARN_ON(cl->filters);
 
 	tcf_destroy_chain(&cl->filter_list);
 	qdisc_destroy(cl->q);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 43abd4d27ea..fd2a6cadb11 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -746,5 +746,5 @@ void dev_shutdown(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
 	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
-	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
+	WARN_ON(timer_pending(&dev->watchdog_timer));
 }
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 30c999c61b0..75a40951c4f 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -524,7 +524,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
  */
 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 {
-	BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
+	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 
 	if (!cl->prio_activity) {
 		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
@@ -542,7 +542,7 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
  */
 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 {
-	BUG_TRAP(cl->prio_activity);
+	WARN_ON(!cl->prio_activity);
 
 	htb_deactivate_prios(q, cl);
 	cl->prio_activity = 0;
@@ -757,7 +757,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 		u32 *pid;
 	} stk[TC_HTB_MAXDEPTH], *sp = stk;
 
-	BUG_TRAP(tree->rb_node);
+	WARN_ON(!tree->rb_node);
 	sp->root = tree->rb_node;
 	sp->pptr = pptr;
 	sp->pid = pid;
@@ -777,7 +777,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 				*sp->pptr = (*sp->pptr)->rb_left;
 			if (sp > stk) {
 				sp--;
-				BUG_TRAP(*sp->pptr);
+				WARN_ON(!*sp->pptr);
 				if (!*sp->pptr)
 					return NULL;
 				htb_next_rb_node(sp->pptr);
@@ -792,7 +792,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 			sp->pid = cl->un.inner.last_ptr_id + prio;
 		}
 	}
-	BUG_TRAP(0);
+	WARN_ON(1);
 	return NULL;
 }
 
@@ -810,7 +810,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 
 	do {
 next:
-		BUG_TRAP(cl);
+		WARN_ON(!cl);
 		if (!cl)
 			return NULL;
 
@@ -1185,7 +1185,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
 {
 	struct htb_class *parent = cl->parent;
 
-	BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
+	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
 
 	if (parent->cmode != HTB_CAN_SEND)
 		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
@@ -1205,7 +1205,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
 	if (!cl->level) {
-		BUG_TRAP(cl->un.leaf.q);
+		WARN_ON(!cl->un.leaf.q);
 		qdisc_destroy(cl->un.leaf.q);
 	}
 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ec2a0a33fd7..8472b8b349c 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -464,7 +464,7 @@ static void sctp_association_destroy(struct sctp_association *asoc)
 		spin_unlock_bh(&sctp_assocs_id_lock);
 	}
 
-	BUG_TRAP(!atomic_read(&asoc->rmem_alloc));
+	WARN_ON(atomic_read(&asoc->rmem_alloc));
 
 	if (asoc->base.malloced) {
 		kfree(asoc);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 70ceb1604ad..24eb214581d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -227,7 +227,7 @@ static void __unix_remove_socket(struct sock *sk)
 
 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 {
-	BUG_TRAP(sk_unhashed(sk));
+	WARN_ON(!sk_unhashed(sk));
 	sk_add_node(sk, list);
 }
 
@@ -350,9 +350,9 @@ static void unix_sock_destructor(struct sock *sk)
 
 	skb_queue_purge(&sk->sk_receive_queue);
 
-	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
-	BUG_TRAP(sk_unhashed(sk));
-	BUG_TRAP(!sk->sk_socket);
+	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(!sk_unhashed(sk));
+	WARN_ON(sk->sk_socket);
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		printk("Attempt to release alive unix socket: %p\n", sk);
 		return;
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 23a2cc04b8c..96036cf2216 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -718,7 +718,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
 
-		BUG_TRAP(start <= offset + len);
+		WARN_ON(start > offset + len);
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
@@ -748,7 +748,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
 		for (; list; list = list->next) {
 			int end;
 
-			BUG_TRAP(start <= offset + len);
+			WARN_ON(start > offset + len);
 
 			end = start + list->len;
 			if ((copy = end - offset) > 0) {
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 800f669083f..c609a4b98e1 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/percpu.h>
-#include <linux/rtnetlink.h>
 #include <linux/smp.h>
 #include <linux/vmalloc.h>
 #include <net/ip.h>
@@ -251,7 +250,7 @@ static void ipcomp_free_tfms(struct crypto_comp **tfms)
 			break;
 	}
 
-	BUG_TRAP(pos);
+	WARN_ON(!pos);
 
 	if (--pos->users)
 		return;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 72fddafd891..4c6914ef7d9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -538,7 +538,7 @@ EXPORT_SYMBOL(xfrm_state_alloc);
 
 void __xfrm_state_destroy(struct xfrm_state *x)
 {
-	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
+	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
 	spin_lock_bh(&xfrm_state_lock);
 	list_del(&x->all);
-- 
cgit v1.2.3-70-g09d2


From 16df845f4566bc252f3e09db12f5c2f22cb44226 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 26 Jul 2008 02:21:54 -0700
Subject: syncookies: Make sure ECN is disabled

ecn_ok is not initialized when a connection is established by cookies.
The cookie syn-ack never sets ECN, so ecn_ok must be set to 0.

Spotted using ns-3/network simulation cradle simulator and valgrind.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 1 +
 net/ipv6/syncookies.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 51bc24d3b8a..9d38005abba 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -299,6 +299,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
+	ireq->ecn_ok		= 0;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6a68eeb7bbf..a46badd1082 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -223,6 +223,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->expires = 0UL;
 	req->retrans = 0;
+	ireq->ecn_ok		= 0;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
-- 
cgit v1.2.3-70-g09d2


From cdec7e50a4896c5197d5575d9ca635eea6825149 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 26 Jul 2008 02:28:09 -0700
Subject: Revert "pkt_sched: sch_sfq: dump a real number of flows"

This reverts commit f867e6af94239a04ec23aeec2fcda5aa58e41db7.

Based upon discussions between Jarek and Patrick McHardy
this is field being set is more a config parameter than a
statistic.  And we should add a true statistic to provide
this information if we really want it.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 73f53844ce9..8589da66656 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -536,14 +536,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	opt.limit = q->limit;
 	opt.divisor = SFQ_HASH_DIVISOR;
-	opt.flows = 0;
-	if (q->tail != SFQ_DEPTH) {
-		unsigned int i;
-
-		for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-			if (q->ht[i] != SFQ_DEPTH)
-				opt.flows++;
-	}
+	opt.flows = q->limit;
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
-- 
cgit v1.2.3-70-g09d2


From 59435444a13ed52d3444c5df26b73d3086bcd57b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 26 Jul 2008 11:59:09 +0100
Subject: dccp: Allow to distinguish original and retransmitted packets

This patch allows the sender to distinguish original and retransmitted packets,
which is in particular needed for the retransmission of DCCP-Requests:
 * the first Request uses ISS (generated in net/dccp/ip*.c), and sets GSS = ISS;
 * all retransmitted Requests use GSS' = GSS + 1, so that the n-th retransmitted
   Request has sequence number ISS + n (mod 48).

To add generic support, the patch reorganises existing code so that:
 * icsk_retransmits == 0     for the original packet and
 * icsk_retransmits = n > 0  for the n-th retransmitted packet
at the time dccp_transmit_skb() is called, via dccp_retransmit_skb().

Thanks to Wei Yongjun for pointing this problem out.

Further changes:
----------------
 * removed the `skb' argument from dccp_retransmit_skb(), since sk_send_head
   is used for all retransmissions (the exception is client-Acks in PARTOPEN
   state, but these do not use sk_send_head);
 * since sk_send_head always contains the original skb (via dccp_entail()),
   skb_cloned() never evaluated to true and thus pskb_copy() was never used.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h   |  2 +-
 net/dccp/output.c | 20 ++++++++++++++++----
 net/dccp/timer.c  | 20 ++++----------------
 3 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 743d85fcd65..1c2e3ec2eb5 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -226,7 +226,7 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb)
 
 extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 
-extern int  dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
+extern int  dccp_retransmit_skb(struct sock *sk);
 
 extern void dccp_send_ack(struct sock *sk);
 extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index fe20068c5d8..d19d4819501 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -284,14 +284,26 @@ void dccp_write_xmit(struct sock *sk, int block)
 	}
 }
 
-int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+/**
+ * dccp_retransmit_skb  -  Retransmit Request, Close, or CloseReq packets
+ * There are only four retransmittable packet types in DCCP:
+ * - Request  in client-REQUEST  state (sec. 8.1.1),
+ * - CloseReq in server-CLOSEREQ state (sec. 8.3),
+ * - Close    in   node-CLOSING  state (sec. 8.3),
+ * - Acks in client-PARTOPEN state (sec. 8.1.5, handled by dccp_delack_timer()).
+ * This function expects sk->sk_send_head to contain the original skb.
+ */
+int dccp_retransmit_skb(struct sock *sk)
 {
+	WARN_ON(sk->sk_send_head == NULL);
+
 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0)
 		return -EHOSTUNREACH; /* Routing failure or similar. */
 
-	return dccp_transmit_skb(sk, (skb_cloned(skb) ?
-				      pskb_copy(skb, GFP_ATOMIC):
-				      skb_clone(skb, GFP_ATOMIC)));
+	/* this count is used to distinguish original and retransmitted skb */
+	inet_csk(sk)->icsk_retransmits++;
+
+	return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC));
 }
 
 struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 6a5b961b6f5..54b3c7e9e01 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -98,22 +98,12 @@ static void dccp_retransmit_timer(struct sock *sk)
 		goto backoff;
 	}
 
-	/*
-	 * sk->sk_send_head has to have one skb with
-	 * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
-	 * packet types. The only packets eligible for retransmission are:
-	 *	-- Requests in client-REQUEST  state (sec. 8.1.1)
-	 *	-- Acks     in client-PARTOPEN state (sec. 8.1.5)
-	 *	-- CloseReq in server-CLOSEREQ state (sec. 8.3)
-	 *	-- Close    in   node-CLOSING  state (sec. 8.3)                */
-	WARN_ON(sk->sk_send_head == NULL);
-
 	/*
 	 * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
 	 * sent, no need to retransmit, this sock is dead.
 	 */
 	if (dccp_write_timeout(sk))
-		goto out;
+		return;
 
 	/*
 	 * We want to know the number of packets retransmitted, not the
@@ -122,30 +112,28 @@ static void dccp_retransmit_timer(struct sock *sk)
 	if (icsk->icsk_retransmits == 0)
 		DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
 
-	if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
+	if (dccp_retransmit_skb(sk) != 0) {
 		/*
 		 * Retransmission failed because of local congestion,
 		 * do not backoff.
 		 */
-		if (icsk->icsk_retransmits == 0)
+		if (--icsk->icsk_retransmits == 0)
 			icsk->icsk_retransmits = 1;
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					  min(icsk->icsk_rto,
 					      TCP_RESOURCE_PROBE_INTERVAL),
 					  DCCP_RTO_MAX);
-		goto out;
+		return;
 	}
 
 backoff:
 	icsk->icsk_backoff++;
-	icsk->icsk_retransmits++;
 
 	icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
 				  DCCP_RTO_MAX);
 	if (icsk->icsk_retransmits > sysctl_dccp_retries1)
 		__sk_dst_reset(sk);
-out:;
 }
 
 static void dccp_write_timer(unsigned long data)
-- 
cgit v1.2.3-70-g09d2


From 73f18fdbca3f92b90aeaee16f5175fe30496e218 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 26 Jul 2008 11:59:10 +0100
Subject: dccp: Bug-Fix - AWL was never updated

The AWL lower Ack validity window advances in proportion to GSS, the greatest
sequence number sent. Updating AWL other than at connection setup (in the
DCCP-Request sent by dccp_v{4,6}_connect()) was missing in the DCCP code.

This bug lead to syslog messages such as

 "kernel: dccp_check_seqno: DCCP: Step 6 failed for DATAACK packet, [...]
  P.ackno exists or LAWL(82947089) <= P.ackno(82948208)
                                   <= S.AWH(82948728), sending SYNC..."

The difference between AWL/AWH here is 1639 packets, while the expected value
(the Sequence Window) would have been 100 (the default).  A closer look showed
that LAWL = AWL = 82947089 equalled the ISS on the Response.

The patch now updates AWL with each increase of GSS.


Further changes:
----------------
The patch also enforces more stringent checks on the ISS sequence number:

 * AWL is initialised to ISS at connection setup and remains at this value;
 * AWH is then always set to GSS (via dccp_update_gss());
 * so on the first Request: AWL =      AWH = ISS,
   and on the n-th Request: AWL = ISS, AWH = ISS + n.

As a consequence, only Response packets that refer to Requests sent by this
host will pass, all others are discarded. This is the intention and in effect
implements the initial adjustments for AWL as specified in RFC 4340, 7.5.1.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/output.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/dccp/output.c b/net/dccp/output.c
index d19d4819501..d06945c7d3d 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -53,8 +53,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 					  dccp_packet_hdr_len(dcb->dccpd_type);
 		int err, set_ack = 1;
 		u64 ackno = dp->dccps_gsr;
-
-		dccp_inc_seqno(&dp->dccps_gss);
+		/*
+		 * Increment GSS here already in case the option code needs it.
+		 * Update GSS for real only if option processing below succeeds.
+		 */
+		dcb->dccpd_seq = ADD48(dp->dccps_gss, 1);
 
 		switch (dcb->dccpd_type) {
 		case DCCP_PKT_DATA:
@@ -66,6 +69,9 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 
 		case DCCP_PKT_REQUEST:
 			set_ack = 0;
+			/* Use ISS on the first (non-retransmitted) Request. */
+			if (icsk->icsk_retransmits == 0)
+				dcb->dccpd_seq = dp->dccps_iss;
 			/* fall through */
 
 		case DCCP_PKT_SYNC:
@@ -84,8 +90,6 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 			break;
 		}
 
-		dcb->dccpd_seq = dp->dccps_gss;
-
 		if (dccp_insert_options(sk, skb)) {
 			kfree_skb(skb);
 			return -EPROTO;
@@ -103,7 +107,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		/* XXX For now we're using only 48 bits sequence numbers */
 		dh->dccph_x	= 1;
 
-		dp->dccps_awh = dp->dccps_gss;
+		dccp_update_gss(sk, dcb->dccpd_seq);
 		dccp_hdr_set_seq(dh, dp->dccps_gss);
 		if (set_ack)
 			dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
@@ -112,6 +116,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		case DCCP_PKT_REQUEST:
 			dccp_hdr_request(skb)->dccph_req_service =
 							dp->dccps_service;
+			/*
+			 * Limit Ack window to ISS <= P.ackno <= GSS, so that
+			 * only Responses to Requests we sent are considered.
+			 */
+			dp->dccps_awl = dp->dccps_iss;
 			break;
 		case DCCP_PKT_RESET:
 			dccp_hdr_reset(skb)->dccph_reset_code =
@@ -449,19 +458,7 @@ static inline void dccp_connect_init(struct sock *sk)
 
 	dccp_sync_mss(sk, dst_mtu(dst));
 
-	/*
-	 * SWL and AWL are initially adjusted so that they are not less than
-	 * the initial Sequence Numbers received and sent, respectively:
-	 *	SWL := max(GSR + 1 - floor(W/4), ISR),
-	 *	AWL := max(GSS - W' + 1, ISS).
-	 * These adjustments MUST be applied only at the beginning of the
-	 * connection.
-	 */
-	dccp_update_gss(sk, dp->dccps_iss);
-	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
-
-	/* S.GAR - greatest valid acknowledgement number received on a non-Sync;
-	 *         initialized to S.ISS (sec. 8.5)                            */
+	/* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
 	dp->dccps_gar = dp->dccps_iss;
 
 	icsk->icsk_retransmits = 0;
-- 
cgit v1.2.3-70-g09d2


From d68f0866f76e2bc4ddc07e88e2cb1bc8959a6d7e Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Sat, 26 Jul 2008 11:59:10 +0100
Subject: dccp: Fix sequence number check for ICMPv4 packets

The payload of ICMP message is a part of the packet sent by ourself,
so the sequence number check must use AWL and AWH, not SWL and SWH.

For example:
     Endpoint A                  Endpoint B

     DATA-ACK       -------->
     (SEQ=X)
                    <--------    ICMP (Fragmentation Needed)
                                 (SEQ=X)

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a835b88237c..6a2f1879e18 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -238,7 +238,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	dp = dccp_sk(sk);
 	seq = dccp_hdr_seq(dh);
 	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
-	    !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
+	    !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
-- 
cgit v1.2.3-70-g09d2


From e0bcfb0c6a6ed9ebd68746b306298dc5797fd426 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Sat, 26 Jul 2008 11:59:10 +0100
Subject: dccp: Add check for sequence number in ICMPv6 message

This adds a sequence number check for ICMPv6 DCCP error packets, in the same
manner as it has been done for ICMPv4 in the previous patch.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ipv6.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index da509127e00..25826b1bf68 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -89,6 +89,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 {
 	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
 	const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+	struct dccp_sock *dp;
 	struct ipv6_pinfo *np;
 	struct sock *sk;
 	int err;
@@ -116,6 +117,14 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk->sk_state == DCCP_CLOSED)
 		goto out;
 
+	dp = dccp_sk(sk);
+	seq = dccp_hdr_seq(dh);
+	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
+	    !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
+		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+		goto out;
+	}
+
 	np = inet6_sk(sk);
 
 	if (type == ICMPV6_PKT_TOOBIG) {
@@ -168,7 +177,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	icmpv6_err_convert(type, code, &err);
 
-	seq = dccp_hdr_seq(dh);
 	/* Might be for an request_sock */
 	switch (sk->sk_state) {
 		struct request_sock *req, **prev;
-- 
cgit v1.2.3-70-g09d2


From 18e1d836002ad970f42736bad09b7be9cfe99545 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Sat, 26 Jul 2008 11:59:10 +0100
Subject: dccp: Fix incorrect length check for ICMPv4 packets

Unlike TCP, which only needs 8 octets of original packet data, DCCP requires
minimally 12 or 16 bytes for ICMP-payload sequence number checks.

This patch replaces the insufficient length constant of 8 with a two-stage
test, making sure that 12 bytes are available, before computing the basic
header length required for sequence number checks.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ipv4.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 6a2f1879e18..882c5c4de69 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -196,8 +196,8 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
 static void dccp_v4_err(struct sk_buff *skb, u32 info)
 {
 	const struct iphdr *iph = (struct iphdr *)skb->data;
-	const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
-							(iph->ihl << 2));
+	const u8 offset = iph->ihl << 2;
+	const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
 	struct dccp_sock *dp;
 	struct inet_sock *inet;
 	const int type = icmp_hdr(skb)->type;
@@ -207,7 +207,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	int err;
 	struct net *net = dev_net(skb->dev);
 
-	if (skb->len < (iph->ihl << 2) + 8) {
+	if (skb->len < offset + sizeof(*dh) ||
+	    skb->len < offset + __dccp_basic_hdr_len(dh)) {
 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
 	}
-- 
cgit v1.2.3-70-g09d2


From 860239c56bbc7c830bdbcec93b140f22a5a5219b Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Sat, 26 Jul 2008 11:59:11 +0100
Subject: dccp: Add check for truncated ICMPv6 DCCP error packets

This patch adds a minimum-length check for ICMPv6 packets, as per the previous
patch for ICMPv4 payloads.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ipv6.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 25826b1bf68..5e1ee0da2c4 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -96,6 +96,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	__u64 seq;
 	struct net *net = dev_net(skb->dev);
 
+	if (skb->len < offset + sizeof(*dh) ||
+	    skb->len < offset + __dccp_basic_hdr_len(dh)) {
+		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+		return;
+	}
+
 	sk = inet6_lookup(net, &dccp_hashinfo,
 			&hdr->daddr, dh->dccph_dport,
 			&hdr->saddr, dh->dccph_sport, inet6_iif(skb));
-- 
cgit v1.2.3-70-g09d2


From 0bc3cc03fa6e1c20aecb5a33356bcaae410640b9 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Thu, 24 Jul 2008 18:21:31 -0700
Subject: cpumask: change cpumask_of_cpu_ptr to use new cpumask_of_cpu

  * Replace previous instances of the cpumask_of_cpu_ptr* macros
    with a the new (lvalue capable) generic cpumask_of_cpu().

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/cstate.c                    |  3 +--
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c       | 10 +++-------
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c        | 15 +++++----------
 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 12 ++++--------
 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c      |  3 +--
 arch/x86/kernel/cpu/intel_cacheinfo.c            |  3 +--
 arch/x86/kernel/ldt.c                            |  6 ++----
 arch/x86/kernel/microcode.c                      | 17 +++++------------
 arch/x86/kernel/reboot.c                         | 11 +++--------
 drivers/acpi/processor_throttling.c              | 11 +++--------
 drivers/firmware/dcdbas.c                        |  3 +--
 drivers/misc/sgi-xp/xpc_main.c                   |  3 +--
 kernel/stop_machine.c                            |  3 +--
 kernel/time/tick-common.c                        |  8 +++-----
 kernel/trace/trace_sysprof.c                     |  4 +---
 lib/smp_processor_id.c                           |  5 +----
 net/sunrpc/svc.c                                 |  3 +--
 17 files changed, 37 insertions(+), 83 deletions(-)

(limited to 'net')

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 9220cf46aa1..c2502eb9aa8 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr(new_mask, cpu);
 	int retval;
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int edx_part;
@@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 
 	/* Make sure we are running on right CPU */
 	saved_mask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, new_mask);
+	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (retval)
 		return -1;
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ff2fff56f0a..dd097b83583 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd)
 static void drv_write(struct drv_cmd *cmd)
 {
 	cpumask_t saved_mask = current->cpus_allowed;
-	cpumask_of_cpu_ptr_declare(cpu_mask);
 	unsigned int i;
 
 	for_each_cpu_mask_nr(i, cmd->mask) {
-		cpumask_of_cpu_ptr_next(cpu_mask, i);
-		set_cpus_allowed_ptr(current, cpu_mask);
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
 		do_drv_write(cmd);
 	}
 
@@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu)
 	} aperf_cur, mperf_cur;
 
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr(cpu_mask, cpu);
 	unsigned int perf_percent;
 	unsigned int retval;
 
 	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, cpu_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (get_cpu() != cpu) {
 		/* We were not able to run on requested processor */
 		put_cpu();
@@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu)
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
-	cpumask_of_cpu_ptr(cpu_mask, cpu);
 	struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
 	unsigned int freq;
 	unsigned int cached_freq;
@@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	}
 
 	cached_freq = data->freq_table[data->acpi_data->state].frequency;
-	freq = extract_freq(get_cur_val(cpu_mask), data);
+	freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
 	if (freq != cached_freq) {
 		/*
 		 * The dreaded BIOS frequency change behind our back.
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 53c7b693697..c45ca6d4dce 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -479,12 +479,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
 static int check_supported_cpu(unsigned int cpu)
 {
 	cpumask_t oldmask;
-	cpumask_of_cpu_ptr(cpu_mask, cpu);
 	u32 eax, ebx, ecx, edx;
 	unsigned int rc = 0;
 
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, cpu_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 
 	if (smp_processor_id() != cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
@@ -1017,7 +1016,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
 {
 	cpumask_t oldmask;
-	cpumask_of_cpu_ptr(cpu_mask, pol->cpu);
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 	u32 checkfid;
 	u32 checkvid;
@@ -1032,7 +1030,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
 
 	/* only run on specific CPU from here on */
 	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, cpu_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1107,7 +1105,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask;
-	cpumask_of_cpu_ptr_declare(newmask);
 	int rc;
 
 	if (!cpu_online(pol->cpu))
@@ -1159,8 +1156,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 
 	/* only run on specific CPU from here on */
 	oldmask = current->cpus_allowed;
-	cpumask_of_cpu_ptr_next(newmask, pol->cpu);
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
 
 	if (smp_processor_id() != pol->cpu) {
 		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1182,7 +1178,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	set_cpus_allowed_ptr(current, &oldmask);
 
 	if (cpu_family == CPU_HW_PSTATE)
-		pol->cpus = *newmask;
+		pol->cpus = cpumask_of_cpu(pol->cpu);
 	else
 		pol->cpus = per_cpu(cpu_core_map, pol->cpu);
 	data->available_cores = &(pol->cpus);
@@ -1248,7 +1244,6 @@ static unsigned int powernowk8_get (unsigned int cpu)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask = current->cpus_allowed;
-	cpumask_of_cpu_ptr(newmask, cpu);
 	unsigned int khz = 0;
 	unsigned int first;
 
@@ -1258,7 +1253,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
 	if (!data)
 		return -EINVAL;
 
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (smp_processor_id() != cpu) {
 		printk(KERN_ERR PFX
 			"limiting to CPU %d failed in powernowk8_get\n", cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index ca2ac13b7af..15e13c01cc3 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu)
 	unsigned l, h;
 	unsigned clock_freq;
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr(new_mask, cpu);
 
 	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, new_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (smp_processor_id() != cpu)
 		return 0;
 
@@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy,
 		 * Best effort undo..
 		 */
 
-		if (!cpus_empty(*covered_cpus)) {
-			cpumask_of_cpu_ptr_declare(new_mask);
-
+		if (!cpus_empty(*covered_cpus))
 			for_each_cpu_mask_nr(j, *covered_cpus) {
-				cpumask_of_cpu_ptr_next(new_mask, j);
-				set_cpus_allowed_ptr(current, new_mask);
+				set_cpus_allowed_ptr(current,
+						     &cpumask_of_cpu(j));
 				wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
 			}
-		}
 
 		tmp = freqs.new;
 		freqs.new = freqs.old;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 2f3728dc24f..191f7263c61 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus)
 
 static unsigned int speedstep_get(unsigned int cpu)
 {
-	cpumask_of_cpu_ptr(newmask, cpu);
-	return _speedstep_get(newmask);
+	return _speedstep_get(&cpumask_of_cpu(cpu));
 }
 
 /**
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 650d40f7912..6b0a10b002f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -516,7 +516,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 	unsigned long		j;
 	int			retval;
 	cpumask_t		oldmask;
-	cpumask_of_cpu_ptr(newmask, cpu);
 
 	if (num_cache_leaves == 0)
 		return -ENOENT;
@@ -527,7 +526,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 		return -ENOMEM;
 
 	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, newmask);
+	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	if (retval)
 		goto out;
 
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 3fee2aa50f3..b68e21f06f4 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 
 	if (reload) {
 #ifdef CONFIG_SMP
-		cpumask_of_cpu_ptr_declare(mask);
-
 		preempt_disable();
 		load_LDT(pc);
-		cpumask_of_cpu_ptr_next(mask, smp_processor_id());
-		if (!cpus_equal(current->mm->cpu_vm_mask, *mask))
+		if (!cpus_equal(current->mm->cpu_vm_mask,
+				cpumask_of_cpu(smp_processor_id())))
 			smp_call_function(flush_ldt, current->mm, 1);
 		preempt_enable();
 #else
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 6994c751590..652fa5c38eb 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -388,7 +388,6 @@ static int do_microcode_update (void)
 	void *new_mc = NULL;
 	int cpu;
 	cpumask_t old;
-	cpumask_of_cpu_ptr_declare(newmask);
 
 	old = current->cpus_allowed;
 
@@ -405,8 +404,7 @@ static int do_microcode_update (void)
 
 			if (!uci->valid)
 				continue;
-			cpumask_of_cpu_ptr_next(newmask, cpu);
-			set_cpus_allowed_ptr(current, newmask);
+			set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 			error = get_maching_microcode(new_mc, cpu);
 			if (error < 0)
 				goto out;
@@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu)
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	cpumask_t old;
-	cpumask_of_cpu_ptr(newmask, cpu);
 	unsigned int val[2];
 	int err = 0;
 
@@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu)
 		return 0;
 
 	old = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 
 	/* Check if the microcode we have in memory matches the CPU */
 	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
@@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu)
 static void microcode_init_cpu(int cpu, int resume)
 {
 	cpumask_t old;
-	cpumask_of_cpu_ptr(newmask, cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	old = current->cpus_allowed;
 
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 	mutex_lock(&microcode_mutex);
 	collect_cpu_info(cpu);
 	if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
@@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev,
 	if (end == buf)
 		return -EINVAL;
 	if (val == 1) {
-		cpumask_t old;
-		cpumask_of_cpu_ptr(newmask, cpu);
-
-		old = current->cpus_allowed;
+		cpumask_t old = current->cpus_allowed;
 
 		get_online_cpus();
-		set_cpus_allowed_ptr(current, newmask);
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 
 		mutex_lock(&microcode_mutex);
 		if (uci->valid)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 06a9f643817..724adfc63cb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -414,25 +414,20 @@ void native_machine_shutdown(void)
 
 	/* The boot cpu is always logical cpu 0 */
 	int reboot_cpu_id = 0;
-	cpumask_of_cpu_ptr(newmask, reboot_cpu_id);
 
 #ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
 	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
-		cpu_online(reboot_cpu)) {
+		cpu_online(reboot_cpu))
 		reboot_cpu_id = reboot_cpu;
-		cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
-	}
 #endif
 
 	/* Make certain the cpu I'm about to reboot on is online */
-	if (!cpu_online(reboot_cpu_id)) {
+	if (!cpu_online(reboot_cpu_id))
 		reboot_cpu_id = smp_processor_id();
-		cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
-	}
 
 	/* Make certain I only run on the appropriate processor */
-	set_cpus_allowed_ptr(current, newmask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
 
 	/* O.K Now that I'm on the appropriate processor,
 	 * stop all of the others.
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index a2c3f9cfa54..a56fc6c4394 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -827,7 +827,6 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr)
 static int acpi_processor_get_throttling(struct acpi_processor *pr)
 {
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr_declare(new_mask);
 	int ret;
 
 	if (!pr)
@@ -839,8 +838,7 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr)
 	 * Migrate task to the cpu pointed by pr.
 	 */
 	saved_mask = current->cpus_allowed;
-	cpumask_of_cpu_ptr_next(new_mask, pr->id);
-	set_cpus_allowed_ptr(current, new_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
 	ret = pr->throttling.acpi_processor_get_throttling(pr);
 	/* restore the previous state */
 	set_cpus_allowed_ptr(current, &saved_mask);
@@ -989,7 +987,6 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr,
 int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 {
 	cpumask_t saved_mask;
-	cpumask_of_cpu_ptr_declare(new_mask);
 	int ret = 0;
 	unsigned int i;
 	struct acpi_processor *match_pr;
@@ -1028,8 +1025,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 	 * it can be called only for the cpu pointed by pr.
 	 */
 	if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) {
-		cpumask_of_cpu_ptr_next(new_mask, pr->id);
-		set_cpus_allowed_ptr(current, new_mask);
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
 		ret = p_throttling->acpi_processor_set_throttling(pr,
 						t_state.target_state);
 	} else {
@@ -1060,8 +1056,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
 				continue;
 			}
 			t_state.cpu = i;
-			cpumask_of_cpu_ptr_next(new_mask, i);
-			set_cpus_allowed_ptr(current, new_mask);
+			set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
 			ret = match_pr->throttling.
 				acpi_processor_set_throttling(
 				match_pr, t_state.target_state);
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index c66817e7717..50a071f1c94 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -245,7 +245,6 @@ static ssize_t host_control_on_shutdown_store(struct device *dev,
 static int smi_request(struct smi_cmd *smi_cmd)
 {
 	cpumask_t old_mask;
-	cpumask_of_cpu_ptr(new_mask, 0);
 	int ret = 0;
 
 	if (smi_cmd->magic != SMI_CMD_MAGIC) {
@@ -256,7 +255,7 @@ static int smi_request(struct smi_cmd *smi_cmd)
 
 	/* SMI requires CPU 0 */
 	old_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, new_mask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(0));
 	if (smp_processor_id() != 0) {
 		dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
 			__func__);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 579b01ff82d..c3b4227f48a 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -229,11 +229,10 @@ xpc_hb_checker(void *ignore)
 	int last_IRQ_count = 0;
 	int new_IRQ_count;
 	int force_IRQ = 0;
-	cpumask_of_cpu_ptr(cpumask, XPC_HB_CHECK_CPU);
 
 	/* this thread was marked active by xpc_hb_init() */
 
-	set_cpus_allowed_ptr(current, cpumask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU));
 
 	/* set our heartbeating to other partitions into motion */
 	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 738b411ff2d..ba9b2054ecb 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -33,9 +33,8 @@ static int stopmachine(void *cpu)
 {
 	int irqs_disabled = 0;
 	int prepared = 0;
-	cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
 
-	set_cpus_allowed_ptr(current, cpumask);
+	set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
 
 	/* Ack: we are alive */
 	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index bf43284d685..80c4336f418 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -196,12 +196,10 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 	struct tick_device *td;
 	int cpu, ret = NOTIFY_OK;
 	unsigned long flags;
-	cpumask_of_cpu_ptr_declare(cpumask);
 
 	spin_lock_irqsave(&tick_device_lock, flags);
 
 	cpu = smp_processor_id();
-	cpumask_of_cpu_ptr_next(cpumask, cpu);
 	if (!cpu_isset(cpu, newdev->cpumask))
 		goto out_bc;
 
@@ -209,7 +207,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 	curdev = td->evtdev;
 
 	/* cpu local device ? */
-	if (!cpus_equal(newdev->cpumask, *cpumask)) {
+	if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) {
 
 		/*
 		 * If the cpu affinity of the device interrupt can not
@@ -222,7 +220,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 		 * If we have a cpu local device already, do not replace it
 		 * by a non cpu local device
 		 */
-		if (curdev && cpus_equal(curdev->cpumask, *cpumask))
+		if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu)))
 			goto out_bc;
 	}
 
@@ -254,7 +252,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 		curdev = NULL;
 	}
 	clockevents_exchange_device(curdev, newdev);
-	tick_setup_device(td, newdev, cpu, cpumask);
+	tick_setup_device(td, newdev, cpu, &cpumask_of_cpu(cpu));
 	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
 		tick_oneshot_notify();
 
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index ce2d723c10e..bb948e52ce2 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -213,9 +213,7 @@ static void start_stack_timers(void)
 	int cpu;
 
 	for_each_online_cpu(cpu) {
-		cpumask_of_cpu_ptr(new_mask, cpu);
-
-		set_cpus_allowed_ptr(current, new_mask);
+		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 		start_stack_timer(cpu);
 	}
 	set_cpus_allowed_ptr(current, &saved_mask);
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index c4381d9516f..0f8fc22ed10 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -11,7 +11,6 @@ notrace unsigned int debug_smp_processor_id(void)
 {
 	unsigned long preempt_count = preempt_count();
 	int this_cpu = raw_smp_processor_id();
-	cpumask_of_cpu_ptr_declare(this_mask);
 
 	if (likely(preempt_count))
 		goto out;
@@ -23,9 +22,7 @@ notrace unsigned int debug_smp_processor_id(void)
 	 * Kernel threads bound to a single CPU can safely use
 	 * smp_processor_id():
 	 */
-	cpumask_of_cpu_ptr_next(this_mask, this_cpu);
-
-	if (cpus_equal(current->cpus_allowed, *this_mask))
+	if (cpus_equal(current->cpus_allowed, cpumask_of_cpu(this_cpu)))
 		goto out;
 
 	/*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 835d2741308..5a32cb7c4bb 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -310,8 +310,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
 	switch (m->mode) {
 	case SVC_POOL_PERCPU:
 	{
-		cpumask_of_cpu_ptr(cpumask, node);
-		set_cpus_allowed_ptr(task, cpumask);
+		set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
 		break;
 	}
 	case SVC_POOL_PERNODE:
-- 
cgit v1.2.3-70-g09d2


From 8d8bb39b9eba32dd70e87fd5ad5c5dd4ba118e06 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 25 Jul 2008 19:44:49 -0700
Subject: dma-mapping: add the device argument to dma_mapping_error()

Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER
architecture does:

This enables us to cleanly fix the Calgary IOMMU issue that some devices
are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423).

I think that per-device dma_mapping_ops support would be also helpful for
KVM people to support PCI passthrough but Andi thinks that this makes it
difficult to support the PCI passthrough (see the above thread).  So I
CC'ed this to KVM camp.  Comments are appreciated.

A pointer to dma_mapping_ops to struct dev_archdata is added.  If the
pointer is non NULL, DMA operations in asm/dma-mapping.h use it.  If it's
NULL, the system-wide dma_ops pointer is used as before.

If it's useful for KVM people, I plan to implement a mechanism to register
a hook called when a new pci (or dma capable) device is created (it works
with hot plugging).  It enables IOMMUs to set up an appropriate
dma_mapping_ops per device.

The major obstacle is that dma_mapping_error doesn't take a pointer to the
device unlike other DMA operations.  So x86 can't have dma_mapping_ops per
device.  Note all the POWER IOMMUs use the same dma_mapping_error function
so this is not a problem for POWER but x86 IOMMUs use different
dma_mapping_error functions.

The first patch adds the device argument to dma_mapping_error.  The patch
is trivial but large since it touches lots of drivers and dma-mapping.h in
all the architecture.

This patch:

dma_mapping_error() doesn't take a pointer to the device unlike other DMA
operations.  So we can't have dma_mapping_ops per device.

Note that POWER already has dma_mapping_ops per device but all the POWER
IOMMUs use the same dma_mapping_error function.  x86 IOMMUs use device
argument.

[akpm@linux-foundation.org: fix sge]
[akpm@linux-foundation.org: fix svc_rdma]
[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: fix bnx2x]
[akpm@linux-foundation.org: fix s2io]
[akpm@linux-foundation.org: fix pasemi_mac]
[akpm@linux-foundation.org: fix sdhci]
[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: fix sparc]
[akpm@linux-foundation.org: fix ibmvscsi]
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Avi Kivity <avi@qumranet.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DMA-API.txt                      |  4 +-
 arch/arm/common/dmabounce.c                    |  2 +-
 arch/ia64/hp/common/hwsw_iommu.c               |  5 +-
 arch/ia64/hp/common/sba_iommu.c                |  2 +-
 arch/ia64/sn/pci/pci_dma.c                     |  2 +-
 arch/mips/mm/dma-default.c                     |  2 +-
 arch/powerpc/platforms/cell/celleb_scc_pciex.c |  2 +-
 arch/powerpc/platforms/cell/spider-pci.c       |  2 +-
 arch/powerpc/platforms/iseries/mf.c            |  2 +-
 arch/x86/kernel/pci-calgary_64.c               |  2 +-
 arch/x86/kernel/pci-dma.c                      | 27 ++++---
 arch/x86/kernel/pci-gart_64.c                  |  3 +-
 arch/x86/kernel/pci-nommu.c                    | 14 +---
 arch/x86/kernel/pci-swiotlb_64.c               |  2 +-
 drivers/firewire/fw-iso.c                      |  2 +-
 drivers/firewire/fw-ohci.c                     |  2 +-
 drivers/firewire/fw-sbp2.c                     |  8 +--
 drivers/infiniband/hw/ipath/ipath_sdma.c       |  2 +-
 drivers/infiniband/hw/ipath/ipath_user_sdma.c  |  6 +-
 drivers/infiniband/hw/mthca/mthca_eq.c         |  2 +-
 drivers/media/dvb/pluto2/pluto2.c              |  2 +-
 drivers/mmc/host/sdhci.c                       |  4 +-
 drivers/net/arm/ep93xx_eth.c                   |  4 +-
 drivers/net/bnx2x_main.c                       |  4 +-
 drivers/net/cxgb3/sge.c                        |  2 +-
 drivers/net/e100.c                             |  2 +-
 drivers/net/e1000e/ethtool.c                   |  4 +-
 drivers/net/e1000e/netdev.c                    | 11 +--
 drivers/net/ibmveth.c                          | 38 +++++-----
 drivers/net/iseries_veth.c                     |  4 +-
 drivers/net/mlx4/eq.c                          |  2 +-
 drivers/net/pasemi_mac.c                       |  6 +-
 drivers/net/qla3xxx.c                          | 12 ++--
 drivers/net/s2io.c                             | 48 +++++++------
 drivers/net/sfc/rx.c                           |  4 +-
 drivers/net/sfc/tx.c                           |  7 +-
 drivers/net/spider_net.c                       |  4 +-
 drivers/net/tc35815.c                          |  4 +-
 drivers/net/wireless/ath5k/base.c              |  4 +-
 drivers/scsi/ibmvscsi/ibmvfc.c                 |  4 +-
 drivers/scsi/ibmvscsi/ibmvscsi.c               |  4 +-
 drivers/scsi/ibmvscsi/ibmvstgt.c               |  2 +-
 drivers/scsi/ibmvscsi/rpa_vscsi.c              |  2 +-
 drivers/spi/atmel_spi.c                        |  4 +-
 drivers/spi/au1550_spi.c                       |  6 +-
 drivers/spi/omap2_mcspi.c                      |  4 +-
 drivers/spi/pxa2xx_spi.c                       |  4 +-
 drivers/spi/spi_imx.c                          |  6 +-
 include/asm-alpha/dma-mapping.h                |  6 +-
 include/asm-alpha/pci.h                        |  2 +-
 include/asm-arm/dma-mapping.h                  |  2 +-
 include/asm-avr32/dma-mapping.h                |  2 +-
 include/asm-cris/dma-mapping.h                 |  2 +-
 include/asm-frv/dma-mapping.h                  |  2 +-
 include/asm-generic/dma-mapping-broken.h       |  2 +-
 include/asm-generic/dma-mapping.h              |  4 +-
 include/asm-generic/pci-dma-compat.h           |  4 +-
 include/asm-ia64/machvec.h                     |  2 +-
 include/asm-m68k/dma-mapping.h                 |  2 +-
 include/asm-mips/dma-mapping.h                 |  2 +-
 include/asm-mn10300/dma-mapping.h              |  2 +-
 include/asm-parisc/dma-mapping.h               |  2 +-
 include/asm-powerpc/dma-mapping.h              |  2 +-
 include/asm-sh/dma-mapping.h                   |  2 +-
 include/asm-sparc/dma-mapping_64.h             |  2 +-
 include/asm-sparc/pci_32.h                     |  3 +-
 include/asm-sparc/pci_64.h                     |  5 +-
 include/asm-x86/device.h                       |  3 +
 include/asm-x86/dma-mapping.h                  | 99 ++++++++++++++++++--------
 include/asm-x86/swiotlb.h                      |  2 +-
 include/asm-xtensa/dma-mapping.h               |  2 +-
 include/linux/i2o.h                            |  2 +-
 include/linux/ssb/ssb.h                        |  4 +-
 include/rdma/ib_verbs.h                        |  2 +-
 lib/swiotlb.c                                  |  4 +-
 net/sunrpc/xprtrdma/svc_rdma_sendto.c          |  3 +-
 76 files changed, 256 insertions(+), 210 deletions(-)

(limited to 'net')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 80d150458c8..d8b63d164e4 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -298,10 +298,10 @@ recommended that you never use these unless you really know what the
 cache width is.
 
 int
-dma_mapping_error(dma_addr_t dma_addr)
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 
 int
-pci_dma_mapping_error(dma_addr_t dma_addr)
+pci_dma_mapping_error(struct pci_dev *hwdev, dma_addr_t dma_addr)
 
 In some circumstances dma_map_single and dma_map_page will fail to create
 a mapping. A driver can check for these errors by testing the returned
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index dd294734260..69130f36590 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -280,7 +280,7 @@ unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 	/*
 	 * Trying to unmap an invalid mapping
 	 */
-	if (dma_mapping_error(dma_addr)) {
+	if (dma_mapping_error(dev, dma_addr)) {
 		dev_err(dev, "Trying to unmap invalid mapping\n");
 		return;
 	}
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 1c44ec2a1d5..88b6e6f3fd8 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -186,9 +186,10 @@ hwsw_dma_supported (struct device *dev, u64 mask)
 }
 
 int
-hwsw_dma_mapping_error (dma_addr_t dma_addr)
+hwsw_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-	return hwiommu_dma_mapping_error (dma_addr) || swiotlb_dma_mapping_error(dma_addr);
+	return hwiommu_dma_mapping_error(dev, dma_addr) ||
+		swiotlb_dma_mapping_error(dev, dma_addr);
 }
 
 EXPORT_SYMBOL(hwsw_dma_mapping_error);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 34421aed1e2..4956be40d7b 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2147,7 +2147,7 @@ sba_dma_supported (struct device *dev, u64 mask)
 }
 
 int
-sba_dma_mapping_error (dma_addr_t dma_addr)
+sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return 0;
 }
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 52175af299a..53ebb648449 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -350,7 +350,7 @@ void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 }
 EXPORT_SYMBOL(sn_dma_sync_sg_for_device);
 
-int sn_dma_mapping_error(dma_addr_t dma_addr)
+int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return 0;
 }
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index ae39dd88b9a..891312f8e5a 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -348,7 +348,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele
 
 EXPORT_SYMBOL(dma_sync_sg_for_device);
 
-int dma_mapping_error(dma_addr_t dma_addr)
+int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return 0;
 }
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
index 0e04f8fb152..3e7e0f1568e 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -281,7 +281,7 @@ static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data)
 
 	dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va,
 				       PAGE_SIZE, DMA_FROM_DEVICE);
-	if (dma_mapping_error(dummy_page_da)) {
+	if (dma_mapping_error(bus->phb->parent, dummy_page_da)) {
 		pr_err("PCIEX:Map dummy page failed.\n");
 		kfree(dummy_page_va);
 		return -1;
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index 418b605ac35..5122ec14527 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -111,7 +111,7 @@ static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
 
 	dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
 				       PAGE_SIZE, DMA_FROM_DEVICE);
-	if (dma_mapping_error(dummy_page_da)) {
+	if (dma_mapping_error(phb->parent, dummy_page_da)) {
 		pr_err("SPIDER-IOWA:Map dummy page filed.\n");
 		kfree(dummy_page_va);
 		return -1;
diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c
index 1dc7295746d..731d7b15774 100644
--- a/arch/powerpc/platforms/iseries/mf.c
+++ b/arch/powerpc/platforms/iseries/mf.c
@@ -871,7 +871,7 @@ static int proc_mf_dump_cmdline(char *page, char **start, off_t off,
 		count = 256 - off;
 
 	dma_addr = iseries_hv_map(page, off + count, DMA_FROM_DEVICE);
-	if (dma_mapping_error(dma_addr))
+	if (dma_mapping_error(NULL, dma_addr))
 		return -ENOMEM;
 	memset(page, 0, off + count);
 	memset(&vsp_cmd, 0, sizeof(vsp_cmd));
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 19e7fc7c2c4..1eb86be93d7 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -544,7 +544,7 @@ error:
 	return ret;
 }
 
-static const struct dma_mapping_ops calgary_dma_ops = {
+static struct dma_mapping_ops calgary_dma_ops = {
 	.alloc_coherent = calgary_alloc_coherent,
 	.map_single = calgary_map_single,
 	.unmap_single = calgary_unmap_single,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index cbecb05551b..37544123896 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,7 +11,7 @@
 
 static int forbid_dac __read_mostly;
 
-const struct dma_mapping_ops *dma_ops;
+struct dma_mapping_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 static int iommu_sac_force __read_mostly;
@@ -312,6 +312,8 @@ static int dma_release_coherent(struct device *dev, int order, void *vaddr)
 
 int dma_supported(struct device *dev, u64 mask)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+
 #ifdef CONFIG_PCI
 	if (mask > 0xffffffff && forbid_dac > 0) {
 		dev_info(dev, "PCI: Disallowing DAC for device\n");
@@ -319,8 +321,8 @@ int dma_supported(struct device *dev, u64 mask)
 	}
 #endif
 
-	if (dma_ops->dma_supported)
-		return dma_ops->dma_supported(dev, mask);
+	if (ops->dma_supported)
+		return ops->dma_supported(dev, mask);
 
 	/* Copied from i386. Doesn't make much sense, because it will
 	   only work for pci_alloc_coherent.
@@ -367,6 +369,7 @@ void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		   gfp_t gfp)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
 	void *memory = NULL;
 	struct page *page;
 	unsigned long dma_mask = 0;
@@ -435,8 +438,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 			/* Let low level make its own zone decisions */
 			gfp &= ~(GFP_DMA32|GFP_DMA);
 
-			if (dma_ops->alloc_coherent)
-				return dma_ops->alloc_coherent(dev, size,
+			if (ops->alloc_coherent)
+				return ops->alloc_coherent(dev, size,
 							   dma_handle, gfp);
 			return NULL;
 		}
@@ -448,14 +451,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		}
 	}
 
-	if (dma_ops->alloc_coherent) {
+	if (ops->alloc_coherent) {
 		free_pages((unsigned long)memory, get_order(size));
 		gfp &= ~(GFP_DMA|GFP_DMA32);
-		return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
+		return ops->alloc_coherent(dev, size, dma_handle, gfp);
 	}
 
-	if (dma_ops->map_simple) {
-		*dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory),
+	if (ops->map_simple) {
+		*dma_handle = ops->map_simple(dev, virt_to_phys(memory),
 					      size,
 					      PCI_DMA_BIDIRECTIONAL);
 		if (*dma_handle != bad_dma_address)
@@ -477,12 +480,14 @@ EXPORT_SYMBOL(dma_alloc_coherent);
 void dma_free_coherent(struct device *dev, size_t size,
 			 void *vaddr, dma_addr_t bus)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+
 	int order = get_order(size);
 	WARN_ON(irqs_disabled());	/* for portability */
 	if (dma_release_coherent(dev, order, vaddr))
 		return;
-	if (dma_ops->unmap_single)
-		dma_ops->unmap_single(dev, bus, size, 0);
+	if (ops->unmap_single)
+		ops->unmap_single(dev, bus, size, 0);
 	free_pages((unsigned long)vaddr, order);
 }
 EXPORT_SYMBOL(dma_free_coherent);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index df5f142657d..744126e6495 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -692,8 +692,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
 extern int agp_amd64_init(void);
 
-static const struct dma_mapping_ops gart_dma_ops = {
-	.mapping_error			= NULL,
+static struct dma_mapping_ops gart_dma_ops = {
 	.map_single			= gart_map_single,
 	.map_simple			= gart_map_simple,
 	.unmap_single			= gart_unmap_single,
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 792b9179eff..3f91f71cdc3 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
 	return nents;
 }
 
-/* Make sure we keep the same behaviour */
-static int nommu_mapping_error(dma_addr_t dma_addr)
-{
-#ifdef CONFIG_X86_32
-	return 0;
-#else
-	return (dma_addr == bad_dma_address);
-#endif
-}
-
-
-const struct dma_mapping_ops nommu_dma_ops = {
+struct dma_mapping_ops nommu_dma_ops = {
 	.map_single = nommu_map_single,
 	.map_sg = nommu_map_sg,
-	.mapping_error = nommu_mapping_error,
 	.is_phys = 1,
 };
 
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 20df839b9c2..c4ce0332759 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
 	return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
 }
 
-const struct dma_mapping_ops swiotlb_dma_ops = {
+struct dma_mapping_ops swiotlb_dma_ops = {
 	.mapping_error = swiotlb_dma_mapping_error,
 	.alloc_coherent = swiotlb_alloc_coherent,
 	.free_coherent = swiotlb_free_coherent,
diff --git a/drivers/firewire/fw-iso.c b/drivers/firewire/fw-iso.c
index bcbe794a3ea..e14c03dc006 100644
--- a/drivers/firewire/fw-iso.c
+++ b/drivers/firewire/fw-iso.c
@@ -50,7 +50,7 @@ fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
 
 		address = dma_map_page(card->device, buffer->pages[i],
 				       0, PAGE_SIZE, direction);
-		if (dma_mapping_error(address)) {
+		if (dma_mapping_error(card->device, address)) {
 			__free_page(buffer->pages[i]);
 			goto out_pages;
 		}
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c
index 333b12544dd..566672e0bcf 100644
--- a/drivers/firewire/fw-ohci.c
+++ b/drivers/firewire/fw-ohci.c
@@ -953,7 +953,7 @@ at_context_queue_packet(struct context *ctx, struct fw_packet *packet)
 		payload_bus =
 			dma_map_single(ohci->card.device, packet->payload,
 				       packet->payload_length, DMA_TO_DEVICE);
-		if (dma_mapping_error(payload_bus)) {
+		if (dma_mapping_error(ohci->card.device, payload_bus)) {
 			packet->ack = RCODE_SEND_ERROR;
 			return -1;
 		}
diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
index 53fc5a641e6..aaff50ebba1 100644
--- a/drivers/firewire/fw-sbp2.c
+++ b/drivers/firewire/fw-sbp2.c
@@ -543,7 +543,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id,
 	orb->response_bus =
 		dma_map_single(device->card->device, &orb->response,
 			       sizeof(orb->response), DMA_FROM_DEVICE);
-	if (dma_mapping_error(orb->response_bus))
+	if (dma_mapping_error(device->card->device, orb->response_bus))
 		goto fail_mapping_response;
 
 	orb->request.response.high = 0;
@@ -577,7 +577,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id,
 	orb->base.request_bus =
 		dma_map_single(device->card->device, &orb->request,
 			       sizeof(orb->request), DMA_TO_DEVICE);
-	if (dma_mapping_error(orb->base.request_bus))
+	if (dma_mapping_error(device->card->device, orb->base.request_bus))
 		goto fail_mapping_request;
 
 	sbp2_send_orb(&orb->base, lu, node_id, generation,
@@ -1424,7 +1424,7 @@ sbp2_map_scatterlist(struct sbp2_command_orb *orb, struct fw_device *device,
 	orb->page_table_bus =
 		dma_map_single(device->card->device, orb->page_table,
 			       sizeof(orb->page_table), DMA_TO_DEVICE);
-	if (dma_mapping_error(orb->page_table_bus))
+	if (dma_mapping_error(device->card->device, orb->page_table_bus))
 		goto fail_page_table;
 
 	/*
@@ -1509,7 +1509,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done)
 	orb->base.request_bus =
 		dma_map_single(device->card->device, &orb->request,
 			       sizeof(orb->request), DMA_TO_DEVICE);
-	if (dma_mapping_error(orb->base.request_bus))
+	if (dma_mapping_error(device->card->device, orb->base.request_bus))
 		goto out;
 
 	sbp2_send_orb(&orb->base, lu, lu->tgt->node_id, lu->generation,
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index eaba03273e4..284c9bca517 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -698,7 +698,7 @@ retry:
 
 	addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
 			      tx->map_len, DMA_TO_DEVICE);
-	if (dma_mapping_error(addr)) {
+	if (dma_mapping_error(&dd->pcidev->dev, addr)) {
 		ret = -EIO;
 		goto unlock;
 	}
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
index 86e016916cd..82d9a0b5ca2 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -206,7 +206,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
 
 	dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
 				DMA_TO_DEVICE);
-	if (dma_mapping_error(dma_addr)) {
+	if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
 		ret = -ENOMEM;
 		goto free_unmap;
 	}
@@ -301,7 +301,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
 				     pages[j], 0, flen, DMA_TO_DEVICE);
 		unsigned long fofs = addr & ~PAGE_MASK;
 
-		if (dma_mapping_error(dma_addr)) {
+		if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
 			ret = -ENOMEM;
 			goto done;
 		}
@@ -508,7 +508,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
 		if (page) {
 			dma_addr = dma_map_page(&dd->pcidev->dev,
 						page, 0, len, DMA_TO_DEVICE);
-			if (dma_mapping_error(dma_addr)) {
+			if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
 				ret = -ENOMEM;
 				goto free_pbc;
 			}
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 4e36aa7cb3d..cc6858f0b65 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -780,7 +780,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
 		return -ENOMEM;
 	dev->eq_table.icm_dma  = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
 					      PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	if (pci_dma_mapping_error(dev->eq_table.icm_dma)) {
+	if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
 		__free_page(dev->eq_table.icm_page);
 		return -ENOMEM;
 	}
diff --git a/drivers/media/dvb/pluto2/pluto2.c b/drivers/media/dvb/pluto2/pluto2.c
index 1360403b88b..a9653c63f4d 100644
--- a/drivers/media/dvb/pluto2/pluto2.c
+++ b/drivers/media/dvb/pluto2/pluto2.c
@@ -242,7 +242,7 @@ static int __devinit pluto_dma_map(struct pluto *pluto)
 	pluto->dma_addr = pci_map_single(pluto->pdev, pluto->dma_buf,
 			TS_DMA_BYTES, PCI_DMA_FROMDEVICE);
 
-	return pci_dma_mapping_error(pluto->dma_addr);
+	return pci_dma_mapping_error(pluto->pdev, pluto->dma_addr);
 }
 
 static void pluto_dma_unmap(struct pluto *pluto)
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index c3a5db72ddd..5f95e10229b 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -337,7 +337,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
 
 	host->align_addr = dma_map_single(mmc_dev(host->mmc),
 		host->align_buffer, 128 * 4, direction);
-	if (dma_mapping_error(host->align_addr))
+	if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr))
 		goto fail;
 	BUG_ON(host->align_addr & 0x3);
 
@@ -439,7 +439,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
 
 	host->adma_addr = dma_map_single(mmc_dev(host->mmc),
 		host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE);
-	if (dma_mapping_error(host->align_addr))
+	if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr))
 		goto unmap_entries;
 	BUG_ON(host->adma_addr & 0x3);
 
diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index 7a14980f347..18d3eeb7eab 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -482,7 +482,7 @@ static int ep93xx_alloc_buffers(struct ep93xx_priv *ep)
 			goto err;
 
 		d = dma_map_single(NULL, page, PAGE_SIZE, DMA_FROM_DEVICE);
-		if (dma_mapping_error(d)) {
+		if (dma_mapping_error(NULL, d)) {
 			free_page((unsigned long)page);
 			goto err;
 		}
@@ -505,7 +505,7 @@ static int ep93xx_alloc_buffers(struct ep93xx_priv *ep)
 			goto err;
 
 		d = dma_map_single(NULL, page, PAGE_SIZE, DMA_TO_DEVICE);
-		if (dma_mapping_error(d)) {
+		if (dma_mapping_error(NULL, d)) {
 			free_page((unsigned long)page);
 			goto err;
 		}
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index 0263bef9cc6..c7cc760a177 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -1020,7 +1020,7 @@ static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp,
 
 	mapping = pci_map_page(bp->pdev, page, 0, BCM_PAGE_SIZE*PAGES_PER_SGE,
 			       PCI_DMA_FROMDEVICE);
-	if (unlikely(dma_mapping_error(mapping))) {
+	if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
 		__free_pages(page, PAGES_PER_SGE_SHIFT);
 		return -ENOMEM;
 	}
@@ -1048,7 +1048,7 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
 
 	mapping = pci_map_single(bp->pdev, skb->data, bp->rx_buf_use_size,
 				 PCI_DMA_FROMDEVICE);
-	if (unlikely(dma_mapping_error(mapping))) {
+	if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
 		dev_kfree_skb(skb);
 		return -ENOMEM;
 	}
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index a96331c875e..1b0861d73ab 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -386,7 +386,7 @@ static inline int add_one_rx_buf(void *va, unsigned int len,
 	dma_addr_t mapping;
 
 	mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
-	if (unlikely(pci_dma_mapping_error(mapping)))
+	if (unlikely(pci_dma_mapping_error(pdev, mapping)))
 		return -ENOMEM;
 
 	pci_unmap_addr_set(sd, dma_addr, mapping);
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 1037b133231..19d32a227be 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -1790,7 +1790,7 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx)
 	rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data,
 		RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL);
 
-	if (pci_dma_mapping_error(rx->dma_addr)) {
+	if (pci_dma_mapping_error(nic->pdev, rx->dma_addr)) {
 		dev_kfree_skb_any(rx->skb);
 		rx->skb = NULL;
 		rx->dma_addr = 0;
diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index a14561f40db..9350564065e 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -1090,7 +1090,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter)
 		tx_ring->buffer_info[i].dma =
 			pci_map_single(pdev, skb->data, skb->len,
 				       PCI_DMA_TODEVICE);
-		if (pci_dma_mapping_error(tx_ring->buffer_info[i].dma)) {
+		if (pci_dma_mapping_error(pdev, tx_ring->buffer_info[i].dma)) {
 			ret_val = 4;
 			goto err_nomem;
 		}
@@ -1153,7 +1153,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter)
 		rx_ring->buffer_info[i].dma =
 			pci_map_single(pdev, skb->data, 2048,
 				       PCI_DMA_FROMDEVICE);
-		if (pci_dma_mapping_error(rx_ring->buffer_info[i].dma)) {
+		if (pci_dma_mapping_error(pdev, rx_ring->buffer_info[i].dma)) {
 			ret_val = 8;
 			goto err_nomem;
 		}
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 9c0f56b3c51..d1367789976 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -195,7 +195,7 @@ map_skb:
 		buffer_info->dma = pci_map_single(pdev, skb->data,
 						  adapter->rx_buffer_len,
 						  PCI_DMA_FROMDEVICE);
-		if (pci_dma_mapping_error(buffer_info->dma)) {
+		if (pci_dma_mapping_error(pdev, buffer_info->dma)) {
 			dev_err(&pdev->dev, "RX DMA map failed\n");
 			adapter->rx_dma_failed++;
 			break;
@@ -265,7 +265,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
 						   ps_page->page,
 						   0, PAGE_SIZE,
 						   PCI_DMA_FROMDEVICE);
-				if (pci_dma_mapping_error(ps_page->dma)) {
+				if (pci_dma_mapping_error(pdev, ps_page->dma)) {
 					dev_err(&adapter->pdev->dev,
 					  "RX DMA page map failed\n");
 					adapter->rx_dma_failed++;
@@ -300,7 +300,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
 		buffer_info->dma = pci_map_single(pdev, skb->data,
 						  adapter->rx_ps_bsize0,
 						  PCI_DMA_FROMDEVICE);
-		if (pci_dma_mapping_error(buffer_info->dma)) {
+		if (pci_dma_mapping_error(pdev, buffer_info->dma)) {
 			dev_err(&pdev->dev, "RX DMA map failed\n");
 			adapter->rx_dma_failed++;
 			/* cleanup skb */
@@ -3344,7 +3344,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 				skb->data + offset,
 				size,
 				PCI_DMA_TODEVICE);
-		if (pci_dma_mapping_error(buffer_info->dma)) {
+		if (pci_dma_mapping_error(adapter->pdev, buffer_info->dma)) {
 			dev_err(&adapter->pdev->dev, "TX DMA map failed\n");
 			adapter->tx_dma_failed++;
 			return -1;
@@ -3382,7 +3382,8 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 					offset,
 					size,
 					PCI_DMA_TODEVICE);
-			if (pci_dma_mapping_error(buffer_info->dma)) {
+			if (pci_dma_mapping_error(adapter->pdev,
+						  buffer_info->dma)) {
 				dev_err(&adapter->pdev->dev,
 					"TX DMA page map failed\n");
 				adapter->tx_dma_failed++;
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index e5a6e2e8454..91ec9fdc718 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -260,7 +260,7 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
 		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
 				pool->buff_size, DMA_FROM_DEVICE);
 
-		if (dma_mapping_error(dma_addr))
+		if (dma_mapping_error((&adapter->vdev->dev, dma_addr))
 			goto failure;
 
 		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
@@ -294,7 +294,7 @@ failure:
 		pool->consumer_index = pool->size - 1;
 	else
 		pool->consumer_index--;
-	if (!dma_mapping_error(dma_addr))
+	if (!dma_mapping_error((&adapter->vdev->dev, dma_addr))
 		dma_unmap_single(&adapter->vdev->dev,
 		                 pool->dma_addr[index], pool->buff_size,
 		                 DMA_FROM_DEVICE);
@@ -448,11 +448,11 @@ static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
 static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
 {
 	int i;
+	struct device *dev = &adapter->vdev->dev;
 
 	if(adapter->buffer_list_addr != NULL) {
-		if(!dma_mapping_error(adapter->buffer_list_dma)) {
-			dma_unmap_single(&adapter->vdev->dev,
-					adapter->buffer_list_dma, 4096,
+		if (!dma_mapping_error(dev, adapter->buffer_list_dma)) {
+			dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
 					DMA_BIDIRECTIONAL);
 			adapter->buffer_list_dma = DMA_ERROR_CODE;
 		}
@@ -461,9 +461,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
 	}
 
 	if(adapter->filter_list_addr != NULL) {
-		if(!dma_mapping_error(adapter->filter_list_dma)) {
-			dma_unmap_single(&adapter->vdev->dev,
-					adapter->filter_list_dma, 4096,
+		if (!dma_mapping_error(dev, adapter->filter_list_dma)) {
+			dma_unmap_single(dev, adapter->filter_list_dma, 4096,
 					DMA_BIDIRECTIONAL);
 			adapter->filter_list_dma = DMA_ERROR_CODE;
 		}
@@ -472,8 +471,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
 	}
 
 	if(adapter->rx_queue.queue_addr != NULL) {
-		if(!dma_mapping_error(adapter->rx_queue.queue_dma)) {
-			dma_unmap_single(&adapter->vdev->dev,
+		if (!dma_mapping_error(dev, adapter->rx_queue.queue_dma)) {
+			dma_unmap_single(dev,
 					adapter->rx_queue.queue_dma,
 					adapter->rx_queue.queue_len,
 					DMA_BIDIRECTIONAL);
@@ -535,6 +534,7 @@ static int ibmveth_open(struct net_device *netdev)
 	int rc;
 	union ibmveth_buf_desc rxq_desc;
 	int i;
+	struct device *dev;
 
 	ibmveth_debug_printk("open starting\n");
 
@@ -563,17 +563,19 @@ static int ibmveth_open(struct net_device *netdev)
 		return -ENOMEM;
 	}
 
-	adapter->buffer_list_dma = dma_map_single(&adapter->vdev->dev,
+	dev = &adapter->vdev->dev;
+
+	adapter->buffer_list_dma = dma_map_single(dev,
 			adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
-	adapter->filter_list_dma = dma_map_single(&adapter->vdev->dev,
+	adapter->filter_list_dma = dma_map_single(dev,
 			adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
-	adapter->rx_queue.queue_dma = dma_map_single(&adapter->vdev->dev,
+	adapter->rx_queue.queue_dma = dma_map_single(dev,
 			adapter->rx_queue.queue_addr,
 			adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL);
 
-	if((dma_mapping_error(adapter->buffer_list_dma) ) ||
-	   (dma_mapping_error(adapter->filter_list_dma)) ||
-	   (dma_mapping_error(adapter->rx_queue.queue_dma))) {
+	if ((dma_mapping_error(dev, adapter->buffer_list_dma)) ||
+	    (dma_mapping_error(dev, adapter->filter_list_dma)) ||
+	    (dma_mapping_error(dev, adapter->rx_queue.queue_dma))) {
 		ibmveth_error_printk("unable to map filter or buffer list pages\n");
 		ibmveth_cleanup(adapter);
 		napi_disable(&adapter->napi);
@@ -645,7 +647,7 @@ static int ibmveth_open(struct net_device *netdev)
 	adapter->bounce_buffer_dma =
 	    dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
 			   netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(adapter->bounce_buffer_dma)) {
+	if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
 		ibmveth_error_printk("unable to map bounce buffer\n");
 		ibmveth_cleanup(adapter);
 		napi_disable(&adapter->napi);
@@ -922,7 +924,7 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 		buf[1] = 0;
 	}
 
-	if (dma_mapping_error(data_dma_addr)) {
+	if (dma_mapping_error((&adapter->vdev->dev, data_dma_addr)) {
 		if (!firmware_has_feature(FW_FEATURE_CMO))
 			ibmveth_error_printk("tx: unable to map xmit buffer\n");
 		skb_copy_from_linear_data(skb, adapter->bounce_buffer,
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index b8d0639c1cd..c46864d626b 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -1128,7 +1128,7 @@ static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp,
 	msg->data.addr[0] = dma_map_single(port->dev, skb->data,
 				skb->len, DMA_TO_DEVICE);
 
-	if (dma_mapping_error(msg->data.addr[0]))
+	if (dma_mapping_error(port->dev, msg->data.addr[0]))
 		goto recycle_and_drop;
 
 	msg->dev = port->dev;
@@ -1226,7 +1226,7 @@ static void veth_recycle_msg(struct veth_lpar_connection *cnx,
 		dma_address = msg->data.addr[0];
 		dma_length = msg->data.len[0];
 
-		if (!dma_mapping_error(dma_address))
+		if (!dma_mapping_error(msg->dev, dma_address))
 			dma_unmap_single(msg->dev, dma_address, dma_length,
 					DMA_TO_DEVICE);
 
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index ea3a09aaa84..7df928d3a3d 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -526,7 +526,7 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
 		return -ENOMEM;
 	priv->eq_table.icm_dma  = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0,
 					       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	if (pci_dma_mapping_error(priv->eq_table.icm_dma)) {
+	if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) {
 		__free_page(priv->eq_table.icm_page);
 		return -ENOMEM;
 	}
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 993d87c9296..edc0fd58898 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -650,7 +650,7 @@ static void pasemi_mac_replenish_rx_ring(const struct net_device *dev,
 				     mac->bufsz - LOCAL_SKB_ALIGN,
 				     PCI_DMA_FROMDEVICE);
 
-		if (unlikely(dma_mapping_error(dma))) {
+		if (unlikely(pci_dma_mapping_error(mac->dma_pdev, dma))) {
 			dev_kfree_skb_irq(info->skb);
 			break;
 		}
@@ -1519,7 +1519,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 	map[0] = pci_map_single(mac->dma_pdev, skb->data, skb_headlen(skb),
 				PCI_DMA_TODEVICE);
 	map_size[0] = skb_headlen(skb);
-	if (dma_mapping_error(map[0]))
+	if (pci_dma_mapping_error(mac->dma_pdev, map[0]))
 		goto out_err_nolock;
 
 	for (i = 0; i < nfrags; i++) {
@@ -1529,7 +1529,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 					frag->page_offset, frag->size,
 					PCI_DMA_TODEVICE);
 		map_size[i+1] = frag->size;
-		if (dma_mapping_error(map[i+1])) {
+		if (pci_dma_mapping_error(mac->dma_pdev, map[i+1])) {
 			nfrags = i;
 			goto out_err_nolock;
 		}
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index e7d48a352be..e82b37bbd6c 100644
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -328,7 +328,7 @@ static void ql_release_to_lrg_buf_free_list(struct ql3_adapter *qdev,
 					     qdev->lrg_buffer_len -
 					     QL_HEADER_SPACE,
 					     PCI_DMA_FROMDEVICE);
-			err = pci_dma_mapping_error(map);
+			err = pci_dma_mapping_error(qdev->pdev, map);
 			if(err) {
 				printk(KERN_ERR "%s: PCI mapping failed with error: %d\n",
 				       qdev->ndev->name, err);
@@ -1919,7 +1919,7 @@ static int ql_populate_free_queue(struct ql3_adapter *qdev)
 						     QL_HEADER_SPACE,
 						     PCI_DMA_FROMDEVICE);
 
-				err = pci_dma_mapping_error(map);
+				err = pci_dma_mapping_error(qdev->pdev, map);
 				if(err) {
 					printk(KERN_ERR "%s: PCI mapping failed with error: %d\n",
 					       qdev->ndev->name, err);
@@ -2454,7 +2454,7 @@ static int ql_send_map(struct ql3_adapter *qdev,
 	 */
 	map = pci_map_single(qdev->pdev, skb->data, len, PCI_DMA_TODEVICE);
 
-	err = pci_dma_mapping_error(map);
+	err = pci_dma_mapping_error(qdev->pdev, map);
 	if(err) {
 		printk(KERN_ERR "%s: PCI mapping failed with error: %d\n",
 		       qdev->ndev->name, err);
@@ -2487,7 +2487,7 @@ static int ql_send_map(struct ql3_adapter *qdev,
 						     sizeof(struct oal),
 						     PCI_DMA_TODEVICE);
 
-				err = pci_dma_mapping_error(map);
+				err = pci_dma_mapping_error(qdev->pdev, map);
 				if(err) {
 
 					printk(KERN_ERR "%s: PCI mapping outbound address list with error: %d\n",
@@ -2514,7 +2514,7 @@ static int ql_send_map(struct ql3_adapter *qdev,
 					 frag->page_offset, frag->size,
 					 PCI_DMA_TODEVICE);
 
-			err = pci_dma_mapping_error(map);
+			err = pci_dma_mapping_error(qdev->pdev, map);
 			if(err) {
 				printk(KERN_ERR "%s: PCI mapping frags failed with error: %d\n",
 				       qdev->ndev->name, err);
@@ -2916,7 +2916,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev)
 					     QL_HEADER_SPACE,
 					     PCI_DMA_FROMDEVICE);
 
-			err = pci_dma_mapping_error(map);
+			err = pci_dma_mapping_error(qdev->pdev, map);
 			if(err) {
 				printk(KERN_ERR "%s: PCI mapping failed with error: %d\n",
 				       qdev->ndev->name, err);
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 9dae40ccf04..86d77d05190 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -2512,8 +2512,8 @@ static void stop_nic(struct s2io_nic *nic)
  *   Return Value:
  *  SUCCESS on success or an appropriate -ve value on failure.
  */
-
-static int fill_rx_buffers(struct ring_info *ring, int from_card_up)
+static int fill_rx_buffers(struct s2io_nic *nic, struct ring_info *ring,
+				int from_card_up)
 {
 	struct sk_buff *skb;
 	struct RxD_t *rxdp;
@@ -2602,7 +2602,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up)
 			rxdp1->Buffer0_ptr = pci_map_single
 			    (ring->pdev, skb->data, size - NET_IP_ALIGN,
 				PCI_DMA_FROMDEVICE);
-			if(pci_dma_mapping_error(rxdp1->Buffer0_ptr))
+			if (pci_dma_mapping_error(nic->pdev,
+						rxdp1->Buffer0_ptr))
 				goto pci_map_failed;
 
 			rxdp->Control_2 =
@@ -2636,7 +2637,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up)
 				rxdp3->Buffer0_ptr =
 				   pci_map_single(ring->pdev, ba->ba_0,
 					BUF0_LEN, PCI_DMA_FROMDEVICE);
-				if (pci_dma_mapping_error(rxdp3->Buffer0_ptr))
+			if (pci_dma_mapping_error(nic->pdev,
+						rxdp3->Buffer0_ptr))
 					goto pci_map_failed;
 			} else
 				pci_dma_sync_single_for_device(ring->pdev,
@@ -2655,7 +2657,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up)
 				(ring->pdev, skb->data, ring->mtu + 4,
 						PCI_DMA_FROMDEVICE);
 
-				if (pci_dma_mapping_error(rxdp3->Buffer2_ptr))
+				if (pci_dma_mapping_error(nic->pdev,
+							rxdp3->Buffer2_ptr))
 					goto pci_map_failed;
 
 				if (from_card_up) {
@@ -2664,8 +2667,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up)
 						ba->ba_1, BUF1_LEN,
 						PCI_DMA_FROMDEVICE);
 
-					if (pci_dma_mapping_error
-						(rxdp3->Buffer1_ptr)) {
+					if (pci_dma_mapping_error(nic->pdev,
+						rxdp3->Buffer1_ptr)) {
 						pci_unmap_single
 							(ring->pdev,
 						    (dma_addr_t)(unsigned long)
@@ -2806,9 +2809,9 @@ static void free_rx_buffers(struct s2io_nic *sp)
 	}
 }
 
-static int s2io_chk_rx_buffers(struct ring_info *ring)
+static int s2io_chk_rx_buffers(struct s2io_nic *nic, struct ring_info *ring)
 {
-	if (fill_rx_buffers(ring, 0) == -ENOMEM) {
+	if (fill_rx_buffers(nic, ring, 0) == -ENOMEM) {
 		DBG_PRINT(INFO_DBG, "%s:Out of memory", ring->dev->name);
 		DBG_PRINT(INFO_DBG, " in Rx Intr!!\n");
 	}
@@ -2848,7 +2851,7 @@ static int s2io_poll_msix(struct napi_struct *napi, int budget)
 		return 0;
 
 	pkts_processed = rx_intr_handler(ring, budget);
-	s2io_chk_rx_buffers(ring);
+	s2io_chk_rx_buffers(nic, ring);
 
 	if (pkts_processed < budget_org) {
 		netif_rx_complete(dev, napi);
@@ -2882,7 +2885,7 @@ static int s2io_poll_inta(struct napi_struct *napi, int budget)
 	for (i = 0; i < config->rx_ring_num; i++) {
 		ring = &mac_control->rings[i];
 		ring_pkts_processed = rx_intr_handler(ring, budget);
-		s2io_chk_rx_buffers(ring);
+		s2io_chk_rx_buffers(nic, ring);
 		pkts_processed += ring_pkts_processed;
 		budget -= ring_pkts_processed;
 		if (budget <= 0)
@@ -2939,7 +2942,8 @@ static void s2io_netpoll(struct net_device *dev)
 		rx_intr_handler(&mac_control->rings[i], 0);
 
 	for (i = 0; i < config->rx_ring_num; i++) {
-		if (fill_rx_buffers(&mac_control->rings[i], 0) == -ENOMEM) {
+		if (fill_rx_buffers(nic, &mac_control->rings[i], 0) ==
+				-ENOMEM) {
 			DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name);
 			DBG_PRINT(INFO_DBG, " in Rx Netpoll!!\n");
 			break;
@@ -4235,14 +4239,14 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 		txdp->Buffer_Pointer = pci_map_single(sp->pdev,
 					fifo->ufo_in_band_v,
 					sizeof(u64), PCI_DMA_TODEVICE);
-		if (pci_dma_mapping_error(txdp->Buffer_Pointer))
+		if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer))
 			goto pci_map_failed;
 		txdp++;
 	}
 
 	txdp->Buffer_Pointer = pci_map_single
 	    (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(txdp->Buffer_Pointer))
+	if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer))
 		goto pci_map_failed;
 
 	txdp->Host_Control = (unsigned long) skb;
@@ -4345,7 +4349,7 @@ static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id)
 		netif_rx_schedule(dev, &ring->napi);
 	} else {
 		rx_intr_handler(ring, 0);
-		s2io_chk_rx_buffers(ring);
+		s2io_chk_rx_buffers(sp, ring);
 	}
 
 	return IRQ_HANDLED;
@@ -4826,7 +4830,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id)
 		 */
 		if (!config->napi) {
 			for (i = 0; i < config->rx_ring_num; i++)
-				s2io_chk_rx_buffers(&mac_control->rings[i]);
+				s2io_chk_rx_buffers(sp, &mac_control->rings[i]);
 		}
 		writeq(sp->general_int_mask, &bar0->general_int_mask);
 		readl(&bar0->general_int_status);
@@ -6859,7 +6863,7 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp,
 				pci_map_single( sp->pdev, (*skb)->data,
 					size - NET_IP_ALIGN,
 					PCI_DMA_FROMDEVICE);
-			if (pci_dma_mapping_error(rxdp1->Buffer0_ptr))
+			if (pci_dma_mapping_error(sp->pdev, rxdp1->Buffer0_ptr))
 				goto memalloc_failed;
 			rxdp->Host_Control = (unsigned long) (*skb);
 		}
@@ -6886,12 +6890,13 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp,
 				pci_map_single(sp->pdev, (*skb)->data,
 					       dev->mtu + 4,
 					       PCI_DMA_FROMDEVICE);
-			if (pci_dma_mapping_error(rxdp3->Buffer2_ptr))
+			if (pci_dma_mapping_error(sp->pdev, rxdp3->Buffer2_ptr))
 				goto memalloc_failed;
 			rxdp3->Buffer0_ptr = *temp0 =
 				pci_map_single( sp->pdev, ba->ba_0, BUF0_LEN,
 						PCI_DMA_FROMDEVICE);
-			if (pci_dma_mapping_error(rxdp3->Buffer0_ptr)) {
+			if (pci_dma_mapping_error(sp->pdev,
+						rxdp3->Buffer0_ptr)) {
 				pci_unmap_single (sp->pdev,
 					(dma_addr_t)rxdp3->Buffer2_ptr,
 					dev->mtu + 4, PCI_DMA_FROMDEVICE);
@@ -6903,7 +6908,8 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp,
 			rxdp3->Buffer1_ptr = *temp1 =
 				pci_map_single(sp->pdev, ba->ba_1, BUF1_LEN,
 						PCI_DMA_FROMDEVICE);
-			if (pci_dma_mapping_error(rxdp3->Buffer1_ptr)) {
+			if (pci_dma_mapping_error(sp->pdev,
+						rxdp3->Buffer1_ptr)) {
 				pci_unmap_single (sp->pdev,
 					(dma_addr_t)rxdp3->Buffer0_ptr,
 					BUF0_LEN, PCI_DMA_FROMDEVICE);
@@ -7187,7 +7193,7 @@ static int s2io_card_up(struct s2io_nic * sp)
 
 	for (i = 0; i < config->rx_ring_num; i++) {
 		mac_control->rings[i].mtu = dev->mtu;
-		ret = fill_rx_buffers(&mac_control->rings[i], 1);
+		ret = fill_rx_buffers(sp, &mac_control->rings[i], 1);
 		if (ret) {
 			DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n",
 				  dev->name);
diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c
index 601b001437c..0d27dd39bc0 100644
--- a/drivers/net/sfc/rx.c
+++ b/drivers/net/sfc/rx.c
@@ -233,7 +233,7 @@ static inline int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue,
 					  rx_buf->data, rx_buf->len,
 					  PCI_DMA_FROMDEVICE);
 
-	if (unlikely(pci_dma_mapping_error(rx_buf->dma_addr))) {
+	if (unlikely(pci_dma_mapping_error(efx->pci_dev, rx_buf->dma_addr))) {
 		dev_kfree_skb_any(rx_buf->skb);
 		rx_buf->skb = NULL;
 		return -EIO;
@@ -275,7 +275,7 @@ static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue,
 					0, efx_rx_buf_size(efx),
 					PCI_DMA_FROMDEVICE);
 
-		if (unlikely(pci_dma_mapping_error(dma_addr))) {
+		if (unlikely(pci_dma_mapping_error(efx->pci_dev, dma_addr))) {
 			__free_pages(rx_buf->page, efx->rx_buffer_order);
 			rx_buf->page = NULL;
 			return -EIO;
diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c
index 5cdd082ab8f..5e8374ab28e 100644
--- a/drivers/net/sfc/tx.c
+++ b/drivers/net/sfc/tx.c
@@ -172,7 +172,7 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 
 	/* Process all fragments */
 	while (1) {
-		if (unlikely(pci_dma_mapping_error(dma_addr)))
+		if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr)))
 			goto pci_err;
 
 		/* Store fields for marking in the per-fragment final
@@ -661,7 +661,8 @@ efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
 	tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
 					TSOH_BUFFER(tsoh), header_len,
 					PCI_DMA_TODEVICE);
-	if (unlikely(pci_dma_mapping_error(tsoh->dma_addr))) {
+	if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev,
+					   tsoh->dma_addr))) {
 		kfree(tsoh);
 		return NULL;
 	}
@@ -863,7 +864,7 @@ static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
 
 	st->ifc.unmap_addr = pci_map_page(efx->pci_dev, page, page_off,
 					  len, PCI_DMA_TODEVICE);
-	if (likely(!pci_dma_mapping_error(st->ifc.unmap_addr))) {
+	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->ifc.unmap_addr))) {
 		st->ifc.unmap_len = len;
 		st->ifc.len = len;
 		st->ifc.dma_addr = st->ifc.unmap_addr;
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 00aa0b108cb..b6435d0d71f 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -452,7 +452,7 @@ spider_net_prepare_rx_descr(struct spider_net_card *card,
 	/* iommu-map the skb */
 	buf = pci_map_single(card->pdev, descr->skb->data,
 			SPIDER_NET_MAX_FRAME, PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(buf)) {
+	if (pci_dma_mapping_error(card->pdev, buf)) {
 		dev_kfree_skb_any(descr->skb);
 		descr->skb = NULL;
 		if (netif_msg_rx_err(card) && net_ratelimit())
@@ -691,7 +691,7 @@ spider_net_prepare_tx_descr(struct spider_net_card *card,
 	unsigned long flags;
 
 	buf = pci_map_single(card->pdev, skb->data, skb->len, PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(buf)) {
+	if (pci_dma_mapping_error(card->pdev, buf)) {
 		if (netif_msg_tx_err(card) && net_ratelimit())
 			dev_err(&card->netdev->dev, "could not iommu-map packet (%p, %i). "
 				  "Dropping packet\n", skb->data, skb->len);
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index a645e5028c1..8487ace9d2e 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -506,7 +506,7 @@ static void *alloc_rxbuf_page(struct pci_dev *hwdev, dma_addr_t *dma_handle)
 		return NULL;
 	*dma_handle = pci_map_single(hwdev, buf, PAGE_SIZE,
 				     PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(*dma_handle)) {
+	if (pci_dma_mapping_error(hwdev, *dma_handle)) {
 		free_page((unsigned long)buf);
 		return NULL;
 	}
@@ -536,7 +536,7 @@ static struct sk_buff *alloc_rxbuf_skb(struct net_device *dev,
 		return NULL;
 	*dma_handle = pci_map_single(hwdev, skb->data, RX_BUF_SIZE,
 				     PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(*dma_handle)) {
+	if (pci_dma_mapping_error(hwdev, *dma_handle)) {
 		dev_kfree_skb_any(skb);
 		return NULL;
 	}
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 217d506527a..d9769c52734 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -1166,7 +1166,7 @@ ath5k_rxbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 		bf->skb = skb;
 		bf->skbaddr = pci_map_single(sc->pdev,
 			skb->data, sc->rxbufsize, PCI_DMA_FROMDEVICE);
-		if (unlikely(pci_dma_mapping_error(bf->skbaddr))) {
+		if (unlikely(pci_dma_mapping_error(sc->pdev, bf->skbaddr))) {
 			ATH5K_ERR(sc, "%s: DMA mapping failed\n", __func__);
 			dev_kfree_skb(skb);
 			bf->skb = NULL;
@@ -1918,7 +1918,7 @@ ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 	ATH5K_DBG(sc, ATH5K_DEBUG_BEACON, "skb %p [data %p len %u] "
 			"skbaddr %llx\n", skb, skb->data, skb->len,
 			(unsigned long long)bf->skbaddr);
-	if (pci_dma_mapping_error(bf->skbaddr)) {
+	if (pci_dma_mapping_error(sc->pdev, bf->skbaddr)) {
 		ATH5K_ERR(sc, "beacon DMA mapping failed\n");
 		return -EIO;
 	}
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index c4a7c06793c..61f8fdea2d9 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3525,7 +3525,7 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost)
 	crq->msg_token = dma_map_single(dev, crq->msgs,
 					PAGE_SIZE, DMA_BIDIRECTIONAL);
 
-	if (dma_mapping_error(crq->msg_token))
+	if (dma_mapping_error(dev, crq->msg_token))
 		goto map_failed;
 
 	retrc = rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
@@ -3618,7 +3618,7 @@ static int ibmvfc_alloc_mem(struct ibmvfc_host *vhost)
 					    async_q->size * sizeof(*async_q->msgs),
 					    DMA_BIDIRECTIONAL);
 
-	if (dma_mapping_error(async_q->msg_token)) {
+	if (dma_mapping_error(dev, async_q->msg_token)) {
 		dev_err(dev, "Failed to map async queue\n");
 		goto free_async_crq;
 	}
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 20000ec79b0..6b24b9cdb04 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -859,7 +859,7 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
 					    sizeof(hostdata->madapter_info),
 					    DMA_BIDIRECTIONAL);
 
-	if (dma_mapping_error(req->buffer)) {
+	if (dma_mapping_error(hostdata->dev, req->buffer)) {
 		if (!firmware_has_feature(FW_FEATURE_CMO))
 			dev_err(hostdata->dev,
 			        "Unable to map request_buffer for "
@@ -1407,7 +1407,7 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
 						    length,
 						    DMA_BIDIRECTIONAL);
 
-	if (dma_mapping_error(host_config->buffer)) {
+	if (dma_mapping_error(hostdata->dev, host_config->buffer)) {
 		if (!firmware_has_feature(FW_FEATURE_CMO))
 			dev_err(hostdata->dev,
 			        "dma_mapping error getting host config\n");
diff --git a/drivers/scsi/ibmvscsi/ibmvstgt.c b/drivers/scsi/ibmvscsi/ibmvstgt.c
index 3b9514c8f1f..2e13ec00172 100644
--- a/drivers/scsi/ibmvscsi/ibmvstgt.c
+++ b/drivers/scsi/ibmvscsi/ibmvstgt.c
@@ -564,7 +564,7 @@ static int crq_queue_create(struct crq_queue *queue, struct srp_target *target)
 					  queue->size * sizeof(*queue->msgs),
 					  DMA_BIDIRECTIONAL);
 
-	if (dma_mapping_error(queue->msg_token))
+	if (dma_mapping_error(target->dev, queue->msg_token))
 		goto map_failed;
 
 	err = h_reg_crq(vport->dma_dev->unit_address, queue->msg_token,
diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c
index 182146100dc..462a8574dad 100644
--- a/drivers/scsi/ibmvscsi/rpa_vscsi.c
+++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c
@@ -253,7 +253,7 @@ static int rpavscsi_init_crq_queue(struct crq_queue *queue,
 					  queue->size * sizeof(*queue->msgs),
 					  DMA_BIDIRECTIONAL);
 
-	if (dma_mapping_error(queue->msg_token))
+	if (dma_mapping_error(hostdata->dev, queue->msg_token))
 		goto map_failed;
 
 	gather_partition_info();
diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c
index e81d59d7891..0c716566085 100644
--- a/drivers/spi/atmel_spi.c
+++ b/drivers/spi/atmel_spi.c
@@ -313,14 +313,14 @@ atmel_spi_dma_map_xfer(struct atmel_spi *as, struct spi_transfer *xfer)
 		xfer->tx_dma = dma_map_single(dev,
 				(void *) xfer->tx_buf, xfer->len,
 				DMA_TO_DEVICE);
-		if (dma_mapping_error(xfer->tx_dma))
+		if (dma_mapping_error(dev, xfer->tx_dma))
 			return -ENOMEM;
 	}
 	if (xfer->rx_buf) {
 		xfer->rx_dma = dma_map_single(dev,
 				xfer->rx_buf, xfer->len,
 				DMA_FROM_DEVICE);
-		if (dma_mapping_error(xfer->rx_dma)) {
+		if (dma_mapping_error(dev, xfer->rx_dma)) {
 			if (xfer->tx_buf)
 				dma_unmap_single(dev,
 						xfer->tx_dma, xfer->len,
diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c
index 9149689c79d..87b73e0169c 100644
--- a/drivers/spi/au1550_spi.c
+++ b/drivers/spi/au1550_spi.c
@@ -334,7 +334,7 @@ static int au1550_spi_dma_rxtmp_alloc(struct au1550_spi *hw, unsigned size)
 	hw->dma_rx_tmpbuf_size = size;
 	hw->dma_rx_tmpbuf_addr = dma_map_single(hw->dev, hw->dma_rx_tmpbuf,
 			size, DMA_FROM_DEVICE);
-	if (dma_mapping_error(hw->dma_rx_tmpbuf_addr)) {
+	if (dma_mapping_error(hw->dev, hw->dma_rx_tmpbuf_addr)) {
 		kfree(hw->dma_rx_tmpbuf);
 		hw->dma_rx_tmpbuf = 0;
 		hw->dma_rx_tmpbuf_size = 0;
@@ -378,7 +378,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t)
 			dma_rx_addr = dma_map_single(hw->dev,
 					(void *)t->rx_buf,
 					t->len, DMA_FROM_DEVICE);
-			if (dma_mapping_error(dma_rx_addr))
+			if (dma_mapping_error(hw->dev, dma_rx_addr))
 				dev_err(hw->dev, "rx dma map error\n");
 		}
 	} else {
@@ -401,7 +401,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t)
 			dma_tx_addr = dma_map_single(hw->dev,
 					(void *)t->tx_buf,
 					t->len, DMA_TO_DEVICE);
-			if (dma_mapping_error(dma_tx_addr))
+			if (dma_mapping_error(hw->dev, dma_tx_addr))
 				dev_err(hw->dev, "tx dma map error\n");
 		}
 	} else {
diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index b1cc148036c..f6f987bb71c 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -836,7 +836,7 @@ static int omap2_mcspi_transfer(struct spi_device *spi, struct spi_message *m)
 		if (tx_buf != NULL) {
 			t->tx_dma = dma_map_single(&spi->dev, (void *) tx_buf,
 					len, DMA_TO_DEVICE);
-			if (dma_mapping_error(t->tx_dma)) {
+			if (dma_mapping_error(&spi->dev, t->tx_dma)) {
 				dev_dbg(&spi->dev, "dma %cX %d bytes error\n",
 						'T', len);
 				return -EINVAL;
@@ -845,7 +845,7 @@ static int omap2_mcspi_transfer(struct spi_device *spi, struct spi_message *m)
 		if (rx_buf != NULL) {
 			t->rx_dma = dma_map_single(&spi->dev, rx_buf, t->len,
 					DMA_FROM_DEVICE);
-			if (dma_mapping_error(t->rx_dma)) {
+			if (dma_mapping_error(&spi->dev, t->rx_dma)) {
 				dev_dbg(&spi->dev, "dma %cX %d bytes error\n",
 						'R', len);
 				if (tx_buf != NULL)
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 0c452c46ab0..067299d6d19 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -353,7 +353,7 @@ static int map_dma_buffers(struct driver_data *drv_data)
 	drv_data->rx_dma = dma_map_single(dev, drv_data->rx,
 						drv_data->rx_map_len,
 						DMA_FROM_DEVICE);
-	if (dma_mapping_error(drv_data->rx_dma))
+	if (dma_mapping_error(dev, drv_data->rx_dma))
 		return 0;
 
 	/* Stream map the tx buffer */
@@ -361,7 +361,7 @@ static int map_dma_buffers(struct driver_data *drv_data)
 						drv_data->tx_map_len,
 						DMA_TO_DEVICE);
 
-	if (dma_mapping_error(drv_data->tx_dma)) {
+	if (dma_mapping_error(dev, drv_data->tx_dma)) {
 		dma_unmap_single(dev, drv_data->rx_dma,
 					drv_data->rx_map_len, DMA_FROM_DEVICE);
 		return 0;
diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c
index 54ac7bea5f8..6fb77fcc497 100644
--- a/drivers/spi/spi_imx.c
+++ b/drivers/spi/spi_imx.c
@@ -491,7 +491,7 @@ static int map_dma_buffers(struct driver_data *drv_data)
 							buf,
 							drv_data->tx_map_len,
 							DMA_TO_DEVICE);
-			if (dma_mapping_error(drv_data->tx_dma))
+			if (dma_mapping_error(dev, drv_data->tx_dma))
 				return -1;
 
 			drv_data->tx_dma_needs_unmap = 1;
@@ -516,7 +516,7 @@ static int map_dma_buffers(struct driver_data *drv_data)
 					buf,
 					drv_data->len,
 					DMA_FROM_DEVICE);
-		if (dma_mapping_error(drv_data->rx_dma))
+		if (dma_mapping_error(dev, drv_data->rx_dma))
 			return -1;
 		drv_data->rx_dma_needs_unmap = 1;
 	}
@@ -534,7 +534,7 @@ static int map_dma_buffers(struct driver_data *drv_data)
 					buf,
 					drv_data->tx_map_len,
 					DMA_TO_DEVICE);
-	if (dma_mapping_error(drv_data->tx_dma)) {
+	if (dma_mapping_error(dev, drv_data->tx_dma)) {
 		if (drv_data->rx_dma) {
 			dma_unmap_single(dev,
 					drv_data->rx_dma,
diff --git a/include/asm-alpha/dma-mapping.h b/include/asm-alpha/dma-mapping.h
index db351d1296f..a5801ae02e4 100644
--- a/include/asm-alpha/dma-mapping.h
+++ b/include/asm-alpha/dma-mapping.h
@@ -24,8 +24,8 @@
 		pci_unmap_sg(alpha_gendev_to_pci(dev), sg, nents, dir)
 #define dma_supported(dev, mask)			\
 		pci_dma_supported(alpha_gendev_to_pci(dev), mask)
-#define dma_mapping_error(addr)				\
-		pci_dma_mapping_error(addr)
+#define dma_mapping_error(dev, addr)				\
+		pci_dma_mapping_error(alpha_gendev_to_pci(dev), addr)
 
 #else	/* no PCI - no IOMMU. */
 
@@ -45,7 +45,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 #define dma_unmap_page(dev, addr, size, dir)	((void)0)
 #define dma_unmap_sg(dev, sg, nents, dir)	((void)0)
 
-#define dma_mapping_error(addr)  (0)
+#define dma_mapping_error(dev, addr)  (0)
 
 #endif	/* !CONFIG_PCI */
 
diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h
index d31fd49ff79..2a14302c17a 100644
--- a/include/asm-alpha/pci.h
+++ b/include/asm-alpha/pci.h
@@ -106,7 +106,7 @@ extern dma_addr_t pci_map_page(struct pci_dev *, struct page *,
 /* Test for pci_map_single or pci_map_page having generated an error.  */
 
 static inline int
-pci_dma_mapping_error(dma_addr_t dma_addr)
+pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr)
 {
 	return dma_addr == 0;
 }
diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h
index e99406a7bec..f41335ba633 100644
--- a/include/asm-arm/dma-mapping.h
+++ b/include/asm-arm/dma-mapping.h
@@ -56,7 +56,7 @@ static inline int dma_is_consistent(struct device *dev, dma_addr_t handle)
 /*
  * DMA errors are defined by all-bits-set in the DMA address.
  */
-static inline int dma_mapping_error(dma_addr_t dma_addr)
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return dma_addr == ~0;
 }
diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h
index 57dc672bab8..0399359ab5d 100644
--- a/include/asm-avr32/dma-mapping.h
+++ b/include/asm-avr32/dma-mapping.h
@@ -35,7 +35,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask)
 /*
  * dma_map_single can't fail as it is implemented now.
  */
-static inline int dma_mapping_error(dma_addr_t addr)
+static inline int dma_mapping_error(struct device *dev, dma_addr_t addr)
 {
 	return 0;
 }
diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h
index edc8d1bfaae..cb2fb25ff8d 100644
--- a/include/asm-cris/dma-mapping.h
+++ b/include/asm-cris/dma-mapping.h
@@ -120,7 +120,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
 }
 
 static inline int
-dma_mapping_error(dma_addr_t dma_addr)
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return 0;
 }
diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h
index 2e8966ca030..b2898877c07 100644
--- a/include/asm-frv/dma-mapping.h
+++ b/include/asm-frv/dma-mapping.h
@@ -126,7 +126,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele
 }
 
 static inline
-int dma_mapping_error(dma_addr_t dma_addr)
+int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return 0;
 }
diff --git a/include/asm-generic/dma-mapping-broken.h b/include/asm-generic/dma-mapping-broken.h
index e2468f894d2..82cd0cb1c3f 100644
--- a/include/asm-generic/dma-mapping-broken.h
+++ b/include/asm-generic/dma-mapping-broken.h
@@ -61,7 +61,7 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
 #define dma_sync_sg_for_device dma_sync_sg_for_cpu
 
 extern int
-dma_mapping_error(dma_addr_t dma_addr);
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
 extern int
 dma_supported(struct device *dev, u64 mask);
diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h
index 783ab9944d7..189486c3f92 100644
--- a/include/asm-generic/dma-mapping.h
+++ b/include/asm-generic/dma-mapping.h
@@ -144,9 +144,9 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
 }
 
 static inline int
-dma_mapping_error(dma_addr_t dma_addr)
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-	return pci_dma_mapping_error(dma_addr);
+	return pci_dma_mapping_error(to_pci_dev(dev), dma_addr);
 }
 
 
diff --git a/include/asm-generic/pci-dma-compat.h b/include/asm-generic/pci-dma-compat.h
index 25c10e96b2b..37b3706226e 100644
--- a/include/asm-generic/pci-dma-compat.h
+++ b/include/asm-generic/pci-dma-compat.h
@@ -99,9 +99,9 @@ pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg,
 }
 
 static inline int
-pci_dma_mapping_error(dma_addr_t dma_addr)
+pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr)
 {
-	return dma_mapping_error(dma_addr);
+	return dma_mapping_error(&pdev->dev, dma_addr);
 }
 
 #endif
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h
index 0721a5e8271..a6d50c77b6b 100644
--- a/include/asm-ia64/machvec.h
+++ b/include/asm-ia64/machvec.h
@@ -54,7 +54,7 @@ typedef void ia64_mv_dma_sync_single_for_cpu (struct device *, dma_addr_t, size_
 typedef void ia64_mv_dma_sync_sg_for_cpu (struct device *, struct scatterlist *, int, int);
 typedef void ia64_mv_dma_sync_single_for_device (struct device *, dma_addr_t, size_t, int);
 typedef void ia64_mv_dma_sync_sg_for_device (struct device *, struct scatterlist *, int, int);
-typedef int ia64_mv_dma_mapping_error (dma_addr_t dma_addr);
+typedef int ia64_mv_dma_mapping_error(struct device *, dma_addr_t dma_addr);
 typedef int ia64_mv_dma_supported (struct device *, u64);
 
 typedef dma_addr_t ia64_mv_dma_map_single_attrs (struct device *, void *, size_t, int, struct dma_attrs *);
diff --git a/include/asm-m68k/dma-mapping.h b/include/asm-m68k/dma-mapping.h
index a26cdeb46a5..91f7944333d 100644
--- a/include/asm-m68k/dma-mapping.h
+++ b/include/asm-m68k/dma-mapping.h
@@ -84,7 +84,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *s
 {
 }
 
-static inline int dma_mapping_error(dma_addr_t handle)
+static inline int dma_mapping_error(struct device *dev, dma_addr_t handle)
 {
 	return 0;
 }
diff --git a/include/asm-mips/dma-mapping.h b/include/asm-mips/dma-mapping.h
index 230b3f1b69b..c64afb40cd0 100644
--- a/include/asm-mips/dma-mapping.h
+++ b/include/asm-mips/dma-mapping.h
@@ -42,7 +42,7 @@ extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 	int nelems, enum dma_data_direction direction);
 extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 	int nelems, enum dma_data_direction direction);
-extern int dma_mapping_error(dma_addr_t dma_addr);
+extern int dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 extern int dma_supported(struct device *dev, u64 mask);
 
 static inline int
diff --git a/include/asm-mn10300/dma-mapping.h b/include/asm-mn10300/dma-mapping.h
index 7c882fca9ec..ccae8f6c632 100644
--- a/include/asm-mn10300/dma-mapping.h
+++ b/include/asm-mn10300/dma-mapping.h
@@ -182,7 +182,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 }
 
 static inline
-int dma_mapping_error(dma_addr_t dma_addr)
+int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return 0;
 }
diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h
index c6c0e9ff6bd..53af696f23d 100644
--- a/include/asm-parisc/dma-mapping.h
+++ b/include/asm-parisc/dma-mapping.h
@@ -248,6 +248,6 @@ void * sba_get_iommu(struct parisc_device *dev);
 #endif
 
 /* At the moment, we panic on error for IOMMU resource exaustion */
-#define dma_mapping_error(x)	0
+#define dma_mapping_error(dev, x)	0
 
 #endif
diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h
index 74c54978098..c7ca45f97dd 100644
--- a/include/asm-powerpc/dma-mapping.h
+++ b/include/asm-powerpc/dma-mapping.h
@@ -415,7 +415,7 @@ static inline void dma_sync_sg_for_device(struct device *dev,
 		__dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
 }
 
-static inline int dma_mapping_error(dma_addr_t dma_addr)
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 #ifdef CONFIG_PPC64
 	return (dma_addr == DMA_ERROR_CODE);
diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h
index 22cc419389f..6c0b8a2de14 100644
--- a/include/asm-sh/dma-mapping.h
+++ b/include/asm-sh/dma-mapping.h
@@ -171,7 +171,7 @@ static inline int dma_get_cache_alignment(void)
 	return L1_CACHE_BYTES;
 }
 
-static inline int dma_mapping_error(dma_addr_t dma_addr)
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return dma_addr == 0;
 }
diff --git a/include/asm-sparc/dma-mapping_64.h b/include/asm-sparc/dma-mapping_64.h
index 38cbec76a33..bfa64f9702d 100644
--- a/include/asm-sparc/dma-mapping_64.h
+++ b/include/asm-sparc/dma-mapping_64.h
@@ -135,7 +135,7 @@ static inline void dma_sync_sg_for_device(struct device *dev,
 	/* No flushing needed to sync cpu writes to the device.  */
 }
 
-static inline int dma_mapping_error(dma_addr_t dma_addr)
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return (dma_addr == DMA_ERROR_CODE);
 }
diff --git a/include/asm-sparc/pci_32.h b/include/asm-sparc/pci_32.h
index b93b6c79e08..0ee949d220c 100644
--- a/include/asm-sparc/pci_32.h
+++ b/include/asm-sparc/pci_32.h
@@ -154,7 +154,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 
 #define PCI_DMA_ERROR_CODE      (~(dma_addr_t)0x0)
 
-static inline int pci_dma_mapping_error(dma_addr_t dma_addr)
+static inline int pci_dma_mapping_error(struct pci_dev *pdev,
+					dma_addr_t dma_addr)
 {
         return (dma_addr == PCI_DMA_ERROR_CODE);
 }
diff --git a/include/asm-sparc/pci_64.h b/include/asm-sparc/pci_64.h
index f59f2571295..4f79a54948f 100644
--- a/include/asm-sparc/pci_64.h
+++ b/include/asm-sparc/pci_64.h
@@ -140,9 +140,10 @@ extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask);
 #define PCI64_REQUIRED_MASK	(~(dma64_addr_t)0)
 #define PCI64_ADDR_BASE		0xfffc000000000000UL
 
-static inline int pci_dma_mapping_error(dma_addr_t dma_addr)
+static inline int pci_dma_mapping_error(struct pci_dev *pdev,
+					dma_addr_t dma_addr)
 {
-	return dma_mapping_error(dma_addr);
+	return dma_mapping_error(&pdev->dev, dma_addr);
 }
 
 #ifdef CONFIG_PCI
diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h
index 87a715367a1..3c034f48fdb 100644
--- a/include/asm-x86/device.h
+++ b/include/asm-x86/device.h
@@ -5,6 +5,9 @@ struct dev_archdata {
 #ifdef CONFIG_ACPI
 	void	*acpi_handle;
 #endif
+#ifdef CONFIG_X86_64
+struct dma_mapping_ops *dma_ops;
+#endif
 #ifdef CONFIG_DMAR
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index c2ddd3d1b88..0eaa9bf6011 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -17,7 +17,8 @@ extern int panic_on_overflow;
 extern int force_iommu;
 
 struct dma_mapping_ops {
-	int             (*mapping_error)(dma_addr_t dma_addr);
+	int             (*mapping_error)(struct device *dev,
+					 dma_addr_t dma_addr);
 	void*           (*alloc_coherent)(struct device *dev, size_t size,
 				dma_addr_t *dma_handle, gfp_t gfp);
 	void            (*free_coherent)(struct device *dev, size_t size,
@@ -56,14 +57,32 @@ struct dma_mapping_ops {
 	int		is_phys;
 };
 
-extern const struct dma_mapping_ops *dma_ops;
+extern struct dma_mapping_ops *dma_ops;
 
-static inline int dma_mapping_error(dma_addr_t dma_addr)
+static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
 {
-	if (dma_ops->mapping_error)
-		return dma_ops->mapping_error(dma_addr);
+#ifdef CONFIG_X86_32
+	return dma_ops;
+#else
+	if (unlikely(!dev) || !dev->archdata.dma_ops)
+		return dma_ops;
+	else
+		return dev->archdata.dma_ops;
+#endif
+}
+
+/* Make sure we keep the same behaviour */
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+#ifdef CONFIG_X86_32
+	return 0;
+#else
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+	if (ops->mapping_error)
+		return ops->mapping_error(dev, dma_addr);
 
 	return (dma_addr == bad_dma_address);
+#endif
 }
 
 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
@@ -83,44 +102,53 @@ static inline dma_addr_t
 dma_map_single(struct device *hwdev, void *ptr, size_t size,
 	       int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	return dma_ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
+	return ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
 }
 
 static inline void
 dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
 		 int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->unmap_single)
-		dma_ops->unmap_single(dev, addr, size, direction);
+	if (ops->unmap_single)
+		ops->unmap_single(dev, addr, size, direction);
 }
 
 static inline int
 dma_map_sg(struct device *hwdev, struct scatterlist *sg,
 	   int nents, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	return dma_ops->map_sg(hwdev, sg, nents, direction);
+	return ops->map_sg(hwdev, sg, nents, direction);
 }
 
 static inline void
 dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
 	     int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->unmap_sg)
-		dma_ops->unmap_sg(hwdev, sg, nents, direction);
+	if (ops->unmap_sg)
+		ops->unmap_sg(hwdev, sg, nents, direction);
 }
 
 static inline void
 dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
 			size_t size, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_single_for_cpu)
-		dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
-					     direction);
+	if (ops->sync_single_for_cpu)
+		ops->sync_single_for_cpu(hwdev, dma_handle, size, direction);
 	flush_write_buffers();
 }
 
@@ -128,10 +156,11 @@ static inline void
 dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
 			   size_t size, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_single_for_device)
-		dma_ops->sync_single_for_device(hwdev, dma_handle, size,
-						direction);
+	if (ops->sync_single_for_device)
+		ops->sync_single_for_device(hwdev, dma_handle, size, direction);
 	flush_write_buffers();
 }
 
@@ -139,11 +168,12 @@ static inline void
 dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
 			      unsigned long offset, size_t size, int direction)
 {
-	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_single_range_for_cpu)
-		dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
-						   size, direction);
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
 
+	BUG_ON(!valid_dma_direction(direction));
+	if (ops->sync_single_range_for_cpu)
+		ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
+					       size, direction);
 	flush_write_buffers();
 }
 
@@ -152,11 +182,12 @@ dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
 				 unsigned long offset, size_t size,
 				 int direction)
 {
-	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_single_range_for_device)
-		dma_ops->sync_single_range_for_device(hwdev, dma_handle,
-						      offset, size, direction);
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
 
+	BUG_ON(!valid_dma_direction(direction));
+	if (ops->sync_single_range_for_device)
+		ops->sync_single_range_for_device(hwdev, dma_handle,
+						  offset, size, direction);
 	flush_write_buffers();
 }
 
@@ -164,9 +195,11 @@ static inline void
 dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
 		    int nelems, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_sg_for_cpu)
-		dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
+	if (ops->sync_sg_for_cpu)
+		ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
 	flush_write_buffers();
 }
 
@@ -174,9 +207,11 @@ static inline void
 dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 		       int nelems, int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	if (dma_ops->sync_sg_for_device)
-		dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
+	if (ops->sync_sg_for_device)
+		ops->sync_sg_for_device(hwdev, sg, nelems, direction);
 
 	flush_write_buffers();
 }
@@ -185,9 +220,11 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
 				      size_t offset, size_t size,
 				      int direction)
 {
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+
 	BUG_ON(!valid_dma_direction(direction));
-	return dma_ops->map_single(dev, page_to_phys(page)+offset,
-				   size, direction);
+	return ops->map_single(dev, page_to_phys(page) + offset,
+			       size, direction);
 }
 
 static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h
index c706a744263..2730b351afc 100644
--- a/include/asm-x86/swiotlb.h
+++ b/include/asm-x86/swiotlb.h
@@ -35,7 +35,7 @@ extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
 			  int nents, int direction);
 extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
 			     int nents, int direction);
-extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
+extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
 extern void swiotlb_free_coherent(struct device *hwdev, size_t size,
 				  void *vaddr, dma_addr_t dma_handle);
 extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
diff --git a/include/asm-xtensa/dma-mapping.h b/include/asm-xtensa/dma-mapping.h
index 3c7d537dd15..51882ae3db4 100644
--- a/include/asm-xtensa/dma-mapping.h
+++ b/include/asm-xtensa/dma-mapping.h
@@ -139,7 +139,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
 		consistent_sync(sg_virt(sg), sg->length, dir);
 }
 static inline int
-dma_mapping_error(dma_addr_t dma_addr)
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return 0;
 }
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 7d51cbca49a..75ae6d8aba4 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -758,7 +758,7 @@ static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr,
 	}
 
 	dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction);
-	if (!dma_mapping_error(dma_addr)) {
+	if (!dma_mapping_error(&c->pdev->dev, dma_addr)) {
 #ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
 		if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
 			*mptr++ = cpu_to_le32(0x7C020002);
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 4bf8cade9db..e530026eedf 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -427,9 +427,9 @@ static inline int ssb_dma_mapping_error(struct ssb_device *dev, dma_addr_t addr)
 {
 	switch (dev->bus->bustype) {
 	case SSB_BUSTYPE_PCI:
-		return pci_dma_mapping_error(addr);
+		return pci_dma_mapping_error(dev->bus->host_pci, addr);
 	case SSB_BUSTYPE_SSB:
-		return dma_mapping_error(addr);
+		return dma_mapping_error(dev->dev, addr);
 	default:
 		__ssb_dma_not_implemented(dev);
 	}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 90b529f7a15..936e333e7ce 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1590,7 +1590,7 @@ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
 {
 	if (dev->dma_ops)
 		return dev->dma_ops->mapping_error(dev, dma_addr);
-	return dma_mapping_error(dma_addr);
+	return dma_mapping_error(dev->dma_device, dma_addr);
 }
 
 /**
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d568894df8c..977edbdbc1d 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -492,7 +492,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 */
 		dma_addr_t handle;
 		handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
-		if (swiotlb_dma_mapping_error(handle))
+		if (swiotlb_dma_mapping_error(hwdev, handle))
 			return NULL;
 
 		ret = bus_to_virt(handle);
@@ -824,7 +824,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 }
 
 int
-swiotlb_dma_mapping_error(dma_addr_t dma_addr)
+swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
 	return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index a19b22b452a..84d328329d9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -169,7 +169,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 					  (void *)
 					  vec->sge[xdr_sge_no].iov_base + sge_off,
 					  sge_bytes, DMA_TO_DEVICE);
-		if (dma_mapping_error(sge[sge_no].addr))
+		if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
+					sge[sge_no].addr))
 			goto err;
 		sge_off = 0;
 		sge_no++;
-- 
cgit v1.2.3-70-g09d2


From 51cc50685a4275c6a02653670af9f108a64e01cf Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 25 Jul 2008 19:45:34 -0700
Subject: SL*B: drop kmem cache argument from constructor

Kmem cache passed to constructor is only needed for constructors that are
themselves multiplexeres.  Nobody uses this "feature", nor does anybody uses
passed kmem cache in non-trivial way, so pass only pointer to object.

Non-trivial places are:
	arch/powerpc/mm/init_64.c
	arch/powerpc/mm/hugetlbpage.c

This is flag day, yes.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Jon Tollefson <kniht@linux.vnet.ibm.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Matt Mackall <mpm@selenic.com>
[akpm@linux-foundation.org: fix arch/powerpc/mm/hugetlbpage.c]
[akpm@linux-foundation.org: fix mm/slab.c]
[akpm@linux-foundation.org: fix ubifs]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/plat-s3c24xx/dma.c               |  2 +-
 arch/powerpc/kernel/rtas_flash.c          |  2 +-
 arch/powerpc/mm/hugetlbpage.c             |  9 ++-------
 arch/powerpc/mm/init_64.c                 | 24 +++++++++---------------
 arch/powerpc/platforms/cell/spufs/inode.c |  2 +-
 arch/sh/mm/pmb.c                          |  2 +-
 arch/xtensa/mm/init.c                     |  2 +-
 drivers/usb/mon/mon_text.c                |  4 ++--
 fs/adfs/super.c                           |  2 +-
 fs/affs/super.c                           |  2 +-
 fs/afs/super.c                            |  4 ++--
 fs/befs/linuxvfs.c                        |  2 +-
 fs/bfs/inode.c                            |  2 +-
 fs/block_dev.c                            |  2 +-
 fs/buffer.c                               |  2 +-
 fs/cifs/cifsfs.c                          |  2 +-
 fs/coda/inode.c                           |  2 +-
 fs/ecryptfs/main.c                        |  4 ++--
 fs/efs/super.c                            |  2 +-
 fs/ext2/super.c                           |  2 +-
 fs/ext3/super.c                           |  2 +-
 fs/ext4/super.c                           |  2 +-
 fs/fat/cache.c                            |  2 +-
 fs/fat/inode.c                            |  2 +-
 fs/fuse/inode.c                           |  2 +-
 fs/gfs2/main.c                            |  4 ++--
 fs/hfs/super.c                            |  2 +-
 fs/hfsplus/super.c                        |  2 +-
 fs/hpfs/super.c                           |  2 +-
 fs/hugetlbfs/inode.c                      |  2 +-
 fs/inode.c                                |  2 +-
 fs/isofs/inode.c                          |  2 +-
 fs/jffs2/super.c                          |  2 +-
 fs/jfs/jfs_metapage.c                     |  2 +-
 fs/jfs/super.c                            |  2 +-
 fs/locks.c                                |  2 +-
 fs/minix/inode.c                          |  2 +-
 fs/ncpfs/inode.c                          |  2 +-
 fs/nfs/inode.c                            |  2 +-
 fs/ntfs/super.c                           |  2 +-
 fs/ocfs2/dlm/dlmfs.c                      |  3 +--
 fs/ocfs2/super.c                          |  2 +-
 fs/openpromfs/inode.c                     |  2 +-
 fs/proc/inode.c                           |  2 +-
 fs/qnx4/inode.c                           |  2 +-
 fs/reiserfs/super.c                       |  2 +-
 fs/romfs/inode.c                          |  2 +-
 fs/smbfs/inode.c                          |  2 +-
 fs/sysv/inode.c                           |  2 +-
 fs/ubifs/super.c                          |  2 +-
 fs/udf/super.c                            |  2 +-
 fs/ufs/super.c                            |  2 +-
 fs/xfs/linux-2.6/kmem.h                   |  2 +-
 fs/xfs/linux-2.6/xfs_super.c              |  1 -
 include/linux/slab.h                      |  2 +-
 include/linux/slub_def.h                  |  2 +-
 ipc/mqueue.c                              |  2 +-
 kernel/fork.c                             |  2 +-
 lib/idr.c                                 |  2 +-
 lib/radix-tree.c                          |  2 +-
 mm/rmap.c                                 |  2 +-
 mm/shmem.c                                |  2 +-
 mm/slab.c                                 | 11 +++++------
 mm/slob.c                                 |  7 +++----
 mm/slub.c                                 | 13 ++++++-------
 net/socket.c                              |  2 +-
 net/sunrpc/rpc_pipe.c                     |  2 +-
 67 files changed, 90 insertions(+), 106 deletions(-)

(limited to 'net')

diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index 60f162dc4fa..8c5e656d5d8 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -1304,7 +1304,7 @@ struct sysdev_class dma_sysclass = {
 
 /* kmem cache implementation */
 
-static void s3c2410_dma_cache_ctor(struct kmem_cache *c, void *p)
+static void s3c2410_dma_cache_ctor(void *p)
 {
 	memset(p, 0, sizeof(struct s3c2410_dma_buf));
 }
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 09ded5c424a..149cb112cd1 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -286,7 +286,7 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf,
 }
 
 /* constructor for flash_block_cache */
-void rtas_block_ctor(struct kmem_cache *cache, void *ptr)
+void rtas_block_ctor(void *ptr)
 {
 	memset(ptr, 0, RTAS_BLK_SIZE);
 }
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fb42c4dd321..ed0aab0208a 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -113,7 +113,7 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 			   unsigned long address, unsigned int psize)
 {
-	pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize),
+	pte_t *new = kmem_cache_zalloc(huge_pgtable_cache(psize),
 				      GFP_KERNEL|__GFP_REPEAT);
 
 	if (! new)
@@ -730,11 +730,6 @@ static int __init hugepage_setup_sz(char *str)
 }
 __setup("hugepagesz=", hugepage_setup_sz);
 
-static void zero_ctor(struct kmem_cache *cache, void *addr)
-{
-	memset(addr, 0, kmem_cache_size(cache));
-}
-
 static int __init hugetlbpage_init(void)
 {
 	unsigned int psize;
@@ -756,7 +751,7 @@ static int __init hugetlbpage_init(void)
 						HUGEPTE_TABLE_SIZE(psize),
 						HUGEPTE_TABLE_SIZE(psize),
 						0,
-						zero_ctor);
+						NULL);
 			if (!huge_pgtable_cache(psize))
 				panic("hugetlbpage_init(): could not create %s"\
 				      "\n", HUGEPTE_CACHE_NAME(psize));
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a41bc5aa204..4f7df85129d 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -136,9 +136,14 @@ static int __init setup_kcore(void)
 module_init(setup_kcore);
 #endif
 
-static void zero_ctor(struct kmem_cache *cache, void *addr)
+static void pgd_ctor(void *addr)
 {
-	memset(addr, 0, kmem_cache_size(cache));
+	memset(addr, 0, PGD_TABLE_SIZE);
+}
+
+static void pmd_ctor(void *addr)
+{
+	memset(addr, 0, PMD_TABLE_SIZE);
 }
 
 static const unsigned int pgtable_cache_size[2] = {
@@ -163,19 +168,8 @@ struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
 
 void pgtable_cache_init(void)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
-		int size = pgtable_cache_size[i];
-		const char *name = pgtable_cache_name[i];
-
-		pr_debug("Allocating page table cache %s (#%d) "
-			"for size: %08x...\n", name, i, size);
-		pgtable_cache[i] = kmem_cache_create(name,
-						     size, size,
-						     SLAB_PANIC,
-						     zero_ctor);
-	}
+	pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor);
+	pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor);
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 7123472801d..690ca7b0dcf 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -78,7 +78,7 @@ spufs_destroy_inode(struct inode *inode)
 }
 
 static void
-spufs_init_once(struct kmem_cache *cachep, void *p)
+spufs_init_once(void *p)
 {
 	struct spufs_inode_info *ei = p;
 
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index 0b0ec6e0475..46911bcbf17 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -293,7 +293,7 @@ void pmb_unmap(unsigned long addr)
 	} while (pmbe);
 }
 
-static void pmb_cache_ctor(struct kmem_cache *cachep, void *pmb)
+static void pmb_cache_ctor(void *pmb)
 {
 	struct pmb_entry *pmbe = pmb;
 
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 81d0560eaea..ee261005b36 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -309,7 +309,7 @@ void show_mem(void)
 
 struct kmem_cache *pgtable_cache __read_mostly;
 
-static void pgd_ctor(struct kmem_cache *cache, void* addr)
+static void pgd_ctor(void* addr)
 {
 	pte_t* ptep = (pte_t*)addr;
 	int i;
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index 5e3e4e9b6c7..1f715436d6d 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -87,7 +87,7 @@ struct mon_reader_text {
 
 static struct dentry *mon_dir;		/* Usually /sys/kernel/debug/usbmon */
 
-static void mon_text_ctor(struct kmem_cache *, void *);
+static void mon_text_ctor(void *);
 
 struct mon_text_ptr {
 	int cnt, limit;
@@ -720,7 +720,7 @@ void mon_text_del(struct mon_bus *mbus)
 /*
  * Slab interface: constructor.
  */
-static void mon_text_ctor(struct kmem_cache *slab, void *mem)
+static void mon_text_ctor(void *mem)
 {
 	/*
 	 * Nothing to initialize. No, really!
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9e421eeb672..26f3b43726b 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -249,7 +249,7 @@ static void adfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4e030956640..3a89094f93d 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -90,7 +90,7 @@ static void affs_destroy_inode(struct inode *inode)
 	kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct affs_inode_info *ei = (struct affs_inode_info *) foo;
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7e3faeef681..250d8c4d66e 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -27,7 +27,7 @@
 
 #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
 
-static void afs_i_init_once(struct kmem_cache *cachep, void *foo);
+static void afs_i_init_once(void *foo);
 static int afs_get_sb(struct file_system_type *fs_type,
 		      int flags, const char *dev_name,
 		      void *data, struct vfsmount *mnt);
@@ -449,7 +449,7 @@ static void afs_put_super(struct super_block *sb)
 /*
  * initialise an inode cache slab element prior to any use
  */
-static void afs_i_init_once(struct kmem_cache *cachep, void *_vnode)
+static void afs_i_init_once(void *_vnode)
 {
 	struct afs_vnode *vnode = _vnode;
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e8717de3bab..02c6e62b72f 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode)
         kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
         struct befs_inode_info *bi = (struct befs_inode_info *) foo;
 
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 053e690ec9e..0ed57b5ee01 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -264,7 +264,7 @@ static void bfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct bfs_inode_info *bi = foo;
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 10d8a0aa871..dcf37cada36 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -271,7 +271,7 @@ static void bdev_destroy_inode(struct inode *inode)
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct bdev_inode *ei = (struct bdev_inode *) foo;
 	struct block_device *bdev = &ei->bdev;
diff --git a/fs/buffer.c b/fs/buffer.c
index 109b261192d..5fd497cdd6f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3272,7 +3272,7 @@ int bh_submit_read(struct buffer_head *bh)
 EXPORT_SYMBOL(bh_submit_read);
 
 static void
-init_buffer_head(struct kmem_cache *cachep, void *data)
+init_buffer_head(void *data)
 {
 	struct buffer_head *bh = data;
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 22857c639df..fe5f6809cba 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -766,7 +766,7 @@ const struct file_operations cifs_dir_ops = {
 };
 
 static void
-cifs_init_once(struct kmem_cache *cachep, void *inode)
+cifs_init_once(void *inode)
 {
 	struct cifsInodeInfo *cifsi = inode;
 
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 2f58dfc7008..830f51abb97 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode)
 	kmem_cache_free(coda_inode_cachep, ITOC(inode));
 }
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct coda_inode_info *ei = (struct coda_inode_info *) foo;
 
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 6f403cfba14..448dfd597b5 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -578,7 +578,7 @@ static struct file_system_type ecryptfs_fs_type = {
  * Initializes the ecryptfs_inode_info_cache when it is created
  */
 static void
-inode_info_init_once(struct kmem_cache *cachep, void *vptr)
+inode_info_init_once(void *vptr)
 {
 	struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
 
@@ -589,7 +589,7 @@ static struct ecryptfs_cache_info {
 	struct kmem_cache **cache;
 	const char *name;
 	size_t size;
-	void (*ctor)(struct kmem_cache *cache, void *obj);
+	void (*ctor)(void *obj);
 } ecryptfs_cache_infos[] = {
 	{
 		.cache = &ecryptfs_auth_tok_list_item_cache,
diff --git a/fs/efs/super.c b/fs/efs/super.c
index d733531b55e..567b134fa1f 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -70,7 +70,7 @@ static void efs_destroy_inode(struct inode *inode)
 	kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct efs_inode_info *ei = (struct efs_inode_info *) foo;
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 31308a3b0b8..fd88c7b43e6 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -159,7 +159,7 @@ static void ext2_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
 }
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 615788c6843..8ddced38467 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -472,7 +472,7 @@ static void ext3_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 }
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1cb371dcd60..b5479b1dff1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -595,7 +595,7 @@ static void ext4_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 3a9ecac8d61..3222f51c41c 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -36,7 +36,7 @@ static inline int fat_max_cache(struct inode *inode)
 
 static struct kmem_cache *fat_cache_cachep;
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct fat_cache *cache = (struct fat_cache *)foo;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 23676f9d79c..6d266d793e2 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -498,7 +498,7 @@ static void fat_destroy_inode(struct inode *inode)
 	kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7d2f7d6e22e..d2249f174e2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -956,7 +956,7 @@ static inline void unregister_fuseblk(void)
 }
 #endif
 
-static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo)
+static void fuse_inode_init_once(void *foo)
 {
 	struct inode * inode = foo;
 
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index bcc668d0fad..bb2cc303ac2 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -24,7 +24,7 @@
 #include "util.h"
 #include "glock.h"
 
-static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo)
+static void gfs2_init_inode_once(void *foo)
 {
 	struct gfs2_inode *ip = foo;
 
@@ -33,7 +33,7 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo)
 	ip->i_alloc = NULL;
 }
 
-static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo)
+static void gfs2_init_glock_once(void *foo)
 {
 	struct gfs2_glock *gl = foo;
 
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ac2ec5ef66e..4abb1047c68 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -432,7 +432,7 @@ static struct file_system_type hfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void hfs_init_once(struct kmem_cache *cachep, void *p)
+static void hfs_init_once(void *p)
 {
 	struct hfs_inode_info *i = p;
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 3859118531c..e834e578c93 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -485,7 +485,7 @@ static struct file_system_type hfsplus_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void hfsplus_init_once(struct kmem_cache *cachep, void *p)
+static void hfsplus_init_once(void *p)
 {
 	struct hfsplus_inode_info *i = p;
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f63a699ec65..b8ae9c90ada 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -173,7 +173,7 @@ static void hpfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dbd01d262ca..3f58923fb39 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -705,7 +705,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 };
 
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
 
diff --git a/fs/inode.c b/fs/inode.c
index 35b6414522e..b6726f64453 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -224,7 +224,7 @@ void inode_init_once(struct inode *inode)
 
 EXPORT_SYMBOL(inode_init_once);
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct inode * inode = (struct inode *) foo;
 
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 044a254d526..26948a6033b 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode)
 	kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct iso_inode_info *ei = foo;
 
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 7da69eae49e..efd401257ed 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode)
 	kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
 }
 
-static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo)
+static void jffs2_i_init_once(void *foo)
 {
 	struct jffs2_inode_info *f = foo;
 
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 854ff0ec574..c350057087d 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -182,7 +182,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
 
 #endif
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct metapage *mp = (struct metapage *)foo;
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 359c091d896..3630718be39 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -760,7 +760,7 @@ static struct file_system_type jfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
 
diff --git a/fs/locks.c b/fs/locks.c
index 01490300f7c..5eb259e3cd3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -201,7 +201,7 @@ EXPORT_SYMBOL(locks_init_lock);
  * Initialises the fields of the file lock which are invariant for
  * free file_locks.
  */
-static void init_once(struct kmem_cache *cache, void *foo)
+static void init_once(void *foo)
 {
 	struct file_lock *lock = (struct file_lock *) foo;
 
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 523d7371341..d1d1eb84679 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,7 +68,7 @@ static void minix_destroy_inode(struct inode *inode)
 	kmem_cache_free(minix_inode_cachep, minix_i(inode));
 }
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct minix_inode_info *ei = (struct minix_inode_info *) foo;
 
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 2e5ab1204de..d642f0e5b36 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -64,7 +64,7 @@ static void ncp_destroy_inode(struct inode *inode)
 	kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index df23f987da6..52daefa2f52 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1242,7 +1242,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 #endif
 }
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct nfs_inode *nfsi = (struct nfs_inode *) foo;
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 3e76f3b216b..4a46743b507 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3080,7 +3080,7 @@ struct kmem_cache *ntfs_inode_cache;
 struct kmem_cache *ntfs_big_inode_cache;
 
 /* Init once constructor for the inode slab cache. */
-static void ntfs_big_inode_init_once(struct kmem_cache *cachep, void *foo)
+static void ntfs_big_inode_init_once(void *foo)
 {
 	ntfs_inode *ni = (ntfs_inode *)foo;
 
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index e48aba698b7..533a789c3ef 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -267,8 +267,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 	return writelen;
 }
 
-static void dlmfs_init_once(struct kmem_cache *cachep,
-			    void *foo)
+static void dlmfs_init_once(void *foo)
 {
 	struct dlmfs_inode_private *ip =
 		(struct dlmfs_inode_private *) foo;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ccecfe5094f..2560b33889a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1118,7 +1118,7 @@ bail:
 	return status;
 }
 
-static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data)
+static void ocfs2_inode_init_once(void *data)
 {
 	struct ocfs2_inode_info *oi = data;
 
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index d17b4fd204e..9f5b054f06b 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -430,7 +430,7 @@ static struct file_system_type openprom_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-static void op_inode_init_once(struct kmem_cache * cachep, void *data)
+static void op_inode_init_once(void *data)
 {
 	struct op_inode_info *oi = (struct op_inode_info *) data;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 02eca2ed9dd..b37f25dc45a 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -94,7 +94,7 @@ static void proc_destroy_inode(struct inode *inode)
 	kmem_cache_free(proc_inode_cachep, PROC_I(inode));
 }
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct proc_inode *ei = (struct proc_inode *) foo;
 
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b31ab78052b..2aad1044b84 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -553,7 +553,7 @@ static void qnx4_destroy_inode(struct inode *inode)
 	kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2ec748ba0bd..879e54d35c2 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -521,7 +521,7 @@ static void reiserfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
 }
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
 
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 3f13d491c7c..8e51a2aaa97 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -577,7 +577,7 @@ static void romfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct romfs_inode_info *ei = foo;
 
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 376ef3ee6ed..3528f40ffb0 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -67,7 +67,7 @@ static void smb_destroy_inode(struct inode *inode)
 	kmem_cache_free(smb_inode_cachep, SMB_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct smb_inode_info *ei = (struct smb_inode_info *) foo;
 
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index c5d60de0658..df0d435baa4 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -326,7 +326,7 @@ static void sysv_destroy_inode(struct inode *inode)
 	kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *p)
+static void init_once(void *p)
 {
 	struct sysv_inode_info *si = (struct sysv_inode_info *)p;
 
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 00eb9c68ad0..ca1e2d4e03c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1841,7 +1841,7 @@ static struct file_system_type ubifs_fs_type = {
 /*
  * Inode slab cache constructor.
  */
-static void inode_slab_ctor(struct kmem_cache *cachep, void *obj)
+static void inode_slab_ctor(void *obj)
 {
 	struct ubifs_inode *ui = obj;
 	inode_init_once(&ui->vfs_inode);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 44cc702f96c..5698bbf83bb 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -148,7 +148,7 @@ static void udf_destroy_inode(struct inode *inode)
 	kmem_cache_free(udf_inode_cachep, UDF_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct udf_inode_info *ei = (struct udf_inode_info *)foo;
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 227c9d70004..3e30e40aa24 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1302,7 +1302,7 @@ static void ufs_destroy_inode(struct inode *inode)
 	kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
 }
 
-static void init_once(struct kmem_cache * cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
 
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 5e956490297..a20683cf74d 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -79,7 +79,7 @@ kmem_zone_init(int size, char *zone_name)
 
 static inline kmem_zone_t *
 kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
-		     void (*construct)(kmem_zone_t *, void *))
+		     void (*construct)(void *))
 {
 	return kmem_cache_create(zone_name, size, 0, flags, construct);
 }
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 742b2c7852c..943381284e2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -843,7 +843,6 @@ xfs_fs_destroy_inode(
 
 STATIC void
 xfs_fs_inode_init_once(
-	kmem_zone_t		*zonep,
 	void			*vnode)
 {
 	inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 41103910f8a..9ff8e849940 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -58,7 +58,7 @@ int slab_is_available(void);
 
 struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
-			void (*)(struct kmem_cache *, void *));
+			void (*)(void *));
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index d117ea2825a..5bad61a93f6 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -85,7 +85,7 @@ struct kmem_cache {
 	struct kmem_cache_order_objects min;
 	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */
-	void (*ctor)(struct kmem_cache *, void *);
+	void (*ctor)(void *);
 	int inuse;		/* Offset to metadata */
 	int align;		/* Alignment */
 	const char *name;	/* Name (only for display!) */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 1fdc2eb2f6d..474984f9e03 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -207,7 +207,7 @@ static int mqueue_get_sb(struct file_system_type *fs_type,
 	return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt);
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index b99d73e971a..80e83e459b1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1442,7 +1442,7 @@ long do_fork(unsigned long clone_flags,
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
 
-static void sighand_ctor(struct kmem_cache *cachep, void *data)
+static void sighand_ctor(void *data)
 {
 	struct sighand_struct *sighand = data;
 
diff --git a/lib/idr.c b/lib/idr.c
index 3476f8203e9..e728c7fccc4 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -607,7 +607,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
 }
 EXPORT_SYMBOL(idr_replace);
 
-static void idr_cache_ctor(struct kmem_cache *idr_layer_cache, void *idr_layer)
+static void idr_cache_ctor(void *idr_layer)
 {
 	memset(idr_layer, 0, sizeof(struct idr_layer));
 }
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 9c4f1ffa286..be86b32bc87 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1183,7 +1183,7 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
 EXPORT_SYMBOL(radix_tree_tagged);
 
 static void
-radix_tree_node_ctor(struct kmem_cache *cachep, void *node)
+radix_tree_node_ctor(void *node)
 {
 	memset(node, 0, sizeof(struct radix_tree_node));
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index abbd29f7c43..39ae5a9bf38 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -138,7 +138,7 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 		anon_vma_free(anon_vma);
 }
 
-static void anon_vma_ctor(struct kmem_cache *cachep, void *data)
+static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 1089092aeca..952d361774b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2352,7 +2352,7 @@ static void shmem_destroy_inode(struct inode *inode)
 	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
 
diff --git a/mm/slab.c b/mm/slab.c
index 052e7d64537..918f04f7fef 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -406,7 +406,7 @@ struct kmem_cache {
 	unsigned int dflags;		/* dynamic flags */
 
 	/* constructor func */
-	void (*ctor)(struct kmem_cache *, void *);
+	void (*ctor)(void *obj);
 
 /* 5) cache creation/removal */
 	const char *name;
@@ -2137,8 +2137,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
  */
 struct kmem_cache *
 kmem_cache_create (const char *name, size_t size, size_t align,
-	unsigned long flags,
-	void (*ctor)(struct kmem_cache *, void *))
+	unsigned long flags, void (*ctor)(void *))
 {
 	size_t left_over, slab_size, ralign;
 	struct kmem_cache *cachep = NULL, *pc;
@@ -2653,7 +2652,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
 		 * They must also be threaded.
 		 */
 		if (cachep->ctor && !(cachep->flags & SLAB_POISON))
-			cachep->ctor(cachep, objp + obj_offset(cachep));
+			cachep->ctor(objp + obj_offset(cachep));
 
 		if (cachep->flags & SLAB_RED_ZONE) {
 			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
@@ -2669,7 +2668,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
 					 cachep->buffer_size / PAGE_SIZE, 0);
 #else
 		if (cachep->ctor)
-			cachep->ctor(cachep, objp);
+			cachep->ctor(objp);
 #endif
 		slab_bufctl(slabp)[i] = i + 1;
 	}
@@ -3093,7 +3092,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 #endif
 	objp += obj_offset(cachep);
 	if (cachep->ctor && cachep->flags & SLAB_POISON)
-		cachep->ctor(cachep, objp);
+		cachep->ctor(objp);
 #if ARCH_SLAB_MINALIGN
 	if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
 		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
diff --git a/mm/slob.c b/mm/slob.c
index de268eb7ac7..d8fbd4d1bfa 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -525,12 +525,11 @@ struct kmem_cache {
 	unsigned int size, align;
 	unsigned long flags;
 	const char *name;
-	void (*ctor)(struct kmem_cache *, void *);
+	void (*ctor)(void *);
 };
 
 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
-	size_t align, unsigned long flags,
-	void (*ctor)(struct kmem_cache *, void *))
+	size_t align, unsigned long flags, void (*ctor)(void *))
 {
 	struct kmem_cache *c;
 
@@ -575,7 +574,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 		b = slob_new_page(flags, get_order(c->size), node);
 
 	if (c->ctor)
-		c->ctor(c, b);
+		c->ctor(b);
 
 	return b;
 }
diff --git a/mm/slub.c b/mm/slub.c
index 77c21cf53ff..b7e2cd5d82d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1012,7 +1012,7 @@ __setup("slub_debug", setup_slub_debug);
 
 static unsigned long kmem_cache_flags(unsigned long objsize,
 	unsigned long flags, const char *name,
-	void (*ctor)(struct kmem_cache *, void *))
+	void (*ctor)(void *))
 {
 	/*
 	 * Enable debugging if selected on the kernel commandline.
@@ -1040,7 +1040,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
 static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
 static inline unsigned long kmem_cache_flags(unsigned long objsize,
 	unsigned long flags, const char *name,
-	void (*ctor)(struct kmem_cache *, void *))
+	void (*ctor)(void *))
 {
 	return flags;
 }
@@ -1103,7 +1103,7 @@ static void setup_object(struct kmem_cache *s, struct page *page,
 {
 	setup_object_debug(s, page, object);
 	if (unlikely(s->ctor))
-		s->ctor(s, object);
+		s->ctor(object);
 }
 
 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -2286,7 +2286,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 		const char *name, size_t size,
 		size_t align, unsigned long flags,
-		void (*ctor)(struct kmem_cache *, void *))
+		void (*ctor)(void *))
 {
 	memset(s, 0, kmem_size);
 	s->name = name;
@@ -3042,7 +3042,7 @@ static int slab_unmergeable(struct kmem_cache *s)
 
 static struct kmem_cache *find_mergeable(size_t size,
 		size_t align, unsigned long flags, const char *name,
-		void (*ctor)(struct kmem_cache *, void *))
+		void (*ctor)(void *))
 {
 	struct kmem_cache *s;
 
@@ -3082,8 +3082,7 @@ static struct kmem_cache *find_mergeable(size_t size,
 }
 
 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
-		size_t align, unsigned long flags,
-		void (*ctor)(struct kmem_cache *, void *))
+		size_t align, unsigned long flags, void (*ctor)(void *))
 {
 	struct kmem_cache *s;
 
diff --git a/net/socket.c b/net/socket.c
index 1310a82cbba..8ef8ba81b9e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -265,7 +265,7 @@ static void sock_destroy_inode(struct inode *inode)
 			container_of(inode, struct socket_alloc, vfs_inode));
 }
 
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
 {
 	struct socket_alloc *ei = (struct socket_alloc *)foo;
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 5a9b0e7828c..23a2b8f6dc4 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -897,7 +897,7 @@ static struct file_system_type rpc_pipe_fs_type = {
 };
 
 static void
-init_once(struct kmem_cache * cachep, void *foo)
+init_once(void *foo)
 {
 	struct rpc_inode *rpci = (struct rpc_inode *) foo;
 
-- 
cgit v1.2.3-70-g09d2


From e40f51a36a6ca718e829c0933ab1e79333ac932e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 26 Jul 2008 17:47:53 -0700
Subject: netfilter: ebtables: use nf_register_hooks()

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebtable_filter.c | 18 +++++-------------
 net/bridge/netfilter/ebtable_nat.c    | 18 +++++-------------
 2 files changed, 10 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 690bc3ab186..1a58af51a2e 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -93,28 +93,20 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
 
 static int __init ebtable_filter_init(void)
 {
-	int i, j, ret;
+	int ret;
 
 	ret = ebt_register_table(&frame_filter);
 	if (ret < 0)
 		return ret;
-	for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++)
-		if ((ret = nf_register_hook(&ebt_ops_filter[i])) < 0)
-			goto cleanup;
-	return ret;
-cleanup:
-	for (j = 0; j < i; j++)
-		nf_unregister_hook(&ebt_ops_filter[j]);
-	ebt_unregister_table(&frame_filter);
+	ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
+	if (ret < 0)
+		ebt_unregister_table(&frame_filter);
 	return ret;
 }
 
 static void __exit ebtable_filter_fini(void)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++)
-		nf_unregister_hook(&ebt_ops_filter[i]);
+	nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
 	ebt_unregister_table(&frame_filter);
 }
 
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 5b495fe2d0b..f60c1e78e57 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -100,28 +100,20 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
 
 static int __init ebtable_nat_init(void)
 {
-	int i, ret, j;
+	int ret;
 
 	ret = ebt_register_table(&frame_nat);
 	if (ret < 0)
 		return ret;
-	for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++)
-		if ((ret = nf_register_hook(&ebt_ops_nat[i])) < 0)
-			goto cleanup;
-	return ret;
-cleanup:
-	for (j = 0; j < i; j++)
-		nf_unregister_hook(&ebt_ops_nat[j]);
-	ebt_unregister_table(&frame_nat);
+	ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat));
+	if (ret < 0)
+		ebt_unregister_table(&frame_nat);
 	return ret;
 }
 
 static void __exit ebtable_nat_fini(void)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++)
-		nf_unregister_hook(&ebt_ops_nat[i]);
+	nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat));
 	ebt_unregister_table(&frame_nat);
 }
 
-- 
cgit v1.2.3-70-g09d2


From f858b4869a9136dd28cc2ab37f8b89268cc99462 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 26 Jul 2008 17:48:38 -0700
Subject: netfilter: ip{,6}tables_security: fix future section mismatch

Currently not visible, because NET_NS is mutually exclusive with SYSFS
which is required by SECURITY.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/iptable_security.c  | 2 +-
 net/ipv6/netfilter/ip6table_security.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 2b472ac2263..db6d312128e 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -32,7 +32,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} initial_table __initdata = {
+} initial_table __net_initdata = {
 	.repl = {
 		.name = "security",
 		.valid_hooks = SECURITY_VALID_HOOKS,
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index a07abee3049..6e7131036bc 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -31,7 +31,7 @@ static struct
 	struct ip6t_replace repl;
 	struct ip6t_standard entries[3];
 	struct ip6t_error term;
-} initial_table __initdata = {
+} initial_table __net_initdata = {
 	.repl = {
 		.name = "security",
 		.valid_hooks = SECURITY_VALID_HOOKS,
-- 
cgit v1.2.3-70-g09d2


From 3918fed5f31213067c1c345bd904e1ea369e6819 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 26 Jul 2008 17:48:59 -0700
Subject: netfilter: arptables in netns for real

IN, FORWARD -- grab netns from in device, OUT -- from out device.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arptable_filter.c | 39 +++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 3be4d07e7ed..082f5dd3156 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -55,32 +55,53 @@ static struct xt_table packet_filter = {
 };
 
 /* The work comes in here from netfilter.c */
-static unsigned int arpt_hook(unsigned int hook,
-			      struct sk_buff *skb,
-			      const struct net_device *in,
-			      const struct net_device *out,
-			      int (*okfn)(struct sk_buff *))
+static unsigned int arpt_in_hook(unsigned int hook,
+				 struct sk_buff *skb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 int (*okfn)(struct sk_buff *))
 {
-	return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter);
+	return arpt_do_table(skb, hook, in, out,
+			     dev_net(in)->ipv4.arptable_filter);
+}
+
+static unsigned int arpt_out_hook(unsigned int hook,
+				  struct sk_buff *skb,
+				  const struct net_device *in,
+				  const struct net_device *out,
+				  int (*okfn)(struct sk_buff *))
+{
+	return arpt_do_table(skb, hook, in, out,
+			     dev_net(out)->ipv4.arptable_filter);
+}
+
+static unsigned int arpt_forward_hook(unsigned int hook,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	return arpt_do_table(skb, hook, in, out,
+			     dev_net(in)->ipv4.arptable_filter);
 }
 
 static struct nf_hook_ops arpt_ops[] __read_mostly = {
 	{
-		.hook		= arpt_hook,
+		.hook		= arpt_in_hook,
 		.owner		= THIS_MODULE,
 		.pf		= NF_ARP,
 		.hooknum	= NF_ARP_IN,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 	{
-		.hook		= arpt_hook,
+		.hook		= arpt_out_hook,
 		.owner		= THIS_MODULE,
 		.pf		= NF_ARP,
 		.hooknum	= NF_ARP_OUT,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 	{
-		.hook		= arpt_hook,
+		.hook		= arpt_forward_hook,
 		.owner		= THIS_MODULE,
 		.pf		= NF_ARP,
 		.hooknum	= NF_ARP_FORWARD,
-- 
cgit v1.2.3-70-g09d2


From 93bc4e89c260d91576840c4881d1066d84ccd422 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Sat, 26 Jul 2008 17:49:33 -0700
Subject: netfilter: fix double-free and use-after free

As suggested by Patrick McHardy, introduce a __krealloc() that doesn't
free the original buffer to fix a double-free and use-after-free bug
introduced by me in netfilter that uses RCU.

Reported-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Tested-by: Dieter Ries <clip2@gmx.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/slab.h                |  1 +
 mm/util.c                           | 44 ++++++++++++++++++++++++++++---------
 net/netfilter/nf_conntrack_extend.c |  2 +-
 3 files changed, 36 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9aa90a6f20e..be6f1d40b66 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -96,6 +96,7 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
 /*
  * Common kmalloc functions provided by all allocators
  */
+void * __must_check __krealloc(const void *, size_t, gfp_t);
 void * __must_check krealloc(const void *, size_t, gfp_t);
 void kfree(const void *);
 size_t ksize(const void *);
diff --git a/mm/util.c b/mm/util.c
index 8f18683825b..6ef9e9943f6 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -68,25 +68,22 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp)
 EXPORT_SYMBOL(kmemdup);
 
 /**
- * krealloc - reallocate memory. The contents will remain unchanged.
+ * __krealloc - like krealloc() but don't free @p.
  * @p: object to reallocate memory for.
  * @new_size: how many bytes of memory are required.
  * @flags: the type of memory to allocate.
  *
- * The contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes.  If @p is %NULL, krealloc()
- * behaves exactly like kmalloc().  If @size is 0 and @p is not a
- * %NULL pointer, the object pointed to is freed.
+ * This function is like krealloc() except it never frees the originally
+ * allocated buffer. Use this if you don't want to free the buffer immediately
+ * like, for example, with RCU.
  */
-void *krealloc(const void *p, size_t new_size, gfp_t flags)
+void *__krealloc(const void *p, size_t new_size, gfp_t flags)
 {
 	void *ret;
 	size_t ks = 0;
 
-	if (unlikely(!new_size)) {
-		kfree(p);
+	if (unlikely(!new_size))
 		return ZERO_SIZE_PTR;
-	}
 
 	if (p)
 		ks = ksize(p);
@@ -95,10 +92,37 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags)
 		return (void *)p;
 
 	ret = kmalloc_track_caller(new_size, flags);
-	if (ret && p) {
+	if (ret && p)
 		memcpy(ret, p, ks);
+
+	return ret;
+}
+EXPORT_SYMBOL(__krealloc);
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * The contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.  If @p is %NULL, krealloc()
+ * behaves exactly like kmalloc().  If @size is 0 and @p is not a
+ * %NULL pointer, the object pointed to is freed.
+ */
+void *krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+
+	if (unlikely(!new_size)) {
 		kfree(p);
+		return ZERO_SIZE_PTR;
 	}
+
+	ret = __krealloc(p, new_size, flags);
+	if (ret && p != ret)
+		kfree(p);
+
 	return ret;
 }
 EXPORT_SYMBOL(krealloc);
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 3469bc71a38..c956ef7eeec 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -95,7 +95,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 	newlen = newoff + t->len;
 	rcu_read_unlock();
 
-	new = krealloc(ct->ext, newlen, gfp);
+	new = __krealloc(ct->ext, newlen, gfp);
 	if (!new)
 		return NULL;
 
-- 
cgit v1.2.3-70-g09d2


From 6c64825bf40ecc1b01610762ca736b18c8a9db92 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 26 Jul 2008 17:50:05 -0700
Subject: netfilter: nf_conntrack_extend: avoid unnecessary "ct->ext"
 dereferences

As Linus points out, "ct->ext" and "new" are always equal, avoid unnecessary
dereferences and use "new" directly.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_extend.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index c956ef7eeec..4b2c769d555 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -115,10 +115,10 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 		ct->ext = new;
 	}
 
-	ct->ext->offset[id] = newoff;
-	ct->ext->len = newlen;
-	memset((void *)ct->ext + newoff, 0, newlen - newoff);
-	return (void *)ct->ext + newoff;
+	new->offset[id] = newoff;
+	new->len = newlen;
+	memset((void *)new + newoff, 0, newlen - newoff);
+	return (void *)new + newoff;
 }
 EXPORT_SYMBOL(__nf_ct_ext_add);
 
-- 
cgit v1.2.3-70-g09d2


From 6c3b8fc618905d7599dcc514c99ce4293d476f39 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 26 Jul 2008 17:51:06 -0700
Subject: netns: fix ip_rt_frag_needed rt_is_expired

Running recent kernels, and using a particular vpn gateway, I've been
having to edit my mails down to get them accepted by the smtp server.

Git bisect led to commit e84f84f276473dcc673f360e8ff3203148bdf0e2 -
netns: place rt_genid into struct net.  The conversion from a != test
to rt_is_expired() put one negative too many: and now my mail works.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e4ab0ac94f9..a507c5e27d0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1502,7 +1502,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				    rth->fl.iif != 0 ||
 				    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
 				    !net_eq(dev_net(rth->u.dst.dev), net) ||
-				    !rt_is_expired(rth))
+				    rt_is_expired(rth))
 					continue;
 
 				if (new_mtu < 68 || new_mtu >= old_mtu) {
-- 
cgit v1.2.3-70-g09d2


From 734550921e9b7ab924a43aa3d0bd4239dac4fbf1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 14 Jul 2008 21:22:20 -0400
Subject: [PATCH] beginning of sysctl cleanup - ctl_table_set

New object: set of sysctls [currently - root and per-net-ns].
Contains: pointer to parent set, list of tables and "should I see this set?"
method (->is_seen(set)).
Current lists of tables are subsumed by that; net-ns contains such a beast.
->lookup() for ctl_table_root returns pointer to ctl_table_set instead of
that to ->list of that ctl_table_set.

[folded compile fixes by rdd for configs without sysctl]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sysctl.h      | 15 +++++++++++++--
 include/net/net_namespace.h |  4 +++-
 kernel/sysctl.c             | 41 +++++++++++++++++++++++++++++++----------
 net/sysctl_net.c            | 22 ++++++++++------------
 4 files changed, 57 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 24141b4d1a1..c1e0cf408af 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -947,6 +947,16 @@ struct ctl_table;
 struct nsproxy;
 struct ctl_table_root;
 
+struct ctl_table_set {
+	struct list_head list;
+	struct ctl_table_set *parent;
+	int (*is_seen)(struct ctl_table_set *);
+};
+
+extern void setup_sysctl_set(struct ctl_table_set *p,
+	struct ctl_table_set *parent,
+	int (*is_seen)(struct ctl_table_set *));
+
 extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev);
 extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
 						struct ctl_table_header *prev);
@@ -1049,8 +1059,8 @@ struct ctl_table
 
 struct ctl_table_root {
 	struct list_head root_list;
-	struct list_head header_list;
-	struct list_head *(*lookup)(struct ctl_table_root *root,
+	struct ctl_table_set default_set;
+	struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
 					   struct nsproxy *namespaces);
 	int (*permissions)(struct ctl_table_root *root,
 			struct nsproxy *namespaces, struct ctl_table *table);
@@ -1066,6 +1076,7 @@ struct ctl_table_header
 	struct completion *unregistering;
 	struct ctl_table *ctl_table_arg;
 	struct ctl_table_root *root;
+	struct ctl_table_set *set;
 };
 
 /* struct ctl_path describes where in the hierarchy a table is added */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3855620b78a..a8eb43cf0c7 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -38,7 +38,9 @@ struct net {
 	struct proc_dir_entry 	*proc_net;
 	struct proc_dir_entry 	*proc_net_stat;
 
-	struct list_head	sysctl_table_headers;
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_set	sysctls;
+#endif
 
 	struct net_device       *loopback_dev;          /* The loopback */
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 35a50db9b6c..8ee4a0619fb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -160,12 +160,13 @@ static struct ctl_table root_table[];
 static struct ctl_table_root sysctl_table_root;
 static struct ctl_table_header root_table_header = {
 	.ctl_table = root_table,
-	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.header_list),
+	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
 	.root = &sysctl_table_root,
+	.set = &sysctl_table_root.default_set,
 };
 static struct ctl_table_root sysctl_table_root = {
 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
-	.header_list = LIST_HEAD_INIT(root_table_header.ctl_entry),
+	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
 };
 
 static struct ctl_table kern_table[];
@@ -1403,14 +1404,20 @@ void sysctl_head_finish(struct ctl_table_header *head)
 	spin_unlock(&sysctl_lock);
 }
 
+static struct ctl_table_set *
+lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
+{
+	struct ctl_table_set *set = &root->default_set;
+	if (root->lookup)
+		set = root->lookup(root, namespaces);
+	return set;
+}
+
 static struct list_head *
 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
 {
-	struct list_head *header_list;
-	header_list = &root->header_list;
-	if (root->lookup)
-		header_list = root->lookup(root, namespaces);
-	return header_list;
+	struct ctl_table_set *set = lookup_header_set(root, namespaces);
+	return &set->list;
 }
 
 struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
@@ -1720,7 +1727,6 @@ struct ctl_table_header *__register_sysctl_paths(
 	struct nsproxy *namespaces,
 	const struct ctl_path *path, struct ctl_table *table)
 {
-	struct list_head *header_list;
 	struct ctl_table_header *header;
 	struct ctl_table *new, **prevp;
 	unsigned int n, npath;
@@ -1772,8 +1778,8 @@ struct ctl_table_header *__register_sysctl_paths(
 	}
 #endif
 	spin_lock(&sysctl_lock);
-	header_list = lookup_header_list(root, namespaces);
-	list_add_tail(&header->ctl_entry, header_list);
+	header->set = lookup_header_set(root, namespaces);
+	list_add_tail(&header->ctl_entry, &header->set->list);
 	spin_unlock(&sysctl_lock);
 
 	return header;
@@ -1832,6 +1838,15 @@ void unregister_sysctl_table(struct ctl_table_header * header)
 	kfree(header);
 }
 
+void setup_sysctl_set(struct ctl_table_set *p,
+	struct ctl_table_set *parent,
+	int (*is_seen)(struct ctl_table_set *))
+{
+	INIT_LIST_HEAD(&p->list);
+	p->parent = parent ? parent : &sysctl_table_root.default_set;
+	p->is_seen = is_seen;
+}
+
 #else /* !CONFIG_SYSCTL */
 struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
 {
@@ -1848,6 +1863,12 @@ void unregister_sysctl_table(struct ctl_table_header * table)
 {
 }
 
+void setup_sysctl_set(struct ctl_table_set *p,
+	struct ctl_table_set *parent,
+	int (*is_seen)(struct ctl_table_set *))
+{
+}
+
 #endif /* CONFIG_SYSCTL */
 
 /*
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 63ada437fc2..cefbc367d8b 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -29,10 +29,15 @@
 #include <linux/if_tr.h>
 #endif
 
-static struct list_head *
+static struct ctl_table_set *
 net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
 {
-	return &namespaces->net_ns->sysctl_table_headers;
+	return &namespaces->net_ns->sysctls;
+}
+
+static int is_seen(struct ctl_table_set *set)
+{
+	return &current->nsproxy->net_ns->sysctls == set;
 }
 
 /* Return standard mode bits for table entry. */
@@ -53,13 +58,6 @@ static struct ctl_table_root net_sysctl_root = {
 	.permissions = net_ctl_permissions,
 };
 
-static LIST_HEAD(net_sysctl_ro_tables);
-static struct list_head *net_ctl_ro_header_lookup(struct ctl_table_root *root,
-		struct nsproxy *namespaces)
-{
-	return &net_sysctl_ro_tables;
-}
-
 static int net_ctl_ro_header_perms(struct ctl_table_root *root,
 		struct nsproxy *namespaces, struct ctl_table *table)
 {
@@ -70,19 +68,18 @@ static int net_ctl_ro_header_perms(struct ctl_table_root *root,
 }
 
 static struct ctl_table_root net_sysctl_ro_root = {
-	.lookup = net_ctl_ro_header_lookup,
 	.permissions = net_ctl_ro_header_perms,
 };
 
 static int sysctl_net_init(struct net *net)
 {
-	INIT_LIST_HEAD(&net->sysctl_table_headers);
+	setup_sysctl_set(&net->sysctls, NULL, is_seen);
 	return 0;
 }
 
 static void sysctl_net_exit(struct net *net)
 {
-	WARN_ON(!list_empty(&net->sysctl_table_headers));
+	WARN_ON(!list_empty(&net->sysctls.list));
 	return;
 }
 
@@ -98,6 +95,7 @@ static __init int sysctl_init(void)
 	if (ret)
 		goto out;
 	register_sysctl_root(&net_sysctl_root);
+	setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL);
 	register_sysctl_root(&net_sysctl_ro_root);
 out:
 	return ret;
-- 
cgit v1.2.3-70-g09d2


From bd7b1533cd6a68c734062aa69394bec7e2b1718e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 15 Jul 2008 16:00:59 -0400
Subject: [PATCH] sysctl: make sure that /proc/sys/net/ipv4 appears before
 per-ns ones

Massage ipv4 initialization - make sure that net.ipv4 appears as
non-per-net-namespace before it shows up in per-net-namespace sysctls.
That's the only change outside of sysctl.c needed to get sane ordering
rules and data structures for sysctls (esp. for procfs side of that
mess).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/net/ip.h           | 2 ++
 net/ipv4/af_inet.c         | 4 ++++
 net/ipv4/sysctl_net_ipv4.c | 7 +++++++
 3 files changed, 13 insertions(+)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index b5862b97520..250e6ef025a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -188,6 +188,8 @@ extern int sysctl_ip_dynaddr;
 
 extern void ipfrag_init(void);
 
+extern void ip_static_sysctl_init(void);
+
 #ifdef CONFIG_INET
 #include <net/dst.h>
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f440a9f5492..354f6b54e49 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1439,6 +1439,10 @@ static int __init inet_init(void)
 
 	(void)sock_register(&inet_family_ops);
 
+#ifdef CONFIG_SYSCTL
+	ip_static_sysctl_init();
+#endif
+
 	/*
 	 *	Add all the base protocols.
 	 */
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 14ef202a225..d63e9388d92 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -882,4 +882,11 @@ static __init int sysctl_ipv4_init(void)
 	return 0;
 }
 
+/* set enough of tree skeleton to get rid of ordering problems */
+void __init ip_static_sysctl_init(void)
+{
+	static ctl_table table[1];
+	register_sysctl_paths(net_ipv4_ctl_path, table);
+}
+
 __initcall(sysctl_ipv4_init);
-- 
cgit v1.2.3-70-g09d2


From 516e0cc5646f377ab80fcc2ee639892eccb99853 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 26 Jul 2008 00:39:17 -0400
Subject: [PATCH] f_count may wrap around

make it atomic_long_t; while we are at it, get rid of useless checks in affs,
hfs and hpfs - ->open() always has it equal to 1, ->release() - to 0.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/net/ppp_generic.c |  6 +++---
 fs/affs/file.c            |  4 ----
 fs/aio.c                  |  6 +++---
 fs/file_table.c           | 10 +++++-----
 fs/hfs/inode.c            |  4 ----
 fs/hfsplus/inode.c        |  4 ----
 include/linux/fs.h        |  6 +++---
 include/net/af_unix.h     |  2 +-
 net/sched/sch_atm.c       |  4 ++--
 net/unix/af_unix.c        |  2 +-
 net/unix/garbage.c        | 18 +++++++++---------
 11 files changed, 27 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 739b3ab7bcc..ddccc074a76 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -581,12 +581,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			if (file == ppp->owner)
 				ppp_shutdown_interface(ppp);
 		}
-		if (atomic_read(&file->f_count) <= 2) {
+		if (atomic_long_read(&file->f_count) <= 2) {
 			ppp_release(NULL, file);
 			err = 0;
 		} else
-			printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%d\n",
-			       atomic_read(&file->f_count));
+			printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%ld\n",
+			       atomic_long_read(&file->f_count));
 		unlock_kernel();
 		return err;
 	}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6eac7bdeec9..1377b1240b6 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -46,8 +46,6 @@ const struct inode_operations affs_file_inode_operations = {
 static int
 affs_file_open(struct inode *inode, struct file *filp)
 {
-	if (atomic_read(&filp->f_count) != 1)
-		return 0;
 	pr_debug("AFFS: open(%lu,%d)\n",
 		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
 	atomic_inc(&AFFS_I(inode)->i_opencnt);
@@ -57,8 +55,6 @@ affs_file_open(struct inode *inode, struct file *filp)
 static int
 affs_file_release(struct inode *inode, struct file *filp)
 {
-	if (atomic_read(&filp->f_count) != 0)
-		return 0;
 	pr_debug("AFFS: release(%lu, %d)\n",
 		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
 
diff --git a/fs/aio.c b/fs/aio.c
index 0051fd94b44..f658441d566 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -512,8 +512,8 @@ static void aio_fput_routine(struct work_struct *data)
  */
 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 {
-	dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n",
-		req, atomic_read(&req->ki_filp->f_count));
+	dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
+		req, atomic_long_read(&req->ki_filp->f_count));
 
 	assert_spin_locked(&ctx->ctx_lock);
 
@@ -528,7 +528,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	/* Must be done under the lock to serialise against cancellation.
 	 * Call this aio_fput as it duplicates fput via the fput_work.
 	 */
-	if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
+	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
diff --git a/fs/file_table.c b/fs/file_table.c
index 83084225b4c..f45a4493f9e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -120,7 +120,7 @@ struct file *get_empty_filp(void)
 
 	tsk = current;
 	INIT_LIST_HEAD(&f->f_u.fu_list);
-	atomic_set(&f->f_count, 1);
+	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
 	f->f_uid = tsk->fsuid;
 	f->f_gid = tsk->fsgid;
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(init_file);
 
 void fput(struct file *file)
 {
-	if (atomic_dec_and_test(&file->f_count))
+	if (atomic_long_dec_and_test(&file->f_count))
 		__fput(file);
 }
 
@@ -294,7 +294,7 @@ struct file *fget(unsigned int fd)
 	rcu_read_lock();
 	file = fcheck_files(files, fd);
 	if (file) {
-		if (!atomic_inc_not_zero(&file->f_count)) {
+		if (!atomic_long_inc_not_zero(&file->f_count)) {
 			/* File object ref couldn't be taken */
 			rcu_read_unlock();
 			return NULL;
@@ -326,7 +326,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
 		rcu_read_lock();
 		file = fcheck_files(files, fd);
 		if (file) {
-			if (atomic_inc_not_zero(&file->f_count))
+			if (atomic_long_inc_not_zero(&file->f_count))
 				*fput_needed = 1;
 			else
 				/* Didn't get the reference, someone's freed */
@@ -341,7 +341,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
 
 void put_filp(struct file *file)
 {
-	if (atomic_dec_and_test(&file->f_count)) {
+	if (atomic_long_dec_and_test(&file->f_count)) {
 		security_file_free(file);
 		file_kill(file);
 		file_free(file);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index aa73f3fd5dd..7e19835efa2 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -522,8 +522,6 @@ static int hfs_file_open(struct inode *inode, struct file *file)
 {
 	if (HFS_IS_RSRC(inode))
 		inode = HFS_I(inode)->rsrc_inode;
-	if (atomic_read(&file->f_count) != 1)
-		return 0;
 	atomic_inc(&HFS_I(inode)->opencnt);
 	return 0;
 }
@@ -534,8 +532,6 @@ static int hfs_file_release(struct inode *inode, struct file *file)
 
 	if (HFS_IS_RSRC(inode))
 		inode = HFS_I(inode)->rsrc_inode;
-	if (atomic_read(&file->f_count) != 0)
-		return 0;
 	if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
 		mutex_lock(&inode->i_mutex);
 		hfs_file_truncate(inode);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d4014e3044d..b085d64a2b6 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -254,8 +254,6 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
 {
 	if (HFSPLUS_IS_RSRC(inode))
 		inode = HFSPLUS_I(inode).rsrc_inode;
-	if (atomic_read(&file->f_count) != 1)
-		return 0;
 	atomic_inc(&HFSPLUS_I(inode).opencnt);
 	return 0;
 }
@@ -266,8 +264,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
 
 	if (HFSPLUS_IS_RSRC(inode))
 		inode = HFSPLUS_I(inode).rsrc_inode;
-	if (atomic_read(&file->f_count) != 0)
-		return 0;
 	if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
 		mutex_lock(&inode->i_mutex);
 		hfsplus_file_truncate(inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9d2de4cadab..7676fa1c20a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -795,7 +795,7 @@ struct file {
 #define f_dentry	f_path.dentry
 #define f_vfsmnt	f_path.mnt
 	const struct file_operations	*f_op;
-	atomic_t		f_count;
+	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	mode_t			f_mode;
 	loff_t			f_pos;
@@ -824,8 +824,8 @@ extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
 #define file_list_unlock() spin_unlock(&files_lock);
 
-#define get_file(x)	atomic_inc(&(x)->f_count)
-#define file_count(x)	atomic_read(&(x)->f_count)
+#define get_file(x)	atomic_long_inc(&(x)->f_count)
+#define file_count(x)	atomic_long_read(&(x)->f_count)
 
 #ifdef CONFIG_DEBUG_WRITECOUNT
 static inline void file_take_write(struct file *f)
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 2dfa96b0575..7dd29b7e461 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -51,7 +51,7 @@ struct unix_sock {
         struct sock		*peer;
         struct sock		*other;
 	struct list_head	link;
-        atomic_t                inflight;
+        atomic_long_t           inflight;
         spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
         wait_queue_head_t       peer_wait;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 04faa835be1..6b517b9dac5 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -162,7 +162,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 	qdisc_destroy(flow->q);
 	tcf_destroy_chain(&flow->filter_list);
 	if (flow->sock) {
-		pr_debug("atm_tc_put: f_count %d\n",
+		pr_debug("atm_tc_put: f_count %ld\n",
 			file_count(flow->sock->file));
 		flow->vcc->pop = flow->old_pop;
 		sockfd_put(flow->sock);
@@ -259,7 +259,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	sock = sockfd_lookup(fd, &error);
 	if (!sock)
 		return error;	/* f_count++ */
-	pr_debug("atm_tc_change: f_count %d\n", file_count(sock->file));
+	pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
 	if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
 		error = -EPROTOTYPE;
 		goto err_out;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 70ceb1604ad..6e7fec74bdb 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -603,7 +603,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock)
 	u->dentry = NULL;
 	u->mnt	  = NULL;
 	spin_lock_init(&u->lock);
-	atomic_set(&u->inflight, 0);
+	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->readlock); /* single task reading lock */
 	init_waitqueue_head(&u->peer_wait);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index ebdff3d877a..2a27b84f740 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -127,7 +127,7 @@ void unix_inflight(struct file *fp)
 	if(s) {
 		struct unix_sock *u = unix_sk(s);
 		spin_lock(&unix_gc_lock);
-		if (atomic_inc_return(&u->inflight) == 1) {
+		if (atomic_long_inc_return(&u->inflight) == 1) {
 			BUG_ON(!list_empty(&u->link));
 			list_add_tail(&u->link, &gc_inflight_list);
 		} else {
@@ -145,7 +145,7 @@ void unix_notinflight(struct file *fp)
 		struct unix_sock *u = unix_sk(s);
 		spin_lock(&unix_gc_lock);
 		BUG_ON(list_empty(&u->link));
-		if (atomic_dec_and_test(&u->inflight))
+		if (atomic_long_dec_and_test(&u->inflight))
 			list_del_init(&u->link);
 		unix_tot_inflight--;
 		spin_unlock(&unix_gc_lock);
@@ -237,17 +237,17 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
 
 static void dec_inflight(struct unix_sock *usk)
 {
-	atomic_dec(&usk->inflight);
+	atomic_long_dec(&usk->inflight);
 }
 
 static void inc_inflight(struct unix_sock *usk)
 {
-	atomic_inc(&usk->inflight);
+	atomic_long_inc(&usk->inflight);
 }
 
 static void inc_inflight_move_tail(struct unix_sock *u)
 {
-	atomic_inc(&u->inflight);
+	atomic_long_inc(&u->inflight);
 	/*
 	 * If this is still a candidate, move it to the end of the
 	 * list, so that it's checked even if it was already passed
@@ -288,11 +288,11 @@ void unix_gc(void)
 	 * before the detach without atomicity guarantees.
 	 */
 	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
-		int total_refs;
-		int inflight_refs;
+		long total_refs;
+		long inflight_refs;
 
 		total_refs = file_count(u->sk.sk_socket->file);
-		inflight_refs = atomic_read(&u->inflight);
+		inflight_refs = atomic_long_read(&u->inflight);
 
 		BUG_ON(inflight_refs < 1);
 		BUG_ON(total_refs < inflight_refs);
@@ -324,7 +324,7 @@ void unix_gc(void)
 		/* Move cursor to after the current position. */
 		list_move(&cursor, &u->link);
 
-		if (atomic_read(&u->inflight) > 0) {
+		if (atomic_long_read(&u->inflight) > 0) {
 			list_move_tail(&u->link, &gc_inflight_list);
 			u->gc_candidate = 0;
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
-- 
cgit v1.2.3-70-g09d2


From 2c3abab7c95295f319dc8899b74cbd60140fcdfb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 27 Jul 2008 03:59:24 -0700
Subject: ipcomp: Fix warnings after ipcomp consolidation.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/ipv4/ipcomp.c: In function ‘ipcomp4_init_state’:
net/ipv4/ipcomp.c:109: warning: unused variable ‘calg_desc’
net/ipv4/ipcomp.c:108: warning: unused variable ‘ipcd’
net/ipv4/ipcomp.c:107: warning: ‘err’ may be used uninitialized in this function
net/ipv6/ipcomp6.c: In function ‘ipcomp6_init_state’:
net/ipv6/ipcomp6.c:139: warning: unused variable ‘calg_desc’
net/ipv6/ipcomp6.c:138: warning: unused variable ‘ipcd’
net/ipv6/ipcomp6.c:137: warning: ‘err’ may be used uninitialized in this function

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipcomp.c  | 4 +---
 net/ipv6/ipcomp6.c | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index a42b64d040c..38ccb6dfb02 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -104,9 +104,7 @@ out:
 
 static int ipcomp4_init_state(struct xfrm_state *x)
 {
-	int err;
-	struct ipcomp_data *ipcd;
-	struct xfrm_algo_desc *calg_desc;
+	int err = -EINVAL;
 
 	x->props.header_len = 0;
 	switch (x->props.mode) {
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 0cfcea42153..4545e430686 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -134,9 +134,7 @@ out:
 
 static int ipcomp6_init_state(struct xfrm_state *x)
 {
-	int err;
-	struct ipcomp_data *ipcd;
-	struct xfrm_algo_desc *calg_desc;
+	int err = -EINVAL;
 
 	x->props.header_len = 0;
 	switch (x->props.mode) {
-- 
cgit v1.2.3-70-g09d2


From 6f9f489a4eeaa3c8a8618e078a5270d2c4872b67 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 27 Jul 2008 04:40:51 -0700
Subject: net: missing bits of net-namespace / sysctl

Piss-poor sysctl registration API strikes again, film at 11...
What we really need is _pathname_ required to be present in
already registered table, so that kernel could warn about bad
order.  That's the next target for sysctl stuff (and generally
saner and more explicit order of initialization of ipv[46]
internals wouldn't hurt either).

For the time being, here are full fixups required by ..._rotable()
stuff; we make per-net sysctl sets descendents of "ro" one and
make sure that sufficient skeleton is there before we start registering
per-net sysctls.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h         |  2 ++
 include/net/route.h        |  2 --
 net/ipv4/route.c           | 11 ++++++++++-
 net/ipv4/sysctl_net_ipv4.c | 14 --------------
 net/ipv6/af_inet6.c        | 12 ++++++++++++
 net/ipv6/sysctl_net_ipv6.c | 16 ++++++++++++++++
 net/sysctl_net.c           |  4 +++-
 7 files changed, 43 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 2d5c18514a2..113028fb8f6 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -608,6 +608,8 @@ extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
 extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
 extern int ipv6_sysctl_register(void);
 extern void ipv6_sysctl_unregister(void);
+extern int ipv6_static_sysctl_register(void);
+extern void ipv6_static_sysctl_unregister(void);
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/net/route.h b/include/net/route.h
index 3140cc50085..4f0d8c14736 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -204,6 +204,4 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt)
 	return rt->peer;
 }
 
-extern ctl_table ipv4_route_table[];
-
 #endif	/* _ROUTE_H */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a507c5e27d0..380d6474cf6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2914,7 +2914,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 	return 0;
 }
 
-ctl_table ipv4_route_table[] = {
+static ctl_table ipv4_route_table[] = {
 	{
 		.ctl_name	= NET_IPV4_ROUTE_GC_THRESH,
 		.procname	= "gc_thresh",
@@ -3216,6 +3216,15 @@ int __init ip_rt_init(void)
 	return rc;
 }
 
+/*
+ * We really need to sanitize the damn ipv4 init order, then all
+ * this nonsense will go away.
+ */
+void __init ip_static_sysctl_init(void)
+{
+	register_sysctl_paths(ipv4_route_path, ipv4_route_table);
+}
+
 EXPORT_SYMBOL(__ip_select_ident);
 EXPORT_SYMBOL(ip_route_input);
 EXPORT_SYMBOL(ip_route_output_key);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d63e9388d92..770d827f5ab 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -401,13 +401,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= &ipv4_local_port_range,
 		.strategy	= &ipv4_sysctl_local_port_range,
 	},
-	{
-		.ctl_name	= NET_IPV4_ROUTE,
-		.procname	= "route",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= ipv4_route_table
-	},
 #ifdef CONFIG_IP_MULTICAST
 	{
 		.ctl_name	= NET_IPV4_IGMP_MAX_MEMBERSHIPS,
@@ -882,11 +875,4 @@ static __init int sysctl_ipv4_init(void)
 	return 0;
 }
 
-/* set enough of tree skeleton to get rid of ordering problems */
-void __init ip_static_sysctl_init(void)
-{
-	static ctl_table table[1];
-	register_sysctl_paths(net_ipv4_ctl_path, table);
-}
-
 __initcall(sysctl_ipv4_init);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c708ca84229..95055f8c3f3 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -934,6 +934,11 @@ static int __init inet6_init(void)
 	if (err)
 		goto out_unregister_sock;
 
+#ifdef CONFIG_SYSCTL
+	err = ipv6_static_sysctl_register();
+	if (err)
+		goto static_sysctl_fail;
+#endif
 	/*
 	 *	ipngwg API draft makes clear that the correct semantics
 	 *	for TCP and UDP is to consider one TCP and UDP instance
@@ -1058,6 +1063,10 @@ ipmr_fail:
 icmp_fail:
 	unregister_pernet_subsys(&inet6_net_ops);
 register_pernet_fail:
+#ifdef CONFIG_SYSCTL
+	ipv6_static_sysctl_unregister();
+static_sysctl_fail:
+#endif
 	cleanup_ipv6_mibs();
 out_unregister_sock:
 	sock_unregister(PF_INET6);
@@ -1113,6 +1122,9 @@ static void __exit inet6_exit(void)
 	rawv6_exit();
 
 	unregister_pernet_subsys(&inet6_net_ops);
+#ifdef CONFIG_SYSCTL
+	ipv6_static_sysctl_unregister();
+#endif
 	cleanup_ipv6_mibs();
 	proto_unregister(&rawv6_prot);
 	proto_unregister(&udplitev6_prot);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 5c99274558b..e6dfaeac6be 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -150,3 +150,19 @@ void ipv6_sysctl_unregister(void)
 	unregister_net_sysctl_table(ip6_header);
 	unregister_pernet_subsys(&ipv6_sysctl_net_ops);
 }
+
+static struct ctl_table_header *ip6_base;
+
+int ipv6_static_sysctl_register(void)
+{
+	static struct ctl_table empty[1];
+	ip6_base = register_net_sysctl_rotable(net_ipv6_ctl_path, empty);
+	if (ip6_base == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void ipv6_static_sysctl_unregister(void)
+{
+	unregister_net_sysctl_table(ip6_base);
+}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index cefbc367d8b..972201cd5fa 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -73,7 +73,9 @@ static struct ctl_table_root net_sysctl_ro_root = {
 
 static int sysctl_net_init(struct net *net)
 {
-	setup_sysctl_set(&net->sysctls, NULL, is_seen);
+	setup_sysctl_set(&net->sysctls,
+			 &net_sysctl_ro_root.default_set,
+			 is_seen);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From eeb61f719c00c626115852bbc91189dc3011a844 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Sun, 27 Jul 2008 08:59:33 +0100
Subject: missing bits of net-namespace / sysctl

Piss-poor sysctl registration API strikes again, film at 11...

What we really need is _pathname_ required to be present in already
registered table, so that kernel could warn about bad order.  That's the
next target for sysctl stuff (and generally saner and more explicit
order of initialization of ipv[46] internals wouldn't hurt either).

For the time being, here are full fixups required by ..._rotable()
stuff; we make per-net sysctl sets descendents of "ro" one and make sure
that sufficient skeleton is there before we start registering per-net
sysctls.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/net/ipv6.h         |  2 ++
 include/net/route.h        |  2 --
 net/ipv4/route.c           | 11 ++++++++++-
 net/ipv4/sysctl_net_ipv4.c | 14 --------------
 net/ipv6/af_inet6.c        | 12 ++++++++++++
 net/ipv6/sysctl_net_ipv6.c | 16 ++++++++++++++++
 net/sysctl_net.c           |  4 +++-
 7 files changed, 43 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 2d5c18514a2..113028fb8f6 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -608,6 +608,8 @@ extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
 extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
 extern int ipv6_sysctl_register(void);
 extern void ipv6_sysctl_unregister(void);
+extern int ipv6_static_sysctl_register(void);
+extern void ipv6_static_sysctl_unregister(void);
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/net/route.h b/include/net/route.h
index 3140cc50085..4f0d8c14736 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -204,6 +204,4 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt)
 	return rt->peer;
 }
 
-extern ctl_table ipv4_route_table[];
-
 #endif	/* _ROUTE_H */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a507c5e27d0..380d6474cf6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2914,7 +2914,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 	return 0;
 }
 
-ctl_table ipv4_route_table[] = {
+static ctl_table ipv4_route_table[] = {
 	{
 		.ctl_name	= NET_IPV4_ROUTE_GC_THRESH,
 		.procname	= "gc_thresh",
@@ -3216,6 +3216,15 @@ int __init ip_rt_init(void)
 	return rc;
 }
 
+/*
+ * We really need to sanitize the damn ipv4 init order, then all
+ * this nonsense will go away.
+ */
+void __init ip_static_sysctl_init(void)
+{
+	register_sysctl_paths(ipv4_route_path, ipv4_route_table);
+}
+
 EXPORT_SYMBOL(__ip_select_ident);
 EXPORT_SYMBOL(ip_route_input);
 EXPORT_SYMBOL(ip_route_output_key);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d63e9388d92..770d827f5ab 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -401,13 +401,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= &ipv4_local_port_range,
 		.strategy	= &ipv4_sysctl_local_port_range,
 	},
-	{
-		.ctl_name	= NET_IPV4_ROUTE,
-		.procname	= "route",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= ipv4_route_table
-	},
 #ifdef CONFIG_IP_MULTICAST
 	{
 		.ctl_name	= NET_IPV4_IGMP_MAX_MEMBERSHIPS,
@@ -882,11 +875,4 @@ static __init int sysctl_ipv4_init(void)
 	return 0;
 }
 
-/* set enough of tree skeleton to get rid of ordering problems */
-void __init ip_static_sysctl_init(void)
-{
-	static ctl_table table[1];
-	register_sysctl_paths(net_ipv4_ctl_path, table);
-}
-
 __initcall(sysctl_ipv4_init);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c708ca84229..95055f8c3f3 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -934,6 +934,11 @@ static int __init inet6_init(void)
 	if (err)
 		goto out_unregister_sock;
 
+#ifdef CONFIG_SYSCTL
+	err = ipv6_static_sysctl_register();
+	if (err)
+		goto static_sysctl_fail;
+#endif
 	/*
 	 *	ipngwg API draft makes clear that the correct semantics
 	 *	for TCP and UDP is to consider one TCP and UDP instance
@@ -1058,6 +1063,10 @@ ipmr_fail:
 icmp_fail:
 	unregister_pernet_subsys(&inet6_net_ops);
 register_pernet_fail:
+#ifdef CONFIG_SYSCTL
+	ipv6_static_sysctl_unregister();
+static_sysctl_fail:
+#endif
 	cleanup_ipv6_mibs();
 out_unregister_sock:
 	sock_unregister(PF_INET6);
@@ -1113,6 +1122,9 @@ static void __exit inet6_exit(void)
 	rawv6_exit();
 
 	unregister_pernet_subsys(&inet6_net_ops);
+#ifdef CONFIG_SYSCTL
+	ipv6_static_sysctl_unregister();
+#endif
 	cleanup_ipv6_mibs();
 	proto_unregister(&rawv6_prot);
 	proto_unregister(&udplitev6_prot);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 5c99274558b..e6dfaeac6be 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -150,3 +150,19 @@ void ipv6_sysctl_unregister(void)
 	unregister_net_sysctl_table(ip6_header);
 	unregister_pernet_subsys(&ipv6_sysctl_net_ops);
 }
+
+static struct ctl_table_header *ip6_base;
+
+int ipv6_static_sysctl_register(void)
+{
+	static struct ctl_table empty[1];
+	ip6_base = register_net_sysctl_rotable(net_ipv6_ctl_path, empty);
+	if (ip6_base == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void ipv6_static_sysctl_unregister(void)
+{
+	unregister_net_sysctl_table(ip6_base);
+}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index cefbc367d8b..972201cd5fa 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -73,7 +73,9 @@ static struct ctl_table_root net_sysctl_ro_root = {
 
 static int sysctl_net_init(struct net *net)
 {
-	setup_sysctl_set(&net->sysctls, NULL, is_seen);
+	setup_sysctl_set(&net->sysctls,
+			 &net_sysctl_ro_root.default_set,
+			 is_seen);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 2fd9b2212e25e6411b6f309707f4e2683d164250 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 21:18:17 -0300
Subject: rfkill: document rfkill_force_state as required (v2)

While the rfkill class does work with just get_state(), it doesn't work
well on devices that are subject to external events that cause rfkill state
changes.

Document that rfkill_force_state() is required in those cases.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/rfkill.txt | 20 ++++++++++++++++----
 net/rfkill/rfkill.c      |  7 ++++++-
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index 0843ed0163a..28b6ec87c64 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -390,9 +390,10 @@ rfkill lines are inactive, it must return RFKILL_STATE_SOFT_BLOCKED if its soft
 rfkill input line is active.  Only if none of the rfkill input lines are
 active, will it return RFKILL_STATE_UNBLOCKED.
 
-If it doesn't implement the get_state() hook, it must make sure that its calls
-to rfkill_force_state() are enough to keep the status always up-to-date, and it
-must do a rfkill_force_state() on resume from sleep.
+Since the device has a hardware rfkill line, it IS subject to state changes
+external to rfkill.  Therefore, the driver must make sure that it calls
+rfkill_force_state() to keep the status always up-to-date, and it must do a
+rfkill_force_state() on resume from sleep.
 
 Every time the driver gets a notification from the card that one of its rfkill
 lines changed state (polling might be needed on badly designed cards that don't
@@ -422,13 +423,24 @@ of the hardware is unknown), or read-write (where the hardware can be queried
 about its current state).
 
 The rfkill class will call the get_state hook of a device every time it needs
-to know the *real* current state of the hardware.  This can happen often.
+to know the *real* current state of the hardware.  This can happen often, but
+it does not do any polling, so it is not enough on hardware that is subject
+to state changes outside of the rfkill subsystem.
+
+Therefore, calling rfkill_force_state() when a state change happens is
+mandatory when the device has a hardware rfkill line, or when something else
+like the firmware could cause its state to be changed without going through the
+rfkill class.
 
 Some hardware provides events when its status changes.  In these cases, it is
 best for the driver to not provide a get_state hook, and instead register the
 rfkill class *already* with the correct status, and keep it updated using
 rfkill_force_state() when it gets an event from the hardware.
 
+rfkill_force_state() must be used on the device resume handlers to update the
+rfkill status, should there be any chance of the device status changing during
+the sleep.
+
 There is no provision for a statically-allocated rfkill struct.  You must
 use rfkill_allocate() to allocate one.
 
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 7a560b78509..022fe50ab0e 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -252,7 +252,12 @@ EXPORT_SYMBOL_GPL(rfkill_epo);
  * a notification by the firmware/hardware of the current *real*
  * state of the radio rfkill switch.
  *
- * It may not be called from an atomic context.
+ * Devices which are subject to external changes on their rfkill
+ * state (such as those caused by a hardware rfkill line) MUST
+ * have their driver arrange to call rfkill_force_state() as soon
+ * as possible after such a change.
+ *
+ * This function may not be called from an atomic context.
  */
 int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state)
 {
-- 
cgit v1.2.3-70-g09d2


From 37f55e9d78d1b63047b1b7ae175cdce650547ba8 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 21:18:18 -0300
Subject: rfkill: fix led-trigger unregister order in error unwind

rfkill needs to unregister the led trigger AFTER a call to
rfkill_remove_switch(), otherwise it will not update the LED state,
possibly leaving it ON when it should be OFF.

To make led-trigger unregistering safer, guard against unregistering a
trigger twice, and also against issuing trigger events to a led trigger
that was unregistered.  This makes the error unwind paths more resilient.

Refer to "rfkill: Register LED triggers before registering switch".

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Michael Buesch <mb@bu3sch.de>
Cc: Dmitry Baryshkov <dbaryshkov@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 022fe50ab0e..fc3a4fd8899 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -590,8 +590,10 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill)
 static void rfkill_led_trigger_unregister(struct rfkill *rfkill)
 {
 #ifdef CONFIG_RFKILL_LEDS
-	if (rfkill->led_trigger.name)
+	if (rfkill->led_trigger.name) {
 		led_trigger_unregister(&rfkill->led_trigger);
+		rfkill->led_trigger.name = NULL;
+	}
 #endif
 }
 
@@ -627,8 +629,8 @@ int rfkill_register(struct rfkill *rfkill)
 
 	error = device_add(dev);
 	if (error) {
-		rfkill_led_trigger_unregister(rfkill);
 		rfkill_remove_switch(rfkill);
+		rfkill_led_trigger_unregister(rfkill);
 		return error;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 064af1117b4aa64a0e52f6b741df7356ef055142 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 21:18:20 -0300
Subject: rfkill: mutex fixes

There are two mutexes in rfkill:

rfkill->mutex, which protects some of the fields of a rfkill struct, and is
also used for callback serialization.

rfkill_mutex, which protects the global state, the list of registered
rfkill structs and rfkill->claim.

Make sure to use the correct mutex, and to not miss locking rfkill->mutex
even when we already took rfkill_mutex.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index fc3a4fd8899..ac205eceaf4 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -150,7 +150,7 @@ static void update_rfkill_state(struct rfkill *rfkill)
  * even if the radio is in RFKILL_STATE_HARD_BLOCKED state, so as to
  * give the driver a hint that it should double-BLOCK the transmitter.
  *
- * Caller must have aquired rfkill_mutex.
+ * Caller must have acquired rfkill->mutex.
  */
 static int rfkill_toggle_radio(struct rfkill *rfkill,
 				enum rfkill_state state,
@@ -216,8 +216,11 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
 	rfkill_states[type] = state;
 
 	list_for_each_entry(rfkill, &rfkill_list, node) {
-		if ((!rfkill->user_claim) && (rfkill->type == type))
+		if ((!rfkill->user_claim) && (rfkill->type == type)) {
+			mutex_lock(&rfkill->mutex);
 			rfkill_toggle_radio(rfkill, state, 0);
+			mutex_unlock(&rfkill->mutex);
+		}
 	}
 
 	mutex_unlock(&rfkill_mutex);
@@ -228,7 +231,7 @@ EXPORT_SYMBOL(rfkill_switch_all);
  * rfkill_epo - emergency power off all transmitters
  *
  * This kicks all rfkill devices to RFKILL_STATE_SOFT_BLOCKED, ignoring
- * everything in its path but rfkill_mutex.
+ * everything in its path but rfkill_mutex and rfkill->mutex.
  */
 void rfkill_epo(void)
 {
@@ -236,7 +239,9 @@ void rfkill_epo(void)
 
 	mutex_lock(&rfkill_mutex);
 	list_for_each_entry(rfkill, &rfkill_list, node) {
+		mutex_lock(&rfkill->mutex);
 		rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
+		mutex_unlock(&rfkill->mutex);
 	}
 	mutex_unlock(&rfkill_mutex);
 }
@@ -372,6 +377,9 @@ static ssize_t rfkill_claim_store(struct device *dev,
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
+	if (rfkill->user_claim_unsupported)
+		return -EOPNOTSUPP;
+
 	/*
 	 * Take the global lock to make sure the kernel is not in
 	 * the middle of rfkill_switch_all
@@ -380,19 +388,17 @@ static ssize_t rfkill_claim_store(struct device *dev,
 	if (error)
 		return error;
 
-	if (rfkill->user_claim_unsupported) {
-		error = -EOPNOTSUPP;
-		goto out_unlock;
-	}
 	if (rfkill->user_claim != claim) {
-		if (!claim)
+		if (!claim) {
+			mutex_lock(&rfkill->mutex);
 			rfkill_toggle_radio(rfkill,
 					    rfkill_states[rfkill->type],
 					    0);
+			mutex_unlock(&rfkill->mutex);
+		}
 		rfkill->user_claim = claim;
 	}
 
-out_unlock:
 	mutex_unlock(&rfkill_mutex);
 
 	return error ? error : count;
@@ -521,8 +527,11 @@ static void rfkill_remove_switch(struct rfkill *rfkill)
 {
 	mutex_lock(&rfkill_mutex);
 	list_del_init(&rfkill->node);
-	rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
 	mutex_unlock(&rfkill_mutex);
+
+	mutex_lock(&rfkill->mutex);
+	rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
+	mutex_unlock(&rfkill->mutex);
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 435307a365ceedc4f4e1813e405f583f434d98e4 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 21:18:22 -0300
Subject: rfkill: yet more minor kernel-doc fixes

For some stupid reason, I sent and old version of the patch minor kernel
doc-fix patch, and it got merged before I noticed the problem. This is an
incremental fix on top.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index ac205eceaf4..c6f2f388cb7 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -130,7 +130,6 @@ static void update_rfkill_state(struct rfkill *rfkill)
 
 /**
  * rfkill_toggle_radio - wrapper for toggle_radio hook
- *
  * @rfkill: the rfkill struct to use
  * @force: calls toggle_radio even if cache says it is not needed,
  *	and also makes sure notifications of the state will be
@@ -141,8 +140,8 @@ static void update_rfkill_state(struct rfkill *rfkill)
  * calls and handling all the red tape such as issuing notifications
  * if the call is successful.
  *
- * Note that @force cannot override a (possibly cached) state of
- * RFKILL_STATE_HARD_BLOCKED.  Any device making use of
+ * Note that the @force parameter cannot override a (possibly cached)
+ * state of RFKILL_STATE_HARD_BLOCKED.  Any device making use of
  * RFKILL_STATE_HARD_BLOCKED implements either get_state() or
  * rfkill_force_state(), so the cache either is bypassed or valid.
  *
@@ -200,12 +199,12 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
 
 /**
  * rfkill_switch_all - Toggle state of all switches of given type
- * @type: type of interfaces to be affeceted
+ * @type: type of interfaces to be affected
  * @state: the new state
  *
- * This function toggles state of all switches of given type unless
- * a specific switch is claimed by userspace in which case it is
- * left alone.
+ * This function toggles the state of all switches of given type,
+ * unless a specific switch is claimed by userspace (in which case,
+ * that switch is left alone).
  */
 void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
 {
@@ -540,9 +539,10 @@ static void rfkill_remove_switch(struct rfkill *rfkill)
  * @type: type of the switch (RFKILL_TYPE_*)
  *
  * This function should be called by the network driver when it needs
- * rfkill structure. Once the structure is allocated the driver shoud
- * finish its initialization by setting name, private data, enable_radio
+ * rfkill structure.  Once the structure is allocated the driver should
+ * finish its initialization by setting the name, private data, enable_radio
  * and disable_radio methods and then register it with rfkill_register().
+ *
  * NOTE: If registration fails the structure shoudl be freed by calling
  * rfkill_free() otherwise rfkill_unregister() should be used.
  */
@@ -574,7 +574,7 @@ EXPORT_SYMBOL(rfkill_allocate);
  * rfkill_free - Mark rfkill structure for deletion
  * @rfkill: rfkill structure to be destroyed
  *
- * Decrements reference count of rfkill structure so it is destroyed.
+ * Decrements reference count of the rfkill structure so it is destroyed.
  * Note that rfkill_free() should _not_ be called after rfkill_unregister().
  */
 void rfkill_free(struct rfkill *rfkill)
-- 
cgit v1.2.3-70-g09d2


From 1b0241656b658522a15e7aad570cb8ea6b255a2a Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 14 Jul 2008 12:43:23 +0200
Subject: mac80211: tx, use dev_kfree_skb_any for beacon_get

Use dev_kfree_skb_any(); instead of dev_kfree_skb();, since
ieee80211_beacon_get function might be called from atomic.
(It's in a fail path.)

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Michael Wu <flamingice@sourmilk.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0fbadd8b983..7e99d161519 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1931,7 +1931,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 			       "no rate found\n",
 			       wiphy_name(local->hw.wiphy));
 		}
-		dev_kfree_skb(skb);
+		dev_kfree_skb_any(skb);
 		skb = NULL;
 		goto out;
 	}
-- 
cgit v1.2.3-70-g09d2


From 023a04bebe7030c1e6d5347bd3f27a3e49a1f222 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 14 Jul 2008 12:52:08 -0700
Subject: mac80211: return correct error return from ieee80211_wep_init

Return the proper error code rather than a hard-coded ENOMEM from
ieee80211_wep_init.  Also, print the error code on failure.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 4 ++--
 net/mac80211/wep.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f1a83d450ea..b5830f7055c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1731,8 +1731,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	result = ieee80211_wep_init(local);
 
 	if (result < 0) {
-		printk(KERN_DEBUG "%s: Failed to initialize wep\n",
-		       wiphy_name(local->hw.wiphy));
+		printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n",
+		       wiphy_name(local->hw.wiphy), result);
 		goto fail_wep;
 	}
 
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 872d2fcd1a5..5c2bf0a3d4d 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -31,13 +31,13 @@ int ieee80211_wep_init(struct ieee80211_local *local)
 	local->wep_tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0,
 						CRYPTO_ALG_ASYNC);
 	if (IS_ERR(local->wep_tx_tfm))
-		return -ENOMEM;
+		return PTR_ERR(local->wep_tx_tfm);
 
 	local->wep_rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0,
 						CRYPTO_ALG_ASYNC);
 	if (IS_ERR(local->wep_rx_tfm)) {
 		crypto_free_blkcipher(local->wep_tx_tfm);
-		return -ENOMEM;
+		return PTR_ERR(local->wep_rx_tfm);
 	}
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 5422399518e8142198df888aab00acdac251f754 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Thu, 24 Jul 2008 10:40:37 +0300
Subject: mac80211: append CONFIG_ to MAC80211_VERBOSE_PS_DEBUG in
 net/mac80211/tx.c.

In net/mac80211/tx.c, there are some #ifdef which checks
MAC80211_VERBOSE_PS_DEBUG
(which in fact is never set) instead of
CONFIG_MAC80211_VERBOSE_PS_DEBUG, as should be.

This patch replaces MAC80211_VERBOSE_PS_DEBUG with
CONFIG_MAC80211_VERBOSE_PS_DEBUG in these #ifdef commands in
net/mac80211/tx.c.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7e99d161519..c5f78059c6c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -305,7 +305,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 	rcu_read_unlock();
 
 	local->total_ps_buffered = total;
-#ifdef MAC80211_VERBOSE_PS_DEBUG
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n",
 	       wiphy_name(local->hw.wiphy), purged);
 #endif
@@ -342,7 +342,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 			purge_old_ps_buffers(tx->local);
 		if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >=
 		    AP_MAX_BC_BUFFER) {
-#ifdef MAC80211_VERBOSE_PS_DEBUG
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 			if (net_ratelimit()) {
 				printk(KERN_DEBUG "%s: BC TX buffer full - "
 				       "dropping the oldest frame\n",
@@ -389,7 +389,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 			purge_old_ps_buffers(tx->local);
 		if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) {
 			struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf);
-#ifdef MAC80211_VERBOSE_PS_DEBUG
+#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 			if (net_ratelimit()) {
 				printk(KERN_DEBUG "%s: STA %s TX "
 				       "buffer full - dropping oldest frame\n",
-- 
cgit v1.2.3-70-g09d2


From d0f09804144fd9471a13cf4d80e66842c7fa114f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 29 Jul 2008 11:32:07 +0200
Subject: mac80211: partially fix skb->cb use

This patch fixes mac80211 to not use the skb->cb over the queue step
from virtual interfaces to the master. The patch also, for now,
disables aggregation because that would still require requeuing,
will fix that in a separate patch. There are two other places (software
requeue and powersaving stations) where requeue can happen, but that is
not currently used by any drivers/not possible to use respectively.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/base.c           |  2 +-
 drivers/net/wireless/b43/xmit.c             |  2 +-
 drivers/net/wireless/b43legacy/xmit.c       |  2 +-
 drivers/net/wireless/iwlwifi/iwl-tx.c       |  2 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  2 +-
 drivers/net/wireless/rt2x00/rt2x00mac.c     |  2 +-
 include/linux/skbuff.h                      |  5 ++-
 include/net/mac80211.h                      |  6 ----
 net/core/skbuff.c                           |  3 ++
 net/mac80211/main.c                         |  8 +----
 net/mac80211/mlme.c                         |  8 ++---
 net/mac80211/tx.c                           | 47 +++++++++++++----------------
 net/mac80211/wme.c                          |  3 ++
 13 files changed, 40 insertions(+), 52 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 1106d1c0629..ff3fad794b6 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -1237,7 +1237,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 
 	pktlen = skb->len;
 
-	if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) {
+	if (info->control.hw_key) {
 		keyidx = info->control.hw_key->hw_key_idx;
 		pktlen += info->control.icv_len;
 	}
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 8d54502222a..9dda8169f7c 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -192,7 +192,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 	const struct b43_phy *phy = &dev->phy;
 	const struct ieee80211_hdr *wlhdr =
 	    (const struct ieee80211_hdr *)fragment_data;
-	int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT));
+	int use_encryption = !!info->control.hw_key;
 	__le16 fctl = wlhdr->frame_control;
 	struct ieee80211_rate *fbrate;
 	u8 rate, rate_fb;
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index e969ed8d412..68e1f8c7872 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -192,7 +192,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 			       u16 cookie)
 {
 	const struct ieee80211_hdr *wlhdr;
-	int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT));
+	int use_encryption = !!info->control.hw_key;
 	u16 fctl;
 	u8 rate;
 	struct ieee80211_rate *rate_fb;
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 9b50b1052b0..f72cd0bf6aa 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -906,7 +906,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
 	 * first entry */
 	iwl_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
 
-	if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+	if (info->control.hw_key)
 		iwl_tx_cmd_build_hwcrypto(priv, info, tx_cmd, skb, sta_id);
 
 	/* Set up TFD's 2nd entry to point directly to remainder of skb,
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 05121f395c4..7c82ecfa30a 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -2667,7 +2667,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb)
 	 * first entry */
 	iwl3945_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
 
-	if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+	if (info->control.hw_key)
 		iwl3945_build_tx_cmd_hwcrypto(priv, info, out_cmd, skb, 0);
 
 	/* Set up TFD's 2nd entry to point directly to remainder of skb,
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 042ab00d8bd..c3ee4ecba79 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -63,7 +63,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 	 */
 	memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb));
 	rts_info = IEEE80211_SKB_CB(skb);
-	rts_info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	rts_info->control.hw_key = NULL;
 	rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS;
 	rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT;
 	rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ea44f6621f..a640385e059 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -316,7 +316,10 @@ struct sk_buff {
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
-	/* 14 bit hole */
+#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
+	__u8			do_not_encrypt:1;
+#endif
+	/* 0/13/14 bit hole */
 
 #ifdef CONFIG_NET_DMA
 	dma_cookie_t		dma_cookie;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 74487f26823..b52721008be 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -206,8 +206,6 @@ struct ieee80211_bss_conf {
  * These flags are used with the @flags member of &ieee80211_tx_info.
  *
  * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame.
- * @IEEE80211_TX_CTL_DO_NOT_ENCRYPT: send this frame without encryption;
- *	e.g., for EAPOL frame
  * @IEEE80211_TX_CTL_USE_RTS_CTS: use RTS-CTS before sending frame
  * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g.,
  *	for combined 802.11g / 802.11b networks)
@@ -220,7 +218,6 @@ struct ieee80211_bss_conf {
  * @IEEE80211_TX_CTL_SHORT_PREAMBLE: TBD
  * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the
  *	through set_retry_limit configured long retry value
- * @IEEE80211_TX_CTL_EAPOL_FRAME: internal to mac80211
  * @IEEE80211_TX_CTL_SEND_AFTER_DTIM: send this frame after DTIM beacon
  * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU
  * @IEEE80211_TX_CTL_OFDM_HT: this frame can be sent in HT OFDM rates. number
@@ -253,7 +250,6 @@ struct ieee80211_bss_conf {
  */
 enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
-	IEEE80211_TX_CTL_DO_NOT_ENCRYPT		= BIT(1),
 	IEEE80211_TX_CTL_USE_RTS_CTS		= BIT(2),
 	IEEE80211_TX_CTL_USE_CTS_PROTECT	= BIT(3),
 	IEEE80211_TX_CTL_NO_ACK			= BIT(4),
@@ -263,7 +259,6 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_FIRST_FRAGMENT		= BIT(8),
 	IEEE80211_TX_CTL_SHORT_PREAMBLE		= BIT(9),
 	IEEE80211_TX_CTL_LONG_RETRY_LIMIT	= BIT(10),
-	IEEE80211_TX_CTL_EAPOL_FRAME		= BIT(11),
 	IEEE80211_TX_CTL_SEND_AFTER_DTIM	= BIT(12),
 	IEEE80211_TX_CTL_AMPDU			= BIT(13),
 	IEEE80211_TX_CTL_OFDM_HT		= BIT(14),
@@ -323,7 +318,6 @@ struct ieee80211_tx_info {
 			struct ieee80211_vif *vif;
 			struct ieee80211_key_conf *hw_key;
 			unsigned long jiffies;
-			int ifindex;
 			u16 aid;
 			s8 rts_cts_rate_idx, alt_retry_rate_idx;
 			u8 retry_limit;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4e0c9227418..84640172d65 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -485,6 +485,9 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(head);
 	C(data);
 	C(truesize);
+#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
+	C(do_not_encrypt);
+#endif
 	atomic_set(&n->users, 1);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b5830f7055c..a4c5b90de76 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1233,18 +1233,12 @@ static void ieee80211_tasklet_handler(unsigned long data)
 /* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to
  * make a prepared TX frame (one that has been given to hw) to look like brand
  * new IEEE 802.11 frame that is ready to go through TX processing again.
- * Also, tx_packet_data in cb is restored from tx_control. */
+ */
 static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
 				      struct ieee80211_key *key,
 				      struct sk_buff *skb)
 {
 	int hdrlen, iv_len, mic_len;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-
-	info->flags &=	IEEE80211_TX_CTL_REQ_TX_STATUS |
-			IEEE80211_TX_CTL_DO_NOT_ENCRYPT |
-			IEEE80211_TX_CTL_REQUEUE |
-			IEEE80211_TX_CTL_EAPOL_FRAME;
 
 	hdrlen = ieee80211_get_hdrlen_from_skb(skb);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d7c371e36bf..35eb767cbcb 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -606,7 +606,6 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
 		      int encrypt)
 {
 	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_tx_info *info;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	skb->dev = sdata->local->mdev;
@@ -614,11 +613,8 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
 	skb_set_network_header(skb, 0);
 	skb_set_transport_header(skb, 0);
 
-	info = IEEE80211_SKB_CB(skb);
-	memset(info, 0, sizeof(struct ieee80211_tx_info));
-	info->control.ifindex = sdata->dev->ifindex;
-	if (!encrypt)
-		info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	skb->iif = sdata->dev->ifindex;
+	skb->do_not_encrypt = !encrypt;
 
 	dev_queue_xmit(skb);
 }
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c5f78059c6c..69019e94387 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -439,14 +439,14 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	u16 fc = tx->fc;
 
-	if (unlikely(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+	if (unlikely(tx->skb->do_not_encrypt))
 		tx->key = NULL;
 	else if (tx->sta && (key = rcu_dereference(tx->sta->key)))
 		tx->key = key;
 	else if ((key = rcu_dereference(tx->sdata->default_key)))
 		tx->key = key;
 	else if (tx->sdata->drop_unencrypted &&
-		 !(info->flags & IEEE80211_TX_CTL_EAPOL_FRAME) &&
+		 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) &&
 		 !(info->flags & IEEE80211_TX_CTL_INJECTED)) {
 		I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
 		return TX_DROP;
@@ -476,7 +476,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	}
 
 	if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
-		info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+		tx->skb->do_not_encrypt = 1;
 
 	return TX_CONTINUE;
 }
@@ -732,6 +732,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 		memcpy(skb_put(frag, copylen), pos, copylen);
 		memcpy(frag->cb, first->cb, sizeof(frag->cb));
 		skb_copy_queue_mapping(frag, first);
+		frag->do_not_encrypt = first->do_not_encrypt;
 
 		pos += copylen;
 		left -= copylen;
@@ -852,7 +853,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
-	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	skb->do_not_encrypt = 1;
 	info->flags |= IEEE80211_TX_CTL_INJECTED;
 	tx->flags &= ~IEEE80211_TX_FRAGMENTED;
 
@@ -925,8 +926,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 				skb_trim(skb, skb->len - FCS_LEN);
 			}
 			if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP)
-				info->flags &=
-					~IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+				tx->skb->do_not_encrypt = 0;
 			if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG)
 				tx->flags |= IEEE80211_TX_FRAGMENTED;
 			break;
@@ -1042,10 +1042,9 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 				struct sk_buff *skb,
 				struct net_device *mdev)
 {
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct net_device *dev;
 
-	dev = dev_get_by_index(&init_net, info->control.ifindex);
+	dev = dev_get_by_index(&init_net, skb->iif);
 	if (unlikely(dev && !is_ieee80211_device(dev, mdev))) {
 		dev_put(dev);
 		dev = NULL;
@@ -1306,8 +1305,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 	bool may_encrypt;
 	int ret;
 
-	if (info->control.ifindex)
-		odev = dev_get_by_index(&init_net, info->control.ifindex);
+	if (skb->iif)
+		odev = dev_get_by_index(&init_net, skb->iif);
 	if (unlikely(odev && !is_ieee80211_device(odev, dev))) {
 		dev_put(odev);
 		odev = NULL;
@@ -1321,9 +1320,13 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 		return 0;
 	}
 
+	memset(info, 0, sizeof(*info));
+
+	info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+
 	osdata = IEEE80211_DEV_TO_SUB_IF(odev);
 
-	may_encrypt = !(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT);
+	may_encrypt = !skb->do_not_encrypt;
 
 	headroom = osdata->local->tx_headroom;
 	if (may_encrypt)
@@ -1348,7 +1351,6 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb,
 				 struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_radiotap_header *prthdr =
 		(struct ieee80211_radiotap_header *)skb->data;
 	u16 len_rthdr;
@@ -1371,11 +1373,11 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb,
 	skb->dev = local->mdev;
 
 	/* needed because we set skb device to master */
-	info->control.ifindex = dev->ifindex;
+	skb->iif = dev->ifindex;
 
-	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
-	/* Interfaces should always request a status report */
-	info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+	/* sometimes we do encrypt injected frames, will be fixed
+	 * up in radiotap parser if not wanted */
+	skb->do_not_encrypt = 0;
 
 	/*
 	 * fix up the pointers accounting for the radiotap
@@ -1419,7 +1421,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 			       struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_tx_info *info;
 	struct ieee80211_sub_if_data *sdata;
 	int ret = 1, head_need;
 	u16 ethertype, hdrlen,  meshhdrlen = 0;
@@ -1645,14 +1646,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	nh_pos += hdrlen;
 	h_pos += hdrlen;
 
-	info = IEEE80211_SKB_CB(skb);
-	memset(info, 0, sizeof(*info));
-	info->control.ifindex = dev->ifindex;
-	if (ethertype == ETH_P_PAE)
-		info->flags |= IEEE80211_TX_CTL_EAPOL_FRAME;
-
-	/* Interfaces should always request a status report */
-	info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+	skb->iif = dev->ifindex;
 
 	skb->dev = local->mdev;
 	dev->stats.tx_packets++;
@@ -1922,6 +1916,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 
 	info = IEEE80211_SKB_CB(skb);
 
+	skb->do_not_encrypt = 1;
+
 	info->band = band;
 	rate_control_get_rate(local->mdev, sband, skb, &rsel);
 
@@ -1940,7 +1936,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	info->tx_rate_idx = rsel.rate_idx;
 
 	info->flags |= IEEE80211_TX_CTL_NO_ACK;
-	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
 	info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
 	info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
 	if (sdata->bss_conf.use_short_preamble &&
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 07edda0b8a5..28437f0001d 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -188,6 +188,9 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 {
 	int i;
 
+	/* XXX: currently broken due to cb/requeue use */
+	return -EPERM;
+
 	/* prepare the filter and save it for the SW queue
 	 * matching the received HW queue */
 
-- 
cgit v1.2.3-70-g09d2


From bba95fefb8e31f4799652666d05a4a9aad56e492 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 29 Jul 2008 13:22:51 +0200
Subject: nl80211: fix dump callbacks

Julius Volz pointed out that the dump callbacks in nl80211 were
broken and fixed one of them. This patch fixes the other three
and also addresses the TODOs there.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Julius Volz <juliusv@google.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/nl80211.c | 275 +++++++++++++++++++++++++++----------------------
 1 file changed, 153 insertions(+), 122 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b7fefffd2d0..59eb2cf42e5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -29,16 +29,16 @@ static struct genl_family nl80211_fam = {
 };
 
 /* internal helper: get drv and dev */
-static int get_drv_dev_by_info_ifindex(struct genl_info *info,
+static int get_drv_dev_by_info_ifindex(struct nlattr **attrs,
 				       struct cfg80211_registered_device **drv,
 				       struct net_device **dev)
 {
 	int ifindex;
 
-	if (!info->attrs[NL80211_ATTR_IFINDEX])
+	if (!attrs[NL80211_ATTR_IFINDEX])
 		return -EINVAL;
 
-	ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]);
+	ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]);
 	*dev = dev_get_by_index(&init_net, ifindex);
 	if (!*dev)
 		return -ENODEV;
@@ -291,21 +291,31 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
 
 	mutex_lock(&cfg80211_drv_mutex);
 	list_for_each_entry(dev, &cfg80211_drv_list, list) {
-		if (++wp_idx < wp_start)
+		if (wp_idx < wp_start) {
+			wp_idx++;
 			continue;
+		}
 		if_idx = 0;
 
 		mutex_lock(&dev->devlist_mtx);
 		list_for_each_entry(wdev, &dev->netdev_list, list) {
-			if (++if_idx < if_start)
+			if (if_idx < if_start) {
+				if_idx++;
 				continue;
+			}
 			if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid,
 					       cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					       wdev->netdev) < 0)
-				break;
+					       wdev->netdev) < 0) {
+				mutex_unlock(&dev->devlist_mtx);
+				goto out;
+			}
+			if_idx++;
 		}
 		mutex_unlock(&dev->devlist_mtx);
+
+		wp_idx++;
 	}
+ out:
 	mutex_unlock(&cfg80211_drv_mutex);
 
 	cb->args[0] = wp_idx;
@@ -321,7 +331,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
 	struct net_device *netdev;
 	int err;
 
-	err = get_drv_dev_by_info_ifindex(info, &dev, &netdev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &dev, &netdev);
 	if (err)
 		return err;
 
@@ -392,7 +402,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 	} else
 		return -EINVAL;
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 	ifindex = dev->ifindex;
@@ -477,7 +487,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
 	int ifindex, err;
 	struct net_device *dev;
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 	ifindex = dev->ifindex;
@@ -545,7 +555,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_MAC])
 		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -618,7 +628,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
 	if (!info->attrs[NL80211_ATTR_KEY_DEFAULT])
 		return -EINVAL;
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -699,7 +709,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 	}
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -735,7 +745,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_MAC])
 		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -764,7 +774,7 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
 	struct beacon_parameters params;
 	int haveinfo = 0;
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -843,7 +853,7 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
 	int err;
 	struct net_device *dev;
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -937,67 +947,78 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 }
 
 static int nl80211_dump_station(struct sk_buff *skb,
-		struct netlink_callback *cb)
+				struct netlink_callback *cb)
 {
-	int wp_idx = 0;
-	int if_idx = 0;
-	int sta_idx = cb->args[2];
-	int wp_start = cb->args[0];
-	int if_start = cb->args[1];
 	struct station_info sinfo;
 	struct cfg80211_registered_device *dev;
-	struct wireless_dev *wdev;
+	struct net_device *netdev;
 	u8 mac_addr[ETH_ALEN];
+	int ifidx = cb->args[0];
+	int sta_idx = cb->args[1];
 	int err;
-	int exit = 0;
 
-	/* TODO: filter by device */
-	mutex_lock(&cfg80211_drv_mutex);
-	list_for_each_entry(dev, &cfg80211_drv_list, list) {
-		if (exit)
+	if (!ifidx) {
+		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+				  nl80211_fam.attrbuf, nl80211_fam.maxattr,
+				  nl80211_policy);
+		if (err)
+			return err;
+
+		if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX])
+			return -EINVAL;
+
+		ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]);
+		if (!ifidx)
+			return -EINVAL;
+	}
+
+	netdev = dev_get_by_index(&init_net, ifidx);
+	if (!netdev)
+		return -ENODEV;
+
+	dev = cfg80211_get_dev_from_ifindex(ifidx);
+	if (IS_ERR(dev)) {
+		err = PTR_ERR(dev);
+		goto out_put_netdev;
+	}
+
+	if (!dev->ops->dump_station) {
+		err = -ENOSYS;
+		goto out_err;
+	}
+
+	rtnl_lock();
+
+	while (1) {
+		err = dev->ops->dump_station(&dev->wiphy, netdev, sta_idx,
+					     mac_addr, &sinfo);
+		if (err == -ENOENT)
 			break;
-		if (++wp_idx < wp_start)
-			continue;
-		if_idx = 0;
+		if (err)
+			goto out_err_rtnl;
 
-		mutex_lock(&dev->devlist_mtx);
-		list_for_each_entry(wdev, &dev->netdev_list, list) {
-			if (exit)
-				break;
-			if (++if_idx < if_start)
-				continue;
-			if (!dev->ops->dump_station)
-				continue;
+		if (nl80211_send_station(skb,
+				NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				netdev, mac_addr,
+				&sinfo) < 0)
+			goto out;
 
-			for (;; ++sta_idx) {
-				rtnl_lock();
-				err = dev->ops->dump_station(&dev->wiphy,
-						wdev->netdev, sta_idx, mac_addr,
-						&sinfo);
-				rtnl_unlock();
-				if (err) {
-					sta_idx = 0;
-					break;
-				}
-				if (nl80211_send_station(skb,
-						NETLINK_CB(cb->skb).pid,
-						cb->nlh->nlmsg_seq, NLM_F_MULTI,
-						wdev->netdev, mac_addr,
-						&sinfo) < 0) {
-					exit = 1;
-					break;
-				}
-			}
-		}
-		mutex_unlock(&dev->devlist_mtx);
+		sta_idx++;
 	}
-	mutex_unlock(&cfg80211_drv_mutex);
 
-	cb->args[0] = wp_idx;
-	cb->args[1] = if_idx;
-	cb->args[2] = sta_idx;
 
-	return skb->len;
+ out:
+	cb->args[1] = sta_idx;
+	err = skb->len;
+ out_err_rtnl:
+	rtnl_unlock();
+ out_err:
+	cfg80211_put_dev(dev);
+ out_put_netdev:
+	dev_put(netdev);
+
+	return err;
 }
 
 static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
@@ -1016,7 +1037,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
 
 	mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -1112,7 +1133,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.plink_action =
 		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -1172,7 +1193,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 				&params.station_flags))
 		return -EINVAL;
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -1207,7 +1228,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_MAC])
 		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -1277,68 +1298,78 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
 }
 
 static int nl80211_dump_mpath(struct sk_buff *skb,
-		struct netlink_callback *cb)
+			      struct netlink_callback *cb)
 {
-	int wp_idx = 0;
-	int if_idx = 0;
-	int sta_idx = cb->args[2];
-	int wp_start = cb->args[0];
-	int if_start = cb->args[1];
 	struct mpath_info pinfo;
 	struct cfg80211_registered_device *dev;
-	struct wireless_dev *wdev;
+	struct net_device *netdev;
 	u8 dst[ETH_ALEN];
 	u8 next_hop[ETH_ALEN];
+	int ifidx = cb->args[0];
+	int path_idx = cb->args[1];
 	int err;
-	int exit = 0;
 
-	/* TODO: filter by device */
-	mutex_lock(&cfg80211_drv_mutex);
-	list_for_each_entry(dev, &cfg80211_drv_list, list) {
-		if (exit)
+	if (!ifidx) {
+		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+				  nl80211_fam.attrbuf, nl80211_fam.maxattr,
+				  nl80211_policy);
+		if (err)
+			return err;
+
+		if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX])
+			return -EINVAL;
+
+		ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]);
+		if (!ifidx)
+			return -EINVAL;
+	}
+
+	netdev = dev_get_by_index(&init_net, ifidx);
+	if (!netdev)
+		return -ENODEV;
+
+	dev = cfg80211_get_dev_from_ifindex(ifidx);
+	if (IS_ERR(dev)) {
+		err = PTR_ERR(dev);
+		goto out_put_netdev;
+	}
+
+	if (!dev->ops->dump_mpath) {
+		err = -ENOSYS;
+		goto out_err;
+	}
+
+	rtnl_lock();
+
+	while (1) {
+		err = dev->ops->dump_mpath(&dev->wiphy, netdev, path_idx,
+					   dst, next_hop, &pinfo);
+		if (err == -ENOENT)
 			break;
-		if (++wp_idx < wp_start)
-			continue;
-		if_idx = 0;
+		if (err)
+			goto out_err_rtnl;
 
-		mutex_lock(&dev->devlist_mtx);
-		list_for_each_entry(wdev, &dev->netdev_list, list) {
-			if (exit)
-				break;
-			if (++if_idx < if_start)
-				continue;
-			if (!dev->ops->dump_mpath)
-				continue;
+		if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid,
+				       cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				       netdev, dst, next_hop,
+				       &pinfo) < 0)
+			goto out;
 
-			for (;; ++sta_idx) {
-				rtnl_lock();
-				err = dev->ops->dump_mpath(&dev->wiphy,
-						wdev->netdev, sta_idx, dst,
-						next_hop, &pinfo);
-				rtnl_unlock();
-				if (err) {
-					sta_idx = 0;
-					break;
-				}
-				if (nl80211_send_mpath(skb,
-						NETLINK_CB(cb->skb).pid,
-						cb->nlh->nlmsg_seq, NLM_F_MULTI,
-						wdev->netdev, dst, next_hop,
-						&pinfo) < 0) {
-					exit = 1;
-					break;
-				}
-			}
-		}
-		mutex_unlock(&dev->devlist_mtx);
+		path_idx++;
 	}
-	mutex_unlock(&cfg80211_drv_mutex);
 
-	cb->args[0] = wp_idx;
-	cb->args[1] = if_idx;
-	cb->args[2] = sta_idx;
 
-	return skb->len;
+ out:
+	cb->args[1] = path_idx;
+	err = skb->len;
+ out_err_rtnl:
+	rtnl_unlock();
+ out_err:
+	cfg80211_put_dev(dev);
+ out_put_netdev:
+	dev_put(netdev);
+
+	return err;
 }
 
 static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
@@ -1358,7 +1389,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
 
 	dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -1411,7 +1442,7 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info)
 	dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
 	next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -1446,7 +1477,7 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info)
 	dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
 	next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
@@ -1475,7 +1506,7 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_MAC])
 		dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3-70-g09d2


From 14db74bcc3f7a779cf395a47e26b06a28207571a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 29 Jul 2008 13:22:52 +0200
Subject: mac80211: fix cfg80211 hooks for master interface

The master interface is a virtual interface that is registered
to mac80211, changing that does not seem like a good idea at
the moment. However, since it has no sdata, we cannot accept
any configuration for it. This patch makes the cfg80211 hooks
reject any such attempt.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c | 103 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 88 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 8e7ba0e62cf..297c257864c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -81,6 +81,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 				  enum nl80211_iftype type, u32 *flags,
 				  struct vif_params *params)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct net_device *dev;
 	enum ieee80211_if_types itype;
 	struct ieee80211_sub_if_data *sdata;
@@ -95,6 +96,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 	if (itype == IEEE80211_IF_TYPE_INVALID)
 		return -EINVAL;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	ret = ieee80211_if_change_type(sdata, itype);
@@ -117,12 +121,16 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 			     u8 key_idx, u8 *mac_addr,
 			     struct key_params *params)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta = NULL;
 	enum ieee80211_key_alg alg;
 	struct ieee80211_key *key;
 	int err;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	switch (params->cipher) {
@@ -167,10 +175,14 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
 			     u8 key_idx, u8 *mac_addr)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
 	int ret;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	rcu_read_lock();
@@ -211,7 +223,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
 			     void (*callback)(void *cookie,
 					      struct key_params *params))
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta = NULL;
 	u8 seq[6] = {0};
 	struct key_params params;
@@ -220,6 +233,11 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
 	u16 iv16;
 	int err = -ENOENT;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	rcu_read_lock();
 
 	if (mac_addr) {
@@ -293,8 +311,12 @@ static int ieee80211_config_default_key(struct wiphy *wiphy,
 					struct net_device *dev,
 					u8 key_idx)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
 	rcu_read_lock();
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -475,9 +497,15 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
 static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
 				struct beacon_parameters *params)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
 	struct beacon_data *old;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
 		return -EINVAL;
 
@@ -492,9 +520,15 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
 				struct beacon_parameters *params)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
 	struct beacon_data *old;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
 		return -EINVAL;
 
@@ -508,9 +542,15 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
 
 static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
 	struct beacon_data *old;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
 		return -EINVAL;
 
@@ -646,11 +686,14 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 				 u8 *mac, struct station_parameters *params)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *sdata;
 	int err;
 
+	if (dev == local->mdev || params->vlan == local->mdev)
+		return -EOPNOTSUPP;
+
 	/* Prevent a race with changing the rate control algorithm */
 	if (!netif_running(dev))
 		return -ENETDOWN;
@@ -701,10 +744,15 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
 				 u8 *mac)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	if (mac) {
 		rcu_read_lock();
 
@@ -730,10 +778,13 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 				    u8 *mac,
 				    struct station_parameters *params)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *vlansdata;
 
+	if (dev == local->mdev || params->vlan == local->mdev)
+		return -EOPNOTSUPP;
+
 	rcu_read_lock();
 
 	/* XXX: get sta belonging to dev */
@@ -752,7 +803,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 			return -EINVAL;
 		}
 
-		sta->sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
+		sta->sdata = vlansdata;
 		ieee80211_send_layer2_update(sta);
 	}
 
@@ -767,15 +818,20 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
 				 u8 *dst, u8 *next_hop)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
 	struct sta_info *sta;
 	int err;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
 		return -ENOTSUPP;
 
@@ -817,14 +873,19 @@ static int ieee80211_change_mpath(struct wiphy *wiphy,
 				    struct net_device *dev,
 				    u8 *dst, u8 *next_hop)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
 	struct sta_info *sta;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
 		return -ENOTSUPP;
 
@@ -891,9 +952,15 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
 			       u8 *dst, u8 *next_hop, struct mpath_info *pinfo)
 
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
 		return -ENOTSUPP;
 
@@ -913,9 +980,15 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
 				 int idx, u8 *dst, u8 *next_hop,
 				 struct mpath_info *pinfo)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
 
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
 		return -ENOTSUPP;
 
-- 
cgit v1.2.3-70-g09d2


From 56a6d13dfd49d90d72a1a962246206719dd9d143 Mon Sep 17 00:00:00 2001
From: Luis Carlos Cobo <luisca@cozybit.com>
Date: Tue, 29 Jul 2008 19:59:31 +0200
Subject: mac80211: fix mesh beaconing

This patch fixes mesh beaconing, which was broken by "mac80211: revamp
beacon configuration".

Signed-off-by: Luis Carlos Cobo <luisca@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 35eb767cbcb..acb04133a95 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3299,6 +3299,7 @@ void ieee80211_start_mesh(struct net_device *dev)
 	ifsta = &sdata->u.sta;
 	ifsta->state = IEEE80211_MESH_UP;
 	ieee80211_sta_timer((unsigned long)sdata);
+	ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
 }
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 4a36702e016947a0ce6c0c024673bb5b16d3f618 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Tue, 29 Jul 2008 23:57:58 -0700
Subject: IPv6: datagram_send_ctl() should exit immediately when an error
 occured

When an error occured, datagram_send_ctl() should exit immediately rather than
continue to run the for loop. Otherwise, the variable err might be changed and
the error might be hidden.

Fix this bug by using "goto" instead of "break".

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/datagram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f7b535dec86..410046a8cc9 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -732,7 +732,7 @@ int datagram_send_ctl(struct net *net,
 			LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
 				       cmsg->cmsg_type);
 			err = -EINVAL;
-			break;
+			goto exit_f;
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 8d50b53d66a8a6ae41bafbdcabe401467803f33a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 30 Jul 2008 02:37:46 -0700
Subject: pkt_sched: Fix OOPS on ingress qdisc add.

Bug report from Steven Jan Springl:

	Issuing the following command causes a kernel oops:
		tc qdisc add dev eth0 handle ffff: ingress

The problem mostly stems from all of the special case handling of
ingress qdiscs.

So, to fix this, do the grafting operation the same way we do for TX
qdiscs.  Which means that dev_activate() and dev_deactivate() now do
the "qdisc_sleeping <--> qdisc" transitions on dev->rx_queue too.

Future simplifications are possible now, mainly because it is
impossible for dev_queue->{qdisc,qdisc_sleeping} to be NULL.  There
are NULL checks all over to handle the ingress qdisc special case
that used to exist before this commit.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c          |  4 ++--
 net/sched/sch_api.c     | 57 ++++++++++++++-----------------------------------
 net/sched/sch_generic.c |  8 ++++---
 3 files changed, 23 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8d13a9b9f1d..63d6bcddbf4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2100,7 +2100,7 @@ static int ing_filter(struct sk_buff *skb)
 	rxq = &dev->rx_queue;
 
 	q = rxq->qdisc;
-	if (q) {
+	if (q != &noop_qdisc) {
 		spin_lock(qdisc_lock(q));
 		result = qdisc_enqueue_root(skb, q);
 		spin_unlock(qdisc_lock(q));
@@ -2113,7 +2113,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 					 struct packet_type **pt_prev,
 					 int *ret, struct net_device *orig_dev)
 {
-	if (!skb->dev->rx_queue.qdisc)
+	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
 		goto out;
 
 	if (*pt_prev) {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b0601642e22..4840aff4725 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -572,44 +572,21 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 				     struct Qdisc *qdisc)
 {
+	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
 	spinlock_t *root_lock;
-	struct Qdisc *oqdisc;
-	int ingress;
-
-	ingress = 0;
-	if (qdisc && qdisc->flags&TCQ_F_INGRESS)
-		ingress = 1;
-
-	if (ingress) {
-		oqdisc = dev_queue->qdisc;
-	} else {
-		oqdisc = dev_queue->qdisc_sleeping;
-	}
 
 	root_lock = qdisc_root_lock(oqdisc);
 	spin_lock_bh(root_lock);
 
-	if (ingress) {
-		/* Prune old scheduler */
-		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
-			/* delete */
-			qdisc_reset(oqdisc);
-			dev_queue->qdisc = NULL;
-		} else {  /* new */
-			dev_queue->qdisc = qdisc;
-		}
+	/* Prune old scheduler */
+	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
+		qdisc_reset(oqdisc);
 
-	} else {
-		/* Prune old scheduler */
-		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
-			qdisc_reset(oqdisc);
-
-		/* ... and graft new one */
-		if (qdisc == NULL)
-			qdisc = &noop_qdisc;
-		dev_queue->qdisc_sleeping = qdisc;
-		dev_queue->qdisc = &noop_qdisc;
-	}
+	/* ... and graft new one */
+	if (qdisc == NULL)
+		qdisc = &noop_qdisc;
+	dev_queue->qdisc_sleeping = qdisc;
+	dev_queue->qdisc = &noop_qdisc;
 
 	spin_unlock_bh(root_lock);
 
@@ -678,7 +655,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 
 		ingress = 0;
 		num_q = dev->num_tx_queues;
-		if (q && q->flags & TCQ_F_INGRESS) {
+		if ((q && q->flags & TCQ_F_INGRESS) ||
+		    (new && new->flags & TCQ_F_INGRESS)) {
 			num_q = 1;
 			ingress = 1;
 		}
@@ -692,13 +670,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 			if (!ingress)
 				dev_queue = netdev_get_tx_queue(dev, i);
 
-			if (ingress) {
-				old = dev_graft_qdisc(dev_queue, q);
-			} else {
-				old = dev_graft_qdisc(dev_queue, new);
-				if (new && i > 0)
-					atomic_inc(&new->refcnt);
-			}
+			old = dev_graft_qdisc(dev_queue, new);
+			if (new && i > 0)
+				atomic_inc(&new->refcnt);
+
 			notify_and_destroy(skb, n, classid, old, new);
 		}
 
@@ -817,7 +792,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 				goto err_out3;
 			}
 		}
-		if (parent)
+		if (parent && !(sch->flags & TCQ_F_INGRESS))
 			list_add_tail(&sch->list, &dev_queue->qdisc->list);
 
 		return sch;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index fd2a6cadb11..345838a2e36 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -596,7 +596,7 @@ static void transition_one_qdisc(struct net_device *dev,
 	int *need_watchdog_p = _need_watchdog;
 
 	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
-	if (new_qdisc != &noqueue_qdisc)
+	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
 		*need_watchdog_p = 1;
 }
 
@@ -619,6 +619,7 @@ void dev_activate(struct net_device *dev)
 
 	need_watchdog = 0;
 	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
+	transition_one_qdisc(dev, &dev->rx_queue, NULL);
 
 	if (need_watchdog) {
 		dev->trans_start = jiffies;
@@ -677,6 +678,7 @@ void dev_deactivate(struct net_device *dev)
 	bool running;
 
 	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
+	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
 
 	dev_watchdog_down(dev);
 
@@ -718,7 +720,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
 void dev_init_scheduler(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
-	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);
+	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
 
 	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
 }
@@ -745,6 +747,6 @@ static void shutdown_scheduler_queue(struct net_device *dev,
 void dev_shutdown(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
-	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
+	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
 	WARN_ON(timer_pending(&dev->watchdog_timer));
 }
-- 
cgit v1.2.3-70-g09d2


From 785957d3e8c6fb37b18bf671923a76dbd8240025 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 30 Jul 2008 03:03:15 -0700
Subject: tcp: MD5: Use MIB counter instead of warning for MD5 mismatch.

From a report by Matti Aarnio, and preliminary patch by Adam Langley.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h |  2 ++
 net/ipv4/proc.c      |  2 ++
 net/ipv4/tcp_ipv4.c  | 10 ++--------
 net/ipv6/tcp_ipv6.c  | 27 ++++++++-------------------
 4 files changed, 14 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 5df62ef1280..7a6e6bba4a7 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -214,6 +214,8 @@ enum
 	LINUX_MIB_TCPDSACKIGNOREDOLD,		/* TCPSACKIgnoredOld */
 	LINUX_MIB_TCPDSACKIGNOREDNOUNDO,	/* TCPSACKIgnoredNoUndo */
 	LINUX_MIB_TCPSPURIOUSRTOS,		/* TCPSpuriousRTOs */
+	LINUX_MIB_TCPMD5NOTFOUND,		/* TCPMD5NotFound */
+	LINUX_MIB_TCPMD5UNEXPECTED,		/* TCPMD5Unexpected */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 834356ea99d..8f5a403f6f6 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -232,6 +232,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD),
 	SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO),
 	SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
+	SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND),
+	SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a2b06d0cc26..b3875c0d83c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1116,18 +1116,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 		return 0;
 
 	if (hash_expected && !hash_location) {
-		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
-			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
-			       NIPQUAD(iph->saddr), ntohs(th->source),
-			       NIPQUAD(iph->daddr), ntohs(th->dest));
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
 		return 1;
 	}
 
 	if (!hash_expected && hash_location) {
-		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
-			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
-			       NIPQUAD(iph->saddr), ntohs(th->source),
-			       NIPQUAD(iph->daddr), ntohs(th->dest));
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
 		return 1;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cff778b23a7..1db45216b23 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -849,28 +849,17 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
 	hash_location = tcp_parse_md5sig_option(th);
 
-	/* do we have a hash as expected? */
-	if (!hash_expected) {
-		if (!hash_location)
-			return 0;
-		if (net_ratelimit()) {
-			printk(KERN_INFO "MD5 Hash NOT expected but found "
-			       "(" NIP6_FMT ", %u)->"
-			       "(" NIP6_FMT ", %u)\n",
-			       NIP6(ip6h->saddr), ntohs(th->source),
-			       NIP6(ip6h->daddr), ntohs(th->dest));
-		}
+	/* We've parsed the options - do we have a hash? */
+	if (!hash_expected && !hash_location)
+		return 0;
+
+	if (hash_expected && !hash_location) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
 		return 1;
 	}
 
-	if (!hash_location) {
-		if (net_ratelimit()) {
-			printk(KERN_INFO "MD5 Hash expected but NOT found "
-			       "(" NIP6_FMT ", %u)->"
-			       "(" NIP6_FMT ", %u)\n",
-			       NIP6(ip6h->saddr), ntohs(th->source),
-			       NIP6(ip6h->daddr), ntohs(th->dest));
-		}
+	if (!hash_expected && hash_location) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
 		return 1;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 17ef51fce03758736e9051c4360eca237dd0aaeb Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Wed, 30 Jul 2008 03:12:31 -0700
Subject: ipv6: Fix useless proc net sockstat6 removal

This call is no longer needed, sockstat6 is per namespace so it is
removed at the namespace subsystem destruction.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index f82f6074cf8..0179b66864f 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -286,7 +286,6 @@ proc_net_fail:
 
 void ipv6_misc_proc_exit(void)
 {
-	proc_net_remove(&init_net, "sockstat6");
 	proc_net_remove(&init_net, "dev_snmp6");
 	proc_net_remove(&init_net, "snmp6");
 	unregister_pernet_subsys(&ipv6_proc_ops);
-- 
cgit v1.2.3-70-g09d2


From 031cf19e6f63941506c9baf76ac7adac06edcf08 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Wed, 30 Jul 2008 03:14:01 -0700
Subject: net: Make "networking" one-click deselectable.

Use a menuconfig directive to make all of networking support one-click
deselectable from the top-level menu.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Kconfig | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index b9866875174..7612cc8c337 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -2,9 +2,7 @@
 # Network configuration
 #
 
-menu "Networking"
-
-config NET
+menuconfig NET
 	bool "Networking support"
 	---help---
 	  Unless you really know what you are doing, you should say Y here.
@@ -22,7 +20,6 @@ config NET
 	  recommended to read the NET-HOWTO, available from
 	  <http://www.tldp.org/docs.html#howto>.
 
-# Make sure that all config symbols are dependent on NET
 if NET
 
 menu "Networking options"
@@ -252,5 +249,3 @@ source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
 
 endif   # if NET
-endmenu # Networking
-
-- 
cgit v1.2.3-70-g09d2


From 4adf0af6818f3ea52421dc0bae836cfaf20ef72a Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Date: Wed, 30 Jul 2008 16:27:55 -0700
Subject: bridge: send correct MTU value in PMTU (revised)

When bridging interfaces with different MTUs, the bridge correctly chooses
the minimum of the MTUs of the physical devices as the bridges MTU.  But
when a frame is passed which fits through the incoming, but not through
the outgoing interface, a "Fragmentation Needed" packet is generated.

However, the propagated MTU is hardcoded to 1500, which is wrong in this
situation.  The sender will repeat the packet again with the same frame
size, and the same problem will occur again.

Instead of sending 1500, the (correct) MTU value of the bridge is now sent
via PMTU.  To achieve this, the corresponding rtable structure is stored
in its net_bridge structure.

Modified to get rid of fake_net_device as well.

Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c    |  9 ++++++-
 net/bridge/br_if.c        |  3 +++
 net/bridge/br_netfilter.c | 63 +++++++++++++++++++++++++----------------------
 net/bridge/br_private.h   |  6 +++++
 4 files changed, 51 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index d9449df7cad..9b58d70b0e7 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -68,10 +68,17 @@ static int br_dev_stop(struct net_device *dev)
 
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
-	if (new_mtu < 68 || new_mtu > br_min_mtu(netdev_priv(dev)))
+	struct net_bridge *br = netdev_priv(dev);
+	if (new_mtu < 68 || new_mtu > br_min_mtu(br))
 		return -EINVAL;
 
 	dev->mtu = new_mtu;
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+	/* remember the MTU in the rtable for PMTU */
+	br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu;
+#endif
+
 	return 0;
 }
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a072ea5ca6f..63c18aacde8 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -202,6 +202,9 @@ static struct net_device *new_bridge_dev(const char *name)
 	br->topology_change = 0;
 	br->topology_change_detected = 0;
 	br->ageing_time = 300 * HZ;
+
+	br_netfilter_rtable_init(br);
+
 	INIT_LIST_HEAD(&br->age_list);
 
 	br_stp_timer_init(br);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index bb90cd7bace..6e280a8a31e 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -101,33 +101,30 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
 	 pppoe_proto(skb) == htons(PPP_IPV6) && \
 	 brnf_filter_pppoe_tagged)
 
-/* We need these fake structures to make netfilter happy --
- * lots of places assume that skb->dst != NULL, which isn't
- * all that unreasonable.
- *
+/*
+ * Initialize bogus route table used to keep netfilter happy.
  * Currently, we fill in the PMTU entry because netfilter
  * refragmentation needs it, and the rt_flags entry because
  * ipt_REJECT needs it.  Future netfilter modules might
- * require us to fill additional fields. */
-static struct net_device __fake_net_device = {
-	.hard_header_len	= ETH_HLEN,
-#ifdef CONFIG_NET_NS
-	.nd_net			= &init_net,
-#endif
-};
+ * require us to fill additional fields.
+ */
+void br_netfilter_rtable_init(struct net_bridge *br)
+{
+	struct rtable *rt = &br->fake_rtable;
 
-static struct rtable __fake_rtable = {
-	.u = {
-		.dst = {
-			.__refcnt		= ATOMIC_INIT(1),
-			.dev			= &__fake_net_device,
-			.path			= &__fake_rtable.u.dst,
-			.metrics		= {[RTAX_MTU - 1] = 1500},
-			.flags			= DST_NOXFRM,
-		}
-	},
-	.rt_flags	= 0,
-};
+	atomic_set(&rt->u.dst.__refcnt, 1);
+	rt->u.dst.dev = &br->dev;
+	rt->u.dst.path = &rt->u.dst;
+	rt->u.dst.metrics[RTAX_MTU - 1] = 1500;
+	rt->u.dst.flags	= DST_NOXFRM;
+}
+
+static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
+{
+	struct net_bridge_port *port = rcu_dereference(dev->br_port);
+
+	return port ? &port->br->fake_rtable : NULL;
+}
 
 static inline struct net_device *bridge_parent(const struct net_device *dev)
 {
@@ -226,8 +223,12 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	}
 	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
 
-	skb->rtable = &__fake_rtable;
-	dst_hold(&__fake_rtable.u.dst);
+	skb->rtable = bridge_parent_rtable(nf_bridge->physindev);
+	if (!skb->rtable) {
+		kfree_skb(skb);
+		return 0;
+	}
+	dst_hold(&skb->rtable->u.dst);
 
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_push_encap_header(skb);
@@ -391,8 +392,12 @@ bridged_dnat:
 			skb->pkt_type = PACKET_HOST;
 		}
 	} else {
-		skb->rtable = &__fake_rtable;
-		dst_hold(&__fake_rtable.u.dst);
+		skb->rtable = bridge_parent_rtable(nf_bridge->physindev);
+		if (!skb->rtable) {
+			kfree_skb(skb);
+			return 0;
+		}
+		dst_hold(&skb->rtable->u.dst);
 	}
 
 	skb->dev = nf_bridge->physindev;
@@ -611,8 +616,8 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
 {
-	if (skb->rtable == &__fake_rtable) {
-		dst_release(&__fake_rtable.u.dst);
+	if (skb->rtable && skb->rtable == bridge_parent_rtable(in)) {
+		dst_release(&skb->rtable->u.dst);
 		skb->rtable = NULL;
 	}
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 815ed38925b..c3dc18ddc04 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -15,6 +15,7 @@
 
 #include <linux/netdevice.h>
 #include <linux/if_bridge.h>
+#include <net/route.h>
 
 #define BR_HASH_BITS 8
 #define BR_HASH_SIZE (1 << BR_HASH_BITS)
@@ -92,6 +93,9 @@ struct net_bridge
 	struct hlist_head		hash[BR_HASH_SIZE];
 	struct list_head		age_list;
 	unsigned long			feature_mask;
+#ifdef CONFIG_BRIDGE_NETFILTER
+	struct rtable 			fake_rtable;
+#endif
 	unsigned long			flags;
 #define BR_SET_MAC_ADDR		0x00000001
 
@@ -197,9 +201,11 @@ extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __us
 #ifdef CONFIG_BRIDGE_NETFILTER
 extern int br_netfilter_init(void);
 extern void br_netfilter_fini(void);
+extern void br_netfilter_rtable_init(struct net_bridge *);
 #else
 #define br_netfilter_init()	(0)
 #define br_netfilter_fini()	do { } while(0)
+#define br_netfilter_rtable_init(x)
 #endif
 
 /* br_stp.c */
-- 
cgit v1.2.3-70-g09d2


From 6a8341b68b5269de71c32c6df91f4b0298da031d Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 30 Jul 2008 16:30:15 -0700
Subject: net: use the common ascii hex helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/skfp/smt.c             | 13 ++++++-------
 net/ipv4/netfilter/ipt_CLUSTERIP.c |  5 ++---
 2 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/drivers/net/skfp/smt.c b/drivers/net/skfp/smt.c
index ffbfb1b79f9..805383b33d3 100644
--- a/drivers/net/skfp/smt.c
+++ b/drivers/net/skfp/smt.c
@@ -19,6 +19,7 @@
 #include "h/smc.h"
 #include "h/smt_p.h"
 #include <linux/bitrev.h>
+#include <linux/kernel.h>
 
 #define KERNEL
 #include "h/smtstate.h"
@@ -1730,20 +1731,18 @@ void fddi_send_antc(struct s_smc *smc, struct fddi_addr *dest)
 #endif
 
 #ifdef	DEBUG
-#define hextoasc(x)	"0123456789abcdef"[x]
-
 char *addr_to_string(struct fddi_addr *addr)
 {
 	int	i ;
 	static char	string[6*3] = "****" ;
 
 	for (i = 0 ; i < 6 ; i++) {
-		string[i*3] = hextoasc((addr->a[i]>>4)&0xf) ;
-		string[i*3+1] = hextoasc((addr->a[i])&0xf) ;
-		string[i*3+2] = ':' ;
+		string[i * 3] = hex_asc_hi(addr->a[i]);
+		string[i * 3 + 1] = hex_asc_lo(addr->a[i]);
+		string[i * 3 + 2] = ':';
 	}
-	string[5*3+2] = 0 ;
-	return(string) ;
+	string[5 * 3 + 2] = 0;
+	return(string);
 }
 #endif
 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1819ad7ab91..fafe8ebb4c5 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -475,11 +475,10 @@ static void arp_print(struct arp_payload *payload)
 #define HBUFFERLEN 30
 	char hbuffer[HBUFFERLEN];
 	int j,k;
-	const char hexbuf[]= "0123456789abcdef";
 
 	for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) {
-		hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15];
-		hbuffer[k++]=hexbuf[payload->src_hw[j]&15];
+		hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
+		hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
 		hbuffer[k++]=':';
 	}
 	hbuffer[--k]='\0';
-- 
cgit v1.2.3-70-g09d2


From cba5cbd1559f49bec76e54de6ed21b7df3742ada Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 30 Jul 2008 16:31:46 -0700
Subject: atm: fix const assignment/discard warnings in the ATM networking
 driver

Fix const assignment/discard warnings in the ATM networking driver.

The lane2_assoc_ind() function needed its arguments changing to match changes
in the lane2_ops struct (patch 61c33e012964ce358b42d2a1e9cd309af5dab02b
"atm: use const where reasonable").

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/mpc.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 4fccaa1e07b..11b16d16661 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -62,11 +62,13 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg, struct mpoa_client *mpc
 static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc);
 static void set_mps_mac_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc);
 
-static uint8_t *copy_macs(struct mpoa_client *mpc, uint8_t *router_mac,
-			  uint8_t *tlvs, uint8_t mps_macs, uint8_t device_type);
+static const uint8_t *copy_macs(struct mpoa_client *mpc,
+				const uint8_t *router_mac,
+				const uint8_t *tlvs, uint8_t mps_macs,
+				uint8_t device_type);
 static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry);
 
-static void send_set_mps_ctrl_addr(char *addr, struct mpoa_client *mpc);
+static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc);
 static void mpoad_close(struct atm_vcc *vcc);
 static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb);
 
@@ -351,12 +353,12 @@ static const char *mpoa_device_type_string(char type)
  * lec sees a TLV it uses the pointer to call this function.
  *
  */
-static void lane2_assoc_ind(struct net_device *dev, uint8_t *mac_addr,
-			    uint8_t *tlvs, uint32_t sizeoftlvs)
+static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr,
+			    const u8 *tlvs, u32 sizeoftlvs)
 {
 	uint32_t type;
 	uint8_t length, mpoa_device_type, number_of_mps_macs;
-	uint8_t *end_of_tlvs;
+	const uint8_t *end_of_tlvs;
 	struct mpoa_client *mpc;
 
 	mpoa_device_type = number_of_mps_macs = 0; /* silence gcc */
@@ -430,8 +432,10 @@ static void lane2_assoc_ind(struct net_device *dev, uint8_t *mac_addr,
  * plus the possible MAC address(es) to mpc->mps_macs.
  * For a freshly allocated MPOA client mpc->mps_macs == 0.
  */
-static uint8_t *copy_macs(struct mpoa_client *mpc, uint8_t *router_mac,
-			  uint8_t *tlvs, uint8_t mps_macs, uint8_t device_type)
+static const uint8_t *copy_macs(struct mpoa_client *mpc,
+				const uint8_t *router_mac,
+				const uint8_t *tlvs, uint8_t mps_macs,
+				uint8_t device_type)
 {
 	int num_macs;
 	num_macs = (mps_macs > 1) ? mps_macs : 1;
@@ -811,7 +815,7 @@ static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg)
 	return arg;
 }
 
-static void send_set_mps_ctrl_addr(char *addr, struct mpoa_client *mpc)
+static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc)
 {
 	struct k_message mesg;
 
-- 
cgit v1.2.3-70-g09d2


From ae375044d31075a31de5a839e07ded7f67b660aa Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 31 Jul 2008 00:38:01 -0700
Subject: netfilter: nf_conntrack_tcp: decrease timeouts while data in
 unacknowledged

In order to time out dead connections quicker, keep track of outstanding data
and cap the timeout.

Suggested by Herbert Xu.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nf_conntrack_tcp.h |  3 +++
 net/netfilter/nf_conntrack_proto_tcp.c     | 29 ++++++++++++++++++++++++-----
 2 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index 22ce29995f1..a049df4f223 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -30,6 +30,9 @@ enum tcp_conntrack {
 /* Be liberal in window checking */
 #define IP_CT_TCP_FLAG_BE_LIBERAL		0x08
 
+/* Has unacknowledged data */
+#define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED	0x10
+
 struct nf_ct_tcp_flags {
 	u_int8_t flags;
 	u_int8_t mask;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 420a10d8eb1..6f61261888e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -67,7 +67,8 @@ static const char *const tcp_conntrack_names[] = {
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds
    to ~13-30min depending on RTO. */
-static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
+static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly    =   5 MINS;
+static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly =   5 MINS;
 
 static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
 	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
@@ -625,8 +626,10 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		swin = win + (sack - ack);
 		if (sender->td_maxwin < swin)
 			sender->td_maxwin = swin;
-		if (after(end, sender->td_end))
+		if (after(end, sender->td_end)) {
 			sender->td_end = end;
+			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+		}
 		/*
 		 * Update receiver data.
 		 */
@@ -637,6 +640,8 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			if (win == 0)
 				receiver->td_maxend++;
 		}
+		if (ack == receiver->td_end)
+			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 
 		/*
 		 * Check retransmissions.
@@ -951,9 +956,16 @@ static int tcp_packet(struct nf_conn *ct,
 	if (old_state != new_state
 	    && new_state == TCP_CONNTRACK_FIN_WAIT)
 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
-	timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans
-		  && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
-		  ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state];
+
+	if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans &&
+	    tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans)
+		timeout = nf_ct_tcp_timeout_max_retrans;
+	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
+		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
+		 tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged)
+		timeout = nf_ct_tcp_timeout_unacknowledged;
+	else
+		timeout = tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
 	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
@@ -1235,6 +1247,13 @@ static struct ctl_table tcp_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
+	{
+		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
+		.data		= &nf_ct_tcp_timeout_unacknowledged,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
 	{
 		.ctl_name	= NET_NF_CONNTRACK_TCP_LOOSE,
 		.procname	= "nf_conntrack_tcp_loose",
-- 
cgit v1.2.3-70-g09d2


From a8ddc9163c6a16cd62531dba1ec5020484e33b02 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 31 Jul 2008 00:38:31 -0700
Subject: netfilter: ipt_recent: fix race between recent_mt_destroy and proc
 manipulations

The thing is that recent_mt_destroy first flushes the entries
from table with the recent_table_flush and only *after* this
removes the proc file, corresponding to that table.

Thus, if we manage to write to this file the '+XXX' command we
will leak some entries. If we manage to write there a 'clean'
command we'll race in two recent_table_flush flows, since the
recent_mt_destroy calls this outside the recent_lock.

The proper solution as I see it is to remove the proc file first
and then go on with flushing the table. This flushing becomes
safe w/o the lock, since the table is already inaccessible from
the outside.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_recent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 21cb053f5d7..3974d7cae5c 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -305,10 +305,10 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo)
 		spin_lock_bh(&recent_lock);
 		list_del(&t->list);
 		spin_unlock_bh(&recent_lock);
-		recent_table_flush(t);
 #ifdef CONFIG_PROC_FS
 		remove_proc_entry(t->name, proc_dir);
 #endif
+		recent_table_flush(t);
 		kfree(t);
 	}
 	mutex_unlock(&recent_mutex);
-- 
cgit v1.2.3-70-g09d2


From 967ab999a090b1a4e7d3c7febfd6d89b42fb4cf4 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 31 Jul 2008 00:38:52 -0700
Subject: netfilter: xt_hashlimit: fix race between htable_destroy and
 htable_gc

Deleting a timer with del_timer doesn't guarantee, that the
timer function is not running at the moment of deletion. Thus
in the xt_hashlimit case we can get into a ticklish situation
when the htable_gc rearms the timer back and we'll actually
delete an entry with a pending timer.

Fix it with using del_timer_sync().

AFAIK del_timer_sync checks for the timer to be pending by
itself, so I remove the check.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_hashlimit.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 6809af542a2..d9418a26781 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -367,9 +367,7 @@ static void htable_gc(unsigned long htlong)
 
 static void htable_destroy(struct xt_hashlimit_htable *hinfo)
 {
-	/* remove timer, if it is pending */
-	if (timer_pending(&hinfo->timer))
-		del_timer(&hinfo->timer);
+	del_timer_sync(&hinfo->timer);
 
 	/* remove proc entry */
 	remove_proc_entry(hinfo->pde->name,
-- 
cgit v1.2.3-70-g09d2


From c3f26a269c2421f97f10cf8ed05d5099b573af4d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 31 Jul 2008 16:58:50 -0700
Subject: netdev: Fix lockdep warnings in multiqueue configurations.

When support for multiple TX queues were added, the
netif_tx_lock() routines we converted to iterate over
all TX queues and grab each queue's spinlock.

This causes heartburn for lockdep and it's not a healthy
thing to do with lots of TX queues anyways.

So modify this to use a top-level lock and a "frozen"
state for the individual TX queues.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ifb.c         | 12 ++++---
 include/linux/netdevice.h | 86 ++++++++++++++++++++++++++++++-----------------
 net/core/dev.c            |  1 +
 net/core/netpoll.c        |  1 +
 net/core/pktgen.c         |  7 ++--
 net/sched/sch_generic.c   |  6 ++--
 net/sched/sch_teql.c      |  9 ++---
 7 files changed, 78 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 0960e69b2da..e4fbefc8c82 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -69,18 +69,20 @@ static void ri_tasklet(unsigned long dev)
 	struct net_device *_dev = (struct net_device *)dev;
 	struct ifb_private *dp = netdev_priv(_dev);
 	struct net_device_stats *stats = &_dev->stats;
+	struct netdev_queue *txq;
 	struct sk_buff *skb;
 
+	txq = netdev_get_tx_queue(_dev, 0);
 	dp->st_task_enter++;
 	if ((skb = skb_peek(&dp->tq)) == NULL) {
 		dp->st_txq_refl_try++;
-		if (netif_tx_trylock(_dev)) {
+		if (__netif_tx_trylock(txq)) {
 			dp->st_rxq_enter++;
 			while ((skb = skb_dequeue(&dp->rq)) != NULL) {
 				skb_queue_tail(&dp->tq, skb);
 				dp->st_rx2tx_tran++;
 			}
-			netif_tx_unlock(_dev);
+			__netif_tx_unlock(txq);
 		} else {
 			/* reschedule */
 			dp->st_rxq_notenter++;
@@ -115,7 +117,7 @@ static void ri_tasklet(unsigned long dev)
 			BUG();
 	}
 
-	if (netif_tx_trylock(_dev)) {
+	if (__netif_tx_trylock(txq)) {
 		dp->st_rxq_check++;
 		if ((skb = skb_peek(&dp->rq)) == NULL) {
 			dp->tasklet_pending = 0;
@@ -123,10 +125,10 @@ static void ri_tasklet(unsigned long dev)
 				netif_wake_queue(_dev);
 		} else {
 			dp->st_rxq_rsch++;
-			netif_tx_unlock(_dev);
+			__netif_tx_unlock(txq);
 			goto resched;
 		}
-		netif_tx_unlock(_dev);
+		__netif_tx_unlock(txq);
 	} else {
 resched:
 		dp->tasklet_pending = 1;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4d056ceab9..ee583f642a9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -440,6 +440,7 @@ static inline void napi_synchronize(const struct napi_struct *n)
 enum netdev_queue_state_t
 {
 	__QUEUE_STATE_XOFF,
+	__QUEUE_STATE_FROZEN,
 };
 
 struct netdev_queue {
@@ -636,7 +637,7 @@ struct net_device
 	unsigned int		real_num_tx_queues;
 
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
-
+	spinlock_t		tx_global_lock;
 /*
  * One part is mostly used on xmit path (device)
  */
@@ -1099,6 +1100,11 @@ static inline int netif_queue_stopped(const struct net_device *dev)
 	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
 }
 
+static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue)
+{
+	return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state);
+}
+
 /**
  *	netif_running - test if up
  *	@dev: network device
@@ -1475,6 +1481,26 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 	txq->xmit_lock_owner = smp_processor_id();
 }
 
+static inline int __netif_tx_trylock(struct netdev_queue *txq)
+{
+	int ok = spin_trylock(&txq->_xmit_lock);
+	if (likely(ok))
+		txq->xmit_lock_owner = smp_processor_id();
+	return ok;
+}
+
+static inline void __netif_tx_unlock(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+	spin_unlock(&txq->_xmit_lock);
+}
+
+static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+	spin_unlock_bh(&txq->_xmit_lock);
+}
+
 /**
  *	netif_tx_lock - grab network device transmit lock
  *	@dev: network device
@@ -1484,12 +1510,23 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
  */
 static inline void netif_tx_lock(struct net_device *dev)
 {
-	int cpu = smp_processor_id();
 	unsigned int i;
+	int cpu;
 
+	spin_lock(&dev->tx_global_lock);
+	cpu = smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+		/* We are the only thread of execution doing a
+		 * freeze, but we have to grab the _xmit_lock in
+		 * order to synchronize with threads which are in
+		 * the ->hard_start_xmit() handler and already
+		 * checked the frozen bit.
+		 */
 		__netif_tx_lock(txq, cpu);
+		set_bit(__QUEUE_STATE_FROZEN, &txq->state);
+		__netif_tx_unlock(txq);
 	}
 }
 
@@ -1499,40 +1536,22 @@ static inline void netif_tx_lock_bh(struct net_device *dev)
 	netif_tx_lock(dev);
 }
 
-static inline int __netif_tx_trylock(struct netdev_queue *txq)
-{
-	int ok = spin_trylock(&txq->_xmit_lock);
-	if (likely(ok))
-		txq->xmit_lock_owner = smp_processor_id();
-	return ok;
-}
-
-static inline int netif_tx_trylock(struct net_device *dev)
-{
-	return __netif_tx_trylock(netdev_get_tx_queue(dev, 0));
-}
-
-static inline void __netif_tx_unlock(struct netdev_queue *txq)
-{
-	txq->xmit_lock_owner = -1;
-	spin_unlock(&txq->_xmit_lock);
-}
-
-static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
-{
-	txq->xmit_lock_owner = -1;
-	spin_unlock_bh(&txq->_xmit_lock);
-}
-
 static inline void netif_tx_unlock(struct net_device *dev)
 {
 	unsigned int i;
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		__netif_tx_unlock(txq);
-	}
 
+		/* No need to grab the _xmit_lock here.  If the
+		 * queue is not stopped for another reason, we
+		 * force a schedule.
+		 */
+		clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
+		if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
+			__netif_schedule(txq->qdisc);
+	}
+	spin_unlock(&dev->tx_global_lock);
 }
 
 static inline void netif_tx_unlock_bh(struct net_device *dev)
@@ -1556,13 +1575,18 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
 static inline void netif_tx_disable(struct net_device *dev)
 {
 	unsigned int i;
+	int cpu;
 
-	netif_tx_lock_bh(dev);
+	local_bh_disable();
+	cpu = smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+		__netif_tx_lock(txq, cpu);
 		netif_tx_stop_queue(txq);
+		__netif_tx_unlock(txq);
 	}
-	netif_tx_unlock_bh(dev);
+	local_bh_enable();
 }
 
 static inline void netif_addr_lock(struct net_device *dev)
diff --git a/net/core/dev.c b/net/core/dev.c
index 63d6bcddbf4..69320a56a08 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4200,6 +4200,7 @@ static void netdev_init_queues(struct net_device *dev)
 {
 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+	spin_lock_init(&dev->tx_global_lock);
 }
 
 /**
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c12720895ec..6c7af390be0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -70,6 +70,7 @@ static void queue_process(struct work_struct *work)
 		local_irq_save(flags);
 		__netif_tx_lock(txq, smp_processor_id());
 		if (netif_tx_queue_stopped(txq) ||
+		    netif_tx_queue_frozen(txq) ||
 		    dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index c7d484f7e1c..3284605f2ec 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3305,6 +3305,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 	txq = netdev_get_tx_queue(odev, queue_map);
 	if (netif_tx_queue_stopped(txq) ||
+	    netif_tx_queue_frozen(txq) ||
 	    need_resched()) {
 		idle_start = getCurUs();
 
@@ -3320,7 +3321,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 		pkt_dev->idle_acc += getCurUs() - idle_start;
 
-		if (netif_tx_queue_stopped(txq)) {
+		if (netif_tx_queue_stopped(txq) ||
+		    netif_tx_queue_frozen(txq)) {
 			pkt_dev->next_tx_us = getCurUs();	/* TODO */
 			pkt_dev->next_tx_ns = 0;
 			goto out;	/* Try the next interface */
@@ -3352,7 +3354,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	txq = netdev_get_tx_queue(odev, queue_map);
 
 	__netif_tx_lock_bh(txq);
-	if (!netif_tx_queue_stopped(txq)) {
+	if (!netif_tx_queue_stopped(txq) &&
+	    !netif_tx_queue_frozen(txq)) {
 
 		atomic_inc(&(pkt_dev->skb->users));
 	      retry_now:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 345838a2e36..9c9cd4d9489 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -135,7 +135,8 @@ static inline int qdisc_restart(struct Qdisc *q)
 	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_subqueue_stopped(dev, skb))
+	if (!netif_tx_queue_stopped(txq) &&
+	    !netif_tx_queue_frozen(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
 	HARD_TX_UNLOCK(dev, txq);
 
@@ -162,7 +163,8 @@ static inline int qdisc_restart(struct Qdisc *q)
 		break;
 	}
 
-	if (ret && netif_tx_queue_stopped(txq))
+	if (ret && (netif_tx_queue_stopped(txq) ||
+		    netif_tx_queue_frozen(txq)))
 		ret = 0;
 
 	return ret;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 537223642b6..2c35c678563 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -305,10 +305,11 @@ restart:
 
 		switch (teql_resolve(skb, skb_res, slave)) {
 		case 0:
-			if (netif_tx_trylock(slave)) {
-				if (!__netif_subqueue_stopped(slave, subq) &&
+			if (__netif_tx_trylock(slave_txq)) {
+				if (!netif_tx_queue_stopped(slave_txq) &&
+				    !netif_tx_queue_frozen(slave_txq) &&
 				    slave->hard_start_xmit(skb, slave) == 0) {
-					netif_tx_unlock(slave);
+					__netif_tx_unlock(slave_txq);
 					master->slaves = NEXT_SLAVE(q);
 					netif_wake_queue(dev);
 					master->stats.tx_packets++;
@@ -316,7 +317,7 @@ restart:
 						qdisc_pkt_len(skb);
 					return 0;
 				}
-				netif_tx_unlock(slave);
+				__netif_tx_unlock(slave_txq);
 			}
 			if (netif_queue_stopped(dev))
 				busy = 1;
-- 
cgit v1.2.3-70-g09d2


From 77e2f14f71d68d05945f1d30ca55b5194d6ab1ce Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Thu, 31 Jul 2008 20:46:47 -0700
Subject: ipv6: Fix ip6_xmit to send fragments if ipfragok is true

SCTP used ip6_xmit() to send fragments after received ICMP packet too
big message. But while send packet used ip6_xmit, the skb->local_df is
not initialized. So when skb if enter ip6_fragment(), the following
code will discard the skb.

ip6_fragment(...)
{
    if (!skb->local_df) {
        ...
        return -EMSGSIZE;
    }
    ...
}

SCTP do the following step:
1. send packet ip6_xmit(skb, ipfragok=0)
2. received ICMP packet too big message
3. if PMTUD_ENABLE: ip6_xmit(skb, ipfragok=1)

This patch fixed the problem by set local_df if ipfragok is true.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6811901e6b1..a027003d69a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -236,6 +236,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	skb_reset_network_header(skb);
 	hdr = ipv6_hdr(skb);
 
+	/* Allow local fragmentation. */
+	if (ipfragok)
+		skb->local_df = 1;
+
 	/*
 	 *	Fill in the IPv6 header
 	 */
-- 
cgit v1.2.3-70-g09d2


From 90b7e1120bb43ffaabb88d28f80a0c2e13167b15 Mon Sep 17 00:00:00 2001
From: Adam Langley <agl@imperialviolet.org>
Date: Thu, 31 Jul 2008 20:49:48 -0700
Subject: tcp: MD5: Fix MD5 signatures on certain ACK packets

I noticed, looking at tcpdumps, that timewait ACKs were getting sent
with an incorrect MD5 signature when signatures were enabled.

I broke this in 49a72dfb8814c2d65bd9f8c9c6daf6395a1ec58d ("tcp: Fix
MD5 signatures for non-linear skbs"). I didn't take into account that
the skb passed to tcp_*_send_ack was the inbound packet, thus the
source and dest addresses need to be swapped when calculating the MD5
pseudoheader.

Signed-off-by: Adam Langley <agl@imperialviolet.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 4 ++--
 net/ipv6/tcp_ipv6.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b3875c0d83c..91a8cfddf1c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -655,8 +655,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 		rep.th.doff = arg.iov[0].iov_len/4;
 
 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
-				    key, ip_hdr(skb)->daddr,
-				    ip_hdr(skb)->saddr, &rep.th);
+				    key, ip_hdr(skb)->saddr,
+				    ip_hdr(skb)->daddr, &rep.th);
 	}
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1db45216b23..1bcdcbc71d6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1094,8 +1094,8 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
-				    &ipv6_hdr(skb)->daddr,
-				    &ipv6_hdr(skb)->saddr, t1);
+				    &ipv6_hdr(skb)->saddr,
+				    &ipv6_hdr(skb)->daddr, t1);
 	}
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 8a9204db665365354b349ed5b0bc054f0433a2a4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 31 Jul 2008 20:51:22 -0700
Subject: net/ipv4/route.c: fix build error

fix:

net/ipv4/route.c: In function 'ip_static_sysctl_init':
net/ipv4/route.c:3225: error: 'ipv4_route_path' undeclared (first use in this function)
net/ipv4/route.c:3225: error: (Each undeclared identifier is reported only once
net/ipv4/route.c:3225: error: for each function it appears in.)
net/ipv4/route.c:3225: error: 'ipv4_route_table' undeclared (first use in this function)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 380d6474cf6..d7bf0564b5f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3222,7 +3222,9 @@ int __init ip_rt_init(void)
  */
 void __init ip_static_sysctl_init(void)
 {
+#ifdef CONFIG_SYSCTL
 	register_sysctl_paths(ipv4_route_path, ipv4_route_table);
+#endif
 }
 
 EXPORT_SYMBOL(__ip_select_ident);
-- 
cgit v1.2.3-70-g09d2


From 00b1304c4ca81dd893973cc620b87a5c3ff3f660 Mon Sep 17 00:00:00 2001
From: Adam Langley <agl@imperialviolet.org>
Date: Thu, 31 Jul 2008 21:36:07 -0700
Subject: tcp: MD5: Fix IPv6 signatures

Reported by Stefanos Harhalakis; although 2.6.27-rc1 talks to itself using IPv6
TCP MD5 packets just fine, Stefanos noted that tcpdump claimed that the
signatures were invalid.

I broke this in 49a72dfb8814c2d65bd9f8c9c6daf6395a1ec58d ("tcp: Fix MD5
signatures for non-linear skbs"), it was just a typo.

Note that tcpdump will still sometimes claim that the signatures are incorrect.
A patch to tcpdump has been submitted for this[1].

[1] http://tinyurl.com/6a4fl2

Signed-off-by: Adam Langley <agl@imperialviolet.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1bcdcbc71d6..78185a40921 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -748,7 +748,7 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
 	ipv6_addr_copy(&bp->saddr, saddr);
 	ipv6_addr_copy(&bp->daddr, daddr);
 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
-	bp->len = cpu_to_be16(nbytes);
+	bp->len = cpu_to_be32(nbytes);
 
 	sg_init_one(&sg, bp, sizeof(*bp));
 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
-- 
cgit v1.2.3-70-g09d2


From a1bc6eb4b499ae67ada9a01660010580b6569403 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 Jul 2008 06:32:52 -0400
Subject: [PATCH] ipv4_static_sysctl_init() should be under CONFIG_SYSCTL

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv4/route.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 380d6474cf6..a72a5ad46ec 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3216,6 +3216,7 @@ int __init ip_rt_init(void)
 	return rc;
 }
 
+#ifdef CONFIG_SYSCTL
 /*
  * We really need to sanitize the damn ipv4 init order, then all
  * this nonsense will go away.
@@ -3224,6 +3225,7 @@ void __init ip_static_sysctl_init(void)
 {
 	register_sysctl_paths(ipv4_route_path, ipv4_route_table);
 }
+#endif
 
 EXPORT_SYMBOL(__ip_select_ident);
 EXPORT_SYMBOL(ip_route_input);
-- 
cgit v1.2.3-70-g09d2


From 6e28fbef0f330d7c1cade345eeae003d4e5d6070 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Thu, 31 Jul 2008 10:53:57 -0300
Subject: rfkill: query EV_SW states when rfkill-input (re)?connects to a input
 device

Every time a new input device that is capable of one of the
rfkill EV_SW events (currently only SW_RFKILL_ALL) is connected to
rfkill-input, we must check the states of the input EV_SW switches
and take action.  Otherwise, we will ignore the initial switch state.

We also need to re-check the states of the EV_SW switches after
a device that was under an exclusive grab is released back to us,
since we got no input events from that device while it was grabbed.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill-input.c | 54 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index 8aa82273014..e5b69556bb5 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -109,6 +109,25 @@ static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB);
 static DEFINE_RFKILL_TASK(rfkill_wimax, RFKILL_TYPE_WIMAX);
 static DEFINE_RFKILL_TASK(rfkill_wwan, RFKILL_TYPE_WWAN);
 
+static void rfkill_schedule_evsw_rfkillall(int state)
+{
+	/* EVERY radio type. state != 0 means radios ON */
+	/* handle EPO (emergency power off) through shortcut */
+	if (state) {
+		rfkill_schedule_set(&rfkill_wwan,
+				    RFKILL_STATE_UNBLOCKED);
+		rfkill_schedule_set(&rfkill_wimax,
+				    RFKILL_STATE_UNBLOCKED);
+		rfkill_schedule_set(&rfkill_uwb,
+				    RFKILL_STATE_UNBLOCKED);
+		rfkill_schedule_set(&rfkill_bt,
+				    RFKILL_STATE_UNBLOCKED);
+		rfkill_schedule_set(&rfkill_wlan,
+				    RFKILL_STATE_UNBLOCKED);
+	} else
+		rfkill_schedule_epo();
+}
+
 static void rfkill_event(struct input_handle *handle, unsigned int type,
 			unsigned int code, int data)
 {
@@ -132,21 +151,7 @@ static void rfkill_event(struct input_handle *handle, unsigned int type,
 	} else if (type == EV_SW) {
 		switch (code) {
 		case SW_RFKILL_ALL:
-			/* EVERY radio type. data != 0 means radios ON */
-			/* handle EPO (emergency power off) through shortcut */
-			if (data) {
-				rfkill_schedule_set(&rfkill_wwan,
-						    RFKILL_STATE_UNBLOCKED);
-				rfkill_schedule_set(&rfkill_wimax,
-						    RFKILL_STATE_UNBLOCKED);
-				rfkill_schedule_set(&rfkill_uwb,
-						    RFKILL_STATE_UNBLOCKED);
-				rfkill_schedule_set(&rfkill_bt,
-						    RFKILL_STATE_UNBLOCKED);
-				rfkill_schedule_set(&rfkill_wlan,
-						    RFKILL_STATE_UNBLOCKED);
-			} else
-				rfkill_schedule_epo();
+			rfkill_schedule_evsw_rfkillall(data);
 			break;
 		default:
 			break;
@@ -168,6 +173,7 @@ static int rfkill_connect(struct input_handler *handler, struct input_dev *dev,
 	handle->handler = handler;
 	handle->name = "rfkill";
 
+	/* causes rfkill_start() to be called */
 	error = input_register_handle(handle);
 	if (error)
 		goto err_free_handle;
@@ -185,6 +191,23 @@ static int rfkill_connect(struct input_handler *handler, struct input_dev *dev,
 	return error;
 }
 
+static void rfkill_start(struct input_handle *handle)
+{
+	/* Take event_lock to guard against configuration changes, we
+	 * should be able to deal with concurrency with rfkill_event()
+	 * just fine (which event_lock will also avoid). */
+	spin_lock_irq(&handle->dev->event_lock);
+
+	if (test_bit(EV_SW, handle->dev->evbit)) {
+		if (test_bit(SW_RFKILL_ALL, handle->dev->swbit))
+			rfkill_schedule_evsw_rfkillall(test_bit(SW_RFKILL_ALL,
+							handle->dev->sw));
+		/* add resync for further EV_SW events here */
+	}
+
+	spin_unlock_irq(&handle->dev->event_lock);
+}
+
 static void rfkill_disconnect(struct input_handle *handle)
 {
 	input_close_device(handle);
@@ -225,6 +248,7 @@ static struct input_handler rfkill_handler = {
 	.event =	rfkill_event,
 	.connect =	rfkill_connect,
 	.disconnect =	rfkill_disconnect,
+	.start =	rfkill_start,
 	.name =		"rfkill",
 	.id_table =	rfkill_ids,
 };
-- 
cgit v1.2.3-70-g09d2


From 7c4f4578fc85d42d149f86b47f76c28626a20d92 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Tue, 22 Jul 2008 14:17:37 +0400
Subject: RFKILL: allow one to specify led trigger name

Allow the rfkill driver to specify led trigger name.
By default it still defaults to the name of rfkill switch.

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index c6f2f388cb7..5c24f364718 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -589,7 +589,8 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill)
 #ifdef CONFIG_RFKILL_LEDS
 	int error;
 
-	rfkill->led_trigger.name = rfkill->dev.bus_id;
+	if (!rfkill->led_trigger.name)
+		rfkill->led_trigger.name = rfkill->dev.bus_id;
 	error = led_trigger_register(&rfkill->led_trigger);
 	if (error)
 		rfkill->led_trigger.name = NULL;
-- 
cgit v1.2.3-70-g09d2


From 96185664f10e79d038c084305d3cacff9b52204f Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Tue, 22 Jul 2008 14:21:59 +0400
Subject: RFKILL: set the status of the leds on activation.

Provide default activate function to set the state of the led
when the led becomes bound to the trigger

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 5c24f364718..d2d45655cd1 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -105,6 +105,16 @@ static void rfkill_led_trigger(struct rfkill *rfkill,
 #endif /* CONFIG_RFKILL_LEDS */
 }
 
+#ifdef CONFIG_RFKILL_LEDS
+static void rfkill_led_trigger_activate(struct led_classdev *led)
+{
+	struct rfkill *rfkill = container_of(led->trigger,
+			struct rfkill, led_trigger);
+
+	rfkill_led_trigger(rfkill, rfkill->state);
+}
+#endif /* CONFIG_RFKILL_LEDS */
+
 static void notify_rfkill_state_change(struct rfkill *rfkill)
 {
 	blocking_notifier_call_chain(&rfkill_notifier_list,
@@ -591,6 +601,8 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill)
 
 	if (!rfkill->led_trigger.name)
 		rfkill->led_trigger.name = rfkill->dev.bus_id;
+	if (!rfkill->led_trigger.activate)
+		rfkill->led_trigger.activate = rfkill_led_trigger_activate;
 	error = led_trigger_register(&rfkill->led_trigger);
 	if (error)
 		rfkill->led_trigger.name = NULL;
-- 
cgit v1.2.3-70-g09d2


From f8e79ddd31c3615ddca26b9a469c44a7adbd4e13 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 24 Jul 2008 18:46:44 +0300
Subject: mac80211: fix fragmentation kludge

This patch make mac80211 transmit correctly fragmented packet after
queue was stopped

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/tx.c          | 17 ++++++++++++-----
 net/mac80211/util.c        |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a4f9a832722..a2e200f9811 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -586,6 +586,7 @@ struct ieee80211_local {
 	struct timer_list sta_cleanup;
 
 	unsigned long queues_pending[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)];
+	unsigned long queues_pending_run[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)];
 	struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_QUEUES];
 	struct tasklet_struct tx_pending_tasklet;
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 69019e94387..771ec68b848 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1060,13 +1060,14 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 			  struct ieee80211_tx_data *tx)
 {
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_tx_info *info;
 	int ret, i;
 
-	if (netif_subqueue_stopped(local->mdev, skb))
-		return IEEE80211_TX_AGAIN;
-
 	if (skb) {
+		if (netif_subqueue_stopped(local->mdev, skb))
+			return IEEE80211_TX_AGAIN;
+		info =  IEEE80211_SKB_CB(skb);
+
 		ieee80211_dump_frame(wiphy_name(local->hw.wiphy),
 				     "TX to low-level driver", skb);
 		ret = local->ops->tx(local_to_hw(local), skb);
@@ -1215,6 +1216,7 @@ retry:
 
 		if (ret == IEEE80211_TX_FRAG_AGAIN)
 			skb = NULL;
+
 		set_bit(queue, local->queues_pending);
 		smp_mb();
 		/*
@@ -1708,14 +1710,19 @@ void ieee80211_tx_pending(unsigned long data)
 	netif_tx_lock_bh(dev);
 	for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) {
 		/* Check that this queue is ok */
-		if (__netif_subqueue_stopped(local->mdev, i))
+		if (__netif_subqueue_stopped(local->mdev, i) &&
+		    !test_bit(i, local->queues_pending_run))
 			continue;
 
 		if (!test_bit(i, local->queues_pending)) {
+			clear_bit(i, local->queues_pending_run);
 			ieee80211_wake_queue(&local->hw, i);
 			continue;
 		}
 
+		clear_bit(i, local->queues_pending_run);
+		netif_start_subqueue(local->mdev, i);
+
 		store = &local->pending_packet[i];
 		tx.extra_frag = store->extra_frag;
 		tx.num_extra_frag = store->num_extra_frag;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 19f85e1b369..0d463c80c40 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -361,6 +361,7 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue)
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	if (test_bit(queue, local->queues_pending)) {
+		set_bit(queue, local->queues_pending_run);
 		tasklet_schedule(&local->tx_pending_tasklet);
 	} else {
 		netif_wake_subqueue(local->mdev, queue);
-- 
cgit v1.2.3-70-g09d2


From 5fb662297b8a4bdadd60371c34b760efca948ebc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 2 Aug 2008 20:02:43 -0700
Subject: pkt_sched: Use qdisc_lock() on already sampled root qdisc.

Based upon a bug report by Jeff Kirsher.

Don't use qdisc_root_lock() in these cases as the root
qdisc could have been changed, and we'd thus lock the
wrong object.

Tested by Emil S Tantilov who confirms that this seems
to fix the problem.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c          |  4 ++--
 net/sched/sch_generic.c | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 69320a56a08..da7acacf02b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1796,7 +1796,7 @@ gso:
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
-		spinlock_t *root_lock = qdisc_root_lock(q);
+		spinlock_t *root_lock = qdisc_lock(q);
 
 		spin_lock(root_lock);
 
@@ -1995,7 +1995,7 @@ static void net_tx_action(struct softirq_action *h)
 			smp_mb__before_clear_bit();
 			clear_bit(__QDISC_STATE_SCHED, &q->state);
 
-			root_lock = qdisc_root_lock(q);
+			root_lock = qdisc_lock(q);
 			if (spin_trylock(root_lock)) {
 				qdisc_run(q);
 				spin_unlock(root_lock);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9c9cd4d9489..7cf83b37459 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -29,7 +29,7 @@
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
- * qdisc_root_lock(qdisc) spinlock.
+ * qdisc_lock(qdisc) spinlock.
  *
  * The idea is the following:
  * - enqueue, dequeue are serialized via qdisc root lock
@@ -126,7 +126,7 @@ static inline int qdisc_restart(struct Qdisc *q)
 	if (unlikely((skb = dequeue_skb(q)) == NULL))
 		return 0;
 
-	root_lock = qdisc_root_lock(q);
+	root_lock = qdisc_lock(q);
 
 	/* And release qdisc */
 	spin_unlock(root_lock);
@@ -507,7 +507,7 @@ errout:
 }
 EXPORT_SYMBOL(qdisc_create_dflt);
 
-/* Under qdisc_root_lock(qdisc) and BH! */
+/* Under qdisc_lock(qdisc) and BH! */
 
 void qdisc_reset(struct Qdisc *qdisc)
 {
@@ -543,7 +543,7 @@ static void __qdisc_destroy(struct rcu_head *head)
 	kfree((char *) qdisc - qdisc->padded);
 }
 
-/* Under qdisc_root_lock(qdisc) and BH! */
+/* Under qdisc_lock(qdisc) and BH! */
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
@@ -659,7 +659,7 @@ static bool some_qdisc_is_running(struct net_device *dev, int lock)
 
 		dev_queue = netdev_get_tx_queue(dev, i);
 		q = dev_queue->qdisc;
-		root_lock = qdisc_root_lock(q);
+		root_lock = qdisc_lock(q);
 
 		if (lock)
 			spin_lock_bh(root_lock);
@@ -735,7 +735,7 @@ static void shutdown_scheduler_queue(struct net_device *dev,
 	struct Qdisc *qdisc_default = _qdisc_default;
 
 	if (qdisc) {
-		spinlock_t *root_lock = qdisc_root_lock(qdisc);
+		spinlock_t *root_lock = qdisc_lock(qdisc);
 
 		dev_queue->qdisc = qdisc_default;
 		dev_queue->qdisc_sleeping = qdisc_default;
-- 
cgit v1.2.3-70-g09d2


From 35ed4e75989c4e84a44b25569bbf09b98f923880 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 2 Aug 2008 23:25:50 -0700
Subject: mac80211: Use queue_lock() in ieee80211_ht_agg_queue_remove().

qdisc_root_lock() is only %100 safe to use when the RTNL
semaphore is held.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/wme.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 28437f0001d..4310e2f6566 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -241,12 +241,14 @@ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
 	} else {
 		struct netdev_queue *txq;
 		spinlock_t *root_lock;
+		struct Qdisc *q;
 
 		txq = netdev_get_tx_queue(local->mdev, agg_queue);
-		root_lock = qdisc_root_lock(txq->qdisc);
+		q = rcu_dereference(txq->qdisc);
+		root_lock = qdisc_lock(q);
 
 		spin_lock_bh(root_lock);
-		qdisc_reset(txq->qdisc);
+		qdisc_reset(q);
 		spin_unlock_bh(root_lock);
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From bff69732c9947f821a64a8477f7dcaa9c30e6a69 Mon Sep 17 00:00:00 2001
From: Chris Larson <clarson@mvista.com>
Date: Sun, 3 Aug 2008 01:02:41 -0700
Subject: net: in the first call to neigh_seq_next, call neigh_get_first, not
 neigh_get_idx.

neigh_seq_next won't be called both with *pos > 0 && v ==
SEQ_START_TOKEN, so there's no point calling neigh_get_idx when we're
on the start token, just call neigh_get_first directly.

Signed-off-by: Chris Larson <clarson@mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f62c8af85d3..a57de755c8c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2385,7 +2385,7 @@ void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	void *rc;
 
 	if (v == SEQ_START_TOKEN) {
-		rc = neigh_get_idx(seq, pos);
+		rc = neigh_get_first(seq);
 		goto out;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 745e203164a9057e0de769ff4649e6e455daf753 Mon Sep 17 00:00:00 2001
From: Chris Larson <clarson@mvista.com>
Date: Sun, 3 Aug 2008 01:10:55 -0700
Subject: net: fix missing pneigh entries in the neighbor seq_file code

When pneigh entries exist, but the user's read buffer isn't sufficient to
hold them all, one of the pneigh entries will be missing from the results.

In neigh_get_idx_any, the number of elements which neigh_get_idx
encountered is not correctly subtracted from the position number before
the call to pneigh_get_idx.  neigh_get_idx reduces the position by 1 for
each call to neigh_get_next, but it does not reduce it by one for the
first element (neigh_get_first). The patch alters the neigh_get_idx and
pneigh_get_idx functions to subtract one from pos, for the first element,
when pos is non-zero.

Signed-off-by: Chris Larson <clarson@mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a57de755c8c..9d92e41826e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2281,6 +2281,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
 	struct neighbour *n = neigh_get_first(seq);
 
 	if (n) {
+		--(*pos);
 		while (*pos) {
 			n = neigh_get_next(seq, n, pos);
 			if (!n)
@@ -2341,6 +2342,7 @@ static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
 	struct pneigh_entry *pn = pneigh_get_first(seq);
 
 	if (pn) {
+		--(*pos);
 		while (*pos) {
 			pn = pneigh_get_next(seq, pn, pos);
 			if (!pn)
@@ -2354,10 +2356,11 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
 {
 	struct neigh_seq_state *state = seq->private;
 	void *rc;
+	loff_t idxpos = *pos;
 
-	rc = neigh_get_idx(seq, pos);
+	rc = neigh_get_idx(seq, &idxpos);
 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
-		rc = pneigh_get_idx(seq, pos);
+		rc = pneigh_get_idx(seq, &idxpos);
 
 	return rc;
 }
@@ -2366,7 +2369,6 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
 	__acquires(tbl->lock)
 {
 	struct neigh_seq_state *state = seq->private;
-	loff_t pos_minus_one;
 
 	state->tbl = tbl;
 	state->bucket = 0;
@@ -2374,8 +2376,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
 
 	read_lock_bh(&tbl->lock);
 
-	pos_minus_one = *pos - 1;
-	return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
+	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
 }
 EXPORT_SYMBOL(neigh_seq_start);
 
-- 
cgit v1.2.3-70-g09d2


From e5a4a72d4f88f4389e9340d383ca67031d1b8536 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@marvell.com>
Date: Sun, 3 Aug 2008 01:23:10 -0700
Subject: net: use software GSO for SG+CSUM capable netdevices

If a netdevice does not support hardware GSO, allowing the stack to
use GSO anyway and then splitting the GSO skb into MSS-sized pieces
as it is handed to the netdevice for transmitting is likely still
a win as far as throughput and/or CPU usage are concerned, since it
reduces the number of trips through the output path.

This patch enables the use of GSO on any netdevice that supports SG.
If a GSO skb is then sent to a netdevice that supports SG but does not
support hardware GSO, net/core/dev.c:dev_hard_start_xmit() will take
care of doing the necessary GSO segmentation in software.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index da7acacf02b..cbf80098980 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3988,6 +3988,10 @@ int register_netdevice(struct net_device *dev)
 		}
 	}
 
+	/* Enable software GSO if SG is supported. */
+	if (dev->features & NETIF_F_SG)
+		dev->features |= NETIF_F_GSO;
+
 	netdev_initialize_kobject(dev);
 	ret = netdev_register_kobject(dev);
 	if (ret)
-- 
cgit v1.2.3-70-g09d2


From adf044c8778de98dae29c5ce9973b7e43964674f Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 3 Aug 2008 14:06:44 -0700
Subject: net: Add missing extra2 parameter for ip_default_ttl sysctl

Commit 76e6ebfb40a2455c18234dcb0f9df37533215461 ("netns: add namespace
parameter to rt_cache_flush") acceses the extra2 parameter of the
ip_default_ttl ctl_table, but it is never set to a meaningful
value. When e84f84f276473dcc673f360e8ff3203148bdf0e2 ("netns: place
rt_genid into struct net") is applied, we'll oops in
rt_cache_invalidate(). Set extra2 to init_net, to avoid that.

Reported-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Tested-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Acked-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 770d827f5ab..e0689fd7b79 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -232,6 +232,7 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &ipv4_doint_and_flush,
 		.strategy	= &ipv4_doint_and_flush_strategy,
+		.extra2		= &init_net,
 	},
 	{
 		.ctl_name	= NET_IPV4_NO_PMTU_DISC,
-- 
cgit v1.2.3-70-g09d2


From 1730554f253deb65fe5112c54b2f898d5318a328 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 3 Aug 2008 18:13:44 -0700
Subject: ipv6: syncookies: free reqsk on xfrm_lookup error

cookie_v6_check() did not call reqsk_free() if xfrm_lookup() fails,
leaking the request sock.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/syncookies.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index a46badd1082..ec394cf5a19 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -199,10 +199,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq6 = inet6_rsk(req);
 	treq = tcp_rsk(req);
 
-	if (security_inet_conn_request(sk, skb, req)) {
-		reqsk_free(req);
-		goto out;
-	}
+	if (security_inet_conn_request(sk, skb, req))
+		goto out_free;
 
 	req->mss = mss;
 	ireq->rmt_port = th->source;
@@ -255,14 +253,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
 		security_req_classify_flow(req, &fl);
-		if (ip6_dst_lookup(sk, &dst, &fl)) {
-			reqsk_free(req);
-			goto out;
-		}
+		if (ip6_dst_lookup(sk, &dst, &fl))
+			goto out_free;
+
 		if (final_p)
 			ipv6_addr_copy(&fl.fl6_dst, final_p);
 		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
-			goto out;
+			goto out_free;
 	}
 
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
@@ -273,7 +270,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq->rcv_wscale = rcv_wscale;
 
 	ret = get_cookie_sock(sk, skb, req, dst);
-
-out:	return ret;
+out:
+	return ret;
+out_free:
+	reqsk_free(req);
+	return NULL;
 }
 
-- 
cgit v1.2.3-70-g09d2


From cfb266c0ee0ea0b7bfa8189e3a3a80344dec6112 Mon Sep 17 00:00:00 2001
From: Yang Hongyang <yanghy@cn.fujitsu.com>
Date: Sun, 3 Aug 2008 18:16:15 -0700
Subject: ipv6: Fix the return value of Set Hop-by-Hop options header with NULL
 data pointer

When Set Hop-by-Hop options header with NULL data
pointer and optlen is not zero use setsockopt(),
the kernel successfully return 0 instead of
return error EINVAL or EFAULT.

This patch fix the problem.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ea33b26512c..741cfcd96f8 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -346,6 +346,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		 */
 		if (optlen == 0)
 			optval = NULL;
+		else if (optval == NULL)
+			goto e_inval;
 		else if (optlen < sizeof(struct ipv6_opt_hdr) ||
 			 optlen & 0x7 || optlen > 8 * 255)
 			goto e_inval;
-- 
cgit v1.2.3-70-g09d2


From f880374c2fe37aad3fa62253a4bc125d7a933aad Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 3 Aug 2008 21:15:08 -0700
Subject: sctp: Drop ipfargok in sctp_xmit function

The ipfragok flag controls whether the packet may be fragmented
either on the local host on beyond.  The latter is only valid on
IPv4.

In fact, we never want to do the latter even on IPv4 when PMTU is
enabled.  This is because even though we can't fragment packets
within SCTP due to the prtocol's inherent faults, we can still
fragment it at IP layer.  By setting the DF bit we will improve
the PMTU process.

RFC 2960 only says that we SHOULD clear the DF bit in this case,
so we're compliant even if we set the DF bit.  In fact RFC 4960
no longer has this statement.

Once we make this change, we only need to control the local
fragmentation.  There is already a bit in the skb which controls
that, local_df.  So this patch sets that instead of using the
ipfragok argument.

The only complication is that there isn't a struct sock object
per transport, so for IPv4 we have to resort to changing the
pmtudisc field for every packet.  This should be safe though
as the protocol is single-threaded.

Note that after this patch we can remove ipfragok from the rest
of the stack too.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 3 +--
 net/sctp/ipv6.c            | 8 +++++---
 net/sctp/output.c          | 6 ++----
 net/sctp/protocol.c        | 9 +++++++--
 4 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 535a18f57a1..ab1c472ea75 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -524,8 +524,7 @@ static inline void sctp_ssn_skip(struct sctp_stream *stream, __u16 id,
  */
 struct sctp_af {
 	int		(*sctp_xmit)	(struct sk_buff *skb,
-					 struct sctp_transport *,
-					 int ipfragok);
+					 struct sctp_transport *);
 	int		(*setsockopt)	(struct sock *sk,
 					 int level,
 					 int optname,
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a238d6834b3..483a01d0740 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -195,8 +195,7 @@ out:
 }
 
 /* Based on tcp_v6_xmit() in tcp_ipv6.c. */
-static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport,
-			int ipfragok)
+static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 {
 	struct sock *sk = skb->sk;
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -231,7 +230,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport,
 
 	SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
 
-	return ip6_xmit(sk, skb, &fl, np->opt, ipfragok);
+	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
+		skb->local_df = 1;
+
+	return ip6_xmit(sk, skb, &fl, np->opt, 0);
 }
 
 /* Returns the dst cache entry for the given source and destination ip
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 45684646b1d..0dc4a7dfb23 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -586,10 +586,8 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n",
 			  nskb->len);
 
-	if (tp->param_flags & SPP_PMTUD_ENABLE)
-		(*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok);
-	else
-		(*tp->af_specific->sctp_xmit)(nskb, tp, 1);
+	nskb->local_df = packet->ipfragok;
+	(*tp->af_specific->sctp_xmit)(nskb, tp);
 
 out:
 	packet->size = packet->overhead;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a6e0818bcff..0b65354aaf6 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -862,16 +862,21 @@ static int sctp_inet_supported_addrs(const struct sctp_sock *opt,
 
 /* Wrapper routine that calls the ip transmit routine. */
 static inline int sctp_v4_xmit(struct sk_buff *skb,
-			       struct sctp_transport *transport, int ipfragok)
+			       struct sctp_transport *transport)
 {
+	struct inet_sock *inet = inet_sk(skb->sk);
+
 	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, "
 			  "src:%u.%u.%u.%u, dst:%u.%u.%u.%u\n",
 			  __func__, skb, skb->len,
 			  NIPQUAD(skb->rtable->rt_src),
 			  NIPQUAD(skb->rtable->rt_dst));
 
+	inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
+			 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
+
 	SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
-	return ip_queue_xmit(skb, ipfragok);
+	return ip_queue_xmit(skb, 0);
 }
 
 static struct sctp_af sctp_af_inet;
-- 
cgit v1.2.3-70-g09d2


From 283d07ac201ee9f8aa6dc6f7519436b48760baff Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Sun, 3 Aug 2008 21:15:59 -0700
Subject: ipv6: Do not drop packet if skb->local_df is set to true

The old code will drop IPv6 packet if ipfragok is not set, since
ipfragok is obsoleted, will be instead by used skb->local_df, so this
check must be changed to skb->local_df.

This patch fix this problem and not drop packet if skb->local_df is
set to true.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a027003d69a..a4402de425d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -269,7 +269,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	skb->mark = sk->sk_mark;
 
 	mtu = dst_mtu(dst);
-	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
+	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
 		IP6_INC_STATS(ip6_dst_idev(skb->dst),
 			      IPSTATS_MIB_OUTREQUESTS);
 		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
-- 
cgit v1.2.3-70-g09d2


From 6e583ce5242f32e925dcb198f7123256d0798370 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Sun, 3 Aug 2008 21:29:57 -0700
Subject: net: eliminate refcounting in backlog queue

Avoid the overhead of atomic increment/decrement on each received packet.
This helps performance of non-NAPI devices (like loopback).
Use cleanup function to walk queue on each cpu and clean out any
left over packets.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index cbf80098980..fc6c9881eca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1909,7 +1909,6 @@ int netif_rx(struct sk_buff *skb)
 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
 		if (queue->input_pkt_queue.qlen) {
 enqueue:
-			dev_hold(skb->dev);
 			__skb_queue_tail(&queue->input_pkt_queue, skb);
 			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
@@ -2270,6 +2269,20 @@ out:
 	return ret;
 }
 
+/* Network device is going away, flush any packets still pending  */
+static void flush_backlog(void *arg)
+{
+	struct net_device *dev = arg;
+	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct sk_buff *skb, *tmp;
+
+	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &queue->input_pkt_queue);
+			kfree_skb(skb);
+		}
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
@@ -2279,7 +2292,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	napi->weight = weight_p;
 	do {
 		struct sk_buff *skb;
-		struct net_device *dev;
 
 		local_irq_disable();
 		skb = __skb_dequeue(&queue->input_pkt_queue);
@@ -2288,14 +2300,9 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			local_irq_enable();
 			break;
 		}
-
 		local_irq_enable();
 
-		dev = skb->dev;
-
 		netif_receive_skb(skb);
-
-		dev_put(dev);
 	} while (++work < quota && jiffies == start_time);
 
 	return work;
@@ -4169,6 +4176,8 @@ void netdev_run_todo(void)
 
 		dev->reg_state = NETREG_UNREGISTERED;
 
+		on_each_cpu(flush_backlog, dev, 1);
+
 		netdev_wait_allrefs(dev);
 
 		/* paranoia */
-- 
cgit v1.2.3-70-g09d2


From 98f7dfd86cbbd377e2cbc293529681b914296f68 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 18 Jul 2008 13:52:59 +0800
Subject: mac80211: pass dtim_period to low level driver

This patch adds the dtim_period in ieee80211_bss_conf, this allows the low
level driver to know the dtim_period, and to plan power save accordingly.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 13 +++++++++++++
 include/net/mac80211.h     |  4 +++-
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mlme.c        | 11 +++++++++++
 4 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a1630ba0b87..7f4df7c7659 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -506,6 +506,19 @@ struct ieee80211_channel_sw_ie {
 	u8 count;
 } __attribute__ ((packed));
 
+/**
+ * struct ieee80211_tim
+ *
+ * This structure refers to "Traffic Indication Map information element"
+ */
+struct ieee80211_tim_ie {
+	u8 dtim_count;
+	u8 dtim_period;
+	u8 bitmap_ctrl;
+	/* variable size: 1 - 251 bytes */
+	u8 virtual_map[0];
+} __attribute__ ((packed));
+
 struct ieee80211_mgmt {
 	__le16 frame_control;
 	__le16 duration;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b52721008be..9d99f2e0a20 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -177,9 +177,10 @@ enum ieee80211_bss_change {
  * @aid: association ID number, valid only when @assoc is true
  * @use_cts_prot: use CTS protection
  * @use_short_preamble: use 802.11b short preamble
+ * @dtim_period: num of beacons before the next DTIM, for PSM
  * @timestamp: beacon timestamp
  * @beacon_int: beacon interval
- * @assoc_capability: capabbilities taken from assoc resp
+ * @assoc_capability: capabilities taken from assoc resp
  * @assoc_ht: association in HT mode
  * @ht_conf: ht capabilities
  * @ht_bss_conf: ht extended capabilities
@@ -191,6 +192,7 @@ struct ieee80211_bss_conf {
 	/* erp related data */
 	bool use_cts_prot;
 	bool use_short_preamble;
+	u8 dtim_period;
 	u16 beacon_int;
 	u16 assoc_capability;
 	u64 timestamp;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a2e200f9811..ec59345af65 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -82,6 +82,7 @@ struct ieee80211_sta_bss {
 
 	u8 bssid[ETH_ALEN];
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
+	u8 dtim_period;
 	u16 capability; /* host byte order */
 	enum ieee80211_band band;
 	int freq;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index acb04133a95..591e6331c42 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -551,6 +551,7 @@ static void ieee80211_set_associated(struct net_device *dev,
 			/* set timing information */
 			sdata->bss_conf.beacon_int = bss->beacon_int;
 			sdata->bss_conf.timestamp = bss->timestamp;
+			sdata->bss_conf.dtim_period = bss->dtim_period;
 
 			changed |= ieee80211_handle_bss_capability(sdata, bss);
 
@@ -2688,6 +2689,16 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
 	bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
 
+	if (elems->tim) {
+		struct ieee80211_tim_ie *tim_ie =
+			(struct ieee80211_tim_ie *)elems->tim;
+		bss->dtim_period = tim_ie->dtim_period;
+	}
+
+	/* set default value for buggy APs */
+	if (!elems->tim || bss->dtim_period == 0)
+		bss->dtim_period = 1;
+
 	bss->supp_rates_len = 0;
 	if (elems->supp_rates) {
 		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
-- 
cgit v1.2.3-70-g09d2


From ea95bba41e69c616bb1512cf59d22f33266b8568 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Fri, 18 Jul 2008 13:53:00 +0800
Subject: mac80211: make listen_interval be limited by low level driver

This patch makes possible for a driver to specify maximal listen interval
The possibility for user to configure listen interval is not implemented
yet, currently the maximum provided by the driver or 1 is used.
Mac80211 uses config handler to set listen interval for to the driver.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 9 ++++++++-
 net/mac80211/main.c    | 5 +++++
 net/mac80211/mlme.c    | 6 ++++--
 3 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9d99f2e0a20..b397e4d984c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -432,6 +432,7 @@ enum ieee80211_conf_flags {
  * @radio_enabled: when zero, driver is required to switch off the radio.
  *	TODO make a flag
  * @beacon_int: beacon interval (TODO make interface config)
+ * @listen_interval: listen interval in units of beacon interval
  * @flags: configuration flags defined above
  * @power_level: requested transmit power (in dBm)
  * @max_antenna_gain: maximum antenna gain (in dBi)
@@ -446,6 +447,7 @@ struct ieee80211_conf {
 	int radio_enabled;
 
 	int beacon_int;
+	u16 listen_interval;
 	u32 flags;
 	int power_level;
 	int max_antenna_gain;
@@ -787,6 +789,9 @@ enum ieee80211_hw_flags {
  * @max_signal: Maximum value for signal (rssi) in RX information, used
  *     only when @IEEE80211_HW_SIGNAL_UNSPEC or @IEEE80211_HW_SIGNAL_DB
  *
+ * @max_listen_interval: max listen interval in units of beacon interval
+ *     that HW supports
+ *
  * @queues: number of available hardware transmit queues for
  *	data packets. WMM/QoS requires at least four, these
  *	queues need to have configurable access parameters.
@@ -814,7 +819,9 @@ struct ieee80211_hw {
 	unsigned int extra_tx_headroom;
 	int channel_change_time;
 	int vif_data_size;
-	u16 queues, ampdu_queues;
+	u16 queues;
+	u16 ampdu_queues;
+	u16 max_listen_interval;
 	s8 max_signal;
 };
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a4c5b90de76..0c02c471bca 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1689,6 +1689,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (local->hw.conf.beacon_int < 10)
 		local->hw.conf.beacon_int = 100;
 
+	if (local->hw.max_listen_interval == 0)
+		local->hw.max_listen_interval = 1;
+
+	local->hw.conf.listen_interval = local->hw.max_listen_interval;
+
 	local->wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC |
 						  IEEE80211_HW_SIGNAL_DB |
 						  IEEE80211_HW_SIGNAL_DBM) ?
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 591e6331c42..779affd8b8f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -774,7 +774,8 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
 						   IEEE80211_STYPE_REASSOC_REQ);
 		mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
-		mgmt->u.reassoc_req.listen_interval = cpu_to_le16(1);
+		mgmt->u.reassoc_req.listen_interval =
+				cpu_to_le16(local->hw.conf.listen_interval);
 		memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid,
 		       ETH_ALEN);
 	} else {
@@ -782,7 +783,8 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
 						   IEEE80211_STYPE_ASSOC_REQ);
 		mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
-		mgmt->u.assoc_req.listen_interval = cpu_to_le16(1);
+		mgmt->u.reassoc_req.listen_interval =
+				cpu_to_le16(local->hw.conf.listen_interval);
 	}
 
 	/* SSID */
-- 
cgit v1.2.3-70-g09d2


From 80693ceb78b08baa3b66a900d9225b2cf9c6f0ed Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@gentoo.org>
Date: Sat, 19 Jul 2008 23:31:17 +0100
Subject: mac80211: automatic IBSS channel selection

When joining an ad-hoc network, the user is currently required to specify
the channel. The network will not be joined otherwise, unless it happens
to be sitting on the currently active channel.

This patch implements automatic channel selection when the user has not
locked the interface onto a specific channel.

Signed-off-by: Daniel Drake <dsd@gentoo.org>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 779affd8b8f..5358420d879 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3663,11 +3663,21 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
 		       "%s\n", print_mac(mac, bssid),
 		       print_mac(mac2, ifsta->bssid));
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
-	if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 &&
-	    (bss = ieee80211_rx_bss_get(dev, bssid,
-					local->hw.conf.channel->center_freq,
-					ifsta->ssid, ifsta->ssid_len))) {
+
+	if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) {
 		int ret;
+		int search_freq;
+
+		if (ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL)
+			search_freq = bss->freq;
+		else
+			search_freq = local->hw.conf.channel->center_freq;
+
+		bss = ieee80211_rx_bss_get(dev, bssid, search_freq,
+					   ifsta->ssid, ifsta->ssid_len);
+		if (!bss)
+			goto dont_join;
+
 		printk(KERN_DEBUG "%s: Selected IBSS BSSID %s"
 		       " based on configured SSID\n",
 		       dev->name, print_mac(mac, bssid));
@@ -3675,6 +3685,8 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
 		ieee80211_rx_bss_put(local, bss);
 		return ret;
 	}
+
+dont_join:
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 	printk(KERN_DEBUG "   did not try to join ibss\n");
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
-- 
cgit v1.2.3-70-g09d2


From 4c43e0d0ecd5196ed5c67f64ed2f1860770eed34 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 4 Aug 2008 16:00:39 +0800
Subject: iwlwifi: HW bug fixes

This patch adds few HW bug fixes.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-5000.c | 15 +++++++++++++++
 drivers/net/wireless/iwlwifi/iwl-csr.h  | 10 +++++++++-
 drivers/net/wireless/iwlwifi/iwl-prph.h | 12 +++++++-----
 net/mac80211/mlme.c                     |  2 +-
 4 files changed, 32 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index f7bbd12193f..1d793c093f1 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -93,6 +93,13 @@ static int iwl5000_apm_init(struct iwl_priv *priv)
 	iwl_set_bit(priv, CSR_GIO_CHICKEN_BITS,
 		    CSR_GIO_CHICKEN_BITS_REG_BIT_L1A_NO_L0S_RX);
 
+	/* Set FH wait treshold to maximum (HW error during stress W/A) */
+	iwl_set_bit(priv, CSR_DBG_HPET_MEM_REG, CSR_DBG_HPET_MEM_REG_VAL);
+
+	/* enable HAP INTA to move device L1a -> L0s */
+	iwl_set_bit(priv, CSR_HW_IF_CONFIG_REG,
+		    CSR_HW_IF_CONFIG_REG_BIT_HAP_WAKE_L1A);
+
 	iwl_set_bit(priv, CSR_ANA_PLL_CFG, CSR50_ANA_PLL_CFG_VAL);
 
 	/* set "initialization complete" bit to move adapter
@@ -230,6 +237,14 @@ static void iwl5000_nic_config(struct iwl_priv *priv)
 		    CSR_HW_IF_CONFIG_REG_BIT_RADIO_SI |
 		    CSR_HW_IF_CONFIG_REG_BIT_MAC_SI);
 
+	/* W/A : NIC is stuck in a reset state after Early PCIe power off
+	 * (PCIe power is lost before PERST# is asserted),
+	 * causing ME FW to lose ownership and not being able to obtain it back.
+	 */
+	 iwl_set_bits_mask_prph(priv, APMG_PS_CTRL_REG,
+				APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS,
+				~APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS);
+
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-csr.h b/drivers/net/wireless/iwlwifi/iwl-csr.h
index 545ed692d88..52629fbd835 100644
--- a/drivers/net/wireless/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/iwlwifi/iwl-csr.h
@@ -104,6 +104,7 @@
  *  3-2:  0 = A, 1 = B, 2 = C, 3 = D step
  */
 #define CSR_HW_REV_WA_REG	(CSR_BASE+0x22C)
+#define CSR_DBG_HPET_MEM_REG	(CSR_BASE+0x240)
 
 /* Bits for CSR_HW_IF_CONFIG_REG */
 #define CSR49_HW_IF_CONFIG_REG_BIT_4965_R	(0x00000010)
@@ -118,7 +119,12 @@
 #define CSR39_HW_IF_CONFIG_REG_BITS_SILICON_TYPE_A    (0x00000000)
 #define CSR39_HW_IF_CONFIG_REG_BITS_SILICON_TYPE_B    (0x00001000)
 
-#define CSR_HW_IF_CONFIG_REG_BIT_EEPROM_OWN_SEM     (0x00200000)
+#define CSR_HW_IF_CONFIG_REG_BIT_HAP_WAKE_L1A		(0x00080000)
+#define CSR_HW_IF_CONFIG_REG_BIT_EEPROM_OWN_SEM		(0x00200000)
+#define CSR_HW_IF_CONFIG_REG_BIT_PCI_OWN_SEM		(0x00400000)
+#define CSR_HW_IF_CONFIG_REG_BIT_ME_OWN			(0x02000000)
+#define CSR_HW_IF_CONFIG_REG_BIT_WAKE_ME		(0x08000000)
+
 
 /* interrupt flags in INTA, set by uCode or hardware (e.g. dma),
  * acknowledged (reset) by host writing "1" to flagged bits. */
@@ -236,6 +242,8 @@
 #define CSR39_ANA_PLL_CFG_VAL        (0x01000000)
 #define CSR50_ANA_PLL_CFG_VAL        (0x00880300)
 
+/* HPET MEM debug */
+#define CSR_DBG_HPET_MEM_REG_VAL	(0xFFFF0000)
 /*=== HBUS (Host-side Bus) ===*/
 #define HBUS_BASE	(0x400)
 /*
diff --git a/drivers/net/wireless/iwlwifi/iwl-prph.h b/drivers/net/wireless/iwlwifi/iwl-prph.h
index 70d9c7568b9..ee5afd48d3a 100644
--- a/drivers/net/wireless/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/iwlwifi/iwl-prph.h
@@ -84,14 +84,16 @@
 #define APMG_CLK_VAL_DMA_CLK_RQT	(0x00000200)
 #define APMG_CLK_VAL_BSM_CLK_RQT	(0x00000800)
 
-#define APMG_PS_CTRL_VAL_RESET_REQ	(0x04000000)
 
-#define APMG_PCIDEV_STT_VAL_L1_ACT_DIS	(0x00000800)
+#define APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS	(0x00400000)
+#define APMG_PS_CTRL_VAL_RESET_REQ		(0x04000000)
+#define APMG_PS_CTRL_MSK_PWR_SRC		(0x03000000)
+#define APMG_PS_CTRL_VAL_PWR_SRC_VMAIN		(0x00000000)
+#define APMG_PS_CTRL_VAL_PWR_SRC_MAX		(0x01000000) /* 3945 only */
+#define APMG_PS_CTRL_VAL_PWR_SRC_VAUX		(0x02000000)
 
-#define APMG_PS_CTRL_MSK_PWR_SRC              (0x03000000)
-#define APMG_PS_CTRL_VAL_PWR_SRC_VMAIN        (0x00000000)
-#define APMG_PS_CTRL_VAL_PWR_SRC_VAUX         (0x01000000)
 
+#define APMG_PCIDEV_STT_VAL_L1_ACT_DIS		(0x00000800)
 
 /**
  * BSM (Bootstrap State Machine)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5358420d879..e1d11c9b672 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3920,7 +3920,7 @@ done:
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
-		    (!ifsta->state == IEEE80211_IBSS_JOINED &&
+		    (!(ifsta->state == IEEE80211_IBSS_JOINED) &&
 		    !ieee80211_sta_active_ibss(dev)))
 			ieee80211_sta_find_ibss(dev, ifsta);
 	}
-- 
cgit v1.2.3-70-g09d2


From 378a2f090f7a478704a372a4869b8a9ac206234e Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 4 Aug 2008 22:31:03 -0700
Subject: net_sched: Add qdisc __NET_XMIT_STOLEN flag

Patrick McHardy <kaber@trash.net> noticed:
"The other problem that affects all qdiscs supporting actions is
TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS
even though the packet is not queued, corrupting upper qdiscs'
qlen counters."

and later explained:
"The reason why it translates it at all seems to be to not increase
the drops counter. Within a single qdisc this could be avoided by
other means easily, upper qdiscs would still increase the counter
when we return anything besides NET_XMIT_SUCCESS though.

This means we need a new NET_XMIT return value to indicate this to
the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN,
return that to upper qdiscs and translate it to NET_XMIT_SUCCESS
in dev_queue_xmit, similar to NET_XMIT_BYPASS."

David Miller <davem@davemloft.net> noticed:
"Maybe these NET_XMIT_* values being passed around should be a set of
bits. They could be composed of base meanings, combined with specific
attributes.

So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT"

The attributes get masked out by the top-level ->enqueue() caller,
such that the base meanings are the only thing that make their
way up into the stack. If it's only about communication within the
qdisc tree, let's simply code it that way."

This patch is trying to realize these ideas.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/net/sch_generic.h | 14 +++++++++++++-
 net/sched/sch_atm.c       | 12 +++++++-----
 net/sched/sch_cbq.c       | 23 +++++++++++++++--------
 net/sched/sch_dsmark.c    |  8 +++++---
 net/sched/sch_hfsc.c      |  8 +++++---
 net/sched/sch_htb.c       | 18 +++++++++++-------
 net/sched/sch_netem.c     |  3 ++-
 net/sched/sch_prio.c      |  8 +++++---
 net/sched/sch_red.c       |  2 +-
 net/sched/sch_sfq.c       |  2 +-
 net/sched/sch_tbf.c       |  3 ++-
 12 files changed, 68 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ee583f642a9..abbf5d52ec8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -64,6 +64,7 @@ struct wireless_dev;
 #define NET_XMIT_BYPASS		4	/* packet does not leave via dequeue;
 					   (TC use only - dev_queue_xmit
 					   returns this as NET_XMIT_SUCCESS) */
+#define NET_XMIT_MASK		0xFFFF	/* qdisc flags in net/sch_generic.h */
 
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0   /* keep 'em coming, baby */
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c5bb1306505..f15b045a85e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -343,6 +343,18 @@ static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
 	return qdisc_skb_cb(skb)->pkt_len;
 }
 
+#ifdef CONFIG_NET_CLS_ACT
+/* additional qdisc xmit flags */
+enum net_xmit_qdisc_t {
+	__NET_XMIT_STOLEN = 0x00010000,
+};
+
+#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
+
+#else
+#define net_xmit_drop_count(e)	(1)
+#endif
+
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 #ifdef CONFIG_NET_SCHED
@@ -355,7 +367,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 {
 	qdisc_skb_cb(skb)->pkt_len = skb->len;
-	return qdisc_enqueue(skb, sch);
+	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6b517b9dac5..27dd773481b 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -415,7 +415,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			kfree_skb(skb);
 			goto drop;
@@ -432,9 +432,11 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	ret = qdisc_enqueue(skb, flow->q);
 	if (ret != 0) {
 drop: __maybe_unused
-		sch->qstats.drops++;
-		if (flow)
-			flow->qstats.drops++;
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			if (flow)
+				flow->qstats.drops++;
+		}
 		return ret;
 	}
 	sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -530,7 +532,7 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	if (!ret) {
 		sch->q.qlen++;
 		sch->qstats.requeues++;
-	} else {
+	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
 		p->link.qstats.drops++;
 	}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 14954bf4a68..765ae565900 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -256,7 +256,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		case TC_ACT_RECLASSIFY:
@@ -397,9 +397,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return ret;
 	}
 
-	sch->qstats.drops++;
-	cbq_mark_toplevel(q, cl);
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cbq_mark_toplevel(q, cl);
+		cl->qstats.drops++;
+	}
 	return ret;
 }
 
@@ -430,8 +432,10 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 			cbq_activate_class(cl);
 		return 0;
 	}
-	sch->qstats.drops++;
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+	}
 	return ret;
 }
 
@@ -664,13 +668,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 	q->rx_class = NULL;
 
 	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
+		int ret;
 
 		cbq_mark_toplevel(q, cl);
 
 		q->rx_class = cl;
 		cl->q->__parent = sch;
 
-		if (qdisc_enqueue(skb, cl->q) == 0) {
+		ret = qdisc_enqueue(skb, cl->q);
+		if (ret == NET_XMIT_SUCCESS) {
 			sch->q.qlen++;
 			sch->bstats.packets++;
 			sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -678,7 +684,8 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 				cbq_activate_class(cl);
 			return 0;
 		}
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(ret))
+			sch->qstats.drops++;
 		return 0;
 	}
 
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index a935676987e..7170275d9f9 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -236,7 +236,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 
 		case TC_ACT_SHOT:
 			goto drop;
@@ -254,7 +254,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = qdisc_enqueue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
 
@@ -321,7 +322,8 @@ static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = p->q->ops->requeue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 0ae7d19dcba..5cf9ae71611 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1166,7 +1166,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -1586,8 +1586,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
-		cl->qstats.drops++;
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err)) {
+			cl->qstats.drops++;
+			sch->qstats.drops++;
+		}
 		return err;
 	}
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 75a40951c4f..538d79b489a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -221,7 +221,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -572,9 +572,11 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
+	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			cl->qstats.drops++;
+		}
 		return NET_XMIT_DROP;
 	} else {
 		cl->bstats.packets +=
@@ -615,10 +617,12 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+	} else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
 		   NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			cl->qstats.drops++;
+		}
 		return NET_XMIT_DROP;
 	} else
 		htb_activate(q, cl);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a5908570067..6cd6f2bc749 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -240,8 +240,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		sch->q.qlen++;
 		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
-	} else
+	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
+	}
 
 	pr_debug("netem: enqueue ret %d\n", ret);
 	return ret;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index f849243eb09..adb1a52b77d 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -45,7 +45,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (err) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -88,7 +88,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return ret;
 }
 
@@ -114,7 +115,8 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 		sch->qstats.requeues++;
 		return 0;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return NET_XMIT_DROP;
 }
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 3f2d1d7f3bb..5da05839e22 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -97,7 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
-	} else {
+	} else if (net_xmit_drop_count(ret)) {
 		q->stats.pdrop++;
 		sch->qstats.drops++;
 	}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8589da66656..3a456e1b829 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		switch (result) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return 0;
 		}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b296672f763..7d3b7ff3bf0 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -135,7 +135,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	ret = qdisc_enqueue(skb, q->qdisc);
 	if (ret != 0) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(ret))
+			sch->qstats.drops++;
 		return ret;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From c27f339af90bb874a7a9c680b17abfd32d4a727b Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 4 Aug 2008 22:39:11 -0700
Subject: net_sched: Add qdisc __NET_XMIT_BYPASS flag

Patrick McHardy <kaber@trash.net> noticed that it would be nice to
handle NET_XMIT_BYPASS by NET_XMIT_SUCCESS with an internal qdisc flag
__NET_XMIT_BYPASS and to remove the mapping from dev_queue_xmit().

David Miller <davem@davemloft.net> spotted a serious bug in the first
version of this patch.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 6 +++---
 net/core/dev.c            | 1 -
 net/sched/sch_atm.c       | 2 +-
 net/sched/sch_cbq.c       | 4 ++--
 net/sched/sch_dsmark.c    | 2 +-
 net/sched/sch_hfsc.c      | 4 ++--
 net/sched/sch_htb.c       | 6 +++---
 net/sched/sch_netem.c     | 2 +-
 net/sched/sch_prio.c      | 6 +++---
 net/sched/sch_sfq.c       | 6 +++---
 10 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f15b045a85e..a7abfda3e44 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -343,14 +343,14 @@ static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
 	return qdisc_skb_cb(skb)->pkt_len;
 }
 
-#ifdef CONFIG_NET_CLS_ACT
-/* additional qdisc xmit flags */
+/* additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h) */
 enum net_xmit_qdisc_t {
 	__NET_XMIT_STOLEN = 0x00010000,
+	__NET_XMIT_BYPASS = 0x00020000,
 };
 
+#ifdef CONFIG_NET_CLS_ACT
 #define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
-
 #else
 #define net_xmit_drop_count(e)	(1)
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index fc6c9881eca..01993ad74e7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1805,7 +1805,6 @@ gso:
 
 		spin_unlock(root_lock);
 
-		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
 		goto out;
 	}
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 27dd773481b..43d37256c15 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -457,7 +457,7 @@ drop: __maybe_unused
 		return 0;
 	}
 	tasklet_schedule(&p->task);
-	return NET_XMIT_BYPASS;
+	return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 }
 
 /*
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 765ae565900..4e261ce62f4 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -230,7 +230,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	    (cl = cbq_class_lookup(q, prio)) != NULL)
 		return cl;
 
-	*qerr = NET_XMIT_BYPASS;
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	for (;;) {
 		int result = 0;
 		defmap = head->defaults;
@@ -377,7 +377,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	q->rx_class = cl;
 #endif
 	if (cl == NULL) {
-		if (ret == NET_XMIT_BYPASS)
+		if (ret & __NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 7170275d9f9..edd1298f85f 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -268,7 +268,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 drop:
 	kfree_skb(skb);
 	sch->qstats.drops++;
-	return NET_XMIT_BYPASS;
+	return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 }
 
 static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5cf9ae71611..c2b8d9cce3d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1159,7 +1159,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		if (cl->level == 0)
 			return cl;
 
-	*qerr = NET_XMIT_BYPASS;
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	tcf = q->root.filter_list;
 	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1578,7 +1578,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	cl = hfsc_classify(skb, sch, &err);
 	if (cl == NULL) {
-		if (err == NET_XMIT_BYPASS)
+		if (err & __NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return err;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 538d79b489a..be35422711a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -214,7 +214,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
 		return cl;
 
-	*qerr = NET_XMIT_BYPASS;
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	tcf = q->filter_list;
 	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -567,7 +567,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 #ifdef CONFIG_NET_CLS_ACT
 	} else if (!cl) {
-		if (ret == NET_XMIT_BYPASS)
+		if (ret & __NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
@@ -612,7 +612,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 #ifdef CONFIG_NET_CLS_ACT
 	} else if (!cl) {
-		if (ret == NET_XMIT_BYPASS)
+		if (ret & __NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6cd6f2bc749..fb0294d0b55 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -176,7 +176,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (count == 0) {
 		sch->qstats.drops++;
 		kfree_skb(skb);
-		return NET_XMIT_BYPASS;
+		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	}
 
 	skb_orphan(skb);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index adb1a52b77d..eac197610ed 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -38,7 +38,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	struct tcf_result res;
 	int err;
 
-	*qerr = NET_XMIT_BYPASS;
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	if (TC_H_MAJ(skb->priority) != sch->handle) {
 		err = tc_classify(skb, q->filter_list, &res);
 #ifdef CONFIG_NET_CLS_ACT
@@ -74,7 +74,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 #ifdef CONFIG_NET_CLS_ACT
 	if (qdisc == NULL) {
 
-		if (ret == NET_XMIT_BYPASS)
+		if (ret & __NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
@@ -103,7 +103,7 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	qdisc = prio_classify(skb, sch, &ret);
 #ifdef CONFIG_NET_CLS_ACT
 	if (qdisc == NULL) {
-		if (ret == NET_XMIT_BYPASS)
+		if (ret & __NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3a456e1b829..6e041d10dbd 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -171,7 +171,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	if (!q->filter_list)
 		return sfq_hash(q, skb) + 1;
 
-	*qerr = NET_XMIT_BYPASS;
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	result = tc_classify(skb, q->filter_list, &res);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -285,7 +285,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	hash = sfq_classify(skb, sch, &ret);
 	if (hash == 0) {
-		if (ret == NET_XMIT_BYPASS)
+		if (ret & __NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
@@ -339,7 +339,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 
 	hash = sfq_classify(skb, sch, &ret);
 	if (hash == 0) {
-		if (ret == NET_XMIT_BYPASS)
+		if (ret & __NET_XMIT_BYPASS)
 			sch->qstats.drops++;
 		kfree_skb(skb);
 		return ret;
-- 
cgit v1.2.3-70-g09d2


From ad619800e4e034cad44299b2a22df9eebb043ac3 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Tue, 5 Aug 2008 01:21:22 -0700
Subject: bridge: fix compile warning in net/bridge/br_netfilter.c

This patch fixes the following warning due to incompatible pointer
assignment:

net/bridge/br_netfilter.c: In function 'br_netfilter_rtable_init':
net/bridge/br_netfilter.c:116: warning: assignment from incompatible
pointer type

This warning is due to commit 4adf0af6818f3ea52421dc0bae836cfaf20ef72a
from July 30 (send correct MTU value in PMTU (revised)).

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 6e280a8a31e..6a9a6cd74b1 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -113,7 +113,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 	struct rtable *rt = &br->fake_rtable;
 
 	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.dev = &br->dev;
+	rt->u.dst.dev = br->dev;
 	rt->u.dst.path = &rt->u.dst;
 	rt->u.dst.metrics[RTAX_MTU - 1] = 1500;
 	rt->u.dst.flags	= DST_NOXFRM;
-- 
cgit v1.2.3-70-g09d2


From ef647f1300d69adb8223d970554d59d7e244db6d Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 5 Aug 2008 18:42:51 -0700
Subject: bridge: Eliminate unnecessary forward delay

From: Stephen Hemminger <shemminger@vyatta.com>

Based upon original patch by Herbert Xu, which contained
the following problem description:

--------------------
When the forward delay is set to zero, we still delay the setting
of the forwarding state by one or possibly two timers depending
on whether STP is enabled.  This could either turn out to be
instantaneous, or horribly slow depending on the load of the
machine.

As there is nothing preventing us from enabling forwarding straight
away, this patch eliminates this potential delay by executing the
code directly if the forward delay is zero.

The effect of this problem is that immediately after the carrier
comes on a port, the bridge will drop all packets received from
that port until it enters forwarding mode, thus causing unnecessary
packet loss.

Note that this patch doesn't fully remove the delay due to the
link watcher.  We should also check the carrier state when we
are about to drop an incoming packet because the port is disabled.
But that's for another patch.
--------------------

This version of the fix takes a different approach, in that
it just does the state change directly.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 921bbe5cb94..6e63ec3f1fc 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -368,14 +368,25 @@ static void br_make_blocking(struct net_bridge_port *p)
 /* called under bridge lock */
 static void br_make_forwarding(struct net_bridge_port *p)
 {
-	if (p->state == BR_STATE_BLOCKING) {
-		if (p->br->stp_enabled == BR_KERNEL_STP)
-			p->state = BR_STATE_LISTENING;
-		else
-			p->state = BR_STATE_LEARNING;
+	struct net_bridge *br = p->br;
 
-		br_log_state(p);
-		mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay);	}
+	if (p->state != BR_STATE_BLOCKING)
+		return;
+
+	if (br->forward_delay == 0) {
+		p->state = BR_STATE_FORWARDING;
+		br_topology_change_detection(br);
+		del_timer(&p->forward_delay_timer);
+	}
+	else if (p->br->stp_enabled == BR_KERNEL_STP)
+		p->state = BR_STATE_LISTENING;
+	else
+		p->state = BR_STATE_LEARNING;
+
+	br_log_state(p);
+
+	if (br->forward_delay != 0)
+		mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
 }
 
 /* called under bridge lock */
-- 
cgit v1.2.3-70-g09d2


From 1211a64554065316e02b3c62b320088ad4f63260 Mon Sep 17 00:00:00 2001
From: Robert Olsson <robert.olsson@its.uu.se>
Date: Tue, 5 Aug 2008 18:44:26 -0700
Subject: pktgen: random flow

Random flow generation has not worked. This fixes it.

Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3284605f2ec..9c87320fdf3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2085,15 +2085,19 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
 		if (pkt_dev->flows[flow].count >= pkt_dev->lflow) {
 			/* reset time */
 			pkt_dev->flows[flow].count = 0;
+			pkt_dev->flows[flow].flags = 0;
 			pkt_dev->curfl += 1;
 			if (pkt_dev->curfl >= pkt_dev->cflows)
 				pkt_dev->curfl = 0; /*reset */
 		}
 	} else {
 		flow = random32() % pkt_dev->cflows;
+		pkt_dev->curfl = flow;
 
-		if (pkt_dev->flows[flow].count > pkt_dev->lflow)
+		if (pkt_dev->flows[flow].count > pkt_dev->lflow) {
 			pkt_dev->flows[flow].count = 0;
+			pkt_dev->flows[flow].flags = 0;
+		}
 	}
 
 	return pkt_dev->curfl;
-- 
cgit v1.2.3-70-g09d2


From ff2a79a5a934fe0dbb136ffad61f79b5c6385614 Mon Sep 17 00:00:00 2001
From: Robert Olsson <robert.olsson@its.uu.se>
Date: Tue, 5 Aug 2008 18:45:05 -0700
Subject: pktgen: mac count

dst_mac_count and src_mac_count patch from Eneas Hunguana
We have sent one mac address to much.

Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9c87320fdf3..2498cdaf8cb 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2166,7 +2166,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 			mc = random32() % pkt_dev->src_mac_count;
 		else {
 			mc = pkt_dev->cur_src_mac_offset++;
-			if (pkt_dev->cur_src_mac_offset >
+			if (pkt_dev->cur_src_mac_offset >=
 			    pkt_dev->src_mac_count)
 				pkt_dev->cur_src_mac_offset = 0;
 		}
@@ -2193,7 +2193,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 
 		else {
 			mc = pkt_dev->cur_dst_mac_offset++;
-			if (pkt_dev->cur_dst_mac_offset >
+			if (pkt_dev->cur_dst_mac_offset >=
 			    pkt_dev->dst_mac_count) {
 				pkt_dev->cur_dst_mac_offset = 0;
 			}
-- 
cgit v1.2.3-70-g09d2


From ffb208479bd62ab26c29a242faeb1de1c6d5fcdc Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 5 Aug 2008 18:46:57 -0700
Subject: AX.25: Fix sysctl registration if !CONFIG_AX25_DAMA_SLAVE

Since 49ffcf8f99e8d33ec8afb450956804af518fd788 ("sysctl: update
sysctl_check_table") setting struct ctl_table.procname = NULL does no
longer work as it used to the way the AX.25 code is expecting it to
resulting in the AX.25 sysctl registration code to break if
CONFIG_AX25_DAMA_SLAVE was not set as in some distribution kernels.
Kernel releases from 2.6.24 are affected.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/sysctl_net_ax25.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index f597987b242..f288fc4aef9 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -36,6 +36,7 @@ static struct ctl_path ax25_path[] = {
 	{ .procname = "ax25", .ctl_name = NET_AX25, },
 	{ }
 };
+
 static const ctl_table ax25_param_table[] = {
 	{
 		.ctl_name	= NET_AX25_IP_DEFAULT_MODE,
@@ -167,6 +168,7 @@ static const ctl_table ax25_param_table[] = {
 		.extra1		= &min_proto,
 		.extra2		= &max_proto
 	},
+#ifdef CONFIG_AX25_DAMA_SLAVE
 	{
 		.ctl_name	= NET_AX25_DAMA_SLAVE_TIMEOUT,
 		.procname	= "dama_slave_timeout",
@@ -177,6 +179,8 @@ static const ctl_table ax25_param_table[] = {
 		.extra1		= &min_ds_timeout,
 		.extra2		= &max_ds_timeout
 	},
+#endif
+
 	{ .ctl_name = 0 }	/* that's all, folks! */
 };
 
@@ -210,16 +214,6 @@ void ax25_register_sysctl(void)
 		ax25_table[n].procname     = ax25_dev->dev->name;
 		ax25_table[n].mode         = 0555;
 
-#ifndef CONFIG_AX25_DAMA_SLAVE
-		/*
-		 * We do not wish to have a representation of this parameter
-		 * in /proc/sys/ when configured *not* to include the
-		 * AX.25 DAMA slave code, do we?
-		 */
-
-		child[AX25_VALUES_DS_TIMEOUT].procname = NULL;
-#endif
-
 		child[AX25_MAX_VALUES].ctl_name = 0;	/* just in case... */
 
 		for (k = 0; k < AX25_MAX_VALUES; k++)
-- 
cgit v1.2.3-70-g09d2


From 6d273f8d011c351c9603c1dbfeae2c7458edd30d Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 6 Aug 2008 02:33:49 -0700
Subject: ipv4: replace dst_metric() with dst_mtu() in net/ipv4/route.c.

This patch replaces dst_metric() with dst_mtu() in net/ipv4/route.c.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1bfa078ddbd..eccb61889df 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1509,14 +1509,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 					/* BSD 4.2 compatibility hack :-( */
 					if (mtu == 0 &&
-					    old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) &&
+					    old_mtu >= dst_mtu(&rth->u.dst) &&
 					    old_mtu >= 68 + (iph->ihl << 2))
 						old_mtu -= iph->ihl << 2;
 
 					mtu = guess_mtu(old_mtu);
 				}
-				if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) {
-					if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) {
+				if (mtu <= dst_mtu(&rth->u.dst)) {
+					if (mtu < dst_mtu(&rth->u.dst)) {
 						dst_confirm(&rth->u.dst);
 						if (mtu < ip_rt_min_pmtu) {
 							mtu = ip_rt_min_pmtu;
@@ -1538,7 +1538,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
-	if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 &&
+	if (dst_mtu(dst) > mtu && mtu >= 68 &&
 	    !(dst_metric_locked(dst, RTAX_MTU))) {
 		if (mtu < ip_rt_min_pmtu) {
 			mtu = ip_rt_min_pmtu;
@@ -1667,7 +1667,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 
 	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
-	if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU)
+	if (dst_mtu(&rt->u.dst) > IP_MAX_MTU)
 		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
 	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0)
 		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
-- 
cgit v1.2.3-70-g09d2


From 1ca615fb816ba85dc765209a9b58ab82cc99bce0 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 6 Aug 2008 02:34:21 -0700
Subject: ipv6: replace dst_metric() with dst_mtu() in net/ipv6/route.c.

This patch replaces dst_metric() with dst_mtu() in net/ipv6/route.c.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 86540b24b27..5a3e87e4b18 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1249,7 +1249,7 @@ install_route:
 
 	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	if (!dst_metric(&rt->u.dst, RTAX_MTU))
+	if (!dst_mtu(&rt->u.dst))
 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
 	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-- 
cgit v1.2.3-70-g09d2


From 9714be7da8b32f36d2468fe08ff603b6402df8cf Mon Sep 17 00:00:00 2001
From: Krzysztof Piotr Oledzki <ole@ans.pl>
Date: Wed, 6 Aug 2008 02:35:44 -0700
Subject: netfilter: fix two recent sysctl problems

Starting with 9043476f726802f4b00c96d0c4f418dde48d1304 ("[PATCH]
sanitize proc_sysctl") we have two netfilter releated problems:

 - WARNING: at kernel/sysctl.c:1966 unregister_sysctl_table+0xcc/0x103(),
   caused by wrong order of ini/fini calls

 - net.netfilter is duplicated and has truncated set of records

Thanks to very useful guidelines from Al Viro, this patch fixes both
of them.

Signed-off-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_core.c       |  6 +++---
 net/netfilter/nf_conntrack_standalone.c | 28 +++++++++++++++++-----------
 2 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c519d090bdb..9d1830da8e8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1032,10 +1032,10 @@ void nf_conntrack_cleanup(void)
 	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
 			     nf_conntrack_htable_size);
 
-	nf_conntrack_proto_fini();
-	nf_conntrack_helper_fini();
-	nf_conntrack_expect_fini();
 	nf_conntrack_acct_fini();
+	nf_conntrack_expect_fini();
+	nf_conntrack_helper_fini();
+	nf_conntrack_proto_fini();
 }
 
 struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 869ef9349d0..8509db14670 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -324,6 +324,7 @@ static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
 static struct ctl_table_header *nf_ct_sysctl_header;
+static struct ctl_table_header *nf_ct_netfilter_header;
 
 static ctl_table nf_ct_sysctl_table[] = {
 	{
@@ -383,12 +384,6 @@ static ctl_table nf_ct_sysctl_table[] = {
 #define NET_NF_CONNTRACK_MAX 2089
 
 static ctl_table nf_ct_netfilter_table[] = {
-	{
-		.ctl_name	= NET_NETFILTER,
-		.procname	= "netfilter",
-		.mode		= 0555,
-		.child		= nf_ct_sysctl_table,
-	},
 	{
 		.ctl_name	= NET_NF_CONNTRACK_MAX,
 		.procname	= "nf_conntrack_max",
@@ -409,18 +404,29 @@ EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
 
 static int nf_conntrack_standalone_init_sysctl(void)
 {
-	nf_ct_sysctl_header =
+	nf_ct_netfilter_header =
 		register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
-	if (nf_ct_sysctl_header == NULL) {
-		printk("nf_conntrack: can't register to sysctl.\n");
-		return -ENOMEM;
-	}
+	if (!nf_ct_netfilter_header)
+		goto out;
+
+	nf_ct_sysctl_header =
+		 register_sysctl_paths(nf_net_netfilter_sysctl_path,
+					nf_ct_sysctl_table);
+	if (!nf_ct_sysctl_header)
+		goto out_unregister_netfilter;
+
 	return 0;
 
+out_unregister_netfilter:
+	unregister_sysctl_table(nf_ct_netfilter_header);
+out:
+	printk("nf_conntrack: can't register to sysctl.\n");
+	return -ENOMEM;
 }
 
 static void nf_conntrack_standalone_fini_sysctl(void)
 {
+	unregister_sysctl_table(nf_ct_netfilter_header);
 	unregister_sysctl_table(nf_ct_sysctl_header);
 }
 #else
-- 
cgit v1.2.3-70-g09d2


From eb49e63093498cd17382018495b8cfb5b4a679bd Mon Sep 17 00:00:00 2001
From: Joakim Koskela <jookos@gmail.com>
Date: Wed, 6 Aug 2008 02:39:30 -0700
Subject: ipsec: Interfamily IPSec BEET

Here's a revised version, based on Herbert's comments, of a fix for
the ipv6-inner, ipv4-outer interfamily ipsec beet mode. It fixes the
network header adjustment in interfamily, and doesn't reserve space
for the pseudo header anymore when we have ipv6 as the inner family.

Signed-off-by: Joakim Koskela <jookos@gmail.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/esp4.c            | 2 +-
 net/ipv4/xfrm4_mode_beet.c | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4e73e5708e7..21515d4c49e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -575,7 +575,7 @@ static int esp_init_state(struct xfrm_state *x)
 			      crypto_aead_ivsize(aead);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
-	else if (x->props.mode == XFRM_MODE_BEET)
+	else if (x->props.mode == XFRM_MODE_BEET && x->sel.family != AF_INET6)
 		x->props.header_len += IPV4_BEET_PHMAXLEN;
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 9c798abce73..63418185f52 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -47,8 +47,10 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (unlikely(optlen))
 		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
 
-	skb_set_network_header(skb, IPV4_BEET_PHMAXLEN - x->props.header_len -
-				    hdrlen);
+	skb_set_network_header(skb, -x->props.header_len -
+			            hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
+	if (x->sel.family != AF_INET6)
+		skb->network_header += IPV4_BEET_PHMAXLEN;
 	skb->mac_header = skb->network_header +
 			  offsetof(struct iphdr, protocol);
 	skb->transport_header = skb->network_header + sizeof(*top_iph);
-- 
cgit v1.2.3-70-g09d2


From abf5cdb89d09ca981db10e1a85fd8531440165f2 Mon Sep 17 00:00:00 2001
From: Joakim Koskela <jookos@gmail.com>
Date: Wed, 6 Aug 2008 02:40:25 -0700
Subject: ipsec: Interfamily IPSec BEET, ipv4-inner ipv6-outer

Here's a revised version, based on Herbert's comments, of a fix for
the ipv4-inner, ipv6-outer interfamily ipsec beet mode. It fixes the
network header adjustment during interfamily, as well as makes sure
that we reserve enough room for the new ipv6 header if we might have
something else as the inner family. Also, the ipv4 pseudo header
construction was added.

Signed-off-by: Joakim Koskela <jookos@gmail.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/esp6.c            |  4 ++++
 net/ipv6/xfrm6_mode_beet.c | 29 ++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index c6bb4c6d24b..b181b08fb76 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -521,6 +521,10 @@ static int esp6_init_state(struct xfrm_state *x)
 			      crypto_aead_ivsize(aead);
 	switch (x->props.mode) {
 	case XFRM_MODE_BEET:
+		if (x->sel.family != AF_INET6)
+			x->props.header_len += IPV4_BEET_PHMAXLEN +
+				               (sizeof(struct ipv6hdr) - sizeof(struct iphdr));
+		break;
 	case XFRM_MODE_TRANSPORT:
 		break;
 	case XFRM_MODE_TUNNEL:
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index d6ce400f585..bbd48b101ba 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -40,16 +40,39 @@ static void xfrm6_beet_make_header(struct sk_buff *skb)
 static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ipv6hdr *top_iph;
-
-	skb_set_network_header(skb, -x->props.header_len);
+	struct ip_beet_phdr *ph;
+	struct iphdr *iphv4;
+	int optlen, hdr_len;
+
+	iphv4 = ip_hdr(skb);
+	hdr_len = 0;
+	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
+	if (unlikely(optlen))
+		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+	skb_set_network_header(skb, -x->props.header_len - hdr_len);
+	if (x->sel.family != AF_INET6)
+		skb->network_header += IPV4_BEET_PHMAXLEN;
 	skb->mac_header = skb->network_header +
 			  offsetof(struct ipv6hdr, nexthdr);
 	skb->transport_header = skb->network_header + sizeof(*top_iph);
-	__skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl);
+	ph = (struct ip_beet_phdr *)__skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl-hdr_len);
 
 	xfrm6_beet_make_header(skb);
 
 	top_iph = ipv6_hdr(skb);
+	if (unlikely(optlen)) {
+
+		BUG_ON(optlen < 0);
+
+		ph->padlen = 4 - (optlen & 4);
+		ph->hdrlen = optlen / 8;
+		ph->nexthdr = top_iph->nexthdr;
+		if (ph->padlen)
+			memset(ph + 1, IPOPT_NOP, ph->padlen);
+
+		top_iph->nexthdr = IPPROTO_BEETPH;
+	}
 
 	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
 	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
-- 
cgit v1.2.3-70-g09d2


From 11d46123bfea068a48483f00518d301f452647fb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 6 Aug 2008 18:30:43 -0700
Subject: ipv4: Fix over-ifdeffing of ip_static_sysctl_init.

Noticed by Paulius Zaleckas.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index eccb61889df..16fc6f454a3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3223,9 +3223,7 @@ int __init ip_rt_init(void)
  */
 void __init ip_static_sysctl_init(void)
 {
-#ifdef CONFIG_SYSCTL
 	register_sysctl_paths(ipv4_route_path, ipv4_route_table);
-#endif
 }
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From ee7af8264dafa0c8c76a8dc596803966c2e29ebc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 6 Aug 2008 23:35:59 -0700
Subject: pkt_sched: Fix "parent is root" test in qdisc_create().

As noticed by Stephen Hemminger, the root qdisc is denoted by
TC_H_ROOT, not zero.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4840aff4725..83b23b55ce3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -792,7 +792,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 				goto err_out3;
 			}
 		}
-		if (parent && !(sch->flags & TCQ_F_INGRESS))
+		if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS))
 			list_add_tail(&sch->list, &dev_queue->qdisc->list);
 
 		return sch;
-- 
cgit v1.2.3-70-g09d2


From 6edafaaf6f5e70ef1e620ff01bd6bacebe1e0718 Mon Sep 17 00:00:00 2001
From: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Date: Wed, 6 Aug 2008 23:50:04 -0700
Subject: tcp: Fix kernel panic when calling tcp_v(4/6)_md5_do_lookup

If the following packet flow happen, kernel will panic.
MathineA			MathineB
		SYN
	---------------------->
        	SYN+ACK
	<----------------------
		ACK(bad seq)
	---------------------->
When a bad seq ACK is received, tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr))
is finally called by tcp_v4_reqsk_send_ack(), but the first parameter(skb->sk) is
NULL at that moment, so kernel panic happens.
This patch fixes this bug.

OOPS output is as following:
[  302.812793] IP: [<c05cfaa6>] tcp_v4_md5_do_lookup+0x12/0x42
[  302.817075] Oops: 0000 [#1] SMP
[  302.819815] Modules linked in: ipv6 loop dm_multipath rtc_cmos rtc_core rtc_lib pcspkr pcnet32 mii i2c_piix4 parport_pc i2c_core parport ac button ata_piix libata dm_mod mptspi mptscsih mptbase scsi_transport_spi sd_mod scsi_mod crc_t10dif ext3 jbd mbcache uhci_hcd ohci_hcd ehci_hcd [last unloaded: scsi_wait_scan]
[  302.849946]
[  302.851198] Pid: 0, comm: swapper Not tainted (2.6.27-rc1-guijf #5)
[  302.855184] EIP: 0060:[<c05cfaa6>] EFLAGS: 00010296 CPU: 0
[  302.858296] EIP is at tcp_v4_md5_do_lookup+0x12/0x42
[  302.861027] EAX: 0000001e EBX: 00000000 ECX: 00000046 EDX: 00000046
[  302.864867] ESI: ceb69e00 EDI: 1467a8c0 EBP: cf75f180 ESP: c0792e54
[  302.868333]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
[  302.871287] Process swapper (pid: 0, ti=c0792000 task=c0712340 task.ti=c0746000)
[  302.875592] Stack: c06f413a 00000000 cf75f180 ceb69e00 00000000 c05d0d86 000016d0 ceac5400
[  302.883275]        c05d28f8 000016d0 ceb69e00 ceb69e20 681bf6e3 00001000 00000000 0a67a8c0
[  302.890971]        ceac5400 c04250a3 c06f413a c0792eb0 c0792edc cf59a620 cf59a620 cf59a634
[  302.900140] Call Trace:
[  302.902392]  [<c05d0d86>] tcp_v4_reqsk_send_ack+0x17/0x35
[  302.907060]  [<c05d28f8>] tcp_check_req+0x156/0x372
[  302.910082]  [<c04250a3>] printk+0x14/0x18
[  302.912868]  [<c05d0aa1>] tcp_v4_do_rcv+0x1d3/0x2bf
[  302.917423]  [<c05d26be>] tcp_v4_rcv+0x563/0x5b9
[  302.920453]  [<c05bb20f>] ip_local_deliver_finish+0xe8/0x183
[  302.923865]  [<c05bb10a>] ip_rcv_finish+0x286/0x2a3
[  302.928569]  [<c059e438>] dev_alloc_skb+0x11/0x25
[  302.931563]  [<c05a211f>] netif_receive_skb+0x2d6/0x33a
[  302.934914]  [<d0917941>] pcnet32_poll+0x333/0x680 [pcnet32]
[  302.938735]  [<c05a3b48>] net_rx_action+0x5c/0xfe
[  302.941792]  [<c042856b>] __do_softirq+0x5d/0xc1
[  302.944788]  [<c042850e>] __do_softirq+0x0/0xc1
[  302.948999]  [<c040564b>] do_softirq+0x55/0x88
[  302.951870]  [<c04501b1>] handle_fasteoi_irq+0x0/0xa4
[  302.954986]  [<c04284da>] irq_exit+0x35/0x69
[  302.959081]  [<c0405717>] do_IRQ+0x99/0xae
[  302.961896]  [<c040422b>] common_interrupt+0x23/0x28
[  302.966279]  [<c040819d>] default_idle+0x2a/0x3d
[  302.969212]  [<c0402552>] cpu_idle+0xb2/0xd2
[  302.972169]  =======================
[  302.974274] Code: fc ff 84 d2 0f 84 df fd ff ff e9 34 fe ff ff 83 c4 0c 5b 5e 5f 5d c3 90 90 57 89 d7 56 53 89 c3 50 68 3a 41 6f c0 e8 e9 55 e5 ff <8b> 93 9c 04 00 00 58 85 d2 59 74 1e 8b 72 10 31 db 31 c9 85 f6
[  303.011610] EIP: [<c05cfaa6>] tcp_v4_md5_do_lookup+0x12/0x42 SS:ESP 0068:c0792e54
[  303.018360] Kernel panic - not syncing: Fatal exception in interrupt

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h | 2 +-
 net/dccp/dccp.h            | 3 ++-
 net/dccp/minisocks.c       | 3 ++-
 net/ipv4/tcp_ipv4.c        | 4 ++--
 net/ipv4/tcp_minisocks.c   | 2 +-
 net/ipv6/tcp_ipv6.c        | 8 +++++---
 6 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 8d6e991ef4d..cac811e51f6 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -33,7 +33,7 @@ struct request_sock_ops {
 	struct kmem_cache	*slab;
 	int		(*rtx_syn_ack)(struct sock *sk,
 				       struct request_sock *req);
-	void		(*send_ack)(struct sk_buff *skb,
+	void		(*send_ack)(struct sock *sk, struct sk_buff *skb,
 				    struct request_sock *req);
 	void		(*send_reset)(struct sock *sk,
 				      struct sk_buff *skb);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 1c2e3ec2eb5..b4bc6e095a0 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -229,7 +229,8 @@ extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 extern int  dccp_retransmit_skb(struct sock *sk);
 
 extern void dccp_send_ack(struct sock *sk);
-extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk);
+extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+				struct request_sock *rsk);
 
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 66dca5bba85..b2804e2d1b8 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -296,7 +296,8 @@ int dccp_child_process(struct sock *parent, struct sock *child,
 
 EXPORT_SYMBOL_GPL(dccp_child_process);
 
-void dccp_reqsk_send_ack(struct sk_buff *skb, struct request_sock *rsk)
+void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+			 struct request_sock *rsk)
 {
 	DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state");
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 91a8cfddf1c..44c1e934824 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -687,14 +687,14 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	inet_twsk_put(tw);
 }
 
-static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
+static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req)
 {
 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 			req->ts_recent,
 			0,
-			tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr));
+			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr));
 }
 
 /*
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 204c4216266..6d286f58c00 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -609,7 +609,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 					  tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
 		/* Out of window: send ACK and drop. */
 		if (!(flg & TCP_FLAG_RST))
-			req->rsk_ops->send_ack(skb, req);
+			req->rsk_ops->send_ack(sk, skb, req);
 		if (paws_reject)
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
 		return NULL;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 78185a40921..5b90b369ccb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -69,7 +69,8 @@
 #include <linux/scatterlist.h>
 
 static void	tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
-static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
+static void	tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+				      struct request_sock *req);
 
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
 
@@ -1138,10 +1139,11 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	inet_twsk_put(tw);
 }
 
-static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
+static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+				  struct request_sock *req)
 {
 	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
-			tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr));
+			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From cc9bd5cebc0825e0fabc0186ab85806a0891104f Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jre@nuovasystems.com>
Date: Wed, 2 Jul 2008 18:22:00 -0700
Subject: net/core: Uninline skb_bond().

Otherwise subsequent changes need multiple return values.

Signed-off-by: Joe Eykholt <jre@nuovasystems.com>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 net/core/dev.c | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 01993ad74e7..4a09833331f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1939,22 +1939,6 @@ int netif_rx_ni(struct sk_buff *skb)
 
 EXPORT_SYMBOL(netif_rx_ni);
 
-static inline struct net_device *skb_bond(struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-
-	if (dev->master) {
-		if (skb_bond_should_drop(skb)) {
-			kfree_skb(skb);
-			return NULL;
-		}
-		skb->dev = dev->master;
-	}
-
-	return dev;
-}
-
-
 static void net_tx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -2194,10 +2178,14 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (!skb->iif)
 		skb->iif = skb->dev->ifindex;
 
-	orig_dev = skb_bond(skb);
-
-	if (!orig_dev)
-		return NET_RX_DROP;
+	orig_dev = skb->dev;
+	if (orig_dev->master) {
+		if (skb_bond_should_drop(skb)) {
+			kfree_skb(skb);
+			return NET_RX_DROP;
+		}
+		skb->dev = orig_dev->master;
+	}
 
 	__get_cpu_var(netdev_rx_stat).total++;
 
-- 
cgit v1.2.3-70-g09d2


From 0d7a3681232f545c6a59f77e60f7667673ef0e93 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jre@nuovasystems.com>
Date: Wed, 2 Jul 2008 18:22:01 -0700
Subject: net/core: Allow certain receives on inactive slave.

Allow a packet_type that specifies the exact device to receive
even on an inactive bonding slave devices.  This is important for some
L2 protocols such as LLDP and FCoE.  This can eventually be used
for the bonding special cases as well.

Signed-off-by: Joe Eykholt <jre@nuovasystems.com>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 net/core/dev.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4a09833331f..dab97c7cf27 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2165,6 +2165,7 @@ int netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
+	struct net_device *null_or_orig;
 	int ret = NET_RX_DROP;
 	__be16 type;
 
@@ -2178,13 +2179,13 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (!skb->iif)
 		skb->iif = skb->dev->ifindex;
 
+	null_or_orig = NULL;
 	orig_dev = skb->dev;
 	if (orig_dev->master) {
-		if (skb_bond_should_drop(skb)) {
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-		skb->dev = orig_dev->master;
+		if (skb_bond_should_drop(skb))
+			null_or_orig = orig_dev; /* deliver only exact match */
+		else
+			skb->dev = orig_dev->master;
 	}
 
 	__get_cpu_var(netdev_rx_stat).total++;
@@ -2209,7 +2210,7 @@ int netif_receive_skb(struct sk_buff *skb)
 #endif
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (!ptype->dev || ptype->dev == skb->dev) {
+		if (ptype->dev == null_or_orig || ptype->dev == skb->dev) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -2234,7 +2235,7 @@ ncls:
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type &&
-		    (!ptype->dev || ptype->dev == skb->dev)) {
+		    (ptype->dev == null_or_orig || ptype->dev == skb->dev)) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
-- 
cgit v1.2.3-70-g09d2


From f982307f22db96201e41540295f24e8dcc10c78f Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jre@nuovasystems.com>
Date: Wed, 2 Jul 2008 18:22:02 -0700
Subject: net/core: Allow receive on active slaves.

If a packet_type specifies an active slave to bonding and not just any
interface, allow it to receive frames that came in on that interface.

Signed-off-by: Joe Eykholt <jre@nuovasystems.com>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 net/core/dev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index dab97c7cf27..600bb23c4c2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2210,7 +2210,8 @@ int netif_receive_skb(struct sk_buff *skb)
 #endif
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (ptype->dev == null_or_orig || ptype->dev == skb->dev) {
+		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
+		    ptype->dev == orig_dev) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -2235,7 +2236,8 @@ ncls:
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type &&
-		    (ptype->dev == null_or_orig || ptype->dev == skb->dev)) {
+		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
+		     ptype->dev == orig_dev)) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
-- 
cgit v1.2.3-70-g09d2


From e6fce5b916cd7f7f79b2b3e53ba74bbfc1d7cf8b Mon Sep 17 00:00:00 2001
From: Robert Olsson <robert.olsson@its.uu.se>
Date: Thu, 7 Aug 2008 02:23:01 -0700
Subject: pktgen: multiqueue etc.

Sofar far pktgen have had a restriction to only use one device per kernel
thread. With the new multiqueue architecture this is no longer adequate.

The patch below is an effort to remove this by in pktgen configuration
adding a tag to  the device name a la eth0@0 etc. The tag is used for
usual device config just as before. Also a new flag is introduced to mirror
queue_map with sending threads smp_processor_id() QUEUE_MAP_CPU.

An example: We use 4 CPU's to send to one 10g interface (eth0)
 and we use the new tagging to send a mix of packet sizes, 64, 576 and
 1500 bytes. Also we use TX queues according to smp_processor_id()

 PGDEV=/proc/net/pktgen/kpktgend_0
 pgset "add_device eth0@0"

 PGDEV=/proc/net/pktgen/kpktgend_1
 pgset "add_device eth0@1"

 PGDEV=/proc/net/pktgen/kpktgend_2
 pgset "add_device eth0@2"

 PGDEV=/proc/net/pktgen/kpktgend_3
 pgset "add_device eth0@3"
....
PGDEV=/proc/net/pktgen/eth0@0
pgset "pkt_size 64"
pgset "flag QUEUE_MAP_CPU"

PGDEV=/proc/net/pktgen/eth0@1
pgset "pkt_size 572"
pgset "flag QUEUE_MAP_CPU"

PGDEV=/proc/net/pktgen/eth0@2
pgset "pkt_size 1496"

PGDEV=/proc/net/pktgen/eth0@3
pgset "pkt_size 1496"
pgset "flag QUEUE_MAP_CPU"

Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2498cdaf8cb..52623645390 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -168,7 +168,7 @@
 #include <asm/div64.h>		/* do_div */
 #include <asm/timex.h>
 
-#define VERSION  "pktgen v2.69: Packet Generator for packet performance testing.\n"
+#define VERSION  "pktgen v2.70: Packet Generator for packet performance testing.\n"
 
 #define IP_NAME_SZ 32
 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
@@ -189,6 +189,7 @@
 #define F_FLOW_SEQ    (1<<11)	/* Sequential flows */
 #define F_IPSEC_ON    (1<<12)	/* ipsec on for flows */
 #define F_QUEUE_MAP_RND (1<<13)	/* queue map Random */
+#define F_QUEUE_MAP_CPU (1<<14)	/* queue map mirrors smp_processor_id() */
 
 /* Thread control flag bits */
 #define T_TERMINATE   (1<<0)
@@ -621,6 +622,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	if (pkt_dev->flags & F_QUEUE_MAP_RND)
 		seq_printf(seq,  "QUEUE_MAP_RND  ");
 
+	if (pkt_dev->flags & F_QUEUE_MAP_CPU)
+		seq_printf(seq,  "QUEUE_MAP_CPU  ");
+
 	if (pkt_dev->cflows) {
 		if (pkt_dev->flags & F_FLOW_SEQ)
 			seq_printf(seq,  "FLOW_SEQ  "); /*in sequence flows*/
@@ -1134,6 +1138,12 @@ static ssize_t pktgen_if_write(struct file *file,
 
 		else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
 			pkt_dev->flags &= ~F_QUEUE_MAP_RND;
+
+		else if (strcmp(f, "QUEUE_MAP_CPU") == 0)
+			pkt_dev->flags |= F_QUEUE_MAP_CPU;
+
+		else if (strcmp(f, "!QUEUE_MAP_CPU") == 0)
+			pkt_dev->flags &= ~F_QUEUE_MAP_CPU;
 #ifdef CONFIG_XFRM
 		else if (strcmp(f, "IPSEC") == 0)
 			pkt_dev->flags |= F_IPSEC_ON;
@@ -1895,6 +1905,23 @@ static int pktgen_device_event(struct notifier_block *unused,
 	return NOTIFY_DONE;
 }
 
+static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, const char *ifname)
+{
+	char b[IFNAMSIZ+5];
+	int i = 0;
+
+	for(i=0; ifname[i] != '@'; i++) {
+		if(i == IFNAMSIZ)
+			break;
+
+		b[i] = ifname[i];
+	}
+	b[i] = 0;
+
+	return dev_get_by_name(&init_net, b);
+}
+
+
 /* Associate pktgen_dev with a device. */
 
 static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
@@ -1908,7 +1935,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
 		pkt_dev->odev = NULL;
 	}
 
-	odev = dev_get_by_name(&init_net, ifname);
+	odev = pktgen_dev_get_by_name(pkt_dev, ifname);
 	if (!odev) {
 		printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname);
 		return -ENODEV;
@@ -2129,7 +2156,11 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
 #endif
 static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
 {
-	if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
+
+	if (pkt_dev->flags & F_QUEUE_MAP_CPU)
+		pkt_dev->cur_queue_map = smp_processor_id();
+
+	else if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
 		__u16 t;
 		if (pkt_dev->flags & F_QUEUE_MAP_RND) {
 			t = random32() %
-- 
cgit v1.2.3-70-g09d2


From e32f85f7b917456265d4c30d15f734c4912cfa6a Mon Sep 17 00:00:00 2001
From: Luis Carlos Cobo <luisca@cozybit.com>
Date: Tue, 5 Aug 2008 19:34:52 +0200
Subject: mac80211: fix use of skb->cb for mesh forwarding

Now we deal with mesh forwarding before the 802.11->802.3 conversion, thus
eliminating a few unnecessary steps. The next hop lookup is called from
ieee80211_master_start_xmit() instead of subif_start_xmit(). Until the next hop
is found, RA in the frame will be all zeroes for frames originating from the
device. For forwarded frames, RA will contain the TA of the received frame,
which will be necessary to send a path error if a next hop is not found.

Signed-off-by: Luis Carlos Cobo <luisca@cozybit.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh.h         |   5 +-
 net/mac80211/mesh_hwmp.c    |  19 ++++----
 net/mac80211/mesh_pathtbl.c |  11 ++---
 net/mac80211/rx.c           | 116 +++++++++++++++++++++++++-------------------
 net/mac80211/tx.c           |  45 +++++++++--------
 5 files changed, 106 insertions(+), 90 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 669eafafe49..7495fbb0d21 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -214,8 +214,7 @@ void ieee80211s_stop(void);
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata);
 
 /* Mesh paths */
-int mesh_nexthop_lookup(u8 *next_hop, struct sk_buff *skb,
-		struct net_device *dev);
+int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev);
 void mesh_path_start_discovery(struct net_device *dev);
 struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev);
 struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev);
@@ -286,6 +285,4 @@ static inline void mesh_path_activate(struct mesh_path *mpath)
 #define mesh_allocated	0
 #endif
 
-#define MESH_PREQ(skb)	(skb->cb + 30)
-
 #endif /* IEEE80211S_H */
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 7fa149e230e..08aca446ca0 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -758,29 +758,30 @@ enddiscovery:
 /**
  * ieee80211s_lookup_nexthop - put the appropriate next hop on a mesh frame
  *
- * @next_hop: output argument for next hop address
- * @skb: frame to be sent
+ * @skb: 802.11 frame to be sent
  * @dev: network device the frame will be sent through
+ * @fwd_frame: true if this frame was originally from a different host
  *
  * Returns: 0 if the next hop was found. Nonzero otherwise. If no next hop is
  * found, the function will start a path discovery and queue the frame so it is
  * sent when the path is resolved. This means the caller must not free the skb
  * in this case.
  */
-int mesh_nexthop_lookup(u8 *next_hop, struct sk_buff *skb,
-		struct net_device *dev)
+int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct sk_buff *skb_to_free = NULL;
 	struct mesh_path *mpath;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	u8 *dst_addr = hdr->addr3;
 	int err = 0;
 
 	rcu_read_lock();
-	mpath = mesh_path_lookup(skb->data, dev);
+	mpath = mesh_path_lookup(dst_addr, dev);
 
 	if (!mpath) {
-		mesh_path_add(skb->data, dev);
-		mpath = mesh_path_lookup(skb->data, dev);
+		mesh_path_add(dst_addr, dev);
+		mpath = mesh_path_lookup(dst_addr, dev);
 		if (!mpath) {
 			dev_kfree_skb(skb);
 			sdata->u.sta.mshstats.dropped_frames_no_route++;
@@ -792,13 +793,13 @@ int mesh_nexthop_lookup(u8 *next_hop, struct sk_buff *skb,
 	if (mpath->flags & MESH_PATH_ACTIVE) {
 		if (time_after(jiffies, mpath->exp_time -
 			msecs_to_jiffies(sdata->u.sta.mshcfg.path_refresh_time))
-				&& skb->pkt_type != PACKET_OTHERHOST
+				&& !memcmp(dev->dev_addr, hdr->addr4, ETH_ALEN)
 				&& !(mpath->flags & MESH_PATH_RESOLVING)
 				&& !(mpath->flags & MESH_PATH_FIXED)) {
 			mesh_queue_preq(mpath,
 					PREQ_Q_F_START | PREQ_Q_F_REFRESH);
 		}
-		memcpy(next_hop, mpath->next_hop->addr,
+		memcpy(hdr->addr1, mpath->next_hop->addr,
 				ETH_ALEN);
 	} else {
 		if (!(mpath->flags & MESH_PATH_RESOLVING)) {
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 5f88a2e6ee5..838ee60492a 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -388,18 +388,15 @@ void mesh_path_tx_pending(struct mesh_path *mpath)
 void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct mesh_path *mpath;
 	u32 dsn = 0;
 
-	if (skb->pkt_type == PACKET_OTHERHOST) {
-		struct ieee80211s_hdr *prev_meshhdr;
-		int mshhdrlen;
+	if (memcmp(hdr->addr4, dev->dev_addr, ETH_ALEN) != 0) {
 		u8 *ra, *da;
 
-		prev_meshhdr = ((struct ieee80211s_hdr *)skb->cb);
-		mshhdrlen = ieee80211_get_mesh_hdrlen(prev_meshhdr);
-		da = skb->data;
-		ra = MESH_PREQ(skb);
+		da = hdr->addr3;
+		ra = hdr->addr2;
 		mpath = mesh_path_lookup(da, dev);
 		if (mpath)
 			dsn = ++mpath->dsn;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6d9ae67c27c..6db85450519 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1109,20 +1109,9 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 
 	hdrlen = ieee80211_get_hdrlen(fc);
 
-	if (ieee80211_vif_is_mesh(&sdata->vif)) {
-		int meshhdrlen = ieee80211_get_mesh_hdrlen(
+	if (ieee80211_vif_is_mesh(&sdata->vif))
+		hdrlen += ieee80211_get_mesh_hdrlen(
 				(struct ieee80211s_hdr *) (skb->data + hdrlen));
-		/* Copy on cb:
-		 *  - mesh header: to be used for mesh forwarding
-		 * decision. It will also be used as mesh header template at
-		 * tx.c:ieee80211_subif_start_xmit() if interface
-		 * type is mesh and skb->pkt_type == PACKET_OTHERHOST
-		 *  - ta: to be used if a RERR needs to be sent.
-		 */
-		memcpy(skb->cb, skb->data + hdrlen, meshhdrlen);
-		memcpy(MESH_PREQ(skb), hdr->addr2, ETH_ALEN);
-		hdrlen += meshhdrlen;
-	}
 
 	/* convert IEEE 802.11 header + possible LLC headers into Ethernet
 	 * header
@@ -1269,38 +1258,6 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 		}
 	}
 
-	/* Mesh forwarding */
-	if (ieee80211_vif_is_mesh(&sdata->vif)) {
-		u8 *mesh_ttl = &((struct ieee80211s_hdr *)skb->cb)->ttl;
-		(*mesh_ttl)--;
-
-		if (is_multicast_ether_addr(skb->data)) {
-			if (*mesh_ttl > 0) {
-				xmit_skb = skb_copy(skb, GFP_ATOMIC);
-				if (xmit_skb)
-					xmit_skb->pkt_type = PACKET_OTHERHOST;
-				else if (net_ratelimit())
-					printk(KERN_DEBUG "%s: failed to clone "
-					       "multicast frame\n", dev->name);
-			} else
-				IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.sta,
-							     dropped_frames_ttl);
-		} else if (skb->pkt_type != PACKET_OTHERHOST &&
-			compare_ether_addr(dev->dev_addr, skb->data) != 0) {
-			if (*mesh_ttl == 0) {
-				IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.sta,
-							     dropped_frames_ttl);
-				dev_kfree_skb(skb);
-				skb = NULL;
-			} else {
-				xmit_skb = skb;
-				xmit_skb->pkt_type = PACKET_OTHERHOST;
-				if (!(dev->flags & IFF_PROMISC))
-					skb  = NULL;
-			}
-		}
-	}
-
 	if (skb) {
 		/* deliver to local stack */
 		skb->protocol = eth_type_trans(skb, dev);
@@ -1430,6 +1387,63 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 }
 
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
+{
+	struct ieee80211_hdr *hdr;
+	struct ieee80211s_hdr *mesh_hdr;
+	unsigned int hdrlen;
+	struct sk_buff *skb = rx->skb, *fwd_skb;
+
+	hdr = (struct ieee80211_hdr *) skb->data;
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
+
+	if (!ieee80211_is_data(hdr->frame_control))
+		return RX_CONTINUE;
+
+	if (!mesh_hdr->ttl)
+		/* illegal frame */
+		return RX_DROP_MONITOR;
+
+	if (compare_ether_addr(rx->dev->dev_addr, hdr->addr3) == 0)
+		return RX_CONTINUE;
+
+	mesh_hdr->ttl--;
+
+	if (rx->flags & IEEE80211_RX_RA_MATCH) {
+		if (!mesh_hdr->ttl)
+			IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.sta,
+						     dropped_frames_ttl);
+		else {
+			struct ieee80211_hdr *fwd_hdr;
+			fwd_skb = skb_copy(skb, GFP_ATOMIC);
+
+			if (!fwd_skb && net_ratelimit())
+				printk(KERN_DEBUG "%s: failed to clone mesh frame\n",
+						   rx->dev->name);
+
+			fwd_hdr =  (struct ieee80211_hdr *) fwd_skb->data;
+			/*
+			 * Save TA to addr1 to send TA a path error if a
+			 * suitable next hop is not found
+			 */
+			memcpy(fwd_hdr->addr1, fwd_hdr->addr2, ETH_ALEN);
+			memcpy(fwd_hdr->addr2, rx->dev->dev_addr, ETH_ALEN);
+			fwd_skb->dev = rx->local->mdev;
+			fwd_skb->iif = rx->dev->ifindex;
+			dev_queue_xmit(fwd_skb);
+		}
+	}
+
+	if (is_multicast_ether_addr(hdr->addr3) ||
+	    rx->dev->flags & IFF_PROMISC)
+		return RX_CONTINUE;
+	else
+		return RX_DROP_MONITOR;
+}
+
+
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 {
@@ -1663,10 +1677,12 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
 	rx->sdata = sdata;
 	rx->dev = sdata->dev;
 
-#define CALL_RXH(rxh)		\
-	res = rxh(rx);		\
-	if (res != RX_CONTINUE)	\
-		goto rxh_done;
+#define CALL_RXH(rxh)			\
+	do {				\
+		res = rxh(rx);		\
+		if (res != RX_CONTINUE)	\
+			goto rxh_done;  \
+	} while (0);
 
 	CALL_RXH(ieee80211_rx_h_passive_scan)
 	CALL_RXH(ieee80211_rx_h_check)
@@ -1678,6 +1694,8 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
 	/* must be after MMIC verify so header is counted in MPDU mic */
 	CALL_RXH(ieee80211_rx_h_remove_qos_control)
 	CALL_RXH(ieee80211_rx_h_amsdu)
+	if (ieee80211_vif_is_mesh(&sdata->vif))
+		CALL_RXH(ieee80211_rx_h_mesh_fwding);
 	CALL_RXH(ieee80211_rx_h_data)
 	CALL_RXH(ieee80211_rx_h_ctrl)
 	CALL_RXH(ieee80211_rx_h_mgmt)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 771ec68b848..4788f7b91f4 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1301,6 +1301,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 				struct net_device *dev)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct net_device *odev = NULL;
 	struct ieee80211_sub_if_data *osdata;
 	int headroom;
@@ -1328,6 +1329,20 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 
 	osdata = IEEE80211_DEV_TO_SUB_IF(odev);
 
+	if (ieee80211_vif_is_mesh(&osdata->vif) &&
+	    ieee80211_is_data(hdr->frame_control)) {
+		if (ieee80211_is_data(hdr->frame_control)) {
+			if (is_multicast_ether_addr(hdr->addr3))
+				memcpy(hdr->addr1, hdr->addr3, ETH_ALEN);
+			else
+				if (mesh_nexthop_lookup(skb, odev))
+					return  0;
+			if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0)
+				IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.sta,
+							     fwded_frames);
+		}
+	}
+
 	may_encrypt = !skb->do_not_encrypt;
 
 	headroom = osdata->local->tx_headroom;
@@ -1472,30 +1487,17 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	case IEEE80211_IF_TYPE_MESH_POINT:
 		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
 		/* RA TA DA SA */
-		if (is_multicast_ether_addr(skb->data))
-			memcpy(hdr.addr1, skb->data, ETH_ALEN);
-		else if (mesh_nexthop_lookup(hdr.addr1, skb, dev))
-				return 0;
+		memset(hdr.addr1, 0, ETH_ALEN);
 		memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN);
 		memcpy(hdr.addr3, skb->data, ETH_ALEN);
 		memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
-		if (skb->pkt_type == PACKET_OTHERHOST) {
-			/* Forwarded frame, keep mesh ttl and seqnum */
-			struct ieee80211s_hdr *prev_meshhdr;
-			prev_meshhdr = ((struct ieee80211s_hdr *)skb->cb);
-			meshhdrlen = ieee80211_get_mesh_hdrlen(prev_meshhdr);
-			memcpy(&mesh_hdr, prev_meshhdr, meshhdrlen);
-			sdata->u.sta.mshstats.fwded_frames++;
-		} else {
-			if (!sdata->u.sta.mshcfg.dot11MeshTTL) {
-				/* Do not send frames with mesh_ttl == 0 */
-				sdata->u.sta.mshstats.dropped_frames_ttl++;
-				ret = 0;
-				goto fail;
-			}
-			meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
-							       sdata);
+		if (!sdata->u.sta.mshcfg.dot11MeshTTL) {
+			/* Do not send frames with mesh_ttl == 0 */
+			sdata->u.sta.mshstats.dropped_frames_ttl++;
+			ret = 0;
+			goto fail;
 		}
+		meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata);
 		hdrlen = 30;
 		break;
 #endif
@@ -1543,7 +1545,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	 * Drop unicast frames to unauthorised stations unless they are
 	 * EAPOL frames from the local station.
 	 */
-	if (unlikely(!is_multicast_ether_addr(hdr.addr1) &&
+	if (!ieee80211_vif_is_mesh(&sdata->vif) &&
+		unlikely(!is_multicast_ether_addr(hdr.addr1) &&
 		      !(sta_flags & WLAN_STA_AUTHORIZED) &&
 		      !(ethertype == ETH_P_PAE &&
 		       compare_ether_addr(dev->dev_addr,
-- 
cgit v1.2.3-70-g09d2


From 8dbc1722a78343eb80f0ce1a3ef1965a9774ad5b Mon Sep 17 00:00:00 2001
From: Luis Carlos Cobo <luisca@cozybit.com>
Date: Wed, 6 Aug 2008 13:17:54 +0200
Subject: mac80211: keep mesh ifaces in allmulti mode

Currently a mesh node will not forward a multicast frame if it is not subscribed
to the specific multicast address. This patch addresses the issue and fixes mesh
multicast forwarding.

Signed-off-by: Luis Carlos Cobo <luisca@cozybit.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0c02c471bca..aa5a191598c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -245,10 +245,13 @@ static int ieee80211_open(struct net_device *dev)
 	case IEEE80211_IF_TYPE_AP:
 		sdata->bss = &sdata->u.ap;
 		break;
+	case IEEE80211_IF_TYPE_MESH_POINT:
+		/* mesh ifaces must set allmulti to forward mcast traffic */
+		atomic_inc(&local->iff_allmultis);
+		break;
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_MNTR:
 	case IEEE80211_IF_TYPE_IBSS:
-	case IEEE80211_IF_TYPE_MESH_POINT:
 		/* no special treatment */
 		break;
 	case IEEE80211_IF_TYPE_INVALID:
@@ -495,6 +498,9 @@ static int ieee80211_stop(struct net_device *dev)
 		netif_addr_unlock_bh(local->mdev);
 		break;
 	case IEEE80211_IF_TYPE_MESH_POINT:
+		/* allmulti is always set on mesh ifaces */
+		atomic_dec(&local->iff_allmultis);
+		/* fall through */
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
 		sdata->u.sta.state = IEEE80211_DISABLED;
-- 
cgit v1.2.3-70-g09d2


From 28111eb2f5087c5aa5ec3697388f6c7d354b2ad8 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Thu, 7 Aug 2008 22:26:54 +0200
Subject: [Bluetooth] Add parameters to control BNEP header compression

The Bluetooth qualification for PAN demands testing with BNEP header
compression disabled. This is actually pretty stupid and the Linux
implementation outsmarts the test system since it compresses whenever
possible. So to pass qualification two need parameters have been added
to control the compression of source and destination headers.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/bnep/core.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 021172c0e66..12bba6207a8 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -57,7 +57,10 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "1.2"
+#define VERSION "1.3"
+
+static int compress_src = 1;
+static int compress_dst = 1;
 
 static LIST_HEAD(bnep_session_list);
 static DECLARE_RWSEM(bnep_session_sem);
@@ -418,10 +421,10 @@ static inline int bnep_tx_frame(struct bnep_session *s, struct sk_buff *skb)
 	iv[il++] = (struct kvec) { &type, 1 };
 	len++;
 
-	if (!compare_ether_addr(eh->h_dest, s->eh.h_source))
+	if (compress_src && !compare_ether_addr(eh->h_dest, s->eh.h_source))
 		type |= 0x01;
 
-	if (!compare_ether_addr(eh->h_source, s->eh.h_dest))
+	if (compress_dst && !compare_ether_addr(eh->h_source, s->eh.h_dest))
 		type |= 0x02;
 
 	if (type)
@@ -727,6 +730,12 @@ static void __exit bnep_exit(void)
 module_init(bnep_init);
 module_exit(bnep_exit);
 
+module_param(compress_src, bool, 0644);
+MODULE_PARM_DESC(compress_src, "Compress sources headers");
+
+module_param(compress_dst, bool, 0644);
+MODULE_PARM_DESC(compress_dst, "Compress destination headers");
+
 MODULE_AUTHOR("David Libault <david.libault@inventel.fr>, Maxim Krasnyansky <maxk@qualcomm.com>");
 MODULE_DESCRIPTION("Bluetooth BNEP ver " VERSION);
 MODULE_VERSION(VERSION);
-- 
cgit v1.2.3-70-g09d2


From 827ebd6410005b05b3c930ef6a116666c6986886 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 7 Aug 2008 20:26:40 -0700
Subject: pkt_sched: Fix qdisc config when link is down.

Bug reported by Stephen Hemminger.

We need to fetch the root from ->qdisc_sleeping not ->qdisc.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 83b23b55ce3..ba1d121f312 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -189,7 +189,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		struct Qdisc *q, *txq_root = txq->qdisc;
+		struct Qdisc *q, *txq_root = txq->qdisc_sleeping;
 
 		if (!(txq_root->flags & TCQ_F_BUILTIN) &&
 		    txq_root->handle == handle)
@@ -793,7 +793,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 			}
 		}
 		if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS))
-			list_add_tail(&sch->list, &dev_queue->qdisc->list);
+			list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list);
 
 		return sch;
 	}
@@ -1236,11 +1236,11 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 		q_idx = 0;
 
 		dev_queue = netdev_get_tx_queue(dev, 0);
-		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
 			goto done;
 
 		dev_queue = &dev->rx_queue;
-		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
 			goto done;
 
 cont:
-- 
cgit v1.2.3-70-g09d2


From 2aaab9a0ccfd2ccf1c957cc2120da8d5593955c5 Mon Sep 17 00:00:00 2001
From: Adam Langley <agl@imperialviolet.org>
Date: Thu, 7 Aug 2008 20:27:45 -0700
Subject: tcp: (whitespace only) fix confusing indentation

The indentation in part of tcp_minisocks makes it look like one of the if
statements is much more important than it actually is.

Signed-off-by: Adam Langley <agl@imperialviolet.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_minisocks.c | 138 +++++++++++++++++++++++------------------------
 1 file changed, 68 insertions(+), 70 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6d286f58c00..f976fc57892 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -618,89 +618,87 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	/* In sequence, PAWS is OK. */
 
 	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
-			req->ts_recent = tmp_opt.rcv_tsval;
+		req->ts_recent = tmp_opt.rcv_tsval;
 
-		if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
-			/* Truncate SYN, it is out of window starting
-			   at tcp_rsk(req)->rcv_isn + 1. */
-			flg &= ~TCP_FLAG_SYN;
-		}
+	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
+		/* Truncate SYN, it is out of window starting
+		   at tcp_rsk(req)->rcv_isn + 1. */
+		flg &= ~TCP_FLAG_SYN;
+	}
 
-		/* RFC793: "second check the RST bit" and
-		 *	   "fourth, check the SYN bit"
-		 */
-		if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
-			TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
-			goto embryonic_reset;
-		}
+	/* RFC793: "second check the RST bit" and
+	 *	   "fourth, check the SYN bit"
+	 */
+	if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+		goto embryonic_reset;
+	}
 
-		/* ACK sequence verified above, just make sure ACK is
-		 * set.  If ACK not set, just silently drop the packet.
-		 */
-		if (!(flg & TCP_FLAG_ACK))
-			return NULL;
-
-		/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
-		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
-		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-			inet_rsk(req)->acked = 1;
-			return NULL;
-		}
+	/* ACK sequence verified above, just make sure ACK is
+	 * set.  If ACK not set, just silently drop the packet.
+	 */
+	if (!(flg & TCP_FLAG_ACK))
+		return NULL;
 
-		/* OK, ACK is valid, create big socket and
-		 * feed this segment to it. It will repeat all
-		 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
-		 * ESTABLISHED STATE. If it will be dropped after
-		 * socket is created, wait for troubles.
-		 */
-		child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
-								 req, NULL);
-		if (child == NULL)
-			goto listen_overflow;
+	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+	if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+		inet_rsk(req)->acked = 1;
+		return NULL;
+	}
+
+	/* OK, ACK is valid, create big socket and
+	 * feed this segment to it. It will repeat all
+	 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
+	 * ESTABLISHED STATE. If it will be dropped after
+	 * socket is created, wait for troubles.
+	 */
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	if (child == NULL)
+		goto listen_overflow;
 #ifdef CONFIG_TCP_MD5SIG
-		else {
-			/* Copy over the MD5 key from the original socket */
-			struct tcp_md5sig_key *key;
-			struct tcp_sock *tp = tcp_sk(sk);
-			key = tp->af_specific->md5_lookup(sk, child);
-			if (key != NULL) {
-				/*
-				 * We're using one, so create a matching key on the
-				 * newsk structure. If we fail to get memory then we
-				 * end up not copying the key across. Shucks.
-				 */
-				char *newkey = kmemdup(key->key, key->keylen,
-						       GFP_ATOMIC);
-				if (newkey) {
-					if (!tcp_alloc_md5sig_pool())
-						BUG();
-					tp->af_specific->md5_add(child, child,
-								 newkey,
-								 key->keylen);
-				}
+	else {
+		/* Copy over the MD5 key from the original socket */
+		struct tcp_md5sig_key *key;
+		struct tcp_sock *tp = tcp_sk(sk);
+		key = tp->af_specific->md5_lookup(sk, child);
+		if (key != NULL) {
+			/*
+			 * We're using one, so create a matching key on the
+			 * newsk structure. If we fail to get memory then we
+			 * end up not copying the key across. Shucks.
+			 */
+			char *newkey = kmemdup(key->key, key->keylen,
+					       GFP_ATOMIC);
+			if (newkey) {
+				if (!tcp_alloc_md5sig_pool())
+					BUG();
+				tp->af_specific->md5_add(child, child, newkey,
+							 key->keylen);
 			}
 		}
+	}
 #endif
 
-		inet_csk_reqsk_queue_unlink(sk, req, prev);
-		inet_csk_reqsk_queue_removed(sk, req);
+	inet_csk_reqsk_queue_unlink(sk, req, prev);
+	inet_csk_reqsk_queue_removed(sk, req);
 
-		inet_csk_reqsk_queue_add(sk, req, child);
-		return child;
+	inet_csk_reqsk_queue_add(sk, req, child);
+	return child;
 
-	listen_overflow:
-		if (!sysctl_tcp_abort_on_overflow) {
-			inet_rsk(req)->acked = 1;
-			return NULL;
-		}
+listen_overflow:
+	if (!sysctl_tcp_abort_on_overflow) {
+		inet_rsk(req)->acked = 1;
+		return NULL;
+	}
 
-	embryonic_reset:
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
-		if (!(flg & TCP_FLAG_RST))
-			req->rsk_ops->send_reset(sk, skb);
+embryonic_reset:
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+	if (!(flg & TCP_FLAG_RST))
+		req->rsk_ops->send_reset(sk, skb);
 
-		inet_csk_reqsk_queue_drop(sk, req, prev);
-		return NULL;
+	inet_csk_reqsk_queue_drop(sk, req, prev);
+	return NULL;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 76aab2c1eae491a5d73ac83deec97dd28ebac584 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Thu, 7 Aug 2008 20:37:22 -0700
Subject: pkt_sched: Fix actions referencing

When an action is added several times with the same exact index
it gets deleted on every even-numbered attempt.
This fixes that issue.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d308c19aa3f..26c7e1f9a35 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -205,10 +205,9 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
 {
 	struct tcf_common *p = NULL;
 	if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
-		if (bind) {
+		if (bind)
 			p->tcfc_bindcnt++;
-			p->tcfc_refcnt++;
-		}
+		p->tcfc_refcnt++;
 		a->priv = p;
 	}
 	return p;
-- 
cgit v1.2.3-70-g09d2


From 8123b421e8ed944671d7241323ed3198cccb4041 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 8 Aug 2008 23:23:39 -0700
Subject: pkt_sched: Fix ingress deletion and filter attachment.

Based upon bug reports by Stephen Hemminger.

We still had some cases using ->qdisc instead of ->qdisc_sleeping.

Also, qdisc_lookup() should return ingress qdiscs.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ba1d121f312..bbf149dd781 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -183,6 +183,21 @@ EXPORT_SYMBOL(unregister_qdisc);
    (root qdisc, all its children, children of children etc.)
  */
 
+struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
+{
+	struct Qdisc *q;
+
+	if (!(root->flags & TCQ_F_BUILTIN) &&
+	    root->handle == handle)
+		return root;
+
+	list_for_each_entry(q, &root->list, list) {
+		if (q->handle == handle)
+			return q;
+	}
+	return NULL;
+}
+
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
 	unsigned int i;
@@ -191,16 +206,11 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 		struct Qdisc *q, *txq_root = txq->qdisc_sleeping;
 
-		if (!(txq_root->flags & TCQ_F_BUILTIN) &&
-		    txq_root->handle == handle)
-			return txq_root;
-
-		list_for_each_entry(q, &txq_root->list, list) {
-			if (q->handle == handle)
-				return q;
-		}
+		q = qdisc_match_from_root(txq_root, handle);
+		if (q)
+			return q;
 	}
-	return NULL;
+	return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
 }
 
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
@@ -908,7 +918,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
 			} else { /* ingress */
-				q = dev->rx_queue.qdisc;
+				q = dev->rx_queue.qdisc_sleeping;
 			}
 		} else {
 			struct netdev_queue *dev_queue;
@@ -978,7 +988,7 @@ replay:
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
 			} else { /*ingress */
-				q = dev->rx_queue.qdisc;
+				q = dev->rx_queue.qdisc_sleeping;
 			}
 		} else {
 			struct netdev_queue *dev_queue;
@@ -1529,11 +1539,11 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	t = 0;
 
 	dev_queue = netdev_get_tx_queue(dev, 0);
-	if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
+	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
 		goto done;
 
 	dev_queue = &dev->rx_queue;
-	if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
+	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
 		goto done;
 
 done:
-- 
cgit v1.2.3-70-g09d2


From d97106ea52aa57e63ff40d04479016836bbb5a4e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 9 Aug 2008 00:35:05 -0700
Subject: udp: Drop socket lock for encapsulated packets

The socket lock is there to protect the normal UDP receive path.
Encapsulation UDP sockets don't need that protection.  In fact
the locking is deadly for them as they may contain another UDP
packet within, possibly with the same addresses.

Also the nested bit was copied from TCP.  TCP needs it because
of accept(2) spawning sockets.  This simply doesn't apply to UDP
so I've removed it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 6 ++++--
 net/ipv6/udp.c | 6 +++---
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 383d17359d0..8e42fbbd576 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -989,7 +989,9 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		    up->encap_rcv != NULL) {
 			int ret;
 
+			bh_unlock_sock(sk);
 			ret = (*up->encap_rcv)(sk, skb);
+			bh_lock_sock(sk);
 			if (ret <= 0) {
 				UDP_INC_STATS_BH(sock_net(sk),
 						 UDP_MIB_INDATAGRAMS,
@@ -1092,7 +1094,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 			if (skb1) {
 				int ret = 0;
 
-				bh_lock_sock_nested(sk);
+				bh_lock_sock(sk);
 				if (!sock_owned_by_user(sk))
 					ret = udp_queue_rcv_skb(sk, skb1);
 				else
@@ -1194,7 +1196,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 
 	if (sk != NULL) {
 		int ret = 0;
-		bh_lock_sock_nested(sk);
+		bh_lock_sock(sk);
 		if (!sock_owned_by_user(sk))
 			ret = udp_queue_rcv_skb(sk, skb);
 		else
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d1477b350f7..a6aecf76a71 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -379,7 +379,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 					uh->source, saddr, dif))) {
 		struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
 		if (buff) {
-			bh_lock_sock_nested(sk2);
+			bh_lock_sock(sk2);
 			if (!sock_owned_by_user(sk2))
 				udpv6_queue_rcv_skb(sk2, buff);
 			else
@@ -387,7 +387,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 			bh_unlock_sock(sk2);
 		}
 	}
-	bh_lock_sock_nested(sk);
+	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
 	else
@@ -508,7 +508,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 
 	/* deliver */
 
-	bh_lock_sock_nested(sk);
+	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
 	else
-- 
cgit v1.2.3-70-g09d2


From bc0fde2fad007a81ecffceb25a893a6c3f1ed767 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 10 Aug 2008 09:14:05 +0000
Subject: ipvs: Fix possible deadlock in sync code

Commit 998e7a76804b7a273a0460c2cdd5a51fa9856717 ("ipvs: Use kthread_run()
instead of doing a double-fork via kernel_thread()") introduced a possible
deadlock in the sync code. We need to use the _bh versions for the lock, as the
lock is also accessed from a bottom half.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_sync.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 45e9bd96c28..a652da2c320 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -904,9 +904,9 @@ int stop_sync_thread(int state)
 		 * progress of stopping the master sync daemon.
 		 */
 
-		spin_lock(&ip_vs_sync_lock);
+		spin_lock_bh(&ip_vs_sync_lock);
 		ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
-		spin_unlock(&ip_vs_sync_lock);
+		spin_unlock_bh(&ip_vs_sync_lock);
 		kthread_stop(sync_master_thread);
 		sync_master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
-- 
cgit v1.2.3-70-g09d2


From 8ab19ea36c5c5340ff598e4d15fc084eb65671dc Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 10 Aug 2008 09:17:59 +0000
Subject: ipvs: Fix possible deadlock in estimator code

There is a slight chance for a deadlock in the estimator code. We can't call
del_timer_sync() while holding our lock, as the timer might be active and
spinning for the lock on another cpu. Work around this issue by using
try_to_del_timer_sync() and releasing the lock. We could actually delete the
timer outside of our lock, as the add and kill functions are only every called
from userspace via [gs]etsockopt() and are serialized by a mutex, but better
make this explicit.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Cc: stable <stable@kernel.org>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_est.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index bc04eedd6db..1d6e58e502f 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -170,8 +170,11 @@ void ip_vs_kill_estimator(struct ip_vs_stats *stats)
 		kfree(est);
 		killed++;
 	}
-	if (killed && est_list == NULL)
-		del_timer_sync(&est_timer);
+	while (killed && !est_list && try_to_del_timer_sync(&est_timer) < 0) {
+		write_unlock_bh(&est_lock);
+		cpu_relax();
+		write_lock_bh(&est_lock);
+	}
 	write_unlock_bh(&est_lock);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 66a0be47200fff30f8c482ea584052c6affb08cb Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 10 Aug 2008 09:18:02 +0000
Subject: ipvs: Use list_empty() instead of open-coding the same functionality

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_sched.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index b6476730985..a46ad9e3501 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -184,7 +184,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
 
 	write_lock_bh(&__ip_vs_sched_lock);
 
-	if (scheduler->n_list.next != &scheduler->n_list) {
+	if (!list_empty(&scheduler->n_list)) {
 		write_unlock_bh(&__ip_vs_sched_lock);
 		ip_vs_use_count_dec();
 		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
@@ -229,7 +229,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
 	}
 
 	write_lock_bh(&__ip_vs_sched_lock);
-	if (scheduler->n_list.next == &scheduler->n_list) {
+	if (list_empty(&scheduler->n_list)) {
 		write_unlock_bh(&__ip_vs_sched_lock);
 		IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
 			  "is not in the list. failed\n", scheduler->name);
-- 
cgit v1.2.3-70-g09d2


From d149ccc9cf85cdf089c1b2189ade111305712b0c Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 10 Aug 2008 09:18:02 +0000
Subject: ipvs: Initialize schedulers' struct list_head at compile time

No need to do it at runtime and this saves a couple of bytes in the text
section.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_dh.c    | 2 +-
 net/ipv4/ipvs/ip_vs_lblc.c  | 2 +-
 net/ipv4/ipvs/ip_vs_lblcr.c | 2 +-
 net/ipv4/ipvs/ip_vs_lc.c    | 2 +-
 net/ipv4/ipvs/ip_vs_nq.c    | 2 +-
 net/ipv4/ipvs/ip_vs_rr.c    | 2 +-
 net/ipv4/ipvs/ip_vs_sed.c   | 2 +-
 net/ipv4/ipvs/ip_vs_sh.c    | 2 +-
 net/ipv4/ipvs/ip_vs_wlc.c   | 2 +-
 net/ipv4/ipvs/ip_vs_wrr.c   | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index 8afc1503ed2..fa66824d264 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -233,6 +233,7 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
 	.name =			"dh",
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
 	.init_service =		ip_vs_dh_init_svc,
 	.done_service =		ip_vs_dh_done_svc,
 	.update_service =	ip_vs_dh_update_svc,
@@ -242,7 +243,6 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
 
 static int __init ip_vs_dh_init(void)
 {
-	INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list);
 	return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 0efa3db4b18..7a6a319f544 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -539,6 +539,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
 	.name =			"lblc",
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
 	.init_service =		ip_vs_lblc_init_svc,
 	.done_service =		ip_vs_lblc_done_svc,
 	.update_service =	ip_vs_lblc_update_svc,
@@ -550,7 +551,6 @@ static int __init ip_vs_lblc_init(void)
 {
 	int ret;
 
-	INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
 	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
 	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
 	if (ret)
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 8e3bbeb4513..c234e73968a 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -728,6 +728,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
 	.name =			"lblcr",
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
 	.init_service =		ip_vs_lblcr_init_svc,
 	.done_service =		ip_vs_lblcr_done_svc,
 	.update_service =	ip_vs_lblcr_update_svc,
@@ -739,7 +740,6 @@ static int __init ip_vs_lblcr_init(void)
 {
 	int ret;
 
-	INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
 	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
 	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
 	if (ret)
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index ac9f08e065d..ebcdbf75ac6 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -98,6 +98,7 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = {
 	.name =			"lc",
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
 	.init_service =		ip_vs_lc_init_svc,
 	.done_service =		ip_vs_lc_done_svc,
 	.update_service =	ip_vs_lc_update_svc,
@@ -107,7 +108,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = {
 
 static int __init ip_vs_lc_init(void)
 {
-	INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list);
 	return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index a46bf258d42..92f3a677003 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -136,6 +136,7 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler =
 	.name =			"nq",
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
 	.init_service =		ip_vs_nq_init_svc,
 	.done_service =		ip_vs_nq_done_svc,
 	.update_service =	ip_vs_nq_update_svc,
@@ -145,7 +146,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler =
 
 static int __init ip_vs_nq_init(void)
 {
-	INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list);
 	return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index c8db12d39e6..358110d17e5 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -94,6 +94,7 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
 	.name =			"rr",			/* name */
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
 	.init_service =		ip_vs_rr_init_svc,
 	.done_service =		ip_vs_rr_done_svc,
 	.update_service =	ip_vs_rr_update_svc,
@@ -102,7 +103,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
 
 static int __init ip_vs_rr_init(void)
 {
-	INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list);
 	return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index 2a7d3135818..77663d84cbd 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -138,6 +138,7 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler =
 	.name =			"sed",
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
 	.init_service =		ip_vs_sed_init_svc,
 	.done_service =		ip_vs_sed_done_svc,
 	.update_service =	ip_vs_sed_update_svc,
@@ -147,7 +148,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler =
 
 static int __init ip_vs_sed_init(void)
 {
-	INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list);
 	return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index b8fdfac6500..7b979e22805 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -230,6 +230,7 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
 	.name =			"sh",
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list	 =		LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
 	.init_service =		ip_vs_sh_init_svc,
 	.done_service =		ip_vs_sh_done_svc,
 	.update_service =	ip_vs_sh_update_svc,
@@ -239,7 +240,6 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
 
 static int __init ip_vs_sh_init(void)
 {
-	INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list);
 	return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index 772c3cb4eca..9b0ef86bb1f 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -126,6 +126,7 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler =
 	.name =			"wlc",
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
 	.init_service =		ip_vs_wlc_init_svc,
 	.done_service =		ip_vs_wlc_done_svc,
 	.update_service =	ip_vs_wlc_update_svc,
@@ -135,7 +136,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler =
 
 static int __init ip_vs_wlc_init(void)
 {
-	INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list);
 	return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
index 1d6932d7dc9..0d86a79b87b 100644
--- a/net/ipv4/ipvs/ip_vs_wrr.c
+++ b/net/ipv4/ipvs/ip_vs_wrr.c
@@ -212,6 +212,7 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
 	.name =			"wrr",
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
 	.init_service =		ip_vs_wrr_init_svc,
 	.done_service =		ip_vs_wrr_done_svc,
 	.update_service =	ip_vs_wrr_update_svc,
@@ -220,7 +221,6 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
 
 static int __init ip_vs_wrr_init(void)
 {
-	INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list);
 	return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 048cf48b897bcae9e6fa8b46b6976dab5e710e3c Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 10 Aug 2008 18:24:35 +0000
Subject: ipvs: Annotate init functions with __init

Being able to discard these functions saves a couple of bytes at runtime. The
cleanup functions can't be annotated with __exit as they are also called from
init functions.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_app.c   | 2 +-
 net/ipv4/ipvs/ip_vs_conn.c  | 2 +-
 net/ipv4/ipvs/ip_vs_ctl.c   | 2 +-
 net/ipv4/ipvs/ip_vs_proto.c | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 1f1897a1a70..201b8ea3020 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -608,7 +608,7 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
 }
 
 
-int ip_vs_app_init(void)
+int __init ip_vs_app_init(void)
 {
 	/* we will replace it with proc_net_ipvs_create() soon */
 	proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index f8bdae47a77..44a6872dc24 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -965,7 +965,7 @@ static void ip_vs_conn_flush(void)
 }
 
 
-int ip_vs_conn_init(void)
+int __init ip_vs_conn_init(void)
 {
 	int idx;
 
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9a5ace0b4dd..df13333813a 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -2306,7 +2306,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
 };
 
 
-int ip_vs_control_init(void)
+int __init ip_vs_control_init(void)
 {
 	int ret;
 	int idx;
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 876714f23d6..6099a88fc20 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -43,7 +43,7 @@ static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
 /*
  *	register an ipvs protocol
  */
-static int __used register_ip_vs_protocol(struct ip_vs_protocol *pp)
+static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
 {
 	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
 
@@ -190,7 +190,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
 }
 
 
-int ip_vs_protocol_init(void)
+int __init ip_vs_protocol_init(void)
 {
 	char protocols[64];
 #define REGISTER_PROTOCOL(p)			\
-- 
cgit v1.2.3-70-g09d2


From 5587da55fbf332ab8d1b37637536f94bc373867f Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 10 Aug 2008 18:24:40 +0000
Subject: ipvs: Mark net_vs_ctl_path const

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h       | 2 +-
 net/ipv4/ipvs/ip_vs_ctl.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index e980416bff8..c8ee9b89b02 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -620,7 +620,7 @@ extern int sysctl_ip_vs_expire_quiescent_template;
 extern int sysctl_ip_vs_sync_threshold[2];
 extern int sysctl_ip_vs_nat_icmp_send;
 extern struct ip_vs_stats ip_vs_stats;
-extern struct ctl_path net_vs_ctl_path[];
+extern const struct ctl_path net_vs_ctl_path[];
 
 extern struct ip_vs_service *
 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index df13333813a..999d884e886 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1589,7 +1589,7 @@ static struct ctl_table vs_vars[] = {
 	{ .ctl_name = 0 }
 };
 
-struct ctl_path net_vs_ctl_path[] = {
+const struct ctl_path net_vs_ctl_path[] = {
 	{ .procname = "net", .ctl_name = CTL_NET, },
 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
 	{ .procname = "vs", },
-- 
cgit v1.2.3-70-g09d2


From 3a14a313f9b406c37ab7e3f855b060eb8587b8c7 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 10 Aug 2008 18:24:41 +0000
Subject: ipvs: Embed estimator object into stats object

There's no reason for dynamically allocating an estimator object for every
stats object. Directly embed an estimator object into every stats object and
switch to using the kernel-provided list implementation. This makes the code
much simpler and faster, as we do not need to traverse the list of all
estimators to find the one belonging to a stats object. There's no need to use
an rwlock, as we only have one reader. Also reorder the members of the
estimator structure slightly to avoid padding overhead. This can't be done
with the stats object as the members are currently copied to our user space
object via memcpy() and changing it would break ABI.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h       |  28 ++++++++++-
 net/ipv4/ipvs/ip_vs_ctl.c |   2 +-
 net/ipv4/ipvs/ip_vs_est.c | 117 +++++++++++++++-------------------------------
 3 files changed, 65 insertions(+), 82 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index c8ee9b89b02..7312c3dd309 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -140,8 +140,24 @@ struct ip_vs_seq {
 
 
 /*
- *	IPVS statistics object
+ *	IPVS statistics objects
  */
+struct ip_vs_estimator {
+	struct list_head	list;
+
+	u64			last_inbytes;
+	u64			last_outbytes;
+	u32			last_conns;
+	u32			last_inpkts;
+	u32			last_outpkts;
+
+	u32			cps;
+	u32			inpps;
+	u32			outpps;
+	u32			inbps;
+	u32			outbps;
+};
+
 struct ip_vs_stats
 {
 	__u32                   conns;          /* connections scheduled */
@@ -156,7 +172,15 @@ struct ip_vs_stats
 	__u32			inbps;		/* current in byte rate */
 	__u32			outbps;		/* current out byte rate */
 
+	/*
+	 * Don't add anything before the lock, because we use memcpy() to copy
+	 * the members before the lock to struct ip_vs_stats_user in
+	 * ip_vs_ctl.c.
+	 */
+
 	spinlock_t              lock;           /* spin lock */
+
+	struct ip_vs_estimator	est;		/* estimator */
 };
 
 struct dst_entry;
@@ -659,7 +683,7 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
 /*
  *      IPVS rate estimator prototypes (from ip_vs_est.c)
  */
-extern int ip_vs_new_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
 extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
 
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 999d884e886..d651bce0549 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -684,8 +684,8 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
 	spin_lock_bh(&stats->lock);
 	memset(stats, 0, (char *)&stats->lock - (char *)stats);
-	spin_unlock_bh(&stats->lock);
 	ip_vs_zero_estimator(stats);
+	spin_unlock_bh(&stats->lock);
 }
 
 /*
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 1d6e58e502f..5a20f93bd7f 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 #include <linux/interrupt.h>
 #include <linux/sysctl.h>
+#include <linux/list.h>
 
 #include <net/ip_vs.h>
 
@@ -44,28 +45,11 @@
  */
 
 
-struct ip_vs_estimator
-{
-	struct ip_vs_estimator	*next;
-	struct ip_vs_stats	*stats;
-
-	u32			last_conns;
-	u32			last_inpkts;
-	u32			last_outpkts;
-	u64			last_inbytes;
-	u64			last_outbytes;
-
-	u32			cps;
-	u32			inpps;
-	u32			outpps;
-	u32			inbps;
-	u32			outbps;
-};
+static void estimation_timer(unsigned long arg);
 
-
-static struct ip_vs_estimator *est_list = NULL;
-static DEFINE_RWLOCK(est_lock);
-static struct timer_list est_timer;
+static LIST_HEAD(est_list);
+static DEFINE_SPINLOCK(est_lock);
+static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
 
 static void estimation_timer(unsigned long arg)
 {
@@ -76,9 +60,9 @@ static void estimation_timer(unsigned long arg)
 	u64 n_inbytes, n_outbytes;
 	u32 rate;
 
-	read_lock(&est_lock);
-	for (e = est_list; e; e = e->next) {
-		s = e->stats;
+	spin_lock(&est_lock);
+	list_for_each_entry(e, &est_list, list) {
+		s = container_of(e, struct ip_vs_stats, est);
 
 		spin_lock(&s->lock);
 		n_conns = s->conns;
@@ -114,19 +98,16 @@ static void estimation_timer(unsigned long arg)
 		s->outbps = (e->outbps+0xF)>>5;
 		spin_unlock(&s->lock);
 	}
-	read_unlock(&est_lock);
+	spin_unlock(&est_lock);
 	mod_timer(&est_timer, jiffies + 2*HZ);
 }
 
-int ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_new_estimator(struct ip_vs_stats *stats)
 {
-	struct ip_vs_estimator *est;
+	struct ip_vs_estimator *est = &stats->est;
 
-	est = kzalloc(sizeof(*est), GFP_KERNEL);
-	if (est == NULL)
-		return -ENOMEM;
+	INIT_LIST_HEAD(&est->list);
 
-	est->stats = stats;
 	est->last_conns = stats->conns;
 	est->cps = stats->cps<<10;
 
@@ -142,62 +123,40 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats)
 	est->last_outbytes = stats->outbytes;
 	est->outbps = stats->outbps<<5;
 
-	write_lock_bh(&est_lock);
-	est->next = est_list;
-	if (est->next == NULL) {
-		setup_timer(&est_timer, estimation_timer, 0);
-		est_timer.expires = jiffies + 2*HZ;
-		add_timer(&est_timer);
-	}
-	est_list = est;
-	write_unlock_bh(&est_lock);
-	return 0;
+	spin_lock_bh(&est_lock);
+	if (list_empty(&est_list))
+		mod_timer(&est_timer, jiffies + 2 * HZ);
+	list_add(&est->list, &est_list);
+	spin_unlock_bh(&est_lock);
 }
 
 void ip_vs_kill_estimator(struct ip_vs_stats *stats)
 {
-	struct ip_vs_estimator *est, **pest;
-	int killed = 0;
-
-	write_lock_bh(&est_lock);
-	pest = &est_list;
-	while ((est=*pest) != NULL) {
-		if (est->stats != stats) {
-			pest = &est->next;
-			continue;
-		}
-		*pest = est->next;
-		kfree(est);
-		killed++;
-	}
-	while (killed && !est_list && try_to_del_timer_sync(&est_timer) < 0) {
-		write_unlock_bh(&est_lock);
+	struct ip_vs_estimator *est = &stats->est;
+
+	spin_lock_bh(&est_lock);
+	list_del(&est->list);
+	while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) {
+		spin_unlock_bh(&est_lock);
 		cpu_relax();
-		write_lock_bh(&est_lock);
+		spin_lock_bh(&est_lock);
 	}
-	write_unlock_bh(&est_lock);
+	spin_unlock_bh(&est_lock);
 }
 
 void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 {
-	struct ip_vs_estimator *e;
-
-	write_lock_bh(&est_lock);
-	for (e = est_list; e; e = e->next) {
-		if (e->stats != stats)
-			continue;
-
-		/* set counters zero */
-		e->last_conns = 0;
-		e->last_inpkts = 0;
-		e->last_outpkts = 0;
-		e->last_inbytes = 0;
-		e->last_outbytes = 0;
-		e->cps = 0;
-		e->inpps = 0;
-		e->outpps = 0;
-		e->inbps = 0;
-		e->outbps = 0;
-	}
-	write_unlock_bh(&est_lock);
+	struct ip_vs_estimator *est = &stats->est;
+
+	/* set counters zero, caller must hold the stats->lock lock */
+	est->last_inbytes = 0;
+	est->last_outbytes = 0;
+	est->last_conns = 0;
+	est->last_inpkts = 0;
+	est->last_outpkts = 0;
+	est->cps = 0;
+	est->inpps = 0;
+	est->outpps = 0;
+	est->inbps = 0;
+	est->outbps = 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 519e49e888458649dde453d36c08b7f3432525dc Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 10 Aug 2008 18:24:41 +0000
Subject: ipvs: No need to zero out ip_vs_stats during initialization

It's a global variable and automatically initialized to zero. And now we can
also initialize the lock at compile time.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index d651bce0549..cfb1d20993d 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1784,7 +1784,9 @@ static const struct file_operations ip_vs_info_fops = {
 
 #endif
 
-struct ip_vs_stats ip_vs_stats;
+struct ip_vs_stats ip_vs_stats = {
+	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
+};
 
 #ifdef CONFIG_PROC_FS
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
@@ -2333,8 +2335,6 @@ int __init ip_vs_control_init(void)
 		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
 	}
 
-	memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
-	spin_lock_init(&ip_vs_stats.lock);
 	ip_vs_new_estimator(&ip_vs_stats);
 
 	/* Hook the defense timer */
-- 
cgit v1.2.3-70-g09d2


From e93615d0866a974afc7148172f8382e2af48c985 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Mon, 11 Aug 2008 17:19:14 +1000
Subject: ipvs: Explictly clear ip_vs_stats members

In order to align the coding styles of ip_vs_zero_stats() and
its child-function ip_vs_zero_estimator(), clear ip_vs_stats
members explicitlty rather than doing a limited memset().

This was chosen over modifying ip_vs_zero_estimator() to use
memset() as it is more robust against changes in members
in the relevant structures. memset() would be prefered if
all members of the structure were to be cleared.

Cc: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index cfb1d20993d..6379705a8dc 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -683,8 +683,21 @@ static void
 ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
 	spin_lock_bh(&stats->lock);
-	memset(stats, 0, (char *)&stats->lock - (char *)stats);
+
+	stats->conns = 0;
+	stats->inpkts = 0;
+	stats->outpkts = 0;
+	stats->inbytes = 0;
+	stats->outbytes = 0;
+
+	stats->cps = 0;
+	stats->inpps = 0;
+	stats->outpps = 0;
+	stats->inbps = 0;
+	stats->outbps = 0;
+
 	ip_vs_zero_estimator(stats);
+
 	spin_unlock_bh(&stats->lock);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 1cfa26661a85549063e369e2b40275eeaa7b923c Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 11 Aug 2008 18:11:06 -0700
Subject: pkt_sched: Add BH protection for qdisc_stab_lock.

Since qdisc_stab_lock is used in qdisc_put_stab(), which is called in
BH context from __qdisc_destroy() RCU callback, softirq safe locking
is needed.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bbf149dd781..c25465e5607 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -331,7 +331,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 	if (!s || tsize != s->tsize || (!tab && tsize > 0))
 		return ERR_PTR(-EINVAL);
 
-	spin_lock(&qdisc_stab_lock);
+	spin_lock_bh(&qdisc_stab_lock);
 
 	list_for_each_entry(stab, &qdisc_stab_list, list) {
 		if (memcmp(&stab->szopts, s, sizeof(*s)))
@@ -339,11 +339,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
 			continue;
 		stab->refcnt++;
-		spin_unlock(&qdisc_stab_lock);
+		spin_unlock_bh(&qdisc_stab_lock);
 		return stab;
 	}
 
-	spin_unlock(&qdisc_stab_lock);
+	spin_unlock_bh(&qdisc_stab_lock);
 
 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
 	if (!stab)
@@ -354,9 +354,9 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 	if (tsize > 0)
 		memcpy(stab->data, tab, tsize * sizeof(u16));
 
-	spin_lock(&qdisc_stab_lock);
+	spin_lock_bh(&qdisc_stab_lock);
 	list_add_tail(&stab->list, &qdisc_stab_list);
-	spin_unlock(&qdisc_stab_lock);
+	spin_unlock_bh(&qdisc_stab_lock);
 
 	return stab;
 }
@@ -366,14 +366,14 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
 	if (!tab)
 		return;
 
-	spin_lock(&qdisc_stab_lock);
+	spin_lock_bh(&qdisc_stab_lock);
 
 	if (--tab->refcnt == 0) {
 		list_del(&tab->list);
 		kfree(tab);
 	}
 
-	spin_unlock(&qdisc_stab_lock);
+	spin_unlock_bh(&qdisc_stab_lock);
 }
 EXPORT_SYMBOL(qdisc_put_stab);
 
-- 
cgit v1.2.3-70-g09d2


From 5e0115e500fe9dd2ca11e6f92db9123204f1327a Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Wed, 13 Aug 2008 01:58:57 -0700
Subject: ipv6: Fix OOPS, ip -f inet6 route get fec0::1, linux-2.6.26,
 ip6_route_output, rt6_fill_node+0x175

Alexey Dobriyan wrote:
> On Thu, Aug 07, 2008 at 07:00:56PM +0200, John Gumb wrote:
>> Scenario: no ipv6 default route set.
>
>> # ip -f inet6 route get fec0::1
>>
>> BUG: unable to handle kernel NULL pointer dereference at 00000000
>> IP: [<c0369b85>] rt6_fill_node+0x175/0x3b0
>> EIP is at rt6_fill_node+0x175/0x3b0
>
> 0xffffffff80424dd3 is in rt6_fill_node (net/ipv6/route.c:2191).
> 2186                    } else
> 2187    #endif
> 2188                            NLA_PUT_U32(skb, RTA_IIF, iif);
> 2189            } else if (dst) {
> 2190                    struct in6_addr saddr_buf;
> 2191      ====>         if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
>					       ^^^^^^^^^^^^^^^^^^^^^^^^
>											NULL
>
> 2192                                           dst, 0, &saddr_buf) == 0)
> 2193                            NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
> 2194            }

The commit that changed this can't be reverted easily, but the patch
below works for me.

Fix NULL de-reference in rt6_fill_node() when there's no IPv6 input
device present in the dst entry.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5a3e87e4b18..41b165ffb36 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2187,8 +2187,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 #endif
 			NLA_PUT_U32(skb, RTA_IIF, iif);
 	} else if (dst) {
+		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
 		struct in6_addr saddr_buf;
-		if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
+		if (ipv6_dev_get_saddr(idev ? idev->dev : NULL,
 				       dst, 0, &saddr_buf) == 0)
 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
-- 
cgit v1.2.3-70-g09d2


From 6ced0b3f1e1c089caf8798485423a093744b6a48 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 13 Aug 2008 02:32:06 -0700
Subject: net/tipc/subscr.c: don't use ___constant_swab32

It's an internal implementation detail which we _should_ be free to change.
So we did, and it promptly broke.

The compiler shold be able to work out when to use the __constant version
anyway.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 0326d3060bc..0747d8a9232 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -85,7 +85,7 @@ static struct top_srv topsrv = { 0 };
 
 static u32 htohl(u32 in, int swap)
 {
-	return swap ? (u32)___constant_swab32(in) : in;
+	return swap ? swab32(in) : in;
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 317900cb010f4aca0e3cb14a02d0ddcc44ddafa7 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Wed, 13 Aug 2008 02:39:56 -0700
Subject: wext: Send name on events

In the minimal the wireless extensions oughta send at least
the name in addition to the ifindex.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index df5b3886c36..d98ffb75119 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -1277,6 +1277,7 @@ static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev,
 	r->ifi_flags = dev_get_flags(dev);
 	r->ifi_change = 0;	/* Wireless changes don't affect those flags */
 
+	NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
 	/* Add the wireless events in the netlink packet */
 	NLA_PUT(skb, IFLA_WIRELESS, event_len, event);
 
-- 
cgit v1.2.3-70-g09d2


From 34093d055e09d1bb549efc11c8d448373437bbe4 Mon Sep 17 00:00:00 2001
From: Julien Brunel <brunel@diku.dk>
Date: Wed, 13 Aug 2008 02:40:48 -0700
Subject: net/rxrpc: Use an IS_ERR test rather than a NULL test

In case of error, the function rxrpc_get_transport returns an ERR
pointer, but never returns a NULL pointer. So after a call to this
function, a NULL test should be replaced by an IS_ERR test.

A simplified version of the semantic patch that makes this change is
as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@correct_null_test@
expression x,E;
statement S1, S2;
@@
x =  rxrpc_get_transport(...)
<... when != x = E
if (
(
- x@p2 != NULL
+ ! IS_ERR ( x )
|
- x@p2 == NULL
+ IS_ERR( x )
)
 )
S1
else S2
...>
? x = E;
// </smpl>

Signed-off-by: Julien Brunel <brunel@diku.dk>
Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-accept.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
index bdfb7741779..77228f28fa3 100644
--- a/net/rxrpc/ar-accept.c
+++ b/net/rxrpc/ar-accept.c
@@ -100,7 +100,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
 
 	trans = rxrpc_get_transport(local, peer, GFP_NOIO);
 	rxrpc_put_peer(peer);
-	if (!trans) {
+	if (IS_ERR(trans)) {
 		_debug("no trans");
 		ret = -EBUSY;
 		goto error;
-- 
cgit v1.2.3-70-g09d2


From f97017cdefefdb6a0e19266024b0c6f9fd411eeb Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Wed, 13 Aug 2008 02:41:22 -0700
Subject: net-sched: Fix actions flushing

Flushing of actions has been broken since we changed
the semantics of netlink parsed tb[X] to mean X is an attribute type.
This makes the flushing work.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 26c7e1f9a35..88b57331d13 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -790,6 +790,8 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 	err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
 	if (err < 0)
 		goto nla_put_failure;
+	if (err == 0)
+		goto noflush_out;
 
 	nla_nest_end(skb, nest);
 
@@ -807,6 +809,7 @@ nla_put_failure:
 nlmsg_failure:
 	module_put(a->ops->owner);
 err_out:
+noflush_out:
 	kfree_skb(skb);
 	kfree(a);
 	return err;
@@ -824,8 +827,10 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
 		return ret;
 
 	if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
-		if (tb[0] != NULL && tb[1] == NULL)
-			return tca_action_flush(tb[0], n, pid);
+		if (tb[1] != NULL)
+			return tca_action_flush(tb[1], n, pid);
+		else
+			return -EINVAL;
 	}
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-- 
cgit v1.2.3-70-g09d2


From 36723873b664fb6b5cfe06d291df948126e43f50 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Wed, 13 Aug 2008 02:41:45 -0700
Subject: net-sched: fix Action flushing return code

Flushing must consistently return ENOMEM on failure of any allocation

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 88b57331d13..9974b3f04f0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -751,7 +751,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 	struct nlattr *tb[TCA_ACT_MAX+1];
 	struct nlattr *kind;
 	struct tc_action *a = create_a(0);
-	int err = -EINVAL;
+	int err = -ENOMEM;
 
 	if (a == NULL) {
 		printk("tca_action_flush: couldnt create tc_action\n");
@@ -762,7 +762,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 	if (!skb) {
 		printk("tca_action_flush: failed skb alloc\n");
 		kfree(a);
-		return -ENOBUFS;
+		return err;
 	}
 
 	b = skb_tail_pointer(skb);
-- 
cgit v1.2.3-70-g09d2


From c1e24df27fb1058739789126db6ad1b1ef719346 Mon Sep 17 00:00:00 2001
From: Jean-Christophe DUBOIS <jcd@tribudubois.net>
Date: Wed, 13 Aug 2008 13:35:37 -0700
Subject: xfrm: remove unnecessary variable in xfrm_output_resume() 2nd try

Small fix removing an unnecessary intermediate variable.

Signed-off-by: Jean-Christophe DUBOIS <jcd@tribudubois.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 3f964db908a..ac25b4c0e98 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -112,16 +112,13 @@ error_nolock:
 int xfrm_output_resume(struct sk_buff *skb, int err)
 {
 	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
-		struct xfrm_state *x;
-
 		nf_reset(skb);
 
 		err = skb->dst->ops->local_out(skb);
 		if (unlikely(err != 1))
 			goto out;
 
-		x = skb->dst->xfrm;
-		if (!x)
+		if (!skb->dst->xfrm)
 			return dst_output(skb);
 
 		err = nf_hook(skb->dst->ops->family,
-- 
cgit v1.2.3-70-g09d2


From 3e8a0a559c66ee9e7468195691a56fefc3589740 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 13 Aug 2008 13:48:39 -0700
Subject: dccp: change L/R must have at least one byte in the dccpsf_val field

Thanks to Eugene Teo for reporting this problem.

Signed-off-by: Eugene Teo <eugenete@kernel.sg>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/proto.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b622d974485..1ca3b26eed0 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -474,6 +474,11 @@ static int dccp_setsockopt_change(struct sock *sk, int type,
 
 	if (copy_from_user(&opt, optval, sizeof(opt)))
 		return -EFAULT;
+	/*
+	 * rfc4340: 6.1. Change Options
+	 */
+	if (opt.dccpsf_len < 1)
+		return -EINVAL;
 
 	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
 	if (!val)
-- 
cgit v1.2.3-70-g09d2


From 24b8b44780a2c53ecb738f4a1c08d114f5eda27c Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 13 Aug 2008 11:05:41 -0500
Subject: svcrdma: Fix race between svc_rdma_recvfrom thread and the
 dto_tasklet

RDMA_READ completions are kept on a separate queue from the general
I/O request queue. Since a separate lock is used to protect the RDMA_READ
completion queue, a race exists between the dto_tasklet and the
svc_rdma_recvfrom thread where the dto_tasklet sets the XPT_DATA
bit and adds I/O to the read-completion queue. Concurrently, the
recvfrom thread checks the generic queue, finds it empty and resets
the XPT_DATA bit. A subsequent svc_xprt_enqueue will fail to enqueue
the transport for I/O and cause the transport to "stall".

The fix is to protect both lists with the same lock and set the XPT_DATA
bit with this lock held.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/svc_rdma.h          | 1 -
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 8 ++++----
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 5 ++---
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index ef2e3a20bf3..dc05b54bd3a 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -143,7 +143,6 @@ struct svcxprt_rdma {
 	unsigned long	     sc_flags;
 	struct list_head     sc_dto_q;		/* DTO tasklet I/O pending Q */
 	struct list_head     sc_read_complete_q;
-	spinlock_t           sc_read_complete_lock;
 	struct work_struct   sc_work;
 };
 /* sc_flags */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index b4b17f44cb2..74de31a0661 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -443,18 +443,18 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 
 	dprintk("svcrdma: rqstp=%p\n", rqstp);
 
-	spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
+	spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
 	if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
 		ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
 				  struct svc_rdma_op_ctxt,
 				  dto_q);
 		list_del_init(&ctxt->dto_q);
 	}
-	spin_unlock_bh(&rdma_xprt->sc_read_complete_lock);
-	if (ctxt)
+	if (ctxt) {
+		spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
 		return rdma_read_complete(rqstp, ctxt);
+	}
 
-	spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
 	if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
 		ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
 				  struct svc_rdma_op_ctxt,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 19ddc382b77..900cb69728c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -359,11 +359,11 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
 				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
 				BUG_ON(!read_hdr);
+				spin_lock_bh(&xprt->sc_rq_dto_lock);
 				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-				spin_lock_bh(&xprt->sc_read_complete_lock);
 				list_add_tail(&read_hdr->dto_q,
 					      &xprt->sc_read_complete_q);
-				spin_unlock_bh(&xprt->sc_read_complete_lock);
+				spin_unlock_bh(&xprt->sc_rq_dto_lock);
 				svc_xprt_enqueue(&xprt->sc_xprt);
 			}
 			svc_rdma_put_context(ctxt, 0);
@@ -428,7 +428,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
-	spin_lock_init(&cma_xprt->sc_read_complete_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
 
 	cma_xprt->sc_ord = svcrdma_ord;
-- 
cgit v1.2.3-70-g09d2


From 64c00d81b5c2491bd140b3c8eb2e8c351513f971 Mon Sep 17 00:00:00 2001
From: Andrew Gallatin <gallatin@myri.com>
Date: Wed, 13 Aug 2008 15:16:00 -0700
Subject: pktgen: prevent pktgen from using bad tx queue

With the new multi-queue transmit code, it is possible to accidentally
make pktgen pick a non-existing tx queue simply by using a stale
script to drive pktgen.  Access to this non-existing tx queue will
then trigger a bad memory access and kill the machine.

For example, setting "queue_map_max 2" will cause my machine to die
when accessing a garbage spinlock in the non-existing tx queue:

BUG: spinlock bad magic on CPU#0, kpktgend_0/564
  lock: ffff88001ddf6718, .magic: ffffffff, .owner: /-1, .owner_cpu: 0
Pid: 564, comm: kpktgend_0 Not tainted 2.6.27-rc3 #35

Call Trace:
  [<ffffffff803a1228>] spin_bug+0xa4/0xac
  [<ffffffff803a1253>] _raw_spin_lock+0x23/0x123
  [<ffffffff8055b06f>] _spin_lock_bh+0x17/0x1b
  [<ffffffff804cb57d>] pktgen_thread_worker+0xa97/0x1002
  [<ffffffff8022874d>] ? finish_task_switch+0x38/0x97
  [<ffffffff80242077>] ? autoremove_wake_function+0x0/0x36
  [<ffffffff80242077>] ? autoremove_wake_function+0x0/0x36
  [<ffffffff804caae6>] ? pktgen_thread_worker+0x0/0x1002
  [<ffffffff80241a40>] kthread+0x44/0x6d
  [<ffffffff8020c399>] child_rip+0xa/0x11
  [<ffffffff802419fc>] ? kthread+0x0/0x6d
  [<ffffffff8020c38f>] ? child_rip+0x0/0x11

The attached patch adds some sanity checking to prevent
these sorts of configuration errors.

Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 52623645390..a756847e381 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1961,6 +1961,8 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
  */
 static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 {
+	int ntxq;
+
 	if (!pkt_dev->odev) {
 		printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in "
 		       "setup_inject.\n");
@@ -1969,6 +1971,33 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 		return;
 	}
 
+	/* make sure that we don't pick a non-existing transmit queue */
+	ntxq = pkt_dev->odev->real_num_tx_queues;
+	if (ntxq <= num_online_cpus() && (pkt_dev->flags & F_QUEUE_MAP_CPU)) {
+		printk(KERN_WARNING "pktgen: WARNING: QUEUE_MAP_CPU "
+		       "disabled because CPU count (%d) exceeds number ",
+		       num_online_cpus());
+		printk(KERN_WARNING "pktgen: WARNING: of tx queues "
+		       "(%d) on %s \n", ntxq, pkt_dev->odev->name);
+		pkt_dev->flags &= ~F_QUEUE_MAP_CPU;
+	}
+	if (ntxq <= pkt_dev->queue_map_min) {
+		printk(KERN_WARNING "pktgen: WARNING: Requested "
+		       "queue_map_min (%d) exceeds number of tx\n",
+		       pkt_dev->queue_map_min);
+		printk(KERN_WARNING "pktgen: WARNING: queues (%d) on "
+		       "%s, resetting\n", ntxq, pkt_dev->odev->name);
+		pkt_dev->queue_map_min = ntxq - 1;
+	}
+	if (ntxq <= pkt_dev->queue_map_max) {
+		printk(KERN_WARNING "pktgen: WARNING: Requested "
+		       "queue_map_max (%d) exceeds number of tx\n",
+		       pkt_dev->queue_map_max);
+		printk(KERN_WARNING "pktgen: WARNING: queues (%d) on "
+		       "%s, resetting\n", ntxq, pkt_dev->odev->name);
+		pkt_dev->queue_map_max = ntxq - 1;
+	}
+
 	/* Default to the interface's mac if not explicitly set. */
 
 	if (is_zero_ether_addr(pkt_dev->src_mac))
-- 
cgit v1.2.3-70-g09d2


From 26b284de54a5ca3dfbe2fd9a51ac1923e80085a2 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 13 Aug 2008 15:16:43 -0700
Subject: pkt_sched: Fix oops in htb_delete.

Recent changes introduced a bug in htb_delete(): cl->parent->children
counter update misses checking cl->parent for NULL, which is used for
root classes, so deleting them causes an oops.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index be35422711a..6febd245e62 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1279,7 +1279,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 
 	/* delete from hash and active; remainder in destroy_class */
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
-	cl->parent->children--;
+	if (cl->parent)
+		cl->parent->children--;
 
 	if (cl->prio_activity)
 		htb_deactivate(q, cl);
-- 
cgit v1.2.3-70-g09d2


From b9a3b1102bc80b4044224494100f67de132d5448 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 13 Aug 2008 15:18:38 -0700
Subject: pkt_sched: Fix queue quiescence testing in dev_deactivate().

Based upon discussions with Jarek P. and Herbert Xu.

First, we're testing the wrong qdisc.  We just reset the device
queue qdiscs to &noop_qdisc and checking it's state is completely
pointless here.

We want to wait until the previous qdisc that was sitting at
the ->qdisc pointer is not busy any more.  And that would be
->qdisc_sleeping.

Because of how we propagate the samples qdisc pointer down into
qdisc_run and friends via per-cpu ->output_queue and netif_schedule,
we have to wait also for the __QDISC_STATE_SCHED bit to clear as
well.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7cf83b37459..468574682ca 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -647,7 +647,7 @@ static void dev_deactivate_queue(struct net_device *dev,
 	}
 }
 
-static bool some_qdisc_is_running(struct net_device *dev, int lock)
+static bool some_qdisc_is_busy(struct net_device *dev, int lock)
 {
 	unsigned int i;
 
@@ -658,13 +658,14 @@ static bool some_qdisc_is_running(struct net_device *dev, int lock)
 		int val;
 
 		dev_queue = netdev_get_tx_queue(dev, i);
-		q = dev_queue->qdisc;
+		q = dev_queue->qdisc_sleeping;
 		root_lock = qdisc_lock(q);
 
 		if (lock)
 			spin_lock_bh(root_lock);
 
-		val = test_bit(__QDISC_STATE_RUNNING, &q->state);
+		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
+		       test_bit(__QDISC_STATE_SCHED, &q->state));
 
 		if (lock)
 			spin_unlock_bh(root_lock);
@@ -689,14 +690,14 @@ void dev_deactivate(struct net_device *dev)
 
 	/* Wait for outstanding qdisc_run calls. */
 	do {
-		while (some_qdisc_is_running(dev, 0))
+		while (some_qdisc_is_busy(dev, 0))
 			yield();
 
 		/*
 		 * Double-check inside queue lock to ensure that all effects
 		 * of the queue run are visible when we return.
 		 */
-		running = some_qdisc_is_running(dev, 1);
+		running = some_qdisc_is_busy(dev, 1);
 
 		/*
 		 * The running flag should never be set at this point because
-- 
cgit v1.2.3-70-g09d2


From d4766692e72422f3b0f0e9ac6773d92baad07d51 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 13 Aug 2008 15:20:24 -0700
Subject: pkt_sched: Protect gen estimators under est_lock.

gen_kill_estimator() required rtnl_lock() protection, but since it is
moved to an RCU callback __qdisc_destroy() let's use est_lock instead.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_estimator.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 57abe8266be..a89f32fa94f 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -99,7 +99,7 @@ struct gen_estimator_head
 
 static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
 
-/* Protects against NULL dereference */
+/* Protects against NULL dereference and RCU write-side */
 static DEFINE_RWLOCK(est_lock);
 
 static void est_timer(unsigned long arg)
@@ -185,6 +185,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 	est->last_packets = bstats->packets;
 	est->avpps = rate_est->pps<<10;
 
+	write_lock_bh(&est_lock);
 	if (!elist[idx].timer.function) {
 		INIT_LIST_HEAD(&elist[idx].list);
 		setup_timer(&elist[idx].timer, est_timer, idx);
@@ -194,6 +195,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
 
 	list_add_rcu(&est->list, &elist[idx].list);
+	write_unlock_bh(&est_lock);
 	return 0;
 }
 
@@ -212,7 +214,6 @@ static void __gen_kill_estimator(struct rcu_head *head)
  * Removes the rate estimator specified by &bstats and &rate_est
  * and deletes the timer.
  *
- * NOTE: Called under rtnl_mutex
  */
 void gen_kill_estimator(struct gnet_stats_basic *bstats,
 	struct gnet_stats_rate_est *rate_est)
@@ -226,17 +227,17 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
 		if (!elist[idx].timer.function)
 			continue;
 
+		write_lock_bh(&est_lock);
 		list_for_each_entry_safe(e, n, &elist[idx].list, list) {
 			if (e->rate_est != rate_est || e->bstats != bstats)
 				continue;
 
-			write_lock_bh(&est_lock);
 			e->bstats = NULL;
-			write_unlock_bh(&est_lock);
 
 			list_del_rcu(&e->list);
 			call_rcu(&e->e_rcu, __gen_kill_estimator);
 		}
+		write_unlock_bh(&est_lock);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 877acedc0d3ea07f7b36573ed2f1f479c2c1eefd Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Wed, 13 Aug 2008 16:15:57 -0700
Subject: netns: Fix crash by making igmp per namespace

This patch makes the multicast socket to be per namespace.

When a network namespace is created, other than the init_net and a
multicast packet is received, the kernel goes to a hang or a kernel panic.

How to reproduce ?

 * create a child network namespace
 * create a pair virtual device veth
    * ip link add type veth
 * move one side to the pair network device to the child namespace
    * ip link set netns <childpid> dev veth1
 * ping -I veth0 224.0.0.1

The bug appears because the function ip_mc_init_dev does not initialize
the different multicast fields as it exits because it is not the init_net.

BUG: soft lockup - CPU#0 stuck for 61s! [avahi-daemon:2695]
Modules linked in:
irq event stamp: 50350
hardirqs last  enabled at (50349): [<c03ee949>] _spin_unlock_irqrestore+0x34/0x39
hardirqs last disabled at (50350): [<c03ec639>] schedule+0x9f/0x5ff
softirqs last  enabled at (45712): [<c0374d4b>] ip_setsockopt+0x8e7/0x909
softirqs last disabled at (45710): [<c03ee682>] _spin_lock_bh+0x8/0x27

Pid: 2695, comm: avahi-daemon Not tainted (2.6.27-rc2-00029-g0872073 #3)
EIP: 0060:[<c03ee47c>] EFLAGS: 00000297 CPU: 0
EIP is at __read_lock_failed+0x8/0x10
EAX: c4f38810 EBX: c4f38810 ECX: 00000000 EDX: c04cc22e
ESI: fb0000e0 EDI: 00000011 EBP: 0f02000a ESP: c4e3faa0
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
CR0: 8005003b CR2: 44618a40 CR3: 04e37000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
 [<c02311f8>] ? _raw_read_lock+0x23/0x25
 [<c0390666>] ? ip_check_mc+0x1c/0x83
 [<c036d478>] ? ip_route_input+0x229/0xe92
 [<c022e2e4>] ? trace_hardirqs_on_thunk+0xc/0x10
 [<c0104c9c>] ? do_IRQ+0x69/0x7d
 [<c0102e64>] ? restore_nocheck_notrace+0x0/0xe
 [<c036fdba>] ? ip_rcv+0x227/0x505
 [<c0358764>] ? netif_receive_skb+0xfe/0x2b3
 [<c03588d2>] ? netif_receive_skb+0x26c/0x2b3
 [<c035af31>] ? process_backlog+0x73/0xbd
 [<c035a8cd>] ? net_rx_action+0xc1/0x1ae
 [<c01218a8>] ? __do_softirq+0x7b/0xef
 [<c0121953>] ? do_softirq+0x37/0x4d
 [<c035b50d>] ? dev_queue_xmit+0x3d4/0x40b
 [<c0122037>] ? local_bh_enable+0x96/0xab
 [<c035b50d>] ? dev_queue_xmit+0x3d4/0x40b
 [<c012181e>] ? _local_bh_enable+0x79/0x88
 [<c035fcb8>] ? neigh_resolve_output+0x20f/0x239
 [<c0373118>] ? ip_finish_output+0x1df/0x209
 [<c0373364>] ? ip_dev_loopback_xmit+0x62/0x66
 [<c0371db5>] ? ip_local_out+0x15/0x17
 [<c0372013>] ? ip_push_pending_frames+0x25c/0x2bb
 [<c03891b8>] ? udp_push_pending_frames+0x2bb/0x30e
 [<c038a189>] ? udp_sendmsg+0x413/0x51d
 [<c038a1a9>] ? udp_sendmsg+0x433/0x51d
 [<c038f927>] ? inet_sendmsg+0x35/0x3f
 [<c034f092>] ? sock_sendmsg+0xb8/0xd1
 [<c012d554>] ? autoremove_wake_function+0x0/0x2b
 [<c022e6de>] ? copy_from_user+0x32/0x5e
 [<c022e6de>] ? copy_from_user+0x32/0x5e
 [<c034f238>] ? sys_sendmsg+0x18d/0x1f0
 [<c0175e90>] ? pipe_write+0x3cb/0x3d7
 [<c0170347>] ? do_sync_write+0xbe/0x105
 [<c012d554>] ? autoremove_wake_function+0x0/0x2b
 [<c03503b2>] ? sys_socketcall+0x176/0x1b0
 [<c01085ea>] ? syscall_trace_enter+0x6c/0x7b
 [<c0102e1a>] ? syscall_call+0x7/0xb

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 71 ++++++++++++++++-----------------------------------------
 1 file changed, 20 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6203ece5360..f70fac61259 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -289,6 +289,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	struct rtable *rt;
 	struct iphdr *pip;
 	struct igmpv3_report *pig;
+	struct net *net = dev_net(dev);
 
 	skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL)
@@ -299,7 +300,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 				    .nl_u = { .ip4_u = {
 				    .daddr = IGMPV3_ALL_MCR } },
 				    .proto = IPPROTO_IGMP };
-		if (ip_route_output_key(&init_net, &rt, &fl)) {
+		if (ip_route_output_key(net, &rt, &fl)) {
 			kfree_skb(skb);
 			return NULL;
 		}
@@ -629,6 +630,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	struct igmphdr *ih;
 	struct rtable *rt;
 	struct net_device *dev = in_dev->dev;
+	struct net *net = dev_net(dev);
 	__be32	group = pmc ? pmc->multiaddr : 0;
 	__be32	dst;
 
@@ -643,7 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		struct flowi fl = { .oif = dev->ifindex,
 				    .nl_u = { .ip4_u = { .daddr = dst } },
 				    .proto = IPPROTO_IGMP };
-		if (ip_route_output_key(&init_net, &rt, &fl))
+		if (ip_route_output_key(net, &rt, &fl))
 			return -1;
 	}
 	if (rt->rt_src == 0) {
@@ -1196,9 +1198,6 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	if (!net_eq(dev_net(in_dev->dev), &init_net))
-		return;
-
 	for (im=in_dev->mc_list; im; im=im->next) {
 		if (im->multiaddr == addr) {
 			im->users++;
@@ -1278,9 +1277,6 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	if (!net_eq(dev_net(in_dev->dev), &init_net))
-		return;
-
 	for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
 		if (i->multiaddr==addr) {
 			if (--i->users == 0) {
@@ -1308,9 +1304,6 @@ void ip_mc_down(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (!net_eq(dev_net(in_dev->dev), &init_net))
-		return;
-
 	for (i=in_dev->mc_list; i; i=i->next)
 		igmp_group_dropped(i);
 
@@ -1331,9 +1324,6 @@ void ip_mc_init_dev(struct in_device *in_dev)
 {
 	ASSERT_RTNL();
 
-	if (!net_eq(dev_net(in_dev->dev), &init_net))
-		return;
-
 	in_dev->mc_tomb = NULL;
 #ifdef CONFIG_IP_MULTICAST
 	in_dev->mr_gq_running = 0;
@@ -1357,9 +1347,6 @@ void ip_mc_up(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (!net_eq(dev_net(in_dev->dev), &init_net))
-		return;
-
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
 	for (i=in_dev->mc_list; i; i=i->next)
@@ -1376,9 +1363,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (!net_eq(dev_net(in_dev->dev), &init_net))
-		return;
-
 	/* Deactivate timers */
 	ip_mc_down(in_dev);
 
@@ -1395,7 +1379,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 	write_unlock_bh(&in_dev->mc_list_lock);
 }
 
-static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
+static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 {
 	struct flowi fl = { .nl_u = { .ip4_u =
 				      { .daddr = imr->imr_multiaddr.s_addr } } };
@@ -1404,19 +1388,19 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
 	struct in_device *idev = NULL;
 
 	if (imr->imr_ifindex) {
-		idev = inetdev_by_index(&init_net, imr->imr_ifindex);
+		idev = inetdev_by_index(net, imr->imr_ifindex);
 		if (idev)
 			__in_dev_put(idev);
 		return idev;
 	}
 	if (imr->imr_address.s_addr) {
-		dev = ip_dev_find(&init_net, imr->imr_address.s_addr);
+		dev = ip_dev_find(net, imr->imr_address.s_addr);
 		if (!dev)
 			return NULL;
 		dev_put(dev);
 	}
 
-	if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) {
+	if (!dev && !ip_route_output_key(net, &rt, &fl)) {
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
 	}
@@ -1754,18 +1738,16 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 	struct ip_mc_socklist *iml=NULL, *i;
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	int ifindex;
 	int count = 0;
 
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (!net_eq(sock_net(sk), &init_net))
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
-	in_dev = ip_mc_find_dev(imr);
+	in_dev = ip_mc_find_dev(net, imr);
 
 	if (!in_dev) {
 		iml = NULL;
@@ -1827,15 +1809,13 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_mc_socklist *iml, **imlp;
 	struct in_device *in_dev;
+	struct net *net = sock_net(sk);
 	__be32 group = imr->imr_multiaddr.s_addr;
 	u32 ifindex;
 	int ret = -EADDRNOTAVAIL;
 
-	if (!net_eq(sock_net(sk), &init_net))
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
-	in_dev = ip_mc_find_dev(imr);
+	in_dev = ip_mc_find_dev(net, imr);
 	ifindex = imr->imr_ifindex;
 	for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
 		if (iml->multi.imr_multiaddr.s_addr != group)
@@ -1873,21 +1853,19 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	struct in_device *in_dev = NULL;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_sf_socklist *psl;
+	struct net *net = sock_net(sk);
 	int leavegroup = 0;
 	int i, j, rv;
 
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (!net_eq(sock_net(sk), &init_net))
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
 	imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
 	imr.imr_address.s_addr = mreqs->imr_interface;
 	imr.imr_ifindex = ifindex;
-	in_dev = ip_mc_find_dev(&imr);
+	in_dev = ip_mc_find_dev(net, &imr);
 
 	if (!in_dev) {
 		err = -ENODEV;
@@ -2007,6 +1985,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_sf_socklist *newpsl, *psl;
+	struct net *net = sock_net(sk);
 	int leavegroup = 0;
 
 	if (!ipv4_is_multicast(addr))
@@ -2015,15 +1994,12 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	    msf->imsf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	if (!net_eq(sock_net(sk), &init_net))
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
 	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
 	imr.imr_address.s_addr = msf->imsf_interface;
 	imr.imr_ifindex = ifindex;
-	in_dev = ip_mc_find_dev(&imr);
+	in_dev = ip_mc_find_dev(net, &imr);
 
 	if (!in_dev) {
 		err = -ENODEV;
@@ -2094,19 +2070,17 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_sf_socklist *psl;
+	struct net *net = sock_net(sk);
 
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (!net_eq(sock_net(sk), &init_net))
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
 	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
 	imr.imr_address.s_addr = msf->imsf_interface;
 	imr.imr_ifindex = 0;
-	in_dev = ip_mc_find_dev(&imr);
+	in_dev = ip_mc_find_dev(net, &imr);
 
 	if (!in_dev) {
 		err = -ENODEV;
@@ -2163,9 +2137,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (!net_eq(sock_net(sk), &init_net))
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
 	err = -EADDRNOTAVAIL;
@@ -2246,19 +2217,17 @@ void ip_mc_drop_socket(struct sock *sk)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_mc_socklist *iml;
+	struct net *net = sock_net(sk);
 
 	if (inet->mc_list == NULL)
 		return;
 
-	if (!net_eq(sock_net(sk), &init_net))
-		return;
-
 	rtnl_lock();
 	while ((iml = inet->mc_list) != NULL) {
 		struct in_device *in_dev;
 		inet->mc_list = iml->next;
 
-		in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex);
+		in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
 		(void) ip_mc_leave_src(sk, iml, in_dev);
 		if (in_dev != NULL) {
 			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
-- 
cgit v1.2.3-70-g09d2


From 191cd582500f49b32a63040fedeebb0168c720af Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Thu, 14 Aug 2008 15:33:21 -0700
Subject: netns: Add network namespace argument to rt6_fill_node() and
 ipv6_dev_get_saddr()

ipv6_dev_get_saddr() blindly de-references dst_dev to get the network
namespace, but some callers might pass NULL.  Change callers to pass a
namespace pointer instead.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h  |  3 ++-
 include/net/ip6_route.h |  1 +
 net/ipv6/addrconf.c     |  3 +--
 net/ipv6/fib6_rules.c   |  3 ++-
 net/ipv6/ip6_fib.c      |  1 +
 net/ipv6/ip6_output.c   |  2 +-
 net/ipv6/ndisc.c        |  2 +-
 net/ipv6/route.c        | 12 +++++++-----
 net/ipv6/xfrm6_policy.c |  4 +++-
 net/sctp/ipv6.c         |  3 ++-
 10 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 06b28142b3a..c216de528b0 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -80,7 +80,8 @@ extern struct inet6_ifaddr      *ipv6_get_ifaddr(struct net *net,
 						 struct net_device *dev,
 						 int strict);
 
-extern int			ipv6_dev_get_saddr(struct net_device *dev, 
+extern int			ipv6_dev_get_saddr(struct net *net,
+					       struct net_device *dev,
 					       const struct in6_addr *daddr,
 					       unsigned int srcprefs,
 					       struct in6_addr *saddr);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index bc391ba101e..5f53db7e4e5 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -107,6 +107,7 @@ struct rt6_rtnl_dump_arg
 {
 	struct sk_buff *skb;
 	struct netlink_callback *cb;
+	struct net *net;
 };
 
 extern int rt6_dump_route(struct rt6_info *rt, void *p_arg);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a7842c54f58..e2d3b7580b7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1106,13 +1106,12 @@ out:
 	return ret;
 }
 
-int ipv6_dev_get_saddr(struct net_device *dst_dev,
+int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
 		       const struct in6_addr *daddr, unsigned int prefs,
 		       struct in6_addr *saddr)
 {
 	struct ipv6_saddr_score scores[2],
 				*score = &scores[0], *hiscore = &scores[1];
-	struct net *net = dev_net(dst_dev);
 	struct ipv6_saddr_dst dst;
 	struct net_device *dev;
 	int dst_type;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8d05527524e..f5de3f9dc69 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -93,7 +93,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			if (flags & RT6_LOOKUP_F_SRCPREF_COA)
 				srcprefs |= IPV6_PREFER_SRC_COA;
 
-			if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
+			if (ipv6_dev_get_saddr(net,
+					       ip6_dst_idev(&rt->u.dst)->dev,
 					       &flp->fl6_dst, srcprefs,
 					       &saddr))
 				goto again;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 52dddc25d3e..29c7c99e69f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -378,6 +378,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 
 	arg.skb = skb;
 	arg.cb = cb;
+	arg.net = net;
 	w->args = &arg;
 
 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a4402de425d..0e844c2736a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -934,7 +934,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 		goto out_err_release;
 
 	if (ipv6_addr_any(&fl->fl6_src)) {
-		err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
+		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
 					 &fl->fl6_dst,
 					 sk ? inet6_sk(sk)->srcprefs : 0,
 					 &fl->fl6_src);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index beb48e3f038..f1c62ba0f56 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -549,7 +549,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 			override = 0;
 		in6_ifa_put(ifp);
 	} else {
-		if (ipv6_dev_get_saddr(dev, daddr,
+		if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
 				       inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
 				       &tmpaddr))
 			return;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 41b165ffb36..9af6115f0f5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2106,7 +2106,8 @@ static inline size_t rt6_nlmsg_size(void)
 	       + nla_total_size(sizeof(struct rta_cacheinfo));
 }
 
-static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
+static int rt6_fill_node(struct net *net,
+			 struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
 			 int iif, int type, u32 pid, u32 seq,
 			 int prefix, int nowait, unsigned int flags)
@@ -2189,7 +2190,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 	} else if (dst) {
 		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
 		struct in6_addr saddr_buf;
-		if (ipv6_dev_get_saddr(idev ? idev->dev : NULL,
+		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
 				       dst, 0, &saddr_buf) == 0)
 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
@@ -2234,7 +2235,8 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 	} else
 		prefix = 0;
 
-	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
+	return rt6_fill_node(arg->net,
+		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
 		     prefix, 0, NLM_F_MULTI);
 }
@@ -2300,7 +2302,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
 	skb->dst = &rt->u.dst;
 
-	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
+	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
 			    nlh->nlmsg_seq, 0, 0, 0);
 	if (err < 0) {
@@ -2327,7 +2329,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 	if (skb == NULL)
 		goto errout;
 
-	err = rt6_fill_node(skb, rt, NULL, NULL, 0,
+	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
 				event, info->pid, seq, 0, 0, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8f1e0543b3c..08e4cbbe3f0 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -52,12 +52,14 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr,
 static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
 {
 	struct dst_entry *dst;
+	struct net_device *dev;
 
 	dst = xfrm6_dst_lookup(0, NULL, daddr);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
-	ipv6_dev_get_saddr(ip6_dst_idev(dst)->dev,
+	dev = ip6_dst_idev(dst)->dev;
+	ipv6_dev_get_saddr(dev_net(dev), dev,
 			   (struct in6_addr *)&daddr->a6, 0,
 			   (struct in6_addr *)&saddr->a6);
 	dst_release(dst);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 483a01d0740..47f91afa021 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -319,7 +319,8 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
 			  __func__, asoc, dst, NIP6(daddr->v6.sin6_addr));
 
 	if (!asoc) {
-		ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL,
+		ipv6_dev_get_saddr(sock_net(sctp_opt2sk(sk)),
+				   dst ? ip6_dst_idev(dst)->dev : NULL,
 				   &daddr->v6.sin6_addr,
 				   inet6_sk(&sk->inet.sk)->srcprefs,
 				   &saddr->v6.sin6_addr);
-- 
cgit v1.2.3-70-g09d2


From 9a812198ae49967f239789164c55ec3e72b7e0dd Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Thu, 14 Aug 2008 14:08:44 +0200
Subject: IPVS: Add genetlink interface implementation

Add the implementation of the new Generic Netlink interface to IPVS and
keep the old set/getsockopt interface for userspace backwards
compatibility.

Signed-off-by: Julius Volz <juliusv@google.com>
Acked-by: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 875 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 875 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 6379705a8dc..d1dbd8b311b 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -37,6 +37,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/sock.h>
+#include <net/genetlink.h>
 
 #include <asm/uaccess.h>
 
@@ -2320,6 +2321,872 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
 	.owner		= THIS_MODULE,
 };
 
+/*
+ * Generic Netlink interface
+ */
+
+/* IPVS genetlink family */
+static struct genl_family ip_vs_genl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_MAX,
+};
+
+/* Policy used for first-level command attributes */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_IFNAME_MAXLEN },
+	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_SCHEDNAME_MAXLEN },
+	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
+					    .len = sizeof(struct ip_vs_flags) },
+	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+				 struct ip_vs_stats *stats)
+{
+	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+	if (!nl_stats)
+		return -EMSGSIZE;
+
+	spin_lock_bh(&stats->lock);
+
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+
+	spin_unlock_bh(&stats->lock);
+
+	nla_nest_end(skb, nl_stats);
+
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(&stats->lock);
+	nla_nest_cancel(skb, nl_stats);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc)
+{
+	struct nlattr *nl_service;
+	struct ip_vs_flags flags = { .flags = svc->flags,
+				     .mask = ~0 };
+
+	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	if (!nl_service)
+		return -EMSGSIZE;
+
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+
+	if (svc->fwmark) {
+		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+	} else {
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+	}
+
+	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_service);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_service);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc,
+				   struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_SERVICE);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_service(skb, svc) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	int idx = 0, i;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+
+	mutex_lock(&__ip_vs_mutex);
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+				    struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+	/* Parse mandatory identifying service fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+		return -EINVAL;
+
+	nla_af		= attrs[IPVS_SVC_ATTR_AF];
+	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
+	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
+	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
+	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
+
+	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+		return -EINVAL;
+
+	/* For now, only support IPv4 */
+	if (nla_get_u16(nla_af) != AF_INET)
+		return -EAFNOSUPPORT;
+
+	if (nla_fwmark) {
+		usvc->protocol = IPPROTO_TCP;
+		usvc->fwmark = nla_get_u32(nla_fwmark);
+	} else {
+		usvc->protocol = nla_get_u16(nla_protocol);
+		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+		usvc->port = nla_get_u16(nla_port);
+		usvc->fwmark = 0;
+	}
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+			      *nla_netmask;
+		struct ip_vs_flags flags;
+		struct ip_vs_service *svc;
+
+		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+			return -EINVAL;
+
+		nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+		/* prefill flags from service if it already exists */
+		if (usvc->fwmark)
+			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+		else
+			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
+						  usvc->port);
+		if (svc) {
+			usvc->flags = svc->flags;
+			ip_vs_service_put(svc);
+		} else
+			usvc->flags = 0;
+
+		/* set new flags from userland */
+		usvc->flags = (usvc->flags & ~flags.mask) |
+			      (flags.flags & flags.mask);
+
+		strlcpy(usvc->sched_name, nla_data(nla_sched),
+			sizeof(usvc->sched_name));
+		usvc->timeout = nla_get_u32(nla_timeout);
+		usvc->netmask = nla_get_u32(nla_netmask);
+	}
+
+	return 0;
+}
+
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+	struct ip_vs_service_user usvc;
+	int ret;
+
+	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (usvc.fwmark)
+		return __ip_vs_svc_fwm_get(usvc.fwmark);
+	else
+		return __ip_vs_service_get(usvc.protocol, usvc.addr,
+					   usvc.port);
+}
+
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+	struct nlattr *nl_dest;
+
+	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+	if (!nl_dest)
+		return -EMSGSIZE;
+
+	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+		    atomic_read(&dest->activeconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+		    atomic_read(&dest->inactconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+		    atomic_read(&dest->persistconns));
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_dest);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_dest);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DEST);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_dest(skb, dest) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	int idx = 0;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Try to find the service for which to dump destinations */
+	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+		goto out_err;
+
+	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	if (IS_ERR(svc) || svc == NULL)
+		goto out_err;
+
+	/* Dump the destinations */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (++idx <= start)
+			continue;
+		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+			idx--;
+			goto nla_put_failure;
+		}
+	}
+
+nla_put_failure:
+	cb->args[0] = idx;
+	ip_vs_service_put(svc);
+
+out_err:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+				 struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+	struct nlattr *nla_addr, *nla_port;
+
+	/* Parse mandatory identifying destination fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+		return -EINVAL;
+
+	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
+	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
+
+	if (!(nla_addr && nla_port))
+		return -EINVAL;
+
+	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+	udest->port = nla_get_u16(nla_port);
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+			      *nla_l_thresh;
+
+		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
+		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
+		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
+		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
+
+		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+			return -EINVAL;
+
+		udest->conn_flags = nla_get_u32(nla_fwd)
+				    & IP_VS_CONN_F_FWD_MASK;
+		udest->weight = nla_get_u32(nla_weight);
+		udest->u_threshold = nla_get_u32(nla_u_thresh);
+		udest->l_threshold = nla_get_u32(nla_l_thresh);
+	}
+
+	return 0;
+}
+
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid)
+{
+	struct nlattr *nl_daemon;
+
+	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+	if (!nl_daemon)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+	nla_nest_end(skb, nl_daemon);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_daemon);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid,
+				  struct netlink_callback *cb)
+{
+	void *hdr;
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DAEMON);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	mutex_lock(&__ip_vs_mutex);
+	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+					   ip_vs_master_mcast_ifn,
+					   ip_vs_master_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[0] = 1;
+	}
+
+	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+					   ip_vs_backup_mcast_ifn,
+					   ip_vs_backup_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[1] = 1;
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
+	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
+	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
+		return -EINVAL;
+
+	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+}
+
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+	if (!attrs[IPVS_DAEMON_ATTR_STATE])
+		return -EINVAL;
+
+	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+}
+
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+	struct ip_vs_timeout_user t;
+
+	__ip_vs_get_timeouts(&t);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
+		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
+		t.tcp_fin_timeout =
+			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
+		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
+
+	return ip_vs_set_timeout(&t);
+}
+
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ip_vs_service *svc = NULL;
+	struct ip_vs_service_user usvc;
+	struct ip_vs_dest_user udest;
+	int ret = 0, cmd;
+	int need_full_svc = 0, need_full_dest = 0;
+
+	cmd = info->genlhdr->cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	if (cmd == IPVS_CMD_FLUSH) {
+		ret = ip_vs_flush();
+		goto out;
+	} else if (cmd == IPVS_CMD_SET_CONFIG) {
+		ret = ip_vs_genl_set_config(info->attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
+		   cmd == IPVS_CMD_DEL_DAEMON) {
+
+		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+				     info->attrs[IPVS_CMD_ATTR_DAEMON],
+				     ip_vs_daemon_policy)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (cmd == IPVS_CMD_NEW_DAEMON)
+			ret = ip_vs_genl_new_daemon(daemon_attrs);
+		else
+			ret = ip_vs_genl_del_daemon(daemon_attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_ZERO &&
+		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+		ret = ip_vs_zero_all();
+		goto out;
+	}
+
+	/* All following commands require a service argument, so check if we
+	 * received a valid one. We need a full service specification when
+	 * adding / editing a service. Only identifying members otherwise. */
+	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+		need_full_svc = 1;
+
+	ret = ip_vs_genl_parse_service(&usvc,
+				       info->attrs[IPVS_CMD_ATTR_SERVICE],
+				       need_full_svc);
+	if (ret)
+		goto out;
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+
+	/* Unless we're adding a new service, the service must already exist */
+	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Destination commands require a valid destination argument. For
+	 * adding / editing a destination, we need a full destination
+	 * specification. */
+	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+	    cmd == IPVS_CMD_DEL_DEST) {
+		if (cmd != IPVS_CMD_DEL_DEST)
+			need_full_dest = 1;
+
+		ret = ip_vs_genl_parse_dest(&udest,
+					    info->attrs[IPVS_CMD_ATTR_DEST],
+					    need_full_dest);
+		if (ret)
+			goto out;
+	}
+
+	switch (cmd) {
+	case IPVS_CMD_NEW_SERVICE:
+		if (svc == NULL)
+			ret = ip_vs_add_service(&usvc, &svc);
+		else
+			ret = -EEXIST;
+		break;
+	case IPVS_CMD_SET_SERVICE:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IPVS_CMD_DEL_SERVICE:
+		ret = ip_vs_del_service(svc);
+		break;
+	case IPVS_CMD_NEW_DEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IPVS_CMD_SET_DEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IPVS_CMD_DEL_DEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	case IPVS_CMD_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	if (svc)
+		ip_vs_service_put(svc);
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *reply;
+	int ret, cmd, reply_cmd;
+
+	cmd = info->genlhdr->cmd;
+
+	if (cmd == IPVS_CMD_GET_SERVICE)
+		reply_cmd = IPVS_CMD_NEW_SERVICE;
+	else if (cmd == IPVS_CMD_GET_INFO)
+		reply_cmd = IPVS_CMD_SET_INFO;
+	else if (cmd == IPVS_CMD_GET_CONFIG)
+		reply_cmd = IPVS_CMD_SET_CONFIG;
+	else {
+		IP_VS_ERR("unknown Generic Netlink command\n");
+		return -EINVAL;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+	if (reply == NULL)
+		goto nla_put_failure;
+
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+	{
+		struct ip_vs_service *svc;
+
+		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		if (IS_ERR(svc)) {
+			ret = PTR_ERR(svc);
+			goto out_err;
+		} else if (svc) {
+			ret = ip_vs_genl_fill_service(msg, svc);
+			ip_vs_service_put(svc);
+			if (ret)
+				goto nla_put_failure;
+		} else {
+			ret = -ESRCH;
+			goto out_err;
+		}
+
+		break;
+	}
+
+	case IPVS_CMD_GET_CONFIG:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+			    t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+		break;
+	}
+
+	case IPVS_CMD_GET_INFO:
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+			    IP_VS_CONN_TAB_SIZE);
+		break;
+	}
+
+	genlmsg_end(msg, reply);
+	ret = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	IP_VS_ERR("not enough space in Netlink message\n");
+	ret = -EMSGSIZE;
+
+out_err:
+	nlmsg_free(msg);
+out:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+	{
+		.cmd	= IPVS_CMD_NEW_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+		.dumpit	= ip_vs_genl_dump_services,
+		.policy	= ip_vs_cmd_policy,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.dumpit	= ip_vs_genl_dump_dests,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.dumpit	= ip_vs_genl_dump_daemons,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_INFO,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_ZERO,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_FLUSH,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+};
+
+static int __init ip_vs_genl_register(void)
+{
+	int ret, i;
+
+	ret = genl_register_family(&ip_vs_genl_family);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
+		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
+		if (ret)
+			goto err_out;
+	}
+	return 0;
+
+err_out:
+	genl_unregister_family(&ip_vs_genl_family);
+	return ret;
+}
+
+static void ip_vs_genl_unregister(void)
+{
+	genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
 
 int __init ip_vs_control_init(void)
 {
@@ -2334,6 +3201,13 @@ int __init ip_vs_control_init(void)
 		return ret;
 	}
 
+	ret = ip_vs_genl_register();
+	if (ret) {
+		IP_VS_ERR("cannot register Generic Netlink interface.\n");
+		nf_unregister_sockopt(&ip_vs_sockopts);
+		return ret;
+	}
+
 	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
 	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
 
@@ -2368,6 +3242,7 @@ void ip_vs_control_cleanup(void)
 	unregister_sysctl_table(sysctl_header);
 	proc_net_remove(&init_net, "ip_vs_stats");
 	proc_net_remove(&init_net, "ip_vs");
+	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
 }
-- 
cgit v1.2.3-70-g09d2


From 82dfb6f32219d8e6cf6b979a520cb2b11d977d4e Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Mon, 11 Aug 2008 19:36:06 +0000
Subject: ipvs: Only call init_service, update_service and done_service for
 schedulers if defined

There are schedulers that only schedule based on data available in the service
or destination structures and they don't need any persistent storage or
initialization routine. These schedulers currently provide dummy functions for
the init_service, update_service and/or done_service functions. For the
init_service and done_service cases we already have code that only calls these
functions, if the scheduler provides them. Do the same for the update_service
case and remove the dummy functions from all schedulers.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c   | 21 ++++++++++++---------
 net/ipv4/ipvs/ip_vs_lblc.c  |  7 -------
 net/ipv4/ipvs/ip_vs_lblcr.c |  7 -------
 net/ipv4/ipvs/ip_vs_lc.c    | 21 ---------------------
 net/ipv4/ipvs/ip_vs_nq.c    | 24 ------------------------
 net/ipv4/ipvs/ip_vs_rr.c    |  7 -------
 net/ipv4/ipvs/ip_vs_sed.c   | 24 ------------------------
 net/ipv4/ipvs/ip_vs_wlc.c   | 24 ------------------------
 8 files changed, 12 insertions(+), 123 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index d1dbd8b311b..ede101eeec1 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -869,7 +869,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 		svc->num_dests++;
 
 		/* call the update_service function of its scheduler */
-		svc->scheduler->update_service(svc);
+		if (svc->scheduler->update_service)
+			svc->scheduler->update_service(svc);
 
 		write_unlock_bh(&__ip_vs_svc_lock);
 		return 0;
@@ -899,7 +900,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 	svc->num_dests++;
 
 	/* call the update_service function of its scheduler */
-	svc->scheduler->update_service(svc);
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
 
 	write_unlock_bh(&__ip_vs_svc_lock);
 
@@ -949,7 +951,8 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
 
 	/* call the update_service, because server weight may be changed */
-	svc->scheduler->update_service(svc);
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
 
 	write_unlock_bh(&__ip_vs_svc_lock);
 
@@ -1012,12 +1015,12 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
 	 */
 	list_del(&dest->n_list);
 	svc->num_dests--;
-	if (svcupd) {
-		/*
-		 *  Call the update_service function of its scheduler
-		 */
-		svc->scheduler->update_service(svc);
-	}
+
+	/*
+	 *  Call the update_service function of its scheduler
+	 */
+	if (svcupd && svc->scheduler->update_service)
+			svc->scheduler->update_service(svc);
 }
 
 
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 7a6a319f544..4a14d069f8a 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -388,12 +388,6 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
 }
 
 
-static int ip_vs_lblc_update_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
 static inline struct ip_vs_dest *
 __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
 {
@@ -542,7 +536,6 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
 	.n_list =		LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
 	.init_service =		ip_vs_lblc_init_svc,
 	.done_service =		ip_vs_lblc_done_svc,
-	.update_service =	ip_vs_lblc_update_svc,
 	.schedule =		ip_vs_lblc_schedule,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index c234e73968a..46b870385b8 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -572,12 +572,6 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
 }
 
 
-static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
 static inline struct ip_vs_dest *
 __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
 {
@@ -731,7 +725,6 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
 	.n_list =		LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
 	.init_service =		ip_vs_lblcr_init_svc,
 	.done_service =		ip_vs_lblcr_done_svc,
-	.update_service =	ip_vs_lblcr_update_svc,
 	.schedule =		ip_vs_lblcr_schedule,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index ebcdbf75ac6..2c3de1b6351 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -20,24 +20,6 @@
 #include <net/ip_vs.h>
 
 
-static int ip_vs_lc_init_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
-static int ip_vs_lc_done_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
-static int ip_vs_lc_update_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
 static inline unsigned int
 ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
 {
@@ -99,9 +81,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = {
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
-	.init_service =		ip_vs_lc_init_svc,
-	.done_service =		ip_vs_lc_done_svc,
-	.update_service =	ip_vs_lc_update_svc,
 	.schedule =		ip_vs_lc_schedule,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index 92f3a677003..5330d5a2de1 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -37,27 +37,6 @@
 #include <net/ip_vs.h>
 
 
-static int
-ip_vs_nq_init_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
-static int
-ip_vs_nq_done_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
-static int
-ip_vs_nq_update_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
 static inline unsigned int
 ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
 {
@@ -137,9 +116,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
-	.init_service =		ip_vs_nq_init_svc,
-	.done_service =		ip_vs_nq_done_svc,
-	.update_service =	ip_vs_nq_update_svc,
 	.schedule =		ip_vs_nq_schedule,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index 358110d17e5..f7492911753 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -32,12 +32,6 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
 }
 
 
-static int ip_vs_rr_done_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
 static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
 {
 	svc->sched_data = &svc->destinations;
@@ -96,7 +90,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
 	.init_service =		ip_vs_rr_init_svc,
-	.done_service =		ip_vs_rr_done_svc,
 	.update_service =	ip_vs_rr_update_svc,
 	.schedule =		ip_vs_rr_schedule,
 };
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index 77663d84cbd..53f73bea66c 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -41,27 +41,6 @@
 #include <net/ip_vs.h>
 
 
-static int
-ip_vs_sed_init_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
-static int
-ip_vs_sed_done_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
-static int
-ip_vs_sed_update_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
 static inline unsigned int
 ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
 {
@@ -139,9 +118,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
-	.init_service =		ip_vs_sed_init_svc,
-	.done_service =		ip_vs_sed_done_svc,
-	.update_service =	ip_vs_sed_update_svc,
 	.schedule =		ip_vs_sed_schedule,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index 9b0ef86bb1f..df7ad8d7476 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -25,27 +25,6 @@
 #include <net/ip_vs.h>
 
 
-static int
-ip_vs_wlc_init_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
-static int
-ip_vs_wlc_done_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
-static int
-ip_vs_wlc_update_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
 static inline unsigned int
 ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
 {
@@ -127,9 +106,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
-	.init_service =		ip_vs_wlc_init_svc,
-	.done_service =		ip_vs_wlc_done_svc,
-	.update_service =	ip_vs_wlc_update_svc,
 	.schedule =		ip_vs_wlc_schedule,
 };
 
-- 
cgit v1.2.3-70-g09d2


From a919cf4b6b499416b6e2247dbc79196c4325f2e6 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Thu, 14 Aug 2008 00:47:16 +0200
Subject: ipvs: Create init functions for estimator code

Commit 8ab19ea36c5c5340ff598e4d15fc084eb65671dc ("ipvs: Fix possible deadlock
in estimator code") fixed a deadlock condition, but that condition can only
happen during unload of IPVS, because during normal operation there is at least
our global stats structure in the estimator list. The mod_timer() and
del_timer_sync() calls are actually initialization and cleanup code in
disguise. Let's make it explicit and move them to their own init and cleanup
function.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h        |  2 ++
 net/ipv4/ipvs/ip_vs_core.c |  8 ++++++--
 net/ipv4/ipvs/ip_vs_est.c  | 18 +++++++++++-------
 3 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 7312c3dd309..a25ad243031 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -683,6 +683,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
 /*
  *      IPVS rate estimator prototypes (from ip_vs_est.c)
  */
+extern int ip_vs_estimator_init(void);
+extern void ip_vs_estimator_cleanup(void);
 extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
 extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index a7879eafc3b..9fbf0a6d739 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1070,10 +1070,12 @@ static int __init ip_vs_init(void)
 {
 	int ret;
 
+	ip_vs_estimator_init();
+
 	ret = ip_vs_control_init();
 	if (ret < 0) {
 		IP_VS_ERR("can't setup control.\n");
-		goto cleanup_nothing;
+		goto cleanup_estimator;
 	}
 
 	ip_vs_protocol_init();
@@ -1106,7 +1108,8 @@ static int __init ip_vs_init(void)
   cleanup_protocol:
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
-  cleanup_nothing:
+  cleanup_estimator:
+	ip_vs_estimator_cleanup();
 	return ret;
 }
 
@@ -1117,6 +1120,7 @@ static void __exit ip_vs_cleanup(void)
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
+	ip_vs_estimator_cleanup();
 	IP_VS_INFO("ipvs unloaded.\n");
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 5a20f93bd7f..4fb620ec208 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -124,8 +124,6 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
 	est->outbps = stats->outbps<<5;
 
 	spin_lock_bh(&est_lock);
-	if (list_empty(&est_list))
-		mod_timer(&est_timer, jiffies + 2 * HZ);
 	list_add(&est->list, &est_list);
 	spin_unlock_bh(&est_lock);
 }
@@ -136,11 +134,6 @@ void ip_vs_kill_estimator(struct ip_vs_stats *stats)
 
 	spin_lock_bh(&est_lock);
 	list_del(&est->list);
-	while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) {
-		spin_unlock_bh(&est_lock);
-		cpu_relax();
-		spin_lock_bh(&est_lock);
-	}
 	spin_unlock_bh(&est_lock);
 }
 
@@ -160,3 +153,14 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 	est->inbps = 0;
 	est->outbps = 0;
 }
+
+int __init ip_vs_estimator_init(void)
+{
+	mod_timer(&est_timer, jiffies + 2 * HZ);
+	return 0;
+}
+
+void ip_vs_estimator_cleanup(void)
+{
+	del_timer_sync(&est_timer);
+}
-- 
cgit v1.2.3-70-g09d2


From 4a031b0e6acd8a8c23725ceb5db6a0aa5c4e231f Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Fri, 15 Aug 2008 09:26:15 +1000
Subject: ipvs: rename __ip_vs_wlc_schedule in lblc and lblcr schedulers

For the sake of clarity, rename __ip_vs_wlc_schedule() in lblc.c to
__ip_vs_lblc_schedule() and the version in lblcr.c to __ip_vs_lblc_schedule().

I guess the original name stuck from a copy and paste.

Cc: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_lblc.c  | 6 +++---
 net/ipv4/ipvs/ip_vs_lblcr.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 4a14d069f8a..b9b334cccf3 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -389,7 +389,7 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
 
 
 static inline struct ip_vs_dest *
-__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
 {
 	struct ip_vs_dest *dest, *least;
 	int loh, doh;
@@ -488,7 +488,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	tbl = (struct ip_vs_lblc_table *)svc->sched_data;
 	en = ip_vs_lblc_get(tbl, iph->daddr);
 	if (en == NULL) {
-		dest = __ip_vs_wlc_schedule(svc, iph);
+		dest = __ip_vs_lblc_schedule(svc, iph);
 		if (dest == NULL) {
 			IP_VS_DBG(1, "no destination available\n");
 			return NULL;
@@ -503,7 +503,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
 		    || atomic_read(&dest->weight) <= 0
 		    || is_overloaded(dest, svc)) {
-			dest = __ip_vs_wlc_schedule(svc, iph);
+			dest = __ip_vs_lblc_schedule(svc, iph);
 			if (dest == NULL) {
 				IP_VS_DBG(1, "no destination available\n");
 				return NULL;
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 46b870385b8..f1c84503689 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -573,7 +573,7 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
 
 
 static inline struct ip_vs_dest *
-__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
 {
 	struct ip_vs_dest *dest, *least;
 	int loh, doh;
@@ -673,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	tbl = (struct ip_vs_lblcr_table *)svc->sched_data;
 	en = ip_vs_lblcr_get(tbl, iph->daddr);
 	if (en == NULL) {
-		dest = __ip_vs_wlc_schedule(svc, iph);
+		dest = __ip_vs_lblcr_schedule(svc, iph);
 		if (dest == NULL) {
 			IP_VS_DBG(1, "no destination available\n");
 			return NULL;
@@ -687,7 +687,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	} else {
 		dest = ip_vs_dest_set_min(&en->set);
 		if (!dest || is_overloaded(dest, svc)) {
-			dest = __ip_vs_wlc_schedule(svc, iph);
+			dest = __ip_vs_lblcr_schedule(svc, iph);
 			if (dest == NULL) {
 				IP_VS_DBG(1, "no destination available\n");
 				return NULL;
-- 
cgit v1.2.3-70-g09d2


From 323c048836f73a11ded6f9743feda21c00465cb0 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Thu, 14 Aug 2008 17:01:10 -0700
Subject: pkt_sched: Fix unlocking in tc_ctl_tfilter()

Fix a bug with spin_lock_bh() inserted instead of spin_unlock_bh() by
some recent patch.

Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d2b6f54a626..5cafdd4c801 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -280,7 +280,7 @@ replay:
 		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
 			spin_lock_bh(root_lock);
 			*back = tp->next;
-			spin_lock_bh(root_lock);
+			spin_unlock_bh(root_lock);
 
 			tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
 			tcf_destroy(tp);
-- 
cgit v1.2.3-70-g09d2


From c6153b5b77650879d78dec76414213c76dd8d574 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 15 Aug 2008 13:44:31 -0700
Subject: ipv4: Disable route secret interval on zero interval

Let me first state that disabling the route cache hash rebuild
should not be done without extensive analysis on the risk profile
and careful deliberation.

However, there are times when this can be done safely or for
testing.  For example, when you have mechanisms for ensuring
that offending parties do not exist in your network.

This patch lets the user disable the rebuild if the interval is
set to zero.  This also incidentally fixes a divide-by-zero error
with name-spaces.

In addition, this patch makes the effect of an interval change
immediate rather than it taking effect at the next rebuild as
is currently the case.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 70 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 16fc6f454a3..cca921ea855 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2914,6 +2914,68 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 	return 0;
 }
 
+static void rt_secret_reschedule(int old)
+{
+	struct net *net;
+	int new = ip_rt_secret_interval;
+	int diff = new - old;
+
+	if (!diff)
+		return;
+
+	rtnl_lock();
+	for_each_net(net) {
+		int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
+
+		if (!new)
+			continue;
+
+		if (deleted) {
+			long time = net->ipv4.rt_secret_timer.expires - jiffies;
+
+			if (time <= 0 || (time += diff) <= 0)
+				time = 0;
+
+			net->ipv4.rt_secret_timer.expires = time;
+		} else
+			net->ipv4.rt_secret_timer.expires = new;
+
+		net->ipv4.rt_secret_timer.expires += jiffies;
+		add_timer(&net->ipv4.rt_secret_timer);
+	}
+	rtnl_unlock();
+}
+
+static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
+					  struct file *filp,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	int old = ip_rt_secret_interval;
+	int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos);
+
+	rt_secret_reschedule(old);
+
+	return ret;
+}
+
+static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table,
+						   int __user *name,
+						   int nlen,
+						   void __user *oldval,
+						   size_t __user *oldlenp,
+						   void __user *newval,
+						   size_t newlen)
+{
+	int old = ip_rt_secret_interval;
+	int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval,
+				 newlen);
+
+	rt_secret_reschedule(old);
+
+	return ret;
+}
+
 static ctl_table ipv4_route_table[] = {
 	{
 		.ctl_name	= NET_IPV4_ROUTE_GC_THRESH,
@@ -3048,8 +3110,8 @@ static ctl_table ipv4_route_table[] = {
 		.data		= &ip_rt_secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-		.strategy	= &sysctl_jiffies,
+		.proc_handler	= &ipv4_sysctl_rt_secret_interval,
+		.strategy	= &ipv4_sysctl_rt_secret_interval_strategy,
 	},
 	{ .ctl_name = 0 }
 };
@@ -3126,10 +3188,12 @@ static __net_init int rt_secret_timer_init(struct net *net)
 	net->ipv4.rt_secret_timer.data = (unsigned long)net;
 	init_timer_deferrable(&net->ipv4.rt_secret_timer);
 
-	net->ipv4.rt_secret_timer.expires =
-		jiffies + net_random() % ip_rt_secret_interval +
-		ip_rt_secret_interval;
-	add_timer(&net->ipv4.rt_secret_timer);
+	if (ip_rt_secret_interval) {
+		net->ipv4.rt_secret_timer.expires =
+			jiffies + net_random() % ip_rt_secret_interval +
+			ip_rt_secret_interval;
+		add_timer(&net->ipv4.rt_secret_timer);
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From e4119a43187139736750bad5d694c6a839df045d Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 15 Aug 2008 19:51:07 -0700
Subject: bridge: show offload settings

Add more ethtool generic operations to dump the bridge offload
settings.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9b58d70b0e7..4f52c3d50eb 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -148,11 +148,16 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
 }
 
 static struct ethtool_ops br_ethtool_ops = {
-	.get_drvinfo = br_getinfo,
-	.get_link = ethtool_op_get_link,
-	.set_sg = br_set_sg,
-	.set_tx_csum = br_set_tx_csum,
-	.set_tso = br_set_tso,
+	.get_drvinfo    = br_getinfo,
+	.get_link	= ethtool_op_get_link,
+	.get_tx_csum	= ethtool_op_get_tx_csum,
+	.set_tx_csum 	= br_set_tx_csum,
+	.get_sg		= ethtool_op_get_sg,
+	.set_sg		= br_set_sg,
+	.get_tso	= ethtool_op_get_tso,
+	.set_tso	= br_set_tso,
+	.get_ufo	= ethtool_op_get_ufo,
+	.get_flags	= ethtool_op_get_flags,
 };
 
 void br_dev_setup(struct net_device *dev)
-- 
cgit v1.2.3-70-g09d2


From 6f85a124d819e1cf33b16d064a6a656fd448a735 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 15 Aug 2008 14:55:02 -0700
Subject: net: Preserve netfilter attributes in skb_gso_segment using
 __copy_skb_header

skb_gso_segment didn't preserve some attributes in the original skb
such as the netfilter fields.  This was harmless until they were used
which is the case for packets going through lo.

This patch makes it call __copy_skb_header which also picks up some
other missing attributes.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 84640172d65..ca1ccdf1ef7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2256,14 +2256,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 			segs = nskb;
 		tail = nskb;
 
-		nskb->dev = skb->dev;
-		skb_copy_queue_mapping(nskb, skb);
-		nskb->priority = skb->priority;
-		nskb->protocol = skb->protocol;
-		nskb->vlan_tci = skb->vlan_tci;
-		nskb->dst = dst_clone(skb->dst);
-		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
-		nskb->pkt_type = skb->pkt_type;
+		__copy_skb_header(nskb, skb);
 		nskb->mac_len = skb->mac_len;
 
 		skb_reserve(nskb, headroom);
@@ -2274,6 +2267,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
 					  doffset);
 		if (!sg) {
+			nskb->ip_summed = CHECKSUM_NONE;
 			nskb->csum = skb_copy_and_csum_bits(skb, offset,
 							    skb_put(nskb, len),
 							    len, 0);
@@ -2283,8 +2277,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		frag = skb_shinfo(nskb)->frags;
 		k = 0;
 
-		nskb->ip_summed = CHECKSUM_PARTIAL;
-		nskb->csum = skb->csum;
 		skb_copy_from_linear_data_offset(skb, offset,
 						 skb_put(nskb, hsize), hsize);
 
-- 
cgit v1.2.3-70-g09d2


From db543c1f973cd1d557cc32ceee76737c1e4d2898 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 15 Aug 2008 15:13:53 -0700
Subject: net: skb_copy_datagram_from_iovec()

There's an skb_copy_datagram_iovec() to copy out of a paged skb, but
nothing the other way around (because we don't do that).

We want to allocate big skbs in tun.c, so let's add the function.
It's a carbon copy of skb_copy_datagram_iovec() with enough changes to
be annoying.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  4 +++
 net/core/datagram.c    | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 358661c9990..90992371783 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1452,6 +1452,10 @@ extern int	       skb_copy_datagram_iovec(const struct sk_buff *from,
 extern int	       skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 							int hlen,
 							struct iovec *iov);
+extern int	       skb_copy_datagram_from_iovec(struct sk_buff *skb,
+						    int offset,
+						    struct iovec *from,
+						    int len);
 extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
 extern int	       skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
 					 unsigned int flags);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index dd61dcad601..52f577a0f54 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -339,6 +339,93 @@ fault:
 	return -EFAULT;
 }
 
+/**
+ *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying to
+ *	@from: io vector to copy to
+ *	@len: amount of data to copy to buffer from iovec
+ *
+ *	Returns 0 or -EFAULT.
+ *	Note: the iovec is modified during the copy.
+ */
+int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
+				 struct iovec *from, int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_fromiovec(skb->data + offset, from, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8  *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_fromiovec(vaddr + frag->page_offset +
+					       offset - start, from, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			WARN_ON(start > offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_datagram_from_iovec(list,
+								 offset - start,
+								 from, copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
+
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 				      u8 __user *to, int len,
 				      __wsum *csump)
-- 
cgit v1.2.3-70-g09d2


From a9312ae89324438b0edc554eb36c3ec6bf927d04 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 17 Aug 2008 21:51:03 -0700
Subject: pkt_sched: Add 'deactivated' state.

This new state lets dev_deactivate() mark a qdisc as having been
deactivated.

dev_queue_xmit() and ing_filter() check for this bit and do not
try to process the qdisc if the bit is set.

dev_deactivate() polls the qdisc after setting the bit, waiting
for both __QDISC_STATE_RUNNING and __QDISC_STATE_SCHED to clear.

This isn't perfect yet, but subsequent changesets will make it so.
This part is just one piece of the puzzle.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 1 +
 net/core/dev.c            | 9 ++++++++-
 net/sched/sch_generic.c   | 6 ++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a7abfda3e44..757ab087adb 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -27,6 +27,7 @@ enum qdisc_state_t
 {
 	__QDISC_STATE_RUNNING,
 	__QDISC_STATE_SCHED,
+	__QDISC_STATE_DEACTIVATED,
 };
 
 struct qdisc_size_table {
diff --git a/net/core/dev.c b/net/core/dev.c
index 600bb23c4c2..d9e31f63ade 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1800,6 +1800,12 @@ gso:
 
 		spin_lock(root_lock);
 
+		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+			spin_unlock(root_lock);
+			rc = NET_XMIT_DROP;
+			goto out_kfree_skb;
+		}
+
 		rc = qdisc_enqueue_root(skb, q);
 		qdisc_run(q);
 
@@ -2084,7 +2090,8 @@ static int ing_filter(struct sk_buff *skb)
 	q = rxq->qdisc;
 	if (q != &noop_qdisc) {
 		spin_lock(qdisc_lock(q));
-		result = qdisc_enqueue_root(skb, q);
+		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
+			result = qdisc_enqueue_root(skb, q);
 		spin_unlock(qdisc_lock(q));
 	}
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 468574682ca..ff1c4557e5f 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -597,6 +597,9 @@ static void transition_one_qdisc(struct net_device *dev,
 	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
 	int *need_watchdog_p = _need_watchdog;
 
+	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
+		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
+
 	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
 	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
 		*need_watchdog_p = 1;
@@ -640,6 +643,9 @@ static void dev_deactivate_queue(struct net_device *dev,
 	if (qdisc) {
 		spin_lock_bh(qdisc_lock(qdisc));
 
+		if (!(qdisc->flags & TCQ_F_BUILTIN))
+			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+
 		dev_queue->qdisc = qdisc_default;
 		qdisc_reset(qdisc);
 
-- 
cgit v1.2.3-70-g09d2


From def82a1db1fdc4f861c77009e2ee86870c3743b0 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sun, 17 Aug 2008 21:54:43 -0700
Subject: net: Change handling of the __QDISC_STATE_SCHED flag in
 net_tx_action().

Change handling of the __QDISC_STATE_SCHED flag in net_tx_action() to
enable proper control in dev_deactivate(). Now, if this flag is seen
as unset under root_lock means a qdisc can't be netif_scheduled.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index d9e31f63ade..819f0175bdc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1339,19 +1339,23 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 }
 
 
-void __netif_schedule(struct Qdisc *q)
+static inline void __netif_reschedule(struct Qdisc *q)
 {
-	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
-		struct softnet_data *sd;
-		unsigned long flags;
+	struct softnet_data *sd;
+	unsigned long flags;
 
-		local_irq_save(flags);
-		sd = &__get_cpu_var(softnet_data);
-		q->next_sched = sd->output_queue;
-		sd->output_queue = q;
-		raise_softirq_irqoff(NET_TX_SOFTIRQ);
-		local_irq_restore(flags);
-	}
+	local_irq_save(flags);
+	sd = &__get_cpu_var(softnet_data);
+	q->next_sched = sd->output_queue;
+	sd->output_queue = q;
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_restore(flags);
+}
+
+void __netif_schedule(struct Qdisc *q)
+{
+	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+		__netif_reschedule(q);
 }
 EXPORT_SYMBOL(__netif_schedule);
 
@@ -1980,15 +1984,15 @@ static void net_tx_action(struct softirq_action *h)
 
 			head = head->next_sched;
 
-			smp_mb__before_clear_bit();
-			clear_bit(__QDISC_STATE_SCHED, &q->state);
-
 			root_lock = qdisc_lock(q);
 			if (spin_trylock(root_lock)) {
+				smp_mb__before_clear_bit();
+				clear_bit(__QDISC_STATE_SCHED,
+					  &q->state);
 				qdisc_run(q);
 				spin_unlock(root_lock);
 			} else {
-				__netif_schedule(q);
+				__netif_reschedule(q);
 			}
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From 4335cd2da1e8986fa8aff21a91144d986cb0a5fc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 17 Aug 2008 21:58:07 -0700
Subject: pkt_sched: Simplify dev_deactivate() polling loop.

The condition under which the previous qdisc has no more references
after we've attached &noop_qdisc is that both RUNNING and SCHED
are both seen clear while holding the root lock.

So just make specifically that check in the polling loop, instead
of this overly complex "check without then check with lock held"
sequence.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 31 +++++--------------------------
 1 file changed, 5 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ff1c4557e5f..30b76aec723 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -653,7 +653,7 @@ static void dev_deactivate_queue(struct net_device *dev,
 	}
 }
 
-static bool some_qdisc_is_busy(struct net_device *dev, int lock)
+static bool some_qdisc_is_busy(struct net_device *dev)
 {
 	unsigned int i;
 
@@ -667,14 +667,12 @@ static bool some_qdisc_is_busy(struct net_device *dev, int lock)
 		q = dev_queue->qdisc_sleeping;
 		root_lock = qdisc_lock(q);
 
-		if (lock)
-			spin_lock_bh(root_lock);
+		spin_lock_bh(root_lock);
 
 		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
 		       test_bit(__QDISC_STATE_SCHED, &q->state));
 
-		if (lock)
-			spin_unlock_bh(root_lock);
+		spin_unlock_bh(root_lock);
 
 		if (val)
 			return true;
@@ -684,8 +682,6 @@ static bool some_qdisc_is_busy(struct net_device *dev, int lock)
 
 void dev_deactivate(struct net_device *dev)
 {
-	bool running;
-
 	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
 	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
 
@@ -695,25 +691,8 @@ void dev_deactivate(struct net_device *dev)
 	synchronize_rcu();
 
 	/* Wait for outstanding qdisc_run calls. */
-	do {
-		while (some_qdisc_is_busy(dev, 0))
-			yield();
-
-		/*
-		 * Double-check inside queue lock to ensure that all effects
-		 * of the queue run are visible when we return.
-		 */
-		running = some_qdisc_is_busy(dev, 1);
-
-		/*
-		 * The running flag should never be set at this point because
-		 * we've already set dev->qdisc to noop_qdisc *inside* the same
-		 * pair of spin locks.  That is, if any qdisc_run starts after
-		 * our initial test it should see the noop_qdisc and then
-		 * clear the RUNNING bit before dropping the queue lock.  So
-		 * if it is set here then we've found a bug.
-		 */
-	} while (WARN_ON_ONCE(running));
+	while (some_qdisc_is_busy(dev))
+		yield();
 }
 
 static void dev_init_scheduler_queue(struct net_device *dev,
-- 
cgit v1.2.3-70-g09d2


From 3a76e3716b4e571f5d91a20b6afb412560599083 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sun, 17 Aug 2008 22:02:11 -0700
Subject: pkt_sched: Grab correct lock in notify_and_destroy().

From: Jarek Poplawski <jarkao2@gmail.com>

When we are destroying non-root qdiscs, we need to lock
the root of the qdisc tree not the the qdisc itself.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c25465e5607..c8dc72e1210 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -638,9 +638,9 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid
 		qdisc_notify(skb, n, clid, old, new);
 
 	if (old) {
-		spin_lock_bh(&old->q.lock);
+		sch_tree_lock(old);
 		qdisc_destroy(old);
-		spin_unlock_bh(&old->q.lock);
+		sch_tree_unlock(old);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 1e0d5a5747772182d1bb2525d8153da640fdfb58 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 17 Aug 2008 22:31:26 -0700
Subject: pkt_sched: No longer destroy qdiscs from RCU.

We can now kill them synchronously with all of the
previous dev_deactivate() cures.

This makes netdev destruction and shutdown saner as
the qdiscs hold references to the device.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  1 -
 net/sched/sch_generic.c   | 27 +++++++++------------------
 2 files changed, 9 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 757ab087adb..84d25f2e618 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -61,7 +61,6 @@ struct Qdisc
 	struct gnet_stats_basic	bstats;
 	struct gnet_stats_queue	qstats;
 	struct gnet_stats_rate_est	rate_est;
-	struct rcu_head 	q_rcu;
 	int			(*reshape_fail)(struct sk_buff *skb,
 					struct Qdisc *q);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 30b76aec723..6f96b7bc080 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -518,14 +518,19 @@ void qdisc_reset(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_reset);
 
-/* this is the rcu callback function to clean up a qdisc when there
- * are no further references to it */
+/* Under qdisc_lock(qdisc) and BH! */
 
-static void __qdisc_destroy(struct rcu_head *head)
+void qdisc_destroy(struct Qdisc *qdisc)
 {
-	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
 	const struct Qdisc_ops  *ops = qdisc->ops;
 
+	if (qdisc->flags & TCQ_F_BUILTIN ||
+	    !atomic_dec_and_test(&qdisc->refcnt))
+		return;
+
+	if (qdisc->parent)
+		list_del(&qdisc->list);
+
 #ifdef CONFIG_NET_SCHED
 	qdisc_put_stab(qdisc->stab);
 #endif
@@ -542,20 +547,6 @@ static void __qdisc_destroy(struct rcu_head *head)
 
 	kfree((char *) qdisc - qdisc->padded);
 }
-
-/* Under qdisc_lock(qdisc) and BH! */
-
-void qdisc_destroy(struct Qdisc *qdisc)
-{
-	if (qdisc->flags & TCQ_F_BUILTIN ||
-	    !atomic_dec_and_test(&qdisc->refcnt))
-		return;
-
-	if (qdisc->parent)
-		list_del(&qdisc->list);
-
-	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
-}
 EXPORT_SYMBOL(qdisc_destroy);
 
 static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
-- 
cgit v1.2.3-70-g09d2


From 0d40b6e564bad2047b57a9afc48b701ef3243b89 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Sun, 17 Aug 2008 22:43:56 -0700
Subject: sch_prio: Use return value from inner qdisc requeue

Use return value from inner qdisc requeue when value returned isn't
NET_XMIT_SUCCESS, instead of always returning NET_XMIT_DROP.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_prio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index eac197610ed..7cdc3e6b31e 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -117,7 +117,7 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	}
 	if (net_xmit_drop_count(ret))
 		sch->qstats.drops++;
-	return NET_XMIT_DROP;
+	return ret;
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 4cf7cb280e4fcfcd7ae7429e17d798d3a44087af Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 17 Aug 2008 22:45:17 -0700
Subject: sch_prio: Use NET_XMIT_SUCCESS instead of "0" constant.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_prio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 7cdc3e6b31e..a6697c686c7 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -113,7 +113,7 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
 		sch->qstats.requeues++;
-		return 0;
+		return NET_XMIT_SUCCESS;
 	}
 	if (net_xmit_drop_count(ret))
 		sch->qstats.drops++;
-- 
cgit v1.2.3-70-g09d2


From 13601cd8e44aab332cedff1d6dc10786ec890b7b Mon Sep 17 00:00:00 2001
From: Yang Hongyang <yanghy@cn.fujitsu.com>
Date: Sun, 17 Aug 2008 23:21:52 -0700
Subject: ipv6: Fix the return interface index when get it while no message is
 received.

When get receiving interface index while no message is received,
the bounded device's index of the socket should be returned.

RFC 3542:
   Issuing getsockopt() for the above options will return the sticky
   option value i.e., the value set with setsockopt().  If no sticky
   option value has been set getsockopt() will return the following
   values:

   -  For the IPV6_PKTINFO option, it will return an in6_pktinfo
      structure with ipi6_addr being in6addr_any and ipi6_ifindex being
      zero.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 741cfcd96f8..4e5eac301f9 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -911,7 +911,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		} else {
 			if (np->rxopt.bits.rxinfo) {
 				struct in6_pktinfo src_info;
-				src_info.ipi6_ifindex = np->mcast_oif;
+				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if;
 				ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
 				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 			}
@@ -921,7 +921,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 			}
 			if (np->rxopt.bits.rxoinfo) {
 				struct in6_pktinfo src_info;
-				src_info.ipi6_ifindex = np->mcast_oif;
+				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if;
 				ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 			}
-- 
cgit v1.2.3-70-g09d2


From 96d203169d1d851ac1468f7d4459a09581be364c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 17 Aug 2008 23:37:16 -0700
Subject: pkt_sched: Fix missed RCU unlock in dev_queue_xmit()

Noticed by Jarek Poplawski.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 819f0175bdc..8d133802372 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1805,14 +1805,12 @@ gso:
 		spin_lock(root_lock);
 
 		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-			spin_unlock(root_lock);
+			kfree_skb(skb);
 			rc = NET_XMIT_DROP;
-			goto out_kfree_skb;
+		} else {
+			rc = qdisc_enqueue_root(skb, q);
+			qdisc_run(q);
 		}
-
-		rc = qdisc_enqueue_root(skb, q);
-		qdisc_run(q);
-
 		spin_unlock(root_lock);
 
 		goto out;
-- 
cgit v1.2.3-70-g09d2


From 69747650c814a8a79fef412c7416adf823293a3e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 17 Aug 2008 23:55:36 -0700
Subject: pkt_sched: Fix return value corruption in HTB and TBF.

Based upon a bug report by Josip Rodin.

Packet schedulers should only return NET_XMIT_DROP iff
the packet really was dropped.  If the packet does reach
the device after we return NET_XMIT_DROP then TCP can
crash because it depends upon the enqueue path return
values being accurate.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c |  4 ++--
 net/sched/sch_tbf.c | 11 ++---------
 2 files changed, 4 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6febd245e62..0df0df202ed 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -577,7 +577,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			sch->qstats.drops++;
 			cl->qstats.drops++;
 		}
-		return NET_XMIT_DROP;
+		return ret;
 	} else {
 		cl->bstats.packets +=
 			skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
@@ -623,7 +623,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 			sch->qstats.drops++;
 			cl->qstats.drops++;
 		}
-		return NET_XMIT_DROP;
+		return ret;
 	} else
 		htb_activate(q, cl);
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 7d3b7ff3bf0..94c61598b86 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -123,15 +123,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	int ret;
 
-	if (qdisc_pkt_len(skb) > q->max_size) {
-		sch->qstats.drops++;
-#ifdef CONFIG_NET_CLS_ACT
-		if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
-#endif
-			kfree_skb(skb);
-
-		return NET_XMIT_DROP;
-	}
+	if (qdisc_pkt_len(skb) > q->max_size)
+		return qdisc_reshape_fail(skb, sch);
 
 	ret = qdisc_enqueue(skb, q->qdisc);
 	if (ret != 0) {
-- 
cgit v1.2.3-70-g09d2


From 90855d7b725d764d6d70503bcc1b494cf10ddc98 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 18 Aug 2008 13:23:53 +0200
Subject: [Bluetooth] Fix userspace breakage due missing class links

The Bluetooth adapters and connections are best presented via a class
in sysfs. The removal of the links inside the Bluetooth class broke
assumptions by userspace programs on how to find attached adapters.

This patch creates adapters and connections as part of the Bluetooth
class, but it uses different device types to distinguish them. The
userspace programs can now easily navigate in the sysfs device tree.

The unused platform device and bus have been removed to keep the
code simple and clean.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_sysfs.c | 376 +++++++++++++++++++++++-----------------------
 1 file changed, 189 insertions(+), 187 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index c85bf8f678d..f4f6615cad9 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -3,8 +3,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 
-#include <linux/platform_device.h>
-
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
@@ -12,10 +10,164 @@
 #undef  BT_DBG
 #define BT_DBG(D...)
 #endif
+
+struct class *bt_class = NULL;
+EXPORT_SYMBOL_GPL(bt_class);
+
 static struct workqueue_struct *btaddconn;
 static struct workqueue_struct *btdelconn;
 
-static inline char *typetostr(int type)
+static inline char *link_typetostr(int type)
+{
+	switch (type) {
+	case ACL_LINK:
+		return "ACL";
+	case SCO_LINK:
+		return "SCO";
+	case ESCO_LINK:
+		return "eSCO";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+static ssize_t show_link_type(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hci_conn *conn = dev_get_drvdata(dev);
+	return sprintf(buf, "%s\n", link_typetostr(conn->type));
+}
+
+static ssize_t show_link_address(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hci_conn *conn = dev_get_drvdata(dev);
+	bdaddr_t bdaddr;
+	baswap(&bdaddr, &conn->dst);
+	return sprintf(buf, "%s\n", batostr(&bdaddr));
+}
+
+static ssize_t show_link_features(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hci_conn *conn = dev_get_drvdata(dev);
+
+	return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+				conn->features[0], conn->features[1],
+				conn->features[2], conn->features[3],
+				conn->features[4], conn->features[5],
+				conn->features[6], conn->features[7]);
+}
+
+#define LINK_ATTR(_name,_mode,_show,_store) \
+struct device_attribute link_attr_##_name = __ATTR(_name,_mode,_show,_store)
+
+static LINK_ATTR(type, S_IRUGO, show_link_type, NULL);
+static LINK_ATTR(address, S_IRUGO, show_link_address, NULL);
+static LINK_ATTR(features, S_IRUGO, show_link_features, NULL);
+
+static struct attribute *bt_link_attrs[] = {
+	&link_attr_type.attr,
+	&link_attr_address.attr,
+	&link_attr_features.attr,
+	NULL
+};
+
+static struct attribute_group bt_link_group = {
+	.attrs = bt_link_attrs,
+};
+
+static struct attribute_group *bt_link_groups[] = {
+	&bt_link_group,
+	NULL
+};
+
+static void bt_link_release(struct device *dev)
+{
+	void *data = dev_get_drvdata(dev);
+	kfree(data);
+}
+
+static struct device_type bt_link = {
+	.name    = "link",
+	.groups  = bt_link_groups,
+	.release = bt_link_release,
+};
+
+static void add_conn(struct work_struct *work)
+{
+	struct hci_conn *conn = container_of(work, struct hci_conn, work);
+
+	flush_workqueue(btdelconn);
+
+	if (device_add(&conn->dev) < 0) {
+		BT_ERR("Failed to register connection device");
+		return;
+	}
+}
+
+void hci_conn_add_sysfs(struct hci_conn *conn)
+{
+	struct hci_dev *hdev = conn->hdev;
+
+	BT_DBG("conn %p", conn);
+
+	conn->dev.type = &bt_link;
+	conn->dev.class = bt_class;
+	conn->dev.parent = &hdev->dev;
+
+	snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d",
+					hdev->name, conn->handle);
+
+	dev_set_drvdata(&conn->dev, conn);
+
+	device_initialize(&conn->dev);
+
+	INIT_WORK(&conn->work, add_conn);
+
+	queue_work(btaddconn, &conn->work);
+}
+
+/*
+ * The rfcomm tty device will possibly retain even when conn
+ * is down, and sysfs doesn't support move zombie device,
+ * so we should move the device before conn device is destroyed.
+ */
+static int __match_tty(struct device *dev, void *data)
+{
+	return !strncmp(dev->bus_id, "rfcomm", 6);
+}
+
+static void del_conn(struct work_struct *work)
+{
+	struct hci_conn *conn = container_of(work, struct hci_conn, work);
+	struct hci_dev *hdev = conn->hdev;
+
+	while (1) {
+		struct device *dev;
+
+		dev = device_find_child(&conn->dev, NULL, __match_tty);
+		if (!dev)
+			break;
+		device_move(dev, NULL);
+		put_device(dev);
+	}
+
+	device_del(&conn->dev);
+	put_device(&conn->dev);
+	hci_dev_put(hdev);
+}
+
+void hci_conn_del_sysfs(struct hci_conn *conn)
+{
+	BT_DBG("conn %p", conn);
+
+	if (!device_is_registered(&conn->dev))
+		return;
+
+	INIT_WORK(&conn->work, del_conn);
+
+	queue_work(btdelconn, &conn->work);
+}
+
+static inline char *host_typetostr(int type)
 {
 	switch (type) {
 	case HCI_VIRTUAL:
@@ -40,7 +192,7 @@ static inline char *typetostr(int type)
 static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct hci_dev *hdev = dev_get_drvdata(dev);
-	return sprintf(buf, "%s\n", typetostr(hdev->type));
+	return sprintf(buf, "%s\n", host_typetostr(hdev->type));
 }
 
 static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf)
@@ -221,183 +373,62 @@ static DEVICE_ATTR(sniff_max_interval, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR,
 				show_sniff_min_interval, store_sniff_min_interval);
 
-static struct device_attribute *bt_attrs[] = {
-	&dev_attr_type,
-	&dev_attr_name,
-	&dev_attr_class,
-	&dev_attr_address,
-	&dev_attr_features,
-	&dev_attr_manufacturer,
-	&dev_attr_hci_version,
-	&dev_attr_hci_revision,
-	&dev_attr_inquiry_cache,
-	&dev_attr_idle_timeout,
-	&dev_attr_sniff_max_interval,
-	&dev_attr_sniff_min_interval,
+static struct attribute *bt_host_attrs[] = {
+	&dev_attr_type.attr,
+	&dev_attr_name.attr,
+	&dev_attr_class.attr,
+	&dev_attr_address.attr,
+	&dev_attr_features.attr,
+	&dev_attr_manufacturer.attr,
+	&dev_attr_hci_version.attr,
+	&dev_attr_hci_revision.attr,
+	&dev_attr_inquiry_cache.attr,
+	&dev_attr_idle_timeout.attr,
+	&dev_attr_sniff_max_interval.attr,
+	&dev_attr_sniff_min_interval.attr,
 	NULL
 };
 
-static ssize_t show_conn_type(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct hci_conn *conn = dev_get_drvdata(dev);
-	return sprintf(buf, "%s\n", conn->type == ACL_LINK ? "ACL" : "SCO");
-}
-
-static ssize_t show_conn_address(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct hci_conn *conn = dev_get_drvdata(dev);
-	bdaddr_t bdaddr;
-	baswap(&bdaddr, &conn->dst);
-	return sprintf(buf, "%s\n", batostr(&bdaddr));
-}
-
-static ssize_t show_conn_features(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct hci_conn *conn = dev_get_drvdata(dev);
-
-	return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
-				conn->features[0], conn->features[1],
-				conn->features[2], conn->features[3],
-				conn->features[4], conn->features[5],
-				conn->features[6], conn->features[7]);
-}
-
-#define CONN_ATTR(_name,_mode,_show,_store) \
-struct device_attribute conn_attr_##_name = __ATTR(_name,_mode,_show,_store)
-
-static CONN_ATTR(type, S_IRUGO, show_conn_type, NULL);
-static CONN_ATTR(address, S_IRUGO, show_conn_address, NULL);
-static CONN_ATTR(features, S_IRUGO, show_conn_features, NULL);
-
-static struct device_attribute *conn_attrs[] = {
-	&conn_attr_type,
-	&conn_attr_address,
-	&conn_attr_features,
-	NULL
+static struct attribute_group bt_host_group = {
+	.attrs = bt_host_attrs,
 };
 
-struct class *bt_class = NULL;
-EXPORT_SYMBOL_GPL(bt_class);
-
-static struct bus_type bt_bus = {
-	.name	= "bluetooth",
+static struct attribute_group *bt_host_groups[] = {
+	&bt_host_group,
+	NULL
 };
 
-static struct platform_device *bt_platform;
-
-static void bt_release(struct device *dev)
+static void bt_host_release(struct device *dev)
 {
 	void *data = dev_get_drvdata(dev);
 	kfree(data);
 }
 
-static void add_conn(struct work_struct *work)
-{
-	struct hci_conn *conn = container_of(work, struct hci_conn, work);
-	int i;
-
-	flush_workqueue(btdelconn);
-
-	if (device_add(&conn->dev) < 0) {
-		BT_ERR("Failed to register connection device");
-		return;
-	}
-
-	for (i = 0; conn_attrs[i]; i++)
-		if (device_create_file(&conn->dev, conn_attrs[i]) < 0)
-			BT_ERR("Failed to create connection attribute");
-}
-
-void hci_conn_add_sysfs(struct hci_conn *conn)
-{
-	struct hci_dev *hdev = conn->hdev;
-
-	BT_DBG("conn %p", conn);
-
-	conn->dev.bus = &bt_bus;
-	conn->dev.parent = &hdev->dev;
-
-	conn->dev.release = bt_release;
-
-	snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d",
-					hdev->name, conn->handle);
-
-	dev_set_drvdata(&conn->dev, conn);
-
-	device_initialize(&conn->dev);
-
-	INIT_WORK(&conn->work, add_conn);
-
-	queue_work(btaddconn, &conn->work);
-}
-
-/*
- * The rfcomm tty device will possibly retain even when conn
- * is down, and sysfs doesn't support move zombie device,
- * so we should move the device before conn device is destroyed.
- */
-static int __match_tty(struct device *dev, void *data)
-{
-	return !strncmp(dev->bus_id, "rfcomm", 6);
-}
-
-static void del_conn(struct work_struct *work)
-{
-	struct hci_conn *conn = container_of(work, struct hci_conn, work);
-	struct hci_dev *hdev = conn->hdev;
-
-	while (1) {
-		struct device *dev;
-
-		dev = device_find_child(&conn->dev, NULL, __match_tty);
-		if (!dev)
-			break;
-		device_move(dev, NULL);
-		put_device(dev);
-	}
-
-	device_del(&conn->dev);
-	put_device(&conn->dev);
-	hci_dev_put(hdev);
-}
-
-void hci_conn_del_sysfs(struct hci_conn *conn)
-{
-	BT_DBG("conn %p", conn);
-
-	if (!device_is_registered(&conn->dev))
-		return;
-
-	INIT_WORK(&conn->work, del_conn);
-
-	queue_work(btdelconn, &conn->work);
-}
+static struct device_type bt_host = {
+	.name    = "host",
+	.groups  = bt_host_groups,
+	.release = bt_host_release,
+};
 
 int hci_register_sysfs(struct hci_dev *hdev)
 {
 	struct device *dev = &hdev->dev;
-	unsigned int i;
 	int err;
 
 	BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
 
-	dev->bus = &bt_bus;
+	dev->type = &bt_host;
+	dev->class = bt_class;
 	dev->parent = hdev->parent;
 
 	strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE);
 
-	dev->release = bt_release;
-
 	dev_set_drvdata(dev, hdev);
 
 	err = device_register(dev);
 	if (err < 0)
 		return err;
 
-	for (i = 0; bt_attrs[i]; i++)
-		if (device_create_file(dev, bt_attrs[i]) < 0)
-			BT_ERR("Failed to create device attribute");
-
 	return 0;
 }
 
@@ -410,59 +441,30 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
 
 int __init bt_sysfs_init(void)
 {
-	int err;
-
 	btaddconn = create_singlethread_workqueue("btaddconn");
-	if (!btaddconn) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!btaddconn)
+		return -ENOMEM;
 
 	btdelconn = create_singlethread_workqueue("btdelconn");
 	if (!btdelconn) {
-		err = -ENOMEM;
-		goto out_del;
-	}
-
-	bt_platform = platform_device_register_simple("bluetooth", -1, NULL, 0);
-	if (IS_ERR(bt_platform)) {
-		err = PTR_ERR(bt_platform);
-		goto out_platform;
+		destroy_workqueue(btaddconn);
+		return -ENOMEM;
 	}
 
-	err = bus_register(&bt_bus);
-	if (err < 0)
-		goto out_bus;
-
 	bt_class = class_create(THIS_MODULE, "bluetooth");
 	if (IS_ERR(bt_class)) {
-		err = PTR_ERR(bt_class);
-		goto out_class;
+		destroy_workqueue(btdelconn);
+		destroy_workqueue(btaddconn);
+		return PTR_ERR(bt_class);
 	}
 
 	return 0;
-
-out_class:
-	bus_unregister(&bt_bus);
-out_bus:
-	platform_device_unregister(bt_platform);
-out_platform:
-	destroy_workqueue(btdelconn);
-out_del:
-	destroy_workqueue(btaddconn);
-out:
-	return err;
 }
 
 void bt_sysfs_cleanup(void)
 {
 	destroy_workqueue(btaddconn);
-
 	destroy_workqueue(btdelconn);
 
 	class_destroy(bt_class);
-
-	bus_unregister(&bt_bus);
-
-	platform_device_unregister(bt_platform);
 }
-- 
cgit v1.2.3-70-g09d2


From 63fbd24e5102eecfc9d049ed7f4be7f9a25f814f Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 18 Aug 2008 13:23:53 +0200
Subject: [Bluetooth] Consolidate maintainers information

The Bluetooth entries for the MAINTAINERS file are a little bit too
much. Consolidate them into two entries. One for Bluetooth drivers and
another one for the Bluetooth subsystem.

Also the MODULE_AUTHOR should indicate the current maintainer of the
module and actually not the original author. Fix all Bluetooth modules
to provide current maintainer information.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 MAINTAINERS                   | 87 +++----------------------------------------
 drivers/bluetooth/bt3c_cs.c   |  2 +-
 drivers/bluetooth/hci_ldisc.c |  2 +-
 drivers/bluetooth/hci_usb.c   |  2 +-
 drivers/bluetooth/hci_vhci.c  |  2 +-
 net/bluetooth/af_bluetooth.c  |  2 +-
 net/bluetooth/bnep/core.c     |  2 +-
 net/bluetooth/l2cap.c         |  2 +-
 net/bluetooth/rfcomm/core.c   |  2 +-
 net/bluetooth/sco.c           |  2 +-
 10 files changed, 15 insertions(+), 90 deletions(-)

(limited to 'net')

diff --git a/MAINTAINERS b/MAINTAINERS
index af6aa4e4b39..2676f9a1fee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -936,94 +936,19 @@ M:	joern@lazybastard.org
 L:	linux-mtd@lists.infradead.org
 S:	Maintained
 
-BLUETOOTH SUBSYSTEM
+BLUETOOTH DRIVERS
 P:	Marcel Holtmann
 M:	marcel@holtmann.org
-P:	Maxim Krasnyansky
-M:	maxk@qualcomm.com
 L:	linux-bluetooth@vger.kernel.org
-W:	http://bluez.sf.net
-W:	http://www.bluez.org
-W:	http://www.holtmann.org/linux/bluetooth/
-T:	git kernel.org:/pub/scm/linux/kernel/git/holtmann/bluetooth-2.6.git
-S:	Maintained
-
-BLUETOOTH RFCOMM LAYER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-P:	Maxim Krasnyansky
-M:	maxk@qualcomm.com
-S:	Maintained
-
-BLUETOOTH BNEP LAYER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-P:	Maxim Krasnyansky
-M:	maxk@qualcomm.com
-S:	Maintained
-
-BLUETOOTH CMTP LAYER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
+W:	http://www.bluez.org/
 S:	Maintained
 
-BLUETOOTH HIDP LAYER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-S:	Maintained
-
-BLUETOOTH HCI UART DRIVER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-P:	Maxim Krasnyansky
-M:	maxk@qualcomm.com
-S:	Maintained
-
-BLUETOOTH HCI USB DRIVER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-P:	Maxim Krasnyansky
-M:	maxk@qualcomm.com
-S:	Maintained
-
-BLUETOOTH HCI BCM203X DRIVER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-S:	Maintained
-
-BLUETOOTH HCI BPA10X DRIVER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-S:	Maintained
-
-BLUETOOTH HCI BFUSB DRIVER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-S:	Maintained
-
-BLUETOOTH HCI DTL1 DRIVER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-S:	Maintained
-
-BLUETOOTH HCI BLUECARD DRIVER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-S:	Maintained
-
-BLUETOOTH HCI BT3C DRIVER
-P:	Marcel Holtmann
-M:	marcel@holtmann.org
-S:	Maintained
-
-BLUETOOTH HCI BTUART DRIVER
+BLUETOOTH SUBSYSTEM
 P:	Marcel Holtmann
 M:	marcel@holtmann.org
-S:	Maintained
-
-BLUETOOTH HCI VHCI DRIVER
-P:	Maxim Krasnyansky
-M:	maxk@qualcomm.com
+L:	linux-bluetooth@vger.kernel.org
+W:	http://www.bluez.org/
+T:	git kernel.org:/pub/scm/linux/kernel/git/holtmann/bluetooth-2.6.git
 S:	Maintained
 
 BONDING DRIVER
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 593b7c59503..27058477cc8 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -60,7 +60,7 @@
 /* ======================== Module parameters ======================== */
 
 
-MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>, Jose Orlando Pereira <jop@di.uminho.pt>");
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth driver for the 3Com Bluetooth PCMCIA card");
 MODULE_LICENSE("GPL");
 MODULE_FIRMWARE("BT3CPCC.bin");
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 69df187d74c..8dfcf77cb71 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -577,7 +577,7 @@ module_exit(hci_uart_exit);
 module_param(reset, bool, 0644);
 MODULE_PARM_DESC(reset, "Send HCI reset command on initialization");
 
-MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth HCI UART driver ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c
index e397572bf57..3c453924f83 100644
--- a/drivers/bluetooth/hci_usb.c
+++ b/drivers/bluetooth/hci_usb.c
@@ -1130,7 +1130,7 @@ module_param(isoc, int, 0644);
 MODULE_PARM_DESC(isoc, "Set isochronous transfers for SCO over HCI support");
 #endif
 
-MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth HCI USB driver ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index d97700aa54a..7320a71b636 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -377,7 +377,7 @@ module_exit(vhci_exit);
 module_param(minor, int, 0444);
 MODULE_PARM_DESC(minor, "Miscellaneous minor device number");
 
-MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth virtual HCI driver ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 4e59df5f8e0..1edfdf4c095 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -456,7 +456,7 @@ static void __exit bt_exit(void)
 subsys_initcall(bt_init);
 module_exit(bt_exit);
 
-MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth Core ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 12bba6207a8..80ba30cf4b6 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -736,7 +736,7 @@ MODULE_PARM_DESC(compress_src, "Compress sources headers");
 module_param(compress_dst, bool, 0644);
 MODULE_PARM_DESC(compress_dst, "Compress destination headers");
 
-MODULE_AUTHOR("David Libault <david.libault@inventel.fr>, Maxim Krasnyansky <maxk@qualcomm.com>");
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth BNEP ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index c1239852834..3396d5bdef1 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2516,7 +2516,7 @@ EXPORT_SYMBOL(l2cap_load);
 module_init(l2cap_init);
 module_exit(l2cap_exit);
 
-MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 6cfc7ba611b..ba537fae0a4 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2115,7 +2115,7 @@ MODULE_PARM_DESC(channel_mtu, "Default MTU for the RFCOMM channel");
 module_param(l2cap_mtu, uint, 0644);
 MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection");
 
-MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth RFCOMM ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 8cda4987486..a16011fedc1 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1002,7 +1002,7 @@ module_exit(sco_exit);
 module_param(disable_esco, bool, 0644);
 MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation");
 
-MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-70-g09d2


From e10e0dfe3ba358cfb442cc3bf0d3f2068785bf5c Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 2 Aug 2008 14:56:25 -0300
Subject: rfkill: protect suspended rfkill controllers

Guard rfkill controllers attached to a rfkill class against state changes
after class suspend has been issued.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/rfkill.txt |  5 +++++
 net/rfkill/rfkill.c      | 14 ++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index 28b6ec87c64..6fcb3060dec 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -363,6 +363,11 @@ This rule exists because users of the rfkill subsystem expect to get (and set,
 when possible) the overall transmitter rfkill state, not of a particular rfkill
 line.
 
+5. During suspend, the rfkill class will attempt to soft-block the radio
+through a call to rfkill->toggle_radio, and will try to restore its previous
+state during resume.  After a rfkill class is suspended, it will *not* call
+rfkill->toggle_radio until it is resumed.
+
 Example of a WLAN wireless driver connected to the rfkill subsystem:
 --------------------------------------------------------------------
 
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index d2d45655cd1..35a9994e233 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -150,6 +150,8 @@ static void update_rfkill_state(struct rfkill *rfkill)
  * calls and handling all the red tape such as issuing notifications
  * if the call is successful.
  *
+ * Suspended devices are not touched at all, and -EAGAIN is returned.
+ *
  * Note that the @force parameter cannot override a (possibly cached)
  * state of RFKILL_STATE_HARD_BLOCKED.  Any device making use of
  * RFKILL_STATE_HARD_BLOCKED implements either get_state() or
@@ -168,6 +170,9 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
 	int retval = 0;
 	enum rfkill_state oldstate, newstate;
 
+	if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP))
+		return -EBUSY;
+
 	oldstate = rfkill->state;
 
 	if (rfkill->get_state && !force &&
@@ -214,7 +219,7 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
  *
  * This function toggles the state of all switches of given type,
  * unless a specific switch is claimed by userspace (in which case,
- * that switch is left alone).
+ * that switch is left alone) or suspended.
  */
 void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
 {
@@ -239,8 +244,8 @@ EXPORT_SYMBOL(rfkill_switch_all);
 /**
  * rfkill_epo - emergency power off all transmitters
  *
- * This kicks all rfkill devices to RFKILL_STATE_SOFT_BLOCKED, ignoring
- * everything in its path but rfkill_mutex and rfkill->mutex.
+ * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED,
+ * ignoring everything in its path but rfkill_mutex and rfkill->mutex.
  */
 void rfkill_epo(void)
 {
@@ -458,13 +463,14 @@ static int rfkill_resume(struct device *dev)
 	if (dev->power.power_state.event != PM_EVENT_ON) {
 		mutex_lock(&rfkill->mutex);
 
+		dev->power.power_state.event = PM_EVENT_ON;
+
 		/* restore radio state AND notify everybody */
 		rfkill_toggle_radio(rfkill, rfkill->state, 1);
 
 		mutex_unlock(&rfkill->mutex);
 	}
 
-	dev->power.power_state = PMSG_ON;
 	return 0;
 }
 #else
-- 
cgit v1.2.3-70-g09d2


From a61dae1f784f0c4ced0d47721c0efc7033231522 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Sun, 10 Aug 2008 00:54:34 +0300
Subject: mac80211: update new sta's rx timestamp

This patch fixes needless probe request caused by zero value in
sta->last_rx inside ieee80211_associated flow

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e1d11c9b672..1e97fb9fb34 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2103,6 +2103,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 			rcu_read_unlock();
 			return;
 		}
+		/* update new sta with its last rx activity */
+		sta->last_rx = jiffies;
 	}
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From 8608db031b4d2932d645709e2cfe8fbcd91a7305 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 18 Aug 2008 20:51:18 -0700
Subject: pkt_sched: Never schedule non-root qdiscs.

Based upon initial discovery and patch by Jarek Poplawski.

The qdisc watchdogs can be attached to any qdisc, not just the root,
so make sure we schedule the correct one.

CBQ has a similar bug.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 +-
 net/sched/sch_cbq.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c8dc72e1210..98c00847a3d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -426,7 +426,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
 	smp_wmb();
-	__netif_schedule(wd->qdisc);
+	__netif_schedule(qdisc_root(wd->qdisc));
 
 	return HRTIMER_NORESTART;
 }
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4e261ce62f4..47ef492c4ff 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -654,7 +654,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
-	__netif_schedule(sch);
+	__netif_schedule(qdisc_root(sch));
 	return HRTIMER_NORESTART;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 25bfcd5a78a377ea4c54a3c21e44590e2fc478a6 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 18 Aug 2008 20:53:34 -0700
Subject: pkt_sched: Add lockdep annotation for qdisc locks

Qdisc locks are initialized in the same function, qdisc_alloc(), so
lockdep can't distinguish tx qdisc lock from rx and reports "possible
recursive locking detected" when both these locks are taken eg. while
using act_mirred with ifb. This looks like a false positive. Anyway,
after this patch these locks will be reported more exactly.

Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 98c00847a3d..7d7070b1eeb 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -27,6 +27,7 @@
 #include <linux/kmod.h>
 #include <linux/list.h>
 #include <linux/hrtimer.h>
+#include <linux/lockdep.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -707,6 +708,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 	return err;
 }
 
+/* lockdep annotation is needed for ingress; egress gets it only for name */
+static struct lock_class_key qdisc_tx_lock;
+static struct lock_class_key qdisc_rx_lock;
+
 /*
    Allocate and initialize new qdisc.
 
@@ -767,6 +772,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
+		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
 	} else {
 		if (handle == 0) {
 			handle = qdisc_alloc_handle(dev);
@@ -774,6 +780,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 			if (handle == 0)
 				goto err_out3;
 		}
+		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
 	}
 
 	sch->handle = handle;
-- 
cgit v1.2.3-70-g09d2


From 4d8863a29c4755a0461cd31b6865026187d6c43a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 18 Aug 2008 21:03:15 -0700
Subject: pkt_sched: Don't hold qdisc lock over qdisc_destroy().

Based upon reports by Denys Fedoryshchenko, and feedback
and help from Jarek Poplawski and Herbert Xu.

We always either:

1) Never made an external reference to this qdisc.

or

2) Did a dev_deactivate() which purged all asynchronous
   references.

So do not lock the qdisc when we call qdisc_destroy(),
it's illegal anyways as when we drop the lock this is
free'd memory.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c     | 13 ++-----------
 net/sched/sch_generic.c |  6 ------
 2 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 7d7070b1eeb..d91a2338877 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -638,11 +638,8 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid
 	if (new || old)
 		qdisc_notify(skb, n, clid, old, new);
 
-	if (old) {
-		sch_tree_lock(old);
+	if (old)
 		qdisc_destroy(old);
-		sch_tree_unlock(old);
-	}
 }
 
 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
@@ -1092,16 +1089,10 @@ create_n_graft:
 
 graft:
 	if (1) {
-		spinlock_t *root_lock;
-
 		err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
 		if (err) {
-			if (q) {
-				root_lock = qdisc_root_lock(q);
-				spin_lock_bh(root_lock);
+			if (q)
 				qdisc_destroy(q);
-				spin_unlock_bh(root_lock);
-			}
 			return err;
 		}
 	}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6f96b7bc080..c3ed4d44fc1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -518,8 +518,6 @@ void qdisc_reset(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_reset);
 
-/* Under qdisc_lock(qdisc) and BH! */
-
 void qdisc_destroy(struct Qdisc *qdisc)
 {
 	const struct Qdisc_ops  *ops = qdisc->ops;
@@ -712,14 +710,10 @@ static void shutdown_scheduler_queue(struct net_device *dev,
 	struct Qdisc *qdisc_default = _qdisc_default;
 
 	if (qdisc) {
-		spinlock_t *root_lock = qdisc_lock(qdisc);
-
 		dev_queue->qdisc = qdisc_default;
 		dev_queue->qdisc_sleeping = qdisc_default;
 
-		spin_lock_bh(root_lock);
 		qdisc_destroy(qdisc);
-		spin_unlock_bh(root_lock);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From d28934ad8a4e87203a95de9c376611de8bc2f013 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 18 Aug 2008 21:14:20 -0700
Subject: dccp: Fix panic caused by too early termination of retransmission
 mechanism

Thanks is due to Wei Yongjun for the detailed analysis and description of this
bug at http://marc.info/?l=dccp&m=121739364909199&w=2

The problem is that invalid packets received by a client in state REQUEST cause
the retransmission timer for the DCCP-Request to be reset. This includes freeing
the Request-skb ( in dccp_rcv_request_sent_state_process() ). As a consequence,
 * the arrival of further packets cause a double-free, triggering a panic(),
 * the connection then may hang, since further retransmissions are blocked.

This patch changes the order of statements so that the retransmission timer is
reset, and the pending Request freed, only if a valid Response has arrived (or
the number of sysctl-retries has been exhausted).

Further changes:
----------------
To be on the safe side, replaced __kfree_skb with kfree_skb so that if due to
unexpected circumstances the sk_send_head is NULL the WARN_ON is used instead.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index df2f110df94..803933ab396 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -411,12 +411,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		struct dccp_sock *dp = dccp_sk(sk);
 		long tstamp = dccp_timestamp();
 
-		/* Stop the REQUEST timer */
-		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
-		WARN_ON(sk->sk_send_head == NULL);
-		__kfree_skb(sk->sk_send_head);
-		sk->sk_send_head = NULL;
-
 		if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
 			       dp->dccps_awl, dp->dccps_awh)) {
 			dccp_pr_debug("invalid ackno: S.AWL=%llu, "
@@ -441,6 +435,12 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 				    DCCP_ACKVEC_STATE_RECEIVED))
 			goto out_invalid_packet; /* FIXME: change error code */
 
+		/* Stop the REQUEST timer */
+		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
+		WARN_ON(sk->sk_send_head == NULL);
+		kfree_skb(sk->sk_send_head);
+		sk->sk_send_head = NULL;
+
 		dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
 		dccp_update_gsr(sk, dp->dccps_isr);
 		/*
-- 
cgit v1.2.3-70-g09d2


From 46faec9858e8943226464dac50e205bf210d9174 Mon Sep 17 00:00:00 2001
From: Anders Grafström <grfstrm@users.sourceforge.net>
Date: Mon, 18 Aug 2008 21:29:57 -0700
Subject: netfilter: ipt_addrtype: Fix matching of inverted destination address
 type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes matching of inverted destination address type.

Signed-off-by: Anders Grafström <grfstrm@users.sourceforge.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_addrtype.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 49587a49722..462a22c9787 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -70,7 +70,7 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in,
 		       (info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
 	if (ret && info->dest)
 		ret &= match_type(dev, iph->daddr, info->dest) ^
-		       (info->flags & IPT_ADDRTYPE_INVERT_DEST);
+		       !!(info->flags & IPT_ADDRTYPE_INVERT_DEST);
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 1575e7ea018fec992b94a12a1a491ce693ae9eac Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 18 Aug 2008 21:30:55 -0700
Subject: netfilter: ctnetlink: fix double helper assignation for NAT'ed
 conntracks

If we create a conntrack that has NAT handlings and a helper, the helper
is assigned twice. This happens because nf_nat_setup_info() - via
nf_conntrack_alter_reply() - sets the helper before ctnetlink, which
indeed does not check if the conntrack already has a helper as it thinks that
it is a brand new conntrack.

The fix moves the helper assignation before the set of the status flags.
This avoids a bogus assertion in __nf_ct_ext_add (if netfilter assertions are
enabled) which checks that the conntrack must not be confirmed.

This problem was introduced in 2.6.23 with the netfilter extension
infrastructure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 105a616c5c7..d1fb2f8555e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1136,16 +1136,33 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 	ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
 	ct->status |= IPS_CONFIRMED;
 
+	rcu_read_lock();
+	helper = __nf_ct_helper_find(rtuple);
+	if (helper) {
+		help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
+		if (help == NULL) {
+			rcu_read_unlock();
+			err = -ENOMEM;
+			goto err;
+		}
+		/* not in hash table yet so not strictly necessary */
+		rcu_assign_pointer(help->helper, helper);
+	}
+
 	if (cda[CTA_STATUS]) {
 		err = ctnetlink_change_status(ct, cda);
-		if (err < 0)
+		if (err < 0) {
+			rcu_read_unlock();
 			goto err;
+		}
 	}
 
 	if (cda[CTA_PROTOINFO]) {
 		err = ctnetlink_change_protoinfo(ct, cda);
-		if (err < 0)
+		if (err < 0) {
+			rcu_read_unlock();
 			goto err;
+		}
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_KERNEL);
@@ -1155,19 +1172,6 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
 #endif
 
-	rcu_read_lock();
-	helper = __nf_ct_helper_find(rtuple);
-	if (helper) {
-		help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
-		if (help == NULL) {
-			rcu_read_unlock();
-			err = -ENOMEM;
-			goto err;
-		}
-		/* not in hash table yet so not strictly necessary */
-		rcu_assign_pointer(help->helper, helper);
-	}
-
 	/* setup master conntrack: this is a confirmed expectation */
 	if (master_ct) {
 		__set_bit(IPS_EXPECTED_BIT, &ct->status);
-- 
cgit v1.2.3-70-g09d2


From cb1cb5c47457ff2b604dac2da44cab4d39d11459 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 18 Aug 2008 21:31:24 -0700
Subject: netfilter: ctnetlink: fix sleep in read-side lock section

Fix allocation with GFP_KERNEL in ctnetlink_create_conntrack() under
read-side lock sections.

This problem was introduced in 2.6.25.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d1fb2f8555e..a5b95ccb3ce 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1139,7 +1139,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 	rcu_read_lock();
 	helper = __nf_ct_helper_find(rtuple);
 	if (helper) {
-		help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
+		help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
 		if (help == NULL) {
 			rcu_read_unlock();
 			err = -ENOMEM;
-- 
cgit v1.2.3-70-g09d2


From fab00c5d15091546be681426c60b2ed2c10513bf Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 18 Aug 2008 21:31:46 -0700
Subject: netfilter: ctnetlink: sleepable allocation with spin lock bh

This patch removes a GFP_KERNEL allocation while holding a spin lock with
bottom halves disabled in ctnetlink_change_helper().

This problem was introduced in 2.6.23 with the netfilter extension
infrastructure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a5b95ccb3ce..a8752031adc 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -968,7 +968,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
 		/* need to zero data of old helper */
 		memset(&help->help, 0, sizeof(help->help));
 	} else {
-		help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
+		help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
 		if (help == NULL)
 			return -ENOMEM;
 	}
-- 
cgit v1.2.3-70-g09d2


From 9f593653742d1dd816c4e94c6e5154a57ccba6d1 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 18 Aug 2008 21:32:32 -0700
Subject: nf_nat: use secure_ipv4_port_ephemeral() for NAT port randomization

Use incoming network tuple as seed for NAT port randomization.
This avoids concerns of leaking net_random() bits, and also gives better
port distribution. Don't have NAT server, compile tested only.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

[ added missing EXPORT_SYMBOL_GPL ]

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/random.c                    | 1 +
 net/ipv4/netfilter/nf_nat_proto_common.c | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index e0d0e371909..1838aa3d24f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1571,6 +1571,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 
 	return half_md4_transform(hash, keyptr->secret);
 }
+EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 91537f11273..6c4f11f5144 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -73,9 +73,13 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.all) - min + 1;
 	}
 
-	off = *rover;
 	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
-		off = net_random();
+		off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
+						 maniptype == IP_NAT_MANIP_SRC
+						 ? tuple->dst.u.all
+						 : tuple->src.u.all);
+	else
+		off = *rover;
 
 	for (i = 0; i < range_size; i++, off++) {
 		*portptr = htons(min + off % range_size);
-- 
cgit v1.2.3-70-g09d2


From e5befbd9525d92bb074b70192eb2c69aae65fc60 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Mon, 18 Aug 2008 22:30:01 -0700
Subject: pkt_sched: remove bogus block (cleanup)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

...Last block local var got just deleted.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d91a2338877..9372ec41ce8 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1088,14 +1088,13 @@ create_n_graft:
 	}
 
 graft:
-	if (1) {
-		err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
-		if (err) {
-			if (q)
-				qdisc_destroy(q);
-			return err;
-		}
+	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
+	if (err) {
+		if (q)
+			qdisc_destroy(q);
+		return err;
 	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From deb3abf15fb92a608fba630da2e8719862731714 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 18 Aug 2008 22:32:10 -0700
Subject: Revert "pkt_sched: Protect gen estimators under est_lock."

This reverts commit d4766692e72422f3b0f0e9ac6773d92baad07d51.

qdisc_destroy() now runs in RTNL fully again, so this
change is no longer needed.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_estimator.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index a89f32fa94f..57abe8266be 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -99,7 +99,7 @@ struct gen_estimator_head
 
 static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
 
-/* Protects against NULL dereference and RCU write-side */
+/* Protects against NULL dereference */
 static DEFINE_RWLOCK(est_lock);
 
 static void est_timer(unsigned long arg)
@@ -185,7 +185,6 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 	est->last_packets = bstats->packets;
 	est->avpps = rate_est->pps<<10;
 
-	write_lock_bh(&est_lock);
 	if (!elist[idx].timer.function) {
 		INIT_LIST_HEAD(&elist[idx].list);
 		setup_timer(&elist[idx].timer, est_timer, idx);
@@ -195,7 +194,6 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
 
 	list_add_rcu(&est->list, &elist[idx].list);
-	write_unlock_bh(&est_lock);
 	return 0;
 }
 
@@ -214,6 +212,7 @@ static void __gen_kill_estimator(struct rcu_head *head)
  * Removes the rate estimator specified by &bstats and &rate_est
  * and deletes the timer.
  *
+ * NOTE: Called under rtnl_mutex
  */
 void gen_kill_estimator(struct gnet_stats_basic *bstats,
 	struct gnet_stats_rate_est *rate_est)
@@ -227,17 +226,17 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
 		if (!elist[idx].timer.function)
 			continue;
 
-		write_lock_bh(&est_lock);
 		list_for_each_entry_safe(e, n, &elist[idx].list, list) {
 			if (e->rate_est != rate_est || e->bstats != bstats)
 				continue;
 
+			write_lock_bh(&est_lock);
 			e->bstats = NULL;
+			write_unlock_bh(&est_lock);
 
 			list_del_rcu(&e->list);
 			call_rcu(&e->e_rcu, __gen_kill_estimator);
 		}
-		write_unlock_bh(&est_lock);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From f3b9605d744df537dee10fd06630f35a62b343ec Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 18 Aug 2008 22:33:05 -0700
Subject: Revert "pkt_sched: Add BH protection for qdisc_stab_lock."

This reverts commit 1cfa26661a85549063e369e2b40275eeaa7b923c.

qdisc_destroy() runs fully under RTNL again and not from softint any
longer, so this change is no longer needed.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 9372ec41ce8..ef0efeca635 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -332,7 +332,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 	if (!s || tsize != s->tsize || (!tab && tsize > 0))
 		return ERR_PTR(-EINVAL);
 
-	spin_lock_bh(&qdisc_stab_lock);
+	spin_lock(&qdisc_stab_lock);
 
 	list_for_each_entry(stab, &qdisc_stab_list, list) {
 		if (memcmp(&stab->szopts, s, sizeof(*s)))
@@ -340,11 +340,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
 			continue;
 		stab->refcnt++;
-		spin_unlock_bh(&qdisc_stab_lock);
+		spin_unlock(&qdisc_stab_lock);
 		return stab;
 	}
 
-	spin_unlock_bh(&qdisc_stab_lock);
+	spin_unlock(&qdisc_stab_lock);
 
 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
 	if (!stab)
@@ -355,9 +355,9 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 	if (tsize > 0)
 		memcpy(stab->data, tab, tsize * sizeof(u16));
 
-	spin_lock_bh(&qdisc_stab_lock);
+	spin_lock(&qdisc_stab_lock);
 	list_add_tail(&stab->list, &qdisc_stab_list);
-	spin_unlock_bh(&qdisc_stab_lock);
+	spin_unlock(&qdisc_stab_lock);
 
 	return stab;
 }
@@ -367,14 +367,14 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
 	if (!tab)
 		return;
 
-	spin_lock_bh(&qdisc_stab_lock);
+	spin_lock(&qdisc_stab_lock);
 
 	if (--tab->refcnt == 0) {
 		list_del(&tab->list);
 		kfree(tab);
 	}
 
-	spin_unlock_bh(&qdisc_stab_lock);
+	spin_unlock(&qdisc_stab_lock);
 }
 EXPORT_SYMBOL(qdisc_put_stab);
 
-- 
cgit v1.2.3-70-g09d2


From 39ac50d0c79747b186c1268d9a488f8c1d256be7 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Mon, 18 Aug 2008 00:52:08 +0200
Subject: ipvs: Fix race conditions in lblc scheduler

We can't access the cache entry outside of our critical read-locked region,
because someone may free that entry. And we also need to check in the critical
region wether the destination is still available, i.e. it's not in the trash.
If we drop our reference counter, the destination can be purged from the trash
at any time. Our caller only guarantees that no destination is moved to the
trash, while we are scheduling. Also there is no need for our own rwlock,
there is already one in the service structure for use in the schedulers.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_lblc.c | 204 +++++++++++++++++++++------------------------
 1 file changed, 96 insertions(+), 108 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index b9b334cccf3..d2a43aa3fe4 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -96,7 +96,6 @@ struct ip_vs_lblc_entry {
  *      IPVS lblc hash table
  */
 struct ip_vs_lblc_table {
-	rwlock_t	        lock;           /* lock for this table */
 	struct list_head        bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
 	atomic_t                entries;        /* number of entries */
 	int                     max_size;       /* maximum size of entries */
@@ -123,31 +122,6 @@ static ctl_table vs_vars_table[] = {
 
 static struct ctl_table_header * sysctl_header;
 
-/*
- *      new/free a ip_vs_lblc_entry, which is a mapping of a destionation
- *      IP address to a server.
- */
-static inline struct ip_vs_lblc_entry *
-ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest)
-{
-	struct ip_vs_lblc_entry *en;
-
-	en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
-	if (en == NULL) {
-		IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
-		return NULL;
-	}
-
-	INIT_LIST_HEAD(&en->list);
-	en->addr = daddr;
-
-	atomic_inc(&dest->refcnt);
-	en->dest = dest;
-
-	return en;
-}
-
-
 static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
 {
 	list_del(&en->list);
@@ -173,55 +147,66 @@ static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
  *	Hash an entry in the ip_vs_lblc_table.
  *	returns bool success.
  */
-static int
+static void
 ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
 {
-	unsigned hash;
-
-	if (!list_empty(&en->list)) {
-		IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
-			  "called from %p\n", __builtin_return_address(0));
-		return 0;
-	}
+	unsigned hash = ip_vs_lblc_hashkey(en->addr);
 
-	/*
-	 *	Hash by destination IP address
-	 */
-	hash = ip_vs_lblc_hashkey(en->addr);
-
-	write_lock(&tbl->lock);
 	list_add(&en->list, &tbl->bucket[hash]);
 	atomic_inc(&tbl->entries);
-	write_unlock(&tbl->lock);
-
-	return 1;
 }
 
 
 /*
- *  Get ip_vs_lblc_entry associated with supplied parameters.
+ *  Get ip_vs_lblc_entry associated with supplied parameters. Called under read
+ *  lock
  */
 static inline struct ip_vs_lblc_entry *
 ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
 {
-	unsigned hash;
+	unsigned hash = ip_vs_lblc_hashkey(addr);
 	struct ip_vs_lblc_entry *en;
 
-	hash = ip_vs_lblc_hashkey(addr);
+	list_for_each_entry(en, &tbl->bucket[hash], list)
+		if (en->addr == addr)
+			return en;
 
-	read_lock(&tbl->lock);
+	return NULL;
+}
 
-	list_for_each_entry(en, &tbl->bucket[hash], list) {
-		if (en->addr == addr) {
-			/* HIT */
-			read_unlock(&tbl->lock);
-			return en;
+
+/*
+ * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
+ * address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblc_entry *en;
+
+	en = ip_vs_lblc_get(tbl, daddr);
+	if (!en) {
+		en = kmalloc(sizeof(*en), GFP_ATOMIC);
+		if (!en) {
+			IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
+			return NULL;
 		}
-	}
 
-	read_unlock(&tbl->lock);
+		en->addr = daddr;
+		en->lastuse = jiffies;
 
-	return NULL;
+		atomic_inc(&dest->refcnt);
+		en->dest = dest;
+
+		ip_vs_lblc_hash(tbl, en);
+	} else if (en->dest != dest) {
+		atomic_dec(&en->dest->refcnt);
+		atomic_inc(&dest->refcnt);
+		en->dest = dest;
+	}
+
+	return en;
 }
 
 
@@ -230,30 +215,29 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
  */
 static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
 {
-	int i;
 	struct ip_vs_lblc_entry *en, *nxt;
+	int i;
 
 	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
-		write_lock(&tbl->lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
 			ip_vs_lblc_free(en);
 			atomic_dec(&tbl->entries);
 		}
-		write_unlock(&tbl->lock);
 	}
 }
 
 
-static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
+static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 {
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct ip_vs_lblc_entry *en, *nxt;
 	unsigned long now = jiffies;
 	int i, j;
-	struct ip_vs_lblc_entry *en, *nxt;
 
 	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
 
-		write_lock(&tbl->lock);
+		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
 			if (time_before(now,
 					en->lastuse + sysctl_ip_vs_lblc_expiration))
@@ -262,7 +246,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
 			ip_vs_lblc_free(en);
 			atomic_dec(&tbl->entries);
 		}
-		write_unlock(&tbl->lock);
+		write_unlock(&svc->sched_lock);
 	}
 	tbl->rover = j;
 }
@@ -281,17 +265,16 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
  */
 static void ip_vs_lblc_check_expire(unsigned long data)
 {
-	struct ip_vs_lblc_table *tbl;
+	struct ip_vs_service *svc = (struct ip_vs_service *) data;
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
 	unsigned long now = jiffies;
 	int goal;
 	int i, j;
 	struct ip_vs_lblc_entry *en, *nxt;
 
-	tbl = (struct ip_vs_lblc_table *)data;
-
 	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
 		/* do full expiration check */
-		ip_vs_lblc_full_check(tbl);
+		ip_vs_lblc_full_check(svc);
 		tbl->counter = 1;
 		goto out;
 	}
@@ -308,7 +291,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
 	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
 
-		write_lock(&tbl->lock);
+		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
 			if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
 				continue;
@@ -317,7 +300,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
 			atomic_dec(&tbl->entries);
 			goal--;
 		}
-		write_unlock(&tbl->lock);
+		write_unlock(&svc->sched_lock);
 		if (goal <= 0)
 			break;
 	}
@@ -336,15 +319,14 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Allocate the ip_vs_lblc_table for this service
 	 */
-	tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC);
+	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
 	if (tbl == NULL) {
 		IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
 		return -ENOMEM;
 	}
 	svc->sched_data = tbl;
 	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
-		  "current service\n",
-		  sizeof(struct ip_vs_lblc_table));
+		  "current service\n", sizeof(*tbl));
 
 	/*
 	 *    Initialize the hash buckets
@@ -352,7 +334,6 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		INIT_LIST_HEAD(&tbl->bucket[i]);
 	}
-	rwlock_init(&tbl->lock);
 	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
 	tbl->rover = 0;
 	tbl->counter = 1;
@@ -361,9 +342,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	 *    Hook periodic timer for garbage collection
 	 */
 	setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
-			(unsigned long)tbl);
-	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
-	add_timer(&tbl->periodic_timer);
+			(unsigned long)svc);
+	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
 
 	return 0;
 }
@@ -380,9 +360,9 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
 	ip_vs_lblc_flush(tbl);
 
 	/* release the table itself */
-	kfree(svc->sched_data);
+	kfree(tbl);
 	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
-		  sizeof(struct ip_vs_lblc_table));
+		  sizeof(*tbl));
 
 	return 0;
 }
@@ -478,46 +458,54 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 static struct ip_vs_dest *
 ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
-	struct ip_vs_dest *dest;
-	struct ip_vs_lblc_table *tbl;
-	struct ip_vs_lblc_entry *en;
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
 	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_dest *dest = NULL;
+	struct ip_vs_lblc_entry *en;
 
 	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
 
-	tbl = (struct ip_vs_lblc_table *)svc->sched_data;
+	/* First look in our cache */
+	read_lock(&svc->sched_lock);
 	en = ip_vs_lblc_get(tbl, iph->daddr);
-	if (en == NULL) {
-		dest = __ip_vs_lblc_schedule(svc, iph);
-		if (dest == NULL) {
-			IP_VS_DBG(1, "no destination available\n");
-			return NULL;
-		}
-		en = ip_vs_lblc_new(iph->daddr, dest);
-		if (en == NULL) {
-			return NULL;
-		}
-		ip_vs_lblc_hash(tbl, en);
-	} else {
-		dest = en->dest;
-		if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
-		    || atomic_read(&dest->weight) <= 0
-		    || is_overloaded(dest, svc)) {
-			dest = __ip_vs_lblc_schedule(svc, iph);
-			if (dest == NULL) {
-				IP_VS_DBG(1, "no destination available\n");
-				return NULL;
-			}
-			atomic_dec(&en->dest->refcnt);
-			atomic_inc(&dest->refcnt);
-			en->dest = dest;
-		}
+	if (en) {
+		/* We only hold a read lock, but this is atomic */
+		en->lastuse = jiffies;
+
+		/*
+		 * If the destination is not available, i.e. it's in the trash,
+		 * we must ignore it, as it may be removed from under our feet,
+		 * if someone drops our reference count. Our caller only makes
+		 * sure that destinations, that are not in the trash, are not
+		 * moved to the trash, while we are scheduling. But anyone can
+		 * free up entries from the trash at any time.
+		 */
+
+		if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
+			dest = en->dest;
+	}
+	read_unlock(&svc->sched_lock);
+
+	/* If the destination has a weight and is not overloaded, use it */
+	if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
+		goto out;
+
+	/* No cache entry or it is invalid, time to schedule */
+	dest = __ip_vs_lblc_schedule(svc, iph);
+	if (!dest) {
+		IP_VS_DBG(1, "no destination available\n");
+		return NULL;
 	}
-	en->lastuse = jiffies;
 
+	/* If we fail to create a cache entry, we'll just use the valid dest */
+	write_lock(&svc->sched_lock);
+	ip_vs_lblc_new(tbl, iph->daddr, dest);
+	write_unlock(&svc->sched_lock);
+
+out:
 	IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
 		  "--> server %u.%u.%u.%u:%d\n",
-		  NIPQUAD(en->addr),
+		  NIPQUAD(iph->daddr),
 		  NIPQUAD(dest->addr),
 		  ntohs(dest->port));
 
-- 
cgit v1.2.3-70-g09d2


From f728bafb5698076dd35bca35ee6cfe52ea1b8ab2 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Tue, 19 Aug 2008 08:16:19 +0200
Subject: ipvs: Fix race conditions in lblcr scheduler

We can't access the cache entry outside of our critical read-locked region,
because someone may free that entry. Also getting an entry under read lock,
then locking for write and trying to delete that entry looks fishy, but should
be no problem here, because we're only comparing a pointer. Also there is no
need for our own rwlock, there is already one in the service structure for use
in the schedulers.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_lblcr.c | 229 ++++++++++++++++++++++----------------------
 1 file changed, 114 insertions(+), 115 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index f1c84503689..375a1ffb6b6 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -106,7 +106,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 			return NULL;
 	}
 
-	e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC);
+	e = kmalloc(sizeof(*e), GFP_ATOMIC);
 	if (e == NULL) {
 		IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
 		return NULL;
@@ -116,11 +116,9 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 	e->dest = dest;
 
 	/* link it to the list */
-	write_lock(&set->lock);
 	e->next = set->list;
 	set->list = e;
 	atomic_inc(&set->size);
-	write_unlock(&set->lock);
 
 	set->lastmod = jiffies;
 	return e;
@@ -131,7 +129,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 {
 	struct ip_vs_dest_list *e, **ep;
 
-	write_lock(&set->lock);
 	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
 		if (e->dest == dest) {
 			/* HIT */
@@ -144,7 +141,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 		}
 		ep = &e->next;
 	}
-	write_unlock(&set->lock);
 }
 
 static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
@@ -174,7 +170,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 	if (set == NULL)
 		return NULL;
 
-	read_lock(&set->lock);
 	/* select the first destination server, whose weight > 0 */
 	for (e=set->list; e!=NULL; e=e->next) {
 		least = e->dest;
@@ -188,7 +183,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 			goto nextstage;
 		}
 	}
-	read_unlock(&set->lock);
 	return NULL;
 
 	/* find the destination with the weighted least load */
@@ -207,7 +201,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 			loh = doh;
 		}
 	}
-	read_unlock(&set->lock);
 
 	IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
 		  "activeconns %d refcnt %d weight %d overhead %d\n",
@@ -229,7 +222,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 	if (set == NULL)
 		return NULL;
 
-	read_lock(&set->lock);
 	/* select the first destination server, whose weight > 0 */
 	for (e=set->list; e!=NULL; e=e->next) {
 		most = e->dest;
@@ -239,7 +231,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 			goto nextstage;
 		}
 	}
-	read_unlock(&set->lock);
 	return NULL;
 
 	/* find the destination with the weighted most load */
@@ -256,7 +247,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 			moh = doh;
 		}
 	}
-	read_unlock(&set->lock);
 
 	IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
 		  "activeconns %d refcnt %d weight %d overhead %d\n",
@@ -284,7 +274,6 @@ struct ip_vs_lblcr_entry {
  *      IPVS lblcr hash table
  */
 struct ip_vs_lblcr_table {
-	rwlock_t	        lock;           /* lock for this table */
 	struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
 	atomic_t                entries;        /* number of entries */
 	int                     max_size;       /* maximum size of entries */
@@ -311,32 +300,6 @@ static ctl_table vs_vars_table[] = {
 
 static struct ctl_table_header * sysctl_header;
 
-/*
- *      new/free a ip_vs_lblcr_entry, which is a mapping of a destination
- *      IP address to a server.
- */
-static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr)
-{
-	struct ip_vs_lblcr_entry *en;
-
-	en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC);
-	if (en == NULL) {
-		IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
-		return NULL;
-	}
-
-	INIT_LIST_HEAD(&en->list);
-	en->addr = daddr;
-
-	/* initilize its dest set */
-	atomic_set(&(en->set.size), 0);
-	en->set.list = NULL;
-	rwlock_init(&en->set.lock);
-
-	return en;
-}
-
-
 static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
 {
 	list_del(&en->list);
@@ -358,55 +321,68 @@ static inline unsigned ip_vs_lblcr_hashkey(__be32 addr)
  *	Hash an entry in the ip_vs_lblcr_table.
  *	returns bool success.
  */
-static int
+static void
 ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
 {
-	unsigned hash;
-
-	if (!list_empty(&en->list)) {
-		IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, "
-			  "called from %p\n", __builtin_return_address(0));
-		return 0;
-	}
+	unsigned hash = ip_vs_lblcr_hashkey(en->addr);
 
-	/*
-	 *	Hash by destination IP address
-	 */
-	hash = ip_vs_lblcr_hashkey(en->addr);
-
-	write_lock(&tbl->lock);
 	list_add(&en->list, &tbl->bucket[hash]);
 	atomic_inc(&tbl->entries);
-	write_unlock(&tbl->lock);
-
-	return 1;
 }
 
 
 /*
- *  Get ip_vs_lblcr_entry associated with supplied parameters.
+ *  Get ip_vs_lblcr_entry associated with supplied parameters. Called under
+ *  read lock.
  */
 static inline struct ip_vs_lblcr_entry *
 ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
 {
-	unsigned hash;
+	unsigned hash = ip_vs_lblcr_hashkey(addr);
 	struct ip_vs_lblcr_entry *en;
 
-	hash = ip_vs_lblcr_hashkey(addr);
+	list_for_each_entry(en, &tbl->bucket[hash], list)
+		if (en->addr == addr)
+			return en;
+
+	return NULL;
+}
 
-	read_lock(&tbl->lock);
 
-	list_for_each_entry(en, &tbl->bucket[hash], list) {
-		if (en->addr == addr) {
-			/* HIT */
-			read_unlock(&tbl->lock);
-			return en;
+/*
+ * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
+ * IP address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl,  __be32 daddr,
+		struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblcr_entry *en;
+
+	en = ip_vs_lblcr_get(tbl, daddr);
+	if (!en) {
+		en = kmalloc(sizeof(*en), GFP_ATOMIC);
+		if (!en) {
+			IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
+			return NULL;
 		}
+
+		en->addr = daddr;
+		en->lastuse = jiffies;
+
+		/* initilize its dest set */
+		atomic_set(&(en->set.size), 0);
+		en->set.list = NULL;
+		rwlock_init(&en->set.lock);
+
+		ip_vs_lblcr_hash(tbl, en);
 	}
 
-	read_unlock(&tbl->lock);
+	write_lock(&en->set.lock);
+	ip_vs_dest_set_insert(&en->set, dest);
+	write_unlock(&en->set.lock);
 
-	return NULL;
+	return en;
 }
 
 
@@ -418,19 +394,18 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
 	int i;
 	struct ip_vs_lblcr_entry *en, *nxt;
 
+	/* No locking required, only called during cleanup. */
 	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
-		write_lock(&tbl->lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
 			ip_vs_lblcr_free(en);
-			atomic_dec(&tbl->entries);
 		}
-		write_unlock(&tbl->lock);
 	}
 }
 
 
-static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
+static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
 {
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
 	unsigned long now = jiffies;
 	int i, j;
 	struct ip_vs_lblcr_entry *en, *nxt;
@@ -438,7 +413,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
 	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
 
-		write_lock(&tbl->lock);
+		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
 			if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
 				       now))
@@ -447,7 +422,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
 			ip_vs_lblcr_free(en);
 			atomic_dec(&tbl->entries);
 		}
-		write_unlock(&tbl->lock);
+		write_unlock(&svc->sched_lock);
 	}
 	tbl->rover = j;
 }
@@ -466,17 +441,16 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
  */
 static void ip_vs_lblcr_check_expire(unsigned long data)
 {
-	struct ip_vs_lblcr_table *tbl;
+	struct ip_vs_service *svc = (struct ip_vs_service *) data;
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
 	unsigned long now = jiffies;
 	int goal;
 	int i, j;
 	struct ip_vs_lblcr_entry *en, *nxt;
 
-	tbl = (struct ip_vs_lblcr_table *)data;
-
 	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
 		/* do full expiration check */
-		ip_vs_lblcr_full_check(tbl);
+		ip_vs_lblcr_full_check(svc);
 		tbl->counter = 1;
 		goto out;
 	}
@@ -493,7 +467,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
 	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
 
-		write_lock(&tbl->lock);
+		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
 			if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
 				continue;
@@ -502,7 +476,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
 			atomic_dec(&tbl->entries);
 			goal--;
 		}
-		write_unlock(&tbl->lock);
+		write_unlock(&svc->sched_lock);
 		if (goal <= 0)
 			break;
 	}
@@ -520,15 +494,14 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Allocate the ip_vs_lblcr_table for this service
 	 */
-	tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC);
+	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
 	if (tbl == NULL) {
 		IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
 		return -ENOMEM;
 	}
 	svc->sched_data = tbl;
 	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
-		  "current service\n",
-		  sizeof(struct ip_vs_lblcr_table));
+		  "current service\n", sizeof(*tbl));
 
 	/*
 	 *    Initialize the hash buckets
@@ -536,7 +509,6 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
 	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
 		INIT_LIST_HEAD(&tbl->bucket[i]);
 	}
-	rwlock_init(&tbl->lock);
 	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
 	tbl->rover = 0;
 	tbl->counter = 1;
@@ -545,9 +517,8 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
 	 *    Hook periodic timer for garbage collection
 	 */
 	setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
-			(unsigned long)tbl);
-	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
-	add_timer(&tbl->periodic_timer);
+			(unsigned long)svc);
+	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
 
 	return 0;
 }
@@ -564,9 +535,9 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
 	ip_vs_lblcr_flush(tbl);
 
 	/* release the table itself */
-	kfree(svc->sched_data);
+	kfree(tbl);
 	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
-		  sizeof(struct ip_vs_lblcr_table));
+		  sizeof(*tbl));
 
 	return 0;
 }
@@ -663,50 +634,78 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 static struct ip_vs_dest *
 ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
-	struct ip_vs_dest *dest;
-	struct ip_vs_lblcr_table *tbl;
-	struct ip_vs_lblcr_entry *en;
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
 	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_dest *dest = NULL;
+	struct ip_vs_lblcr_entry *en;
 
 	IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
 
-	tbl = (struct ip_vs_lblcr_table *)svc->sched_data;
+	/* First look in our cache */
+	read_lock(&svc->sched_lock);
 	en = ip_vs_lblcr_get(tbl, iph->daddr);
-	if (en == NULL) {
-		dest = __ip_vs_lblcr_schedule(svc, iph);
-		if (dest == NULL) {
-			IP_VS_DBG(1, "no destination available\n");
-			return NULL;
-		}
-		en = ip_vs_lblcr_new(iph->daddr);
-		if (en == NULL) {
-			return NULL;
-		}
-		ip_vs_dest_set_insert(&en->set, dest);
-		ip_vs_lblcr_hash(tbl, en);
-	} else {
+	if (en) {
+		/* We only hold a read lock, but this is atomic */
+		en->lastuse = jiffies;
+
+		/* Get the least loaded destination */
+		read_lock(&en->set.lock);
 		dest = ip_vs_dest_set_min(&en->set);
-		if (!dest || is_overloaded(dest, svc)) {
-			dest = __ip_vs_lblcr_schedule(svc, iph);
-			if (dest == NULL) {
-				IP_VS_DBG(1, "no destination available\n");
-				return NULL;
-			}
-			ip_vs_dest_set_insert(&en->set, dest);
-		}
+		read_unlock(&en->set.lock);
+
+		/* More than one destination + enough time passed by, cleanup */
 		if (atomic_read(&en->set.size) > 1 &&
-		    jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) {
+				time_after(jiffies, en->set.lastmod +
+				sysctl_ip_vs_lblcr_expiration)) {
 			struct ip_vs_dest *m;
+
+			write_lock(&en->set.lock);
 			m = ip_vs_dest_set_max(&en->set);
 			if (m)
 				ip_vs_dest_set_erase(&en->set, m);
+			write_unlock(&en->set.lock);
+		}
+
+		/* If the destination is not overloaded, use it */
+		if (dest && !is_overloaded(dest, svc)) {
+			read_unlock(&svc->sched_lock);
+			goto out;
+		}
+
+		/* The cache entry is invalid, time to schedule */
+		dest = __ip_vs_lblcr_schedule(svc, iph);
+		if (!dest) {
+			IP_VS_DBG(1, "no destination available\n");
+			read_unlock(&svc->sched_lock);
+			return NULL;
 		}
+
+		/* Update our cache entry */
+		write_lock(&en->set.lock);
+		ip_vs_dest_set_insert(&en->set, dest);
+		write_unlock(&en->set.lock);
+	}
+	read_unlock(&svc->sched_lock);
+
+	if (dest)
+		goto out;
+
+	/* No cache entry, time to schedule */
+	dest = __ip_vs_lblcr_schedule(svc, iph);
+	if (!dest) {
+		IP_VS_DBG(1, "no destination available\n");
+		return NULL;
 	}
-	en->lastuse = jiffies;
 
+	/* If we fail to create a cache entry, we'll just use the valid dest */
+	write_lock(&svc->sched_lock);
+	ip_vs_lblcr_new(tbl, iph->daddr, dest);
+	write_unlock(&svc->sched_lock);
+
+out:
 	IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
 		  "--> server %u.%u.%u.%u:%d\n",
-		  NIPQUAD(en->addr),
+		  NIPQUAD(iph->daddr),
 		  NIPQUAD(dest->addr),
 		  ntohs(dest->port));
 
-- 
cgit v1.2.3-70-g09d2


From 195648bbc5ae0848e82f771ecf4cd7497054c212 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 19 Aug 2008 04:00:36 -0700
Subject: pkt_sched: Prevent livelock in TX queue running.

If dev_deactivate() is trying to quiesce the queue, it
is theoretically possible for another cpu to livelock
trying to process that queue.  This happens because
dev_deactivate() grabs the queue spinlock as it checks
the queue state, whereas net_tx_action() does a trylock
and reschedules the qdisc if it hits the lock.

This breaks the livelock by adding a check on
__QDISC_STATE_DEACTIVATED to net_tx_action() when
the trylock fails.

Based upon feedback from Herbert Xu and Jarek Poplawski.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8d133802372..60c51f76588 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1990,7 +1990,9 @@ static void net_tx_action(struct softirq_action *h)
 				qdisc_run(q);
 				spin_unlock(root_lock);
 			} else {
-				__netif_reschedule(q);
+				if (!test_bit(__QDISC_STATE_DEACTIVATED,
+					      &q->state))
+					__netif_reschedule(q);
 			}
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From 5e739d1752aca4e8f3e794d431503bfca3162df4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 21 Aug 2008 03:34:25 -0700
Subject: sctp: fix potential panics in the SCTP-AUTH API.

All of the SCTP-AUTH socket options could cause a panic
if the extension is disabled and the API is envoked.

Additionally, there were some additional assumptions that
certain pointers would always be valid which may not
always be the case.

This patch hardens the API and address all of the crash
scenarios.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/endpointola.c |  4 +--
 net/sctp/socket.c      | 85 ++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 67 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index e39a0cdef18..4c8d9f45ce0 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -103,6 +103,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 
 		/* Initialize the CHUNKS parameter */
 		auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
+		auth_chunks->param_hdr.length = htons(sizeof(sctp_paramhdr_t));
 
 		/* If the Add-IP functionality is enabled, we must
 		 * authenticate, ASCONF and ASCONF-ACK chunks
@@ -110,8 +111,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 		if (sctp_addip_enable) {
 			auth_chunks->chunks[0] = SCTP_CID_ASCONF;
 			auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
-			auth_chunks->param_hdr.length =
-					htons(sizeof(sctp_paramhdr_t) + 2);
+			auth_chunks->param_hdr.length += htons(2);
 		}
 	}
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index dbb79adf8f3..bb5c9ef1304 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3055,6 +3055,9 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
 {
 	struct sctp_authchunk val;
 
+	if (!sctp_auth_enable)
+		return -EACCES;
+
 	if (optlen != sizeof(struct sctp_authchunk))
 		return -EINVAL;
 	if (copy_from_user(&val, optval, optlen))
@@ -3085,6 +3088,9 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
 	struct sctp_hmacalgo *hmacs;
 	int err;
 
+	if (!sctp_auth_enable)
+		return -EACCES;
+
 	if (optlen < sizeof(struct sctp_hmacalgo))
 		return -EINVAL;
 
@@ -3123,6 +3129,9 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 	struct sctp_association *asoc;
 	int ret;
 
+	if (!sctp_auth_enable)
+		return -EACCES;
+
 	if (optlen <= sizeof(struct sctp_authkey))
 		return -EINVAL;
 
@@ -3160,6 +3169,9 @@ static int sctp_setsockopt_active_key(struct sock *sk,
 	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
 
+	if (!sctp_auth_enable)
+		return -EACCES;
+
 	if (optlen != sizeof(struct sctp_authkeyid))
 		return -EINVAL;
 	if (copy_from_user(&val, optval, optlen))
@@ -3185,6 +3197,9 @@ static int sctp_setsockopt_del_key(struct sock *sk,
 	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
 
+	if (!sctp_auth_enable)
+		return -EACCES;
+
 	if (optlen != sizeof(struct sctp_authkeyid))
 		return -EINVAL;
 	if (copy_from_user(&val, optval, optlen))
@@ -5197,19 +5212,29 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
 static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
 				    char __user *optval, int __user *optlen)
 {
+	struct sctp_hmacalgo  __user *p = (void __user *)optval;
 	struct sctp_hmac_algo_param *hmacs;
-	__u16 param_len;
+	__u16 data_len = 0;
+	u32 num_idents;
+
+	if (!sctp_auth_enable)
+		return -EACCES;
 
 	hmacs = sctp_sk(sk)->ep->auth_hmacs_list;
-	param_len = ntohs(hmacs->param_hdr.length);
+	data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t);
 
-	if (len < param_len)
+	if (len < sizeof(struct sctp_hmacalgo) + data_len)
 		return -EINVAL;
+
+	len = sizeof(struct sctp_hmacalgo) + data_len;
+	num_idents = data_len / sizeof(u16);
+
 	if (put_user(len, optlen))
 		return -EFAULT;
-	if (copy_to_user(optval, hmacs->hmac_ids, len))
+	if (put_user(num_idents, &p->shmac_num_idents))
+		return -EFAULT;
+	if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len))
 		return -EFAULT;
-
 	return 0;
 }
 
@@ -5219,6 +5244,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
 	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
 
+	if (!sctp_auth_enable)
+		return -EACCES;
+
 	if (len < sizeof(struct sctp_authkeyid))
 		return -EINVAL;
 	if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid)))
@@ -5233,6 +5261,12 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
 	else
 		val.scact_keynumber = sctp_sk(sk)->ep->active_key_id;
 
+	len = sizeof(struct sctp_authkeyid);
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
 	return 0;
 }
 
@@ -5243,13 +5277,16 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 	struct sctp_authchunks val;
 	struct sctp_association *asoc;
 	struct sctp_chunks_param *ch;
-	u32    num_chunks;
+	u32    num_chunks = 0;
 	char __user *to;
 
-	if (len <= sizeof(struct sctp_authchunks))
+	if (!sctp_auth_enable)
+		return -EACCES;
+
+	if (len < sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
-	if (copy_from_user(&val, p, sizeof(struct sctp_authchunks)))
+	if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
 		return -EFAULT;
 
 	to = p->gauth_chunks;
@@ -5258,20 +5295,21 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 		return -EINVAL;
 
 	ch = asoc->peer.peer_chunks;
+	if (!ch)
+		goto num;
 
 	/* See if the user provided enough room for all the data */
 	num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
 	if (len < num_chunks)
 		return -EINVAL;
 
-	len = num_chunks;
-	if (put_user(len, optlen))
+	if (copy_to_user(to, ch->chunks, num_chunks))
 		return -EFAULT;
+num:
+	len = sizeof(struct sctp_authchunks) + num_chunks;
+	if (put_user(len, optlen)) return -EFAULT;
 	if (put_user(num_chunks, &p->gauth_number_of_chunks))
 		return -EFAULT;
-	if (copy_to_user(to, ch->chunks, len))
-		return -EFAULT;
-
 	return 0;
 }
 
@@ -5282,13 +5320,16 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 	struct sctp_authchunks val;
 	struct sctp_association *asoc;
 	struct sctp_chunks_param *ch;
-	u32    num_chunks;
+	u32    num_chunks = 0;
 	char __user *to;
 
-	if (len <= sizeof(struct sctp_authchunks))
+	if (!sctp_auth_enable)
+		return -EACCES;
+
+	if (len < sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
-	if (copy_from_user(&val, p, sizeof(struct sctp_authchunks)))
+	if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
 		return -EFAULT;
 
 	to = p->gauth_chunks;
@@ -5301,17 +5342,21 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 	else
 		ch = sctp_sk(sk)->ep->auth_chunk_list;
 
+	if (!ch)
+		goto num;
+
 	num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
-	if (len < num_chunks)
+	if (len < sizeof(struct sctp_authchunks) + num_chunks)
 		return -EINVAL;
 
-	len = num_chunks;
+	if (copy_to_user(to, ch->chunks, num_chunks))
+		return -EFAULT;
+num:
+	len = sizeof(struct sctp_authchunks) + num_chunks;
 	if (put_user(len, optlen))
 		return -EFAULT;
 	if (put_user(num_chunks, &p->gauth_number_of_chunks))
 		return -EFAULT;
-	if (copy_to_user(to, ch->chunks, len))
-		return -EFAULT;
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 2540e0511ea17e25831be543cdf9381e6209950d Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Thu, 21 Aug 2008 05:11:14 -0700
Subject: pkt_sched: Fix qdisc_watchdog() vs. dev_deactivate() race

dev_deactivate() can skip rescheduling of a qdisc by qdisc_watchdog()
or other timer calling netif_schedule() after dev_queue_deactivate().
We prevent this checking aliveness before scheduling the timer. Since
during deactivation the root qdisc is available only as qdisc_sleeping
additional accessor qdisc_root_sleeping() is created.

With feedback from Herbert Xu <herbert@gondor.apana.org.au>

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 5 +++++
 net/sched/sch_api.c       | 4 ++++
 net/sched/sch_cbq.c       | 4 ++++
 3 files changed, 13 insertions(+)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 84d25f2e618..b1d2cfea89c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -193,6 +193,11 @@ static inline struct Qdisc *qdisc_root(struct Qdisc *qdisc)
 	return qdisc->dev_queue->qdisc;
 }
 
+static inline struct Qdisc *qdisc_root_sleeping(struct Qdisc *qdisc)
+{
+	return qdisc->dev_queue->qdisc_sleeping;
+}
+
 /* The qdisc root lock is a mechanism by which to top level
  * of a qdisc tree can be locked from any qdisc node in the
  * forest.  This allows changing the configuration of some
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ef0efeca635..45f442d7de4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -444,6 +444,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
 {
 	ktime_t time;
 
+	if (test_bit(__QDISC_STATE_DEACTIVATED,
+		     &qdisc_root_sleeping(wd->qdisc)->state))
+		return;
+
 	wd->qdisc->flags |= TCQ_F_THROTTLED;
 	time = ktime_set(0, 0);
 	time = ktime_add_ns(time, PSCHED_US2NS(expires));
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 47ef492c4ff..8fa90d68ec6 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -521,6 +521,10 @@ static void cbq_ovl_delay(struct cbq_class *cl)
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 	psched_tdiff_t delay = cl->undertime - q->now;
 
+	if (test_bit(__QDISC_STATE_DEACTIVATED,
+		     &qdisc_root_sleeping(cl->qdisc)->state))
+		return;
+
 	if (!cl->delayed) {
 		psched_time_t sched = q->now;
 		ktime_t expires;
-- 
cgit v1.2.3-70-g09d2


From f6e0b239a2657ea8cb67f0d83d0bfdbfd19a481b Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 22 Aug 2008 03:24:05 -0700
Subject: pkt_sched: Fix qdisc list locking

Since some qdiscs call qdisc_tree_decrease_qlen() (so qdisc_lookup())
without rtnl_lock(), adding and deleting from a qdisc list needs
additional locking. This patch adds global spinlock qdisc_list_lock
and wrapper functions for modifying the list. It is considered as a
temporary solution until hfsc_dequeue(), netem_dequeue() and
tbf_dequeue() (or qdisc_tree_decrease_qlen()) are redone.

With feedback from Herbert Xu and David S. Miller.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  1 +
 net/sched/sch_api.c     | 44 +++++++++++++++++++++++++++++++++++++++-----
 net/sched/sch_generic.c |  5 ++---
 3 files changed, 42 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 853fe83d9f3..b786a5b0925 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -78,6 +78,7 @@ extern struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
 
 extern int register_qdisc(struct Qdisc_ops *qops);
 extern int unregister_qdisc(struct Qdisc_ops *qops);
+extern void qdisc_list_del(struct Qdisc *q);
 extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
 extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
 extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 45f442d7de4..e7fb9e0d21b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -199,19 +199,53 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
 	return NULL;
 }
 
+/*
+ * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
+ * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
+ */
+static DEFINE_SPINLOCK(qdisc_list_lock);
+
+static void qdisc_list_add(struct Qdisc *q)
+{
+	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+		spin_lock_bh(&qdisc_list_lock);
+		list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
+		spin_unlock_bh(&qdisc_list_lock);
+	}
+}
+
+void qdisc_list_del(struct Qdisc *q)
+{
+	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+		spin_lock_bh(&qdisc_list_lock);
+		list_del(&q->list);
+		spin_unlock_bh(&qdisc_list_lock);
+	}
+}
+EXPORT_SYMBOL(qdisc_list_del);
+
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
 	unsigned int i;
+	struct Qdisc *q;
+
+	spin_lock_bh(&qdisc_list_lock);
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		struct Qdisc *q, *txq_root = txq->qdisc_sleeping;
+		struct Qdisc *txq_root = txq->qdisc_sleeping;
 
 		q = qdisc_match_from_root(txq_root, handle);
 		if (q)
-			return q;
+			goto unlock;
 	}
-	return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
+
+	q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
+
+unlock:
+	spin_unlock_bh(&qdisc_list_lock);
+
+	return q;
 }
 
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
@@ -810,8 +844,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 				goto err_out3;
 			}
 		}
-		if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS))
-			list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list);
+
+		qdisc_list_add(sch);
 
 		return sch;
 	}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index c3ed4d44fc1..5f0ade7806a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -526,10 +526,9 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	    !atomic_dec_and_test(&qdisc->refcnt))
 		return;
 
-	if (qdisc->parent)
-		list_del(&qdisc->list);
-
 #ifdef CONFIG_NET_SCHED
+	qdisc_list_del(qdisc);
+
 	qdisc_put_stab(qdisc->stab);
 #endif
 	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
-- 
cgit v1.2.3-70-g09d2


From d92a8e81e097968d8f2bac0581a0a43bff14b8f0 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 16 Jul 2008 16:34:54 +0200
Subject: net/ieee80211: adjust error handling

Converts a test in error handling code to a sequence of labels.

The semantic match that found the problem is:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
expression E,E1,E2;
@@

E = alloc_etherdev(...)
... when != E = E1
if (...) { ... free_netdev(E); ... return ...; }
... when != E = E2
(
  if (...)
   {
   ... when != free_netdev(E);
   return dev; }
|
* if (...)
   {
   ... when != free_netdev(E);
   return ...; }
|
register_netdev(E)
)

// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/ieee80211/ieee80211_module.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index 3bca97f55d4..949772a5a7d 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -157,7 +157,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv)
 	err = ieee80211_networks_allocate(ieee);
 	if (err) {
 		IEEE80211_ERROR("Unable to allocate beacon storage: %d\n", err);
-		goto failed;
+		goto failed_free_netdev;
 	}
 	ieee80211_networks_initialize(ieee);
 
@@ -193,9 +193,9 @@ struct net_device *alloc_ieee80211(int sizeof_priv)
 
 	return dev;
 
-      failed:
-	if (dev)
-		free_netdev(dev);
+failed_free_netdev:
+	free_netdev(dev);
+failed:
 	return NULL;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 5442060c08a49bd0b416f033e0ae43ccedef5278 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Thu, 24 Jul 2008 12:20:09 -0400
Subject: WIRELESS: Make wireless one-click selectable.

Use "menuconfig" to make wireless support one-click selectable.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/Kconfig | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index 7612cc8c337..d87de48ba65 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -236,14 +236,18 @@ source "net/rxrpc/Kconfig"
 config FIB_RULES
 	bool
 
-menu "Wireless"
+menuconfig WIRELESS
+	bool "Wireless"
 	depends on !S390
+	default y
+
+if WIRELESS
 
 source "net/wireless/Kconfig"
 source "net/mac80211/Kconfig"
 source "net/ieee80211/Kconfig"
 
-endmenu
+endif # WIRELESS
 
 source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
-- 
cgit v1.2.3-70-g09d2


From 92ab85354993ac3a364c65cab45745af470ffc67 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 24 Jul 2008 21:02:04 +0300
Subject: mac80211: add ieee80211_queue_stopped)

This patch adds ieee80211_queue_stopped that let drivers to query
queue status

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 10 ++++++++++
 net/mac80211/util.c    |  7 +++++++
 2 files changed, 17 insertions(+)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ff137fd7714..25cc192cbe4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1607,6 +1607,16 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue);
  */
 void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue);
 
+/**
+ * ieee80211_queue_stopped - test status of the queue
+ * @hw: pointer as obtained from ieee80211_alloc_hw().
+ * @queue: queue number (counted from zero).
+ *
+ * Drivers should use this function instead of netif_stop_queue.
+ */
+
+int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue);
+
 /**
  * ieee80211_stop_queues - stop all queues
  * @hw: pointer as obtained from ieee80211_alloc_hw().
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0d463c80c40..24400618190 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -386,6 +386,13 @@ void ieee80211_stop_queues(struct ieee80211_hw *hw)
 }
 EXPORT_SYMBOL(ieee80211_stop_queues);
 
+int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	return __netif_subqueue_stopped(local->mdev, queue);
+}
+EXPORT_SYMBOL(ieee80211_queue_stopped);
+
 void ieee80211_wake_queues(struct ieee80211_hw *hw)
 {
 	int i;
-- 
cgit v1.2.3-70-g09d2


From b4f28bbb9bf0b2c829ecf97ce2173f204fde4f10 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Wed, 30 Jul 2008 17:19:55 +0200
Subject: mac80211: add rx status flag for short preamble

and use it for the radiotap header

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 2 ++
 net/mac80211/rx.c      | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 25cc192cbe4..dcc05a197dc 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -363,6 +363,7 @@ static inline struct ieee80211_tx_info *IEEE80211_SKB_CB(struct sk_buff *skb)
  * @RX_FLAG_TSFT: The timestamp passed in the RX status (@mactime field)
  *	is valid. This is useful in monitor mode and necessary for beacon frames
  *	to enable IBSS merging.
+ * @RX_FLAG_SHORTPRE: Short preamble was used for this frame
  */
 enum mac80211_rx_flags {
 	RX_FLAG_MMIC_ERROR	= 1<<0,
@@ -373,6 +374,7 @@ enum mac80211_rx_flags {
 	RX_FLAG_FAILED_FCS_CRC	= 1<<5,
 	RX_FLAG_FAILED_PLCP_CRC = 1<<6,
 	RX_FLAG_TSFT		= 1<<7,
+	RX_FLAG_SHORTPRE	= 1<<8
 };
 
 /**
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6db85450519..ad47a614a20 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -143,6 +143,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	/* IEEE80211_RADIOTAP_FLAGS */
 	if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
 		*pos |= IEEE80211_RADIOTAP_F_FCS;
+	if (status->flag & RX_FLAG_SHORTPRE)
+		*pos |= IEEE80211_RADIOTAP_F_SHORTPRE;
 	pos++;
 
 	/* IEEE80211_RADIOTAP_RATE */
-- 
cgit v1.2.3-70-g09d2


From 9deb1ae572364a37d054d916c5bae858f91a3f9a Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Wed, 30 Jul 2008 17:20:06 +0200
Subject: mac80211: radiotap: assume modulation from rates

use the rates ERP flag to derive CCK or OFDM modulation for the radiotap
header.

(it might be more correct to get this information from the hardware itself, but it
seems safe to assume this in most practical cases.)

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index ad47a614a20..60e9ea11115 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -157,8 +157,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	if (status->band == IEEE80211_BAND_5GHZ)
 		*(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM |
 					     IEEE80211_CHAN_5GHZ);
+	else if (rate->flags & IEEE80211_RATE_ERP_G)
+		*(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM |
+					     IEEE80211_CHAN_2GHZ);
 	else
-		*(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_DYN |
+		*(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_CCK |
 					     IEEE80211_CHAN_2GHZ);
 	pos += 2;
 
-- 
cgit v1.2.3-70-g09d2


From 62bf1d762e24006fa9b6c8d56a22cf47a2310af3 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 15 Jul 2008 18:44:05 -0700
Subject: mac80211: explicitly check skb->len

ieee80211_get_hdrlen_from_skb internally checks the skb is long enough to
hold the full ieee80211_hdr, else it returns zero.  Use ieee80211_hdrlen
which always returns the hdrlen and check the remaining room in the
skb explicitly when removing encryption headers or the qos control field.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index aa5a191598c..f5537f90dd3 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1244,9 +1244,10 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
 				      struct ieee80211_key *key,
 				      struct sk_buff *skb)
 {
-	int hdrlen, iv_len, mic_len;
+	unsigned int hdrlen, iv_len, mic_len;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
-	hdrlen = ieee80211_get_hdrlen_from_skb(skb);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
 	if (!key)
 		goto no_key;
@@ -1268,24 +1269,20 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
 		goto no_key;
 	}
 
-	if (skb->len >= mic_len &&
+	if (skb->len >= hdrlen + mic_len &&
 	    !(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
 		skb_trim(skb, skb->len - mic_len);
-	if (skb->len >= iv_len && skb->len > hdrlen) {
+	if (skb->len >= hdrlen + iv_len) {
 		memmove(skb->data + iv_len, skb->data, hdrlen);
-		skb_pull(skb, iv_len);
+		hdr = (struct ieee80211_hdr *)skb_pull(skb, iv_len);
 	}
 
 no_key:
-	{
-		struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-		u16 fc = le16_to_cpu(hdr->frame_control);
-		if ((fc & 0x8C) == 0x88) /* QoS Control Field */ {
-			fc &= ~IEEE80211_STYPE_QOS_DATA;
-			hdr->frame_control = cpu_to_le16(fc);
-			memmove(skb->data + 2, skb->data, hdrlen - 2);
-			skb_pull(skb, 2);
-		}
+	if (ieee80211_is_data_qos(hdr->frame_control)) {
+		hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
+		memmove(skb->data + IEEE80211_QOS_CTL_LEN, skb->data,
+			hdrlen - IEEE80211_QOS_CTL_LEN);
+		skb_pull(skb, IEEE80211_QOS_CTL_LEN);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From c44d040e186e1af9d947f7bad886b7c1d35a22f6 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 15 Jul 2008 18:44:07 -0700
Subject: mac80211: wme.h remove unused QOS_CONTROL_LEN

linux/ieee80211.h now has IEEE80211_QOS_CTL_LEN for this purpose.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wme.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h
index 04de28c071a..465e274df7c 100644
--- a/net/mac80211/wme.h
+++ b/net/mac80211/wme.h
@@ -14,8 +14,6 @@
 #include <linux/netdevice.h>
 #include "ieee80211_i.h"
 
-#define QOS_CONTROL_LEN 2
-
 #define QOS_CONTROL_ACK_POLICY_NORMAL 0
 #define QOS_CONTROL_ACK_POLICY_NOACK 1
 
-- 
cgit v1.2.3-70-g09d2


From d298487260d01934a8df3a4a2a09513d84a8e69b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 15 Jul 2008 18:44:10 -0700
Subject: mac80211: wep.c replace magic numbers in IV/ICV removal

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wep.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 5c2bf0a3d4d..639998775b4 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -228,11 +228,10 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb,
 		return -1;
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
-
-	if (skb->len < 8 + hdrlen)
+	if (skb->len < hdrlen + WEP_IV_LEN + WEP_ICV_LEN)
 		return -1;
 
-	len = skb->len - hdrlen - 8;
+	len = skb->len - hdrlen - WEP_IV_LEN - WEP_ICV_LEN;
 
 	keyidx = skb->data[hdrlen + 3] >> 6;
 
@@ -303,7 +302,7 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
 	} else if (!(rx->status->flag & RX_FLAG_IV_STRIPPED)) {
 		ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
 		/* remove ICV */
-		skb_trim(rx->skb, rx->skb->len - 4);
+		skb_trim(rx->skb, rx->skb->len - WEP_ICV_LEN);
 	}
 
 	return RX_CONTINUE;
-- 
cgit v1.2.3-70-g09d2


From b73d70ad8665fd3f35c855075b9a94de3e2b69e2 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 15 Jul 2008 18:44:12 -0700
Subject: mac80211: rx.c/tx.c remove more users of tx/rx_data->fc

Those functions that still use ieee80211_get_hdrlen are moved over
to use the little endian frame control.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 62 +++++++++++++++++++++----------------------------------
 net/mac80211/tx.c |  2 +-
 2 files changed, 25 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 60e9ea11115..4e9631c140a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -821,7 +821,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
 
 static inline struct ieee80211_fragment_entry *
 ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
-			  u16 fc, unsigned int frag, unsigned int seq,
+			  unsigned int frag, unsigned int seq,
 			  int rx_queue, struct ieee80211_hdr *hdr)
 {
 	struct ieee80211_fragment_entry *entry;
@@ -830,7 +830,6 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
 	idx = sdata->fragment_next;
 	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
 		struct ieee80211_hdr *f_hdr;
-		u16 f_fc;
 
 		idx--;
 		if (idx < 0)
@@ -842,10 +841,13 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
 		    entry->last_frag + 1 != frag)
 			continue;
 
-		f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data;
-		f_fc = le16_to_cpu(f_hdr->frame_control);
+		f_hdr = (struct ieee80211_hdr *)entry->skb_list.next->data;
 
-		if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) ||
+		/*
+		 * Check ftype and addresses are equal, else check next fragment
+		 */
+		if (((hdr->frame_control ^ f_hdr->frame_control) &
+		     cpu_to_le16(IEEE80211_FCTL_FTYPE)) ||
 		    compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 ||
 		    compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0)
 			continue;
@@ -870,11 +872,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	struct sk_buff *skb;
 	DECLARE_MAC_BUF(mac);
 
-	hdr = (struct ieee80211_hdr *) rx->skb->data;
+	hdr = (struct ieee80211_hdr *)rx->skb->data;
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	frag = sc & IEEE80211_SCTL_FRAG;
 
-	if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) ||
+	if (likely((!ieee80211_has_morefrags(hdr->frame_control) && frag == 0) ||
 		   (rx->skb)->len < 24 ||
 		   is_multicast_ether_addr(hdr->addr1))) {
 		/* not fragmented */
@@ -889,7 +891,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
 						 rx->queue, &(rx->skb));
 		if (rx->key && rx->key->conf.alg == ALG_CCMP &&
-		    (rx->fc & IEEE80211_FCTL_PROTECTED)) {
+		    ieee80211_has_protected(hdr->frame_control)) {
 			/* Store CCMP PN so that we can verify that the next
 			 * fragment has a sequential PN value. */
 			entry->ccmp = 1;
@@ -903,8 +905,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	/* This is a fragment for a frame that should already be pending in
 	 * fragment cache. Add this fragment to the end of the pending entry.
 	 */
-	entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq,
-					  rx->queue, hdr);
+	entry = ieee80211_reassemble_find(rx->sdata, frag, seq, rx->queue, hdr);
 	if (!entry) {
 		I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
 		return RX_DROP_MONITOR;
@@ -929,7 +930,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		memcpy(entry->last_pn, pn, CCMP_PN_LEN);
 	}
 
-	skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc));
+	skb_pull(rx->skb, ieee80211_hdrlen(hdr->frame_control));
 	__skb_queue_tail(&entry->skb_list, rx->skb);
 	entry->last_frag = frag;
 	entry->extra_len += rx->skb->len;
@@ -1096,7 +1097,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 {
 	struct net_device *dev = rx->dev;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
-	u16 fc, hdrlen, ethertype;
+	u16 hdrlen, ethertype;
 	u8 *payload;
 	u8 dst[ETH_ALEN];
 	u8 src[ETH_ALEN] __aligned(2);
@@ -1107,12 +1108,10 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 	DECLARE_MAC_BUF(mac3);
 	DECLARE_MAC_BUF(mac4);
 
-	fc = rx->fc;
-
-	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
+	if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
 		return -1;
 
-	hdrlen = ieee80211_get_hdrlen(fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		hdrlen += ieee80211_get_mesh_hdrlen(
@@ -1127,41 +1126,28 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 	 *   1     0   BSSID SA    DA    n/a
 	 *   1     1   RA    TA    DA    SA
 	 */
+	memcpy(dst, ieee80211_get_DA(hdr), ETH_ALEN);
+	memcpy(src, ieee80211_get_SA(hdr), ETH_ALEN);
 
-	switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
-	case IEEE80211_FCTL_TODS:
-		/* BSSID SA DA */
-		memcpy(dst, hdr->addr3, ETH_ALEN);
-		memcpy(src, hdr->addr2, ETH_ALEN);
-
+	switch (hdr->frame_control &
+		cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
+	case __constant_cpu_to_le16(IEEE80211_FCTL_TODS):
 		if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP &&
 			     sdata->vif.type != IEEE80211_IF_TYPE_VLAN))
 			return -1;
 		break;
-	case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
-		/* RA TA DA SA */
-		memcpy(dst, hdr->addr3, ETH_ALEN);
-		memcpy(src, hdr->addr4, ETH_ALEN);
-
-		 if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS &&
+	case __constant_cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
+		if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS &&
 			     sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT))
 			return -1;
 		break;
-	case IEEE80211_FCTL_FROMDS:
-		/* DA BSSID SA */
-		memcpy(dst, hdr->addr1, ETH_ALEN);
-		memcpy(src, hdr->addr3, ETH_ALEN);
-
+	case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS):
 		if (sdata->vif.type != IEEE80211_IF_TYPE_STA ||
 		    (is_multicast_ether_addr(dst) &&
 		     !compare_ether_addr(src, dev->dev_addr)))
 			return -1;
 		break;
-	case 0:
-		/* DA SA BSSID */
-		memcpy(dst, hdr->addr1, ETH_ALEN);
-		memcpy(src, hdr->addr2, ETH_ALEN);
-
+	case __constant_cpu_to_le16(0):
 		if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
 			return -1;
 		break;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4788f7b91f4..24146f3387a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1025,7 +1025,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	else if (test_and_clear_sta_flags(tx->sta, WLAN_STA_CLEAR_PS_FILT))
 		info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
 
-	hdrlen = ieee80211_get_hdrlen(tx->fc);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) {
 		u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)];
 		tx->ethertype = (pos[0] << 8) | pos[1];
-- 
cgit v1.2.3-70-g09d2


From 6b644e524bbd4089a28e0711de4f1cf2daa5db50 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 15 Jul 2008 18:44:12 -0700
Subject: mac80211: remove ieee80211_get_hdrlen

All users have been moved over to the version taking a le16 frame control
rather than a cpu-endian value.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 10 ----------
 net/mac80211/util.c    | 39 ---------------------------------------
 2 files changed, 49 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index dcc05a197dc..0fdc3dabc96 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1558,16 +1558,6 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
  */
 unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb);
 
-/**
- * ieee80211_get_hdrlen - get header length from frame control
- *
- * This function returns the 802.11 header length in bytes (not including
- * encryption headers.)
- *
- * @fc: the frame control field (in CPU endianness)
- */
-int ieee80211_get_hdrlen(u16 fc);
-
 /**
  * ieee80211_hdrlen - get header length in bytes from frame control
  * @fc: frame control field in little-endian format
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 24400618190..f40c060341a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -91,45 +91,6 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
 	return NULL;
 }
 
-int ieee80211_get_hdrlen(u16 fc)
-{
-	int hdrlen = 24;
-
-	switch (fc & IEEE80211_FCTL_FTYPE) {
-	case IEEE80211_FTYPE_DATA:
-		if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS))
-			hdrlen = 30; /* Addr4 */
-		/*
-		 * The QoS Control field is two bytes and its presence is
-		 * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to
-		 * hdrlen if that bit is set.
-		 * This works by masking out the bit and shifting it to
-		 * bit position 1 so the result has the value 0 or 2.
-		 */
-		hdrlen += (fc & IEEE80211_STYPE_QOS_DATA)
-				>> (ilog2(IEEE80211_STYPE_QOS_DATA)-1);
-		break;
-	case IEEE80211_FTYPE_CTL:
-		/*
-		 * ACK and CTS are 10 bytes, all others 16. To see how
-		 * to get this condition consider
-		 *   subtype mask:   0b0000000011110000 (0x00F0)
-		 *   ACK subtype:    0b0000000011010000 (0x00D0)
-		 *   CTS subtype:    0b0000000011000000 (0x00C0)
-		 *   bits that matter:         ^^^      (0x00E0)
-		 *   value of those: 0b0000000011000000 (0x00C0)
-		 */
-		if ((fc & 0xE0) == 0xC0)
-			hdrlen = 10;
-		else
-			hdrlen = 16;
-		break;
-	}
-
-	return hdrlen;
-}
-EXPORT_SYMBOL(ieee80211_get_hdrlen);
-
 unsigned int ieee80211_hdrlen(__le16 fc)
 {
 	unsigned int hdrlen = 24;
-- 
cgit v1.2.3-70-g09d2


From e7827a7031a931c74c48e4a53f73ed862f0c8da0 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 15 Jul 2008 18:44:13 -0700
Subject: mac80211: remove IEEE80211_FC helper

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  2 --
 net/mac80211/mesh_hwmp.c   |  8 ++++----
 net/mac80211/mesh_plink.c  |  4 ++--
 net/mac80211/mlme.c        | 46 +++++++++++++++++++++++-----------------------
 net/mac80211/tx.c          |  4 ++--
 5 files changed, 31 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ec59345af65..a2870bc245d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -38,8 +38,6 @@
 
 #define WLAN_FC_DATA_PRESENT(fc) (((fc) & 0x4c) == 0x08)
 
-#define IEEE80211_FC(type, subtype) cpu_to_le16(type | subtype)
-
 struct ieee80211_local;
 
 /* Maximum number of broadcast/multicast frames to buffer when some of the
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 08aca446ca0..2cdbd522631 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -99,8 +99,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	mgmt = (struct ieee80211_mgmt *)
 		skb_put(skb, 25 + sizeof(mgmt->u.action.u.mesh_action));
 	memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.mesh_action));
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					   IEEE80211_STYPE_ACTION);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
 
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
@@ -178,8 +178,8 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra,
 	mgmt = (struct ieee80211_mgmt *)
 		skb_put(skb, 25 + sizeof(mgmt->u.action.u.mesh_action));
 	memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.mesh_action));
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					   IEEE80211_STYPE_ACTION);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
 
 	memcpy(mgmt->da, ra, ETH_ALEN);
 	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 9efeb1f0702..4a7e6d094ff 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -163,8 +163,8 @@ static int mesh_plink_frame_tx(struct net_device *dev,
 	mgmt = (struct ieee80211_mgmt *)
 		skb_put(skb, 25 + sizeof(mgmt->u.action.u.plink_action));
 	memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.plink_action));
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					   IEEE80211_STYPE_ACTION);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1e97fb9fb34..ac776c9d48f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -641,8 +641,8 @@ static void ieee80211_send_auth(struct net_device *dev,
 
 	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
 	memset(mgmt, 0, 24 + 6);
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					   IEEE80211_STYPE_AUTH);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_AUTH);
 	if (encrypt)
 		mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
 	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
@@ -771,8 +771,8 @@ static void ieee80211_send_assoc(struct net_device *dev,
 
 	if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) {
 		skb_put(skb, 10);
-		mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-						   IEEE80211_STYPE_REASSOC_REQ);
+		mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+						  IEEE80211_STYPE_REASSOC_REQ);
 		mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
 		mgmt->u.reassoc_req.listen_interval =
 				cpu_to_le16(local->hw.conf.listen_interval);
@@ -780,8 +780,8 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		       ETH_ALEN);
 	} else {
 		skb_put(skb, 4);
-		mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-						   IEEE80211_STYPE_ASSOC_REQ);
+		mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+						  IEEE80211_STYPE_ASSOC_REQ);
 		mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
 		mgmt->u.reassoc_req.listen_interval =
 				cpu_to_le16(local->hw.conf.listen_interval);
@@ -931,8 +931,8 @@ static void ieee80211_send_deauth(struct net_device *dev,
 	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
 	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
 	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					   IEEE80211_STYPE_DEAUTH);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_DEAUTH);
 	skb_put(skb, 2);
 	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
 
@@ -960,8 +960,8 @@ static void ieee80211_send_disassoc(struct net_device *dev,
 	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
 	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
 	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					   IEEE80211_STYPE_DISASSOC);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_DISASSOC);
 	skb_put(skb, 2);
 	mgmt->u.disassoc.reason_code = cpu_to_le16(reason);
 
@@ -1115,8 +1115,8 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
 
 	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
 	memset(mgmt, 0, 24);
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					   IEEE80211_STYPE_PROBE_REQ);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_PROBE_REQ);
 	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
 	if (dst) {
 		memcpy(mgmt->da, dst, ETH_ALEN);
@@ -1219,8 +1219,8 @@ static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
 		memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					   IEEE80211_STYPE_ACTION);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
 
 	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
 	mgmt->u.action.category = WLAN_CATEGORY_BACK;
@@ -1268,8 +1268,8 @@ void ieee80211_send_addba_request(struct net_device *dev, const u8 *da,
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					IEEE80211_STYPE_ACTION);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
 
 	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
 
@@ -1524,8 +1524,8 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
 		memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-					IEEE80211_STYPE_ACTION);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
 
 	skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
 
@@ -1556,8 +1556,8 @@ void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn)
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 	bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar));
 	memset(bar, 0, sizeof(*bar));
-	bar->frame_control = IEEE80211_FC(IEEE80211_FTYPE_CTL,
-					IEEE80211_STYPE_BACK_REQ);
+	bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
+					 IEEE80211_STYPE_BACK_REQ);
 	memcpy(bar->ra, ra, ETH_ALEN);
 	memcpy(bar->ta, dev->dev_addr, ETH_ALEN);
 	bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL;
@@ -1801,7 +1801,7 @@ static void ieee80211_send_refuse_measurement_request(struct net_device *dev,
 	memcpy(msr_report->da, da, ETH_ALEN);
 	memcpy(msr_report->sa, dev->dev_addr, ETH_ALEN);
 	memcpy(msr_report->bssid, bssid, ETH_ALEN);
-	msr_report->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
+	msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 						IEEE80211_STYPE_ACTION);
 
 	skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement));
@@ -2446,8 +2446,8 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 		mgmt = (struct ieee80211_mgmt *)
 			skb_put(skb, 24 + sizeof(mgmt->u.beacon));
 		memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
-		mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-						   IEEE80211_STYPE_PROBE_RESP);
+		mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+						  IEEE80211_STYPE_PROBE_RESP);
 		memset(mgmt->da, 0xff, ETH_ALEN);
 		memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 24146f3387a..9c60dedc368 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1889,8 +1889,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 			goto out;
 
 		hdr = (struct ieee80211_hdr *) skb->data;
-		hdr->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
-						  IEEE80211_STYPE_BEACON);
+		hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+						 IEEE80211_STYPE_BEACON);
 
 		num_beacons = &ifsta->num_beacons;
 	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
-- 
cgit v1.2.3-70-g09d2


From 358c8d9d332230b14e130b78a6930996cdbf84c2 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 15 Jul 2008 18:44:13 -0700
Subject: mac80211: use ieee80211 frame control directly

Remove the last users of the rx/tx_data->fc data members and use the
le16 frame_control from the header directly.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c  | 53 ++++++++++++++++++++++++++---------------------------
 net/mac80211/tx.c  | 35 ++++++++++++++---------------------
 net/mac80211/wep.c |  7 ++++---
 3 files changed, 44 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4e9631c140a..3a96251379f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -867,16 +867,18 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_hdr *hdr;
 	u16 sc;
+	__le16 fc;
 	unsigned int frag, seq;
 	struct ieee80211_fragment_entry *entry;
 	struct sk_buff *skb;
 	DECLARE_MAC_BUF(mac);
 
 	hdr = (struct ieee80211_hdr *)rx->skb->data;
+	fc = hdr->frame_control;
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	frag = sc & IEEE80211_SCTL_FRAG;
 
-	if (likely((!ieee80211_has_morefrags(hdr->frame_control) && frag == 0) ||
+	if (likely((!ieee80211_has_morefrags(fc) && frag == 0) ||
 		   (rx->skb)->len < 24 ||
 		   is_multicast_ether_addr(hdr->addr1))) {
 		/* not fragmented */
@@ -891,7 +893,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
 						 rx->queue, &(rx->skb));
 		if (rx->key && rx->key->conf.alg == ALG_CCMP &&
-		    ieee80211_has_protected(hdr->frame_control)) {
+		    ieee80211_has_protected(fc)) {
 			/* Store CCMP PN so that we can verify that the next
 			 * fragment has a sequential PN value. */
 			entry->ccmp = 1;
@@ -930,11 +932,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		memcpy(entry->last_pn, pn, CCMP_PN_LEN);
 	}
 
-	skb_pull(rx->skb, ieee80211_hdrlen(hdr->frame_control));
+	skb_pull(rx->skb, ieee80211_hdrlen(fc));
 	__skb_queue_tail(&entry->skb_list, rx->skb);
 	entry->last_frag = frag;
 	entry->extra_len += rx->skb->len;
-	if (rx->fc & IEEE80211_FCTL_MOREFRAGS) {
+	if (ieee80211_has_morefrags(fc)) {
 		rx->skb = NULL;
 		return RX_QUEUED;
 	}
@@ -974,10 +976,9 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
 	struct sk_buff *skb;
 	int no_pending_pkts;
 	DECLARE_MAC_BUF(mac);
+	__le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control;
 
-	if (likely(!rx->sta ||
-		   (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL ||
-		   (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL ||
+	if (likely(!rx->sta || !ieee80211_is_pspoll(fc) ||
 		   !(rx->flags & IEEE80211_RX_RA_MATCH)))
 		return RX_CONTINUE;
 
@@ -1073,7 +1074,7 @@ ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx)
 }
 
 static int
-ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx)
+ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
 {
 	/*
 	 * Pass through unencrypted frames if the hardware has
@@ -1083,9 +1084,8 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx)
 		return 0;
 
 	/* Drop unencrypted frames if key is set. */
-	if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) &&
-		     (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
-		     (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC &&
+	if (unlikely(!ieee80211_has_protected(fc) &&
+		     !ieee80211_is_nullfunc(fc) &&
 		     (rx->key || rx->sdata->drop_unencrypted)))
 		return -EACCES;
 
@@ -1184,7 +1184,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 /*
  * requires that rx->skb is a frame with ethernet header
  */
-static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx)
+static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
 {
 	static const u8 pae_group_addr[ETH_ALEN] __aligned(2)
 		= { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 };
@@ -1200,7 +1200,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx)
 		return true;
 
 	if (ieee80211_802_1x_port_control(rx) ||
-	    ieee80211_drop_unencrypted(rx))
+	    ieee80211_drop_unencrypted(rx, fc))
 		return false;
 
 	return true;
@@ -1270,20 +1270,21 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 {
 	struct net_device *dev = rx->dev;
 	struct ieee80211_local *local = rx->local;
-	u16 fc, ethertype;
+	u16 ethertype;
 	u8 *payload;
 	struct sk_buff *skb = rx->skb, *frame = NULL;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	__le16 fc = hdr->frame_control;
 	const struct ethhdr *eth;
 	int remaining, err;
 	u8 dst[ETH_ALEN];
 	u8 src[ETH_ALEN];
 	DECLARE_MAC_BUF(mac);
 
-	fc = rx->fc;
-	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
+	if (unlikely(!ieee80211_is_data(fc)))
 		return RX_CONTINUE;
 
-	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
+	if (unlikely(!ieee80211_is_data_present(fc)))
 		return RX_DROP_MONITOR;
 
 	if (!(rx->flags & IEEE80211_RX_AMSDU))
@@ -1365,7 +1366,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 			memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
 		}
 
-		if (!ieee80211_frame_allowed(rx)) {
+		if (!ieee80211_frame_allowed(rx, fc)) {
 			if (skb == frame) /* last frame */
 				return RX_DROP_UNUSABLE;
 			dev_kfree_skb(frame);
@@ -1439,21 +1440,21 @@ static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 {
 	struct net_device *dev = rx->dev;
-	u16 fc;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
+	__le16 fc = hdr->frame_control;
 	int err;
 
-	fc = rx->fc;
-	if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
+	if (unlikely(!ieee80211_is_data(hdr->frame_control)))
 		return RX_CONTINUE;
 
-	if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
+	if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
 		return RX_DROP_MONITOR;
 
 	err = ieee80211_data_to_8023(rx);
 	if (unlikely(err))
 		return RX_DROP_UNUSABLE;
 
-	if (!ieee80211_frame_allowed(rx))
+	if (!ieee80211_frame_allowed(rx, fc))
 		return RX_DROP_MONITOR;
 
 	rx->skb->dev = dev;
@@ -1818,13 +1819,12 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_rx_data rx;
-	u16 type;
 	int prepares;
 	struct ieee80211_sub_if_data *prev = NULL;
 	struct sk_buff *skb_new;
 	u8 *bssid;
 
-	hdr = (struct ieee80211_hdr *) skb->data;
+	hdr = (struct ieee80211_hdr *)skb->data;
 	memset(&rx, 0, sizeof(rx));
 	rx.skb = skb;
 	rx.local = local;
@@ -1832,9 +1832,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 	rx.status = status;
 	rx.rate = rate;
 	rx.fc = le16_to_cpu(hdr->frame_control);
-	type = rx.fc & IEEE80211_FCTL_FTYPE;
 
-	if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT)
+	if (ieee80211_is_data(hdr->frame_control) || ieee80211_is_mgmt(hdr->frame_control))
 		local->dot11ReceivedFragmentCount++;
 
 	rx.sta = sta_info_get(local, hdr->addr2);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9c60dedc368..96ffb4d8dfb 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -82,6 +82,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 	struct ieee80211_rate *txrate;
 	struct ieee80211_local *local = tx->local;
 	struct ieee80211_supported_band *sband;
+	struct ieee80211_hdr *hdr;
 
 	sband = local->hw.wiphy->bands[tx->channel->band];
 	txrate = &sband->bitrates[tx->rate_idx];
@@ -107,8 +108,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 	 *   at the highest possible rate belonging to the PHY rates in the
 	 *   BSSBasicRateSet
 	 */
-
-	if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) {
+	hdr = (struct ieee80211_hdr *)tx->skb->data;
+	if (ieee80211_is_ctl(hdr->frame_control)) {
 		/* TODO: These control frames are not currently sent by
 		 * 80211.o, but should they be implemented, this function
 		 * needs to be updated to support duration field calculation.
@@ -213,9 +214,8 @@ static int inline is_ieee80211_device(struct net_device *dev,
 static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 {
-#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	u32 sta_flags;
 
@@ -223,8 +223,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 		return TX_CONTINUE;
 
 	if (unlikely(tx->local->sta_sw_scanning) &&
-	    ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
-	     (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ))
+	    !ieee80211_is_probe_req(hdr->frame_control))
 		return TX_DROP;
 
 	if (tx->sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT)
@@ -238,7 +237,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 	if (likely(tx->flags & IEEE80211_TX_UNICAST)) {
 		if (unlikely(!(sta_flags & WLAN_STA_ASSOC) &&
 			     tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-			     (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) {
+			     ieee80211_is_data(hdr->frame_control))) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 			DECLARE_MAC_BUF(mac);
 			printk(KERN_DEBUG "%s: dropped data frame to not "
@@ -249,7 +248,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 			return TX_DROP;
 		}
 	} else {
-		if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
+		if (unlikely(ieee80211_is_data(hdr->frame_control) &&
 			     tx->local->num_sta == 0 &&
 			     tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS)) {
 			/*
@@ -315,6 +314,7 @@ static ieee80211_tx_result
 ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
 
 	/*
 	 * broadcast/multicast frame
@@ -329,7 +329,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 		return TX_CONTINUE;
 
 	/* no buffering for ordered frames */
-	if (tx->fc & IEEE80211_FCTL_ORDER)
+	if (ieee80211_has_order(hdr->frame_control))
 		return TX_CONTINUE;
 
 	/* no stations in PS mode */
@@ -367,12 +367,11 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 {
 	struct sta_info *sta = tx->sta;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
 	u32 staflags;
 	DECLARE_MAC_BUF(mac);
 
-	if (unlikely(!sta ||
-		     ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT &&
-		      (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP)))
+	if (unlikely(!sta || ieee80211_is_probe_resp(hdr->frame_control)))
 		return TX_CONTINUE;
 
 	staflags = get_sta_flags(sta);
@@ -437,7 +436,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_key *key;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
-	u16 fc = tx->fc;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
 
 	if (unlikely(tx->skb->do_not_encrypt))
 		tx->key = NULL;
@@ -454,22 +453,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 		tx->key = NULL;
 
 	if (tx->key) {
-		u16 ftype, stype;
-
 		tx->key->tx_rx_count++;
 		/* TODO: add threshold stuff again */
 
 		switch (tx->key->conf.alg) {
 		case ALG_WEP:
-			ftype = fc & IEEE80211_FCTL_FTYPE;
-			stype = fc & IEEE80211_FCTL_STYPE;
-
-			if (ftype == IEEE80211_FTYPE_MGMT &&
-			    stype == IEEE80211_STYPE_AUTH)
+			if (ieee80211_is_auth(hdr->frame_control))
 				break;
 		case ALG_TKIP:
 		case ALG_CCMP:
-			if (!WLAN_FC_DATA_PRESENT(fc))
+			if (!ieee80211_is_data_present(hdr->frame_control))
 				tx->key = NULL;
 			break;
 		}
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 639998775b4..376c84987e4 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -291,9 +291,10 @@ u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key)
 ieee80211_rx_result
 ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
 {
-	if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA &&
-	    ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
-	     (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
+
+	if (!ieee80211_is_data(hdr->frame_control) &&
+	    !ieee80211_is_auth(hdr->frame_control))
 		return RX_CONTINUE;
 
 	if (!(rx->status->flag & RX_FLAG_DECRYPTED)) {
-- 
cgit v1.2.3-70-g09d2


From a4b7d7bda566acaa65fbf767f65a83b3a8dc74b9 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 15 Jul 2008 18:44:14 -0700
Subject: mac80211: remove rx/tx_data->fc member

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 4 ++--
 net/mac80211/rx.c          | 8 ++------
 net/mac80211/tx.c          | 1 -
 3 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a2870bc245d..978c3a03ea5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -172,7 +172,7 @@ struct ieee80211_tx_data {
 	struct sk_buff **extra_frag;
 	int num_extra_frag;
 
-	u16 fc, ethertype;
+	u16 ethertype;
 	unsigned int flags;
 };
 
@@ -200,7 +200,7 @@ struct ieee80211_rx_data {
 	struct ieee80211_rx_status *status;
 	struct ieee80211_rate *rate;
 
-	u16 fc, ethertype;
+	u16 ethertype;
 	unsigned int flags;
 	int sent_ps_buffered;
 	int queue;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3a96251379f..2464263cb7a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1057,7 +1057,6 @@ ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx)
 		ieee80211_hdrlen(hdr->frame_control) - IEEE80211_QOS_CTL_LEN);
 	hdr = (struct ieee80211_hdr *)skb_pull(rx->skb, IEEE80211_QOS_CTL_LEN);
 	/* change frame type to non QOS */
-	rx->fc &= ~IEEE80211_STYPE_QOS_DATA;
 	hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
 
 	return RX_CONTINUE;
@@ -1831,7 +1830,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 
 	rx.status = status;
 	rx.rate = rate;
-	rx.fc = le16_to_cpu(hdr->frame_control);
 
 	if (ieee80211_is_data(hdr->frame_control) || ieee80211_is_mgmt(hdr->frame_control))
 		local->dot11ReceivedFragmentCount++;
@@ -1894,14 +1892,12 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 				       prev->dev->name);
 			continue;
 		}
-		rx.fc = le16_to_cpu(hdr->frame_control);
 		ieee80211_invoke_rx_handlers(prev, &rx, skb_new);
 		prev = sdata;
 	}
-	if (prev) {
-		rx.fc = le16_to_cpu(hdr->frame_control);
+	if (prev)
 		ieee80211_invoke_rx_handlers(prev, &rx, skb);
-	} else
+	else
 		dev_kfree_skb(skb);
 }
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 96ffb4d8dfb..85f3ba85c13 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -993,7 +993,6 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	hdr = (struct ieee80211_hdr *) skb->data;
 
 	tx->sta = sta_info_get(local, hdr->addr1);
-	tx->fc = le16_to_cpu(hdr->frame_control);
 
 	if (is_multicast_ether_addr(hdr->addr1)) {
 		tx->flags &= ~IEEE80211_TX_UNICAST;
-- 
cgit v1.2.3-70-g09d2


From 4eb2ae9a42b77de48ee9fecfaccc66c640313188 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 15 Jul 2008 18:44:15 -0700
Subject: mac80211: remove WLAN_FC_DATA_PRESENT

All users are gone now.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 978c3a03ea5..3cad0172b45 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -36,8 +36,6 @@
 #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */
 #endif /* ETH_P_PAE */
 
-#define WLAN_FC_DATA_PRESENT(fc) (((fc) & 0x4c) == 0x08)
-
 struct ieee80211_local;
 
 /* Maximum number of broadcast/multicast frames to buffer when some of the
-- 
cgit v1.2.3-70-g09d2


From bdbe819540f3365249095692118dbfeee308140d Mon Sep 17 00:00:00 2001
From: Luis Carlos Cobo <luisca@cozybit.com>
Date: Thu, 14 Aug 2008 10:40:48 -0700
Subject: mac80211: allow no mac address until firmware load

Originally by Johannes Berg. This patch adds support for devices that do not
report their MAC address until the firmware is loaded. While the address is not
known, a multicast on is used.

Signed-off-by: Luis Carlos Cobo <luisca@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f5537f90dd3..93dcdc27254 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -187,9 +187,15 @@ static int ieee80211_open(struct net_device *dev)
 	u32 changed = 0;
 	int res;
 	bool need_hw_reconfig = 0;
+	u8 null_addr[ETH_ALEN] = {0};
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
+	/* fail early if user set an invalid address */
+	if (compare_ether_addr(dev->dev_addr, null_addr) &&
+	    !is_valid_ether_addr(dev->dev_addr))
+		return -EADDRNOTAVAIL;
+
 	/* we hold the RTNL here so can safely walk the list */
 	list_for_each_entry(nsdata, &local->interfaces, list) {
 		struct net_device *ndev = nsdata->dev;
@@ -270,6 +276,36 @@ static int ieee80211_open(struct net_device *dev)
 		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
 	}
 
+	/*
+	 * Check all interfaces and copy the hopefully now-present
+	 * MAC address to those that have the special null one.
+	 */
+	list_for_each_entry(nsdata, &local->interfaces, list) {
+		struct net_device *ndev = nsdata->dev;
+
+		/*
+		 * No need to check netif_running since we do not allow
+		 * it to start up with this invalid address.
+		 */
+		if (compare_ether_addr(null_addr, ndev->dev_addr) == 0)
+			memcpy(ndev->dev_addr,
+			       local->hw.wiphy->perm_addr,
+			       ETH_ALEN);
+	}
+
+	if (compare_ether_addr(null_addr, local->mdev->dev_addr) == 0)
+		memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr,
+		       ETH_ALEN);
+
+	/*
+	 * Validate the MAC address for this device.
+	 */
+	if (!is_valid_ether_addr(dev->dev_addr)) {
+		if (!local->open_count && local->ops->stop)
+			local->ops->stop(local_to_hw(local));
+		return -EADDRNOTAVAIL;
+	}
+
 	switch (sdata->vif.type) {
 	case IEEE80211_IF_TYPE_VLAN:
 		/* no need to tell driver */
@@ -975,6 +1011,8 @@ void ieee80211_if_setup(struct net_device *dev)
 	dev->open = ieee80211_open;
 	dev->stop = ieee80211_stop;
 	dev->destructor = free_netdev;
+	/* we will validate the address ourselves in ->open */
+	dev->validate_addr = NULL;
 }
 
 /* everything else */
-- 
cgit v1.2.3-70-g09d2


From 02589f60510030a3c1496e7a8c511e4f674ef5ff Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 2 Aug 2008 15:10:57 -0300
Subject: rfkill: detect bogus double-registering (v2)

Detect and abort with -EEXIST if rfkill_register is called twice on the
same rfkill struct.  And WARN_ON(it) for good measure.

While at it, flag when we are adding the first switch of a type, we will
need that information later.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 35a9994e233..1f23de20a85 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -525,17 +525,44 @@ static struct class rfkill_class = {
 	.dev_uevent	= rfkill_dev_uevent,
 };
 
+static int rfkill_check_duplicity(const struct rfkill *rfkill)
+{
+	struct rfkill *p;
+	unsigned long seen[BITS_TO_LONGS(RFKILL_TYPE_MAX)];
+
+	memset(seen, 0, sizeof(seen));
+
+	list_for_each_entry(p, &rfkill_list, node) {
+		if (p == rfkill) {
+			WARN_ON(1);
+			return -EEXIST;
+		}
+		set_bit(p->type, seen);
+	}
+
+	/* 0: first switch of its kind */
+	return test_bit(rfkill->type, seen);
+}
+
 static int rfkill_add_switch(struct rfkill *rfkill)
 {
+	int error;
+
 	mutex_lock(&rfkill_mutex);
 
+	error = rfkill_check_duplicity(rfkill);
+	if (error < 0)
+		goto unlock_out;
+
 	rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type], 0);
 
 	list_add_tail(&rfkill->node, &rfkill_list);
 
+	error = 0;
+unlock_out:
 	mutex_unlock(&rfkill_mutex);
 
-	return 0;
+	return error;
 }
 
 static void rfkill_remove_switch(struct rfkill *rfkill)
-- 
cgit v1.2.3-70-g09d2


From 9961920199ec88d6b581d3e38502088935925c04 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 2 Aug 2008 15:10:58 -0300
Subject: rfkill: add default global states (v2)

Add a second set of global states, "rfkill_default_states", to track the
state that will be used when the first rfkill class of a given type is
registered, and also to save "undo" information when rfkill_epo is called.

Add a new exported function, rfkill_set_default(), which can be used by
platform drivers to restore radio state saved by the platform across
reboots or shutdown.

Also, fix rfkill_epo to properly update rfkill_states, but still preserve a
copy of the state so that we can undo the effect of rfkill_epo later if we
want to.  Add rfkill_restore_states() to restore rfkill_states from the
copy.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h    |   1 +
 net/rfkill/rfkill-input.h |   1 +
 net/rfkill/rfkill.c       | 127 +++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 117 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 741d1a62cc3..aa3c7d5852f 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -116,6 +116,7 @@ int rfkill_register(struct rfkill *rfkill);
 void rfkill_unregister(struct rfkill *rfkill);
 
 int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state);
+int rfkill_set_default(enum rfkill_type type, enum rfkill_state state);
 
 /**
  * rfkill_state_complement - return complementar state
diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h
index f63d0504568..bbfa646157c 100644
--- a/net/rfkill/rfkill-input.h
+++ b/net/rfkill/rfkill-input.h
@@ -13,5 +13,6 @@
 
 void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state);
 void rfkill_epo(void);
+void rfkill_restore_states(void);
 
 #endif /* __RFKILL_INPUT_H */
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 1f23de20a85..b995fa32cf8 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -44,7 +44,13 @@ module_param_named(default_state, rfkill_default_state, uint, 0444);
 MODULE_PARM_DESC(default_state,
 		 "Default initial state for all radio types, 0 = radio off");
 
-static enum rfkill_state rfkill_states[RFKILL_TYPE_MAX];
+struct rfkill_gsw_state {
+	enum rfkill_state current_state;
+	enum rfkill_state default_state;
+};
+
+static struct rfkill_gsw_state rfkill_global_states[RFKILL_TYPE_MAX];
+static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)];
 
 static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list);
 
@@ -213,22 +219,22 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
 }
 
 /**
- * rfkill_switch_all - Toggle state of all switches of given type
+ * __rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
  * @state: the new state
  *
  * This function toggles the state of all switches of given type,
  * unless a specific switch is claimed by userspace (in which case,
  * that switch is left alone) or suspended.
+ *
+ * Caller must have acquired rfkill_mutex.
  */
-void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
+static void __rfkill_switch_all(const enum rfkill_type type,
+				const enum rfkill_state state)
 {
 	struct rfkill *rfkill;
 
-	mutex_lock(&rfkill_mutex);
-
-	rfkill_states[type] = state;
-
+	rfkill_global_states[type].current_state = state;
 	list_for_each_entry(rfkill, &rfkill_list, node) {
 		if ((!rfkill->user_claim) && (rfkill->type == type)) {
 			mutex_lock(&rfkill->mutex);
@@ -236,7 +242,20 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
 			mutex_unlock(&rfkill->mutex);
 		}
 	}
+}
 
+/**
+ * rfkill_switch_all - Toggle state of all switches of given type
+ * @type: type of interfaces to be affected
+ * @state: the new state
+ *
+ * Acquires rfkill_mutex and calls __rfkill_switch_all(@type, @state).
+ * Please refer to __rfkill_switch_all() for details.
+ */
+void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
+{
+	mutex_lock(&rfkill_mutex);
+	__rfkill_switch_all(type, state);
 	mutex_unlock(&rfkill_mutex);
 }
 EXPORT_SYMBOL(rfkill_switch_all);
@@ -246,10 +265,14 @@ EXPORT_SYMBOL(rfkill_switch_all);
  *
  * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED,
  * ignoring everything in its path but rfkill_mutex and rfkill->mutex.
+ *
+ * The global state before the EPO is saved and can be restored later
+ * using rfkill_restore_states().
  */
 void rfkill_epo(void)
 {
 	struct rfkill *rfkill;
+	int i;
 
 	mutex_lock(&rfkill_mutex);
 	list_for_each_entry(rfkill, &rfkill_list, node) {
@@ -257,10 +280,34 @@ void rfkill_epo(void)
 		rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
 		mutex_unlock(&rfkill->mutex);
 	}
+	for (i = 0; i < RFKILL_TYPE_MAX; i++) {
+		rfkill_global_states[i].default_state =
+				rfkill_global_states[i].current_state;
+		rfkill_global_states[i].current_state =
+				RFKILL_STATE_SOFT_BLOCKED;
+	}
 	mutex_unlock(&rfkill_mutex);
 }
 EXPORT_SYMBOL_GPL(rfkill_epo);
 
+/**
+ * rfkill_restore_states - restore global states
+ *
+ * Restore (and sync switches to) the global state from the
+ * states in rfkill_default_states.  This can undo the effects of
+ * a call to rfkill_epo().
+ */
+void rfkill_restore_states(void)
+{
+	int i;
+
+	mutex_lock(&rfkill_mutex);
+	for (i = 0; i < RFKILL_TYPE_MAX; i++)
+		__rfkill_switch_all(i, rfkill_global_states[i].default_state);
+	mutex_unlock(&rfkill_mutex);
+}
+EXPORT_SYMBOL_GPL(rfkill_restore_states);
+
 /**
  * rfkill_force_state - Force the internal rfkill radio state
  * @rfkill: pointer to the rfkill class to modify.
@@ -406,8 +453,8 @@ static ssize_t rfkill_claim_store(struct device *dev,
 		if (!claim) {
 			mutex_lock(&rfkill->mutex);
 			rfkill_toggle_radio(rfkill,
-					    rfkill_states[rfkill->type],
-					    0);
+					rfkill_global_states[rfkill->type].current_state,
+					0);
 			mutex_unlock(&rfkill->mutex);
 		}
 		rfkill->user_claim = claim;
@@ -554,7 +601,16 @@ static int rfkill_add_switch(struct rfkill *rfkill)
 	if (error < 0)
 		goto unlock_out;
 
-	rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type], 0);
+	if (!error) {
+		/* lock default after first use */
+		set_bit(rfkill->type, rfkill_states_lockdflt);
+		rfkill_global_states[rfkill->type].current_state =
+			rfkill_global_states[rfkill->type].default_state;
+	}
+
+	rfkill_toggle_radio(rfkill,
+			    rfkill_global_states[rfkill->type].current_state,
+			    0);
 
 	list_add_tail(&rfkill->node, &rfkill_list);
 
@@ -710,6 +766,53 @@ void rfkill_unregister(struct rfkill *rfkill)
 }
 EXPORT_SYMBOL(rfkill_unregister);
 
+/**
+ * rfkill_set_default - set initial value for a switch type
+ * @type - the type of switch to set the default state of
+ * @state - the new default state for that group of switches
+ *
+ * Sets the initial state rfkill should use for a given type.
+ * The following initial states are allowed: RFKILL_STATE_SOFT_BLOCKED
+ * and RFKILL_STATE_UNBLOCKED.
+ *
+ * This function is meant to be used by platform drivers for platforms
+ * that can save switch state across power down/reboot.
+ *
+ * The default state for each switch type can be changed exactly once.
+ * After a switch of that type is registered, the default state cannot
+ * be changed anymore.  This guards against multiple drivers it the
+ * same platform trying to set the initial switch default state, which
+ * is not allowed.
+ *
+ * Returns -EPERM if the state has already been set once or is in use,
+ * so drivers likely want to either ignore or at most printk(KERN_NOTICE)
+ * if this function returns -EPERM.
+ *
+ * Returns 0 if the new default state was set, or an error if it
+ * could not be set.
+ */
+int rfkill_set_default(enum rfkill_type type, enum rfkill_state state)
+{
+	int error;
+
+	if (type >= RFKILL_TYPE_MAX ||
+	    (state != RFKILL_STATE_SOFT_BLOCKED &&
+	     state != RFKILL_STATE_UNBLOCKED))
+		return -EINVAL;
+
+	mutex_lock(&rfkill_mutex);
+
+	if (!test_and_set_bit(type, rfkill_states_lockdflt)) {
+		rfkill_global_states[type].default_state = state;
+		error = 0;
+	} else
+		error = -EPERM;
+
+	mutex_unlock(&rfkill_mutex);
+	return error;
+}
+EXPORT_SYMBOL_GPL(rfkill_set_default);
+
 /*
  * Rfkill module initialization/deinitialization.
  */
@@ -723,8 +826,8 @@ static int __init rfkill_init(void)
 	    rfkill_default_state != RFKILL_STATE_UNBLOCKED)
 		return -EINVAL;
 
-	for (i = 0; i < ARRAY_SIZE(rfkill_states); i++)
-		rfkill_states[i] = rfkill_default_state;
+	for (i = 0; i < RFKILL_TYPE_MAX; i++)
+		rfkill_global_states[i].default_state = rfkill_default_state;
 
 	error = class_register(&rfkill_class);
 	if (error) {
-- 
cgit v1.2.3-70-g09d2


From 77fba13ccc3a2a3db100892a4a6cc5e2f8290cc7 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 2 Aug 2008 15:10:59 -0300
Subject: rfkill: add __must_check annotations

rfkill is not a small, mere detail in wireless support.  Once it starts
supporting rfkill and users start counting on that support, a wireless
device is at risk of operating in dangerous conditions should rfkill
support fail to properly activate.

Therefore, add the required __must_check annotations on some key functions
of the rfkill API, for which the wireless drivers absolutely MUST handle
the failure mode safely in order to avoid a potentially dangerous situation
where the wireless transmitter is left enabled when the user don't want it
to.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 5 +++--
 net/rfkill/rfkill.c    | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index aa3c7d5852f..e92d8e94bb8 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -110,9 +110,10 @@ struct rfkill {
 };
 #define to_rfkill(d)	container_of(d, struct rfkill, dev)
 
-struct rfkill *rfkill_allocate(struct device *parent, enum rfkill_type type);
+struct rfkill * __must_check rfkill_allocate(struct device *parent,
+					     enum rfkill_type type);
 void rfkill_free(struct rfkill *rfkill);
-int rfkill_register(struct rfkill *rfkill);
+int __must_check rfkill_register(struct rfkill *rfkill);
 void rfkill_unregister(struct rfkill *rfkill);
 
 int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state);
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index b995fa32cf8..fae7ffade9c 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -645,7 +645,8 @@ static void rfkill_remove_switch(struct rfkill *rfkill)
  * NOTE: If registration fails the structure shoudl be freed by calling
  * rfkill_free() otherwise rfkill_unregister() should be used.
  */
-struct rfkill *rfkill_allocate(struct device *parent, enum rfkill_type type)
+struct rfkill * __must_check rfkill_allocate(struct device *parent,
+					     enum rfkill_type type)
 {
 	struct rfkill *rfkill;
 	struct device *dev;
@@ -716,7 +717,7 @@ static void rfkill_led_trigger_unregister(struct rfkill *rfkill)
  * structure needs to be registered. Immediately from registration the
  * switch driver should be able to service calls to toggle_radio.
  */
-int rfkill_register(struct rfkill *rfkill)
+int __must_check rfkill_register(struct rfkill *rfkill)
 {
 	static atomic_t rfkill_no = ATOMIC_INIT(0);
 	struct device *dev = &rfkill->dev;
-- 
cgit v1.2.3-70-g09d2


From 96c87607ac8f9b0e641d11ba6e57f8ec0214ea1c Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Sat, 2 Aug 2008 15:11:00 -0300
Subject: rfkill: introduce RFKILL_STATE_MAX

While it is interesting to not add last-enum-markers because it allows gcc
to warn us of switch() statements missing a valid state, we really should
be handling memory corruption on a rfkill state with default clauses,
anyway.

So add RFKILL_STATE_MAX and use it where applicable.  It makes for safer
code in the long run.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h |  1 +
 net/rfkill/rfkill.c    | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index e92d8e94bb8..4cd64b0d982 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -49,6 +49,7 @@ enum rfkill_state {
 	RFKILL_STATE_SOFT_BLOCKED = 0,	/* Radio output blocked */
 	RFKILL_STATE_UNBLOCKED    = 1,	/* Radio output allowed */
 	RFKILL_STATE_HARD_BLOCKED = 2,	/* Output blocked, non-overrideable */
+	RFKILL_STATE_MAX,		/* marker for last valid state */
 };
 
 /*
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index fae7ffade9c..47e0b2d232e 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -201,6 +201,8 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
 		 * BLOCK even a transmitter that is already in state
 		 * RFKILL_STATE_HARD_BLOCKED */
 		break;
+	default:
+		return -EINVAL;
 	}
 
 	if (force || state != rfkill->state) {
@@ -234,6 +236,9 @@ static void __rfkill_switch_all(const enum rfkill_type type,
 {
 	struct rfkill *rfkill;
 
+	if (unlikely(state >= RFKILL_STATE_MAX))
+		return;
+
 	rfkill_global_states[type].current_state = state;
 	list_for_each_entry(rfkill, &rfkill_list, node) {
 		if ((!rfkill->user_claim) && (rfkill->type == type)) {
@@ -329,9 +334,7 @@ int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state)
 {
 	enum rfkill_state oldstate;
 
-	if (state != RFKILL_STATE_SOFT_BLOCKED &&
-	    state != RFKILL_STATE_UNBLOCKED &&
-	    state != RFKILL_STATE_HARD_BLOCKED)
+	if (unlikely(state >= RFKILL_STATE_MAX))
 		return -EINVAL;
 
 	mutex_lock(&rfkill->mutex);
@@ -727,6 +730,8 @@ int __must_check rfkill_register(struct rfkill *rfkill)
 		return -EINVAL;
 	if (rfkill->type >= RFKILL_TYPE_MAX)
 		return -EINVAL;
+	if (rfkill->state >= RFKILL_STATE_MAX)
+		return -EINVAL;
 
 	snprintf(dev->bus_id, sizeof(dev->bus_id),
 		 "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1);
-- 
cgit v1.2.3-70-g09d2


From fef1643bf0cdd092a52dc3378479e4811fd65152 Mon Sep 17 00:00:00 2001
From: Jasper Bryant-Greene <jasper@amiton.co.nz>
Date: Sun, 3 Aug 2008 11:30:55 +1200
Subject: move ETH_P_PAE from ieee80211_i.h to if_ether.h

ETH_P_PAE belongs in if_ether.h with the other ETH_P_* definitions. This
patch moves it there.

Signed-off-by: Jasper Bryant-Greene <jasper@amiton.co.nz>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/if_ether.h   | 1 +
 net/mac80211/ieee80211_i.h | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index e157c1399b6..5028e0b6082 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -74,6 +74,7 @@
 #define ETH_P_ATMFATE	0x8884		/* Frame-based ATM Transport
 					 * over Ethernet
 					 */
+#define ETH_P_PAE	0x888E		/* Port Access Entity (IEEE 802.1X) */
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3cad0172b45..168f845c173 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -32,10 +32,6 @@
 /* ieee80211.o internal definitions, etc. These are not included into
  * low-level drivers. */
 
-#ifndef ETH_P_PAE
-#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */
-#endif /* ETH_P_PAE */
-
 struct ieee80211_local;
 
 /* Maximum number of broadcast/multicast frames to buffer when some of the
-- 
cgit v1.2.3-70-g09d2


From f698d856f65c3fea091cc303a135967965c5b880 Mon Sep 17 00:00:00 2001
From: Jasper Bryant-Greene <jasper@amiton.co.nz>
Date: Sun, 3 Aug 2008 12:04:37 +1200
Subject: replace net_device arguments with ieee80211_{local,sub_if_data} as
 appropriate

This patch replaces net_device arguments to mac80211 internal functions
with ieee80211_{local,sub_if_data} as appropriate.

It also does the same for many 802.11s mesh functions, and changes the
mesh path table to be indexed on sub_if_data rather than net_device.

If the mesh part needs to be a separate patch let me know, but since
mesh uses a lot of mac80211 functions which were being converted anyway,
the changes go hand-in-hand somewhat.

This patch probably does not convert all the functions which could be
converted, but it is a large chunk and followup patches will be
provided.

Signed-off-by: Jasper Bryant-Greene <jasper@amiton.co.nz>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c          |  21 +-
 net/mac80211/debugfs_sta.c  |   2 +-
 net/mac80211/event.c        |   5 +-
 net/mac80211/ieee80211_i.h  |  48 ++--
 net/mac80211/iface.c        |   8 +-
 net/mac80211/main.c         |  16 +-
 net/mac80211/mesh.c         |  29 +-
 net/mac80211/mesh.h         |  61 +++--
 net/mac80211/mesh_hwmp.c    | 112 ++++----
 net/mac80211/mesh_pathtbl.c |  76 +++---
 net/mac80211/mesh_plink.c   |  52 ++--
 net/mac80211/mlme.c         | 652 ++++++++++++++++++++------------------------
 net/mac80211/rx.c           |  16 +-
 net/mac80211/tx.c           |   4 +-
 net/mac80211/wext.c         |  49 ++--
 net/mac80211/wpa.c          |   2 +-
 16 files changed, 545 insertions(+), 608 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 297c257864c..6d2ad2bf3ab 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -66,13 +66,16 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
 static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex)
 {
 	struct net_device *dev;
+	struct ieee80211_sub_if_data *sdata;
 
 	/* we're under RTNL */
 	dev = __dev_get_by_index(&init_net, ifindex);
 	if (!dev)
 		return -ENODEV;
 
-	ieee80211_if_remove(dev);
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	ieee80211_if_remove(sdata);
 
 	return 0;
 }
@@ -842,13 +845,13 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
 		return -ENOENT;
 	}
 
-	err = mesh_path_add(dst, dev);
+	err = mesh_path_add(dst, sdata);
 	if (err) {
 		rcu_read_unlock();
 		return err;
 	}
 
-	mpath = mesh_path_lookup(dst, dev);
+	mpath = mesh_path_lookup(dst, sdata);
 	if (!mpath) {
 		rcu_read_unlock();
 		return -ENXIO;
@@ -862,10 +865,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev,
 				 u8 *dst)
 {
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
 	if (dst)
-		return mesh_path_del(dst, dev);
+		return mesh_path_del(dst, sdata);
 
-	mesh_path_flush(dev);
+	mesh_path_flush(sdata);
 	return 0;
 }
 
@@ -897,7 +902,7 @@ static int ieee80211_change_mpath(struct wiphy *wiphy,
 		return -ENOENT;
 	}
 
-	mpath = mesh_path_lookup(dst, dev);
+	mpath = mesh_path_lookup(dst, sdata);
 	if (!mpath) {
 		rcu_read_unlock();
 		return -ENOENT;
@@ -965,7 +970,7 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
 		return -ENOTSUPP;
 
 	rcu_read_lock();
-	mpath = mesh_path_lookup(dst, dev);
+	mpath = mesh_path_lookup(dst, sdata);
 	if (!mpath) {
 		rcu_read_unlock();
 		return -ENOENT;
@@ -993,7 +998,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
 		return -ENOTSUPP;
 
 	rcu_read_lock();
-	mpath = mesh_path_lookup_by_idx(idx, dev);
+	mpath = mesh_path_lookup_by_idx(idx, sdata);
 	if (!mpath) {
 		rcu_read_unlock();
 		return -ENOENT;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 79a062782d5..6abe5427752 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -201,7 +201,7 @@ static ssize_t sta_agg_status_write(struct file *file,
 		tid_num = tid_num - 100;
 		if (tid_static_rx[tid_num] == 1) {
 			strcpy(state, "off ");
-			ieee80211_sta_stop_rx_ba_session(dev, da, tid_num, 0,
+			ieee80211_sta_stop_rx_ba_session(sta->sdata, da, tid_num, 0,
 					WLAN_REASON_QSTA_REQUIRE_SETUP);
 			sta->ampdu_mlme.tid_state_rx[tid_num] |=
 					HT_AGG_STATE_DEBUGFS_CTL;
diff --git a/net/mac80211/event.c b/net/mac80211/event.c
index 2280f40b456..8de60de70bc 100644
--- a/net/mac80211/event.c
+++ b/net/mac80211/event.c
@@ -8,7 +8,6 @@
  * mac80211 - events
  */
 
-#include <linux/netdevice.h>
 #include <net/iw_handler.h>
 #include "ieee80211_i.h"
 
@@ -17,7 +16,7 @@
  * (in the variable hdr) must be long enough to extract the TKIP
  * fields like TSC
  */
-void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
+void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
 				     struct ieee80211_hdr *hdr)
 {
 	union iwreq_data wrqu;
@@ -32,7 +31,7 @@ void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
 			print_mac(mac, hdr->addr2));
 		memset(&wrqu, 0, sizeof(wrqu));
 		wrqu.data.length = strlen(buf);
-		wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
+		wireless_send_event(sdata->dev, IWEVCUSTOM, &wrqu, buf);
 		kfree(buf);
 	}
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 168f845c173..b5d3f58a784 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -851,65 +851,65 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 
 /* ieee80211_ioctl.c */
 extern const struct iw_handler_def ieee80211_iw_handler_def;
-int ieee80211_set_freq(struct net_device *dev, int freq);
+int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq);
 
 /* ieee80211_sta.c */
 void ieee80211_sta_timer(unsigned long data);
 void ieee80211_sta_work(struct work_struct *work);
 void ieee80211_sta_scan_work(struct work_struct *work);
-void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
+void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 			   struct ieee80211_rx_status *rx_status);
-int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len);
-int ieee80211_sta_get_ssid(struct net_device *dev, char *ssid, size_t *len);
-int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid);
-int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len);
-void ieee80211_sta_req_auth(struct net_device *dev,
+int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len);
+int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len);
+int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid);
+int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t ssid_len);
+void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata,
 			    struct ieee80211_if_sta *ifsta);
-int ieee80211_sta_scan_results(struct net_device *dev,
+int ieee80211_sta_scan_results(struct ieee80211_local *local,
 			       struct iw_request_info *info,
 			       char *buf, size_t len);
 ieee80211_rx_result ieee80211_sta_rx_scan(
-	struct net_device *dev, struct sk_buff *skb,
+	struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 	struct ieee80211_rx_status *rx_status);
 void ieee80211_rx_bss_list_init(struct ieee80211_local *local);
 void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local);
-int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len);
-struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
+int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len);
+struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 					struct sk_buff *skb, u8 *bssid,
 					u8 *addr, u64 supp_rates);
-int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason);
-int ieee80211_sta_disassociate(struct net_device *dev, u16 reason);
+int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason);
+int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason);
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 				      u32 changed);
-u32 ieee80211_reset_erp_info(struct net_device *dev);
+u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
 int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
 				   struct ieee80211_ht_info *ht_info);
 int ieee80211_ht_addt_info_ie_to_ht_bss_info(
 			struct ieee80211_ht_addt_info *ht_add_info_ie,
 			struct ieee80211_ht_bss_info *bss_info);
-void ieee80211_send_addba_request(struct net_device *dev, const u8 *da,
+void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, const u8 *da,
 				  u16 tid, u8 dialog_token, u16 start_seq_num,
 				  u16 agg_size, u16 timeout);
-void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
+void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid,
 				u16 initiator, u16 reason_code);
-void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn);
+void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn);
 
-void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *da,
+void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da,
 				u16 tid, u16 initiator, u16 reason);
 void sta_addba_resp_timer_expired(unsigned long data);
-void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr);
+void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr);
 u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
 			    struct ieee802_11_elems *elems,
 			    enum ieee80211_band band);
-void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
+void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		int encrypt);
 void ieee802_11_parse_elems(u8 *start, size_t len,
 				   struct ieee802_11_elems *elems);
 
 #ifdef CONFIG_MAC80211_MESH
-void ieee80211_start_mesh(struct net_device *dev);
+void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata);
 #else
-static inline void ieee80211_start_mesh(struct net_device *dev)
+static inline void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 {}
 #endif
 
@@ -920,7 +920,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		     struct vif_params *params);
 int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 			     enum ieee80211_if_types type);
-void ieee80211_if_remove(struct net_device *dev);
+void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata);
 void ieee80211_remove_interfaces(struct ieee80211_local *local);
 
 /* tx handling */
@@ -938,7 +938,7 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
 			enum ieee80211_if_types type);
 int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 			     int rate, int erp, int short_preamble);
-void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
+void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
 				     struct ieee80211_hdr *hdr);
 
 #ifdef CONFIG_MAC80211_NOINLINE
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 610ed1d9893..4a623b8e91f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -56,7 +56,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 	case IEEE80211_IF_TYPE_MESH_POINT:
 		/* Allow compiler to elide mesh_rmc_free call. */
 		if (ieee80211_vif_is_mesh(&sdata->vif))
-			mesh_rmc_free(dev);
+			mesh_rmc_free(sdata);
 		/* fall through */
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
@@ -241,15 +241,13 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 	return ret;
 }
 
-void ieee80211_if_remove(struct net_device *dev)
+void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
 	ASSERT_RTNL();
 
 	list_del_rcu(&sdata->list);
 	synchronize_rcu();
-	unregister_netdevice(dev);
+	unregister_netdevice(sdata->dev);
 }
 
 /*
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 93dcdc27254..81963948665 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -347,8 +347,8 @@ static int ieee80211_open(struct net_device *dev)
 			goto err_stop;
 
 		if (ieee80211_vif_is_mesh(&sdata->vif))
-			ieee80211_start_mesh(sdata->dev);
-		changed |= ieee80211_reset_erp_info(dev);
+			ieee80211_start_mesh(sdata);
+		changed |= ieee80211_reset_erp_info(sdata);
 		ieee80211_bss_info_change_notify(sdata, changed);
 		ieee80211_enable_keys(sdata);
 
@@ -448,7 +448,7 @@ static int ieee80211_stop(struct net_device *dev)
 
 	list_for_each_entry_rcu(sta, &local->sta_list, list) {
 		if (sta->sdata == sdata)
-			ieee80211_sta_tear_down_BA_sessions(dev, sta->addr);
+			ieee80211_sta_tear_down_BA_sessions(sdata, sta->addr);
 	}
 
 	rcu_read_unlock();
@@ -706,7 +706,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
 
 
-	ieee80211_send_addba_request(sta->sdata->dev, ra, tid,
+	ieee80211_send_addba_request(sta->sdata, ra, tid,
 			 sta->ampdu_mlme.tid_tx[tid]->dialog_token,
 			 sta->ampdu_mlme.tid_tx[tid]->ssn,
 			 0x40, 5000);
@@ -889,7 +889,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 	}
 
 	if (*state & HT_AGG_STATE_INITIATOR_MSK)
-		ieee80211_send_delba(sta->sdata->dev, ra, tid,
+		ieee80211_send_delba(sta->sdata, ra, tid,
 			WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
 
 	agg_queue = sta->tid_to_tx_q[tid];
@@ -1200,10 +1200,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 					     changed);
 }
 
-u32 ieee80211_reset_erp_info(struct net_device *dev)
+u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
 	sdata->bss_conf.use_cts_prot = 0;
 	sdata->bss_conf.use_short_preamble = 0;
 	return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE;
@@ -1438,7 +1436,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			tid = qc[0] & 0xf;
 			ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10)
 						& IEEE80211_SCTL_SEQ);
-			ieee80211_send_bar(sta->sdata->dev, hdr->addr1,
+			ieee80211_send_bar(sta->sdata, hdr->addr1,
 					   tid, ssn);
 		}
 	}
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index b5933b27149..b631703bcc8 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -39,14 +39,13 @@ void ieee80211s_stop(void)
  * mesh_matches_local - check if the config of a mesh point matches ours
  *
  * @ie: information elements of a management frame from the mesh peer
- * @dev: local mesh interface
+ * @sdata: local mesh subif
  *
  * This function checks if the mesh configuration of a mesh point matches the
  * local mesh configuration, i.e. if both nodes belong to the same mesh network.
  */
-bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev)
+bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *sta = &sdata->u.sta;
 
 	/*
@@ -73,10 +72,8 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev)
  * mesh_peer_accepts_plinks - check if an mp is willing to establish peer links
  *
  * @ie: information elements of a management frame from the mesh peer
- * @dev: local mesh interface
  */
-bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie,
-			      struct net_device *dev)
+bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie)
 {
 	return (*(ie->mesh_config + CAPAB_OFFSET) & ACCEPT_PLINKS) != 0;
 }
@@ -111,9 +108,8 @@ void mesh_ids_set_default(struct ieee80211_if_sta *sta)
 	memcpy(sta->mesh_cc_id, def_id, 4);
 }
 
-int mesh_rmc_init(struct net_device *dev)
+int mesh_rmc_init(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	int i;
 
 	sdata->u.sta.rmc = kmalloc(sizeof(struct mesh_rmc), GFP_KERNEL);
@@ -125,9 +121,8 @@ int mesh_rmc_init(struct net_device *dev)
 	return 0;
 }
 
-void mesh_rmc_free(struct net_device *dev)
+void mesh_rmc_free(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct mesh_rmc *rmc = sdata->u.sta.rmc;
 	struct rmc_entry *p, *n;
 	int i;
@@ -158,9 +153,8 @@ void mesh_rmc_free(struct net_device *dev)
  * it.
  */
 int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr,
-		   struct net_device *dev)
+		   struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct mesh_rmc *rmc = sdata->u.sta.rmc;
 	u32 seqnum = 0;
 	int entries = 0;
@@ -194,10 +188,9 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr,
 	return 0;
 }
 
-void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev)
+void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_supported_band *sband;
 	u8 *pos;
 	int len, i, rate;
@@ -262,10 +255,10 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev)
 	return;
 }
 
-u32 mesh_table_hash(u8 *addr, struct net_device *dev, struct mesh_table *tbl)
+u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl)
 {
 	/* Use last four bytes of hw addr and interface index as hash index */
-	return jhash_2words(*(u32 *)(addr+2), dev->ifindex, tbl->hash_rnd)
+	return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, tbl->hash_rnd)
 		& tbl->hash_mask;
 }
 
@@ -434,7 +427,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 	ifsta->preq_id = 0;
 	ifsta->dsn = 0;
 	atomic_set(&ifsta->mpaths, 0);
-	mesh_rmc_init(sdata->dev);
+	mesh_rmc_init(sdata);
 	ifsta->last_preq = jiffies;
 	/* Allocate all mesh structures when creating the first mesh interface. */
 	if (!mesh_allocated)
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 7495fbb0d21..84ff5d828fd 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -47,7 +47,7 @@ enum mesh_path_flags {
  * struct mesh_path - mac80211 mesh path structure
  *
  * @dst: mesh path destination mac address
- * @dev: mesh path device
+ * @sdata: mesh subif
  * @next_hop: mesh neighbor to which frames for this destination will be
  * 	forwarded
  * @timer: mesh path discovery timer
@@ -64,14 +64,14 @@ enum mesh_path_flags {
  * @state_lock: mesh pat state lock
  *
  *
- * The combination of dst and dev is unique in the mesh path table. Since the
+ * The combination of dst and sdata is unique in the mesh path table. Since the
  * next_hop STA is only protected by RCU as well, deleting the STA must also
  * remove/substitute the mesh_path structure and wait until that is no longer
  * reachable before destroying the STA completely.
  */
 struct mesh_path {
 	u8 dst[ETH_ALEN];
-	struct net_device *dev;
+	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *next_hop;
 	struct timer_list timer;
 	struct sk_buff_head frame_queue;
@@ -203,59 +203,66 @@ int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
 int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
 		struct ieee80211_sub_if_data *sdata);
 int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr,
-		struct net_device *dev);
-bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev);
+		struct ieee80211_sub_if_data *sdata);
+bool mesh_matches_local(struct ieee802_11_elems *ie,
+		struct ieee80211_sub_if_data *sdata);
 void mesh_ids_set_default(struct ieee80211_if_sta *sta);
-void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev);
-void mesh_rmc_free(struct net_device *dev);
-int mesh_rmc_init(struct net_device *dev);
+void mesh_mgmt_ies_add(struct sk_buff *skb,
+		struct ieee80211_sub_if_data *sdata);
+void mesh_rmc_free(struct ieee80211_sub_if_data *sdata);
+int mesh_rmc_init(struct ieee80211_sub_if_data *sdata);
 void ieee80211s_init(void);
 void ieee80211s_stop(void);
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata);
 
 /* Mesh paths */
-int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev);
-void mesh_path_start_discovery(struct net_device *dev);
-struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev);
-struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev);
+int mesh_nexthop_lookup(struct sk_buff *skb,
+		struct ieee80211_sub_if_data *sdata);
+void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata);
+struct mesh_path *mesh_path_lookup(u8 *dst,
+		struct ieee80211_sub_if_data *sdata);
+struct mesh_path *mesh_path_lookup_by_idx(int idx,
+		struct ieee80211_sub_if_data *sdata);
 void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop);
-void mesh_path_expire(struct net_device *dev);
-void mesh_path_flush(struct net_device *dev);
-void mesh_rx_path_sel_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
-		size_t len);
-int mesh_path_add(u8 *dst, struct net_device *dev);
+void mesh_path_expire(struct ieee80211_sub_if_data *sdata);
+void mesh_path_flush(struct ieee80211_sub_if_data *sdata);
+void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
+		struct ieee80211_mgmt *mgmt, size_t len);
+int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata);
 /* Mesh plinks */
-void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct net_device *dev,
-		bool add);
-bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie,
-			      struct net_device *dev);
+void mesh_neighbour_update(u8 *hw_addr, u64 rates,
+		struct ieee80211_sub_if_data *sdata, bool add);
+bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
 void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
 void mesh_plink_broken(struct sta_info *sta);
 void mesh_plink_deactivate(struct sta_info *sta);
 int mesh_plink_open(struct sta_info *sta);
 int mesh_plink_close(struct sta_info *sta);
 void mesh_plink_block(struct sta_info *sta);
-void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
-			 size_t len, struct ieee80211_rx_status *rx_status);
+void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
+			 struct ieee80211_mgmt *mgmt, size_t len,
+			 struct ieee80211_rx_status *rx_status);
 
 /* Private interfaces */
 /* Mesh tables */
 struct mesh_table *mesh_table_alloc(int size_order);
 void mesh_table_free(struct mesh_table *tbl, bool free_leafs);
 struct mesh_table *mesh_table_grow(struct mesh_table *tbl);
-u32 mesh_table_hash(u8 *addr, struct net_device *dev, struct mesh_table *tbl);
+u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata,
+		struct mesh_table *tbl);
 /* Mesh paths */
 int mesh_path_error_tx(u8 *dest, __le32 dest_dsn, u8 *ra,
-		struct net_device *dev);
+		struct ieee80211_sub_if_data *sdata);
 void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta);
 void mesh_path_flush_pending(struct mesh_path *mpath);
 void mesh_path_tx_pending(struct mesh_path *mpath);
 int mesh_pathtbl_init(void);
 void mesh_pathtbl_unregister(void);
-int mesh_path_del(u8 *addr, struct net_device *dev);
+int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata);
 void mesh_path_timer(unsigned long data);
 void mesh_path_flush_by_nexthop(struct sta_info *sta);
-void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev);
+void mesh_path_discard_frame(struct sk_buff *skb,
+		struct ieee80211_sub_if_data *sdata);
 
 #ifdef CONFIG_MAC80211_MESH
 extern int mesh_allocated;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 2cdbd522631..eeb0ce2d5d3 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -82,9 +82,9 @@ enum mpath_frame_type {
 static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 		u8 *orig_addr, __le32 orig_dsn, u8 dst_flags, u8 *dst,
 		__le32 dst_dsn, u8 *da, u8 hop_count, u8 ttl, __le32 lifetime,
-		__le32 metric, __le32 preq_id, struct net_device *dev)
+		__le32 metric, __le32 preq_id, struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos;
@@ -103,7 +103,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 					  IEEE80211_STYPE_ACTION);
 
 	memcpy(mgmt->da, da, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
 	mgmt->u.action.category = MESH_PATH_SEL_CATEGORY;
 	mgmt->u.action.u.mesh_action.action_code = action;
@@ -149,7 +149,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	pos += ETH_ALEN;
 	memcpy(pos, &dst_dsn, 4);
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 	return 0;
 }
 
@@ -161,9 +161,9 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
  * @ra: node this frame is addressed to
  */
 int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra,
-		struct net_device *dev)
+		struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos;
@@ -182,7 +182,7 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra,
 					  IEEE80211_STYPE_ACTION);
 
 	memcpy(mgmt->da, ra, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
 	mgmt->u.action.category = MESH_PATH_SEL_CATEGORY;
 	mgmt->u.action.u.mesh_action.action_code = MPATH_PERR;
@@ -198,7 +198,7 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra,
 	pos += ETH_ALEN;
 	memcpy(pos, &dst_dsn, 4);
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 	return 0;
 }
 
@@ -233,7 +233,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
 /**
  * hwmp_route_info_get - Update routing info to originator and transmitter
  *
- * @dev: local mesh interface
+ * @sdata: local mesh subif
  * @mgmt: mesh management frame
  * @hwmp_ie: hwmp information element (PREP or PREQ)
  *
@@ -246,11 +246,11 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
  * Notes: this function is the only place (besides user-provided info) where
  * path routing information is updated.
  */
-static u32 hwmp_route_info_get(struct net_device *dev,
+static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 			    struct ieee80211_mgmt *mgmt,
 			    u8 *hwmp_ie)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct mesh_path *mpath;
 	struct sta_info *sta;
 	bool fresh_info;
@@ -301,14 +301,14 @@ static u32 hwmp_route_info_get(struct net_device *dev,
 		new_metric = MAX_METRIC;
 	exp_time = TU_TO_EXP_TIME(orig_lifetime);
 
-	if (memcmp(orig_addr, dev->dev_addr, ETH_ALEN) == 0) {
+	if (memcmp(orig_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) {
 		/* This MP is the originator, we are not interested in this
 		 * frame, except for updating transmitter's path info.
 		 */
 		process = false;
 		fresh_info = false;
 	} else {
-		mpath = mesh_path_lookup(orig_addr, dev);
+		mpath = mesh_path_lookup(orig_addr, sdata);
 		if (mpath) {
 			spin_lock_bh(&mpath->state_lock);
 			if (mpath->flags & MESH_PATH_FIXED)
@@ -324,8 +324,8 @@ static u32 hwmp_route_info_get(struct net_device *dev,
 				}
 			}
 		} else {
-			mesh_path_add(orig_addr, dev);
-			mpath = mesh_path_lookup(orig_addr, dev);
+			mesh_path_add(orig_addr, sdata);
+			mpath = mesh_path_lookup(orig_addr, sdata);
 			if (!mpath) {
 				rcu_read_unlock();
 				return 0;
@@ -357,7 +357,7 @@ static u32 hwmp_route_info_get(struct net_device *dev,
 	else {
 		fresh_info = true;
 
-		mpath = mesh_path_lookup(ta, dev);
+		mpath = mesh_path_lookup(ta, sdata);
 		if (mpath) {
 			spin_lock_bh(&mpath->state_lock);
 			if ((mpath->flags & MESH_PATH_FIXED) ||
@@ -365,8 +365,8 @@ static u32 hwmp_route_info_get(struct net_device *dev,
 					(last_hop_metric > mpath->metric)))
 				fresh_info = false;
 		} else {
-			mesh_path_add(ta, dev);
-			mpath = mesh_path_lookup(ta, dev);
+			mesh_path_add(ta, sdata);
+			mpath = mesh_path_lookup(ta, sdata);
 			if (!mpath) {
 				rcu_read_unlock();
 				return 0;
@@ -392,10 +392,9 @@ static u32 hwmp_route_info_get(struct net_device *dev,
 	return process ? new_metric : 0;
 }
 
-static void hwmp_preq_frame_process(struct net_device *dev,
+static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_mgmt *mgmt,
 				    u8 *preq_elem, u32 metric) {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	struct mesh_path *mpath;
 	u8 *dst_addr, *orig_addr;
@@ -411,7 +410,7 @@ static void hwmp_preq_frame_process(struct net_device *dev,
 	orig_dsn = PREQ_IE_ORIG_DSN(preq_elem);
 	dst_flags = PREQ_IE_DST_F(preq_elem);
 
-	if (memcmp(dst_addr, dev->dev_addr, ETH_ALEN) == 0) {
+	if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) {
 		forward = false;
 		reply = true;
 		metric = 0;
@@ -423,7 +422,7 @@ static void hwmp_preq_frame_process(struct net_device *dev,
 		}
 	} else {
 		rcu_read_lock();
-		mpath = mesh_path_lookup(dst_addr, dev);
+		mpath = mesh_path_lookup(dst_addr, sdata);
 		if (mpath) {
 			if ((!(mpath->flags & MESH_PATH_DSN_VALID)) ||
 					DSN_LT(mpath->dsn, dst_dsn)) {
@@ -451,7 +450,7 @@ static void hwmp_preq_frame_process(struct net_device *dev,
 				cpu_to_le32(dst_dsn), 0, orig_addr,
 				cpu_to_le32(orig_dsn), mgmt->sa, 0, ttl,
 				cpu_to_le32(lifetime), cpu_to_le32(metric),
-				0, dev);
+				0, sdata);
 		else
 			ifsta->mshstats.dropped_frames_ttl++;
 	}
@@ -472,20 +471,19 @@ static void hwmp_preq_frame_process(struct net_device *dev,
 		hopcount = PREQ_IE_HOPCOUNT(preq_elem) + 1;
 		mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr,
 				cpu_to_le32(orig_dsn), dst_flags, dst_addr,
-				cpu_to_le32(dst_dsn), dev->broadcast,
+				cpu_to_le32(dst_dsn), sdata->dev->broadcast,
 				hopcount, ttl, cpu_to_le32(lifetime),
 				cpu_to_le32(metric), cpu_to_le32(preq_id),
-				dev);
+				sdata);
 		ifsta->mshstats.fwded_frames++;
 	}
 }
 
 
-static void hwmp_prep_frame_process(struct net_device *dev,
+static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_mgmt *mgmt,
 				    u8 *prep_elem, u32 metric)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct mesh_path *mpath;
 	u8 *dst_addr, *orig_addr;
 	u8 ttl, hopcount, flags;
@@ -499,7 +497,7 @@ static void hwmp_prep_frame_process(struct net_device *dev,
 	 * replies
 	 */
 	dst_addr = PREP_IE_DST_ADDR(prep_elem);
-	if (memcmp(dst_addr, dev->dev_addr, ETH_ALEN) == 0)
+	if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0)
 		/* destination, no forwarding required */
 		return;
 
@@ -510,7 +508,7 @@ static void hwmp_prep_frame_process(struct net_device *dev,
 	}
 
 	rcu_read_lock();
-	mpath = mesh_path_lookup(dst_addr, dev);
+	mpath = mesh_path_lookup(dst_addr, sdata);
 	if (mpath)
 		spin_lock_bh(&mpath->state_lock);
 	else
@@ -533,7 +531,7 @@ static void hwmp_prep_frame_process(struct net_device *dev,
 		cpu_to_le32(orig_dsn), 0, dst_addr,
 		cpu_to_le32(dst_dsn), mpath->next_hop->addr, hopcount, ttl,
 		cpu_to_le32(lifetime), cpu_to_le32(metric),
-		0, dev);
+		0, sdata);
 	rcu_read_unlock();
 	sdata->u.sta.mshstats.fwded_frames++;
 	return;
@@ -544,7 +542,7 @@ fail:
 	return;
 }
 
-static void hwmp_perr_frame_process(struct net_device *dev,
+static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_mgmt *mgmt, u8 *perr_elem)
 {
 	struct mesh_path *mpath;
@@ -555,7 +553,7 @@ static void hwmp_perr_frame_process(struct net_device *dev,
 	dst_addr = PERR_IE_DST_ADDR(perr_elem);
 	dst_dsn = PERR_IE_DST_DSN(perr_elem);
 	rcu_read_lock();
-	mpath = mesh_path_lookup(dst_addr, dev);
+	mpath = mesh_path_lookup(dst_addr, sdata);
 	if (mpath) {
 		spin_lock_bh(&mpath->state_lock);
 		if (mpath->flags & MESH_PATH_ACTIVE &&
@@ -566,7 +564,7 @@ static void hwmp_perr_frame_process(struct net_device *dev,
 			mpath->dsn = dst_dsn;
 			spin_unlock_bh(&mpath->state_lock);
 			mesh_path_error_tx(dst_addr, cpu_to_le32(dst_dsn),
-					   dev->broadcast, dev);
+					   sdata->dev->broadcast, sdata);
 		} else
 			spin_unlock_bh(&mpath->state_lock);
 	}
@@ -575,7 +573,7 @@ static void hwmp_perr_frame_process(struct net_device *dev,
 
 
-void mesh_rx_path_sel_frame(struct net_device *dev,
+void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
 			    struct ieee80211_mgmt *mgmt,
 			    size_t len)
 {
@@ -592,25 +590,25 @@ void mesh_rx_path_sel_frame(struct net_device *dev,
 		if (!elems.preq || elems.preq_len != 37)
 			/* Right now we support just 1 destination and no AE */
 			return;
-		last_hop_metric = hwmp_route_info_get(dev, mgmt, elems.preq);
+		last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.preq);
 		if (!last_hop_metric)
 			return;
-		hwmp_preq_frame_process(dev, mgmt, elems.preq, last_hop_metric);
+		hwmp_preq_frame_process(sdata, mgmt, elems.preq, last_hop_metric);
 		break;
 	case MPATH_PREP:
 		if (!elems.prep || elems.prep_len != 31)
 			/* Right now we support no AE */
 			return;
-		last_hop_metric = hwmp_route_info_get(dev, mgmt, elems.prep);
+		last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.prep);
 		if (!last_hop_metric)
 			return;
-		hwmp_prep_frame_process(dev, mgmt, elems.prep, last_hop_metric);
+		hwmp_prep_frame_process(sdata, mgmt, elems.prep, last_hop_metric);
 		break;
 	case MPATH_PERR:
 		if (!elems.perr || elems.perr_len != 12)
 			/* Right now we support only one destination per PERR */
 			return;
-		hwmp_perr_frame_process(dev, mgmt, elems.perr);
+		hwmp_perr_frame_process(sdata, mgmt, elems.perr);
 	default:
 		return;
 	}
@@ -628,8 +626,7 @@ void mesh_rx_path_sel_frame(struct net_device *dev,
  */
 static void mesh_queue_preq(struct mesh_path *mpath, u8 flags)
 {
-	struct ieee80211_sub_if_data *sdata =
-		IEEE80211_DEV_TO_SUB_IF(mpath->dev);
+	struct ieee80211_sub_if_data *sdata = mpath->sdata;
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	struct mesh_preq_queue *preq_node;
 
@@ -672,12 +669,10 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags)
 /**
  * mesh_path_start_discovery - launch a path discovery from the PREQ queue
  *
- * @dev: local mesh interface
+ * @sdata: local mesh subif
  */
-void mesh_path_start_discovery(struct net_device *dev)
+void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata =
-		IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	struct mesh_preq_queue *preq_node;
 	struct mesh_path *mpath;
@@ -699,7 +694,7 @@ void mesh_path_start_discovery(struct net_device *dev)
 	spin_unlock(&ifsta->mesh_preq_queue_lock);
 
 	rcu_read_lock();
-	mpath = mesh_path_lookup(preq_node->dst, dev);
+	mpath = mesh_path_lookup(preq_node->dst, sdata);
 	if (!mpath)
 		goto enddiscovery;
 
@@ -743,11 +738,11 @@ void mesh_path_start_discovery(struct net_device *dev)
 		dst_flags = MP_F_RF;
 
 	spin_unlock_bh(&mpath->state_lock);
-	mesh_path_sel_frame_tx(MPATH_PREQ, 0, dev->dev_addr,
+	mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->dev->dev_addr,
 			cpu_to_le32(ifsta->dsn), dst_flags, mpath->dst,
-			cpu_to_le32(mpath->dsn), dev->broadcast, 0,
+			cpu_to_le32(mpath->dsn), sdata->dev->broadcast, 0,
 			ttl, cpu_to_le32(lifetime), 0,
-			cpu_to_le32(ifsta->preq_id++), dev);
+			cpu_to_le32(ifsta->preq_id++), sdata);
 	mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout);
 
 enddiscovery:
@@ -759,7 +754,7 @@ enddiscovery:
  * ieee80211s_lookup_nexthop - put the appropriate next hop on a mesh frame
  *
  * @skb: 802.11 frame to be sent
- * @dev: network device the frame will be sent through
+ * @sdata: network subif the frame will be sent through
  * @fwd_frame: true if this frame was originally from a different host
  *
  * Returns: 0 if the next hop was found. Nonzero otherwise. If no next hop is
@@ -767,9 +762,9 @@ enddiscovery:
  * sent when the path is resolved. This means the caller must not free the skb
  * in this case.
  */
-int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev)
+int mesh_nexthop_lookup(struct sk_buff *skb,
+			struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct sk_buff *skb_to_free = NULL;
 	struct mesh_path *mpath;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -777,11 +772,11 @@ int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev)
 	int err = 0;
 
 	rcu_read_lock();
-	mpath = mesh_path_lookup(dst_addr, dev);
+	mpath = mesh_path_lookup(dst_addr, sdata);
 
 	if (!mpath) {
-		mesh_path_add(dst_addr, dev);
-		mpath = mesh_path_lookup(dst_addr, dev);
+		mesh_path_add(dst_addr, sdata);
+		mpath = mesh_path_lookup(dst_addr, sdata);
 		if (!mpath) {
 			dev_kfree_skb(skb);
 			sdata->u.sta.mshstats.dropped_frames_no_route++;
@@ -793,7 +788,8 @@ int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev)
 	if (mpath->flags & MESH_PATH_ACTIVE) {
 		if (time_after(jiffies, mpath->exp_time -
 			msecs_to_jiffies(sdata->u.sta.mshcfg.path_refresh_time))
-				&& !memcmp(dev->dev_addr, hdr->addr4, ETH_ALEN)
+				&& !memcmp(sdata->dev->dev_addr, hdr->addr4,
+					   ETH_ALEN)
 				&& !(mpath->flags & MESH_PATH_RESOLVING)
 				&& !(mpath->flags & MESH_PATH_FIXED)) {
 			mesh_queue_preq(mpath,
@@ -815,7 +811,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev)
 
 		skb_queue_tail(&mpath->frame_queue, skb);
 		if (skb_to_free)
-			mesh_path_discard_frame(skb_to_free, dev);
+			mesh_path_discard_frame(skb_to_free, sdata);
 		err = -ENOENT;
 	}
 
@@ -835,7 +831,7 @@ void mesh_path_timer(unsigned long data)
 	if (!mpath)
 		goto endmpathtimer;
 	spin_lock_bh(&mpath->state_lock);
-	sdata = IEEE80211_DEV_TO_SUB_IF(mpath->dev);
+	sdata = mpath->sdata;
 	if (mpath->flags & MESH_PATH_RESOLVED ||
 			(!(mpath->flags & MESH_PATH_RESOLVING)))
 		mpath->flags &= ~(MESH_PATH_RESOLVING | MESH_PATH_RESOLVED);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 838ee60492a..0a60f55f32a 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -9,7 +9,6 @@
 
 #include <linux/etherdevice.h>
 #include <linux/list.h>
-#include <linux/netdevice.h>
 #include <linux/random.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
@@ -62,13 +61,13 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta)
 /**
  * mesh_path_lookup - look up a path in the mesh path table
  * @dst: hardware address (ETH_ALEN length) of destination
- * @dev: local interface
+ * @sdata: local subif
  *
  * Returns: pointer to the mesh path structure, or NULL if not found
  *
  * Locking: must be called within a read rcu section.
  */
-struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev)
+struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata)
 {
 	struct mesh_path *mpath;
 	struct hlist_node *n;
@@ -78,10 +77,10 @@ struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev)
 
 	tbl = rcu_dereference(mesh_paths);
 
-	bucket = &tbl->hash_buckets[mesh_table_hash(dst, dev, tbl)];
+	bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)];
 	hlist_for_each_entry_rcu(node, n, bucket, list) {
 		mpath = node->mpath;
-		if (mpath->dev == dev &&
+		if (mpath->sdata == sdata &&
 				memcmp(dst, mpath->dst, ETH_ALEN) == 0) {
 			if (MPATH_EXPIRED(mpath)) {
 				spin_lock_bh(&mpath->state_lock);
@@ -98,13 +97,13 @@ struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev)
 /**
  * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index
  * @idx: index
- * @dev: local interface, or NULL for all entries
+ * @sdata: local subif, or NULL for all entries
  *
  * Returns: pointer to the mesh path structure, or NULL if not found.
  *
  * Locking: must be called within a read rcu section.
  */
-struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev)
+struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data *sdata)
 {
 	struct mpath_node *node;
 	struct hlist_node *p;
@@ -112,7 +111,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev)
 	int j = 0;
 
 	for_each_mesh_entry(mesh_paths, p, node, i) {
-		if (dev && node->mpath->dev != dev)
+		if (sdata && node->mpath->sdata != sdata)
 			continue;
 		if (j++ == idx) {
 			if (MPATH_EXPIRED(node->mpath)) {
@@ -131,15 +130,14 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev)
 /**
  * mesh_path_add - allocate and add a new path to the mesh path table
  * @addr: destination address of the path (ETH_ALEN length)
- * @dev: local interface
+ * @sdata: local subif
  *
  * Returns: 0 on sucess
  *
  * State: the initial state of the new path is set to 0
  */
-int mesh_path_add(u8 *dst, struct net_device *dev)
+int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct mesh_path *mpath, *new_mpath;
 	struct mpath_node *node, *new_node;
 	struct hlist_head *bucket;
@@ -148,7 +146,7 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 	int err = 0;
 	u32 hash_idx;
 
-	if (memcmp(dst, dev->dev_addr, ETH_ALEN) == 0)
+	if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0)
 		/* never add ourselves as neighbours */
 		return -ENOTSUPP;
 
@@ -169,7 +167,7 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 
 	read_lock(&pathtbl_resize_lock);
 	memcpy(new_mpath->dst, dst, ETH_ALEN);
-	new_mpath->dev = dev;
+	new_mpath->sdata = sdata;
 	new_mpath->flags = 0;
 	skb_queue_head_init(&new_mpath->frame_queue);
 	new_node->mpath = new_mpath;
@@ -179,7 +177,7 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 	spin_lock_init(&new_mpath->state_lock);
 	init_timer(&new_mpath->timer);
 
-	hash_idx = mesh_table_hash(dst, dev, mesh_paths);
+	hash_idx = mesh_table_hash(dst, sdata, mesh_paths);
 	bucket = &mesh_paths->hash_buckets[hash_idx];
 
 	spin_lock(&mesh_paths->hashwlock[hash_idx]);
@@ -187,7 +185,7 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 	err = -EEXIST;
 	hlist_for_each_entry(node, n, bucket, list) {
 		mpath = node->mpath;
-		if (mpath->dev == dev && memcmp(dst, mpath->dst, ETH_ALEN) == 0)
+		if (mpath->sdata == sdata && memcmp(dst, mpath->dst, ETH_ALEN) == 0)
 			goto err_exists;
 	}
 
@@ -241,7 +239,7 @@ void mesh_plink_broken(struct sta_info *sta)
 	struct mesh_path *mpath;
 	struct mpath_node *node;
 	struct hlist_node *p;
-	struct net_device *dev = sta->sdata->dev;
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	int i;
 
 	rcu_read_lock();
@@ -256,7 +254,7 @@ void mesh_plink_broken(struct sta_info *sta)
 			spin_unlock_bh(&mpath->state_lock);
 			mesh_path_error_tx(mpath->dst,
 					cpu_to_le32(mpath->dsn),
-					dev->broadcast, dev);
+					sdata->dev->broadcast, sdata);
 		} else
 		spin_unlock_bh(&mpath->state_lock);
 	}
@@ -284,11 +282,11 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
 	for_each_mesh_entry(mesh_paths, p, node, i) {
 		mpath = node->mpath;
 		if (mpath->next_hop == sta)
-			mesh_path_del(mpath->dst, mpath->dev);
+			mesh_path_del(mpath->dst, mpath->sdata);
 	}
 }
 
-void mesh_path_flush(struct net_device *dev)
+void mesh_path_flush(struct ieee80211_sub_if_data *sdata)
 {
 	struct mesh_path *mpath;
 	struct mpath_node *node;
@@ -297,16 +295,15 @@ void mesh_path_flush(struct net_device *dev)
 
 	for_each_mesh_entry(mesh_paths, p, node, i) {
 		mpath = node->mpath;
-		if (mpath->dev == dev)
-			mesh_path_del(mpath->dst, mpath->dev);
+		if (mpath->sdata == sdata)
+			mesh_path_del(mpath->dst, mpath->sdata);
 	}
 }
 
 static void mesh_path_node_reclaim(struct rcu_head *rp)
 {
 	struct mpath_node *node = container_of(rp, struct mpath_node, rcu);
-	struct ieee80211_sub_if_data *sdata =
-		IEEE80211_DEV_TO_SUB_IF(node->mpath->dev);
+	struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
 
 	del_timer_sync(&node->mpath->timer);
 	atomic_dec(&sdata->u.sta.mpaths);
@@ -318,11 +315,11 @@ static void mesh_path_node_reclaim(struct rcu_head *rp)
  * mesh_path_del - delete a mesh path from the table
  *
  * @addr: dst address (ETH_ALEN length)
- * @dev: local interface
+ * @sdata: local subif
  *
  * Returns: 0 if succesful
  */
-int mesh_path_del(u8 *addr, struct net_device *dev)
+int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata)
 {
 	struct mesh_path *mpath;
 	struct mpath_node *node;
@@ -332,13 +329,13 @@ int mesh_path_del(u8 *addr, struct net_device *dev)
 	int err = 0;
 
 	read_lock(&pathtbl_resize_lock);
-	hash_idx = mesh_table_hash(addr, dev, mesh_paths);
+	hash_idx = mesh_table_hash(addr, sdata, mesh_paths);
 	bucket = &mesh_paths->hash_buckets[hash_idx];
 
 	spin_lock(&mesh_paths->hashwlock[hash_idx]);
 	hlist_for_each_entry(node, n, bucket, list) {
 		mpath = node->mpath;
-		if (mpath->dev == dev &&
+		if (mpath->sdata == sdata &&
 				memcmp(addr, mpath->dst, ETH_ALEN) == 0) {
 			spin_lock_bh(&mpath->state_lock);
 			mpath->flags |= MESH_PATH_RESOLVING;
@@ -378,29 +375,29 @@ void mesh_path_tx_pending(struct mesh_path *mpath)
  * mesh_path_discard_frame - discard a frame whose path could not be resolved
  *
  * @skb: frame to discard
- * @dev: network device the frame was to be sent through
+ * @sdata: network subif the frame was to be sent through
  *
  * If the frame was beign forwarded from another MP, a PERR frame will be sent
  * to the precursor.
  *
  * Locking: the function must me called within a rcu_read_lock region
  */
-void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev)
+void mesh_path_discard_frame(struct sk_buff *skb,
+			     struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct mesh_path *mpath;
 	u32 dsn = 0;
 
-	if (memcmp(hdr->addr4, dev->dev_addr, ETH_ALEN) != 0) {
+	if (memcmp(hdr->addr4, sdata->dev->dev_addr, ETH_ALEN) != 0) {
 		u8 *ra, *da;
 
 		da = hdr->addr3;
 		ra = hdr->addr2;
-		mpath = mesh_path_lookup(da, dev);
+		mpath = mesh_path_lookup(da, sdata);
 		if (mpath)
 			dsn = ++mpath->dsn;
-		mesh_path_error_tx(skb->data, cpu_to_le32(dsn), ra, dev);
+		mesh_path_error_tx(skb->data, cpu_to_le32(dsn), ra, sdata);
 	}
 
 	kfree_skb(skb);
@@ -416,14 +413,11 @@ void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev)
  */
 void mesh_path_flush_pending(struct mesh_path *mpath)
 {
-	struct ieee80211_sub_if_data *sdata;
 	struct sk_buff *skb;
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(mpath->dev);
-
 	while ((skb = skb_dequeue(&mpath->frame_queue)) &&
 			(mpath->flags & MESH_PATH_ACTIVE))
-		mesh_path_discard_frame(skb, mpath->dev);
+		mesh_path_discard_frame(skb, mpath->sdata);
 }
 
 /**
@@ -472,7 +466,7 @@ static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl)
 	node = hlist_entry(p, struct mpath_node, list);
 	mpath = node->mpath;
 	new_node->mpath = mpath;
-	hash_idx = mesh_table_hash(mpath->dst, mpath->dev, newtbl);
+	hash_idx = mesh_table_hash(mpath->dst, mpath->sdata, newtbl);
 	hlist_add_head(&new_node->list,
 			&newtbl->hash_buckets[hash_idx]);
 	return 0;
@@ -489,7 +483,7 @@ int mesh_pathtbl_init(void)
 	return 0;
 }
 
-void mesh_path_expire(struct net_device *dev)
+void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
 {
 	struct mesh_path *mpath;
 	struct mpath_node *node;
@@ -498,7 +492,7 @@ void mesh_path_expire(struct net_device *dev)
 
 	read_lock(&pathtbl_resize_lock);
 	for_each_mesh_entry(mesh_paths, p, node, i) {
-		if (node->mpath->dev != dev)
+		if (node->mpath->sdata != sdata)
 			continue;
 		mpath = node->mpath;
 		spin_lock_bh(&mpath->state_lock);
@@ -507,7 +501,7 @@ void mesh_path_expire(struct net_device *dev)
 			time_after(jiffies,
 			 mpath->exp_time + MESH_PATH_EXPIRE)) {
 			spin_unlock_bh(&mpath->state_lock);
-			mesh_path_del(mpath->dst, mpath->dev);
+			mesh_path_del(mpath->dst, mpath->sdata);
 		} else
 			spin_unlock_bh(&mpath->state_lock);
 	}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 4a7e6d094ff..7714b0e6e4d 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -144,10 +144,10 @@ void mesh_plink_deactivate(struct sta_info *sta)
 	spin_unlock_bh(&sta->lock);
 }
 
-static int mesh_plink_frame_tx(struct net_device *dev,
+static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 		enum plink_frame_type action, u8 *da, __le16 llid, __le16 plid,
 		__le16 reason) {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
 	struct ieee80211_mgmt *mgmt;
 	bool include_plid = false;
@@ -166,7 +166,7 @@ static int mesh_plink_frame_tx(struct net_device *dev,
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_ACTION);
 	memcpy(mgmt->da, da, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
 	mgmt->u.action.category = PLINK_CATEGORY;
 	mgmt->u.action.u.plink_action.action_code = action;
@@ -180,7 +180,7 @@ static int mesh_plink_frame_tx(struct net_device *dev,
 			/* two-byte status code followed by two-byte AID */
 			memset(pos, 0, 4);
 		}
-		mesh_mgmt_ies_add(skb, dev);
+		mesh_mgmt_ies_add(skb, sdata);
 	}
 
 	/* Add Peer Link Management element */
@@ -217,15 +217,14 @@ static int mesh_plink_frame_tx(struct net_device *dev,
 		memcpy(pos, &reason, 2);
 	}
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 	return 0;
 }
 
-void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct net_device *dev,
+void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct ieee80211_sub_if_data *sdata,
 			   bool peer_accepting_plinks)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 
 	rcu_read_lock();
@@ -257,7 +256,6 @@ static void mesh_plink_timer(unsigned long data)
 {
 	struct sta_info *sta;
 	__le16 llid, plid, reason;
-	struct net_device *dev = NULL;
 	struct ieee80211_sub_if_data *sdata;
 #ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG
 	DECLARE_MAC_BUF(mac);
@@ -282,7 +280,6 @@ static void mesh_plink_timer(unsigned long data)
 	llid = sta->llid;
 	plid = sta->plid;
 	sdata = sta->sdata;
-	dev = sdata->dev;
 
 	switch (sta->plink_state) {
 	case PLINK_OPN_RCVD:
@@ -299,7 +296,7 @@ static void mesh_plink_timer(unsigned long data)
 			++sta->plink_retries;
 			mod_plink_timer(sta, sta->plink_timeout);
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->addr, llid,
 					    0, 0);
 			break;
 		}
@@ -312,7 +309,7 @@ static void mesh_plink_timer(unsigned long data)
 		sta->plink_state = PLINK_HOLDING;
 		mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
 		spin_unlock_bh(&sta->lock);
-		mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid,
+		mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid, plid,
 				    reason);
 		break;
 	case PLINK_HOLDING:
@@ -357,7 +354,7 @@ int mesh_plink_open(struct sta_info *sta)
 	mpl_dbg("Mesh plink: starting establishment with %s\n",
 		print_mac(mac, sta->addr));
 
-	return mesh_plink_frame_tx(sdata->dev, PLINK_OPEN,
+	return mesh_plink_frame_tx(sdata, PLINK_OPEN,
 				   sta->addr, llid, 0, 0);
 }
 
@@ -403,15 +400,14 @@ int mesh_plink_close(struct sta_info *sta)
 	llid = sta->llid;
 	plid = sta->plid;
 	spin_unlock_bh(&sta->lock);
-	mesh_plink_frame_tx(sta->sdata->dev, PLINK_CLOSE, sta->addr, llid,
+	mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->addr, llid,
 			    plid, reason);
 	return 0;
 }
 
-void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
+void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt,
 			 size_t len, struct ieee80211_rx_status *rx_status)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
 	struct ieee802_11_elems elems;
 	struct sta_info *sta;
@@ -478,7 +474,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 
 	/* Now we will figure out the appropriate event... */
 	event = PLINK_UNDEFINED;
-	if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, dev))) {
+	if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, sdata))) {
 		switch (ftype) {
 		case PLINK_OPEN:
 			event = OPN_RJCT;
@@ -577,9 +573,9 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			sta->llid = llid;
 			mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata));
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->addr, llid,
 					    0, 0);
-			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr,
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr,
 					    llid, plid, 0);
 			break;
 		default:
@@ -604,7 +600,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
@@ -613,7 +609,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			sta->plid = plid;
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid,
 					    plid, 0);
 			break;
 		case CNF_ACPT:
@@ -646,13 +642,13 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid,
 					    plid, 0);
 			break;
 		case CNF_ACPT:
@@ -685,7 +681,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
@@ -695,7 +691,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			spin_unlock_bh(&sta->lock);
 			mpl_dbg("Mesh plink with %s ESTABLISHED\n",
 					print_mac(mac, sta->addr));
-			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid,
 					    plid, 0);
 			break;
 		default:
@@ -714,13 +710,13 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			llid = sta->llid;
 			mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid,
 					    plid, 0);
 			break;
 		default:
@@ -743,7 +739,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt,
 			llid = sta->llid;
 			reason = sta->reason;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
 					    plid, reason);
 			break;
 		default:
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ac776c9d48f..fb8e1e742ca 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -73,19 +73,19 @@
 #define IEEE80211_MIN_AMPDU_BUF 0x8
 #define IEEE80211_MAX_AMPDU_BUF 0x40
 
-static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
+static void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 				     u8 *ssid, size_t ssid_len);
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int freq,
+ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len);
 static void ieee80211_rx_bss_put(struct ieee80211_local *local,
 				 struct ieee80211_sta_bss *bss);
-static int ieee80211_sta_find_ibss(struct net_device *dev,
+static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta);
-static int ieee80211_sta_wep_configured(struct net_device *dev);
-static int ieee80211_sta_start_scan(struct net_device *dev,
+static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata);
+static int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *sdata,
 				    u8 *ssid, size_t ssid_len);
-static int ieee80211_sta_config_auth(struct net_device *dev,
+static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta);
 static void sta_rx_agg_session_timer_expired(unsigned long data);
 
@@ -239,11 +239,10 @@ static int ecw2cw(int ecw)
 }
 
 
-static void ieee80211_sta_def_wmm_params(struct net_device *dev,
+static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_sta_bss *bss,
 					 int ibss)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
 	int i, have_higher_than_11mbit = 0;
 
@@ -281,11 +280,10 @@ static void ieee80211_sta_def_wmm_params(struct net_device *dev,
 	}
 }
 
-static void ieee80211_sta_wmm_params(struct net_device *dev,
+static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				     struct ieee80211_if_sta *ifsta,
 				     u8 *wmm_param, size_t wmm_param_len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_tx_queue_params params;
 	size_t left;
 	int count;
@@ -349,14 +347,14 @@ static void ieee80211_sta_wmm_params(struct net_device *dev,
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d "
 		       "cWmin=%d cWmax=%d txop=%d\n",
-		       dev->name, queue, aci, acm, params.aifs, params.cw_min,
+		       local->mdev->name, queue, aci, acm, params.aifs, params.cw_min,
 		       params.cw_max, params.txop);
 #endif
 		/* TODO: handle ACM (block TX, fallback to next lowest allowed
 		 * AC for now) */
 		if (local->ops->conf_tx(local_to_hw(local), queue, &params)) {
 			printk(KERN_DEBUG "%s: failed to set TX queue "
-			       "parameters for queue %d\n", dev->name, queue);
+			       "parameters for queue %d\n", local->mdev->name, queue);
 		}
 	}
 }
@@ -475,7 +473,7 @@ int ieee80211_ht_addt_info_ie_to_ht_bss_info(
 	return 0;
 }
 
-static void ieee80211_sta_send_associnfo(struct net_device *dev,
+static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_if_sta *ifsta)
 {
 	char *buf;
@@ -520,17 +518,16 @@ static void ieee80211_sta_send_associnfo(struct net_device *dev,
 
 	memset(&wrqu, 0, sizeof(wrqu));
 	wrqu.data.length = len;
-	wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
+	wireless_send_event(sdata->dev, IWEVCUSTOM, &wrqu, buf);
 
 	kfree(buf);
 }
 
 
-static void ieee80211_set_associated(struct net_device *dev,
+static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta,
 				     bool assoc)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_conf *conf = &local_to_hw(local)->conf;
 	union iwreq_data wrqu;
@@ -544,7 +541,7 @@ static void ieee80211_set_associated(struct net_device *dev,
 		if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 			return;
 
-		bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
+		bss = ieee80211_rx_bss_get(local, ifsta->bssid,
 					   conf->channel->center_freq,
 					   ifsta->ssid, ifsta->ssid_len);
 		if (bss) {
@@ -568,12 +565,12 @@ static void ieee80211_set_associated(struct net_device *dev,
 		ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET;
 		memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN);
 		memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN);
-		ieee80211_sta_send_associnfo(dev, ifsta);
+		ieee80211_sta_send_associnfo(sdata, ifsta);
 	} else {
-		netif_carrier_off(dev);
-		ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid);
+		netif_carrier_off(sdata->dev);
+		ieee80211_sta_tear_down_BA_sessions(sdata, ifsta->bssid);
 		ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
-		changed |= ieee80211_reset_erp_info(dev);
+		changed |= ieee80211_reset_erp_info(sdata);
 
 		sdata->bss_conf.assoc_ht = 0;
 		sdata->bss_conf.ht_conf = NULL;
@@ -588,27 +585,24 @@ static void ieee80211_set_associated(struct net_device *dev,
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	if (assoc)
-		netif_carrier_on(dev);
+		netif_carrier_on(sdata->dev);
 
 	wrqu.ap_addr.sa_family = ARPHRD_ETHER;
-	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
+	wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
 }
 
-static void ieee80211_set_disassoc(struct net_device *dev,
+static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta, int deauth)
 {
 	if (deauth)
 		ifsta->auth_tries = 0;
 	ifsta->assoc_tries = 0;
-	ieee80211_set_associated(dev, ifsta, 0);
+	ieee80211_set_associated(sdata, ifsta, 0);
 }
 
-void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
+void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		      int encrypt)
 {
-	struct ieee80211_sub_if_data *sdata;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	skb->dev = sdata->local->mdev;
 	skb_set_mac_header(skb, 0);
 	skb_set_network_header(skb, 0);
@@ -621,12 +615,12 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
 }
 
 
-static void ieee80211_send_auth(struct net_device *dev,
+static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 				struct ieee80211_if_sta *ifsta,
 				int transaction, u8 *extra, size_t extra_len,
 				int encrypt)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 
@@ -634,7 +628,7 @@ static void ieee80211_send_auth(struct net_device *dev,
 			    sizeof(*mgmt) + 6 + extra_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
-		       "frame\n", dev->name);
+		       "frame\n", sdata->dev->name);
 		return;
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -646,7 +640,7 @@ static void ieee80211_send_auth(struct net_device *dev,
 	if (encrypt)
 		mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
 	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 	mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg);
 	mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
@@ -655,11 +649,11 @@ static void ieee80211_send_auth(struct net_device *dev,
 	if (extra)
 		memcpy(skb_put(skb, extra_len), extra, extra_len);
 
-	ieee80211_sta_tx(dev, skb, encrypt);
+	ieee80211_sta_tx(sdata, skb, encrypt);
 }
 
 
-static void ieee80211_authenticate(struct net_device *dev,
+static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta)
 {
 	DECLARE_MAC_BUF(mac);
@@ -668,16 +662,16 @@ static void ieee80211_authenticate(struct net_device *dev,
 	if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) {
 		printk(KERN_DEBUG "%s: authentication with AP %s"
 		       " timed out\n",
-		       dev->name, print_mac(mac, ifsta->bssid));
+		       sdata->dev->name, print_mac(mac, ifsta->bssid));
 		ifsta->state = IEEE80211_DISABLED;
 		return;
 	}
 
 	ifsta->state = IEEE80211_AUTHENTICATE;
 	printk(KERN_DEBUG "%s: authenticate with AP %s\n",
-	       dev->name, print_mac(mac, ifsta->bssid));
+	       sdata->dev->name, print_mac(mac, ifsta->bssid));
 
-	ieee80211_send_auth(dev, ifsta, 1, NULL, 0, 0);
+	ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0);
 
 	mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
 }
@@ -703,10 +697,10 @@ static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss,
 	return count;
 }
 
-static void ieee80211_send_assoc(struct net_device *dev,
+static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 				 struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos, *ies;
@@ -722,7 +716,7 @@ static void ieee80211_send_assoc(struct net_device *dev,
 			    ifsta->ssid_len);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
-		       "frame\n", dev->name);
+		       "frame\n", sdata->dev->name);
 		return;
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -738,7 +732,7 @@ static void ieee80211_send_assoc(struct net_device *dev,
 			capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
 	}
 
-	bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
+	bss = ieee80211_rx_bss_get(local, ifsta->bssid,
 				   local->hw.conf.channel->center_freq,
 				   ifsta->ssid, ifsta->ssid_len);
 	if (bss) {
@@ -766,7 +760,7 @@ static void ieee80211_send_assoc(struct net_device *dev,
 	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 
 	if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) {
@@ -907,21 +901,21 @@ static void ieee80211_send_assoc(struct net_device *dev,
 	if (ifsta->assocreq_ies)
 		memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len);
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
 
-static void ieee80211_send_deauth(struct net_device *dev,
+static void ieee80211_send_deauth(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_if_sta *ifsta, u16 reason)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for deauth "
-		       "frame\n", dev->name);
+		       "frame\n", sdata->dev->name);
 		return;
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -929,28 +923,28 @@ static void ieee80211_send_deauth(struct net_device *dev,
 	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_DEAUTH);
 	skb_put(skb, 2);
 	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
 
-static void ieee80211_send_disassoc(struct net_device *dev,
+static void ieee80211_send_disassoc(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_if_sta *ifsta, u16 reason)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for disassoc "
-		       "frame\n", dev->name);
+		       "frame\n", sdata->dev->name);
 		return;
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -958,21 +952,21 @@ static void ieee80211_send_disassoc(struct net_device *dev,
 	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_DISASSOC);
 	skb_put(skb, 2);
 	mgmt->u.disassoc.reason_code = cpu_to_le16(reason);
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
 
-static int ieee80211_privacy_mismatch(struct net_device *dev,
+static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_sta_bss *bss;
 	int bss_privacy;
 	int wep_privacy;
@@ -981,14 +975,14 @@ static int ieee80211_privacy_mismatch(struct net_device *dev,
 	if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL))
 		return 0;
 
-	bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
+	bss = ieee80211_rx_bss_get(local, ifsta->bssid,
 				   local->hw.conf.channel->center_freq,
 				   ifsta->ssid, ifsta->ssid_len);
 	if (!bss)
 		return 0;
 
 	bss_privacy = !!(bss->capability & WLAN_CAPABILITY_PRIVACY);
-	wep_privacy = !!ieee80211_sta_wep_configured(dev);
+	wep_privacy = !!ieee80211_sta_wep_configured(sdata);
 	privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED);
 
 	ieee80211_rx_bss_put(local, bss);
@@ -1000,7 +994,7 @@ static int ieee80211_privacy_mismatch(struct net_device *dev,
 }
 
 
-static void ieee80211_associate(struct net_device *dev,
+static void ieee80211_associate(struct ieee80211_sub_if_data *sdata,
 				struct ieee80211_if_sta *ifsta)
 {
 	DECLARE_MAC_BUF(mac);
@@ -1009,31 +1003,31 @@ static void ieee80211_associate(struct net_device *dev,
 	if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) {
 		printk(KERN_DEBUG "%s: association with AP %s"
 		       " timed out\n",
-		       dev->name, print_mac(mac, ifsta->bssid));
+		       sdata->dev->name, print_mac(mac, ifsta->bssid));
 		ifsta->state = IEEE80211_DISABLED;
 		return;
 	}
 
 	ifsta->state = IEEE80211_ASSOCIATE;
 	printk(KERN_DEBUG "%s: associate with AP %s\n",
-	       dev->name, print_mac(mac, ifsta->bssid));
-	if (ieee80211_privacy_mismatch(dev, ifsta)) {
+	       sdata->dev->name, print_mac(mac, ifsta->bssid));
+	if (ieee80211_privacy_mismatch(sdata, ifsta)) {
 		printk(KERN_DEBUG "%s: mismatch in privacy configuration and "
-		       "mixed-cell disabled - abort association\n", dev->name);
+		       "mixed-cell disabled - abort association\n", sdata->dev->name);
 		ifsta->state = IEEE80211_DISABLED;
 		return;
 	}
 
-	ieee80211_send_assoc(dev, ifsta);
+	ieee80211_send_assoc(sdata, ifsta);
 
 	mod_timer(&ifsta->timer, jiffies + IEEE80211_ASSOC_TIMEOUT);
 }
 
 
-static void ieee80211_associated(struct net_device *dev,
+static void ieee80211_associated(struct ieee80211_sub_if_data *sdata,
 				 struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 	int disassoc;
 	DECLARE_MAC_BUF(mac);
@@ -1050,7 +1044,7 @@ static void ieee80211_associated(struct net_device *dev,
 	sta = sta_info_get(local, ifsta->bssid);
 	if (!sta) {
 		printk(KERN_DEBUG "%s: No STA entry for own AP %s\n",
-		       dev->name, print_mac(mac, ifsta->bssid));
+		       sdata->dev->name, print_mac(mac, ifsta->bssid));
 		disassoc = 1;
 	} else {
 		disassoc = 0;
@@ -1060,11 +1054,11 @@ static void ieee80211_associated(struct net_device *dev,
 				printk(KERN_DEBUG "%s: No ProbeResp from "
 				       "current AP %s - assume out of "
 				       "range\n",
-				       dev->name, print_mac(mac, ifsta->bssid));
+				       sdata->dev->name, print_mac(mac, ifsta->bssid));
 				disassoc = 1;
 				sta_info_unlink(&sta);
 			} else
-				ieee80211_send_probe_req(dev, ifsta->bssid,
+				ieee80211_send_probe_req(sdata, ifsta->bssid,
 							 local->scan_ssid,
 							 local->scan_ssid_len);
 			ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL;
@@ -1073,7 +1067,7 @@ static void ieee80211_associated(struct net_device *dev,
 			if (time_after(jiffies, ifsta->last_probe +
 				       IEEE80211_PROBE_INTERVAL)) {
 				ifsta->last_probe = jiffies;
-				ieee80211_send_probe_req(dev, ifsta->bssid,
+				ieee80211_send_probe_req(sdata, ifsta->bssid,
 							 ifsta->ssid,
 							 ifsta->ssid_len);
 			}
@@ -1087,7 +1081,7 @@ static void ieee80211_associated(struct net_device *dev,
 
 	if (disassoc) {
 		ifsta->state = IEEE80211_DISABLED;
-		ieee80211_set_associated(dev, ifsta, 0);
+		ieee80211_set_associated(sdata, ifsta, 0);
 	} else {
 		mod_timer(&ifsta->timer, jiffies +
 				      IEEE80211_MONITORING_INTERVAL);
@@ -1095,10 +1089,10 @@ static void ieee80211_associated(struct net_device *dev,
 }
 
 
-static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
+static void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 				     u8 *ssid, size_t ssid_len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_supported_band *sband;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
@@ -1108,7 +1102,7 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200);
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
-		       "request\n", dev->name);
+		       "request\n", sdata->dev->name);
 		return;
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -1117,7 +1111,7 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
 	memset(mgmt, 0, 24);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_PROBE_REQ);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	if (dst) {
 		memcpy(mgmt->da, dst, ETH_ALEN);
 		memcpy(mgmt->bssid, dst, ETH_ALEN);
@@ -1152,13 +1146,12 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
 		*pos = rate->bitrate / 5;
 	}
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
 
-static int ieee80211_sta_wep_configured(struct net_device *dev)
+static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	if (!sdata || !sdata->default_key ||
 	    sdata->default_key->conf.alg != ALG_WEP)
 		return 0;
@@ -1166,16 +1159,16 @@ static int ieee80211_sta_wep_configured(struct net_device *dev)
 }
 
 
-static void ieee80211_auth_completed(struct net_device *dev,
+static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
-	printk(KERN_DEBUG "%s: authenticated\n", dev->name);
+	printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name);
 	ifsta->flags |= IEEE80211_STA_AUTHENTICATED;
-	ieee80211_associate(dev, ifsta);
+	ieee80211_associate(sdata, ifsta);
 }
 
 
-static void ieee80211_auth_challenge(struct net_device *dev,
+static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta,
 				     struct ieee80211_mgmt *mgmt,
 				     size_t len)
@@ -1187,17 +1180,16 @@ static void ieee80211_auth_challenge(struct net_device *dev,
 	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
 	if (!elems.challenge)
 		return;
-	ieee80211_send_auth(dev, ifsta, 3, elems.challenge - 2,
+	ieee80211_send_auth(sdata, ifsta, 3, elems.challenge - 2,
 			    elems.challenge_len + 2, 1);
 }
 
-static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
+static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
 					u8 dialog_token, u16 status, u16 policy,
 					u16 buf_size, u16 timeout)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	u16 capab;
@@ -1206,7 +1198,7 @@ static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
 
 	if (!skb) {
 		printk(KERN_DEBUG "%s: failed to allocate buffer "
-		       "for addba resp frame\n", dev->name);
+		       "for addba resp frame\n", sdata->dev->name);
 		return;
 	}
 
@@ -1214,9 +1206,9 @@ static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
 	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, da, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
-		memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
+		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
@@ -1235,17 +1227,16 @@ static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
 	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
 	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 
 	return;
 }
 
-void ieee80211_send_addba_request(struct net_device *dev, const u8 *da,
+void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, const u8 *da,
 				u16 tid, u8 dialog_token, u16 start_seq_num,
 				u16 agg_size, u16 timeout)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
@@ -1255,16 +1246,16 @@ void ieee80211_send_addba_request(struct net_device *dev, const u8 *da,
 
 	if (!skb) {
 		printk(KERN_ERR "%s: failed to allocate buffer "
-				"for addba request frame\n", dev->name);
+				"for addba request frame\n", sdata->dev->name);
 		return;
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
 	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, da, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
-		memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
+		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 
@@ -1287,14 +1278,13 @@ void ieee80211_send_addba_request(struct net_device *dev, const u8 *da,
 	mgmt->u.action.u.addba_req.start_seq_num =
 					cpu_to_le16(start_seq_num << 4);
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
-static void ieee80211_sta_process_addba_request(struct net_device *dev,
+static void ieee80211_sta_process_addba_request(struct ieee80211_local *local,
 						struct ieee80211_mgmt *mgmt,
 						size_t len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	struct ieee80211_conf *conf = &hw->conf;
 	struct sta_info *sta;
@@ -1426,16 +1416,15 @@ end:
 	spin_unlock_bh(&sta->lock);
 
 end_no_lock:
-	ieee80211_send_addba_resp(sta->sdata->dev, sta->addr, tid,
+	ieee80211_send_addba_resp(sta->sdata, sta->addr, tid,
 				  dialog_token, status, 1, buf_size, timeout);
 	rcu_read_unlock();
 }
 
-static void ieee80211_sta_process_addba_resp(struct net_device *dev,
+static void ieee80211_sta_process_addba_resp(struct ieee80211_local *local,
 					     struct ieee80211_mgmt *mgmt,
 					     size_t len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
 	struct sta_info *sta;
 	u16 capab;
@@ -1497,11 +1486,10 @@ addba_resp_exit:
 	rcu_read_unlock();
 }
 
-void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
+void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid,
 			  u16 initiator, u16 reason_code)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
@@ -1511,7 +1499,7 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
 
 	if (!skb) {
 		printk(KERN_ERR "%s: failed to allocate buffer "
-					"for delba frame\n", dev->name);
+					"for delba frame\n", sdata->dev->name);
 		return;
 	}
 
@@ -1519,9 +1507,9 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
 	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, da, ETH_ALEN);
-	memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
-		memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
+		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
@@ -1537,12 +1525,12 @@ void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
 	mgmt->u.action.u.delba.params = cpu_to_le16(params);
 	mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
-void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn)
+void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_bar *bar;
 	u16 bar_control = 0;
@@ -1550,7 +1538,7 @@ void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn)
 	skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom);
 	if (!skb) {
 		printk(KERN_ERR "%s: failed to allocate buffer for "
-			"bar frame\n", dev->name);
+			"bar frame\n", sdata->dev->name);
 		return;
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -1559,20 +1547,20 @@ void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn)
 	bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
 					 IEEE80211_STYPE_BACK_REQ);
 	memcpy(bar->ra, ra, ETH_ALEN);
-	memcpy(bar->ta, dev->dev_addr, ETH_ALEN);
+	memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN);
 	bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL;
 	bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA;
 	bar_control |= (u16)(tid << 12);
 	bar->control = cpu_to_le16(bar_control);
 	bar->start_seq_num = cpu_to_le16(ssn);
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
-void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
+void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid,
 					u16 initiator, u16 reason)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_hw *hw = &local->hw;
 	struct sta_info *sta;
 	int ret, i;
@@ -1620,7 +1608,7 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
 
 	/* check if this is a self generated aggregation halt */
 	if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
-		ieee80211_send_delba(dev, ra, tid, 0, reason);
+		ieee80211_send_delba(sdata, ra, tid, 0, reason);
 
 	/* free the reordering buffer */
 	for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
@@ -1641,10 +1629,10 @@ void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
 }
 
 
-static void ieee80211_sta_process_delba(struct net_device *dev,
+static void ieee80211_sta_process_delba(struct ieee80211_sub_if_data *sdata,
 			struct ieee80211_mgmt *mgmt, size_t len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 	u16 tid, params;
 	u16 initiator;
@@ -1671,7 +1659,7 @@ static void ieee80211_sta_process_delba(struct net_device *dev,
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
 	if (initiator == WLAN_BACK_INITIATOR)
-		ieee80211_sta_stop_rx_ba_session(dev, sta->addr, tid,
+		ieee80211_sta_stop_rx_ba_session(sdata, sta->addr, tid,
 						 WLAN_BACK_INITIATOR, 0);
 	else { /* WLAN_BACK_RECIPIENT */
 		spin_lock_bh(&sta->lock);
@@ -1758,31 +1746,31 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
 #endif
-	ieee80211_sta_stop_rx_ba_session(sta->sdata->dev, sta->addr,
+	ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr,
 					 (u16)*ptid, WLAN_BACK_TIMER,
 					 WLAN_REASON_QSTA_TIMEOUT);
 }
 
-void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr)
+void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	int i;
 
 	for (i = 0; i <  STA_TID_NUM; i++) {
 		ieee80211_stop_tx_ba_session(&local->hw, addr, i,
 					     WLAN_BACK_INITIATOR);
-		ieee80211_sta_stop_rx_ba_session(dev, addr, i,
+		ieee80211_sta_stop_rx_ba_session(sdata, addr, i,
 						 WLAN_BACK_RECIPIENT,
 						 WLAN_REASON_QSTA_LEAVE_QBSS);
 	}
 }
 
-static void ieee80211_send_refuse_measurement_request(struct net_device *dev,
+static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata,
 					struct ieee80211_msrment_ie *request_ie,
 					const u8 *da, const u8 *bssid,
 					u8 dialog_token)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *msr_report;
 
@@ -1791,7 +1779,7 @@ static void ieee80211_send_refuse_measurement_request(struct net_device *dev,
 
 	if (!skb) {
 		printk(KERN_ERR "%s: failed to allocate buffer for "
-				"measurement report frame\n", dev->name);
+				"measurement report frame\n", sdata->dev->name);
 		return;
 	}
 
@@ -1799,7 +1787,7 @@ static void ieee80211_send_refuse_measurement_request(struct net_device *dev,
 	msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24);
 	memset(msr_report, 0, 24);
 	memcpy(msr_report->da, da, ETH_ALEN);
-	memcpy(msr_report->sa, dev->dev_addr, ETH_ALEN);
+	memcpy(msr_report->sa, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(msr_report->bssid, bssid, ETH_ALEN);
 	msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 						IEEE80211_STYPE_ACTION);
@@ -1821,10 +1809,10 @@ static void ieee80211_send_refuse_measurement_request(struct net_device *dev,
 			IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
 	msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
 
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
-static void ieee80211_sta_process_measurement_req(struct net_device *dev,
+static void ieee80211_sta_process_measurement_req(struct ieee80211_sub_if_data *sdata,
 						struct ieee80211_mgmt *mgmt,
 						size_t len)
 {
@@ -1835,19 +1823,18 @@ static void ieee80211_sta_process_measurement_req(struct net_device *dev,
 	 * For now just refuse
 	 * TODO: Answer basic measurement as unmeasured
 	 */
-	ieee80211_send_refuse_measurement_request(dev,
+	ieee80211_send_refuse_measurement_request(sdata,
 			&mgmt->u.action.u.measurement.msr_elem,
 			mgmt->sa, mgmt->bssid,
 			mgmt->u.action.u.measurement.dialog_token);
 }
 
 
-static void ieee80211_rx_mgmt_auth(struct net_device *dev,
+static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta,
 				   struct ieee80211_mgmt *mgmt,
 				   size_t len)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	u16 auth_alg, auth_transaction, status_code;
 	DECLARE_MAC_BUF(mac);
 
@@ -1879,7 +1866,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 		 */
 		if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1)
 			return;
-		ieee80211_send_auth(dev, ifsta, 2, NULL, 0, 0);
+		ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0);
 	}
 
 	if (auth_alg != ifsta->auth_alg ||
@@ -1912,7 +1899,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 				    algs[pos] == 0xff)
 					continue;
 				if (algs[pos] == WLAN_AUTH_SHARED_KEY &&
-				    !ieee80211_sta_wep_configured(dev))
+				    !ieee80211_sta_wep_configured(sdata))
 					continue;
 				ifsta->auth_alg = algs[pos];
 				break;
@@ -1924,19 +1911,19 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
 	switch (ifsta->auth_alg) {
 	case WLAN_AUTH_OPEN:
 	case WLAN_AUTH_LEAP:
-		ieee80211_auth_completed(dev, ifsta);
+		ieee80211_auth_completed(sdata, ifsta);
 		break;
 	case WLAN_AUTH_SHARED_KEY:
 		if (ifsta->auth_transaction == 4)
-			ieee80211_auth_completed(dev, ifsta);
+			ieee80211_auth_completed(sdata, ifsta);
 		else
-			ieee80211_auth_challenge(dev, ifsta, mgmt, len);
+			ieee80211_auth_challenge(sdata, ifsta, mgmt, len);
 		break;
 	}
 }
 
 
-static void ieee80211_rx_mgmt_deauth(struct net_device *dev,
+static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta,
 				     struct ieee80211_mgmt *mgmt,
 				     size_t len)
@@ -1953,7 +1940,7 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev,
 	reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
 
 	if (ifsta->flags & IEEE80211_STA_AUTHENTICATED)
-		printk(KERN_DEBUG "%s: deauthenticated\n", dev->name);
+		printk(KERN_DEBUG "%s: deauthenticated\n", sdata->dev->name);
 
 	if (ifsta->state == IEEE80211_AUTHENTICATE ||
 	    ifsta->state == IEEE80211_ASSOCIATE ||
@@ -1963,12 +1950,12 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev,
 				      IEEE80211_RETRY_AUTH_INTERVAL);
 	}
 
-	ieee80211_set_disassoc(dev, ifsta, 1);
+	ieee80211_set_disassoc(sdata, ifsta, 1);
 	ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED;
 }
 
 
-static void ieee80211_rx_mgmt_disassoc(struct net_device *dev,
+static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 				       struct ieee80211_if_sta *ifsta,
 				       struct ieee80211_mgmt *mgmt,
 				       size_t len)
@@ -1985,7 +1972,7 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev,
 	reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
 
 	if (ifsta->flags & IEEE80211_STA_ASSOCIATED)
-		printk(KERN_DEBUG "%s: disassociated\n", dev->name);
+		printk(KERN_DEBUG "%s: disassociated\n", sdata->dev->name);
 
 	if (ifsta->state == IEEE80211_ASSOCIATED) {
 		ifsta->state = IEEE80211_ASSOCIATE;
@@ -1993,7 +1980,7 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev,
 				      IEEE80211_RETRY_AUTH_INTERVAL);
 	}
 
-	ieee80211_set_disassoc(dev, ifsta, 0);
+	ieee80211_set_disassoc(sdata, ifsta, 0);
 }
 
 
@@ -2004,7 +1991,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 					 int reassoc)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct net_device *dev = sdata->dev;
 	struct ieee80211_supported_band *sband;
 	struct sta_info *sta;
 	u64 rates, basic_rates;
@@ -2034,12 +2020,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 
 	printk(KERN_DEBUG "%s: RX %sssocResp from %s (capab=0x%x "
 	       "status=%d aid=%d)\n",
-	       dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa),
+	       sdata->dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa),
 	       capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
 
 	if (status_code != WLAN_STATUS_SUCCESS) {
 		printk(KERN_DEBUG "%s: AP denied association (code=%d)\n",
-		       dev->name, status_code);
+		       sdata->dev->name, status_code);
 		/* if this was a reassociation, ensure we try a "full"
 		 * association next time. This works around some broken APs
 		 * which do not correctly reject reassociation requests. */
@@ -2049,7 +2035,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 
 	if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14)))
 		printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not "
-		       "set\n", dev->name, aid);
+		       "set\n", sdata->dev->name, aid);
 	aid &= ~(BIT(15) | BIT(14));
 
 	pos = mgmt->u.assoc_resp.variable;
@@ -2057,11 +2043,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 
 	if (!elems.supp_rates) {
 		printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n",
-		       dev->name);
+		       sdata->dev->name);
 		return;
 	}
 
-	printk(KERN_DEBUG "%s: associated\n", dev->name);
+	printk(KERN_DEBUG "%s: associated\n", sdata->dev->name);
 	ifsta->aid = aid;
 	ifsta->ap_capab = capab_info;
 
@@ -2082,11 +2068,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC);
 		if (!sta) {
 			printk(KERN_DEBUG "%s: failed to alloc STA entry for"
-			       " the AP\n", dev->name);
+			       " the AP\n", sdata->dev->name);
 			rcu_read_unlock();
 			return;
 		}
-		bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
+		bss = ieee80211_rx_bss_get(local, ifsta->bssid,
 					   local->hw.conf.channel->center_freq,
 					   ifsta->ssid, ifsta->ssid_len);
 		if (bss) {
@@ -2099,7 +2085,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		err = sta_info_insert(sta);
 		if (err) {
 			printk(KERN_DEBUG "%s: failed to insert STA entry for"
-			       " the AP (error %d)\n", dev->name, err);
+			       " the AP (error %d)\n", sdata->dev->name, err);
 			rcu_read_unlock();
 			return;
 		}
@@ -2179,7 +2165,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	if (elems.wmm_param) {
 		set_sta_flags(sta, WLAN_STA_WME);
 		rcu_read_unlock();
-		ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
+		ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param,
 					 elems.wmm_param_len);
 	} else
 		rcu_read_unlock();
@@ -2188,17 +2174,16 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	 * ieee80211_set_associated() will tell the driver */
 	bss_conf->aid = aid;
 	bss_conf->assoc_capability = capab_info;
-	ieee80211_set_associated(dev, ifsta, 1);
+	ieee80211_set_associated(sdata, ifsta, 1);
 
-	ieee80211_associated(dev, ifsta);
+	ieee80211_associated(sdata, ifsta);
 }
 
 
 /* Caller must hold local->sta_bss_lock */
-static void __ieee80211_rx_bss_hash_add(struct net_device *dev,
+static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local,
 					struct ieee80211_sta_bss *bss)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	u8 hash_idx;
 
 	if (bss_mesh_cfg(bss))
@@ -2234,10 +2219,10 @@ static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
 
 
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int freq,
+ieee80211_rx_bss_add(struct ieee80211_sub_if_data *sdata, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_sta_bss *bss;
 
 	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
@@ -2255,16 +2240,15 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int freq,
 	spin_lock_bh(&local->sta_bss_lock);
 	/* TODO: order by RSSI? */
 	list_add_tail(&bss->list, &local->sta_bss_list);
-	__ieee80211_rx_bss_hash_add(dev, bss);
+	__ieee80211_rx_bss_hash_add(local, bss);
 	spin_unlock_bh(&local->sta_bss_lock);
 	return bss;
 }
 
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int freq,
+ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss;
 
 	spin_lock_bh(&local->sta_bss_lock);
@@ -2286,10 +2270,9 @@ ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int freq,
 
 #ifdef CONFIG_MAC80211_MESH
 static struct ieee80211_sta_bss *
-ieee80211_rx_mesh_bss_get(struct net_device *dev, u8 *mesh_id, int mesh_id_len,
+ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
 			  u8 *mesh_cfg, int freq)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss;
 
 	spin_lock_bh(&local->sta_bss_lock);
@@ -2311,10 +2294,9 @@ ieee80211_rx_mesh_bss_get(struct net_device *dev, u8 *mesh_id, int mesh_id_len,
 }
 
 static struct ieee80211_sta_bss *
-ieee80211_rx_mesh_bss_add(struct net_device *dev, u8 *mesh_id, int mesh_id_len,
+ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
 			  u8 *mesh_cfg, int mesh_config_len, int freq)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sta_bss *bss;
 
 	if (mesh_config_len != MESH_CFG_LEN)
@@ -2348,7 +2330,7 @@ ieee80211_rx_mesh_bss_add(struct net_device *dev, u8 *mesh_id, int mesh_id_len,
 	spin_lock_bh(&local->sta_bss_lock);
 	/* TODO: order by RSSI? */
 	list_add_tail(&bss->list, &local->sta_bss_list);
-	__ieee80211_rx_bss_hash_add(dev, bss);
+	__ieee80211_rx_bss_hash_add(local, bss);
 	spin_unlock_bh(&local->sta_bss_lock);
 	return bss;
 }
@@ -2399,23 +2381,20 @@ void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local)
 }
 
 
-static int ieee80211_sta_join_ibss(struct net_device *dev,
+static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta,
 				   struct ieee80211_sta_bss *bss)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	int res, rates, i, j;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos;
-	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_supported_band *sband;
 	union iwreq_data wrqu;
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
 	/* Remove possible STA entries from other IBSS networks. */
 	sta_info_flush_delayed(sdata);
 
@@ -2433,7 +2412,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 	sdata->drop_unencrypted = bss->capability &
 		WLAN_CAPABILITY_PRIVACY ? 1 : 0;
 
-	res = ieee80211_set_freq(dev, bss->freq);
+	res = ieee80211_set_freq(sdata, bss->freq);
 
 	if (res)
 		return res;
@@ -2449,7 +2428,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 		mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 						  IEEE80211_STYPE_PROBE_RESP);
 		memset(mgmt->da, 0xff, ETH_ALEN);
-		memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
+		memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
 		mgmt->u.beacon.beacon_int =
 			cpu_to_le16(local->hw.conf.beacon_int);
@@ -2506,14 +2485,14 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
 	}
 	ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates;
 
-	ieee80211_sta_def_wmm_params(dev, bss, 1);
+	ieee80211_sta_def_wmm_params(sdata, bss, 1);
 
 	ifsta->state = IEEE80211_IBSS_JOINED;
 	mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
 
 	memset(&wrqu, 0, sizeof(wrqu));
 	memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN);
-	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
+	wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
 
 	return res;
 }
@@ -2555,35 +2534,34 @@ u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
 }
 
 
-static void ieee80211_rx_bss_info(struct net_device *dev,
+static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_mgmt *mgmt,
 				  size_t len,
 				  struct ieee80211_rx_status *rx_status,
 				  struct ieee802_11_elems *elems,
 				  int beacon)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	int freq, clen;
 	struct ieee80211_sta_bss *bss;
 	struct sta_info *sta;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	u64 beacon_timestamp, rx_timestamp;
 	struct ieee80211_channel *channel;
 	DECLARE_MAC_BUF(mac);
 	DECLARE_MAC_BUF(mac2);
 
-	if (!beacon && memcmp(mgmt->da, dev->dev_addr, ETH_ALEN))
+	if (!beacon && memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN))
 		return; /* ignore ProbeResp to foreign address */
 
 	beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id &&
-	    elems->mesh_config && mesh_matches_local(elems, dev)) {
+	    elems->mesh_config && mesh_matches_local(elems, sdata)) {
 		u64 rates = ieee80211_sta_get_rates(local, elems,
 						rx_status->band);
 
-		mesh_neighbour_update(mgmt->sa, rates, dev,
-				      mesh_peer_accepts_plinks(elems, dev));
+		mesh_neighbour_update(mgmt->sa, rates, sdata,
+				      mesh_peer_accepts_plinks(elems));
 	}
 
 	rcu_read_lock();
@@ -2620,21 +2598,21 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 
 #ifdef CONFIG_MAC80211_MESH
 	if (elems->mesh_config)
-		bss = ieee80211_rx_mesh_bss_get(dev, elems->mesh_id,
+		bss = ieee80211_rx_mesh_bss_get(local, elems->mesh_id,
 				elems->mesh_id_len, elems->mesh_config, freq);
 	else
 #endif
-		bss = ieee80211_rx_bss_get(dev, mgmt->bssid, freq,
+		bss = ieee80211_rx_bss_get(local, mgmt->bssid, freq,
 					   elems->ssid, elems->ssid_len);
 	if (!bss) {
 #ifdef CONFIG_MAC80211_MESH
 		if (elems->mesh_config)
-			bss = ieee80211_rx_mesh_bss_add(dev, elems->mesh_id,
+			bss = ieee80211_rx_mesh_bss_add(local, elems->mesh_id,
 				elems->mesh_id_len, elems->mesh_config,
 				elems->mesh_config_len, freq);
 		else
 #endif
-			bss = ieee80211_rx_bss_add(dev, mgmt->bssid, freq,
+			bss = ieee80211_rx_bss_add(sdata, mgmt->bssid, freq,
 						  elems->ssid, elems->ssid_len);
 		if (!bss)
 			return;
@@ -2871,10 +2849,10 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 #ifndef CONFIG_MAC80211_IBSS_DEBUG
 			printk(KERN_DEBUG "%s: beacon TSF higher than "
 			       "local TSF - IBSS merge with BSSID %s\n",
-			       dev->name, print_mac(mac, mgmt->bssid));
+			       sdata->dev->name, print_mac(mac, mgmt->bssid));
 #endif
-			ieee80211_sta_join_ibss(dev, &sdata->u.sta, bss);
-			ieee80211_ibss_add_sta(dev, NULL,
+			ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss);
+			ieee80211_ibss_add_sta(sdata, NULL,
 					       mgmt->bssid, mgmt->sa,
 					       BIT(rx_status->rate_idx));
 		}
@@ -2884,7 +2862,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 }
 
 
-static void ieee80211_rx_mgmt_probe_resp(struct net_device *dev,
+static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_mgmt *mgmt,
 					 size_t len,
 					 struct ieee80211_rx_status *rx_status)
@@ -2899,20 +2877,19 @@ static void ieee80211_rx_mgmt_probe_resp(struct net_device *dev,
 	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
 				&elems);
 
-	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, &elems, 0);
+	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, 0);
 }
 
 
-static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
+static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_mgmt *mgmt,
 				     size_t len,
 				     struct ieee80211_rx_status *rx_status)
 {
-	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_if_sta *ifsta;
 	size_t baselen;
 	struct ieee802_11_elems elems;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_conf *conf = &local->hw.conf;
 	u32 changed = 0;
 
@@ -2923,9 +2900,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 
 	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
-	ieee80211_rx_bss_info(dev, mgmt, len, rx_status, &elems, 1);
+	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, 1);
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return;
 	ifsta = &sdata->u.sta;
@@ -2934,7 +2910,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 	    memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0)
 		return;
 
-	ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
+	ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param,
 				 elems.wmm_param_len);
 
 	/* Do not send changes to driver if we are scanning. This removes
@@ -2966,14 +2942,13 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 }
 
 
-static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
+static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 					struct ieee80211_if_sta *ifsta,
 					struct ieee80211_mgmt *mgmt,
 					size_t len,
 					struct ieee80211_rx_status *rx_status)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 	int tx_last_beacon;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *resp;
@@ -2997,7 +2972,7 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 	printk(KERN_DEBUG "%s: RX ProbeReq SA=%s DA=%s BSSID="
 	       "%s (tx_last_beacon=%d)\n",
-	       dev->name, print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da),
+	       sdata->dev->name, print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da),
 	       print_mac(mac3, mgmt->bssid), tx_last_beacon);
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 
@@ -3015,7 +2990,7 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 		printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq "
 		       "from %s\n",
-		       dev->name, print_mac(mac, mgmt->sa));
+		       sdata->dev->name, print_mac(mac, mgmt->sa));
 #endif
 		return;
 	}
@@ -3035,19 +3010,18 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
 	memcpy(resp->da, mgmt->sa, ETH_ALEN);
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 	printk(KERN_DEBUG "%s: Sending ProbeResp to %s\n",
-	       dev->name, print_mac(mac, resp->da));
+	       sdata->dev->name, print_mac(mac, resp->da));
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
-	ieee80211_sta_tx(dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
-static void ieee80211_rx_mgmt_action(struct net_device *dev,
+static void ieee80211_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta,
 				     struct ieee80211_mgmt *mgmt,
 				     size_t len,
 				     struct ieee80211_rx_status *rx_status)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 
 	if (len < IEEE80211_MIN_ACTION_SIZE)
 		return;
@@ -3061,7 +3035,7 @@ static void ieee80211_rx_mgmt_action(struct net_device *dev,
 			if (len < (IEEE80211_MIN_ACTION_SIZE +
 				   sizeof(mgmt->u.action.u.measurement)))
 				break;
-			ieee80211_sta_process_measurement_req(dev, mgmt, len);
+			ieee80211_sta_process_measurement_req(sdata, mgmt, len);
 			break;
 		}
 		break;
@@ -3071,38 +3045,37 @@ static void ieee80211_rx_mgmt_action(struct net_device *dev,
 			if (len < (IEEE80211_MIN_ACTION_SIZE +
 				   sizeof(mgmt->u.action.u.addba_req)))
 				break;
-			ieee80211_sta_process_addba_request(dev, mgmt, len);
+			ieee80211_sta_process_addba_request(local, mgmt, len);
 			break;
 		case WLAN_ACTION_ADDBA_RESP:
 			if (len < (IEEE80211_MIN_ACTION_SIZE +
 				   sizeof(mgmt->u.action.u.addba_resp)))
 				break;
-			ieee80211_sta_process_addba_resp(dev, mgmt, len);
+			ieee80211_sta_process_addba_resp(local, mgmt, len);
 			break;
 		case WLAN_ACTION_DELBA:
 			if (len < (IEEE80211_MIN_ACTION_SIZE +
 				   sizeof(mgmt->u.action.u.delba)))
 				break;
-			ieee80211_sta_process_delba(dev, mgmt, len);
+			ieee80211_sta_process_delba(sdata, mgmt, len);
 			break;
 		}
 		break;
 	case PLINK_CATEGORY:
 		if (ieee80211_vif_is_mesh(&sdata->vif))
-			mesh_rx_plink_frame(dev, mgmt, len, rx_status);
+			mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
 		break;
 	case MESH_PATH_SEL_CATEGORY:
 		if (ieee80211_vif_is_mesh(&sdata->vif))
-			mesh_rx_path_sel_frame(dev, mgmt, len);
+			mesh_rx_path_sel_frame(sdata, mgmt, len);
 		break;
 	}
 }
 
-void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
+void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 			   struct ieee80211_rx_status *rx_status)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_sta *ifsta;
 	struct ieee80211_mgmt *mgmt;
 	u16 fc;
@@ -3110,7 +3083,6 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
 	if (skb->len < 24)
 		goto fail;
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	ifsta = &sdata->u.sta;
 
 	mgmt = (struct ieee80211_mgmt *) skb->data;
@@ -3137,16 +3109,14 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
 }
 
 
-static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
+static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 					 struct sk_buff *skb)
 {
 	struct ieee80211_rx_status *rx_status;
-	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_if_sta *ifsta;
 	struct ieee80211_mgmt *mgmt;
 	u16 fc;
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	ifsta = &sdata->u.sta;
 
 	rx_status = (struct ieee80211_rx_status *) skb->cb;
@@ -3155,17 +3125,17 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
 
 	switch (fc & IEEE80211_FCTL_STYPE) {
 	case IEEE80211_STYPE_PROBE_REQ:
-		ieee80211_rx_mgmt_probe_req(dev, ifsta, mgmt, skb->len,
+		ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, skb->len,
 					    rx_status);
 		break;
 	case IEEE80211_STYPE_PROBE_RESP:
-		ieee80211_rx_mgmt_probe_resp(dev, mgmt, skb->len, rx_status);
+		ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, rx_status);
 		break;
 	case IEEE80211_STYPE_BEACON:
-		ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, rx_status);
+		ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status);
 		break;
 	case IEEE80211_STYPE_AUTH:
-		ieee80211_rx_mgmt_auth(dev, ifsta, mgmt, skb->len);
+		ieee80211_rx_mgmt_auth(sdata, ifsta, mgmt, skb->len);
 		break;
 	case IEEE80211_STYPE_ASSOC_RESP:
 		ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 0);
@@ -3174,13 +3144,13 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
 		ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 1);
 		break;
 	case IEEE80211_STYPE_DEAUTH:
-		ieee80211_rx_mgmt_deauth(dev, ifsta, mgmt, skb->len);
+		ieee80211_rx_mgmt_deauth(sdata, ifsta, mgmt, skb->len);
 		break;
 	case IEEE80211_STYPE_DISASSOC:
-		ieee80211_rx_mgmt_disassoc(dev, ifsta, mgmt, skb->len);
+		ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, skb->len);
 		break;
 	case IEEE80211_STYPE_ACTION:
-		ieee80211_rx_mgmt_action(dev, ifsta, mgmt, skb->len, rx_status);
+		ieee80211_rx_mgmt_action(sdata, ifsta, mgmt, skb->len, rx_status);
 		break;
 	}
 
@@ -3189,7 +3159,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
 
 
 ieee80211_rx_result
-ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
+ieee80211_sta_rx_scan(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		      struct ieee80211_rx_status *rx_status)
 {
 	struct ieee80211_mgmt *mgmt;
@@ -3208,13 +3178,13 @@ ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
 		return RX_DROP_MONITOR;
 
 	if (ieee80211_is_probe_resp(fc)) {
-		ieee80211_rx_mgmt_probe_resp(dev, mgmt, skb->len, rx_status);
+		ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, rx_status);
 		dev_kfree_skb(skb);
 		return RX_QUEUED;
 	}
 
 	if (ieee80211_is_beacon(fc)) {
-		ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, rx_status);
+		ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status);
 		dev_kfree_skb(skb);
 		return RX_QUEUED;
 	}
@@ -3223,12 +3193,11 @@ ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
 }
 
 
-static int ieee80211_sta_active_ibss(struct net_device *dev)
+static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	int active = 0;
 	struct sta_info *sta;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	rcu_read_lock();
 
@@ -3247,9 +3216,9 @@ static int ieee80211_sta_active_ibss(struct net_device *dev)
 }
 
 
-static void ieee80211_sta_expire(struct net_device *dev, unsigned long exp_time)
+static void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta, *tmp;
 	LIST_HEAD(tmp_list);
 	DECLARE_MAC_BUF(mac);
@@ -3260,7 +3229,7 @@ static void ieee80211_sta_expire(struct net_device *dev, unsigned long exp_time)
 		if (time_after(jiffies, sta->last_rx + exp_time)) {
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 			printk(KERN_DEBUG "%s: expiring inactive STA %s\n",
-			       dev->name, print_mac(mac, sta->addr));
+			       sdata->dev->name, print_mac(mac, sta->addr));
 #endif
 			__sta_info_unlink(&sta);
 			if (sta)
@@ -3273,30 +3242,29 @@ static void ieee80211_sta_expire(struct net_device *dev, unsigned long exp_time)
 }
 
 
-static void ieee80211_sta_merge_ibss(struct net_device *dev,
+static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
 	mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
 
-	ieee80211_sta_expire(dev, IEEE80211_IBSS_INACTIVITY_LIMIT);
-	if (ieee80211_sta_active_ibss(dev))
+	ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT);
+	if (ieee80211_sta_active_ibss(sdata))
 		return;
 
 	printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
-	       "IBSS networks with same SSID (merge)\n", dev->name);
-	ieee80211_sta_req_scan(dev, ifsta->ssid, ifsta->ssid_len);
+	       "IBSS networks with same SSID (merge)\n", sdata->dev->name);
+	ieee80211_sta_req_scan(sdata, ifsta->ssid, ifsta->ssid_len);
 }
 
 
 #ifdef CONFIG_MAC80211_MESH
-static void ieee80211_mesh_housekeeping(struct net_device *dev,
+static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata,
 			   struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	bool free_plinks;
 
-	ieee80211_sta_expire(dev, IEEE80211_MESH_PEER_INACTIVITY_LIMIT);
-	mesh_path_expire(dev);
+	ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT);
+	mesh_path_expire(sdata);
 
 	free_plinks = mesh_plink_availables(sdata);
 	if (free_plinks != sdata->u.sta.accepting_plinks)
@@ -3307,10 +3275,9 @@ static void ieee80211_mesh_housekeeping(struct net_device *dev,
 }
 
 
-void ieee80211_start_mesh(struct net_device *dev)
+void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_sta *ifsta;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	ifsta = &sdata->u.sta;
 	ifsta->state = IEEE80211_MESH_UP;
 	ieee80211_sta_timer((unsigned long)sdata);
@@ -3324,7 +3291,7 @@ void ieee80211_sta_timer(unsigned long data)
 	struct ieee80211_sub_if_data *sdata =
 		(struct ieee80211_sub_if_data *) data;
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	struct ieee80211_local *local = wdev_priv(&sdata->wdev);
+	struct ieee80211_local *local = sdata->local;
 
 	set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request);
 	queue_work(local->hw.workqueue, &ifsta->work);
@@ -3334,12 +3301,11 @@ void ieee80211_sta_work(struct work_struct *work)
 {
 	struct ieee80211_sub_if_data *sdata =
 		container_of(work, struct ieee80211_sub_if_data, u.sta.work);
-	struct net_device *dev = sdata->dev;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_sta *ifsta;
 	struct sk_buff *skb;
 
-	if (!netif_running(dev))
+	if (!netif_running(sdata->dev))
 		return;
 
 	if (local->sta_sw_scanning || local->sta_hw_scanning)
@@ -3352,27 +3318,27 @@ void ieee80211_sta_work(struct work_struct *work)
 	ifsta = &sdata->u.sta;
 
 	while ((skb = skb_dequeue(&ifsta->skb_queue)))
-		ieee80211_sta_rx_queued_mgmt(dev, skb);
+		ieee80211_sta_rx_queued_mgmt(sdata, skb);
 
 #ifdef CONFIG_MAC80211_MESH
 	if (ifsta->preq_queue_len &&
 	    time_after(jiffies,
 		       ifsta->last_preq + msecs_to_jiffies(ifsta->mshcfg.dot11MeshHWMPpreqMinInterval)))
-		mesh_path_start_discovery(dev);
+		mesh_path_start_discovery(sdata);
 #endif
 
 	if (ifsta->state != IEEE80211_AUTHENTICATE &&
 	    ifsta->state != IEEE80211_ASSOCIATE &&
 	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
 		if (ifsta->scan_ssid_len)
-			ieee80211_sta_start_scan(dev, ifsta->scan_ssid, ifsta->scan_ssid_len);
+			ieee80211_sta_start_scan(sdata, ifsta->scan_ssid, ifsta->scan_ssid_len);
 		else
-			ieee80211_sta_start_scan(dev, NULL, 0);
+			ieee80211_sta_start_scan(sdata, NULL, 0);
 		return;
 	}
 
 	if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) {
-		if (ieee80211_sta_config_auth(dev, ifsta))
+		if (ieee80211_sta_config_auth(sdata, ifsta))
 			return;
 		clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request);
 	} else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request))
@@ -3382,23 +3348,23 @@ void ieee80211_sta_work(struct work_struct *work)
 	case IEEE80211_DISABLED:
 		break;
 	case IEEE80211_AUTHENTICATE:
-		ieee80211_authenticate(dev, ifsta);
+		ieee80211_authenticate(sdata, ifsta);
 		break;
 	case IEEE80211_ASSOCIATE:
-		ieee80211_associate(dev, ifsta);
+		ieee80211_associate(sdata, ifsta);
 		break;
 	case IEEE80211_ASSOCIATED:
-		ieee80211_associated(dev, ifsta);
+		ieee80211_associated(sdata, ifsta);
 		break;
 	case IEEE80211_IBSS_SEARCH:
-		ieee80211_sta_find_ibss(dev, ifsta);
+		ieee80211_sta_find_ibss(sdata, ifsta);
 		break;
 	case IEEE80211_IBSS_JOINED:
-		ieee80211_sta_merge_ibss(dev, ifsta);
+		ieee80211_sta_merge_ibss(sdata, ifsta);
 		break;
 #ifdef CONFIG_MAC80211_MESH
 	case IEEE80211_MESH_UP:
-		ieee80211_mesh_housekeeping(dev, ifsta);
+		ieee80211_mesh_housekeeping(sdata, ifsta);
 		break;
 #endif
 	default:
@@ -3406,20 +3372,20 @@ void ieee80211_sta_work(struct work_struct *work)
 		break;
 	}
 
-	if (ieee80211_privacy_mismatch(dev, ifsta)) {
+	if (ieee80211_privacy_mismatch(sdata, ifsta)) {
 		printk(KERN_DEBUG "%s: privacy configuration mismatch and "
-		       "mixed-cell disabled - disassociate\n", dev->name);
+		       "mixed-cell disabled - disassociate\n", sdata->dev->name);
 
-		ieee80211_send_disassoc(dev, ifsta, WLAN_REASON_UNSPECIFIED);
-		ieee80211_set_disassoc(dev, ifsta, 0);
+		ieee80211_send_disassoc(sdata, ifsta, WLAN_REASON_UNSPECIFIED);
+		ieee80211_set_disassoc(sdata, ifsta, 0);
 	}
 }
 
 
-static void ieee80211_sta_reset_auth(struct net_device *dev,
+static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 
 	if (local->ops->reset_tsf) {
 		/* Reset own TSF to allow time synchronization work. */
@@ -3440,15 +3406,14 @@ static void ieee80211_sta_reset_auth(struct net_device *dev,
 	ifsta->auth_transaction = -1;
 	ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
 	ifsta->auth_tries = ifsta->assoc_tries = 0;
-	netif_carrier_off(dev);
+	netif_carrier_off(sdata->dev);
 }
 
 
-void ieee80211_sta_req_auth(struct net_device *dev,
+void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata,
 			    struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return;
@@ -3492,11 +3457,10 @@ static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta,
 	return 0;
 }
 
-static int ieee80211_sta_config_auth(struct net_device *dev,
+static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_sta_bss *bss, *selected = NULL;
 	int top_rssi = 0, freq;
 
@@ -3535,22 +3499,22 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
 	spin_unlock_bh(&local->sta_bss_lock);
 
 	if (selected) {
-		ieee80211_set_freq(dev, selected->freq);
+		ieee80211_set_freq(sdata, selected->freq);
 		if (!(ifsta->flags & IEEE80211_STA_SSID_SET))
-			ieee80211_sta_set_ssid(dev, selected->ssid,
+			ieee80211_sta_set_ssid(sdata, selected->ssid,
 					       selected->ssid_len);
-		ieee80211_sta_set_bssid(dev, selected->bssid);
-		ieee80211_sta_def_wmm_params(dev, selected, 0);
+		ieee80211_sta_set_bssid(sdata, selected->bssid);
+		ieee80211_sta_def_wmm_params(sdata, selected, 0);
 		ieee80211_rx_bss_put(local, selected);
 		ifsta->state = IEEE80211_AUTHENTICATE;
-		ieee80211_sta_reset_auth(dev, ifsta);
+		ieee80211_sta_reset_auth(sdata, ifsta);
 		return 0;
 	} else {
 		if (ifsta->state != IEEE80211_AUTHENTICATE) {
 			if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL)
-				ieee80211_sta_start_scan(dev, NULL, 0);
+				ieee80211_sta_start_scan(sdata, NULL, 0);
 			else
-				ieee80211_sta_start_scan(dev, ifsta->ssid,
+				ieee80211_sta_start_scan(sdata, ifsta->ssid,
 							 ifsta->ssid_len);
 			ifsta->state = IEEE80211_AUTHENTICATE;
 			set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
@@ -3561,12 +3525,11 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
 }
 
 
-static int ieee80211_sta_create_ibss(struct net_device *dev,
+static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_sta_bss *bss;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_supported_band *sband;
 	u8 bssid[ETH_ALEN], *pos;
 	int i;
@@ -3582,15 +3545,15 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
 	 * random number generator get different BSSID. */
 	get_random_bytes(bssid, ETH_ALEN);
 	for (i = 0; i < ETH_ALEN; i++)
-		bssid[i] ^= dev->dev_addr[i];
+		bssid[i] ^= sdata->dev->dev_addr[i];
 	bssid[0] &= ~0x01;
 	bssid[0] |= 0x02;
 #endif
 
 	printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n",
-	       dev->name, print_mac(mac, bssid));
+	       sdata->dev->name, print_mac(mac, bssid));
 
-	bss = ieee80211_rx_bss_add(dev, bssid,
+	bss = ieee80211_rx_bss_add(sdata, bssid,
 				   local->hw.conf.channel->center_freq,
 				   sdata->u.sta.ssid, sdata->u.sta.ssid_len);
 	if (!bss)
@@ -3617,16 +3580,16 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
 		*pos++ = (u8) (rate / 5);
 	}
 
-	ret = ieee80211_sta_join_ibss(dev, ifsta, bss);
+	ret = ieee80211_sta_join_ibss(sdata, ifsta, bss);
 	ieee80211_rx_bss_put(local, bss);
 	return ret;
 }
 
 
-static int ieee80211_sta_find_ibss(struct net_device *dev,
+static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_sta_bss *bss;
 	int found = 0;
 	u8 bssid[ETH_ALEN];
@@ -3637,10 +3600,10 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
 	if (ifsta->ssid_len == 0)
 		return -EINVAL;
 
-	active_ibss = ieee80211_sta_active_ibss(dev);
+	active_ibss = ieee80211_sta_active_ibss(sdata);
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 	printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n",
-	       dev->name, active_ibss);
+	       sdata->dev->name, active_ibss);
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 	spin_lock_bh(&local->sta_bss_lock);
 	list_for_each_entry(bss, &local->sta_bss_list, list) {
@@ -3675,15 +3638,15 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
 		else
 			search_freq = local->hw.conf.channel->center_freq;
 
-		bss = ieee80211_rx_bss_get(dev, bssid, search_freq,
+		bss = ieee80211_rx_bss_get(local, bssid, search_freq,
 					   ifsta->ssid, ifsta->ssid_len);
 		if (!bss)
 			goto dont_join;
 
 		printk(KERN_DEBUG "%s: Selected IBSS BSSID %s"
 		       " based on configured SSID\n",
-		       dev->name, print_mac(mac, bssid));
-		ret = ieee80211_sta_join_ibss(dev, ifsta, bss);
+		       sdata->dev->name, print_mac(mac, bssid));
+		ret = ieee80211_sta_join_ibss(sdata, ifsta, bss);
 		ieee80211_rx_bss_put(local, bss);
 		return ret;
 	}
@@ -3695,14 +3658,14 @@ dont_join:
 
 	/* Selected IBSS not found in current scan results - try to scan */
 	if (ifsta->state == IEEE80211_IBSS_JOINED &&
-	    !ieee80211_sta_active_ibss(dev)) {
+	    !ieee80211_sta_active_ibss(sdata)) {
 		mod_timer(&ifsta->timer, jiffies +
 				      IEEE80211_IBSS_MERGE_INTERVAL);
 	} else if (time_after(jiffies, local->last_scan_completed +
 			      IEEE80211_SCAN_INTERVAL)) {
 		printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
-		       "join\n", dev->name);
-		return ieee80211_sta_req_scan(dev, ifsta->ssid,
+		       "join\n", sdata->dev->name);
+		return ieee80211_sta_req_scan(sdata, ifsta->ssid,
 					      ifsta->ssid_len);
 	} else if (ifsta->state != IEEE80211_IBSS_JOINED) {
 		int interval = IEEE80211_SCAN_INTERVAL;
@@ -3712,10 +3675,10 @@ dont_join:
 			if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) &&
 			    (!(local->oper_channel->flags &
 					IEEE80211_CHAN_NO_IBSS)))
-				return ieee80211_sta_create_ibss(dev, ifsta);
+				return ieee80211_sta_create_ibss(sdata, ifsta);
 			if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) {
 				printk(KERN_DEBUG "%s: IBSS not allowed on"
-				       " %d MHz\n", dev->name,
+				       " %d MHz\n", sdata->dev->name,
 				       local->hw.conf.channel->center_freq);
 			}
 
@@ -3733,9 +3696,8 @@ dont_join:
 }
 
 
-int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len)
+int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta;
 	int res;
 
@@ -3759,7 +3721,7 @@ int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len)
 			res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID);
 		if (res) {
 			printk(KERN_DEBUG "%s: Failed to config new SSID to "
-			       "the low-level driver\n", dev->name);
+			       "the low-level driver\n", sdata->dev->name);
 			return res;
 		}
 	}
@@ -3773,16 +3735,15 @@ int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len)
 	    !(ifsta->flags & IEEE80211_STA_BSSID_SET)) {
 		ifsta->ibss_join_req = jiffies;
 		ifsta->state = IEEE80211_IBSS_SEARCH;
-		return ieee80211_sta_find_ibss(dev, ifsta);
+		return ieee80211_sta_find_ibss(sdata, ifsta);
 	}
 
 	return 0;
 }
 
 
-int ieee80211_sta_get_ssid(struct net_device *dev, char *ssid, size_t *len)
+int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	memcpy(ssid, ifsta->ssid, ifsta->ssid_len);
 	*len = ifsta->ssid_len;
@@ -3790,13 +3751,11 @@ int ieee80211_sta_get_ssid(struct net_device *dev, char *ssid, size_t *len)
 }
 
 
-int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid)
+int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
 {
-	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_if_sta *ifsta;
 	int res;
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	ifsta = &sdata->u.sta;
 
 	if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) {
@@ -3809,7 +3768,7 @@ int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid)
 			res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
 		if (res) {
 			printk(KERN_DEBUG "%s: Failed to config new BSSID to "
-			       "the low-level driver\n", dev->name);
+			       "the low-level driver\n", sdata->dev->name);
 			return res;
 		}
 	}
@@ -3850,7 +3809,7 @@ static void ieee80211_send_nullfunc(struct ieee80211_local *local,
 	memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN);
 
-	ieee80211_sta_tx(sdata->dev, skb, 0);
+	ieee80211_sta_tx(sdata, skb, 0);
 }
 
 
@@ -3923,8 +3882,8 @@ done:
 		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
 		    (!(ifsta->state == IEEE80211_IBSS_JOINED) &&
-		    !ieee80211_sta_active_ibss(dev)))
-			ieee80211_sta_find_ibss(dev, ifsta);
+		    !ieee80211_sta_active_ibss(sdata)))
+			ieee80211_sta_find_ibss(sdata, ifsta);
 	}
 }
 EXPORT_SYMBOL(ieee80211_scan_completed);
@@ -4013,7 +3972,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 
 		if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN)
 			break;
-		ieee80211_send_probe_req(dev, NULL, local->scan_ssid,
+		ieee80211_send_probe_req(sdata, NULL, local->scan_ssid,
 					 local->scan_ssid_len);
 		next_delay = IEEE80211_CHANNEL_TIME;
 		break;
@@ -4025,10 +3984,10 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 }
 
 
-static int ieee80211_sta_start_scan(struct net_device *dev,
+static int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 				    u8 *ssid, size_t ssid_len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = scan_sdata->local;
 	struct ieee80211_sub_if_data *sdata;
 
 	if (ssid_len > IEEE80211_MAX_SSID_LEN)
@@ -4052,7 +4011,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 	 */
 
 	if (local->sta_sw_scanning || local->sta_hw_scanning) {
-		if (local->scan_dev == dev)
+		if (local->scan_dev == scan_sdata->dev)
 			return 0;
 		return -EBUSY;
 	}
@@ -4062,7 +4021,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 					     ssid, ssid_len);
 		if (!rc) {
 			local->sta_hw_scanning = 1;
-			local->scan_dev = dev;
+			local->scan_dev = scan_sdata->dev;
 		}
 		return rc;
 	}
@@ -4086,7 +4045,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 	local->scan_state = SCAN_SET_CHANNEL;
 	local->scan_channel_idx = 0;
 	local->scan_band = IEEE80211_BAND_2GHZ;
-	local->scan_dev = dev;
+	local->scan_dev = scan_sdata->dev;
 
 	netif_addr_lock_bh(local->mdev);
 	local->filter_flags |= FIF_BCN_PRBRESP_PROMISC;
@@ -4105,17 +4064,16 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
 }
 
 
-int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len)
+int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t ssid_len)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
-		return ieee80211_sta_start_scan(dev, ssid, ssid_len);
+		return ieee80211_sta_start_scan(sdata, ssid, ssid_len);
 
 	if (local->sta_sw_scanning || local->sta_hw_scanning) {
-		if (local->scan_dev == dev)
+		if (local->scan_dev == sdata->dev)
 			return 0;
 		return -EBUSY;
 	}
@@ -4129,12 +4087,11 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len)
 }
 
 static char *
-ieee80211_sta_scan_result(struct net_device *dev,
+ieee80211_sta_scan_result(struct ieee80211_local *local,
 			  struct iw_request_info *info,
 			  struct ieee80211_sta_bss *bss,
 			  char *current_ev, char *end_buf)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct iw_event iwe;
 
 	if (time_after(jiffies,
@@ -4321,11 +4278,10 @@ ieee80211_sta_scan_result(struct net_device *dev,
 }
 
 
-int ieee80211_sta_scan_results(struct net_device *dev,
+int ieee80211_sta_scan_results(struct ieee80211_local *local,
 			       struct iw_request_info *info,
 			       char *buf, size_t len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	char *current_ev = buf;
 	char *end_buf = buf + len;
 	struct ieee80211_sta_bss *bss;
@@ -4336,7 +4292,7 @@ int ieee80211_sta_scan_results(struct net_device *dev,
 			spin_unlock_bh(&local->sta_bss_lock);
 			return -E2BIG;
 		}
-		current_ev = ieee80211_sta_scan_result(dev, info, bss,
+		current_ev = ieee80211_sta_scan_result(local, info, bss,
 						       current_ev, end_buf);
 	}
 	spin_unlock_bh(&local->sta_bss_lock);
@@ -4344,9 +4300,8 @@ int ieee80211_sta_scan_results(struct net_device *dev,
 }
 
 
-int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len)
+int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 
 	kfree(ifsta->extra_ie);
@@ -4366,13 +4321,12 @@ int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len)
 }
 
 
-struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
+struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 					struct sk_buff *skb, u8 *bssid,
 					u8 *addr, u64 supp_rates)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	DECLARE_MAC_BUF(mac);
 	int band = local->hw.conf.channel->band;
 
@@ -4381,7 +4335,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 	if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) {
 		if (net_ratelimit()) {
 			printk(KERN_DEBUG "%s: No room for a new IBSS STA "
-			       "entry %s\n", dev->name, print_mac(mac, addr));
+			       "entry %s\n", sdata->dev->name, print_mac(mac, addr));
 		}
 		return NULL;
 	}
@@ -4391,7 +4345,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n",
-	       wiphy_name(local->hw.wiphy), print_mac(mac, addr), dev->name);
+	       wiphy_name(local->hw.wiphy), print_mac(mac, addr), sdata->dev->name);
 #endif
 
 	sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
@@ -4414,31 +4368,29 @@ struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev,
 }
 
 
-int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason)
+int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 
 	printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n",
-	       dev->name, reason);
+	       sdata->dev->name, reason);
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
 	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
 		return -EINVAL;
 
-	ieee80211_send_deauth(dev, ifsta, reason);
-	ieee80211_set_disassoc(dev, ifsta, 1);
+	ieee80211_send_deauth(sdata, ifsta, reason);
+	ieee80211_set_disassoc(sdata, ifsta, 1);
 	return 0;
 }
 
 
-int ieee80211_sta_disassociate(struct net_device *dev, u16 reason)
+int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 
 	printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n",
-	       dev->name, reason);
+	       sdata->dev->name, reason);
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return -EINVAL;
@@ -4446,8 +4398,8 @@ int ieee80211_sta_disassociate(struct net_device *dev, u16 reason)
 	if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED))
 		return -1;
 
-	ieee80211_send_disassoc(dev, ifsta, reason);
-	ieee80211_set_disassoc(dev, ifsta, 0);
+	ieee80211_send_disassoc(sdata, ifsta, reason);
+	ieee80211_set_disassoc(sdata, ifsta, 0);
 	return 0;
 }
 
@@ -4464,7 +4416,7 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw,
 			if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 				continue;
 
-			ieee80211_sta_req_auth(sdata->dev, &sdata->u.sta);
+			ieee80211_sta_req_auth(sdata, &sdata->u.sta);
 		}
 		rcu_read_unlock();
 		break;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2464263cb7a..fd83ef760a3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -404,11 +404,11 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
 	struct sk_buff *skb = rx->skb;
 
 	if (unlikely(local->sta_hw_scanning))
-		return ieee80211_sta_rx_scan(rx->dev, skb, rx->status);
+		return ieee80211_sta_rx_scan(rx->sdata, skb, rx->status);
 
 	if (unlikely(local->sta_sw_scanning)) {
 		/* drop all the other packets during a software scan anyway */
-		if (ieee80211_sta_rx_scan(rx->dev, skb, rx->status)
+		if (ieee80211_sta_rx_scan(rx->sdata, skb, rx->status)
 		    != RX_QUEUED)
 			dev_kfree_skb(skb);
 		return RX_QUEUED;
@@ -466,7 +466,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 
 	if (ieee80211_is_data(hdr->frame_control) &&
 	    is_multicast_ether_addr(hdr->addr1) &&
-	    mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->dev))
+	    mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->sdata))
 		return RX_DROP_MONITOR;
 #undef msh_h_get
 
@@ -1523,7 +1523,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
 	     sdata->vif.type == IEEE80211_IF_TYPE_IBSS ||
 	     sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) &&
 	    !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
-		ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->status);
+		ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status);
 	else
 		return RX_DROP_MONITOR;
 
@@ -1570,7 +1570,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
 	    !ieee80211_is_auth(hdr->frame_control))
 		goto ignore;
 
-	mac80211_ev_michael_mic_failure(rx->dev, keyidx, hdr);
+	mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr);
  ignore:
 	dev_kfree_skb(rx->skb);
 	rx->skb = NULL;
@@ -1744,7 +1744,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			return 0;
 		if (ieee80211_is_beacon(hdr->frame_control)) {
 			if (!rx->sta)
-				rx->sta = ieee80211_ibss_add_sta(sdata->dev,
+				rx->sta = ieee80211_ibss_add_sta(sdata,
 						rx->skb, bssid, hdr->addr2,
 						BIT(rx->status->rate_idx));
 			return 1;
@@ -1760,7 +1760,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 				return 0;
 			rx->flags &= ~IEEE80211_RX_RA_MATCH;
 		} else if (!rx->sta)
-			rx->sta = ieee80211_ibss_add_sta(sdata->dev, rx->skb,
+			rx->sta = ieee80211_ibss_add_sta(sdata, rx->skb,
 						bssid, hdr->addr2,
 						BIT(rx->status->rate_idx));
 		break;
@@ -2066,7 +2066,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
 	/* if this mpdu is fragmented - terminate rx aggregation session */
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	if (sc & IEEE80211_SCTL_FRAG) {
-		ieee80211_sta_stop_rx_ba_session(sta->sdata->dev, sta->addr,
+		ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr,
 			tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP);
 		ret = 1;
 		goto end_reorder;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 85f3ba85c13..c413d4836af 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1327,7 +1327,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 			if (is_multicast_ether_addr(hdr->addr3))
 				memcpy(hdr->addr1, hdr->addr3, ETH_ALEN);
 			else
-				if (mesh_nexthop_lookup(skb, odev))
+				if (mesh_nexthop_lookup(skb, osdata))
 					return  0;
 			if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0)
 				IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.sta,
@@ -1908,7 +1908,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 		*pos++ = WLAN_EID_SSID;
 		*pos++ = 0x0;
 
-		mesh_mgmt_ies_add(skb, sdata->dev);
+		mesh_mgmt_ies_add(skb, sdata);
 
 		num_beacons = &sdata->u.sta.num_beacons;
 	} else {
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 34fa8ed1e78..735daa355c3 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -27,22 +27,19 @@
 #include "aes_ccm.h"
 
 
-static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr,
+static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta_addr,
 				    int idx, int alg, int remove,
 				    int set_tx_key, const u8 *_key,
 				    size_t key_len)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 	struct ieee80211_key *key;
-	struct ieee80211_sub_if_data *sdata;
 	int err;
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
 	if (idx < 0 || idx >= NUM_DEFAULT_KEYS) {
 		printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n",
-		       dev->name, idx);
+		       sdata->dev->name, idx);
 		return -EINVAL;
 	}
 
@@ -127,11 +124,11 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
 
 	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
 	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
-		int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length);
+		int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length);
 		if (ret)
 			return ret;
 		sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
-		ieee80211_sta_req_auth(dev, &sdata->u.sta);
+		ieee80211_sta_req_auth(sdata, &sdata->u.sta);
 		return 0;
 	}
 
@@ -333,12 +330,11 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev,
 	return 0;
 }
 
-int ieee80211_set_freq(struct net_device *dev, int freqMHz)
+int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
 {
 	int ret = -EINVAL;
 	struct ieee80211_channel *chan;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 
 	chan = ieee80211_get_channel(local->hw.wiphy, freqMHz);
 
@@ -346,7 +342,7 @@ int ieee80211_set_freq(struct net_device *dev, int freqMHz)
 		if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
 		    chan->flags & IEEE80211_CHAN_NO_IBSS) {
 			printk(KERN_DEBUG "%s: IBSS not allowed on frequency "
-				"%d MHz\n", dev->name, chan->center_freq);
+				"%d MHz\n", sdata->dev->name, chan->center_freq);
 			return ret;
 		}
 		local->oper_channel = chan;
@@ -379,14 +375,14 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev,
 					IEEE80211_STA_AUTO_CHANNEL_SEL;
 			return 0;
 		} else
-			return ieee80211_set_freq(dev,
+			return ieee80211_set_freq(sdata,
 				ieee80211_channel_to_frequency(freq->m));
 	} else {
 		int i, div = 1000000;
 		for (i = 0; i < freq->e; i++)
 			div /= 10;
 		if (div > 0)
-			return ieee80211_set_freq(dev, freq->m / div);
+			return ieee80211_set_freq(sdata, freq->m / div);
 		else
 			return -EINVAL;
 	}
@@ -432,10 +428,10 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
 			sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL;
 		else
 			sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL;
-		ret = ieee80211_sta_set_ssid(dev, ssid, len);
+		ret = ieee80211_sta_set_ssid(sdata, ssid, len);
 		if (ret)
 			return ret;
-		ieee80211_sta_req_auth(dev, &sdata->u.sta);
+		ieee80211_sta_req_auth(sdata, &sdata->u.sta);
 		return 0;
 	}
 
@@ -460,7 +456,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
 	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
-		int res = ieee80211_sta_get_ssid(dev, ssid, &len);
+		int res = ieee80211_sta_get_ssid(sdata, ssid, &len);
 		if (res == 0) {
 			data->length = len;
 			data->flags = 1;
@@ -504,10 +500,10 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
 			sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL;
 		else
 			sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
-		ret = ieee80211_sta_set_bssid(dev, (u8 *) &ap_addr->sa_data);
+		ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data);
 		if (ret)
 			return ret;
-		ieee80211_sta_req_auth(dev, &sdata->u.sta);
+		ieee80211_sta_req_auth(sdata, &sdata->u.sta);
 		return 0;
 	} else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
 		/*
@@ -584,7 +580,7 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
 		ssid_len = req->essid_len;
 	}
 
-	return ieee80211_sta_req_scan(dev, ssid, ssid_len);
+	return ieee80211_sta_req_scan(sdata, ssid, ssid_len);
 }
 
 
@@ -594,11 +590,14 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev,
 {
 	int res;
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	if (local->sta_sw_scanning || local->sta_hw_scanning)
 		return -EAGAIN;
 
-	res = ieee80211_sta_scan_results(dev, info, extra, data->length);
+	res = ieee80211_sta_scan_results(local, info, extra, data->length);
 	if (res >= 0) {
 		data->length = res;
 		return 0;
@@ -894,10 +893,10 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev,
 	switch (mlme->cmd) {
 	case IW_MLME_DEAUTH:
 		/* TODO: mlme->addr.sa_data */
-		return ieee80211_sta_deauthenticate(dev, mlme->reason_code);
+		return ieee80211_sta_deauthenticate(sdata, mlme->reason_code);
 	case IW_MLME_DISASSOC:
 		/* TODO: mlme->addr.sa_data */
-		return ieee80211_sta_disassociate(dev, mlme->reason_code);
+		return ieee80211_sta_disassociate(sdata, mlme->reason_code);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -938,7 +937,7 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev,
 	}
 
 	return ieee80211_set_encryption(
-		dev, bcaddr,
+		sdata, bcaddr,
 		idx, alg, remove,
 		!sdata->default_key,
 		keybuf, erq->length);
@@ -1184,7 +1183,7 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
 	} else
 		idx--;
 
-	return ieee80211_set_encryption(dev, ext->addr.sa_data, idx, alg,
+	return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg,
 					remove,
 					ext->ext_flags &
 					IW_ENCODE_EXT_SET_TX_KEY,
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 2f33df0dccc..78021780b88 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -127,7 +127,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 		if (!(rx->flags & IEEE80211_RX_RA_MATCH))
 			return RX_DROP_UNUSABLE;
 
-		mac80211_ev_michael_mic_failure(rx->dev, rx->key->conf.keyidx,
+		mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx,
 						(void *) skb->data);
 		return RX_DROP_UNUSABLE;
 	}
-- 
cgit v1.2.3-70-g09d2


From 8e7cdbb6333ef7654e708bd60e50a123688dcd7b Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Sun, 3 Aug 2008 14:32:01 +0300
Subject: mac80211: filter probes in ieee80211_rx_mgmt_probe_resp

This patch moves filtering statement from ieee80211_rx_bss_info
which is called for both beacon and probe to ieee80211_rx_mgmt_probe_resp
and save few cycles in beacon parsing.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index fb8e1e742ca..ae5a5cbabae 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2550,9 +2550,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	DECLARE_MAC_BUF(mac);
 	DECLARE_MAC_BUF(mac2);
 
-	if (!beacon && memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN))
-		return; /* ignore ProbeResp to foreign address */
-
 	beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id &&
@@ -2870,6 +2867,9 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 	size_t baselen;
 	struct ieee802_11_elems elems;
 
+	if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN))
+		return; /* ignore ProbeResp to foreign address */
+
 	baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt;
 	if (baselen > len)
 		return;
-- 
cgit v1.2.3-70-g09d2


From 48c2fc59aa415ba92be0ad3a7e741c46883e3944 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 6 Aug 2008 14:22:01 +0300
Subject: mac80211: cleanup mlme state namespace

This patch move add STA_MLME to station mlme state defines.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 34 ++++++++++++++--------
 net/mac80211/main.c        |  2 +-
 net/mac80211/mlme.c        | 72 +++++++++++++++++++++++-----------------------
 net/mac80211/wext.c        |  4 +--
 4 files changed, 61 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b5d3f58a784..747814f319a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -292,17 +292,33 @@ struct mesh_config {
 #define IEEE80211_STA_AUTO_BSSID_SEL	BIT(11)
 #define IEEE80211_STA_AUTO_CHANNEL_SEL	BIT(12)
 #define IEEE80211_STA_PRIVACY_INVOKED	BIT(13)
+/* flags for  MLME request*/
+#define IEEE80211_STA_REQ_SCAN 0
+#define IEEE80211_STA_REQ_AUTH 1
+#define IEEE80211_STA_REQ_RUN  2
+
+/* flags used for setting mlme state */
+enum ieee80211_sta_mlme_state {
+	IEEE80211_STA_MLME_DISABLED,
+	IEEE80211_STA_MLME_AUTHENTICATE,
+	IEEE80211_STA_MLME_ASSOCIATE,
+	IEEE80211_STA_MLME_ASSOCIATED,
+	IEEE80211_STA_MLME_IBSS_SEARCH,
+	IEEE80211_STA_MLME_IBSS_JOINED,
+	IEEE80211_STA_MLME_MESH_UP
+};
+
+/* bitfield of allowed auth algs */
+#define IEEE80211_AUTH_ALG_OPEN BIT(0)
+#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1)
+#define IEEE80211_AUTH_ALG_LEAP BIT(2)
+
 struct ieee80211_if_sta {
 	struct timer_list timer;
 	struct work_struct work;
 	u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
-	enum {
-		IEEE80211_DISABLED, IEEE80211_AUTHENTICATE,
-		IEEE80211_ASSOCIATE, IEEE80211_ASSOCIATED,
-		IEEE80211_IBSS_SEARCH, IEEE80211_IBSS_JOINED,
-		IEEE80211_MESH_UP
-	} state;
+	enum ieee80211_sta_mlme_state state;
 	size_t ssid_len;
 	u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
 	size_t scan_ssid_len;
@@ -352,13 +368,7 @@ struct ieee80211_if_sta {
 	unsigned long last_probe;
 
 	unsigned int flags;
-#define IEEE80211_STA_REQ_SCAN 0
-#define IEEE80211_STA_REQ_AUTH 1
-#define IEEE80211_STA_REQ_RUN  2
 
-#define IEEE80211_AUTH_ALG_OPEN BIT(0)
-#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1)
-#define IEEE80211_AUTH_ALG_LEAP BIT(2)
 	unsigned int auth_algs; /* bitfield of allowed auth algs */
 	int auth_alg; /* currently used IEEE 802.11 authentication algorithm */
 	int auth_transaction;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 81963948665..398ca66bdfc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -539,7 +539,7 @@ static int ieee80211_stop(struct net_device *dev)
 		/* fall through */
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
-		sdata->u.sta.state = IEEE80211_DISABLED;
+		sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED;
 		memset(sdata->u.sta.bssid, 0, ETH_ALEN);
 		del_timer_sync(&sdata->u.sta.timer);
 		/*
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ae5a5cbabae..f05519d1c2c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -663,11 +663,11 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata,
 		printk(KERN_DEBUG "%s: authentication with AP %s"
 		       " timed out\n",
 		       sdata->dev->name, print_mac(mac, ifsta->bssid));
-		ifsta->state = IEEE80211_DISABLED;
+		ifsta->state = IEEE80211_STA_MLME_DISABLED;
 		return;
 	}
 
-	ifsta->state = IEEE80211_AUTHENTICATE;
+	ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
 	printk(KERN_DEBUG "%s: authenticate with AP %s\n",
 	       sdata->dev->name, print_mac(mac, ifsta->bssid));
 
@@ -1004,17 +1004,17 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata,
 		printk(KERN_DEBUG "%s: association with AP %s"
 		       " timed out\n",
 		       sdata->dev->name, print_mac(mac, ifsta->bssid));
-		ifsta->state = IEEE80211_DISABLED;
+		ifsta->state = IEEE80211_STA_MLME_DISABLED;
 		return;
 	}
 
-	ifsta->state = IEEE80211_ASSOCIATE;
+	ifsta->state = IEEE80211_STA_MLME_ASSOCIATE;
 	printk(KERN_DEBUG "%s: associate with AP %s\n",
 	       sdata->dev->name, print_mac(mac, ifsta->bssid));
 	if (ieee80211_privacy_mismatch(sdata, ifsta)) {
 		printk(KERN_DEBUG "%s: mismatch in privacy configuration and "
 		       "mixed-cell disabled - abort association\n", sdata->dev->name);
-		ifsta->state = IEEE80211_DISABLED;
+		ifsta->state = IEEE80211_STA_MLME_DISABLED;
 		return;
 	}
 
@@ -1037,7 +1037,7 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata,
 	 * for better APs. */
 	/* TODO: remove expired BSSes */
 
-	ifsta->state = IEEE80211_ASSOCIATED;
+	ifsta->state = IEEE80211_STA_MLME_ASSOCIATED;
 
 	rcu_read_lock();
 
@@ -1080,7 +1080,7 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata,
 		sta_info_destroy(sta);
 
 	if (disassoc) {
-		ifsta->state = IEEE80211_DISABLED;
+		ifsta->state = IEEE80211_STA_MLME_DISABLED;
 		ieee80211_set_associated(sdata, ifsta, 0);
 	} else {
 		mod_timer(&ifsta->timer, jiffies +
@@ -1838,7 +1838,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	u16 auth_alg, auth_transaction, status_code;
 	DECLARE_MAC_BUF(mac);
 
-	if (ifsta->state != IEEE80211_AUTHENTICATE &&
+	if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
 	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
 		return;
 
@@ -1942,10 +1942,10 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 	if (ifsta->flags & IEEE80211_STA_AUTHENTICATED)
 		printk(KERN_DEBUG "%s: deauthenticated\n", sdata->dev->name);
 
-	if (ifsta->state == IEEE80211_AUTHENTICATE ||
-	    ifsta->state == IEEE80211_ASSOCIATE ||
-	    ifsta->state == IEEE80211_ASSOCIATED) {
-		ifsta->state = IEEE80211_AUTHENTICATE;
+	if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE ||
+	    ifsta->state == IEEE80211_STA_MLME_ASSOCIATE ||
+	    ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) {
+		ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
 		mod_timer(&ifsta->timer, jiffies +
 				      IEEE80211_RETRY_AUTH_INTERVAL);
 	}
@@ -1974,8 +1974,8 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 	if (ifsta->flags & IEEE80211_STA_ASSOCIATED)
 		printk(KERN_DEBUG "%s: disassociated\n", sdata->dev->name);
 
-	if (ifsta->state == IEEE80211_ASSOCIATED) {
-		ifsta->state = IEEE80211_ASSOCIATE;
+	if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) {
+		ifsta->state = IEEE80211_STA_MLME_ASSOCIATE;
 		mod_timer(&ifsta->timer, jiffies +
 				      IEEE80211_RETRY_AUTH_INTERVAL);
 	}
@@ -2005,7 +2005,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	/* AssocResp and ReassocResp have identical structure, so process both
 	 * of them in this function. */
 
-	if (ifsta->state != IEEE80211_ASSOCIATE)
+	if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATE)
 		return;
 
 	if (len < 24 + 6)
@@ -2487,7 +2487,7 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_sta_def_wmm_params(sdata, bss, 1);
 
-	ifsta->state = IEEE80211_IBSS_JOINED;
+	ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED;
 	mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
 
 	memset(&wrqu, 0, sizeof(wrqu));
@@ -2960,7 +2960,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 #endif
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS ||
-	    ifsta->state != IEEE80211_IBSS_JOINED ||
+	    ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED ||
 	    len < 24 + 2 || !ifsta->probe_resp)
 		return;
 
@@ -3279,7 +3279,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_sta *ifsta;
 	ifsta = &sdata->u.sta;
-	ifsta->state = IEEE80211_MESH_UP;
+	ifsta->state = IEEE80211_STA_MLME_MESH_UP;
 	ieee80211_sta_timer((unsigned long)sdata);
 	ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
 }
@@ -3327,8 +3327,8 @@ void ieee80211_sta_work(struct work_struct *work)
 		mesh_path_start_discovery(sdata);
 #endif
 
-	if (ifsta->state != IEEE80211_AUTHENTICATE &&
-	    ifsta->state != IEEE80211_ASSOCIATE &&
+	if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
+	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
 	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
 		if (ifsta->scan_ssid_len)
 			ieee80211_sta_start_scan(sdata, ifsta->scan_ssid, ifsta->scan_ssid_len);
@@ -3345,25 +3345,25 @@ void ieee80211_sta_work(struct work_struct *work)
 		return;
 
 	switch (ifsta->state) {
-	case IEEE80211_DISABLED:
+	case IEEE80211_STA_MLME_DISABLED:
 		break;
-	case IEEE80211_AUTHENTICATE:
+	case IEEE80211_STA_MLME_AUTHENTICATE:
 		ieee80211_authenticate(sdata, ifsta);
 		break;
-	case IEEE80211_ASSOCIATE:
+	case IEEE80211_STA_MLME_ASSOCIATE:
 		ieee80211_associate(sdata, ifsta);
 		break;
-	case IEEE80211_ASSOCIATED:
+	case IEEE80211_STA_MLME_ASSOCIATED:
 		ieee80211_associated(sdata, ifsta);
 		break;
-	case IEEE80211_IBSS_SEARCH:
+	case IEEE80211_STA_MLME_IBSS_SEARCH:
 		ieee80211_sta_find_ibss(sdata, ifsta);
 		break;
-	case IEEE80211_IBSS_JOINED:
+	case IEEE80211_STA_MLME_IBSS_JOINED:
 		ieee80211_sta_merge_ibss(sdata, ifsta);
 		break;
 #ifdef CONFIG_MAC80211_MESH
-	case IEEE80211_MESH_UP:
+	case IEEE80211_STA_MLME_MESH_UP:
 		ieee80211_mesh_housekeeping(sdata, ifsta);
 		break;
 #endif
@@ -3506,20 +3506,20 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 		ieee80211_sta_set_bssid(sdata, selected->bssid);
 		ieee80211_sta_def_wmm_params(sdata, selected, 0);
 		ieee80211_rx_bss_put(local, selected);
-		ifsta->state = IEEE80211_AUTHENTICATE;
+		ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
 		ieee80211_sta_reset_auth(sdata, ifsta);
 		return 0;
 	} else {
-		if (ifsta->state != IEEE80211_AUTHENTICATE) {
+		if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE) {
 			if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL)
 				ieee80211_sta_start_scan(sdata, NULL, 0);
 			else
 				ieee80211_sta_start_scan(sdata, ifsta->ssid,
 							 ifsta->ssid_len);
-			ifsta->state = IEEE80211_AUTHENTICATE;
+			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
 			set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
 		} else
-			ifsta->state = IEEE80211_DISABLED;
+			ifsta->state = IEEE80211_STA_MLME_DISABLED;
 	}
 	return -1;
 }
@@ -3657,7 +3657,7 @@ dont_join:
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 
 	/* Selected IBSS not found in current scan results - try to scan */
-	if (ifsta->state == IEEE80211_IBSS_JOINED &&
+	if (ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED &&
 	    !ieee80211_sta_active_ibss(sdata)) {
 		mod_timer(&ifsta->timer, jiffies +
 				      IEEE80211_IBSS_MERGE_INTERVAL);
@@ -3667,7 +3667,7 @@ dont_join:
 		       "join\n", sdata->dev->name);
 		return ieee80211_sta_req_scan(sdata, ifsta->ssid,
 					      ifsta->ssid_len);
-	} else if (ifsta->state != IEEE80211_IBSS_JOINED) {
+	} else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) {
 		int interval = IEEE80211_SCAN_INTERVAL;
 
 		if (time_after(jiffies, ifsta->ibss_join_req +
@@ -3687,7 +3687,7 @@ dont_join:
 			interval = IEEE80211_SCAN_INTERVAL_SLOW;
 		}
 
-		ifsta->state = IEEE80211_IBSS_SEARCH;
+		ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH;
 		mod_timer(&ifsta->timer, jiffies + interval);
 		return 0;
 	}
@@ -3734,7 +3734,7 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
 	    !(ifsta->flags & IEEE80211_STA_BSSID_SET)) {
 		ifsta->ibss_join_req = jiffies;
-		ifsta->state = IEEE80211_IBSS_SEARCH;
+		ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH;
 		return ieee80211_sta_find_ibss(sdata, ifsta);
 	}
 
@@ -3881,7 +3881,7 @@ done:
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
-		    (!(ifsta->state == IEEE80211_IBSS_JOINED) &&
+		    (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) &&
 		    !ieee80211_sta_active_ibss(sdata)))
 			ieee80211_sta_find_ibss(sdata, ifsta);
 	}
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 735daa355c3..beae664ab48 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -535,8 +535,8 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
 	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
-		if (sdata->u.sta.state == IEEE80211_ASSOCIATED ||
-		    sdata->u.sta.state == IEEE80211_IBSS_JOINED) {
+		if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATED ||
+		    sdata->u.sta.state == IEEE80211_STA_MLME_IBSS_JOINED) {
 			ap_addr->sa_family = ARPHRD_ETHER;
 			memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN);
 			return 0;
-- 
cgit v1.2.3-70-g09d2


From 6042a3e3ff7943e4ff5cbcb8c223ea87337501ea Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Fri, 8 Aug 2008 01:50:46 +0300
Subject: mac80211: change number of pre-assoc scans

This patch fixes noticed problem in noisy environments of 50+ APs
that scan fails to find the requested AP on first try, which
leads to connection refusal. second scan has empirically proven to fix
this problem in almost all cases.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: Esti Kummer <ester.kummer@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 4 +++-
 net/mac80211/mlme.c        | 9 +++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 747814f319a..c6de3156930 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -361,7 +361,9 @@ struct ieee80211_if_sta {
 
 	struct sk_buff_head skb_queue;
 
-	int auth_tries, assoc_tries;
+	int assoc_scan_tries; /* number of scans done pre-association */
+	int auth_tries; /* retries for auth req */
+	int assoc_tries; /* retries for assoc req */
 
 	unsigned long request;
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f05519d1c2c..e821d1ac5b0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -34,6 +34,7 @@
 #include "led.h"
 #include "mesh.h"
 
+#define IEEE80211_ASSOC_SCANS_MAX_TRIES 2
 #define IEEE80211_AUTH_TIMEOUT (HZ / 5)
 #define IEEE80211_AUTH_MAX_TRIES 3
 #define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
@@ -596,6 +597,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 {
 	if (deauth)
 		ifsta->auth_tries = 0;
+	ifsta->assoc_scan_tries = 0;
 	ifsta->assoc_tries = 0;
 	ieee80211_set_associated(sdata, ifsta, 0);
 }
@@ -3405,7 +3407,9 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata,
 		ifsta->auth_alg = WLAN_AUTH_OPEN;
 	ifsta->auth_transaction = -1;
 	ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
-	ifsta->auth_tries = ifsta->assoc_tries = 0;
+	ifsta->assoc_scan_tries = 0;
+	ifsta->auth_tries = 0;
+	ifsta->assoc_tries = 0;
 	netif_carrier_off(sdata->dev);
 }
 
@@ -3510,7 +3514,8 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 		ieee80211_sta_reset_auth(sdata, ifsta);
 		return 0;
 	} else {
-		if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE) {
+		if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) {
+			ifsta->assoc_scan_tries++;
 			if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL)
 				ieee80211_sta_start_scan(sdata, NULL, 0);
 			else
-- 
cgit v1.2.3-70-g09d2


From 9859b81eaeb8d48563b5fbd90215c0ae606455a3 Mon Sep 17 00:00:00 2001
From: Ron Rindjunsky <ron.rindjunsky@intel.com>
Date: Sat, 9 Aug 2008 03:02:19 +0300
Subject: mac80211: add direct probe before association

This patch adds a direct probe request as first step in the association
flow if data we have is not up to date. Motivation of this step is to make
sure that the bss information we have is correct, since last scan could
have been done a while ago, and beacons do not fully answer this need as
there are potential differences between them and probe responses (e.g.
WMM parameter element)

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  9 ++++--
 net/mac80211/mlme.c        | 79 +++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 73 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c6de3156930..8361054fb7c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -100,7 +100,7 @@ struct ieee80211_sta_bss {
 	u64 timestamp;
 	int beacon_int;
 
-	bool probe_resp;
+	unsigned long last_probe_resp;
 	unsigned long last_update;
 
 	/* during assocation, we save an ERP value from a probe response so
@@ -294,12 +294,14 @@ struct mesh_config {
 #define IEEE80211_STA_PRIVACY_INVOKED	BIT(13)
 /* flags for  MLME request*/
 #define IEEE80211_STA_REQ_SCAN 0
-#define IEEE80211_STA_REQ_AUTH 1
-#define IEEE80211_STA_REQ_RUN  2
+#define IEEE80211_STA_REQ_DIRECT_PROBE 1
+#define IEEE80211_STA_REQ_AUTH 2
+#define IEEE80211_STA_REQ_RUN  3
 
 /* flags used for setting mlme state */
 enum ieee80211_sta_mlme_state {
 	IEEE80211_STA_MLME_DISABLED,
+	IEEE80211_STA_MLME_DIRECT_PROBE,
 	IEEE80211_STA_MLME_AUTHENTICATE,
 	IEEE80211_STA_MLME_ASSOCIATE,
 	IEEE80211_STA_MLME_ASSOCIATED,
@@ -362,6 +364,7 @@ struct ieee80211_if_sta {
 	struct sk_buff_head skb_queue;
 
 	int assoc_scan_tries; /* number of scans done pre-association */
+	int direct_probe_tries; /* retries for direct probes */
 	int auth_tries; /* retries for auth req */
 	int assoc_tries; /* retries for assoc req */
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e821d1ac5b0..84999791a33 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -595,8 +595,10 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta, int deauth)
 {
-	if (deauth)
+	if (deauth) {
+		ifsta->direct_probe_tries = 0;
 		ifsta->auth_tries = 0;
+	}
 	ifsta->assoc_scan_tries = 0;
 	ifsta->assoc_tries = 0;
 	ieee80211_set_associated(sdata, ifsta, 0);
@@ -654,6 +656,36 @@ static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_tx(sdata, skb, encrypt);
 }
 
+static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata,
+				   struct ieee80211_if_sta *ifsta)
+{
+	DECLARE_MAC_BUF(mac);
+
+	ifsta->direct_probe_tries++;
+	if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) {
+		printk(KERN_DEBUG "%s: direct probe to AP %s timed out\n",
+		       sdata->dev->name, print_mac(mac, ifsta->bssid));
+		ifsta->state = IEEE80211_STA_MLME_DISABLED;
+		return;
+	}
+
+	printk(KERN_DEBUG "%s: direct probe to AP %s try %d\n",
+			sdata->dev->name, print_mac(mac, ifsta->bssid),
+			ifsta->direct_probe_tries);
+
+	ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE;
+
+	set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifsta->request);
+
+	/* Direct probe is sent to broadcast address as some APs
+	 * will not answer to direct packet in unassociated state.
+	 */
+	ieee80211_send_probe_req(sdata, NULL,
+				 ifsta->ssid, ifsta->ssid_len);
+
+	mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
+}
+
 
 static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta)
@@ -1947,7 +1979,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 	if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE ||
 	    ifsta->state == IEEE80211_STA_MLME_ASSOCIATE ||
 	    ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) {
-		ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
+		ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE;
 		mod_timer(&ifsta->timer, jiffies +
 				      IEEE80211_RETRY_AUTH_INTERVAL);
 	}
@@ -2540,8 +2572,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_mgmt *mgmt,
 				  size_t len,
 				  struct ieee80211_rx_status *rx_status,
-				  struct ieee802_11_elems *elems,
-				  int beacon)
+				  struct ieee802_11_elems *elems)
 {
 	struct ieee80211_local *local = sdata->local;
 	int freq, clen;
@@ -2549,6 +2580,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	struct sta_info *sta;
 	u64 beacon_timestamp, rx_timestamp;
 	struct ieee80211_channel *channel;
+	bool beacon = ieee80211_is_beacon(mgmt->frame_control);
 	DECLARE_MAC_BUF(mac);
 	DECLARE_MAC_BUF(mac2);
 
@@ -2705,15 +2737,14 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	bss->signal = rx_status->signal;
 	bss->noise = rx_status->noise;
 	bss->qual = rx_status->qual;
-	if (!beacon && !bss->probe_resp)
-		bss->probe_resp = true;
-
+	if (!beacon)
+		bss->last_probe_resp = jiffies;
 	/*
 	 * In STA mode, the remaining parameters should not be overridden
 	 * by beacons because they're not necessarily accurate there.
 	 */
 	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-	    bss->probe_resp && beacon) {
+	    bss->last_probe_resp && beacon) {
 		ieee80211_rx_bss_put(local, bss);
 		return;
 	}
@@ -2868,6 +2899,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 {
 	size_t baselen;
 	struct ieee802_11_elems elems;
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 
 	if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN))
 		return; /* ignore ProbeResp to foreign address */
@@ -2879,7 +2911,15 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
 				&elems);
 
-	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, 0);
+	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
+
+	/* direct probe may be part of the association flow */
+	if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE,
+							&ifsta->request)) {
+		printk(KERN_DEBUG "%s direct probe responded\n",
+		       sdata->dev->name);
+		ieee80211_authenticate(sdata, ifsta);
+	}
 }
 
 
@@ -2902,7 +2942,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
-	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, 1);
+	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return;
@@ -3329,7 +3369,8 @@ void ieee80211_sta_work(struct work_struct *work)
 		mesh_path_start_discovery(sdata);
 #endif
 
-	if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
+	if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE &&
+	    ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
 	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
 	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
 		if (ifsta->scan_ssid_len)
@@ -3349,6 +3390,9 @@ void ieee80211_sta_work(struct work_struct *work)
 	switch (ifsta->state) {
 	case IEEE80211_STA_MLME_DISABLED:
 		break;
+	case IEEE80211_STA_MLME_DIRECT_PROBE:
+		ieee80211_direct_probe(sdata, ifsta);
+		break;
 	case IEEE80211_STA_MLME_AUTHENTICATE:
 		ieee80211_authenticate(sdata, ifsta);
 		break;
@@ -3408,6 +3452,7 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata,
 	ifsta->auth_transaction = -1;
 	ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
 	ifsta->assoc_scan_tries = 0;
+	ifsta->direct_probe_tries = 0;
 	ifsta->auth_tries = 0;
 	ifsta->assoc_tries = 0;
 	netif_carrier_off(sdata->dev);
@@ -3509,8 +3554,18 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 					       selected->ssid_len);
 		ieee80211_sta_set_bssid(sdata, selected->bssid);
 		ieee80211_sta_def_wmm_params(sdata, selected, 0);
+
+		/* Send out direct probe if no probe resp was received or
+		 * the one we have is outdated
+		 */
+		if (!selected->last_probe_resp ||
+		    time_after(jiffies, selected->last_probe_resp
+					+ IEEE80211_SCAN_RESULT_EXPIRE))
+			ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE;
+		else
+			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
+
 		ieee80211_rx_bss_put(local, selected);
-		ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
 		ieee80211_sta_reset_auth(sdata, ifsta);
 		return 0;
 	} else {
-- 
cgit v1.2.3-70-g09d2


From fdc0bde90a689b9145f2b6f271c03f4c99d09667 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 23 Aug 2008 04:43:33 -0700
Subject: icmp: icmp_sk() should not use smp_processor_id() in preemptible code

Pass namespace into icmp_xmit_lock, obtain socket inside and return
it as a result for caller.

Thanks Alexey Dobryan for this report:

Steps to reproduce:

	CONFIG_PREEMPT=y
	CONFIG_DEBUG_PREEMPT=y
	tracepath <something>

BUG: using smp_processor_id() in preemptible [00000000] code: tracepath/3205
caller is icmp_sk+0x15/0x30
Pid: 3205, comm: tracepath Not tainted 2.6.27-rc4 #1

Call Trace:
 [<ffffffff8031af14>] debug_smp_processor_id+0xe4/0xf0
 [<ffffffff80409405>] icmp_sk+0x15/0x30
 [<ffffffff8040a17b>] icmp_send+0x4b/0x3f0
 [<ffffffff8025a415>] ? trace_hardirqs_on_caller+0xd5/0x160
 [<ffffffff8025a4ad>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff8023a475>] ? local_bh_enable_ip+0x95/0x110
 [<ffffffff804285b9>] ? _spin_unlock_bh+0x39/0x40
 [<ffffffff8025a26c>] ? mark_held_locks+0x4c/0x90
 [<ffffffff8025a4ad>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff8025a415>] ? trace_hardirqs_on_caller+0xd5/0x160
 [<ffffffff803e91b4>] ip_fragment+0x8d4/0x900
 [<ffffffff803e7030>] ? ip_finish_output2+0x0/0x290
 [<ffffffff803e91e0>] ? ip_finish_output+0x0/0x60
 [<ffffffff803e6650>] ? dst_output+0x0/0x10
 [<ffffffff803e922c>] ip_finish_output+0x4c/0x60
 [<ffffffff803e92e3>] ip_output+0xa3/0xf0
 [<ffffffff803e68d0>] ip_local_out+0x20/0x30
 [<ffffffff803e753f>] ip_push_pending_frames+0x27f/0x400
 [<ffffffff80406313>] udp_push_pending_frames+0x233/0x3d0
 [<ffffffff804067d1>] udp_sendmsg+0x321/0x6f0
 [<ffffffff8040d155>] inet_sendmsg+0x45/0x80
 [<ffffffff803b967f>] sock_sendmsg+0xdf/0x110
 [<ffffffff8024a100>] ? autoremove_wake_function+0x0/0x40
 [<ffffffff80257ce5>] ? validate_chain+0x415/0x1010
 [<ffffffff8027dc10>] ? __do_fault+0x140/0x450
 [<ffffffff802597d0>] ? __lock_acquire+0x260/0x590
 [<ffffffff803b9e55>] ? sockfd_lookup_light+0x45/0x80
 [<ffffffff803ba50a>] sys_sendto+0xea/0x120
 [<ffffffff80428e42>] ? _spin_unlock_irqrestore+0x42/0x80
 [<ffffffff803134bc>] ? __up_read+0x4c/0xb0
 [<ffffffff8024e0c6>] ? up_read+0x26/0x30
 [<ffffffff8020b8bb>] system_call_fastpath+0x16/0x1b

icmp6_sk() is similar.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 22 ++++++++++++++--------
 net/ipv6/icmp.c | 23 ++++++++++++-----------
 2 files changed, 26 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 860558633b2..55c355e6323 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -204,18 +204,22 @@ static struct sock *icmp_sk(struct net *net)
 	return net->ipv4.icmp_sk[smp_processor_id()];
 }
 
-static inline int icmp_xmit_lock(struct sock *sk)
+static inline struct sock *icmp_xmit_lock(struct net *net)
 {
+	struct sock *sk;
+
 	local_bh_disable();
 
+	sk = icmp_sk(net);
+
 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path signals a
 		 * dst_link_failure() for an outgoing ICMP packet.
 		 */
 		local_bh_enable();
-		return 1;
+		return NULL;
 	}
-	return 0;
+	return sk;
 }
 
 static inline void icmp_xmit_unlock(struct sock *sk)
@@ -354,15 +358,17 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	struct ipcm_cookie ipc;
 	struct rtable *rt = skb->rtable;
 	struct net *net = dev_net(rt->u.dst.dev);
-	struct sock *sk = icmp_sk(net);
-	struct inet_sock *inet = inet_sk(sk);
+	struct sock *sk;
+	struct inet_sock *inet;
 	__be32 daddr;
 
 	if (ip_options_echo(&icmp_param->replyopts, skb))
 		return;
 
-	if (icmp_xmit_lock(sk))
+	sk = icmp_xmit_lock(net);
+	if (sk == NULL)
 		return;
+	inet = inet_sk(sk);
 
 	icmp_param->data.icmph.checksum = 0;
 
@@ -419,7 +425,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	if (!rt)
 		goto out;
 	net = dev_net(rt->u.dst.dev);
-	sk = icmp_sk(net);
 
 	/*
 	 *	Find the original header. It is expected to be valid, of course.
@@ -483,7 +488,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		}
 	}
 
-	if (icmp_xmit_lock(sk))
+	sk = icmp_xmit_lock(net);
+	if (sk == NULL)
 		return;
 
 	/*
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index abedf95fdf2..b3157a0cc15 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -91,19 +91,22 @@ static struct inet6_protocol icmpv6_protocol = {
 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
-static __inline__ int icmpv6_xmit_lock(struct sock *sk)
+static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 {
+	struct sock *sk;
+
 	local_bh_disable();
 
+	sk = icmpv6_sk(net);
 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path (f.e. SIT or
 		 * ip6ip6 tunnel) signals dst_link_failure() for an
 		 * outgoing ICMP6 packet.
 		 */
 		local_bh_enable();
-		return 1;
+		return NULL;
 	}
-	return 0;
+	return sk;
 }
 
 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
@@ -392,11 +395,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	fl.fl_icmp_code = code;
 	security_skb_classify_flow(skb, &fl);
 
-	sk = icmpv6_sk(net);
-	np = inet6_sk(sk);
-
-	if (icmpv6_xmit_lock(sk))
+	sk = icmpv6_xmit_lock(net);
+	if (sk == NULL)
 		return;
+	np = inet6_sk(sk);
 
 	if (!icmpv6_xrlim_allow(sk, type, &fl))
 		goto out;
@@ -539,11 +541,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
 	security_skb_classify_flow(skb, &fl);
 
-	sk = icmpv6_sk(net);
-	np = inet6_sk(sk);
-
-	if (icmpv6_xmit_lock(sk))
+	sk = icmpv6_xmit_lock(net);
+	if (sk == NULL)
 		return;
+	np = inet6_sk(sk);
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
-- 
cgit v1.2.3-70-g09d2


From cbe2d128a01315fb4bd55b96cf8b963f5df28ea2 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 23 Aug 2008 05:10:12 -0700
Subject: tcp: Add tcp_validate_incoming & put duplicated code there
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Large block of code duplication removed.

Sadly, the return value thing is a bit tricky here but it
seems the most sensible way to return positive from validator
on success rather than negative.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 147 ++++++++++++++++++++++++---------------------------
 1 file changed, 69 insertions(+), 78 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 67ccce2a96b..e1b15d4e617 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4691,6 +4691,67 @@ out:
 }
 #endif /* CONFIG_NET_DMA */
 
+/* Does PAWS and seqno based validation of an incoming segment, flags will
+ * play significant role here.
+ */
+static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
+			      struct tcphdr *th, int syn_inerr)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* RFC1323: H1. Apply PAWS check first. */
+	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+	    tcp_paws_discard(sk, skb)) {
+		if (!th->rst) {
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+			tcp_send_dupack(sk, skb);
+			goto discard;
+		}
+		/* Reset is accepted even if it did not pass PAWS. */
+	}
+
+	/* Step 1: check sequence number */
+	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+		/* RFC793, page 37: "In all states except SYN-SENT, all reset
+		 * (RST) segments are validated by checking their SEQ-fields."
+		 * And page 69: "If an incoming segment is not acceptable,
+		 * an acknowledgment should be sent in reply (unless the RST
+		 * bit is set, if so drop the segment and return)".
+		 */
+		if (!th->rst)
+			tcp_send_dupack(sk, skb);
+		goto discard;
+	}
+
+	/* Step 2: check RST bit */
+	if (th->rst) {
+		tcp_reset(sk);
+		goto discard;
+	}
+
+	/* ts_recent update must be made after we are sure that the packet
+	 * is in window.
+	 */
+	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
+
+	/* step 3: check security and precedence [ignored] */
+
+	/* step 4: Check for a SYN in window. */
+	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+		if (syn_inerr)
+			TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
+		tcp_reset(sk);
+		return -1;
+	}
+
+	return 1;
+
+discard:
+	__kfree_skb(skb);
+	return 0;
+}
+
 /*
  *	TCP receive function for the ESTABLISHED state.
  *
@@ -4718,6 +4779,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			struct tcphdr *th, unsigned len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	int res;
 
 	/*
 	 *	Header prediction.
@@ -4898,52 +4960,13 @@ slow_path:
 	if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
 		goto csum_error;
 
-	/*
-	 * RFC1323: H1. Apply PAWS check first.
-	 */
-	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-	    tcp_paws_discard(sk, skb)) {
-		if (!th->rst) {
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
-			tcp_send_dupack(sk, skb);
-			goto discard;
-		}
-		/* Resets are accepted even if PAWS failed.
-
-		   ts_recent update must be made after we are sure
-		   that the packet is in window.
-		 */
-	}
-
 	/*
 	 *	Standard slow path.
 	 */
 
-	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
-		/* RFC793, page 37: "In all states except SYN-SENT, all reset
-		 * (RST) segments are validated by checking their SEQ-fields."
-		 * And page 69: "If an incoming segment is not acceptable,
-		 * an acknowledgment should be sent in reply (unless the RST bit
-		 * is set, if so drop the segment and return)".
-		 */
-		if (!th->rst)
-			tcp_send_dupack(sk, skb);
-		goto discard;
-	}
-
-	if (th->rst) {
-		tcp_reset(sk);
-		goto discard;
-	}
-
-	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
-
-	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
-		tcp_reset(sk);
-		return 1;
-	}
+	res = tcp_validate_incoming(sk, skb, th, 1);
+	if (res <= 0)
+		return -res;
 
 step5:
 	if (th->ack)
@@ -5225,6 +5248,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int queued = 0;
+	int res;
 
 	tp->rx_opt.saw_tstamp = 0;
 
@@ -5277,42 +5301,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		return 0;
 	}
 
-	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-	    tcp_paws_discard(sk, skb)) {
-		if (!th->rst) {
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
-			tcp_send_dupack(sk, skb);
-			goto discard;
-		}
-		/* Reset is accepted even if it did not pass PAWS. */
-	}
-
-	/* step 1: check sequence number */
-	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
-		if (!th->rst)
-			tcp_send_dupack(sk, skb);
-		goto discard;
-	}
-
-	/* step 2: check RST bit */
-	if (th->rst) {
-		tcp_reset(sk);
-		goto discard;
-	}
-
-	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
-
-	/* step 3: check security and precedence [ignored] */
-
-	/*	step 4:
-	 *
-	 *	Check for a SYN in window.
-	 */
-	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
-		tcp_reset(sk);
-		return 1;
-	}
+	res = tcp_validate_incoming(sk, skb, th, 0);
+	if (res <= 0)
+		return -res;
 
 	/* step 5: check the ACK field */
 	if (th->ack) {
-- 
cgit v1.2.3-70-g09d2


From 2cf46637b501794d7fe9e365f0a3046f5d1f5dfb Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 23 Aug 2008 05:11:41 -0700
Subject: tcp: Add tcp_collapse_one to eliminate duplicated code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e1b15d4e617..580f9547ddf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4161,6 +4161,18 @@ add_sack:
 	}
 }
 
+static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
+					struct sk_buff_head *list)
+{
+	struct sk_buff *next = skb->next;
+
+	__skb_unlink(skb, list);
+	__kfree_skb(skb);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
+
+	return next;
+}
+
 /* Collapse contiguous sequence of skbs head..tail with
  * sequence numbers start..end.
  * Segments with FIN/SYN are not collapsed (only because this
@@ -4178,11 +4190,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 	for (skb = head; skb != tail;) {
 		/* No new bits? It is possible on ofo queue. */
 		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-			struct sk_buff *next = skb->next;
-			__skb_unlink(skb, list);
-			__kfree_skb(skb);
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
-			skb = next;
+			skb = tcp_collapse_one(sk, skb, list);
 			continue;
 		}
 
@@ -4246,11 +4254,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 				start += size;
 			}
 			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-				struct sk_buff *next = skb->next;
-				__skb_unlink(skb, list);
-				__kfree_skb(skb);
-				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
-				skb = next;
+				skb = tcp_collapse_one(sk, skb, list);
 				if (skb == tail ||
 				    tcp_hdr(skb)->syn ||
 				    tcp_hdr(skb)->fin)
-- 
cgit v1.2.3-70-g09d2


From a4356b2920fd4861dd6c75f558749fa5c38a00e8 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 23 Aug 2008 05:12:29 -0700
Subject: tcp: Add tcp_parse_aligned_timestamp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some duplicated code lying around. Located with my suffix tree
tool.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 580f9547ddf..f79a5160729 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3442,6 +3442,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 	}
 }
 
+static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
+{
+	__be32 *ptr = (__be32 *)(th + 1);
+
+	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
+		tp->rx_opt.saw_tstamp = 1;
+		++ptr;
+		tp->rx_opt.rcv_tsval = ntohl(*ptr);
+		++ptr;
+		tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+		return 1;
+	}
+	return 0;
+}
+
 /* Fast parse options. This hopes to only see timestamps.
  * If it is wrong it falls back on tcp_parse_options().
  */
@@ -3453,16 +3469,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 		return 0;
 	} else if (tp->rx_opt.tstamp_ok &&
 		   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
-		__be32 *ptr = (__be32 *)(th + 1);
-		if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
-				  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
-			tp->rx_opt.saw_tstamp = 1;
-			++ptr;
-			tp->rx_opt.rcv_tsval = ntohl(*ptr);
-			++ptr;
-			tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+		if (tcp_parse_aligned_timestamp(tp, th))
 			return 1;
-		}
 	}
 	tcp_parse_options(skb, &tp->rx_opt, 1);
 	return 1;
@@ -4822,19 +4830,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 		/* Check timestamp */
 		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
-			__be32 *ptr = (__be32 *)(th + 1);
-
 			/* No? Slow path! */
-			if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
-					  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
+			if (!tcp_parse_aligned_timestamp(tp, th))
 				goto slow_path;
 
-			tp->rx_opt.saw_tstamp = 1;
-			++ptr;
-			tp->rx_opt.rcv_tsval = ntohl(*ptr);
-			++ptr;
-			tp->rx_opt.rcv_tsecr = ntohl(*ptr);
-
 			/* If PAWS failed, check it more carefully in slow path */
 			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
 				goto slow_path;
-- 
cgit v1.2.3-70-g09d2


From f410a1fba7afa79d2992620e874a343fdba28332 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Sat, 23 Aug 2008 05:16:46 -0700
Subject: ipv6: protocol for address routes

This fixes a problem spotted with zebra, but not sure if it is
necessary a kernel problem.  With IPV6 when an address is added to an
interface, Zebra creates a duplicate RIB entry, one as a connected
route, and other as a kernel route.

When an address is added to an interface the RTN_NEWADDR message
causes Zebra to create a connected route. In IPV4 when an address is
added to an interface a RTN_NEWROUTE message is set to user space with
the protocol RTPROT_KERNEL. Zebra ignores these messages, because it
already has the connected route.

The problem is that route created in IPV6 has route protocol ==
RTPROT_BOOT.  Was this a design decision or a bug? This fixes it. Same
patch applies to both net-2.6 and stable.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e2d3b7580b7..7b6a584b62d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1688,6 +1688,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		.fc_dst_len = plen,
 		.fc_flags = RTF_UP | flags,
 		.fc_nlinfo.nl_net = dev_net(dev),
+		.fc_protocol = RTPROT_KERNEL,
 	};
 
 	ipv6_addr_copy(&cfg.fc_dst, pfx);
-- 
cgit v1.2.3-70-g09d2


From 30c2235cbc477d4629983d440cdc4f496fec9246 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Mon, 25 Aug 2008 15:16:19 -0700
Subject: sctp: add verification checks to SCTP_AUTH_KEY option

The structure used for SCTP_AUTH_KEY option contains a
length that needs to be verfied to prevent buffer overflow
conditions.  Spoted by Eugene Teo <eteo@redhat.com>.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/auth.c   | 4 ++++
 net/sctp/socket.c | 5 +++++
 2 files changed, 9 insertions(+)

(limited to 'net')

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 675a5c3e68a..1fcb4cf2f4c 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -80,6 +80,10 @@ static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp)
 {
 	struct sctp_auth_bytes *key;
 
+	/* Verify that we are not going to overflow INT_MAX */
+	if ((INT_MAX - key_len) < sizeof(struct sctp_auth_bytes))
+		return NULL;
+
 	/* Allocate the shared key */
 	key = kmalloc(sizeof(struct sctp_auth_bytes) + key_len, gfp);
 	if (!key)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bb5c9ef1304..afa952e726d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3144,6 +3144,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 		goto out;
 	}
 
+	if (authkey->sca_keylength > optlen) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	asoc = sctp_id2assoc(sk, authkey->sca_assoc_id);
 	if (!asoc && authkey->sca_assoc_id && sctp_style(sk, UDP)) {
 		ret = -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From 2f4520d35d89ca6c5cd129c38e3b11f0283b7d1b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 25 Aug 2008 15:17:44 -0700
Subject: ipv4: sysctl fixes

net.ipv4.neigh should be a part of skeleton to avoid ordering problems

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cca921ea855..e91bafeb32f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3116,14 +3116,23 @@ static ctl_table ipv4_route_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static __net_initdata struct ctl_path ipv4_route_path[] = {
+static struct ctl_table empty[1];
+
+static struct ctl_table ipv4_skeleton[] =
+{
+	{ .procname = "route", .ctl_name = NET_IPV4_ROUTE,
+	  .child = ipv4_route_table},
+	{ .procname = "neigh", .ctl_name = NET_IPV4_NEIGH,
+	  .child = empty},
+	{ }
+};
+
+static __net_initdata struct ctl_path ipv4_path[] = {
 	{ .procname = "net", .ctl_name = CTL_NET, },
 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
-	{ .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
 	{ },
 };
 
-
 static struct ctl_table ipv4_route_flush_table[] = {
 	{
 		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
@@ -3136,6 +3145,13 @@ static struct ctl_table ipv4_route_flush_table[] = {
 	{ .ctl_name = 0 },
 };
 
+static __net_initdata struct ctl_path ipv4_route_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
+	{ },
+};
+
 static __net_init int sysctl_route_net_init(struct net *net)
 {
 	struct ctl_table *tbl;
@@ -3287,7 +3303,7 @@ int __init ip_rt_init(void)
  */
 void __init ip_static_sysctl_init(void)
 {
-	register_sysctl_paths(ipv4_route_path, ipv4_route_table);
+	register_sysctl_paths(ipv4_path, ipv4_skeleton);
 }
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From ce3113ec57abcd41cc5a2fed02474aee3f63d12c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 25 Aug 2008 15:18:15 -0700
Subject: ipv6: sysctl fixes

Braino: net.ipv6 in ipv6 skeleton has no business in rotable
class

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index e6dfaeac6be..587f8f60c48 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -156,7 +156,7 @@ static struct ctl_table_header *ip6_base;
 int ipv6_static_sysctl_register(void)
 {
 	static struct ctl_table empty[1];
-	ip6_base = register_net_sysctl_rotable(net_ipv6_ctl_path, empty);
+	ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty);
 	if (ip6_base == NULL)
 		return -ENOMEM;
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 988b02f1bf5b608ef91a9d98c7170d037d0f12e3 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@nokia.com>
Date: Mon, 18 Aug 2008 22:56:53 +0300
Subject: net: rfkill: add missing line break

Trivial patch adding a missing line break on
rfkill_claim_show().

Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Acked-by: Ivo van Doorn <IvDoorn@gmail.co>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 35a9994e233..74aecc098ba 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -377,7 +377,7 @@ static ssize_t rfkill_claim_show(struct device *dev,
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 
-	return sprintf(buf, "%d", rfkill->user_claim);
+	return sprintf(buf, "%d\n", rfkill->user_claim);
 }
 
 static ssize_t rfkill_claim_store(struct device *dev,
-- 
cgit v1.2.3-70-g09d2


From 087d833e5a9f67ba933cb32eaf5a2279c1a5b47c Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Tue, 19 Aug 2008 10:54:32 +0300
Subject: mac80211: Use IWEVASSOCREQIE instead of IWEVCUSTOM

The previous code was using IWEVCUSTOM to report IEs from AssocReq and
AssocResp frames into user space. This can easily hit the 256 byte
limit (IW_CUSTOM_MAX) with APs that include number of vendor IEs in
AssocResp. This results in the event message not being sent and dmesg
showing "wlan0 (WE) : Wireless Event too big (366)" type of errors.

Convert mac80211 to use IWEVASSOCREQIE/IWEVASSOCRESPIE to avoid the
issue of being unable to send association IEs as wireless events. These
newer event types use binary encoding and larger maximum size
(IW_GENERIC_IE_MAX = 1024), so the likelyhood of not being able to send
the IEs is much smaller than with IWEVCUSTOM. As an extra benefit, the
code is also quite a bit simpler since there is no need to allocate an
extra buffer for hex encoding.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 48 +++++++++---------------------------------------
 1 file changed, 9 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1e97fb9fb34..09a56e24b79 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -478,51 +478,21 @@ int ieee80211_ht_addt_info_ie_to_ht_bss_info(
 static void ieee80211_sta_send_associnfo(struct net_device *dev,
 					 struct ieee80211_if_sta *ifsta)
 {
-	char *buf;
-	size_t len;
-	int i;
 	union iwreq_data wrqu;
 
-	if (!ifsta->assocreq_ies && !ifsta->assocresp_ies)
-		return;
-
-	buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len +
-				ifsta->assocresp_ies_len), GFP_KERNEL);
-	if (!buf)
-		return;
-
-	len = sprintf(buf, "ASSOCINFO(");
 	if (ifsta->assocreq_ies) {
-		len += sprintf(buf + len, "ReqIEs=");
-		for (i = 0; i < ifsta->assocreq_ies_len; i++) {
-			len += sprintf(buf + len, "%02x",
-				       ifsta->assocreq_ies[i]);
-		}
-	}
-	if (ifsta->assocresp_ies) {
-		if (ifsta->assocreq_ies)
-			len += sprintf(buf + len, " ");
-		len += sprintf(buf + len, "RespIEs=");
-		for (i = 0; i < ifsta->assocresp_ies_len; i++) {
-			len += sprintf(buf + len, "%02x",
-				       ifsta->assocresp_ies[i]);
-		}
+		memset(&wrqu, 0, sizeof(wrqu));
+		wrqu.data.length = ifsta->assocreq_ies_len;
+		wireless_send_event(dev, IWEVASSOCREQIE, &wrqu,
+				    ifsta->assocreq_ies);
 	}
-	len += sprintf(buf + len, ")");
 
-	if (len > IW_CUSTOM_MAX) {
-		len = sprintf(buf, "ASSOCRESPIE=");
-		for (i = 0; i < ifsta->assocresp_ies_len; i++) {
-			len += sprintf(buf + len, "%02x",
-				       ifsta->assocresp_ies[i]);
-		}
+	if (ifsta->assocresp_ies) {
+		memset(&wrqu, 0, sizeof(wrqu));
+		wrqu.data.length = ifsta->assocresp_ies_len;
+		wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu,
+				    ifsta->assocresp_ies);
 	}
-
-	memset(&wrqu, 0, sizeof(wrqu));
-	wrqu.data.length = len;
-	wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
-
-	kfree(buf);
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 667d8af9af775a5fea203890978037ea750816cc Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 23 Aug 2008 18:27:38 +0200
Subject: net/mac80211/mesh.c: correct the argument to __mesh_table_free

In the function mesh_table_grow, it is the new table not the argument table
that should be freed if the function fails (cf commit
bd9b448f4c0a514559bdae4ca18ca3e8cd999c6d)

The semantic match that detects this problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@r exists@
local idexpression x;
expression E,f;
position p1,p2,p3;
identifier l;
statement S;
@@

x = mesh_table_alloc@p1(...)
...
if (x == NULL) S
... when != E = x
    when != mesh_table_free(x)
goto@p2 l;
... when != E = x
    when != f(...,x,...)
    when any
(
return \(0\|x\);
|
return@p3 ...;
)

@script:python@
p1 << r.p1;
p2 << r.p2;
p3 << r.p3;
@@

print "%s: call on line %s not freed or saved before return on line %s via line %s" % (p1[0].file,p1[0].line,p3[0].line,p2[0].line)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index b5933b27149..35f2f95f2fa 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -383,7 +383,7 @@ errcopy:
 		hlist_for_each_safe(p, q, &newtbl->hash_buckets[i])
 			tbl->free_node(p, 0);
 	}
-	__mesh_table_free(tbl);
+	__mesh_table_free(newtbl);
 endgrow:
 	return NULL;
 }
-- 
cgit v1.2.3-70-g09d2


From 93015f0f34e81a47c4126329746ce5f364bafd11 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 25 Aug 2008 11:57:06 +0300
Subject: mac80211: Fix debugfs file add/del for netdev

Previous version was using incorrect union structures for non-AP
interfaces when adding and removing max_ratectrl_rateidx and
force_unicast_rateidx entries. Depending on the vif type, this ended
up in corrupting debugfs entries since the dentries inside different
union structures ended up going being on top of eachother.. As the
end result, debugfs files were being left behind with references to
freed data (instant kernel oops on access) and directories were not
removed properly when unloading mac80211 drivers. This patch fixes
those issues by using only a single union structure based on the vif
type.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs_netdev.c | 24 ++++++++++++------------
 net/mac80211/ieee80211_i.h    |  6 ++++++
 2 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 475f89a8aee..8165df578c9 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -248,8 +248,8 @@ IEEE80211_IF_WFILE(min_discovery_timeout,
 static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted, sta);
-	DEBUGFS_ADD(force_unicast_rateidx, ap);
-	DEBUGFS_ADD(max_ratectrl_rateidx, ap);
+	DEBUGFS_ADD(force_unicast_rateidx, sta);
+	DEBUGFS_ADD(max_ratectrl_rateidx, sta);
 
 	DEBUGFS_ADD(state, sta);
 	DEBUGFS_ADD(bssid, sta);
@@ -283,8 +283,8 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
 static void add_wds_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted, wds);
-	DEBUGFS_ADD(force_unicast_rateidx, ap);
-	DEBUGFS_ADD(max_ratectrl_rateidx, ap);
+	DEBUGFS_ADD(force_unicast_rateidx, wds);
+	DEBUGFS_ADD(max_ratectrl_rateidx, wds);
 
 	DEBUGFS_ADD(peer, wds);
 }
@@ -292,8 +292,8 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata)
 static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted, vlan);
-	DEBUGFS_ADD(force_unicast_rateidx, ap);
-	DEBUGFS_ADD(max_ratectrl_rateidx, ap);
+	DEBUGFS_ADD(force_unicast_rateidx, vlan);
+	DEBUGFS_ADD(max_ratectrl_rateidx, vlan);
 }
 
 static void add_monitor_files(struct ieee80211_sub_if_data *sdata)
@@ -381,8 +381,8 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
 static void del_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(drop_unencrypted, sta);
-	DEBUGFS_DEL(force_unicast_rateidx, ap);
-	DEBUGFS_DEL(max_ratectrl_rateidx, ap);
+	DEBUGFS_DEL(force_unicast_rateidx, sta);
+	DEBUGFS_DEL(max_ratectrl_rateidx, sta);
 
 	DEBUGFS_DEL(state, sta);
 	DEBUGFS_DEL(bssid, sta);
@@ -416,8 +416,8 @@ static void del_ap_files(struct ieee80211_sub_if_data *sdata)
 static void del_wds_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(drop_unencrypted, wds);
-	DEBUGFS_DEL(force_unicast_rateidx, ap);
-	DEBUGFS_DEL(max_ratectrl_rateidx, ap);
+	DEBUGFS_DEL(force_unicast_rateidx, wds);
+	DEBUGFS_DEL(max_ratectrl_rateidx, wds);
 
 	DEBUGFS_DEL(peer, wds);
 }
@@ -425,8 +425,8 @@ static void del_wds_files(struct ieee80211_sub_if_data *sdata)
 static void del_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_DEL(drop_unencrypted, vlan);
-	DEBUGFS_DEL(force_unicast_rateidx, ap);
-	DEBUGFS_DEL(max_ratectrl_rateidx, ap);
+	DEBUGFS_DEL(force_unicast_rateidx, vlan);
+	DEBUGFS_DEL(max_ratectrl_rateidx, vlan);
 }
 
 static void del_monitor_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ec59345af65..586a9b49b0f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -470,6 +470,8 @@ struct ieee80211_sub_if_data {
 			struct dentry *auth_transaction;
 			struct dentry *flags;
 			struct dentry *num_beacons_sta;
+			struct dentry *force_unicast_rateidx;
+			struct dentry *max_ratectrl_rateidx;
 		} sta;
 		struct {
 			struct dentry *drop_unencrypted;
@@ -483,9 +485,13 @@ struct ieee80211_sub_if_data {
 		struct {
 			struct dentry *drop_unencrypted;
 			struct dentry *peer;
+			struct dentry *force_unicast_rateidx;
+			struct dentry *max_ratectrl_rateidx;
 		} wds;
 		struct {
 			struct dentry *drop_unencrypted;
+			struct dentry *force_unicast_rateidx;
+			struct dentry *max_ratectrl_rateidx;
 		} vlan;
 		struct {
 			struct dentry *mode;
-- 
cgit v1.2.3-70-g09d2


From 8ab65b03b7893da4a49009e7e356e36e27b0c407 Mon Sep 17 00:00:00 2001
From: Jan-Espen Pettersen <sigsegv@radiotube.org>
Date: Mon, 25 Aug 2008 20:29:22 +0200
Subject: mac80211: don't send empty extended rates IE

The association request includes a list of supported data rates.

802.11b: 4 supported rates.
802.11g: 12 (8 + 4) supported rates.
802.11a: 8 supported rates.

The rates tag of the assoc request has room for only 8 rates. In case of
802.11g an extended rate tag is appended. However in net/wireless/mlme.c
an extended (empty) rate tag is also appended if the number of rates is
exact 8. This empty (length=0) extended rates tag causes some APs to
deny association with code 18 (unsupported rates). These APs include my
ZyXEL G-570U, and according to Tomas Winkler som Cisco APs.

'If count == 8' has been used to check for the need for an extended rates
tag. But count would also be equal to 8 if the for loop exited because of
no more supported rates. Therefore a check for count being less than
rates_len would seem more correct.

Thanks to:
 * Dan Williams for newbie guidance
 * Tomas Winkler for confirming the problem

Signed-off-by: Jan-Espen Pettersen <sigsegv@radiotube.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 09a56e24b79..74777ade6b2 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -783,7 +783,7 @@ static void ieee80211_send_assoc(struct net_device *dev,
 		}
 	}
 
-	if (count == 8) {
+	if (rates_len > count) {
 		pos = skb_put(skb, rates_len - count + 2);
 		*pos++ = WLAN_EID_EXT_SUPP_RATES;
 		*pos++ = rates_len - count;
-- 
cgit v1.2.3-70-g09d2


From 576fdeaef631976c3afcb1d78ef196051282611d Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 26 Aug 2008 20:33:34 -0400
Subject: mac80211: quiet chatty IBSS merge message

It seems obvious that this #ifndef should be the opposite polarity...

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 74777ade6b2..9bb68c6a8f4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2838,7 +2838,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 		       jiffies);
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 		if (beacon_timestamp > rx_timestamp) {
-#ifndef CONFIG_MAC80211_IBSS_DEBUG
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
 			printk(KERN_DEBUG "%s: beacon TSF higher than "
 			       "local TSF - IBSS merge with BSSID %s\n",
 			       dev->name, print_mac(mac, mgmt->bssid));
-- 
cgit v1.2.3-70-g09d2


From 409a19669e4cd8d1bab7dff31d3b6aa493ff60f0 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Fri, 22 Aug 2008 14:06:12 +0200
Subject: IPVS: Integrate ESP protocol into ip_vs_proto_ah.c

Rename all ah_* functions to ah_esp_* (and adjust comments). Move ESP
protocol definition into ip_vs_proto_ah.c and remove all usage of
ip_vs_proto_esp.c.

Make the compilation of ip_vs_proto_ah.c dependent on a new config
variable, IP_VS_PROTO_AH_ESP, which is selected either by
IP_VS_PROTO_ESP or IP_VS_PROTO_AH. Only compile the selected protocols'
structures within this file.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/Kconfig          |  6 ++++
 net/ipv4/ipvs/Makefile         |  3 +-
 net/ipv4/ipvs/ip_vs_proto_ah.c | 69 ++++++++++++++++++++++++++++--------------
 3 files changed, 54 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 09d0c3f3566..2e48a7e2722 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -71,14 +71,20 @@ config	IP_VS_PROTO_UDP
 	  This option enables support for load balancing UDP transport
 	  protocol. Say Y if unsure.
 
+config	IP_VS_PROTO_AH_ESP
+	bool
+	depends on UNDEFINED
+
 config	IP_VS_PROTO_ESP
 	bool "ESP load balancing support"
+	select IP_VS_PROTO_AH_ESP
 	---help---
 	  This option enables support for load balancing ESP (Encapsulation
 	  Security Payload) transport protocol. Say Y if unsure.
 
 config	IP_VS_PROTO_AH
 	bool "AH load balancing support"
+	select IP_VS_PROTO_AH_ESP
 	---help---
 	  This option enables support for load balancing AH (Authentication
 	  Header) transport protocol. Say Y if unsure.
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile
index 30e85de9fff..cda3e0860d6 100644
--- a/net/ipv4/ipvs/Makefile
+++ b/net/ipv4/ipvs/Makefile
@@ -6,8 +6,7 @@
 ip_vs_proto-objs-y :=
 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah.o
 
 ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o	   \
 		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o	   		   \
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 73e0ea87c1f..3f9ebd7639a 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -1,5 +1,5 @@
 /*
- * ip_vs_proto_ah.c:	AH IPSec load balancing support for IPVS
+ * ip_vs_proto_ah_esp.c:	AH/ESP IPSec load balancing support for IPVS
  *
  * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
  *		Wensong Zhang <wensong@linuxvirtualserver.org>
@@ -39,11 +39,11 @@ struct isakmp_hdr {
 
 
 static struct ip_vs_conn *
-ah_conn_in_get(const struct sk_buff *skb,
-	       struct ip_vs_protocol *pp,
-	       const struct iphdr *iph,
-	       unsigned int proto_off,
-	       int inverse)
+ah_esp_conn_in_get(const struct sk_buff *skb,
+		   struct ip_vs_protocol *pp,
+		   const struct iphdr *iph,
+		   unsigned int proto_off,
+		   int inverse)
 {
 	struct ip_vs_conn *cp;
 
@@ -79,8 +79,8 @@ ah_conn_in_get(const struct sk_buff *skb,
 
 
 static struct ip_vs_conn *
-ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		const struct iphdr *iph, unsigned int proto_off, int inverse)
+ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    const struct iphdr *iph, unsigned int proto_off, int inverse)
 {
 	struct ip_vs_conn *cp;
 
@@ -112,12 +112,12 @@ ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static int
-ah_conn_schedule(struct sk_buff *skb,
-		 struct ip_vs_protocol *pp,
-		 int *verdict, struct ip_vs_conn **cpp)
+ah_esp_conn_schedule(struct sk_buff *skb,
+		     struct ip_vs_protocol *pp,
+		     int *verdict, struct ip_vs_conn **cpp)
 {
 	/*
-	 * AH is only related traffic. Pass the packet to IP stack.
+	 * AH/ESP is only related traffic. Pass the packet to IP stack.
 	 */
 	*verdict = NF_ACCEPT;
 	return 0;
@@ -125,8 +125,8 @@ ah_conn_schedule(struct sk_buff *skb,
 
 
 static void
-ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
-		int offset, const char *msg)
+ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		    int offset, const char *msg)
 {
 	char buf[256];
 	struct iphdr _iph, *ih;
@@ -143,28 +143,29 @@ ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
 }
 
 
-static void ah_init(struct ip_vs_protocol *pp)
+static void ah_esp_init(struct ip_vs_protocol *pp)
 {
 	/* nothing to do now */
 }
 
 
-static void ah_exit(struct ip_vs_protocol *pp)
+static void ah_esp_exit(struct ip_vs_protocol *pp)
 {
 	/* nothing to do now */
 }
 
 
+#ifdef CONFIG_IP_VS_PROTO_AH
 struct ip_vs_protocol ip_vs_protocol_ah = {
 	.name =			"AH",
 	.protocol =		IPPROTO_AH,
 	.num_states =		1,
 	.dont_defrag =		1,
-	.init =			ah_init,
-	.exit =			ah_exit,
-	.conn_schedule =	ah_conn_schedule,
-	.conn_in_get =		ah_conn_in_get,
-	.conn_out_get =		ah_conn_out_get,
+	.init =			ah_esp_init,
+	.exit =			ah_esp_exit,
+	.conn_schedule =	ah_esp_conn_schedule,
+	.conn_in_get =		ah_esp_conn_in_get,
+	.conn_out_get =		ah_esp_conn_out_get,
 	.snat_handler =		NULL,
 	.dnat_handler =		NULL,
 	.csum_check =		NULL,
@@ -172,7 +173,31 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
 	.register_app =		NULL,
 	.unregister_app =	NULL,
 	.app_conn_bind =	NULL,
-	.debug_packet =		ah_debug_packet,
+	.debug_packet =		ah_esp_debug_packet,
 	.timeout_change =	NULL,		/* ISAKMP */
 	.set_state_timeout =	NULL,
 };
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_ESP
+struct ip_vs_protocol ip_vs_protocol_esp = {
+	.name =			"ESP",
+	.protocol =		IPPROTO_ESP,
+	.num_states =		1,
+	.dont_defrag =		1,
+	.init =			ah_esp_init,
+	.exit =			ah_esp_exit,
+	.conn_schedule =	ah_esp_conn_schedule,
+	.conn_in_get =		ah_esp_conn_in_get,
+	.conn_out_get =		ah_esp_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		NULL,
+	.state_transition =	NULL,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		ah_esp_debug_packet,
+	.timeout_change =	NULL,		/* ISAKMP */
+};
+#endif
-- 
cgit v1.2.3-70-g09d2


From e3c2ced8d21410e8bc897480081e2ffc516c0f70 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Fri, 22 Aug 2008 14:06:13 +0200
Subject: IPVS: Rename ip_vs_proto_ah.c to ip_vs_proto_ah_esp.c

After integrating ESP into ip_vs_proto_ah, rename it (and the references to
it) to ip_vs_proto_ah_esp.c and delete the old ip_vs_proto_esp.c.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/Makefile             |   2 +-
 net/ipv4/ipvs/ip_vs_proto_ah.c     | 203 -------------------------------------
 net/ipv4/ipvs/ip_vs_proto_ah_esp.c | 203 +++++++++++++++++++++++++++++++++++++
 net/ipv4/ipvs/ip_vs_proto_esp.c    | 176 --------------------------------
 4 files changed, 204 insertions(+), 380 deletions(-)
 delete mode 100644 net/ipv4/ipvs/ip_vs_proto_ah.c
 create mode 100644 net/ipv4/ipvs/ip_vs_proto_ah_esp.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_proto_esp.c

(limited to 'net')

diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile
index cda3e0860d6..73a46fe1fe4 100644
--- a/net/ipv4/ipvs/Makefile
+++ b/net/ipv4/ipvs/Makefile
@@ -6,7 +6,7 @@
 ip_vs_proto-objs-y :=
 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
 
 ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o	   \
 		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o	   		   \
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
deleted file mode 100644
index 3f9ebd7639a..00000000000
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * ip_vs_proto_ah_esp.c:	AH/ESP IPSec load balancing support for IPVS
- *
- * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
- *		Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		version 2 as published by the Free Software Foundation;
- *
- */
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-
-#include <net/ip_vs.h>
-
-
-/* TODO:
-
-struct isakmp_hdr {
-	__u8		icookie[8];
-	__u8		rcookie[8];
-	__u8		np;
-	__u8		version;
-	__u8		xchgtype;
-	__u8		flags;
-	__u32		msgid;
-	__u32		length;
-};
-
-*/
-
-#define PORT_ISAKMP	500
-
-
-static struct ip_vs_conn *
-ah_esp_conn_in_get(const struct sk_buff *skb,
-		   struct ip_vs_protocol *pp,
-		   const struct iphdr *iph,
-		   unsigned int proto_off,
-		   int inverse)
-{
-	struct ip_vs_conn *cp;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_in_get(IPPROTO_UDP,
-				       iph->saddr,
-				       htons(PORT_ISAKMP),
-				       iph->daddr,
-				       htons(PORT_ISAKMP));
-	} else {
-		cp = ip_vs_conn_in_get(IPPROTO_UDP,
-				       iph->daddr,
-				       htons(PORT_ISAKMP),
-				       iph->saddr,
-				       htons(PORT_ISAKMP));
-	}
-
-	if (!cp) {
-		/*
-		 * We are not sure if the packet is from our
-		 * service, so our conn_schedule hook should return NF_ACCEPT
-		 */
-		IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
-			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
-			  inverse ? "ICMP+" : "",
-			  pp->name,
-			  NIPQUAD(iph->saddr),
-			  NIPQUAD(iph->daddr));
-	}
-
-	return cp;
-}
-
-
-static struct ip_vs_conn *
-ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		    const struct iphdr *iph, unsigned int proto_off, int inverse)
-{
-	struct ip_vs_conn *cp;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_out_get(IPPROTO_UDP,
-					iph->saddr,
-					htons(PORT_ISAKMP),
-					iph->daddr,
-					htons(PORT_ISAKMP));
-	} else {
-		cp = ip_vs_conn_out_get(IPPROTO_UDP,
-					iph->daddr,
-					htons(PORT_ISAKMP),
-					iph->saddr,
-					htons(PORT_ISAKMP));
-	}
-
-	if (!cp) {
-		IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
-			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
-			  inverse ? "ICMP+" : "",
-			  pp->name,
-			  NIPQUAD(iph->saddr),
-			  NIPQUAD(iph->daddr));
-	}
-
-	return cp;
-}
-
-
-static int
-ah_esp_conn_schedule(struct sk_buff *skb,
-		     struct ip_vs_protocol *pp,
-		     int *verdict, struct ip_vs_conn **cpp)
-{
-	/*
-	 * AH/ESP is only related traffic. Pass the packet to IP stack.
-	 */
-	*verdict = NF_ACCEPT;
-	return 0;
-}
-
-
-static void
-ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
-		    int offset, const char *msg)
-{
-	char buf[256];
-	struct iphdr _iph, *ih;
-
-	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
-	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
-	else
-		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
-			pp->name, NIPQUAD(ih->saddr),
-			NIPQUAD(ih->daddr));
-
-	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
-}
-
-
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-#ifdef CONFIG_IP_VS_PROTO_AH
-struct ip_vs_protocol ip_vs_protocol_ah = {
-	.name =			"AH",
-	.protocol =		IPPROTO_AH,
-	.num_states =		1,
-	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
-	.conn_schedule =	ah_esp_conn_schedule,
-	.conn_in_get =		ah_esp_conn_in_get,
-	.conn_out_get =		ah_esp_conn_out_get,
-	.snat_handler =		NULL,
-	.dnat_handler =		NULL,
-	.csum_check =		NULL,
-	.state_transition =	NULL,
-	.register_app =		NULL,
-	.unregister_app =	NULL,
-	.app_conn_bind =	NULL,
-	.debug_packet =		ah_esp_debug_packet,
-	.timeout_change =	NULL,		/* ISAKMP */
-	.set_state_timeout =	NULL,
-};
-#endif
-
-#ifdef CONFIG_IP_VS_PROTO_ESP
-struct ip_vs_protocol ip_vs_protocol_esp = {
-	.name =			"ESP",
-	.protocol =		IPPROTO_ESP,
-	.num_states =		1,
-	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
-	.conn_schedule =	ah_esp_conn_schedule,
-	.conn_in_get =		ah_esp_conn_in_get,
-	.conn_out_get =		ah_esp_conn_out_get,
-	.snat_handler =		NULL,
-	.dnat_handler =		NULL,
-	.csum_check =		NULL,
-	.state_transition =	NULL,
-	.register_app =		NULL,
-	.unregister_app =	NULL,
-	.app_conn_bind =	NULL,
-	.debug_packet =		ah_esp_debug_packet,
-	.timeout_change =	NULL,		/* ISAKMP */
-};
-#endif
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
new file mode 100644
index 00000000000..3f9ebd7639a
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
@@ -0,0 +1,203 @@
+/*
+ * ip_vs_proto_ah_esp.c:	AH/ESP IPSec load balancing support for IPVS
+ *
+ * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
+ *		Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		version 2 as published by the Free Software Foundation;
+ *
+ */
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/* TODO:
+
+struct isakmp_hdr {
+	__u8		icookie[8];
+	__u8		rcookie[8];
+	__u8		np;
+	__u8		version;
+	__u8		xchgtype;
+	__u8		flags;
+	__u32		msgid;
+	__u32		length;
+};
+
+*/
+
+#define PORT_ISAKMP	500
+
+
+static struct ip_vs_conn *
+ah_esp_conn_in_get(const struct sk_buff *skb,
+		   struct ip_vs_protocol *pp,
+		   const struct iphdr *iph,
+		   unsigned int proto_off,
+		   int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_in_get(IPPROTO_UDP,
+				       iph->saddr,
+				       htons(PORT_ISAKMP),
+				       iph->daddr,
+				       htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_in_get(IPPROTO_UDP,
+				       iph->daddr,
+				       htons(PORT_ISAKMP),
+				       iph->saddr,
+				       htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		/*
+		 * We are not sure if the packet is from our
+		 * service, so our conn_schedule hook should return NF_ACCEPT
+		 */
+		IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
+			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
+			  inverse ? "ICMP+" : "",
+			  pp->name,
+			  NIPQUAD(iph->saddr),
+			  NIPQUAD(iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static struct ip_vs_conn *
+ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    const struct iphdr *iph, unsigned int proto_off, int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_out_get(IPPROTO_UDP,
+					iph->saddr,
+					htons(PORT_ISAKMP),
+					iph->daddr,
+					htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_out_get(IPPROTO_UDP,
+					iph->daddr,
+					htons(PORT_ISAKMP),
+					iph->saddr,
+					htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
+			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
+			  inverse ? "ICMP+" : "",
+			  pp->name,
+			  NIPQUAD(iph->saddr),
+			  NIPQUAD(iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static int
+ah_esp_conn_schedule(struct sk_buff *skb,
+		     struct ip_vs_protocol *pp,
+		     int *verdict, struct ip_vs_conn **cpp)
+{
+	/*
+	 * AH/ESP is only related traffic. Pass the packet to IP stack.
+	 */
+	*verdict = NF_ACCEPT;
+	return 0;
+}
+
+
+static void
+ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		    int offset, const char *msg)
+{
+	char buf[256];
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
+			pp->name, NIPQUAD(ih->saddr),
+			NIPQUAD(ih->daddr));
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+
+static void ah_esp_init(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+static void ah_esp_exit(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+#ifdef CONFIG_IP_VS_PROTO_AH
+struct ip_vs_protocol ip_vs_protocol_ah = {
+	.name =			"AH",
+	.protocol =		IPPROTO_AH,
+	.num_states =		1,
+	.dont_defrag =		1,
+	.init =			ah_esp_init,
+	.exit =			ah_esp_exit,
+	.conn_schedule =	ah_esp_conn_schedule,
+	.conn_in_get =		ah_esp_conn_in_get,
+	.conn_out_get =		ah_esp_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		NULL,
+	.state_transition =	NULL,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		ah_esp_debug_packet,
+	.timeout_change =	NULL,		/* ISAKMP */
+	.set_state_timeout =	NULL,
+};
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_ESP
+struct ip_vs_protocol ip_vs_protocol_esp = {
+	.name =			"ESP",
+	.protocol =		IPPROTO_ESP,
+	.num_states =		1,
+	.dont_defrag =		1,
+	.init =			ah_esp_init,
+	.exit =			ah_esp_exit,
+	.conn_schedule =	ah_esp_conn_schedule,
+	.conn_in_get =		ah_esp_conn_in_get,
+	.conn_out_get =		ah_esp_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		NULL,
+	.state_transition =	NULL,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		ah_esp_debug_packet,
+	.timeout_change =	NULL,		/* ISAKMP */
+};
+#endif
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
deleted file mode 100644
index 21d70c8ffa5..00000000000
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * ip_vs_proto_esp.c:	ESP IPSec load balancing support for IPVS
- *
- * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
- *		Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		version 2 as published by the Free Software Foundation;
- *
- */
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-
-#include <net/ip_vs.h>
-
-
-/* TODO:
-
-struct isakmp_hdr {
-	__u8		icookie[8];
-	__u8		rcookie[8];
-	__u8		np;
-	__u8		version;
-	__u8		xchgtype;
-	__u8		flags;
-	__u32		msgid;
-	__u32		length;
-};
-
-*/
-
-#define PORT_ISAKMP	500
-
-
-static struct ip_vs_conn *
-esp_conn_in_get(const struct sk_buff *skb,
-		struct ip_vs_protocol *pp,
-		const struct iphdr *iph,
-		unsigned int proto_off,
-		int inverse)
-{
-	struct ip_vs_conn *cp;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_in_get(IPPROTO_UDP,
-				       iph->saddr,
-				       htons(PORT_ISAKMP),
-				       iph->daddr,
-				       htons(PORT_ISAKMP));
-	} else {
-		cp = ip_vs_conn_in_get(IPPROTO_UDP,
-				       iph->daddr,
-				       htons(PORT_ISAKMP),
-				       iph->saddr,
-				       htons(PORT_ISAKMP));
-	}
-
-	if (!cp) {
-		/*
-		 * We are not sure if the packet is from our
-		 * service, so our conn_schedule hook should return NF_ACCEPT
-		 */
-		IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
-			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
-			  inverse ? "ICMP+" : "",
-			  pp->name,
-			  NIPQUAD(iph->saddr),
-			  NIPQUAD(iph->daddr));
-	}
-
-	return cp;
-}
-
-
-static struct ip_vs_conn *
-esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		 const struct iphdr *iph, unsigned int proto_off, int inverse)
-{
-	struct ip_vs_conn *cp;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_out_get(IPPROTO_UDP,
-					iph->saddr,
-					htons(PORT_ISAKMP),
-					iph->daddr,
-					htons(PORT_ISAKMP));
-	} else {
-		cp = ip_vs_conn_out_get(IPPROTO_UDP,
-					iph->daddr,
-					htons(PORT_ISAKMP),
-					iph->saddr,
-					htons(PORT_ISAKMP));
-	}
-
-	if (!cp) {
-		IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
-			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
-			  inverse ? "ICMP+" : "",
-			  pp->name,
-			  NIPQUAD(iph->saddr),
-			  NIPQUAD(iph->daddr));
-	}
-
-	return cp;
-}
-
-
-static int
-esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
-		  int *verdict, struct ip_vs_conn **cpp)
-{
-	/*
-	 * ESP is only related traffic. Pass the packet to IP stack.
-	 */
-	*verdict = NF_ACCEPT;
-	return 0;
-}
-
-
-static void
-esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
-		 int offset, const char *msg)
-{
-	char buf[256];
-	struct iphdr _iph, *ih;
-
-	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
-	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
-	else
-		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
-			pp->name, NIPQUAD(ih->saddr),
-			NIPQUAD(ih->daddr));
-
-	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
-}
-
-
-static void esp_init(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-static void esp_exit(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-struct ip_vs_protocol ip_vs_protocol_esp = {
-	.name =			"ESP",
-	.protocol =		IPPROTO_ESP,
-	.num_states =		1,
-	.dont_defrag =		1,
-	.init =			esp_init,
-	.exit =			esp_exit,
-	.conn_schedule =	esp_conn_schedule,
-	.conn_in_get =		esp_conn_in_get,
-	.conn_out_get =		esp_conn_out_get,
-	.snat_handler =		NULL,
-	.dnat_handler =		NULL,
-	.csum_check =		NULL,
-	.state_transition =	NULL,
-	.register_app =		NULL,
-	.unregister_app =	NULL,
-	.app_conn_bind =	NULL,
-	.debug_packet =		esp_debug_packet,
-	.timeout_change =	NULL,		/* ISAKMP */
-};
-- 
cgit v1.2.3-70-g09d2


From 33c449675c0e371edd35b3bd7ce8a14451ff2f0b Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Always generate a Reset in response to option errors

RFC4340 states that if a packet is received with an option error (such as a
Mandatory Option as the last byte of the option list), the endpoint should
repond with a Reset.

In the LISTEN and RESPOND states, the endpoint correctly reponds with Reset,
while in the REQUEST/OPEN states, packets with option errors are just ignored.

The packet sequence is as follows:

Case 1:

  Endpoint A                           Endpoint B
  (CLOSED)                             (CLOSED)

               <----------------       REQUEST

  RESPONSE     ----------------->      (*1)
  (with invalid option)
               <----------------       RESET
                                       (with Reset Code 5, "Option Error")

  (*1) currently just ignored, no Reset is sent

Case 2:

  Endpoint A                           Endpoint B
  (OPEN)                               (OPEN)

  DATA-ACK     ----------------->      (*2)
  (with invalid option)
               <----------------       RESET
                                       (with Reset Code 5, "Option Error")

  (*2) currently just ignored, no Reset is sent

This patch fixes the problem, by generating a Reset instead of silently
ignoring option errors.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 803933ab396..779d0ed9ae9 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -370,7 +370,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		goto discard;
 
 	if (dccp_parse_options(sk, NULL, skb))
-		goto discard;
+		return 1;
 
 	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 		dccp_event_ack_recv(sk, skb);
@@ -610,7 +610,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 * Step 8: Process options and mark acknowledgeable
 		 */
 		if (dccp_parse_options(sk, NULL, skb))
-			goto discard;
+			return 1;
 
 		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 			dccp_event_ack_recv(sk, skb);
-- 
cgit v1.2.3-70-g09d2


From 1efa6bbac876318ebf6f3a757f18e7d9ebe02dd0 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Silently ignore options with nonsensical lengths

This updates the option-parsing code with regard to RFC 4340, 5.8:
 "[..] options with nonsensical lengths (length byte less than two or more
  than the remaining space in the options portion of the header) MUST be
  ignored, and any option space following an option with nonsensical length
  MUST likewise be ignored."

Hence in the following cases erratic options will be ignored:
 1. The type byte of a multi-byte option is the last byte of the header
    options (i.e. effective option length of 1).
 2. The value of the length byte is less than the minimum 2. This has been
    changed from previously 3: although no multi-byte option with a length
    less than 3 yet exists (cf. table 3 in 5.8), a length of 2 is valid.
    (The switch-statement in dccp_parse has further per-option length checks.)
 3. The option length exceeds the length of the remaining option space.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/options.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/options.c b/net/dccp/options.c
index dc7c158a2f4..4284f085604 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -81,11 +81,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		/* Check if this isn't a single byte option */
 		if (opt > DCCPO_MAX_RESERVED) {
 			if (opt_ptr == opt_end)
-				goto out_invalid_option;
+				goto out_nonsensical_length;
 
 			len = *opt_ptr++;
-			if (len < 3)
-				goto out_invalid_option;
+			if (len < 2)
+				goto out_nonsensical_length;
 			/*
 			 * Remove the type and len fields, leaving
 			 * just the value size
@@ -95,7 +95,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			opt_ptr += len;
 
 			if (opt_ptr > opt_end)
-				goto out_invalid_option;
+				goto out_nonsensical_length;
 		}
 
 		/*
@@ -283,6 +283,8 @@ ignore_option:
 	if (mandatory)
 		goto out_invalid_option;
 
+out_nonsensical_length:
+	/* RFC 4340, 5.8: ignore option and all remaining option space */
 	return 0;
 
 out_invalid_option:
-- 
cgit v1.2.3-70-g09d2


From 5a056417e696fabab8642ec38783de0b496bde76 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Fill in the Data fields for "Option Error" Resets

This updates the use of the `out_invalid_option' label, which produces a
Reset (code 5, "Option Error"), to fill in the  Data1...Data3 fields as
specified in RFC 4340, 5.6.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/options.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/dccp/options.c b/net/dccp/options.c
index 4284f085604..0809b63cb05 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -291,6 +291,9 @@ out_invalid_option:
 	DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
 	DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
+	DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt;
+	DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0;
+	DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0;
 	return -1;
 }
 
-- 
cgit v1.2.3-70-g09d2


From b569d5a134074d4e15ab8e26cf2dd9f02c29fadc Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Empty the write queue when disconnecting

dccp_disconnect() can be called due to several reasons:

 1. when the connection setup failed (inet_stream_connect());
 2. when shutting down (inet_shutdown(), inet_csk_listen_stop());
 3. when aborting the connection (dccp_close() with 0 linger time).

In case (1) the write queue is empty. This patch empties the write queue,
if in case (2) or (3) it was not yet empty.

This avoids triggering the write-queue BUG_TRAP in sk_stream_kill_queues()
later on.

It also seems natural to do: when breaking an association, to delete all
packets that were originally intended for the soon-disconnected end (compare
with call to tcp_write_queue_purge in tcp_disconnect()).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/proto.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 1ca3b26eed0..ae66473b11f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -309,7 +309,9 @@ int dccp_disconnect(struct sock *sk, int flags)
 		sk->sk_err = ECONNRESET;
 
 	dccp_clear_xmit_timers(sk);
+
 	__skb_queue_purge(&sk->sk_receive_queue);
+	__skb_queue_purge(&sk->sk_write_queue);
 	if (sk->sk_send_head != NULL) {
 		__kfree_skb(sk->sk_send_head);
 		sk->sk_send_head = NULL;
-- 
cgit v1.2.3-70-g09d2


From 157439fa4a9b38ac4ce41e2fc379fc5031affec8 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Toggle debug output without module unloading

This sets the sysfs permissions so that root can toggle the `debug'
parameter available for nearly every DCCP module. This is useful
since there are various module inter-dependencies. The debug flag
can now be toggled at runtime using

  echo 1 > /sys/module/dccp/parameters/dccp_debug
  echo 1 > /sys/module/dccp_ccid2/parameters/ccid2_debug
  echo 1 > /sys/module/dccp_ccid3/parameters/ccid3_debug
  echo 1 > /sys/module/dccp_tfrc_lib/parameters/tfrc_debug

The last is not very useful yet, since no code at the moment calls
the tfrc_debug() macro.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c    | 2 +-
 net/dccp/ccids/ccid3.c    | 2 +-
 net/dccp/ccids/lib/tfrc.c | 2 +-
 net/dccp/proto.c          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 8e958087421..9a430734530 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -783,7 +783,7 @@ static struct ccid_operations ccid2 = {
 };
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-module_param(ccid2_debug, bool, 0444);
+module_param(ccid2_debug, bool, 0644);
 MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
 #endif
 
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f6756e0c9e6..3b8bd7ca676 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -963,7 +963,7 @@ static struct ccid_operations ccid3 = {
 };
 
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-module_param(ccid3_debug, bool, 0444);
+module_param(ccid3_debug, bool, 0644);
 MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
 #endif
 
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 97ecec0a8e7..185916218e0 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -10,7 +10,7 @@
 
 #ifdef CONFIG_IP_DCCP_TFRC_DEBUG
 int tfrc_debug;
-module_param(tfrc_debug, bool, 0444);
+module_param(tfrc_debug, bool, 0644);
 MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
 #endif
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ae66473b11f..d0bd3481976 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1030,7 +1030,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
 
 #ifdef CONFIG_IP_DCCP_DEBUG
 int dccp_debug;
-module_param(dccp_debug, bool, 0444);
+module_param(dccp_debug, bool, 0644);
 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
 
 EXPORT_SYMBOL_GPL(dccp_debug);
-- 
cgit v1.2.3-70-g09d2


From eff253c4272cd2aac95ccff46d3d2e1a495f22b1 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp ccid-3: Replace lazy BUG_ON with condition

The BUG_ON(w_tot == 0) only holds if there is no more than 1 loss interval in
the loss history. If there is only a single loss interval, the calc_i_mean()
routine need in fact not be called (RFC 3448, 6.3.1).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/lib/loss_interval.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index bcd6ac415bb..5b3ce0688c5 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -67,7 +67,10 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 	u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0;
 	int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */
 
-	for (i=0; i <= k; i++) {
+	if (k <= 0)
+		return;
+
+	for (i = 0; i <= k; i++) {
 		i_i = tfrc_lh_get_interval(lh, i);
 
 		if (i < k) {
@@ -78,7 +81,6 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 			i_tot1 += i_i * tfrc_lh_weights[i-1];
 	}
 
-	BUG_ON(w_tot == 0);
 	lh->i_mean = max(i_tot0, i_tot1) / w_tot;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 666d9bbedfff7c2c37eab92e715641922dee6864 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 27 Aug 2008 02:12:52 -0700
Subject: pkt_sched: Fix dev_graft_qdisc() locking

During dev_graft_qdisc() dev is deactivated, so qdisc_root_lock()
returns wrong lock of noop_qdisc instead of qdisc_sleeping.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e7fb9e0d21b..341d558b6e3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -624,7 +624,7 @@ static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
 	spinlock_t *root_lock;
 
-	root_lock = qdisc_root_lock(oqdisc);
+	root_lock = qdisc_lock(oqdisc);
 	spin_lock_bh(root_lock);
 
 	/* Prune old scheduler */
-- 
cgit v1.2.3-70-g09d2


From f7a54c13c7b072d9426bd5cec1cdb8306df5ef55 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 27 Aug 2008 02:22:07 -0700
Subject: pkt_sched: Use rcu_assign_pointer() to change dev_queue->qdisc

These pointers are RCU protected, so proper primitives should be used.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c     | 2 +-
 net/sched/sch_generic.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 341d558b6e3..ad9cda1b8c0 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -635,7 +635,7 @@ static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 	if (qdisc == NULL)
 		qdisc = &noop_qdisc;
 	dev_queue->qdisc_sleeping = qdisc;
-	dev_queue->qdisc = &noop_qdisc;
+	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
 
 	spin_unlock_bh(root_lock);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5f0ade7806a..9634091ee2f 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -634,7 +634,7 @@ static void dev_deactivate_queue(struct net_device *dev,
 		if (!(qdisc->flags & TCQ_F_BUILTIN))
 			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
 
-		dev_queue->qdisc = qdisc_default;
+		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
 		qdisc_reset(qdisc);
 
 		spin_unlock_bh(qdisc_lock(qdisc));
@@ -709,7 +709,7 @@ static void shutdown_scheduler_queue(struct net_device *dev,
 	struct Qdisc *qdisc_default = _qdisc_default;
 
 	if (qdisc) {
-		dev_queue->qdisc = qdisc_default;
+		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
 		dev_queue->qdisc_sleeping = qdisc_default;
 
 		qdisc_destroy(qdisc);
-- 
cgit v1.2.3-70-g09d2


From f6f9b93f1624206c802ac9162c9302edaf59bfd9 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 27 Aug 2008 02:25:17 -0700
Subject: pkt_sched: Fix gen_estimator locks

While passing a qdisc root lock to gen_new_estimator() and
gen_replace_estimator() dev could be deactivated or even before
grafting proper root qdisc as qdisc_sleeping (e.g. qdisc_create), so
using qdisc_root_lock() is not enough. This patch adds
qdisc_root_sleeping_lock() for this, plus additional checks, where
necessary.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  8 ++++++++
 net/sched/sch_api.c       | 14 +++++++++++---
 net/sched/sch_cbq.c       |  4 ++--
 net/sched/sch_hfsc.c      |  4 ++--
 net/sched/sch_htb.c       |  4 ++--
 5 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b1d2cfea89c..ef8a7e2e12e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -217,6 +217,14 @@ static inline spinlock_t *qdisc_root_lock(struct Qdisc *qdisc)
 	return qdisc_lock(root);
 }
 
+static inline spinlock_t *qdisc_root_sleeping_lock(struct Qdisc *qdisc)
+{
+	struct Qdisc *root = qdisc_root_sleeping(qdisc);
+
+	ASSERT_RTNL();
+	return qdisc_lock(root);
+}
+
 static inline struct net_device *qdisc_dev(struct Qdisc *qdisc)
 {
 	return qdisc->dev_queue->dev;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ad9cda1b8c0..506b709510b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -830,9 +830,16 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 			sch->stab = stab;
 		}
 		if (tca[TCA_RATE]) {
+			spinlock_t *root_lock;
+
+			if ((sch->parent != TC_H_ROOT) &&
+			    !(sch->flags & TCQ_F_INGRESS))
+				root_lock = qdisc_root_sleeping_lock(sch);
+			else
+				root_lock = qdisc_lock(sch);
+
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
-						qdisc_root_lock(sch),
-						tca[TCA_RATE]);
+						root_lock, tca[TCA_RATE]);
 			if (err) {
 				/*
 				 * Any broken qdiscs that would require
@@ -884,7 +891,8 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 
 	if (tca[TCA_RATE])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
-				      qdisc_root_lock(sch), tca[TCA_RATE]);
+				      qdisc_root_sleeping_lock(sch),
+				      tca[TCA_RATE]);
 	return 0;
 }
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 8fa90d68ec6..9b720adedea 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1839,7 +1839,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      qdisc_root_lock(sch),
+					      qdisc_root_sleeping_lock(sch),
 					      tca[TCA_RATE]);
 		return 0;
 	}
@@ -1930,7 +1930,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  qdisc_root_lock(sch), tca[TCA_RATE]);
+				  qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
 
 	*arg = (unsigned long)cl;
 	return 0;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c2b8d9cce3d..c1e77da8cd0 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1045,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      qdisc_root_lock(sch),
+					      qdisc_root_sleeping_lock(sch),
 					      tca[TCA_RATE]);
 		return 0;
 	}
@@ -1104,7 +1104,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE])
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  qdisc_root_lock(sch), tca[TCA_RATE]);
+				  qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
 	*arg = (unsigned long)cl;
 	return 0;
 }
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 0df0df202ed..97d4761cc31 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1372,7 +1372,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			goto failure;
 
 		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  qdisc_root_lock(sch),
+				  qdisc_root_sleeping_lock(sch),
 				  tca[TCA_RATE] ? : &est.nla);
 		cl->refcnt = 1;
 		cl->children = 0;
@@ -1427,7 +1427,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	} else {
 		if (tca[TCA_RATE])
 			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      qdisc_root_lock(sch),
+					      qdisc_root_sleeping_lock(sch),
 					      tca[TCA_RATE]);
 		sch_tree_lock(sch);
 	}
-- 
cgit v1.2.3-70-g09d2


From 7982d5e1b350acb96aa156916c44c25ef87bb809 Mon Sep 17 00:00:00 2001
From: Philip Love <love_phil@emc.com>
Date: Wed, 27 Aug 2008 02:33:50 -0700
Subject: tcp: fix tcp header size miscalculation when window scale is unused

The size of the TCP header is miscalculated when the window scale ends
up being 0. Additionally, this can be induced by sending a SYN to a
passive open port with a window scale option with value 0.

Signed-off-by: Philip Love <love_phil@emc.com>
Signed-off-by: Adam Langley <agl@imperialviolet.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a00532de2a8..8165f5aa8c7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -468,7 +468,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	}
 	if (likely(sysctl_tcp_window_scaling)) {
 		opts->ws = tp->rx_opt.rcv_wscale;
-		size += TCPOLEN_WSCALE_ALIGNED;
+		if(likely(opts->ws))
+			size += TCPOLEN_WSCALE_ALIGNED;
 	}
 	if (likely(sysctl_tcp_sack)) {
 		opts->options |= OPTION_SACK_ADVERTISE;
@@ -509,7 +510,8 @@ static unsigned tcp_synack_options(struct sock *sk,
 
 	if (likely(ireq->wscale_ok)) {
 		opts->ws = ireq->rcv_wscale;
-		size += TCPOLEN_WSCALE_ALIGNED;
+		if(likely(opts->ws))
+			size += TCPOLEN_WSCALE_ALIGNED;
 	}
 	if (likely(doing_ts)) {
 		opts->options |= OPTION_TS;
-- 
cgit v1.2.3-70-g09d2


From d994af0d50efc96b2077978fe9f066992639d525 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 27 Aug 2008 02:35:18 -0700
Subject: ipv4: mode 0555 in ipv4_skeleton

vpnc on today's kernel says Cannot open "/proc/sys/net/ipv4/route/flush":
d--------- 0 root root 0 2008-08-26 11:32 /proc/sys/net/ipv4/route
d--------- 0 root root 0 2008-08-26 19:16 /proc/sys/net/ipv4/neigh

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e91bafeb32f..6ee5354c9aa 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3121,9 +3121,9 @@ static struct ctl_table empty[1];
 static struct ctl_table ipv4_skeleton[] =
 {
 	{ .procname = "route", .ctl_name = NET_IPV4_ROUTE,
-	  .child = ipv4_route_table},
+	  .mode = 0555, .child = ipv4_route_table},
 	{ .procname = "neigh", .ctl_name = NET_IPV4_NEIGH,
-	  .child = empty},
+	  .mode = 0555, .child = empty},
 	{ }
 };
 
-- 
cgit v1.2.3-70-g09d2


From 328fc47ea0bcc27d9afa69c3ad6e52431cadd76c Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 27 Aug 2008 16:08:54 -0700
Subject: sctp: correct bounds check in sctp_setsockopt_auth_key

The bonds check to prevent buffer overlflow was not exactly
right.  It still allowed overflow of up to 8 bytes which is
sizeof(struct sctp_authkey).

Since optlen is already checked against the size of that struct,
we are guaranteed not to cause interger overflow either.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index afa952e726d..9b9b2c31dd1 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3144,7 +3144,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 		goto out;
 	}
 
-	if (authkey->sca_keylength > optlen) {
+	if (authkey->sca_keylength > optlen - sizeof(struct sctp_authkey)) {
 		ret = -EINVAL;
 		goto out;
 	}
-- 
cgit v1.2.3-70-g09d2


From d97240552cd98c4b07322f30f66fd9c3ba4171de Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 27 Aug 2008 16:09:49 -0700
Subject: sctp: fix random memory dereference with SCTP_HMAC_IDENT option.

The number of identifiers needs to be checked against the option
length.  Also, the identifier index provided needs to be verified
to make sure that it doesn't exceed the bounds of the array.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/auth.c   | 3 +++
 net/sctp/socket.c | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 1fcb4cf2f4c..52db5f60daa 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -786,6 +786,9 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
 	for (i = 0; i < hmacs->shmac_num_idents; i++) {
 		id = hmacs->shmac_idents[i];
 
+		if (id > SCTP_AUTH_HMAC_ID_MAX)
+			return -EOPNOTSUPP;
+
 		if (SCTP_AUTH_HMAC_ID_SHA1 == id)
 			has_sha1 = 1;
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9b9b2c31dd1..5ffb9dec1c3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3086,6 +3086,7 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
 				    int optlen)
 {
 	struct sctp_hmacalgo *hmacs;
+	u32 idents;
 	int err;
 
 	if (!sctp_auth_enable)
@@ -3103,8 +3104,9 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
 		goto out;
 	}
 
-	if (hmacs->shmac_num_idents == 0 ||
-	    hmacs->shmac_num_idents > SCTP_AUTH_NUM_HMACS) {
+	idents = hmacs->shmac_num_idents;
+	if (idents == 0 || idents > SCTP_AUTH_NUM_HMACS ||
+	    (idents * sizeof(u16)) > (optlen - sizeof(struct sctp_hmacalgo))) {
 		err = -EINVAL;
 		goto out;
 	}
-- 
cgit v1.2.3-70-g09d2


From 6eac56040787c3ff604fe7d48bbbb7897cd1387c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 28 Aug 2008 01:08:02 -0700
Subject: tcp: Skip empty hash buckets faster in /proc/net/tcp

On most systems most of the TCP established/time-wait hash buckets are empty.
When walking the hash table for /proc/net/tcp their read locks would
always be aquired just to find out they're empty. This patch changes the code
to check first if the buckets have any entries before taking the lock, which
is much cheaper than taking a lock. Since the hash tables are large
this makes a measurable difference on processing /proc/net/tcp,
especially on architectures with slow read_lock (e.g. PPC)

On a 2GB Core2 system time cat /proc/net/tcp > /dev/null (with a mostly
empty hash table) goes from 0.046s to 0.005s.

On systems with slower atomics (like P4 or POWER4) or larger hash tables
(more RAM) the difference is much higher.

This can be noticeable because there are some daemons around who regularly
scan /proc/net/tcp.

Original idea for this patch from Marcus Meissner, but redone by me.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 44c1e934824..37ca3843c40 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1946,6 +1946,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 	return rc;
 }
 
+static inline int empty_bucket(struct tcp_iter_state *st)
+{
+	return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
+		hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
+}
+
 static void *established_get_first(struct seq_file *seq)
 {
 	struct tcp_iter_state* st = seq->private;
@@ -1958,6 +1964,10 @@ static void *established_get_first(struct seq_file *seq)
 		struct inet_timewait_sock *tw;
 		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
+		/* Lockless fast path for the common case of empty buckets */
+		if (empty_bucket(st))
+			continue;
+
 		read_lock_bh(lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family ||
@@ -2008,13 +2018,15 @@ get_tw:
 		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
-		if (++st->bucket < tcp_hashinfo.ehash_size) {
-			read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
-			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
-		} else {
-			cur = NULL;
-			goto out;
-		}
+		/* Look for next non empty bucket */
+		while (++st->bucket < tcp_hashinfo.ehash_size &&
+				empty_bucket(st))
+			;
+		if (st->bucket >= tcp_hashinfo.ehash_size)
+			return NULL;
+
+		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+		sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
 	} else
 		sk = sk_next(sk);
 
-- 
cgit v1.2.3-70-g09d2


From 6be547a61d6220199826070cda792297c3d15994 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 28 Aug 2008 01:09:54 -0700
Subject: inet_diag: Add empty bucket optimization to inet_diag too

Skip quickly over empty buckets in inet_diag.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c10036e7a46..89cb047ab31 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -782,11 +782,15 @@ skip_listen_ht:
 		struct sock *sk;
 		struct hlist_node *node;
 
+		num = 0;
+
+		if (hlist_empty(&head->chain) && hlist_empty(&head->twchain))
+			continue;
+
 		if (i > s_i)
 			s_num = 0;
 
 		read_lock_bh(lock);
-		num = 0;
 		sk_for_each(sk, node, &head->chain) {
 			struct inet_sock *inet = inet_sk(sk);
 
-- 
cgit v1.2.3-70-g09d2


From a627266570605a98c5fda5b8234d9e92015e4d14 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 28 Aug 2008 01:11:25 -0700
Subject: ip: speedup /proc/net/rt_cache handling

When scanning route cache hash table, we can avoid taking locks for
empty buckets.  Both /proc/net/rt_cache and NETLINK RTM_GETROUTE
interface are taken into account.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cca921ea855..71598f64c11 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -282,6 +282,8 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
 	struct rtable *r = NULL;
 
 	for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
+		if (!rt_hash_table[st->bucket].chain)
+			continue;
 		rcu_read_lock_bh();
 		r = rcu_dereference(rt_hash_table[st->bucket].chain);
 		while (r) {
@@ -299,11 +301,14 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 					  struct rtable *r)
 {
 	struct rt_cache_iter_state *st = seq->private;
+
 	r = r->u.dst.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
-		if (--st->bucket < 0)
-			break;
+		do {
+			if (--st->bucket < 0)
+				return NULL;
+		} while (!rt_hash_table[st->bucket].chain);
 		rcu_read_lock_bh();
 		r = rt_hash_table[st->bucket].chain;
 	}
@@ -2840,7 +2845,9 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 	if (s_h < 0)
 		s_h = 0;
 	s_idx = idx = cb->args[1];
-	for (h = s_h; h <= rt_hash_mask; h++) {
+	for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) {
+		if (!rt_hash_table[h].chain)
+			continue;
 		rcu_read_lock_bh();
 		for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
 		     rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
@@ -2859,7 +2866,6 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 			dst_release(xchg(&skb->dst, NULL));
 		}
 		rcu_read_unlock_bh();
-		s_idx = 0;
 	}
 
 done:
-- 
cgit v1.2.3-70-g09d2


From 7f93ea3e246db512c0c17b79847f57dd3a2891e1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 6 Aug 2008 21:45:26 +0200
Subject: mac80211: fill start-sequence-number for BA session start

Otherwise, drivers are required to keep track of the sequence numbers
themselves, and they really shouldn't be since we already do it for
them. I'll fix the race once we figure out how this code should work
at all, it's currently disabled.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 398ca66bdfc..638b75f36e2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -598,7 +598,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *sdata;
-	u16 start_seq_num = 0;
+	u16 start_seq_num;
 	u8 *state;
 	int ret;
 	DECLARE_MAC_BUF(mac);
@@ -678,6 +678,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	 * call back right away, it must see that the flow has begun */
 	*state |= HT_ADDBA_REQUESTED_MSK;
 
+	/* This is slightly racy because the queue isn't stopped */
+	start_seq_num = sta->tid_seq[tid];
+
 	if (local->ops->ampdu_action)
 		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START,
 						ra, tid, &start_seq_num);
-- 
cgit v1.2.3-70-g09d2


From 9f1ba9062e032fb7b395cd27fc564754fe4e9867 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Thu, 7 Aug 2008 20:07:01 +0300
Subject: mac80211/cfg80211: Add BSS configuration options for AP mode

This change adds a new cfg80211 command, NL80211_CMD_SET_BSS, to allow
AP mode BSS parameters to be changed from user space (e.g., hostapd).
The drivers using mac80211 are expected to be modified with separate
changes to use the new BSS info parameter for short slot time in the
bss_info_changed() handler.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 19 ++++++++++++++++++
 include/net/cfg80211.h  | 22 +++++++++++++++++++++
 include/net/mac80211.h  |  9 +++++++++
 net/mac80211/cfg.c      | 37 +++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.c  | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 139 insertions(+)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 2be7c63bc0f..447c02a5190 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -89,6 +89,8 @@
  * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC
  *	or, if no MAC address given, all mesh paths, on the interface identified
  *	by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by
+ *	%NL80211_ATTR_IFINDEX.
  *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
@@ -127,6 +129,8 @@ enum nl80211_commands {
 	NL80211_CMD_NEW_MPATH,
 	NL80211_CMD_DEL_MPATH,
 
+	NL80211_CMD_SET_BSS,
+
 	/* add commands here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -134,6 +138,11 @@ enum nl80211_commands {
 	NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1
 };
 
+/*
+ * Allow user space programs to use #ifdef on new commands by defining them
+ * here
+ */
+#define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS
 
 /**
  * enum nl80211_attrs - nl80211 netlink attributes
@@ -192,6 +201,12 @@ enum nl80211_commands {
  * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of
  *      &enum nl80211_mntr_flags.
  *
+ * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1)
+ * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled
+ *	(u8, 0 or 1)
+ * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled
+ *	(u8, 0 or 1)
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -235,6 +250,10 @@ enum nl80211_attrs {
 	NL80211_ATTR_MPATH_NEXT_HOP,
 	NL80211_ATTR_MPATH_INFO,
 
+	NL80211_ATTR_BSS_CTS_PROT,
+	NL80211_ATTR_BSS_SHORT_PREAMBLE,
+	NL80211_ATTR_BSS_SHORT_SLOT_TIME,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e00750836ba..7afef14d5c5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -268,6 +268,23 @@ struct mpath_info {
 	u8 flags;
 };
 
+/**
+ * struct bss_parameters - BSS parameters
+ *
+ * Used to change BSS parameters (mainly for AP mode).
+ *
+ * @use_cts_prot: Whether to use CTS protection
+ *	(0 = no, 1 = yes, -1 = do not change)
+ * @use_short_preamble: Whether the use of short preambles is allowed
+ *	(0 = no, 1 = yes, -1 = do not change)
+ * @use_short_slot_time: Whether the use of short slot time is allowed
+ *	(0 = no, 1 = yes, -1 = do not change)
+ */
+struct bss_parameters {
+	int use_cts_prot;
+	int use_short_preamble;
+	int use_short_slot_time;
+};
 
 /* from net/wireless.h */
 struct wiphy;
@@ -318,6 +335,8 @@ struct wiphy;
  * @change_station: Modify a given station.
  *
  * @set_mesh_cfg: set mesh parameters (by now, just mesh id)
+ *
+ * @change_bss: Modify parameters for a given BSS.
  */
 struct cfg80211_ops {
 	int	(*add_virtual_intf)(struct wiphy *wiphy, char *name,
@@ -370,6 +389,9 @@ struct cfg80211_ops {
 	int	(*dump_mpath)(struct wiphy *wiphy, struct net_device *dev,
 			       int idx, u8 *dst, u8 *next_hop,
 			       struct mpath_info *pinfo);
+
+	int	(*change_bss)(struct wiphy *wiphy, struct net_device *dev,
+			      struct bss_parameters *params);
 };
 
 #endif /* __NET_CFG80211_H */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0fdc3dabc96..7c399a9c11d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -158,12 +158,14 @@ struct ieee80211_low_level_stats {
  *	also implies a change in the AID.
  * @BSS_CHANGED_ERP_CTS_PROT: CTS protection changed
  * @BSS_CHANGED_ERP_PREAMBLE: preamble changed
+ * @BSS_CHANGED_ERP_SLOT: slot timing changed
  * @BSS_CHANGED_HT: 802.11n parameters changed
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
 	BSS_CHANGED_ERP_CTS_PROT	= 1<<1,
 	BSS_CHANGED_ERP_PREAMBLE	= 1<<2,
+	BSS_CHANGED_ERP_SLOT		= 1<<3,
 	BSS_CHANGED_HT                  = 1<<4,
 };
 
@@ -177,6 +179,7 @@ enum ieee80211_bss_change {
  * @aid: association ID number, valid only when @assoc is true
  * @use_cts_prot: use CTS protection
  * @use_short_preamble: use 802.11b short preamble
+ * @use_short_slot: use short slot time (only relevant for ERP)
  * @dtim_period: num of beacons before the next DTIM, for PSM
  * @timestamp: beacon timestamp
  * @beacon_int: beacon interval
@@ -192,6 +195,7 @@ struct ieee80211_bss_conf {
 	/* erp related data */
 	bool use_cts_prot;
 	bool use_short_preamble;
+	bool use_short_slot;
 	u8 dtim_period;
 	u16 beacon_int;
 	u16 assoc_capability;
@@ -420,6 +424,11 @@ struct ieee80211_rx_status {
  * @IEEE80211_CONF_PS: Enable 802.11 power save mode
  */
 enum ieee80211_conf_flags {
+	/*
+	 * TODO: IEEE80211_CONF_SHORT_SLOT_TIME will be removed once drivers
+	 * have been converted to use bss_info_changed() for slot time
+	 * configuration
+	 */
 	IEEE80211_CONF_SHORT_SLOT_TIME	= (1<<0),
 	IEEE80211_CONF_RADIOTAP		= (1<<1),
 	IEEE80211_CONF_SUPPORT_HT_MODE	= (1<<2),
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 6d2ad2bf3ab..2b19532f4c8 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1010,6 +1010,42 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
 }
 #endif
 
+static int ieee80211_change_bss(struct wiphy *wiphy,
+				struct net_device *dev,
+				struct bss_parameters *params)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata;
+	u32 changed = 0;
+
+	if (dev == local->mdev)
+		return -EOPNOTSUPP;
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
+		return -EINVAL;
+
+	if (params->use_cts_prot >= 0) {
+		sdata->bss_conf.use_cts_prot = params->use_cts_prot;
+		changed |= BSS_CHANGED_ERP_CTS_PROT;
+	}
+	if (params->use_short_preamble >= 0) {
+		sdata->bss_conf.use_short_preamble =
+			params->use_short_preamble;
+		changed |= BSS_CHANGED_ERP_PREAMBLE;
+	}
+	if (params->use_short_slot_time >= 0) {
+		sdata->bss_conf.use_short_slot =
+			params->use_short_slot_time;
+		changed |= BSS_CHANGED_ERP_SLOT;
+	}
+
+	ieee80211_bss_info_change_notify(sdata, changed);
+
+	return 0;
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1033,4 +1069,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.get_mpath = ieee80211_get_mpath,
 	.dump_mpath = ieee80211_dump_mpath,
 #endif
+	.change_bss = ieee80211_change_bss,
 };
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 59eb2cf42e5..47542ee01c5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -87,6 +87,10 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY,
 				.len = IEEE80211_MAX_MESH_ID_LEN },
 	[NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 },
+
+	[NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 },
+	[NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 },
+	[NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 },
 };
 
 /* message building helper */
@@ -1525,6 +1529,48 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	int err;
+	struct net_device *dev;
+	struct bss_parameters params;
+
+	memset(&params, 0, sizeof(params));
+	/* default to not changing parameters */
+	params.use_cts_prot = -1;
+	params.use_short_preamble = -1;
+	params.use_short_slot_time = -1;
+
+	if (info->attrs[NL80211_ATTR_BSS_CTS_PROT])
+		params.use_cts_prot =
+		    nla_get_u8(info->attrs[NL80211_ATTR_BSS_CTS_PROT]);
+	if (info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE])
+		params.use_short_preamble =
+		    nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]);
+	if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME])
+		params.use_short_slot_time =
+		    nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]);
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		return err;
+
+	if (!drv->ops->change_bss) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	rtnl_lock();
+	err = drv->ops->change_bss(&drv->wiphy, dev, &params);
+	rtnl_unlock();
+
+ out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -1656,6 +1702,12 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_SET_BSS,
+		.doit = nl80211_set_bss,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 /* multicast groups */
-- 
cgit v1.2.3-70-g09d2


From 43ac2ca3840f64f699a239535c590fa7ebaaac27 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Fri, 15 Aug 2008 22:21:27 +0300
Subject: mac80211: Handle scan result IEs in one block

Clean up and extend scan result processing by storing all the IEs from
Beacon/Probe Response frames in a single block instead of allocating
memory for each specific IE separately. This removes lot of unnecessary
code and automatically supports reporting of new IEs (e.g., IEEE
802.11r) into user space without need to manually extend mac80211
scanning code whenever a new protocol adds IE(s).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  18 ++--
 net/mac80211/mlme.c        | 230 ++++++++++++++++-----------------------------
 2 files changed, 89 insertions(+), 159 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8361054fb7c..2bb546744b9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -79,16 +79,11 @@ struct ieee80211_sta_bss {
 	enum ieee80211_band band;
 	int freq;
 	int signal, noise, qual;
-	u8 *wpa_ie;
-	size_t wpa_ie_len;
-	u8 *rsn_ie;
-	size_t rsn_ie_len;
-	u8 *wmm_ie;
-	size_t wmm_ie_len;
-	u8 *ht_ie;
-	size_t ht_ie_len;
-	u8 *ht_add_ie;
-	size_t ht_add_ie_len;
+	u8 *ies; /* all information elements from the last Beacon or Probe
+		  * Response frames; note Beacon frame is not allowed to
+		  * override values from Probe Response */
+	size_t ies_len;
+	bool wmm_used;
 #ifdef CONFIG_MAC80211_MESH
 	u8 *mesh_id;
 	size_t mesh_id_len;
@@ -773,6 +768,9 @@ struct ieee80211_ra_tid {
 
 /* Parsed Information Elements */
 struct ieee802_11_elems {
+	u8 *ie_start;
+	size_t total_len;
+
 	/* pointers to IEs */
 	u8 *ssid;
 	u8 *supp_rates;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 84999791a33..e088b440aaf 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -98,6 +98,8 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
 	u8 *pos = start;
 
 	memset(elems, 0, sizeof(*elems));
+	elems->ie_start = start;
+	elems->total_len = len;
 
 	while (left >= 2) {
 		u8 id, elen;
@@ -234,6 +236,27 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
 }
 
 
+static u8 * ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
+{
+	u8 *end, *pos;
+
+	pos = bss->ies;
+	if (pos == NULL)
+		return NULL;
+	end = pos + bss->ies_len;
+
+	while (pos + 1 < end) {
+		if (pos + 2 + pos[1] > end)
+			break;
+		if (pos[0] == ie)
+			return pos;
+		pos += 2 + pos[1];
+	}
+
+	return NULL;
+}
+
+
 static int ecw2cw(int ecw)
 {
 	return (1 << ecw) - 1;
@@ -737,7 +760,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *pos, *ies;
+	u8 *pos, *ies, *ht_add_ie;
 	int i, len, count, rates_len, supp_rates_len;
 	u16 capab;
 	struct ieee80211_sta_bss *bss;
@@ -772,7 +795,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 	if (bss) {
 		if (bss->capability & WLAN_CAPABILITY_PRIVACY)
 			capab |= WLAN_CAPABILITY_PRIVACY;
-		if (bss->wmm_ie)
+		if (bss->wmm_used)
 			wmm = 1;
 
 		/* get all rates supported by the device and the AP as
@@ -894,9 +917,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 
 	/* wmm support is a must to HT */
 	if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) &&
-	    sband->ht_info.ht_supported && bss->ht_add_ie) {
+	    sband->ht_info.ht_supported &&
+	    (ht_add_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_EXTRA_INFO))) {
 		struct ieee80211_ht_addt_info *ht_add_info =
-			(struct ieee80211_ht_addt_info *)bss->ht_add_ie;
+			(struct ieee80211_ht_addt_info *)ht_add_ie;
 		u16 cap = sband->ht_info.cap;
 		__le16 tmp;
 		u32 flags = local->hw.conf.channel->flags;
@@ -2372,11 +2396,7 @@ ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_i
 
 static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
 {
-	kfree(bss->wpa_ie);
-	kfree(bss->rsn_ie);
-	kfree(bss->wmm_ie);
-	kfree(bss->ht_ie);
-	kfree(bss->ht_add_ie);
+	kfree(bss->ies);
 	kfree(bss_mesh_id(bss));
 	kfree(bss_mesh_cfg(bss));
 	kfree(bss);
@@ -2662,43 +2682,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		bss->has_erp_value = 1;
 	}
 
-	if (elems->ht_cap_elem &&
-	     (!bss->ht_ie || bss->ht_ie_len != elems->ht_cap_elem_len ||
-	     memcmp(bss->ht_ie, elems->ht_cap_elem, elems->ht_cap_elem_len))) {
-		kfree(bss->ht_ie);
-		bss->ht_ie = kmalloc(elems->ht_cap_elem_len + 2, GFP_ATOMIC);
-		if (bss->ht_ie) {
-			memcpy(bss->ht_ie, elems->ht_cap_elem - 2,
-				elems->ht_cap_elem_len + 2);
-			bss->ht_ie_len = elems->ht_cap_elem_len + 2;
-		} else
-			bss->ht_ie_len = 0;
-	} else if (!elems->ht_cap_elem && bss->ht_ie) {
-		kfree(bss->ht_ie);
-		bss->ht_ie = NULL;
-		bss->ht_ie_len = 0;
-	}
-
-	if (elems->ht_info_elem &&
-	     (!bss->ht_add_ie ||
-	     bss->ht_add_ie_len != elems->ht_info_elem_len ||
-	     memcmp(bss->ht_add_ie, elems->ht_info_elem,
-			elems->ht_info_elem_len))) {
-		kfree(bss->ht_add_ie);
-		bss->ht_add_ie =
-			kmalloc(elems->ht_info_elem_len + 2, GFP_ATOMIC);
-		if (bss->ht_add_ie) {
-			memcpy(bss->ht_add_ie, elems->ht_info_elem - 2,
-				elems->ht_info_elem_len + 2);
-			bss->ht_add_ie_len = elems->ht_info_elem_len + 2;
-		} else
-			bss->ht_add_ie_len = 0;
-	} else if (!elems->ht_info_elem && bss->ht_add_ie) {
-		kfree(bss->ht_add_ie);
-		bss->ht_add_ie = NULL;
-		bss->ht_add_ie_len = 0;
-	}
-
 	bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
 	bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
 
@@ -2749,88 +2732,17 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
-	if (elems->wpa &&
-	    (!bss->wpa_ie || bss->wpa_ie_len != elems->wpa_len ||
-	     memcmp(bss->wpa_ie, elems->wpa, elems->wpa_len))) {
-		kfree(bss->wpa_ie);
-		bss->wpa_ie = kmalloc(elems->wpa_len + 2, GFP_ATOMIC);
-		if (bss->wpa_ie) {
-			memcpy(bss->wpa_ie, elems->wpa - 2, elems->wpa_len + 2);
-			bss->wpa_ie_len = elems->wpa_len + 2;
-		} else
-			bss->wpa_ie_len = 0;
-	} else if (!elems->wpa && bss->wpa_ie) {
-		kfree(bss->wpa_ie);
-		bss->wpa_ie = NULL;
-		bss->wpa_ie_len = 0;
-	}
-
-	if (elems->rsn &&
-	    (!bss->rsn_ie || bss->rsn_ie_len != elems->rsn_len ||
-	     memcmp(bss->rsn_ie, elems->rsn, elems->rsn_len))) {
-		kfree(bss->rsn_ie);
-		bss->rsn_ie = kmalloc(elems->rsn_len + 2, GFP_ATOMIC);
-		if (bss->rsn_ie) {
-			memcpy(bss->rsn_ie, elems->rsn - 2, elems->rsn_len + 2);
-			bss->rsn_ie_len = elems->rsn_len + 2;
-		} else
-			bss->rsn_ie_len = 0;
-	} else if (!elems->rsn && bss->rsn_ie) {
-		kfree(bss->rsn_ie);
-		bss->rsn_ie = NULL;
-		bss->rsn_ie_len = 0;
+	if (bss->ies == NULL || bss->ies_len < elems->total_len) {
+		kfree(bss->ies);
+		bss->ies = kmalloc(elems->total_len, GFP_ATOMIC);
 	}
+	if (bss->ies) {
+		memcpy(bss->ies, elems->ie_start, elems->total_len);
+		bss->ies_len = elems->total_len;
+	} else
+		bss->ies_len = 0;
 
-	/*
-	 * Cf.
-	 * http://www.wipo.int/pctdb/en/wo.jsp?wo=2007047181&IA=WO2007047181&DISPLAY=DESC
-	 *
-	 * quoting:
-	 *
-	 * In particular, "Wi-Fi CERTIFIED for WMM - Support for Multimedia
-	 * Applications with Quality of Service in Wi-Fi Networks," Wi- Fi
-	 * Alliance (September 1, 2004) is incorporated by reference herein.
-	 * The inclusion of the WMM Parameters in probe responses and
-	 * association responses is mandatory for WMM enabled networks. The
-	 * inclusion of the WMM Parameters in beacons, however, is optional.
-	 */
-
-	if (elems->wmm_param &&
-	    (!bss->wmm_ie || bss->wmm_ie_len != elems->wmm_param_len ||
-	     memcmp(bss->wmm_ie, elems->wmm_param, elems->wmm_param_len))) {
-		kfree(bss->wmm_ie);
-		bss->wmm_ie = kmalloc(elems->wmm_param_len + 2, GFP_ATOMIC);
-		if (bss->wmm_ie) {
-			memcpy(bss->wmm_ie, elems->wmm_param - 2,
-			       elems->wmm_param_len + 2);
-			bss->wmm_ie_len = elems->wmm_param_len + 2;
-		} else
-			bss->wmm_ie_len = 0;
-	} else if (elems->wmm_info &&
-		    (!bss->wmm_ie || bss->wmm_ie_len != elems->wmm_info_len ||
-		     memcmp(bss->wmm_ie, elems->wmm_info,
-						elems->wmm_info_len))) {
-		 /* As for certain AP's Fifth bit is not set in WMM IE in
-		  * beacon frames.So while parsing the beacon frame the
-		  * wmm_info structure is used instead of wmm_param.
-		  * wmm_info structure was never used to set bss->wmm_ie.
-		  * This code fixes this problem by copying the WME
-		  * information from wmm_info to bss->wmm_ie and enabling
-		  * n-band association.
-		  */
-		kfree(bss->wmm_ie);
-		bss->wmm_ie = kmalloc(elems->wmm_info_len + 2, GFP_ATOMIC);
-		if (bss->wmm_ie) {
-			memcpy(bss->wmm_ie, elems->wmm_info - 2,
-			       elems->wmm_info_len + 2);
-			bss->wmm_ie_len = elems->wmm_info_len + 2;
-		} else
-			bss->wmm_ie_len = 0;
-	} else if (!elems->wmm_param && !elems->wmm_info && bss->wmm_ie) {
-		kfree(bss->wmm_ie);
-		bss->wmm_ie = NULL;
-		bss->wmm_ie_len = 0;
-	}
+	bss->wmm_used = elems->wmm_param || elems->wmm_info;
 
 	/* check if we need to merge IBSS */
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && beacon &&
@@ -4146,6 +4058,48 @@ int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t
 	return 0;
 }
 
+
+static void ieee80211_sta_add_scan_ies(struct iw_request_info *info,
+				       struct ieee80211_sta_bss *bss,
+				       char **current_ev, char *end_buf)
+{
+	u8 *pos, *end, *next;
+	struct iw_event iwe;
+
+	if (bss == NULL || bss->ies == NULL)
+		return;
+
+	/*
+	 * If needed, fragment the IEs buffer (at IE boundaries) into short
+	 * enough fragments to fit into IW_GENERIC_IE_MAX octet messages.
+	 */
+	pos = bss->ies;
+	end = pos + bss->ies_len;
+
+	while (end - pos > IW_GENERIC_IE_MAX) {
+		next = pos + 2 + pos[1];
+		while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX)
+			next = next + 2 + next[1];
+
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVGENIE;
+		iwe.u.data.length = next - pos;
+		*current_ev = iwe_stream_add_point(info, *current_ev,
+						   end_buf, &iwe, pos);
+
+		pos = next;
+	}
+
+	if (end > pos) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVGENIE;
+		iwe.u.data.length = end - pos;
+		*current_ev = iwe_stream_add_point(info, *current_ev,
+						   end_buf, &iwe, pos);
+	}
+}
+
+
 static char *
 ieee80211_sta_scan_result(struct ieee80211_local *local,
 			  struct iw_request_info *info,
@@ -4225,29 +4179,7 @@ ieee80211_sta_scan_result(struct ieee80211_local *local,
 	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
 					  &iwe, "");
 
-	if (bss && bss->wpa_ie) {
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVGENIE;
-		iwe.u.data.length = bss->wpa_ie_len;
-		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-						  &iwe, bss->wpa_ie);
-	}
-
-	if (bss && bss->rsn_ie) {
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVGENIE;
-		iwe.u.data.length = bss->rsn_ie_len;
-		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-						  &iwe, bss->rsn_ie);
-	}
-
-	if (bss && bss->ht_ie) {
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVGENIE;
-		iwe.u.data.length = bss->ht_ie_len;
-		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-						  &iwe, bss->ht_ie);
-	}
+	ieee80211_sta_add_scan_ies(info, bss, &current_ev, end_buf);
 
 	if (bss && bss->supp_rates_len > 0) {
 		/* display all supported rates in readable format */
-- 
cgit v1.2.3-70-g09d2


From 2f58bbf27fe5321a7a208be9071efc54e8a8a3bd Mon Sep 17 00:00:00 2001
From: Daniel Wagner <wagi@monom.org>
Date: Tue, 19 Aug 2008 15:44:35 +0200
Subject: mac80211: Use only precedence level of DSCP field for frame
 classification

Bit 4-5 of DSCP should not be considered by classify_d1. The
802.11 QoS Priority field is only depending on the precedence level.

Signed-off-by: Daniel Wagner <wagi@monom.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wme.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 4310e2f6566..7229e958879 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -47,8 +47,6 @@ static unsigned int classify_1d(struct sk_buff *skb)
 		return 0;
 	}
 
-	if (dscp & 0x1c)
-		return 0;
 	return dscp >> 5;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 36aedc903ea11a4188de0a118d26c9f20afdd272 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 25 Aug 2008 11:58:58 +0300
Subject: mac80211/cfg80211: HT capabilities for NEW_STA

Allow userspace (e.g., hostapd) to set HT capabilities for associated
STAs. This is based on a patch from Zhu Yi <yi.zhu@intel.com> (only
the NL80211_ATTR_HT_CAPABILITY for NEW_STA part is included here).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 12 ++++++++++++
 include/net/cfg80211.h  |  1 +
 net/mac80211/cfg.c      |  5 +++++
 net/wireless/nl80211.c  | 10 ++++++++++
 4 files changed, 28 insertions(+)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 447c02a5190..0c1147de3ec 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -207,6 +207,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled
  *	(u8, 0 or 1)
  *
+ * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from
+ *	association request when used with NL80211_CMD_NEW_STATION)
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -254,16 +257,25 @@ enum nl80211_attrs {
 	NL80211_ATTR_BSS_SHORT_PREAMBLE,
 	NL80211_ATTR_BSS_SHORT_SLOT_TIME,
 
+	NL80211_ATTR_HT_CAPABILITY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
 	NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1
 };
 
+/*
+ * Allow user space programs to use #ifdef on new attributes by defining them
+ * here
+ */
+#define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY
+
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY	0
 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY	16
 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY	24
+#define NL80211_HT_CAPABILITY_LEN		26
 
 /**
  * enum nl80211_iftype - (virtual) interface types
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7afef14d5c5..0a72d1e3d3a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -152,6 +152,7 @@ struct station_parameters {
 	u16 aid;
 	u8 supported_rates_len;
 	u8 plink_action;
+	struct ieee80211_ht_cap *ht_capa;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 2b19532f4c8..928813ce08e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -674,6 +674,11 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 		sta->supp_rates[local->oper_channel->band] = rates;
 	}
 
+	if (params->ht_capa) {
+		ieee80211_ht_cap_ie_to_ht_info(params->ht_capa,
+					       &sta->ht_info);
+	}
+
 	if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) {
 		switch (params->plink_action) {
 		case PLINK_ACTION_OPEN:
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 47542ee01c5..4d6c02afd6f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -91,6 +91,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 },
 	[NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 },
 	[NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 },
+
+	[NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY,
+					 .len = NL80211_HT_CAPABILITY_LEN },
 };
 
 /* message building helper */
@@ -1129,6 +1132,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.listen_interval =
 		    nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
 
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
+		params.ht_capa =
+			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
+
 	if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
 				&params.station_flags))
 		return -EINVAL;
@@ -1192,6 +1199,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 	params.listen_interval =
 		nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
 	params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
+		params.ht_capa =
+			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
 
 	if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
 				&params.station_flags))
-- 
cgit v1.2.3-70-g09d2


From 849e0576a76bc421aacd782f97948856f487726c Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Tue, 26 Aug 2008 11:57:57 -0300
Subject: rfkill: use strict_strtoul (v2)

Switch sysfs parsing to something that actually works properly.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 47e0b2d232e..173d039d990 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -402,12 +402,16 @@ static ssize_t rfkill_state_store(struct device *dev,
 				  const char *buf, size_t count)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
-	unsigned int state = simple_strtoul(buf, NULL, 0);
+	unsigned long state;
 	int error;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
+	error = strict_strtoul(buf, 0, &state);
+	if (error)
+		return error;
+
 	/* RFKILL_STATE_HARD_BLOCKED is illegal here... */
 	if (state != RFKILL_STATE_UNBLOCKED &&
 	    state != RFKILL_STATE_SOFT_BLOCKED)
@@ -435,7 +439,8 @@ static ssize_t rfkill_claim_store(struct device *dev,
 				  const char *buf, size_t count)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
-	bool claim = !!simple_strtoul(buf, NULL, 0);
+	unsigned long claim_tmp;
+	bool claim;
 	int error;
 
 	if (!capable(CAP_NET_ADMIN))
@@ -444,6 +449,11 @@ static ssize_t rfkill_claim_store(struct device *dev,
 	if (rfkill->user_claim_unsupported)
 		return -EOPNOTSUPP;
 
+	error = strict_strtoul(buf, 0, &claim_tmp);
+	if (error)
+		return error;
+	claim = !!claim_tmp;
+
 	/*
 	 * Take the global lock to make sure the kernel is not in
 	 * the middle of rfkill_switch_all
-- 
cgit v1.2.3-70-g09d2


From 01b510b9c29caf2134c31d2bc8c2c5cc73987eb6 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@nokia.com>
Date: Tue, 26 Aug 2008 11:57:58 -0300
Subject: rfkill: add missing line break

Trivial patch adding a missing line break on
rfkill_claim_show().

Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.co>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 173d039d990..b630f358d67 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -431,7 +431,7 @@ static ssize_t rfkill_claim_show(struct device *dev,
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 
-	return sprintf(buf, "%d", rfkill->user_claim);
+	return sprintf(buf, "%d\n", rfkill->user_claim);
 }
 
 static ssize_t rfkill_claim_store(struct device *dev,
-- 
cgit v1.2.3-70-g09d2


From f745ba03a12a1c4b98a88a96ab39d9b58ac677a2 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Tue, 26 Aug 2008 11:57:59 -0300
Subject: rfkill: add WARN and BUG_ON paranoia (v2)

BUG_ON() and WARN() the heck out of buggy drivers calling into the rfkill
subsystem.

Also switch from WARN_ON(1) to the new descriptive WARN().

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 50 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index b630f358d67..910699c4e04 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -76,6 +76,7 @@ static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list);
  */
 int register_rfkill_notifier(struct notifier_block *nb)
 {
+	BUG_ON(!nb);
 	return blocking_notifier_chain_register(&rfkill_notifier_list, nb);
 }
 EXPORT_SYMBOL_GPL(register_rfkill_notifier);
@@ -91,6 +92,7 @@ EXPORT_SYMBOL_GPL(register_rfkill_notifier);
  */
 int unregister_rfkill_notifier(struct notifier_block *nb)
 {
+	BUG_ON(!nb);
 	return blocking_notifier_chain_unregister(&rfkill_notifier_list, nb);
 }
 EXPORT_SYMBOL_GPL(unregister_rfkill_notifier);
@@ -202,6 +204,9 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
 		 * RFKILL_STATE_HARD_BLOCKED */
 		break;
 	default:
+		WARN(1, KERN_WARNING
+			"rfkill: illegal state %d passed as parameter "
+			"to rfkill_toggle_radio\n", state);
 		return -EINVAL;
 	}
 
@@ -236,7 +241,11 @@ static void __rfkill_switch_all(const enum rfkill_type type,
 {
 	struct rfkill *rfkill;
 
-	if (unlikely(state >= RFKILL_STATE_MAX))
+	if (WARN((state >= RFKILL_STATE_MAX || type >= RFKILL_TYPE_MAX),
+			KERN_WARNING
+			"rfkill: illegal state %d or type %d "
+			"passed as parameter to __rfkill_switch_all\n",
+			state, type))
 		return;
 
 	rfkill_global_states[type].current_state = state;
@@ -334,7 +343,11 @@ int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state)
 {
 	enum rfkill_state oldstate;
 
-	if (unlikely(state >= RFKILL_STATE_MAX))
+	BUG_ON(!rfkill);
+	if (WARN((state >= RFKILL_STATE_MAX),
+			KERN_WARNING
+			"rfkill: illegal state %d passed as parameter "
+			"to rfkill_force_state\n", state))
 		return -EINVAL;
 
 	mutex_lock(&rfkill->mutex);
@@ -593,10 +606,10 @@ static int rfkill_check_duplicity(const struct rfkill *rfkill)
 	memset(seen, 0, sizeof(seen));
 
 	list_for_each_entry(p, &rfkill_list, node) {
-		if (p == rfkill) {
-			WARN_ON(1);
+		if (WARN((p == rfkill), KERN_WARNING
+				"rfkill: illegal attempt to register "
+				"an already registered rfkill struct\n"))
 			return -EEXIST;
-		}
 		set_bit(p->type, seen);
 	}
 
@@ -664,6 +677,12 @@ struct rfkill * __must_check rfkill_allocate(struct device *parent,
 	struct rfkill *rfkill;
 	struct device *dev;
 
+	if (WARN((type >= RFKILL_TYPE_MAX),
+			KERN_WARNING
+			"rfkill: illegal type %d passed as parameter "
+			"to rfkill_allocate\n", type))
+		return NULL;
+
 	rfkill = kzalloc(sizeof(struct rfkill), GFP_KERNEL);
 	if (!rfkill)
 		return NULL;
@@ -736,11 +755,12 @@ int __must_check rfkill_register(struct rfkill *rfkill)
 	struct device *dev = &rfkill->dev;
 	int error;
 
-	if (!rfkill->toggle_radio)
-		return -EINVAL;
-	if (rfkill->type >= RFKILL_TYPE_MAX)
-		return -EINVAL;
-	if (rfkill->state >= RFKILL_STATE_MAX)
+	if (WARN((!rfkill || !rfkill->toggle_radio ||
+			rfkill->type >= RFKILL_TYPE_MAX ||
+			rfkill->state >= RFKILL_STATE_MAX),
+			KERN_WARNING
+			"rfkill: attempt to register a "
+			"badly initialized rfkill struct\n"))
 		return -EINVAL;
 
 	snprintf(dev->bus_id, sizeof(dev->bus_id),
@@ -775,6 +795,7 @@ EXPORT_SYMBOL(rfkill_register);
  */
 void rfkill_unregister(struct rfkill *rfkill)
 {
+	BUG_ON(!rfkill);
 	device_del(&rfkill->dev);
 	rfkill_remove_switch(rfkill);
 	rfkill_led_trigger_unregister(rfkill);
@@ -811,9 +832,12 @@ int rfkill_set_default(enum rfkill_type type, enum rfkill_state state)
 {
 	int error;
 
-	if (type >= RFKILL_TYPE_MAX ||
-	    (state != RFKILL_STATE_SOFT_BLOCKED &&
-	     state != RFKILL_STATE_UNBLOCKED))
+	if (WARN((type >= RFKILL_TYPE_MAX ||
+			(state != RFKILL_STATE_SOFT_BLOCKED &&
+			 state != RFKILL_STATE_UNBLOCKED)),
+			KERN_WARNING
+			"rfkill: illegal state %d or type %d passed as "
+			"parameter to rfkill_set_default\n", state, type))
 		return -EINVAL;
 
 	mutex_lock(&rfkill_mutex);
-- 
cgit v1.2.3-70-g09d2


From 15635744484d4255778fc641261be27179c51f9a Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Tue, 26 Aug 2008 11:58:00 -0300
Subject: rfkill: rename rfkill_mutex to rfkill_global_mutex

rfkill_mutex and rfkill->mutex are too easy to confuse with each other.

Rename rfkill_mutex to rfkill_global_mutex, so that they are easier to tell
apart with just one glance.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 910699c4e04..d5735799ccd 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -37,7 +37,7 @@ MODULE_DESCRIPTION("RF switch support");
 MODULE_LICENSE("GPL");
 
 static LIST_HEAD(rfkill_list);	/* list of registered rf switches */
-static DEFINE_MUTEX(rfkill_mutex);
+static DEFINE_MUTEX(rfkill_global_mutex);
 
 static unsigned int rfkill_default_state = RFKILL_STATE_UNBLOCKED;
 module_param_named(default_state, rfkill_default_state, uint, 0444);
@@ -234,7 +234,7 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
  * unless a specific switch is claimed by userspace (in which case,
  * that switch is left alone) or suspended.
  *
- * Caller must have acquired rfkill_mutex.
+ * Caller must have acquired rfkill_global_mutex.
  */
 static void __rfkill_switch_all(const enum rfkill_type type,
 				const enum rfkill_state state)
@@ -263,14 +263,14 @@ static void __rfkill_switch_all(const enum rfkill_type type,
  * @type: type of interfaces to be affected
  * @state: the new state
  *
- * Acquires rfkill_mutex and calls __rfkill_switch_all(@type, @state).
+ * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state).
  * Please refer to __rfkill_switch_all() for details.
  */
 void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
 {
-	mutex_lock(&rfkill_mutex);
+	mutex_lock(&rfkill_global_mutex);
 	__rfkill_switch_all(type, state);
-	mutex_unlock(&rfkill_mutex);
+	mutex_unlock(&rfkill_global_mutex);
 }
 EXPORT_SYMBOL(rfkill_switch_all);
 
@@ -278,7 +278,7 @@ EXPORT_SYMBOL(rfkill_switch_all);
  * rfkill_epo - emergency power off all transmitters
  *
  * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED,
- * ignoring everything in its path but rfkill_mutex and rfkill->mutex.
+ * ignoring everything in its path but rfkill_global_mutex and rfkill->mutex.
  *
  * The global state before the EPO is saved and can be restored later
  * using rfkill_restore_states().
@@ -288,7 +288,8 @@ void rfkill_epo(void)
 	struct rfkill *rfkill;
 	int i;
 
-	mutex_lock(&rfkill_mutex);
+	mutex_lock(&rfkill_global_mutex);
+
 	list_for_each_entry(rfkill, &rfkill_list, node) {
 		mutex_lock(&rfkill->mutex);
 		rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
@@ -300,7 +301,7 @@ void rfkill_epo(void)
 		rfkill_global_states[i].current_state =
 				RFKILL_STATE_SOFT_BLOCKED;
 	}
-	mutex_unlock(&rfkill_mutex);
+	mutex_unlock(&rfkill_global_mutex);
 }
 EXPORT_SYMBOL_GPL(rfkill_epo);
 
@@ -315,10 +316,11 @@ void rfkill_restore_states(void)
 {
 	int i;
 
-	mutex_lock(&rfkill_mutex);
+	mutex_lock(&rfkill_global_mutex);
+
 	for (i = 0; i < RFKILL_TYPE_MAX; i++)
 		__rfkill_switch_all(i, rfkill_global_states[i].default_state);
-	mutex_unlock(&rfkill_mutex);
+	mutex_unlock(&rfkill_global_mutex);
 }
 EXPORT_SYMBOL_GPL(rfkill_restore_states);
 
@@ -471,7 +473,7 @@ static ssize_t rfkill_claim_store(struct device *dev,
 	 * Take the global lock to make sure the kernel is not in
 	 * the middle of rfkill_switch_all
 	 */
-	error = mutex_lock_interruptible(&rfkill_mutex);
+	error = mutex_lock_interruptible(&rfkill_global_mutex);
 	if (error)
 		return error;
 
@@ -486,7 +488,7 @@ static ssize_t rfkill_claim_store(struct device *dev,
 		rfkill->user_claim = claim;
 	}
 
-	mutex_unlock(&rfkill_mutex);
+	mutex_unlock(&rfkill_global_mutex);
 
 	return error ? error : count;
 }
@@ -621,7 +623,7 @@ static int rfkill_add_switch(struct rfkill *rfkill)
 {
 	int error;
 
-	mutex_lock(&rfkill_mutex);
+	mutex_lock(&rfkill_global_mutex);
 
 	error = rfkill_check_duplicity(rfkill);
 	if (error < 0)
@@ -642,16 +644,16 @@ static int rfkill_add_switch(struct rfkill *rfkill)
 
 	error = 0;
 unlock_out:
-	mutex_unlock(&rfkill_mutex);
+	mutex_unlock(&rfkill_global_mutex);
 
 	return error;
 }
 
 static void rfkill_remove_switch(struct rfkill *rfkill)
 {
-	mutex_lock(&rfkill_mutex);
+	mutex_lock(&rfkill_global_mutex);
 	list_del_init(&rfkill->node);
-	mutex_unlock(&rfkill_mutex);
+	mutex_unlock(&rfkill_global_mutex);
 
 	mutex_lock(&rfkill->mutex);
 	rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
@@ -840,7 +842,7 @@ int rfkill_set_default(enum rfkill_type type, enum rfkill_state state)
 			"parameter to rfkill_set_default\n", state, type))
 		return -EINVAL;
 
-	mutex_lock(&rfkill_mutex);
+	mutex_lock(&rfkill_global_mutex);
 
 	if (!test_and_set_bit(type, rfkill_states_lockdflt)) {
 		rfkill_global_states[type].default_state = state;
@@ -848,7 +850,7 @@ int rfkill_set_default(enum rfkill_type type, enum rfkill_state state)
 	} else
 		error = -EPERM;
 
-	mutex_unlock(&rfkill_mutex);
+	mutex_unlock(&rfkill_global_mutex);
 	return error;
 }
 EXPORT_SYMBOL_GPL(rfkill_set_default);
-- 
cgit v1.2.3-70-g09d2


From 3cc76caa98b092a8fb3e7b4303c70f847db0651f Mon Sep 17 00:00:00 2001
From: Yang Hongyang <yanghy@cn.fujitsu.com>
Date: Fri, 29 Aug 2008 14:06:51 -0700
Subject: ipv6: When we droped a packet, we should return NET_RX_DROP instead
 of 0

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 01d47674f7e..e53e493606c 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -377,14 +377,14 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 	    skb_checksum_complete(skb)) {
 		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
-		return 0;
+		return NET_RX_DROP;
 	}
 
 	/* Charge it to the socket. */
 	if (sock_queue_rcv_skb(sk,skb)<0) {
 		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
-		return 0;
+		return NET_RX_DROP;
 	}
 
 	return 0;
@@ -429,7 +429,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 		if (skb_checksum_complete(skb)) {
 			atomic_inc(&sk->sk_drops);
 			kfree_skb(skb);
-			return 0;
+			return NET_RX_DROP;
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 102396ae65108b026e4e1868e30fa013f45a169e Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 29 Aug 2008 14:21:52 -0700
Subject: pkt_sched: Fix locking of qdisc_root with qdisc_root_sleeping_lock()

Use qdisc_root_sleeping_lock() instead of qdisc_root_lock() where
appropriate. The only difference is while dev is deactivated, when
currently we can use a sleeping qdisc with the lock of noop_qdisc.
This shouldn't be dangerous since after deactivation root lock could
be used only by gen_estimator code, but looks wrong anyway.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c   | 2 +-
 net/sched/cls_route.c | 2 +-
 net/sched/sch_api.c   | 8 ++++----
 net/sched/sch_cbq.c   | 2 +-
 net/sched/sch_htb.c   | 4 ++--
 net/sched/sch_netem.c | 2 +-
 net/sched/sch_teql.c  | 2 +-
 7 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5cafdd4c801..8eb79e92e94 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -205,7 +205,7 @@ replay:
 		}
 	}
 
-	root_lock = qdisc_root_lock(q);
+	root_lock = qdisc_root_sleeping_lock(q);
 
 	if (tp == NULL) {
 		/* Proto-tcf does not exist, create new one */
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 481260a4f10..e3d8455eebc 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -75,7 +75,7 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif)
 static inline
 void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
 {
-	spinlock_t *root_lock = qdisc_root_lock(q);
+	spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
 
 	spin_lock_bh(root_lock);
 	memset(head->fastmap, 0, sizeof(head->fastmap));
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 506b709510b..1122c952aa9 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1169,8 +1169,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
 		goto nla_put_failure;
 
-	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
+	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+					 qdisc_root_sleeping_lock(q), &d) < 0)
 		goto nla_put_failure;
 
 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -1461,8 +1461,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
 		goto nla_put_failure;
 
-	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
+	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+					 qdisc_root_sleeping_lock(q), &d) < 0)
 		goto nla_put_failure;
 
 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 9b720adedea..8b06fa90048 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1754,7 +1754,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg)
 
 	if (--cl->refcnt == 0) {
 #ifdef CONFIG_NET_CLS_ACT
-		spinlock_t *root_lock = qdisc_root_lock(sch);
+		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
 		struct cbq_sched_data *q = qdisc_priv(sch);
 
 		spin_lock_bh(root_lock);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 97d4761cc31..d14f02056ae 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1043,7 +1043,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
-	spinlock_t *root_lock = qdisc_root_lock(sch);
+	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
 	struct htb_sched *q = qdisc_priv(sch);
 	struct nlattr *nest;
 	struct tc_htb_glob gopt;
@@ -1075,7 +1075,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 			  struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
-	spinlock_t *root_lock = qdisc_root_lock(sch);
+	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
 	struct nlattr *nest;
 	struct tc_htb_opt opt;
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index fb0294d0b55..3781e55046d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -341,7 +341,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	for (i = 0; i < n; i++)
 		d->table[i] = data[i];
 
-	root_lock = qdisc_root_lock(sch);
+	root_lock = qdisc_root_sleeping_lock(sch);
 
 	spin_lock_bh(root_lock);
 	d = xchg(&q->delay_dist, d);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 2c35c678563..d35ef059abb 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -161,7 +161,7 @@ teql_destroy(struct Qdisc* sch)
 						txq = netdev_get_tx_queue(master->dev, 0);
 						master->slaves = NULL;
 
-						root_lock = qdisc_root_lock(txq->qdisc);
+						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
 						spin_lock_bh(root_lock);
 						qdisc_reset(txq->qdisc);
 						spin_unlock_bh(root_lock);
-- 
cgit v1.2.3-70-g09d2


From 27df6f25ff218072e0e879a96beeb398a79cdbc8 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 31 Aug 2008 19:25:49 +0400
Subject: sunrpc: fix possible overrun on read of /proc/sys/sunrpc/transports

Vegard Nossum reported
----------------------
> I noticed that something weird is going on with /proc/sys/sunrpc/transports.
> This file is generated in net/sunrpc/sysctl.c, function proc_do_xprt(). When
> I "cat" this file, I get the expected output:
>    $ cat /proc/sys/sunrpc/transports
>    tcp 1048576
>    udp 32768

> But I think that it does not check the length of the buffer supplied by
> userspace to read(). With my original program, I found that the stack was
> being overwritten by the characters above, even when the length given to
> read() was just 1.

David Wagner added (among other things) that copy_to_user could be
probably used here.

Ingo Oeser suggested to use simple_read_from_buffer() here.

The conclusion is that proc_do_xprt doesn't check for userside buffer
size indeed so fix this by using Ingo's suggestion.

Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
CC: Ingo Oeser <ioe-lkml@rameria.de>
Cc: Neil Brown <neilb@suse.de>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Greg Banks <gnb@sgi.com>
Cc: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/sysctl.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 0f8c439b848..5231f7aaac0 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -60,24 +60,14 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	char tmpbuf[256];
-	int len;
+	size_t len;
+
 	if ((*ppos && !write) || !*lenp) {
 		*lenp = 0;
 		return 0;
 	}
-	if (write)
-		return -EINVAL;
-	else {
-		len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
-		if (!access_ok(VERIFY_WRITE, buffer, len))
-			return -EFAULT;
-
-		if (__copy_to_user(buffer, tmpbuf, len))
-			return -EFAULT;
-	}
-	*lenp -= len;
-	*ppos += len;
-	return 0;
+	len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
+	return simple_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len);
 }
 
 static int
-- 
cgit v1.2.3-70-g09d2


From d9664741e0e2216770d6e52646474d3982b8eb55 Mon Sep 17 00:00:00 2001
From: Florian Mickler <florian@mickler.org>
Date: Tue, 2 Sep 2008 15:26:34 +0200
Subject: net/wireless/Kconfig: clarify the description for
 CONFIG_WIRELESS_EXT_SYSFS

Current setup with hal and NetworkManager will fail to work
without newest hal version with this config option disabled.

Although this will solve itself by time, at the moment it is
dishonest to say that we don't know any software that uses it,
if there are many many people relying on old hal versions.

Signed-off-by: Florian Mickler <florian@mickler.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index ab015c62d56..833b024f8f6 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -39,4 +39,5 @@ config WIRELESS_EXT_SYSFS
 	  files in /sys/class/net/*/wireless/. The same information
 	  is available via the ioctls as well.
 
-	  Say Y if you have programs using it (we don't know of any).
+	  Say Y if you have programs using it, like old versions of
+	  hal.
-- 
cgit v1.2.3-70-g09d2


From 2b58b209399844995ad48e421267e359e16c03db Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Thu, 28 Aug 2008 15:12:06 +0300
Subject: mac80211: Fix debugfs union misuse and pointer corruption

debugfs union in struct ieee80211_sub_if_data is misused by including a
common default_key dentry as a union member. This ends occupying the same
memory area with the first dentry in other union members (structures;
usually drop_unencrypted). Consequently, debugfs operations on
default_key symlinks and drop_unencrypted entry are using the same
dentry pointer even though they are supposed to be separate ones. This
can lead to removing entries incorrectly or potentially leaving
something behind since one of the dentry pointers gets lost.

Fix this by moving the default_key dentry to a new struct
(common_debugfs) that contains dentries (more to be added in future)
that are shared by all vif types. The debugfs union must only be used
for vif type-specific entries to avoid this type of pointer corruption.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs_key.c | 6 +++---
 net/mac80211/ieee80211_i.h | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 7439b63df5d..cf82acec913 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -265,7 +265,7 @@ void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata)
 	key = sdata->default_key;
 	if (key) {
 		sprintf(buf, "../keys/%d", key->debugfs.cnt);
-		sdata->debugfs.default_key =
+		sdata->common_debugfs.default_key =
 			debugfs_create_symlink("default_key",
 					       sdata->debugfsdir, buf);
 	} else
@@ -277,8 +277,8 @@ void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata)
 	if (!sdata)
 		return;
 
-	debugfs_remove(sdata->debugfs.default_key);
-	sdata->debugfs.default_key = NULL;
+	debugfs_remove(sdata->common_debugfs.default_key);
+	sdata->common_debugfs.default_key = NULL;
 }
 
 void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 586a9b49b0f..4498d871365 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -496,8 +496,10 @@ struct ieee80211_sub_if_data {
 		struct {
 			struct dentry *mode;
 		} monitor;
-		struct dentry *default_key;
 	} debugfs;
+	struct {
+		struct dentry *default_key;
+	} common_debugfs;
 
 #ifdef CONFIG_MAC80211_MESH
 	struct dentry *mesh_stats_dir;
-- 
cgit v1.2.3-70-g09d2


From 9d7d74029e0f5fde3b88b39892b9b9cfdf4ea10a Mon Sep 17 00:00:00 2001
From: Julien Brunel <brunel@diku.dk>
Date: Tue, 2 Sep 2008 17:24:28 -0700
Subject: net/xfrm: Use an IS_ERR test rather than a NULL test

In case of error, the function xfrm_bundle_create returns an ERR
pointer, but never returns a NULL pointer. So a NULL test that comes
after an IS_ERR test should be deleted.

The semantic match that finds this problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@match_bad_null_test@
expression x, E;
statement S1,S2;
@@
x =  xfrm_bundle_create(...)
... when != x = E
*  if (x != NULL)
S1 else S2
// </smpl>

Signed-off-by: Julien Brunel <brunel@diku.dk>
Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 841b32a2e68..46914b79d85 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1731,8 +1731,7 @@ restart:
 			 * We can't enlist stable bundles either.
 			 */
 			write_unlock_bh(&policy->lock);
-			if (dst)
-				dst_free(dst);
+			dst_free(dst);
 
 			if (pol_dead)
 				XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
@@ -1748,8 +1747,7 @@ restart:
 			err = xfrm_dst_update_origin(dst, fl);
 		if (unlikely(err)) {
 			write_unlock_bh(&policy->lock);
-			if (dst)
-				dst_free(dst);
+			dst_free(dst);
 			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
 			goto error;
 		}
-- 
cgit v1.2.3-70-g09d2


From 06770843c2f0f929a6e0c758dc433902a01aabfb Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@linux.vnet.ibm.com>
Date: Tue, 2 Sep 2008 17:28:58 -0700
Subject: ipv: Re-enable IP when MTU > 68

Re-enable IP when the MTU gets back to a valid size.

This patch just checks if the in_dev is NULL on a NETDEV_CHANGEMTU event
and if MTU is valid (bigger than 68), then re-enable in_dev.

Also a function that checks valid MTU size was created.

Signed-off-by: Breno Leitao <leitao@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 91d3d96805d..b12dae2b0b2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1029,6 +1029,11 @@ skip:
 	}
 }
 
+static inline bool inetdev_valid_mtu(unsigned mtu)
+{
+	return mtu >= 68;
+}
+
 /* Called only under RTNL semaphore */
 
 static int inetdev_event(struct notifier_block *this, unsigned long event,
@@ -1048,6 +1053,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
 			}
+		} else if (event == NETDEV_CHANGEMTU) {
+			/* Re-enabling IP */
+			if (inetdev_valid_mtu(dev->mtu))
+				in_dev = inetdev_init(dev);
 		}
 		goto out;
 	}
@@ -1058,7 +1067,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		dev->ip_ptr = NULL;
 		break;
 	case NETDEV_UP:
-		if (dev->mtu < 68)
+		if (!inetdev_valid_mtu(dev->mtu))
 			break;
 		if (dev->flags & IFF_LOOPBACK) {
 			struct in_ifaddr *ifa;
@@ -1080,9 +1089,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		ip_mc_down(in_dev);
 		break;
 	case NETDEV_CHANGEMTU:
-		if (dev->mtu >= 68)
+		if (inetdev_valid_mtu(dev->mtu))
 			break;
-		/* MTU falled under 68, disable IP */
+		/* disable IP when MTU is not enough */
 	case NETDEV_UNREGISTER:
 		inetdev_destroy(in_dev);
 		break;
-- 
cgit v1.2.3-70-g09d2


From 2c10b32bf57db7ec6d4cca4c4aa3d86bacb01c8a Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 2 Sep 2008 17:30:27 -0700
Subject: netlink: Remove compat API for nested attributes

Removes all _nested_compat() functions from the API. The prio qdisc
no longer requires them and netem has its own format anyway. Their
existance is only confusing.

Resend: Also remove the wrapper macro.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 82 ---------------------------------------------------
 net/sched/sch_netem.c | 18 +++++++++--
 net/sched/sch_prio.c  |  6 +---
 3 files changed, 17 insertions(+), 89 deletions(-)

(limited to 'net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 18024b8cecb..76c43ff38f6 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -119,9 +119,6 @@
  * Nested Attributes Construction:
  *   nla_nest_start(skb, type)		start a nested attribute
  *   nla_nest_end(skb, nla)		finalize a nested attribute
- *   nla_nest_compat_start(skb, type,	start a nested compat attribute
- *			   len, data)
- *   nla_nest_compat_end(skb, type)	finalize a nested compat attribute
  *   nla_nest_cancel(skb, nla)		cancel nested attribute construction
  *
  * Attribute Length Calculations:
@@ -156,7 +153,6 @@
  *   nla_find_nested()			find attribute in nested attributes
  *   nla_parse()			parse and validate stream of attrs
  *   nla_parse_nested()			parse nested attribuets
- *   nla_parse_nested_compat()		parse nested compat attributes
  *   nla_for_each_attr()		loop over all attributes
  *   nla_for_each_nested()		loop over the nested attributes
  *=========================================================================
@@ -751,39 +747,6 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
 	return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy);
 }
 
-/**
- * nla_parse_nested_compat - parse nested compat attributes
- * @tb: destination array with maxtype+1 elements
- * @maxtype: maximum attribute type to be expected
- * @nla: attribute containing the nested attributes
- * @data: pointer to point to contained structure
- * @len: length of contained structure
- * @policy: validation policy
- *
- * Parse a nested compat attribute. The compat attribute contains a structure
- * and optionally a set of nested attributes. On success the data pointer
- * points to the nested data and tb contains the parsed attributes
- * (see nla_parse).
- */
-static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype,
-					    struct nlattr *nla,
-					    const struct nla_policy *policy,
-					    int len)
-{
-	int nested_len = nla_len(nla) - NLA_ALIGN(len);
-
-	if (nested_len < 0)
-		return -EINVAL;
-	if (nested_len >= nla_attr_size(0))
-		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
-				 nested_len, policy);
-	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
-	return 0;
-}
-
-#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \
-({	data = nla_len(nla) >= len ? nla_data(nla) : NULL; \
-	__nla_parse_nested_compat(tb, maxtype, nla, policy, len); })
 /**
  * nla_put_u8 - Add a u8 netlink attribute to a socket buffer
  * @skb: socket buffer to add attribute to
@@ -1030,51 +993,6 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
 	return skb->len;
 }
 
-/**
- * nla_nest_compat_start - Start a new level of nested compat attributes
- * @skb: socket buffer to add attributes to
- * @attrtype: attribute type of container
- * @attrlen: length of structure
- * @data: pointer to structure
- *
- * Start a nested compat attribute that contains both a structure and
- * a set of nested attributes.
- *
- * Returns the container attribute
- */
-static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb,
-						   int attrtype, int attrlen,
-						   const void *data)
-{
-	struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb);
-
-	if (nla_put(skb, attrtype, attrlen, data) < 0)
-		return NULL;
-	if (nla_nest_start(skb, attrtype) == NULL) {
-		nlmsg_trim(skb, start);
-		return NULL;
-	}
-	return start;
-}
-
-/**
- * nla_nest_compat_end - Finalize nesting of compat attributes
- * @skb: socket buffer the attributes are stored in
- * @start: container attribute
- *
- * Corrects the container attribute header to include the all
- * appeneded attributes.
- *
- * Returns the total data length of the skb.
- */
-static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start)
-{
-	struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len);
-
-	start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start;
-	return nla_nest_end(skb, nest);
-}
-
 /**
  * nla_nest_cancel - Cancel nesting of attributes
  * @skb: socket buffer the message is stored in
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3781e55046d..a11959908d9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -388,6 +388,20 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
 };
 
+static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+		      const struct nla_policy *policy, int len)
+{
+	int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+	if (nested_len < 0)
+		return -EINVAL;
+	if (nested_len >= nla_attr_size(0))
+		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+				 nested_len, policy);
+	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+	return 0;
+}
+
 /* Parse netlink message to set options */
 static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 {
@@ -399,8 +413,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy,
-				      qopt, sizeof(*qopt));
+	qopt = nla_data(opt);
+	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
 	if (ret < 0)
 		return ret;
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index a6697c686c7..504a78cdb71 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -254,16 +254,12 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nest;
 	struct tc_prio_qopt opt;
 
 	opt.bands = q->bands;
 	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
 
-	nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
-	if (nest == NULL)
-		goto nla_put_failure;
-	nla_nest_compat_end(skb, nest);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
 	return skb->len;
 
-- 
cgit v1.2.3-70-g09d2


From 37b08e34a98c664bea86e3fae718ac45a46b7276 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 2 Sep 2008 20:14:15 -0700
Subject: ipsec: Fix deadlock in xfrm_state management.

Ever since commit 4c563f7669c10a12354b72b518c2287ffc6ebfb3
("[XFRM]: Speed up xfrm_policy and xfrm_state walking") it is
illegal to call __xfrm_state_destroy (and thus xfrm_state_put())
with xfrm_state_lock held.  If we do, we'll deadlock since we
have the lock already and __xfrm_state_destroy() tries to take
it again.

Fix this by pushing the xfrm_state_put() calls after the lock
is dropped.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 4c6914ef7d9..7bd62f61593 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -780,11 +780,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 {
 	unsigned int h;
 	struct hlist_node *entry;
-	struct xfrm_state *x, *x0;
+	struct xfrm_state *x, *x0, *to_put;
 	int acquire_in_progress = 0;
 	int error = 0;
 	struct xfrm_state *best = NULL;
 
+	to_put = NULL;
+
 	spin_lock_bh(&xfrm_state_lock);
 	h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
 	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
@@ -833,7 +835,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		if (tmpl->id.spi &&
 		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
 					      tmpl->id.proto, family)) != NULL) {
-			xfrm_state_put(x0);
+			to_put = x0;
 			error = -EEXIST;
 			goto out;
 		}
@@ -849,7 +851,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
 		if (error) {
 			x->km.state = XFRM_STATE_DEAD;
-			xfrm_state_put(x);
+			to_put = x;
 			x = NULL;
 			goto out;
 		}
@@ -870,7 +872,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			xfrm_hash_grow_check(x->bydst.next != NULL);
 		} else {
 			x->km.state = XFRM_STATE_DEAD;
-			xfrm_state_put(x);
+			to_put = x;
 			x = NULL;
 			error = -ESRCH;
 		}
@@ -881,6 +883,8 @@ out:
 	else
 		*err = acquire_in_progress ? -EAGAIN : error;
 	spin_unlock_bh(&xfrm_state_lock);
+	if (to_put)
+		xfrm_state_put(to_put);
 	return x;
 }
 
@@ -1067,18 +1071,20 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
 
 int xfrm_state_add(struct xfrm_state *x)
 {
-	struct xfrm_state *x1;
+	struct xfrm_state *x1, *to_put;
 	int family;
 	int err;
 	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
 	family = x->props.family;
 
+	to_put = NULL;
+
 	spin_lock_bh(&xfrm_state_lock);
 
 	x1 = __xfrm_state_locate(x, use_spi, family);
 	if (x1) {
-		xfrm_state_put(x1);
+		to_put = x1;
 		x1 = NULL;
 		err = -EEXIST;
 		goto out;
@@ -1088,7 +1094,7 @@ int xfrm_state_add(struct xfrm_state *x)
 		x1 = __xfrm_find_acq_byseq(x->km.seq);
 		if (x1 && ((x1->id.proto != x->id.proto) ||
 		    xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
-			xfrm_state_put(x1);
+			to_put = x1;
 			x1 = NULL;
 		}
 	}
@@ -1110,6 +1116,9 @@ out:
 		xfrm_state_put(x1);
 	}
 
+	if (to_put)
+		xfrm_state_put(to_put);
+
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_add);
@@ -1269,10 +1278,12 @@ EXPORT_SYMBOL(xfrm_state_migrate);
 
 int xfrm_state_update(struct xfrm_state *x)
 {
-	struct xfrm_state *x1;
+	struct xfrm_state *x1, *to_put;
 	int err;
 	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
+	to_put = NULL;
+
 	spin_lock_bh(&xfrm_state_lock);
 	x1 = __xfrm_state_locate(x, use_spi, x->props.family);
 
@@ -1281,7 +1292,7 @@ int xfrm_state_update(struct xfrm_state *x)
 		goto out;
 
 	if (xfrm_state_kern(x1)) {
-		xfrm_state_put(x1);
+		to_put = x1;
 		err = -EEXIST;
 		goto out;
 	}
@@ -1295,6 +1306,9 @@ int xfrm_state_update(struct xfrm_state *x)
 out:
 	spin_unlock_bh(&xfrm_state_lock);
 
+	if (to_put)
+		xfrm_state_put(to_put);
+
 	if (err)
 		return err;
 
-- 
cgit v1.2.3-70-g09d2


From 6c00055a819ce8a6e2c3af2f65d4ea1a8559c491 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 2 Sep 2008 23:38:32 -0700
Subject: tipc: Don't use structure names which easily globally conflict.

Andrew Morton reported a build failure on sparc32, because TIPC
uses names like "struct node" and there is a like named data
structure defined in linux/node.h

This just regexp replaces "struct node*" to "struct tipc_node*"
to avoid this and any future similar problems.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c       | 22 +++++++++---------
 net/tipc/bcast.h       | 22 +++++++++---------
 net/tipc/bearer.c      |  2 +-
 net/tipc/bearer.h      |  2 +-
 net/tipc/cluster.c     | 16 +++++++-------
 net/tipc/cluster.h     | 10 ++++-----
 net/tipc/discover.c    |  2 +-
 net/tipc/link.c        | 26 +++++++++++-----------
 net/tipc/link.h        |  2 +-
 net/tipc/name_table.h  |  2 +-
 net/tipc/net.c         |  2 +-
 net/tipc/net.h         |  2 +-
 net/tipc/node.c        | 60 +++++++++++++++++++++++++-------------------------
 net/tipc/node.h        | 42 +++++++++++++++++------------------
 net/tipc/node_subscr.c |  4 ++--
 net/tipc/node_subscr.h | 10 ++++-----
 net/tipc/port.h        |  2 +-
 net/tipc/zone.c        |  4 ++--
 net/tipc/zone.h        |  2 +-
 19 files changed, 117 insertions(+), 117 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index b1ff16aa4bd..3ddaff42d1b 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -96,8 +96,8 @@ struct bcbearer {
 	struct media media;
 	struct bcbearer_pair bpairs[MAX_BEARERS];
 	struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
-	struct node_map remains;
-	struct node_map remains_new;
+	struct tipc_node_map remains;
+	struct tipc_node_map remains_new;
 };
 
 /**
@@ -110,7 +110,7 @@ struct bcbearer {
 
 struct bclink {
 	struct link link;
-	struct node node;
+	struct tipc_node node;
 };
 
 
@@ -149,7 +149,7 @@ static void bcbuf_decr_acks(struct sk_buff *buf)
  * Called with 'node' locked, bc_lock unlocked
  */
 
-static void bclink_set_gap(struct node *n_ptr)
+static void bclink_set_gap(struct tipc_node *n_ptr)
 {
 	struct sk_buff *buf = n_ptr->bclink.deferred_head;
 
@@ -202,7 +202,7 @@ static void bclink_retransmit_pkt(u32 after, u32 to)
  * Node is locked, bc_lock unlocked.
  */
 
-void tipc_bclink_acknowledge(struct node *n_ptr, u32 acked)
+void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
 {
 	struct sk_buff *crs;
 	struct sk_buff *next;
@@ -250,7 +250,7 @@ void tipc_bclink_acknowledge(struct node *n_ptr, u32 acked)
  * tipc_net_lock and node lock set
  */
 
-static void bclink_send_ack(struct node *n_ptr)
+static void bclink_send_ack(struct tipc_node *n_ptr)
 {
 	struct link *l_ptr = n_ptr->active_links[n_ptr->addr & 1];
 
@@ -264,7 +264,7 @@ static void bclink_send_ack(struct node *n_ptr)
  * tipc_net_lock and node lock set
  */
 
-static void bclink_send_nack(struct node *n_ptr)
+static void bclink_send_nack(struct tipc_node *n_ptr)
 {
 	struct sk_buff *buf;
 	struct tipc_msg *msg;
@@ -308,7 +308,7 @@ static void bclink_send_nack(struct node *n_ptr)
  * tipc_net_lock and node lock set
  */
 
-void tipc_bclink_check_gap(struct node *n_ptr, u32 last_sent)
+void tipc_bclink_check_gap(struct tipc_node *n_ptr, u32 last_sent)
 {
 	if (!n_ptr->bclink.supported ||
 	    less_eq(last_sent, mod(n_ptr->bclink.last_in)))
@@ -328,7 +328,7 @@ void tipc_bclink_check_gap(struct node *n_ptr, u32 last_sent)
 
 static void tipc_bclink_peek_nack(u32 dest, u32 sender_tag, u32 gap_after, u32 gap_to)
 {
-	struct node *n_ptr = tipc_node_find(dest);
+	struct tipc_node *n_ptr = tipc_node_find(dest);
 	u32 my_after, my_to;
 
 	if (unlikely(!n_ptr || !tipc_node_is_up(n_ptr)))
@@ -418,7 +418,7 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
 	static int rx_count = 0;
 #endif
 	struct tipc_msg *msg = buf_msg(buf);
-	struct node* node = tipc_node_find(msg_prevnode(msg));
+	struct tipc_node* node = tipc_node_find(msg_prevnode(msg));
 	u32 next_in;
 	u32 seqno;
 	struct sk_buff *deferred;
@@ -538,7 +538,7 @@ u32 tipc_bclink_get_last_sent(void)
 	return last_sent;
 }
 
-u32 tipc_bclink_acks_missing(struct node *n_ptr)
+u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
 {
 	return (n_ptr->bclink.supported &&
 		(tipc_bclink_get_last_sent() != n_ptr->bclink.acked));
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index a2416fa6b90..5aa024b99c5 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -41,12 +41,12 @@
 #define WSIZE 32
 
 /**
- * struct node_map - set of node identifiers
+ * struct tipc_node_map - set of node identifiers
  * @count: # of nodes in set
  * @map: bitmap of node identifiers that are in the set
  */
 
-struct node_map {
+struct tipc_node_map {
 	u32 count;
 	u32 map[MAX_NODES / WSIZE];
 };
@@ -68,7 +68,7 @@ struct port_list {
 };
 
 
-struct node;
+struct tipc_node;
 
 extern char tipc_bclink_name[];
 
@@ -77,7 +77,7 @@ extern char tipc_bclink_name[];
  * nmap_add - add a node to a node map
  */
 
-static inline void tipc_nmap_add(struct node_map *nm_ptr, u32 node)
+static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
 {
 	int n = tipc_node(node);
 	int w = n / WSIZE;
@@ -93,7 +93,7 @@ static inline void tipc_nmap_add(struct node_map *nm_ptr, u32 node)
  * nmap_remove - remove a node from a node map
  */
 
-static inline void tipc_nmap_remove(struct node_map *nm_ptr, u32 node)
+static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
 {
 	int n = tipc_node(node);
 	int w = n / WSIZE;
@@ -109,7 +109,7 @@ static inline void tipc_nmap_remove(struct node_map *nm_ptr, u32 node)
  * nmap_equal - test for equality of node maps
  */
 
-static inline int tipc_nmap_equal(struct node_map *nm_a, struct node_map *nm_b)
+static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b)
 {
 	return !memcmp(nm_a, nm_b, sizeof(*nm_a));
 }
@@ -121,8 +121,8 @@ static inline int tipc_nmap_equal(struct node_map *nm_a, struct node_map *nm_b)
  * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
  */
 
-static inline void tipc_nmap_diff(struct node_map *nm_a, struct node_map *nm_b,
-				  struct node_map *nm_diff)
+static inline void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
+				  struct tipc_node_map *nm_diff)
 {
 	int stop = sizeof(nm_a->map) / sizeof(u32);
 	int w;
@@ -195,12 +195,12 @@ static inline void tipc_port_list_free(struct port_list *pl_ptr)
 
 int  tipc_bclink_init(void);
 void tipc_bclink_stop(void);
-void tipc_bclink_acknowledge(struct node *n_ptr, u32 acked);
+void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
 int  tipc_bclink_send_msg(struct sk_buff *buf);
 void tipc_bclink_recv_pkt(struct sk_buff *buf);
 u32  tipc_bclink_get_last_sent(void);
-u32  tipc_bclink_acks_missing(struct node *n_ptr);
-void tipc_bclink_check_gap(struct node *n_ptr, u32 seqno);
+u32  tipc_bclink_acks_missing(struct tipc_node *n_ptr);
+void tipc_bclink_check_gap(struct tipc_node *n_ptr, u32 seqno);
 int  tipc_bclink_stats(char *stats_buf, const u32 buf_size);
 int  tipc_bclink_reset_stats(void);
 int  tipc_bclink_set_queue_limits(u32 limit);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6a9aba3edd0..a7a36779b9b 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -599,7 +599,7 @@ int tipc_block_bearer(const char *name)
 	spin_lock_bh(&b_ptr->publ.lock);
 	b_ptr->publ.blocked = 1;
 	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
-		struct node *n_ptr = l_ptr->owner;
+		struct tipc_node *n_ptr = l_ptr->owner;
 
 		spin_lock_bh(&n_ptr->lock);
 		tipc_link_reset(l_ptr);
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 6a36b6600e6..ca573489271 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -104,7 +104,7 @@ struct bearer {
 	u32 continue_count;
 	int active;
 	char net_plane;
-	struct node_map nodes;
+	struct tipc_node_map nodes;
 };
 
 struct bearer_name {
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 46ee6c58532..689fdefe9d0 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -48,8 +48,8 @@ static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
 				u32 lower, u32 upper);
 static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest);
 
-struct node **tipc_local_nodes = NULL;
-struct node_map tipc_cltr_bcast_nodes = {0,{0,}};
+struct tipc_node **tipc_local_nodes = NULL;
+struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}};
 u32 tipc_highest_allowed_slave = 0;
 
 struct cluster *tipc_cltr_create(u32 addr)
@@ -115,7 +115,7 @@ void tipc_cltr_delete(struct cluster *c_ptr)
 
 u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr)
 {
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	u32 n_num = tipc_node(addr) + 1;
 
 	if (!c_ptr)
@@ -133,7 +133,7 @@ u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr)
 	return 0;
 }
 
-void tipc_cltr_attach_node(struct cluster *c_ptr, struct node *n_ptr)
+void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr)
 {
 	u32 n_num = tipc_node(n_ptr->addr);
 	u32 max_n_num = tipc_max_nodes;
@@ -196,7 +196,7 @@ u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref)
  * Uses deterministic and fair algorithm.
  */
 
-struct node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector)
+struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector)
 {
 	u32 n_num;
 	u32 mask = tipc_max_nodes;
@@ -379,7 +379,7 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 {
 	struct tipc_msg *msg = buf_msg(buf);
 	struct cluster *c_ptr;
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	unchar *node_table;
 	u32 table_size;
 	u32 router;
@@ -499,7 +499,7 @@ static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
 			 u32 lower, u32 upper)
 {
 	struct sk_buff *buf_copy;
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	u32 n_num;
 	u32 tstop;
 
@@ -534,7 +534,7 @@ void tipc_cltr_broadcast(struct sk_buff *buf)
 {
 	struct sk_buff *buf_copy;
 	struct cluster *c_ptr;
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	u32 n_num;
 	u32 tstart;
 	u32 tstop;
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
index 62df074afae..333efb0b9c4 100644
--- a/net/tipc/cluster.h
+++ b/net/tipc/cluster.h
@@ -54,24 +54,24 @@
 struct cluster {
 	u32 addr;
 	struct _zone *owner;
-	struct node **nodes;
+	struct tipc_node **nodes;
 	u32 highest_node;
 	u32 highest_slave;
 };
 
 
-extern struct node **tipc_local_nodes;
+extern struct tipc_node **tipc_local_nodes;
 extern u32 tipc_highest_allowed_slave;
-extern struct node_map tipc_cltr_bcast_nodes;
+extern struct tipc_node_map tipc_cltr_bcast_nodes;
 
 void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router);
 void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest);
-struct node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector);
+struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector);
 u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref);
 void tipc_cltr_recv_routing_table(struct sk_buff *buf);
 struct cluster *tipc_cltr_create(u32 addr);
 void tipc_cltr_delete(struct cluster *c_ptr);
-void tipc_cltr_attach_node(struct cluster *c_ptr, struct node *n_ptr);
+void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr);
 void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest);
 void tipc_cltr_broadcast(struct sk_buff *buf);
 int tipc_cltr_init(void);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 1657f0e795f..74b7d1e28ae 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -193,7 +193,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 		/* Always accept link here */
 		struct sk_buff *rbuf;
 		struct tipc_media_addr *addr;
-		struct node *n_ptr = tipc_node_find(orig);
+		struct tipc_node *n_ptr = tipc_node_find(orig);
 		int link_fully_up;
 
 		dbg(" in own cluster\n");
diff --git a/net/tipc/link.c b/net/tipc/link.c
index d60113ba4b1..dd4c18b9a35 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1155,7 +1155,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
 {
 	struct link *l_ptr;
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	int res = -ELINKCONG;
 
 	read_lock_bh(&tipc_net_lock);
@@ -1226,7 +1226,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
 int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
 {
 	struct link *l_ptr;
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	int res;
 	u32 selector = msg_origport(buf_msg(buf)) & 1;
 	u32 dummy;
@@ -1270,7 +1270,7 @@ int tipc_link_send_sections_fast(struct port *sender,
 	struct tipc_msg *hdr = &sender->publ.phdr;
 	struct link *l_ptr;
 	struct sk_buff *buf;
-	struct node *node;
+	struct tipc_node *node;
 	int res;
 	u32 selector = msg_origport(hdr) & 1;
 
@@ -1364,7 +1364,7 @@ static int link_send_sections_long(struct port *sender,
 				   u32 destaddr)
 {
 	struct link *l_ptr;
-	struct node *node;
+	struct tipc_node *node;
 	struct tipc_msg *hdr = &sender->publ.phdr;
 	u32 dsz = msg_data_sz(hdr);
 	u32 max_pkt,fragm_sz,rest;
@@ -1636,7 +1636,7 @@ void tipc_link_push_queue(struct link *l_ptr)
 
 static void link_reset_all(unsigned long addr)
 {
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	char addr_string[16];
 	u32 i;
 
@@ -1682,7 +1682,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
 
 		/* Handle failure on broadcast link */
 
-		struct node *n_ptr;
+		struct tipc_node *n_ptr;
 		char addr_string[16];
 
 		tipc_printf(TIPC_OUTPUT, "Msg seq number: %u,  ", msg_seqno(msg));
@@ -1843,7 +1843,7 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
 	read_lock_bh(&tipc_net_lock);
 	while (head) {
 		struct bearer *b_ptr = (struct bearer *)tb_ptr;
-		struct node *n_ptr;
+		struct tipc_node *n_ptr;
 		struct link *l_ptr;
 		struct sk_buff *crs;
 		struct sk_buff *buf = head;
@@ -2935,7 +2935,7 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
  * Returns pointer to link (or 0 if invalid link name).
  */
 
-static struct link *link_find_link(const char *name, struct node **node)
+static struct link *link_find_link(const char *name, struct tipc_node **node)
 {
 	struct link_name link_name_parts;
 	struct bearer *b_ptr;
@@ -2965,7 +2965,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space
 	struct tipc_link_config *args;
 	u32 new_value;
 	struct link *l_ptr;
-	struct node *node;
+	struct tipc_node *node;
 	int res;
 
 	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG))
@@ -3043,7 +3043,7 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_
 {
 	char *link_name;
 	struct link *l_ptr;
-	struct node *node;
+	struct tipc_node *node;
 
 	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME))
 		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -3091,7 +3091,7 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
 {
 	struct print_buf pb;
 	struct link *l_ptr;
-	struct node *node;
+	struct tipc_node *node;
 	char *status;
 	u32 profile_total = 0;
 
@@ -3207,7 +3207,7 @@ int link_control(const char *name, u32 op, u32 val)
 	int res = -EINVAL;
 	struct link *l_ptr;
 	u32 bearer_id;
-	struct node * node;
+	struct tipc_node * node;
 	u32 a;
 
 	a = link_name2addr(name, &bearer_id);
@@ -3249,7 +3249,7 @@ int link_control(const char *name, u32 op, u32 val)
 
 u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
 {
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	struct link *l_ptr;
 	u32 res = MAX_PKT_DEFAULT;
 
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 52f3e7c1871..6a51e38ad25 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -116,7 +116,7 @@ struct link {
 	char name[TIPC_MAX_LINK_NAME];
 	struct tipc_media_addr media_addr;
 	struct timer_list timer;
-	struct node *owner;
+	struct tipc_node *owner;
 	struct list_head link_list;
 
 	/* Management and link supervision data */
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index b9e7cd336d7..139882d4ed0 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -76,7 +76,7 @@ struct publication {
 	u32 node;
 	u32 ref;
 	u32 key;
-	struct node_subscr subscr;
+	struct tipc_node_subscr subscr;
 	struct list_head local_list;
 	struct list_head pport_list;
 	struct publication *node_list_next;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index ec7b04fbdc4..7906608bf51 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -118,7 +118,7 @@
 DEFINE_RWLOCK(tipc_net_lock);
 struct network tipc_net = { NULL };
 
-struct node *tipc_net_select_remote_node(u32 addr, u32 ref)
+struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
 {
 	return tipc_zone_select_remote_node(tipc_net.zones[tipc_zone(addr)], addr, ref);
 }
diff --git a/net/tipc/net.h b/net/tipc/net.h
index d154ac2bda9..de2b9ad8f64 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -55,7 +55,7 @@ extern rwlock_t tipc_net_lock;
 void tipc_net_remove_as_router(u32 router);
 void tipc_net_send_external_routes(u32 dest);
 void tipc_net_route_msg(struct sk_buff *buf);
-struct node *tipc_net_select_remote_node(u32 addr, u32 ref);
+struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref);
 u32 tipc_net_select_router(u32 addr, u32 ref);
 
 int tipc_net_start(u32 addr);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index ee952ad6021..20d98c56e15 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -46,11 +46,11 @@
 #include "bearer.h"
 #include "name_distr.h"
 
-void node_print(struct print_buf *buf, struct node *n_ptr, char *str);
-static void node_lost_contact(struct node *n_ptr);
-static void node_established_contact(struct node *n_ptr);
+void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str);
+static void node_lost_contact(struct tipc_node *n_ptr);
+static void node_established_contact(struct tipc_node *n_ptr);
 
-struct node *tipc_nodes = NULL;	/* sorted list of nodes within cluster */
+struct tipc_node *tipc_nodes = NULL;	/* sorted list of nodes within cluster */
 
 static DEFINE_SPINLOCK(node_create_lock);
 
@@ -66,11 +66,11 @@ u32 tipc_own_tag = 0;
  * but this is a non-trivial change.)
  */
 
-struct node *tipc_node_create(u32 addr)
+struct tipc_node *tipc_node_create(u32 addr)
 {
 	struct cluster *c_ptr;
-	struct node *n_ptr;
-	struct node **curr_node;
+	struct tipc_node *n_ptr;
+	struct tipc_node **curr_node;
 
 	spin_lock_bh(&node_create_lock);
 
@@ -120,7 +120,7 @@ struct node *tipc_node_create(u32 addr)
 	return n_ptr;
 }
 
-void tipc_node_delete(struct node *n_ptr)
+void tipc_node_delete(struct tipc_node *n_ptr)
 {
 	if (!n_ptr)
 		return;
@@ -146,7 +146,7 @@ void tipc_node_delete(struct node *n_ptr)
  * Link becomes active (alone or shared) or standby, depending on its priority.
  */
 
-void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr)
+void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr)
 {
 	struct link **active = &n_ptr->active_links[0];
 
@@ -180,7 +180,7 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr)
  * node_select_active_links - select active link
  */
 
-static void node_select_active_links(struct node *n_ptr)
+static void node_select_active_links(struct tipc_node *n_ptr)
 {
 	struct link **active = &n_ptr->active_links[0];
 	u32 i;
@@ -208,7 +208,7 @@ static void node_select_active_links(struct node *n_ptr)
  * tipc_node_link_down - handle loss of link
  */
 
-void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr)
+void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
 {
 	struct link **active;
 
@@ -235,30 +235,30 @@ void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr)
 		node_lost_contact(n_ptr);
 }
 
-int tipc_node_has_active_links(struct node *n_ptr)
+int tipc_node_has_active_links(struct tipc_node *n_ptr)
 {
 	return (n_ptr &&
 		((n_ptr->active_links[0]) || (n_ptr->active_links[1])));
 }
 
-int tipc_node_has_redundant_links(struct node *n_ptr)
+int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
 {
 	return (n_ptr->working_links > 1);
 }
 
-static int tipc_node_has_active_routes(struct node *n_ptr)
+static int tipc_node_has_active_routes(struct tipc_node *n_ptr)
 {
 	return (n_ptr && (n_ptr->last_router >= 0));
 }
 
-int tipc_node_is_up(struct node *n_ptr)
+int tipc_node_is_up(struct tipc_node *n_ptr)
 {
 	return (tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr));
 }
 
-struct node *tipc_node_attach_link(struct link *l_ptr)
+struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 {
-	struct node *n_ptr = tipc_node_find(l_ptr->addr);
+	struct tipc_node *n_ptr = tipc_node_find(l_ptr->addr);
 
 	if (!n_ptr)
 		n_ptr = tipc_node_create(l_ptr->addr);
@@ -285,7 +285,7 @@ struct node *tipc_node_attach_link(struct link *l_ptr)
 	return NULL;
 }
 
-void tipc_node_detach_link(struct node *n_ptr, struct link *l_ptr)
+void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
 {
 	n_ptr->links[l_ptr->b_ptr->identity] = NULL;
 	tipc_net.zones[tipc_zone(l_ptr->addr)]->links--;
@@ -338,7 +338,7 @@ void tipc_node_detach_link(struct node *n_ptr, struct link *l_ptr)
  *
  */
 
-static void node_established_contact(struct node *n_ptr)
+static void node_established_contact(struct tipc_node *n_ptr)
 {
 	struct cluster *c_ptr;
 
@@ -384,10 +384,10 @@ static void node_established_contact(struct node *n_ptr)
 				  tipc_highest_allowed_slave);
 }
 
-static void node_lost_contact(struct node *n_ptr)
+static void node_lost_contact(struct tipc_node *n_ptr)
 {
 	struct cluster *c_ptr;
-	struct node_subscr *ns, *tns;
+	struct tipc_node_subscr *ns, *tns;
 	char addr_string[16];
 	u32 i;
 
@@ -466,9 +466,9 @@ static void node_lost_contact(struct node *n_ptr)
  * Called by when cluster local lookup has failed.
  */
 
-struct node *tipc_node_select_next_hop(u32 addr, u32 selector)
+struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector)
 {
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	u32 router_addr;
 
 	if (!tipc_addr_domain_valid(addr))
@@ -513,7 +513,7 @@ struct node *tipc_node_select_next_hop(u32 addr, u32 selector)
  * Uses a deterministic and fair algorithm for selecting router node.
  */
 
-u32 tipc_node_select_router(struct node *n_ptr, u32 ref)
+u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref)
 {
 	u32 ulim;
 	u32 mask;
@@ -551,7 +551,7 @@ u32 tipc_node_select_router(struct node *n_ptr, u32 ref)
 	return tipc_addr(own_zone(), own_cluster(), r);
 }
 
-void tipc_node_add_router(struct node *n_ptr, u32 router)
+void tipc_node_add_router(struct tipc_node *n_ptr, u32 router)
 {
 	u32 r_num = tipc_node(router);
 
@@ -562,7 +562,7 @@ void tipc_node_add_router(struct node *n_ptr, u32 router)
 	       !n_ptr->routers[n_ptr->last_router]);
 }
 
-void tipc_node_remove_router(struct node *n_ptr, u32 router)
+void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router)
 {
 	u32 r_num = tipc_node(router);
 
@@ -580,7 +580,7 @@ void tipc_node_remove_router(struct node *n_ptr, u32 router)
 }
 
 #if 0
-void node_print(struct print_buf *buf, struct node *n_ptr, char *str)
+void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str)
 {
 	u32 i;
 
@@ -597,7 +597,7 @@ void node_print(struct print_buf *buf, struct node *n_ptr, char *str)
 
 u32 tipc_available_nodes(const u32 domain)
 {
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	u32 cnt = 0;
 
 	read_lock_bh(&tipc_net_lock);
@@ -615,7 +615,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 {
 	u32 domain;
 	struct sk_buff *buf;
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	struct tipc_node_info node_info;
 	u32 payload_size;
 
@@ -667,7 +667,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 {
 	u32 domain;
 	struct sk_buff *buf;
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	struct tipc_link_info link_info;
 	u32 payload_size;
 
diff --git a/net/tipc/node.h b/net/tipc/node.h
index cd1882654bb..6f990da5d14 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -43,7 +43,7 @@
 #include "bearer.h"
 
 /**
- * struct node - TIPC node structure
+ * struct tipc_node - TIPC node structure
  * @addr: network address of node
  * @lock: spinlock governing access to structure
  * @owner: pointer to cluster that node belongs to
@@ -68,11 +68,11 @@
  *    @defragm: list of partially reassembled b'cast message fragments from node
  */
 
-struct node {
+struct tipc_node {
 	u32 addr;
 	spinlock_t lock;
 	struct cluster *owner;
-	struct node *next;
+	struct tipc_node *next;
 	struct list_head nsub;
 	struct link *active_links[2];
 	struct link *links[MAX_BEARERS];
@@ -94,26 +94,26 @@ struct node {
 	} bclink;
 };
 
-extern struct node *tipc_nodes;
+extern struct tipc_node *tipc_nodes;
 extern u32 tipc_own_tag;
 
-struct node *tipc_node_create(u32 addr);
-void tipc_node_delete(struct node *n_ptr);
-struct node *tipc_node_attach_link(struct link *l_ptr);
-void tipc_node_detach_link(struct node *n_ptr, struct link *l_ptr);
-void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr);
-void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr);
-int tipc_node_has_active_links(struct node *n_ptr);
-int tipc_node_has_redundant_links(struct node *n_ptr);
-u32 tipc_node_select_router(struct node *n_ptr, u32 ref);
-struct node *tipc_node_select_next_hop(u32 addr, u32 selector);
-int tipc_node_is_up(struct node *n_ptr);
-void tipc_node_add_router(struct node *n_ptr, u32 router);
-void tipc_node_remove_router(struct node *n_ptr, u32 router);
+struct tipc_node *tipc_node_create(u32 addr);
+void tipc_node_delete(struct tipc_node *n_ptr);
+struct tipc_node *tipc_node_attach_link(struct link *l_ptr);
+void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr);
+void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr);
+void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr);
+int tipc_node_has_active_links(struct tipc_node *n_ptr);
+int tipc_node_has_redundant_links(struct tipc_node *n_ptr);
+u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref);
+struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector);
+int tipc_node_is_up(struct tipc_node *n_ptr);
+void tipc_node_add_router(struct tipc_node *n_ptr, u32 router);
+void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router);
 struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
 
-static inline struct node *tipc_node_find(u32 addr)
+static inline struct tipc_node *tipc_node_find(u32 addr)
 {
 	if (likely(in_own_cluster(addr)))
 		return tipc_local_nodes[tipc_node(addr)];
@@ -126,19 +126,19 @@ static inline struct node *tipc_node_find(u32 addr)
 	return NULL;
 }
 
-static inline struct node *tipc_node_select(u32 addr, u32 selector)
+static inline struct tipc_node *tipc_node_select(u32 addr, u32 selector)
 {
 	if (likely(in_own_cluster(addr)))
 		return tipc_local_nodes[tipc_node(addr)];
 	return tipc_node_select_next_hop(addr, selector);
 }
 
-static inline void tipc_node_lock(struct node *n_ptr)
+static inline void tipc_node_lock(struct tipc_node *n_ptr)
 {
 	spin_lock_bh(&n_ptr->lock);
 }
 
-static inline void tipc_node_unlock(struct node *n_ptr)
+static inline void tipc_node_unlock(struct tipc_node *n_ptr)
 {
 	spin_unlock_bh(&n_ptr->lock);
 }
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 8ecbd0fb610..19194d476a9 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -44,7 +44,7 @@
  * tipc_nodesub_subscribe - create "node down" subscription for specified node
  */
 
-void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr,
+void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
 		       void *usr_handle, net_ev_handler handle_down)
 {
 	if (addr == tipc_own_addr) {
@@ -69,7 +69,7 @@ void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr,
  * tipc_nodesub_unsubscribe - cancel "node down" subscription (if any)
  */
 
-void tipc_nodesub_unsubscribe(struct node_subscr *node_sub)
+void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub)
 {
 	if (!node_sub->node)
 		return;
diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h
index 5f3f5859b84..006ed739f51 100644
--- a/net/tipc/node_subscr.h
+++ b/net/tipc/node_subscr.h
@@ -42,22 +42,22 @@
 typedef void (*net_ev_handler) (void *usr_handle);
 
 /**
- * struct node_subscr - "node down" subscription entry
+ * struct tipc_node_subscr - "node down" subscription entry
  * @node: ptr to node structure of interest (or NULL, if none)
  * @handle_node_down: routine to invoke when node fails
  * @usr_handle: argument to pass to routine when node fails
  * @nodesub_list: adjacent entries in list of subscriptions for the node
  */
 
-struct node_subscr {
-	struct node *node;
+struct tipc_node_subscr {
+	struct tipc_node *node;
 	net_ev_handler handle_node_down;
 	void *usr_handle;
 	struct list_head nodesub_list;
 };
 
-void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr,
+void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
 			    void *usr_handle, net_ev_handler handle_down);
-void tipc_nodesub_unsubscribe(struct node_subscr *node_sub);
+void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub);
 
 #endif
diff --git a/net/tipc/port.h b/net/tipc/port.h
index e5f8c16429b..ff31ee4a1dc 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -105,7 +105,7 @@ struct port {
 	u32 probing_interval;
 	u32 last_in_seqno;
 	struct timer_list timer;
-	struct node_subscr subscription;
+	struct tipc_node_subscr subscription;
 };
 
 extern spinlock_t tipc_port_list_lock;
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
index 3506f856344..2c01ba2d86b 100644
--- a/net/tipc/zone.c
+++ b/net/tipc/zone.c
@@ -111,10 +111,10 @@ void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest)
 	}
 }
 
-struct node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref)
+struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref)
 {
 	struct cluster *c_ptr;
-	struct node *n_ptr;
+	struct tipc_node *n_ptr;
 	u32 c_num;
 
 	if (!z_ptr)
diff --git a/net/tipc/zone.h b/net/tipc/zone.h
index 6e7a08df8af..7bdc3406ba9 100644
--- a/net/tipc/zone.h
+++ b/net/tipc/zone.h
@@ -54,7 +54,7 @@ struct _zone {
 	u32 links;
 };
 
-struct node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref);
+struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref);
 u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref);
 void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router);
 void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest);
-- 
cgit v1.2.3-70-g09d2


From ba1a6c7bc0ff33e405f5156dc8f4145437255f1f Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Always generate a Reset in response to option errors

RFC4340 states that if a packet is received with an option error (such as a
Mandatory Option as the last byte of the option list), the endpoint should
repond with a Reset.

In the LISTEN and RESPOND states, the endpoint correctly reponds with Reset,
while in the REQUEST/OPEN states, packets with option errors are just ignored.

The packet sequence is as follows:

Case 1:

  Endpoint A                           Endpoint B
  (CLOSED)                             (CLOSED)

               <----------------       REQUEST

  RESPONSE     ----------------->      (*1)
  (with invalid option)
               <----------------       RESET
                                       (with Reset Code 5, "Option Error")

  (*1) currently just ignored, no Reset is sent

Case 2:

  Endpoint A                           Endpoint B
  (OPEN)                               (OPEN)

  DATA-ACK     ----------------->      (*2)
  (with invalid option)
               <----------------       RESET
                                       (with Reset Code 5, "Option Error")

  (*2) currently just ignored, no Reset is sent

This patch fixes the problem, by generating a Reset instead of silently
ignoring option errors.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 803933ab396..779d0ed9ae9 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -370,7 +370,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		goto discard;
 
 	if (dccp_parse_options(sk, NULL, skb))
-		goto discard;
+		return 1;
 
 	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 		dccp_event_ack_recv(sk, skb);
@@ -610,7 +610,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 * Step 8: Process options and mark acknowledgeable
 		 */
 		if (dccp_parse_options(sk, NULL, skb))
-			goto discard;
+			return 1;
 
 		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 			dccp_event_ack_recv(sk, skb);
-- 
cgit v1.2.3-70-g09d2


From faf61c3319ea336ed47acd6ca86faaaa3a8f4937 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Silently ignore options with nonsensical lengths

This updates the option-parsing code with regard to RFC 4340, 5.8:
 "[..] options with nonsensical lengths (length byte less than two or more
  than the remaining space in the options portion of the header) MUST be
  ignored, and any option space following an option with nonsensical length
  MUST likewise be ignored."

Hence in the following cases erratic options will be ignored:
 1. The type byte of a multi-byte option is the last byte of the header
    options (i.e. effective option length of 1).
 2. The value of the length byte is less than the minimum 2. This has been
    changed from previously 3: although no multi-byte option with a length
    less than 3 yet exists (cf. table 3 in 5.8), a length of 2 is valid.
    (The switch-statement in dccp_parse has further per-option length checks.)
 3. The option length exceeds the length of the remaining option space.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/options.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/options.c b/net/dccp/options.c
index dc7c158a2f4..4284f085604 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -81,11 +81,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		/* Check if this isn't a single byte option */
 		if (opt > DCCPO_MAX_RESERVED) {
 			if (opt_ptr == opt_end)
-				goto out_invalid_option;
+				goto out_nonsensical_length;
 
 			len = *opt_ptr++;
-			if (len < 3)
-				goto out_invalid_option;
+			if (len < 2)
+				goto out_nonsensical_length;
 			/*
 			 * Remove the type and len fields, leaving
 			 * just the value size
@@ -95,7 +95,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			opt_ptr += len;
 
 			if (opt_ptr > opt_end)
-				goto out_invalid_option;
+				goto out_nonsensical_length;
 		}
 
 		/*
@@ -283,6 +283,8 @@ ignore_option:
 	if (mandatory)
 		goto out_invalid_option;
 
+out_nonsensical_length:
+	/* RFC 4340, 5.8: ignore option and all remaining option space */
 	return 0;
 
 out_invalid_option:
-- 
cgit v1.2.3-70-g09d2


From eac7726bf5cd24440d84b166e0813668d1bf3224 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Fill in the Data fields for "Option Error" Resets

This updates the use of the `out_invalid_option' label, which produces a
Reset (code 5, "Option Error"), to fill in the  Data1...Data3 fields as
specified in RFC 4340, 5.6.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/options.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/dccp/options.c b/net/dccp/options.c
index 4284f085604..0809b63cb05 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -291,6 +291,9 @@ out_invalid_option:
 	DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
 	DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
+	DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt;
+	DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0;
+	DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0;
 	return -1;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 48816322ad4d9ce195aaddd10f0ce98c944af193 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Empty the write queue when disconnecting

dccp_disconnect() can be called due to several reasons:

 1. when the connection setup failed (inet_stream_connect());
 2. when shutting down (inet_shutdown(), inet_csk_listen_stop());
 3. when aborting the connection (dccp_close() with 0 linger time).

In case (1) the write queue is empty. This patch empties the write queue,
if in case (2) or (3) it was not yet empty.

This avoids triggering the write-queue BUG_TRAP in sk_stream_kill_queues()
later on.

It also seems natural to do: when breaking an association, to delete all
packets that were originally intended for the soon-disconnected end (compare
with call to tcp_write_queue_purge in tcp_disconnect()).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/proto.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 1ca3b26eed0..ae66473b11f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -309,7 +309,9 @@ int dccp_disconnect(struct sock *sk, int flags)
 		sk->sk_err = ECONNRESET;
 
 	dccp_clear_xmit_timers(sk);
+
 	__skb_queue_purge(&sk->sk_receive_queue);
+	__skb_queue_purge(&sk->sk_write_queue);
 	if (sk->sk_send_head != NULL) {
 		__kfree_skb(sk->sk_send_head);
 		sk->sk_send_head = NULL;
-- 
cgit v1.2.3-70-g09d2


From 432649916b0435b608fb3e1fcb97347ac294d38d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp: Toggle debug output without module unloading

This sets the sysfs permissions so that root can toggle the `debug'
parameter available for nearly every DCCP module. This is useful
since there are various module inter-dependencies. The debug flag
can now be toggled at runtime using

  echo 1 > /sys/module/dccp/parameters/dccp_debug
  echo 1 > /sys/module/dccp_ccid2/parameters/ccid2_debug
  echo 1 > /sys/module/dccp_ccid3/parameters/ccid3_debug
  echo 1 > /sys/module/dccp_tfrc_lib/parameters/tfrc_debug

The last is not very useful yet, since no code at the moment calls
the tfrc_debug() macro.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c    | 2 +-
 net/dccp/ccids/ccid3.c    | 2 +-
 net/dccp/ccids/lib/tfrc.c | 2 +-
 net/dccp/proto.c          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 8e958087421..9a430734530 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -783,7 +783,7 @@ static struct ccid_operations ccid2 = {
 };
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-module_param(ccid2_debug, bool, 0444);
+module_param(ccid2_debug, bool, 0644);
 MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
 #endif
 
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f6756e0c9e6..3b8bd7ca676 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -963,7 +963,7 @@ static struct ccid_operations ccid3 = {
 };
 
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-module_param(ccid3_debug, bool, 0444);
+module_param(ccid3_debug, bool, 0644);
 MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
 #endif
 
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 97ecec0a8e7..185916218e0 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -10,7 +10,7 @@
 
 #ifdef CONFIG_IP_DCCP_TFRC_DEBUG
 int tfrc_debug;
-module_param(tfrc_debug, bool, 0444);
+module_param(tfrc_debug, bool, 0644);
 MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
 #endif
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ae66473b11f..d0bd3481976 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1030,7 +1030,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
 
 #ifdef CONFIG_IP_DCCP_DEBUG
 int dccp_debug;
-module_param(dccp_debug, bool, 0444);
+module_param(dccp_debug, bool, 0644);
 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
 
 EXPORT_SYMBOL_GPL(dccp_debug);
-- 
cgit v1.2.3-70-g09d2


From 959fd992f05b7468bf30d759ac0c9fd0ef0fa80b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 23 Aug 2008 13:28:27 +0200
Subject: dccp ccid-3: Replace lazy BUG_ON with condition

The BUG_ON(w_tot == 0) only holds if there is no more than 1 loss interval in
the loss history. If there is only a single loss interval, the calc_i_mean()
routine need in fact not be called (RFC 3448, 6.3.1).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/lib/loss_interval.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index bcd6ac415bb..5b3ce0688c5 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -67,7 +67,10 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 	u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0;
 	int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */
 
-	for (i=0; i <= k; i++) {
+	if (k <= 0)
+		return;
+
+	for (i = 0; i <= k; i++) {
 		i_i = tfrc_lh_get_interval(lh, i);
 
 		if (i < k) {
@@ -78,7 +81,6 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 			i_tot1 += i_i * tfrc_lh_weights[i-1];
 	}
 
-	BUG_ON(w_tot == 0);
 	lh->i_mean = max(i_tot0, i_tot1) / w_tot;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 5c7c9451f1f422b69bf0e161e471dd3976ecd408 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Basic data structure for feature negotiation

This patch prepares for the new and extended feature-negotiation routines.

The following feature-negotiation data structures are provided:
	* a container for the various (SP or NN) values,
	* symbolic state names to track feature states,
	* an entry struct which holds all current information together,
	* elementary functions to fill in and process these structures.

Entry structs are arranged as FIFO for the following reason: RFC 4340 specifies
that if multiple options of the same type are present, they are processed in the
order of their appearance in the packet; which means that this order needs to be
preserved in the local data structure (the later insertion code also respects
this order).

The struct list_head has been chosen for the following reasons: the most
frequent operations are
 * add new entry at tail (when receiving Change or setting socket options);
 * delete entry (when Confirm has been received);
 * deep copy of entire list (cloning from listening socket onto request socket).

The NN value has been set to 64 bit, which is a currently sufficient upper limit
(Sequence Window feature has 48 bit).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/feat.c | 14 ++++++++++++++
 net/dccp/feat.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 933a0ecf8d4..94a81b8e5ef 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -23,6 +23,20 @@
 
 #define DCCP_FEAT_SP_NOAGREE (-123)
 
+/* copy constructor, fval must not already contain allocated memory */
+static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
+{
+	fval->sp.len = len;
+	if (fval->sp.len > 0) {
+		fval->sp.vec = kmemdup(val, len, gfp_any());
+		if (fval->sp.vec == NULL) {
+			fval->sp.len = 0;
+			return -ENOBUFS;
+		}
+	}
+	return 0;
+}
+
 int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 		     u8 *val, u8 len, gfp_t gfp)
 {
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index e272222c7ac..94203c230c6 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -14,6 +14,66 @@
 #include <linux/types.h>
 #include "dccp.h"
 
+enum dccp_feat_type {
+	FEAT_AT_RX   = 1,	/* located at RX side of half-connection  */
+	FEAT_AT_TX   = 2,	/* located at TX side of half-connection  */
+	FEAT_SP      = 4,	/* server-priority reconciliation (6.3.1) */
+	FEAT_NN	     = 8,	/* non-negotiable reconciliation (6.3.2)  */
+	FEAT_UNKNOWN = 0xFF	/* not understood or invalid feature	  */
+};
+
+enum dccp_feat_state {
+	FEAT_DEFAULT = 0,	/* using default values from 6.4 */
+	FEAT_INITIALISING,	/* feature is being initialised  */
+	FEAT_CHANGING,		/* Change sent but not confirmed yet */
+	FEAT_UNSTABLE,		/* local modification in state CHANGING */
+	FEAT_STABLE		/* both ends (think they) agree */
+};
+
+/**
+ * dccp_feat_val  -  Container for SP or NN feature values
+ * @nn:     single NN value
+ * @sp.vec: single SP value plus optional preference list
+ * @sp.len: length of @sp.vec in bytes
+ */
+typedef union {
+	u64 nn;
+	struct {
+		u8	*vec;
+		u8	len;
+	}   sp;
+} dccp_feat_val;
+
+/**
+ * struct feat_entry  -  Data structure to perform feature negotiation
+ * @feat_num: one of %dccp_feature_numbers
+ * @val: feature's current value (SP features may have preference list)
+ * @state: feature's current state
+ * @needs_mandatory: whether Mandatory options should be sent
+ * @needs_confirm: whether to send a Confirm instead of a Change
+ * @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm)
+ * @is_local: feature location (1) or feature-remote (0)
+ * @node: list pointers, entries arranged in FIFO order
+ */
+struct dccp_feat_entry {
+	u8                      feat_num;
+	dccp_feat_val           val;
+	enum dccp_feat_state    state:8;
+	bool			needs_mandatory:1,
+				needs_confirm:1,
+				empty_confirm:1,
+				is_local:1;
+
+	struct list_head	node;
+};
+
+static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry)
+{
+	if (entry->needs_confirm)
+		return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R;
+	return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R;
+}
+
 #ifdef CONFIG_IP_DCCP_DEBUG
 extern const char *dccp_feat_typename(const u8 type);
 extern const char *dccp_feat_name(const u8 feat);
-- 
cgit v1.2.3-70-g09d2


From b4eec206370b7154dc354dc30f0a3f02ea8468b2 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Implement lookup table for feature-negotiation information

A lookup table for feature-negotiation information, extracted from RFC 4340/42,
is provided by this patch. All currently known features can be found in this
table, along with their feature location, their default value, and type.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 include/linux/dccp.h |   9 ++--
 net/dccp/feat.c      | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 6080449fbec..3978aff197d 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -176,19 +176,20 @@ enum {
 };
 
 /* DCCP features (RFC 4340 section 6.4) */
-enum {
+enum dccp_feature_numbers {
 	DCCPF_RESERVED = 0,
 	DCCPF_CCID = 1,
-	DCCPF_SHORT_SEQNOS = 2,		/* XXX: not yet implemented */
+	DCCPF_SHORT_SEQNOS = 2,
 	DCCPF_SEQUENCE_WINDOW = 3,
-	DCCPF_ECN_INCAPABLE = 4,	/* XXX: not yet implemented */
+	DCCPF_ECN_INCAPABLE = 4,
 	DCCPF_ACK_RATIO = 5,
 	DCCPF_SEND_ACK_VECTOR = 6,
 	DCCPF_SEND_NDP_COUNT = 7,
 	DCCPF_MIN_CSUM_COVER = 8,
-	DCCPF_DATA_CHECKSUM = 9,	/* XXX: not yet implemented */
+	DCCPF_DATA_CHECKSUM = 9,
 	/* 10-127 reserved */
 	DCCPF_MIN_CCID_SPECIFIC = 128,
+	DCCPF_SEND_LEV_RATE = 192,	/* RFC 4342, sec. 8.4 */
 	DCCPF_MAX_CCID_SPECIFIC = 255,
 };
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 94a81b8e5ef..d7468f70544 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -23,6 +23,80 @@
 
 #define DCCP_FEAT_SP_NOAGREE (-123)
 
+static const struct {
+	u8			feat_num;		/* DCCPF_xxx */
+	enum dccp_feat_type	rxtx;			/* RX or TX  */
+	enum dccp_feat_type	reconciliation;		/* SP or NN  */
+	u8			default_value;		/* as in 6.4 */
+/*
+ *    Lookup table for location and type of features (from RFC 4340/4342)
+ *  +--------------------------+----+-----+----+----+---------+-----------+
+ *  | Feature                  | Location | Reconc. | Initial |  Section  |
+ *  |                          | RX | TX  | SP | NN |  Value  | Reference |
+ *  +--------------------------+----+-----+----+----+---------+-----------+
+ *  | DCCPF_CCID               |    |  X  | X  |    |   2     | 10        |
+ *  | DCCPF_SHORT_SEQNOS       |    |  X  | X  |    |   0     |  7.6.1    |
+ *  | DCCPF_SEQUENCE_WINDOW    |    |  X  |    | X  | 100     |  7.5.2    |
+ *  | DCCPF_ECN_INCAPABLE      | X  |     | X  |    |   0     | 12.1      |
+ *  | DCCPF_ACK_RATIO          |    |  X  |    | X  |   2     | 11.3      |
+ *  | DCCPF_SEND_ACK_VECTOR    | X  |     | X  |    |   0     | 11.5      |
+ *  | DCCPF_SEND_NDP_COUNT     |    |  X  | X  |    |   0     |  7.7.2    |
+ *  | DCCPF_MIN_CSUM_COVER     | X  |     | X  |    |   0     |  9.2.1    |
+ *  | DCCPF_DATA_CHECKSUM      | X  |     | X  |    |   0     |  9.3.1    |
+ *  | DCCPF_SEND_LEV_RATE      | X  |     | X  |    |   0     | 4342/8.4  |
+ *  +--------------------------+----+-----+----+----+---------+-----------+
+ */
+} dccp_feat_table[] = {
+	{ DCCPF_CCID,		 FEAT_AT_TX, FEAT_SP, 2 },
+	{ DCCPF_SHORT_SEQNOS,	 FEAT_AT_TX, FEAT_SP, 0 },
+	{ DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100 },
+	{ DCCPF_ECN_INCAPABLE,	 FEAT_AT_RX, FEAT_SP, 0 },
+	{ DCCPF_ACK_RATIO,	 FEAT_AT_TX, FEAT_NN, 2 },
+	{ DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0 },
+	{ DCCPF_SEND_NDP_COUNT,  FEAT_AT_TX, FEAT_SP, 0 },
+	{ DCCPF_MIN_CSUM_COVER,  FEAT_AT_RX, FEAT_SP, 0 },
+	{ DCCPF_DATA_CHECKSUM,	 FEAT_AT_RX, FEAT_SP, 0 },
+	{ DCCPF_SEND_LEV_RATE,	 FEAT_AT_RX, FEAT_SP, 0 },
+};
+#define DCCP_FEAT_SUPPORTED_MAX		ARRAY_SIZE(dccp_feat_table)
+
+/**
+ * dccp_feat_index  -  Hash function to map feature number into array position
+ * Returns consecutive array index or -1 if the feature is not understood.
+ */
+static int dccp_feat_index(u8 feat_num)
+{
+	/* The first 9 entries are occupied by the types from RFC 4340, 6.4 */
+	if (feat_num > DCCPF_RESERVED && feat_num <= DCCPF_DATA_CHECKSUM)
+		return feat_num - 1;
+
+	/*
+	 * Other features: add cases for new feature types here after adding
+	 * them to the above table.
+	 */
+	switch (feat_num) {
+	case DCCPF_SEND_LEV_RATE:
+			return DCCP_FEAT_SUPPORTED_MAX - 1;
+	}
+	return -1;
+}
+
+static u8 dccp_feat_type(u8 feat_num)
+{
+	int idx = dccp_feat_index(feat_num);
+
+	if (idx < 0)
+		return FEAT_UNKNOWN;
+	return dccp_feat_table[idx].reconciliation;
+}
+
+static int dccp_feat_default_value(u8 feat_num)
+{
+	int idx = dccp_feat_index(feat_num);
+
+	return idx < 0 ? : dccp_feat_table[idx].default_value;
+}
+
 /* copy constructor, fval must not already contain allocated memory */
 static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
 {
@@ -37,6 +111,45 @@ static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
 	return 0;
 }
 
+static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val)
+{
+	if (unlikely(val == NULL))
+		return;
+	if (dccp_feat_type(feat_num) == FEAT_SP)
+		kfree(val->sp.vec);
+	memset(val, 0, sizeof(*val));
+}
+
+static struct dccp_feat_entry *
+	      dccp_feat_clone_entry(struct dccp_feat_entry const *original)
+{
+	struct dccp_feat_entry *new;
+	u8 type = dccp_feat_type(original->feat_num);
+
+	if (type == FEAT_UNKNOWN)
+		return NULL;
+
+	new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any());
+	if (new == NULL)
+		return NULL;
+
+	if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val,
+						      original->val.sp.vec,
+						      original->val.sp.len)) {
+		kfree(new);
+		return NULL;
+	}
+	return new;
+}
+
+static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry)
+{
+	if (entry != NULL) {
+		dccp_feat_val_destructor(entry->feat_num, &entry->val);
+		kfree(entry);
+	}
+}
+
 int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 		     u8 *val, u8 len, gfp_t gfp)
 {
@@ -653,6 +766,8 @@ const char *dccp_feat_name(const u8 feat)
 	if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
 		return feature_names[DCCPF_RESERVED];
 
+	if (feat ==  DCCPF_SEND_LEV_RATE)
+		return "Send Loss Event Rate";
 	if (feat >= DCCPF_MIN_CCID_SPECIFIC)
 		return "CCID-specific";
 
-- 
cgit v1.2.3-70-g09d2


From 3001fc0569651f2d0c3b45adc991351471b0c382 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: List management for new feature negotiation

This adds list fields and list management functions for the new feature
negotiation implementation. The new code is kept in parallel to the old
code, until removed at the end of the patch set.

Thanks to Arnaldo for suggestions to improve the code.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/feat.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index d7468f70544..2ec2cd11769 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -150,6 +150,135 @@ static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry)
 	}
 }
 
+/*
+ * List management functions
+ *
+ * Feature negotiation lists rely on and maintain the following invariants:
+ * - each feat_num in the list is known, i.e. we know its type and default value
+ * - each feat_num/is_local combination is unique (old entries are overwritten)
+ * - SP values are always freshly allocated
+ * - list is sorted in increasing order of feature number (faster lookup)
+ */
+static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list,
+						     u8 feat_num, bool is_local)
+{
+	struct dccp_feat_entry *entry;
+
+	list_for_each_entry(entry, fn_list, node)
+		if (entry->feat_num == feat_num && entry->is_local == is_local)
+			return entry;
+		else if (entry->feat_num > feat_num)
+			break;
+	return NULL;
+}
+
+/**
+ * dccp_feat_entry_new  -  Central list update routine (called by all others)
+ * @head:  list to add to
+ * @feat:  feature number
+ * @local: whether the local (1) or remote feature with number @feat is meant
+ * This is the only constructor and serves to ensure the above invariants.
+ */
+static struct dccp_feat_entry *
+	      dccp_feat_entry_new(struct list_head *head, u8 feat, bool local)
+{
+	struct dccp_feat_entry *entry;
+
+	list_for_each_entry(entry, head, node)
+		if (entry->feat_num == feat && entry->is_local == local) {
+			dccp_feat_val_destructor(entry->feat_num, &entry->val);
+			return entry;
+		} else if (entry->feat_num > feat) {
+			head = &entry->node;
+			break;
+		}
+
+	entry = kmalloc(sizeof(*entry), gfp_any());
+	if (entry != NULL) {
+		entry->feat_num = feat;
+		entry->is_local = local;
+		list_add_tail(&entry->node, head);
+	}
+	return entry;
+}
+
+/**
+ * dccp_feat_push_change  -  Add/overwrite a Change option in the list
+ * @fn_list: feature-negotiation list to update
+ * @feat: one of %dccp_feature_numbers
+ * @local: whether local (1) or remote (0) @feat_num is meant
+ * @needs_mandatory: whether to use Mandatory feature negotiation options
+ * @fval: pointer to NN/SP value to be inserted (will be copied)
+ */
+static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local,
+				 u8 mandatory, dccp_feat_val *fval)
+{
+	struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
+
+	if (new == NULL)
+		return -ENOMEM;
+
+	new->feat_num	     = feat;
+	new->is_local	     = local;
+	new->state	     = FEAT_INITIALISING;
+	new->needs_confirm   = 0;
+	new->empty_confirm   = 0;
+	new->val	     = *fval;
+	new->needs_mandatory = mandatory;
+
+	return 0;
+}
+
+/**
+ * dccp_feat_push_confirm  -  Add a Confirm entry to the FN list
+ * @fn_list: feature-negotiation list to add to
+ * @feat: one of %dccp_feature_numbers
+ * @local: whether local (1) or remote (0) @feat_num is being confirmed
+ * @fval: pointer to NN/SP value to be inserted or NULL
+ * Returns 0 on success, a Reset code for further processing otherwise.
+ */
+static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local,
+				  dccp_feat_val *fval)
+{
+	struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
+
+	if (new == NULL)
+		return DCCP_RESET_CODE_TOO_BUSY;
+
+	new->feat_num	     = feat;
+	new->is_local	     = local;
+	new->state	     = FEAT_STABLE;	/* transition in 6.6.2 */
+	new->needs_confirm   = 1;
+	new->empty_confirm   = (fval == NULL);
+	new->val.nn	     = 0;		/* zeroes the whole structure */
+	if (!new->empty_confirm)
+		new->val     = *fval;
+	new->needs_mandatory = 0;
+
+	return 0;
+}
+
+static int dccp_push_empty_confirm(struct list_head *fn_list, u8 feat, u8 local)
+{
+	return dccp_feat_push_confirm(fn_list, feat, local, NULL);
+}
+
+static inline void dccp_feat_list_pop(struct dccp_feat_entry *entry)
+{
+	list_del(&entry->node);
+	dccp_feat_entry_destructor(entry);
+}
+
+void dccp_feat_list_purge(struct list_head *fn_list)
+{
+	struct dccp_feat_entry *entry, *next;
+
+	list_for_each_entry_safe(entry, next, fn_list, node)
+		dccp_feat_entry_destructor(entry);
+	INIT_LIST_HEAD(fn_list);
+}
+EXPORT_SYMBOL_GPL(dccp_feat_list_purge);
+
 int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 		     u8 *val, u8 len, gfp_t gfp)
 {
-- 
cgit v1.2.3-70-g09d2


From 828755cee087e4a34f45d6c9db661ccd0631cc6d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Per-socket initialisation of feature negotiation

This provides feature-negotiation initialisation for both DCCP sockets and
DCCP request_sockets, to support feature negotiation during connection setup.

It also resolves a FIXME regarding the congestion control initialisation.

Thanks to Wei Yongjun for help with the IPv6 side of this patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 include/linux/dccp.h |  4 ++++
 net/dccp/dccp.h      |  3 ++-
 net/dccp/feat.c      | 19 +++++++++++++++++++
 net/dccp/feat.h      |  1 +
 net/dccp/input.c     |  2 --
 net/dccp/ipv4.c      |  3 ++-
 net/dccp/ipv6.c      |  3 ++-
 net/dccp/minisocks.c |  7 ++++++-
 net/dccp/proto.c     |  1 +
 9 files changed, 37 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 3978aff197d..484b8a1fb02 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -412,6 +412,7 @@ extern void dccp_minisock_init(struct dccp_minisock *dmsk);
  * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1)
  * @dreq_isr: initial sequence number received on the Request
  * @dreq_service: service code present on the Request (there is just one)
+ * @dreq_featneg: feature negotiation options for this connection
  * The following two fields are analogous to the ones in dccp_sock:
  * @dreq_timestamp_echo: last received timestamp to echo (13.1)
  * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo
@@ -421,6 +422,7 @@ struct dccp_request_sock {
 	__u64			 dreq_iss;
 	__u64			 dreq_isr;
 	__be32			 dreq_service;
+	struct list_head	 dreq_featneg;
 	__u32			 dreq_timestamp_echo;
 	__u32			 dreq_timestamp_time;
 };
@@ -498,6 +500,7 @@ struct dccp_ackvec;
  * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
  * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
  * @dccps_minisock - associated minisock (accessed via dccp_msk)
+ * @dccps_featneg - tracks feature-negotiation state (mostly during handshake)
  * @dccps_hc_rx_ackvec - rx half connection ack vector
  * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
  * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
@@ -535,6 +538,7 @@ struct dccp_sock {
 	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
 	struct dccp_minisock		dccps_minisock;
+	struct list_head		dccps_featneg;
 	struct dccp_ackvec		*dccps_hc_rx_ackvec;
 	struct ccid			*dccps_hc_rx_ccid;
 	struct ccid			*dccps_hc_tx_ccid;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b4bc6e095a0..ab096c06bba 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -252,7 +252,8 @@ extern const char *dccp_state_name(const int state);
 extern void dccp_set_state(struct sock *sk, const int state);
 extern void dccp_done(struct sock *sk);
 
-extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
+extern int  dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
+			    struct sk_buff const *skb);
 
 extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 2ec2cd11769..faade82856f 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -279,6 +279,25 @@ void dccp_feat_list_purge(struct list_head *fn_list)
 }
 EXPORT_SYMBOL_GPL(dccp_feat_list_purge);
 
+/* generate @to as full clone of @from - @to must not contain any nodes */
+int dccp_feat_clone_list(struct list_head const *from, struct list_head *to)
+{
+	struct dccp_feat_entry *entry, *new;
+
+	INIT_LIST_HEAD(to);
+	list_for_each_entry(entry, from, node) {
+		new = dccp_feat_clone_entry(entry);
+		if (new == NULL)
+			goto cloning_failed;
+		list_add_tail(&new->node, to);
+	}
+	return 0;
+
+cloning_failed:
+	dccp_feat_list_purge(to);
+	return -ENOMEM;
+}
+
 int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 		     u8 *val, u8 len, gfp_t gfp)
 {
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 94203c230c6..7e953fd0a79 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -95,6 +95,7 @@ extern int  dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
 				   u8 *val, u8 len);
 extern void dccp_feat_clean(struct dccp_minisock *dmsk);
 extern int  dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
+extern int  dccp_feat_clone_list(struct list_head const *, struct list_head *);
 extern int  dccp_feat_init(struct dccp_minisock *dmsk);
 
 #endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 779d0ed9ae9..3070015edc7 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -590,8 +590,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
 								    skb) < 0)
 				return 1;
-
-			/* FIXME: do congestion control initialization */
 			goto discard;
 		}
 		if (dh->dccph_type == DCCP_PKT_RESET)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 882c5c4de69..0ce84ea8911 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -595,7 +595,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (req == NULL)
 		goto drop;
 
-	dccp_reqsk_init(req, skb);
+	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
+		goto drop_and_free;
 
 	dreq = dccp_rsk(req);
 	if (dccp_parse_options(sk, dreq, skb))
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5e1ee0da2c4..33e8a1ea304 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -424,7 +424,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (req == NULL)
 		goto drop;
 
-	dccp_reqsk_init(req, skb);
+	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
+		goto drop_and_free;
 
 	dreq = dccp_rsk(req);
 	if (dccp_parse_options(sk, dreq, skb))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b2804e2d1b8..e487133ae07 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -125,6 +125,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
 		newicsk->icsk_rto	    = DCCP_TIMEOUT_INIT;
 
+		INIT_LIST_HEAD(&newdp->dccps_featneg);
 		if (dccp_feat_clone(sk, newsk))
 			goto out_free;
 
@@ -304,7 +305,8 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 
 EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
 
-void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
+int dccp_reqsk_init(struct request_sock *req,
+		    struct dccp_sock const *dp, struct sk_buff const *skb)
 {
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
@@ -312,6 +314,9 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
 	inet_rsk(req)->acked	  = 0;
 	req->rcv_wnd		  = sysctl_dccp_feat_sequence_window;
 	dreq->dreq_timestamp_echo = 0;
+
+	/* inherit feature negotiation options from listening socket */
+	return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
 }
 
 EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index d0bd3481976..1cdf4ae9960 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -193,6 +193,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 
 	dccp_init_xmit_timers(sk);
 
+	INIT_LIST_HEAD(&dp->dccps_featneg);
 	/*
 	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
 	 * the listening (master) sock get CCID control blocks, which is not
-- 
cgit v1.2.3-70-g09d2


From 702083839b607f390dbed5d2304eb8fc5f4c85ac Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Cleanup routines for feature negotiation

This inserts the required de-allocation routines for memory allocated by
feature negotiation in the socket destructors, replacing dccp_feat_clean()
in one instance.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/dccp.h  | 2 ++
 net/dccp/ipv4.c  | 1 +
 net/dccp/ipv6.c  | 1 +
 net/dccp/proto.c | 2 +-
 4 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index ab096c06bba..dee4a90886d 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -442,6 +442,8 @@ static inline int dccp_ack_pending(const struct sock *sk)
 	       inet_csk_ack_scheduled(sk);
 }
 
+extern void dccp_feat_list_purge(struct list_head *fn_list);
+
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
 extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
 extern int dccp_insert_option_elapsed_time(struct sock *sk,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 0ce84ea8911..b623f6b2548 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -545,6 +545,7 @@ out:
 
 static void dccp_v4_reqsk_destructor(struct request_sock *req)
 {
+	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
 	kfree(inet_rsk(req)->opt);
 }
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 33e8a1ea304..ad6212e0043 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -302,6 +302,7 @@ done:
 
 static void dccp_v6_reqsk_destructor(struct request_sock *req)
 {
+	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
 	if (inet6_rsk(req)->pktopts != NULL)
 		kfree_skb(inet6_rsk(req)->pktopts);
 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 1cdf4ae9960..dafcefd8659 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -268,7 +268,7 @@ void dccp_destroy_sock(struct sock *sk)
 	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 
 	/* clean up feature negotiation state */
-	dccp_feat_clean(dmsk);
+	dccp_feat_list_purge(&dp->dccps_featneg);
 }
 
 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
-- 
cgit v1.2.3-70-g09d2


From 5591d286281fdfb57914f5fad3ca001d44ce8fc6 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Limit feature negotiation to connection setup phase

This patch starts the new implementation of feature negotiation:
 1. Although it is theoretically possible to perform feature negotiation at any
    time (and RFC 4340 supports this), in practice this is prohibitively complex,
    as it requires to put traffic on hold for each new negotiation.
 2. As a byproduct of restricting feature negotiation to connection setup, the
    feature-negotiation retransmit timer is no longer required. This part is now
    mapped onto the protocol-level retransmission.
    Details indicating why timers are no longer needed can be found on
    http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/feature_negotiation/\
	                                      implementation_notes.html

This patch disables anytime negotiation, subsequent patches work out full
feature negotiation support for connection setup.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/feat.c    | 19 ++++++++-----------
 net/dccp/options.c | 18 ------------------
 net/dccp/timer.c   | 12 ------------
 3 files changed, 8 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index faade82856f..77ce2f6b031 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -6,6 +6,8 @@
  *
  *  ASSUMPTIONS
  *  -----------
+ *  o Feature negotiation is coordinated with connection setup (as in TCP), wild
+ *    changes of parameters of an established connection are not supported.
  *  o All currently known SP features have 1-byte quantities. If in the future
  *    extensions of RFCs 4340..42 define features with item lengths larger than
  *    one byte, a feature-specific extension of the code will be required.
@@ -652,6 +654,9 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
 {
 	int rc;
 
+	/* Ignore Change requests other than during connection setup */
+	if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING)
+		return 0;
 	dccp_feat_debug(type, feature, *val);
 
 	/* figure out if it's SP or NN feature */
@@ -701,6 +706,9 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
 	int found = 0;
 	int all_confirmed = 1;
 
+	/* Ignore Confirm options other than during connection setup */
+	if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING)
+		return 0;
 	dccp_feat_debug(type, feature, *val);
 
 	/* locate our change request */
@@ -735,17 +743,6 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
 			all_confirmed = 0;
 	}
 
-	/* fix re-transmit timer */
-	/* XXX gotta make sure that no option negotiation occurs during
-	 * connection shutdown.  Consider that the CLOSEREQ is sent and timer is
-	 * on.  if all options are confirmed it might kill timer which should
-	 * remain alive until close is received.
-	 */
-	if (all_confirmed) {
-		dccp_pr_debug("clear feat negotiation timer %p\n", sk);
-		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
-	}
-
 	if (!found)
 		dccp_pr_debug("%s(%d, ...) never requested\n",
 			      dccp_feat_typename(type), feature);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 0809b63cb05..67a171a1268 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -489,7 +489,6 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
 
 static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_minisock *dmsk = dccp_msk(sk);
 	struct dccp_opt_pend *opt, *next;
 	int change = 0;
@@ -530,23 +529,6 @@ static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
 		}
 	}
 
-	/* Retransmit timer.
-	 * If this is the master listening sock, we don't set a timer on it.  It
-	 * should be fine because if the dude doesn't receive our RESPONSE
-	 * [which will contain the CHANGE] he will send another REQUEST which
-	 * will "retrnasmit" the change.
-	 */
-	if (change && dp->dccps_role != DCCP_ROLE_LISTEN) {
-		dccp_pr_debug("reset feat negotiation timer %p\n", sk);
-
-		/* XXX don't reset the timer on re-transmissions.  I.e. reset it
-		 * only when sending new stuff i guess.  Currently the timer
-		 * never backs off because on re-transmission it just resets it!
-		 */
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-					  inet_csk(sk)->icsk_rto, DCCP_RTO_MAX);
-	}
-
 	return 0;
 }
 
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 54b3c7e9e01..162d1e683c3 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -87,17 +87,6 @@ static void dccp_retransmit_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	/* retransmit timer is used for feature negotiation throughout
-	 * connection.  In this case, no packet is re-transmitted, but rather an
-	 * ack is generated and pending changes are placed into its options.
-	 */
-	if (sk->sk_send_head == NULL) {
-		dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk);
-		if (sk->sk_state == DCCP_OPEN)
-			dccp_send_ack(sk);
-		goto backoff;
-	}
-
 	/*
 	 * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
 	 * sent, no need to retransmit, this sock is dead.
@@ -126,7 +115,6 @@ static void dccp_retransmit_timer(struct sock *sk)
 		return;
 	}
 
-backoff:
 	icsk->icsk_backoff++;
 
 	icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
-- 
cgit v1.2.3-70-g09d2


From 86349c8d9c6892b57aff4549256ab1aa65aed0f0 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Registration routines for changing feature values

Two registration routines, for SP and NN features, are provided by this patch,
replacing a previous routine which was used for both feature types.

These are internal-only routines and therefore start with `__feat_register'.

It further exports the known limits of Sequence Window and Ack Ratio as symbolic
constants.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/ccids/ccid2.c |   6 +--
 net/dccp/feat.c        | 123 ++++++++++++++++++++++++++++++++++++++++---------
 net/dccp/feat.h        |  25 +++++++++-
 net/dccp/proto.c       |   2 +-
 4 files changed, 128 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9a430734530..c9ea19a4d85 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,7 +25,7 @@
 /*
  * This implementation should follow RFC 4341
  */
-
+#include "../feat.h"
 #include "../ccid.h"
 #include "../dccp.h"
 #include "ccid2.h"
@@ -147,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 		DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
 		val = max_ratio;
 	}
-	if (val > 0xFFFF)		/* RFC 4340, 11.3 */
-		val = 0xFFFF;
+	if (val > DCCPF_ACK_RATIO_MAX)
+		val = DCCPF_ACK_RATIO_MAX;
 
 	if (val == dp->dccps_l_ack_ratio)
 		return;
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 77ce2f6b031..b859722fba7 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -300,6 +300,95 @@ cloning_failed:
 	return -ENOMEM;
 }
 
+static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val)
+{
+	switch (feat_num) {
+	case DCCPF_ACK_RATIO:
+		return val <= DCCPF_ACK_RATIO_MAX;
+	case DCCPF_SEQUENCE_WINDOW:
+		return val >= DCCPF_SEQ_WMIN && val <= DCCPF_SEQ_WMAX;
+	}
+	return 0;	/* feature unknown - so we can't tell */
+}
+
+/* check that SP values are within the ranges defined in RFC 4340 */
+static u8 dccp_feat_is_valid_sp_val(u8 feat_num, u8 val)
+{
+	switch (feat_num) {
+	case DCCPF_CCID:
+		return val == DCCPC_CCID2 || val == DCCPC_CCID3;
+	/* Type-check Boolean feature values: */
+	case DCCPF_SHORT_SEQNOS:
+	case DCCPF_ECN_INCAPABLE:
+	case DCCPF_SEND_ACK_VECTOR:
+	case DCCPF_SEND_NDP_COUNT:
+	case DCCPF_DATA_CHECKSUM:
+	case DCCPF_SEND_LEV_RATE:
+		return val < 2;
+	case DCCPF_MIN_CSUM_COVER:
+		return val < 16;
+	}
+	return 0;			/* feature unknown */
+}
+
+static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len)
+{
+	if (sp_list == NULL || sp_len < 1)
+		return 0;
+	while (sp_len--)
+		if (!dccp_feat_is_valid_sp_val(feat_num, *sp_list++))
+			return 0;
+	return 1;
+}
+
+/**
+ * __feat_register_nn  -  Register new NN value on socket
+ * @fn: feature-negotiation list to register with
+ * @feat: an NN feature from %dccp_feature_numbers
+ * @mandatory: use Mandatory option if 1
+ * @nn_val: value to register (restricted to 4 bytes)
+ * Note that NN features are local by definition (RFC 4340, 6.3.2).
+ */
+static int __feat_register_nn(struct list_head *fn, u8 feat,
+			      u8 mandatory, u64 nn_val)
+{
+	dccp_feat_val fval = { .nn = nn_val };
+
+	if (dccp_feat_type(feat) != FEAT_NN ||
+	    !dccp_feat_is_valid_nn_val(feat, nn_val))
+		return -EINVAL;
+
+	/* Don't bother with default values, they will be activated anyway. */
+	if (nn_val - (u64)dccp_feat_default_value(feat) == 0)
+		return 0;
+
+	return dccp_feat_push_change(fn, feat, 1, mandatory, &fval);
+}
+
+/**
+ * __feat_register_sp  -  Register new SP value/list on socket
+ * @fn: feature-negotiation list to register with
+ * @feat: an SP feature from %dccp_feature_numbers
+ * @is_local: whether the local (1) or the remote (0) @feat is meant
+ * @mandatory: use Mandatory option if 1
+ * @sp_val: SP value followed by optional preference list
+ * @sp_len: length of @sp_val in bytes
+ */
+static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
+			      u8 mandatory, u8 const *sp_val, u8 sp_len)
+{
+	dccp_feat_val fval;
+
+	if (dccp_feat_type(feat) != FEAT_SP ||
+	    !dccp_feat_sp_list_ok(feat, sp_val, sp_len))
+		return -EINVAL;
+
+	if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
+		return -ENOMEM;
+
+	return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval);
+}
+
 int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 		     u8 *val, u8 len, gfp_t gfp)
 {
@@ -836,42 +925,30 @@ out_clean:
 
 EXPORT_SYMBOL_GPL(dccp_feat_clone);
 
-static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat,
-			    u8 *val, u8 len)
-{
-	int rc = -ENOMEM;
-	u8 *copy = kmemdup(val, len, GFP_KERNEL);
-
-	if (copy != NULL) {
-		rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL);
-		if (rc)
-			kfree(copy);
-	}
-	return rc;
-}
-
-int dccp_feat_init(struct dccp_minisock *dmsk)
+int dccp_feat_init(struct sock *sk)
 {
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_minisock *dmsk = dccp_msk(sk);
 	int rc;
 
-	INIT_LIST_HEAD(&dmsk->dccpms_pending);
-	INIT_LIST_HEAD(&dmsk->dccpms_conf);
+	INIT_LIST_HEAD(&dmsk->dccpms_pending);	/* XXX no longer used */
+	INIT_LIST_HEAD(&dmsk->dccpms_conf);	/* XXX no longer used */
 
 	/* CCID L */
-	rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID,
-			      &dmsk->dccpms_tx_ccid, 1);
+	rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 1, 0,
+				&dmsk->dccpms_tx_ccid, 1);
 	if (rc)
 		goto out;
 
 	/* CCID R */
-	rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID,
-			      &dmsk->dccpms_rx_ccid, 1);
+	rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 0, 0,
+				&dmsk->dccpms_rx_ccid, 1);
 	if (rc)
 		goto out;
 
 	/* Ack ratio */
-	rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO,
-			      &dmsk->dccpms_ack_ratio, 1);
+	rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0,
+				dmsk->dccpms_ack_ratio);
 out:
 	return rc;
 }
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 7e953fd0a79..9eefdb43783 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -14,6 +14,15 @@
 #include <linux/types.h>
 #include "dccp.h"
 
+/*
+ * Known limit values
+ */
+/* Ack Ratio takes 2-byte integer values (11.3) */
+#define DCCPF_ACK_RATIO_MAX	0xFFFF
+/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */
+#define DCCPF_SEQ_WMIN		32
+#define DCCPF_SEQ_WMAX		0x3FFFFFFFFFFFull
+
 enum dccp_feat_type {
 	FEAT_AT_RX   = 1,	/* located at RX side of half-connection  */
 	FEAT_AT_TX   = 2,	/* located at TX side of half-connection  */
@@ -74,6 +83,20 @@ static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry)
 	return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R;
 }
 
+/**
+ * struct ccid_dependency  -  Track changes resulting from choosing a CCID
+ * @dependent_feat: one of %dccp_feature_numbers
+ * @is_local: local (1) or remote (0) @dependent_feat
+ * @is_mandatory: whether presence of @dependent_feat is mission-critical or not
+ * @val: corresponding default value for @dependent_feat (u8 is sufficient here)
+ */
+struct ccid_dependency {
+	u8	dependent_feat;
+	bool	is_local:1,
+		is_mandatory:1;
+	u8	val;
+};
+
 #ifdef CONFIG_IP_DCCP_DEBUG
 extern const char *dccp_feat_typename(const u8 type);
 extern const char *dccp_feat_name(const u8 feat);
@@ -96,6 +119,6 @@ extern int  dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
 extern void dccp_feat_clean(struct dccp_minisock *dmsk);
 extern int  dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
 extern int  dccp_feat_clone_list(struct list_head const *, struct list_head *);
-extern int  dccp_feat_init(struct dccp_minisock *dmsk);
+extern int  dccp_feat_init(struct sock *sk);
 
 #endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index dafcefd8659..01332fe7a99 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -202,7 +202,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	 * setsockopt(CCIDs-I-want/accept). -acme
 	 */
 	if (likely(ctl_sock_initialized)) {
-		int rc = dccp_feat_init(dmsk);
+		int rc = dccp_feat_init(sk);
 
 		if (rc)
 			return rc;
-- 
cgit v1.2.3-70-g09d2


From 71bb49596bbf4e5a3328e1704d18604e822ba181 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Query supported CCIDs

This provides a data structure to record which CCIDs are locally supported
and three accessor functions:
 - a test function for internal use which is used to validate CCID requests
   made by the user;
 - a copy function so that the list can be used for feature-negotiation;
 - documented getsockopt() support so that the user can query capabilities.

The data structure is a table which is filled in at compile-time with the
list of available CCIDs (which in turn depends on the Kconfig choices).

Using the copy function for cloning the list of supported CCIDs is useful for
feature negotiation, since the negotiation is now with the full list of available
CCIDs (e.g. {2, 3}) instead of the default value {2}. This means negotiation
will not fail if the peer requests to use CCID3 instead of CCID2.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 Documentation/networking/dccp.txt |  4 ++++
 include/linux/dccp.h              |  1 +
 net/dccp/ccid.c                   | 48 +++++++++++++++++++++++++++++++++++++++
 net/dccp/ccid.h                   |  5 ++++
 net/dccp/feat.c                   |  4 ++++
 net/dccp/proto.c                  |  2 ++
 6 files changed, 64 insertions(+)

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 39131a3c78f..f0aeb20fa63 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -57,6 +57,10 @@ can be set before calling bind().
 DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
 size (application payload size) in bytes, see RFC 4340, section 14.
 
+DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
+supported by the endpoint (see include/linux/dccp.h for symbolic constants).
+The caller needs to provide a sufficiently large (> 2) array of type uint8_t.
+
 DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
 timewait state when closing the connection (RFC 4340, 8.3). The usual case is
 that the closing server sends a CloseReq, whereupon the client holds timewait
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 484b8a1fb02..d3ac1bde60b 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -209,6 +209,7 @@ struct dccp_so_feat {
 #define DCCP_SOCKOPT_SERVER_TIMEWAIT	6
 #define DCCP_SOCKOPT_SEND_CSCOV		10
 #define DCCP_SOCKOPT_RECV_CSCOV		11
+#define DCCP_SOCKOPT_AVAILABLE_CCIDS	12
 #define DCCP_SOCKOPT_CCID_RX_INFO	128
 #define DCCP_SOCKOPT_CCID_TX_INFO	192
 
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 4809753d12a..f72ca83df55 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -13,6 +13,13 @@
 
 #include "ccid.h"
 
+static u8 builtin_ccids[] = {
+	DCCPC_CCID2,		/* CCID2 is supported by default */
+#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE)
+	DCCPC_CCID3,
+#endif
+};
+
 static struct ccid_operations *ccids[CCID_MAX];
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
 static atomic_t ccids_lockct = ATOMIC_INIT(0);
@@ -86,6 +93,47 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
 	}
 }
 
+/* check that up to @array_len members in @ccid_array are supported */
+bool ccid_support_check(u8 const *ccid_array, u8 array_len)
+{
+	u8 i, j, found;
+
+	for (i = 0, found = 0; i < array_len; i++, found = 0) {
+		for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++)
+			found = (ccid_array[i] == builtin_ccids[j]);
+		if (!found)
+			return false;
+	}
+	return true;
+}
+
+/**
+ * ccid_get_builtin_ccids  -  Provide copy of `builtin' CCID array
+ * @ccid_array: pointer to copy into
+ * @array_len: value to return length into
+ * This function allocates memory - caller must see that it is freed after use.
+ */
+int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
+{
+	*ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any());
+	if (*ccid_array == NULL)
+		return -ENOBUFS;
+	*array_len = ARRAY_SIZE(builtin_ccids);
+	return 0;
+}
+
+int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+				    char __user *optval, int __user *optlen)
+{
+	if (len < sizeof(builtin_ccids))
+		return -EINVAL;
+
+	if (put_user(sizeof(builtin_ccids), optlen) ||
+	    copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids)))
+		return -EFAULT;
+	return 0;
+}
+
 int ccid_register(struct ccid_operations *ccid_ops)
 {
 	int err = -ENOBUFS;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index fdeae7b5731..259f5469d7d 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -103,6 +103,11 @@ static inline void *ccid_priv(const struct ccid *ccid)
 	return (void *)ccid->ccid_priv;
 }
 
+extern bool ccid_support_check(u8 const *ccid_array, u8 array_len);
+extern int  ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
+extern int  ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+					  char __user *, int __user *);
+
 extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
 			     gfp_t gfp);
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index b859722fba7..9399554878c 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -383,6 +383,10 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
 	    !dccp_feat_sp_list_ok(feat, sp_val, sp_len))
 		return -EINVAL;
 
+	/* Avoid negotiating alien CCIDs by only advertising supported ones */
+	if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len))
+		return -EOPNOTSUPP;
+
 	if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
 		return -ENOMEM;
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 01332fe7a99..b4b10cbd888 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -649,6 +649,8 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_GET_CUR_MPS:
 		val = dp->dccps_mss_cache;
 		break;
+	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
+		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 		val = dp->dccps_server_timewait;
 		break;
-- 
cgit v1.2.3-70-g09d2


From 093e1f46cf162913d05e1d4eeb01baa3e297b683 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Resolve dependencies of features on choice of CCID

This provides a missing link in the code chain, as several features implicitly
depend and/or rely on the choice of CCID. Most notably, this is the Send Ack Vector
feature, but also Ack Ratio and Send Loss Event Rate (also taken care of).

For Send Ack Vector, the situation is as follows:
 * since CCID2 mandates the use of Ack Vectors, there is no point in allowing
   endpoints which use CCID2 to disable Ack Vector features such a connection;

 * a peer with a TX CCID of CCID2 will always expect Ack Vectors, and a peer
   with a RX CCID of CCID2 must always send Ack Vectors (RFC 4341, sec. 4);

 * for all other CCIDs, the use of (Send) Ack Vector is optional and thus
   negotiable. However, this implies that the code negotiating the use of Ack
   Vectors also supports it (i.e. is able to supply and to either parse or
   ignore received Ack Vectors). Since this is not the case (CCID-3 has no Ack
   Vector support), the use of Ack Vectors is here disabled, with a comment
   in the source code.

An analogous consideration arises for the Send Loss Event Rate feature,
since the CCID-3 implementation does not support the loss interval options
of RFC 4342. To make such use explicit, corresponding feature-negotiation
options are inserted which signal the use of the loss event rate option,
as it is used by the CCID3 code.

Lastly, the values of the Ack Ratio feature are matched to the choice of CCID.

The patch implements this as a function which is called after the user has
made all other registrations for changing default values of features.

The table is variable-length, the reserved (and hence for feature-negotiation
invalid, confirmed by considering section 19.4 of RFC 4340) feature number `0'
is used to mark the end of the table.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/dccp.h   |   1 +
 net/dccp/feat.c   | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/dccp/output.c |   4 ++
 net/dccp/proto.c  |   3 +
 4 files changed, 168 insertions(+)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index dee4a90886d..1881527bdcd 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -442,6 +442,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
 	       inet_csk_ack_scheduled(sk);
 }
 
+extern int  dccp_feat_finalise_settings(struct dccp_sock *dp);
 extern void dccp_feat_list_purge(struct list_head *fn_list);
 
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 9399554878c..ed9f50b1c34 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -441,6 +441,166 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 
 EXPORT_SYMBOL_GPL(dccp_feat_change);
 
+/*
+ *	Tracking features whose value depend on the choice of CCID
+ *
+ * This is designed with an extension in mind so that a list walk could be done
+ * before activating any features. However, the existing framework was found to
+ * work satisfactorily up until now, the automatic verification is left open.
+ * When adding new CCIDs, add a corresponding dependency table here.
+ */
+static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local)
+{
+	static const struct ccid_dependency ccid2_dependencies[2][2] = {
+		/*
+		 * CCID2 mandates Ack Vectors (RFC 4341, 4.): as CCID is a TX
+		 * feature and Send Ack Vector is an RX feature, `is_local'
+		 * needs to be reversed.
+		 */
+		{	/* Dependencies of the receiver-side (remote) CCID2 */
+			{
+				.dependent_feat	= DCCPF_SEND_ACK_VECTOR,
+				.is_local	= true,
+				.is_mandatory	= true,
+				.val		= 1
+			},
+			{ 0, 0, 0, 0 }
+		},
+		{	/* Dependencies of the sender-side (local) CCID2 */
+			{
+				.dependent_feat	= DCCPF_SEND_ACK_VECTOR,
+				.is_local	= false,
+				.is_mandatory	= true,
+				.val		= 1
+			},
+			{ 0, 0, 0, 0 }
+		}
+	};
+	static const struct ccid_dependency ccid3_dependencies[2][5] = {
+		{	/*
+			 * Dependencies of the receiver-side CCID3
+			 */
+			{	/* locally disable Ack Vectors */
+				.dependent_feat	= DCCPF_SEND_ACK_VECTOR,
+				.is_local	= true,
+				.is_mandatory	= false,
+				.val		= 0
+			},
+			{	/* see below why Send Loss Event Rate is on */
+				.dependent_feat	= DCCPF_SEND_LEV_RATE,
+				.is_local	= true,
+				.is_mandatory	= true,
+				.val		= 1
+			},
+			{	/* NDP Count is needed as per RFC 4342, 6.1.1 */
+				.dependent_feat	= DCCPF_SEND_NDP_COUNT,
+				.is_local	= false,
+				.is_mandatory	= true,
+				.val		= 1
+			},
+			{ 0, 0, 0, 0 },
+		},
+		{	/*
+			 * CCID3 at the TX side: we request that the HC-receiver
+			 * will not send Ack Vectors (they will be ignored, so
+			 * Mandatory is not set); we enable Send Loss Event Rate
+			 * (Mandatory since the implementation does not support
+			 * the Loss Intervals option of RFC 4342, 8.6).
+			 * The last two options are for peer's information only.
+			*/
+			{
+				.dependent_feat	= DCCPF_SEND_ACK_VECTOR,
+				.is_local	= false,
+				.is_mandatory	= false,
+				.val		= 0
+			},
+			{
+				.dependent_feat	= DCCPF_SEND_LEV_RATE,
+				.is_local	= false,
+				.is_mandatory	= true,
+				.val		= 1
+			},
+			{	/* this CCID does not support Ack Ratio */
+				.dependent_feat	= DCCPF_ACK_RATIO,
+				.is_local	= true,
+				.is_mandatory	= false,
+				.val		= 0
+			},
+			{	/* tell receiver we are sending NDP counts */
+				.dependent_feat	= DCCPF_SEND_NDP_COUNT,
+				.is_local	= true,
+				.is_mandatory	= false,
+				.val		= 1
+			},
+			{ 0, 0, 0, 0 }
+		}
+	};
+	switch (ccid) {
+	case DCCPC_CCID2:
+		return ccid2_dependencies[is_local];
+	case DCCPC_CCID3:
+		return ccid3_dependencies[is_local];
+	default:
+		return NULL;
+	}
+}
+
+/**
+ * dccp_feat_propagate_ccid - Resolve dependencies of features on choice of CCID
+ * @fn: feature-negotiation list to update
+ * @id: CCID number to track
+ * @is_local: whether TX CCID (1) or RX CCID (0) is meant
+ * This function needs to be called after registering all other features.
+ */
+static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local)
+{
+	const struct ccid_dependency *table = dccp_feat_ccid_deps(id, is_local);
+	int i, rc = (table == NULL);
+
+	for (i = 0; rc == 0 && table[i].dependent_feat != DCCPF_RESERVED; i++)
+		if (dccp_feat_type(table[i].dependent_feat) == FEAT_SP)
+			rc = __feat_register_sp(fn, table[i].dependent_feat,
+						    table[i].is_local,
+						    table[i].is_mandatory,
+						    &table[i].val, 1);
+		else
+			rc = __feat_register_nn(fn, table[i].dependent_feat,
+						    table[i].is_mandatory,
+						    table[i].val);
+	return rc;
+}
+
+/**
+ * dccp_feat_finalise_settings  -  Finalise settings before starting negotiation
+ * @dp: client or listening socket (settings will be inherited)
+ * This is called after all registrations (socket initialisation, sysctls, and
+ * sockopt calls), and before sending the first packet containing Change options
+ * (ie. client-Request or server-Response), to ensure internal consistency.
+ */
+int dccp_feat_finalise_settings(struct dccp_sock *dp)
+{
+	struct list_head *fn = &dp->dccps_featneg;
+	struct dccp_feat_entry *entry;
+	int i = 2, ccids[2] = { -1, -1 };
+
+	/*
+	 * Propagating CCIDs:
+	 * 1) not useful to propagate CCID settings if this host advertises more
+	 *    than one CCID: the choice of CCID  may still change - if this is
+	 *    the client, or if this is the server and the client sends
+	 *    singleton CCID values.
+	 * 2) since is that propagate_ccid changes the list, we defer changing
+	 *    the sorted list until after the traversal.
+	 */
+	list_for_each_entry(entry, fn, node)
+		if (entry->feat_num == DCCPF_CCID && entry->val.sp.len == 1)
+			ccids[entry->is_local] = entry->val.sp.vec[0];
+	while (i--)
+		if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i))
+			return -1;
+	return 0;
+}
+
 static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d06945c7d3d..dc96ecfbe6e 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -469,6 +469,10 @@ int dccp_connect(struct sock *sk)
 	struct sk_buff *skb;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	/* do not connect if feature negotiation setup fails */
+	if (dccp_feat_finalise_settings(dccp_sk(sk)))
+		return -EPROTO;
+
 	dccp_connect_init(sk);
 
 	skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b4b10cbd888..46cb3490d48 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -278,6 +278,9 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	dp->dccps_role = DCCP_ROLE_LISTEN;
+	/* do not start to listen if feature negotiation setup fails */
+	if (dccp_feat_finalise_settings(dp))
+		return -EPROTO;
 	return inet_csk_listen_start(sk, backlog);
 }
 
-- 
cgit v1.2.3-70-g09d2


From d4c8741c431e07cfc66eb2b4c3a17b8d4975d9c0 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Mechanism to resolve CCID dependencies

This adds a hook to resolve features whose value depends on the choice of
CCID. It is done at the server since it can only be done after the CCID
values have been negotiated; i.e. the client will add its CCID preference
list on the Change options sent in the Request, which will be reconciled
with the local preference list of the server.

The concept is documented on
http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/feature_negotiation/\
				implementation_notes.html#ccid_dependencies

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/dccp.h   |  1 +
 net/dccp/feat.c   | 25 +++++++++++++++++++++++++
 net/dccp/output.c | 13 +++++++++----
 3 files changed, 35 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 1881527bdcd..e656dafb5d9 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -443,6 +443,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
 }
 
 extern int  dccp_feat_finalise_settings(struct dccp_sock *dp);
+extern int  dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
 extern void dccp_feat_list_purge(struct list_head *fn_list);
 
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index ed9f50b1c34..6852960bb0a 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -601,6 +601,31 @@ int dccp_feat_finalise_settings(struct dccp_sock *dp)
 	return 0;
 }
 
+/**
+ * dccp_feat_server_ccid_dependencies  -  Resolve CCID-dependent features
+ * It is the server which resolves the dependencies once the CCID has been
+ * fully negotiated. If no CCID has been negotiated, it uses the default CCID.
+ */
+int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq)
+{
+	struct list_head *fn = &dreq->dreq_featneg;
+	struct dccp_feat_entry *entry;
+	u8 is_local, ccid;
+
+	for (is_local = 0; is_local <= 1; is_local++) {
+		entry = dccp_feat_list_lookup(fn, DCCPF_CCID, is_local);
+
+		if (entry != NULL && !entry->empty_confirm)
+			ccid = entry->val.sp.vec[0];
+		else
+			ccid = dccp_feat_default_value(DCCPF_CCID);
+
+		if (dccp_feat_propagate_ccid(fn, ccid, is_local))
+			return -1;
+	}
+	return 0;
+}
+
 static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index dc96ecfbe6e..19a93d5433a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -339,10 +339,12 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
 	DCCP_SKB_CB(skb)->dccpd_seq  = dreq->dreq_iss;
 
-	if (dccp_insert_options_rsk(dreq, skb)) {
-		kfree_skb(skb);
-		return NULL;
-	}
+	/* Resolve feature dependencies resulting from choice of CCID */
+	if (dccp_feat_server_ccid_dependencies(dreq))
+		goto response_failed;
+
+	if (dccp_insert_options_rsk(dreq, skb))
+		goto response_failed;
 
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
@@ -363,6 +365,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	inet_rsk(req)->acked = 1;
 	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
 	return skb;
+response_failed:
+	kfree_skb(skb);
+	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(dccp_make_response);
-- 
cgit v1.2.3-70-g09d2


From 668144f7b41716a9efe1b398e15ead32a26cd101 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Deprecate old setsockopt framework

The previous setsockopt interface, which passed socket options via struct
dccp_so_feat, is complicated/difficult to use. Continuing to support it leads to
ugly code since the old approach did not distinguish between NN and SP values.

This patch removes the old setsockopt interface and replaces it with two new
functions to register NN/SP values for feature negotiation. These are
essentially wrappers around the internal __feat_register functions, with
checking added to avoid
 * wrong usage (type);
 * changing values while the connection is in progress.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/linux/dccp.h |  7 -----
 net/dccp/feat.c      | 72 ++++++++++++++++++++--------------------------------
 net/dccp/feat.h      |  5 ++--
 net/dccp/proto.c     | 53 ++------------------------------------
 4 files changed, 32 insertions(+), 105 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index d3ac1bde60b..6eaaca9b037 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -193,13 +193,6 @@ enum dccp_feature_numbers {
 	DCCPF_MAX_CCID_SPECIFIC = 255,
 };
 
-/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */
-struct dccp_so_feat {
-	__u8 dccpsf_feat;
-	__u8 __user *dccpsf_val;
-	__u8 dccpsf_len;
-};
-
 /* DCCP socket options */
 #define DCCP_SOCKOPT_PACKET_SIZE	1 /* XXX deprecated, without effect */
 #define DCCP_SOCKOPT_SERVICE		2
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 6852960bb0a..44b10afd3fb 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -393,53 +393,35 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
 	return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval);
 }
 
-int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
-		     u8 *val, u8 len, gfp_t gfp)
-{
-	struct dccp_opt_pend *opt;
-
-	dccp_feat_debug(type, feature, *val);
-
-	if (len > 3) {
-		DCCP_WARN("invalid length %d\n", len);
+/**
+ * dccp_feat_register_sp  -  Register requests to change SP feature values
+ * @sk: client or listening socket
+ * @feat: one of %dccp_feature_numbers
+ * @is_local: whether the local (1) or remote (0) @feat is meant
+ * @list: array of preferred values, in descending order of preference
+ * @len: length of @list in bytes
+ */
+int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+			  u8 const *list, u8 len)
+{	 /* any changes must be registered before establishing the connection */
+	if (sk->sk_state != DCCP_CLOSED)
+		return -EISCONN;
+	if (dccp_feat_type(feat) != FEAT_SP)
 		return -EINVAL;
-	}
-	/* XXX add further sanity checks */
-
-	/* check if that feature is already being negotiated */
-	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
-		/* ok we found a negotiation for this option already */
-		if (opt->dccpop_feat == feature && opt->dccpop_type == type) {
-			dccp_pr_debug("Replacing old\n");
-			/* replace */
-			BUG_ON(opt->dccpop_val == NULL);
-			kfree(opt->dccpop_val);
-			opt->dccpop_val	 = val;
-			opt->dccpop_len	 = len;
-			opt->dccpop_conf = 0;
-			return 0;
-		}
-	}
-
-	/* negotiation for a new feature */
-	opt = kmalloc(sizeof(*opt), gfp);
-	if (opt == NULL)
-		return -ENOMEM;
-
-	opt->dccpop_type = type;
-	opt->dccpop_feat = feature;
-	opt->dccpop_len	 = len;
-	opt->dccpop_val	 = val;
-	opt->dccpop_conf = 0;
-	opt->dccpop_sc	 = NULL;
-
-	BUG_ON(opt->dccpop_val == NULL);
-
-	list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending);
-	return 0;
+	return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local,
+				  0, list, len);
 }
 
-EXPORT_SYMBOL_GPL(dccp_feat_change);
+/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */
+int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val)
+{
+	/* any changes must be registered before establishing the connection */
+	if (sk->sk_state != DCCP_CLOSED)
+		return -EISCONN;
+	if (dccp_feat_type(feat) != FEAT_NN)
+		return -EINVAL;
+	return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val);
+}
 
 /*
  *	Tracking features whose value depend on the choice of CCID
@@ -1137,7 +1119,7 @@ int dccp_feat_init(struct sock *sk)
 
 	/* Ack ratio */
 	rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0,
-				dmsk->dccpms_ack_ratio);
+				dp->dccps_l_ack_ratio);
 out:
 	return rc;
 }
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 9eefdb43783..2c92bd18e5f 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -110,8 +110,9 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
 #define dccp_feat_debug(type, feat, val)
 #endif /* CONFIG_IP_DCCP_DEBUG */
 
-extern int  dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
-			     u8 *val, u8 len, gfp_t gfp);
+extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+				  u8 const *list, u8 len);
+extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
 extern int  dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature,
 				  u8 *val, u8 len);
 extern int  dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 46cb3490d48..108d56bd25c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -470,44 +470,6 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
 	return 0;
 }
 
-/* byte 1 is feature.  the rest is the preference list */
-static int dccp_setsockopt_change(struct sock *sk, int type,
-				  struct dccp_so_feat __user *optval)
-{
-	struct dccp_so_feat opt;
-	u8 *val;
-	int rc;
-
-	if (copy_from_user(&opt, optval, sizeof(opt)))
-		return -EFAULT;
-	/*
-	 * rfc4340: 6.1. Change Options
-	 */
-	if (opt.dccpsf_len < 1)
-		return -EINVAL;
-
-	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
-	if (!val)
-		return -ENOMEM;
-
-	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
-		rc = -EFAULT;
-		goto out_free_val;
-	}
-
-	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
-			      val, opt.dccpsf_len, GFP_KERNEL);
-	if (rc)
-		goto out_free_val;
-
-out:
-	return rc;
-
-out_free_val:
-	kfree(val);
-	goto out;
-}
-
 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		char __user *optval, int optlen)
 {
@@ -530,20 +492,9 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		err = 0;
 		break;
 	case DCCP_SOCKOPT_CHANGE_L:
-		if (optlen != sizeof(struct dccp_so_feat))
-			err = -EINVAL;
-		else
-			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
-						     (struct dccp_so_feat __user *)
-						     optval);
-		break;
 	case DCCP_SOCKOPT_CHANGE_R:
-		if (optlen != sizeof(struct dccp_so_feat))
-			err = -EINVAL;
-		else
-			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
-						     (struct dccp_so_feat __user *)
-						     optval);
+		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
+		err = 0;
 		break;
 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 		if (dp->dccps_role != DCCP_ROLE_SERVER)
-- 
cgit v1.2.3-70-g09d2


From 20f41eee82864e308a5499308a1722dc3181cc3a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Feature negotiation for minimum-checksum-coverage

This provides feature negotiation for server minimum checksum coverage
which so far has been missing.

Since sender/receiver coverage values range only from 0...15, their
type has also been reduced in size from u16 to u4.

Feature-negotiation options are now generated for both sender and receiver
coverage, i.e. when the peer has `forgotten' to enable partial coverage
then feature negotiation will automatically enable (negotiate) the partial
coverage value for this connection.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 include/linux/dccp.h |  4 ++--
 net/dccp/proto.c     | 53 +++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 42 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 6eaaca9b037..5a5a89935db 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -527,8 +527,8 @@ struct dccp_sock {
 	__u32				dccps_timestamp_time;
 	__u16				dccps_l_ack_ratio;
 	__u16				dccps_r_ack_ratio;
-	__u16				dccps_pcslen;
-	__u16				dccps_pcrlen;
+	__u8				dccps_pcslen:4;
+	__u8				dccps_pcrlen:4;
 	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
 	struct dccp_minisock		dccps_minisock;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 108d56bd25c..47b137a3fea 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -470,6 +470,42 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
 	return 0;
 }
 
+static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
+{
+	u8 *list, len;
+	int i, rc;
+
+	if (cscov < 0 || cscov > 15)
+		return -EINVAL;
+	/*
+	 * Populate a list of permissible values, in the range cscov...15. This
+	 * is necessary since feature negotiation of single values only works if
+	 * both sides incidentally choose the same value. Since the list starts
+	 * lowest-value first, negotiation will pick the smallest shared value.
+	 */
+	if (cscov == 0)
+		return 0;
+	len = 16 - cscov;
+
+	list = kmalloc(len, GFP_KERNEL);
+	if (list == NULL)
+		return -ENOBUFS;
+
+	for (i = 0; i < len; i++)
+		list[i] = cscov++;
+
+	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
+
+	if (rc == 0) {
+		if (rx)
+			dccp_sk(sk)->dccps_pcrlen = cscov;
+		else
+			dccp_sk(sk)->dccps_pcslen = cscov;
+	}
+	kfree(list);
+	return rc;
+}
+
 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		char __user *optval, int optlen)
 {
@@ -502,20 +538,11 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		else
 			dp->dccps_server_timewait = (val != 0);
 		break;
-	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
-		if (val < 0 || val > 15)
-			err = -EINVAL;
-		else
-			dp->dccps_pcslen = val;
+	case DCCP_SOCKOPT_SEND_CSCOV:
+		err = dccp_setsockopt_cscov(sk, val, false);
 		break;
-	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
-		if (val < 0 || val > 15)
-			err = -EINVAL;
-		else {
-			dp->dccps_pcrlen = val;
-			/* FIXME: add feature negotiation,
-			 * ChangeL(MinimumChecksumCoverage, val) */
-		}
+	case DCCP_SOCKOPT_RECV_CSCOV:
+		err = dccp_setsockopt_cscov(sk, val, true);
 		break;
 	default:
 		err = -ENOPROTOOPT;
-- 
cgit v1.2.3-70-g09d2


From 17c30b40ed79e9f3955e884632c8f01e577b204a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Deprecate Ack Ratio sysctl

This patch deprecates the Ack Ratio sysctl, since
 * Ack Ratio is entirely ignored by CCID-3 and CCID-4,
 * Ack Ratio currently doesn't work in CCID-2 (i.e. is always set to 1);
 * even if it would work in CCID-2, there is no point for a user to change it:
   - Ack Ratio is constrained by cwnd (RFC 4341, 6.1.2),
   - if Ack Ratio > cwnd, the system resorts to spurious RTO timeouts
     (since waiting for Acks which will never arrive in this window),
   - cwnd is not a user-configurable value.

The only reasonable place for Ack Ratio is to print it for debugging. It is
planned to do this later on, as part of e.g. dccp_probe.

With this patch Ack Ratio is now under full control of feature negotiation:
 * Ack Ratio is resolved as a dependency of the selected CCID;
 * if the chosen CCID supports it (i.e. CCID == CCID-2), Ack Ratio is set to
   the default of 2, following RFC 4340, 11.3 - "New connections start with Ack
   Ratio 2 for both endpoints";
 * what happens then is part of another patch set, since it concerns the
   dynamic update of Ack Ratio while the connection is in full flight.

Thanks to Tomasz Grobelny for discussion leading up to this patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 Documentation/networking/dccp.txt | 3 ---
 include/linux/dccp.h              | 2 --
 net/dccp/dccp.h                   | 1 -
 net/dccp/minisocks.c              | 1 -
 net/dccp/options.c                | 1 -
 net/dccp/sysctl.c                 | 7 -------
 6 files changed, 15 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index f0aeb20fa63..43df4487379 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -125,9 +125,6 @@ send_ndp = 1
 send_ackvec = 1
 	Whether or not to send Ack Vector options (sec. 11.5).
 
-ack_ratio = 2
-	The default Ack Ratio (sec. 11.3) to use.
-
 tx_ccid = 2
 	Default CCID for the sender-receiver half-connection.
 
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 5a5a89935db..eda389ce04f 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -368,7 +368,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   * @dccpms_ccid - Congestion Control Id (CCID) (section 10)
   * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
   * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2)
-  * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3)
   * @dccpms_pending - List of features being negotiated
   * @dccpms_conf -
   */
@@ -378,7 +377,6 @@ struct dccp_minisock {
 	__u8			dccpms_tx_ccid;
 	__u8			dccpms_send_ack_vector;
 	__u8			dccpms_send_ndp_count;
-	__u8			dccpms_ack_ratio;
 	struct list_head	dccpms_pending;
 	struct list_head	dccpms_conf;
 };
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e656dafb5d9..031ce350d3c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -98,7 +98,6 @@ extern int  sysctl_dccp_retries2;
 extern int  sysctl_dccp_feat_sequence_window;
 extern int  sysctl_dccp_feat_rx_ccid;
 extern int  sysctl_dccp_feat_tx_ccid;
-extern int  sysctl_dccp_feat_ack_ratio;
 extern int  sysctl_dccp_feat_send_ack_vector;
 extern int  sysctl_dccp_feat_send_ndp_count;
 extern int  sysctl_dccp_tx_qlen;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index e487133ae07..ee7f40f8ddf 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -47,7 +47,6 @@ void dccp_minisock_init(struct dccp_minisock *dmsk)
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
 	dmsk->dccpms_rx_ccid	     = sysctl_dccp_feat_rx_ccid;
 	dmsk->dccpms_tx_ccid	     = sysctl_dccp_feat_tx_ccid;
-	dmsk->dccpms_ack_ratio	     = sysctl_dccp_feat_ack_ratio;
 	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
 	dmsk->dccpms_send_ndp_count  = sysctl_dccp_feat_send_ndp_count;
 }
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 67a171a1268..515ad45013a 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -26,7 +26,6 @@
 int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
 int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
 int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
-int sysctl_dccp_feat_ack_ratio	      = DCCPF_INITIAL_ACK_RATIO;
 int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
 int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;
 
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 21295993fdb..f6e54f433e2 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -40,13 +40,6 @@ static struct ctl_table dccp_default_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "ack_ratio",
-		.data		= &sysctl_dccp_feat_ack_ratio,
-		.maxlen		= sizeof(sysctl_dccp_feat_ack_ratio),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "send_ackvec",
 		.data		= &sysctl_dccp_feat_send_ack_vector,
-- 
cgit v1.2.3-70-g09d2


From 73bbe095bbb9ce5f94d5475bad54c7ccd8573b1b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Tidy up setsockopt calls

This splits the setsockopt calls into two groups, depending on whether an
integer argument (val) is required and whether routines being called do
their own locking.

Some options (such as setting the CCID) use u8 rather than int, so that for
these the test with regard to integer-sizeof can not be used.

The second switch-case statement now only has those statements which need
locking and which make use of `val'.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Reviewed-by: Eugene Teo <eugeneteo@kernel.sg>
---
 net/dccp/proto.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 47b137a3fea..e29bbf91405 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -512,7 +512,17 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 	struct dccp_sock *dp = dccp_sk(sk);
 	int val, err = 0;
 
-	if (optlen < sizeof(int))
+	switch (optname) {
+	case DCCP_SOCKOPT_PACKET_SIZE:
+		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+		return 0;
+	case DCCP_SOCKOPT_CHANGE_L:
+	case DCCP_SOCKOPT_CHANGE_R:
+		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
+		return 0;
+	}
+
+	if (optlen < (int)sizeof(int))
 		return -EINVAL;
 
 	if (get_user(val, (int __user *)optval))
@@ -523,15 +533,6 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 
 	lock_sock(sk);
 	switch (optname) {
-	case DCCP_SOCKOPT_PACKET_SIZE:
-		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
-		err = 0;
-		break;
-	case DCCP_SOCKOPT_CHANGE_L:
-	case DCCP_SOCKOPT_CHANGE_R:
-		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
-		err = 0;
-		break;
 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 		if (dp->dccps_role != DCCP_ROLE_SERVER)
 			err = -EOPNOTSUPP;
@@ -548,8 +549,8 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		err = -ENOPROTOOPT;
 		break;
 	}
-
 	release_sock(sk);
+
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From fade756f18d42694e3acb00e3471ab43002cba16 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Set per-connection CCIDs via socket options

With this patch, TX/RX CCIDs can now be changed on a per-connection basis, which
overrides the defaults set by the global sysctl variables for TX/RX CCIDs.

To make full use of this facility, the remaining patches of this patch set are
needed, which track dependencies and activate negotiated feature values.

Note on the maximum number of CCIDs that can be registered:
-----------------------------------------------------------
The maximum number of CCIDs that can be registered on the socket is constrained
by the space in a Confirm/Change feature negotiation option.

The space in these in turn depends on the size of header options as defined
in RFC 4340, 5.8. Since this is a recurring constant, it has been moved from
ackvec.h into linux/dccp.h, clarifying its purpose.

Relative to this size, the maximum number of CCID identifiers that can be
present in a Confirm option (which always consumes 1 byte more than a Change
option, cf. 6.1) is 2 bytes less than the maximum TLV size: one for the
CCID-feature-type and one for the selected value.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 Documentation/networking/dccp.txt | 14 ++++++++++++++
 include/linux/dccp.h              |  5 +++++
 net/dccp/ackvec.c                 |  9 ++++-----
 net/dccp/ackvec.h                 |  5 ++---
 net/dccp/feat.h                   |  2 ++
 net/dccp/proto.c                  | 34 ++++++++++++++++++++++++++++++++++
 6 files changed, 61 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 43df4487379..610083ff73f 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -61,6 +61,20 @@ DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
 supported by the endpoint (see include/linux/dccp.h for symbolic constants).
 The caller needs to provide a sufficiently large (> 2) array of type uint8_t.
 
+DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same
+time, combining the operation of the next two socket options. This option is
+preferrable over the latter two, since often applications will use the same
+type of CCID for both directions; and mixed use of CCIDs is not currently well
+understood. This socket option takes as argument at least one uint8_t value, or
+an array of uint8_t values, which must match available CCIDS (see above). CCIDs
+must be registered on the socket before calling connect() or listen().
+
+DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets
+the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID.
+Please note that the getsockopt argument type here is `int', not uint8_t.
+
+DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID.
+
 DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
 timewait state when closing the connection (RFC 4340, 8.3). The usual case is
 that the closing server sends a CloseReq, whereupon the client holds timewait
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index eda389ce04f..6a72ff52a8a 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -168,6 +168,8 @@ enum {
 	DCCPO_MIN_CCID_SPECIFIC = 128,
 	DCCPO_MAX_CCID_SPECIFIC = 255,
 };
+/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */
+#define DCCP_SINGLE_OPT_MAXLEN	253
 
 /* DCCP CCIDS */
 enum {
@@ -203,6 +205,9 @@ enum dccp_feature_numbers {
 #define DCCP_SOCKOPT_SEND_CSCOV		10
 #define DCCP_SOCKOPT_RECV_CSCOV		11
 #define DCCP_SOCKOPT_AVAILABLE_CCIDS	12
+#define DCCP_SOCKOPT_CCID		13
+#define DCCP_SOCKOPT_TX_CCID		14
+#define DCCP_SOCKOPT_RX_CCID		15
 #define DCCP_SOCKOPT_CCID_RX_INFO	128
 #define DCCP_SOCKOPT_CCID_TX_INFO	192
 
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 1e8be246ad1..01e4d39fa23 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -12,7 +12,6 @@
 #include "ackvec.h"
 #include "dccp.h"
 
-#include <linux/dccp.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -68,7 +67,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
 	/* Figure out how many options do we need to represent the ackvec */
-	const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN);
+	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
 	u16 len = av->av_vec_len + 2 * nr_opts, i;
 	u32 elapsed_time;
 	const unsigned char *tail, *from;
@@ -100,8 +99,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	for (i = 0; i < nr_opts; ++i) {
 		int copylen = len;
 
-		if (len > DCCP_MAX_ACKVEC_OPT_LEN)
-			copylen = DCCP_MAX_ACKVEC_OPT_LEN;
+		if (len > DCCP_SINGLE_OPT_MAXLEN)
+			copylen = DCCP_SINGLE_OPT_MAXLEN;
 
 		*to++ = DCCPO_ACK_VECTOR_0;
 		*to++ = copylen + 2;
@@ -432,7 +431,7 @@ found:
 int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 		      u64 *ackno, const u8 opt, const u8 *value, const u8 len)
 {
-	if (len > DCCP_MAX_ACKVEC_OPT_LEN)
+	if (len > DCCP_SINGLE_OPT_MAXLEN)
 		return -1;
 
 	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index bcb64fb4ace..4ccee030524 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -11,15 +11,14 @@
  *	published by the Free Software Foundation.
  */
 
+#include <linux/dccp.h>
 #include <linux/compiler.h>
 #include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
-/* Read about the ECN nonce to see why it is 253 */
-#define DCCP_MAX_ACKVEC_OPT_LEN 253
 /* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2)
+#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
 
 #define DCCP_ACKVEC_STATE_RECEIVED	0
 #define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 2c92bd18e5f..b53b11717c4 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -22,6 +22,8 @@
 /* Wmin=32 and Wmax=2^46-1 from 7.5.2 */
 #define DCCPF_SEQ_WMIN		32
 #define DCCPF_SEQ_WMAX		0x3FFFFFFFFFFFull
+/* Maximum number of SP values that fit in a single (Confirm) option */
+#define DCCP_FEAT_MAX_SP_VALS	(DCCP_SINGLE_OPT_MAXLEN - 2)
 
 enum dccp_feat_type {
 	FEAT_AT_RX   = 1,	/* located at RX side of half-connection  */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index e29bbf91405..2cd56df44d8 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -506,6 +506,36 @@ static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
 	return rc;
 }
 
+static int dccp_setsockopt_ccid(struct sock *sk, int type,
+				char __user *optval, int optlen)
+{
+	u8 *val;
+	int rc = 0;
+
+	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
+		return -EINVAL;
+
+	val = kmalloc(optlen, GFP_KERNEL);
+	if (val == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(val, optval, optlen)) {
+		kfree(val);
+		return -EFAULT;
+	}
+
+	lock_sock(sk);
+	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
+		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
+
+	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
+		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
+	release_sock(sk);
+
+	kfree(val);
+	return rc;
+}
+
 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		char __user *optval, int optlen)
 {
@@ -520,6 +550,10 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_CHANGE_R:
 		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
 		return 0;
+	case DCCP_SOCKOPT_CCID:
+	case DCCP_SOCKOPT_RX_CCID:
+	case DCCP_SOCKOPT_TX_CCID:
+		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
 	}
 
 	if (optlen < (int)sizeof(int))
-- 
cgit v1.2.3-70-g09d2


From c8041e264b3db6944d37b87969fbe6458cb30cfd Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: API to query the current TX/RX CCID

This provides function to query the current TX/RX CCID dynamically, without
reliance on the minisock value, using dynamic information available in the
currently loaded CCID module.

This query function is then used to
 (a) provide the getsockopt part for getting/setting CCIDs via sockopts;
 (b) replace the current test for "which CCID is in use" in probe.c.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/ccid.h  | 18 ++++++++++++++++++
 net/dccp/probe.c |  7 ++-----
 net/dccp/proto.c | 10 ++++++++++
 3 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 259f5469d7d..803343aed00 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -116,6 +116,24 @@ extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk,
 extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk,
 				   gfp_t gfp);
 
+static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
+{
+	struct ccid *ccid = dp->dccps_hc_rx_ccid;
+
+	if (ccid == NULL || ccid->ccid_ops == NULL)
+		return -1;
+	return ccid->ccid_ops->ccid_id;
+}
+
+static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
+{
+	struct ccid *ccid = dp->dccps_hc_tx_ccid;
+
+	if (ccid == NULL || ccid->ccid_ops == NULL)
+		return -1;
+	return ccid->ccid_ops->ccid_id;
+}
+
 extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
 extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
 
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 81368a7f537..9ca783d7467 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -74,14 +74,11 @@ static void printl(const char *fmt, ...)
 static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			 struct msghdr *msg, size_t size)
 {
-	const struct dccp_minisock *dmsk = dccp_msk(sk);
 	const struct inet_sock *inet = inet_sk(sk);
-	const struct ccid3_hc_tx_sock *hctx;
+	struct ccid3_hc_tx_sock *hctx = NULL;
 
-	if (dmsk->dccpms_tx_ccid == DCCPC_CCID3)
+	if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
 		hctx = ccid3_hc_tx_sk(sk);
-	else
-		hctx = NULL;
 
 	if (port == 0 || ntohs(inet->dport) == port ||
 	    ntohs(inet->sport) == port) {
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 2cd56df44d8..6550452d59e 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -667,6 +667,16 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 		break;
 	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
 		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
+	case DCCP_SOCKOPT_TX_CCID:
+		val = ccid_get_current_tx_ccid(dp);
+		if (val < 0)
+			return -ENOPROTOOPT;
+		break;
+	case DCCP_SOCKOPT_RX_CCID:
+		val = ccid_get_current_rx_ccid(dp);
+		if (val < 0)
+			return -ENOPROTOOPT;
+		break;
 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 		val = dp->dccps_server_timewait;
 		break;
-- 
cgit v1.2.3-70-g09d2


From b9aaac1c538a9c86e8ef3be2579a13ff55580908 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Increase the scope of variable-length htonl/ntohl functions

This extends the scope of two available functions, encode|decode_value_var,
to work up to 6 (8) bytes, to match maximum requirements in the RFC.

These functions are going to be used both by general option processing and
feature negotiation code, hence declarations have been put into feat.h.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 net/dccp/feat.h    | 14 ++++++++++++++
 net/dccp/options.c | 21 ++++++++++++++-------
 2 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index b53b11717c4..90d16caf545 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -124,4 +124,18 @@ extern int  dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
 extern int  dccp_feat_clone_list(struct list_head const *, struct list_head *);
 extern int  dccp_feat_init(struct sock *sk);
 
+/*
+ * Encoding variable-length options and their maximum length.
+ *
+ * This affects NN options (SP options are all u8) and other variable-length
+ * options (see table 3 in RFC 4340). The limit is currently given the Sequence
+ * Window NN value (sec. 7.5.2) and the NDP count (sec. 7.7) option, all other
+ * options consume less than 6 bytes (timestamps are 4 bytes).
+ * When updating this constant (e.g. due to new internet drafts / RFCs), make
+ * sure that you also update all code which refers to it.
+ */
+#define DCCP_OPTVAL_MAXLEN	6
+
+extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
+extern u64  dccp_decode_value_var(const u8 *bf, const u8 len);
 #endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 515ad45013a..9cb0ff89405 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -29,16 +29,20 @@ int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
 int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
 int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;
 
-static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
+u64 dccp_decode_value_var(const u8 *bf, const u8 len)
 {
-	u32 value = 0;
+	u64 value = 0;
 
+	if (len >= DCCP_OPTVAL_MAXLEN)
+		value += ((u64)*bf++) << 40;
+	if (len > 4)
+		value += ((u64)*bf++) << 32;
 	if (len > 3)
-		value += *bf++ << 24;
+		value += ((u64)*bf++) << 24;
 	if (len > 2)
-		value += *bf++ << 16;
+		value += ((u64)*bf++) << 16;
 	if (len > 1)
-		value += *bf++ << 8;
+		value += ((u64)*bf++) << 8;
 	if (len > 0)
 		value += *bf;
 
@@ -298,9 +302,12 @@ out_invalid_option:
 
 EXPORT_SYMBOL_GPL(dccp_parse_options);
 
-static void dccp_encode_value_var(const u32 value, unsigned char *to,
-				  const unsigned int len)
+void dccp_encode_value_var(const u64 value, u8 *to, const u8 len)
 {
+	if (len >= DCCP_OPTVAL_MAXLEN)
+		*to++ = (value & 0xFF0000000000ull) >> 40;
+	if (len > 4)
+		*to++ = (value & 0xFF00000000ull) >> 32;
 	if (len > 3)
 		*to++ = (value & 0xFF000000) >> 24;
 	if (len > 2)
-- 
cgit v1.2.3-70-g09d2


From d0440ee6f6903fcde6ed4efb88c910de1dfa18e5 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Support for Mandatory options

Support for Mandatory options is provided by this patch, which will
be used by subsequent feature-negotiation patches.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 net/dccp/feat.h    |  2 ++
 net/dccp/options.c | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'net')

diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 90d16caf545..602e0a7294b 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -138,4 +138,6 @@ extern int  dccp_feat_init(struct sock *sk);
 
 extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
 extern u64  dccp_decode_value_var(const u8 *bf, const u8 len);
+
+extern int  dccp_insert_option_mandatory(struct sk_buff *skb);
 #endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 9cb0ff89405..676d53065de 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -467,6 +467,21 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
 	return 0;
 }
 
+/**
+ * dccp_insert_option_mandatory  -  Mandatory option (5.8.2)
+ * Note that since we are using skb_push, this function needs to be called
+ * _after_ inserting the option it is supposed to influence (stack order).
+ */
+int dccp_insert_option_mandatory(struct sk_buff *skb)
+{
+	if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN)
+		return -1;
+
+	DCCP_SKB_CB(skb)->dccpd_opt_len++;
+	*skb_push(skb, 1) = DCCPO_MANDATORY;
+	return 0;
+}
+
 static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
 				u8 *val, u8 len)
 {
-- 
cgit v1.2.3-70-g09d2


From cf9ddf73b9ba21a5cd6d3fcb0a45cfa9ec452033 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Header option insertion routine for feature-negotiation

The patch extends existing code:
 * Confirm options divide into the confirmed value plus an optional preference
   list for SP values. Previously only the preference list was echoed for SP
   values, now the confirmed value is added as per RFC 4340, 6.1;
 * length and sanity checks are added to avoid illegal memory (or NULL) access.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/feat.h    |  2 ++
 net/dccp/options.c | 91 +++++++++++++++++++-----------------------------------
 2 files changed, 33 insertions(+), 60 deletions(-)

(limited to 'net')

diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 602e0a7294b..8e863839ed9 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -140,4 +140,6 @@ extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
 extern u64  dccp_decode_value_var(const u8 *bf, const u8 len);
 
 extern int  dccp_insert_option_mandatory(struct sk_buff *skb);
+extern int  dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
+			       u8 *val, u8 len, bool repeat_first);
 #endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 676d53065de..bfa1cb8f3ef 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -482,23 +482,46 @@ int dccp_insert_option_mandatory(struct sk_buff *skb)
 	return 0;
 }
 
-static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
-				u8 *val, u8 len)
+/**
+ * dccp_insert_fn_opt  -  Insert single Feature-Negotiation option into @skb
+ * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R
+ * @feat: one out of %dccp_feature_numbers
+ * @val: NN value or SP array (preferred element first) to copy
+ * @len: true length of @val in bytes (excluding first element repetition)
+ * @repeat_first: whether to copy the first element of @val twice
+ * The last argument is used to construct Confirm options, where the preferred
+ * value and the preference list appear separately (RFC 4340, 6.3.1). Preference
+ * lists are kept such that the preferred entry is always first, so we only need
+ * to copy twice, and avoid the overhead of cloning into a bigger array.
+ */
+int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
+		       u8 *val, u8 len, bool repeat_first)
 {
-	u8 *to;
+	u8 tot_len, *to;
 
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) {
-		DCCP_WARN("packet too small for feature %d option!\n", feat);
+	/* take the `Feature' field and possible repetition into account */
+	if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) {
+		DCCP_WARN("length %u for feature %u too large\n", len, feat);
 		return -1;
 	}
 
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3;
+	if (unlikely(val == NULL || len == 0))
+		len = repeat_first = 0;
+	tot_len = 3 + repeat_first + len;
+
+	if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) {
+		DCCP_WARN("packet too small for feature %d option!\n", feat);
+		return -1;
+	}
+	DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len;
 
-	to    = skb_push(skb, len + 3);
+	to    = skb_push(skb, tot_len);
 	*to++ = type;
-	*to++ = len + 3;
+	*to++ = tot_len;
 	*to++ = feat;
 
+	if (repeat_first)
+		*to++ = *val;
 	if (len)
 		memcpy(to, val, len);
 
@@ -508,51 +531,6 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
 	return 0;
 }
 
-static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
-{
-	struct dccp_minisock *dmsk = dccp_msk(sk);
-	struct dccp_opt_pend *opt, *next;
-	int change = 0;
-
-	/* confirm any options [NN opts] */
-	list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
-		dccp_insert_feat_opt(skb, opt->dccpop_type,
-				     opt->dccpop_feat, opt->dccpop_val,
-				     opt->dccpop_len);
-		/* fear empty confirms */
-		if (opt->dccpop_val)
-			kfree(opt->dccpop_val);
-		kfree(opt);
-	}
-	INIT_LIST_HEAD(&dmsk->dccpms_conf);
-
-	/* see which features we need to send */
-	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
-		/* see if we need to send any confirm */
-		if (opt->dccpop_sc) {
-			dccp_insert_feat_opt(skb, opt->dccpop_type + 1,
-					     opt->dccpop_feat,
-					     opt->dccpop_sc->dccpoc_val,
-					     opt->dccpop_sc->dccpoc_len);
-
-			BUG_ON(!opt->dccpop_sc->dccpoc_val);
-			kfree(opt->dccpop_sc->dccpoc_val);
-			kfree(opt->dccpop_sc);
-			opt->dccpop_sc = NULL;
-		}
-
-		/* any option not confirmed, re-send it */
-		if (!opt->dccpop_conf) {
-			dccp_insert_feat_opt(skb, opt->dccpop_type,
-					     opt->dccpop_feat, opt->dccpop_val,
-					     opt->dccpop_len);
-			change++;
-		}
-	}
-
-	return 0;
-}
-
 /* The length of all options needs to be a multiple of 4 (5.8) */
 static void dccp_insert_option_padding(struct sk_buff *skb)
 {
@@ -589,13 +567,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 		dp->dccps_hc_rx_insert_options = 0;
 	}
 
-	/* Feature negotiation */
-	/* Data packets can't do feat negotiation */
-	if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA &&
-	    DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK &&
-	    dccp_insert_options_feat(sk, skb))
-		return -1;
-
 	/*
 	 * Obtain RTT sample from Request/Response exchange.
 	 * This is currently used in CCID 3 initialisation.
-- 
cgit v1.2.3-70-g09d2


From 0ef118a017919cd661cf294811d1889ac556ee80 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Insert feature-negotiation options into skb

This patch replaces the earlier insertion routine from options.c, so that
code specific to feature negotiation can remain in feat.c. This is possible
by calling a function already existing in options.c.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/dccp.h |  2 ++
 net/dccp/feat.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 031ce350d3c..2e2a6f229ea 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -443,6 +443,8 @@ static inline int dccp_ack_pending(const struct sock *sk)
 
 extern int  dccp_feat_finalise_settings(struct dccp_sock *dp);
 extern int  dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
+extern int  dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
+				  struct sk_buff *skb);
 extern void dccp_feat_list_purge(struct list_head *fn_list);
 
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 44b10afd3fb..da686464462 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -300,6 +300,20 @@ cloning_failed:
 	return -ENOMEM;
 }
 
+/**
+ * dccp_feat_valid_nn_length  -  Enforce length constraints on NN options
+ * Length is between 0 and %DCCP_OPTVAL_MAXLEN. Used for outgoing packets only,
+ * incoming options are accepted as long as their values are valid.
+ */
+static u8 dccp_feat_valid_nn_length(u8 feat_num)
+{
+	if (feat_num == DCCPF_ACK_RATIO)	/* RFC 4340, 11.3 and 6.6.8 */
+		return 2;
+	if (feat_num == DCCPF_SEQUENCE_WINDOW)	/* RFC 4340, 7.5.2 and 6.5  */
+		return 6;
+	return 0;
+}
+
 static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val)
 {
 	switch (feat_num) {
@@ -341,6 +355,57 @@ static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len)
 	return 1;
 }
 
+/**
+ * dccp_feat_insert_opts  -  Generate FN options from current list state
+ * @skb: next sk_buff to be sent to the peer
+ * @dp: for client during handshake and general negotiation
+ * @dreq: used by the server only (all Changes/Confirms in LISTEN/RESPOND)
+ */
+int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
+			  struct sk_buff *skb)
+{
+	struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
+	struct dccp_feat_entry *pos, *next;
+	u8 opt, type, len, *ptr, nn_in_nbo[DCCP_OPTVAL_MAXLEN];
+	bool rpt;
+
+	/* put entries into @skb in the order they appear in the list */
+	list_for_each_entry_safe_reverse(pos, next, fn, node) {
+		opt  = dccp_feat_genopt(pos);
+		type = dccp_feat_type(pos->feat_num);
+		rpt  = false;
+
+		if (pos->empty_confirm) {
+			len = 0;
+			ptr = NULL;
+		} else {
+			if (type == FEAT_SP) {
+				len = pos->val.sp.len;
+				ptr = pos->val.sp.vec;
+				rpt = pos->needs_confirm;
+			} else if (type == FEAT_NN) {
+				len = dccp_feat_valid_nn_length(pos->feat_num);
+				ptr = nn_in_nbo;
+				dccp_encode_value_var(pos->val.nn, ptr, len);
+			} else {
+				DCCP_BUG("unknown feature %u", pos->feat_num);
+				return -1;
+			}
+		}
+
+		if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt))
+			return -1;
+		if (pos->needs_mandatory && dccp_insert_option_mandatory(skb))
+			return -1;
+		/*
+		 * Enter CHANGING after transmitting the Change option (6.6.2).
+		 */
+		if (pos->state == FEAT_INITIALISING)
+			pos->state = FEAT_CHANGING;
+	}
+	return 0;
+}
+
 /**
  * __feat_register_nn  -  Register new NN value on socket
  * @fn: feature-negotiation list to register with
-- 
cgit v1.2.3-70-g09d2


From f8a644c07e6f38b2c3cbaf99990e867d670d207b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Integrate feature-negotiation insertion code

The patch implements insertion of feature negotiation at the server (listening
and request socket) and the client (connecting socket).

In dccp_insert_options(), several statements have been grouped together now
to achieve (I hope) better efficiency by reducing the number of tests each
packet has to go through:
 - Ack Vectors are sent if the packet is neither a Data or a Request packet;
 - a previous issue is corrected - feature negotiation options are allowed
   on DataAck packets (5.8).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/options.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/dccp/options.c b/net/dccp/options.c
index bfa1cb8f3ef..0e277114cb2 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -554,11 +554,25 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 	    dccp_insert_option_ndp(sk, skb))
 		return -1;
 
-	if (!dccp_packet_without_ack(skb)) {
-		if (dmsk->dccpms_send_ack_vector &&
-		    dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
-		    dccp_insert_option_ackvec(sk, skb))
+	if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
+
+		/* Feature Negotiation */
+		if (dccp_feat_insert_opts(dp, NULL, skb))
 			return -1;
+
+		if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) {
+			/*
+			 * Obtain RTT sample from Request/Response exchange.
+			 * This is currently used in CCID 3 initialisation.
+			 */
+			if (dccp_insert_option_timestamp(sk, skb))
+				return -1;
+
+		} else if (dmsk->dccpms_send_ack_vector	&&
+			   dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
+			   dccp_insert_option_ackvec(sk, skb)) {
+				return -1;
+		}
 	}
 
 	if (dp->dccps_hc_rx_insert_options) {
@@ -567,14 +581,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 		dp->dccps_hc_rx_insert_options = 0;
 	}
 
-	/*
-	 * Obtain RTT sample from Request/Response exchange.
-	 * This is currently used in CCID 3 initialisation.
-	 */
-	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
-	    dccp_insert_option_timestamp(sk, skb))
-		return -1;
-
 	if (dp->dccps_timestamp_echo != 0 &&
 	    dccp_insert_option_timestamp_echo(dp, NULL, skb))
 		return -1;
@@ -587,6 +593,9 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
 {
 	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
 
+	if (dccp_feat_insert_opts(NULL, dreq, skb))
+		return -1;
+
 	if (dreq->dreq_timestamp_echo != 0 &&
 	    dccp_insert_option_timestamp_echo(NULL, dreq, skb))
 		return -1;
-- 
cgit v1.2.3-70-g09d2


From c664d4f4e2963ee355b1b0e77461eb844d1b288d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Preference list reconciliation

This provides two functions to
 * reconcile preference lists (with appropriate return codes) and
 * reorder the preference list if successful reconciliation changed the
   preferred value.

The patch also removes the old code for processing SP/NN Change options, since
new code to process these is mostly there already; related references have been
commented out.

The code for processing Change options follows in the next patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/feat.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index da686464462..d53077bd40b 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -718,6 +718,76 @@ static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
 	return 0;
 }
 
+/* Select the first entry in @servlist that also occurs in @clilist (6.3.1) */
+static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen)
+{
+	u8 c, s;
+
+	for (s = 0; s < slen; s++)
+		for (c = 0; c < clen; c++)
+			if (servlist[s] == clilist[c])
+				return servlist[s];
+	return -1;
+}
+
+/**
+ * dccp_feat_prefer  -  Move preferred entry to the start of array
+ * Reorder the @array_len elements in @array so that @preferred_value comes
+ * first. Returns >0 to indicate that @preferred_value does occur in @array.
+ */
+static u8 dccp_feat_prefer(u8 preferred_value, u8 *array, u8 array_len)
+{
+	u8 i, does_occur = 0;
+
+	if (array != NULL) {
+		for (i = 0; i < array_len; i++)
+			if (array[i] == preferred_value) {
+				array[i] = array[0];
+				does_occur++;
+			}
+		if (does_occur)
+			array[0] = preferred_value;
+	}
+	return does_occur;
+}
+
+/**
+ * dccp_feat_reconcile  -  Reconcile SP preference lists
+ *  @fval: SP list to reconcile into
+ *  @arr: received SP preference list
+ *  @len: length of @arr in bytes
+ *  @is_server: whether this side is the server (and @fv is the server's list)
+ *  @reorder: whether to reorder the list in @fv after reconciling with @arr
+ * When successful, > 0 is returned and the reconciled list is in @fval.
+ * A value of 0 means that negotiation failed (no shared entry).
+ */
+static int dccp_feat_reconcile(dccp_feat_val *fv, u8 *arr, u8 len,
+			       bool is_server, bool reorder)
+{
+	int rc;
+
+	if (!fv->sp.vec || !arr) {
+		DCCP_CRIT("NULL feature value or array");
+		return 0;
+	}
+
+	if (is_server)
+		rc = dccp_feat_preflist_match(fv->sp.vec, fv->sp.len, arr, len);
+	else
+		rc = dccp_feat_preflist_match(arr, len, fv->sp.vec, fv->sp.len);
+
+	if (!reorder)
+		return rc;
+	if (rc < 0)
+		return 0;
+
+	/*
+	 * Reorder list: used for activating features and in dccp_insert_fn_opt.
+	 */
+	return dccp_feat_prefer(rc, fv->sp.vec, fv->sp.len);
+}
+
+#ifdef __this_is_the_old_framework_and_will_be_removed_later_in_a_subsequent_patch
 static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
 			       u8 *rpref, u8 rlen)
 {
@@ -913,6 +983,7 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
 
 	return 0;
 }
+#endif /* (later) */
 
 static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
 				    u8 type, u8 feature)
@@ -988,12 +1059,14 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
 	switch (feature) {
 	/* deal with SP features */
 	case DCCPF_CCID:
-		rc = dccp_feat_sp(sk, type, feature, val, len);
+		/* XXX Obsoleted by next patch
+		rc = dccp_feat_sp(sk, type, feature, val, len); */
 		break;
 
 	/* deal with NN features */
 	case DCCPF_ACK_RATIO:
-		rc = dccp_feat_nn(sk, type, feature, val, len);
+		/* XXX Obsoleted by next patch
+		rc = dccp_feat_nn(sk, type, feature, val, len); */
 		break;
 
 	/* XXX implement other features */
-- 
cgit v1.2.3-70-g09d2


From 5a146b97d5e93db2df075c0d820f492bb996d0e3 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Process incoming Change feature-negotiation options

This adds/replaces code for processing incoming ChangeL/R options.
The main difference is that:
 * mandatory FN options are now interpreted inside the function
  (there are too many individual cases to do this externally);
 * the function returns an appropriate Reset code or 0,
   which is then used to fill in the data for the Reset packet.

Old code, which is no longer used or referenced, has been removed.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/feat.c    | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/dccp/feat.h    |   4 +-
 net/dccp/options.c |  23 +++------
 3 files changed, 155 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index d53077bd40b..01b4da7007b 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -983,7 +983,6 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
 
 	return 0;
 }
-#endif /* (later) */
 
 static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
 				    u8 type, u8 feature)
@@ -1094,6 +1093,7 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
 }
 
 EXPORT_SYMBOL_GPL(dccp_feat_change_recv);
+#endif	/* (later) */
 
 int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
 			   u8 *val, u8 len)
@@ -1234,6 +1234,150 @@ out_clean:
 
 EXPORT_SYMBOL_GPL(dccp_feat_clone);
 
+/**
+ * dccp_feat_change_recv  -  Process incoming ChangeL/R options
+ * @fn: feature-negotiation list to update
+ * @is_mandatory: whether the Change was preceded by a Mandatory option
+ * @opt: %DCCPO_CHANGE_L or %DCCPO_CHANGE_R
+ * @feat: one of %dccp_feature_numbers
+ * @val: NN value or SP value/preference list
+ * @len: length of @val in bytes
+ * @server: whether this node is the server (1) or the client (0)
+ */
+static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
+				u8 feat, u8 *val, u8 len, const bool server)
+{
+	u8 defval, type = dccp_feat_type(feat);
+	const bool local = (opt == DCCPO_CHANGE_R);
+	struct dccp_feat_entry *entry;
+	dccp_feat_val fval;
+
+	if (len == 0 || type == FEAT_UNKNOWN)		/* 6.1 and 6.6.8 */
+		goto unknown_feature_or_value;
+
+	/*
+	 *	Negotiation of NN features: Change R is invalid, so there is no
+	 *	simultaneous negotiation; hence we do not look up in the list.
+	 */
+	if (type == FEAT_NN) {
+		if (local || len > sizeof(fval.nn))
+			goto unknown_feature_or_value;
+
+		/* 6.3.2: "The feature remote MUST accept any valid value..." */
+		fval.nn = dccp_decode_value_var(val, len);
+		if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
+			goto unknown_feature_or_value;
+
+		return dccp_feat_push_confirm(fn, feat, local, &fval);
+	}
+
+	/*
+	 *	Unidirectional/simultaneous negotiation of SP features (6.3.1)
+	 */
+	entry = dccp_feat_list_lookup(fn, feat, local);
+	if (entry == NULL) {
+		/*
+		 * No particular preferences have been registered. We deal with
+		 * this situation by assuming that all valid values are equally
+		 * acceptable, and apply the following checks:
+		 * - if the peer's list is a singleton, we accept a valid value;
+		 * - if we are the server, we first try to see if the peer (the
+		 *   client) advertises the default value. If yes, we use it,
+		 *   otherwise we accept the preferred value;
+		 * - else if we are the client, we use the first list element.
+		 */
+		if (dccp_feat_clone_sp_val(&fval, val, 1))
+			return DCCP_RESET_CODE_TOO_BUSY;
+
+		if (len > 1 && server) {
+			defval = dccp_feat_default_value(feat);
+			if (dccp_feat_preflist_match(&defval, 1, val, len) > -1)
+				fval.sp.vec[0] = defval;
+		} else if (!dccp_feat_is_valid_sp_val(feat, fval.sp.vec[0])) {
+			kfree(fval.sp.vec);
+			goto unknown_feature_or_value;
+		}
+
+		/* Treat unsupported CCIDs like invalid values */
+		if (feat == DCCPF_CCID && !ccid_support_check(fval.sp.vec, 1)) {
+			kfree(fval.sp.vec);
+			goto not_valid_or_not_known;
+		}
+
+		return dccp_feat_push_confirm(fn, feat, local, &fval);
+
+	} else if (entry->state == FEAT_UNSTABLE) {	/* 6.6.2 */
+		return 0;
+	}
+
+	if (dccp_feat_reconcile(&entry->val, val, len, server, true)) {
+		entry->empty_confirm = 0;
+	} else if (is_mandatory) {
+		return DCCP_RESET_CODE_MANDATORY_ERROR;
+	} else if (entry->state == FEAT_INITIALISING) {
+		/*
+		 * Failed simultaneous negotiation (server only): try to `save'
+		 * the connection by checking whether entry contains the default
+		 * value for @feat. If yes, send an empty Confirm to signal that
+		 * the received Change was not understood - which implies using
+		 * the default value.
+		 * If this also fails, we use Reset as the last resort.
+		 */
+		WARN_ON(!server);
+		defval = dccp_feat_default_value(feat);
+		if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true))
+			return DCCP_RESET_CODE_OPTION_ERROR;
+		entry->empty_confirm = 1;
+	}
+	entry->needs_confirm   = 1;
+	entry->needs_mandatory = 0;
+	entry->state	       = FEAT_STABLE;
+	return 0;
+
+unknown_feature_or_value:
+	if (!is_mandatory)
+		return dccp_push_empty_confirm(fn, feat, local);
+
+not_valid_or_not_known:
+	return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
+			    : DCCP_RESET_CODE_OPTION_ERROR;
+}
+
+/**
+ * dccp_feat_parse_options  -  Process Feature-Negotiation Options
+ * @sk: for general use and used by the client during connection setup
+ * @dreq: used by the server during connection setup
+ * @mandatory: whether @opt was preceded by a Mandatory option
+ * @opt: %DCCPO_CHANGE_L | %DCCPO_CHANGE_R | %DCCPO_CONFIRM_L | %DCCPO_CONFIRM_R
+ * @feat: one of %dccp_feature_numbers
+ * @val: value contents of @opt
+ * @len: length of @val in bytes
+ * Returns 0 on success, a Reset code for ending the connection otherwise.
+ */
+int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
+			    u8 mandatory, u8 opt, u8 feat, u8 *val, u8 len)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
+	bool server = false;
+
+	switch (sk->sk_state) {
+	/*
+	 *	Negotiation during connection setup
+	 */
+	case DCCP_LISTEN:
+		server = true;			/* fall through */
+	case DCCP_REQUESTING:
+		switch (opt) {
+		case DCCPO_CHANGE_L:
+		case DCCPO_CHANGE_R:
+			return dccp_feat_change_recv(fn, mandatory, opt, feat,
+						     val, len, server);
+		}
+	}
+	return 0;	/* ignore FN options in all other states */
+}
+
 int dccp_feat_init(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 8e863839ed9..ce97f3fe768 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -115,8 +115,8 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
 extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
 				  u8 const *list, u8 len);
 extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
-extern int  dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature,
-				  u8 *val, u8 len);
+extern int  dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
+				    u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
 extern int  dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
 				   u8 *val, u8 len);
 extern void dccp_feat_clean(struct dccp_minisock *dmsk);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 0e277114cb2..fb8466ea467 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -135,22 +135,13 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 				      (unsigned long long)opt_recv->dccpor_ndp);
 			break;
 		case DCCPO_CHANGE_L:
-			/* fall through */
 		case DCCPO_CHANGE_R:
 			if (pkt_type == DCCP_PKT_DATA)
 				break;
-			if (len < 2)
-				goto out_invalid_option;
-			rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
-						   len - 1);
-			/*
-			 * When there is a change error, change_recv is
-			 * responsible for dealing with it.  i.e. reply with an
-			 * empty confirm.
-			 * If the change was mandatory, then we need to die.
-			 */
-			if (rc && mandatory)
-				goto out_invalid_option;
+			rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
+						    *value, value + 1, len - 1);
+			if (rc)
+				goto out_featneg_failed;
 			break;
 		case DCCPO_CONFIRM_L:
 			/* fall through */
@@ -292,8 +283,10 @@ out_nonsensical_length:
 
 out_invalid_option:
 	DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
-	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
-	DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
+	rc = DCCP_RESET_CODE_OPTION_ERROR;
+out_featneg_failed:
+	DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
+	DCCP_SKB_CB(skb)->dccpd_reset_code = rc;
 	DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt;
 	DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0;
 	DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0;
-- 
cgit v1.2.3-70-g09d2


From d2150b7bff3d397692cf0dc890f198d23564de5f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Processing Confirm options

Analogous to the previous patch, this adds code to interpret incoming Confirm
feature-negotiation options. Both functions operate on the feature-negotiation
list of either the request_sock (server) or the dccp_sock (client).

Thanks to Wei Yongjun for pointing out that it is overly restrictive to check
the entire list of confirmed SP values.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/feat.c    | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/dccp/feat.h    |   2 --
 net/dccp/options.c |  16 ++-------
 3 files changed, 101 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 01b4da7007b..da3bbad9d50 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -99,6 +99,13 @@ static int dccp_feat_default_value(u8 feat_num)
 	return idx < 0 ? : dccp_feat_table[idx].default_value;
 }
 
+/* Test for "Req'd" feature (RFC 4340, 6.4) */
+static inline int dccp_feat_must_be_understood(u8 feat_num)
+{
+	return	feat_num == DCCPF_CCID || feat_num == DCCPF_SHORT_SEQNOS ||
+		feat_num == DCCPF_SEQUENCE_WINDOW;
+}
+
 /* copy constructor, fval must not already contain allocated memory */
 static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
 {
@@ -1093,7 +1100,6 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
 }
 
 EXPORT_SYMBOL_GPL(dccp_feat_change_recv);
-#endif	/* (later) */
 
 int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
 			   u8 *val, u8 len)
@@ -1148,6 +1154,7 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
 }
 
 EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv);
+#endif	/* (later) */
 
 void dccp_feat_clean(struct dccp_minisock *dmsk)
 {
@@ -1343,6 +1350,93 @@ not_valid_or_not_known:
 			    : DCCP_RESET_CODE_OPTION_ERROR;
 }
 
+/**
+ * dccp_feat_confirm_recv  -  Process received Confirm options
+ * @fn: feature-negotiation list to update
+ * @is_mandatory: whether @opt was preceded by a Mandatory option
+ * @opt: %DCCPO_CONFIRM_L or %DCCPO_CONFIRM_R
+ * @feat: one of %dccp_feature_numbers
+ * @val: NN value or SP value/preference list
+ * @len: length of @val in bytes
+ * @server: whether this node is server (1) or client (0)
+ */
+static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
+				 u8 feat, u8 *val, u8 len, const bool server)
+{
+	u8 *plist, plen, type = dccp_feat_type(feat);
+	const bool local = (opt == DCCPO_CONFIRM_R);
+	struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local);
+
+	if (entry == NULL) {	/* nothing queued: ignore or handle error */
+		if (is_mandatory && type == FEAT_UNKNOWN)
+			return DCCP_RESET_CODE_MANDATORY_ERROR;
+
+		if (!local && type == FEAT_NN)		/* 6.3.2 */
+			goto confirmation_failed;
+		return 0;
+	}
+
+	if (entry->state != FEAT_CHANGING)		/* 6.6.2 */
+		return 0;
+
+	if (len == 0) {
+		if (dccp_feat_must_be_understood(feat))	/* 6.6.7 */
+			goto confirmation_failed;
+		/*
+		 * Empty Confirm during connection setup: this means reverting
+		 * to the `old' value, which in this case is the default. Since
+		 * we handle default values automatically when no other values
+		 * have been set, we revert to the old value by removing this
+		 * entry from the list.
+		 */
+		dccp_feat_list_pop(entry);
+		return 0;
+	}
+
+	if (type == FEAT_NN) {
+		if (len > sizeof(entry->val.nn))
+			goto confirmation_failed;
+
+		if (entry->val.nn == dccp_decode_value_var(val, len))
+			goto confirmation_succeeded;
+
+		DCCP_WARN("Bogus Confirm for non-existing value\n");
+		goto confirmation_failed;
+	}
+
+	/*
+	 * Parsing SP Confirms: the first element of @val is the preferred
+	 * SP value which the peer confirms, the remainder depends on @len.
+	 * Note that only the confirmed value need to be a valid SP value.
+	 */
+	if (!dccp_feat_is_valid_sp_val(feat, *val))
+		goto confirmation_failed;
+
+	if (len == 1) {		/* peer didn't supply a preference list */
+		plist = val;
+		plen  = len;
+	} else {		/* preferred value + preference list */
+		plist = val + 1;
+		plen  = len - 1;
+	}
+
+	/* Check whether the peer got the reconciliation right (6.6.8) */
+	if (dccp_feat_reconcile(&entry->val, plist, plen, server, 0) != *val) {
+		DCCP_WARN("Confirm selected the wrong value %u\n", *val);
+		return DCCP_RESET_CODE_OPTION_ERROR;
+	}
+	entry->val.sp.vec[0] = *val;
+
+confirmation_succeeded:
+	entry->state = FEAT_STABLE;
+	return 0;
+
+confirmation_failed:
+	DCCP_WARN("Confirmation failed\n");
+	return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
+			    : DCCP_RESET_CODE_OPTION_ERROR;
+}
+
 /**
  * dccp_feat_parse_options  -  Process Feature-Negotiation Options
  * @sk: for general use and used by the client during connection setup
@@ -1373,6 +1467,10 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		case DCCPO_CHANGE_R:
 			return dccp_feat_change_recv(fn, mandatory, opt, feat,
 						     val, len, server);
+		case DCCPO_CONFIRM_R:
+		case DCCPO_CONFIRM_L:
+			return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
+						      val, len, server);
 		}
 	}
 	return 0;	/* ignore FN options in all other states */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index ce97f3fe768..618bed935b3 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -117,8 +117,6 @@ extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
 extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
 extern int  dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
 				    u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
-extern int  dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
-				   u8 *val, u8 len);
 extern void dccp_feat_clean(struct dccp_minisock *dmsk);
 extern int  dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
 extern int  dccp_feat_clone_list(struct list_head const *, struct list_head *);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index fb8466ea467..3a9a22f0ac1 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -134,26 +134,14 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk),
 				      (unsigned long long)opt_recv->dccpor_ndp);
 			break;
-		case DCCPO_CHANGE_L:
-		case DCCPO_CHANGE_R:
-			if (pkt_type == DCCP_PKT_DATA)
+		case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R:
+			if (pkt_type == DCCP_PKT_DATA)      /* RFC 4340, 6 */
 				break;
 			rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
 						    *value, value + 1, len - 1);
 			if (rc)
 				goto out_featneg_failed;
 			break;
-		case DCCPO_CONFIRM_L:
-			/* fall through */
-		case DCCPO_CONFIRM_R:
-			if (pkt_type == DCCP_PKT_DATA)
-				break;
-			if (len < 2)	/* FIXME this disallows empty confirm */
-				goto out_invalid_option;
-			if (dccp_feat_confirm_recv(sk, opt, *value,
-						   value + 1, len - 1))
-				goto out_invalid_option;
-			break;
 		case DCCPO_ACK_VECTOR_0:
 		case DCCPO_ACK_VECTOR_1:
 			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
-- 
cgit v1.2.3-70-g09d2


From c926c6aed3e444e8c88a768f063b2de8fd6ae760 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Feature activation handlers

This patch provides the post-processing of feature negotiation state, after
the negotiation has completed.

To this purpose, handlers are used and added to the dccp_feat_table. Each
handler is passed a boolean flag whether the RX or TX side of the feature
is meant.

Several handlers are provided already, new handlers can easily be added.

The initialisation is now fully dynamic, i.e. CCIDs are activated only
after the feature negotiation. The integration of this dynamic activation
is done in the subsequent patches.

Thanks to Wei Yongjun for pointing out the necessity of skipping over empty
Confirm options while copying the negotiated feature values.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/dccp.h |   1 +
 net/dccp/feat.c | 213 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 204 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 2e2a6f229ea..1baed789bcc 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -445,6 +445,7 @@ extern int  dccp_feat_finalise_settings(struct dccp_sock *dp);
 extern int  dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
 extern int  dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
 				  struct sk_buff *skb);
+extern int  dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
 extern void dccp_feat_list_purge(struct list_head *fn_list);
 
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index da3bbad9d50..f78bd357b5b 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -25,11 +25,101 @@
 
 #define DCCP_FEAT_SP_NOAGREE (-123)
 
+/*
+ * Feature activation handlers.
+ *
+ * These all use an u64 argument, to provide enough room for NN/SP features. At
+ * this stage the negotiated values have been checked to be within their range.
+ */
+static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid *new_ccid = ccid_new(ccid, sk, rx, gfp_any());
+
+	if (new_ccid == NULL)
+		return -ENOMEM;
+
+	if (rx) {
+		ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+		dp->dccps_hc_rx_ccid = new_ccid;
+	} else {
+		ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+		dp->dccps_hc_tx_ccid = new_ccid;
+	}
+	return 0;
+}
+
+static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
+{
+	if (!rx)
+		dccp_msk(sk)->dccpms_sequence_window = seq_win;
+	return 0;
+}
+
+static int dccp_hdlr_ack_ratio(struct sock *sk, u64 ratio, bool rx)
+{
+	if (rx)
+		dccp_sk(sk)->dccps_r_ack_ratio = ratio;
+	else
+		dccp_sk(sk)->dccps_l_ack_ratio = ratio;
+	return 0;
+}
+
+static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	if (rx) {
+		if (enable && dp->dccps_hc_rx_ackvec == NULL) {
+			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(gfp_any());
+			if (dp->dccps_hc_rx_ackvec == NULL)
+				return -ENOMEM;
+		} else if (!enable) {
+			dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+			dp->dccps_hc_rx_ackvec = NULL;
+		}
+	}
+	return 0;
+}
+
+static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx)
+{
+	if (!rx)
+		dccp_msk(sk)->dccpms_send_ndp_count = (enable > 0);
+	return 0;
+}
+
+/*
+ * Minimum Checksum Coverage is located at the RX side (9.2.1). This means that
+ * `rx' holds when the sending peer informs about his partial coverage via a
+ * ChangeR() option. In the other case, we are the sender and the receiver
+ * announces its coverage via ChangeL() options. The policy here is to honour
+ * such communication by enabling the corresponding partial coverage - but only
+ * if it has not been set manually before; the warning here means that all
+ * packets will be dropped.
+ */
+static int dccp_hdlr_min_cscov(struct sock *sk, u64 cscov, bool rx)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	if (rx)
+		dp->dccps_pcrlen = cscov;
+	else {
+		if (dp->dccps_pcslen == 0)
+			dp->dccps_pcslen = cscov;
+		else if (cscov > dp->dccps_pcslen)
+			DCCP_WARN("CsCov %u too small, peer requires >= %u\n",
+				  dp->dccps_pcslen, (u8)cscov);
+	}
+	return 0;
+}
+
 static const struct {
 	u8			feat_num;		/* DCCPF_xxx */
 	enum dccp_feat_type	rxtx;			/* RX or TX  */
 	enum dccp_feat_type	reconciliation;		/* SP or NN  */
 	u8			default_value;		/* as in 6.4 */
+	int (*activation_hdlr)(struct sock *sk, u64 val, bool rx);
 /*
  *    Lookup table for location and type of features (from RFC 4340/4342)
  *  +--------------------------+----+-----+----+----+---------+-----------+
@@ -49,16 +139,16 @@ static const struct {
  *  +--------------------------+----+-----+----+----+---------+-----------+
  */
 } dccp_feat_table[] = {
-	{ DCCPF_CCID,		 FEAT_AT_TX, FEAT_SP, 2 },
-	{ DCCPF_SHORT_SEQNOS,	 FEAT_AT_TX, FEAT_SP, 0 },
-	{ DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100 },
-	{ DCCPF_ECN_INCAPABLE,	 FEAT_AT_RX, FEAT_SP, 0 },
-	{ DCCPF_ACK_RATIO,	 FEAT_AT_TX, FEAT_NN, 2 },
-	{ DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0 },
-	{ DCCPF_SEND_NDP_COUNT,  FEAT_AT_TX, FEAT_SP, 0 },
-	{ DCCPF_MIN_CSUM_COVER,  FEAT_AT_RX, FEAT_SP, 0 },
-	{ DCCPF_DATA_CHECKSUM,	 FEAT_AT_RX, FEAT_SP, 0 },
-	{ DCCPF_SEND_LEV_RATE,	 FEAT_AT_RX, FEAT_SP, 0 },
+	{ DCCPF_CCID,		 FEAT_AT_TX, FEAT_SP, 2,   dccp_hdlr_ccid     },
+	{ DCCPF_SHORT_SEQNOS,	 FEAT_AT_TX, FEAT_SP, 0,   NULL },
+	{ DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100, dccp_hdlr_seq_win  },
+	{ DCCPF_ECN_INCAPABLE,	 FEAT_AT_RX, FEAT_SP, 0,   NULL },
+	{ DCCPF_ACK_RATIO,	 FEAT_AT_TX, FEAT_NN, 2,   dccp_hdlr_ack_ratio},
+	{ DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0,   dccp_hdlr_ackvec   },
+	{ DCCPF_SEND_NDP_COUNT,  FEAT_AT_TX, FEAT_SP, 0,   dccp_hdlr_ndp      },
+	{ DCCPF_MIN_CSUM_COVER,  FEAT_AT_RX, FEAT_SP, 0,   dccp_hdlr_min_cscov},
+	{ DCCPF_DATA_CHECKSUM,	 FEAT_AT_RX, FEAT_SP, 0,   NULL },
+	{ DCCPF_SEND_LEV_RATE,	 FEAT_AT_RX, FEAT_SP, 0,   NULL },
 };
 #define DCCP_FEAT_SUPPORTED_MAX		ARRAY_SIZE(dccp_feat_table)
 
@@ -99,6 +189,41 @@ static int dccp_feat_default_value(u8 feat_num)
 	return idx < 0 ? : dccp_feat_table[idx].default_value;
 }
 
+static int __dccp_feat_activate(struct sock *sk, const int idx,
+				const bool is_local, dccp_feat_val const *fval)
+{
+	bool rx;
+	u64 val;
+
+	if (idx < 0 || idx >= DCCP_FEAT_SUPPORTED_MAX)
+		return -1;
+	if (dccp_feat_table[idx].activation_hdlr == NULL)
+		return 0;
+
+	if (fval == NULL) {
+		val = dccp_feat_table[idx].default_value;
+	} else if (dccp_feat_table[idx].reconciliation == FEAT_SP) {
+		if (fval->sp.vec == NULL) {
+			/*
+			 * This can happen when an empty Confirm is sent
+			 * for an SP (i.e. known) feature. In this case
+			 * we would be using the default anyway.
+			 */
+			DCCP_CRIT("Feature #%d undefined: using default", idx);
+			val = dccp_feat_table[idx].default_value;
+		} else {
+			val = fval->sp.vec[0];
+		}
+	} else {
+		val = fval->nn;
+	}
+
+	/* Location is RX if this is a local-RX or remote-TX feature */
+	rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX));
+
+	return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
+}
+
 /* Test for "Req'd" feature (RFC 4340, 6.4) */
 static inline int dccp_feat_must_be_understood(u8 feat_num)
 {
@@ -1506,6 +1631,74 @@ out:
 
 EXPORT_SYMBOL_GPL(dccp_feat_init);
 
+int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_feat_entry *cur, *next;
+	int idx;
+	dccp_feat_val *fvals[DCCP_FEAT_SUPPORTED_MAX][2] = {
+		 [0 ... DCCP_FEAT_SUPPORTED_MAX-1] = { NULL, NULL }
+	};
+
+	list_for_each_entry(cur, fn_list, node) {
+		/*
+		 * An empty Confirm means that either an unknown feature type
+		 * or an invalid value was present. In the first case there is
+		 * nothing to activate, in the other the default value is used.
+		 */
+		if (cur->empty_confirm)
+			continue;
+
+		idx = dccp_feat_index(cur->feat_num);
+		if (idx < 0) {
+			DCCP_BUG("Unknown feature %u", cur->feat_num);
+			goto activation_failed;
+		}
+		if (cur->state != FEAT_STABLE) {
+			DCCP_CRIT("Negotiation of %s %u failed in state %u",
+				  cur->is_local ? "local" : "remote",
+				  cur->feat_num, cur->state);
+			goto activation_failed;
+		}
+		fvals[idx][cur->is_local] = &cur->val;
+	}
+
+	/*
+	 * Activate in decreasing order of index, so that the CCIDs are always
+	 * activated as the last feature. This avoids the case where a CCID
+	 * relies on the initialisation of one or more features that it depends
+	 * on (e.g. Send NDP Count, Send Ack Vector, and Ack Ratio features).
+	 */
+	for (idx = DCCP_FEAT_SUPPORTED_MAX; --idx >= 0;)
+		if (__dccp_feat_activate(sk, idx, 0, fvals[idx][0]) ||
+		    __dccp_feat_activate(sk, idx, 1, fvals[idx][1])) {
+			DCCP_CRIT("Could not activate %d", idx);
+			goto activation_failed;
+		}
+
+	/* Clean up Change options which have been confirmed already */
+	list_for_each_entry_safe(cur, next, fn_list, node)
+		if (!cur->needs_confirm)
+			dccp_feat_list_pop(cur);
+
+	dccp_pr_debug("Activation OK\n");
+	return 0;
+
+activation_failed:
+	/*
+	 * We clean up everything that may have been allocated, since
+	 * it is difficult to track at which stage negotiation failed.
+	 * This is ok, since all allocation functions below are robust
+	 * against NULL arguments.
+	 */
+	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+	dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+	dp->dccps_hc_rx_ackvec = NULL;
+	return -1;
+}
+
 #ifdef CONFIG_IP_DCCP_DEBUG
 const char *dccp_feat_typename(const u8 type)
 {
-- 
cgit v1.2.3-70-g09d2


From 3a53a9adfa269da7fa40fc476f09e46155c0143d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Integration of dynamic feature activation - part 1 (socket
 setup)

This first patch out of three replaces the hardcoded default settings with
initialisation code for the dynamic feature negotiation.

Note on retransmitting Confirm options:
---------------------------------------
This patch also defers flushing the client feature-negotiation queue,
due to the following considerations.

As long as the client is in PARTOPEN, it needs to retransmit the Confirm
options for the Change options received on the DCCP-Response from the server.

Otherwise, if the packet containing the Confirm options gets dropped in the
network, the connection aborts due to undefined feature negotiation state.

Thanks to Leandro Melo de Sales who reported a bug in an earlier revision
of the patch set, resulting from not retransmitting the Confirm options.

The patch now ensures that the client feature-negotiation queue is flushed only
when entering the OPEN state. Since confirmed Change options are removed as
soon as they are confirmed (in the DCCP-Response), this ensures that Confirm
options are retransmitted.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/proto.c | 46 ++++++----------------------------------------
 1 file changed, 6 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 6550452d59e..0d420790b79 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -67,6 +67,9 @@ void dccp_set_state(struct sock *sk, const int state)
 	case DCCP_OPEN:
 		if (oldstate != DCCP_OPEN)
 			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
+		/* Client retransmits all Confirm options until entering OPEN */
+		if (oldstate == DCCP_PARTOPEN)
+			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
 		break;
 
 	case DCCP_CLOSED:
@@ -175,7 +178,6 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_minisock *dmsk = dccp_msk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	dccp_minisock_init(&dp->dccps_minisock);
@@ -194,45 +196,9 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	dccp_init_xmit_timers(sk);
 
 	INIT_LIST_HEAD(&dp->dccps_featneg);
-	/*
-	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
-	 * the listening (master) sock get CCID control blocks, which is not
-	 * necessary, but for now, to not mess with the test userspace apps,
-	 * lets leave it here, later the real solution is to do this in a
-	 * setsockopt(CCIDs-I-want/accept). -acme
-	 */
-	if (likely(ctl_sock_initialized)) {
-		int rc = dccp_feat_init(sk);
-
-		if (rc)
-			return rc;
-
-		if (dmsk->dccpms_send_ack_vector) {
-			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
-			if (dp->dccps_hc_rx_ackvec == NULL)
-				return -ENOMEM;
-		}
-		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
-						      sk, GFP_KERNEL);
-		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
-						      sk, GFP_KERNEL);
-		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
-			     dp->dccps_hc_tx_ccid == NULL)) {
-			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
-			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
-			if (dmsk->dccpms_send_ack_vector) {
-				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
-				dp->dccps_hc_rx_ackvec = NULL;
-			}
-			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
-			return -ENOMEM;
-		}
-	} else {
-		/* control socket doesn't need feat nego */
-		INIT_LIST_HEAD(&dmsk->dccpms_pending);
-		INIT_LIST_HEAD(&dmsk->dccpms_conf);
-	}
-
+	/* control socket doesn't need feat nego */
+	if (likely(ctl_sock_initialized))
+		return dccp_feat_init(sk);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From e70cacb90d76f0632f7bba69c87a62e709e84619 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Integration of dynamic feature activation - part 2 (server
 side)

This patch integrates the activation of features at the end of negotiation
into the server-side code.

Note:
  In dccp_create_openreq_child the request_sock argument is no longer constant,
  since dccp_activate_values() uses the feature-negotiation list on dreq to sort
  out the initialisation values for the different features of the child socket;
  and purges this queue after use (but the `req' argument to openreq_child
  can and does still remain constant).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/minisocks.c | 42 ++++++++++++------------------------------
 1 file changed, 12 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index ee7f40f8ddf..258195972bc 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -111,7 +111,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 	struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
 
 	if (newsk != NULL) {
-		const struct dccp_request_sock *dreq = dccp_rsk(req);
+		struct dccp_request_sock *dreq = dccp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 		struct dccp_sock *newdp = dccp_sk(newsk);
 		struct dccp_minisock *newdmsk = dccp_msk(newsk);
@@ -125,35 +125,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		newicsk->icsk_rto	    = DCCP_TIMEOUT_INIT;
 
 		INIT_LIST_HEAD(&newdp->dccps_featneg);
-		if (dccp_feat_clone(sk, newsk))
-			goto out_free;
-
-		if (newdmsk->dccpms_send_ack_vector) {
-			newdp->dccps_hc_rx_ackvec =
-						dccp_ackvec_alloc(GFP_ATOMIC);
-			if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
-				goto out_free;
-		}
-
-		newdp->dccps_hc_rx_ccid =
-			    ccid_hc_rx_new(newdmsk->dccpms_rx_ccid,
-					   newsk, GFP_ATOMIC);
-		newdp->dccps_hc_tx_ccid =
-			    ccid_hc_tx_new(newdmsk->dccpms_tx_ccid,
-					   newsk, GFP_ATOMIC);
-		if (unlikely(newdp->dccps_hc_rx_ccid == NULL ||
-			     newdp->dccps_hc_tx_ccid == NULL)) {
-			dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
-			ccid_hc_rx_delete(newdp->dccps_hc_rx_ccid, newsk);
-			ccid_hc_tx_delete(newdp->dccps_hc_tx_ccid, newsk);
-out_free:
-			/* It is still raw copy of parent, so invalidate
-			 * destructor and make plain sk_free() */
-			newsk->sk_destruct = NULL;
-			sk_free(newsk);
-			return NULL;
-		}
-
 		/*
 		 * Step 3: Process LISTEN state
 		 *
@@ -184,6 +155,17 @@ out_free:
 		dccp_set_seqno(&newdp->dccps_awl,
 			       max48(newdp->dccps_awl, newdp->dccps_iss));
 
+		/*
+		 * Activate features after initialising the sequence numbers,
+		 * since CCID initialisation may depend on GSS, ISR, ISS etc.
+		 */
+		if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
+			/* It is still raw copy of parent, so invalidate
+			 * destructor and make plain sk_free() */
+			newsk->sk_destruct = NULL;
+			sk_free(newsk);
+			return NULL;
+		}
 		dccp_init_xmit_timers(newsk);
 
 		DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
-- 
cgit v1.2.3-70-g09d2


From c49b22729f3da7479c4e6c572d53fdd40201d0bd Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Integration of dynamic feature activation - part 3 (client
 side)

This integrates feature-activation in the client, with these details:

 1. When dccp_parse_options() fails, the reset code is already set, request_sent
    _state_process() currently overrides this with `Packet Error', which is not
    intended - so changed to use the reset code set in dccp_parse_options();

 2. There was a FIXME to change the error code when dccp_ackvec_add() fails.
    I have looked this up and found that:
    * the check whether ackno < ISN is already made earlier,
    * this Response is likely the 1st packet with an Ackno that the client gets,
    * so when dccp_ackvec_add() fails, the reason is likely not a packet error.

 3. When feature negotiation fails, the socket should be marked as not usable,
    so that the application is notified that an error occurs. This is achieved
    by a new label, which uses an error code of `Aborted' and which sets the
    socket state to CLOSED, as well as sk_err.

 4. Avoids parsing the Ack twice in Respond state by not doing option processing
    again in dccp_rcv_respond_partopen_state_process (as option processing has
    already been done on the request_sock in dccp_check_req).

Since this addresses congestion-control initialisation, a corresponding
FIXME has been removed.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/input.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 3070015edc7..0672b7e1763 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -421,8 +421,13 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 			goto out_invalid_packet;
 		}
 
+		/*
+		 * If option processing (Step 8) failed, return 1 here so that
+		 * dccp_v4_do_rcv() sends a Reset. The Reset code depends on
+		 * the option type and is set in dccp_parse_options().
+		 */
 		if (dccp_parse_options(sk, NULL, skb))
-			goto out_invalid_packet;
+			return 1;
 
 		/* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
 		if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
@@ -475,6 +480,15 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		 */
 		dccp_set_state(sk, DCCP_PARTOPEN);
 
+		/*
+		 * If feature negotiation was successful, activate features now;
+		 * an activation failure means that this host could not activate
+		 * one ore more features (e.g. insufficient memory), which would
+		 * leave at least one feature in an undefined state.
+		 */
+		if (dccp_feat_activate_values(sk, &dp->dccps_featneg))
+			goto unable_to_proceed;
+
 		/* Make sure socket is routed, for correct metrics. */
 		icsk->icsk_af_ops->rebuild_header(sk);
 
@@ -509,6 +523,16 @@ out_invalid_packet:
 	/* dccp_v4_do_rcv will send a reset */
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
 	return 1;
+
+unable_to_proceed:
+	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED;
+	/*
+	 * We mark this socket as no longer usable, so that the loop in
+	 * dccp_sendmsg() terminates and the application gets notified.
+	 */
+	dccp_set_state(sk, DCCP_CLOSED);
+	sk->sk_err = ECOMM;
+	return 1;
 }
 
 static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -600,7 +624,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		return 1;
 	}
 
-	if (sk->sk_state != DCCP_REQUESTING) {
+	if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
 		if (dccp_check_seqno(sk, skb))
 			goto discard;
 
@@ -665,8 +689,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		return 1;
 
 	case DCCP_REQUESTING:
-		/* FIXME: do congestion control initialization */
-
 		queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
 		if (queued >= 0)
 			return queued;
-- 
cgit v1.2.3-70-g09d2


From 23479cbfd30402c7d9fa413cc467983061073557 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Clean up old feature-negotiation infrastructure

The code removed by this patch is no longer referenced or used, the added
lines update documentation and copyrights.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/feat.c | 505 +-------------------------------------------------------
 net/dccp/feat.h |  12 +-
 2 files changed, 12 insertions(+), 505 deletions(-)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index f78bd357b5b..6c82dea409d 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1,8 +1,13 @@
 /*
  *  net/dccp/feat.c
  *
- *  An implementation of the DCCP protocol
- *  Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ *  Feature negotiation for the DCCP protocol (RFC 4340, section 6)
+ *
+ *  Copyright (c) 2008 The University of Aberdeen, Scotland, UK
+ *  Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
+ *  Rewrote from scratch, some bits from earlier code by
+ *  Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ *
  *
  *  ASSUMPTIONS
  *  -----------
@@ -17,14 +22,10 @@
  *  as published by the Free Software Foundation; either version
  *  2 of the License, or (at your option) any later version.
  */
-
 #include <linux/module.h>
-
 #include "ccid.h"
 #include "feat.h"
 
-#define DCCP_FEAT_SP_NOAGREE (-123)
-
 /*
  * Feature activation handlers.
  *
@@ -805,51 +806,6 @@ int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq)
 	return 0;
 }
 
-static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
-{
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_minisock *dmsk = dccp_msk(sk);
-	/* figure out if we are changing our CCID or the peer's */
-	const int rx = type == DCCPO_CHANGE_R;
-	const u8 ccid_nr = rx ? dmsk->dccpms_rx_ccid : dmsk->dccpms_tx_ccid;
-	struct ccid *new_ccid;
-
-	/* Check if nothing is being changed. */
-	if (ccid_nr == new_ccid_nr)
-		return 0;
-
-	new_ccid = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC);
-	if (new_ccid == NULL)
-		return -ENOMEM;
-
-	if (rx) {
-		ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
-		dp->dccps_hc_rx_ccid = new_ccid;
-		dmsk->dccpms_rx_ccid = new_ccid_nr;
-	} else {
-		ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
-		dp->dccps_hc_tx_ccid = new_ccid;
-		dmsk->dccpms_tx_ccid = new_ccid_nr;
-	}
-
-	return 0;
-}
-
-static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
-{
-	dccp_feat_debug(type, feat, val);
-
-	switch (feat) {
-	case DCCPF_CCID:
-		return dccp_feat_update_ccid(sk, type, val);
-	default:
-		dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n",
-			      dccp_feat_typename(type), feat);
-		break;
-	}
-	return 0;
-}
-
 /* Select the first entry in @servlist that also occurs in @clilist (6.3.1) */
 static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen)
 {
@@ -919,453 +875,6 @@ static int dccp_feat_reconcile(dccp_feat_val *fv, u8 *arr, u8 len,
 	return dccp_feat_prefer(rc, fv->sp.vec, fv->sp.len);
 }
 
-#ifdef __this_is_the_old_framework_and_will_be_removed_later_in_a_subsequent_patch
-static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
-			       u8 *rpref, u8 rlen)
-{
-	struct dccp_sock *dp = dccp_sk(sk);
-	u8 *spref, slen, *res = NULL;
-	int i, j, rc, agree = 1;
-
-	BUG_ON(rpref == NULL);
-
-	/* check if we are the black sheep */
-	if (dp->dccps_role == DCCP_ROLE_CLIENT) {
-		spref = rpref;
-		slen  = rlen;
-		rpref = opt->dccpop_val;
-		rlen  = opt->dccpop_len;
-	} else {
-		spref = opt->dccpop_val;
-		slen  = opt->dccpop_len;
-	}
-	/*
-	 * Now we have server preference list in spref and client preference in
-	 * rpref
-	 */
-	BUG_ON(spref == NULL);
-	BUG_ON(rpref == NULL);
-
-	/* FIXME sanity check vals */
-
-	/* Are values in any order?  XXX Lame "algorithm" here */
-	for (i = 0; i < slen; i++) {
-		for (j = 0; j < rlen; j++) {
-			if (spref[i] == rpref[j]) {
-				res = &spref[i];
-				break;
-			}
-		}
-		if (res)
-			break;
-	}
-
-	/* we didn't agree on anything */
-	if (res == NULL) {
-		/* confirm previous value */
-		switch (opt->dccpop_feat) {
-		case DCCPF_CCID:
-			/* XXX did i get this right? =P */
-			if (opt->dccpop_type == DCCPO_CHANGE_L)
-				res = &dccp_msk(sk)->dccpms_tx_ccid;
-			else
-				res = &dccp_msk(sk)->dccpms_rx_ccid;
-			break;
-
-		default:
-			DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat);
-			/* XXX implement res */
-			return -EFAULT;
-		}
-
-		dccp_pr_debug("Don't agree... reconfirming %d\n", *res);
-		agree = 0; /* this is used for mandatory options... */
-	}
-
-	/* need to put result and our preference list */
-	rlen = 1 + opt->dccpop_len;
-	rpref = kmalloc(rlen, GFP_ATOMIC);
-	if (rpref == NULL)
-		return -ENOMEM;
-
-	*rpref = *res;
-	memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len);
-
-	/* put it in the "confirm queue" */
-	if (opt->dccpop_sc == NULL) {
-		opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC);
-		if (opt->dccpop_sc == NULL) {
-			kfree(rpref);
-			return -ENOMEM;
-		}
-	} else {
-		/* recycle the confirm slot */
-		BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
-		kfree(opt->dccpop_sc->dccpoc_val);
-		dccp_pr_debug("recycling confirm slot\n");
-	}
-	memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc));
-
-	opt->dccpop_sc->dccpoc_val = rpref;
-	opt->dccpop_sc->dccpoc_len = rlen;
-
-	/* update the option on our side [we are about to send the confirm] */
-	rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res);
-	if (rc) {
-		kfree(opt->dccpop_sc->dccpoc_val);
-		kfree(opt->dccpop_sc);
-		opt->dccpop_sc = NULL;
-		return rc;
-	}
-
-	dccp_pr_debug("Will confirm %d\n", *rpref);
-
-	/* say we want to change to X but we just got a confirm X, suppress our
-	 * change
-	 */
-	if (!opt->dccpop_conf) {
-		if (*opt->dccpop_val == *res)
-			opt->dccpop_conf = 1;
-		dccp_pr_debug("won't ask for change of same feature\n");
-	}
-
-	return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */
-}
-
-static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
-{
-	struct dccp_minisock *dmsk = dccp_msk(sk);
-	struct dccp_opt_pend *opt;
-	int rc = 1;
-	u8 t;
-
-	/*
-	 * We received a CHANGE.  We gotta match it against our own preference
-	 * list.  If we got a CHANGE_R it means it's a change for us, so we need
-	 * to compare our CHANGE_L list.
-	 */
-	if (type == DCCPO_CHANGE_L)
-		t = DCCPO_CHANGE_R;
-	else
-		t = DCCPO_CHANGE_L;
-
-	/* find our preference list for this feature */
-	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
-		if (opt->dccpop_type != t || opt->dccpop_feat != feature)
-			continue;
-
-		/* find the winner from the two preference lists */
-		rc = dccp_feat_reconcile(sk, opt, val, len);
-		break;
-	}
-
-	/* We didn't deal with the change.  This can happen if we have no
-	 * preference list for the feature.  In fact, it just shouldn't
-	 * happen---if we understand a feature, we should have a preference list
-	 * with at least the default value.
-	 */
-	BUG_ON(rc == 1);
-
-	return rc;
-}
-
-static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
-{
-	struct dccp_opt_pend *opt;
-	struct dccp_minisock *dmsk = dccp_msk(sk);
-	u8 *copy;
-	int rc;
-
-	/* NN features must be Change L (sec. 6.3.2) */
-	if (type != DCCPO_CHANGE_L) {
-		dccp_pr_debug("received %s for NN feature %d\n",
-				dccp_feat_typename(type), feature);
-		return -EFAULT;
-	}
-
-	/* XXX sanity check opt val */
-
-	/* copy option so we can confirm it */
-	opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
-	if (opt == NULL)
-		return -ENOMEM;
-
-	copy = kmemdup(val, len, GFP_ATOMIC);
-	if (copy == NULL) {
-		kfree(opt);
-		return -ENOMEM;
-	}
-
-	opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */
-	opt->dccpop_feat = feature;
-	opt->dccpop_val	 = copy;
-	opt->dccpop_len	 = len;
-
-	/* change feature */
-	rc = dccp_feat_update(sk, type, feature, *val);
-	if (rc) {
-		kfree(opt->dccpop_val);
-		kfree(opt);
-		return rc;
-	}
-
-	dccp_feat_debug(type, feature, *copy);
-
-	list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
-
-	return 0;
-}
-
-static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
-				    u8 type, u8 feature)
-{
-	/* XXX check if other confirms for that are queued and recycle slot */
-	struct dccp_opt_pend *opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
-
-	if (opt == NULL) {
-		/* XXX what do we do?  Ignoring should be fine.  It's a change
-		 * after all =P
-		 */
-		return;
-	}
-
-	switch (type) {
-	case DCCPO_CHANGE_L:
-		opt->dccpop_type = DCCPO_CONFIRM_R;
-		break;
-	case DCCPO_CHANGE_R:
-		opt->dccpop_type = DCCPO_CONFIRM_L;
-		break;
-	default:
-		DCCP_WARN("invalid type %d\n", type);
-		kfree(opt);
-		return;
-	}
-	opt->dccpop_feat = feature;
-	opt->dccpop_val	 = NULL;
-	opt->dccpop_len	 = 0;
-
-	/* change feature */
-	dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature);
-
-	list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
-}
-
-static void dccp_feat_flush_confirm(struct sock *sk)
-{
-	struct dccp_minisock *dmsk = dccp_msk(sk);
-	/* Check if there is anything to confirm in the first place */
-	int yes = !list_empty(&dmsk->dccpms_conf);
-
-	if (!yes) {
-		struct dccp_opt_pend *opt;
-
-		list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
-			if (opt->dccpop_conf) {
-				yes = 1;
-				break;
-			}
-		}
-	}
-
-	if (!yes)
-		return;
-
-	/* OK there is something to confirm... */
-	/* XXX check if packet is in flight?  Send delayed ack?? */
-	if (sk->sk_state == DCCP_OPEN)
-		dccp_send_ack(sk);
-}
-
-int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
-{
-	int rc;
-
-	/* Ignore Change requests other than during connection setup */
-	if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING)
-		return 0;
-	dccp_feat_debug(type, feature, *val);
-
-	/* figure out if it's SP or NN feature */
-	switch (feature) {
-	/* deal with SP features */
-	case DCCPF_CCID:
-		/* XXX Obsoleted by next patch
-		rc = dccp_feat_sp(sk, type, feature, val, len); */
-		break;
-
-	/* deal with NN features */
-	case DCCPF_ACK_RATIO:
-		/* XXX Obsoleted by next patch
-		rc = dccp_feat_nn(sk, type, feature, val, len); */
-		break;
-
-	/* XXX implement other features */
-	default:
-		dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n",
-			      dccp_feat_typename(type), feature);
-		rc = -EFAULT;
-		break;
-	}
-
-	/* check if there were problems changing features */
-	if (rc) {
-		/* If we don't agree on SP, we sent a confirm for old value.
-		 * However we propagate rc to caller in case option was
-		 * mandatory
-		 */
-		if (rc != DCCP_FEAT_SP_NOAGREE)
-			dccp_feat_empty_confirm(dccp_msk(sk), type, feature);
-	}
-
-	/* generate the confirm [if required] */
-	dccp_feat_flush_confirm(sk);
-
-	return rc;
-}
-
-EXPORT_SYMBOL_GPL(dccp_feat_change_recv);
-
-int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
-			   u8 *val, u8 len)
-{
-	u8 t;
-	struct dccp_opt_pend *opt;
-	struct dccp_minisock *dmsk = dccp_msk(sk);
-	int found = 0;
-	int all_confirmed = 1;
-
-	/* Ignore Confirm options other than during connection setup */
-	if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING)
-		return 0;
-	dccp_feat_debug(type, feature, *val);
-
-	/* locate our change request */
-	switch (type) {
-	case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break;
-	case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break;
-	default:	      DCCP_WARN("invalid type %d\n", type);
-			      return 1;
-
-	}
-	/* XXX sanity check feature value */
-
-	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
-		if (!opt->dccpop_conf && opt->dccpop_type == t &&
-		    opt->dccpop_feat == feature) {
-			found = 1;
-			dccp_pr_debug("feature %d found\n", opt->dccpop_feat);
-
-			/* XXX do sanity check */
-
-			opt->dccpop_conf = 1;
-
-			/* We got a confirmation---change the option */
-			dccp_feat_update(sk, opt->dccpop_type,
-					 opt->dccpop_feat, *val);
-
-			/* XXX check the return value of dccp_feat_update */
-			break;
-		}
-
-		if (!opt->dccpop_conf)
-			all_confirmed = 0;
-	}
-
-	if (!found)
-		dccp_pr_debug("%s(%d, ...) never requested\n",
-			      dccp_feat_typename(type), feature);
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv);
-#endif	/* (later) */
-
-void dccp_feat_clean(struct dccp_minisock *dmsk)
-{
-	struct dccp_opt_pend *opt, *next;
-
-	list_for_each_entry_safe(opt, next, &dmsk->dccpms_pending,
-				 dccpop_node) {
-		BUG_ON(opt->dccpop_val == NULL);
-		kfree(opt->dccpop_val);
-
-		if (opt->dccpop_sc != NULL) {
-			BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
-			kfree(opt->dccpop_sc->dccpoc_val);
-			kfree(opt->dccpop_sc);
-		}
-
-		kfree(opt);
-	}
-	INIT_LIST_HEAD(&dmsk->dccpms_pending);
-
-	list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
-		BUG_ON(opt == NULL);
-		if (opt->dccpop_val != NULL)
-			kfree(opt->dccpop_val);
-		kfree(opt);
-	}
-	INIT_LIST_HEAD(&dmsk->dccpms_conf);
-}
-
-EXPORT_SYMBOL_GPL(dccp_feat_clean);
-
-/* this is to be called only when a listening sock creates its child.  It is
- * assumed by the function---the confirm is not duplicated, but rather it is
- * "passed on".
- */
-int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
-{
-	struct dccp_minisock *olddmsk = dccp_msk(oldsk);
-	struct dccp_minisock *newdmsk = dccp_msk(newsk);
-	struct dccp_opt_pend *opt;
-	int rc = 0;
-
-	INIT_LIST_HEAD(&newdmsk->dccpms_pending);
-	INIT_LIST_HEAD(&newdmsk->dccpms_conf);
-
-	list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) {
-		struct dccp_opt_pend *newopt;
-		/* copy the value of the option */
-		u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC);
-
-		if (val == NULL)
-			goto out_clean;
-
-		newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC);
-		if (newopt == NULL) {
-			kfree(val);
-			goto out_clean;
-		}
-
-		/* insert the option */
-		newopt->dccpop_val = val;
-		list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending);
-
-		/* XXX what happens with backlogs and multiple connections at
-		 * once...
-		 */
-		/* the master socket no longer needs to worry about confirms */
-		opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */
-
-		/* reset state for a new socket */
-		opt->dccpop_conf = 0;
-	}
-
-	/* XXX not doing anything about the conf queue */
-
-out:
-	return rc;
-
-out_clean:
-	dccp_feat_clean(newdmsk);
-	rc = -ENOMEM;
-	goto out;
-}
-
-EXPORT_SYMBOL_GPL(dccp_feat_clone);
-
 /**
  * dccp_feat_change_recv  -  Process incoming ChangeL/R options
  * @fn: feature-negotiation list to update
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 618bed935b3..177e9c39ea9 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -3,14 +3,14 @@
 /*
  *  net/dccp/feat.h
  *
- *  An implementation of the DCCP protocol
+ *  Feature negotiation for the DCCP protocol (RFC 4340, section 6)
+ *  Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
  *  Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
  *
- *	This program is free software; you can redistribute it and/or modify it
- *	under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
  */
-
 #include <linux/types.h>
 #include "dccp.h"
 
@@ -117,8 +117,6 @@ extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
 extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
 extern int  dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
 				    u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
-extern void dccp_feat_clean(struct dccp_minisock *dmsk);
-extern int  dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
 extern int  dccp_feat_clone_list(struct list_head const *, struct list_head *);
 extern int  dccp_feat_init(struct sock *sk);
 
-- 
cgit v1.2.3-70-g09d2


From 78673e24df27c76ec75565f4024d45c2c74ef148 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Remove obsolete parts of the old CCID interface

The TX/RX CCIDs of the minisock are now redundant: similar to the Ack Vector
case, their value equals initially that of the sysctl, but at the end of
feature negotiation may be something different.

The old interface removed by this patch thus has been replaced by the newer
interface to dynamically query the currently loaded CCIDs earlier in this
patch set.

Also removed the constructors for the TX CCID and the RX CCID, since the
switch rx/non-rx is done by the handler in minisocks.c (and the handler is
the only place in the code where CCIDs are loaded).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 Documentation/networking/dccp.txt |  5 +++--
 include/linux/dccp.h              |  3 ---
 net/dccp/ccid.c                   | 14 --------------
 net/dccp/ccid.h                   |  5 -----
 net/dccp/feat.c                   | 12 ------------
 net/dccp/minisocks.c              |  2 --
 6 files changed, 3 insertions(+), 38 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 610083ff73f..a203d132dbe 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -140,10 +140,11 @@ send_ackvec = 1
 	Whether or not to send Ack Vector options (sec. 11.5).
 
 tx_ccid = 2
-	Default CCID for the sender-receiver half-connection.
+	Default CCID for the sender-receiver half-connection. Depending on the
+	choice of CCID, the Send Ack Vector feature is enabled automatically.
 
 rx_ccid = 2
-	Default CCID for the receiver-sender half-connection.
+	Default CCID for the receiver-sender half-connection; see tx_ccid.
 
 seq_window = 100
 	The initial sequence window (sec. 7.5.2).
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 6a72ff52a8a..46daea312d9 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -370,7 +370,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   * Will be used to pass the state from dccp_request_sock to dccp_sock.
   *
   * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
-  * @dccpms_ccid - Congestion Control Id (CCID) (section 10)
   * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
   * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2)
   * @dccpms_pending - List of features being negotiated
@@ -378,8 +377,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   */
 struct dccp_minisock {
 	__u64			dccpms_sequence_window;
-	__u8			dccpms_rx_ccid;
-	__u8			dccpms_tx_ccid;
 	__u8			dccpms_send_ack_vector;
 	__u8			dccpms_send_ndp_count;
 	struct list_head	dccpms_pending;
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index f72ca83df55..330372a1a0b 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -253,20 +253,6 @@ out_module_put:
 
 EXPORT_SYMBOL_GPL(ccid_new);
 
-struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp)
-{
-	return ccid_new(id, sk, 1, gfp);
-}
-
-EXPORT_SYMBOL_GPL(ccid_hc_rx_new);
-
-struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk,  gfp_t gfp)
-{
-	return ccid_new(id, sk, 0, gfp);
-}
-
-EXPORT_SYMBOL_GPL(ccid_hc_tx_new);
-
 static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
 {
 	struct ccid_operations *ccid_ops;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 803343aed00..18f69423a70 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -111,11 +111,6 @@ extern int  ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
 extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
 			     gfp_t gfp);
 
-extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk,
-				   gfp_t gfp);
-extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk,
-				   gfp_t gfp);
-
 static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
 {
 	struct ccid *ccid = dp->dccps_hc_rx_ccid;
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 6c82dea409d..cb2ddd2b894 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1119,18 +1119,6 @@ int dccp_feat_init(struct sock *sk)
 	INIT_LIST_HEAD(&dmsk->dccpms_pending);	/* XXX no longer used */
 	INIT_LIST_HEAD(&dmsk->dccpms_conf);	/* XXX no longer used */
 
-	/* CCID L */
-	rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 1, 0,
-				&dmsk->dccpms_tx_ccid, 1);
-	if (rc)
-		goto out;
-
-	/* CCID R */
-	rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 0, 0,
-				&dmsk->dccpms_rx_ccid, 1);
-	if (rc)
-		goto out;
-
 	/* Ack ratio */
 	rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0,
 				dp->dccps_l_ack_ratio);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 258195972bc..486d61df260 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -45,8 +45,6 @@ EXPORT_SYMBOL_GPL(dccp_death_row);
 void dccp_minisock_init(struct dccp_minisock *dmsk)
 {
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
-	dmsk->dccpms_rx_ccid	     = sysctl_dccp_feat_rx_ccid;
-	dmsk->dccpms_tx_ccid	     = sysctl_dccp_feat_tx_ccid;
 	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
 	dmsk->dccpms_send_ndp_count  = sysctl_dccp_feat_send_ndp_count;
 }
-- 
cgit v1.2.3-70-g09d2


From 68e074bfcef269bc61006c2740d7f89ccbbd93d7 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Remove manual influence on NDP Count feature

Updating the NDP count feature is handled automatically now:
 * for CCID-2 it is disabled, since the code does not use NDP counts;
 * for CCID-3 it is enabled, as NDP counts are used to determine loss lengths.

Allowing the user to change NDP values leads to unpredictable and failing
behaviour, since it is then possible to disable NDP counts even when they
are needed (e.g. in CCID-3).

This means that only those user settings are sensible that agree with the
values for Send NDP Count implied by the choice of CCID. But those settings
are already activated by the feature negotiation (CCID dependency tracking),
hence this form of support is redundant.

At startup the initialisation of the NDP count feature is with the default
value of 0, which is done implicitly by the zeroing-out of the socket when
it is allocated. If the choice of CCID or feature negotiation enables NDP
count, this will then be updated via the NDP activation handler.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 Documentation/networking/dccp.txt | 3 ---
 include/linux/dccp.h              | 4 ++--
 net/dccp/dccp.h                   | 1 -
 net/dccp/feat.c                   | 2 +-
 net/dccp/minisocks.c              | 1 -
 net/dccp/options.c                | 4 +---
 net/dccp/sysctl.c                 | 7 -------
 7 files changed, 4 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index a203d132dbe..1403745ab40 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -133,9 +133,6 @@ retries2
 	importance for retransmitted acknowledgments and feature negotiation,
 	data packets are never retransmitted. Analogue of tcp_retries2.
 
-send_ndp = 1
-	Whether or not to send NDP count options (sec. 7.7.2).
-
 send_ackvec = 1
 	Whether or not to send Ack Vector options (sec. 11.5).
 
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 46daea312d9..60e94438ead 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -371,14 +371,12 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   *
   * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
   * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
-  * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2)
   * @dccpms_pending - List of features being negotiated
   * @dccpms_conf -
   */
 struct dccp_minisock {
 	__u64			dccpms_sequence_window;
 	__u8			dccpms_send_ack_vector;
-	__u8			dccpms_send_ndp_count;
 	struct list_head	dccpms_pending;
 	struct list_head	dccpms_conf;
 };
@@ -490,6 +488,7 @@ struct dccp_ackvec;
  * @dccps_r_ack_ratio - feature-remote Ack Ratio
  * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
  * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
+ * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2)
  * @dccps_ndp_count - number of Non Data Packets since last data packet
  * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
  * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
@@ -529,6 +528,7 @@ struct dccp_sock {
 	__u16				dccps_r_ack_ratio;
 	__u8				dccps_pcslen:4;
 	__u8				dccps_pcrlen:4;
+	__u8				dccps_send_ndp_count:1;
 	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
 	struct dccp_minisock		dccps_minisock;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 1baed789bcc..51436c82565 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -99,7 +99,6 @@ extern int  sysctl_dccp_feat_sequence_window;
 extern int  sysctl_dccp_feat_rx_ccid;
 extern int  sysctl_dccp_feat_tx_ccid;
 extern int  sysctl_dccp_feat_send_ack_vector;
-extern int  sysctl_dccp_feat_send_ndp_count;
 extern int  sysctl_dccp_tx_qlen;
 extern int  sysctl_dccp_sync_ratelimit;
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index cb2ddd2b894..35a57ab3bb1 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -86,7 +86,7 @@ static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx)
 static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx)
 {
 	if (!rx)
-		dccp_msk(sk)->dccpms_send_ndp_count = (enable > 0);
+		dccp_sk(sk)->dccps_send_ndp_count = (enable > 0);
 	return 0;
 }
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 486d61df260..9e223257266 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -46,7 +46,6 @@ void dccp_minisock_init(struct dccp_minisock *dmsk)
 {
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
 	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
-	dmsk->dccpms_send_ndp_count  = sysctl_dccp_feat_send_ndp_count;
 }
 
 void dccp_time_wait(struct sock *sk, int state, int timeo)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 3a9a22f0ac1..6b0704497e8 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -27,7 +27,6 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
 int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
 int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
 int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
-int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;
 
 u64 dccp_decode_value_var(const u8 *bf, const u8 len)
 {
@@ -531,8 +530,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 
 	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
 
-	if (dmsk->dccpms_send_ndp_count &&
-	    dccp_insert_option_ndp(sk, skb))
+	if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb))
 		return -1;
 
 	if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index f6e54f433e2..587c12f915c 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -47,13 +47,6 @@ static struct ctl_table dccp_default_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "send_ndp",
-		.data		= &sysctl_dccp_feat_send_ndp_count,
-		.maxlen		= sizeof(sysctl_dccp_feat_send_ndp_count),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "request_retries",
 		.data		= &sysctl_dccp_request_retries,
-- 
cgit v1.2.3-70-g09d2


From b235dc4abbc1356284bd0dc730efa711f394e0e2 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Phase out the use of boolean Ack Vector sysctl

This removes the use of the sysctl and the minisock variable for the Send Ack
Vector feature, which is now handled fully dynamically via feature negotiation;
i.e. when CCID2 is enabled, Ack Vectors are automatically enabled (as per
RFC 4341, 4.).

Using a sysctl in parallel to this implementation would open the door to
crashes, since much of the code relies on tests of the boolean minisock /
sysctl variable. Thus, this patch replaces all tests of type

	if (dccp_msk(sk)->dccpms_send_ack_vector)
		/* ... */
with
	if (dp->dccps_hc_rx_ackvec != NULL)
		/* ... */

The dccps_hc_rx_ackvec is allocated by the dccp_hdlr_ackvec() when feature
negotiation concluded that Ack Vectors are to be used on the half-connection.
Otherwise, it is NULL (due to dccp_init_sock/dccp_create_openreq_child),
so that the test is a valid one.

The activation handler for Ack Vectors is called as soon as the feature
negotiation has concluded at the
 * server when the Ack marking the transition RESPOND => OPEN arrives;
 * client after it has sent its ACK, marking the transition REQUEST => PARTOPEN.

Adding the sequence number of the Response packet to the Ack Vector has been
removed, since
 (a) connection establishment implies that the Response has been received;
 (b) the CCIDs only look at packets received in the (PART)OPEN state, i.e.
     this entry will always be ignored;
 (c) it can not be used for anything useful - to detect loss for instance, only
     packets received after the loss can serve as pseudo-dupacks.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 Documentation/networking/dccp.txt |  3 ---
 include/linux/dccp.h              |  3 ---
 net/dccp/dccp.h                   |  3 +--
 net/dccp/diag.c                   |  2 +-
 net/dccp/input.c                  | 12 +++---------
 net/dccp/minisocks.c              |  1 -
 net/dccp/options.c                |  7 ++-----
 net/dccp/proto.c                  |  3 +--
 net/dccp/sysctl.c                 |  7 -------
 9 files changed, 8 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 1403745ab40..7a3bb1abb83 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -133,9 +133,6 @@ retries2
 	importance for retransmitted acknowledgments and feature negotiation,
 	data packets are never retransmitted. Analogue of tcp_retries2.
 
-send_ackvec = 1
-	Whether or not to send Ack Vector options (sec. 11.5).
-
 tx_ccid = 2
 	Default CCID for the sender-receiver half-connection. Depending on the
 	choice of CCID, the Send Ack Vector feature is enabled automatically.
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 60e94438ead..61734e27abb 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -360,7 +360,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 #define DCCPF_INITIAL_SEQUENCE_WINDOW		100
 #define DCCPF_INITIAL_ACK_RATIO			2
 #define DCCPF_INITIAL_CCID			DCCPC_CCID2
-#define DCCPF_INITIAL_SEND_ACK_VECTOR		1
 /* FIXME: for now we're default to 1 but it should really be 0 */
 #define DCCPF_INITIAL_SEND_NDP_COUNT		1
 
@@ -370,13 +369,11 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   * Will be used to pass the state from dccp_request_sock to dccp_sock.
   *
   * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
-  * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
   * @dccpms_pending - List of features being negotiated
   * @dccpms_conf -
   */
 struct dccp_minisock {
 	__u64			dccpms_sequence_window;
-	__u8			dccpms_send_ack_vector;
 	struct list_head	dccpms_pending;
 	struct list_head	dccpms_conf;
 };
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 51436c82565..3fd16e82c00 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -98,7 +98,6 @@ extern int  sysctl_dccp_retries2;
 extern int  sysctl_dccp_feat_sequence_window;
 extern int  sysctl_dccp_feat_rx_ccid;
 extern int  sysctl_dccp_feat_tx_ccid;
-extern int  sysctl_dccp_feat_send_ack_vector;
 extern int  sysctl_dccp_tx_qlen;
 extern int  sysctl_dccp_sync_ratelimit;
 
@@ -434,7 +433,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
 	const struct dccp_sock *dp = dccp_sk(sk);
 	return dp->dccps_timestamp_echo != 0 ||
 #ifdef CONFIG_IP_DCCP_ACKVEC
-	       (dccp_msk(sk)->dccpms_send_ack_vector &&
+	       (dp->dccps_hc_rx_ackvec != NULL &&
 		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
 #endif
 	       inet_csk_ack_scheduled(sk);
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index d8a3509b26f..93aae7c9555 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -29,7 +29,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_backoff	= icsk->icsk_backoff;
 	info->tcpi_pmtu		= icsk->icsk_pmtu_cookie;
 
-	if (dccp_msk(sk)->dccpms_send_ack_vector)
+	if (dp->dccps_hc_rx_ackvec != NULL)
 		info->tcpi_options |= TCPI_OPT_SACK;
 
 	ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 0672b7e1763..5eb443f656c 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -163,7 +163,7 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
-	if (dccp_msk(sk)->dccpms_send_ack_vector)
+	if (dp->dccps_hc_rx_ackvec != NULL)
 		dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
 					    DCCP_SKB_CB(skb)->dccpd_ack_seq);
 }
@@ -375,7 +375,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 		dccp_event_ack_recv(sk, skb);
 
-	if (dccp_msk(sk)->dccpms_send_ack_vector &&
+	if (dp->dccps_hc_rx_ackvec != NULL &&
 	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
 			    DCCP_SKB_CB(skb)->dccpd_seq,
 			    DCCP_ACKVEC_STATE_RECEIVED))
@@ -434,12 +434,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 			dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
 			    dp->dccps_options_received.dccpor_timestamp_echo));
 
-		if (dccp_msk(sk)->dccpms_send_ack_vector &&
-		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-				    DCCP_SKB_CB(skb)->dccpd_seq,
-				    DCCP_ACKVEC_STATE_RECEIVED))
-			goto out_invalid_packet; /* FIXME: change error code */
-
 		/* Stop the REQUEST timer */
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 		WARN_ON(sk->sk_send_head == NULL);
@@ -637,7 +631,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 			dccp_event_ack_recv(sk, skb);
 
-		if (dccp_msk(sk)->dccpms_send_ack_vector &&
+		if (dp->dccps_hc_rx_ackvec != NULL &&
 		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
 				    DCCP_SKB_CB(skb)->dccpd_seq,
 				    DCCP_ACKVEC_STATE_RECEIVED))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 9e223257266..0ebf8ebcf3d 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -45,7 +45,6 @@ EXPORT_SYMBOL_GPL(dccp_death_row);
 void dccp_minisock_init(struct dccp_minisock *dmsk)
 {
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
-	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
 }
 
 void dccp_time_wait(struct sock *sk, int state, int timeo)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 6b0704497e8..aca309e1663 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -26,7 +26,6 @@
 int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
 int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
 int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
-int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
 
 u64 dccp_decode_value_var(const u8 *bf, const u8 len)
 {
@@ -145,8 +144,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		case DCCPO_ACK_VECTOR_1:
 			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
 				break;
-
-			if (dccp_msk(sk)->dccpms_send_ack_vector &&
+			if (dp->dccps_hc_rx_ackvec != NULL &&
 			    dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
 				goto out_invalid_option;
 			break;
@@ -526,7 +524,6 @@ static void dccp_insert_option_padding(struct sk_buff *skb)
 int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_minisock *dmsk = dccp_msk(sk);
 
 	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
 
@@ -547,7 +544,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 			if (dccp_insert_option_timestamp(sk, skb))
 				return -1;
 
-		} else if (dmsk->dccpms_send_ack_vector	&&
+		} else if (dp->dccps_hc_rx_ackvec != NULL &&
 			   dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
 			   dccp_insert_option_ackvec(sk, skb)) {
 				return -1;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 0d420790b79..775eaa3d0c4 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -207,7 +207,6 @@ EXPORT_SYMBOL_GPL(dccp_init_sock);
 void dccp_destroy_sock(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_minisock *dmsk = dccp_msk(sk);
 
 	/*
 	 * DCCP doesn't use sk_write_queue, just sk_send_head
@@ -225,7 +224,7 @@ void dccp_destroy_sock(struct sock *sk)
 	kfree(dp->dccps_service_list);
 	dp->dccps_service_list = NULL;
 
-	if (dmsk->dccpms_send_ack_vector) {
+	if (dp->dccps_hc_rx_ackvec != NULL) {
 		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
 		dp->dccps_hc_rx_ackvec = NULL;
 	}
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 587c12f915c..018e210875e 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -40,13 +40,6 @@ static struct ctl_table dccp_default_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "send_ackvec",
-		.data		= &sysctl_dccp_feat_send_ack_vector,
-		.maxlen		= sizeof(sysctl_dccp_feat_send_ack_vector),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "request_retries",
 		.data		= &sysctl_dccp_request_retries,
-- 
cgit v1.2.3-70-g09d2


From 5d3dac267a7fd0811ec777e76a81f97f5cdcb395 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Initialisation framework for feature negotiation

This initialises feature negotiation from two tables, which are initialised
from sysctls.

As a novel feature, specifics of the implementation (e.g. currently short
seqnos and ECN are not supported) are advertised for robustness.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 include/linux/dccp.h | 19 ---------------
 net/dccp/feat.c      | 66 ++++++++++++++++++++++++++++++++++++++++++++--------
 net/dccp/feat.h      |  2 +-
 3 files changed, 57 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 61734e27abb..990e97fa1f0 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -369,28 +369,9 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   * Will be used to pass the state from dccp_request_sock to dccp_sock.
   *
   * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
-  * @dccpms_pending - List of features being negotiated
-  * @dccpms_conf -
   */
 struct dccp_minisock {
 	__u64			dccpms_sequence_window;
-	struct list_head	dccpms_pending;
-	struct list_head	dccpms_conf;
-};
-
-struct dccp_opt_conf {
-	__u8			*dccpoc_val;
-	__u8			dccpoc_len;
-};
-
-struct dccp_opt_pend {
-	struct list_head	dccpop_node;
-	__u8			dccpop_type;
-	__u8			dccpop_feat;
-	__u8		        *dccpop_val;
-	__u8			dccpop_len;
-	int			dccpop_conf;
-	struct dccp_opt_conf    *dccpop_sc;
 };
 
 extern void dccp_minisock_init(struct dccp_minisock *dmsk);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 35a57ab3bb1..a687740e442 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1110,24 +1110,70 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 	return 0;	/* ignore FN options in all other states */
 }
 
+/**
+ * dccp_feat_init  -  Seed feature negotiation with host-specific defaults
+ * This initialises global defaults, depending on the value of the sysctls.
+ * These can later be overridden by registering changes via setsockopt calls.
+ * The last link in the chain is finalise_settings, to make sure that between
+ * here and the start of actual feature negotiation no inconsistencies enter.
+ *
+ * All features not appearing below use either defaults or are otherwise
+ * later adjusted through dccp_feat_finalise_settings().
+ */
 int dccp_feat_init(struct sock *sk)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_minisock *dmsk = dccp_msk(sk);
+	struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
+	u8 on = 1, off = 0;
 	int rc;
+	struct {
+		u8 *val;
+		u8 len;
+	} tx, rx;
+
+	/* Non-negotiable (NN) features */
+	rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0,
+				    sysctl_dccp_feat_sequence_window);
+	if (rc)
+		return rc;
+
+	/* Server-priority (SP) features */
+
+	/* Advertise that short seqnos are not supported (7.6.1) */
+	rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1);
+	if (rc)
+		return rc;
 
-	INIT_LIST_HEAD(&dmsk->dccpms_pending);	/* XXX no longer used */
-	INIT_LIST_HEAD(&dmsk->dccpms_conf);	/* XXX no longer used */
+	/* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */
+	rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1);
+	if (rc)
+		return rc;
+
+	/*
+	 * We advertise the available list of CCIDs and reorder according to
+	 * preferences, to avoid failure resulting from negotiating different
+	 * singleton values (which always leads to failure).
+	 * These settings can still (later) be overridden via sockopts.
+	 */
+	if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
+	    ccid_get_builtin_ccids(&rx.val, &rx.len))
+		return -ENOBUFS;
 
-	/* Ack ratio */
-	rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0,
-				dp->dccps_l_ack_ratio);
-out:
+	if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) ||
+	    !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len))
+		goto free_ccid_lists;
+
+	rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len);
+	if (rc)
+		goto free_ccid_lists;
+
+	rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len);
+
+free_ccid_lists:
+	kfree(tx.val);
+	kfree(rx.val);
 	return rc;
 }
 
-EXPORT_SYMBOL_GPL(dccp_feat_init);
-
 int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 177e9c39ea9..f73b47abca9 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -112,13 +112,13 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
 #define dccp_feat_debug(type, feat, val)
 #endif /* CONFIG_IP_DCCP_DEBUG */
 
+extern int  dccp_feat_init(struct sock *sk);
 extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
 				  u8 const *list, u8 len);
 extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
 extern int  dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
 				    u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
 extern int  dccp_feat_clone_list(struct list_head const *, struct list_head *);
-extern int  dccp_feat_init(struct sock *sk);
 
 /*
  * Encoding variable-length options and their maximum length.
-- 
cgit v1.2.3-70-g09d2


From 09856c108956c99088ead9267ccbd1dab77f7043 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Auto-load (when supported) CCID plugins for negotiation

This adds auto-loading of CCIDs (when module loading is enabled)
for the purpose of feature negotiation.

The problem with loading the CCIDs at the end of feature negotiation is
that this would happen in software interrupt context. Besides, if the host
advertises CCIDs during negotiation, it should have them ready to use, in
case an agreeing peer wants to use it for the connection.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/ccid.c | 39 +++++++++++++++++++++++++++++----------
 net/dccp/ccid.h |  1 +
 net/dccp/feat.c |  5 +++++
 3 files changed, 35 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 330372a1a0b..e3fb52b4f5c 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -196,22 +196,41 @@ int ccid_unregister(struct ccid_operations *ccid_ops)
 
 EXPORT_SYMBOL_GPL(ccid_unregister);
 
+/**
+ * ccid_request_module  -  Pre-load CCID module for later use
+ * This should be called only from process context (e.g. during connection
+ * setup) and is necessary for later calls to ccid_new (typically in software
+ * interrupt), so that it has the modules available when they are needed.
+ */
+static int ccid_request_module(u8 id)
+{
+	if (!in_atomic()) {
+		ccids_read_lock();
+		if (ccids[id] == NULL) {
+			ccids_read_unlock();
+			return request_module("net-dccp-ccid-%d", id);
+		}
+		ccids_read_unlock();
+	}
+	return 0;
+}
+
+int ccid_request_modules(u8 const *ccid_array, u8 array_len)
+{
+#ifdef CONFIG_KMOD
+	while (array_len--)
+		if (ccid_request_module(ccid_array[array_len]))
+			return -1;
+#endif
+	return 0;
+}
+
 struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
 {
 	struct ccid_operations *ccid_ops;
 	struct ccid *ccid = NULL;
 
 	ccids_read_lock();
-#ifdef CONFIG_KMOD
-	if (ccids[id] == NULL) {
-		/* We only try to load if in process context */
-		ccids_read_unlock();
-		if (gfp & GFP_ATOMIC)
-			goto out;
-		request_module("net-dccp-ccid-%d", id);
-		ccids_read_lock();
-	}
-#endif
 	ccid_ops = ccids[id];
 	if (ccid_ops == NULL)
 		goto out_unlock;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 18f69423a70..20ba066b277 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -108,6 +108,7 @@ extern int  ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
 extern int  ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
 					  char __user *, int __user *);
 
+extern int    ccid_request_modules(u8 const *ccid_array, u8 array_len);
 extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
 			     gfp_t gfp);
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index a687740e442..9a493809278 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1158,6 +1158,11 @@ int dccp_feat_init(struct sock *sk)
 	    ccid_get_builtin_ccids(&rx.val, &rx.len))
 		return -ENOBUFS;
 
+	/* Pre-load all CCID modules that are going to be advertised */
+	rc = -EUNATCH;
+	if (ccid_request_modules(tx.val, tx.len))
+		goto free_ccid_lists;
+
 	if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) ||
 	    !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len))
 		goto free_ccid_lists;
-- 
cgit v1.2.3-70-g09d2


From 51c7d4fa2675c106a980ddcdbe308b54b5151945 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Implement both feature-local and feature-remote Sequence Window
 feature

This adds full support for local/remote Sequence Window feature, from which the
  * sequence-number-validity (W) and
  * acknowledgment-number-validity (W') windows
derive as specified in RFC 4340, 7.5.3.

Specifically, the following changes are introduced:
  * integrated new socket fields into dccp_sk;
  * updated the update_gsr/gss routines with regard to these fields;
  * updated handler code: the Sequence Window feature is located at the TX side,
    so the local feature is meant if the handler-rx flag is false;
  * the initialisation of `rcv_wnd' in reqsk is removed, since
    - rcv_wnd is not used by the code anywhere;
    - sequence number checks are not done in the LISTEN state (cf. 7.5.3);
    - dccp_check_req checks the Ack number validity more rigorously;
  * the `struct dccp_minisock' became empty and is now removed.

Until the handshake completes with activating negotiated values, the local/remote
Sequence-Window values are undefined and thus can not reliably be estimated.
This issue is addressed in a separate patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 Documentation/networking/dccp.txt |  3 ++-
 include/linux/dccp.h              | 24 ++++--------------------
 net/dccp/dccp.h                   | 16 +++++++---------
 net/dccp/feat.c                   | 13 +++++++++++--
 net/dccp/minisocks.c              | 11 -----------
 net/dccp/proto.c                  |  2 --
 6 files changed, 24 insertions(+), 45 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 7a3bb1abb83..b132e4a3cf0 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -141,7 +141,8 @@ rx_ccid = 2
 	Default CCID for the receiver-sender half-connection; see tx_ccid.
 
 seq_window = 100
-	The initial sequence window (sec. 7.5.2).
+	The initial sequence window (sec. 7.5.2) of the sender. This influences
+	the local ackno validity and the remote seqno validity windows (7.5.1).
 
 tx_qlen = 5
 	The size of the transmit buffer in packets. A value of 0 corresponds
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 990e97fa1f0..7a0502ab383 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -363,19 +363,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 /* FIXME: for now we're default to 1 but it should really be 0 */
 #define DCCPF_INITIAL_SEND_NDP_COUNT		1
 
-/**
-  * struct dccp_minisock - Minimal DCCP connection representation
-  *
-  * Will be used to pass the state from dccp_request_sock to dccp_sock.
-  *
-  * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
-  */
-struct dccp_minisock {
-	__u64			dccpms_sequence_window;
-};
-
-extern void dccp_minisock_init(struct dccp_minisock *dmsk);
-
 /**
  * struct dccp_request_sock  -  represent DCCP-specific connection request
  * @dreq_inet_rsk: structure inherited from
@@ -464,13 +451,14 @@ struct dccp_ackvec;
  * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo
  * @dccps_l_ack_ratio - feature-local Ack Ratio
  * @dccps_r_ack_ratio - feature-remote Ack Ratio
+ * @dccps_l_seq_win - local Sequence Window (influences ack number validity)
+ * @dccps_r_seq_win - remote Sequence Window (influences seq number validity)
  * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
  * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
  * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2)
  * @dccps_ndp_count - number of Non Data Packets since last data packet
  * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
  * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
- * @dccps_minisock - associated minisock (accessed via dccp_msk)
  * @dccps_featneg - tracks feature-negotiation state (mostly during handshake)
  * @dccps_hc_rx_ackvec - rx half connection ack vector
  * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
@@ -504,12 +492,13 @@ struct dccp_sock {
 	__u32				dccps_timestamp_time;
 	__u16				dccps_l_ack_ratio;
 	__u16				dccps_r_ack_ratio;
+	__u64				dccps_l_seq_win:48;
+	__u64				dccps_r_seq_win:48;
 	__u8				dccps_pcslen:4;
 	__u8				dccps_pcrlen:4;
 	__u8				dccps_send_ndp_count:1;
 	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
-	struct dccp_minisock		dccps_minisock;
 	struct list_head		dccps_featneg;
 	struct dccp_ackvec		*dccps_hc_rx_ackvec;
 	struct ccid			*dccps_hc_rx_ccid;
@@ -527,11 +516,6 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk)
 	return (struct dccp_sock *)sk;
 }
 
-static inline struct dccp_minisock *dccp_msk(const struct sock *sk)
-{
-	return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock;
-}
-
 static inline const char *dccp_role(const struct sock *sk)
 {
 	switch (dccp_sk(sk)->dccps_role) {
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3fd16e82c00..6101ecdb85b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -409,23 +409,21 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
 static inline void dccp_update_gsr(struct sock *sk, u64 seq)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	const struct dccp_minisock *dmsk = dccp_msk(sk);
 
 	dp->dccps_gsr = seq;
-	dccp_set_seqno(&dp->dccps_swl,
-		       dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4));
-	dccp_set_seqno(&dp->dccps_swh,
-		       dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4);
+	/* Sequence validity window depends on remote Sequence Window (7.5.1) */
+	dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
+	dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
 }
 
 static inline void dccp_update_gss(struct sock *sk, u64 seq)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
-	dp->dccps_awh = dp->dccps_gss = seq;
-	dccp_set_seqno(&dp->dccps_awl,
-		       (dp->dccps_gss -
-			dccp_msk(sk)->dccpms_sequence_window + 1));
+	dp->dccps_gss = seq;
+	/* Ack validity window depends on local Sequence Window value (7.5.1) */
+	dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
+	dp->dccps_awh = dp->dccps_gss;
 }
 
 static inline int dccp_ack_pending(const struct sock *sk)
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 9a493809278..84346599985 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -52,8 +52,17 @@ static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
 
 static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
 {
-	if (!rx)
-		dccp_msk(sk)->dccpms_sequence_window = seq_win;
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	if (rx) {
+		dp->dccps_r_seq_win = seq_win;
+		/* propagate changes to update SWL/SWH */
+		dccp_update_gsr(sk, dp->dccps_gsr);
+	} else {
+		dp->dccps_l_seq_win = seq_win;
+		/* propagate changes to update AWL */
+		dccp_update_gss(sk, dp->dccps_gss);
+	}
 	return 0;
 }
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 0ebf8ebcf3d..0ecb19c5e8c 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -42,11 +42,6 @@ struct inet_timewait_death_row dccp_death_row = {
 
 EXPORT_SYMBOL_GPL(dccp_death_row);
 
-void dccp_minisock_init(struct dccp_minisock *dmsk)
-{
-	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
-}
-
 void dccp_time_wait(struct sock *sk, int state, int timeo)
 {
 	struct inet_timewait_sock *tw = NULL;
@@ -110,7 +105,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		struct dccp_request_sock *dreq = dccp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 		struct dccp_sock *newdp = dccp_sk(newsk);
-		struct dccp_minisock *newdmsk = dccp_msk(newsk);
 
 		newdp->dccps_role	    = DCCP_ROLE_SERVER;
 		newdp->dccps_hc_rx_ackvec   = NULL;
@@ -128,10 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		 *    Initialize S.GAR := S.ISS
 		 *    Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
 		 */
-
-		/* See dccp_v4_conn_request */
-		newdmsk->dccpms_sequence_window = req->rcv_wnd;
-
 		newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
 		dccp_update_gss(newsk, dreq->dreq_iss);
 
@@ -289,7 +279,6 @@ int dccp_reqsk_init(struct request_sock *req,
 
 	inet_rsk(req)->rmt_port	  = dccp_hdr(skb)->dccph_sport;
 	inet_rsk(req)->acked	  = 0;
-	req->rcv_wnd		  = sysctl_dccp_feat_sequence_window;
 	dreq->dreq_timestamp_echo = 0;
 
 	/* inherit feature negotiation options from listening socket */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 775eaa3d0c4..392a5d822b3 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -180,8 +180,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	dccp_minisock_init(&dp->dccps_minisock);
-
 	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
 	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
 	sk->sk_state		= DCCP_CLOSED;
-- 
cgit v1.2.3-70-g09d2


From 0a4822679d94e2b0117aeead06a19fad59533905 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Initialisation and type-checking of feature sysctls

This patch takes care of initialising and type-checking sysctls related to
feature negotiation. Type checking is important since some of the sysctls
now directly act on the feature-negotiation process.

The sysctls are initialised with the known default values for each feature.
For the type-checking the value constraints from RFC 4340 are used:

 * Sequence Window uses the specified Wmin=32, the maximum is ulong (4 bytes),
   tested and confirmed that it works up to 4294967295 - for Gbps speed;
 * Ack Ratio is between 0 .. 0xffff (2-byte unsigned integer);
 * CCIDs are between 0 .. 255;
 * request_retries, retries1, retries2 also between 0..255 for good measure;
 * tx_qlen is checked to be non-negative;
 * sync_ratelimit remains as before.

Further changes:
----------------
Performed s@sysctl_dccp_feat@sysctl_dccp@g since the sysctls are now in feat.c.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 include/linux/dccp.h |  8 --------
 net/dccp/dccp.h      |  3 ---
 net/dccp/feat.c      | 11 ++++++++---
 net/dccp/feat.h      |  8 ++++++++
 net/dccp/options.c   |  4 ----
 net/dccp/sysctl.c    | 43 ++++++++++++++++++++++++++++++-------------
 6 files changed, 46 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 7a0502ab383..7434a8353e2 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -355,14 +355,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 	return __dccp_hdr_len(dccp_hdr(skb));
 }
 
-
-/* initial values for each feature */
-#define DCCPF_INITIAL_SEQUENCE_WINDOW		100
-#define DCCPF_INITIAL_ACK_RATIO			2
-#define DCCPF_INITIAL_CCID			DCCPC_CCID2
-/* FIXME: for now we're default to 1 but it should really be 0 */
-#define DCCPF_INITIAL_SEND_NDP_COUNT		1
-
 /**
  * struct dccp_request_sock  -  represent DCCP-specific connection request
  * @dreq_inet_rsk: structure inherited from
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 6101ecdb85b..4f681f1a16c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -95,9 +95,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 extern int  sysctl_dccp_request_retries;
 extern int  sysctl_dccp_retries1;
 extern int  sysctl_dccp_retries2;
-extern int  sysctl_dccp_feat_sequence_window;
-extern int  sysctl_dccp_feat_rx_ccid;
-extern int  sysctl_dccp_feat_tx_ccid;
 extern int  sysctl_dccp_tx_qlen;
 extern int  sysctl_dccp_sync_ratelimit;
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 84346599985..4c95cbdb0e0 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -26,6 +26,11 @@
 #include "ccid.h"
 #include "feat.h"
 
+/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */
+unsigned long	sysctl_dccp_sequence_window __read_mostly = 100;
+int		sysctl_dccp_rx_ccid	    __read_mostly = 2,
+		sysctl_dccp_tx_ccid	    __read_mostly = 2;
+
 /*
  * Feature activation handlers.
  *
@@ -1141,7 +1146,7 @@ int dccp_feat_init(struct sock *sk)
 
 	/* Non-negotiable (NN) features */
 	rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0,
-				    sysctl_dccp_feat_sequence_window);
+				    sysctl_dccp_sequence_window);
 	if (rc)
 		return rc;
 
@@ -1172,8 +1177,8 @@ int dccp_feat_init(struct sock *sk)
 	if (ccid_request_modules(tx.val, tx.len))
 		goto free_ccid_lists;
 
-	if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) ||
-	    !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len))
+	if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
+	    !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
 		goto free_ccid_lists;
 
 	rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len);
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index f73b47abca9..f8456bca4ee 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -99,6 +99,13 @@ struct ccid_dependency {
 	u8	val;
 };
 
+/*
+ * Sysctls to seed defaults for feature negotiation
+ */
+extern unsigned long sysctl_dccp_sequence_window;
+extern int	     sysctl_dccp_rx_ccid;
+extern int	     sysctl_dccp_tx_ccid;
+
 #ifdef CONFIG_IP_DCCP_DEBUG
 extern const char *dccp_feat_typename(const u8 type);
 extern const char *dccp_feat_name(const u8 feat);
@@ -113,6 +120,7 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
 #endif /* CONFIG_IP_DCCP_DEBUG */
 
 extern int  dccp_feat_init(struct sock *sk);
+extern void dccp_feat_initialise_sysctls(void);
 extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
 				  u8 const *list, u8 len);
 extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index aca309e1663..5906e96eedd 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -23,10 +23,6 @@
 #include "dccp.h"
 #include "feat.h"
 
-int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
-int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
-int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
-
 u64 dccp_decode_value_var(const u8 *bf, const u8 len)
 {
 	u64 value = 0;
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 018e210875e..a5a1856234e 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -18,55 +18,72 @@
 #error This file should not be compiled without CONFIG_SYSCTL defined
 #endif
 
+/* Boundary values */
+static int		zero     = 0,
+			u8_max   = 0xFF;
+static unsigned long	seqw_min = 32;
+
 static struct ctl_table dccp_default_table[] = {
 	{
 		.procname	= "seq_window",
-		.data		= &sysctl_dccp_feat_sequence_window,
-		.maxlen		= sizeof(sysctl_dccp_feat_sequence_window),
+		.data		= &sysctl_dccp_sequence_window,
+		.maxlen		= sizeof(sysctl_dccp_sequence_window),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_doulongvec_minmax,
+		.extra1		= &seqw_min,		/* RFC 4340, 7.5.2 */
 	},
 	{
 		.procname	= "rx_ccid",
-		.data		= &sysctl_dccp_feat_rx_ccid,
-		.maxlen		= sizeof(sysctl_dccp_feat_rx_ccid),
+		.data		= &sysctl_dccp_rx_ccid,
+		.maxlen		= sizeof(sysctl_dccp_rx_ccid),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,		/* RFC 4340, 10. */
 	},
 	{
 		.procname	= "tx_ccid",
-		.data		= &sysctl_dccp_feat_tx_ccid,
-		.maxlen		= sizeof(sysctl_dccp_feat_tx_ccid),
+		.data		= &sysctl_dccp_tx_ccid,
+		.maxlen		= sizeof(sysctl_dccp_tx_ccid),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,		/* RFC 4340, 10. */
 	},
 	{
 		.procname	= "request_retries",
 		.data		= &sysctl_dccp_request_retries,
 		.maxlen		= sizeof(sysctl_dccp_request_retries),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,
 	},
 	{
 		.procname	= "retries1",
 		.data		= &sysctl_dccp_retries1,
 		.maxlen		= sizeof(sysctl_dccp_retries1),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,
 	},
 	{
 		.procname	= "retries2",
 		.data		= &sysctl_dccp_retries2,
 		.maxlen		= sizeof(sysctl_dccp_retries2),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &u8_max,
 	},
 	{
 		.procname	= "tx_qlen",
 		.data		= &sysctl_dccp_tx_qlen,
 		.maxlen		= sizeof(sysctl_dccp_tx_qlen),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
 	},
 	{
 		.procname	= "sync_ratelimit",
-- 
cgit v1.2.3-70-g09d2


From 76f738a7950b559a23ab3c692c99a02f35a54f7f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Debugging functions for feature negotiation

Since all feature-negotiation processing now takes place in feat.c, functions
for producing verbose debugging output are concentrated there.

New functions to print out values, entry records, and options are provided,
and also a macro is defined to not always have the function name in the
output line.

Thanks a lot to Wei Yongjun and Giuseppe Galeota for help with errors in an
earlier revision of this patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/dccp.h    |   2 +
 net/dccp/feat.c    | 153 +++++++++++++++++++++++++++++++++++++----------------
 net/dccp/feat.h    |  13 -----
 net/dccp/options.c |   4 --
 4 files changed, 109 insertions(+), 63 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 4f681f1a16c..94ae6d41d72 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -42,9 +42,11 @@
 extern int dccp_debug;
 #define dccp_pr_debug(format, a...)	  DCCP_PR_DEBUG(dccp_debug, format, ##a)
 #define dccp_pr_debug_cat(format, a...)   DCCP_PRINTK(dccp_debug, format, ##a)
+#define dccp_debug(fmt, a...)		  dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
 #else
 #define dccp_pr_debug(format, a...)
 #define dccp_pr_debug_cat(format, a...)
+#define dccp_debug(format, a...)
 #endif
 
 extern struct inet_hashinfo dccp_hashinfo;
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 4c95cbdb0e0..3abacad0b22 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -204,6 +204,100 @@ static int dccp_feat_default_value(u8 feat_num)
 	return idx < 0 ? : dccp_feat_table[idx].default_value;
 }
 
+/*
+ *	Debugging and verbose-printing section
+ */
+static const char *dccp_feat_fname(const u8 feat)
+{
+	static const char *feature_names[] = {
+		[DCCPF_RESERVED]	= "Reserved",
+		[DCCPF_CCID]		= "CCID",
+		[DCCPF_SHORT_SEQNOS]	= "Allow Short Seqnos",
+		[DCCPF_SEQUENCE_WINDOW]	= "Sequence Window",
+		[DCCPF_ECN_INCAPABLE]	= "ECN Incapable",
+		[DCCPF_ACK_RATIO]	= "Ack Ratio",
+		[DCCPF_SEND_ACK_VECTOR]	= "Send ACK Vector",
+		[DCCPF_SEND_NDP_COUNT]	= "Send NDP Count",
+		[DCCPF_MIN_CSUM_COVER]	= "Min. Csum Coverage",
+		[DCCPF_DATA_CHECKSUM]	= "Send Data Checksum",
+	};
+	if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
+		return feature_names[DCCPF_RESERVED];
+
+	if (feat ==  DCCPF_SEND_LEV_RATE)
+		return "Send Loss Event Rate";
+	if (feat >= DCCPF_MIN_CCID_SPECIFIC)
+		return "CCID-specific";
+
+	return feature_names[feat];
+}
+
+static const char *dccp_feat_sname[] = { "DEFAULT", "INITIALISING", "CHANGING",
+					 "UNSTABLE", "STABLE" };
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+static const char *dccp_feat_oname(const u8 opt)
+{
+	switch (opt) {
+	case DCCPO_CHANGE_L:  return "Change_L";
+	case DCCPO_CONFIRM_L: return "Confirm_L";
+	case DCCPO_CHANGE_R:  return "Change_R";
+	case DCCPO_CONFIRM_R: return "Confirm_R";
+	}
+	return NULL;
+}
+
+static void dccp_feat_printval(u8 feat_num, dccp_feat_val const *val)
+{
+	u8 i, type = dccp_feat_type(feat_num);
+
+	if (val == NULL || (type == FEAT_SP && val->sp.vec == NULL))
+		dccp_pr_debug_cat("(NULL)");
+	else if (type == FEAT_SP)
+		for (i = 0; i < val->sp.len; i++)
+			dccp_pr_debug_cat("%s%u", i ? " " : "", val->sp.vec[i]);
+	else if (type == FEAT_NN)
+		dccp_pr_debug_cat("%llu", (unsigned long long)val->nn);
+	else
+		dccp_pr_debug_cat("unknown type %u", type);
+}
+
+static void dccp_feat_printvals(u8 feat_num, u8 *list, u8 len)
+{
+	u8 type = dccp_feat_type(feat_num);
+	dccp_feat_val fval = { .sp.vec = list, .sp.len = len };
+
+	if (type == FEAT_NN)
+		fval.nn = dccp_decode_value_var(list, len);
+	dccp_feat_printval(feat_num, &fval);
+}
+
+static void dccp_feat_print_entry(struct dccp_feat_entry const *entry)
+{
+	dccp_debug("   * %s %s = ", entry->is_local ? "local" : "remote",
+				    dccp_feat_fname(entry->feat_num));
+	dccp_feat_printval(entry->feat_num, &entry->val);
+	dccp_pr_debug_cat(", state=%s %s\n", dccp_feat_sname[entry->state],
+			  entry->needs_confirm ? "(Confirm pending)" : "");
+}
+
+#define dccp_feat_print_opt(opt, feat, val, len, mandatory)	do {	      \
+	dccp_pr_debug("%s(%s, ", dccp_feat_oname(opt), dccp_feat_fname(feat));\
+	dccp_feat_printvals(feat, val, len);				      \
+	dccp_pr_debug_cat(") %s\n", mandatory ? "!" : "");	} while (0)
+
+#define dccp_feat_print_fnlist(fn_list)  {		\
+	const struct dccp_feat_entry *___entry;		\
+							\
+	dccp_pr_debug("List Dump:\n");			\
+	list_for_each_entry(___entry, fn_list, node)	\
+		dccp_feat_print_entry(___entry);	\
+}
+#else	/* ! CONFIG_IP_DCCP_DEBUG */
+#define dccp_feat_print_opt(opt, feat, val, len, mandatory)
+#define dccp_feat_print_fnlist(fn_list)
+#endif
+
 static int __dccp_feat_activate(struct sock *sk, const int idx,
 				const bool is_local, dccp_feat_val const *fval)
 {
@@ -236,6 +330,10 @@ static int __dccp_feat_activate(struct sock *sk, const int idx,
 	/* Location is RX if this is a local-RX or remote-TX feature */
 	rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX));
 
+	dccp_debug("   -> activating %s %s, %sval=%llu\n", rx ? "RX" : "TX",
+		   dccp_feat_fname(dccp_feat_table[idx].feat_num),
+		   fval ? "" : "default ",  (unsigned long long)val);
+
 	return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
 }
 
@@ -539,6 +637,7 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
 				return -1;
 			}
 		}
+		dccp_feat_print_opt(opt, pos->feat_num, ptr, len, 0);
 
 		if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt))
 			return -1;
@@ -792,6 +891,7 @@ int dccp_feat_finalise_settings(struct dccp_sock *dp)
 	while (i--)
 		if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i))
 			return -1;
+	dccp_feat_print_fnlist(fn);
 	return 0;
 }
 
@@ -910,6 +1010,8 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
 	if (len == 0 || type == FEAT_UNKNOWN)		/* 6.1 and 6.6.8 */
 		goto unknown_feature_or_value;
 
+	dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
+
 	/*
 	 *	Negotiation of NN features: Change R is invalid, so there is no
 	 *	simultaneous negotiation; hence we do not look up in the list.
@@ -1015,6 +1117,8 @@ static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
 	const bool local = (opt == DCCPO_CONFIRM_R);
 	struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local);
 
+	dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
+
 	if (entry == NULL) {	/* nothing queued: ignore or handle error */
 		if (is_mandatory && type == FEAT_UNKNOWN)
 			return DCCP_RESET_CODE_MANDATORY_ERROR;
@@ -1217,9 +1321,10 @@ int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
 			goto activation_failed;
 		}
 		if (cur->state != FEAT_STABLE) {
-			DCCP_CRIT("Negotiation of %s %u failed in state %u",
+			DCCP_CRIT("Negotiation of %s %s failed in state %s",
 				  cur->is_local ? "local" : "remote",
-				  cur->feat_num, cur->state);
+				  dccp_feat_fname(cur->feat_num),
+				  dccp_feat_sname[cur->state]);
 			goto activation_failed;
 		}
 		fvals[idx][cur->is_local] = &cur->val;
@@ -1260,47 +1365,3 @@ activation_failed:
 	dp->dccps_hc_rx_ackvec = NULL;
 	return -1;
 }
-
-#ifdef CONFIG_IP_DCCP_DEBUG
-const char *dccp_feat_typename(const u8 type)
-{
-	switch(type) {
-	case DCCPO_CHANGE_L:  return("ChangeL");
-	case DCCPO_CONFIRM_L: return("ConfirmL");
-	case DCCPO_CHANGE_R:  return("ChangeR");
-	case DCCPO_CONFIRM_R: return("ConfirmR");
-	/* the following case must not appear in feature negotation  */
-	default:	      dccp_pr_debug("unknown type %d [BUG!]\n", type);
-	}
-	return NULL;
-}
-
-EXPORT_SYMBOL_GPL(dccp_feat_typename);
-
-const char *dccp_feat_name(const u8 feat)
-{
-	static const char *feature_names[] = {
-		[DCCPF_RESERVED]	= "Reserved",
-		[DCCPF_CCID]		= "CCID",
-		[DCCPF_SHORT_SEQNOS]	= "Allow Short Seqnos",
-		[DCCPF_SEQUENCE_WINDOW]	= "Sequence Window",
-		[DCCPF_ECN_INCAPABLE]	= "ECN Incapable",
-		[DCCPF_ACK_RATIO]	= "Ack Ratio",
-		[DCCPF_SEND_ACK_VECTOR]	= "Send ACK Vector",
-		[DCCPF_SEND_NDP_COUNT]	= "Send NDP Count",
-		[DCCPF_MIN_CSUM_COVER]	= "Min. Csum Coverage",
-		[DCCPF_DATA_CHECKSUM]	= "Send Data Checksum",
-	};
-	if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
-		return feature_names[DCCPF_RESERVED];
-
-	if (feat ==  DCCPF_SEND_LEV_RATE)
-		return "Send Loss Event Rate";
-	if (feat >= DCCPF_MIN_CCID_SPECIFIC)
-		return "CCID-specific";
-
-	return feature_names[feat];
-}
-
-EXPORT_SYMBOL_GPL(dccp_feat_name);
-#endif /* CONFIG_IP_DCCP_DEBUG */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index f8456bca4ee..2217066e22d 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -106,19 +106,6 @@ extern unsigned long sysctl_dccp_sequence_window;
 extern int	     sysctl_dccp_rx_ccid;
 extern int	     sysctl_dccp_tx_ccid;
 
-#ifdef CONFIG_IP_DCCP_DEBUG
-extern const char *dccp_feat_typename(const u8 type);
-extern const char *dccp_feat_name(const u8 feat);
-
-static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
-{
-	dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type),
-					   dccp_feat_name(feat), feat, val);
-}
-#else
-#define dccp_feat_debug(type, feat, val)
-#endif /* CONFIG_IP_DCCP_DEBUG */
-
 extern int  dccp_feat_init(struct sock *sk);
 extern void dccp_feat_initialise_sysctls(void);
 extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 5906e96eedd..fd51cc70c63 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -498,10 +498,6 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
 		*to++ = *val;
 	if (len)
 		memcpy(to, val, len);
-
-	dccp_pr_debug("%s(%s (%d), ...), length %d\n",
-		      dccp_feat_typename(type),
-		      dccp_feat_name(feat), feat, len);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 624a965a93610152b10c73d050ed44812efa8abe Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Support for the exchange of NN options in established state

In contrast to static feature negotiation at the begin of a connection, which
establishes the capabilities of both endpoints, this patch introduces support
for dynamic exchange of feature negotiation options.

Such a dynamic exchange is necessary in at least two cases:
 * CCID-2's Ack Ratio (RFC 4341, 6.1.2) which changes during the connection;
 * Sequence Window values that, as per RFC 4340, 7.5.2, should be sent "as
   as the connection progresses".

Both are NN (non-negotiable) features. Hence dynamic feature "negotiation" is
distinguished from static/pre-connection negotiation by the following:
 * no new capabilities are negotiated (those that matter for the connection
   are negotiated prior to setting up the connection, comparable to SIP);
 * features must be understood by each endpoint: as per RFC 4340, 6.4,
   Sequence Window is "Req'd" and Ack Ratio must be understood when CCID-2
   is used as per the note underneath Table 4.

These characteristics are reflected in the implementation:
 * only NN options can be exchanged after connection setup;
 * NN options are activated directly after validating them. The rationale is
   that a peer must accept every valid NN value (RFC 4340, 6.3.2), hence it
   will either accept the value and send a "Confirm R", or it will send an
   empty Confirm (which will reset the connection according to FN rules).
 * An Ack is scheduled directly after activation to accelerate communicating
   the update to the peer.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/dccp.h |  1 +
 net/dccp/feat.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 94ae6d41d72..0a07b2e24e6 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -436,6 +436,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
 	       inet_csk_ack_scheduled(sk);
 }
 
+extern int  dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
 extern int  dccp_feat_finalise_settings(struct dccp_sock *dp);
 extern int  dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
 extern int  dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 3abacad0b22..5be8b85ac74 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -13,6 +13,7 @@
  *  -----------
  *  o Feature negotiation is coordinated with connection setup (as in TCP), wild
  *    changes of parameters of an established connection are not supported.
+ *  o Changing NN values (Ack Ratio only) is supported in state OPEN/PARTOPEN.
  *  o All currently known SP features have 1-byte quantities. If in the future
  *    extensions of RFCs 4340..42 define features with item lengths larger than
  *    one byte, a feature-specific extension of the code will be required.
@@ -337,6 +338,20 @@ static int __dccp_feat_activate(struct sock *sk, const int idx,
 	return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
 }
 
+/**
+ * dccp_feat_activate  -  Activate feature value on socket
+ * @sk: fully connected DCCP socket (after handshake is complete)
+ * @feat_num: feature to activate, one of %dccp_feature_numbers
+ * @local: whether local (1) or remote (0) @feat_num is meant
+ * @fval: the value (SP or NN) to activate, or NULL to use the default value
+ * For general use this function is preferable over __dccp_feat_activate().
+ */
+static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local,
+			      dccp_feat_val const *fval)
+{
+	return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval);
+}
+
 /* Test for "Req'd" feature (RFC 4340, 6.4) */
 static inline int dccp_feat_must_be_understood(u8 feat_num)
 {
@@ -734,6 +749,48 @@ int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val)
 	return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val);
 }
 
+/**
+ * dccp_feat_signal_nn_change  -  Update NN values for an established connection
+ * @sk: DCCP socket of an established connection
+ * @feat: NN feature number from %dccp_feature_numbers
+ * @nn_val: the new value to use
+ * This function is used to communicate NN updates out-of-band. The difference
+ * to feature negotiation during connection setup is that values are activated
+ * immediately after validation, i.e. we don't wait for the Confirm: either the
+ * value is accepted by the peer (and then the waiting is futile), or it is not
+ * (Reset or empty Confirm). We don't accept empty Confirms - transmitted values
+ * are validated, and the peer "MUST accept any valid value" (RFC 4340, 6.3.2).
+ */
+int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val)
+{
+	struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
+	dccp_feat_val fval = { .nn = nn_val };
+	struct dccp_feat_entry *entry;
+
+	if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
+		return 0;
+
+	if (dccp_feat_type(feat) != FEAT_NN ||
+	    !dccp_feat_is_valid_nn_val(feat, nn_val))
+		return -EINVAL;
+
+	entry = dccp_feat_list_lookup(fn, feat, 1);
+	if (entry != NULL) {
+		dccp_pr_debug("Ignoring %llu, entry %llu exists in state %s\n",
+			      (unsigned long long)nn_val,
+			      (unsigned long long)entry->val.nn,
+			      dccp_feat_sname[entry->state]);
+		return 0;
+	}
+
+	if (dccp_feat_activate(sk, feat, 1, &fval))
+		return -EADV;
+
+	inet_csk_schedule_ack(sk);
+	return dccp_feat_push_change(fn, feat, 1, 0, &fval);
+}
+EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change);
+
 /*
  *	Tracking features whose value depend on the choice of CCID
  *
-- 
cgit v1.2.3-70-g09d2


From 4861a354430d2ea36847ef88086c7449b4f385b6 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Support for exchanging of NN options in established state

This patch provides support for the reception of NN options in (PART)OPEN state.

It is a combination of change_recv() and confirm_recv(), specifically geared
towards receiving the `fast-path' NN options.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/feat.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 5be8b85ac74..c847c80d1b9 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1246,6 +1246,93 @@ confirmation_failed:
 			    : DCCP_RESET_CODE_OPTION_ERROR;
 }
 
+/**
+ * dccp_feat_handle_nn_established  -  Fast-path reception of NN options
+ * @sk:		socket of an established DCCP connection
+ * @mandatory:	whether @opt was preceded by a Mandatory option
+ * @opt:	%DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only)
+ * @feat:	NN number, one of %dccp_feature_numbers
+ * @val:	NN value
+ * @len:	length of @val in bytes
+ * This function combines the functionality of change_recv/confirm_recv, with
+ * the following differences (reset codes are the same):
+ *    - cleanup after receiving the Confirm;
+ *    - values are directly activated after successful parsing;
+ *    - deliberately restricted to NN features.
+ * The restriction to NN features is essential since SP features can have non-
+ * predictable outcomes (depending on the remote configuration), and are inter-
+ * dependent (CCIDs for instance cause further dependencies).
+ */
+static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt,
+					  u8 feat, u8 *val, u8 len)
+{
+	struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
+	const bool local = (opt == DCCPO_CONFIRM_R);
+	struct dccp_feat_entry *entry;
+	u8 type = dccp_feat_type(feat);
+	dccp_feat_val fval;
+
+	dccp_feat_print_opt(opt, feat, val, len, mandatory);
+
+	/* Ignore non-mandatory unknown and non-NN features */
+	if (type == FEAT_UNKNOWN) {
+		if (local && !mandatory)
+			return 0;
+		goto fast_path_unknown;
+	} else if (type != FEAT_NN) {
+		return 0;
+	}
+
+	/*
+	 * We don't accept empty Confirms, since in fast-path feature
+	 * negotiation the values are enabled immediately after sending
+	 * the Change option.
+	 * Empty Changes on the other hand are invalid (RFC 4340, 6.1).
+	 */
+	if (len == 0 || len > sizeof(fval.nn))
+		goto fast_path_unknown;
+
+	if (opt == DCCPO_CHANGE_L) {
+		fval.nn = dccp_decode_value_var(val, len);
+		if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
+			goto fast_path_unknown;
+
+		if (dccp_feat_push_confirm(fn, feat, local, &fval) ||
+		    dccp_feat_activate(sk, feat, local, &fval))
+			return DCCP_RESET_CODE_TOO_BUSY;
+
+		/* set the `Ack Pending' flag to piggyback a Confirm */
+		inet_csk_schedule_ack(sk);
+
+	} else if (opt == DCCPO_CONFIRM_R) {
+		entry = dccp_feat_list_lookup(fn, feat, local);
+		if (entry == NULL || entry->state != FEAT_CHANGING)
+			return 0;
+
+		fval.nn = dccp_decode_value_var(val, len);
+		if (fval.nn != entry->val.nn) {
+			DCCP_WARN("Bogus Confirm for non-existing value\n");
+			goto fast_path_failed;
+		}
+
+		/* It has been confirmed - so remove the entry */
+		dccp_feat_list_pop(entry);
+
+	} else {
+		DCCP_WARN("Received illegal option %u\n", opt);
+		goto fast_path_failed;
+	}
+	return 0;
+
+fast_path_unknown:
+	if (!mandatory)
+		return dccp_push_empty_confirm(fn, feat, local);
+
+fast_path_failed:
+	return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
+			 : DCCP_RESET_CODE_OPTION_ERROR;
+}
+
 /**
  * dccp_feat_parse_options  -  Process Feature-Negotiation Options
  * @sk: for general use and used by the client during connection setup
@@ -1281,6 +1368,15 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
 						      val, len, server);
 		}
+		break;
+	/*
+	 *	Support for exchanging NN options on an established connection
+	 *	This is currently restricted to Ack Ratio (RFC 4341, 6.1.2)
+	 */
+	case DCCP_OPEN:
+	case DCCP_PARTOPEN:
+		return dccp_feat_handle_nn_established(sk, mandatory, opt, feat,
+						       val, len);
 	}
 	return 0;	/* ignore FN options in all other states */
 }
-- 
cgit v1.2.3-70-g09d2


From 2faae5587f692fd5c79856ca4c4b90944ee0472a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Use feature-negotiation to report Ack Ratio changes

This uses the new feature-negotiation framework to signal Ack Ratio changes,
as required by RFC 4341, sec. 6.1.2.

This raises some problems for CCID-2 since it can at the moment not cope
gracefully with Ack Ratio of e.g. 2. A FIXME has thus been added which
reverts to the existing policy of bypassing the Ack Ratio sysctl.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/feat.c  | 12 ++++++++++++
 net/dccp/proto.c |  1 -
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index c847c80d1b9..f94c7c9d1a7 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -74,6 +74,18 @@ static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
 
 static int dccp_hdlr_ack_ratio(struct sock *sk, u64 ratio, bool rx)
 {
+#ifndef __CCID2_COPES_GRACEFULLY_WITH_DYNAMIC_ACK_RATIO_UPDATES__
+	/*
+	 * FIXME: This is required until several problems in the CCID-2 code are
+	 * resolved. The CCID-2 code currently does not cope well; using dynamic
+	 * Ack Ratios greater than 1 caused instabilities. These were manifest
+	 * in hangups and long RTO timeouts (1...3 seconds). Until this has been
+	 * stabilised, it is safer not to activate dynamic Ack Ratio changes.
+	 */
+	dccp_pr_debug("Not changing %s Ack Ratio from 1 to %u\n",
+		      rx ? "RX" : "TX", (u16)ratio);
+	ratio = 1;
+#endif
 	if (rx)
 		dccp_sk(sk)->dccps_r_ack_ratio = ratio;
 	else
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 392a5d822b3..11905e0cf8f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -189,7 +189,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	dp->dccps_rate_last	= jiffies;
 	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
 	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
-	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;
 
 	dccp_init_xmit_timers(sk);
 
-- 
cgit v1.2.3-70-g09d2


From 55ebe3ab2d504bd3f3eeade0262826210019abda Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Leave headroom for options when calculating the MPS

The Maximum Packet Size (MPS) is of interest for applications which want
to transfer data, so it is only relevant to the data transfer phase of a
connection (unless one wants to send data on the DCCP-Request, but that is
not considered here).

The strategy chosen to deal with this requirement is to leave room for only
such options that may appear on data packets.

A special consideration applies to Ack Vectors: this is purely guesswork,
since these can have any length between 3 and 1020 bytes. The strategy
chosen here is to subtract a configurable minimum, the value of 16 bytes
(2 bytes for type/length plus 14 Ack Vector cells) has been found by
experimentatation. If people experience this as too much or too little,
this could later be turned into a Kconfig option.

There are currently no CCID-specific header options which may appear on data
packets, hence it is not necessary to define a corresponding CCID field.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/ackvec.h |  3 +++
 net/dccp/output.c | 22 ++++++++++++++--------
 2 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 4ccee030524..1c10814fdf7 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -20,6 +20,9 @@
 /* We can spread an ack vector across multiple options */
 #define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
 
+/* Estimated minimum average Ack Vector length - used for updating MPS */
+#define DCCPAV_MIN_OPTLEN	16
+
 #define DCCP_ACKVEC_STATE_RECEIVED	0
 #define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
 #define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 19a93d5433a..0aab919dafe 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -161,21 +161,27 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
 	u32 ccmps = dccp_determine_ccmps(dp);
-	int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
+	u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
 
 	/* Account for header lengths and IPv4/v6 option overhead */
 	cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
 		    sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
 
 	/*
-	 * FIXME: this should come from the CCID infrastructure, where, say,
-	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
-	 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
-	 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
-	 * make it a multiple of 4
+	 * Leave enough headroom for common DCCP header options.
+	 * This only considers options which may appear on DCCP-Data packets, as
+	 * per table 3 in RFC 4340, 5.8. When running out of space for other
+	 * options (eg. Ack Vector which can take up to 255 bytes), it is better
+	 * to schedule a separate Ack. Thus we leave headroom for the following:
+	 *  - 1 byte for Slow Receiver (11.6)
+	 *  - 6 bytes for Timestamp (13.1)
+	 *  - 10 bytes for Timestamp Echo (13.3)
+	 *  - 8 bytes for NDP count (7.7, when activated)
+	 *  - 6 bytes for Data Checksum (9.3)
+	 *  - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
 	 */
-
-	cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
+	cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
+			   (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
 
 	/* And store cached results */
 	icsk->icsk_pmtu_cookie = pmtu;
-- 
cgit v1.2.3-70-g09d2


From 88ddac513a4e7e04234214b600401ec22abfbb46 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Special case of the MPS for client-PARTOPEN with DataAcks

To increase robustness, it is necessary to resend Confirm feature-negotiation
options, even though the RFC does not mandate it. But feature negotiation
options can take (much) more room than the options on common DataAck packets.

Instead of reducing the MPS always for a case which only applies to the three
messages send during initial handshake, this patch devises a special case:

   if the payload length of the DataAck in PARTOPEN is too large, an Ack is sent
   to carry the options, and the feature-negotiation list is then flushed.

   This means that the server gets two Acks for one Response. If both Acks get
   lost, it is probably better to restart the connection anyway and devising yet
   another special-case does not seem worth the extra complexity.

The patch (over-)estimates the expected overhead to be 32*4 bytes -- commonly
seen values were 20-90 bytes for initial feature-negotiation options.

It uses sizeof(u32) to mean "aligned units of 4 bytes". For consistency,
another use of sizeof is modified.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h   |  5 ++++-
 net/dccp/output.c | 15 ++++++++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0a07b2e24e6..c7370de4c4d 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -63,11 +63,14 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
  *    - DCCP-Reset    with ACK Subheader and 4 bytes of Reset Code fields
  *  Hence a safe upper bound for the maximum option length is 1020-28 = 992
  */
-#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int))
+#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t))
 #define DCCP_MAX_PACKET_HDR 28
 #define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
 #define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
 
+/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */
+#define DCCP_FEATNEG_OVERHEAD	 (32 * sizeof(uint32_t))
+
 #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
 				     * state, about 60 seconds */
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 0aab919dafe..9888f61f9b0 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -276,7 +276,20 @@ void dccp_write_xmit(struct sock *sk, int block)
 			const int len = skb->len;
 
 			if (sk->sk_state == DCCP_PARTOPEN) {
-				/* See 8.1.5.  Handshake Completion */
+				const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
+				/*
+				 * See 8.1.5 - Handshake Completion.
+				 *
+				 * For robustness we resend Confirm options until the client has
+				 * entered OPEN. During the initial feature negotiation, the MPS
+				 * is smaller than usual, reduced by the Change/Confirm options.
+				 */
+				if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
+					DCCP_WARN("Payload too large (%d) for featneg.\n", len);
+					dccp_send_ack(sk);
+					dccp_feat_list_purge(&dp->dccps_featneg);
+				}
+
 				inet_csk_schedule_ack(sk);
 				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  inet_csk(sk)->icsk_rto,
-- 
cgit v1.2.3-70-g09d2


From 1fb87509606cb19f5f603e54c28af7da149049f3 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Remove ccid2hc{tx,rx}_ prefixes

This patch fixes two problems caused by the ubiquitous long "hctx->ccid2htx_"
and "hcrx->ccid2hcrx_" prefixes:
 * code becomes hard to read;
 * multiple-line statements are almost inevitable even for simple expressions;
The prefixes are not really necessary (compare with "struct tcp_sock").

There had been previous discussion of this on dccp@vger, but so far this was
not followed up (most people agreed that the prefixes are too long).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Leandro Melo de Sales <leandroal@gmail.com>
---
 net/dccp/ccids/ccid2.c | 274 ++++++++++++++++++++++++-------------------------
 net/dccp/ccids/ccid2.h |  48 ++++-----
 2 files changed, 159 insertions(+), 163 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index c9ea19a4d85..9728bbf0ace 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -39,16 +39,16 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
 {
 	int len = 0;
 	int pipe = 0;
-	struct ccid2_seq *seqp = hctx->ccid2hctx_seqh;
+	struct ccid2_seq *seqp = hctx->seqh;
 
 	/* there is data in the chain */
-	if (seqp != hctx->ccid2hctx_seqt) {
+	if (seqp != hctx->seqt) {
 		seqp = seqp->ccid2s_prev;
 		len++;
 		if (!seqp->ccid2s_acked)
 			pipe++;
 
-		while (seqp != hctx->ccid2hctx_seqt) {
+		while (seqp != hctx->seqt) {
 			struct ccid2_seq *prev = seqp->ccid2s_prev;
 
 			len++;
@@ -65,16 +65,16 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
 		}
 	}
 
-	BUG_ON(pipe != hctx->ccid2hctx_pipe);
+	BUG_ON(pipe != hctx->pipe);
 	ccid2_pr_debug("len of chain=%d\n", len);
 
 	do {
 		seqp = seqp->ccid2s_prev;
 		len++;
-	} while (seqp != hctx->ccid2hctx_seqh);
+	} while (seqp != hctx->seqh);
 
 	ccid2_pr_debug("total len=%d\n", len);
-	BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
+	BUG_ON(len != hctx->seqbufc * CCID2_SEQBUF_LEN);
 }
 #else
 #define ccid2_pr_debug(format, a...)
@@ -87,8 +87,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
 	int i;
 
 	/* check if we have space to preserve the pointer to the buffer */
-	if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) /
-					sizeof(struct ccid2_seq*)))
+	if (hctx->seqbufc >= sizeof(hctx->seqbuf) / sizeof(struct ccid2_seq *))
 		return -ENOMEM;
 
 	/* allocate buffer and initialize linked list */
@@ -104,20 +103,20 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
 	seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
 
 	/* This is the first allocation.  Initiate the head and tail.  */
-	if (hctx->ccid2hctx_seqbufc == 0)
-		hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp;
+	if (hctx->seqbufc == 0)
+		hctx->seqh = hctx->seqt = seqp;
 	else {
 		/* link the existing list with the one we just created */
-		hctx->ccid2hctx_seqh->ccid2s_next = seqp;
-		seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
+		hctx->seqh->ccid2s_next = seqp;
+		seqp->ccid2s_prev = hctx->seqh;
 
-		hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
-		seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt;
+		hctx->seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
+		seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->seqt;
 	}
 
 	/* store the original pointer to the buffer so we can free it */
-	hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp;
-	hctx->ccid2hctx_seqbufc++;
+	hctx->seqbuf[hctx->seqbufc] = seqp;
+	hctx->seqbufc++;
 
 	return 0;
 }
@@ -126,7 +125,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd)
+	if (hctx->pipe < hctx->cwnd)
 		return 0;
 
 	return 1; /* XXX CCID should dequeue when ready instead of polling */
@@ -135,7 +134,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2);
+	u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->cwnd, 2);
 
 	/*
 	 * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
@@ -160,7 +159,7 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
 {
 	ccid2_pr_debug("change SRTT to %ld\n", val);
-	hctx->ccid2hctx_srtt = val;
+	hctx->srtt = val;
 }
 
 static void ccid2_start_rto_timer(struct sock *sk);
@@ -173,8 +172,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
-		sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
-			       jiffies + HZ / 5);
+		sk_reset_timer(sk, &hctx->rtotimer, jiffies + HZ / 5);
 		goto out;
 	}
 
@@ -183,28 +181,28 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	ccid2_hc_tx_check_sanity(hctx);
 
 	/* back-off timer */
-	hctx->ccid2hctx_rto <<= 1;
+	hctx->rto <<= 1;
 
-	s = hctx->ccid2hctx_rto / HZ;
+	s = hctx->rto / HZ;
 	if (s > 60)
-		hctx->ccid2hctx_rto = 60 * HZ;
+		hctx->rto = 60 * HZ;
 
 	ccid2_start_rto_timer(sk);
 
 	/* adjust pipe, cwnd etc */
-	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
-	if (hctx->ccid2hctx_ssthresh < 2)
-		hctx->ccid2hctx_ssthresh = 2;
-	hctx->ccid2hctx_cwnd	 = 1;
-	hctx->ccid2hctx_pipe	 = 0;
+	hctx->ssthresh = hctx->cwnd / 2;
+	if (hctx->ssthresh < 2)
+		hctx->ssthresh = 2;
+	hctx->cwnd = 1;
+	hctx->pipe = 0;
 
 	/* clear state about stuff we sent */
-	hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
-	hctx->ccid2hctx_packets_acked = 0;
+	hctx->seqt = hctx->seqh;
+	hctx->packets_acked = 0;
 
 	/* clear ack ratio state. */
-	hctx->ccid2hctx_rpseq	 = 0;
-	hctx->ccid2hctx_rpdupack = -1;
+	hctx->rpseq    = 0;
+	hctx->rpdupack = -1;
 	ccid2_change_l_ack_ratio(sk, 1);
 	ccid2_hc_tx_check_sanity(hctx);
 out:
@@ -216,11 +214,11 @@ static void ccid2_start_rto_timer(struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto);
+	ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->rto);
 
-	BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer));
-	sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
-		       jiffies + hctx->ccid2hctx_rto);
+	BUG_ON(timer_pending(&hctx->rtotimer));
+	sk_reset_timer(sk, &hctx->rtotimer,
+		       jiffies + hctx->rto);
 }
 
 static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
@@ -229,27 +227,26 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 	struct ccid2_seq *next;
 
-	hctx->ccid2hctx_pipe++;
+	hctx->pipe++;
 
-	hctx->ccid2hctx_seqh->ccid2s_seq   = dp->dccps_gss;
-	hctx->ccid2hctx_seqh->ccid2s_acked = 0;
-	hctx->ccid2hctx_seqh->ccid2s_sent  = jiffies;
+	hctx->seqh->ccid2s_seq   = dp->dccps_gss;
+	hctx->seqh->ccid2s_acked = 0;
+	hctx->seqh->ccid2s_sent  = jiffies;
 
-	next = hctx->ccid2hctx_seqh->ccid2s_next;
+	next = hctx->seqh->ccid2s_next;
 	/* check if we need to alloc more space */
-	if (next == hctx->ccid2hctx_seqt) {
+	if (next == hctx->seqt) {
 		if (ccid2_hc_tx_alloc_seq(hctx)) {
 			DCCP_CRIT("packet history - out of memory!");
 			/* FIXME: find a more graceful way to bail out */
 			return;
 		}
-		next = hctx->ccid2hctx_seqh->ccid2s_next;
-		BUG_ON(next == hctx->ccid2hctx_seqt);
+		next = hctx->seqh->ccid2s_next;
+		BUG_ON(next == hctx->seqt);
 	}
-	hctx->ccid2hctx_seqh = next;
+	hctx->seqh = next;
 
-	ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
-		       hctx->ccid2hctx_pipe);
+	ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->cwnd, hctx->pipe);
 
 	/*
 	 * FIXME: The code below is broken and the variables have been removed
@@ -272,12 +269,12 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 	 */
 #if 0
 	/* Ack Ratio.  Need to maintain a concept of how many windows we sent */
-	hctx->ccid2hctx_arsent++;
+	hctx->arsent++;
 	/* We had an ack loss in this window... */
-	if (hctx->ccid2hctx_ackloss) {
-		if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) {
-			hctx->ccid2hctx_arsent	= 0;
-			hctx->ccid2hctx_ackloss	= 0;
+	if (hctx->ackloss) {
+		if (hctx->arsent >= hctx->cwnd) {
+			hctx->arsent  = 0;
+			hctx->ackloss = 0;
 		}
 	} else {
 		/* No acks lost up to now... */
@@ -287,28 +284,28 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 			int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
 				    dp->dccps_l_ack_ratio;
 
-			denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom;
+			denom = hctx->cwnd * hctx->cwnd / denom;
 
-			if (hctx->ccid2hctx_arsent >= denom) {
+			if (hctx->arsent >= denom) {
 				ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
-				hctx->ccid2hctx_arsent = 0;
+				hctx->arsent = 0;
 			}
 		} else {
 			/* we can't increase ack ratio further [1] */
-			hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
+			hctx->arsent = 0; /* or maybe set it to cwnd*/
 		}
 	}
 #endif
 
 	/* setup RTO timer */
-	if (!timer_pending(&hctx->ccid2hctx_rtotimer))
+	if (!timer_pending(&hctx->rtotimer))
 		ccid2_start_rto_timer(sk);
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 	do {
-		struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
+		struct ccid2_seq *seqp = hctx->seqt;
 
-		while (seqp != hctx->ccid2hctx_seqh) {
+		while (seqp != hctx->seqh) {
 			ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
 				       (unsigned long long)seqp->ccid2s_seq,
 				       seqp->ccid2s_acked, seqp->ccid2s_sent);
@@ -386,7 +383,7 @@ static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer);
+	sk_stop_timer(sk, &hctx->rtotimer);
 	ccid2_pr_debug("deleted RTO timer\n");
 }
 
@@ -396,73 +393,73 @@ static inline void ccid2_new_ack(struct sock *sk,
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
-		if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) {
-			hctx->ccid2hctx_cwnd += 1;
-			*maxincr	     -= 1;
-			hctx->ccid2hctx_packets_acked = 0;
+	if (hctx->cwnd < hctx->ssthresh) {
+		if (*maxincr > 0 && ++hctx->packets_acked == 2) {
+			hctx->cwnd += 1;
+			*maxincr   -= 1;
+			hctx->packets_acked = 0;
 		}
-	} else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) {
-			hctx->ccid2hctx_cwnd += 1;
-			hctx->ccid2hctx_packets_acked = 0;
+	} else if (++hctx->packets_acked >= hctx->cwnd) {
+			hctx->cwnd += 1;
+			hctx->packets_acked = 0;
 	}
 
 	/* update RTO */
-	if (hctx->ccid2hctx_srtt == -1 ||
-	    time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
+	if (hctx->srtt == -1 ||
+	    time_after(jiffies, hctx->lastrtt + hctx->srtt)) {
 		unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
 		int s;
 
 		/* first measurement */
-		if (hctx->ccid2hctx_srtt == -1) {
+		if (hctx->srtt == -1) {
 			ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
 				       r, jiffies,
 				       (unsigned long long)seqp->ccid2s_seq);
 			ccid2_change_srtt(hctx, r);
-			hctx->ccid2hctx_rttvar = r >> 1;
+			hctx->rttvar = r >> 1;
 		} else {
 			/* RTTVAR */
-			long tmp = hctx->ccid2hctx_srtt - r;
+			long tmp = hctx->srtt - r;
 			long srtt;
 
 			if (tmp < 0)
 				tmp *= -1;
 
 			tmp >>= 2;
-			hctx->ccid2hctx_rttvar *= 3;
-			hctx->ccid2hctx_rttvar >>= 2;
-			hctx->ccid2hctx_rttvar += tmp;
+			hctx->rttvar *= 3;
+			hctx->rttvar >>= 2;
+			hctx->rttvar += tmp;
 
 			/* SRTT */
-			srtt = hctx->ccid2hctx_srtt;
+			srtt = hctx->srtt;
 			srtt *= 7;
 			srtt >>= 3;
 			tmp = r >> 3;
 			srtt += tmp;
 			ccid2_change_srtt(hctx, srtt);
 		}
-		s = hctx->ccid2hctx_rttvar << 2;
+		s = hctx->rttvar << 2;
 		/* clock granularity is 1 when based on jiffies */
 		if (!s)
 			s = 1;
-		hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s;
+		hctx->rto = hctx->srtt + s;
 
 		/* must be at least a second */
-		s = hctx->ccid2hctx_rto / HZ;
+		s = hctx->rto / HZ;
 		/* DCCP doesn't require this [but I like it cuz my code sux] */
 #if 1
 		if (s < 1)
-			hctx->ccid2hctx_rto = HZ;
+			hctx->rto = HZ;
 #endif
 		/* max 60 seconds */
 		if (s > 60)
-			hctx->ccid2hctx_rto = HZ * 60;
+			hctx->rto = HZ * 60;
 
-		hctx->ccid2hctx_lastrtt = jiffies;
+		hctx->lastrtt = jiffies;
 
 		ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
-			       hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
-			       hctx->ccid2hctx_rto, HZ, r);
+			       hctx->srtt, hctx->rttvar,
+			       hctx->rto, HZ, r);
 	}
 
 	/* we got a new ack, so re-start RTO timer */
@@ -474,12 +471,12 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	if (hctx->ccid2hctx_pipe == 0)
+	if (hctx->pipe == 0)
 		DCCP_BUG("pipe == 0");
 	else
-		hctx->ccid2hctx_pipe--;
+		hctx->pipe--;
 
-	if (hctx->ccid2hctx_pipe == 0)
+	if (hctx->pipe == 0)
 		ccid2_hc_tx_kill_rto_timer(sk);
 }
 
@@ -487,19 +484,19 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
+	if (time_before(seqp->ccid2s_sent, hctx->last_cong)) {
 		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
 		return;
 	}
 
-	hctx->ccid2hctx_last_cong = jiffies;
+	hctx->last_cong = jiffies;
 
-	hctx->ccid2hctx_cwnd     = hctx->ccid2hctx_cwnd / 2 ? : 1U;
-	hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U);
+	hctx->cwnd     = hctx->cwnd / 2 ? : 1U;
+	hctx->ssthresh = max(hctx->cwnd, 2U);
 
 	/* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
-	if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
-		ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd);
+	if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->cwnd)
+		ccid2_change_l_ack_ratio(sk, hctx->cwnd);
 }
 
 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -523,21 +520,21 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	 * -sorbo.
 	 */
 	/* need to bootstrap */
-	if (hctx->ccid2hctx_rpdupack == -1) {
-		hctx->ccid2hctx_rpdupack = 0;
-		hctx->ccid2hctx_rpseq = seqno;
+	if (hctx->rpdupack == -1) {
+		hctx->rpdupack = 0;
+		hctx->rpseq = seqno;
 	} else {
 		/* check if packet is consecutive */
-		if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1)
-			hctx->ccid2hctx_rpseq = seqno;
+		if (dccp_delta_seqno(hctx->rpseq, seqno) == 1)
+			hctx->rpseq = seqno;
 		/* it's a later packet */
-		else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
-			hctx->ccid2hctx_rpdupack++;
+		else if (after48(seqno, hctx->rpseq)) {
+			hctx->rpdupack++;
 
 			/* check if we got enough dupacks */
-			if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) {
-				hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
-				hctx->ccid2hctx_rpseq = 0;
+			if (hctx->rpdupack >= NUMDUPACK) {
+				hctx->rpdupack = -1; /* XXX lame */
+				hctx->rpseq = 0;
 
 				ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
 			}
@@ -546,7 +543,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 	/* check forward path congestion */
 	/* still didn't send out new data packets */
-	if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt)
+	if (hctx->seqh == hctx->seqt)
 		return;
 
 	switch (DCCP_SKB_CB(skb)->dccpd_type) {
@@ -558,14 +555,14 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
-	if (after48(ackno, hctx->ccid2hctx_high_ack))
-		hctx->ccid2hctx_high_ack = ackno;
+	if (after48(ackno, hctx->high_ack))
+		hctx->high_ack = ackno;
 
-	seqp = hctx->ccid2hctx_seqt;
+	seqp = hctx->seqt;
 	while (before48(seqp->ccid2s_seq, ackno)) {
 		seqp = seqp->ccid2s_next;
-		if (seqp == hctx->ccid2hctx_seqh) {
-			seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+		if (seqp == hctx->seqh) {
+			seqp = hctx->seqh->ccid2s_prev;
 			break;
 		}
 	}
@@ -575,7 +572,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	 * packets per acknowledgement. Rounding up avoids that cwnd is not
 	 * advanced when Ack Ratio is 1 and gives a slight edge otherwise.
 	 */
-	if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh)
+	if (hctx->cwnd < hctx->ssthresh)
 		maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
 
 	/* go through all ack vectors */
@@ -594,7 +591,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * seqnos.
 			 */
 			while (after48(seqp->ccid2s_seq, ackno)) {
-				if (seqp == hctx->ccid2hctx_seqt) {
+				if (seqp == hctx->seqt) {
 					done = 1;
 					break;
 				}
@@ -626,7 +623,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 						       (unsigned long long)seqp->ccid2s_seq);
 					ccid2_hc_tx_dec_pipe(sk);
 				}
-				if (seqp == hctx->ccid2hctx_seqt) {
+				if (seqp == hctx->seqt) {
 					done = 1;
 					break;
 				}
@@ -645,11 +642,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	/* The state about what is acked should be correct now
 	 * Check for NUMDUPACK
 	 */
-	seqp = hctx->ccid2hctx_seqt;
-	while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) {
+	seqp = hctx->seqt;
+	while (before48(seqp->ccid2s_seq, hctx->high_ack)) {
 		seqp = seqp->ccid2s_next;
-		if (seqp == hctx->ccid2hctx_seqh) {
-			seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+		if (seqp == hctx->seqh) {
+			seqp = hctx->seqh->ccid2s_prev;
 			break;
 		}
 	}
@@ -660,7 +657,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			if (done == NUMDUPACK)
 				break;
 		}
-		if (seqp == hctx->ccid2hctx_seqt)
+		if (seqp == hctx->seqt)
 			break;
 		seqp = seqp->ccid2s_prev;
 	}
@@ -683,20 +680,20 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				ccid2_congestion_event(sk, seqp);
 				ccid2_hc_tx_dec_pipe(sk);
 			}
-			if (seqp == hctx->ccid2hctx_seqt)
+			if (seqp == hctx->seqt)
 				break;
 			seqp = seqp->ccid2s_prev;
 		}
 
-		hctx->ccid2hctx_seqt = last_acked;
+		hctx->seqt = last_acked;
 	}
 
 	/* trim acked packets in tail */
-	while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) {
-		if (!hctx->ccid2hctx_seqt->ccid2s_acked)
+	while (hctx->seqt != hctx->seqh) {
+		if (!hctx->seqt->ccid2s_acked)
 			break;
 
-		hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
+		hctx->seqt = hctx->seqt->ccid2s_next;
 	}
 
 	ccid2_hc_tx_check_sanity(hctx);
@@ -709,17 +706,17 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	u32 max_ratio;
 
 	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
-	hctx->ccid2hctx_ssthresh  = ~0U;
+	hctx->ssthresh = ~0U;
 
 	/*
 	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
 	 * packets for new connections, following the rules from [RFC3390]".
 	 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
 	 */
-	hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
+	hctx->cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
 
 	/* Make sure that Ack Ratio is enabled and within bounds. */
-	max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2);
+	max_ratio = DIV_ROUND_UP(hctx->cwnd, 2);
 	if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
 		dp->dccps_l_ack_ratio = max_ratio;
 
@@ -727,13 +724,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	if (ccid2_hc_tx_alloc_seq(hctx))
 		return -ENOMEM;
 
-	hctx->ccid2hctx_rto	 = 3 * HZ;
+	hctx->rto	 = 3 * HZ;
 	ccid2_change_srtt(hctx, -1);
-	hctx->ccid2hctx_rttvar	 = -1;
-	hctx->ccid2hctx_rpdupack = -1;
-	hctx->ccid2hctx_last_cong = jiffies;
-	setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire,
-			(unsigned long)sk);
+	hctx->rttvar	= -1;
+	hctx->rpdupack  = -1;
+	hctx->last_cong = jiffies;
+	setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk);
 
 	ccid2_hc_tx_check_sanity(hctx);
 	return 0;
@@ -746,9 +742,9 @@ static void ccid2_hc_tx_exit(struct sock *sk)
 
 	ccid2_hc_tx_kill_rto_timer(sk);
 
-	for (i = 0; i < hctx->ccid2hctx_seqbufc; i++)
-		kfree(hctx->ccid2hctx_seqbuf[i]);
-	hctx->ccid2hctx_seqbufc = 0;
+	for (i = 0; i < hctx->seqbufc; i++)
+		kfree(hctx->seqbuf[i]);
+	hctx->seqbufc = 0;
 }
 
 static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -759,10 +755,10 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	switch (DCCP_SKB_CB(skb)->dccpd_type) {
 	case DCCP_PKT_DATA:
 	case DCCP_PKT_DATAACK:
-		hcrx->ccid2hcrx_data++;
-		if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) {
+		hcrx->data++;
+		if (hcrx->data >= dp->dccps_r_ack_ratio) {
 			dccp_send_ack(sk);
-			hcrx->ccid2hcrx_data = 0;
+			hcrx->data = 0;
 		}
 		break;
 	}
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 2c94ca02901..d7815804bce 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -42,34 +42,34 @@ struct ccid2_seq {
 
 /** struct ccid2_hc_tx_sock - CCID2 TX half connection
  *
- * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
- * @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465)
- * @ccid2hctx_lastrtt -time RTT was last measured
- * @ccid2hctx_rpseq - last consecutive seqno
- * @ccid2hctx_rpdupack - dupacks since rpseq
-*/
+ * @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
+ * @packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
+ * @lastrtt: time RTT was last measured
+ * @rpseq: last consecutive seqno
+ * @rpdupack: dupacks since rpseq
+ */
 struct ccid2_hc_tx_sock {
-	u32			ccid2hctx_cwnd;
-	u32			ccid2hctx_ssthresh;
-	u32			ccid2hctx_pipe;
-	u32			ccid2hctx_packets_acked;
-	struct ccid2_seq	*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
-	int			ccid2hctx_seqbufc;
-	struct ccid2_seq	*ccid2hctx_seqh;
-	struct ccid2_seq	*ccid2hctx_seqt;
-	long			ccid2hctx_rto;
-	long			ccid2hctx_srtt;
-	long			ccid2hctx_rttvar;
-	unsigned long		ccid2hctx_lastrtt;
-	struct timer_list	ccid2hctx_rtotimer;
-	u64			ccid2hctx_rpseq;
-	int			ccid2hctx_rpdupack;
-	unsigned long		ccid2hctx_last_cong;
-	u64			ccid2hctx_high_ack;
+	u32			cwnd;
+	u32			ssthresh;
+	u32			pipe;
+	u32			packets_acked;
+	struct ccid2_seq	*seqbuf[CCID2_SEQBUF_MAX];
+	int			seqbufc;
+	struct ccid2_seq	*seqh;
+	struct ccid2_seq	*seqt;
+	long			rto;
+	long			srtt;
+	long			rttvar;
+	unsigned long		lastrtt;
+	struct timer_list	rtotimer;
+	u64			rpseq;
+	int			rpdupack;
+	unsigned long		last_cong;
+	u64			high_ack;
 };
 
 struct ccid2_hc_rx_sock {
-	int	ccid2hcrx_data;
+	int			data;
 };
 
 static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
-- 
cgit v1.2.3-70-g09d2


From 842d1ef14ff37e9611eab479f31a0d74c1a5c4c0 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Remove ccid3hc{tx,rx}_ prefixes

This patch does the same for CCID-3 as the previous patch for CCID-2:

        s#ccid3hctx_##g;
        s#ccid3hcrx_##g;

plus manual editing to retain consistency.

Please note: expanded the fields of the `struct tfrc_tx_info' in the hc_tx_sock,
since using short #define identifiers is not a good idea. The only place where
this embedded struct was used is ccid3_hc_tx_getsockopt().

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 337 +++++++++++++++++++++++--------------------------
 net/dccp/ccids/ccid3.h | 119 +++++++++--------
 net/dccp/probe.c       |   6 +-
 3 files changed, 220 insertions(+), 242 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 3b8bd7ca676..2f026ce2714 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -67,13 +67,13 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
 				  enum ccid3_hc_tx_states state)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
+	enum ccid3_hc_tx_states oldstate = hctx->state;
 
 	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
 		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
 		       ccid3_tx_state_name(state));
 	WARN_ON(state == oldstate);
-	hctx->ccid3hctx_state = state;
+	hctx->state = state;
 }
 
 /*
@@ -88,10 +88,9 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
 static inline u64 rfc3390_initial_rate(struct sock *sk)
 {
 	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	const __u32 w_init = clamp_t(__u32, 4380U,
-			2 * hctx->ccid3hctx_s, 4 * hctx->ccid3hctx_s);
+	const __u32 w_init = clamp_t(__u32, 4380U, 2 * hctx->s, 4 * hctx->s);
 
-	return scaled_div(w_init << 6, hctx->ccid3hctx_rtt);
+	return scaled_div(w_init << 6, hctx->rtt);
 }
 
 /*
@@ -100,24 +99,20 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
 static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
 {
 	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
-	hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
-					     hctx->ccid3hctx_x);
+	hctx->t_ipi = scaled_div32(((u64)hctx->s) << 6, hctx->x);
 
 	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
-	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
-					   TFRC_OPSYS_HALF_TIME_GRAN);
-
-	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n",
-		       hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
-		       hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
+	hctx->delta = min_t(u32, hctx->t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN);
 
+	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hctx->t_ipi,
+		       hctx->delta, hctx->s, (unsigned)(hctx->x >> 6));
 }
 
 static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
 {
-	u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
+	u32 delta = ktime_us_delta(now, hctx->t_last_win_count);
 
-	return delta / hctx->ccid3hctx_rtt;
+	return delta / hctx->rtt;
 }
 
 /**
@@ -133,8 +128,8 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
 static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	__u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
-	const  __u64 old_x = hctx->ccid3hctx_x;
+	u64 min_rate = 2 * hctx->x_recv;
+	const u64 old_x = hctx->x;
 	ktime_t now = stamp ? *stamp : ktime_get_real();
 
 	/*
@@ -145,33 +140,27 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
 	 */
 	if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) {
 		min_rate = rfc3390_initial_rate(sk);
-		min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
+		min_rate = max(min_rate, 2 * hctx->x_recv);
 	}
 
-	if (hctx->ccid3hctx_p > 0) {
+	if (hctx->p > 0) {
 
-		hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
-					min_rate);
-		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
-					(((__u64)hctx->ccid3hctx_s) << 6) /
-								TFRC_T_MBI);
+		hctx->x = min(((u64)hctx->x_calc) << 6, min_rate);
+		hctx->x = max(hctx->x, (((u64)hctx->s) << 6) / TFRC_T_MBI);
 
-	} else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld)
-				- (s64)hctx->ccid3hctx_rtt >= 0) {
+	} else if (ktime_us_delta(now, hctx->t_ld) - (s64)hctx->rtt >= 0) {
 
-		hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate);
-		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
-			    scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
-				       hctx->ccid3hctx_rtt));
-		hctx->ccid3hctx_t_ld = now;
+		hctx->x = min(2 * hctx->x, min_rate);
+		hctx->x = max(hctx->x,
+			      scaled_div(((u64)hctx->s) << 6, hctx->rtt));
+		hctx->t_ld = now;
 	}
 
-	if (hctx->ccid3hctx_x != old_x) {
+	if (hctx->x != old_x) {
 		ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
 			       "X_recv=%u\n", (unsigned)(old_x >> 6),
-			       (unsigned)(hctx->ccid3hctx_x >> 6),
-			       hctx->ccid3hctx_x_calc,
-			       (unsigned)(hctx->ccid3hctx_x_recv >> 6));
+			       (unsigned)(hctx->x >> 6), hctx->x_calc,
+			       (unsigned)(hctx->x_recv >> 6));
 
 		ccid3_update_send_interval(hctx);
 	}
@@ -183,11 +172,11 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
  */
 static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
 {
-	const u16 old_s = hctx->ccid3hctx_s;
+	const u16 old_s = hctx->s;
 
-	hctx->ccid3hctx_s = tfrc_ewma(hctx->ccid3hctx_s, len, 9);
+	hctx->s = tfrc_ewma(hctx->s, len, 9);
 
-	if (hctx->ccid3hctx_s != old_s)
+	if (hctx->s != old_s)
 		ccid3_update_send_interval(hctx);
 }
 
@@ -198,13 +187,13 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
 static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
 						ktime_t now)
 {
-	u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count),
-	    quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt;
+	u32 delta = ktime_us_delta(now, hctx->t_last_win_count),
+	    quarter_rtts = (4 * delta) / hctx->rtt;
 
 	if (quarter_rtts > 0) {
-		hctx->ccid3hctx_t_last_win_count = now;
-		hctx->ccid3hctx_last_win_count  += min(quarter_rtts, 5U);
-		hctx->ccid3hctx_last_win_count	&= 0xF;		/* mod 16 */
+		hctx->t_last_win_count = now;
+		hctx->last_win_count  += min(quarter_rtts, 5U);
+		hctx->last_win_count  &= 0xF;		/* mod 16 */
 	}
 }
 
@@ -222,23 +211,21 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 	}
 
 	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
-		       ccid3_tx_state_name(hctx->ccid3hctx_state));
+		       ccid3_tx_state_name(hctx->state));
 
-	if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK)
+	if (hctx->state == TFRC_SSTATE_FBACK)
 		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-	else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+	else if (hctx->state != TFRC_SSTATE_NO_FBACK)
 		goto out;
 
 	/*
 	 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
+	 * RTO is 0 if and only if no feedback has been received yet.
 	 */
-	if (hctx->ccid3hctx_t_rto == 0 ||	/* no feedback received yet */
-	    hctx->ccid3hctx_p == 0) {
+	if (hctx->t_rto == 0 || hctx->p == 0) {
 
 		/* halve send rate directly */
-		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
-					(((__u64)hctx->ccid3hctx_s) << 6) /
-								    TFRC_T_MBI);
+		hctx->x = max(hctx->x / 2, (((u64)hctx->s) << 6) / TFRC_T_MBI);
 		ccid3_update_send_interval(hctx);
 	} else {
 		/*
@@ -251,33 +238,32 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		 *
 		 *  Note that X_recv is scaled by 2^6 while X_calc is not
 		 */
-		BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
+		BUG_ON(hctx->p && !hctx->x_calc);
 
-		if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))
-			hctx->ccid3hctx_x_recv =
-				max(hctx->ccid3hctx_x_recv / 2,
-				    (((__u64)hctx->ccid3hctx_s) << 6) /
-							      (2 * TFRC_T_MBI));
+		if (hctx->x_calc > (hctx->x_recv >> 5))
+			hctx->x_recv =
+				max(hctx->x_recv / 2,
+				    (((__u64)hctx->s) << 6) / (2 * TFRC_T_MBI));
 		else {
-			hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
-			hctx->ccid3hctx_x_recv <<= 4;
+			hctx->x_recv = hctx->x_calc;
+			hctx->x_recv <<= 4;
 		}
 		ccid3_hc_tx_update_x(sk, NULL);
 	}
 	ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n",
-			(unsigned long long)hctx->ccid3hctx_x);
+			(unsigned long long)hctx->x);
 
 	/*
 	 * Set new timeout for the nofeedback timer.
 	 * See comments in packet_recv() regarding the value of t_RTO.
 	 */
-	if (unlikely(hctx->ccid3hctx_t_rto == 0))	/* no feedback yet */
+	if (unlikely(hctx->t_rto == 0))		/* no feedback received yet */
 		t_nfb = TFRC_INITIAL_TIMEOUT;
 	else
-		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
+		t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi);
 
 restart_timer:
-	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+	sk_reset_timer(sk, &hctx->no_feedback_timer,
 			   jiffies + usecs_to_jiffies(t_nfb));
 out:
 	bh_unlock_sock(sk);
@@ -305,18 +291,17 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(skb->len == 0))
 		return -EBADMSG;
 
-	switch (hctx->ccid3hctx_state) {
+	switch (hctx->state) {
 	case TFRC_SSTATE_NO_SENT:
-		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
-			       (jiffies +
+		sk_reset_timer(sk, &hctx->no_feedback_timer, (jiffies +
 				usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
-		hctx->ccid3hctx_last_win_count	 = 0;
-		hctx->ccid3hctx_t_last_win_count = now;
+		hctx->last_win_count   = 0;
+		hctx->t_last_win_count = now;
 
 		/* Set t_0 for initial packet */
-		hctx->ccid3hctx_t_nom = now;
+		hctx->t_nom = now;
 
-		hctx->ccid3hctx_s = skb->len;
+		hctx->s = skb->len;
 
 		/*
 		 * Use initial RTT sample when available: recommended by erratum
@@ -325,9 +310,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (dp->dccps_syn_rtt) {
 			ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
-			hctx->ccid3hctx_rtt  = dp->dccps_syn_rtt;
-			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
-			hctx->ccid3hctx_t_ld = now;
+			hctx->rtt  = dp->dccps_syn_rtt;
+			hctx->x    = rfc3390_initial_rate(sk);
+			hctx->t_ld = now;
 		} else {
 			/*
 			 * Sender does not have RTT sample:
@@ -335,9 +320,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 			 *   is needed in several parts (e.g.  window counter);
 			 * - set sending rate X_pps = 1pps as per RFC 3448, 4.2.
 			 */
-			hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT;
-			hctx->ccid3hctx_x   = hctx->ccid3hctx_s;
-			hctx->ccid3hctx_x <<= 6;
+			hctx->rtt = DCCP_FALLBACK_RTT;
+			hctx->x   = hctx->s;
+			hctx->x <<= 6;
 		}
 		ccid3_update_send_interval(hctx);
 
@@ -345,7 +330,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		break;
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
-		delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now);
+		delay = ktime_us_delta(hctx->t_nom, now);
 		ccid3_pr_debug("delay=%ld\n", (long)delay);
 		/*
 		 *	Scheduling of packet transmissions [RFC 3448, 4.6]
@@ -355,7 +340,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		 * else
 		 *       // send the packet in (t_nom - t_now) milliseconds.
 		 */
-		if (delay - (s64)hctx->ccid3hctx_delta >= 1000)
+		if (delay - (s64)hctx->delta >= 1000)
 			return (u32)delay / 1000L;
 
 		ccid3_hc_tx_update_win_count(hctx, now);
@@ -367,11 +352,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 
 	/* prepare to send now (add options etc.) */
 	dp->dccps_hc_tx_insert_options = 1;
-	DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+	DCCP_SKB_CB(skb)->dccpd_ccval  = hctx->last_win_count;
 
 	/* set the nominal send time for the next following packet */
-	hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom,
-					     hctx->ccid3hctx_t_ipi);
+	hctx->t_nom = ktime_add_us(hctx->t_nom, hctx->t_ipi);
 	return 0;
 }
 
@@ -382,14 +366,14 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
 
 	ccid3_hc_tx_update_s(hctx, len);
 
-	if (tfrc_tx_hist_add(&hctx->ccid3hctx_hist, dccp_sk(sk)->dccps_gss))
+	if (tfrc_tx_hist_add(&hctx->hist, dccp_sk(sk)->dccps_gss))
 		DCCP_CRIT("packet history - out of memory!");
 }
 
 static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	struct ccid3_options_received *opt_recv;
+	struct ccid3_options_received *opt_recv = &hctx->options_received;
 	ktime_t now;
 	unsigned long t_nfb;
 	u32 pinv, r_sample;
@@ -399,15 +383,14 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
 		return;
 	/* ... and only in the established state */
-	if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK &&
-	    hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+	if (hctx->state != TFRC_SSTATE_FBACK &&
+	    hctx->state != TFRC_SSTATE_NO_FBACK)
 		return;
 
-	opt_recv = &hctx->ccid3hctx_options_received;
 	now = ktime_get_real();
 
 	/* Estimate RTT from history if ACK number is valid */
-	r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist,
+	r_sample = tfrc_tx_hist_rtt(hctx->hist,
 				    DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
 	if (r_sample == 0) {
 		DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
@@ -417,37 +400,37 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* Update receive rate in units of 64 * bytes/second */
-	hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
-	hctx->ccid3hctx_x_recv <<= 6;
+	hctx->x_recv = opt_recv->ccid3or_receive_rate;
+	hctx->x_recv <<= 6;
 
 	/* Update loss event rate (which is scaled by 1e6) */
 	pinv = opt_recv->ccid3or_loss_event_rate;
 	if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
-		hctx->ccid3hctx_p = 0;
+		hctx->p = 0;
 	else				       /* can not exceed 100% */
-		hctx->ccid3hctx_p = scaled_div(1, pinv);
+		hctx->p = scaled_div(1, pinv);
 	/*
 	 * Validate new RTT sample and update moving average
 	 */
 	r_sample = dccp_sample_rtt(sk, r_sample);
-	hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
+	hctx->rtt = tfrc_ewma(hctx->rtt, r_sample, 9);
 	/*
 	 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
 	 */
-	if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
+	if (hctx->state == TFRC_SSTATE_NO_FBACK) {
 		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
 
-		if (hctx->ccid3hctx_t_rto == 0) {
+		if (hctx->t_rto == 0) {
 			/*
 			 * Initial feedback packet: Larger Initial Windows (4.2)
 			 */
-			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
-			hctx->ccid3hctx_t_ld = now;
+			hctx->x    = rfc3390_initial_rate(sk);
+			hctx->t_ld = now;
 
 			ccid3_update_send_interval(hctx);
 
 			goto done_computing_x;
-		} else if (hctx->ccid3hctx_p == 0) {
+		} else if (hctx->p == 0) {
 			/*
 			 * First feedback after nofeedback timer expiry (4.3)
 			 */
@@ -456,25 +439,20 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
-	if (hctx->ccid3hctx_p > 0)
-		hctx->ccid3hctx_x_calc =
-				tfrc_calc_x(hctx->ccid3hctx_s,
-					    hctx->ccid3hctx_rtt,
-					    hctx->ccid3hctx_p);
+	if (hctx->p > 0)
+		hctx->x_calc = tfrc_calc_x(hctx->s, hctx->rtt, hctx->p);
 	ccid3_hc_tx_update_x(sk, &now);
 
 done_computing_x:
 	ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
 			       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
-			       dccp_role(sk),
-			       sk, hctx->ccid3hctx_rtt, r_sample,
-			       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
-			       hctx->ccid3hctx_x_calc,
-			       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
-			       (unsigned)(hctx->ccid3hctx_x >> 6));
+			       dccp_role(sk), sk, hctx->rtt, r_sample,
+			       hctx->s, hctx->p, hctx->x_calc,
+			       (unsigned)(hctx->x_recv >> 6),
+			       (unsigned)(hctx->x >> 6));
 
 	/* unschedule no feedback timer */
-	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+	sk_stop_timer(sk, &hctx->no_feedback_timer);
 
 	/*
 	 * As we have calculated new ipi, delta, t_nom it is possible
@@ -488,21 +466,19 @@ done_computing_x:
 	 * This can help avoid triggering the nofeedback timer too
 	 * often ('spinning') on LANs with small RTTs.
 	 */
-	hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
-					   (CONFIG_IP_DCCP_CCID3_RTO *
-					    (USEC_PER_SEC / 1000)));
+	hctx->t_rto = max_t(u32, 4 * hctx->rtt, (CONFIG_IP_DCCP_CCID3_RTO *
+						 (USEC_PER_SEC / 1000)));
 	/*
 	 * Schedule no feedback timer to expire in
 	 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
 	 */
-	t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
+	t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi);
 
 	ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
 		       "expire in %lu jiffies (%luus)\n",
-		       dccp_role(sk),
-		       sk, usecs_to_jiffies(t_nfb), t_nfb);
+		       dccp_role(sk), sk, usecs_to_jiffies(t_nfb), t_nfb);
 
-	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+	sk_reset_timer(sk, &hctx->no_feedback_timer,
 			   jiffies + usecs_to_jiffies(t_nfb));
 }
 
@@ -513,11 +489,9 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
 	int rc = 0;
 	const struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	struct ccid3_options_received *opt_recv;
+	struct ccid3_options_received *opt_recv = &hctx->options_received;
 	__be32 opt_val;
 
-	opt_recv = &hctx->ccid3hctx_options_received;
-
 	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
 		opt_recv->ccid3or_seqno		     = dp->dccps_gsr;
 		opt_recv->ccid3or_loss_event_rate    = ~0;
@@ -572,11 +546,10 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
 
-	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
-	hctx->ccid3hctx_hist = NULL;
-	setup_timer(&hctx->ccid3hctx_no_feedback_timer,
-			ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
-
+	hctx->state = TFRC_SSTATE_NO_SENT;
+	hctx->hist  = NULL;
+	setup_timer(&hctx->no_feedback_timer,
+		    ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
 	return 0;
 }
 
@@ -585,9 +558,9 @@ static void ccid3_hc_tx_exit(struct sock *sk)
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
 	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
-	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+	sk_stop_timer(sk, &hctx->no_feedback_timer);
 
-	tfrc_tx_hist_purge(&hctx->ccid3hctx_hist);
+	tfrc_tx_hist_purge(&hctx->hist);
 }
 
 static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
@@ -599,14 +572,15 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
 		return;
 
 	hctx = ccid3_hc_tx_sk(sk);
-	info->tcpi_rto = hctx->ccid3hctx_t_rto;
-	info->tcpi_rtt = hctx->ccid3hctx_rtt;
+	info->tcpi_rto = hctx->t_rto;
+	info->tcpi_rtt = hctx->rtt;
 }
 
 static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 				  u32 __user *optval, int __user *optlen)
 {
 	const struct ccid3_hc_tx_sock *hctx;
+	struct tfrc_tx_info tfrc;
 	const void *val;
 
 	/* Listen socks doesn't have a private CCID block */
@@ -616,10 +590,17 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 	hctx = ccid3_hc_tx_sk(sk);
 	switch (optname) {
 	case DCCP_SOCKOPT_CCID_TX_INFO:
-		if (len < sizeof(hctx->ccid3hctx_tfrc))
+		if (len < sizeof(tfrc))
 			return -EINVAL;
-		len = sizeof(hctx->ccid3hctx_tfrc);
-		val = &hctx->ccid3hctx_tfrc;
+		tfrc.tfrctx_x	   = hctx->x;
+		tfrc.tfrctx_x_recv = hctx->x_recv;
+		tfrc.tfrctx_x_calc = hctx->x_calc;
+		tfrc.tfrctx_rtt	   = hctx->rtt;
+		tfrc.tfrctx_p	   = hctx->p;
+		tfrc.tfrctx_rto	   = hctx->t_rto;
+		tfrc.tfrctx_ipi	   = hctx->t_ipi;
+		len = sizeof(tfrc);
+		val = &tfrc;
 		break;
 	default:
 		return -ENOPROTOOPT;
@@ -660,13 +641,13 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
 				  enum ccid3_hc_rx_states state)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
+	enum ccid3_hc_rx_states oldstate = hcrx->state;
 
 	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
 		       dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
 		       ccid3_rx_state_name(state));
 	WARN_ON(state == oldstate);
-	hcrx->ccid3hcrx_state = state;
+	hcrx->state = state;
 }
 
 static void ccid3_hc_rx_send_feedback(struct sock *sk,
@@ -678,15 +659,15 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 	ktime_t now;
 	s64 delta = 0;
 
-	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
+	if (unlikely(hcrx->state == TFRC_RSTATE_TERM))
 		return;
 
 	now = ktime_get_real();
 
 	switch (fbtype) {
 	case CCID3_FBACK_INITIAL:
-		hcrx->ccid3hcrx_x_recv = 0;
-		hcrx->ccid3hcrx_pinv   = ~0U;   /* see RFC 4342, 8.5 */
+		hcrx->x_recv = 0;
+		hcrx->p_inverse = ~0U;   /* see RFC 4342, 8.5 */
 		break;
 	case CCID3_FBACK_PARAM_CHANGE:
 		/*
@@ -699,27 +680,26 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 		 * the number of bytes since last feedback.
 		 * This is a safe fallback, since X is bounded above by X_calc.
 		 */
-		if (hcrx->ccid3hcrx_x_recv > 0)
+		if (hcrx->x_recv > 0)
 			break;
 		/* fall through */
 	case CCID3_FBACK_PERIODIC:
-		delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback);
+		delta = ktime_us_delta(now, hcrx->tstamp_last_feedback);
 		if (delta <= 0)
 			DCCP_BUG("delta (%ld) <= 0", (long)delta);
 		else
-			hcrx->ccid3hcrx_x_recv =
-				scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+			hcrx->x_recv = scaled_div32(hcrx->bytes_recv, delta);
 		break;
 	default:
 		return;
 	}
 
-	ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
-		       hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv);
+	ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n",
+		       (long)delta, hcrx->x_recv, hcrx->p_inverse);
 
-	hcrx->ccid3hcrx_tstamp_last_feedback = now;
-	hcrx->ccid3hcrx_last_counter	     = dccp_hdr(skb)->dccph_ccval;
-	hcrx->ccid3hcrx_bytes_recv	     = 0;
+	hcrx->tstamp_last_feedback = now;
+	hcrx->last_counter	   = dccp_hdr(skb)->dccph_ccval;
+	hcrx->bytes_recv	   = 0;
 
 	dp->dccps_hc_rx_insert_options = 1;
 	dccp_send_ack(sk);
@@ -738,8 +718,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	if (dccp_packet_without_ack(skb))
 		return 0;
 
-	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
-	pinv   = htonl(hcrx->ccid3hcrx_pinv);
+	x_recv = htonl(hcrx->x_recv);
+	pinv   = htonl(hcrx->p_inverse);
 
 	if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
 			       &pinv, sizeof(pinv)) ||
@@ -765,22 +745,23 @@ static u32 ccid3_first_li(struct sock *sk)
 	u32 x_recv, p, delta;
 	u64 fval;
 
-	if (hcrx->ccid3hcrx_rtt == 0) {
+	if (hcrx->rtt == 0) {
 		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
-		hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT;
+		hcrx->rtt = DCCP_FALLBACK_RTT;
 	}
 
-	delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback));
-	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+	delta = ktime_to_us(net_timedelta(hcrx->tstamp_last_feedback));
+	x_recv = scaled_div32(hcrx->bytes_recv, delta);
 	if (x_recv == 0) {		/* would also trigger divide-by-zero */
 		DCCP_WARN("X_recv==0\n");
-		if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
+		if (hcrx->x_recv == 0) {
 			DCCP_BUG("stored value of X_recv is zero");
 			return ~0U;
 		}
+		x_recv = hcrx->x_recv;
 	}
 
-	fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt);
+	fval = scaled_div(hcrx->s, hcrx->rtt);
 	fval = scaled_div32(fval, x_recv);
 	p = tfrc_calc_x_reverse_lookup(fval);
 
@@ -797,14 +778,14 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
 	const bool is_data_packet = dccp_data_packet(skb);
 
-	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
+	if (unlikely(hcrx->state == TFRC_RSTATE_NO_DATA)) {
 		if (is_data_packet) {
 			const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
 			do_feedback = CCID3_FBACK_INITIAL;
 			ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
-			hcrx->ccid3hcrx_s = payload;
+			hcrx->s = payload;
 			/*
-			 * Not necessary to update ccid3hcrx_bytes_recv here,
+			 * Not necessary to update bytes_recv here,
 			 * since X_recv = 0 for the first feedback packet (cf.
 			 * RFC 3448, 6.3) -- gerrit
 			 */
@@ -812,7 +793,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		goto update_records;
 	}
 
-	if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb))
+	if (tfrc_rx_hist_duplicate(&hcrx->hist, skb))
 		return; /* done receiving */
 
 	if (is_data_packet) {
@@ -820,20 +801,20 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		/*
 		 * Update moving-average of s and the sum of received payload bytes
 		 */
-		hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9);
-		hcrx->ccid3hcrx_bytes_recv += payload;
+		hcrx->s = tfrc_ewma(hcrx->s, payload, 9);
+		hcrx->bytes_recv += payload;
 	}
 
 	/*
 	 * Perform loss detection and handle pending losses
 	 */
-	if (tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist,
+	if (tfrc_rx_handle_loss(&hcrx->hist, &hcrx->li_hist,
 				skb, ndp, ccid3_first_li, sk)) {
 		do_feedback = CCID3_FBACK_PARAM_CHANGE;
 		goto done_receiving;
 	}
 
-	if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist))
+	if (tfrc_rx_hist_loss_pending(&hcrx->hist))
 		return; /* done receiving */
 
 	/*
@@ -842,17 +823,17 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(!is_data_packet))
 		goto update_records;
 
-	if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) {
-		const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
+	if (!tfrc_lh_is_initialised(&hcrx->li_hist)) {
+		const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->hist, skb);
 		/*
 		 * Empty loss history: no loss so far, hence p stays 0.
 		 * Sample RTT values, since an RTT estimate is required for the
 		 * computation of p when the first loss occurs; RFC 3448, 6.3.1.
 		 */
 		if (sample != 0)
-			hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
+			hcrx->rtt = tfrc_ewma(hcrx->rtt, sample, 9);
 
-	} else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) {
+	} else if (tfrc_lh_update_i_mean(&hcrx->li_hist, skb)) {
 		/*
 		 * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean
 		 * has decreased (resp. p has increased), send feedback now.
@@ -863,11 +844,11 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	/*
 	 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
 	 */
-	if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3)
+	if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3)
 		do_feedback = CCID3_FBACK_PERIODIC;
 
 update_records:
-	tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
+	tfrc_rx_hist_add_packet(&hcrx->hist, skb, ndp);
 
 done_receiving:
 	if (do_feedback)
@@ -878,9 +859,9 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
 
-	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
-	tfrc_lh_init(&hcrx->ccid3hcrx_li_hist);
-	return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
+	hcrx->state = TFRC_RSTATE_NO_DATA;
+	tfrc_lh_init(&hcrx->li_hist);
+	return tfrc_rx_hist_alloc(&hcrx->hist);
 }
 
 static void ccid3_hc_rx_exit(struct sock *sk)
@@ -889,8 +870,8 @@ static void ccid3_hc_rx_exit(struct sock *sk)
 
 	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
 
-	tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist);
-	tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist);
+	tfrc_rx_hist_purge(&hcrx->hist);
+	tfrc_lh_cleanup(&hcrx->li_hist);
 }
 
 static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
@@ -902,9 +883,9 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
 		return;
 
 	hcrx = ccid3_hc_rx_sk(sk);
-	info->tcpi_ca_state = hcrx->ccid3hcrx_state;
+	info->tcpi_ca_state = hcrx->state;
 	info->tcpi_options  |= TCPI_OPT_TIMESTAMPS;
-	info->tcpi_rcv_rtt  = hcrx->ccid3hcrx_rtt;
+	info->tcpi_rcv_rtt  = hcrx->rtt;
 }
 
 static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
@@ -923,10 +904,10 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 	case DCCP_SOCKOPT_CCID_RX_INFO:
 		if (len < sizeof(rx_info))
 			return -EINVAL;
-		rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv;
-		rx_info.tfrcrx_rtt    = hcrx->ccid3hcrx_rtt;
-		rx_info.tfrcrx_p      = hcrx->ccid3hcrx_pinv == 0 ? ~0U :
-					   scaled_div(1, hcrx->ccid3hcrx_pinv);
+		rx_info.tfrcrx_x_recv = hcrx->x_recv;
+		rx_info.tfrcrx_rtt    = hcrx->rtt;
+		rx_info.tfrcrx_p      = hcrx->p_inverse == 0 ? ~0U :
+					   scaled_div(1, hcrx->p_inverse);
 		len = sizeof(rx_info);
 		val = &rx_info;
 		break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 49ca32bd7e7..0cfcfff8f5f 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -77,44 +77,43 @@ enum ccid3_hc_tx_states {
 
 /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
  *
- * @ccid3hctx_x - Current sending rate in 64 * bytes per second
- * @ccid3hctx_x_recv - Receive rate    in 64 * bytes per second
- * @ccid3hctx_x_calc - Calculated rate in bytes per second
- * @ccid3hctx_rtt - Estimate of current round trip time in usecs
- * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
- * @ccid3hctx_s - Packet size in bytes
- * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs
- * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
- * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
- * @ccid3hctx_last_win_count - Last window counter sent
- * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
- *				 with last_win_count value sent
- * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
- * @ccid3hctx_t_ld - Time last doubled during slow start
- * @ccid3hctx_t_nom - Nominal send time of next packet
- * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
- * @ccid3hctx_hist - Packet history
- * @ccid3hctx_options_received - Parsed set of retrieved options
+ * @x - Current sending rate in 64 * bytes per second
+ * @x_recv - Receive rate    in 64 * bytes per second
+ * @x_calc - Calculated rate in bytes per second
+ * @rtt - Estimate of current round trip time in usecs
+ * @p - Current loss event rate (0-1) scaled by 1000000
+ * @s - Packet size in bytes
+ * @t_rto - Nofeedback Timer setting in usecs
+ * @t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
+ * @state - Sender state, one of %ccid3_hc_tx_states
+ * @last_win_count - Last window counter sent
+ * @t_last_win_count - Timestamp of earliest packet with
+ *                     last_win_count value sent
+ * @no_feedback_timer - Handle to no feedback timer
+ * @t_ld - Time last doubled during slow start
+ * @t_nom - Nominal send time of next packet
+ * @delta - Send timer delta (RFC 3448, 4.6) in usecs
+ * @hist - Packet history
+ * @options_received - Parsed set of retrieved options
  */
 struct ccid3_hc_tx_sock {
-	struct tfrc_tx_info		ccid3hctx_tfrc;
-#define ccid3hctx_x			ccid3hctx_tfrc.tfrctx_x
-#define ccid3hctx_x_recv		ccid3hctx_tfrc.tfrctx_x_recv
-#define ccid3hctx_x_calc		ccid3hctx_tfrc.tfrctx_x_calc
-#define ccid3hctx_rtt			ccid3hctx_tfrc.tfrctx_rtt
-#define ccid3hctx_p			ccid3hctx_tfrc.tfrctx_p
-#define ccid3hctx_t_rto			ccid3hctx_tfrc.tfrctx_rto
-#define ccid3hctx_t_ipi			ccid3hctx_tfrc.tfrctx_ipi
-	u16				ccid3hctx_s;
-	enum ccid3_hc_tx_states		ccid3hctx_state:8;
-	u8				ccid3hctx_last_win_count;
-	ktime_t				ccid3hctx_t_last_win_count;
-	struct timer_list		ccid3hctx_no_feedback_timer;
-	ktime_t				ccid3hctx_t_ld;
-	ktime_t				ccid3hctx_t_nom;
-	u32				ccid3hctx_delta;
-	struct tfrc_tx_hist_entry	*ccid3hctx_hist;
-	struct ccid3_options_received	ccid3hctx_options_received;
+	u64				x;
+	u64				x_recv;
+	u32				x_calc;
+	u32				rtt;
+	u32				p;
+	u32				t_rto;
+	u32				t_ipi;
+	u16				s;
+	enum ccid3_hc_tx_states		state:8;
+	u8				last_win_count;
+	ktime_t				t_last_win_count;
+	struct timer_list		no_feedback_timer;
+	ktime_t				t_ld;
+	ktime_t				t_nom;
+	u32				delta;
+	struct tfrc_tx_hist_entry	*hist;
+	struct ccid3_options_received	options_received;
 };
 
 static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
@@ -133,32 +132,32 @@ enum ccid3_hc_rx_states {
 
 /** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
  *
- *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448 4.3)
- *  @ccid3hcrx_rtt  -  Receiver estimate of rtt (non-standard)
- *  @ccid3hcrx_p  -  Current loss event rate (RFC 3448 5.4)
- *  @ccid3hcrx_last_counter  -  Tracks window counter (RFC 4342, 8.1)
- *  @ccid3hcrx_state  -  Receiver state, one of %ccid3_hc_rx_states
- *  @ccid3hcrx_bytes_recv  -  Total sum of DCCP payload bytes
- *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
- *  @ccid3hcrx_rtt  -  Receiver estimate of RTT
- *  @ccid3hcrx_tstamp_last_feedback  -  Time at which last feedback was sent
- *  @ccid3hcrx_tstamp_last_ack  -  Time at which last feedback was sent
- *  @ccid3hcrx_hist  -  Packet history (loss detection + RTT sampling)
- *  @ccid3hcrx_li_hist  -  Loss Interval database
- *  @ccid3hcrx_s  -  Received packet size in bytes
- *  @ccid3hcrx_pinv  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
+ *  @x_recv  -  Receiver estimate of send rate (RFC 3448 4.3)
+ *  @rtt  -  Receiver estimate of rtt (non-standard)
+ *  @p  -  Current loss event rate (RFC 3448 5.4)
+ *  @last_counter  -  Tracks window counter (RFC 4342, 8.1)
+ *  @state  -  Receiver state, one of %ccid3_hc_rx_states
+ *  @bytes_recv  -  Total sum of DCCP payload bytes
+ *  @x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
+ *  @rtt  -  Receiver estimate of RTT
+ *  @tstamp_last_feedback  -  Time at which last feedback was sent
+ *  @tstamp_last_ack  -  Time at which last feedback was sent
+ *  @hist  -  Packet history (loss detection + RTT sampling)
+ *  @li_hist  -  Loss Interval database
+ *  @s  -  Received packet size in bytes
+ *  @p_inverse  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
  */
 struct ccid3_hc_rx_sock {
-	u8				ccid3hcrx_last_counter:4;
-	enum ccid3_hc_rx_states		ccid3hcrx_state:8;
-	u32				ccid3hcrx_bytes_recv;
-	u32				ccid3hcrx_x_recv;
-	u32				ccid3hcrx_rtt;
-	ktime_t				ccid3hcrx_tstamp_last_feedback;
-	struct tfrc_rx_hist		ccid3hcrx_hist;
-	struct tfrc_loss_hist		ccid3hcrx_li_hist;
-	u16				ccid3hcrx_s;
-#define ccid3hcrx_pinv			ccid3hcrx_li_hist.i_mean
+	u8				last_counter:4;
+	enum ccid3_hc_rx_states		state:8;
+	u32				bytes_recv;
+	u32				x_recv;
+	u32				rtt;
+	ktime_t				tstamp_last_feedback;
+	struct tfrc_rx_hist		hist;
+	struct tfrc_loss_hist		li_hist;
+	u16				s;
+#define p_inverse			li_hist.i_mean
 };
 
 static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 9ca783d7467..a87fd4fc1b3 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -87,10 +87,8 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			       "%llu %llu %d\n",
 			       NIPQUAD(inet->saddr), ntohs(inet->sport),
 			       NIPQUAD(inet->daddr), ntohs(inet->dport), size,
-			       hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
-			       hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
-			       hctx->ccid3hctx_x_recv >> 6,
-			       hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
+			       hctx->s, hctx->rtt, hctx->p, hctx->x_calc,
+			       hctx->x_recv >> 6, hctx->x >> 6, hctx->t_ipi);
 		else
 			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
 			       NIPQUAD(inet->saddr), ntohs(inet->sport),
-- 
cgit v1.2.3-70-g09d2


From b2e317f4b5ae73733963c702fae0f246d234100b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: No more CCID control blocks in LISTEN state

The CCIDs are activated as last of the features, at the end of the handshake,
were the LISTEN state of the master socket is inherited into the server
state of the child socket. Thus, the only states visible to CCIDs now are
OPEN/PARTOPEN, and the closing states.

This allows to remove tests which were previously necessary to protect
against referencing a socket in the listening state (in CCID3), but which
now have become redundant.

As a further byproduct of enabling the CCIDs only after the connection has been
fully established, several typecast-initialisations of ccid3_hc_{rx,tx}_sock
can now be eliminated:
 * the CCID is loaded, so it is not necessary to test if it is NULL,
 * if it is possible to load a CCID and leave the private area NULL, then this
    is a bug, which should crash loudly - and earlier,
 * the test for state==OPEN || state==PARTOPEN now reduces only to the closing
   phase (e.g. when the node has received an unexpected Reset).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/ccids/ccid3.c | 40 +++++++---------------------------------
 1 file changed, 7 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 2f026ce2714..b4cc62e2e2b 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -565,29 +565,17 @@ static void ccid3_hc_tx_exit(struct sock *sk)
 
 static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
 {
-	struct ccid3_hc_tx_sock *hctx;
-
-	/* Listen socks doesn't have a private CCID block */
-	if (sk->sk_state == DCCP_LISTEN)
-		return;
-
-	hctx = ccid3_hc_tx_sk(sk);
-	info->tcpi_rto = hctx->t_rto;
-	info->tcpi_rtt = hctx->rtt;
+	info->tcpi_rto = ccid3_hc_tx_sk(sk)->t_rto;
+	info->tcpi_rtt = ccid3_hc_tx_sk(sk)->rtt;
 }
 
 static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 				  u32 __user *optval, int __user *optlen)
 {
-	const struct ccid3_hc_tx_sock *hctx;
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct tfrc_tx_info tfrc;
 	const void *val;
 
-	/* Listen socks doesn't have a private CCID block */
-	if (sk->sk_state == DCCP_LISTEN)
-		return -EINVAL;
-
-	hctx = ccid3_hc_tx_sk(sk);
 	switch (optname) {
 	case DCCP_SOCKOPT_CCID_TX_INFO:
 		if (len < sizeof(tfrc))
@@ -707,14 +695,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 
 static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 {
-	const struct ccid3_hc_rx_sock *hcrx;
+	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	__be32 x_recv, pinv;
 
 	if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
 		return 0;
 
-	hcrx = ccid3_hc_rx_sk(sk);
-
 	if (dccp_packet_without_ack(skb))
 		return 0;
 
@@ -876,30 +862,18 @@ static void ccid3_hc_rx_exit(struct sock *sk)
 
 static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
 {
-	const struct ccid3_hc_rx_sock *hcrx;
-
-	/* Listen socks doesn't have a private CCID block */
-	if (sk->sk_state == DCCP_LISTEN)
-		return;
-
-	hcrx = ccid3_hc_rx_sk(sk);
-	info->tcpi_ca_state = hcrx->state;
+	info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->state;
 	info->tcpi_options  |= TCPI_OPT_TIMESTAMPS;
-	info->tcpi_rcv_rtt  = hcrx->rtt;
+	info->tcpi_rcv_rtt  = ccid3_hc_rx_sk(sk)->rtt;
 }
 
 static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 				  u32 __user *optval, int __user *optlen)
 {
-	const struct ccid3_hc_rx_sock *hcrx;
+	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct tfrc_rx_info rx_info;
 	const void *val;
 
-	/* Listen socks doesn't have a private CCID block */
-	if (sk->sk_state == DCCP_LISTEN)
-		return -EINVAL;
-
-	hcrx = ccid3_hc_rx_sk(sk);
 	switch (optname) {
 	case DCCP_SOCKOPT_CCID_RX_INFO:
 		if (len < sizeof(rx_info))
-- 
cgit v1.2.3-70-g09d2


From de6f2b59e5cd15a8772adb732a1d80e141a77115 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Bug fix for the inter-packet scheduling algorithm

This fixes a subtle bug in the calculation of the inter-packet gap and shows
that t_delta, as it is currently used, is not needed. And hence replaced.

The algorithm from RFC 3448, 4.6 below continually computes a send time t_nom,
which is initialised with the current time t_now; t_gran = 1E6 / HZ specifies
the scheduling granularity, s the packet size, and X the sending rate:

  t_distance = t_nom - t_now;		// in microseconds
  t_delta    = min(t_ipi, t_gran) / 2;	// `delta' parameter in microseconds

  if (t_distance >= t_delta) {
	reschedule after (t_distance / 1000) milliseconds;
  } else {
  	t_ipi  = s / X;			// inter-packet interval in usec
	t_nom += t_ipi;			// compute the next send time
	send packet now;
  }


1) Description of the bug
-------------------------
Rescheduling requires a conversion into milliseconds, due to this call chain:

 * ccid3_hc_tx_send_packet() returns a timeout in milliseconds,
 * this value is converted by msecs_to_jiffies() in dccp_write_xmit(),
 * and finally used as jiffy-expires-value for sk_reset_timer().

The highest jiffy resolution with HZ=1000 is 1 millisecond, so using a higher
granularity does not make much sense here.

As a consequence, values of t_distance < 1000 are truncated to 0. This issue
has so far been resolved by using instead

  if (t_distance >= t_delta + 1000)
	reschedule after (t_distance / 1000) milliseconds;

The bug is in artificially inflating t_delta to t_delta' = t_delta + 1000. This
is unnecessarily large, a more adequate value is t_delta' = max(t_delta, 1000).


2) Consequences of using the corrected t_delta'
-----------------------------------------------
Since t_delta <= t_gran/2 = 10^6/(2*HZ), we have t_delta <= 1000 as long as
HZ >= 500. This means that t_delta' = max(1000, t_delta) is constant at 1000.

On the other hand, when using a coarse HZ value of HZ < 500, we have three
sub-cases that can all be reduced to using another constant of t_gran/2.

 (a) The first case arises when t_ipi > t_gran. Here t_delta' is the constant
     t_delta' = max(1000, t_gran/2) = t_gran/2.

 (b) If t_ipi <= 2000 < t_gran = 10^6/HZ usec, then t_delta = t_ipi/2 <= 1000,
     so that t_delta' = max(1000, t_delta) = 1000 < t_gran/2.

 (c) If 2000 < t_ipi <= t_gran, we have t_delta' = max(t_delta, 1000) = t_ipi/2.

In the second and third cases we have delay values less than t_gran/2, which is
in the order of less than or equal to half a jiffy.

How these are treated depends on how fractions of a jiffy are handled: they
are either always rounded down to 0, or always rounded up to 1 jiffy (assuming
non-zero values). In both cases the error is on average in the order of 50%.

Thus we are not increasing the error when in the second/third case we replace
a value less than t_gran/2 with 0, by setting t_delta' to the constant t_gran/2.


3) Summary
----------
Fixing (1) and considering (2), the patch replaces t_delta with a constant,
whose value depends on CONFIG_HZ, changing the above algorithm to:

  if (t_distance >= t_delta')
	reschedule after (t_distance / 1000) milliseconds;

where t_delta' = 10^6/(2*HZ) if HZ < 500, and t_delta' = 1000 otherwise.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 17 +++++++----------
 net/dccp/ccids/ccid3.h | 19 ++++++++++++++-----
 2 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index b4cc62e2e2b..eb1bda08eeb 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -93,19 +93,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
 	return scaled_div(w_init << 6, hctx->rtt);
 }
 
-/*
- * Recalculate t_ipi and delta (should be called whenever X changes)
+/**
+ * ccid3_update_send_interval  -  Calculate new t_ipi = s / X_inst
+ * This respects the granularity of X_inst (64 * bytes/second).
  */
 static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
 {
-	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
 	hctx->t_ipi = scaled_div32(((u64)hctx->s) << 6, hctx->x);
 
-	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
-	hctx->delta = min_t(u32, hctx->t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN);
-
-	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hctx->t_ipi,
-		       hctx->delta, hctx->s, (unsigned)(hctx->x >> 6));
+	ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hctx->t_ipi,
+		       hctx->s, (unsigned)(hctx->x >> 6));
 }
 
 static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
@@ -340,8 +337,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		 * else
 		 *       // send the packet in (t_nom - t_now) milliseconds.
 		 */
-		if (delay - (s64)hctx->delta >= 1000)
-			return (u32)delay / 1000L;
+		if (delay >= TFRC_T_DELTA)
+			return (u32)delay / USEC_PER_MSEC;
 
 		ccid3_hc_tx_update_win_count(hctx, now);
 		break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 0cfcfff8f5f..92a5e1688a0 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -47,12 +47,23 @@
 /* Two seconds as per RFC 3448 4.2 */
 #define TFRC_INITIAL_TIMEOUT	   (2 * USEC_PER_SEC)
 
-/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
-#define TFRC_OPSYS_HALF_TIME_GRAN  (USEC_PER_SEC / (2 * HZ))
-
 /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
 #define TFRC_T_MBI		   64
 
+/*
+ * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are
+ * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
+ * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
+ * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
+ * when using the constant t_delta  =  t_gran / 2  =  %USEC_PER_SEC / (2 * HZ).
+ */
+#if (HZ >= 500)
+# define TFRC_T_DELTA		   USEC_PER_MSEC
+#else
+# define TFRC_T_DELTA		   (USEC_PER_SEC / (2 * HZ))
+#warning Coarse CONFIG_HZ resolution -- higher value recommended for TFRC.
+#endif
+
 enum ccid3_options {
 	TFRC_OPT_LOSS_EVENT_RATE = 192,
 	TFRC_OPT_LOSS_INTERVALS	 = 193,
@@ -92,7 +103,6 @@ enum ccid3_hc_tx_states {
  * @no_feedback_timer - Handle to no feedback timer
  * @t_ld - Time last doubled during slow start
  * @t_nom - Nominal send time of next packet
- * @delta - Send timer delta (RFC 3448, 4.6) in usecs
  * @hist - Packet history
  * @options_received - Parsed set of retrieved options
  */
@@ -111,7 +121,6 @@ struct ccid3_hc_tx_sock {
 	struct timer_list		no_feedback_timer;
 	ktime_t				t_ld;
 	ktime_t				t_nom;
-	u32				delta;
 	struct tfrc_tx_hist_entry	*hist;
 	struct ccid3_options_received	options_received;
 };
-- 
cgit v1.2.3-70-g09d2


From 63b3a73bb85daf441f964aaf9b3fc89be4209c23 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Remove ugly RTT-sampling history lookup

This removes the RTT-sampling function tfrc_tx_hist_rtt(), since

 1. it suffered from complex passing of return values (the return value both
    indicated successful lookup while the value doubled as RTT sample);

 2. when for some odd reason the sample value equalled 0, this triggered a bug
    warning about "bogus Ack", due to the ambiguity of the return value;

 3. on a passive host which has not sent anything the TX history is empty and
    thus will lead to unwanted "bogus Ack" warnings such as
    ccid3_hc_tx_packet_recv: server(e7b7d518): DATAACK with bogus ACK-28197148
    ccid3_hc_tx_packet_recv: server(e7b7d518): DATAACK with bogus ACK-26641606.

The fix is to replace the implicit encoding by performing the steps manually.

Furthermore, the "bogus Ack" warning has been removed, since it can actually be
triggered due to several reasons (network reordering, old packet, (3) above),
hence it is not very useful.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c              | 34 +++++++++++++++++--------------
 net/dccp/ccids/lib/packet_history.c | 40 -------------------------------------
 net/dccp/ccids/lib/packet_history.h | 22 +++++++++++++++++---
 3 files changed, 38 insertions(+), 58 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index eb1bda08eeb..f74e58d9793 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -371,6 +371,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct ccid3_options_received *opt_recv = &hctx->options_received;
+	struct tfrc_tx_hist_entry *acked;
 	ktime_t now;
 	unsigned long t_nfb;
 	u32 pinv, r_sample;
@@ -384,17 +385,24 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	    hctx->state != TFRC_SSTATE_NO_FBACK)
 		return;
 
-	now = ktime_get_real();
-
-	/* Estimate RTT from history if ACK number is valid */
-	r_sample = tfrc_tx_hist_rtt(hctx->hist,
-				    DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
-	if (r_sample == 0) {
-		DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
-			  dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
-			  (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
+	/*
+	 * Locate the acknowledged packet in the TX history.
+	 *
+	 * Returning "entry not found" here can for instance happen when
+	 *  - the host has not sent out anything (e.g. a passive server),
+	 *  - the Ack is outdated (packet with higher Ack number was received),
+	 *  - it is a bogus Ack (for a packet not sent on this connection).
+	 */
+	acked = tfrc_tx_hist_find_entry(hctx->hist, dccp_hdr_ack_seq(skb));
+	if (acked == NULL)
 		return;
-	}
+	/* For the sake of RTT sampling, ignore/remove all older entries */
+	tfrc_tx_hist_purge(&acked->next);
+
+	/* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
+	now	  = ktime_get_real();
+	r_sample  = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
+	hctx->rtt = tfrc_ewma(hctx->rtt, r_sample, 9);
 
 	/* Update receive rate in units of 64 * bytes/second */
 	hctx->x_recv = opt_recv->ccid3or_receive_rate;
@@ -406,11 +414,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		hctx->p = 0;
 	else				       /* can not exceed 100% */
 		hctx->p = scaled_div(1, pinv);
-	/*
-	 * Validate new RTT sample and update moving average
-	 */
-	r_sample = dccp_sample_rtt(sk, r_sample);
-	hctx->rtt = tfrc_ewma(hctx->rtt, r_sample, 9);
+
 	/*
 	 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
 	 */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 6cc108afdc3..5c445086690 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -40,18 +40,6 @@
 #include "packet_history.h"
 #include "../../dccp.h"
 
-/**
- *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
- *  @next:  next oldest entry (LIFO order)
- *  @seqno: sequence number of this entry
- *  @stamp: send time of packet with sequence number @seqno
- */
-struct tfrc_tx_hist_entry {
-	struct tfrc_tx_hist_entry *next;
-	u64			  seqno;
-	ktime_t			  stamp;
-};
-
 /*
  * Transmitter History Routines
  */
@@ -73,15 +61,6 @@ void tfrc_tx_packet_history_exit(void)
 	}
 }
 
-static struct tfrc_tx_hist_entry *
-	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
-{
-	while (head != NULL && head->seqno != seqno)
-		head = head->next;
-
-	return head;
-}
-
 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
 	struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -111,25 +90,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 }
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
 
-u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
-		     const ktime_t now)
-{
-	u32 rtt = 0;
-	struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
-
-	if (packet != NULL) {
-		rtt = ktime_us_delta(now, packet->stamp);
-		/*
-		 * Garbage-collect older (irrelevant) entries:
-		 */
-		tfrc_tx_hist_purge(&packet->next);
-	}
-
-	return rtt;
-}
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
-
-
 /*
  * 	Receiver History Routines
  */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 461cc91cce8..221d8102da5 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,12 +40,28 @@
 #include <linux/slab.h>
 #include "tfrc.h"
 
-struct tfrc_tx_hist_entry;
+/**
+ *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
+ *  @next:  next oldest entry (LIFO order)
+ *  @seqno: sequence number of this entry
+ *  @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+	struct tfrc_tx_hist_entry *next;
+	u64			  seqno;
+	ktime_t			  stamp;
+};
+
+static inline struct tfrc_tx_hist_entry *
+	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
+{
+	while (head != NULL && head->seqno != seqno)
+		head = head->next;
+	return head;
+}
 
 extern int  tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
 extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
-extern u32  tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
-			     const u64 seqno, const ktime_t now);
 
 /* Subtraction a-b modulo-16, respects circular wrap-around */
 #define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
-- 
cgit v1.2.3-70-g09d2


From 47a61e7b433a014296971ea1226eb1adb6310ab4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Simplify and consolidate tx_parse_options

This simplifies and consolidates the TX option-parsing code:

 1. The Loss Intervals option is not currently used, so dead code related to
    this option is removed. I am aware of no plans to support the option, but
    if someone wants to implement it (e.g. for inter-op tests), it is better
    to start afresh than having to also update currently unused code.

 2. The Loss Event and Receive Rate options have a lot of code in common (both
    are 32 bit, both have same length etc.), so this is consolidated.

 3. The test against GSR is not necessary, because
    - on first loading CCID3, ccid_new() zeroes out all fields in the socket;
    - ccid3_hc_tx_packet_recv() treats 0 and ~0U equivalently, due to

	pinv = opt_recv->ccid3or_loss_event_rate;
	if (pinv == ~0U || pinv == 0)
		hctx->p = 0;

    - as a result, the sequence number field is removed from opt_recv.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 57 +++++++++++++-------------------------------------
 net/dccp/ccids/ccid3.h |  3 ---
 2 files changed, 14 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f74e58d9793..12b601f11bf 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -487,60 +487,31 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
 				     unsigned char len, u16 idx,
 				     unsigned char *value)
 {
-	int rc = 0;
-	const struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct ccid3_options_received *opt_recv = &hctx->options_received;
 	__be32 opt_val;
 
-	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
-		opt_recv->ccid3or_seqno		     = dp->dccps_gsr;
-		opt_recv->ccid3or_loss_event_rate    = ~0;
-		opt_recv->ccid3or_loss_intervals_idx = 0;
-		opt_recv->ccid3or_loss_intervals_len = 0;
-		opt_recv->ccid3or_receive_rate	     = 0;
-	}
-
 	switch (option) {
+	case TFRC_OPT_RECEIVE_RATE:
 	case TFRC_OPT_LOSS_EVENT_RATE:
 		if (unlikely(len != 4)) {
-			DCCP_WARN("%s(%p), invalid len %d "
-				  "for TFRC_OPT_LOSS_EVENT_RATE\n",
-				  dccp_role(sk), sk, len);
-			rc = -EINVAL;
-		} else {
-			opt_val = get_unaligned((__be32 *)value);
-			opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
-			ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
-				       dccp_role(sk), sk,
-				       opt_recv->ccid3or_loss_event_rate);
+			DCCP_WARN("%s(%p), invalid len %d for %u\n",
+				  dccp_role(sk), sk, len, option);
+			return -EINVAL;
 		}
-		break;
-	case TFRC_OPT_LOSS_INTERVALS:
-		opt_recv->ccid3or_loss_intervals_idx = idx;
-		opt_recv->ccid3or_loss_intervals_len = len;
-		ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n",
-			       dccp_role(sk), sk,
-			       opt_recv->ccid3or_loss_intervals_idx,
-			       opt_recv->ccid3or_loss_intervals_len);
-		break;
-	case TFRC_OPT_RECEIVE_RATE:
-		if (unlikely(len != 4)) {
-			DCCP_WARN("%s(%p), invalid len %d "
-				  "for TFRC_OPT_RECEIVE_RATE\n",
-				  dccp_role(sk), sk, len);
-			rc = -EINVAL;
-		} else {
-			opt_val = get_unaligned((__be32 *)value);
-			opt_recv->ccid3or_receive_rate = ntohl(opt_val);
+		opt_val = ntohl(get_unaligned((__be32 *)value));
+
+		if (option == TFRC_OPT_RECEIVE_RATE) {
+			opt_recv->ccid3or_receive_rate = opt_val;
 			ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
-				       dccp_role(sk), sk,
-				       opt_recv->ccid3or_receive_rate);
+				       dccp_role(sk), sk, opt_val);
+		} else {
+			opt_recv->ccid3or_loss_event_rate = opt_val;
+			ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
+				       dccp_role(sk), sk, opt_val);
 		}
-		break;
 	}
-
-	return rc;
+	return 0;
 }
 
 static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 92a5e1688a0..2268785ae8e 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -71,9 +71,6 @@ enum ccid3_options {
 };
 
 struct ccid3_options_received {
-	u64 ccid3or_seqno:48,
-	    ccid3or_loss_intervals_idx:16;
-	u16 ccid3or_loss_intervals_len;
 	u32 ccid3or_loss_event_rate;
 	u32 ccid3or_receive_rate;
 };
-- 
cgit v1.2.3-70-g09d2


From 3306c781ff13aea89606435c134ec84e3c608681 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Add packet type information to CCID-specific option parsing

This patch ...
 1. adds packet type information to ccid_hc_{rx,tx}_parse_options(). This is
    necessary, since table 3 in RFC 4340, 5.8 leaves it to the CCIDs to state
    which options may (not) appear on what packet type.

 2. adds such a check for CCID-3's {Loss Event, Receive} Rate as specified in
    RFC 4340 8.3 ("Receive Rate options MUST NOT be sent on DCCP-Data packets")
    and 8.5 ("Loss Event Rate options MUST NOT be sent on DCCP-Data packets").

 3. removes an unused argument `idx' from ccid_hc_{rx,tx}_parse_options(). This
    is also no longer necessary, since the CCID-specific option-parsing routines
    are passed every single parameter of the type-length-value option encoding.

Also added documentation and made argument naming scheme consistent.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccid.h        | 46 +++++++++++++++++++++++-----------------------
 net/dccp/ccids/ccid3.c | 14 ++++++++------
 net/dccp/options.c     | 16 ++++------------
 3 files changed, 35 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 20ba066b277..bce517c9999 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -60,18 +60,14 @@ struct ccid_operations {
 	void		(*ccid_hc_tx_exit)(struct sock *sk);
 	void		(*ccid_hc_rx_packet_recv)(struct sock *sk,
 						  struct sk_buff *skb);
-	int		(*ccid_hc_rx_parse_options)(struct sock *sk,
-						    unsigned char option,
-						    unsigned char len, u16 idx,
-						    unsigned char* value);
+	int		(*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
+						    u8 opt, u8 *val, u8 len);
 	int		(*ccid_hc_rx_insert_options)(struct sock *sk,
 						     struct sk_buff *skb);
 	void		(*ccid_hc_tx_packet_recv)(struct sock *sk,
 						  struct sk_buff *skb);
-	int		(*ccid_hc_tx_parse_options)(struct sock *sk,
-						    unsigned char option,
-						    unsigned char len, u16 idx,
-						    unsigned char* value);
+	int		(*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
+						    u8 opt, u8 *val, u8 len);
 	int		(*ccid_hc_tx_send_packet)(struct sock *sk,
 						  struct sk_buff *skb);
 	void		(*ccid_hc_tx_packet_sent)(struct sock *sk,
@@ -163,27 +159,31 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
 		ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
 }
 
+/**
+ * ccid_hc_tx_parse_options  -  Parse CCID-specific options sent by the receiver
+ * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
+ * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
+ * @val: value of @opt
+ * @len: length of @val in bytes
+ */
 static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
-					   unsigned char option,
-					   unsigned char len, u16 idx,
-					   unsigned char* value)
+					   u8 pkt, u8 opt, u8 *val, u8 len)
 {
-	int rc = 0;
-	if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL)
-		rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx,
-						    value);
-	return rc;
+	if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
+		return 0;
+	return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
 }
 
+/**
+ * ccid_hc_rx_parse_options  -  Parse CCID-specific options sent by the sender
+ * Arguments are analogous to ccid_hc_tx_parse_options()
+ */
 static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
-					   unsigned char option,
-					   unsigned char len, u16 idx,
-					   unsigned char* value)
+					   u8 pkt, u8 opt, u8 *val, u8 len)
 {
-	int rc = 0;
-	if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL)
-		rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value);
-	return rc;
+	if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
+		return 0;
+	return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
 }
 
 static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 12b601f11bf..4c422fb2189 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -483,9 +483,8 @@ done_computing_x:
 			   jiffies + usecs_to_jiffies(t_nfb));
 }
 
-static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
-				     unsigned char len, u16 idx,
-				     unsigned char *value)
+static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
+				     u8 option, u8 *optval, u8 optlen)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct ccid3_options_received *opt_recv = &hctx->options_received;
@@ -494,12 +493,15 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
 	switch (option) {
 	case TFRC_OPT_RECEIVE_RATE:
 	case TFRC_OPT_LOSS_EVENT_RATE:
-		if (unlikely(len != 4)) {
+		/* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
+		if (packet_type == DCCP_PKT_DATA)
+			break;
+		if (unlikely(optlen != 4)) {
 			DCCP_WARN("%s(%p), invalid len %d for %u\n",
-				  dccp_role(sk), sk, len, option);
+				  dccp_role(sk), sk, optlen, option);
 			return -EINVAL;
 		}
-		opt_val = ntohl(get_unaligned((__be32 *)value));
+		opt_val = ntohl(get_unaligned((__be32 *)optval));
 
 		if (option == TFRC_OPT_RECEIVE_RATE) {
 			opt_recv->ccid3or_receive_rate = opt_val;
diff --git a/net/dccp/options.c b/net/dccp/options.c
index fd51cc70c63..b102774694c 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -226,23 +226,15 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
 				      dccp_role(sk), elapsed_time);
 			break;
-		case 128 ... 191: {
-			const u16 idx = value - options;
-
+		case 128 ... 191:
 			if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
-						     opt, len, idx,
-						     value) != 0)
+						     pkt_type, opt, value, len))
 				goto out_invalid_option;
-		}
 			break;
-		case 192 ... 255: {
-			const u16 idx = value - options;
-
+		case 192 ... 255:
 			if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
-						     opt, len, idx,
-						     value) != 0)
+						     pkt_type, opt, value, len))
 				goto out_invalid_option;
-		}
 			break;
 		default:
 			DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
-- 
cgit v1.2.3-70-g09d2


From 535c55df136ad2783d444e54d518a8fae8bdbf79 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp tfrc/ccid-3: Computing Loss Rate from Loss Event Rate

This adds a function to take care of the following cases occurring in the
computation of the Loss Rate p:

 * 1/(2^32-1) is mapped into 0% as per RFC 4342, 8.5;
 * 1/0        is mapped into the maximum of 100%;
 * we want to avoid that p = 1/x is rounded down to 0 when x is very large,
   since this means accidentally re-entering slow-start (indicated by p==0).

In the last case, the minimum-resolution value of p is returned.

Furthermore, a bug in ccid3_hc_rx_getsockopt is fixed (1/0 was mapped into ~0U),
which now allows to consistently print the scaled p-values as

        printf("Loss Event Rate = %u.%04u %%\n", rx_info.tfrcrx_p / 10000,
                                                 rx_info.tfrcrx_p % 10000);

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c             |  9 ++++-----
 net/dccp/ccids/lib/tfrc.h          |  1 +
 net/dccp/ccids/lib/tfrc_equation.c | 17 +++++++++++++++--
 3 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 4c422fb2189..206204551f4 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -410,10 +410,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 	/* Update loss event rate (which is scaled by 1e6) */
 	pinv = opt_recv->ccid3or_loss_event_rate;
-	if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
+	if (pinv == 0)
 		hctx->p = 0;
-	else				       /* can not exceed 100% */
-		hctx->p = scaled_div(1, pinv);
+	else
+		hctx->p = tfrc_invert_loss_event_rate(pinv);
 
 	/*
 	 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
@@ -854,8 +854,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 			return -EINVAL;
 		rx_info.tfrcrx_x_recv = hcrx->x_recv;
 		rx_info.tfrcrx_rtt    = hcrx->rtt;
-		rx_info.tfrcrx_p      = hcrx->p_inverse == 0 ? ~0U :
-					   scaled_div(1, hcrx->p_inverse);
+		rx_info.tfrcrx_p      = tfrc_invert_loss_event_rate(hcrx->p_inverse);
 		len = sizeof(rx_info);
 		val = &rx_info;
 		break;
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ed9857527ac..bb47146ac7d 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -58,6 +58,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
 
 extern u32  tfrc_calc_x(u16 s, u32 R, u32 p);
 extern u32  tfrc_calc_x_reverse_lookup(u32 fvalue);
+extern u32  tfrc_invert_loss_event_rate(u32 loss_event_rate);
 
 extern int  tfrc_tx_packet_history_init(void);
 extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 2f20a29cffe..bc3dc2b2a84 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -658,7 +658,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 	result = scaled_div(s, R);
 	return scaled_div32(result, f);
 }
-
 EXPORT_SYMBOL_GPL(tfrc_calc_x);
 
 /**
@@ -693,5 +692,19 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
 	index = tfrc_binsearch(fvalue, 0);
 	return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
 }
-
 EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
+
+/**
+ * tfrc_invert_loss_event_rate  -  Compute p so that 10^6 corresponds to 100%
+ * When @loss_event_rate is large, there is a chance that p is truncated to 0.
+ * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
+ */
+u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
+{
+	if (loss_event_rate == UINT_MAX)		/* see RFC 4342, 8.5 */
+		return 0;
+	if (unlikely(loss_event_rate == 0))		/* map 1/0 into 100% */
+		return 1000000;
+	return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
+}
+EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate);
-- 
cgit v1.2.3-70-g09d2


From ce177ae2e6b196659e93a9408cc1f5f13f206d13 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Remove redundant 'options_received' struct

The `options_received' struct is redundant, since it re-duplicates the existing
`p' and `x_recv' fields. This patch removes the sub-struct and migrates the
format conversion operations (cf. below) to ccid3_hc_tx_parse_options().

                     Why the fields are redundant
                     ----------------------------
The Loss Event Rate p and the Receive Rate x_recv are initially 0 when first
loading CCID-3, as ccid_new() zeroes out the entire ccid3_hc_tx_sock.

When Loss Event Rate or Receive Rate options are received, they are stored by
ccid3_hc_tx_parse_options() into the fields `ccid3or_loss_event_rate' and
`ccid3or_receive_rate' of the sub-struct `options_received' in ccid3_hc_tx_sock.

After parsing (considering only the established state - dccp_rcv_established()),
the packet is passed on to ccid_hc_tx_packet_recv(). This calls the CCID-3
specific routine ccid3_hc_tx_packet_recv(), which performs the following copy
operations between fields of ccid3_hc_tx_sock:

 * hctx->options_received.ccid3or_receive_rate is copied into hctx->x_recv,
   after scaling it for fixpoint arithmetic, by 2^64;
 * hctx->options_received.ccid3or_loss_event_rate is copied into hctx->p,
   considering the above special cases; in addition, a value of 0 here needs to
   be mapped into p=0 (when no Loss Event Rate option has been received yet).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 24 ++++++++----------------
 net/dccp/ccids/ccid3.h |  7 -------
 2 files changed, 8 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 206204551f4..e7db8a4ee64 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -370,11 +370,10 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
 static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	struct ccid3_options_received *opt_recv = &hctx->options_received;
 	struct tfrc_tx_hist_entry *acked;
 	ktime_t now;
 	unsigned long t_nfb;
-	u32 pinv, r_sample;
+	u32 r_sample;
 
 	/* we are only interested in ACKs */
 	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
@@ -404,17 +403,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	r_sample  = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
 	hctx->rtt = tfrc_ewma(hctx->rtt, r_sample, 9);
 
-	/* Update receive rate in units of 64 * bytes/second */
-	hctx->x_recv = opt_recv->ccid3or_receive_rate;
-	hctx->x_recv <<= 6;
-
-	/* Update loss event rate (which is scaled by 1e6) */
-	pinv = opt_recv->ccid3or_loss_event_rate;
-	if (pinv == 0)
-		hctx->p = 0;
-	else
-		hctx->p = tfrc_invert_loss_event_rate(pinv);
-
 	/*
 	 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
 	 */
@@ -487,7 +475,6 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
 				     u8 option, u8 *optval, u8 optlen)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	struct ccid3_options_received *opt_recv = &hctx->options_received;
 	__be32 opt_val;
 
 	switch (option) {
@@ -504,11 +491,16 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
 		opt_val = ntohl(get_unaligned((__be32 *)optval));
 
 		if (option == TFRC_OPT_RECEIVE_RATE) {
-			opt_recv->ccid3or_receive_rate = opt_val;
+			/* Receive Rate is kept in units of 64 bytes/second */
+			hctx->x_recv = opt_val;
+			hctx->x_recv <<= 6;
+
 			ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
 				       dccp_role(sk), sk, opt_val);
 		} else {
-			opt_recv->ccid3or_loss_event_rate = opt_val;
+			/* Update the fixpoint Loss Event Rate fraction */
+			hctx->p = tfrc_invert_loss_event_rate(opt_val);
+
 			ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
 				       dccp_role(sk), sk, opt_val);
 		}
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 2268785ae8e..ae210786c89 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -70,11 +70,6 @@ enum ccid3_options {
 	TFRC_OPT_RECEIVE_RATE	 = 194,
 };
 
-struct ccid3_options_received {
-	u32 ccid3or_loss_event_rate;
-	u32 ccid3or_receive_rate;
-};
-
 /* TFRC sender states */
 enum ccid3_hc_tx_states {
 	TFRC_SSTATE_NO_SENT = 1,
@@ -101,7 +96,6 @@ enum ccid3_hc_tx_states {
  * @t_ld - Time last doubled during slow start
  * @t_nom - Nominal send time of next packet
  * @hist - Packet history
- * @options_received - Parsed set of retrieved options
  */
 struct ccid3_hc_tx_sock {
 	u64				x;
@@ -119,7 +113,6 @@ struct ccid3_hc_tx_sock {
 	ktime_t				t_ld;
 	ktime_t				t_nom;
 	struct tfrc_tx_hist_entry	*hist;
-	struct ccid3_options_received	options_received;
 };
 
 static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
-- 
cgit v1.2.3-70-g09d2


From f10ecaee6dc2c6d56783462b2a82e98bc81b55f4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Replace magic CCID-specific numbers by symbolic constants

The constants DCCPO_{MIN,MAX}_CCID_SPECIFIC are nowhere used in the code, but
instead for the CCID-specific options numbers are used.

This patch unifies the use of CCID-specific option numbers, by adding symbolic
names reflecting the definitions in RFC 4340, 10.3.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/linux/dccp.h |  6 ++++--
 net/dccp/options.c   | 13 +++----------
 2 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 7434a8353e2..7187bd8a75f 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -165,8 +165,10 @@ enum {
 	DCCPO_TIMESTAMP_ECHO = 42,
 	DCCPO_ELAPSED_TIME = 43,
 	DCCPO_MAX = 45,
-	DCCPO_MIN_CCID_SPECIFIC = 128,
-	DCCPO_MAX_CCID_SPECIFIC = 255,
+	DCCPO_MIN_RX_CCID_SPECIFIC = 128,	/* from sender to receiver */
+	DCCPO_MAX_RX_CCID_SPECIFIC = 191,
+	DCCPO_MIN_TX_CCID_SPECIFIC = 192,	/* from receiver to sender */
+	DCCPO_MAX_TX_CCID_SPECIFIC = 255,
 };
 /* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */
 #define DCCP_SINGLE_OPT_MAXLEN	253
diff --git a/net/dccp/options.c b/net/dccp/options.c
index b102774694c..e1c94e2b8be 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -96,18 +96,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		}
 
 		/*
-		 * CCID-Specific Options (from RFC 4340, sec. 10.3):
-		 *
-		 * Option numbers 128 through 191 are for options sent from the
-		 * HC-Sender to the HC-Receiver; option numbers 192 through 255
-		 * are for options sent from the HC-Receiver to	the HC-Sender.
-		 *
 		 * CCID-specific options are ignored during connection setup, as
 		 * negotiation may still be in progress (see RFC 4340, 10.3).
 		 * The same applies to Ack Vectors, as these depend on the CCID.
-		 *
 		 */
-		if (dreq != NULL && (opt >= 128 ||
+		if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC ||
 		    opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
 			goto ignore_option;
 
@@ -226,12 +219,12 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
 				      dccp_role(sk), elapsed_time);
 			break;
-		case 128 ... 191:
+		case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
 			if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
 						     pkt_type, opt, value, len))
 				goto out_invalid_option;
 			break;
-		case 192 ... 255:
+		case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
 			if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
 						     pkt_type, opt, value, len))
 				goto out_invalid_option;
-- 
cgit v1.2.3-70-g09d2


From c506d91d9ab7681e058afcd750e9118c6cdaabc1 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Unused argument in CCID tx function

This removes the argument `more' from ccid_hc_tx_packet_sent, since it was
nowhere used in the entire code.

(Anecdotally, this argument was not even used in the original KAME code where
 the function originally came from; compare the variable moreToSend in the
 freebsd61-dccp-kame-28.08.2006.patch now maintained by Emmanuel Lochin.)

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccid.h        | 6 +++---
 net/dccp/ccids/ccid2.c | 2 +-
 net/dccp/ccids/ccid3.c | 3 +--
 net/dccp/output.c      | 2 +-
 4 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index bce517c9999..430a3c24f97 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -71,7 +71,7 @@ struct ccid_operations {
 	int		(*ccid_hc_tx_send_packet)(struct sock *sk,
 						  struct sk_buff *skb);
 	void		(*ccid_hc_tx_packet_sent)(struct sock *sk,
-						  int more, unsigned int len);
+						  unsigned int len);
 	void		(*ccid_hc_rx_get_info)(struct sock *sk,
 					       struct tcp_info *info);
 	void		(*ccid_hc_tx_get_info)(struct sock *sk,
@@ -139,10 +139,10 @@ static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
 }
 
 static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
-					  int more, unsigned int len)
+					  unsigned int len)
 {
 	if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
-		ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
+		ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len);
 }
 
 static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9728bbf0ace..f56ab68a4b7 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -221,7 +221,7 @@ static void ccid2_start_rto_timer(struct sock *sk)
 		       jiffies + hctx->rto);
 }
 
-static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
+static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index e7db8a4ee64..c1cc66edfad 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -356,8 +356,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
-				    unsigned int len)
+static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 9888f61f9b0..be65bc30e20 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -301,7 +301,7 @@ void dccp_write_xmit(struct sock *sk, int block)
 				dcb->dccpd_type = DCCP_PKT_DATA;
 
 			err = dccp_transmit_skb(sk, skb);
-			ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+			ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
 			if (err)
 				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
 					 err);
-- 
cgit v1.2.3-70-g09d2


From 5fe94963a163fecc34b7b51bf2ca525f9f50d7bf Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Remove duplicate documentation

This removes RX-socket documentation which is either duplicate or non-existent.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index ae210786c89..de26db82d65 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -131,16 +131,12 @@ enum ccid3_hc_rx_states {
 
 /** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
  *
- *  @x_recv  -  Receiver estimate of send rate (RFC 3448 4.3)
- *  @rtt  -  Receiver estimate of rtt (non-standard)
- *  @p  -  Current loss event rate (RFC 3448 5.4)
  *  @last_counter  -  Tracks window counter (RFC 4342, 8.1)
  *  @state  -  Receiver state, one of %ccid3_hc_rx_states
  *  @bytes_recv  -  Total sum of DCCP payload bytes
  *  @x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
  *  @rtt  -  Receiver estimate of RTT
  *  @tstamp_last_feedback  -  Time at which last feedback was sent
- *  @tstamp_last_ack  -  Time at which last feedback was sent
  *  @hist  -  Packet history (loss detection + RTT sampling)
  *  @li_hist  -  Loss Interval database
  *  @s  -  Received packet size in bytes
-- 
cgit v1.2.3-70-g09d2


From d0995e6a9e3328cdc76b4c45882dee118284f960 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Remove dead states

This patch is thanks to an investigation by Leandro Sales de Melo and his
colleagues. They worked out two state diagrams which highlight the fact that
the xxx_TERM states in CCID-3/4 are in fact not necessary.

And this can be confirmed by in turn looking at the code: the xxx_TERM states
are only ever set in ccid3_hc_{rx,tx}_exit(). These two functions are part
of the following call chain:

 * ccid_hc_{tx,rx}_exit() are called from ccid_delete() only;
 * ccid_delete() invokes ccid_hc_{tx,rx}_exit() in the way of a destructor:
   after calling ccid_hc_{tx,rx}_exit(), the CCID is released from memory;
 * ccid_delete() is in turn called only by ccid_hc_{tx,rx}_delete();
 * ccid_hc_{tx,rx}_delete() is called only if
   - feature negotiation failed   (dccp_feat_activate_values()),
   - when changing the RX/TX CCID (to eject the current CCID),
   - when destroying the socket   (in dccp_destroy_sock()).

In other words, when CCID-3 sets the state to xxx_TERM, it is at a time where
no more processing should be going on, hence it is not necessary to introduce
a dedicated exit state - this is implicit when unloading the CCID.

The patch removes this state, one switch-statement collapses as a result.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 37 +++++++++----------------------------
 net/dccp/ccids/ccid3.h |  2 --
 2 files changed, 9 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index c1cc66edfad..0751a8fa936 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -56,7 +56,6 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
 	[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
 	[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
 	[TFRC_SSTATE_FBACK]    = "FBACK",
-	[TFRC_SSTATE_TERM]     = "TERM",
 	};
 
 	return ccid3_state_names[state];
@@ -210,10 +209,13 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
 		       ccid3_tx_state_name(hctx->state));
 
+	/* Ignore and do not restart after leaving the established state */
+	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
+		goto out;
+
+	/* Reset feedback state to "no feedback received" */
 	if (hctx->state == TFRC_SSTATE_FBACK)
 		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-	else if (hctx->state != TFRC_SSTATE_NO_FBACK)
-		goto out;
 
 	/*
 	 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
@@ -288,8 +290,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(skb->len == 0))
 		return -EBADMSG;
 
-	switch (hctx->state) {
-	case TFRC_SSTATE_NO_SENT:
+	if (hctx->state == TFRC_SSTATE_NO_SENT) {
 		sk_reset_timer(sk, &hctx->no_feedback_timer, (jiffies +
 				usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
 		hctx->last_win_count   = 0;
@@ -324,9 +325,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		ccid3_update_send_interval(hctx);
 
 		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-		break;
-	case TFRC_SSTATE_NO_FBACK:
-	case TFRC_SSTATE_FBACK:
+
+	} else {
 		delay = ktime_us_delta(hctx->t_nom, now);
 		ccid3_pr_debug("delay=%ld\n", (long)delay);
 		/*
@@ -341,10 +341,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 			return (u32)delay / USEC_PER_MSEC;
 
 		ccid3_hc_tx_update_win_count(hctx, now);
-		break;
-	case TFRC_SSTATE_TERM:
-		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
-		return -EINVAL;
 	}
 
 	/* prepare to send now (add options etc.) */
@@ -378,11 +374,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
 	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
 		return;
-	/* ... and only in the established state */
-	if (hctx->state != TFRC_SSTATE_FBACK &&
-	    hctx->state != TFRC_SSTATE_NO_FBACK)
-		return;
-
 	/*
 	 * Locate the acknowledged packet in the TX history.
 	 *
@@ -522,9 +513,7 @@ static void ccid3_hc_tx_exit(struct sock *sk)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
-	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
 	sk_stop_timer(sk, &hctx->no_feedback_timer);
-
 	tfrc_tx_hist_purge(&hctx->hist);
 }
 
@@ -583,7 +572,6 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
 	static char *ccid3_rx_state_names[] = {
 	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
 	[TFRC_RSTATE_DATA]    = "DATA",
-	[TFRC_RSTATE_TERM]    = "TERM",
 	};
 
 	return ccid3_rx_state_names[state];
@@ -609,14 +597,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	ktime_t now;
+	ktime_t now = ktime_get_real();
 	s64 delta = 0;
 
-	if (unlikely(hcrx->state == TFRC_RSTATE_TERM))
-		return;
-
-	now = ktime_get_real();
-
 	switch (fbtype) {
 	case CCID3_FBACK_INITIAL:
 		hcrx->x_recv = 0;
@@ -819,8 +802,6 @@ static void ccid3_hc_rx_exit(struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 
-	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
-
 	tfrc_rx_hist_purge(&hcrx->hist);
 	tfrc_lh_cleanup(&hcrx->li_hist);
 }
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index de26db82d65..7884159937a 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -75,7 +75,6 @@ enum ccid3_hc_tx_states {
 	TFRC_SSTATE_NO_SENT = 1,
 	TFRC_SSTATE_NO_FBACK,
 	TFRC_SSTATE_FBACK,
-	TFRC_SSTATE_TERM,
 };
 
 /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
@@ -126,7 +125,6 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
 enum ccid3_hc_rx_states {
 	TFRC_RSTATE_NO_DATA = 1,
 	TFRC_RSTATE_DATA,
-	TFRC_RSTATE_TERM    = 127,
 };
 
 /** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
-- 
cgit v1.2.3-70-g09d2


From 2975abd251d795810932b20354729ba236d95bf9 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Schedule an Ack when receiving timestamps

This schedules an Ack when receiving a timestamp, exploiting the
existing inet_csk_schedule_ack() function, saving one case in the
`dccp_ack_pending()' function.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h    | 2 +-
 net/dccp/options.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index c7370de4c4d..f9ed0cbd1bf 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -431,7 +431,7 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
 static inline int dccp_ack_pending(const struct sock *sk)
 {
 	const struct dccp_sock *dp = dccp_sk(sk);
-	return dp->dccps_timestamp_echo != 0 ||
+	return
 #ifdef CONFIG_IP_DCCP_ACKVEC
 	       (dp->dccps_hc_rx_ackvec != NULL &&
 		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
diff --git a/net/dccp/options.c b/net/dccp/options.c
index e1c94e2b8be..9fe0510f402 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -163,6 +163,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 				      dccp_role(sk), ntohl(opt_val),
 				      (unsigned long long)
 				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
+			/* schedule an Ack in case this sender is quiescent */
+			inet_csk_schedule_ack(sk);
 			break;
 		case DCCPO_TIMESTAMP_ECHO:
 			if (len != 4 && len != 6 && len != 8)
-- 
cgit v1.2.3-70-g09d2


From bfbddd085a5bced6efb9e1bc4d029438f9639784 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Fix the adjustments to AWL and SWL

This fixes a problem and a potential loophole with regard to seqno/ackno
validity: the problem is that the initial adjustments to AWL/SWL were
only performed at the begin of the connection, during the handshake.

Since the Sequence Window feature is always greater than Wmin=32 (7.5.2),
it is however necessary to perform these adjustments at least for the first
W/W' (variables as per 7.5.1) packets in the lifetime of a connection.

This requirement is complicated by the fact that W/W' can change at any time
during the lifetime of a connection.

Therefore the consequence is to perform this safety check each time SWL/AWL
are updated.

A second problem solved by this patch is that the remote/local Sequence Window
feature values (which set the bounds for AWL/SWL/SWH) are undefined until the
feature negotiation has completed.

During the initial handshake we have more stringent sequence number protection,
the changes added by this patch effect that {A,S}W{L,H} are within the correct
bounds at the instant that feature negotiation completes (since the SeqWin
feature activation handlers call dccp_update_gsr/gss()).

A detailed rationale is below -- can be removed from the commit message.


1. Server sequence number checks during initial handshake
---------------------------------------------------------
The server can not use the fields of the listening socket for seqno/ackno checks
and thus needs to store all relevant information on a per-connection basis on
the dccp_request socket. This is a size-constrained structure and has currently
only ISS (dreq_iss) and ISR (dreq_isr) defined.
Adding further fields (SW{L,H}, AW{L,H}) would increase the size of the struct
and it is questionable whether this will have any practical gain. The currently
implemented solution is as follows.
 * receiving first Request: dccp_v{4,6}_conn_request sets
                            ISR := P.seqno, ISS := dccp_v{4,6}_init_sequence()

 * sending first Response:  dccp_v{4,6}_send_response via dccp_make_response()
                            sets P.seqno := ISS, sets P.ackno := ISR

 * receiving retransmitted Request: dccp_check_req() overrides ISR := P.seqno

 * answering retransmitted Request: dccp_make_response() sets ISS += 1,
                                    otherwise as per first Response

 * completing the handshake: succeeds in dccp_check_req() for the first Ack
                             where P.ackno == ISS (P.seqno is not tested)

 * creating child socket: ISS, ISR are copied from the request_sock

This solution will succeed whenever the server can receive the Request and the
subsequent Ack in succession, without retransmissions. If there is packet loss,
the client needs to retransmit until this condition succeeds; it will otherwise
eventually give up. Adding further fields to the request_sock could increase
the robustness a bit, in that it would make possible to let a reordered Ack
(from a retransmitted Response) pass. The argument against such a solution is
that if the packet loss is not persistent and an Ack gets through, why not
wait for the one answering the original response: if the loss is persistent, it
is probably better to not start the connection in the first place.

Long story short: the present design (by Arnaldo) is simple and will likely work
just as well as a more complicated solution. As a consequence, {A,S}W{L,H} are
not needed until the moment the request_sock is cloned into the accept queue.

At that stage feature negotiation has completed, so that the values for the local
and remote Sequence Window feature (7.5.2) are known, i.e. we are now in a better
position to compute {A,S}W{L,H}.


2. Client sequence number checks during initial handshake
---------------------------------------------------------
Until entering PARTOPEN the client does not need the adjustments, since it
constrains the Ack window to the packet it sent.

 * sending first Request: dccp_v{4,6}_connect() choose ISS,
                          dccp_connect() then sets GAR := ISS (as per 8.5),
			  dccp_transmit_skb() (with the previous bug fix) sets
			         GSS := ISS, AWL := ISS, AWH := GSS
 * n-th retransmitted Request (with previous patch):
	                  dccp_retransmit_skb() via timer calls
			  dccp_transmit_skb(), which sets GSS := ISS+n
                          and then AWL := ISS, AWH := ISS+n

 * receiving any Response: dccp_rcv_request_sent_state_process()
	                   -- accepts packet if AWL <= P.ackno <= AWH;
			   -- sets GSR = ISR = P.seqno

 * sending the Ack completing the handshake: dccp_send_ack() calls
                           dccp_transmit_skb(), which sets GSS += 1
			   and AWL := ISS, AWH := GSS


Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h      | 20 ++++++++++++++++++++
 net/dccp/input.c     | 18 ++++++------------
 net/dccp/minisocks.c | 30 +++++++++---------------------
 3 files changed, 35 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f9ed0cbd1bf..e4d6e76ced4 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -415,6 +415,23 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq)
 	dp->dccps_gsr = seq;
 	/* Sequence validity window depends on remote Sequence Window (7.5.1) */
 	dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
+	/*
+	 * Adjust SWL so that it is not below ISR. In contrast to RFC 4340,
+	 * 7.5.1 we perform this check beyond the initial handshake: W/W' are
+	 * always > 32, so for the first W/W' packets in the lifetime of a
+	 * connection we always have to adjust SWL.
+	 * A second reason why we are doing this is that the window depends on
+	 * the feature-remote value of Sequence Window: nothing stops the peer
+	 * from updating this value while we are busy adjusting SWL for the
+	 * first W packets (we would have to count from scratch again then).
+	 * Therefore it is safer to always make sure that the Sequence Window
+	 * is not artificially extended by a peer who grows SWL downwards by
+	 * continually updating the feature-remote Sequence-Window.
+	 * If sequence numbers wrap it is bad luck. But that will take a while
+	 * (48 bit), and this measure prevents Sequence-number attacks.
+	 */
+	if (before48(dp->dccps_swl, dp->dccps_isr))
+		dp->dccps_swl = dp->dccps_isr;
 	dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
 }
 
@@ -425,6 +442,9 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
 	dp->dccps_gss = seq;
 	/* Ack validity window depends on local Sequence Window value (7.5.1) */
 	dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
+	/* Adjust AWL so that it is not below ISS - see comment above for SWL */
+	if (before48(dp->dccps_awl, dp->dccps_iss))
+		dp->dccps_awl = dp->dccps_iss;
 	dp->dccps_awh = dp->dccps_gss;
 }
 
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 5eb443f656c..e3f43d55e3c 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -440,20 +440,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		kfree_skb(sk->sk_send_head);
 		sk->sk_send_head = NULL;
 
-		dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
-		dccp_update_gsr(sk, dp->dccps_isr);
 		/*
-		 * SWL and AWL are initially adjusted so that they are not less than
-		 * the initial Sequence Numbers received and sent, respectively:
-		 *	SWL := max(GSR + 1 - floor(W/4), ISR),
-		 *	AWL := max(GSS - W' + 1, ISS).
-		 * These adjustments MUST be applied only at the beginning of the
-		 * connection.
-		 *
-		 * AWL was adjusted in dccp_v4_connect -acme
+		 * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect
+		 * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH
+		 * is done as part of activating the feature values below, since
+		 * these settings depend on the local/remote Sequence Window
+		 * features, which were undefined or not confirmed until now.
 		 */
-		dccp_set_seqno(&dp->dccps_swl,
-			       max48(dp->dccps_swl, dp->dccps_isr));
+		dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
 
 		dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 0ecb19c5e8c..f4d9c8f60ed 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -120,30 +120,18 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		 *
 		 *    Choose S.ISS (initial seqno) or set from Init Cookies
 		 *    Initialize S.GAR := S.ISS
-		 *    Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
-		 */
-		newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
-		dccp_update_gss(newsk, dreq->dreq_iss);
-
-		newdp->dccps_isr = dreq->dreq_isr;
-		dccp_update_gsr(newsk, dreq->dreq_isr);
-
-		/*
-		 * SWL and AWL are initially adjusted so that they are not less than
-		 * the initial Sequence Numbers received and sent, respectively:
-		 *	SWL := max(GSR + 1 - floor(W/4), ISR),
-		 *	AWL := max(GSS - W' + 1, ISS).
-		 * These adjustments MUST be applied only at the beginning of the
-		 * connection.
+		 *    Set S.ISR, S.GSR from packet (or Init Cookies)
+		 *
+		 *    Setting AWL/AWH and SWL/SWH happens as part of the feature
+		 *    activation below, as these windows all depend on the local
+		 *    and remote Sequence Window feature values (7.5.2).
 		 */
-		dccp_set_seqno(&newdp->dccps_swl,
-			       max48(newdp->dccps_swl, newdp->dccps_isr));
-		dccp_set_seqno(&newdp->dccps_awl,
-			       max48(newdp->dccps_awl, newdp->dccps_iss));
+		newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss;
+		newdp->dccps_gar = newdp->dccps_iss;
+		newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr;
 
 		/*
-		 * Activate features after initialising the sequence numbers,
-		 * since CCID initialisation may depend on GSS, ISR, ISS etc.
+		 * Activate features: initialise CCIDs, sequence windows etc.
 		 */
 		if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
 			/* It is still raw copy of parent, so invalidate
-- 
cgit v1.2.3-70-g09d2


From a9c1656ab10480cc6f6d34f193bcde2729fe8037 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Merge now-reduced connect_init() function

After moving the assignment of GAR/ISS from dccp_connect_init() to
dccp_transmit_skb(), the former function becomes very small, so that
a merger with dccp_connect() suggests itself.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/output.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/dccp/output.c b/net/dccp/output.c
index be65bc30e20..1b316830758 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -471,8 +471,9 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
 /*
  * Do all connect socket setups that can be done AF independent.
  */
-static inline void dccp_connect_init(struct sock *sk)
+int dccp_connect(struct sock *sk)
 {
+	struct sk_buff *skb;
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -482,22 +483,12 @@ static inline void dccp_connect_init(struct sock *sk)
 
 	dccp_sync_mss(sk, dst_mtu(dst));
 
-	/* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
-	dp->dccps_gar = dp->dccps_iss;
-
-	icsk->icsk_retransmits = 0;
-}
-
-int dccp_connect(struct sock *sk)
-{
-	struct sk_buff *skb;
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
 	/* do not connect if feature negotiation setup fails */
 	if (dccp_feat_finalise_settings(dccp_sk(sk)))
 		return -EPROTO;
 
-	dccp_connect_init(sk);
+	/* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
+	dp->dccps_gar = dp->dccps_iss;
 
 	skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
 	if (unlikely(skb == NULL))
@@ -513,6 +504,7 @@ int dccp_connect(struct sock *sk)
 	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the REQUEST until an answer. */
+	icsk->icsk_retransmits = 0;
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 				  icsk->icsk_rto, DCCP_RTO_MAX);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From b8c6bcee1dbc1aadcd67af998e414e73fa166a7d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Reduce noise in output and convert to ktime_t

This fixes the problem that dccp_probe output can grow quite large without
apparent benefit (many identical data points), creating huge files (up to
over one Gigabyte for a few minutes' test run) which are very hard to
post-process (in one instance it got so bad that gnuplot ate up all memory
plus swap).

The cause for the problem is that the kprobe is inserted into dccp_sendmsg(),
which can be called in a polling-mode (whenever the TX queue is full due to
congestion-control issues, EAGAIN is returned). This creates many very
similar data points, i.e. the increase of processing time does not increase
the quality/information of the probe output.

The fix is to attach the probe to a different function -- write_xmit was
chosen since it gets called continually (both via userspace and timer);
an input-path function would stop sampling as soon as the other end stops
sending feedback.

For comparison the output file sizes for the same 20 second test
run over a lossy link:
           * before / without patch:  118   Megabytes
           * after  / with patch:       1.2 Megabytes
and there was much less noise in the output.

To allow backward compatibility with scripts that people use, the now-unused
`size' field in the output has been replaced with the CCID identifier. This
also serves for future compatibility - support for CCID2 is work in progress
(depends on the still unfinished SRTT/RTTVAR updates).

While at it, the update to ktime_t was also performed.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
 net/dccp/probe.c | 66 +++++++++++++++++++++-----------------------------------
 1 file changed, 25 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index a87fd4fc1b3..eaa59d82ab0 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -46,70 +46,54 @@ static struct {
 	struct kfifo	  *fifo;
 	spinlock_t	  lock;
 	wait_queue_head_t wait;
-	struct timespec	  tstart;
+	ktime_t		  start;
 } dccpw;
 
-static void printl(const char *fmt, ...)
-{
-	va_list args;
-	int len;
-	struct timespec now;
-	char tbuf[256];
-
-	va_start(args, fmt);
-	getnstimeofday(&now);
-
-	now = timespec_sub(now, dccpw.tstart);
-
-	len = sprintf(tbuf, "%lu.%06lu ",
-		      (unsigned long) now.tv_sec,
-		      (unsigned long) now.tv_nsec / NSEC_PER_USEC);
-	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
-	va_end(args);
-
-	kfifo_put(dccpw.fifo, tbuf, len);
-	wake_up(&dccpw.wait);
-}
-
-static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
-			 struct msghdr *msg, size_t size)
+static void jdccp_write_xmit(struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 	struct ccid3_hc_tx_sock *hctx = NULL;
+	struct timespec tv;
+	char buf[256];
+	int len, ccid = ccid_get_current_tx_ccid(dccp_sk(sk));
 
-	if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
+	if (ccid == DCCPC_CCID3)
 		hctx = ccid3_hc_tx_sk(sk);
 
-	if (port == 0 || ntohs(inet->dport) == port ||
-	    ntohs(inet->sport) == port) {
-		if (hctx)
-			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
-			       "%llu %llu %d\n",
+	if (!port || ntohs(inet->dport) == port || ntohs(inet->sport) == port) {
+
+		tv  = ktime_to_timespec(ktime_sub(ktime_get(), dccpw.start));
+		len = sprintf(buf, "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u %d",
+			       (unsigned long)tv.tv_sec,
+			       (unsigned long)tv.tv_nsec,
 			       NIPQUAD(inet->saddr), ntohs(inet->sport),
-			       NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), ccid);
+
+		if (hctx)
+			len += sprintf(buf + len, " %d %d %d %u %u %u %d",
 			       hctx->s, hctx->rtt, hctx->p, hctx->x_calc,
-			       hctx->x_recv >> 6, hctx->x >> 6, hctx->t_ipi);
-		else
-			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
-			       NIPQUAD(inet->saddr), ntohs(inet->sport),
-			       NIPQUAD(inet->daddr), ntohs(inet->dport), size);
+			       (unsigned)(hctx->x_recv >> 6),
+			       (unsigned)(hctx->x >> 6), hctx->t_ipi);
+
+		len += sprintf(buf + len, "\n");
+		kfifo_put(dccpw.fifo, buf, len);
+		wake_up(&dccpw.wait);
 	}
 
 	jprobe_return();
-	return 0;
 }
 
 static struct jprobe dccp_send_probe = {
 	.kp	= {
-		.symbol_name = "dccp_sendmsg",
+		.symbol_name = "dccp_write_xmit",
 	},
-	.entry	= jdccp_sendmsg,
+	.entry	= jdccp_write_xmit,
 };
 
 static int dccpprobe_open(struct inode *inode, struct file *file)
 {
 	kfifo_reset(dccpw.fifo);
-	getnstimeofday(&dccpw.tstart);
+	dccpw.start = ktime_get();
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From ff49e27089ec363b7fc3849504e0435d447ab18a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Ack Vector interface clean-up

This patch brings the Ack Vector interface up to date. Its main purpose is
to lay the basis for the subsequent patches of this set, which will use the
new data structure fields and routines.

There are no real algorithmic changes, rather an adaptation:

 (1) Replaced the static Ack Vector size (2) with a #define so that it can
     be adapted (with low loss / Ack Ratio, a value of 1 works, so 2 seems
     to be sufficient for the moment) and added a solution so that computing
     the ECN nonce will continue to work - even with larger Ack Vectors.

 (2) Replaced the #defines for Ack Vector states with a complete enum.

 (3) Replaced #defines to compute Ack Vector length and state with general
     purpose routines (inlines), and updated code to use these.

 (4) Added a `tail' field (conversion to circular buffer in subsequent patch).

 (5) Updated the (outdated) documentation for Ack Vector struct.

 (6) All sequence number containers now trimmed to 48 bits.

 (7) Removal of unused bits:
     * removed dccpav_ack_nonce from struct dccp_ackvec, since this is already
       redundantly stored in the `dccpavr_ack_nonce' (of Ack Vector record);
     * removed Elapsed Time for Ack Vectors (it was nowhere used);
     * replaced semantics of dccpavr_sent_len with dccpavr_ack_runlen, since
       the code needs to be able to remember the old run length;
     * reduced the de-/allocation routines (redundant / duplicate tests).


Justification for removing Elapsed Time information [can be removed]:
---------------------------------------------------------------------
 1. The Elapsed Time information for Ack Vectors was nowhere used in the code.
 2. DCCP does not implement rate-based pacing of acknowledgments. The only
    recommendation for always including Elapsed Time is in section 11.3 of
    RFC 4340: "Receivers that rate-pace acknowledgements SHOULD [...]
    include Elapsed Time options". But such is not the case here.
 3. It does not really improve estimation accuracy. The Elapsed Time field only
    records the time between the arrival of the last acknowledgeable packet and
    the time the Ack Vector is sent out. Since Linux does not (yet) implement
    delayed Acks, the time difference will typically be small, since often the
    arrival of a data packet triggers sending feedback at the HC-receiver.


Justification for changes in de-/allocation routines [can be removed]:
----------------------------------------------------------------------
  * INIT_LIST_HEAD in dccp_ackvec_record_new was redundant, since the list
    pointers were later overwritten when the node was added via list_add();
  * dccp_ackvec_record_new() was called in a single place only;
  * calls to list_del_init() before calling dccp_ackvec_record_delete() were
    redundant, since subsequently the entire element was k-freed;
  * since all calls to dccp_ackvec_record_delete() were preceded to a call to
    list_del_init(), the WARN_ON test would never evaluate to true;
  * since all calls to dccp_ackvec_record_delete() were made from within
    list_for_each_entry_safe(), the test for avr == NULL was redundant;
  * list_empty() in ackvec_free was redundant, since the same condition is
    embedded in the loop condition of the subsequent list_for_each_entry_safe().

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c      | 178 ++++++++++++++++++-------------------------------
 net/dccp/ackvec.h      | 103 +++++++++++++++-------------
 net/dccp/ccids/ccid2.c |  13 ++--
 net/dccp/input.c       |   6 +-
 4 files changed, 127 insertions(+), 173 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 01e4d39fa23..85ad70cfba5 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -1,7 +1,8 @@
 /*
  *  net/dccp/ackvec.c
  *
- *  An implementation of the DCCP protocol
+ *  An implementation of Ack Vectors for the DCCP protocol
+ *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  *
  *      This program is free software; you can redistribute it and/or modify it
@@ -23,24 +24,32 @@
 static struct kmem_cache *dccp_ackvec_slab;
 static struct kmem_cache *dccp_ackvec_record_slab;
 
-static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 {
-	struct dccp_ackvec_record *avr =
-			kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
+	struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
+
+	if (av != NULL) {
+		av->av_buf_head	= DCCPAV_MAX_ACKVEC_LEN - 1;
+		INIT_LIST_HEAD(&av->av_records);
+	}
+	return av;
+}
 
-	if (avr != NULL)
-		INIT_LIST_HEAD(&avr->avr_node);
+static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
+{
+	struct dccp_ackvec_record *cur, *next;
 
-	return avr;
+	list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
+		kmem_cache_free(dccp_ackvec_record_slab, cur);
+	INIT_LIST_HEAD(&av->av_records);
 }
 
-static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
+void dccp_ackvec_free(struct dccp_ackvec *av)
 {
-	if (unlikely(avr == NULL))
-		return;
-	/* Check if deleting a linked record */
-	WARN_ON(!list_empty(&avr->avr_node));
-	kmem_cache_free(dccp_ackvec_record_slab, avr);
+	if (likely(av != NULL)) {
+		dccp_ackvec_purge_records(av);
+		kmem_cache_free(dccp_ackvec_slab, av);
+	}
 }
 
 static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
@@ -68,24 +77,16 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
 	/* Figure out how many options do we need to represent the ackvec */
 	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts, i;
-	u32 elapsed_time;
+	u16 len = av->av_vec_len + 2 * nr_opts;
+	u8 i, nonce = 0;
 	const unsigned char *tail, *from;
 	unsigned char *to;
 	struct dccp_ackvec_record *avr;
-	suseconds_t delta;
 
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
 		return -1;
 
-	delta = ktime_us_delta(ktime_get_real(), av->av_time);
-	elapsed_time = delta / 10;
-
-	if (elapsed_time != 0 &&
-	    dccp_insert_option_elapsed_time(sk, skb, elapsed_time))
-		return -1;
-
-	avr = dccp_ackvec_record_new();
+	avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 	if (avr == NULL)
 		return -1;
 
@@ -94,7 +95,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	to   = skb_push(skb, len);
 	len  = av->av_vec_len;
 	from = av->av_buf + av->av_buf_head;
-	tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
+	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
 
 	for (i = 0; i < nr_opts; ++i) {
 		int copylen = len;
@@ -102,7 +103,13 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 		if (len > DCCP_SINGLE_OPT_MAXLEN)
 			copylen = DCCP_SINGLE_OPT_MAXLEN;
 
-		*to++ = DCCPO_ACK_VECTOR_0;
+		/*
+		 * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
+		 * its type; ack_nonce is the sum of all individual buf_nonce's.
+		 */
+		nonce ^= av->av_buf_nonce[i];
+
+		*to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
 		*to++ = copylen + 2;
 
 		/* Check if buf_head wraps */
@@ -123,75 +130,24 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/*
-	 *	From RFC 4340, A.2:
-	 *
-	 *	For each acknowledgement it sends, the HC-Receiver will add an
-	 *	acknowledgement record.  ack_seqno will equal the HC-Receiver
-	 *	sequence number it used for the ack packet; ack_ptr will equal
-	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
-	 *	equal buf_nonce.
+	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
 	 */
-	avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-	avr->avr_ack_ptr   = av->av_buf_head;
-	avr->avr_ack_ackno = av->av_buf_ackno;
-	avr->avr_ack_nonce = av->av_buf_nonce;
-	avr->avr_sent_len  = av->av_vec_len;
+	avr->avr_ack_seqno  = DCCP_SKB_CB(skb)->dccpd_seq;
+	avr->avr_ack_ptr    = av->av_buf_head;
+	avr->avr_ack_ackno  = av->av_buf_ackno;
+	avr->avr_ack_nonce  = nonce;
+	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
 
 	dccp_ackvec_insert_avr(av, avr);
 
 	dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
 		      "ack_ackno=%llu\n",
-		      dccp_role(sk), avr->avr_sent_len,
+		      dccp_role(sk), avr->avr_ack_runlen,
 		      (unsigned long long)avr->avr_ack_seqno,
 		      (unsigned long long)avr->avr_ack_ackno);
 	return 0;
 }
 
-struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
-{
-	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
-
-	if (av != NULL) {
-		av->av_buf_head	 = DCCP_MAX_ACKVEC_LEN - 1;
-		av->av_buf_ackno = UINT48_MAX + 1;
-		av->av_buf_nonce = 0;
-		av->av_time	 = ktime_set(0, 0);
-		av->av_vec_len	 = 0;
-		INIT_LIST_HEAD(&av->av_records);
-	}
-
-	return av;
-}
-
-void dccp_ackvec_free(struct dccp_ackvec *av)
-{
-	if (unlikely(av == NULL))
-		return;
-
-	if (!list_empty(&av->av_records)) {
-		struct dccp_ackvec_record *avr, *next;
-
-		list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
-			list_del_init(&avr->avr_node);
-			dccp_ackvec_record_delete(avr);
-		}
-	}
-
-	kmem_cache_free(dccp_ackvec_slab, av);
-}
-
-static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
-				   const u32 index)
-{
-	return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
-}
-
-static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
-				 const u32 index)
-{
-	return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
-}
-
 /*
  * If several packets are missing, the HC-Receiver may prefer to enter multiple
  * bytes with run length 0, rather than a single byte with a larger run length;
@@ -204,7 +160,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 	unsigned int gap;
 	long new_head;
 
-	if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
+	if (av->av_vec_len + packets > DCCPAV_MAX_ACKVEC_LEN)
 		return -ENOBUFS;
 
 	gap	 = packets - 1;
@@ -212,18 +168,18 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 
 	if (new_head < 0) {
 		if (gap > 0) {
-			memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
+			memset(av->av_buf, DCCPAV_NOT_RECEIVED,
 			       gap + new_head + 1);
 			gap = -new_head;
 		}
-		new_head += DCCP_MAX_ACKVEC_LEN;
+		new_head += DCCPAV_MAX_ACKVEC_LEN;
 	}
 
 	av->av_buf_head = new_head;
 
 	if (gap > 0)
 		memset(av->av_buf + av->av_buf_head + 1,
-		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
+		       DCCPAV_NOT_RECEIVED, gap);
 
 	av->av_buf[av->av_buf_head] = state;
 	av->av_vec_len += packets;
@@ -236,6 +192,8 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		    const u64 ackno, const u8 state)
 {
+	u8 *cur_head = av->av_buf + av->av_buf_head,
+	   *buf_end  = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
 	/*
 	 * Check at the right places if the buffer is full, if it is, tell the
 	 * caller to start dropping packets till the HC-Sender acks our ACK
@@ -260,7 +218,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 
 	/* See if this is the first ackno being inserted */
 	if (av->av_vec_len == 0) {
-		av->av_buf[av->av_buf_head] = state;
+		*cur_head = state;
 		av->av_vec_len = 1;
 	} else if (after48(ackno, av->av_buf_ackno)) {
 		const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
@@ -269,10 +227,9 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		 * Look if the state of this packet is the same as the
 		 * previous ackno and if so if we can bump the head len.
 		 */
-		if (delta == 1 &&
-		    dccp_ackvec_state(av, av->av_buf_head) == state &&
-		    dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
-			av->av_buf[av->av_buf_head]++;
+		if (delta == 1 && dccp_ackvec_state(cur_head) == state &&
+		    dccp_ackvec_runlen(cur_head) < DCCPAV_MAX_RUNLEN)
+			*cur_head += 1;
 		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
 			return -ENOBUFS;
 	} else {
@@ -285,21 +242,17 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		 *	could reduce the complexity of this scan.)
 		 */
 		u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
-		u32 index = av->av_buf_head;
 
 		while (1) {
-			const u8 len = dccp_ackvec_len(av, index);
-			const u8 av_state = dccp_ackvec_state(av, index);
+			const u8 len = dccp_ackvec_runlen(cur_head);
 			/*
 			 * valid packets not yet in av_buf have a reserved
 			 * entry, with a len equal to 0.
 			 */
-			if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
-			    len == 0 && delta == 0) { /* Found our
-							 reserved seat! */
+			if (*cur_head == DCCPAV_NOT_RECEIVED && delta == 0) {
 				dccp_pr_debug("Found %llu reserved seat!\n",
 					      (unsigned long long)ackno);
-				av->av_buf[index] = state;
+				*cur_head = state;
 				goto out;
 			}
 			/* len == 0 means one packet */
@@ -307,13 +260,12 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 				goto out_duplicate;
 
 			delta -= len + 1;
-			if (++index == DCCP_MAX_ACKVEC_LEN)
-				index = 0;
+			if (++cur_head == buf_end)
+				cur_head = av->av_buf;
 		}
 	}
 
 	av->av_buf_ackno = ackno;
-	av->av_time = ktime_get_real();
 out:
 	return 0;
 
@@ -333,13 +285,13 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
 	if (av->av_buf_head <= avr->avr_ack_ptr)
 		av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
 	else
-		av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
+		av->av_vec_len = DCCPAV_MAX_ACKVEC_LEN - 1 -
 				 av->av_buf_head + avr->avr_ack_ptr;
 
 	/* free records */
 	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
-		list_del_init(&avr->avr_node);
-		dccp_ackvec_record_delete(avr);
+		list_del(&avr->avr_node);
+		kmem_cache_free(dccp_ackvec_record_slab, avr);
 	}
 }
 
@@ -357,7 +309,7 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
 		if (ackno == avr->avr_ack_seqno) {
 			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
 				      "ack_ackno=%llu, ACKED!\n",
-				      dccp_role(sk), 1,
+				      dccp_role(sk), avr->avr_ack_runlen,
 				      (unsigned long long)avr->avr_ack_seqno,
 				      (unsigned long long)avr->avr_ack_ackno);
 			dccp_ackvec_throw_record(av, avr);
@@ -387,7 +339,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 	 */
 	avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
 	while (i--) {
-		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
+		const u8 rl = dccp_ackvec_runlen(vector);
 		u64 ackno_end_rl;
 
 		dccp_set_seqno(&ackno_end_rl, *ackno - rl);
@@ -404,8 +356,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 		break;
 found:
 		if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
-			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
-			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
+			if (dccp_ackvec_state(vector) != DCCPAV_NOT_RECEIVED) {
 				dccp_pr_debug("%s ACK vector 0, len=%d, "
 					      "ack_seqno=%llu, ack_ackno=%llu, "
 					      "ACKED!\n",
@@ -448,10 +399,9 @@ int __init dccp_ackvec_init(void)
 	if (dccp_ackvec_slab == NULL)
 		goto out_err;
 
-	dccp_ackvec_record_slab =
-			kmem_cache_create("dccp_ackvec_record",
-					  sizeof(struct dccp_ackvec_record),
-					  0, SLAB_HWCACHE_ALIGN, NULL);
+	dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
+					     sizeof(struct dccp_ackvec_record),
+					     0, SLAB_HWCACHE_ALIGN, NULL);
 	if (dccp_ackvec_record_slab == NULL)
 		goto out_destroy_slab;
 
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 1c10814fdf7..df18f9030db 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -3,9 +3,9 @@
 /*
  *  net/dccp/ackvec.h
  *
- *  An implementation of the DCCP protocol
+ *  An implementation of Ack Vectors for the DCCP protocol
+ *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
- *
  *	This program is free software; you can redistribute it and/or modify it
  *	under the terms of the GNU General Public License version 2 as
  *	published by the Free Software Foundation.
@@ -13,75 +13,84 @@
 
 #include <linux/dccp.h>
 #include <linux/compiler.h>
-#include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
-/* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
+/*
+ * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
+ * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
+ * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
+ * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
+ */
+#define DCCPAV_NUM_ACKVECS	2
+#define DCCPAV_MAX_ACKVEC_LEN	(DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
 
 /* Estimated minimum average Ack Vector length - used for updating MPS */
 #define DCCPAV_MIN_OPTLEN	16
 
-#define DCCP_ACKVEC_STATE_RECEIVED	0
-#define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
-#define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)
+enum dccp_ackvec_states {
+	DCCPAV_RECEIVED =	0x00,
+	DCCPAV_ECN_MARKED =	0x40,
+	DCCPAV_RESERVED =	0x80,
+	DCCPAV_NOT_RECEIVED =	0xC0
+};
+#define DCCPAV_MAX_RUNLEN	0x3F
 
-#define DCCP_ACKVEC_STATE_MASK		0xC0 /* 11000000 */
-#define DCCP_ACKVEC_LEN_MASK		0x3F /* 00111111 */
+static inline u8 dccp_ackvec_runlen(const u8 *cell)
+{
+	return *cell & DCCPAV_MAX_RUNLEN;
+}
 
-/** struct dccp_ackvec - ack vector
- *
- * This data structure is the one defined in RFC 4340, Appendix A.
- *
- * @av_buf_head - circular buffer head
- * @av_buf_tail - circular buffer tail
- * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
- *		       buffer (i.e. %av_buf_head)
- * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
- * 		       by the buffer with State 0
- *
- * Additionally, the HC-Receiver must keep some information about the
- * Ack Vectors it has recently sent. For each packet sent carrying an
- * Ack Vector, it remembers four variables:
+static inline u8 dccp_ackvec_state(const u8 *cell)
+{
+	return *cell & ~DCCPAV_MAX_RUNLEN;
+}
+
+/** struct dccp_ackvec - Ack Vector main data structure
  *
- * @av_records - list of dccp_ackvec_record
- * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ * This implements a fixed-size circular buffer within an array and is largely
+ * based on Appendix A of RFC 4340.
  *
- * @av_time - the time in usecs
- * @av_buf - circular buffer of acknowledgeable packets
+ * @av_buf:	   circular buffer storage area
+ * @av_buf_head:   head index; begin of live portion in @av_buf
+ * @av_buf_tail:   tail index; first index _after_ the live portion in @av_buf
+ * @av_buf_ackno:  highest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_buf_nonce:  ECN nonce sums, each covering subsequent segments of up to
+ *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
+ * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
+ * @av_veclen:	   length of the live portion of @av_buf
  */
 struct dccp_ackvec {
-	u64			av_buf_ackno;
-	struct list_head	av_records;
-	ktime_t			av_time;
+	u8			av_buf[DCCPAV_MAX_ACKVEC_LEN];
 	u16			av_buf_head;
+	u16			av_buf_tail;
+	u64			av_buf_ackno:48;
+	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
+	struct list_head	av_records;
 	u16			av_vec_len;
-	u8			av_buf_nonce;
-	u8			av_ack_nonce;
-	u8			av_buf[DCCP_MAX_ACKVEC_LEN];
 };
 
-/** struct dccp_ackvec_record - ack vector record
+/** struct dccp_ackvec_record - Records information about sent Ack Vectors
  *
- * ACK vector record as defined in Appendix A of spec.
+ * These list entries define the additional information which the HC-Receiver
+ * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
  *
- * The list is sorted by avr_ack_seqno
+ * @avr_node:	    the list node in @av_records
+ * @avr_ack_seqno:  sequence number of the packet the Ack Vector was sent on
+ * @avr_ack_ackno:  the Ack number that this record/Ack Vector refers to
+ * @avr_ack_ptr:    pointer into @av_buf where this record starts
+ * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
+ * @avr_ack_nonce:  the sum of @av_buf_nonce's at the time this record was sent
  *
- * @avr_node - node in av_records
- * @avr_ack_seqno - sequence number of the packet this record was sent on
- * @avr_ack_ackno - sequence number being acknowledged
- * @avr_ack_ptr - pointer into av_buf where this record starts
- * @avr_ack_nonce - av_ack_nonce at the time this record was sent
- * @avr_sent_len - lenght of the record in av_buf
+ * The list as a whole is sorted in descending order by @avr_ack_seqno.
  */
 struct dccp_ackvec_record {
 	struct list_head avr_node;
-	u64		 avr_ack_seqno;
-	u64		 avr_ack_ackno;
+	u64		 avr_ack_seqno:48;
+	u64		 avr_ack_ackno:48;
 	u16		 avr_ack_ptr;
-	u16		 avr_sent_len;
-	u8		 avr_ack_nonce;
+	u8		 avr_ack_runlen;
+	u8		 avr_ack_nonce:1;
 };
 
 struct sock;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index f56ab68a4b7..813d5cd40e8 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -580,8 +580,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 					 &vector, &veclen)) != -1) {
 		/* go through this ack vector */
 		while (veclen--) {
-			const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-			u64 ackno_end_rl = SUB48(ackno, rl);
+			u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector));
 
 			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
 				       (unsigned long long)ackno,
@@ -604,17 +603,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = *vector &
-						 DCCP_ACKVEC_STATE_MASK;
+				const u8 state = dccp_ackvec_state(vector);
 
 				/* new packet received or marked */
-				if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+				if (state != DCCPAV_NOT_RECEIVED &&
 				    !seqp->ccid2s_acked) {
-					if (state ==
-					    DCCP_ACKVEC_STATE_ECN_MARKED) {
+					if (state == DCCPAV_ECN_MARKED)
 						ccid2_congestion_event(sk,
 								       seqp);
-					} else
+					else
 						ccid2_new_ack(sk, seqp,
 							      &maxincr);
 
diff --git a/net/dccp/input.c b/net/dccp/input.c
index e3f43d55e3c..70ad0ba7214 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -377,8 +377,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 	if (dp->dccps_hc_rx_ackvec != NULL &&
 	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-			    DCCP_SKB_CB(skb)->dccpd_seq,
-			    DCCP_ACKVEC_STATE_RECEIVED))
+			    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
 		goto discard;
 	dccp_deliver_input_to_ccids(sk, skb);
 
@@ -627,8 +626,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (dp->dccps_hc_rx_ackvec != NULL &&
 		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-				    DCCP_SKB_CB(skb)->dccpd_seq,
-				    DCCP_ACKVEC_STATE_RECEIVED))
+				    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
 			goto discard;
 
 		dccp_deliver_input_to_ccids(sk, skb);
-- 
cgit v1.2.3-70-g09d2


From 4829007c7bc689cbc290fc09eccbe90bd52c2a5e Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Separate internals of Ack Vectors from option-parsing
 code

This patch
 * separates Ack Vector housekeeping code from option-insertion code;
 * shifts option-specific code from ackvec.c into options.c;
 * introduces a dedicated routine to take care of the Ack Vector records;
 * simplifies the dccp_ackvec_insert_avr() routine: the BUG_ON was redundant,
   since the list is automatically arranged in descending order of ack_seqno.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c  | 100 ++++++++++-------------------------------------------
 net/dccp/ackvec.h  |   5 ++-
 net/dccp/options.c |  60 ++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 85 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 85ad70cfba5..de84dd34c93 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -52,99 +52,35 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
 	}
 }
 
-static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
-				   struct dccp_ackvec_record *avr)
-{
-	/*
-	 * AVRs are sorted by seqno. Since we are sending them in order, we
-	 * just add the AVR at the head of the list.
-	 * -sorbo.
-	 */
-	if (!list_empty(&av->av_records)) {
-		const struct dccp_ackvec_record *head =
-					list_entry(av->av_records.next,
-						   struct dccp_ackvec_record,
-						   avr_node);
-		BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
-	}
-
-	list_add(&avr->avr_node, &av->av_records);
-}
-
-int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+/**
+ * dccp_ackvec_update_records  -  Record information about sent Ack Vectors
+ * @av:		Ack Vector records to update
+ * @seqno:	Sequence number of the packet carrying the Ack Vector just sent
+ * @nonce_sum:	The sum of all buffer nonces contained in the Ack Vector
+ */
+int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
-	/* Figure out how many options do we need to represent the ackvec */
-	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts;
-	u8 i, nonce = 0;
-	const unsigned char *tail, *from;
-	unsigned char *to;
 	struct dccp_ackvec_record *avr;
 
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
-		return -1;
-
 	avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 	if (avr == NULL)
-		return -1;
-
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
-	to   = skb_push(skb, len);
-	len  = av->av_vec_len;
-	from = av->av_buf + av->av_buf_head;
-	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
-
-	for (i = 0; i < nr_opts; ++i) {
-		int copylen = len;
-
-		if (len > DCCP_SINGLE_OPT_MAXLEN)
-			copylen = DCCP_SINGLE_OPT_MAXLEN;
-
-		/*
-		 * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
-		 * its type; ack_nonce is the sum of all individual buf_nonce's.
-		 */
-		nonce ^= av->av_buf_nonce[i];
-
-		*to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
-		*to++ = copylen + 2;
-
-		/* Check if buf_head wraps */
-		if (from + copylen > tail) {
-			const u16 tailsize = tail - from;
-
-			memcpy(to, from, tailsize);
-			to	+= tailsize;
-			len	-= tailsize;
-			copylen	-= tailsize;
-			from	= av->av_buf;
-		}
-
-		memcpy(to, from, copylen);
-		from += copylen;
-		to   += copylen;
-		len  -= copylen;
-	}
+		return -ENOBUFS;
 
-	/*
-	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
-	 */
-	avr->avr_ack_seqno  = DCCP_SKB_CB(skb)->dccpd_seq;
+	avr->avr_ack_seqno  = seqno;
 	avr->avr_ack_ptr    = av->av_buf_head;
 	avr->avr_ack_ackno  = av->av_buf_ackno;
-	avr->avr_ack_nonce  = nonce;
+	avr->avr_ack_nonce  = nonce_sum;
 	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
+	/*
+	 * Since GSS is incremented for each packet, the list is automatically
+	 * arranged in descending order of @ack_seqno.
+	 */
+	list_add(&avr->avr_node, &av->av_records);
 
-	dccp_ackvec_insert_avr(av, avr);
-
-	dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
-		      "ack_ackno=%llu\n",
-		      dccp_role(sk), avr->avr_ack_runlen,
+	dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
 		      (unsigned long long)avr->avr_ack_seqno,
-		      (unsigned long long)avr->avr_ack_ackno);
+		      (unsigned long long)avr->avr_ack_ackno,
+		      avr->avr_ack_runlen);
 	return 0;
 }
 
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index df18f9030db..b34e5ed4c34 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -112,7 +112,7 @@ extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 			     u64 *ackno, const u8 opt,
 			     const u8 *value, const u8 len);
 
-extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
+extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
 
 static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
 {
@@ -155,8 +155,7 @@ static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 	return -1;
 }
 
-static inline int dccp_insert_option_ackvec(const struct sock *sk,
-					    const struct sk_buff *skb)
+static inline int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 nonce)
 {
 	return -1;
 }
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 9fe0510f402..392d7db3134 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -428,6 +428,66 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
 	return 0;
 }
 
+static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	/* Figure out how many options do we need to represent the ackvec */
+	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
+	u16 len = av->av_vec_len + 2 * nr_opts;
+	u8 i, nonce = 0;
+	const unsigned char *tail, *from;
+	unsigned char *to;
+
+	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+		return -1;
+
+	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+	to   = skb_push(skb, len);
+	len  = av->av_vec_len;
+	from = av->av_buf + av->av_buf_head;
+	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
+
+	for (i = 0; i < nr_opts; ++i) {
+		int copylen = len;
+
+		if (len > DCCP_SINGLE_OPT_MAXLEN)
+			copylen = DCCP_SINGLE_OPT_MAXLEN;
+
+		/*
+		 * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
+		 * its type; ack_nonce is the sum of all individual buf_nonce's.
+		 */
+		nonce ^= av->av_buf_nonce[i];
+
+		*to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
+		*to++ = copylen + 2;
+
+		/* Check if buf_head wraps */
+		if (from + copylen > tail) {
+			const u16 tailsize = tail - from;
+
+			memcpy(to, from, tailsize);
+			to	+= tailsize;
+			len	-= tailsize;
+			copylen	-= tailsize;
+			from	= av->av_buf;
+		}
+
+		memcpy(to, from, copylen);
+		from += copylen;
+		to   += copylen;
+		len  -= copylen;
+	}
+	/*
+	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
+	 */
+	if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce))
+		return -ENOBUFS;
+	return 0;
+}
+
 /**
  * dccp_insert_option_mandatory  -  Mandatory option (5.8.2)
  * Note that since we are using skb_push, this function needs to be called
-- 
cgit v1.2.3-70-g09d2


From d7dc7e5f49299739e610ea8febf9ea91a4dc1ae9 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Implementation of circular Ack Vector buffer with
 overflow handling

This completes the implementation of a circular buffer for Ack Vectors, by
extending the current (linear array-based) implementation.  The changes are:

 (a) An `overflow' flag to deal with the case of overflow. As before, dynamic
     growth of the buffer will not be supported; but code will be added to deal
     robustly with overflowing Ack Vector buffers.

 (b) A `tail_seqno' field. When naively implementing the algorithm of Appendix A
     in RFC 4340, problems arise whenever subsequent Ack Vector records overlap,
     which can bring the entire run length calculation completely out of synch.
     (This is documented on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/\
                                             ack_vectors/tracking_tail_ackno/ .)
 (c) The buffer lengthi is now computed dynamically (i.e. current fill level),
     as the span between head to tail.

As a result, dccp_ackvec_pending() is now simpler - the #ifdef is no longer
necessary since buf_empty is always true when IP_DCCP_ACKVEC is not configured.

Note on overflow handling:
-------------------------
 The Ack Vector code previously simply started to drop packets when the
 Ack Vector buffer overflowed. This means that the userspace application
 will not be able to receive, only because of an Ack Vector storage problem.

 Furthermore, overflow may be transient, so that applications may later
 recover from the overflow. Recovering from dropped packets is more difficult
 (e.g. video key frames).

 Hence the patch uses a different policy: when the buffer overflows, the oldest
 entries are subsequently overwritten. This has a higher chance of recovery.
 Details are on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ack_vectors/

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c  | 31 ++++++++++++++++++++++++++++++-
 net/dccp/ackvec.h  | 17 ++++++++++++++---
 net/dccp/dccp.h    | 14 +++++++-------
 net/dccp/options.c | 10 +++++-----
 4 files changed, 56 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index de84dd34c93..1184d5e5dc9 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -29,7 +29,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 	struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
 
 	if (av != NULL) {
-		av->av_buf_head	= DCCPAV_MAX_ACKVEC_LEN - 1;
+		av->av_buf_head	= av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
 		INIT_LIST_HEAD(&av->av_records);
 	}
 	return av;
@@ -71,6 +71,14 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 	avr->avr_ack_ackno  = av->av_buf_ackno;
 	avr->avr_ack_nonce  = nonce_sum;
 	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
+	/*
+	 * When the buffer overflows, we keep no more than one record. This is
+	 * the simplest way of disambiguating sender-Acks dating from before the
+	 * overflow from sender-Acks which refer to after the overflow; a simple
+	 * solution is preferable here since we are handling an exception.
+	 */
+	if (av->av_overflow)
+		dccp_ackvec_purge_records(av);
 	/*
 	 * Since GSS is incremented for each packet, the list is automatically
 	 * arranged in descending order of @ack_seqno.
@@ -84,6 +92,27 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 	return 0;
 }
 
+/*
+ * Buffer index and length computation using modulo-buffersize arithmetic.
+ * Note that, as pointers move from right to left, head is `before' tail.
+ */
+static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
+{
+	return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
+}
+
+static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
+{
+	return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
+}
+
+u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
+{
+	if (unlikely(av->av_overflow))
+		return DCCPAV_MAX_ACKVEC_LEN;
+	return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
+}
+
 /*
  * If several packets are missing, the HC-Receiver may prefer to enter multiple
  * bytes with run length 0, rather than a single byte with a larger run length;
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index b34e5ed4c34..92f65b0fef5 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -21,6 +21,7 @@
  * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
  * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
  * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
+ * The maximum value is bounded by the u16 types for indices and functions.
  */
 #define DCCPAV_NUM_ACKVECS	2
 #define DCCPAV_MAX_ACKVEC_LEN	(DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
@@ -55,8 +56,10 @@ static inline u8 dccp_ackvec_state(const u8 *cell)
  * @av_buf_head:   head index; begin of live portion in @av_buf
  * @av_buf_tail:   tail index; first index _after_ the live portion in @av_buf
  * @av_buf_ackno:  highest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_tail_ackno: lowest  seqno of acknowledgeable packet recorded in @av_buf
  * @av_buf_nonce:  ECN nonce sums, each covering subsequent segments of up to
  *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
+ * @av_overflow:   if 1 then buf_head == buf_tail indicates buffer wraparound
  * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
  * @av_veclen:	   length of the live portion of @av_buf
  */
@@ -65,7 +68,9 @@ struct dccp_ackvec {
 	u16			av_buf_head;
 	u16			av_buf_tail;
 	u64			av_buf_ackno:48;
+	u64			av_tail_ackno:48;
 	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
+	u8			av_overflow:1;
 	struct list_head	av_records;
 	u16			av_vec_len;
 };
@@ -113,10 +118,11 @@ extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 			     const u8 *value, const u8 len);
 
 extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);
 
-static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
 {
-	return av->av_vec_len;
+	return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
 }
 #else /* CONFIG_IP_DCCP_ACKVEC */
 static inline int dccp_ackvec_init(void)
@@ -160,9 +166,14 @@ static inline int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8
 	return -1;
 }
 
-static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+static inline u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
 {
 	return 0;
 }
+
+static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
+{
+	return true;
+}
 #endif /* CONFIG_IP_DCCP_ACKVEC */
 #endif /* _ACKVEC_H */
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e4d6e76ced4..1e65378eea3 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -448,15 +448,15 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
 	dp->dccps_awh = dp->dccps_gss;
 }
 
+static inline int dccp_ackvec_pending(const struct sock *sk)
+{
+	return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL &&
+	       !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec);
+}
+
 static inline int dccp_ack_pending(const struct sock *sk)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	return
-#ifdef CONFIG_IP_DCCP_ACKVEC
-	       (dp->dccps_hc_rx_ackvec != NULL &&
-		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
-#endif
-	       inet_csk_ack_scheduled(sk);
+	return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
 }
 
 extern int  dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 392d7db3134..3163ae980f1 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -432,9 +432,10 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	const u16 buflen = dccp_ackvec_buflen(av);
 	/* Figure out how many options do we need to represent the ackvec */
-	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts;
+	const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
+	u16 len = buflen + 2 * nr_opts;
 	u8 i, nonce = 0;
 	const unsigned char *tail, *from;
 	unsigned char *to;
@@ -445,7 +446,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
 
 	to   = skb_push(skb, len);
-	len  = av->av_vec_len;
+	len  = buflen;
 	from = av->av_buf + av->av_buf_head;
 	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
 
@@ -583,8 +584,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 			if (dccp_insert_option_timestamp(sk, skb))
 				return -1;
 
-		} else if (dp->dccps_hc_rx_ackvec != NULL &&
-			   dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
+		} else if (dccp_ackvec_pending(sk) &&
 			   dccp_insert_option_ackvec(sk, skb)) {
 				return -1;
 		}
-- 
cgit v1.2.3-70-g09d2


From 68b1de15765f2b0e0925e692dab2b2fa2abd93fc Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Algorithm to update buffer state

This provides a routine to consistently update the buffer state when the
peer acknowledges receipt of Ack Vectors; updating state in the list of Ack
Vectors as well as in the circular buffer.

While based on RFC 4340, several additional (and necessary) precautions were
added to protect the consistency of the buffer state. These additions are
essential, since analysis and experience showed that the basic algorithm was
insufficient for this task (which lead to problems that were hard to debug).

The algorithm now
 * deals with HC-sender acknowledging to HC-receiver and vice versa,
 * keeps track of the last unacknowledged but received seqno in tail_ackno,
 * has special cases to reset the overflow condition when appropriate,
 * is protected against receiving older information (would mess up buffer state).

Note: The older code performed an unnecessary step, where the sender cleared
Ack Vector state by parsing the Ack Vector received by the HC-receiver. Doing
this was entirely redundant, since
 * the receiver always puts the full acknowledgment window (groups 2,3 in 11.4.2)
   into the Ack Vectors it sends; hence the HC-receiver is only interested in the
   highest state that the HC-sender received;
 * this means that the acknowledgment number on the (Data)Ack from the HC-sender
   is sufficient; and work done in parsing earlier state is not necessary, since
   the later state subsumes the  earlier one (see also RFC 4340, A.4).
This older interface (dccp_ackvec_parse()) is therefore removed.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c  | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/dccp/ackvec.h  |  6 ++++
 net/dccp/input.c   |  4 +--
 net/dccp/options.c |  6 ++--
 4 files changed, 98 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 1184d5e5dc9..f1341a617f9 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -92,6 +92,24 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 	return 0;
 }
 
+static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
+						     const u64 ackno)
+{
+	struct dccp_ackvec_record *avr;
+	/*
+	 * Exploit that records are inserted in descending order of sequence
+	 * number, start with the oldest record first. If @ackno is `before'
+	 * the earliest ack_ackno, the packet is too old to be considered.
+	 */
+	list_for_each_entry_reverse(avr, av_list, avr_node) {
+		if (avr->avr_ack_seqno == ackno)
+			return avr;
+		if (before48(ackno, avr->avr_ack_seqno))
+			break;
+	}
+	return NULL;
+}
+
 /*
  * Buffer index and length computation using modulo-buffersize arithmetic.
  * Note that, as pointers move from right to left, head is `before' tail.
@@ -356,6 +374,76 @@ int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 	return 0;
 }
 
+/**
+ * dccp_ackvec_clear_state  -  Perform house-keeping / garbage-collection
+ * This routine is called when the peer acknowledges the receipt of Ack Vectors
+ * up to and including @ackno. While based on on section A.3 of RFC 4340, here
+ * are additional precautions to prevent corrupted buffer state. In particular,
+ * we use tail_ackno to identify outdated records; it always marks the earliest
+ * packet of group (2) in 11.4.2.
+ */
+void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
+ {
+	struct dccp_ackvec_record *avr, *next;
+	u8 runlen_now, eff_runlen;
+	s64 delta;
+
+	avr = dccp_ackvec_lookup(&av->av_records, ackno);
+	if (avr == NULL)
+		return;
+	/*
+	 * Deal with outdated acknowledgments: this arises when e.g. there are
+	 * several old records and the acks from the peer come in slowly. In
+	 * that case we may still have records that pre-date tail_ackno.
+	 */
+	delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
+	if (delta < 0)
+		goto free_records;
+	/*
+	 * Deal with overlapping Ack Vectors: don't subtract more than the
+	 * number of packets between tail_ackno and ack_ackno.
+	 */
+	eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
+
+	runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
+	/*
+	 * The run length of Ack Vector cells does not decrease over time. If
+	 * the run length is the same as at the time the Ack Vector was sent, we
+	 * free the ack_ptr cell. That cell can however not be freed if the run
+	 * length has increased: in this case we need to move the tail pointer
+	 * backwards (towards higher indices), to its next-oldest neighbour.
+	 */
+	if (runlen_now > eff_runlen) {
+
+		av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
+		av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
+
+		/* This move may not have cleared the overflow flag. */
+		if (av->av_overflow)
+			av->av_overflow = (av->av_buf_head == av->av_buf_tail);
+	} else {
+		av->av_buf_tail	= avr->avr_ack_ptr;
+		/*
+		 * We have made sure that avr points to a valid cell within the
+		 * buffer. This cell is either older than head, or equals head
+		 * (empty buffer): in both cases we no longer have any overflow.
+		 */
+		av->av_overflow	= 0;
+	}
+
+	/*
+	 * The peer has acknowledged up to and including ack_ackno. Hence the
+	 * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
+	 */
+	av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
+
+free_records:
+	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
+		list_del(&avr->avr_node);
+		kmem_cache_free(dccp_ackvec_record_slab, avr);
+	}
+}
+
 int __init dccp_ackvec_init(void)
 {
 	dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 92f65b0fef5..b757e9b4110 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -118,6 +118,7 @@ extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 			     const u8 *value, const u8 len);
 
 extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
 extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);
 
 static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
@@ -149,6 +150,11 @@ static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 	return -1;
 }
 
+static inline void dccp_ackvec_clear_state(struct dccp_ackvec *av,
+					    const u64 ackno)
+{
+}
+
 static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
 					       struct sock *sk, const u64 ackno)
 {
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 70ad0ba7214..77a5d57ab70 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -164,8 +164,8 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	if (dp->dccps_hc_rx_ackvec != NULL)
-		dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
-					    DCCP_SKB_CB(skb)->dccpd_ack_seq);
+		dccp_ackvec_clear_state(dp->dccps_hc_rx_ackvec,
+					DCCP_SKB_CB(skb)->dccpd_ack_seq);
 }
 
 static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 3163ae980f1..b11d7b7167f 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 	struct dccp_sock *dp = dccp_sk(sk);
 	const struct dccp_hdr *dh = dccp_hdr(skb);
 	const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
-	u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
 	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
 	unsigned char *opt_ptr = options;
 	const unsigned char *opt_end = (unsigned char *)dh +
@@ -133,9 +132,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		case DCCPO_ACK_VECTOR_1:
 			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
 				break;
-			if (dp->dccps_hc_rx_ackvec != NULL &&
-			    dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
-				goto out_invalid_option;
+			dccp_pr_debug("%s Ack Vector (len=%u)\n", dccp_role(sk),
+				      len);
 			break;
 		case DCCPO_TIMESTAMP:
 			if (len != 4)
-- 
cgit v1.2.3-70-g09d2


From e28fe59f9c82ef55fc9b55e745531c9fed86f00a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Update code for the Ack Vector input/registration
 routine

This patch uupdates the code which registers new packets as received, using the
new circular buffer interface. It contributes a new algorithm which
	* supports both tail/head pointers and buffer wrap-around and
	* deals with overflow (head/tail move in lock-step).

The updated code is also partioned differently, into
	1. dealing with the empty buffer,
	2. adding new packets into non-empty buffer,
	3. reserving space when encountering a `hole' in the sequence space,
	4. updating old state and deciding when old state is irrelevant.

Protection against large burst losses: With regard to (3), it is too costly to
reserve space when there are large bursts of losses. When bursts get too large,
the code does no longer reserve space and just fills in cells normally. This
measure reduces space consumption by a factor of 63.

The code reuses in part the previous implementation by Arnaldo de Melo.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/dccp/ackvec.h |   9 ++++
 2 files changed, 159 insertions(+)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index f1341a617f9..bf9cb7d7549 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -131,6 +131,156 @@ u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
 	return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
 }
 
+/**
+ * dccp_ackvec_update_old  -  Update previous state as per RFC 4340, 11.4.1
+ * @av:		non-empty buffer to update
+ * @distance:   negative or zero distance of @seqno from buf_ackno downward
+ * @seqno:	the (old) sequence number whose record is to be updated
+ * @state:	state in which packet carrying @seqno was received
+ */
+static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
+				   u64 seqno, enum dccp_ackvec_states state)
+{
+	u16 ptr = av->av_buf_head;
+
+	BUG_ON(distance > 0);
+	if (unlikely(dccp_ackvec_is_empty(av)))
+		return;
+
+	do {
+		u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
+
+		if (distance + runlen >= 0) {
+			/*
+			 * Only update the state if packet has not been received
+			 * yet. This is OK as per the second table in RFC 4340,
+			 * 11.4.1; i.e. here we are using the following table:
+			 *                     RECEIVED
+			 *                      0   1   3
+			 *              S     +---+---+---+
+			 *              T   0 | 0 | 0 | 0 |
+			 *              O     +---+---+---+
+			 *              R   1 | 1 | 1 | 1 |
+			 *              E     +---+---+---+
+			 *              D   3 | 0 | 1 | 3 |
+			 *                    +---+---+---+
+			 * The "Not Received" state was set by reserve_seats().
+			 */
+			if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
+				av->av_buf[ptr] = state;
+			else
+				dccp_pr_debug("Not changing %llu state to %u\n",
+					      (unsigned long long)seqno, state);
+			break;
+		}
+
+		distance += runlen + 1;
+		ptr	  = __ackvec_idx_add(ptr, 1);
+
+	} while (ptr != av->av_buf_tail);
+}
+
+/* Mark @num entries after buf_head as "Not yet received". */
+static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
+{
+	u16 start = __ackvec_idx_add(av->av_buf_head, 1),
+	    len	  = DCCPAV_MAX_ACKVEC_LEN - start;
+
+	/* check for buffer wrap-around */
+	if (num > len) {
+		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
+		start = 0;
+		num  -= len;
+	}
+	if (num)
+		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
+}
+
+/**
+ * dccp_ackvec_add_new  -  Record one or more new entries in Ack Vector buffer
+ * @av:		 container of buffer to update (can be empty or non-empty)
+ * @num_packets: number of packets to register (must be >= 1)
+ * @seqno:	 sequence number of the first packet in @num_packets
+ * @state:	 state in which packet carrying @seqno was received
+ */
+static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
+				u64 seqno, enum dccp_ackvec_states state)
+{
+	u32 num_cells = num_packets;
+
+	if (num_packets > DCCPAV_BURST_THRESH) {
+		u32 lost_packets = num_packets - 1;
+
+		DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
+		/*
+		 * We received 1 packet and have a loss of size "num_packets-1"
+		 * which we squeeze into num_cells-1 rather than reserving an
+		 * entire byte for each lost packet.
+		 * The reason is that the vector grows in O(burst_length); when
+		 * it grows too large there will no room left for the payload.
+		 * This is a trade-off: if a few packets out of the burst show
+		 * up later, their state will not be changed; it is simply too
+		 * costly to reshuffle/reallocate/copy the buffer each time.
+		 * Should such problems persist, we will need to switch to a
+		 * different underlying data structure.
+		 */
+		for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
+			u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);
+
+			av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
+			av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
+
+			lost_packets -= len;
+		}
+	}
+
+	if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
+		DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
+		av->av_overflow = true;
+	}
+
+	av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
+	if (av->av_overflow)
+		av->av_buf_tail = av->av_buf_head;
+
+	av->av_buf[av->av_buf_head] = state;
+	av->av_buf_ackno	    = seqno;
+
+	if (num_packets > 1)
+		dccp_ackvec_reserve_seats(av, num_packets - 1);
+}
+
+/**
+ * dccp_ackvec_input  -  Register incoming packet in the buffer
+ */
+void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
+{
+	u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	enum dccp_ackvec_states state = DCCPAV_RECEIVED;
+
+	if (dccp_ackvec_is_empty(av)) {
+		dccp_ackvec_add_new(av, 1, seqno, state);
+		av->av_tail_ackno = seqno;
+
+	} else {
+		s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
+		u8 *current_head = av->av_buf + av->av_buf_head;
+
+		if (num_packets == 1 &&
+		    dccp_ackvec_state(current_head) == state &&
+		    dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
+
+			*current_head   += 1;
+			av->av_buf_ackno = seqno;
+
+		} else if (num_packets > 0) {
+			dccp_ackvec_add_new(av, num_packets, seqno, state);
+		} else {
+			dccp_ackvec_update_old(av, num_packets, seqno, state);
+		}
+	}
+}
+
 /*
  * If several packets are missing, the HC-Receiver may prefer to enter multiple
  * bytes with run length 0, rather than a single byte with a larger run length;
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index b757e9b4110..36ca2e9e5c8 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -29,6 +29,9 @@
 /* Estimated minimum average Ack Vector length - used for updating MPS */
 #define DCCPAV_MIN_OPTLEN	16
 
+/* Threshold for coping with large bursts of losses */
+#define DCCPAV_BURST_THRESH	(DCCPAV_MAX_ACKVEC_LEN / 8)
+
 enum dccp_ackvec_states {
 	DCCPAV_RECEIVED =	0x00,
 	DCCPAV_ECN_MARKED =	0x40,
@@ -117,6 +120,7 @@ extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 			     u64 *ackno, const u8 opt,
 			     const u8 *value, const u8 len);
 
+extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
 extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
 extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
 extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);
@@ -144,6 +148,11 @@ static inline void dccp_ackvec_free(struct dccp_ackvec *av)
 {
 }
 
+static inline void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
+{
+
+}
+
 static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 				  const u64 ackno, const u8 state)
 {
-- 
cgit v1.2.3-70-g09d2


From 283fb4a5f39d1521d53e1044bff0ba2654acf145 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Consolidate Ack-Vector processing within main DCCP
 module

This aggregates Ack Vector processing (handling input and clearing old state)
into one function, for the following reasons and benefits:
 * all Ack Vector-specific processing is now in one place;
 * duplicated code is removed;
 * ensuring sanity: from an Ack Vector point of view, it is better to clear the
                    old state first before entering new state;
 * Ack Event handling happens mostly within the CCIDs, not the main DCCP module.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/input.c | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 77a5d57ab70..9a108ce17fc 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -159,13 +159,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
 	dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
 }
 
-static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
+static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec;
 
-	if (dp->dccps_hc_rx_ackvec != NULL)
-		dccp_ackvec_clear_state(dp->dccps_hc_rx_ackvec,
-					DCCP_SKB_CB(skb)->dccpd_ack_seq);
+	if (av == NULL)
+		return;
+	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+		dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq);
+	dccp_ackvec_input(av, skb);
 }
 
 static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
@@ -364,21 +366,13 @@ discard:
 int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			 const struct dccp_hdr *dh, const unsigned len)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-
 	if (dccp_check_seqno(sk, skb))
 		goto discard;
 
 	if (dccp_parse_options(sk, NULL, skb))
 		return 1;
 
-	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
-		dccp_event_ack_recv(sk, skb);
-
-	if (dp->dccps_hc_rx_ackvec != NULL &&
-	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-			    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
-		goto discard;
+	dccp_handle_ackvec_processing(sk, skb);
 	dccp_deliver_input_to_ccids(sk, skb);
 
 	return __dccp_rcv_established(sk, skb, dh, len);
@@ -621,14 +615,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dccp_parse_options(sk, NULL, skb))
 			return 1;
 
-		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
-			dccp_event_ack_recv(sk, skb);
-
-		if (dp->dccps_hc_rx_ackvec != NULL &&
-		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-				    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
-			goto discard;
-
+		dccp_handle_ackvec_processing(sk, skb);
 		dccp_deliver_input_to_ccids(sk, skb);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From c2f42077bd06f300ae959204f3c007f820f5e769 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Schedule Sync as out-of-band mechanism

The problem with Ack Vectors is that

  i) their length is variable and can in principle grow quite large,
 ii) it is hard to predict exactly how large they will be.

Due to the second point it seems not a good idea to reduce the MPS; in
particular when on average there is enough room for the Ack Vector and an
increase in length is momentarily due to some burst loss, after which the
Ack Vector returns to its normal/average length.

The solution taken by this patch is to subtract a minimum-expected Ack Vector
length from the MPS (previous patch), and to defer any larger Ack Vectors onto
a separate Sync - but only if indeed there is no space left on the skb.

This patch provides the infrastructure to schedule Sync-packets for transporting
(urgent) out-of-band data. Its signalling is quicker than scheduling an Ack, since
it does not need to wait for new application data.

It can thus serve other parts of the DCCP code as well.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/linux/dccp.h |  2 ++
 net/dccp/options.c   | 24 ++++++++++++++++++++----
 net/dccp/output.c    |  8 ++++++++
 3 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 7187bd8a75f..83197b601a4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -462,6 +462,7 @@ struct dccp_ackvec;
  * @dccps_hc_rx_insert_options - receiver wants to add options when acking
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
  * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
+ * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
  * @dccps_xmit_timer - timer for when CCID is not ready to send
  * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
  */
@@ -502,6 +503,7 @@ struct dccp_sock {
 	__u8				dccps_hc_rx_insert_options:1;
 	__u8				dccps_hc_tx_insert_options:1;
 	__u8				dccps_server_timewait:1;
+	__u8				dccps_sync_scheduled:1;
 	struct timer_list		dccps_xmit_timer;
 };
 
diff --git a/net/dccp/options.c b/net/dccp/options.c
index b11d7b7167f..791e07853a7 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -430,6 +430,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 	const u16 buflen = dccp_ackvec_buflen(av);
 	/* Figure out how many options do we need to represent the ackvec */
 	const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
@@ -438,10 +439,25 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	const unsigned char *tail, *from;
 	unsigned char *to;
 
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+	if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+		DCCP_WARN("Lacking space for %u bytes on %s packet\n", len,
+			  dccp_packet_name(dcb->dccpd_type));
 		return -1;
-
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+	}
+	/*
+	 * Since Ack Vectors are variable-length, we can not always predict
+	 * their size. To catch exception cases where the space is running out
+	 * on the skb, a separate Sync is scheduled to carry the Ack Vector.
+	 */
+	if (len > DCCPAV_MIN_OPTLEN &&
+	    len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) {
+		DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), "
+			  "MPS=%u ==> reduce payload size?\n", len, skb->len,
+			  dcb->dccpd_opt_len, dp->dccps_mss_cache);
+		dp->dccps_sync_scheduled = 1;
+		return 0;
+	}
+	dcb->dccpd_opt_len += len;
 
 	to   = skb_push(skb, len);
 	len  = buflen;
@@ -482,7 +498,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	/*
 	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
 	 */
-	if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce))
+	if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce))
 		return -ENOBUFS;
 	return 0;
 }
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 1b316830758..bfda071559f 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -305,6 +305,8 @@ void dccp_write_xmit(struct sock *sk, int block)
 			if (err)
 				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
 					 err);
+			if (dp->dccps_sync_scheduled)
+				dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
 		} else {
 			dccp_pr_debug("packet discarded due to err=%d\n", err);
 			kfree_skb(skb);
@@ -591,6 +593,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
 	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
 	DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
 
+	/*
+	 * Clear the flag in case the Sync was scheduled for out-of-band data,
+	 * such as carrying a long Ack Vector.
+	 */
+	dccp_sk(sk)->dccps_sync_scheduled = 0;
+
 	dccp_transmit_skb(sk, skb);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 5a577b488f687f339dea62e7bb4f4c5793ad523f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Remove old infrastructure

This removes
 * functions for which updates have been provided in the preceding patches and
 * the @av_vec_len field - it is no longer necessary since the buffer length is
   now always computed dynamically;
 * conditional debugging code (CONFIG_IP_DCCP_ACKVEC).

The reason for removing the conditional debugging code is that Ack Vectors are
an almost inevitable necessity - RFC 4341 says that for CCID-2, Ack Vectors must
be used. Furthermore, the code would be only interesting for coding - after some
extensive testing with this patch set, having the debug code around is no longer
of real help.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/Kconfig       |   3 -
 net/dccp/Makefile      |   5 +-
 net/dccp/ackvec.c      | 251 -------------------------------------------------
 net/dccp/ackvec.h      |  79 +---------------
 net/dccp/ccids/Kconfig |   1 -
 5 files changed, 3 insertions(+), 336 deletions(-)

(limited to 'net')

diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 7aa2a7acc7e..206c16ad9c3 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -25,9 +25,6 @@ config INET_DCCP_DIAG
 	def_tristate y if (IP_DCCP = y && INET_DIAG = y)
 	def_tristate m
 
-config IP_DCCP_ACKVEC
-	bool
-
 source "net/dccp/ccids/Kconfig"
 
 menu "DCCP Kernel Hacking"
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index f4f8793aaff..b68440bd7fa 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
 
-dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
+dccp-y := ccid.o feat.o input.o minisocks.o options.o \
+	  output.o proto.o timer.o ackvec.o
 
 dccp_ipv4-y := ipv4.o
 
@@ -8,8 +9,6 @@ dccp_ipv4-y := ipv4.o
 obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
 dccp_ipv6-y := ipv6.o
 
-dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
-
 obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
 obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
 
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index bf9cb7d7549..66b8a51300c 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -9,18 +9,10 @@
  *      under the terms of the GNU General Public License as published by the
  *      Free Software Foundation; version 2 of the License;
  */
-
-#include "ackvec.h"
 #include "dccp.h"
-
-#include <linux/init.h>
-#include <linux/errno.h>
 #include <linux/kernel.h>
-#include <linux/skbuff.h>
 #include <linux/slab.h>
 
-#include <net/sock.h>
-
 static struct kmem_cache *dccp_ackvec_slab;
 static struct kmem_cache *dccp_ackvec_record_slab;
 
@@ -281,249 +273,6 @@ void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
 	}
 }
 
-/*
- * If several packets are missing, the HC-Receiver may prefer to enter multiple
- * bytes with run length 0, rather than a single byte with a larger run length;
- * this simplifies table updates if one of the missing packets arrives.
- */
-static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
-						 const unsigned int packets,
-						 const unsigned char state)
-{
-	unsigned int gap;
-	long new_head;
-
-	if (av->av_vec_len + packets > DCCPAV_MAX_ACKVEC_LEN)
-		return -ENOBUFS;
-
-	gap	 = packets - 1;
-	new_head = av->av_buf_head - packets;
-
-	if (new_head < 0) {
-		if (gap > 0) {
-			memset(av->av_buf, DCCPAV_NOT_RECEIVED,
-			       gap + new_head + 1);
-			gap = -new_head;
-		}
-		new_head += DCCPAV_MAX_ACKVEC_LEN;
-	}
-
-	av->av_buf_head = new_head;
-
-	if (gap > 0)
-		memset(av->av_buf + av->av_buf_head + 1,
-		       DCCPAV_NOT_RECEIVED, gap);
-
-	av->av_buf[av->av_buf_head] = state;
-	av->av_vec_len += packets;
-	return 0;
-}
-
-/*
- * Implements the RFC 4340, Appendix A
- */
-int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-		    const u64 ackno, const u8 state)
-{
-	u8 *cur_head = av->av_buf + av->av_buf_head,
-	   *buf_end  = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
-	/*
-	 * Check at the right places if the buffer is full, if it is, tell the
-	 * caller to start dropping packets till the HC-Sender acks our ACK
-	 * vectors, when we will free up space in av_buf.
-	 *
-	 * We may well decide to do buffer compression, etc, but for now lets
-	 * just drop.
-	 *
-	 * From Appendix A.1.1 (`New Packets'):
-	 *
-	 *	Of course, the circular buffer may overflow, either when the
-	 *	HC-Sender is sending data at a very high rate, when the
-	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
-	 *	or when the HC-Sender is forgetting to acknowledge those acks
-	 *	(so the HC-Receiver is unable to clean up old state). In this
-	 *	case, the HC-Receiver should either compress the buffer (by
-	 *	increasing run lengths when possible), transfer its state to
-	 *	a larger buffer, or, as a last resort, drop all received
-	 *	packets, without processing them whatsoever, until its buffer
-	 *	shrinks again.
-	 */
-
-	/* See if this is the first ackno being inserted */
-	if (av->av_vec_len == 0) {
-		*cur_head = state;
-		av->av_vec_len = 1;
-	} else if (after48(ackno, av->av_buf_ackno)) {
-		const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
-
-		/*
-		 * Look if the state of this packet is the same as the
-		 * previous ackno and if so if we can bump the head len.
-		 */
-		if (delta == 1 && dccp_ackvec_state(cur_head) == state &&
-		    dccp_ackvec_runlen(cur_head) < DCCPAV_MAX_RUNLEN)
-			*cur_head += 1;
-		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
-			return -ENOBUFS;
-	} else {
-		/*
-		 * A.1.2.  Old Packets
-		 *
-		 *	When a packet with Sequence Number S <= buf_ackno
-		 *	arrives, the HC-Receiver will scan the table for
-		 *	the byte corresponding to S. (Indexing structures
-		 *	could reduce the complexity of this scan.)
-		 */
-		u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
-
-		while (1) {
-			const u8 len = dccp_ackvec_runlen(cur_head);
-			/*
-			 * valid packets not yet in av_buf have a reserved
-			 * entry, with a len equal to 0.
-			 */
-			if (*cur_head == DCCPAV_NOT_RECEIVED && delta == 0) {
-				dccp_pr_debug("Found %llu reserved seat!\n",
-					      (unsigned long long)ackno);
-				*cur_head = state;
-				goto out;
-			}
-			/* len == 0 means one packet */
-			if (delta < len + 1)
-				goto out_duplicate;
-
-			delta -= len + 1;
-			if (++cur_head == buf_end)
-				cur_head = av->av_buf;
-		}
-	}
-
-	av->av_buf_ackno = ackno;
-out:
-	return 0;
-
-out_duplicate:
-	/* Duplicate packet */
-	dccp_pr_debug("Received a dup or already considered lost "
-		      "packet: %llu\n", (unsigned long long)ackno);
-	return -EILSEQ;
-}
-
-static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
-				     struct dccp_ackvec_record *avr)
-{
-	struct dccp_ackvec_record *next;
-
-	/* sort out vector length */
-	if (av->av_buf_head <= avr->avr_ack_ptr)
-		av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
-	else
-		av->av_vec_len = DCCPAV_MAX_ACKVEC_LEN - 1 -
-				 av->av_buf_head + avr->avr_ack_ptr;
-
-	/* free records */
-	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
-		list_del(&avr->avr_node);
-		kmem_cache_free(dccp_ackvec_record_slab, avr);
-	}
-}
-
-void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
-				 const u64 ackno)
-{
-	struct dccp_ackvec_record *avr;
-
-	/*
-	 * If we traverse backwards, it should be faster when we have large
-	 * windows. We will be receiving ACKs for stuff we sent a while back
-	 * -sorbo.
-	 */
-	list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
-		if (ackno == avr->avr_ack_seqno) {
-			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
-				      "ack_ackno=%llu, ACKED!\n",
-				      dccp_role(sk), avr->avr_ack_runlen,
-				      (unsigned long long)avr->avr_ack_seqno,
-				      (unsigned long long)avr->avr_ack_ackno);
-			dccp_ackvec_throw_record(av, avr);
-			break;
-		} else if (avr->avr_ack_seqno > ackno)
-			break; /* old news */
-	}
-}
-
-static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
-					    struct sock *sk, u64 *ackno,
-					    const unsigned char len,
-					    const unsigned char *vector)
-{
-	unsigned char i;
-	struct dccp_ackvec_record *avr;
-
-	/* Check if we actually sent an ACK vector */
-	if (list_empty(&av->av_records))
-		return;
-
-	i = len;
-	/*
-	 * XXX
-	 * I think it might be more efficient to work backwards. See comment on
-	 * rcv_ackno. -sorbo.
-	 */
-	avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
-	while (i--) {
-		const u8 rl = dccp_ackvec_runlen(vector);
-		u64 ackno_end_rl;
-
-		dccp_set_seqno(&ackno_end_rl, *ackno - rl);
-
-		/*
-		 * If our AVR sequence number is greater than the ack, go
-		 * forward in the AVR list until it is not so.
-		 */
-		list_for_each_entry_from(avr, &av->av_records, avr_node) {
-			if (!after48(avr->avr_ack_seqno, *ackno))
-				goto found;
-		}
-		/* End of the av_records list, not found, exit */
-		break;
-found:
-		if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
-			if (dccp_ackvec_state(vector) != DCCPAV_NOT_RECEIVED) {
-				dccp_pr_debug("%s ACK vector 0, len=%d, "
-					      "ack_seqno=%llu, ack_ackno=%llu, "
-					      "ACKED!\n",
-					      dccp_role(sk), len,
-					      (unsigned long long)
-					      avr->avr_ack_seqno,
-					      (unsigned long long)
-					      avr->avr_ack_ackno);
-				dccp_ackvec_throw_record(av, avr);
-				break;
-			}
-			/*
-			 * If it wasn't received, continue scanning... we might
-			 * find another one.
-			 */
-		}
-
-		dccp_set_seqno(ackno, ackno_end_rl - 1);
-		++vector;
-	}
-}
-
-int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-		      u64 *ackno, const u8 opt, const u8 *value, const u8 len)
-{
-	if (len > DCCP_SINGLE_OPT_MAXLEN)
-		return -1;
-
-	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
-	dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
-					ackno, len, value);
-	return 0;
-}
-
 /**
  * dccp_ackvec_clear_state  -  Perform house-keeping / garbage-collection
  * This routine is called when the peer acknowledges the receipt of Ack Vectors
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 36ca2e9e5c8..db447503b63 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -64,7 +64,6 @@ static inline u8 dccp_ackvec_state(const u8 *cell)
  *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
  * @av_overflow:   if 1 then buf_head == buf_tail indicates buffer wraparound
  * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
- * @av_veclen:	   length of the live portion of @av_buf
  */
 struct dccp_ackvec {
 	u8			av_buf[DCCPAV_MAX_ACKVEC_LEN];
@@ -75,7 +74,6 @@ struct dccp_ackvec {
 	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
 	u8			av_overflow:1;
 	struct list_head	av_records;
-	u16			av_vec_len;
 };
 
 /** struct dccp_ackvec_record - Records information about sent Ack Vectors
@@ -101,25 +99,12 @@ struct dccp_ackvec_record {
 	u8		 avr_ack_nonce:1;
 };
 
-struct sock;
-struct sk_buff;
-
-#ifdef CONFIG_IP_DCCP_ACKVEC
-extern int dccp_ackvec_init(void);
+extern int  dccp_ackvec_init(void);
 extern void dccp_ackvec_exit(void);
 
 extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
 extern void dccp_ackvec_free(struct dccp_ackvec *av);
 
-extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-			   const u64 ackno, const u8 state);
-
-extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
-					struct sock *sk, const u64 ackno);
-extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-			     u64 *ackno, const u8 opt,
-			     const u8 *value, const u8 len);
-
 extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
 extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
 extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
@@ -129,66 +114,4 @@ static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
 {
 	return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
 }
-#else /* CONFIG_IP_DCCP_ACKVEC */
-static inline int dccp_ackvec_init(void)
-{
-	return 0;
-}
-
-static inline void dccp_ackvec_exit(void)
-{
-}
-
-static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
-{
-	return NULL;
-}
-
-static inline void dccp_ackvec_free(struct dccp_ackvec *av)
-{
-}
-
-static inline void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
-{
-
-}
-
-static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-				  const u64 ackno, const u8 state)
-{
-	return -1;
-}
-
-static inline void dccp_ackvec_clear_state(struct dccp_ackvec *av,
-					    const u64 ackno)
-{
-}
-
-static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
-					       struct sock *sk, const u64 ackno)
-{
-}
-
-static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-				    const u64 *ackno, const u8 opt,
-				    const u8 *value, const u8 len)
-{
-	return -1;
-}
-
-static inline int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 nonce)
-{
-	return -1;
-}
-
-static inline u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
-{
-	return 0;
-}
-
-static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
-{
-	return true;
-}
-#endif /* CONFIG_IP_DCCP_ACKVEC */
 #endif /* _ACKVEC_H */
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 12275943eab..44c7e90248f 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -4,7 +4,6 @@ menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
 config IP_DCCP_CCID2
 	tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
 	def_tristate IP_DCCP
-	select IP_DCCP_ACKVEC
 	---help---
 	  CCID 2, TCP-like Congestion Control, denotes Additive Increase,
 	  Multiplicative Decrease (AIMD) congestion control with behavior
-- 
cgit v1.2.3-70-g09d2


From c8bf462bc567c3dcb083ff95cc13060dd06f138c Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Separate option parsing from CCID processing

This patch replaces an almost identical replication of code: large parts
of dccp_parse_options() re-appeared as ccid2_ackvector() in ccid2.c.

Apart from the duplication, this caused two more problems:
 1. CCIDs should not need to be concerned with parsing header options;
 2. one can not assume that Ack Vectors appear as a contiguous area within an
    skb, it is legal to insert other options and/or padding in between. The
    current code would throw an error and stop reading in such a case.

The patch provides a new data structure and associated list housekeeping.

Only small changes were necessary to integrate with CCID-2: data structure
initialisation, adapt list traversal routine, and add call to the provided
cleanup routine.

The latter also lead to fixing the following BUG: CCID-2 so far ignored
Ack Vectors on all packets other than Ack/DataAck, which is incorrect,
since Ack Vectors can be present on any packet that has an Ack field.

Details:
--------
 * received Ack Vectors are parsed by dccp_parse_options() alone, which passes
   the result on to the CCID-specific routine ccid_hc_tx_parse_options();
 * CCIDs interested in using/decoding Ack Vector information will add code
   to fetch parsed Ack Vectors via this interface;
 * a data structure, `struct dccp_ackvec_parsed' is provided as interface;
 * this structure arranges Ack Vectors of the same skb into a FIFO order;
 * a doubly-linked list is used to keep the required FIFO code small.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c      |  28 ++++++++++
 net/dccp/ackvec.h      |  19 +++++++
 net/dccp/ccids/ccid2.c | 135 +++++++++++++++----------------------------------
 net/dccp/ccids/ccid2.h |   2 +
 net/dccp/options.c     |  17 ++++---
 5 files changed, 101 insertions(+), 100 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 66b8a51300c..41819848bdd 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -343,6 +343,34 @@ free_records:
 	}
 }
 
+/*
+ *	Routines to keep track of Ack Vectors received in an skb
+ */
+int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
+{
+	struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
+
+	if (new == NULL)
+		return -ENOBUFS;
+	new->vec   = vec;
+	new->len   = len;
+	new->nonce = nonce;
+
+	list_add_tail(&new->node, head);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
+
+void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
+{
+	struct dccp_ackvec_parsed *cur, *next;
+
+	list_for_each_entry_safe(cur, next, parsed_chunks, node)
+		kfree(cur);
+	INIT_LIST_HEAD(parsed_chunks);
+}
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
+
 int __init dccp_ackvec_init(void)
 {
 	dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index db447503b63..6cdca79a99f 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -114,4 +114,23 @@ static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
 {
 	return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
 }
+
+/**
+ * struct dccp_ackvec_parsed  -  Record offsets of Ack Vectors in skb
+ * @vec:	start of vector (offset into skb)
+ * @len:	length of @vec
+ * @nonce:	whether @vec had an ECN nonce of 0 or 1
+ * @node:	FIFO - arranged in descending order of ack_ackno
+ * This structure is used by CCIDs to access Ack Vectors in a received skb.
+ */
+struct dccp_ackvec_parsed {
+	u8		 *vec,
+			 len,
+			 nonce:1;
+	struct list_head node;
+};
+
+extern int dccp_ackvec_parsed_add(struct list_head *head,
+				  u8 *vec, u8 len, u8 nonce);
+extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
 #endif /* _ACKVEC_H */
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 813d5cd40e8..bbf16b35734 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -317,68 +317,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 #endif
 }
 
-/* XXX Lame code duplication!
- * returns -1 if none was found.
- * else returns the next offset to use in the function call.
- */
-static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
-			   unsigned char **vec, unsigned char *veclen)
-{
-	const struct dccp_hdr *dh = dccp_hdr(skb);
-	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
-	unsigned char *opt_ptr;
-	const unsigned char *opt_end = (unsigned char *)dh +
-					(dh->dccph_doff * 4);
-	unsigned char opt, len;
-	unsigned char *value;
-
-	BUG_ON(offset < 0);
-	options += offset;
-	opt_ptr = options;
-	if (opt_ptr >= opt_end)
-		return -1;
-
-	while (opt_ptr != opt_end) {
-		opt   = *opt_ptr++;
-		len   = 0;
-		value = NULL;
-
-		/* Check if this isn't a single byte option */
-		if (opt > DCCPO_MAX_RESERVED) {
-			if (opt_ptr == opt_end)
-				goto out_invalid_option;
-
-			len = *opt_ptr++;
-			if (len < 3)
-				goto out_invalid_option;
-			/*
-			 * Remove the type and len fields, leaving
-			 * just the value size
-			 */
-			len     -= 2;
-			value   = opt_ptr;
-			opt_ptr += len;
-
-			if (opt_ptr > opt_end)
-				goto out_invalid_option;
-		}
-
-		switch (opt) {
-		case DCCPO_ACK_VECTOR_0:
-		case DCCPO_ACK_VECTOR_1:
-			*vec	= value;
-			*veclen = len;
-			return offset + (opt_ptr - options);
-		}
-	}
-
-	return -1;
-
-out_invalid_option:
-	DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
-	return -1;
-}
-
 static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
@@ -499,15 +437,27 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 		ccid2_change_l_ack_ratio(sk, hctx->cwnd);
 }
 
+static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
+				     u8 option, u8 *optval, u8 optlen)
+{
+	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+	switch (option) {
+	case DCCPO_ACK_VECTOR_0:
+	case DCCPO_ACK_VECTOR_1:
+		return dccp_ackvec_parsed_add(&hctx->av_chunks, optval, optlen,
+					      option - DCCPO_ACK_VECTOR_0);
+	}
+	return 0;
+}
+
 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+	struct dccp_ackvec_parsed *avp;
 	u64 ackno, seqno;
 	struct ccid2_seq *seqp;
-	unsigned char *vector;
-	unsigned char veclen;
-	int offset = 0;
 	int done = 0;
 	unsigned int maxincr = 0;
 
@@ -542,17 +492,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* check forward path congestion */
-	/* still didn't send out new data packets */
-	if (hctx->seqh == hctx->seqt)
+	if (dccp_packet_without_ack(skb))
 		return;
 
-	switch (DCCP_SKB_CB(skb)->dccpd_type) {
-	case DCCP_PKT_ACK:
-	case DCCP_PKT_DATAACK:
-		break;
-	default:
-		return;
-	}
+	/* still didn't send out new data packets */
+	if (hctx->seqh == hctx->seqt)
+		goto done;
 
 	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
 	if (after48(ackno, hctx->high_ack))
@@ -576,15 +521,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
 
 	/* go through all ack vectors */
-	while ((offset = ccid2_ackvector(sk, skb, offset,
-					 &vector, &veclen)) != -1) {
+	list_for_each_entry(avp, &hctx->av_chunks, node) {
 		/* go through this ack vector */
-		while (veclen--) {
-			u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector));
+		for (; avp->len--; avp->vec++) {
+			u64 ackno_end_rl = SUB48(ackno,
+						 dccp_ackvec_runlen(avp->vec));
 
-			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
+			ccid2_pr_debug("ackvec %llu |%u,%u|\n",
 				       (unsigned long long)ackno,
-				       (unsigned long long)ackno_end_rl);
+				       dccp_ackvec_state(avp->vec) >> 6,
+				       dccp_ackvec_runlen(avp->vec));
 			/* if the seqno we are analyzing is larger than the
 			 * current ackno, then move towards the tail of our
 			 * seqnos.
@@ -603,7 +549,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = dccp_ackvec_state(vector);
+				const u8 state = dccp_ackvec_state(avp->vec);
 
 				/* new packet received or marked */
 				if (state != DCCPAV_NOT_RECEIVED &&
@@ -630,7 +576,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				break;
 
 			ackno = SUB48(ackno_end_rl, 1);
-			vector++;
 		}
 		if (done)
 			break;
@@ -694,6 +639,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	ccid2_hc_tx_check_sanity(hctx);
+done:
+	dccp_ackvec_parsed_cleanup(&hctx->av_chunks);
 }
 
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -727,6 +674,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	hctx->rpdupack  = -1;
 	hctx->last_cong = jiffies;
 	setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk);
+	INIT_LIST_HEAD(&hctx->av_chunks);
 
 	ccid2_hc_tx_check_sanity(hctx);
 	return 0;
@@ -762,17 +710,18 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 }
 
 static struct ccid_operations ccid2 = {
-	.ccid_id		= DCCPC_CCID2,
-	.ccid_name		= "TCP-like",
-	.ccid_owner		= THIS_MODULE,
-	.ccid_hc_tx_obj_size	= sizeof(struct ccid2_hc_tx_sock),
-	.ccid_hc_tx_init	= ccid2_hc_tx_init,
-	.ccid_hc_tx_exit	= ccid2_hc_tx_exit,
-	.ccid_hc_tx_send_packet	= ccid2_hc_tx_send_packet,
-	.ccid_hc_tx_packet_sent	= ccid2_hc_tx_packet_sent,
-	.ccid_hc_tx_packet_recv	= ccid2_hc_tx_packet_recv,
-	.ccid_hc_rx_obj_size	= sizeof(struct ccid2_hc_rx_sock),
-	.ccid_hc_rx_packet_recv	= ccid2_hc_rx_packet_recv,
+	.ccid_id		  = DCCPC_CCID2,
+	.ccid_name		  = "TCP-like",
+	.ccid_owner		  = THIS_MODULE,
+	.ccid_hc_tx_obj_size	  = sizeof(struct ccid2_hc_tx_sock),
+	.ccid_hc_tx_init	  = ccid2_hc_tx_init,
+	.ccid_hc_tx_exit	  = ccid2_hc_tx_exit,
+	.ccid_hc_tx_send_packet	  = ccid2_hc_tx_send_packet,
+	.ccid_hc_tx_packet_sent	  = ccid2_hc_tx_packet_sent,
+	.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
+	.ccid_hc_tx_packet_recv	  = ccid2_hc_tx_packet_recv,
+	.ccid_hc_rx_obj_size	  = sizeof(struct ccid2_hc_rx_sock),
+	.ccid_hc_rx_packet_recv	  = ccid2_hc_rx_packet_recv,
 };
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index d7815804bce..907deed255b 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -47,6 +47,7 @@ struct ccid2_seq {
  * @lastrtt: time RTT was last measured
  * @rpseq: last consecutive seqno
  * @rpdupack: dupacks since rpseq
+ * @av_chunks: list of Ack Vectors received on current skb
  */
 struct ccid2_hc_tx_sock {
 	u32			cwnd;
@@ -66,6 +67,7 @@ struct ccid2_hc_tx_sock {
 	int			rpdupack;
 	unsigned long		last_cong;
 	u64			high_ack;
+	struct list_head	av_chunks;
 };
 
 struct ccid2_hc_rx_sock {
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 791e07853a7..e5a32979d7d 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -128,13 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			if (rc)
 				goto out_featneg_failed;
 			break;
-		case DCCPO_ACK_VECTOR_0:
-		case DCCPO_ACK_VECTOR_1:
-			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
-				break;
-			dccp_pr_debug("%s Ack Vector (len=%u)\n", dccp_role(sk),
-				      len);
-			break;
 		case DCCPO_TIMESTAMP:
 			if (len != 4)
 				goto out_invalid_option;
@@ -224,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 						     pkt_type, opt, value, len))
 				goto out_invalid_option;
 			break;
+		case DCCPO_ACK_VECTOR_0:
+		case DCCPO_ACK_VECTOR_1:
+			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
+				break;
+			/*
+			 * Ack vectors are processed by the TX CCID if it is
+			 * interested. The RX CCID need not parse Ack Vectors,
+			 * since it is only interested in clearing old state.
+			 * Fall through.
+			 */
 		case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
 			if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
 						     pkt_type, opt, value, len))
-- 
cgit v1.2.3-70-g09d2


From f4a66ca4d2ff093c0f9111b449a248ffb8209b4d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Return-value convention of hc_tx_send_packet()

This patch reorganises the return value convention of the CCID TX sending
function, to permit more flexible schemes, as required by subsequent patches.

Currently the convention is
 * values < 0     mean error,
 * a value == 0   means "send now", and
 * a value x > 0  means "send in x milliseconds".

The patch provides symbolic constants and a function to interpret return values.
In addition, it caps the maximum positive return value to 0xFFFF milliseconds,
corresponding to 65.535 seconds.

This is possible since in CCID-3 the maximum inter-packet gap is t_mbi = 64 sec.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccid.h        | 34 +++++++++++++++++++++++++++++++---
 net/dccp/ccids/ccid3.c | 12 ++++++------
 2 files changed, 37 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 430a3c24f97..d27054ba215 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -129,13 +129,41 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
 extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
 extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
 
+/*
+ * Congestion control of queued data packets via CCID decision.
+ *
+ * The TX CCID performs its congestion-control by indicating whether and when a
+ * queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
+ * The following modes are supported via the symbolic constants below:
+ * - timer-based pacing    (CCID returns a delay value in milliseconds);
+ * - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
+ */
+
+enum ccid_dequeueing_decision {
+	CCID_PACKET_SEND_AT_ONCE =	 0x00000,  /* "green light": no delay */
+	CCID_PACKET_DELAY_MAX =		 0x0FFFF,  /* maximum delay in msecs  */
+	CCID_PACKET_DELAY =		 0x10000,  /* CCID msec-delay mode */
+	CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000,  /* CCID autonomous mode */
+	CCID_PACKET_ERR =		 0xF0000,  /* error condition */
+};
+
+static inline int ccid_packet_dequeue_eval(const int return_code)
+{
+	if (return_code < 0)
+		return CCID_PACKET_ERR;
+	if (return_code == 0)
+		return CCID_PACKET_SEND_AT_ONCE;
+	if (return_code <= CCID_PACKET_DELAY_MAX)
+		return CCID_PACKET_DELAY;
+	return return_code;
+}
+
 static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
 					 struct sk_buff *skb)
 {
-	int rc = 0;
 	if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
-		rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
-	return rc;
+		return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
+	return CCID_PACKET_SEND_AT_ONCE;
 }
 
 static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 0751a8fa936..0d406f82e43 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -269,11 +269,11 @@ out:
 	sock_put(sk);
 }
 
-/*
- * returns
- *   > 0: delay (in msecs) that should pass before actually sending
- *   = 0: can send immediately
- *   < 0: error condition; do not send packet
+/**
+ * ccid3_hc_tx_send_packet  -  Delay-based dequeueing of TX packets
+ * @skb: next packet candidate to send on @sk
+ * This function uses the convention of ccid_packet_dequeue_eval() and
+ * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
  */
 static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
@@ -349,7 +349,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 
 	/* set the nominal send time for the next following packet */
 	hctx->t_nom = ktime_add_us(hctx->t_nom, hctx->t_ipi);
-	return 0;
+	return CCID_PACKET_SEND_AT_ONCE;
 }
 
 static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
-- 
cgit v1.2.3-70-g09d2


From e7937772d7a2b0127cc4cbc67bc594e139fdaf63 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Extend CCID packet dequeueing interface

This extends the packet dequeuing interface of dccp_write_xmit() to allow
 1. CCIDs to take care of timing when the next packet may be sent;
 2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds).

The main purpose is to take CCID2 out of its polling mode (when it is network-
limited, it tries every millisecond to send, without interruption).
The interface can also be used to support other CCIDs.

The mode of operation for (2) is as follows:
 * new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(),
 * ccid_hc_tx_send_packet() detects that it may not send (e.g. window full),
 * it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER',
 * dccp_write_xmit() returns without further action;
 * after some time the wait-condition for CCID becomes true,
 * that CCID schedules the tasklet,
 * tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(),
 * since the wait-condition is now true, ccid_hc_tx_packet() returns "send now",
 * packet is sent, and possibly more (since dccp_write_xmit() loops).

Code reuse: the taskled function calls dccp_write_xmit(), the timer function
            reduces to a wrapper around the same code.

If the tasklet finds that the socket is locked, it re-schedules the tasklet
function (not the tasklet) after one jiffy.

Changed DCCP_BUG to dccp_pr_debug when transmit_skb returns an error (e.g. when a
local qdisc is used, NET_XMIT_DROP=1 can be returned for many packets).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/linux/dccp.h |   4 +-
 net/dccp/output.c    | 129 ++++++++++++++++++++++++++++++++-------------------
 net/dccp/timer.c     |  25 +++++-----
 3 files changed, 98 insertions(+), 60 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 83197b601a4..eed52bcd35d 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -463,7 +463,8 @@ struct dccp_ackvec;
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
  * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
  * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
- * @dccps_xmit_timer - timer for when CCID is not ready to send
+ * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
+ * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
  * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
  */
 struct dccp_sock {
@@ -504,6 +505,7 @@ struct dccp_sock {
 	__u8				dccps_hc_tx_insert_options:1;
 	__u8				dccps_server_timewait:1;
 	__u8				dccps_sync_scheduled:1;
+	struct tasklet_struct		dccps_xmitlet;
 	struct timer_list		dccps_xmit_timer;
 };
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index bfda071559f..9afd58e39e2 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -251,65 +251,98 @@ do_interrupted:
 	goto out;
 }
 
+/**
+ * dccp_xmit_packet  -  Send data packet under control of CCID
+ * Transmits next-queued payload and informs CCID to account for the packet.
+ */
+static void dccp_xmit_packet(struct sock *sk)
+{
+	int err, len;
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
+
+	if (unlikely(skb == NULL))
+		return;
+	len = skb->len;
+
+	if (sk->sk_state == DCCP_PARTOPEN) {
+		const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
+		/*
+		 * See 8.1.5 - Handshake Completion.
+		 *
+		 * For robustness we resend Confirm options until the client has
+		 * entered OPEN. During the initial feature negotiation, the MPS
+		 * is smaller than usual, reduced by the Change/Confirm options.
+		 */
+		if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
+			DCCP_WARN("Payload too large (%d) for featneg.\n", len);
+			dccp_send_ack(sk);
+			dccp_feat_list_purge(&dp->dccps_featneg);
+		}
+
+		inet_csk_schedule_ack(sk);
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+					      inet_csk(sk)->icsk_rto,
+					      DCCP_RTO_MAX);
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+	} else if (dccp_ack_pending(sk)) {
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+	} else {
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
+	}
+
+	err = dccp_transmit_skb(sk, skb);
+	if (err)
+		dccp_pr_debug("transmit_skb() returned err=%d\n", err);
+	/*
+	 * Register this one as sent even if an error occurred. To the remote
+	 * end a local packet drop is indistinguishable from network loss, i.e.
+	 * any local drop will eventually be reported via receiver feedback.
+	 */
+	ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
+
+	/*
+	 * If the CCID needs to transfer additional header options out-of-band
+	 * (e.g. Ack Vectors or feature-negotiation options), it activates this
+	 * flag to schedule a Sync. The Sync will automatically incorporate all
+	 * currently pending header options, thus clearing the backlog.
+	 */
+	if (dp->dccps_sync_scheduled)
+		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
+}
+
 void dccp_write_xmit(struct sock *sk, int block)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
 
 	while ((skb = skb_peek(&sk->sk_write_queue))) {
-		int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
+		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 
-		if (err > 0) {
+		switch (ccid_packet_dequeue_eval(rc)) {
+		case CCID_PACKET_WILL_DEQUEUE_LATER:
+			return;
+		case CCID_PACKET_DELAY:
 			if (!block) {
 				sk_reset_timer(sk, &dp->dccps_xmit_timer,
-						msecs_to_jiffies(err)+jiffies);
+						msecs_to_jiffies(rc)+jiffies);
+				return;
+			}
+			rc = dccp_wait_for_ccid(sk, skb, rc);
+			if (rc && rc != -EINTR) {
+				DCCP_BUG("err=%d after dccp_wait_for_ccid", rc);
+				skb_dequeue(&sk->sk_write_queue);
+				kfree_skb(skb);
 				break;
-			} else
-				err = dccp_wait_for_ccid(sk, skb, err);
-			if (err && err != -EINTR)
-				DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
-		}
-
-		skb_dequeue(&sk->sk_write_queue);
-		if (err == 0) {
-			struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-			const int len = skb->len;
-
-			if (sk->sk_state == DCCP_PARTOPEN) {
-				const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
-				/*
-				 * See 8.1.5 - Handshake Completion.
-				 *
-				 * For robustness we resend Confirm options until the client has
-				 * entered OPEN. During the initial feature negotiation, the MPS
-				 * is smaller than usual, reduced by the Change/Confirm options.
-				 */
-				if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
-					DCCP_WARN("Payload too large (%d) for featneg.\n", len);
-					dccp_send_ack(sk);
-					dccp_feat_list_purge(&dp->dccps_featneg);
-				}
-
-				inet_csk_schedule_ack(sk);
-				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  inet_csk(sk)->icsk_rto,
-						  DCCP_RTO_MAX);
-				dcb->dccpd_type = DCCP_PKT_DATAACK;
-			} else if (dccp_ack_pending(sk))
-				dcb->dccpd_type = DCCP_PKT_DATAACK;
-			else
-				dcb->dccpd_type = DCCP_PKT_DATA;
-
-			err = dccp_transmit_skb(sk, skb);
-			ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
-			if (err)
-				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
-					 err);
-			if (dp->dccps_sync_scheduled)
-				dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
-		} else {
-			dccp_pr_debug("packet discarded due to err=%d\n", err);
+			}
+			/* fall through */
+		case CCID_PACKET_SEND_AT_ONCE:
+			dccp_xmit_packet(sk);
+			break;
+		case CCID_PACKET_ERR:
+			skb_dequeue(&sk->sk_write_queue);
 			kfree_skb(skb);
+			dccp_pr_debug("packet discarded due to err=%d\n", rc);
 		}
 	}
 }
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 162d1e683c3..9369aca4b0e 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -237,32 +237,35 @@ out:
 	sock_put(sk);
 }
 
-/* Transmit-delay timer: used by the CCIDs to delay actual send time */
-static void dccp_write_xmit_timer(unsigned long data)
+/**
+ * dccp_write_xmitlet  -  Workhorse for CCID packet dequeueing interface
+ * See the comments above %ccid_dequeueing_decision for supported modes.
+ */
+static void dccp_write_xmitlet(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
-	struct dccp_sock *dp = dccp_sk(sk);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk))
-		sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
+		sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
 	else
 		dccp_write_xmit(sk, 0);
 	bh_unlock_sock(sk);
-	sock_put(sk);
 }
 
-static void dccp_init_write_xmit_timer(struct sock *sk)
+static void dccp_write_xmit_timer(unsigned long data)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-
-	setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
-			(unsigned long)sk);
+	dccp_write_xmitlet(data);
+	sock_put((struct sock *)data);
 }
 
 void dccp_init_xmit_timers(struct sock *sk)
 {
-	dccp_init_write_xmit_timer(sk);
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
+	setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
+							     (unsigned long)sk);
 	inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
 				  &dccp_keepalive_timer);
 }
-- 
cgit v1.2.3-70-g09d2


From 146993cf5174472644ed11bd5fb539f0af8bfa49 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Refine the wait-for-ccid mechanism

This extends the existing wait-for-ccid routine so that it may be used with
different types of CCID. It further addresses the problems listed below.

The code looks if the write queue is non-empty and grants the TX CCID up to
`timeout' jiffies to drain the queue. It will instead purge that queue if
 * the delay suggested by the CCID exceeds the time budget;
 * a socket error occurred while waiting for the CCID;
 * there is a signal pending (eg. annoyed user pressed Control-C);
 * the CCID does not support delays (we don't know how long it will take).


                 D e t a i l s  [can be removed]
                 -------------------------------
DCCP's sending mechanism functions a bit like non-blocking I/O: dccp_sendmsg()
will enqueue up to net.dccp.default.tx_qlen packets (default=5), without waiting
for them to be released to the network.

Rate-based CCIDs, such as CCID3/4, can impose sending delays of up to maximally
64 seconds (t_mbi in RFC 3448). Hence the write queue may still contain packets
when the application closes. Since the write queue is congestion-controlled by
the CCID, draining the queue is also under control of the CCID.

There are several problems that needed to be addressed:
 1) The queue-drain mechanism only works with rate-based CCIDs. If CCID2 for
    example has a full TX queue and becomes network-limited just as the
    application wants to close, then waiting for CCID2 to become unblocked could
    lead to an indefinite  delay (i.e., application "hangs").
 2) Since each TX CCID in turn uses a feedback mechanism, there may be changes
    in its sending policy while the queue is being drained. This can lead to
    further delays during which the application will not be able to terminate.
 3) The minimum wait time for CCID3/4 can be expected to be the queue length
    times the current inter-packet delay. For example if tx_qlen=100 and a delay
    of 15 ms is used for each packet, then the application would have to wait
    for a minimum of 1.5 seconds before being allowed to exit.
 4) There is no way for the user/application to control this behaviour. It would
    be good to use the timeout argument of dccp_close() as an upper bound. Then
    the maximum time that an application is willing to wait for its CCIDs to can
    be set via the SO_LINGER option.

These problems are addressed by giving the CCID a grace period of up to the
`timeout' value.

The wait-for-ccid function is, as before, used when the application
 (a) has read all the data in its receive buffer and
 (b) if SO_LINGER was set with a non-zero linger time, or
 (c) the socket is either in the OPEN (active close) or in the PASSIVE_CLOSEREQ
     state (client application closes after receiving CloseReq).

In addition, there is a catch-all case by calling __skb_queue_purge() after
waiting for the CCID. This is necessary since the write queue may still have
data when
 (a) the host has been passively-closed,
 (b) abnormal termination (unread data, zero linger time),
 (c) wait-for-ccid could not finish within the given time limit.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h   |   3 +-
 net/dccp/output.c | 115 ++++++++++++++++++++++++++++++------------------------
 net/dccp/proto.c  |  15 ++++++-
 net/dccp/timer.c  |   2 +-
 4 files changed, 82 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 1e65378eea3..74c90cd2767 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -234,8 +234,9 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
-extern void dccp_write_xmit(struct sock *sk, int block);
+extern void dccp_write_xmit(struct sock *sk);
 extern void dccp_write_space(struct sock *sk);
+extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
 
 extern void dccp_init_xmit_timers(struct sock *sk);
 static inline void dccp_clear_xmit_timers(struct sock *sk)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 9afd58e39e2..39056dc6135 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -206,49 +206,29 @@ void dccp_write_space(struct sock *sk)
 }
 
 /**
- * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
+ * dccp_wait_for_ccid  -  Await CCID send permission
  * @sk:    socket to wait for
- * @skb:   current skb to pass on for waiting
- * @delay: sleep timeout in milliseconds (> 0)
- * This function is called by default when the socket is closed, and
- * when a non-zero linger time is set on the socket. For consistency
+ * @delay: timeout in jiffies
+ * This is used by CCIDs which need to delay the send time in process context.
  */
-static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
+static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
 	DEFINE_WAIT(wait);
-	unsigned long jiffdelay;
-	int rc;
-
-	do {
-		dccp_pr_debug("delayed send by %d msec\n", delay);
-		jiffdelay = msecs_to_jiffies(delay);
-
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	long remaining;
 
-		sk->sk_write_pending++;
-		release_sock(sk);
-		schedule_timeout(jiffdelay);
-		lock_sock(sk);
-		sk->sk_write_pending--;
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	sk->sk_write_pending++;
+	release_sock(sk);
 
-		if (sk->sk_err)
-			goto do_error;
-		if (signal_pending(current))
-			goto do_interrupted;
+	remaining = schedule_timeout(delay);
 
-		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-	} while ((delay = rc) > 0);
-out:
+	lock_sock(sk);
+	sk->sk_write_pending--;
 	finish_wait(sk->sk_sleep, &wait);
-	return rc;
-
-do_error:
-	rc = -EPIPE;
-	goto out;
-do_interrupted:
-	rc = -EINTR;
-	goto out;
+
+	if (signal_pending(current) || sk->sk_err)
+		return -1;
+	return remaining;
 }
 
 /**
@@ -311,7 +291,53 @@ static void dccp_xmit_packet(struct sock *sk)
 		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
 }
 
-void dccp_write_xmit(struct sock *sk, int block)
+/**
+ * dccp_flush_write_queue  -  Drain queue at end of connection
+ * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
+ * happen that the TX queue is not empty at the end of a connection. We give the
+ * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
+ * returns with a non-empty write queue, it will be purged later.
+ */
+void dccp_flush_write_queue(struct sock *sk, long *time_budget)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb;
+	long delay, rc;
+
+	while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
+		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
+
+		switch (ccid_packet_dequeue_eval(rc)) {
+		case CCID_PACKET_WILL_DEQUEUE_LATER:
+			/*
+			 * If the CCID determines when to send, the next sending
+			 * time is unknown or the CCID may not even send again
+			 * (e.g. remote host crashes or lost Ack packets).
+			 */
+			DCCP_WARN("CCID did not manage to send all packets\n");
+			return;
+		case CCID_PACKET_DELAY:
+			delay = msecs_to_jiffies(rc);
+			if (delay > *time_budget)
+				return;
+			rc = dccp_wait_for_ccid(sk, delay);
+			if (rc < 0)
+				return;
+			*time_budget -= (delay - rc);
+			/* check again if we can send now */
+			break;
+		case CCID_PACKET_SEND_AT_ONCE:
+			dccp_xmit_packet(sk);
+			break;
+		case CCID_PACKET_ERR:
+			skb_dequeue(&sk->sk_write_queue);
+			kfree_skb(skb);
+			dccp_pr_debug("packet discarded due to err=%ld\n", rc);
+		}
+	}
+}
+
+void dccp_write_xmit(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
@@ -323,19 +349,9 @@ void dccp_write_xmit(struct sock *sk, int block)
 		case CCID_PACKET_WILL_DEQUEUE_LATER:
 			return;
 		case CCID_PACKET_DELAY:
-			if (!block) {
-				sk_reset_timer(sk, &dp->dccps_xmit_timer,
-						msecs_to_jiffies(rc)+jiffies);
-				return;
-			}
-			rc = dccp_wait_for_ccid(sk, skb, rc);
-			if (rc && rc != -EINTR) {
-				DCCP_BUG("err=%d after dccp_wait_for_ccid", rc);
-				skb_dequeue(&sk->sk_write_queue);
-				kfree_skb(skb);
-				break;
-			}
-			/* fall through */
+			sk_reset_timer(sk, &dp->dccps_xmit_timer,
+				       jiffies + msecs_to_jiffies(rc));
+			return;
 		case CCID_PACKET_SEND_AT_ONCE:
 			dccp_xmit_packet(sk);
 			break;
@@ -660,7 +676,6 @@ void dccp_send_close(struct sock *sk, const int active)
 		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
 
 	if (active) {
-		dccp_write_xmit(sk, 1);
 		dccp_skb_entail(sk, skb);
 		dccp_transmit_skb(sk, skb_clone(skb, prio));
 		/*
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 11905e0cf8f..8c125ffab1c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -735,7 +735,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		goto out_discard;
 
 	skb_queue_tail(&sk->sk_write_queue, skb);
-	dccp_write_xmit(sk,0);
+	dccp_write_xmit(sk);
 out_release:
 	release_sock(sk);
 	return rc ? : len;
@@ -958,9 +958,22 @@ void dccp_close(struct sock *sk, long timeout)
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
 	} else if (sk->sk_state != DCCP_CLOSED) {
+		/*
+		 * Normal connection termination. May need to wait if there are
+		 * still packets in the TX queue that are delayed by the CCID.
+		 */
+		dccp_flush_write_queue(sk, &timeout);
 		dccp_terminate_connection(sk);
 	}
 
+	/*
+	 * Flush write queue. This may be necessary in several cases:
+	 * - we have been closed by the peer but still have application data;
+	 * - abortive termination (unread data or zero linger time),
+	 * - normal termination but queue could not be flushed within time limit
+	 */
+	__skb_queue_purge(&sk->sk_write_queue);
+
 	sk_stream_wait_close(sk, timeout);
 
 adjudge_to_death:
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 9369aca4b0e..e02d5a94f4c 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -249,7 +249,7 @@ static void dccp_write_xmitlet(unsigned long data)
 	if (sock_owned_by_user(sk))
 		sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
 	else
-		dccp_write_xmit(sk, 0);
+		dccp_write_xmit(sk);
 	bh_unlock_sock(sk);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 83337dae6ca94d801b6700600244865cd694205b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Stop polling

This updates CCID2 to use the CCID dequeuing mechanism, converting from
previous constant-polling to a now event-driven mechanism.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c | 21 +++++++++++++--------
 net/dccp/ccids/ccid2.h |  5 +++++
 2 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index bbf16b35734..c7d83e3c164 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -123,12 +123,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
 
 static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
-	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-
-	if (hctx->pipe < hctx->cwnd)
-		return 0;
-
-	return 1; /* XXX CCID should dequeue when ready instead of polling */
+	if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
+		return CCID_PACKET_WILL_DEQUEUE_LATER;
+	return CCID_PACKET_SEND_AT_ONCE;
 }
 
 static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
@@ -168,6 +165,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+	const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
 	long s;
 
 	bh_lock_sock(sk);
@@ -187,8 +185,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	if (s > 60)
 		hctx->rto = 60 * HZ;
 
-	ccid2_start_rto_timer(sk);
-
 	/* adjust pipe, cwnd etc */
 	hctx->ssthresh = hctx->cwnd / 2;
 	if (hctx->ssthresh < 2)
@@ -205,6 +201,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	hctx->rpdupack = -1;
 	ccid2_change_l_ack_ratio(sk, 1);
 	ccid2_hc_tx_check_sanity(hctx);
+
+	/* if we were blocked before, we may now send cwnd=1 packet */
+	if (sender_was_blocked)
+		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+	ccid2_start_rto_timer(sk);
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -455,6 +456,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+	const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
 	struct dccp_ackvec_parsed *avp;
 	u64 ackno, seqno;
 	struct ccid2_seq *seqp;
@@ -640,6 +642,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 	ccid2_hc_tx_check_sanity(hctx);
 done:
+	/* check if incoming Acks allow pending packets to be sent */
+	if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx))
+		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
 	dccp_ackvec_parsed_cleanup(&hctx->av_chunks);
 }
 
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 907deed255b..d5900dd5d4f 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -70,6 +70,11 @@ struct ccid2_hc_tx_sock {
 	struct list_head	av_chunks;
 };
 
+static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hctx)
+{
+	return (hctx->pipe >= hctx->cwnd);
+}
+
 struct ccid2_hc_rx_sock {
 	int			data;
 };
-- 
cgit v1.2.3-70-g09d2


From c6f0f2e71f3088a0f05502d6adb0f667b84028c3 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Remove redundant sanity tests

This removes the ccid2_hc_tx_check_sanity function: it is redundant.

Details:
========
The tx_check_sanity function performs three tests:
 1) it checks that the circular TX list is sorted
    - in ascending order of sequence number (ccid2s_seq)
    - and time (ccid2s_sent),
    - in the direction from `tail' (hctx_seqt) to `head' (hctx_seqh);
 2) it ensures that the entire list has the length seqbufc * CCID2_SEQBUF_LEN;
 3) it ensures that pipe equals the number of packets that were not
    marked `acked' (ccid2s_acked) between `tail' and `head'.

The following argues that each of these tests is redundant, this can be verified
by going through the code.

(1) is not necessary, since both time and GSS increase from one packet to the
next, so that subsequent insertions in tx_packet_sent (which advance the `head'
pointer) will be in ascending order of time and sequence number.

In (2), the length of the list is always equal to seqbufc times CCID2_SEQBUF_LEN
(set to 1024) unless allocation caused an earlier failure, because:
 * at initialisation (tx_init), there is one chunk of size 1024 and seqbufc=1;
 * subsequent calls to tx_alloc_seq take place whenever head->next == tail in
   tx_packet_sent; then a new chunk of size 1024 is inserted between head and
   tail, and seqbufc is incremented by one.

To show that (3) is redundant requires looking at two cases.

The `pipe' variable of the TX socket is incremented only in tx_packet_sent, and
decremented in tx_packet_recv.  When head == tail (TX history empty) then pipe
should be 0, which is the case directly after initialisation and after a
retransmission timeout has occurred (ccid2_hc_tx_rto_expire).

The first case involves parsing Ack Vectors for packets recorded in the live
portion of the buffer, between tail and head. For each packet marked by the
receiver as received (state 0) or ECN-marked (state 1), pipe is decremented by
one, so for all such packets the BUG_ON in tx_check_sanity will not trigger.

The second case is the loss detection in the second half of tx_packet_recv,
below the comment "Check for NUMDUPACK".

The first while-loop here ensures that the sequence number of `seqp' is either
above or equal to `high_ack', or otherwise equal to the highest sequence number
sent so far (of the entry head->prev, as head points to the next unsent entry).
The next while-loop ("while (1)") counts the number of acked packets starting
from that position of seqp, going backwards in the direction from head->prev to
tail. If NUMDUPACK=3 such packets were counted within this loop, `seqp' points
to the last acknowledged packet of these, and the "if (done == NUMDUPACK)" block
is entered next.
The while-loop contained within that block in turn traverses the list backwards,
from head to tail; the position of `seqp' is saved in the variable `last_acked'.
For each packet not marked as `acked', a congestion event is triggered within
the loop, and pipe is decremented. The loop terminates when `seqp' has reached
`tail', whereupon tail is set to the position previously stored in `last_acked'.
Thus, between `last_acked' and the previous position of `tail',
 - pipe has been decremented earlier if the packet was marked as state 0 or 1;
 - pipe was decremented if the packet was not marked as acked.
That is, pipe has been decremented by the number of packets between `last_acked'
and the previous position of `tail'. As a consequence, pipe now again reflects
the number of packets which have not (yet) been acked between the new position
of tail (at `last_acked') and head->prev, or 0 if head==tail. The result is that
the BUG_ON condition in check_sanity will also not be triggered, hence the test
(3) is also redundant.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c | 51 --------------------------------------------------
 1 file changed, 51 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index c7d83e3c164..3b2548bd73f 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -34,51 +34,8 @@
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 static int ccid2_debug;
 #define ccid2_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid2_debug, format, ##a)
-
-static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
-{
-	int len = 0;
-	int pipe = 0;
-	struct ccid2_seq *seqp = hctx->seqh;
-
-	/* there is data in the chain */
-	if (seqp != hctx->seqt) {
-		seqp = seqp->ccid2s_prev;
-		len++;
-		if (!seqp->ccid2s_acked)
-			pipe++;
-
-		while (seqp != hctx->seqt) {
-			struct ccid2_seq *prev = seqp->ccid2s_prev;
-
-			len++;
-			if (!prev->ccid2s_acked)
-				pipe++;
-
-			/* packets are sent sequentially */
-			BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
-						prev->ccid2s_seq ) >= 0);
-			BUG_ON(time_before(seqp->ccid2s_sent,
-					   prev->ccid2s_sent));
-
-			seqp = prev;
-		}
-	}
-
-	BUG_ON(pipe != hctx->pipe);
-	ccid2_pr_debug("len of chain=%d\n", len);
-
-	do {
-		seqp = seqp->ccid2s_prev;
-		len++;
-	} while (seqp != hctx->seqh);
-
-	ccid2_pr_debug("total len=%d\n", len);
-	BUG_ON(len != hctx->seqbufc * CCID2_SEQBUF_LEN);
-}
 #else
 #define ccid2_pr_debug(format, a...)
-#define ccid2_hc_tx_check_sanity(hctx)
 #endif
 
 static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
@@ -176,8 +133,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 
 	ccid2_pr_debug("RTO_EXPIRE\n");
 
-	ccid2_hc_tx_check_sanity(hctx);
-
 	/* back-off timer */
 	hctx->rto <<= 1;
 
@@ -200,7 +155,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	hctx->rpseq    = 0;
 	hctx->rpdupack = -1;
 	ccid2_change_l_ack_ratio(sk, 1);
-	ccid2_hc_tx_check_sanity(hctx);
 
 	/* if we were blocked before, we may now send cwnd=1 packet */
 	if (sender_was_blocked)
@@ -314,7 +268,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 		}
 	} while (0);
 	ccid2_pr_debug("=========\n");
-	ccid2_hc_tx_check_sanity(hctx);
 #endif
 }
 
@@ -463,7 +416,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	int done = 0;
 	unsigned int maxincr = 0;
 
-	ccid2_hc_tx_check_sanity(hctx);
 	/* check reverse path congestion */
 	seqno = DCCP_SKB_CB(skb)->dccpd_seq;
 
@@ -640,7 +592,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		hctx->seqt = hctx->seqt->ccid2s_next;
 	}
 
-	ccid2_hc_tx_check_sanity(hctx);
 done:
 	/* check if incoming Acks allow pending packets to be sent */
 	if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx))
@@ -680,8 +631,6 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	hctx->last_cong = jiffies;
 	setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk);
 	INIT_LIST_HEAD(&hctx->av_chunks);
-
-	ccid2_hc_tx_check_sanity(hctx);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From e9803c0104564698d3b8e84ccdb0b8b0e65427e2 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Simplify dec_pipe and rearming of RTO timer

This removes the dec_pipe function and improves the way the RTO timer is rearmed
when a new acknowledgment comes in.

Details and justification for removal:
--------------------------------------
 1) The BUG_ON in dec_pipe is never triggered: pipe is only decremented for TX
    history entries between tail and head, for which it had previously been
    incremented in tx_packet_sent; and it is not decremented twice for the same
    entry, since it is
    - either decremented when a corresponding Ack Vector cell in state 0 or 1
      was received (and then ccid2s_acked==1),
    - or it is decremented when ccid2s_acked==0, as part of the loss detection
      in tx_packet_recv (and hence it can not have been decremented earlier).

 2) Restarting the RTO timer happens for every single entry in each Ack Vector
    parsed by tx_packet_recv (according to RFC 4340, 11.4 this can happen up to
    16192 times per Ack Vector).

 3) The RTO timer should not be restarted when all outstanding data has been
    acknowledged. This is currently done similar to (2), in dec_pipe, when
    pipe has reached 0.

The patch onsolidates the code which rearms the RTO timer, combining the
segments from new_ack and dec_pipe. As a result, the code becomes clearer
(compare with tcp_rearm_rto()).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 3b2548bd73f..fa074d44206 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -353,23 +353,6 @@ static inline void ccid2_new_ack(struct sock *sk,
 			       hctx->srtt, hctx->rttvar,
 			       hctx->rto, HZ, r);
 	}
-
-	/* we got a new ack, so re-start RTO timer */
-	ccid2_hc_tx_kill_rto_timer(sk);
-	ccid2_start_rto_timer(sk);
-}
-
-static void ccid2_hc_tx_dec_pipe(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-
-	if (hctx->pipe == 0)
-		DCCP_BUG("pipe == 0");
-	else
-		hctx->pipe--;
-
-	if (hctx->pipe == 0)
-		ccid2_hc_tx_kill_rto_timer(sk);
 }
 
 static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
@@ -518,7 +501,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 					seqp->ccid2s_acked = 1;
 					ccid2_pr_debug("Got ack for %llu\n",
 						       (unsigned long long)seqp->ccid2s_seq);
-					ccid2_hc_tx_dec_pipe(sk);
+					hctx->pipe--;
 				}
 				if (seqp == hctx->seqt) {
 					done = 1;
@@ -574,7 +557,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				 * one ack vector.
 				 */
 				ccid2_congestion_event(sk, seqp);
-				ccid2_hc_tx_dec_pipe(sk);
+				hctx->pipe--;
 			}
 			if (seqp == hctx->seqt)
 				break;
@@ -592,6 +575,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		hctx->seqt = hctx->seqt->ccid2s_next;
 	}
 
+	/* restart RTO timer if not all outstanding data has been acked */
+	if (hctx->pipe == 0)
+		sk_stop_timer(sk, &hctx->rtotimer);
+	else
+		sk_reset_timer(sk, &hctx->rtotimer,
+			       jiffies + hctx->rto);
 done:
 	/* check if incoming Acks allow pending packets to be sent */
 	if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx))
-- 
cgit v1.2.3-70-g09d2


From 1435562d7e0412e4885b661843f69859013f9d25 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Replace broken RTT estimator with better algorithm

The current CCID-2 RTT estimator code is in parts broken and lags behind the
suggestions in RFC2988 of using scaled variants for SRTT/RTTVAR.
That code is replaced by the present patch, which reuses the Linux TCP RTT
estimator code - reasons for this code duplication are given below.

Further details:
----------------
 1. The minimum RTO of previously one second has been replaced with TCP's, since
    RFC4341, sec. 5 says that the minimum of 1 sec. (suggested in RFC2988, 2.4)
    is not necessary. Instead, the TCP_RTO_MIN is used, which agrees with DCCP's
    concept of a default RTT (RFC 4340, 3.4).
 2. The maximum RTO has been set to DCCP_RTO_MAX (64 sec), which agrees with
    RFC2988, (2.5).
 3. De-inlined the function ccid2_new_ack().
 4. Added a FIXME: the RTT is sampled several times per Ack Vector, which will
    give the wrong estimate. It should be replaced with one sample per Ack.
    However, at the moment this can not be resolved easily, since
    - it depends on TX history code (which also needs some work),
    - the cleanest solution is not to use the `sent' time at all (saves 4 bytes
      per entry) and use DCCP timestamps / elapsed time to estimated the RTT,
      which however is non-trivial to get right (but needs to be done).

Reasons for reusing the Linux TCP estimator algorithm:
------------------------------------------------------
Some time was spent to find a better alternative, using basic RFC2988 as a first
step. Further analysis and experimentation showed that the Linux TCP RTO
estimator is superior to a basic RFC2988 implementation. A summary is on
http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid2/rto_estimator/

In addition, this estimator fared well in a recent empirical evaluation:

    Rewaskar, Sushant, Jasleen Kaur and F. Donelson Smith.
    A Performance Study of Loss Detection/Recovery in Real-world TCP
    Implementations. Proceedings of 15th IEEE International
    Conference on Network Protocols (ICNP-07). 2007.

Thus there is significant benefit in reusing the existing TCP code.


Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c | 171 +++++++++++++++++++++++++++----------------------
 net/dccp/ccids/ccid2.h |  18 ++++--
 2 files changed, 107 insertions(+), 82 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index fa074d44206..22753fd9869 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -110,12 +110,6 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 	dp->dccps_l_ack_ratio = val;
 }
 
-static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
-{
-	ccid2_pr_debug("change SRTT to %ld\n", val);
-	hctx->srtt = val;
-}
-
 static void ccid2_start_rto_timer(struct sock *sk);
 
 static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -123,7 +117,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	struct sock *sk = (struct sock *)data;
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 	const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
-	long s;
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -135,10 +128,8 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 
 	/* back-off timer */
 	hctx->rto <<= 1;
-
-	s = hctx->rto / HZ;
-	if (s > 60)
-		hctx->rto = 60 * HZ;
+	if (hctx->rto > DCCP_RTO_MAX)
+		hctx->rto = DCCP_RTO_MAX;
 
 	/* adjust pipe, cwnd etc */
 	hctx->ssthresh = hctx->cwnd / 2;
@@ -279,9 +270,87 @@ static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
 	ccid2_pr_debug("deleted RTO timer\n");
 }
 
-static inline void ccid2_new_ack(struct sock *sk,
-				 struct ccid2_seq *seqp,
-				 unsigned int *maxincr)
+/**
+ * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
+ * This code is almost identical with TCP's tcp_rtt_estimator(), since
+ * - it has a higher sampling frequency (recommended by RFC 1323),
+ * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
+ * - it is simple (cf. more complex proposals such as Eifel timer or research
+ *   which suggests that the gain should be set according to window size),
+ * - in tests it was found to work well with CCID2 [gerrit].
+ */
+static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
+{
+	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+	long m = mrtt ? : 1;
+
+	if (hctx->srtt == 0) {
+		/* First measurement m */
+		hctx->srtt = m << 3;
+		hctx->mdev = m << 1;
+
+		hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev);
+		hctx->rttvar   = hctx->mdev_max;
+		hctx->rtt_seq  = dccp_sk(sk)->dccps_gss;
+	} else {
+		/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
+		m -= (hctx->srtt >> 3);
+		hctx->srtt += m;
+
+		/* Similarly, update scaled mdev with regard to |m| */
+		if (m < 0) {
+			m = -m;
+			m -= (hctx->mdev >> 2);
+			/*
+			 * This neutralises RTO increase when RTT < SRTT - mdev
+			 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
+			 * in Linux TCP", USENIX 2002, pp. 49-62).
+			 */
+			if (m > 0)
+				m >>= 3;
+		} else {
+			m -= (hctx->mdev >> 2);
+		}
+		hctx->mdev += m;
+
+		if (hctx->mdev > hctx->mdev_max) {
+			hctx->mdev_max = hctx->mdev;
+			if (hctx->mdev_max > hctx->rttvar)
+				hctx->rttvar = hctx->mdev_max;
+		}
+
+		/*
+		 * Decay RTTVAR at most once per flight, exploiting that
+		 *  1) pipe <= cwnd <= Sequence_Window = W  (RFC 4340, 7.5.2)
+		 *  2) AWL = GSS-W+1 <= GAR <= GSS          (RFC 4340, 7.5.1)
+		 * GAR is a useful bound for FlightSize = pipe, AWL is probably
+		 * too low as it over-estimates pipe.
+		 */
+		if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) {
+			if (hctx->mdev_max < hctx->rttvar)
+				hctx->rttvar -= (hctx->rttvar -
+						 hctx->mdev_max) >> 2;
+			hctx->rtt_seq  = dccp_sk(sk)->dccps_gss;
+			hctx->mdev_max = TCP_RTO_MIN;
+		}
+	}
+
+	/*
+	 * Set RTO from SRTT and RTTVAR
+	 * Clock granularity is ignored since the minimum error for RTTVAR is
+	 * clamped to 50msec (corresponding to HZ=20). This leads to a minimum
+	 * RTO of 200msec. This agrees with TCP and RFC 4341, 5.: "Because DCCP
+	 * does not retransmit data, DCCP does not require TCP's recommended
+	 * minimum timeout of one second".
+	 */
+	hctx->rto = (hctx->srtt >> 3) + hctx->rttvar;
+
+	if (hctx->rto > DCCP_RTO_MAX)
+		hctx->rto = DCCP_RTO_MAX;
+}
+
+static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
+			  unsigned int *maxincr)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
@@ -295,64 +364,15 @@ static inline void ccid2_new_ack(struct sock *sk,
 			hctx->cwnd += 1;
 			hctx->packets_acked = 0;
 	}
-
-	/* update RTO */
-	if (hctx->srtt == -1 ||
-	    time_after(jiffies, hctx->lastrtt + hctx->srtt)) {
-		unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
-		int s;
-
-		/* first measurement */
-		if (hctx->srtt == -1) {
-			ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
-				       r, jiffies,
-				       (unsigned long long)seqp->ccid2s_seq);
-			ccid2_change_srtt(hctx, r);
-			hctx->rttvar = r >> 1;
-		} else {
-			/* RTTVAR */
-			long tmp = hctx->srtt - r;
-			long srtt;
-
-			if (tmp < 0)
-				tmp *= -1;
-
-			tmp >>= 2;
-			hctx->rttvar *= 3;
-			hctx->rttvar >>= 2;
-			hctx->rttvar += tmp;
-
-			/* SRTT */
-			srtt = hctx->srtt;
-			srtt *= 7;
-			srtt >>= 3;
-			tmp = r >> 3;
-			srtt += tmp;
-			ccid2_change_srtt(hctx, srtt);
-		}
-		s = hctx->rttvar << 2;
-		/* clock granularity is 1 when based on jiffies */
-		if (!s)
-			s = 1;
-		hctx->rto = hctx->srtt + s;
-
-		/* must be at least a second */
-		s = hctx->rto / HZ;
-		/* DCCP doesn't require this [but I like it cuz my code sux] */
-#if 1
-		if (s < 1)
-			hctx->rto = HZ;
-#endif
-		/* max 60 seconds */
-		if (s > 60)
-			hctx->rto = HZ * 60;
-
-		hctx->lastrtt = jiffies;
-
-		ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
-			       hctx->srtt, hctx->rttvar,
-			       hctx->rto, HZ, r);
-	}
+	/*
+	 * FIXME: RTT is sampled several times per acknowledgment (for each
+	 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
+	 * This causes the RTT to be over-estimated, since the older entries
+	 * in the Ack Vector have earlier sending times.
+	 * The cleanest solution is to not use the ccid2s_sent field at all
+	 * and instead use DCCP timestamps - need to be resolved at some time.
+	 */
+	ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent);
 }
 
 static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
@@ -579,8 +599,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	if (hctx->pipe == 0)
 		sk_stop_timer(sk, &hctx->rtotimer);
 	else
-		sk_reset_timer(sk, &hctx->rtotimer,
-			       jiffies + hctx->rto);
+		sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
 done:
 	/* check if incoming Acks allow pending packets to be sent */
 	if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx))
@@ -613,9 +632,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	if (ccid2_hc_tx_alloc_seq(hctx))
 		return -ENOMEM;
 
-	hctx->rto	 = 3 * HZ;
-	ccid2_change_srtt(hctx, -1);
-	hctx->rttvar	= -1;
+	hctx->rto	= DCCP_TIMEOUT_INIT;
 	hctx->rpdupack  = -1;
 	hctx->last_cong = jiffies;
 	setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index d5900dd5d4f..8b7a2dee2f6 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -44,7 +44,12 @@ struct ccid2_seq {
  *
  * @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
  * @packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
- * @lastrtt: time RTT was last measured
+ * @srtt: smoothed RTT estimate, scaled by 2^3
+ * @mdev: smoothed RTT variation, scaled by 2^2
+ * @mdev_max: maximum of @mdev during one flight
+ * @rttvar: moving average/maximum of @mdev_max
+ * @rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
+ * @rtt_seq: to decay RTTVAR at most once per flight
  * @rpseq: last consecutive seqno
  * @rpdupack: dupacks since rpseq
  * @av_chunks: list of Ack Vectors received on current skb
@@ -58,10 +63,13 @@ struct ccid2_hc_tx_sock {
 	int			seqbufc;
 	struct ccid2_seq	*seqh;
 	struct ccid2_seq	*seqt;
-	long			rto;
-	long			srtt;
-	long			rttvar;
-	unsigned long		lastrtt;
+	/* RTT measurement: variables/principles are the same as in TCP */
+	u32			srtt,
+				mdev,
+				mdev_max,
+				rttvar,
+				rto;
+	u64			rtt_seq:48;
 	struct timer_list	rtotimer;
 	u64			rpseq;
 	int			rpdupack;
-- 
cgit v1.2.3-70-g09d2


From 20bbd0f75ee4b72c1dafc8e5fb6ad39ba506a75c Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-2: Remove wrappers around sk_{reset,stop}_timer()

This removes the wrappers around the sk timer functions as it makes the code
clearer and not much is gained from using wrappers: the BUG_ON in
start_rto_timer will never trigger since that function was called only when
 * the RTO timer expired (rto_expire, and then timer_pending() is false);
 * in tx_packet_sent only if !timer_pending() (BUG_ON is redundant here);
 * previously in new_ack, after stopping the timer (timer_pending() false).

One further motive behind this patch is to replace the RTO timer with the
icsk retransmission timer, as it is already part of the DCCP socket.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid2.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 22753fd9869..c539f79ab8e 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -110,8 +110,6 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 	dp->dccps_l_ack_ratio = val;
 }
 
-static void ccid2_start_rto_timer(struct sock *sk);
-
 static void ccid2_hc_tx_rto_expire(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
@@ -150,23 +148,13 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	/* if we were blocked before, we may now send cwnd=1 packet */
 	if (sender_was_blocked)
 		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
-	ccid2_start_rto_timer(sk);
+	/* restart backed-off timer */
+	sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
 
-static void ccid2_start_rto_timer(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-
-	ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->rto);
-
-	BUG_ON(timer_pending(&hctx->rtotimer));
-	sk_reset_timer(sk, &hctx->rtotimer,
-		       jiffies + hctx->rto);
-}
-
 static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
@@ -245,7 +233,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 
 	/* setup RTO timer */
 	if (!timer_pending(&hctx->rtotimer))
-		ccid2_start_rto_timer(sk);
+		sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 	do {
@@ -262,14 +250,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 #endif
 }
 
-static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-
-	sk_stop_timer(sk, &hctx->rtotimer);
-	ccid2_pr_debug("deleted RTO timer\n");
-}
-
 /**
  * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
  * This code is almost identical with TCP's tcp_rtt_estimator(), since
@@ -645,7 +625,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 	int i;
 
-	ccid2_hc_tx_kill_rto_timer(sk);
+	sk_stop_timer(sk, &hctx->rtotimer);
 
 	for (i = 0; i < hctx->seqbufc; i++)
 		kfree(hctx->seqbuf[i]);
-- 
cgit v1.2.3-70-g09d2


From b25b0c60b0c39a82bc651aeb6443bcb36cd17f76 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Combine the functionality of enqeueing and cloning

Realising the following call pattern,
 * first dccp_entail() is called to enqueue a new skb and
 * then skb_clone() is called to transmit a clone of that skb,

this patch integrates both interrelated steps into dccp_entail().

Note: the return value of skb_clone is not checked. It may be an idea to add a
      warning if this occurs. In both instances, however, a timer is set for
      retransmission, so that cloning is re-tried via dccp_retransmit_skb().

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/output.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/output.c b/net/dccp/output.c
index 39056dc6135..b1eaf7bcfb1 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -26,11 +26,13 @@ static inline void dccp_event_ack_sent(struct sock *sk)
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
 
-static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
+/* enqueue @skb on sk_send_head for retransmission, return clone to send now */
+static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
 {
 	skb_set_owner_w(skb, sk);
 	WARN_ON(sk->sk_send_head);
 	sk->sk_send_head = skb;
+	return skb_clone(sk->sk_send_head, gfp_any());
 }
 
 /*
@@ -550,8 +552,7 @@ int dccp_connect(struct sock *sk)
 
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
 
-	dccp_skb_entail(sk, skb);
-	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
+	dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
 	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the REQUEST until an answer. */
@@ -676,8 +677,7 @@ void dccp_send_close(struct sock *sk, const int active)
 		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
 
 	if (active) {
-		dccp_skb_entail(sk, skb);
-		dccp_transmit_skb(sk, skb_clone(skb, prio));
+		skb = dccp_skb_entail(sk, skb);
 		/*
 		 * Retransmission timer for active-close: RFC 4340, 8.3 requires
 		 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
@@ -690,6 +690,6 @@ void dccp_send_close(struct sock *sk, const int active)
 		 */
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
-	} else
-		dccp_transmit_skb(sk, skb);
+	}
+	dccp_transmit_skb(sk, skb);
 }
-- 
cgit v1.2.3-70-g09d2


From 6224877b2ca4be5de96270a8ae490fe2ba11b0e0 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion

This patch consolidates the code common to TCP and CCID-2:
 * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and
 * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/net/tcp.h      | 15 +++++++++++++++
 net/dccp/ccids/ccid2.c |  8 ++------
 net/ipv4/tcp_input.c   | 17 ++---------------
 3 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8983386356a..6bc4b8148ca 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -782,6 +782,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 /* Use define here intentionally to get WARN_ON location shown at the caller */
 #define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)
 
+/*
+ * Convert RFC3390 larger initial windows into an equivalent number of packets.
+ *
+ * John Heffner states:
+ *
+ *	The RFC specifies a window of no more than 4380 bytes
+ *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
+ *	is a bit misleading because they use a clamp at 4380 bytes
+ *	rather than a multiplier in the relevant range.
+ */
+static inline u32 rfc3390_bytes_to_packets(const u32 bytes)
+{
+	return bytes <= 1095 ? 4 : (bytes > 1460 ? 2 : 3);
+}
+
 extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index c539f79ab8e..fa713227c66 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -596,12 +596,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
 	hctx->ssthresh = ~0U;
 
-	/*
-	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
-	 * packets for new connections, following the rules from [RFC3390]".
-	 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
-	 */
-	hctx->cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
+	/* Use larger initial windows (RFC 3390, rfc2581bis) */
+	hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
 
 	/* Make sure that Ack Ratio is enabled and within bounds. */
 	max_ratio = DIV_ROUND_UP(hctx->cwnd, 2);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 67ccce2a96b..16d0040de34 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -811,25 +811,12 @@ void tcp_update_metrics(struct sock *sk)
 	}
 }
 
-/* Numbers are taken from RFC3390.
- *
- * John Heffner states:
- *
- *	The RFC specifies a window of no more than 4380 bytes
- *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
- *	is a bit misleading because they use a clamp at 4380 bytes
- *	rather than use a multiplier in the relevant range.
- */
 __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
-	if (!cwnd) {
-		if (tp->mss_cache > 1460)
-			cwnd = 2;
-		else
-			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
-	}
+	if (!cwnd)
+		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
-- 
cgit v1.2.3-70-g09d2


From ddab05568eaa70fc92b2aae957136f188f724e9c Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Clean up slow-path input processing

This patch rearranges the order of statements of the slow-path input processing
(i.e. any other state than OPEN), to resolve the following issues.

 1. Dependencies: the order of statements now better matches RFC 4340, 8.5, i.e.
    step 7 is before step 9 (previously 9 was before 7), and parsing options in
    step 8 (which can consume resources) now comes after step 7.
 2. Bug-fix: in state CLOSED, there should not be any sequence number checking
    or option processing. This is why the test for CLOSED has been moved after
    the test for LISTEN.
 3. As before sequence number checks are omitted if in state LISTEN/REQUEST, due
    to the note underneath the table in RFC 4340, 7.5.3.
 4. Packets are now passed on to Ack Vector / CCID processing only after
    - step 7  (receive unexpected packets),
    - step 9  (receive Reset),
    - step 13 (receive CloseReq),
    - step 14 (receive Close)
    and only if the state is PARTOPEN. This simplifies CCID processing:
    - in LISTEN/CLOSED the CCIDs are non-existent;
    - in RESPOND/REQUEST the CCIDs have not yet been negotiated;
    - in CLOSEREQ and active-CLOSING the node has already closed this socket;
    - in passive-CLOSING the client is waiting for its Reset.
    In the last case, RFC 4340, 8.3 leaves it open to ignore further incoming
    data, which is the approach taken here.

As a result of (3), CCID processing is now indeed confined to OPEN/PARTOPEN
states, i.e. congestion control is performed only on the flow of data packets.

This avoids pathological cases of doing congestion control on those messages
which set up and terminate the connection.

I have done a few checks to see if this creates a problem in other parts of
the code. This seems not to be the case; even if there were one, it would be
better to fix it than to perform congestion control on Close/Request/Response
messages. Similarly for Ack Vectors (as they depend on the negotiated CCID).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/input.c | 68 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 33 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 9a108ce17fc..b1e38bf9445 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -603,22 +603,36 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/* Caller (dccp_v4_do_rcv) will send Reset */
 		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		return 1;
+	} else if (sk->sk_state == DCCP_CLOSED) {
+		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+		return 1;
 	}
 
-	if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
-		if (dccp_check_seqno(sk, skb))
-			goto discard;
-
-		/*
-		 * Step 8: Process options and mark acknowledgeable
-		 */
-		if (dccp_parse_options(sk, NULL, skb))
-			return 1;
+	/* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */
+	if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb))
+		goto discard;
 
-		dccp_handle_ackvec_processing(sk, skb);
-		dccp_deliver_input_to_ccids(sk, skb);
+	/*
+	 *   Step 7: Check for unexpected packet types
+	 *      If (S.is_server and P.type == Response)
+	 *	    or (S.is_client and P.type == Request)
+	 *	    or (S.state == RESPOND and P.type == Data),
+	 *	  Send Sync packet acknowledging P.seqno
+	 *	  Drop packet and return
+	 */
+	if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
+	     dh->dccph_type == DCCP_PKT_RESPONSE) ||
+	    (dp->dccps_role == DCCP_ROLE_CLIENT &&
+	     dh->dccph_type == DCCP_PKT_REQUEST) ||
+	    (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
+		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
+		goto discard;
 	}
 
+	/*  Step 8: Process options */
+	if (dccp_parse_options(sk, NULL, skb))
+		return 1;
+
 	/*
 	 *  Step 9: Process Reset
 	 *	If P.type == Reset,
@@ -626,41 +640,21 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	 *		S.state := TIMEWAIT
 	 *		Set TIMEWAIT timer
 	 *		Drop packet and return
-	*/
+	 */
 	if (dh->dccph_type == DCCP_PKT_RESET) {
 		dccp_rcv_reset(sk, skb);
 		return 0;
-		/*
-		 *   Step 7: Check for unexpected packet types
-		 *      If (S.is_server and P.type == Response)
-		 *	    or (S.is_client and P.type == Request)
-		 *	    or (S.state == RESPOND and P.type == Data),
-		 *	  Send Sync packet acknowledging P.seqno
-		 *	  Drop packet and return
-		 */
-	} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
-		    dh->dccph_type == DCCP_PKT_RESPONSE) ||
-		    (dp->dccps_role == DCCP_ROLE_CLIENT &&
-		     dh->dccph_type == DCCP_PKT_REQUEST) ||
-		    (sk->sk_state == DCCP_RESPOND &&
-		     dh->dccph_type == DCCP_PKT_DATA)) {
-		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
-		goto discard;
-	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
+	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {	/* Step 13 */
 		if (dccp_rcv_closereq(sk, skb))
 			return 0;
 		goto discard;
-	} else if (dh->dccph_type == DCCP_PKT_CLOSE) {
+	} else if (dh->dccph_type == DCCP_PKT_CLOSE) {		/* Step 14 */
 		if (dccp_rcv_close(sk, skb))
 			return 0;
 		goto discard;
 	}
 
 	switch (sk->sk_state) {
-	case DCCP_CLOSED:
-		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
-		return 1;
-
 	case DCCP_REQUESTING:
 		queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
 		if (queued >= 0)
@@ -669,8 +663,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		__kfree_skb(skb);
 		return 0;
 
-	case DCCP_RESPOND:
 	case DCCP_PARTOPEN:
+		/* Step 8: if using Ack Vectors, mark packet acknowledgeable */
+		dccp_handle_ackvec_processing(sk, skb);
+		dccp_deliver_input_to_ccids(sk, skb);
+		/* fall through */
+	case DCCP_RESPOND:
 		queued = dccp_rcv_respond_partopen_state_process(sk, skb,
 								 dh, len);
 		break;
-- 
cgit v1.2.3-70-g09d2


From d6da3511d6b558d0b017777b61dc08b8fbc06ea4 Mon Sep 17 00:00:00 2001
From: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Policy-based packet dequeueing infrastructure

This patch adds a generic infrastructure for policy-based dequeueing of
TX packets and provides two policies:
 * a simple FIFO policy (which is the default) and
 * a priority based policy (set via socket options).
Both policies honour the tx_qlen sysctl for the maximum size of the write
queue (can be overridden via socket options).

The priority policy uses skb->priority internally to assign an u32 priority
identifier, using the same ranking as SO_PRIORITY. The skb->priority field
is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary
data using cmsg(3), the patch also provides the requisite parsing routines.

Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 Documentation/networking/dccp.txt |  19 ++++++
 include/linux/dccp.h              |  21 +++++++
 net/dccp/Makefile                 |   2 +-
 net/dccp/dccp.h                   |  12 ++++
 net/dccp/output.c                 |   7 +--
 net/dccp/proto.c                  |  67 +++++++++++++++++++-
 net/dccp/qpolicy.c                | 126 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 246 insertions(+), 8 deletions(-)
 create mode 100644 net/dccp/qpolicy.c

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index b132e4a3cf0..fcfc1253442 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -45,6 +45,25 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
 
 Socket options
 ==============
+DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
+a policy ID as argument and can only be set before the connection (i.e. changes
+during an established connection are not supported). Currently, two policies are
+defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
+and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an
+u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
+a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
+be formatted using a cmsg(3) message header filled in as follows:
+	cmsg->cmsg_level = SOL_DCCP;
+	cmsg->cmsg_type	 = DCCP_SCM_PRIORITY;
+	cmsg->cmsg_len	 = CMSG_LEN(sizeof(uint32_t));	/* or CMSG_LEN(4) */
+
+DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
+value is always interpreted as unbounded queue length. If different from zero,
+the interpretation of this parameter depends on the current dequeuing policy
+(see above): the "simple" policy will enforce a fixed queue size by returning
+EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
+lowest-priority packet first. The default value for this parameter is
+initialised from /proc/sys/net/dccp/default/tx_qlen.
 
 DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
 service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index eed52bcd35d..010e2d87ed7 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -197,6 +197,21 @@ enum dccp_feature_numbers {
 	DCCPF_MAX_CCID_SPECIFIC = 255,
 };
 
+/* DCCP socket control message types for cmsg */
+enum dccp_cmsg_type {
+	DCCP_SCM_PRIORITY = 1,
+	DCCP_SCM_QPOLICY_MAX = 0xFFFF,
+	/* ^-- Up to here reserved exclusively for qpolicy parameters */
+	DCCP_SCM_MAX
+};
+
+/* DCCP priorities for outgoing/queued packets */
+enum dccp_packet_dequeueing_policy {
+	DCCPQ_POLICY_SIMPLE,
+	DCCPQ_POLICY_PRIO,
+	DCCPQ_POLICY_MAX
+};
+
 /* DCCP socket options */
 #define DCCP_SOCKOPT_PACKET_SIZE	1 /* XXX deprecated, without effect */
 #define DCCP_SOCKOPT_SERVICE		2
@@ -210,6 +225,8 @@ enum dccp_feature_numbers {
 #define DCCP_SOCKOPT_CCID		13
 #define DCCP_SOCKOPT_TX_CCID		14
 #define DCCP_SOCKOPT_RX_CCID		15
+#define DCCP_SOCKOPT_QPOLICY_ID		16
+#define DCCP_SOCKOPT_QPOLICY_TXQLEN	17
 #define DCCP_SOCKOPT_CCID_RX_INFO	128
 #define DCCP_SOCKOPT_CCID_TX_INFO	192
 
@@ -458,6 +475,8 @@ struct dccp_ackvec;
  * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
  * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
  * @dccps_options_received - parsed set of retrieved options
+ * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
+ * @dccps_tx_qlen - maximum length of the TX queue
  * @dccps_role - role of this sock, one of %dccp_role
  * @dccps_hc_rx_insert_options - receiver wants to add options when acking
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
@@ -500,6 +519,8 @@ struct dccp_sock {
 	struct ccid			*dccps_hc_rx_ccid;
 	struct ccid			*dccps_hc_tx_ccid;
 	struct dccp_options_received	dccps_options_received;
+	__u8				dccps_qpolicy;
+	__u32				dccps_tx_qlen;
 	enum dccp_role			dccps_role:2;
 	__u8				dccps_hc_rx_insert_options:1;
 	__u8				dccps_hc_tx_insert_options:1;
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index b68440bd7fa..0c1c9af2bf7 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
 
 dccp-y := ccid.o feat.o input.o minisocks.o options.o \
-	  output.o proto.o timer.o ackvec.o
+	  qpolicy.o output.o proto.o timer.o ackvec.o
 
 dccp_ipv4-y := ipv4.o
 
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 74c90cd2767..ce2dd6f6f34 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -234,6 +234,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
+/*
+ * TX Packet Dequeueing Interface
+ */
+extern void		dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
+extern bool		dccp_qpolicy_full(struct sock *sk);
+extern void		dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
+extern struct sk_buff	*dccp_qpolicy_top(struct sock *sk);
+extern struct sk_buff	*dccp_qpolicy_pop(struct sock *sk);
+
+/*
+ * TX Packet Output and TX Timers
+ */
 extern void dccp_write_xmit(struct sock *sk);
 extern void dccp_write_space(struct sock *sk);
 extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b1eaf7bcfb1..2532797a800 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -241,7 +241,7 @@ static void dccp_xmit_packet(struct sock *sk)
 {
 	int err, len;
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
+	struct sk_buff *skb = dccp_qpolicy_pop(sk);
 
 	if (unlikely(skb == NULL))
 		return;
@@ -344,7 +344,7 @@ void dccp_write_xmit(struct sock *sk)
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
 
-	while ((skb = skb_peek(&sk->sk_write_queue))) {
+	while ((skb = dccp_qpolicy_top(sk))) {
 		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 
 		switch (ccid_packet_dequeue_eval(rc)) {
@@ -358,8 +358,7 @@ void dccp_write_xmit(struct sock *sk)
 			dccp_xmit_packet(sk);
 			break;
 		case CCID_PACKET_ERR:
-			skb_dequeue(&sk->sk_write_queue);
-			kfree_skb(skb);
+			dccp_qpolicy_drop(sk, skb);
 			dccp_pr_debug("packet discarded due to err=%d\n", rc);
 		}
 	}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 8c125ffab1c..b56efdd2a42 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -189,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	dp->dccps_rate_last	= jiffies;
 	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
 	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
+	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;
 
 	dccp_init_xmit_timers(sk);
 
@@ -541,6 +542,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_RECV_CSCOV:
 		err = dccp_setsockopt_cscov(sk, val, true);
 		break;
+	case DCCP_SOCKOPT_QPOLICY_ID:
+		if (sk->sk_state != DCCP_CLOSED)
+			err = -EISCONN;
+		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
+			err = -EINVAL;
+		else
+			dp->dccps_qpolicy = val;
+		break;
+	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+		if (val < 0)
+			err = -EINVAL;
+		else
+			dp->dccps_tx_qlen = val;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -648,6 +663,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_RECV_CSCOV:
 		val = dp->dccps_pcrlen;
 		break;
+	case DCCP_SOCKOPT_QPOLICY_ID:
+		val = dp->dccps_qpolicy;
+		break;
+	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+		val = dp->dccps_tx_qlen;
+		break;
 	case 128 ... 191:
 		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
 					     len, (u32 __user *)optval, optlen);
@@ -690,6 +711,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 #endif
 
+static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
+{
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
+
+	/*
+	 * Assign an (opaque) qpolicy priority value to skb->priority.
+	 *
+	 * We are overloading this skb field for use with the qpolicy subystem.
+	 * The skb->priority is normally used for the SO_PRIORITY option, which
+	 * is initialised from sk_priority. Since the assignment of sk_priority
+	 * to skb->priority happens later (on layer 3), we overload this field
+	 * for use with queueing priorities as long as the skb is on layer 4.
+	 * The default priority value (if nothing is set) is 0.
+	 */
+	skb->priority = 0;
+
+	for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+
+		if (cmsg->cmsg_level != SOL_DCCP)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case DCCP_SCM_PRIORITY:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
+				return -EINVAL;
+			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		 size_t len)
 {
@@ -705,8 +763,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	lock_sock(sk);
 
-	if (sysctl_dccp_tx_qlen &&
-	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
+	if (dccp_qpolicy_full(sk)) {
 		rc = -EAGAIN;
 		goto out_release;
 	}
@@ -734,7 +791,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (rc != 0)
 		goto out_discard;
 
-	skb_queue_tail(&sk->sk_write_queue, skb);
+	rc = dccp_msghdr_parse(msg, skb);
+	if (rc != 0)
+		goto out_discard;
+
+	dccp_qpolicy_push(sk, skb);
 	dccp_write_xmit(sk);
 out_release:
 	release_sock(sk);
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 00000000000..414696b0d83
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,126 @@
+/*
+ *  net/dccp/qpolicy.c
+ *
+ *  Policy-based packet dequeueing interface for DCCP.
+ *
+ *  Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License v2
+ *  as published by the Free Software Foundation.
+ */
+#include "dccp.h"
+
+/*
+ *	Simple Dequeueing Policy:
+ *	If tx_qlen is different from 0, enqueue up to tx_qlen elements.
+ */
+static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
+{
+	skb_queue_tail(&sk->sk_write_queue, skb);
+}
+
+static bool qpolicy_simple_full(struct sock *sk)
+{
+	return dccp_sk(sk)->dccps_tx_qlen &&
+	       sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
+}
+
+static struct sk_buff *qpolicy_simple_top(struct sock *sk)
+{
+	return skb_peek(&sk->sk_write_queue);
+}
+
+/*
+ *	Priority-based Dequeueing Policy:
+ *	If tx_qlen is different from 0 and the queue has reached its upper bound
+ *	of tx_qlen elements, replace older packets lowest-priority-first.
+ */
+static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
+{
+	struct sk_buff *skb, *best = NULL;
+
+	skb_queue_walk(&sk->sk_write_queue, skb)
+		if (best == NULL || skb->priority > best->priority)
+			best = skb;
+	return best;
+}
+
+static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
+{
+	struct sk_buff *skb, *worst = NULL;
+
+	skb_queue_walk(&sk->sk_write_queue, skb)
+		if (worst == NULL || skb->priority < worst->priority)
+			worst = skb;
+	return worst;
+}
+
+static bool qpolicy_prio_full(struct sock *sk)
+{
+	if (qpolicy_simple_full(sk))
+		dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
+	return false;
+}
+
+/**
+ * struct dccp_qpolicy_operations  -  TX Packet Dequeueing Interface
+ * @push: add a new @skb to the write queue
+ * @full: indicates that no more packets will be admitted
+ * @top:  peeks at whatever the queueing policy defines as its `top'
+ */
+static struct dccp_qpolicy_operations {
+	void		(*push)	(struct sock *sk, struct sk_buff *skb);
+	bool		(*full) (struct sock *sk);
+	struct sk_buff*	(*top)  (struct sock *sk);
+
+} qpol_table[DCCPQ_POLICY_MAX] = {
+	[DCCPQ_POLICY_SIMPLE] = {
+		.push = qpolicy_simple_push,
+		.full = qpolicy_simple_full,
+		.top  = qpolicy_simple_top,
+	},
+	[DCCPQ_POLICY_PRIO] = {
+		.push = qpolicy_simple_push,
+		.full = qpolicy_prio_full,
+		.top  = qpolicy_prio_best_skb,
+	},
+};
+
+/*
+ *	Externally visible interface
+ */
+void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
+{
+	qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
+}
+
+bool dccp_qpolicy_full(struct sock *sk)
+{
+	return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
+}
+
+void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
+{
+	if (skb != NULL) {
+		skb_unlink(skb, &sk->sk_write_queue);
+		kfree_skb(skb);
+	}
+}
+
+struct sk_buff *dccp_qpolicy_top(struct sock *sk)
+{
+	return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
+}
+
+struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
+{
+	struct sk_buff *skb = dccp_qpolicy_top(sk);
+
+	/* Clear any skb fields that we used internally */
+	skb->priority = 0;
+
+	if (skb)
+		skb_unlink(skb, &sk->sk_write_queue);
+	return skb;
+}
-- 
cgit v1.2.3-70-g09d2


From 7d1af6a8d935678248d057564e75e1452409a53c Mon Sep 17 00:00:00 2001
From: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp qpolicy: Parameter checking of cmsg qpolicy parameters

Ensure that cmsg->cmsg_type value is valid for qpolicy
that is currently in use.

Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h    |  1 +
 net/dccp/proto.c   |  4 ++++
 net/dccp/qpolicy.c | 23 +++++++++++++++++------
 3 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index ce2dd6f6f34..1585fa26488 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -242,6 +242,7 @@ extern bool		dccp_qpolicy_full(struct sock *sk);
 extern void		dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
 extern struct sk_buff	*dccp_qpolicy_top(struct sock *sk);
 extern struct sk_buff	*dccp_qpolicy_pop(struct sock *sk);
+extern bool		dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
 
 /*
  * TX Packet Output and TX Timers
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b56efdd2a42..a3caa11aa83 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -735,6 +735,10 @@ static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
 		if (cmsg->cmsg_level != SOL_DCCP)
 			continue;
 
+		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
+		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
+			return -EINVAL;
+
 		switch (cmsg->cmsg_type) {
 		case DCCP_SCM_PRIORITY:
 			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
index 414696b0d83..27383f88c75 100644
--- a/net/dccp/qpolicy.c
+++ b/net/dccp/qpolicy.c
@@ -73,17 +73,20 @@ static struct dccp_qpolicy_operations {
 	void		(*push)	(struct sock *sk, struct sk_buff *skb);
 	bool		(*full) (struct sock *sk);
 	struct sk_buff*	(*top)  (struct sock *sk);
+	__be32		params;
 
 } qpol_table[DCCPQ_POLICY_MAX] = {
 	[DCCPQ_POLICY_SIMPLE] = {
-		.push = qpolicy_simple_push,
-		.full = qpolicy_simple_full,
-		.top  = qpolicy_simple_top,
+		.push   = qpolicy_simple_push,
+		.full   = qpolicy_simple_full,
+		.top    = qpolicy_simple_top,
+		.params = 0,
 	},
 	[DCCPQ_POLICY_PRIO] = {
-		.push = qpolicy_simple_push,
-		.full = qpolicy_prio_full,
-		.top  = qpolicy_prio_best_skb,
+		.push   = qpolicy_simple_push,
+		.full   = qpolicy_prio_full,
+		.top    = qpolicy_prio_best_skb,
+		.params = DCCP_SCM_PRIORITY,
 	},
 };
 
@@ -124,3 +127,11 @@ struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
 		skb_unlink(skb, &sk->sk_write_queue);
 	return skb;
 }
+
+bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param)
+{
+	/* check if exactly one bit is set */
+	if (!param || (param & (param - 1)))
+		return false;
+	return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param;
+}
-- 
cgit v1.2.3-70-g09d2


From f76fd327a8b32d3ad5b51639faf6f54d18be0981 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Runtime verification of timer resolution

The DCCP base time resolution is 10 microseconds (RFC 4340, 13.1 ... 13.3).

Using a timer with a lower resolution was found to trigger the following
bug warnings/problems on high-speed networks (e.g. local loopback):
 * RTT samples are rounded down to 0 if below resolution;
 * in some cases, negative RTT samples were observed;
 * the CCID-3 feedback timer complains that the feedback interval is 0,
   since the feedback interval is in the order of 1 RTT or less and RTT
   measurement rounded this down to 0;
On an Intel computer this will for instance happen when using a
boot-time parameter of "clocksource=jiffies".

The following system log messages were observed:
  11:24:00 kernel: BUG: delta (0) <= 0 at ccid3_hc_rx_send_feedback()
  11:26:12 kernel: BUG: delta (0) <= 0 at ccid3_hc_rx_send_feedback()
  11:26:30 kernel: dccp_sample_rtt: unusable RTT sample 0, using min
  11:26:30 last message repeated 5 times

This patch defines a global constant for the time resolution, adds this in
timer.c, and checks the available clock resolution at CCID-3 module load time.

When the resolution is worse than 10 microseconds, module loading exits with
a message "socket type not supported".

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 13 +++++++++++++
 net/dccp/dccp.h        |  5 ++++-
 net/dccp/timer.c       |  3 +--
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 0d406f82e43..f566eb76aeb 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -869,6 +869,19 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
 
 static __init int ccid3_module_init(void)
 {
+	struct timespec tp;
+
+	/*
+	 * Without a fine-grained clock resolution, RTTs/X_recv are not sampled
+	 * correctly and feedback is sent either too early or too late.
+	 */
+	hrtimer_get_res(CLOCK_MONOTONIC, &tp);
+	if (tp.tv_sec || tp.tv_nsec > DCCP_TIME_RESOLUTION * NSEC_PER_USEC) {
+		printk(KERN_ERR "%s: Timer too coarse (%ld usec), need %u-usec"
+		       " resolution - check your clocksource.\n", __func__,
+		       tp.tv_nsec/NSEC_PER_USEC, DCCP_TIME_RESOLUTION);
+		return -ESOCKTNOSUPPORT;
+	}
 	return ccid_register(&ccid3);
 }
 module_init(ccid3_module_init);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 1585fa26488..b63a82ccb2b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -86,10 +86,13 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
  */
 #define DCCP_RTO_MAX ((unsigned)(64 * HZ))
 
+/* DCCP base time resolution - 10 microseconds (RFC 4340, 13.1 ... 13.3) */
+#define DCCP_TIME_RESOLUTION	10
+
 /*
  * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
  */
-#define DCCP_SANE_RTT_MIN	100
+#define DCCP_SANE_RTT_MIN	(10 * DCCP_TIME_RESOLUTION)
 #define DCCP_FALLBACK_RTT	(USEC_PER_SEC / 5)
 #define DCCP_SANE_RTT_MAX	(3 * USEC_PER_SEC)
 
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index e02d5a94f4c..16359e29e7f 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -281,8 +281,7 @@ u32 dccp_timestamp(void)
 {
 	s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
 
-	do_div(delta, 10);
-	return delta;
+	return div_u64(delta, DCCP_TIME_RESOLUTION);
 }
 EXPORT_SYMBOL_GPL(dccp_timestamp);
 
-- 
cgit v1.2.3-70-g09d2


From d0c05fe4448db5cbdd886186860581f736f59ae9 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Simplified handling of TX states

Since CCIDs are only used during the established phase of a connection,
they have very little internal state; this specifically reduces to:

 * "no packet sent" if and only if s == 0, for the TX packet size s;

 * when the first packet has been sent (i.e. `s' > 0), the question is whether
   or not feedback has been received:
   - if a feedback packet is received, "feedback = yes" is set,
   - if the nofeedback timer expires,  "feedback = no"  is set.

Thus the CCID only needs to remember state about whether or not feedback
has been received. This is now implemented using a boolean flag, which is
toggled when a feedback packet arrives or the nofeedback timer expires.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 41 ++++++-----------------------------------
 net/dccp/ccids/ccid3.h | 11 ++---------
 2 files changed, 8 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f566eb76aeb..5470a978be0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -49,31 +49,6 @@ static int ccid3_debug;
 /*
  *	Transmitter Half-Connection Routines
  */
-#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
-{
-	static char *ccid3_state_names[] = {
-	[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
-	[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
-	[TFRC_SSTATE_FBACK]    = "FBACK",
-	};
-
-	return ccid3_state_names[state];
-}
-#endif
-
-static void ccid3_hc_tx_set_state(struct sock *sk,
-				  enum ccid3_hc_tx_states state)
-{
-	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	enum ccid3_hc_tx_states oldstate = hctx->state;
-
-	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
-		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
-		       ccid3_tx_state_name(state));
-	WARN_ON(state == oldstate);
-	hctx->state = state;
-}
 
 /*
  * Compute the initial sending rate X_init in the manner of RFC 3390:
@@ -206,16 +181,15 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		goto restart_timer;
 	}
 
-	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
-		       ccid3_tx_state_name(hctx->state));
+	ccid3_pr_debug("%s(%p) entry with%s feedback\n", dccp_role(sk), sk,
+		       hctx->feedback ? "" : "out");
 
 	/* Ignore and do not restart after leaving the established state */
 	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
 		goto out;
 
 	/* Reset feedback state to "no feedback received" */
-	if (hctx->state == TFRC_SSTATE_FBACK)
-		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+	hctx->feedback = false;
 
 	/*
 	 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
@@ -290,7 +264,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(skb->len == 0))
 		return -EBADMSG;
 
-	if (hctx->state == TFRC_SSTATE_NO_SENT) {
+	if (hctx->s == 0) {
 		sk_reset_timer(sk, &hctx->no_feedback_timer, (jiffies +
 				usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
 		hctx->last_win_count   = 0;
@@ -324,8 +298,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		}
 		ccid3_update_send_interval(hctx);
 
-		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-
 	} else {
 		delay = ktime_us_delta(hctx->t_nom, now);
 		ccid3_pr_debug("delay=%ld\n", (long)delay);
@@ -396,8 +368,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	/*
 	 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
 	 */
-	if (hctx->state == TFRC_SSTATE_NO_FBACK) {
-		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
+	if (!hctx->feedback) {
+		hctx->feedback = true;
 
 		if (hctx->t_rto == 0) {
 			/*
@@ -502,7 +474,6 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
 
-	hctx->state = TFRC_SSTATE_NO_SENT;
 	hctx->hist  = NULL;
 	setup_timer(&hctx->no_feedback_timer,
 		    ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 7884159937a..1773a8dd36d 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -70,13 +70,6 @@ enum ccid3_options {
 	TFRC_OPT_RECEIVE_RATE	 = 194,
 };
 
-/* TFRC sender states */
-enum ccid3_hc_tx_states {
-	TFRC_SSTATE_NO_SENT = 1,
-	TFRC_SSTATE_NO_FBACK,
-	TFRC_SSTATE_FBACK,
-};
-
 /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
  *
  * @x - Current sending rate in 64 * bytes per second
@@ -87,7 +80,7 @@ enum ccid3_hc_tx_states {
  * @s - Packet size in bytes
  * @t_rto - Nofeedback Timer setting in usecs
  * @t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
- * @state - Sender state, one of %ccid3_hc_tx_states
+ * @feedback - Whether feedback has been received or not
  * @last_win_count - Last window counter sent
  * @t_last_win_count - Timestamp of earliest packet with
  *                     last_win_count value sent
@@ -105,7 +98,7 @@ struct ccid3_hc_tx_sock {
 	u32				t_rto;
 	u32				t_ipi;
 	u16				s;
-	enum ccid3_hc_tx_states		state:8;
+	bool				feedback:1;
 	u8				last_win_count;
 	ktime_t				t_last_win_count;
 	struct timer_list		no_feedback_timer;
-- 
cgit v1.2.3-70-g09d2


From 8b67ad12b04ef7bdf5d2b4de24fe5a609b26cf12 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp tfrc: Suppress unavoidable "below resolution" warning

In the congestion-avoidance phase a decay of p towards 0 is natural once fewer
losses are encountered. Hence the warning message "p is below resolution" is
not necessary, and thus turned into a debug message by this patch.

The TFRC_SMALLEST_P is needed since in theory p never actually reaches 0. When
no further losses are encountered, the loss interval I_0 grows in length,
causing p to decrease towards 0, causing X_calc = s/(RTT * f(p)) to increase.

With the given minimum-resolution this congestion avoidance phase stops at some
fixed value, an approximation formula has been added to the documentation.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/lib/tfrc_equation.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index bc3dc2b2a84..38239c4d5e1 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -632,8 +632,16 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 
 	if (p <= TFRC_CALC_X_SPLIT) 		{     /* 0.0000 < p <= 0.05   */
 		if (p < TFRC_SMALLEST_P) {	      /* 0.0000 < p <  0.0001 */
-			DCCP_WARN("Value of p (%d) below resolution. "
-				  "Substituting %d\n", p, TFRC_SMALLEST_P);
+			/*
+			 * In the congestion-avoidance phase p decays towards 0
+			 * when there are no further losses, so this case is
+			 * natural. Truncating to p_min = 0.01% means that the
+			 * maximum achievable throughput is limited to about
+			 * X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g.
+			 * with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps.
+			 */
+			tfrc_pr_debug("Value of p (%d) below resolution. "
+				      "Substituting %d\n", p, TFRC_SMALLEST_P);
 			index = 0;
 		} else 				      /* 0.0001 <= p <= 0.05  */
 			index =  p/TFRC_SMALLEST_P - 1;
-- 
cgit v1.2.3-70-g09d2


From 24b8d343215919c7a2ba18b9f89a0961e1459cad Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp tfrc: Receiver history initialisation routine

This patch
 1) separates history allocation and initialisation, to facilitate early
    loss detection (implemented by a subsequent patch);

 2) removes duplication by using the existing tfrc_rx_hist_purge() if the
    allocation fails. This is now possible, since the initialisation routine
 3) zeroes out the entire history before using it.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c              |  2 +-
 net/dccp/ccids/lib/packet_history.c | 52 +++++++++++++++++++++----------------
 net/dccp/ccids/lib/packet_history.h |  2 +-
 3 files changed, 32 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 5470a978be0..36f4992f3c3 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -766,7 +766,7 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
 
 	hcrx->state = TFRC_RSTATE_NO_DATA;
 	tfrc_lh_init(&hcrx->li_hist);
-	return tfrc_rx_hist_alloc(&hcrx->hist);
+	return tfrc_rx_hist_init(&hcrx->hist, sk);
 }
 
 static void ccid3_hc_rx_exit(struct sock *sk)
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 5c445086690..5b4e1cf8439 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -352,28 +352,6 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
 
-int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
-{
-	int i;
-
-	for (i = 0; i <= TFRC_NDUPACK; i++) {
-		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
-		if (h->ring[i] == NULL)
-			goto out_free;
-	}
-
-	h->loss_count = h->loss_start = 0;
-	return 0;
-
-out_free:
-	while (i-- != 0) {
-		kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
-		h->ring[i] = NULL;
-	}
-	return -ENOBUFS;
-}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
-
 void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 {
 	int i;
@@ -386,6 +364,36 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
 
+static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
+{
+	int i;
+
+	memset(h, 0, sizeof(*h));
+
+	for (i = 0; i <= TFRC_NDUPACK; i++) {
+		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
+		if (h->ring[i] == NULL) {
+			tfrc_rx_hist_purge(h);
+			return -ENOBUFS;
+		}
+	}
+	return 0;
+}
+
+int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
+{
+	if (tfrc_rx_hist_alloc(h))
+		return -ENOBUFS;
+	/*
+	 * Initialise first entry with GSR to start loss detection as early as
+	 * possible. Code using this must not use any other fields. The entry
+	 * will be overwritten once the CCID updates its received packets.
+	 */
+	tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
+
 /**
  * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
  */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 221d8102da5..e9d8097947d 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -153,7 +153,7 @@ extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 				struct sock *sk);
 extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
 				   const struct sk_buff *skb);
-extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
+extern int  tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
 extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
 
 #endif /* _DCCP_PKT_HIST_ */
-- 
cgit v1.2.3-70-g09d2


From d20ed95f8bf3d98d31dbbab8b00bb4c1a4a140f3 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp tfrc: Perform early loss detection

This enables the TFRC code to begin loss detection (as soon as the module
is loaded), using the latest updates from rfc3448bis-06, 6.3.1:

 * when the first data packet(s) are lost or marked, set
 * X_target = s/(2*R) => f(p) = s/(R * X_target) = 2,
 * corresponding to a loss rate of ~ 20.64%.

The handle_loss() function is now called right at the begin of rx_packet_recv()
and thus no longer protected against duplicates: hence a call to rx_duplicate()
has been added.  Such a call makes sense now, as the previous patch initialises
the first entry with a sequence number of GSR.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c              | 48 ++++++++++++++++++++++++++++++-------
 net/dccp/ccids/lib/packet_history.c |  3 +++
 2 files changed, 42 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 36f4992f3c3..0a7c22598d6 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -577,6 +577,28 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 		hcrx->p_inverse = ~0U;   /* see RFC 4342, 8.5 */
 		break;
 	case CCID3_FBACK_PARAM_CHANGE:
+		if (unlikely(hcrx->state == TFRC_RSTATE_NO_DATA)) {
+			/*
+			 * rfc3448bis-06, 6.3.1: First packet(s) lost or marked
+			 * FIXME: in rfc3448bis the receiver returns X_recv=0
+			 * here as it normally would in the first feedback packet.
+			 * However this is not possible yet, since the code still
+			 * uses RFC 3448, i.e.
+			 *    If (p > 0)
+			 *      Calculate X_calc using the TCP throughput equation.
+			 *      X = max(min(X_calc, 2*X_recv), s/t_mbi);
+			 * would bring X down to s/t_mbi. That is why we return
+			 * X_recv according to rfc3448bis-06 for the moment.
+			 */
+			u32 rtt = hcrx->rtt ? : DCCP_FALLBACK_RTT, s = hcrx->s;
+
+			if (s == 0) {
+				DCCP_WARN("No sample for s, using fallback\n");
+				s = TCP_MIN_RCVMSS;
+			}
+			hcrx->x_recv = scaled_div32(s, 2 * rtt);
+			break;
+		}
 		/*
 		 * When parameters change (new loss or p > p_prev), we do not
 		 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
@@ -650,6 +672,14 @@ static u32 ccid3_first_li(struct sock *sk)
 	u32 x_recv, p, delta;
 	u64 fval;
 
+	/*
+	 * rfc3448bis-06, 6.3.1: First data packet(s) are marked or lost. Set p
+	 * to give the equivalent of X_target = s/(2*R). Thus fval = 2 and so p
+	 * is about 20.64%. This yields an interval length of 4.84 (rounded up).
+	 */
+	if (unlikely(hcrx->state == TFRC_RSTATE_NO_DATA))
+		return 5;
+
 	if (hcrx->rtt == 0) {
 		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
 		hcrx->rtt = DCCP_FALLBACK_RTT;
@@ -683,6 +713,15 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
 	const bool is_data_packet = dccp_data_packet(skb);
 
+	/*
+	 * Perform loss detection and handle pending losses
+	 */
+	if (tfrc_rx_handle_loss(&hcrx->hist, &hcrx->li_hist,
+				skb, ndp, ccid3_first_li, sk)) {
+		do_feedback = CCID3_FBACK_PARAM_CHANGE;
+		goto done_receiving;
+	}
+
 	if (unlikely(hcrx->state == TFRC_RSTATE_NO_DATA)) {
 		if (is_data_packet) {
 			const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
@@ -710,15 +749,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		hcrx->bytes_recv += payload;
 	}
 
-	/*
-	 * Perform loss detection and handle pending losses
-	 */
-	if (tfrc_rx_handle_loss(&hcrx->hist, &hcrx->li_hist,
-				skb, ndp, ccid3_first_li, sk)) {
-		do_feedback = CCID3_FBACK_PARAM_CHANGE;
-		goto done_receiving;
-	}
-
 	if (tfrc_rx_hist_loss_pending(&hcrx->hist))
 		return; /* done receiving */
 
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 5b4e1cf8439..8db34225c00 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -335,6 +335,9 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 {
 	int is_new_loss = 0;
 
+	if (tfrc_rx_hist_duplicate(h, skb))
+		return 0;
+
 	if (h->loss_count == 0) {
 		__do_track_loss(h, skb, ndp);
 	} else if (h->loss_count == 1) {
-- 
cgit v1.2.3-70-g09d2


From 3ca7aea04152255bb65275b0018d3c673bc1f4e7 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp tfrc: Return type of update_i_mean is void

This changes the return type of tfrc_lh_update_i_mean() to void, since that
function returns always `false'. This is due to

 	len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;

 	if (len - (s64)cur->li_length <= 0)	/* duplicate or reordered */
		return 0;

which means that update_i_mean can only increase the length of the open loss
interval I_0, and hence the value of I_tot0 (RFC 3448, 5.4). Consequently the
test `i_mean < old_i_mean' at the end of the function always evaluates to false.

There is no known way by which a loss interval can suddenly become shorter,
therefore the return type of the function is changed to void. (That is, under
the given circumstances step (3) in RFC 3448, 6.1 will not occur.)

Further changes:
----------------
 * the function is now called from tfrc_rx_handle_loss, which is equivalent
   to the previous way of calling from rx_packet_recv (it was called whenever
   there was no new or pending loss, now  it is also updated when there is
   a pending loss - this increases the accuracy a bit);
 * added a FIXME to possibly consider NDP counting as per RFC 4342 (this is
   not implemented yet).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c              |  8 --------
 net/dccp/ccids/lib/loss_interval.c  | 20 +++++++++++---------
 net/dccp/ccids/lib/loss_interval.h  |  2 +-
 net/dccp/ccids/lib/packet_history.c |  5 +++++
 4 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 0a7c22598d6..50dac01e48c 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -767,15 +767,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (sample != 0)
 			hcrx->rtt = tfrc_ewma(hcrx->rtt, sample, 9);
-
-	} else if (tfrc_lh_update_i_mean(&hcrx->li_hist, skb)) {
-		/*
-		 * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean
-		 * has decreased (resp. p has increased), send feedback now.
-		 */
-		do_feedback = CCID3_FBACK_PARAM_CHANGE;
 	}
-
 	/*
 	 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
 	 */
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 5b3ce0688c5..fe5c2a31c77 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -86,21 +86,26 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 
 /**
  * tfrc_lh_update_i_mean  -  Update the `open' loss interval I_0
- * For recomputing p: returns `true' if p > p_prev  <=>  1/p < 1/p_prev
+ * This updates I_mean as the sequence numbers increase. As a consequence, the
+ * open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1)
+ * decreases, and thus there is no need to send renewed feedback.
  */
-u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
+void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 {
 	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
-	u32 old_i_mean = lh->i_mean;
 	s64 len;
 
 	if (cur == NULL)			/* not initialised */
-		return 0;
+		return;
+
+	/* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */
+	if (!dccp_data_packet(skb))
+		return;
 
 	len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
 
 	if (len - (s64)cur->li_length <= 0)	/* duplicate or reordered */
-		return 0;
+		return;
 
 	if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
 		/*
@@ -114,14 +119,11 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 		cur->li_is_closed = 1;
 
 	if (tfrc_lh_length(lh) == 1)		/* due to RFC 3448, 6.3.1 */
-		return 0;
+		return;
 
 	cur->li_length = len;
 	tfrc_lh_calc_i_mean(lh);
-
-	return (lh->i_mean < old_i_mean);
 }
-EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
 
 /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
 static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 246018a3b26..f101ae2cf52 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -69,7 +69,7 @@ struct tfrc_rx_hist;
 
 extern int  tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
 				 u32 (*first_li)(struct sock *), struct sock *);
-extern u8   tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
+extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
 extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
 
 #endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 8db34225c00..8ea96903033 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -351,6 +351,11 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 		is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
 		__three_after_loss(h);
 	}
+
+	/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
+	if (!is_new_loss)
+		tfrc_lh_update_i_mean(lh, skb);
+
 	return is_new_loss;
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
-- 
cgit v1.2.3-70-g09d2


From 34a081be8e14b7ada70e069b65b05d54db4af497 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp tfrc: Let dccp_tfrc_lib do the sampling work

This migrates more TFRC-related code into the dccp_tfrc_lib:
 * sampling of the packet size `s' (which is only needed until the first
   loss interval is computed (ccid3_first_li));
 * updating the byte-counter `bytes_recvd' in between sending feedbacks.
The result is a better separation of CCID-3 specific and TFRC specific
code, which aids future integration with ECN and e.g. CCID-4.

Further changes:
----------------
 * replaced magic number of 536 with equivalent constant TCP_MIN_RCVMSS;
   (this constant is also used when no estimate for `s' is available).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c              | 38 ++++++++-----------------------------
 net/dccp/ccids/ccid3.h              |  4 ----
 net/dccp/ccids/lib/packet_history.c | 10 ++++++++++
 net/dccp/ccids/lib/packet_history.h | 16 ++++++++++++++++
 net/dccp/proto.c                    |  2 +-
 5 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 50dac01e48c..8744590acb3 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -590,12 +590,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 			 * would bring X down to s/t_mbi. That is why we return
 			 * X_recv according to rfc3448bis-06 for the moment.
 			 */
-			u32 rtt = hcrx->rtt ? : DCCP_FALLBACK_RTT, s = hcrx->s;
+			u32 rtt = hcrx->rtt ? : DCCP_FALLBACK_RTT,
+			    s	= tfrc_rx_hist_packet_size(&hcrx->hist);
 
-			if (s == 0) {
-				DCCP_WARN("No sample for s, using fallback\n");
-				s = TCP_MIN_RCVMSS;
-			}
 			hcrx->x_recv = scaled_div32(s, 2 * rtt);
 			break;
 		}
@@ -617,7 +614,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 		if (delta <= 0)
 			DCCP_BUG("delta (%ld) <= 0", (long)delta);
 		else
-			hcrx->x_recv = scaled_div32(hcrx->bytes_recv, delta);
+			hcrx->x_recv = scaled_div32(hcrx->hist.bytes_recvd, delta);
 		break;
 	default:
 		return;
@@ -628,7 +625,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 
 	hcrx->tstamp_last_feedback = now;
 	hcrx->last_counter	   = dccp_hdr(skb)->dccph_ccval;
-	hcrx->bytes_recv	   = 0;
+	hcrx->hist.bytes_recvd	   = 0;
 
 	dp->dccps_hc_rx_insert_options = 1;
 	dccp_send_ack(sk);
@@ -669,7 +666,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 static u32 ccid3_first_li(struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	u32 x_recv, p, delta;
+	u32 x_recv, p, delta,
+	    s = tfrc_rx_hist_packet_size(&hcrx->hist);
 	u64 fval;
 
 	/*
@@ -686,7 +684,7 @@ static u32 ccid3_first_li(struct sock *sk)
 	}
 
 	delta = ktime_to_us(net_timedelta(hcrx->tstamp_last_feedback));
-	x_recv = scaled_div32(hcrx->bytes_recv, delta);
+	x_recv = scaled_div32(hcrx->hist.bytes_recvd, delta);
 	if (x_recv == 0) {		/* would also trigger divide-by-zero */
 		DCCP_WARN("X_recv==0\n");
 		if (hcrx->x_recv == 0) {
@@ -696,8 +694,7 @@ static u32 ccid3_first_li(struct sock *sk)
 		x_recv = hcrx->x_recv;
 	}
 
-	fval = scaled_div(hcrx->s, hcrx->rtt);
-	fval = scaled_div32(fval, x_recv);
+	fval = scaled_div32(scaled_div(s, hcrx->rtt), x_recv);
 	p = tfrc_calc_x_reverse_lookup(fval);
 
 	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
@@ -724,31 +721,12 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 	if (unlikely(hcrx->state == TFRC_RSTATE_NO_DATA)) {
 		if (is_data_packet) {
-			const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
 			do_feedback = CCID3_FBACK_INITIAL;
 			ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
-			hcrx->s = payload;
-			/*
-			 * Not necessary to update bytes_recv here,
-			 * since X_recv = 0 for the first feedback packet (cf.
-			 * RFC 3448, 6.3) -- gerrit
-			 */
 		}
 		goto update_records;
 	}
 
-	if (tfrc_rx_hist_duplicate(&hcrx->hist, skb))
-		return; /* done receiving */
-
-	if (is_data_packet) {
-		const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
-		/*
-		 * Update moving-average of s and the sum of received payload bytes
-		 */
-		hcrx->s = tfrc_ewma(hcrx->s, payload, 9);
-		hcrx->bytes_recv += payload;
-	}
-
 	if (tfrc_rx_hist_loss_pending(&hcrx->hist))
 		return; /* done receiving */
 
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 1773a8dd36d..0c4fadd85f9 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -124,25 +124,21 @@ enum ccid3_hc_rx_states {
  *
  *  @last_counter  -  Tracks window counter (RFC 4342, 8.1)
  *  @state  -  Receiver state, one of %ccid3_hc_rx_states
- *  @bytes_recv  -  Total sum of DCCP payload bytes
  *  @x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
  *  @rtt  -  Receiver estimate of RTT
  *  @tstamp_last_feedback  -  Time at which last feedback was sent
  *  @hist  -  Packet history (loss detection + RTT sampling)
  *  @li_hist  -  Loss Interval database
- *  @s  -  Received packet size in bytes
  *  @p_inverse  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
  */
 struct ccid3_hc_rx_sock {
 	u8				last_counter:4;
 	enum ccid3_hc_rx_states		state:8;
-	u32				bytes_recv;
 	u32				x_recv;
 	u32				rtt;
 	ktime_t				tstamp_last_feedback;
 	struct tfrc_rx_hist		hist;
 	struct tfrc_loss_hist		li_hist;
-	u16				s;
 #define p_inverse			li_hist.i_mean
 };
 
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 8ea96903033..ee34b456424 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -352,6 +352,16 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 		__three_after_loss(h);
 	}
 
+	/*
+	 * Update moving-average of `s' and the sum of received payload bytes.
+	 */
+	if (dccp_data_packet(skb)) {
+		const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+
+		h->packet_size = tfrc_ewma(h->packet_size, payload, 9);
+		h->bytes_recvd += payload;
+	}
+
 	/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
 	if (!is_new_loss)
 		tfrc_lh_update_i_mean(lh, skb);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index e9d8097947d..b7c87a1a272 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -91,12 +91,16 @@ struct tfrc_rx_hist_entry {
  * @loss_count:		Number of entries in circular history
  * @loss_start:		Movable index (for loss detection)
  * @rtt_sample_prev:	Used during RTT sampling, points to candidate entry
+ * @packet_size:	Packet size in bytes (as per RFC 3448, 3.1)
+ * @bytes_recvd:	Number of bytes received since last sending feedback
  */
 struct tfrc_rx_hist {
 	struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
 	u8			  loss_count:2,
 				  loss_start:2;
 #define rtt_sample_prev		  loss_start
+	u32			  packet_size,
+				  bytes_recvd;
 };
 
 /**
@@ -140,6 +144,18 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
 	return h->loss_count > 0;
 }
 
+/*
+ * Accessor functions to retrieve parameters sampled by the RX history
+ */
+static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
+{
+	if (h->packet_size == 0) {
+		DCCP_WARN("No sample for s, using fallback\n");
+		return TCP_MIN_RCVMSS;
+	}
+	return h->packet_size;
+}
+
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 				    const struct sk_buff *skb, const u64 ndp);
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a3caa11aa83..ecf3be961e1 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -185,7 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	sk->sk_state		= DCCP_CLOSED;
 	sk->sk_write_space	= dccp_write_space;
 	icsk->icsk_sync_mss	= dccp_sync_mss;
-	dp->dccps_mss_cache	= 536;
+	dp->dccps_mss_cache	= TCP_MIN_RCVMSS;
 	dp->dccps_rate_last	= jiffies;
 	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
 	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
-- 
cgit v1.2.3-70-g09d2


From 2f3e3bbad917c426d3aba03a535809e5699de156 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Remove duplicate RX states

The only state information that the CCID-3 receiver keeps is whether initial
feedback has been sent or not. Further, this overlaps with use of feedback:

 * state == TFRC_RSTATE_NO_DATA as long as no feedback has been sent;
 * state == TFRC_RSTATE_DATA    as soon as the first feedback has been sent.

This patch reduces the duplication, by memorising the type of the last feedback.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 47 +++++------------------------------------------
 net/dccp/ccids/ccid3.h | 14 ++++++++------
 2 files changed, 13 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 8744590acb3..04b183548aa 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -528,40 +528,6 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 /*
  *	Receiver Half-Connection Routines
  */
-
-/* CCID3 feedback types */
-enum ccid3_fback_type {
-	CCID3_FBACK_NONE = 0,
-	CCID3_FBACK_INITIAL,
-	CCID3_FBACK_PERIODIC,
-	CCID3_FBACK_PARAM_CHANGE
-};
-
-#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
-{
-	static char *ccid3_rx_state_names[] = {
-	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
-	[TFRC_RSTATE_DATA]    = "DATA",
-	};
-
-	return ccid3_rx_state_names[state];
-}
-#endif
-
-static void ccid3_hc_rx_set_state(struct sock *sk,
-				  enum ccid3_hc_rx_states state)
-{
-	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	enum ccid3_hc_rx_states oldstate = hcrx->state;
-
-	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
-		       dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
-		       ccid3_rx_state_name(state));
-	WARN_ON(state == oldstate);
-	hcrx->state = state;
-}
-
 static void ccid3_hc_rx_send_feedback(struct sock *sk,
 				      const struct sk_buff *skb,
 				      enum ccid3_fback_type fbtype)
@@ -577,7 +543,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 		hcrx->p_inverse = ~0U;   /* see RFC 4342, 8.5 */
 		break;
 	case CCID3_FBACK_PARAM_CHANGE:
-		if (unlikely(hcrx->state == TFRC_RSTATE_NO_DATA)) {
+		if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) {
 			/*
 			 * rfc3448bis-06, 6.3.1: First packet(s) lost or marked
 			 * FIXME: in rfc3448bis the receiver returns X_recv=0
@@ -626,6 +592,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 	hcrx->tstamp_last_feedback = now;
 	hcrx->last_counter	   = dccp_hdr(skb)->dccph_ccval;
 	hcrx->hist.bytes_recvd	   = 0;
+	hcrx->feedback		   = fbtype;
 
 	dp->dccps_hc_rx_insert_options = 1;
 	dccp_send_ack(sk);
@@ -675,7 +642,7 @@ static u32 ccid3_first_li(struct sock *sk)
 	 * to give the equivalent of X_target = s/(2*R). Thus fval = 2 and so p
 	 * is about 20.64%. This yields an interval length of 4.84 (rounded up).
 	 */
-	if (unlikely(hcrx->state == TFRC_RSTATE_NO_DATA))
+	if (unlikely(hcrx->feedback == CCID3_FBACK_NONE))
 		return 5;
 
 	if (hcrx->rtt == 0) {
@@ -719,11 +686,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		goto done_receiving;
 	}
 
-	if (unlikely(hcrx->state == TFRC_RSTATE_NO_DATA)) {
-		if (is_data_packet) {
+	if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) {
+		if (is_data_packet)
 			do_feedback = CCID3_FBACK_INITIAL;
-			ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
-		}
 		goto update_records;
 	}
 
@@ -764,7 +729,6 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
 
-	hcrx->state = TFRC_RSTATE_NO_DATA;
 	tfrc_lh_init(&hcrx->li_hist);
 	return tfrc_rx_hist_init(&hcrx->hist, sk);
 }
@@ -779,7 +743,6 @@ static void ccid3_hc_rx_exit(struct sock *sk)
 
 static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
 {
-	info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->state;
 	info->tcpi_options  |= TCPI_OPT_TIMESTAMPS;
 	info->tcpi_rcv_rtt  = ccid3_hc_rx_sk(sk)->rtt;
 }
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 0c4fadd85f9..72e110a1100 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -114,16 +114,18 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
     return hctx;
 }
 
-/* TFRC receiver states */
-enum ccid3_hc_rx_states {
-	TFRC_RSTATE_NO_DATA = 1,
-	TFRC_RSTATE_DATA,
+
+enum ccid3_fback_type {
+	CCID3_FBACK_NONE = 0,
+	CCID3_FBACK_INITIAL,
+	CCID3_FBACK_PERIODIC,
+	CCID3_FBACK_PARAM_CHANGE
 };
 
 /** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
  *
  *  @last_counter  -  Tracks window counter (RFC 4342, 8.1)
- *  @state  -  Receiver state, one of %ccid3_hc_rx_states
+ *  @feedback  -  The type of the feedback last sent
  *  @x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
  *  @rtt  -  Receiver estimate of RTT
  *  @tstamp_last_feedback  -  Time at which last feedback was sent
@@ -133,7 +135,7 @@ enum ccid3_hc_rx_states {
  */
 struct ccid3_hc_rx_sock {
 	u8				last_counter:4;
-	enum ccid3_hc_rx_states		state:8;
+	enum ccid3_fback_type		feedback:4;
 	u32				x_recv;
 	u32				rtt;
 	ktime_t				tstamp_last_feedback;
-- 
cgit v1.2.3-70-g09d2


From 2b81143aa3505e2460b24b357996c2f21840ea58 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Always perform receiver RTT sampling

This updates the CCID-3 receiver in part with regard to errata 610 and 611
(http://www.rfc-editor.org/errata_list.php), which change RFC 4342 to use the
Receive Rate as specified in rfc3448bis, requiring to constantly sample the
RTT (or use a sender RTT).

Doing this requires reusing the RX history structure after dealing with a loss.

The patch does not resolve how to compute X_recv if the interval is less
than 1 RTT. A FIXME has been added (and is resolved in subsequent patch).

Furthermore, since this is all TFRC-based functionality, the RTT estimation
is now also performed by the dccp_tfrc_lib module. This further simplifies
the CCID-3 code.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c              | 43 +++++++++-----------------
 net/dccp/ccids/ccid3.h              |  2 --
 net/dccp/ccids/lib/packet_history.c | 60 +++++++++++++++++++++++++++----------
 net/dccp/ccids/lib/packet_history.h | 17 +++++++++--
 4 files changed, 73 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 04b183548aa..8e64d9665a2 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -556,8 +556,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 			 * would bring X down to s/t_mbi. That is why we return
 			 * X_recv according to rfc3448bis-06 for the moment.
 			 */
-			u32 rtt = hcrx->rtt ? : DCCP_FALLBACK_RTT,
-			    s	= tfrc_rx_hist_packet_size(&hcrx->hist);
+			u32 s = tfrc_rx_hist_packet_size(&hcrx->hist),
+			    rtt = tfrc_rx_hist_rtt(&hcrx->hist);
 
 			hcrx->x_recv = scaled_div32(s, 2 * rtt);
 			break;
@@ -576,6 +576,11 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 			break;
 		/* fall through */
 	case CCID3_FBACK_PERIODIC:
+		/*
+		 * FIXME: check if delta is less than or equal to 1 RTT using
+		 * the receiver RTT sample. This is described in Errata 610/611
+		 * of RFC 4342 which reference section 6.2 of RFC 3448.
+		 */
 		delta = ktime_us_delta(now, hcrx->tstamp_last_feedback);
 		if (delta <= 0)
 			DCCP_BUG("delta (%ld) <= 0", (long)delta);
@@ -633,8 +638,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 static u32 ccid3_first_li(struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	u32 x_recv, p, delta,
-	    s = tfrc_rx_hist_packet_size(&hcrx->hist);
+	u32 s = tfrc_rx_hist_packet_size(&hcrx->hist),
+	    rtt = tfrc_rx_hist_rtt(&hcrx->hist), x_recv, p, delta;
 	u64 fval;
 
 	/*
@@ -645,11 +650,6 @@ static u32 ccid3_first_li(struct sock *sk)
 	if (unlikely(hcrx->feedback == CCID3_FBACK_NONE))
 		return 5;
 
-	if (hcrx->rtt == 0) {
-		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
-		hcrx->rtt = DCCP_FALLBACK_RTT;
-	}
-
 	delta = ktime_to_us(net_timedelta(hcrx->tstamp_last_feedback));
 	x_recv = scaled_div32(hcrx->hist.bytes_recvd, delta);
 	if (x_recv == 0) {		/* would also trigger divide-by-zero */
@@ -661,7 +661,7 @@ static u32 ccid3_first_li(struct sock *sk)
 		x_recv = hcrx->x_recv;
 	}
 
-	fval = scaled_div32(scaled_div(s, hcrx->rtt), x_recv);
+	fval = scaled_div32(scaled_div(s, rtt), x_recv);
 	p = tfrc_calc_x_reverse_lookup(fval);
 
 	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
@@ -695,26 +695,11 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	if (tfrc_rx_hist_loss_pending(&hcrx->hist))
 		return; /* done receiving */
 
-	/*
-	 * Handle data packets: RTT sampling and monitoring p
-	 */
-	if (unlikely(!is_data_packet))
-		goto update_records;
-
-	if (!tfrc_lh_is_initialised(&hcrx->li_hist)) {
-		const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->hist, skb);
-		/*
-		 * Empty loss history: no loss so far, hence p stays 0.
-		 * Sample RTT values, since an RTT estimate is required for the
-		 * computation of p when the first loss occurs; RFC 3448, 6.3.1.
-		 */
-		if (sample != 0)
-			hcrx->rtt = tfrc_ewma(hcrx->rtt, sample, 9);
-	}
 	/*
 	 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
 	 */
-	if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3)
+	if (is_data_packet &&
+	    SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3)
 		do_feedback = CCID3_FBACK_PERIODIC;
 
 update_records:
@@ -744,7 +729,7 @@ static void ccid3_hc_rx_exit(struct sock *sk)
 static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
 {
 	info->tcpi_options  |= TCPI_OPT_TIMESTAMPS;
-	info->tcpi_rcv_rtt  = ccid3_hc_rx_sk(sk)->rtt;
+	info->tcpi_rcv_rtt  = tfrc_rx_hist_rtt(&ccid3_hc_rx_sk(sk)->hist);
 }
 
 static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
@@ -759,7 +744,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 		if (len < sizeof(rx_info))
 			return -EINVAL;
 		rx_info.tfrcrx_x_recv = hcrx->x_recv;
-		rx_info.tfrcrx_rtt    = hcrx->rtt;
+		rx_info.tfrcrx_rtt    = tfrc_rx_hist_rtt(&hcrx->hist);
 		rx_info.tfrcrx_p      = tfrc_invert_loss_event_rate(hcrx->p_inverse);
 		len = sizeof(rx_info);
 		val = &rx_info;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 72e110a1100..342235c57bf 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -127,7 +127,6 @@ enum ccid3_fback_type {
  *  @last_counter  -  Tracks window counter (RFC 4342, 8.1)
  *  @feedback  -  The type of the feedback last sent
  *  @x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
- *  @rtt  -  Receiver estimate of RTT
  *  @tstamp_last_feedback  -  Time at which last feedback was sent
  *  @hist  -  Packet history (loss detection + RTT sampling)
  *  @li_hist  -  Loss Interval database
@@ -137,7 +136,6 @@ struct ccid3_hc_rx_sock {
 	u8				last_counter:4;
 	enum ccid3_fback_type		feedback:4;
 	u32				x_recv;
-	u32				rtt;
 	ktime_t				tstamp_last_feedback;
 	struct tfrc_rx_hist		hist;
 	struct tfrc_loss_hist		li_hist;
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index ee34b456424..e2e250aa5c8 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -151,14 +151,31 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
 
+
+static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
+{
+	struct tfrc_rx_hist_entry *tmp = h->ring[a];
+
+	h->ring[a] = h->ring[b];
+	h->ring[b] = tmp;
+}
+
 static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
 {
-	const u8 idx_a = tfrc_rx_hist_index(h, a),
-		 idx_b = tfrc_rx_hist_index(h, b);
-	struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
+	__tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
+			       tfrc_rx_hist_index(h, b));
+}
 
-	h->ring[idx_a] = h->ring[idx_b];
-	h->ring[idx_b] = tmp;
+/**
+ * tfrc_rx_hist_resume_rtt_sampling  -  Prepare RX history for RTT sampling
+ * This is called after loss detection has finished, when the history entry
+ * with the index of `loss_count' holds the highest-received sequence number.
+ * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
+ */
+static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
+{
+	__tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
+	h->loss_count = h->loss_start = 0;
 }
 
 /*
@@ -200,8 +217,7 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
 
 		if (dccp_loss_free(s2, s1, n1)) {
 			/* hole is filled: S0, S2, and S1 are consecutive */
-			h->loss_count = 0;
-			h->loss_start = tfrc_rx_hist_index(h, 1);
+			tfrc_rx_hist_resume_rtt_sampling(h);
 		} else
 			/* gap between S2 and S1: just update loss_prev */
 			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
@@ -254,8 +270,7 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
 
 			if (dccp_loss_free(s1, s2, n2)) {
 				/* entire hole filled by S0, S3, S1, S2 */
-				h->loss_start = tfrc_rx_hist_index(h, 2);
-				h->loss_count = 0;
+				tfrc_rx_hist_resume_rtt_sampling(h);
 			} else {
 				/* gap remains between S1 and S2 */
 				h->loss_start = tfrc_rx_hist_index(h, 1);
@@ -299,8 +314,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
 
 		if (dccp_loss_free(s2, s3, n3)) {
 			/* no gap between S2 and S3: entire hole is filled */
-			h->loss_start = tfrc_rx_hist_index(h, 3);
-			h->loss_count = 0;
+			tfrc_rx_hist_resume_rtt_sampling(h);
 		} else {
 			/* gap between S2 and S3 */
 			h->loss_start = tfrc_rx_hist_index(h, 2);
@@ -340,6 +354,7 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 
 	if (h->loss_count == 0) {
 		__do_track_loss(h, skb, ndp);
+		tfrc_rx_hist_sample_rtt(h, skb);
 	} else if (h->loss_count == 1) {
 		__one_after_loss(h, skb, ndp);
 	} else if (h->loss_count != 2) {
@@ -435,11 +450,24 @@ static inline struct tfrc_rx_hist_entry *
  * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
  * to compute a sample with given data - calling function should check this.
  */
-u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
 {
-	u32 sample = 0,
-	    delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
-			    tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+	u32 sample = 0, delta_v;
+
+	/*
+	 * When not to sample:
+	 * - on non-data packets
+	 *   (RFC 4342, 8.1: CCVal only fully defined for data packets);
+	 * - when no data packets have been received yet
+	 *   (FIXME: using sampled packet size as indicator here);
+	 * - as long as there are gaps in the sequence space (pending loss).
+	 */
+	if (!dccp_data_packet(skb) || h->packet_size == 0 ||
+	    tfrc_rx_hist_loss_pending(h))
+		return;
+
+	delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
+			tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
 
 	if (delta_v < 1 || delta_v > 4) {	/* unsuitable CCVal delta */
 		if (h->rtt_sample_prev == 2) {	/* previous candidate stored */
@@ -479,6 +507,6 @@ u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
 	h->rtt_sample_prev = 0;	       /* use current entry as next reference */
 keep_ref_for_next_time:
 
-	return sample;
+	h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 9);
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index b7c87a1a272..ba5832bbc34 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -91,6 +91,7 @@ struct tfrc_rx_hist_entry {
  * @loss_count:		Number of entries in circular history
  * @loss_start:		Movable index (for loss detection)
  * @rtt_sample_prev:	Used during RTT sampling, points to candidate entry
+ * @rtt_estimate:	Receiver RTT estimate
  * @packet_size:	Packet size in bytes (as per RFC 3448, 3.1)
  * @bytes_recvd:	Number of bytes received since last sending feedback
  */
@@ -98,7 +99,10 @@ struct tfrc_rx_hist {
 	struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
 	u8			  loss_count:2,
 				  loss_start:2;
+	/* Receiver RTT sampling */
 #define rtt_sample_prev		  loss_start
+	u32			  rtt_estimate;
+	/* Receiver sampling of application payload lengths */
 	u32			  packet_size,
 				  bytes_recvd;
 };
@@ -154,6 +158,15 @@ static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
 		return TCP_MIN_RCVMSS;
 	}
 	return h->packet_size;
+
+}
+static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
+{
+	if (h->rtt_estimate == 0) {
+		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
+		return  DCCP_FALLBACK_RTT;
+	}
+	return h->rtt_estimate;
 }
 
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
@@ -167,8 +180,8 @@ extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 				struct sk_buff *skb, const u64 ndp,
 				u32 (*first_li)(struct sock *sk),
 				struct sock *sk);
-extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
-				   const struct sk_buff *skb);
+extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+				    const struct sk_buff *skb);
 extern int  tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
 extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
 
-- 
cgit v1.2.3-70-g09d2


From 49ffc29a0223adbe0ea7005eea3ab2a03abbeb06 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp: Clamping RTT values

This extracts the clamping part of dccp_sample_rtt() and makes it available
to other parts of the code (as e.g. used in the next patch).

Note: The function dccp_sample_rtt() now reduces to subtracting the elapsed
time. This could be eliminated but would require shorter prefixes and thus
is not done by this patch - maybe an idea for later.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h  |  9 ++++++++-
 net/dccp/input.c | 11 +----------
 2 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b63a82ccb2b..5281190aa19 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -334,7 +334,14 @@ extern struct sk_buff *dccp_ctl_make_reset(struct sock *sk,
 extern int	   dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
 extern void	   dccp_send_close(struct sock *sk, const int active);
 extern int	   dccp_invalid_packet(struct sk_buff *skb);
-extern u32	   dccp_sample_rtt(struct sock *sk, long delta);
+
+static inline u32  dccp_sane_rtt(long usec_sample)
+{
+	if (unlikely(usec_sample <= 0 || usec_sample > DCCP_SANE_RTT_MAX))
+		DCCP_WARN("RTT sample %ld out of bounds!\n", usec_sample);
+	return clamp_val(usec_sample, DCCP_SANE_RTT_MIN, DCCP_SANE_RTT_MAX);
+}
+extern u32 dccp_sample_rtt(struct sock *sk, long delta);
 
 static inline int dccp_bad_service_code(const struct sock *sk,
 					const __be32 service)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index b1e38bf9445..df0e6714aa1 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -707,16 +707,7 @@ u32 dccp_sample_rtt(struct sock *sk, long delta)
 	/* dccpor_elapsed_time is either zeroed out or set and > 0 */
 	delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10;
 
-	if (unlikely(delta <= 0)) {
-		DCCP_WARN("unusable RTT sample %ld, using min\n", delta);
-		return DCCP_SANE_RTT_MIN;
-	}
-	if (unlikely(delta > DCCP_SANE_RTT_MAX)) {
-		DCCP_WARN("RTT sample %ld too large, using max\n", delta);
-		return DCCP_SANE_RTT_MAX;
-	}
-
-	return delta;
+	return dccp_sane_rtt(delta);
 }
 
 EXPORT_SYMBOL_GPL(dccp_sample_rtt);
-- 
cgit v1.2.3-70-g09d2


From 22338f09bd60434a3f1d6608f0fa55972067985f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp tfrc: Increase number of RTT samples

This improves the receiver RTT sampling algorithm so that it tries harder to get
as many RTT samples as possible.

The algorithm is based the concepts presented in RFC 4340, 8.1, using timestamps
and the CCVal window counter. There exist 4 cases for the CCVal difference:
 * == 0: less than RTT/4 passed since last packet -- unusable;
 *  > 4: (much) more than 1 RTT has passed since last packet -- also unusable;
 * == 4: perfect sample (exactly one RTT has passed since last packet);
 * 1..3: sub-optimal sample (between RTT/4 and 3*RTT/4 has passed).

In the last case the algorithm tried to optimise by storing away the candidate
and then re-trying next time. The problem is that
 * a large number of samples is needed to smooth out the inaccuracies of the
   algorithm;
 * the sender may not be sending enough packets to warrant a "next time";
 * hence it is better to use suboptimal samples whenever possible.
The algorithm now stores away the current sample only if the difference is 0.

Applicability and background
----------------------------
A realistic example is MP3 streaming where packets are sent at a rate of less
than one packet per RTT, which means that suitable samples are absent for a
very long time.

The effectiveness of using suboptimal samples (with a delta between 1 and 4) was
confirmed by instrumenting the algorithm with counters. The results of two 20
second test runs were:
 * With the old algorithm and a total of 38442 function calls, only 394 of these
   calls resulted in usable RTT samples (about 1%), and 378 out of these were
   "perfect" samples and 28013 (unused) samples had a delta of 1..3.
 * With the new algorithm and a total of 37057 function calls, 1702 usable RTT
   samples were retrieved (about 4.6%), 5 out of these were "perfect" samples.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/lib/packet_history.c | 83 +++++++++++--------------------------
 1 file changed, 24 insertions(+), 59 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index e2e250aa5c8..5c4ded1cf42 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -427,32 +427,17 @@ int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
 
-/**
- * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
- */
-static inline struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
-{
-	return h->ring[0];
-}
-
-/**
- * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
- */
-static inline struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
-{
-	return h->ring[h->rtt_sample_prev];
-}
-
 /**
  * tfrc_rx_hist_sample_rtt  -  Sample RTT from timestamp / CCVal
- * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
- * to compute a sample with given data - calling function should check this.
+ * Based on ideas presented in RFC 4342, 8.1. This function expects that no loss
+ * is pending and uses the following history entries (via rtt_sample_prev):
+ * - h->ring[0]  contains the most recent history entry prior to @skb;
+ * - h->ring[1]  is an unused `dummy' entry when the current difference is 0;
  */
 void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
 {
-	u32 sample = 0, delta_v;
+	struct tfrc_rx_hist_entry *last = h->ring[0];
+	u32 sample, delta_v;
 
 	/*
 	 * When not to sample:
@@ -466,47 +451,27 @@ void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
 	    tfrc_rx_hist_loss_pending(h))
 		return;
 
-	delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
-			tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
-
-	if (delta_v < 1 || delta_v > 4) {	/* unsuitable CCVal delta */
-		if (h->rtt_sample_prev == 2) {	/* previous candidate stored */
-			sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
-				       tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
-			if (sample)
-				sample = 4 / sample *
-				         ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
-							tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
-			else    /*
-				 * FIXME: This condition is in principle not
-				 * possible but occurs when CCID is used for
-				 * two-way data traffic. I have tried to trace
-				 * it, but the cause does not seem to be here.
-				 */
-				DCCP_BUG("please report to dccp@vger.kernel.org"
-					 " => prev = %u, last = %u",
-					 tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
-					 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
-		} else if (delta_v < 1) {
-			h->rtt_sample_prev = 1;
-			goto keep_ref_for_next_time;
-		}
-
-	} else if (delta_v == 4) /* optimal match */
-		sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
-	else {			 /* suboptimal match */
-		h->rtt_sample_prev = 2;
-		goto keep_ref_for_next_time;
-	}
+	h->rtt_sample_prev = 0;		/* reset previous candidate */
 
-	if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
-		DCCP_WARN("RTT sample %u too large, using max\n", sample);
-		sample = DCCP_SANE_RTT_MAX;
+	delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval);
+	if (delta_v == 0) {		/* less than RTT/4 difference */
+		h->rtt_sample_prev = 1;
+		return;
 	}
+	sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp)));
 
-	h->rtt_sample_prev = 0;	       /* use current entry as next reference */
-keep_ref_for_next_time:
+	if (delta_v <= 4)		/* between RTT/4 and RTT */
+		sample *= 4 / delta_v;
+	else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2))
+		/*
+		* Optimisation: CCVal difference is greater than 1 RTT, yet the
+		* sample is less than the local RTT estimate; which means that
+		* the RTT estimate is too high.
+		* To avoid noise, it is not done if the sample is below RTT/2.
+		*/
+		return;
 
-	h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 9);
+	/* Use a lower weight than usual to increase responsiveness */
+	h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5);
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
-- 
cgit v1.2.3-70-g09d2


From 68c89ee53571a441799c03d5e240c6441bced620 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Update the computation of X_recv

This updates the computation of X_recv with regard to Errata 610/611 for
RFC 4342 and draft rfc3448bis-06, ensuring that at least an interval of 1
RTT is used to compute X_recv.  The change is wrapped into a new function
ccid3_hc_rx_x_recv().

Further changes:
----------------
 * feedback is not sent when no data packets arrived (bytes_recv == 0), as per
   rfc3448bis-06, 6.2;
 * take the timestamp for the feedback /after/ dccp_send_ack() returns, to avoid
   taking the transmission time into account (in case layer-2 is busy);
 * clearer handling of failure in ccid3_first_li().

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c              | 64 ++++++++++++++-----------------------
 net/dccp/ccids/lib/packet_history.c | 30 +++++++++++++++++
 net/dccp/ccids/lib/packet_history.h | 13 +++++++-
 3 files changed, 66 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 8e64d9665a2..f2f9514dbad 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -533,9 +533,6 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 				      enum ccid3_fback_type fbtype)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	struct dccp_sock *dp = dccp_sk(sk);
-	ktime_t now = ktime_get_real();
-	s64 delta = 0;
 
 	switch (fbtype) {
 	case CCID3_FBACK_INITIAL:
@@ -565,42 +562,33 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 		/*
 		 * When parameters change (new loss or p > p_prev), we do not
 		 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
-		 * need to  reuse the previous value of X_recv. However, when
-		 * X_recv was 0 (due to early loss), this would kill X down to
-		 * s/t_mbi (i.e. one packet in 64 seconds).
-		 * To avoid such drastic reduction, we approximate X_recv as
-		 * the number of bytes since last feedback.
-		 * This is a safe fallback, since X is bounded above by X_calc.
+		 * always check whether at least RTT time units were covered.
 		 */
-		if (hcrx->x_recv > 0)
-			break;
-		/* fall through */
+		hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv);
+		break;
 	case CCID3_FBACK_PERIODIC:
 		/*
-		 * FIXME: check if delta is less than or equal to 1 RTT using
-		 * the receiver RTT sample. This is described in Errata 610/611
-		 * of RFC 4342 which reference section 6.2 of RFC 3448.
+		 * Step (2) of rfc3448bis-06, 6.2:
+		 * - if no data packets have been received, just restart timer
+		 * - if data packets have been received, re-compute X_recv
 		 */
-		delta = ktime_us_delta(now, hcrx->tstamp_last_feedback);
-		if (delta <= 0)
-			DCCP_BUG("delta (%ld) <= 0", (long)delta);
-		else
-			hcrx->x_recv = scaled_div32(hcrx->hist.bytes_recvd, delta);
+		if (hcrx->hist.bytes_recvd == 0)
+			goto prepare_for_next_time;
+		hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv);
 		break;
 	default:
 		return;
 	}
 
-	ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n",
-		       (long)delta, hcrx->x_recv, hcrx->p_inverse);
+	ccid3_pr_debug("X_recv=%u, 1/p=%u\n", hcrx->x_recv, hcrx->p_inverse);
 
-	hcrx->tstamp_last_feedback = now;
-	hcrx->last_counter	   = dccp_hdr(skb)->dccph_ccval;
-	hcrx->hist.bytes_recvd	   = 0;
-	hcrx->feedback		   = fbtype;
-
-	dp->dccps_hc_rx_insert_options = 1;
+	dccp_sk(sk)->dccps_hc_rx_insert_options = 1;
 	dccp_send_ack(sk);
+
+prepare_for_next_time:
+	tfrc_rx_hist_restart_byte_counter(&hcrx->hist);
+	hcrx->last_counter = dccp_hdr(skb)->dccph_ccval;
+	hcrx->feedback	   = fbtype;
 }
 
 static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
@@ -639,7 +627,7 @@ static u32 ccid3_first_li(struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	u32 s = tfrc_rx_hist_packet_size(&hcrx->hist),
-	    rtt = tfrc_rx_hist_rtt(&hcrx->hist), x_recv, p, delta;
+	    rtt = tfrc_rx_hist_rtt(&hcrx->hist), x_recv, p;
 	u64 fval;
 
 	/*
@@ -650,16 +638,9 @@ static u32 ccid3_first_li(struct sock *sk)
 	if (unlikely(hcrx->feedback == CCID3_FBACK_NONE))
 		return 5;
 
-	delta = ktime_to_us(net_timedelta(hcrx->tstamp_last_feedback));
-	x_recv = scaled_div32(hcrx->hist.bytes_recvd, delta);
-	if (x_recv == 0) {		/* would also trigger divide-by-zero */
-		DCCP_WARN("X_recv==0\n");
-		if (hcrx->x_recv == 0) {
-			DCCP_BUG("stored value of X_recv is zero");
-			return ~0U;
-		}
-		x_recv = hcrx->x_recv;
-	}
+	x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv);
+	if (x_recv == 0)
+		goto failed;
 
 	fval = scaled_div32(scaled_div(s, rtt), x_recv);
 	p = tfrc_calc_x_reverse_lookup(fval);
@@ -667,7 +648,10 @@ static u32 ccid3_first_li(struct sock *sk)
 	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
 		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
 
-	return p == 0 ? ~0U : scaled_div(1, p);
+	if (p > 0)
+		return scaled_div(1, p);
+failed:
+	return UINT_MAX;
 }
 
 static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 5c4ded1cf42..547ad098ea6 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -385,6 +385,36 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
 
+/* Compute the sending rate X_recv measured between feedback intervals */
+u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv)
+{
+	u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate;
+	s64 delta = ktime_to_us(net_timedelta(h->bytes_start));
+
+	WARN_ON(delta <= 0);
+	/*
+	 * Ensure that the sampling interval for X_recv is at least one RTT,
+	 * by extending the sampling interval backwards in time, over the last
+	 * R_(m-1) seconds, as per rfc3448bis-06, 6.2.
+	 * To reduce noise (e.g. when the RTT changes often), this is only
+	 * done when delta is smaller than RTT/2.
+	 */
+	if (last_x_recv > 0 && delta < last_rtt/2) {
+		tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n",
+			      (long)delta, (unsigned)last_rtt);
+
+		delta = (bytes ? delta : 0) + last_rtt;
+		bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC);
+	}
+
+	if (unlikely(bytes == 0)) {
+		DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv);
+		return last_x_recv;
+	}
+	return scaled_div32(bytes, delta);
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv);
+
 void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 {
 	int i;
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index ba5832bbc34..6552be63cb0 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -93,7 +93,8 @@ struct tfrc_rx_hist_entry {
  * @rtt_sample_prev:	Used during RTT sampling, points to candidate entry
  * @rtt_estimate:	Receiver RTT estimate
  * @packet_size:	Packet size in bytes (as per RFC 3448, 3.1)
- * @bytes_recvd:	Number of bytes received since last sending feedback
+ * @bytes_recvd:	Number of bytes received since @bytes_start
+ * @bytes_start:	Start time for counting @bytes_recvd
  */
 struct tfrc_rx_hist {
 	struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
@@ -105,6 +106,7 @@ struct tfrc_rx_hist {
 	/* Receiver sampling of application payload lengths */
 	u32			  packet_size,
 				  bytes_recvd;
+	ktime_t			  bytes_start;
 };
 
 /**
@@ -169,6 +171,15 @@ static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
 	return h->rtt_estimate;
 }
 
+static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h)
+{
+	h->bytes_recvd = 0;
+	h->bytes_start = ktime_get_real();
+}
+
+extern u32  tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv);
+
+
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 				    const struct sk_buff *skb, const u64 ndp);
 
-- 
cgit v1.2.3-70-g09d2


From 88e97a93342c0b9e835d510921e7b2df8547d1bd Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Update the RX history records in one place

This patch is a requirement for enabling ECN support later on. With that change
in mind, the following preparations are done:
 * renamed handle_loss() into congestion_event() since it returns true when a
   congestion event happens (it will eventually also take care of ECN packets);
 * lets tfrc_rx_congestion_event() always update the RX history records, since
   this routine needs to be called for each non-duplicate packet anyway;
 * made all involved boolean-type functions to have return type `bool';

Updating the RX history records is now only necessary for the packets received
up to sending the first feedback. The receiver code becomes again simpler.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c              | 37 +++++++++++--------------------------
 net/dccp/ccids/lib/loss_interval.c  | 10 +++++-----
 net/dccp/ccids/lib/loss_interval.h  |  2 +-
 net/dccp/ccids/lib/packet_history.c | 37 ++++++++++++++++++-------------------
 net/dccp/ccids/lib/packet_history.h | 10 +++++-----
 5 files changed, 40 insertions(+), 56 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f2f9514dbad..aca072b3eda 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -657,41 +657,26 @@ failed:
 static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
 	const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
 	const bool is_data_packet = dccp_data_packet(skb);
 
 	/*
 	 * Perform loss detection and handle pending losses
 	 */
-	if (tfrc_rx_handle_loss(&hcrx->hist, &hcrx->li_hist,
-				skb, ndp, ccid3_first_li, sk)) {
-		do_feedback = CCID3_FBACK_PARAM_CHANGE;
-		goto done_receiving;
-	}
-
-	if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) {
-		if (is_data_packet)
-			do_feedback = CCID3_FBACK_INITIAL;
-		goto update_records;
-	}
-
-	if (tfrc_rx_hist_loss_pending(&hcrx->hist))
-		return; /* done receiving */
-
+	if (tfrc_rx_congestion_event(&hcrx->hist, &hcrx->li_hist,
+				     skb, ndp, ccid3_first_li, sk))
+		ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PARAM_CHANGE);
+	/*
+	 * Feedback for first non-empty data packet (RFC 3448, 6.3)
+	 */
+	else if (unlikely(hcrx->feedback == CCID3_FBACK_NONE && is_data_packet))
+		ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_INITIAL);
 	/*
 	 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
 	 */
-	if (is_data_packet &&
-	    SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3)
-		do_feedback = CCID3_FBACK_PERIODIC;
-
-update_records:
-	tfrc_rx_hist_add_packet(&hcrx->hist, skb, ndp);
-
-done_receiving:
-	if (do_feedback)
-		ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
+	else if (!tfrc_rx_hist_loss_pending(&hcrx->hist) && is_data_packet &&
+		 SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3)
+		ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PERIODIC);
 }
 
 static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index fe5c2a31c77..b1ae8f8259e 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -140,18 +140,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
  * @sk:		   Used by @calc_first_li in caller-specific way (subtyping)
  * Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
  */
-int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
-			 u32 (*calc_first_li)(struct sock *), struct sock *sk)
+bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
+			  u32 (*calc_first_li)(struct sock *), struct sock *sk)
 {
 	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
 
 	if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
-		return 0;
+		return false;
 
 	new = tfrc_lh_demand_next(lh);
 	if (unlikely(new == NULL)) {
 		DCCP_CRIT("Cannot allocate/add loss record.");
-		return 0;
+		return false;
 	}
 
 	new->li_seqno	  = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
@@ -169,7 +169,7 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
 
 		tfrc_lh_calc_i_mean(lh);
 	}
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
 
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index f101ae2cf52..d08a226db43 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -67,7 +67,7 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
 
 struct tfrc_rx_hist;
 
-extern int  tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
+extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
 				 u32 (*first_li)(struct sock *), struct sock *);
 extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
 extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 547ad098ea6..cce9f03bda3 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -192,10 +192,8 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
 	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
 	    s1 = DCCP_SKB_CB(skb)->dccpd_seq;
 
-	if (!dccp_loss_free(s0, s1, n1)) {	/* gap between S0 and S1 */
+	if (!dccp_loss_free(s0, s1, n1))	/* gap between S0 and S1 */
 		h->loss_count = 1;
-		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
-	}
 }
 
 static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
@@ -328,13 +326,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
 }
 
 /**
- *  tfrc_rx_handle_loss  -  Loss detection and further processing
- *  @h:		    The non-empty RX history object
- *  @lh:	    Loss Intervals database to update
- *  @skb:	    Currently received packet
- *  @ndp:	    The NDP count belonging to @skb
- *  @calc_first_li: Caller-dependent computation of first loss interval in @lh
- *  @sk:	    Used by @calc_first_li (see tfrc_lh_interval_add)
+ *  tfrc_rx_congestion_event  -  Loss detection and further processing
+ *  @h:		The non-empty RX history object
+ *  @lh:	Loss Intervals database to update
+ *  @skb:	Currently received packet
+ *  @ndp:	The NDP count belonging to @skb
+ *  @first_li:	Caller-dependent computation of first loss interval in @lh
+ *  @sk:	Used by @calc_first_li (see tfrc_lh_interval_add)
  *  Chooses action according to pending loss, updates LI database when a new
  *  loss was detected, and does required post-processing. Returns 1 when caller
  *  should send feedback, 0 otherwise.
@@ -342,12 +340,12 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
  *  records accordingly, the caller should not perform any more RX history
  *  operations when loss_count is greater than 0 after calling this function.
  */
-int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
-			struct tfrc_loss_hist *lh,
-			struct sk_buff *skb, const u64 ndp,
-			u32 (*calc_first_li)(struct sock *), struct sock *sk)
+bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
+			      struct tfrc_loss_hist *lh,
+			      struct sk_buff *skb, const u64 ndp,
+			      u32 (*first_li)(struct sock *), struct sock *sk)
 {
-	int is_new_loss = 0;
+	bool new_event = false;
 
 	if (tfrc_rx_hist_duplicate(h, skb))
 		return 0;
@@ -355,6 +353,7 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 	if (h->loss_count == 0) {
 		__do_track_loss(h, skb, ndp);
 		tfrc_rx_hist_sample_rtt(h, skb);
+		tfrc_rx_hist_add_packet(h, skb, ndp);
 	} else if (h->loss_count == 1) {
 		__one_after_loss(h, skb, ndp);
 	} else if (h->loss_count != 2) {
@@ -363,7 +362,7 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 		/*
 		 * Update Loss Interval database and recycle RX records
 		 */
-		is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
+		new_event = tfrc_lh_interval_add(lh, h, first_li, sk);
 		__three_after_loss(h);
 	}
 
@@ -378,12 +377,12 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 	}
 
 	/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
-	if (!is_new_loss)
+	if (!new_event)
 		tfrc_lh_update_i_mean(lh, skb);
 
-	return is_new_loss;
+	return new_event;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
+EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event);
 
 /* Compute the sending rate X_recv measured between feedback intervals */
 u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv)
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 6552be63cb0..555e65cd73a 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -186,11 +186,11 @@ extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
 
 struct tfrc_loss_hist;
-extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
-				struct tfrc_loss_hist *lh,
-				struct sk_buff *skb, const u64 ndp,
-				u32 (*first_li)(struct sock *sk),
-				struct sock *sk);
+extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
+				     struct tfrc_loss_hist *lh,
+				     struct sk_buff *skb, const u64 ndp,
+				     u32 (*first_li)(struct sock *sk),
+				     struct sock *sk);
 extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
 				    const struct sk_buff *skb);
 extern int  tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
-- 
cgit v1.2.3-70-g09d2


From 9d497a2c9120e31ff417e75f9f5576c4cde11281 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Implement rfc3448bis change to initial-rate computation

The patch updates CCID-3 with regard to the latest rfc3448bis-06:
 * in the first revisions of the draft, MSS was used for the RFC 3390 window;
 * then (from revision #1 to revision #2), it used the packet size `s';
 * now, in this revision (and apparently final), the value is back to MSS.

This change has an implication for the case when no RTT sample is available,
at the time of sending the first packet:

 * with RTT sample, 2*MSS/RTT <= initial_rate <= 4*MSS/RTT;
 * without RTT sample, the initial rate is one packet (s bytes) per second
   (sec. 4.2), but using s instead of MSS here creates an imbalance, since
   this would further reduce the initial sending rate.

Hence the patch uses MSS (called MPS in RFC 4340) in all places.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index aca072b3eda..d654264c510 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -53,18 +53,16 @@ static int ccid3_debug;
 /*
  * Compute the initial sending rate X_init in the manner of RFC 3390:
  *
- *	X_init  =  min(4 * s, max(2 * s, 4380 bytes)) / RTT
+ *	X_init  =  min(4 * MPS, max(2 * MPS, 4380 bytes)) / RTT
  *
- * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis
- * (rev-02) clarifies the use of RFC 3390 with regard to the above formula.
  * For consistency with other parts of the code, X_init is scaled by 2^6.
  */
 static inline u64 rfc3390_initial_rate(struct sock *sk)
 {
-	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	const __u32 w_init = clamp_t(__u32, 4380U, 2 * hctx->s, 4 * hctx->s);
+	const u32 mps = dccp_sk(sk)->dccps_mss_cache,
+	       w_init = clamp(4380U, 2 * mps, 4 * mps);
 
-	return scaled_div(w_init << 6, hctx->rtt);
+	return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->rtt);
 }
 
 /**
@@ -293,7 +291,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 			 * - set sending rate X_pps = 1pps as per RFC 3448, 4.2.
 			 */
 			hctx->rtt = DCCP_FALLBACK_RTT;
-			hctx->x   = hctx->s;
+			hctx->x	  = dp->dccps_mss_cache;
 			hctx->x <<= 6;
 		}
 		ccid3_update_send_interval(hctx);
-- 
cgit v1.2.3-70-g09d2


From 891e4d8a402427bc40dee4c8413213a584710372 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Tidy up CCID-Kconfig dependencies

The per-CCID menu has several dependencies on EXPERIMENTAL. These are redundant,
since net/dccp/ccids/Kconfig is sourced by net/dccp/Kconfig and since the
latter menu in turn asserts a dependency on EXPERIMENTAL.

The patch removes the redundant dependencies as well as the repeated reference
within the sub-menu.

Further changes:
----------------
Two single dependencies on CCID-3 are replaced with a single enclosing `if'.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/Kconfig | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 44c7e90248f..dc973abe03b 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,8 +1,7 @@
 menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
 
 config IP_DCCP_CCID2
-	tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
+	tristate "CCID2 (TCP-Like)"
 	def_tristate IP_DCCP
 	---help---
 	  CCID 2, TCP-like Congestion Control, denotes Additive Increase,
@@ -35,7 +34,7 @@ config IP_DCCP_CCID2_DEBUG
 	    If in doubt, say N.
 
 config IP_DCCP_CCID3
-	tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
+	tristate "CCID3 (TCP-Friendly)"
 	def_tristate IP_DCCP
 	select IP_DCCP_TFRC_LIB
 	---help---
@@ -63,9 +62,9 @@ config IP_DCCP_CCID3
 
 	  If in doubt, say M.
 
+if IP_DCCP_CCID3
 config IP_DCCP_CCID3_DEBUG
 	  bool "CCID3 debugging messages"
-	  depends on IP_DCCP_CCID3
 	  ---help---
 	    Enable CCID3-specific debugging messages.
 
@@ -78,7 +77,6 @@ config IP_DCCP_CCID3_DEBUG
 config IP_DCCP_CCID3_RTO
 	  int "Use higher bound for nofeedback timer"
 	  default 100
-	  depends on IP_DCCP_CCID3 && EXPERIMENTAL
 	  ---help---
 	    Use higher lower bound for nofeedback timer expiration.
 
@@ -105,6 +103,7 @@ config IP_DCCP_CCID3_RTO
 	    The purpose of the nofeedback timer is to slow DCCP down when there
 	    is serious network congestion: experimenting with larger values should
 	    therefore not be performed on WANs.
+endif	# IP_DCCP_CCID3
 
 config IP_DCCP_TFRC_LIB
 	tristate
-- 
cgit v1.2.3-70-g09d2


From c8f41d50adc380bfb38538ce39ca0ffea5926221 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Measuring the packet size s with regard to rfc3448bis-06

rfc3448bis allows three different ways of tracking the packet size `s':

 1. using the MSS/MPS (at initialisation, 4.2, and in 4.1 (1));
 2. using the average of `s' (in 4.1);
 3. using the maximum of `s' (in 4.2).

Instead of hard-coding a single interpretation of rfc3448bis, this implements
a choice of all three alternatives and suggests the first as default, since it
is the option which is most consistent with other parts of the specification.

The patch further deprecates the update of t_ipi whenever `s' changes. The
gains of doing this are only small since a change of s takes effect at the
next instant X is updated:
 * when the next feedback comes in (within one RTT or less);
 * when the nofeedback timer expires (within at most 4 RTTs).

Further, there are complications caused by updating t_ipi whenever s changes:
 * if t_ipi had previously been updated to effect oscillation prevention (4.5),
   then it is impossible to make the same adjustment to t_ipi again, thus
   counter-acting the algorithm;
 * s may be updated any time and a modification of t_ipi depends on the current
   state (e.g. no oscillation prevention is done in the absence of feedback);
 * in rev-06 of rfc3448bis, there are more possible cases, depending on whether
   the sender is in slow-start (t_ipi <= R/W_init), or in congestion-avoidance,
   limited by X_recv or the throughput equation (t_ipi <= t_mbi).

Thus there are side effects of always updating t_ipi as s changes. These may not
be desirable. The only case I can think of where such an update makes sense is
to recompute X_calc when p > 0 and when s changes (not done by this patch).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/Kconfig | 20 ++++++++++++++++++++
 net/dccp/ccids/ccid3.c | 27 +++++++++++++++------------
 2 files changed, 35 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index dc973abe03b..fb168be2cb4 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -74,6 +74,26 @@ config IP_DCCP_CCID3_DEBUG
 
 	    If in doubt, say N.
 
+choice
+	  prompt "Select method for measuring the packet size s"
+	  default IP_DCCP_CCID3_MEASURE_S_AS_MPS
+
+config IP_DCCP_CCID3_MEASURE_S_AS_MPS
+	  bool "Always use MPS in place of s"
+	  ---help---
+	    This use is recommended as it is consistent with the initialisation
+	    of X and suggested when s varies (rfc3448bis, (1) in section 4.1).
+config IP_DCCP_CCID3_MEASURE_S_AS_AVG
+	  bool "Use moving average"
+	  ---help---
+	    An alternative way of tracking s, also supported by rfc3448bis.
+	    This used to be the default for CCID-3 in previous kernels.
+config IP_DCCP_CCID3_MEASURE_S_AS_MAX
+	  bool "Track the maximum payload length"
+	  ---help---
+	    An experimental method based on tracking the maximum packet size.
+endchoice
+
 config IP_DCCP_CCID3_RTO
 	  int "Use higher bound for nofeedback timer"
 	  default 100
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d654264c510..d77d3e664b7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -136,17 +136,18 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
 }
 
 /*
- *	Track the mean packet size `s' (cf. RFC 4342, 5.3 and  RFC 3448, 4.1)
- *	@len: DCCP packet payload size in bytes
+ * ccid3_hc_tx_measure_packet_size  -  Measuring the packet size `s' (sec 4.1)
+ * @new_len: DCCP payload size in bytes (not used by all methods)
  */
-static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
+static u32 ccid3_hc_tx_measure_packet_size(struct sock *sk, const u16 new_len)
 {
-	const u16 old_s = hctx->s;
-
-	hctx->s = tfrc_ewma(hctx->s, len, 9);
-
-	if (hctx->s != old_s)
-		ccid3_update_send_interval(hctx);
+#if   defined(CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_AVG)
+	return tfrc_ewma(ccid3_hc_tx_sk(sk)->s, new_len, 9);
+#elif defined(CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_MAX)
+	return max(ccid3_hc_tx_sk(sk)->s, new_len);
+#else /* CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_MPS	*/
+	return dccp_sk(sk)->dccps_mss_cache;
+#endif
 }
 
 /*
@@ -271,8 +272,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		/* Set t_0 for initial packet */
 		hctx->t_nom = now;
 
-		hctx->s = skb->len;
-
 		/*
 		 * Use initial RTT sample when available: recommended by erratum
 		 * to RFC 4342. This implements the initialisation procedure of
@@ -294,6 +293,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 			hctx->x	  = dp->dccps_mss_cache;
 			hctx->x <<= 6;
 		}
+
+		/* Compute t_ipi = s / X */
+		hctx->s = ccid3_hc_tx_measure_packet_size(sk, skb->len);
 		ccid3_update_send_interval(hctx);
 
 	} else {
@@ -326,7 +328,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
-	ccid3_hc_tx_update_s(hctx, len);
+	/* Changes to s will become effective the next time X is computed */
+	hctx->s = ccid3_hc_tx_measure_packet_size(sk, len);
 
 	if (tfrc_tx_hist_add(&hctx->hist, dccp_sk(sk)->dccps_gss))
 		DCCP_CRIT("packet history - out of memory!");
-- 
cgit v1.2.3-70-g09d2


From 53ac9570c8145710aaed9e1eb850c2e991a4ebc1 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Simplify computing and range-checking of t_ipi

This patch simplifies the computation of t_ipi, avoiding expensive computations
to enforce the minimum sending rate.

Both RFC 3448 and rfc3448bis (revision #06), as well as RFC 4342 sec 5., require
at various stages that at least one packet must be sent per t_mbi = 64 seconds.
This requires frequent divisions of the type X_min = s/t_mbi, which are later
converted back into an inter-packet-interval t_ipi_max = s/X_min = t_mbi.

The patch removes the expensive indirection; in the unlikely case of having
a sending rate less than one packet per 64 seconds, it also re-adjusts X.

The following cases document conformance with RFC 3448  / rfc3448bis-06:
 1) Time until receiving the first feedback packet:
   * if the sender has no initial RTT sample then X = s/1 Bps > s/t_mbi;
   * if the sender has an initial RTT sample or when the first feedback
     packet is received, X = W_init/R > s/t_mbi.

 2) Slow-start (p == 0 and feedback packets come in):
   * RFC 3448  (current code) enforces a minimum of s/R > s/t_mbi;
   * rfc3448bis (future code) enforces an even higher minimum of W_init/R.

 3) Congestion avoidance with no absence of feedback (p > 0):
   * when X_calc or X_recv/2 are too low, the minimum of X_min = s/t_mbi
     is enforced in update_x() when calling update_send_interval();
   * update_send_interval() is, as before, only called when X changes
     (i.e. either when increasing or decreasing, not when in equilibrium).

 4) Reduction of X without prior feedback or during slow-start (p==0):
   * both RFC 3448 and rfc3448bis here halve X directly;
   * the associated constraint X >= s/t_mbi is nforced here by send_interval().

 5) Reduction of X when p > 0:
   * X is modified indirectly via X_recv (RFC 3448) or X_recv_set (rfc3448bis);
   * in both cases, control goes back to section 4.3 (in both documents);
   * since p > 0, both documents use X = max(min(...), s/t_mbi), which is
     enforced in this patch by calling send_interval() from update_x().

I think that this analysis is exhaustive. Should I have forgotten a case,
the worst-case consideration arises when X sinks below s/t_mbi, and is then
increased back up to this minimum value. Even under this assumption, the
behaviour is correct, since all lower limits of X in RFC 3448 / rfc3448bis
are either equal to or greater than s/t_mbi.

Note on the condition X >= s/t_mbi  <==> t_ipi = s/X <= t_mbi: since X is
scaled by 64, and all time units are in microseconds, the coded condition is:

    t_ipi = s * 64 * 10^6 usec / X <= 64 * 10^6 usec

This simplifies to s / X <= 1 second <==> X * 1 second >= s > 0.
(A zero `s' is not allowed by the CCID-3 code).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d77d3e664b7..7cd76c6c790 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -66,15 +66,15 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
 }
 
 /**
- * ccid3_update_send_interval  -  Calculate new t_ipi = s / X_inst
- * This respects the granularity of X_inst (64 * bytes/second).
+ * ccid3_update_send_interval  -  Calculate new t_ipi = s / X
+ * This respects the granularity of X (64 * bytes/second) and enforces the
+ * scaled minimum of s * 64 / t_mbi = `s' bytes/second as per RFC 3448/4342.
  */
 static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
 {
+	if (unlikely(hctx->x <= hctx->s))
+		hctx->x	= hctx->s;
 	hctx->t_ipi = scaled_div32(((u64)hctx->s) << 6, hctx->x);
-
-	ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hctx->t_ipi,
-		       hctx->s, (unsigned)(hctx->x >> 6));
 }
 
 static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
@@ -115,7 +115,6 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
 	if (hctx->p > 0) {
 
 		hctx->x = min(((u64)hctx->x_calc) << 6, min_rate);
-		hctx->x = max(hctx->x, (((u64)hctx->s) << 6) / TFRC_T_MBI);
 
 	} else if (ktime_us_delta(now, hctx->t_ld) - (s64)hctx->rtt >= 0) {
 
@@ -197,8 +196,9 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 	if (hctx->t_rto == 0 || hctx->p == 0) {
 
 		/* halve send rate directly */
-		hctx->x = max(hctx->x / 2, (((u64)hctx->s) << 6) / TFRC_T_MBI);
+		hctx->x /= 2;
 		ccid3_update_send_interval(hctx);
+
 	} else {
 		/*
 		 *  Modify the cached value of X_recv
@@ -213,9 +213,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		BUG_ON(hctx->p && !hctx->x_calc);
 
 		if (hctx->x_calc > (hctx->x_recv >> 5))
-			hctx->x_recv =
-				max(hctx->x_recv / 2,
-				    (((__u64)hctx->s) << 6) / (2 * TFRC_T_MBI));
+			hctx->x_recv /= 2;
 		else {
 			hctx->x_recv = hctx->x_calc;
 			hctx->x_recv <<= 4;
-- 
cgit v1.2.3-70-g09d2


From a3cbdde8e9c38b66b4f13ac5d6ff1939ded0ff20 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 4 Sep 2008 07:30:19 +0200
Subject: dccp ccid-3: Preventing Oscillations

This implements [RFC 3448, 4.5], which performs congestion avoidance behaviour
by reducing the transmit rate as the queueing delay (measured in terms of
long-term RTT) increases.

Oscillation can be turned on/off via a module option (do_osc_prev) and via sysfs
(using mode 0644), the default is off.

Overflow analysis:
------------------
 * oscillation prevention is done after update_x(), so that t_ipi <= 64000;
 * hence the multiplication "t_ipi * sqrt(R_sample)" needs 64 bits;
 * done using u64 for sqrt_sample and explicit typecast of t_ipi;
 * the divisor, R_sqmean, is non-zero because oscillation prevention is first
   called when receiving the second feedback packet, and tfrc_scaled_rtt() > 0.

A detailed discussion of the algorithm (with plots) is on
http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid3/sender_notes/oscillation_prevention/

The algorithm has negative side effects:
  * when allowing to decrease t_ipi (leads to a large RTT) and
  * when using it during slow-start;
both uses are therefore disabled.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c    | 40 ++++++++++++++++++++++++++++++++++++++++
 net/dccp/ccids/ccid3.h    |  6 ++++--
 net/dccp/ccids/lib/tfrc.h | 15 +++++++++++++++
 3 files changed, 59 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 7cd76c6c790..06cfdad84a6 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -49,6 +49,8 @@ static int ccid3_debug;
 /*
  *	Transmitter Half-Connection Routines
  */
+/* Oscillation Prevention/Reduction: recommended by rfc3448bis, on by default */
+static int do_osc_prev = true;
 
 /*
  * Compute the initial sending rate X_init in the manner of RFC 3390:
@@ -296,6 +298,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		hctx->s = ccid3_hc_tx_measure_packet_size(sk, skb->len);
 		ccid3_update_send_interval(hctx);
 
+		/* Seed value for Oscillation Prevention (sec. 4.5) */
+		hctx->r_sqmean = tfrc_scaled_sqrt(hctx->rtt);
+
 	} else {
 		delay = ktime_us_delta(hctx->t_nom, now);
 		ccid3_pr_debug("delay=%ld\n", (long)delay);
@@ -400,6 +405,38 @@ done_computing_x:
 			       hctx->s, hctx->p, hctx->x_calc,
 			       (unsigned)(hctx->x_recv >> 6),
 			       (unsigned)(hctx->x >> 6));
+	/*
+	 * Oscillation Reduction (RFC 3448, 4.5) - modifying t_ipi according to
+	 * RTT changes, multiplying by X/X_inst = sqrt(R_sample)/R_sqmean. This
+	 * can be useful if few connections share a link, avoiding that buffer
+	 * fill levels (RTT) oscillate as a result of frequent adjustments to X.
+	 * A useful presentation with background information is in
+	 *    Joerg Widmer, "Equation-Based Congestion Control",
+	 *    MSc Thesis, University of Mannheim, Germany, 2000
+	 * (sec. 3.6.4), who calls this ISM ("Inter-packet Space Modulation").
+	 */
+	if (do_osc_prev) {
+		r_sample = tfrc_scaled_sqrt(r_sample);
+		/*
+		 * The modulation can work in both ways: increase/decrease t_ipi
+		 * according to long-term increases/decreases of the RTT. The
+		 * former is a useful measure, since it works against queue
+		 * build-up. The latter temporarily increases the sending rate,
+		 * so that buffers fill up more quickly. This in turn causes
+		 * the RTT to increase, so that either later reduction becomes
+		 * necessary or the RTT stays at a very high level. Decreasing
+		 * t_ipi is therefore not supported.
+		 * Furthermore, during the initial slow-start phase the RTT
+		 * naturally increases, where using the algorithm would cause
+		 * delays. Hence it is disabled during the initial slow-start.
+		 */
+		if (r_sample > hctx->r_sqmean && hctx->p > 0)
+			hctx->t_ipi = div_u64((u64)hctx->t_ipi * (u64)r_sample,
+					      hctx->r_sqmean);
+		hctx->t_ipi = min_t(u32, hctx->t_ipi, TFRC_T_MBI);
+		/* update R_sqmean _after_ computing the modulation factor */
+		hctx->r_sqmean = tfrc_ewma(hctx->r_sqmean, r_sample, 9);
+	}
 
 	/* unschedule no feedback timer */
 	sk_stop_timer(sk, &hctx->no_feedback_timer);
@@ -749,6 +786,9 @@ static struct ccid_operations ccid3 = {
 	.ccid_hc_tx_getsockopt	   = ccid3_hc_tx_getsockopt,
 };
 
+module_param(do_osc_prev, bool, 0644);
+MODULE_PARM_DESC(do_osc_prev, "Use Oscillation Prevention (RFC 3448, 4.5)");
+
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 module_param(ccid3_debug, bool, 0644);
 MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 342235c57bf..af6e1bf937d 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -47,8 +47,8 @@
 /* Two seconds as per RFC 3448 4.2 */
 #define TFRC_INITIAL_TIMEOUT	   (2 * USEC_PER_SEC)
 
-/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
-#define TFRC_T_MBI		   64
+/* Maximum backoff interval t_mbi (RFC 3448, 4.3) */
+#define TFRC_T_MBI		   (64 * USEC_PER_SEC)
 
 /*
  * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are
@@ -76,6 +76,7 @@ enum ccid3_options {
  * @x_recv - Receive rate    in 64 * bytes per second
  * @x_calc - Calculated rate in bytes per second
  * @rtt - Estimate of current round trip time in usecs
+ * @r_sqmean - Estimate of long-term RTT (RFC 3448, 4.5)
  * @p - Current loss event rate (0-1) scaled by 1000000
  * @s - Packet size in bytes
  * @t_rto - Nofeedback Timer setting in usecs
@@ -94,6 +95,7 @@ struct ccid3_hc_tx_sock {
 	u64				x_recv;
 	u32				x_calc;
 	u32				rtt;
+	u16				r_sqmean;
 	u32				p;
 	u32				t_rto;
 	u32				t_ipi;
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index bb47146ac7d..ede12f53de5 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -47,6 +47,21 @@ static inline u32 scaled_div32(u64 a, u64 b)
 	return result;
 }
 
+/**
+ * tfrc_scaled_sqrt  -  Compute scaled integer sqrt(x) for 0 < x < 2^22-1
+ * Uses scaling to improve accuracy of the integer approximation of sqrt(). The
+ * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for
+ * clamped RTT samples (dccp_sample_rtt).
+ * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the
+ * scaling factor is neutralised. For this purpose, it avoids returning zero.
+ */
+static inline u16 tfrc_scaled_sqrt(const u32 sample)
+{
+	const unsigned long non_zero_sample = sample ? : 1;
+
+	return int_sqrt(non_zero_sample << 10);
+}
+
 /**
  * tfrc_ewma  -  Exponentially weighted moving average
  * @weight: Weight to be used as damping factor, in units of 1/10
-- 
cgit v1.2.3-70-g09d2


From fab0de02fb0da83b90cec7fce4294747d86d5c6f Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:32 +0200
Subject: IPVS: Add CONFIG_IP_VS_IPV6 option for IPv6 support

Add boolean config option CONFIG_IP_VS_IPV6 for enabling experimental IPv6
support in IPVS. Only visible if IPv6 support is set to 'y' or both IPv6
and IPVS are modules.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/Kconfig | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 2e48a7e2722..794cecb249a 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -24,6 +24,14 @@ menuconfig IP_VS
 
 if IP_VS
 
+config	IP_VS_IPV6
+	bool "IPv6 support for IPVS (DANGEROUS)"
+	depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
+	---help---
+	  Add IPv6 support to IPVS. This is incomplete and might be dangerous.
+
+	  Say N if unsure.
+
 config	IP_VS_DEBUG
 	bool "IP virtual server debugging"
 	---help---
-- 
cgit v1.2.3-70-g09d2


From e7ade46a53055c19a01c8becbe7807f9075d6fee Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:33 +0200
Subject: IPVS: Change IPVS data structures to support IPv6 addresses

Introduce new 'af' fields into IPVS data structures for specifying an
entry's address family. Convert IP addresses to be of type union
nf_inet_addr.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             | 18 ++++++++-----
 net/ipv4/ipvs/ip_vs_conn.c      | 60 ++++++++++++++++++++---------------------
 net/ipv4/ipvs/ip_vs_core.c      | 28 +++++++++----------
 net/ipv4/ipvs/ip_vs_ctl.c       | 37 ++++++++++++-------------
 net/ipv4/ipvs/ip_vs_dh.c        |  2 +-
 net/ipv4/ipvs/ip_vs_ftp.c       | 18 ++++++-------
 net/ipv4/ipvs/ip_vs_lblc.c      |  4 +--
 net/ipv4/ipvs/ip_vs_lblcr.c     |  8 +++---
 net/ipv4/ipvs/ip_vs_lc.c        |  2 +-
 net/ipv4/ipvs/ip_vs_nq.c        |  2 +-
 net/ipv4/ipvs/ip_vs_proto_tcp.c | 16 +++++------
 net/ipv4/ipvs/ip_vs_proto_udp.c | 12 ++++-----
 net/ipv4/ipvs/ip_vs_rr.c        |  2 +-
 net/ipv4/ipvs/ip_vs_sed.c       |  2 +-
 net/ipv4/ipvs/ip_vs_sh.c        |  2 +-
 net/ipv4/ipvs/ip_vs_sync.c      |  6 ++---
 net/ipv4/ipvs/ip_vs_wlc.c       |  2 +-
 net/ipv4/ipvs/ip_vs_wrr.c       |  2 +-
 net/ipv4/ipvs/ip_vs_xmit.c      | 12 ++++-----
 19 files changed, 121 insertions(+), 114 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a25ad243031..d3282558550 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -21,6 +21,9 @@
 #include <linux/timer.h>
 
 #include <net/checksum.h>
+#include <linux/netfilter.h>		/* for union nf_inet_addr */
+#include <linux/ipv6.h>			/* for struct ipv6hdr */
+#include <net/ipv6.h>			/* for ipv6_addr_copy */
 
 #ifdef CONFIG_IP_VS_DEBUG
 #include <linux/net.h>
@@ -259,9 +262,10 @@ struct ip_vs_conn {
 	struct list_head        c_list;         /* hashed list heads */
 
 	/* Protocol, addresses and port numbers */
-	__be32                   caddr;          /* client address */
-	__be32                   vaddr;          /* virtual address */
-	__be32                   daddr;          /* destination address */
+	u16                      af;		/* address family */
+	union nf_inet_addr       caddr;          /* client address */
+	union nf_inet_addr       vaddr;          /* virtual address */
+	union nf_inet_addr       daddr;          /* destination address */
 	__be16                   cport;
 	__be16                   vport;
 	__be16                   dport;
@@ -314,8 +318,9 @@ struct ip_vs_service {
 	atomic_t		refcnt;   /* reference counter */
 	atomic_t		usecnt;   /* use counter */
 
+	u16			af;       /* address family */
 	__u16			protocol; /* which protocol (TCP/UDP) */
-	__be32			addr;	  /* IP address for virtual service */
+	union nf_inet_addr	addr;	  /* IP address for virtual service */
 	__be16			port;	  /* port number for the service */
 	__u32                   fwmark;   /* firewall mark of the service */
 	unsigned		flags;	  /* service status flags */
@@ -342,7 +347,8 @@ struct ip_vs_dest {
 	struct list_head	n_list;   /* for the dests in the service */
 	struct list_head	d_list;   /* for table with all the dests */
 
-	__be32			addr;		/* IP address of the server */
+	u16			af;		/* address family */
+	union nf_inet_addr	addr;		/* IP address of the server */
 	__be16			port;		/* port number of the server */
 	volatile unsigned	flags;		/* dest status flags */
 	atomic_t		conn_flags;	/* flags to copy to conn */
@@ -366,7 +372,7 @@ struct ip_vs_dest {
 	/* for virtual service */
 	struct ip_vs_service	*svc;		/* service it belongs to */
 	__u16			protocol;	/* which protocol (TCP/UDP) */
-	__be32			vaddr;		/* virtual IP address */
+	union nf_inet_addr	vaddr;		/* virtual IP address */
 	__be16			vport;		/* virtual port number */
 	__u32			vfwmark;	/* firewall mark of service */
 };
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 44a6872dc24..639d4bc7fc1 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -131,7 +131,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	int ret;
 
 	/* Hash by protocol, client address and port */
-	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr.ip, cp->cport);
 
 	ct_write_lock(hash);
 
@@ -162,7 +162,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	int ret;
 
 	/* unhash it and decrease its reference counter */
-	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr.ip, cp->cport);
 
 	ct_write_lock(hash);
 
@@ -197,10 +197,10 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
 	ct_read_lock(hash);
 
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (s_addr==cp->caddr && s_port==cp->cport &&
-		    d_port==cp->vport && d_addr==cp->vaddr &&
+		if (s_addr == cp->caddr.ip && s_port == cp->cport &&
+		    d_port == cp->vport && d_addr == cp->vaddr.ip &&
 		    ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
-		    protocol==cp->protocol) {
+		    protocol == cp->protocol) {
 			/* HIT */
 			atomic_inc(&cp->refcnt);
 			ct_read_unlock(hash);
@@ -243,10 +243,10 @@ struct ip_vs_conn *ip_vs_ct_in_get
 	ct_read_lock(hash);
 
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (s_addr==cp->caddr && s_port==cp->cport &&
-		    d_port==cp->vport && d_addr==cp->vaddr &&
+		if (s_addr == cp->caddr.ip && s_port == cp->cport &&
+		    d_port == cp->vport && d_addr == cp->vaddr.ip &&
 		    cp->flags & IP_VS_CONN_F_TEMPLATE &&
-		    protocol==cp->protocol) {
+		    protocol == cp->protocol) {
 			/* HIT */
 			atomic_inc(&cp->refcnt);
 			goto out;
@@ -286,8 +286,8 @@ struct ip_vs_conn *ip_vs_conn_out_get
 	ct_read_lock(hash);
 
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (d_addr == cp->caddr && d_port == cp->cport &&
-		    s_port == cp->dport && s_addr == cp->daddr &&
+		if (d_addr == cp->caddr.ip && d_port == cp->cport &&
+		    s_port == cp->dport && s_addr == cp->daddr.ip &&
 		    protocol == cp->protocol) {
 			/* HIT */
 			atomic_inc(&cp->refcnt);
@@ -406,9 +406,9 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 		  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
 		  "dest->refcnt:%d\n",
 		  ip_vs_proto_name(cp->protocol),
-		  NIPQUAD(cp->caddr), ntohs(cp->cport),
-		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
-		  NIPQUAD(cp->daddr), ntohs(cp->dport),
+		  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
+		  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
+		  NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
 		  ip_vs_fwd_tag(cp), cp->state,
 		  cp->flags, atomic_read(&cp->refcnt),
 		  atomic_read(&dest->refcnt));
@@ -444,8 +444,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 	struct ip_vs_dest *dest;
 
 	if ((cp) && (!cp->dest)) {
-		dest = ip_vs_find_dest(cp->daddr, cp->dport,
-				       cp->vaddr, cp->vport, cp->protocol);
+		dest = ip_vs_find_dest(cp->daddr.ip, cp->dport,
+				       cp->vaddr.ip, cp->vport, cp->protocol);
 		ip_vs_bind_dest(cp, dest);
 		return dest;
 	} else
@@ -468,9 +468,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 		  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
 		  "dest->refcnt:%d\n",
 		  ip_vs_proto_name(cp->protocol),
-		  NIPQUAD(cp->caddr), ntohs(cp->cport),
-		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
-		  NIPQUAD(cp->daddr), ntohs(cp->dport),
+		  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
+		  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
+		  NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
 		  ip_vs_fwd_tag(cp), cp->state,
 		  cp->flags, atomic_read(&cp->refcnt),
 		  atomic_read(&dest->refcnt));
@@ -530,9 +530,9 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 			  "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
 			  "-> d:%u.%u.%u.%u:%d\n",
 			  ip_vs_proto_name(ct->protocol),
-			  NIPQUAD(ct->caddr), ntohs(ct->cport),
-			  NIPQUAD(ct->vaddr), ntohs(ct->vport),
-			  NIPQUAD(ct->daddr), ntohs(ct->dport));
+			  NIPQUAD(ct->caddr.ip), ntohs(ct->cport),
+			  NIPQUAD(ct->vaddr.ip), ntohs(ct->vport),
+			  NIPQUAD(ct->daddr.ip), ntohs(ct->dport));
 
 		/*
 		 * Invalidate the connection template
@@ -641,11 +641,11 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
 	INIT_LIST_HEAD(&cp->c_list);
 	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
 	cp->protocol	   = proto;
-	cp->caddr	   = caddr;
+	cp->caddr.ip	   = caddr;
 	cp->cport	   = cport;
-	cp->vaddr	   = vaddr;
+	cp->vaddr.ip	   = vaddr;
 	cp->vport	   = vport;
-	cp->daddr          = daddr;
+	cp->daddr.ip	   = daddr;
 	cp->dport          = dport;
 	cp->flags	   = flags;
 	spin_lock_init(&cp->lock);
@@ -763,9 +763,9 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq,
 			"%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n",
 				ip_vs_proto_name(cp->protocol),
-				ntohl(cp->caddr), ntohs(cp->cport),
-				ntohl(cp->vaddr), ntohs(cp->vport),
-				ntohl(cp->daddr), ntohs(cp->dport),
+				ntohl(cp->caddr.ip), ntohs(cp->cport),
+				ntohl(cp->vaddr.ip), ntohs(cp->vport),
+				ntohl(cp->daddr.ip), ntohs(cp->dport),
 				ip_vs_state_name(cp->protocol, cp->state),
 				(cp->timer.expires-jiffies)/HZ);
 	}
@@ -812,9 +812,9 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq,
 			"%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
 				ip_vs_proto_name(cp->protocol),
-				ntohl(cp->caddr), ntohs(cp->cport),
-				ntohl(cp->vaddr), ntohs(cp->vport),
-				ntohl(cp->daddr), ntohs(cp->dport),
+				ntohl(cp->caddr.ip), ntohs(cp->cport),
+				ntohl(cp->vaddr.ip), ntohs(cp->vport),
+				ntohl(cp->daddr.ip), ntohs(cp->dport),
 				ip_vs_state_name(cp->protocol, cp->state),
 				ip_vs_origin_name(cp->flags),
 				(cp->timer.expires-jiffies)/HZ);
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 9fbf0a6d739..4a54f33b60d 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -232,14 +232,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 						    snet, 0,
 						    iph->daddr,
 						    ports[1],
-						    dest->addr, dest->port,
+						    dest->addr.ip, dest->port,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			else
 				ct = ip_vs_conn_new(iph->protocol,
 						    snet, 0,
 						    iph->daddr, 0,
-						    dest->addr, 0,
+						    dest->addr.ip, 0,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			if (ct == NULL)
@@ -286,14 +286,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 				ct = ip_vs_conn_new(IPPROTO_IP,
 						    snet, 0,
 						    htonl(svc->fwmark), 0,
-						    dest->addr, 0,
+						    dest->addr.ip, 0,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			else
 				ct = ip_vs_conn_new(iph->protocol,
 						    snet, 0,
 						    iph->daddr, 0,
-						    dest->addr, 0,
+						    dest->addr.ip, 0,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			if (ct == NULL)
@@ -313,7 +313,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	cp = ip_vs_conn_new(iph->protocol,
 			    iph->saddr, ports[0],
 			    iph->daddr, ports[1],
-			    dest->addr, dport,
+			    dest->addr.ip, dport,
 			    0,
 			    dest);
 	if (cp == NULL) {
@@ -380,7 +380,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	cp = ip_vs_conn_new(iph->protocol,
 			    iph->saddr, pptr[0],
 			    iph->daddr, pptr[1],
-			    dest->addr, dest->port?dest->port:pptr[1],
+			    dest->addr.ip, dest->port ? dest->port : pptr[1],
 			    0,
 			    dest);
 	if (cp == NULL)
@@ -389,9 +389,9 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
 		  "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
 		  ip_vs_fwd_tag(cp),
-		  NIPQUAD(cp->caddr), ntohs(cp->cport),
-		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
-		  NIPQUAD(cp->daddr), ntohs(cp->dport),
+		  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
+		  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
+		  NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
 		  cp->flags, atomic_read(&cp->refcnt));
 
 	ip_vs_conn_stats(cp, svc);
@@ -526,14 +526,14 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 	struct iphdr *ciph	 = (struct iphdr *)(icmph + 1);
 
 	if (inout) {
-		iph->saddr = cp->vaddr;
+		iph->saddr = cp->vaddr.ip;
 		ip_send_check(iph);
-		ciph->daddr = cp->vaddr;
+		ciph->daddr = cp->vaddr.ip;
 		ip_send_check(ciph);
 	} else {
-		iph->daddr = cp->daddr;
+		iph->daddr = cp->daddr.ip;
 		ip_send_check(iph);
-		ciph->saddr = cp->daddr;
+		ciph->saddr = cp->daddr.ip;
 		ip_send_check(ciph);
 	}
 
@@ -762,7 +762,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	/* mangle the packet */
 	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
 		goto drop;
-	ip_hdr(skb)->saddr = cp->vaddr;
+	ip_hdr(skb)->saddr = cp->vaddr.ip;
 	ip_send_check(ip_hdr(skb));
 
 	/* For policy routing, packets originating from this
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index ede101eeec1..3f2277b847d 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -317,7 +317,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 		/*
 		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
 		 */
-		hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
+		hash = ip_vs_svc_hashkey(svc->protocol, svc->addr.ip,
+					 svc->port);
 		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
 	} else {
 		/*
@@ -373,7 +374,7 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
 	hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
 
 	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
-		if ((svc->addr == vaddr)
+		if ((svc->addr.ip == vaddr)
 		    && (svc->port == vport)
 		    && (svc->protocol == protocol)) {
 			/* HIT */
@@ -503,7 +504,7 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
 	 *	Hash by proto,addr,port,
 	 *	which are the parameters of the real service.
 	 */
-	hash = ip_vs_rs_hashkey(dest->addr, dest->port);
+	hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port);
 	list_add(&dest->d_list, &ip_vs_rtable[hash]);
 
 	return 1;
@@ -543,7 +544,7 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
 
 	read_lock(&__ip_vs_rs_lock);
 	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
-		if ((dest->addr == daddr)
+		if ((dest->addr.ip == daddr)
 		    && (dest->port == dport)
 		    && ((dest->protocol == protocol) ||
 			dest->vfwmark)) {
@@ -569,7 +570,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
 	 * Find the destination for the given service
 	 */
 	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if ((dest->addr == daddr) && (dest->port == dport)) {
+		if ((dest->addr.ip == daddr) && (dest->port == dport)) {
 			/* HIT */
 			return dest;
 		}
@@ -626,14 +627,14 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
 		IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
 			  "dest->refcnt=%d\n",
 			  dest->vfwmark,
-			  NIPQUAD(dest->addr), ntohs(dest->port),
+			  NIPQUAD(dest->addr.ip), ntohs(dest->port),
 			  atomic_read(&dest->refcnt));
-		if (dest->addr == daddr &&
+		if (dest->addr.ip == daddr &&
 		    dest->port == dport &&
 		    dest->vfwmark == svc->fwmark &&
 		    dest->protocol == svc->protocol &&
 		    (svc->fwmark ||
-		     (dest->vaddr == svc->addr &&
+		     (dest->vaddr.ip == svc->addr.ip &&
 		      dest->vport == svc->port))) {
 			/* HIT */
 			return dest;
@@ -646,7 +647,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
 			IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
 				  "from trash\n",
 				  dest->vfwmark,
-				  NIPQUAD(dest->addr), ntohs(dest->port));
+				  NIPQUAD(dest->addr.ip), ntohs(dest->port));
 			list_del(&dest->n_list);
 			ip_vs_dst_reset(dest);
 			__ip_vs_unbind_svc(dest);
@@ -779,10 +780,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
 	}
 
 	dest->protocol = svc->protocol;
-	dest->vaddr = svc->addr;
+	dest->vaddr.ip = svc->addr.ip;
 	dest->vport = svc->port;
 	dest->vfwmark = svc->fwmark;
-	dest->addr = udest->addr;
+	dest->addr.ip = udest->addr;
 	dest->port = udest->port;
 
 	atomic_set(&dest->activeconns, 0);
@@ -847,7 +848,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 			  NIPQUAD(daddr), ntohs(dport),
 			  atomic_read(&dest->refcnt),
 			  dest->vfwmark,
-			  NIPQUAD(dest->vaddr),
+			  NIPQUAD(dest->vaddr.ip),
 			  ntohs(dest->vport));
 		__ip_vs_update_dest(svc, dest, udest);
 
@@ -993,7 +994,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 	} else {
 		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
 			  "dest->refcnt=%d\n",
-			  NIPQUAD(dest->addr), ntohs(dest->port),
+			  NIPQUAD(dest->addr.ip), ntohs(dest->port),
 			  atomic_read(&dest->refcnt));
 		list_add(&dest->n_list, &ip_vs_dest_trash);
 		atomic_inc(&dest->refcnt);
@@ -1101,7 +1102,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
 	atomic_set(&svc->refcnt, 0);
 
 	svc->protocol = u->protocol;
-	svc->addr = u->addr;
+	svc->addr.ip = u->addr;
 	svc->port = u->port;
 	svc->fwmark = u->fwmark;
 	svc->flags = u->flags;
@@ -1751,7 +1752,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 		if (iter->table == ip_vs_svc_table)
 			seq_printf(seq, "%s  %08X:%04X %s ",
 				   ip_vs_proto_name(svc->protocol),
-				   ntohl(svc->addr),
+				   ntohl(svc->addr.ip),
 				   ntohs(svc->port),
 				   svc->scheduler->name);
 		else
@@ -1768,7 +1769,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 		list_for_each_entry(dest, &svc->destinations, n_list) {
 			seq_printf(seq,
 				   "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
-				   ntohl(dest->addr), ntohs(dest->port),
+				   ntohl(dest->addr.ip), ntohs(dest->port),
 				   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
 				   atomic_read(&dest->weight),
 				   atomic_read(&dest->activeconns),
@@ -2040,7 +2041,7 @@ static void
 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 {
 	dst->protocol = src->protocol;
-	dst->addr = src->addr;
+	dst->addr = src->addr.ip;
 	dst->port = src->port;
 	dst->fwmark = src->fwmark;
 	strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
@@ -2114,7 +2115,7 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 			if (count >= get->num_dests)
 				break;
 
-			entry.addr = dest->addr;
+			entry.addr = dest->addr.ip;
 			entry.port = dest->port;
 			entry.conn_flags = atomic_read(&dest->conn_flags);
 			entry.weight = atomic_read(&dest->weight);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index fa66824d264..9f9d795dbd7 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -218,7 +218,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
 		  "--> server %u.%u.%u.%u:%d\n",
 		  NIPQUAD(iph->daddr),
-		  NIPQUAD(dest->addr),
+		  NIPQUAD(dest->addr.ip),
 		  ntohs(dest->port));
 
 	return dest;
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index c1c758e4f73..bfe5d7050a5 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -172,17 +172,17 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 
 		IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
 			  "%u.%u.%u.%u:%d detected\n",
-			  NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
+			  NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr.ip), 0);
 
 		/*
 		 * Now update or create an connection entry for it
 		 */
 		n_cp = ip_vs_conn_out_get(iph->protocol, from, port,
-					  cp->caddr, 0);
+					  cp->caddr.ip, 0);
 		if (!n_cp) {
 			n_cp = ip_vs_conn_new(IPPROTO_TCP,
-					      cp->caddr, 0,
-					      cp->vaddr, port,
+					      cp->caddr.ip, 0,
+					      cp->vaddr.ip, port,
 					      from, port,
 					      IP_VS_CONN_F_NO_CPORT,
 					      cp->dest);
@@ -196,7 +196,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		/*
 		 * Replace the old passive address with the new one
 		 */
-		from = n_cp->vaddr;
+		from = n_cp->vaddr.ip;
 		port = n_cp->vport;
 		sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
 			(ntohs(port)>>8)&255, ntohs(port)&255);
@@ -306,16 +306,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	 */
 	IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
 		  ip_vs_proto_name(iph->protocol),
-		  NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0);
+		  NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
 
 	n_cp = ip_vs_conn_in_get(iph->protocol,
 				 to, port,
-				 cp->vaddr, htons(ntohs(cp->vport)-1));
+				 cp->vaddr.ip, htons(ntohs(cp->vport)-1));
 	if (!n_cp) {
 		n_cp = ip_vs_conn_new(IPPROTO_TCP,
 				      to, port,
-				      cp->vaddr, htons(ntohs(cp->vport)-1),
-				      cp->daddr, htons(ntohs(cp->dport)-1),
+				      cp->vaddr.ip, htons(ntohs(cp->vport)-1),
+				      cp->daddr.ip, htons(ntohs(cp->dport)-1),
 				      0,
 				      cp->dest);
 		if (!n_cp)
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index d2a43aa3fe4..69309edc0c4 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -422,7 +422,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
 
 	IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
 		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr), ntohs(least->port),
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
 		  atomic_read(&least->activeconns),
 		  atomic_read(&least->refcnt),
 		  atomic_read(&least->weight), loh);
@@ -506,7 +506,7 @@ out:
 	IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
 		  "--> server %u.%u.%u.%u:%d\n",
 		  NIPQUAD(iph->daddr),
-		  NIPQUAD(dest->addr),
+		  NIPQUAD(dest->addr.ip),
 		  ntohs(dest->port));
 
 	return dest;
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 375a1ffb6b6..51c746e2083 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 
 	IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
 		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr), ntohs(least->port),
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
 		  atomic_read(&least->activeconns),
 		  atomic_read(&least->refcnt),
 		  atomic_read(&least->weight), loh);
@@ -250,7 +250,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 
 	IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
 		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(most->addr), ntohs(most->port),
+		  NIPQUAD(most->addr.ip), ntohs(most->port),
 		  atomic_read(&most->activeconns),
 		  atomic_read(&most->refcnt),
 		  atomic_read(&most->weight), moh);
@@ -598,7 +598,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
 
 	IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
 		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr), ntohs(least->port),
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
 		  atomic_read(&least->activeconns),
 		  atomic_read(&least->refcnt),
 		  atomic_read(&least->weight), loh);
@@ -706,7 +706,7 @@ out:
 	IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
 		  "--> server %u.%u.%u.%u:%d\n",
 		  NIPQUAD(iph->daddr),
-		  NIPQUAD(dest->addr),
+		  NIPQUAD(dest->addr.ip),
 		  ntohs(dest->port));
 
 	return dest;
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index 2c3de1b6351..551d293347f 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -68,7 +68,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 
 	if (least)
 	IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n",
-		  NIPQUAD(least->addr), ntohs(least->port),
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
 		  atomic_read(&least->activeconns),
 		  atomic_read(&least->inactconns));
 
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index 5330d5a2de1..aa0e32ad348 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -101,7 +101,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
   out:
 	IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u "
 		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr), ntohs(least->port),
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
 		  atomic_read(&least->activeconns),
 		  atomic_read(&least->refcnt),
 		  atomic_read(&least->weight), loh);
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index d0ea467986a..15860e1441b 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -147,7 +147,7 @@ tcp_snat_handler(struct sk_buff *skb,
 	/* Adjust TCP checksums */
 	if (!cp->app) {
 		/* Only port and addr are changed, do fast csum update */
-		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
+		tcp_fast_csum_update(tcph, cp->daddr.ip, cp->vaddr.ip,
 				     cp->dport, cp->vport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 			skb->ip_summed = CHECKSUM_NONE;
@@ -155,7 +155,7 @@ tcp_snat_handler(struct sk_buff *skb,
 		/* full checksum calculation */
 		tcph->check = 0;
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-		tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
+		tcph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip,
 						skb->len - tcphoff,
 						cp->protocol, skb->csum);
 		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
@@ -198,7 +198,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 	 */
 	if (!cp->app) {
 		/* Only port and addr are changed, do fast csum update */
-		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
+		tcp_fast_csum_update(tcph, cp->vaddr.ip, cp->daddr.ip,
 				     cp->vport, cp->dport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 			skb->ip_summed = CHECKSUM_NONE;
@@ -206,7 +206,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 		/* full checksum calculation */
 		tcph->check = 0;
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-		tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
+		tcph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip,
 						skb->len - tcphoff,
 						cp->protocol, skb->csum);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -427,8 +427,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 			  th->fin? 'F' : '.',
 			  th->ack? 'A' : '.',
 			  th->rst? 'R' : '.',
-			  NIPQUAD(cp->daddr), ntohs(cp->dport),
-			  NIPQUAD(cp->caddr), ntohs(cp->cport),
+			  NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
+			  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
 			  tcp_state_name(cp->state),
 			  tcp_state_name(new_state),
 			  atomic_read(&cp->refcnt));
@@ -549,8 +549,8 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
 				  "%u.%u.%u.%u:%u to app %s on port %u\n",
 				  __func__,
-				  NIPQUAD(cp->caddr), ntohs(cp->cport),
-				  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+				  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
+				  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
 				  inc->name, ntohs(inc->port));
 			cp->app = inc;
 			if (inc->init_conn)
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index c6be5d56823..8dfad5db829 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -158,7 +158,7 @@ udp_snat_handler(struct sk_buff *skb,
 	 */
 	if (!cp->app && (udph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
-		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
+		udp_fast_csum_update(udph, cp->daddr.ip, cp->vaddr.ip,
 				     cp->dport, cp->vport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 			skb->ip_summed = CHECKSUM_NONE;
@@ -166,7 +166,7 @@ udp_snat_handler(struct sk_buff *skb,
 		/* full checksum calculation */
 		udph->check = 0;
 		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
-		udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
+		udph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip,
 						skb->len - udphoff,
 						cp->protocol, skb->csum);
 		if (udph->check == 0)
@@ -211,7 +211,7 @@ udp_dnat_handler(struct sk_buff *skb,
 	 */
 	if (!cp->app && (udph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
-		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
+		udp_fast_csum_update(udph, cp->vaddr.ip, cp->daddr.ip,
 				     cp->vport, cp->dport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 			skb->ip_summed = CHECKSUM_NONE;
@@ -219,7 +219,7 @@ udp_dnat_handler(struct sk_buff *skb,
 		/* full checksum calculation */
 		udph->check = 0;
 		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
-		udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
+		udph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip,
 						skb->len - udphoff,
 						cp->protocol, skb->csum);
 		if (udph->check == 0)
@@ -343,8 +343,8 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
 				  "%u.%u.%u.%u:%u to app %s on port %u\n",
 				  __func__,
-				  NIPQUAD(cp->caddr), ntohs(cp->cport),
-				  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+				  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
+				  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
 				  inc->name, ntohs(inc->port));
 			cp->app = inc;
 			if (inc->init_conn)
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index f7492911753..27f0b624283 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -76,7 +76,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	write_unlock(&svc->sched_lock);
 	IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u "
 		  "activeconns %d refcnt %d weight %d\n",
-		  NIPQUAD(dest->addr), ntohs(dest->port),
+		  NIPQUAD(dest->addr.ip), ntohs(dest->port),
 		  atomic_read(&dest->activeconns),
 		  atomic_read(&dest->refcnt), atomic_read(&dest->weight));
 
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index 53f73bea66c..38b574b2fdf 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -103,7 +103,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 
 	IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
 		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr), ntohs(least->port),
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
 		  atomic_read(&least->activeconns),
 		  atomic_read(&least->refcnt),
 		  atomic_read(&least->weight), loh);
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 7b979e22805..c9e54e2ec29 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -215,7 +215,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
 		  "--> server %u.%u.%u.%u:%d\n",
 		  NIPQUAD(iph->saddr),
-		  NIPQUAD(dest->addr),
+		  NIPQUAD(dest->addr.ip),
 		  ntohs(dest->port));
 
 	return dest;
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index a652da2c320..2cf47b2e166 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -256,9 +256,9 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
 	s->cport = cp->cport;
 	s->vport = cp->vport;
 	s->dport = cp->dport;
-	s->caddr = cp->caddr;
-	s->vaddr = cp->vaddr;
-	s->daddr = cp->daddr;
+	s->caddr = cp->caddr.ip;
+	s->vaddr = cp->vaddr.ip;
+	s->daddr = cp->daddr.ip;
 	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
 	s->state = htons(cp->state);
 	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index df7ad8d7476..09fd993040f 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -91,7 +91,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 
 	IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u "
 		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr), ntohs(least->port),
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
 		  atomic_read(&least->activeconns),
 		  atomic_read(&least->refcnt),
 		  atomic_read(&least->weight), loh);
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
index 0d86a79b87b..19c49b234f3 100644
--- a/net/ipv4/ipvs/ip_vs_wrr.c
+++ b/net/ipv4/ipvs/ip_vs_wrr.c
@@ -197,7 +197,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 
 	IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u "
 		  "activeconns %d refcnt %d weight %d\n",
-		  NIPQUAD(dest->addr), ntohs(dest->port),
+		  NIPQUAD(dest->addr.ip), ntohs(dest->port),
 		  atomic_read(&dest->activeconns),
 		  atomic_read(&dest->refcnt),
 		  atomic_read(&dest->weight));
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 9892d4aca42..88199c9f2d3 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -71,7 +71,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 				.oif = 0,
 				.nl_u = {
 					.ip4_u = {
-						.daddr = dest->addr,
+						.daddr = dest->addr.ip,
 						.saddr = 0,
 						.tos = rtos, } },
 			};
@@ -80,12 +80,12 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 				spin_unlock(&dest->dst_lock);
 				IP_VS_DBG_RL("ip_route_output error, "
 					     "dest: %u.%u.%u.%u\n",
-					     NIPQUAD(dest->addr));
+					     NIPQUAD(dest->addr.ip));
 				return NULL;
 			}
 			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
 			IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
-				  NIPQUAD(dest->addr),
+				  NIPQUAD(dest->addr.ip),
 				  atomic_read(&rt->u.dst.__refcnt), rtos);
 		}
 		spin_unlock(&dest->dst_lock);
@@ -94,14 +94,14 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 			.oif = 0,
 			.nl_u = {
 				.ip4_u = {
-					.daddr = cp->daddr,
+					.daddr = cp->daddr.ip,
 					.saddr = 0,
 					.tos = rtos, } },
 		};
 
 		if (ip_route_output_key(&init_net, &rt, &fl)) {
 			IP_VS_DBG_RL("ip_route_output error, dest: "
-				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
+				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
 			return NULL;
 		}
 	}
@@ -264,7 +264,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
 		goto tx_error;
-	ip_hdr(skb)->daddr = cp->daddr;
+	ip_hdr(skb)->daddr = cp->daddr.ip;
 	ip_send_check(ip_hdr(skb));
 
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
-- 
cgit v1.2.3-70-g09d2


From c860c6b1479992440e4962e9c95d258bfdce4fca Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:36 +0200
Subject: IPVS: Add internal versions of sockopt interface structs

Add extended internal versions of struct ip_vs_service_user and struct
ip_vs_dest_user (the originals can't be modified as they are part
of the old sockopt interface). Adjust ip_vs_ctl.c to work with the new
data structures and add some minor AF-awareness.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h       |  39 +++++++++++++
 net/ipv4/ipvs/ip_vs_ctl.c | 138 ++++++++++++++++++++++++++++++----------------
 2 files changed, 129 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 0400e590b6a..1adf8a026e4 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -399,6 +399,45 @@ struct ip_vs_conn {
 };
 
 
+/*
+ *	Extended internal versions of struct ip_vs_service_user and
+ *	ip_vs_dest_user for IPv6 support.
+ *
+ *	We need these to conveniently pass around service and destination
+ *	options, but unfortunately, we also need to keep the old definitions to
+ *	maintain userspace backwards compatibility for the setsockopt interface.
+ */
+struct ip_vs_service_user_kern {
+	/* virtual service addresses */
+	u16			af;
+	u16			protocol;
+	union nf_inet_addr	addr;		/* virtual ip address */
+	u16			port;
+	u32			fwmark;		/* firwall mark of service */
+
+	/* virtual service options */
+	char			*sched_name;
+	unsigned		flags;		/* virtual service flags */
+	unsigned		timeout;	/* persistent timeout in sec */
+	u32			netmask;	/* persistent netmask */
+};
+
+
+struct ip_vs_dest_user_kern {
+	/* destination server address */
+	union nf_inet_addr	addr;
+	u16			port;
+
+	/* real server options */
+	unsigned		conn_flags;	/* connection flags */
+	int			weight;		/* destination weight */
+
+	/* thresholds for active connections */
+	u32			u_threshold;	/* upper threshold */
+	u32			l_threshold;	/* lower threshold */
+};
+
+
 /*
  *	The information about the virtual service offered to the net
  *	and the forwarding entries
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 3f2277b847d..a0c8b7bb553 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -708,7 +708,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
  */
 static void
 __ip_vs_update_dest(struct ip_vs_service *svc,
-		    struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
+		    struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
 {
 	int conn_flags;
 
@@ -717,7 +717,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
 	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
 
 	/* check if local node and update the flags */
-	if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
+	if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
 		conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
 			| IP_VS_CONN_F_LOCALNODE;
 	}
@@ -761,7 +761,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
  *	Create a destination for the given service
  */
 static int
-ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 	       struct ip_vs_dest **dest_p)
 {
 	struct ip_vs_dest *dest;
@@ -769,7 +769,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
 
 	EnterFunction(2);
 
-	atype = inet_addr_type(&init_net, udest->addr);
+	atype = inet_addr_type(&init_net, udest->addr.ip);
 	if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 		return -EINVAL;
 
@@ -779,11 +779,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
 		return -ENOMEM;
 	}
 
+	dest->af = svc->af;
 	dest->protocol = svc->protocol;
-	dest->vaddr.ip = svc->addr.ip;
+	dest->vaddr = svc->addr;
 	dest->vport = svc->port;
 	dest->vfwmark = svc->fwmark;
-	dest->addr.ip = udest->addr;
+	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
 	dest->port = udest->port;
 
 	atomic_set(&dest->activeconns, 0);
@@ -808,10 +809,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
  *	Add a destination into an existing service
  */
 static int
-ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
+ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 {
 	struct ip_vs_dest *dest;
-	__be32 daddr = udest->addr;
+	union nf_inet_addr daddr;
 	__be16 dport = udest->port;
 	int ret;
 
@@ -828,10 +829,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 		return -ERANGE;
 	}
 
+	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
 	/*
 	 * Check if the dest already exists in the list
 	 */
-	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
 	if (dest != NULL) {
 		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
 		return -EEXIST;
@@ -841,7 +844,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 	 * Check if the dest already exists in the trash and
 	 * is from the same service
 	 */
-	dest = ip_vs_trash_get_dest(svc, daddr, dport);
+	dest = ip_vs_trash_get_dest(svc, daddr.ip, dport);
 	if (dest != NULL) {
 		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
 			  "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
@@ -916,10 +919,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
  *	Edit a destination in the given service
  */
 static int
-ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
+ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 {
 	struct ip_vs_dest *dest;
-	__be32 daddr = udest->addr;
+	union nf_inet_addr daddr;
 	__be16 dport = udest->port;
 
 	EnterFunction(2);
@@ -935,10 +938,12 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 		return -ERANGE;
 	}
 
+	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
 	/*
 	 *  Lookup the destination list
 	 */
-	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
 	if (dest == NULL) {
 		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
 		return -ENOENT;
@@ -1029,15 +1034,15 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
  *	Delete a destination server in the given service
  */
 static int
-ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
+ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 {
 	struct ip_vs_dest *dest;
-	__be32 daddr = udest->addr;
 	__be16 dport = udest->port;
 
 	EnterFunction(2);
 
-	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	dest = ip_vs_lookup_dest(svc, udest->addr.ip, dport);
+
 	if (dest == NULL) {
 		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
 		return -ENOENT;
@@ -1072,7 +1077,8 @@ ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
  *	Add a service into the service hash table
  */
 static int
-ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
+ip_vs_add_service(struct ip_vs_service_user_kern *u,
+		  struct ip_vs_service **svc_p)
 {
 	int ret = 0;
 	struct ip_vs_scheduler *sched = NULL;
@@ -1101,8 +1107,9 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
 	atomic_set(&svc->usecnt, 1);
 	atomic_set(&svc->refcnt, 0);
 
+	svc->af = u->af;
 	svc->protocol = u->protocol;
-	svc->addr.ip = u->addr;
+	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
 	svc->port = u->port;
 	svc->fwmark = u->fwmark;
 	svc->flags = u->flags;
@@ -1161,7 +1168,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
  *	Edit a service and bind it with a new scheduler
  */
 static int
-ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
+ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 {
 	struct ip_vs_scheduler *sched, *old_sched;
 	int ret = 0;
@@ -1905,14 +1912,44 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
 	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
 };
 
+static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
+				  struct ip_vs_service_user *usvc_compat)
+{
+	usvc->af		= AF_INET;
+	usvc->protocol		= usvc_compat->protocol;
+	usvc->addr.ip		= usvc_compat->addr;
+	usvc->port		= usvc_compat->port;
+	usvc->fwmark		= usvc_compat->fwmark;
+
+	/* Deep copy of sched_name is not needed here */
+	usvc->sched_name	= usvc_compat->sched_name;
+
+	usvc->flags		= usvc_compat->flags;
+	usvc->timeout		= usvc_compat->timeout;
+	usvc->netmask		= usvc_compat->netmask;
+}
+
+static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
+				   struct ip_vs_dest_user *udest_compat)
+{
+	udest->addr.ip		= udest_compat->addr;
+	udest->port		= udest_compat->port;
+	udest->conn_flags	= udest_compat->conn_flags;
+	udest->weight		= udest_compat->weight;
+	udest->u_threshold	= udest_compat->u_threshold;
+	udest->l_threshold	= udest_compat->l_threshold;
+}
+
 static int
 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
 	int ret;
 	unsigned char arg[MAX_ARG_LEN];
-	struct ip_vs_service_user *usvc;
+	struct ip_vs_service_user *usvc_compat;
+	struct ip_vs_service_user_kern usvc;
 	struct ip_vs_service *svc;
-	struct ip_vs_dest_user *udest;
+	struct ip_vs_dest_user *udest_compat;
+	struct ip_vs_dest_user_kern udest;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1952,35 +1989,40 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		goto out_unlock;
 	}
 
-	usvc = (struct ip_vs_service_user *)arg;
-	udest = (struct ip_vs_dest_user *)(usvc + 1);
+	usvc_compat = (struct ip_vs_service_user *)arg;
+	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
+
+	/* We only use the new structs internally, so copy userspace compat
+	 * structs to extended internal versions */
+	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
+	ip_vs_copy_udest_compat(&udest, udest_compat);
 
 	if (cmd == IP_VS_SO_SET_ZERO) {
 		/* if no service address is set, zero counters in all */
-		if (!usvc->fwmark && !usvc->addr && !usvc->port) {
+		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
 			ret = ip_vs_zero_all();
 			goto out_unlock;
 		}
 	}
 
 	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
-	if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
+	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
 		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
-			  usvc->protocol, NIPQUAD(usvc->addr),
-			  ntohs(usvc->port), usvc->sched_name);
+			  usvc.protocol, NIPQUAD(usvc.addr.ip),
+			  ntohs(usvc.port), usvc.sched_name);
 		ret = -EFAULT;
 		goto out_unlock;
 	}
 
 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
-	if (usvc->fwmark == 0)
-		svc = __ip_vs_service_get(usvc->protocol,
-					  usvc->addr, usvc->port);
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.protocol,
+					  usvc.addr.ip, usvc.port);
 	else
-		svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
 
 	if (cmd != IP_VS_SO_SET_ADD
-	    && (svc == NULL || svc->protocol != usvc->protocol)) {
+	    && (svc == NULL || svc->protocol != usvc.protocol)) {
 		ret = -ESRCH;
 		goto out_unlock;
 	}
@@ -1990,10 +2032,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		if (svc != NULL)
 			ret = -EEXIST;
 		else
-			ret = ip_vs_add_service(usvc, &svc);
+			ret = ip_vs_add_service(&usvc, &svc);
 		break;
 	case IP_VS_SO_SET_EDIT:
-		ret = ip_vs_edit_service(svc, usvc);
+		ret = ip_vs_edit_service(svc, &usvc);
 		break;
 	case IP_VS_SO_SET_DEL:
 		ret = ip_vs_del_service(svc);
@@ -2004,13 +2046,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		ret = ip_vs_zero_service(svc);
 		break;
 	case IP_VS_SO_SET_ADDDEST:
-		ret = ip_vs_add_dest(svc, udest);
+		ret = ip_vs_add_dest(svc, &udest);
 		break;
 	case IP_VS_SO_SET_EDITDEST:
-		ret = ip_vs_edit_dest(svc, udest);
+		ret = ip_vs_edit_dest(svc, &udest);
 		break;
 	case IP_VS_SO_SET_DELDEST:
-		ret = ip_vs_del_dest(svc, udest);
+		ret = ip_vs_del_dest(svc, &udest);
 		break;
 	default:
 		ret = -EINVAL;
@@ -2517,7 +2559,7 @@ nla_put_failure:
 	return skb->len;
 }
 
-static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
 				    struct nlattr *nla, int full_entry)
 {
 	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
@@ -2537,6 +2579,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
 	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
 		return -EINVAL;
 
+	usvc->af = nla_get_u16(nla_af);
 	/* For now, only support IPv4 */
 	if (nla_get_u16(nla_af) != AF_INET)
 		return -EAFNOSUPPORT;
@@ -2572,7 +2615,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
 		if (usvc->fwmark)
 			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
 		else
-			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
+			svc = __ip_vs_service_get(usvc->protocol, usvc->addr.ip,
 						  usvc->port);
 		if (svc) {
 			usvc->flags = svc->flags;
@@ -2583,9 +2626,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
 		/* set new flags from userland */
 		usvc->flags = (usvc->flags & ~flags.mask) |
 			      (flags.flags & flags.mask);
-
-		strlcpy(usvc->sched_name, nla_data(nla_sched),
-			sizeof(usvc->sched_name));
+		usvc->sched_name = nla_data(nla_sched);
 		usvc->timeout = nla_get_u32(nla_timeout);
 		usvc->netmask = nla_get_u32(nla_netmask);
 	}
@@ -2595,7 +2636,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
 
 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
 {
-	struct ip_vs_service_user usvc;
+	struct ip_vs_service_user_kern usvc;
 	int ret;
 
 	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
@@ -2605,7 +2646,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
 	if (usvc.fwmark)
 		return __ip_vs_svc_fwm_get(usvc.fwmark);
 	else
-		return __ip_vs_service_get(usvc.protocol, usvc.addr,
+		return __ip_vs_service_get(usvc.protocol, usvc.addr.ip,
 					   usvc.port);
 }
 
@@ -2705,7 +2746,7 @@ out_err:
 	return skb->len;
 }
 
-static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
 				 struct nlattr *nla, int full_entry)
 {
 	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
@@ -2861,8 +2902,8 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	struct ip_vs_service *svc = NULL;
-	struct ip_vs_service_user usvc;
-	struct ip_vs_dest_user udest;
+	struct ip_vs_service_user_kern usvc;
+	struct ip_vs_dest_user_kern udest;
 	int ret = 0, cmd;
 	int need_full_svc = 0, need_full_dest = 0;
 
@@ -2914,7 +2955,8 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 
 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
 	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+		svc = __ip_vs_service_get(usvc.protocol, usvc.addr.ip,
+					  usvc.port);
 	else
 		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
 
-- 
cgit v1.2.3-70-g09d2


From b18610de9ec2728159f723a9b864ca78a5774193 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:37 +0200
Subject: IPVS: Convert __ip_vs_svc_get() and __ip_vs_fwm_get()

Add support for getting services based on their address family to
__ip_vs_service_get(), __ip_vs_fwm_get() and the helper hash function
ip_vs_svc_hashkey(). Adjust the callers.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 79 ++++++++++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index a0c8b7bb553..a2d69b2ce6a 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -282,11 +282,19 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
  *	Returns hash value for virtual service
  */
 static __inline__ unsigned
-ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
+ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
+		  __be16 port)
 {
 	register unsigned porth = ntohs(port);
+	__be32 addr_fold = addr->ip;
 
-	return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		addr_fold = addr->ip6[0]^addr->ip6[1]^
+			    addr->ip6[2]^addr->ip6[3];
+#endif
+
+	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
 		& IP_VS_SVC_TAB_MASK;
 }
 
@@ -317,7 +325,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 		/*
 		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
 		 */
-		hash = ip_vs_svc_hashkey(svc->protocol, svc->addr.ip,
+		hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
 					 svc->port);
 		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
 	} else {
@@ -364,17 +372,19 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 /*
  *	Get service by {proto,addr,port} in the service table.
  */
-static __inline__ struct ip_vs_service *
-__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
+static inline struct ip_vs_service *
+__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
+		    __be16 vport)
 {
 	unsigned hash;
 	struct ip_vs_service *svc;
 
 	/* Check for "full" addressed entries */
-	hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
+	hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
 
 	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
-		if ((svc->addr.ip == vaddr)
+		if ((svc->af == af)
+		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
 		    && (svc->port == vport)
 		    && (svc->protocol == protocol)) {
 			/* HIT */
@@ -390,7 +400,8 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
 /*
  *	Get service by {fwmark} in the service table.
  */
-static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
+static inline struct ip_vs_service *
+__ip_vs_svc_fwm_get(int af, __u32 fwmark)
 {
 	unsigned hash;
 	struct ip_vs_service *svc;
@@ -399,7 +410,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
 	hash = ip_vs_svc_fwm_hashkey(fwmark);
 
 	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
-		if (svc->fwmark == fwmark) {
+		if (svc->fwmark == fwmark && svc->af == af) {
 			/* HIT */
 			atomic_inc(&svc->usecnt);
 			return svc;
@@ -413,20 +424,20 @@ struct ip_vs_service *
 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
 {
 	struct ip_vs_service *svc;
-
+	union nf_inet_addr _vaddr = { .ip = vaddr };
 	read_lock(&__ip_vs_svc_lock);
 
 	/*
 	 *	Check the table hashed by fwmark first
 	 */
-	if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
+	if (fwmark && (svc = __ip_vs_svc_fwm_get(AF_INET, fwmark)))
 		goto out;
 
 	/*
 	 *	Check the table hashed by <protocol,addr,port>
 	 *	for "full" addressed entries
 	 */
-	svc = __ip_vs_service_get(protocol, vaddr, vport);
+	svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, vport);
 
 	if (svc == NULL
 	    && protocol == IPPROTO_TCP
@@ -436,7 +447,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
 		 * Check if ftp service entry exists, the packet
 		 * might belong to FTP data connections.
 		 */
-		svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
+		svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, FTPPORT);
 	}
 
 	if (svc == NULL
@@ -444,7 +455,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
 		/*
 		 * Check if the catch-all port (port zero) exists
 		 */
-		svc = __ip_vs_service_get(protocol, vaddr, 0);
+		svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, 0);
 	}
 
   out:
@@ -2016,10 +2027,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
 	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_get(usvc.protocol,
-					  usvc.addr.ip, usvc.port);
+		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+					  &usvc.addr, usvc.port);
 	else
-		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
 
 	if (cmd != IP_VS_SO_SET_ADD
 	    && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2141,13 +2152,15 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 			 struct ip_vs_get_dests __user *uptr)
 {
 	struct ip_vs_service *svc;
+	union nf_inet_addr addr = { .ip = get->addr };
 	int ret = 0;
 
 	if (get->fwmark)
-		svc = __ip_vs_svc_fwm_get(get->fwmark);
+		svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
 	else
-		svc = __ip_vs_service_get(get->protocol,
-					  get->addr, get->port);
+		svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
+					  get->port);
+
 	if (svc) {
 		int count = 0;
 		struct ip_vs_dest *dest;
@@ -2282,13 +2295,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	{
 		struct ip_vs_service_entry *entry;
 		struct ip_vs_service *svc;
+		union nf_inet_addr addr;
 
 		entry = (struct ip_vs_service_entry *)arg;
+		addr.ip = entry->addr;
 		if (entry->fwmark)
-			svc = __ip_vs_svc_fwm_get(entry->fwmark);
+			svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
 		else
-			svc = __ip_vs_service_get(entry->protocol,
-						  entry->addr, entry->port);
+			svc = __ip_vs_service_get(AF_INET, entry->protocol,
+						  &addr, entry->port);
 		if (svc) {
 			ip_vs_copy_service(entry, svc);
 			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2613,10 +2628,10 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
 
 		/* prefill flags from service if it already exists */
 		if (usvc->fwmark)
-			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+			svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
 		else
-			svc = __ip_vs_service_get(usvc->protocol, usvc->addr.ip,
-						  usvc->port);
+			svc = __ip_vs_service_get(usvc->af, usvc->protocol,
+						  &usvc->addr, usvc->port);
 		if (svc) {
 			usvc->flags = svc->flags;
 			ip_vs_service_put(svc);
@@ -2644,10 +2659,10 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
 		return ERR_PTR(ret);
 
 	if (usvc.fwmark)
-		return __ip_vs_svc_fwm_get(usvc.fwmark);
+		return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
 	else
-		return __ip_vs_service_get(usvc.protocol, usvc.addr.ip,
-					   usvc.port);
+		return __ip_vs_service_get(usvc.af, usvc.protocol,
+					   &usvc.addr, usvc.port);
 }
 
 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
@@ -2955,10 +2970,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 
 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
 	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_get(usvc.protocol, usvc.addr.ip,
-					  usvc.port);
+		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+					  &usvc.addr, usvc.port);
 	else
-		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
 
 	/* Unless we're adding a new service, the service must already exist */
 	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
-- 
cgit v1.2.3-70-g09d2


From 3c2e0505d25cdc9425336f167fd4ff5f505aecff Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:38 +0200
Subject: IPVS: Add v6 support to ip_vs_service_get()

Add support for selecting services based on their address family to
ip_vs_service_get() and adjust the callers.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             |  3 ++-
 net/ipv4/ipvs/ip_vs_ctl.c       | 24 +++++++++++++-----------
 net/ipv4/ipvs/ip_vs_proto_tcp.c |  9 ++++++---
 net/ipv4/ipvs/ip_vs_proto_udp.c | 11 +++++++----
 4 files changed, 28 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 1adf8a026e4..30baed0e696 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -783,7 +783,8 @@ extern struct ip_vs_stats ip_vs_stats;
 extern const struct ctl_path net_vs_ctl_path[];
 
 extern struct ip_vs_service *
-ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+		  const union nf_inet_addr *vaddr, __be16 vport);
 
 static inline void ip_vs_service_put(struct ip_vs_service *svc)
 {
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index a2d69b2ce6a..1f3fc66e694 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -421,23 +421,24 @@ __ip_vs_svc_fwm_get(int af, __u32 fwmark)
 }
 
 struct ip_vs_service *
-ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+		  const union nf_inet_addr *vaddr, __be16 vport)
 {
 	struct ip_vs_service *svc;
-	union nf_inet_addr _vaddr = { .ip = vaddr };
+
 	read_lock(&__ip_vs_svc_lock);
 
 	/*
 	 *	Check the table hashed by fwmark first
 	 */
-	if (fwmark && (svc = __ip_vs_svc_fwm_get(AF_INET, fwmark)))
+	if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
 		goto out;
 
 	/*
 	 *	Check the table hashed by <protocol,addr,port>
 	 *	for "full" addressed entries
 	 */
-	svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, vport);
+	svc = __ip_vs_service_get(af, protocol, vaddr, vport);
 
 	if (svc == NULL
 	    && protocol == IPPROTO_TCP
@@ -447,7 +448,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
 		 * Check if ftp service entry exists, the packet
 		 * might belong to FTP data connections.
 		 */
-		svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, FTPPORT);
+		svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
 	}
 
 	if (svc == NULL
@@ -455,16 +456,16 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
 		/*
 		 * Check if the catch-all port (port zero) exists
 		 */
-		svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, 0);
+		svc = __ip_vs_service_get(af, protocol, vaddr, 0);
 	}
 
   out:
 	read_unlock(&__ip_vs_svc_lock);
 
-	IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
-		  fwmark, ip_vs_proto_name(protocol),
-		  NIPQUAD(vaddr), ntohs(vport),
-		  svc?"hit":"not hit");
+	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
+		      fwmark, ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
+		      svc ? "hit" : "not hit");
 
 	return svc;
 }
@@ -605,8 +606,9 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_service *svc;
+	union nf_inet_addr _vaddr = { .ip = vaddr };
 
-	svc = ip_vs_service_get(0, protocol, vaddr, vport);
+	svc = ip_vs_service_get(AF_INET, 0, protocol, &_vaddr, vport);
 	if (!svc)
 		return NULL;
 	dest = ip_vs_lookup_dest(svc, daddr, dport);
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 15860e1441b..fe93c9e6ff6 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -74,16 +74,19 @@ tcp_conn_schedule(struct sk_buff *skb,
 {
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
+	struct ip_vs_iphdr iph;
 
-	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+
+	th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		*verdict = NF_DROP;
 		return 0;
 	}
 
 	if (th->syn &&
-	    (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
-				     ip_hdr(skb)->daddr, th->dest))) {
+	    (svc = ip_vs_service_get(AF_INET, skb->mark, iph.protocol,
+				     &iph.daddr, th->dest))) {
 		if (ip_vs_todrop()) {
 			/*
 			 * It seems that we are very loaded.
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8dfad5db829..d208ed6eb9f 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -80,16 +80,19 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
 {
 	struct ip_vs_service *svc;
 	struct udphdr _udph, *uh;
+	struct ip_vs_iphdr iph;
 
-	uh = skb_header_pointer(skb, ip_hdrlen(skb),
-				sizeof(_udph), &_udph);
+	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+
+	uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
 	if (uh == NULL) {
 		*verdict = NF_DROP;
 		return 0;
 	}
 
-	if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
-				     ip_hdr(skb)->daddr, uh->dest))) {
+	svc = ip_vs_service_get(AF_INET, skb->mark, iph.protocol,
+				&iph.daddr, uh->dest);
+	if (svc) {
 		if (ip_vs_todrop()) {
 			/*
 			 * It seems that we are very loaded.
-- 
cgit v1.2.3-70-g09d2


From b14198f6c1bea1687d20723db35d8effecd9d899 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:39 +0200
Subject: IPVS: Add IPv6 support flag to schedulers

Add 'supports_ipv6' flag to struct ip_vs_scheduler to indicate whether a
scheduler supports IPv6. Set the flag to 1 in schedulers that work with
IPv6, 0 otherwise. This flag is checked in a later patch while trying to
add a service with a specific scheduler. Adjust debug in v6-supporting
schedulers to work with both address families.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h         |  3 +++
 net/ipv4/ipvs/ip_vs_dh.c    |  3 +++
 net/ipv4/ipvs/ip_vs_lblc.c  |  3 +++
 net/ipv4/ipvs/ip_vs_lblcr.c |  3 +++
 net/ipv4/ipvs/ip_vs_lc.c    | 11 +++++++----
 net/ipv4/ipvs/ip_vs_nq.c    | 15 +++++++++------
 net/ipv4/ipvs/ip_vs_rr.c    | 13 ++++++++-----
 net/ipv4/ipvs/ip_vs_sed.c   | 15 +++++++++------
 net/ipv4/ipvs/ip_vs_sh.c    |  3 +++
 net/ipv4/ipvs/ip_vs_wlc.c   | 15 +++++++++------
 net/ipv4/ipvs/ip_vs_wrr.c   | 15 +++++++++------
 11 files changed, 66 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 30baed0e696..3d3b699dc02 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -516,6 +516,9 @@ struct ip_vs_scheduler {
 	char			*name;		/* scheduler name */
 	atomic_t		refcnt;		/* reference counter */
 	struct module		*module;	/* THIS_MODULE/NULL */
+#ifdef CONFIG_IP_VS_IPV6
+	int			supports_ipv6;	/* scheduler has IPv6 support */
+#endif
 
 	/* scheduler initializing service */
 	int (*init_service)(struct ip_vs_service *svc);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index 9f9d795dbd7..a16943fd72f 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -234,6 +234,9 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
 	.init_service =		ip_vs_dh_init_svc,
 	.done_service =		ip_vs_dh_done_svc,
 	.update_service =	ip_vs_dh_update_svc,
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 69309edc0c4..6ecef3518ca 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -522,6 +522,9 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
 	.init_service =		ip_vs_lblc_init_svc,
 	.done_service =		ip_vs_lblc_done_svc,
 	.schedule =		ip_vs_lblc_schedule,
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 51c746e2083..1f75ea83bcf 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -722,6 +722,9 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
 	.init_service =		ip_vs_lblcr_init_svc,
 	.done_service =		ip_vs_lblcr_done_svc,
 	.schedule =		ip_vs_lblcr_schedule,
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index 551d293347f..b69f808ac46 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -67,10 +67,10 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	}
 
 	if (least)
-	IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n",
-		  NIPQUAD(least->addr.ip), ntohs(least->port),
-		  atomic_read(&least->activeconns),
-		  atomic_read(&least->inactconns));
+	IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->inactconns));
 
 	return least;
 }
@@ -81,6 +81,9 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = {
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
 	.schedule =		ip_vs_lc_schedule,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index aa0e32ad348..9a2d8033f08 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -99,12 +99,12 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		return NULL;
 
   out:
-	IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u "
-		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr.ip), ntohs(least->port),
-		  atomic_read(&least->activeconns),
-		  atomic_read(&least->refcnt),
-		  atomic_read(&least->weight), loh);
+	IP_VS_DBG_BUF(6, "NQ: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
 
 	return least;
 }
@@ -116,6 +116,9 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
 	.schedule =		ip_vs_nq_schedule,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index 27f0b624283..a22195f68ac 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -74,11 +74,11 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
   out:
 	svc->sched_data = q;
 	write_unlock(&svc->sched_lock);
-	IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u "
-		  "activeconns %d refcnt %d weight %d\n",
-		  NIPQUAD(dest->addr.ip), ntohs(dest->port),
-		  atomic_read(&dest->activeconns),
-		  atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+	IP_VS_DBG_BUF(6, "RR: server %s:%u "
+		      "activeconns %d refcnt %d weight %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+		      atomic_read(&dest->activeconns),
+		      atomic_read(&dest->refcnt), atomic_read(&dest->weight));
 
 	return dest;
 }
@@ -89,6 +89,9 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
 	.init_service =		ip_vs_rr_init_svc,
 	.update_service =	ip_vs_rr_update_svc,
 	.schedule =		ip_vs_rr_schedule,
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index 38b574b2fdf..7d2f22f04b8 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -101,12 +101,12 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		}
 	}
 
-	IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
-		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr.ip), ntohs(least->port),
-		  atomic_read(&least->activeconns),
-		  atomic_read(&least->refcnt),
-		  atomic_read(&least->weight), loh);
+	IP_VS_DBG_BUF(6, "SED: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
 
 	return least;
 }
@@ -118,6 +118,9 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
 	.schedule =		ip_vs_sed_schedule,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index c9e54e2ec29..1d96de27fef 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -231,6 +231,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list	 =		LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
 	.init_service =		ip_vs_sh_init_svc,
 	.done_service =		ip_vs_sh_done_svc,
 	.update_service =	ip_vs_sh_update_svc,
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index 09fd993040f..8c596e71259 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -89,12 +89,12 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		}
 	}
 
-	IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u "
-		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr.ip), ntohs(least->port),
-		  atomic_read(&least->activeconns),
-		  atomic_read(&least->refcnt),
-		  atomic_read(&least->weight), loh);
+	IP_VS_DBG_BUF(6, "WLC: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
 
 	return least;
 }
@@ -106,6 +106,9 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
 	.schedule =		ip_vs_wlc_schedule,
 };
 
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
index 19c49b234f3..7ea92fed50b 100644
--- a/net/ipv4/ipvs/ip_vs_wrr.c
+++ b/net/ipv4/ipvs/ip_vs_wrr.c
@@ -195,12 +195,12 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		}
 	}
 
-	IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u "
-		  "activeconns %d refcnt %d weight %d\n",
-		  NIPQUAD(dest->addr.ip), ntohs(dest->port),
-		  atomic_read(&dest->activeconns),
-		  atomic_read(&dest->refcnt),
-		  atomic_read(&dest->weight));
+	IP_VS_DBG_BUF(6, "WRR: server %s:%u "
+		      "activeconns %d refcnt %d weight %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+		      atomic_read(&dest->activeconns),
+		      atomic_read(&dest->refcnt),
+		      atomic_read(&dest->weight));
 
   out:
 	write_unlock(&svc->sched_lock);
@@ -213,6 +213,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
 	.init_service =		ip_vs_wrr_init_svc,
 	.done_service =		ip_vs_wrr_done_svc,
 	.update_service =	ip_vs_wrr_update_svc,
-- 
cgit v1.2.3-70-g09d2


From 51ef348b14183789e4cb3444d05ce83b1b69d8fb Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:40 +0200
Subject: IPVS: Add 'af' args to protocol handler functions

Add 'af' arguments to conn_schedule(), conn_in_get(), conn_out_get() and
csum_check() function pointers in struct ip_vs_protocol. Extend the
respective functions for TCP, UDP, AH and ESP and adjust the callers.

The changes in the callers need to be somewhat extensive, since they now
need to pass a filled out struct ip_vs_iphdr * to the modified functions
instead of a struct iphdr *.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h                | 15 ++++---
 net/ipv4/ipvs/ip_vs_core.c         | 64 +++++++++++++++---------------
 net/ipv4/ipvs/ip_vs_proto_ah_esp.c | 56 +++++++++++++-------------
 net/ipv4/ipvs/ip_vs_proto_tcp.c    | 79 ++++++++++++++++++++++++-------------
 net/ipv4/ipvs/ip_vs_proto_udp.c    | 81 ++++++++++++++++++++++++--------------
 5 files changed, 171 insertions(+), 124 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3d3b699dc02..68f004f9bcc 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -296,21 +296,23 @@ struct ip_vs_protocol {
 
 	void (*exit)(struct ip_vs_protocol *pp);
 
-	int (*conn_schedule)(struct sk_buff *skb,
+	int (*conn_schedule)(int af, struct sk_buff *skb,
 			     struct ip_vs_protocol *pp,
 			     int *verdict, struct ip_vs_conn **cpp);
 
 	struct ip_vs_conn *
-	(*conn_in_get)(const struct sk_buff *skb,
+	(*conn_in_get)(int af,
+		       const struct sk_buff *skb,
 		       struct ip_vs_protocol *pp,
-		       const struct iphdr *iph,
+		       const struct ip_vs_iphdr *iph,
 		       unsigned int proto_off,
 		       int inverse);
 
 	struct ip_vs_conn *
-	(*conn_out_get)(const struct sk_buff *skb,
+	(*conn_out_get)(int af,
+			const struct sk_buff *skb,
 			struct ip_vs_protocol *pp,
-			const struct iphdr *iph,
+			const struct ip_vs_iphdr *iph,
 			unsigned int proto_off,
 			int inverse);
 
@@ -320,7 +322,8 @@ struct ip_vs_protocol {
 	int (*dnat_handler)(struct sk_buff *skb,
 			    struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
 
-	int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp);
+	int (*csum_check)(int af, struct sk_buff *skb,
+			  struct ip_vs_protocol *pp);
 
 	const char *(*state_name)(int state);
 
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 4a54f33b60d..34aaa1480d9 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -572,6 +572,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
+	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
 	unsigned int offset, ihl, verdict;
@@ -627,8 +628,9 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 
 	offset += cih->ihl * 4;
 
+	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(skb, pp, cih, offset, 1);
+	cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -686,43 +688,41 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	  const struct net_device *in, const struct net_device *out,
 	  int (*okfn)(struct sk_buff *))
 {
-	struct iphdr	*iph;
+	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
-	int ihl;
 
 	EnterFunction(11);
 
 	if (skb->ipvs_property)
 		return NF_ACCEPT;
 
-	iph = ip_hdr(skb);
-	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
+	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+	if (unlikely(iph.protocol == IPPROTO_ICMP)) {
 		int related, verdict = ip_vs_out_icmp(skb, &related);
 
 		if (related)
 			return verdict;
-		iph = ip_hdr(skb);
+		ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
 	}
 
-	pp = ip_vs_proto_get(iph->protocol);
+	pp = ip_vs_proto_get(iph.protocol);
 	if (unlikely(!pp))
 		return NF_ACCEPT;
 
 	/* reassemble IP fragments */
-	if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
+	if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
 		     !pp->dont_defrag)) {
 		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
 			return NF_STOLEN;
-		iph = ip_hdr(skb);
-	}
 
-	ihl = iph->ihl << 2;
+		ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+	}
 
 	/*
 	 * Check if the packet belongs to an existing entry
 	 */
-	cp = pp->conn_out_get(skb, pp, iph, ihl, 0);
+	cp = pp->conn_out_get(AF_INET, skb, pp, &iph, iph.len, 0);
 
 	if (unlikely(!cp)) {
 		if (sysctl_ip_vs_nat_icmp_send &&
@@ -730,18 +730,18 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 		     pp->protocol == IPPROTO_UDP)) {
 			__be16 _ports[2], *pptr;
 
-			pptr = skb_header_pointer(skb, ihl,
+			pptr = skb_header_pointer(skb, iph.len,
 						  sizeof(_ports), _ports);
 			if (pptr == NULL)
 				return NF_ACCEPT;	/* Not for me */
-			if (ip_vs_lookup_real_service(iph->protocol,
-						      iph->saddr, pptr[0])) {
+			if (ip_vs_lookup_real_service(iph.protocol,
+						      iph.saddr.ip, pptr[0])) {
 				/*
 				 * Notify the real server: there is no
 				 * existing entry if it is not RST
 				 * packet or not TCP packet.
 				 */
-				if (iph->protocol != IPPROTO_TCP
+				if (iph.protocol != IPPROTO_TCP
 				    || !is_tcp_reset(skb)) {
 					icmp_send(skb,ICMP_DEST_UNREACH,
 						  ICMP_PORT_UNREACH, 0);
@@ -756,7 +756,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 
 	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
 
-	if (!skb_make_writable(skb, ihl))
+	if (!skb_make_writable(skb, iph.len))
 		goto drop;
 
 	/* mangle the packet */
@@ -804,6 +804,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
+	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
 	unsigned int offset, ihl, verdict;
@@ -860,8 +861,9 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
 	offset += cih->ihl * 4;
 
+	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_in_get(skb, pp, cih, offset, 1);
+	cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -897,11 +899,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	 const struct net_device *in, const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	struct iphdr	*iph;
+	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
 	int ret, restart;
-	int ihl;
+
+	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
 
 	/*
 	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
@@ -909,38 +912,35 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	 */
 	if (unlikely(skb->pkt_type != PACKET_HOST
 		     || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
-		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
-			  skb->pkt_type,
-			  ip_hdr(skb)->protocol,
-			  NIPQUAD(ip_hdr(skb)->daddr));
+		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
+			      skb->pkt_type,
+			      iph.protocol,
+			      IP_VS_DBG_ADDR(AF_INET, &iph.daddr));
 		return NF_ACCEPT;
 	}
 
-	iph = ip_hdr(skb);
-	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
+	if (unlikely(iph.protocol == IPPROTO_ICMP)) {
 		int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
 
 		if (related)
 			return verdict;
-		iph = ip_hdr(skb);
+		ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
 	}
 
 	/* Protocol supported? */
-	pp = ip_vs_proto_get(iph->protocol);
+	pp = ip_vs_proto_get(iph.protocol);
 	if (unlikely(!pp))
 		return NF_ACCEPT;
 
-	ihl = iph->ihl << 2;
-
 	/*
 	 * Check if the packet belongs to an existing connection entry
 	 */
-	cp = pp->conn_in_get(skb, pp, iph, ihl, 0);
+	cp = pp->conn_in_get(AF_INET, skb, pp, &iph, iph.len, 0);
 
 	if (unlikely(!cp)) {
 		int v;
 
-		if (!pp->conn_schedule(skb, pp, &v, &cp))
+		if (!pp->conn_schedule(AF_INET, skb, pp, &v, &cp))
 			return v;
 	}
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
index 3f9ebd7639a..2a361a99174 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
@@ -39,25 +39,23 @@ struct isakmp_hdr {
 
 
 static struct ip_vs_conn *
-ah_esp_conn_in_get(const struct sk_buff *skb,
-		   struct ip_vs_protocol *pp,
-		   const struct iphdr *iph,
-		   unsigned int proto_off,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		   const struct ip_vs_iphdr *iph, unsigned int proto_off,
 		   int inverse)
 {
 	struct ip_vs_conn *cp;
 
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_in_get(IPPROTO_UDP,
-				       iph->saddr,
+				       iph->saddr.ip,
 				       htons(PORT_ISAKMP),
-				       iph->daddr,
+				       iph->daddr.ip,
 				       htons(PORT_ISAKMP));
 	} else {
 		cp = ip_vs_conn_in_get(IPPROTO_UDP,
-				       iph->daddr,
+				       iph->daddr.ip,
 				       htons(PORT_ISAKMP),
-				       iph->saddr,
+				       iph->saddr.ip,
 				       htons(PORT_ISAKMP));
 	}
 
@@ -66,12 +64,12 @@ ah_esp_conn_in_get(const struct sk_buff *skb,
 		 * We are not sure if the packet is from our
 		 * service, so our conn_schedule hook should return NF_ACCEPT
 		 */
-		IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
-			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
-			  inverse ? "ICMP+" : "",
-			  pp->name,
-			  NIPQUAD(iph->saddr),
-			  NIPQUAD(iph->daddr));
+		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
+			      "%s%s %s->%s\n",
+			      inverse ? "ICMP+" : "",
+			      pp->name,
+			      IP_VS_DBG_ADDR(af, &iph->saddr),
+			      IP_VS_DBG_ADDR(af, &iph->daddr));
 	}
 
 	return cp;
@@ -79,32 +77,35 @@ ah_esp_conn_in_get(const struct sk_buff *skb,
 
 
 static struct ip_vs_conn *
-ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		    const struct iphdr *iph, unsigned int proto_off, int inverse)
+ah_esp_conn_out_get(int af, const struct sk_buff *skb,
+		    struct ip_vs_protocol *pp,
+		    const struct ip_vs_iphdr *iph,
+		    unsigned int proto_off,
+		    int inverse)
 {
 	struct ip_vs_conn *cp;
 
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_out_get(IPPROTO_UDP,
-					iph->saddr,
+					iph->saddr.ip,
 					htons(PORT_ISAKMP),
-					iph->daddr,
+					iph->daddr.ip,
 					htons(PORT_ISAKMP));
 	} else {
 		cp = ip_vs_conn_out_get(IPPROTO_UDP,
-					iph->daddr,
+					iph->daddr.ip,
 					htons(PORT_ISAKMP),
-					iph->saddr,
+					iph->saddr.ip,
 					htons(PORT_ISAKMP));
 	}
 
 	if (!cp) {
-		IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
-			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
-			  inverse ? "ICMP+" : "",
-			  pp->name,
-			  NIPQUAD(iph->saddr),
-			  NIPQUAD(iph->daddr));
+		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
+			      "%s%s %s->%s\n",
+			      inverse ? "ICMP+" : "",
+			      pp->name,
+			      IP_VS_DBG_ADDR(af, &iph->saddr),
+			      IP_VS_DBG_ADDR(af, &iph->daddr));
 	}
 
 	return cp;
@@ -112,8 +113,7 @@ ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static int
-ah_esp_conn_schedule(struct sk_buff *skb,
-		     struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		     int *verdict, struct ip_vs_conn **cpp)
 {
 	/*
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index fe93c9e6ff6..9211afa8f30 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -25,8 +25,9 @@
 
 
 static struct ip_vs_conn *
-tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		const struct iphdr *iph, unsigned int proto_off, int inverse)
+tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		int inverse)
 {
 	__be16 _ports[2], *pptr;
 
@@ -36,18 +37,19 @@ tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 	if (likely(!inverse)) {
 		return ip_vs_conn_in_get(iph->protocol,
-					 iph->saddr, pptr[0],
-					 iph->daddr, pptr[1]);
+					 iph->saddr.ip, pptr[0],
+					 iph->daddr.ip, pptr[1]);
 	} else {
 		return ip_vs_conn_in_get(iph->protocol,
-					 iph->daddr, pptr[1],
-					 iph->saddr, pptr[0]);
+					 iph->daddr.ip, pptr[1],
+					 iph->saddr.ip, pptr[0]);
 	}
 }
 
 static struct ip_vs_conn *
-tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		 const struct iphdr *iph, unsigned int proto_off, int inverse)
+tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		 int inverse)
 {
 	__be16 _ports[2], *pptr;
 
@@ -57,26 +59,25 @@ tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 	if (likely(!inverse)) {
 		return ip_vs_conn_out_get(iph->protocol,
-					  iph->saddr, pptr[0],
-					  iph->daddr, pptr[1]);
+					  iph->saddr.ip, pptr[0],
+					  iph->daddr.ip, pptr[1]);
 	} else {
 		return ip_vs_conn_out_get(iph->protocol,
-					  iph->daddr, pptr[1],
-					  iph->saddr, pptr[0]);
+					  iph->daddr.ip, pptr[1],
+					  iph->saddr.ip, pptr[0]);
 	}
 }
 
 
 static int
-tcp_conn_schedule(struct sk_buff *skb,
-		  struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  int *verdict, struct ip_vs_conn **cpp)
 {
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
 	struct ip_vs_iphdr iph;
 
-	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
 	th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
@@ -85,8 +86,8 @@ tcp_conn_schedule(struct sk_buff *skb,
 	}
 
 	if (th->syn &&
-	    (svc = ip_vs_service_get(AF_INET, skb->mark, iph.protocol,
-				     &iph.daddr, th->dest))) {
+	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
+				     th->dest))) {
 		if (ip_vs_todrop()) {
 			/*
 			 * It seems that we are very loaded.
@@ -136,7 +137,7 @@ tcp_snat_handler(struct sk_buff *skb,
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(skb, pp))
+		if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
@@ -182,7 +183,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(skb, pp))
+		if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp))
 			return 0;
 
 		/*
@@ -219,21 +220,43 @@ tcp_dnat_handler(struct sk_buff *skb,
 
 
 static int
-tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
-	const unsigned int tcphoff = ip_hdrlen(skb);
+	unsigned int tcphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_NONE:
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 	case CHECKSUM_COMPLETE:
-		if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-				      skb->len - tcphoff,
-				      ip_hdr(skb)->protocol, skb->csum)) {
-			IP_VS_DBG_RL_PKT(0, pp, skb, 0,
-					 "Failed checksum for");
-			return 0;
-		}
+#ifdef CONFIG_IP_VS_IPV6
+		if (af == AF_INET6) {
+			if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					    &ipv6_hdr(skb)->daddr,
+					    skb->len - tcphoff,
+					    ipv6_hdr(skb)->nexthdr,
+					    skb->csum)) {
+				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+						 "Failed checksum for");
+				return 0;
+			}
+		} else
+#endif
+			if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+					      ip_hdr(skb)->daddr,
+					      skb->len - tcphoff,
+					      ip_hdr(skb)->protocol,
+					      skb->csum)) {
+				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+						 "Failed checksum for");
+				return 0;
+			}
 		break;
 	default:
 		/* No need to checksum. */
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index d208ed6eb9f..d3a1b1f2d10 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -24,8 +24,9 @@
 #include <net/ip.h>
 
 static struct ip_vs_conn *
-udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		const struct iphdr *iph, unsigned int proto_off, int inverse)
+udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		int inverse)
 {
 	struct ip_vs_conn *cp;
 	__be16 _ports[2], *pptr;
@@ -36,12 +37,12 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_in_get(iph->protocol,
-				       iph->saddr, pptr[0],
-				       iph->daddr, pptr[1]);
+				       iph->saddr.ip, pptr[0],
+				       iph->daddr.ip, pptr[1]);
 	} else {
 		cp = ip_vs_conn_in_get(iph->protocol,
-				       iph->daddr, pptr[1],
-				       iph->saddr, pptr[0]);
+				       iph->daddr.ip, pptr[1],
+				       iph->saddr.ip, pptr[0]);
 	}
 
 	return cp;
@@ -49,25 +50,25 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static struct ip_vs_conn *
-udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		 const struct iphdr *iph, unsigned int proto_off, int inverse)
+udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		 int inverse)
 {
 	struct ip_vs_conn *cp;
 	__be16 _ports[2], *pptr;
 
-	pptr = skb_header_pointer(skb, ip_hdrlen(skb),
-				  sizeof(_ports), _ports);
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
 	if (pptr == NULL)
 		return NULL;
 
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_out_get(iph->protocol,
-					iph->saddr, pptr[0],
-					iph->daddr, pptr[1]);
+					iph->saddr.ip, pptr[0],
+					iph->daddr.ip, pptr[1]);
 	} else {
 		cp = ip_vs_conn_out_get(iph->protocol,
-					iph->daddr, pptr[1],
-					iph->saddr, pptr[0]);
+					iph->daddr.ip, pptr[1],
+					iph->saddr.ip, pptr[0]);
 	}
 
 	return cp;
@@ -75,14 +76,14 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static int
-udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  int *verdict, struct ip_vs_conn **cpp)
 {
 	struct ip_vs_service *svc;
 	struct udphdr _udph, *uh;
 	struct ip_vs_iphdr iph;
 
-	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
 	uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
 	if (uh == NULL) {
@@ -90,7 +91,7 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return 0;
 	}
 
-	svc = ip_vs_service_get(AF_INET, skb->mark, iph.protocol,
+	svc = ip_vs_service_get(af, skb->mark, iph.protocol,
 				&iph.daddr, uh->dest);
 	if (svc) {
 		if (ip_vs_todrop()) {
@@ -143,7 +144,7 @@ udp_snat_handler(struct sk_buff *skb,
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(skb, pp))
+		if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp))
 			return 0;
 
 		/*
@@ -195,7 +196,7 @@ udp_dnat_handler(struct sk_buff *skb,
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(skb, pp))
+		if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp))
 			return 0;
 
 		/*
@@ -234,10 +235,17 @@ udp_dnat_handler(struct sk_buff *skb,
 
 
 static int
-udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
 	struct udphdr _udph, *uh;
-	const unsigned int udphoff = ip_hdrlen(skb);
+	unsigned int udphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
 
 	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
 	if (uh == NULL)
@@ -249,15 +257,28 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 			skb->csum = skb_checksum(skb, udphoff,
 						 skb->len - udphoff, 0);
 		case CHECKSUM_COMPLETE:
-			if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
-					      ip_hdr(skb)->daddr,
-					      skb->len - udphoff,
-					      ip_hdr(skb)->protocol,
-					      skb->csum)) {
-				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
-						 "Failed checksum for");
-				return 0;
-			}
+#ifdef CONFIG_IP_VS_IPV6
+			if (af == AF_INET6) {
+				if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						    &ipv6_hdr(skb)->daddr,
+						    skb->len - udphoff,
+						    ipv6_hdr(skb)->nexthdr,
+						    skb->csum)) {
+					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+							 "Failed checksum for");
+					return 0;
+				}
+			} else
+#endif
+				if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						      ip_hdr(skb)->daddr,
+						      skb->len - udphoff,
+						      ip_hdr(skb)->protocol,
+						      skb->csum)) {
+					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+							 "Failed checksum for");
+					return 0;
+				}
 			break;
 		default:
 			/* No need to checksum. */
-- 
cgit v1.2.3-70-g09d2


From 3b047d9d0407e78a52f009835a0e26cb62edb8c7 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:41 +0200
Subject: IPVS: Add protocol debug functions for IPv6

Add protocol (TCP, UDP, AH, ESP) debug functions for IPv6 packet debug
output.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_proto.c        | 63 +++++++++++++++++++++++++++++++++++---
 net/ipv4/ipvs/ip_vs_proto_ah_esp.c | 36 ++++++++++++++++++++--
 2 files changed, 93 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 6099a88fc20..50f6215beda 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -152,10 +152,10 @@ const char * ip_vs_state_name(__u16 proto, int state)
 
 
 void
-ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
-			  const struct sk_buff *skb,
-			  int offset,
-			  const char *msg)
+ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
+			     const struct sk_buff *skb,
+			     int offset,
+			     const char *msg)
 {
 	char buf[128];
 	struct iphdr _iph, *ih;
@@ -189,6 +189,61 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
 	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+void
+ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
+			     const struct sk_buff *skb,
+			     int offset,
+			     const char *msg)
+{
+	char buf[192];
+	struct ipv6hdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else if (ih->nexthdr == IPPROTO_FRAGMENT)
+		sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag",
+			pp->name, NIP6(ih->saddr),
+			NIP6(ih->daddr));
+	else {
+		__be16 _ports[2], *pptr;
+
+		pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
+					  sizeof(_ports), _ports);
+		if (pptr == NULL)
+			sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT,
+				pp->name,
+				NIP6(ih->saddr),
+				NIP6(ih->daddr));
+		else
+			sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u",
+				pp->name,
+				NIP6(ih->saddr),
+				ntohs(pptr[0]),
+				NIP6(ih->daddr),
+				ntohs(pptr[1]));
+	}
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+
+void
+ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
+			  const struct sk_buff *skb,
+			  int offset,
+			  const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb->protocol == __constant_htons(ETH_P_IPV6))
+		ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
+	else
+#endif
+		ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
+}
+
 
 int __init ip_vs_protocol_init(void)
 {
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
index 2a361a99174..4b0b8f268d1 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
@@ -125,8 +125,8 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static void
-ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
-		    int offset, const char *msg)
+ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		       int offset, const char *msg)
 {
 	char buf[256];
 	struct iphdr _iph, *ih;
@@ -142,6 +142,38 @@ ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
 	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static void
+ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		       int offset, const char *msg)
+{
+	char buf[256];
+	struct ipv6hdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else
+		sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT,
+			pp->name, NIP6(ih->saddr),
+			NIP6(ih->daddr));
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+static void
+ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		    int offset, const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb->protocol == __constant_htons(ETH_P_IPV6))
+		ah_esp_debug_packet_v6(pp, skb, offset, msg);
+	else
+#endif
+		ah_esp_debug_packet_v4(pp, skb, offset, msg);
+}
+
 
 static void ah_esp_init(struct ip_vs_protocol *pp)
 {
-- 
cgit v1.2.3-70-g09d2


From 0bbdd42b7efa66685b6d74701bcde3a596a3a59d Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:42 +0200
Subject: IPVS: Extend protocol DNAT/SNAT and state handlers

Extend protocol DNAT/SNAT and state handlers to work with IPv6. Also
change/introduce new checksumming helper functions for this.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             | 11 ++++++
 net/ipv4/ipvs/ip_vs_proto_tcp.c | 85 ++++++++++++++++++++++++++++++++---------
 net/ipv4/ipvs/ip_vs_proto_udp.c | 82 ++++++++++++++++++++++++++++++---------
 3 files changed, 142 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 68f004f9bcc..c173f1a7de2 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -904,6 +904,17 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
 	return csum_partial((char *) diff, sizeof(diff), oldsum);
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
+					__wsum oldsum)
+{
+	__be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0],
+			    new[3],  new[2],  new[1],  new[0] };
+
+	return csum_partial((char *) diff, sizeof(diff), oldsum);
+}
+#endif
+
 static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
 {
 	__be16 diff[2] = { ~old, new };
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 9211afa8f30..3daae43ae44 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -114,11 +114,21 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static inline void
-tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
+tcp_fast_csum_update(int af, struct tcphdr *tcph,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
 		     __be16 oldport, __be16 newport)
 {
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcph->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(tcph->check))));
+	else
+#endif
 	tcph->check =
-		csum_fold(ip_vs_check_diff4(oldip, newip,
+		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 				 ip_vs_check_diff2(oldport, newport,
 						~csum_unfold(tcph->check))));
 }
@@ -129,7 +139,14 @@ tcp_snat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	const unsigned int tcphoff = ip_hdrlen(skb);
+	unsigned int tcphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
 	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
@@ -137,7 +154,7 @@ tcp_snat_handler(struct sk_buff *skb,
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp))
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
@@ -145,13 +162,13 @@ tcp_snat_handler(struct sk_buff *skb,
 			return 0;
 	}
 
-	tcph = (void *)ip_hdr(skb) + tcphoff;
+	tcph = (void *)skb_network_header(skb) + tcphoff;
 	tcph->source = cp->vport;
 
 	/* Adjust TCP checksums */
 	if (!cp->app) {
 		/* Only port and addr are changed, do fast csum update */
-		tcp_fast_csum_update(tcph, cp->daddr.ip, cp->vaddr.ip,
+		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 			skb->ip_summed = CHECKSUM_NONE;
@@ -159,9 +176,20 @@ tcp_snat_handler(struct sk_buff *skb,
 		/* full checksum calculation */
 		tcph->check = 0;
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-		tcph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip,
-						skb->len - tcphoff,
-						cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
+						      &cp->caddr.in6,
+						      skb->len - tcphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
+							cp->caddr.ip,
+							skb->len - tcphoff,
+							cp->protocol,
+							skb->csum);
+
 		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 			  pp->name, tcph->check,
 			  (char*)&(tcph->check) - (char*)tcph);
@@ -175,7 +203,14 @@ tcp_dnat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	const unsigned int tcphoff = ip_hdrlen(skb);
+	unsigned int tcphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
 	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
@@ -183,7 +218,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp))
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 			return 0;
 
 		/*
@@ -194,7 +229,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 			return 0;
 	}
 
-	tcph = (void *)ip_hdr(skb) + tcphoff;
+	tcph = (void *)skb_network_header(skb) + tcphoff;
 	tcph->dest = cp->dport;
 
 	/*
@@ -202,7 +237,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 	 */
 	if (!cp->app) {
 		/* Only port and addr are changed, do fast csum update */
-		tcp_fast_csum_update(tcph, cp->vaddr.ip, cp->daddr.ip,
+		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 			skb->ip_summed = CHECKSUM_NONE;
@@ -210,9 +245,19 @@ tcp_dnat_handler(struct sk_buff *skb,
 		/* full checksum calculation */
 		tcph->check = 0;
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-		tcph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip,
-						skb->len - tcphoff,
-						cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			tcph->check = csum_ipv6_magic(&cp->caddr.in6,
+						      &cp->daddr.in6,
+						      skb->len - tcphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			tcph->check = csum_tcpudp_magic(cp->caddr.ip,
+							cp->daddr.ip,
+							skb->len - tcphoff,
+							cp->protocol,
+							skb->csum);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	return 1;
@@ -487,7 +532,13 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 {
 	struct tcphdr _tcph, *th;
 
-	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+#ifdef CONFIG_IP_VS_IPV6
+	int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
+#else
+	int ihl = ip_hdrlen(skb);
+#endif
+
+	th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return 0;
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index d3a1b1f2d10..6cca0ad8e32 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -120,13 +120,23 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 
 static inline void
-udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
+udp_fast_csum_update(int af, struct udphdr *uhdr,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
 		     __be16 oldport, __be16 newport)
 {
-	uhdr->check =
-		csum_fold(ip_vs_check_diff4(oldip, newip,
-				 ip_vs_check_diff2(oldport, newport,
-					~csum_unfold(uhdr->check))));
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		uhdr->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(uhdr->check))));
+	else
+#endif
+		uhdr->check =
+			csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+					 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(uhdr->check))));
 	if (!uhdr->check)
 		uhdr->check = CSUM_MANGLED_0;
 }
@@ -136,7 +146,14 @@ udp_snat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	const unsigned int udphoff = ip_hdrlen(skb);
+	unsigned int udphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
 	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
@@ -144,7 +161,7 @@ udp_snat_handler(struct sk_buff *skb,
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp))
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 			return 0;
 
 		/*
@@ -154,7 +171,7 @@ udp_snat_handler(struct sk_buff *skb,
 			return 0;
 	}
 
-	udph = (void *)ip_hdr(skb) + udphoff;
+	udph = (void *)skb_network_header(skb) + udphoff;
 	udph->source = cp->vport;
 
 	/*
@@ -162,7 +179,7 @@ udp_snat_handler(struct sk_buff *skb,
 	 */
 	if (!cp->app && (udph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
-		udp_fast_csum_update(udph, cp->daddr.ip, cp->vaddr.ip,
+		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 			skb->ip_summed = CHECKSUM_NONE;
@@ -170,9 +187,19 @@ udp_snat_handler(struct sk_buff *skb,
 		/* full checksum calculation */
 		udph->check = 0;
 		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
-		udph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip,
-						skb->len - udphoff,
-						cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			udph->check = csum_ipv6_magic(&cp->vaddr.in6,
+						      &cp->caddr.in6,
+						      skb->len - udphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			udph->check = csum_tcpudp_magic(cp->vaddr.ip,
+							cp->caddr.ip,
+							skb->len - udphoff,
+							cp->protocol,
+							skb->csum);
 		if (udph->check == 0)
 			udph->check = CSUM_MANGLED_0;
 		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
@@ -188,7 +215,14 @@ udp_dnat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	unsigned int udphoff = ip_hdrlen(skb);
+	unsigned int udphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
 	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
@@ -196,7 +230,7 @@ udp_dnat_handler(struct sk_buff *skb,
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp))
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 			return 0;
 
 		/*
@@ -207,7 +241,7 @@ udp_dnat_handler(struct sk_buff *skb,
 			return 0;
 	}
 
-	udph = (void *)ip_hdr(skb) + udphoff;
+	udph = (void *)skb_network_header(skb) + udphoff;
 	udph->dest = cp->dport;
 
 	/*
@@ -215,7 +249,7 @@ udp_dnat_handler(struct sk_buff *skb,
 	 */
 	if (!cp->app && (udph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
-		udp_fast_csum_update(udph, cp->vaddr.ip, cp->daddr.ip,
+		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 			skb->ip_summed = CHECKSUM_NONE;
@@ -223,9 +257,19 @@ udp_dnat_handler(struct sk_buff *skb,
 		/* full checksum calculation */
 		udph->check = 0;
 		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
-		udph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip,
-						skb->len - udphoff,
-						cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			udph->check = csum_ipv6_magic(&cp->caddr.in6,
+						      &cp->daddr.in6,
+						      skb->len - udphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			udph->check = csum_tcpudp_magic(cp->caddr.ip,
+							cp->daddr.ip,
+							skb->len - udphoff,
+							cp->protocol,
+							skb->csum);
 		if (udph->check == 0)
 			udph->check = CSUM_MANGLED_0;
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-- 
cgit v1.2.3-70-g09d2


From 28364a59f3dfe7fed3560ec7aff9b7aeb02824fb Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:43 +0200
Subject: IPVS: Extend functions for getting/creating connections

Extend functions for getting/creating connections and connection
templates for IPv6 support and fix the callers.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h                |  16 ++++--
 net/ipv4/ipvs/ip_vs_conn.c         | 100 ++++++++++++++++++++-------------
 net/ipv4/ipvs/ip_vs_core.c         | 112 ++++++++++++++++++++-----------------
 net/ipv4/ipvs/ip_vs_ftp.c          |  45 +++++++--------
 net/ipv4/ipvs/ip_vs_proto_ah_esp.c |  24 ++++----
 net/ipv4/ipvs/ip_vs_proto_tcp.c    |  24 ++++----
 net/ipv4/ipvs/ip_vs_proto_udp.c    |  24 ++++----
 net/ipv4/ipvs/ip_vs_sync.c         |  27 +++++----
 8 files changed, 209 insertions(+), 163 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index c173f1a7de2..26893499eb6 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -642,11 +642,16 @@ enum {
 };
 
 extern struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
+
 extern struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
+
 extern struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
 
 /* put back the conn without restarting its timer */
 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
@@ -657,8 +662,9 @@ extern void ip_vs_conn_put(struct ip_vs_conn *cp);
 extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
 
 extern struct ip_vs_conn *
-ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
-	       __be32 daddr, __be16 dport, unsigned flags,
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+	       const union nf_inet_addr *vaddr, __be16 vport,
+	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
 	       struct ip_vs_dest *dest);
 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 639d4bc7fc1..15eec282b17 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -114,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key)
 /*
  *	Returns hash value for IPVS connection entry
  */
-static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port)
+static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+				       const union nf_inet_addr *addr,
+				       __be16 port)
 {
-	return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd)
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+				    (__force u32)port, proto, ip_vs_conn_rnd)
+			& IP_VS_CONN_TAB_MASK;
+#endif
+	return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+			    ip_vs_conn_rnd)
 		& IP_VS_CONN_TAB_MASK;
 }
 
@@ -131,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	int ret;
 
 	/* Hash by protocol, client address and port */
-	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr.ip, cp->cport);
+	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
 	ct_write_lock(hash);
 
@@ -162,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	int ret;
 
 	/* unhash it and decrease its reference counter */
-	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr.ip, cp->cport);
+	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
 	ct_write_lock(hash);
 
@@ -187,18 +196,21 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
  *	d_addr, d_port: pkt dest address (load balancer)
  */
 static inline struct ip_vs_conn *__ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
 	unsigned hash;
 	struct ip_vs_conn *cp;
 
-	hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
 
 	ct_read_lock(hash);
 
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (s_addr == cp->caddr.ip && s_port == cp->cport &&
-		    d_port == cp->vport && d_addr == cp->vaddr.ip &&
+		if (cp->af == af &&
+		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+		    s_port == cp->cport && d_port == cp->vport &&
 		    ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
 		    protocol == cp->protocol) {
 			/* HIT */
@@ -214,37 +226,42 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
 }
 
 struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
 	struct ip_vs_conn *cp;
 
-	cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);
+	cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
 	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
-		cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
+		cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
+					 d_port);
 
-	IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
-		  ip_vs_proto_name(protocol),
-		  NIPQUAD(s_addr), ntohs(s_port),
-		  NIPQUAD(d_addr), ntohs(d_port),
-		  cp?"hit":"not hit");
+	IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      cp ? "hit" : "not hit");
 
 	return cp;
 }
 
 /* Get reference to connection template */
 struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
 	unsigned hash;
 	struct ip_vs_conn *cp;
 
-	hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
 
 	ct_read_lock(hash);
 
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (s_addr == cp->caddr.ip && s_port == cp->cport &&
-		    d_port == cp->vport && d_addr == cp->vaddr.ip &&
+		if (cp->af == af &&
+		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+		    s_port == cp->cport && d_port == cp->vport &&
 		    cp->flags & IP_VS_CONN_F_TEMPLATE &&
 		    protocol == cp->protocol) {
 			/* HIT */
@@ -257,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get
   out:
 	ct_read_unlock(hash);
 
-	IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
-		  ip_vs_proto_name(protocol),
-		  NIPQUAD(s_addr), ntohs(s_port),
-		  NIPQUAD(d_addr), ntohs(d_port),
-		  cp?"hit":"not hit");
+	IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      cp ? "hit" : "not hit");
 
 	return cp;
 }
@@ -273,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get
  *	d_addr, d_port: pkt dest address (foreign host)
  */
 struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
 {
 	unsigned hash;
 	struct ip_vs_conn *cp, *ret=NULL;
@@ -281,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get
 	/*
 	 *	Check for "full" addressed entries
 	 */
-	hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);
+	hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
 
 	ct_read_lock(hash);
 
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (d_addr == cp->caddr.ip && d_port == cp->cport &&
-		    s_port == cp->dport && s_addr == cp->daddr.ip &&
+		if (cp->af == af &&
+		    ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
+		    ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
+		    d_port == cp->cport && s_port == cp->dport &&
 		    protocol == cp->protocol) {
 			/* HIT */
 			atomic_inc(&cp->refcnt);
@@ -298,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get
 
 	ct_read_unlock(hash);
 
-	IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
-		  ip_vs_proto_name(protocol),
-		  NIPQUAD(s_addr), ntohs(s_port),
-		  NIPQUAD(d_addr), ntohs(d_port),
-		  ret?"hit":"not hit");
+	IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      ret ? "hit" : "not hit");
 
 	return ret;
 }
@@ -625,8 +645,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
  *	Create a new connection entry and hash it into the ip_vs_conn_tab
  */
 struct ip_vs_conn *
-ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
-	       __be32 daddr, __be16 dport, unsigned flags,
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+	       const union nf_inet_addr *vaddr, __be16 vport,
+	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
 	       struct ip_vs_dest *dest)
 {
 	struct ip_vs_conn *cp;
@@ -640,12 +661,13 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
 
 	INIT_LIST_HEAD(&cp->c_list);
 	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+	cp->af		   = af;
 	cp->protocol	   = proto;
-	cp->caddr.ip	   = caddr;
+	ip_vs_addr_copy(af, &cp->caddr, caddr);
 	cp->cport	   = cport;
-	cp->vaddr.ip	   = vaddr;
+	ip_vs_addr_copy(af, &cp->vaddr, vaddr);
 	cp->vport	   = vport;
-	cp->daddr.ip	   = daddr;
+	ip_vs_addr_copy(af, &cp->daddr, daddr);
 	cp->dport          = dport;
 	cp->flags	   = flags;
 	spin_lock_init(&cp->lock);
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 34aaa1480d9..2d5a4331709 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -173,19 +173,21 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		    __be16 ports[2])
 {
 	struct ip_vs_conn *cp = NULL;
-	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
 	struct ip_vs_conn *ct;
 	__be16  dport;	 /* destination port to forward */
-	__be32  snet;	 /* source network of the client, after masking */
+	union nf_inet_addr snet;	/* source network of the client,
+					   after masking */
+	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
 
 	/* Mask saddr with the netmask to adjust template granularity */
-	snet = iph->saddr & svc->netmask;
+	snet.ip = iph.saddr.ip & svc->netmask;
 
 	IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u "
 		  "mnet %u.%u.%u.%u\n",
-		  NIPQUAD(iph->saddr), ntohs(ports[0]),
-		  NIPQUAD(iph->daddr), ntohs(ports[1]),
+		  NIPQUAD(iph.saddr.ip), ntohs(ports[0]),
+		  NIPQUAD(iph.daddr.ip), ntohs(ports[1]),
 		  NIPQUAD(snet));
 
 	/*
@@ -204,11 +206,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	if (ports[1] == svc->port) {
 		/* Check if a template already exists */
 		if (svc->port != FTPPORT)
-			ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
-					       iph->daddr, ports[1]);
+			ct = ip_vs_ct_in_get(AF_INET, iph.protocol, &snet, 0,
+					     &iph.daddr, ports[1]);
 		else
-			ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
-					       iph->daddr, 0);
+			ct = ip_vs_ct_in_get(AF_INET, iph.protocol, &snet, 0,
+					     &iph.daddr, 0);
 
 		if (!ct || !ip_vs_check_template(ct)) {
 			/*
@@ -228,18 +230,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			 * for ftp service.
 			 */
 			if (svc->port != FTPPORT)
-				ct = ip_vs_conn_new(iph->protocol,
-						    snet, 0,
-						    iph->daddr,
+				ct = ip_vs_conn_new(AF_INET, iph.protocol,
+						    &snet, 0,
+						    &iph.daddr,
 						    ports[1],
-						    dest->addr.ip, dest->port,
+						    &dest->addr, dest->port,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			else
-				ct = ip_vs_conn_new(iph->protocol,
-						    snet, 0,
-						    iph->daddr, 0,
-						    dest->addr.ip, 0,
+				ct = ip_vs_conn_new(AF_INET, iph.protocol,
+						    &snet, 0,
+						    &iph.daddr, 0,
+						    &dest->addr, 0,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			if (ct == NULL)
@@ -258,12 +260,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
 		 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
 		 */
-		if (svc->fwmark)
-			ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
-					       htonl(svc->fwmark), 0);
-		else
-			ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
-					       iph->daddr, 0);
+		if (svc->fwmark) {
+			union nf_inet_addr fwmark = {
+				.all = { 0, 0, 0, htonl(svc->fwmark) }
+			};
+
+			ct = ip_vs_ct_in_get(AF_INET, IPPROTO_IP, &snet, 0,
+					     &fwmark, 0);
+		} else
+			ct = ip_vs_ct_in_get(AF_INET, iph.protocol, &snet, 0,
+					     &iph.daddr, 0);
 
 		if (!ct || !ip_vs_check_template(ct)) {
 			/*
@@ -282,18 +288,22 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			/*
 			 * Create a template according to the service
 			 */
-			if (svc->fwmark)
-				ct = ip_vs_conn_new(IPPROTO_IP,
-						    snet, 0,
-						    htonl(svc->fwmark), 0,
-						    dest->addr.ip, 0,
+			if (svc->fwmark) {
+				union nf_inet_addr fwmark = {
+					.all = { 0, 0, 0, htonl(svc->fwmark) }
+				};
+
+				ct = ip_vs_conn_new(AF_INET, IPPROTO_IP,
+						    &snet, 0,
+						    &fwmark, 0,
+						    &dest->addr, 0,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
-			else
-				ct = ip_vs_conn_new(iph->protocol,
-						    snet, 0,
-						    iph->daddr, 0,
-						    dest->addr.ip, 0,
+			} else
+				ct = ip_vs_conn_new(AF_INET, iph.protocol,
+						    &snet, 0,
+						    &iph.daddr, 0,
+						    &dest->addr, 0,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			if (ct == NULL)
@@ -310,10 +320,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/*
 	 *    Create a new connection according to the template
 	 */
-	cp = ip_vs_conn_new(iph->protocol,
-			    iph->saddr, ports[0],
-			    iph->daddr, ports[1],
-			    dest->addr.ip, dport,
+	cp = ip_vs_conn_new(AF_INET, iph.protocol,
+			    &iph.saddr, ports[0],
+			    &iph.daddr, ports[1],
+			    &dest->addr, dport,
 			    0,
 			    dest);
 	if (cp == NULL) {
@@ -342,12 +352,12 @@ struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
 	struct ip_vs_conn *cp = NULL;
-	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
 	__be16 _ports[2], *pptr;
 
-	pptr = skb_header_pointer(skb, iph->ihl*4,
-				  sizeof(_ports), _ports);
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
 	if (pptr == NULL)
 		return NULL;
 
@@ -377,10 +387,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	/*
 	 *    Create a connection entry.
 	 */
-	cp = ip_vs_conn_new(iph->protocol,
-			    iph->saddr, pptr[0],
-			    iph->daddr, pptr[1],
-			    dest->addr.ip, dest->port ? dest->port : pptr[1],
+	cp = ip_vs_conn_new(AF_INET, iph.protocol,
+			    &iph.saddr, pptr[0],
+			    &iph.daddr, pptr[1],
+			    &dest->addr, dest->port ? dest->port : pptr[1],
 			    0,
 			    dest);
 	if (cp == NULL)
@@ -408,10 +418,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		struct ip_vs_protocol *pp)
 {
 	__be16 _ports[2], *pptr;
-	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_iphdr iph;
+	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
 
-	pptr = skb_header_pointer(skb, iph->ihl*4,
-				  sizeof(_ports), _ports);
+	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
 	if (pptr == NULL) {
 		ip_vs_service_put(svc);
 		return NF_DROP;
@@ -421,7 +431,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	   and the destination is RTN_UNICAST (and not local), then create
 	   a cache_bypass connection entry */
 	if (sysctl_ip_vs_cache_bypass && svc->fwmark
-	    && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
+	    && (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST)) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
 
@@ -429,9 +439,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 
 		/* create a new connection entry */
 		IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
-		cp = ip_vs_conn_new(iph->protocol,
-				    iph->saddr, pptr[0],
-				    iph->daddr, pptr[1],
+		cp = ip_vs_conn_new(AF_INET, iph.protocol,
+				    &iph.saddr, pptr[0],
+				    &iph.daddr, pptr[1],
 				    0, 0,
 				    IP_VS_CONN_F_BYPASS,
 				    NULL);
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index bfe5d7050a5..0c3fbe0de5f 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -140,7 +140,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	struct tcphdr *th;
 	char *data, *data_limit;
 	char *start, *end;
-	__be32 from;
+	union nf_inet_addr from;
 	__be16 port;
 	struct ip_vs_conn *n_cp;
 	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
@@ -166,24 +166,25 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		if (ip_vs_ftp_get_addrport(data, data_limit,
 					   SERVER_STRING,
 					   sizeof(SERVER_STRING)-1, ')',
-					   &from, &port,
+					   &from.ip, &port,
 					   &start, &end) != 1)
 			return 1;
 
 		IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
 			  "%u.%u.%u.%u:%d detected\n",
-			  NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr.ip), 0);
+			  NIPQUAD(from.ip), ntohs(port),
+			  NIPQUAD(cp->caddr.ip), 0);
 
 		/*
 		 * Now update or create an connection entry for it
 		 */
-		n_cp = ip_vs_conn_out_get(iph->protocol, from, port,
-					  cp->caddr.ip, 0);
+		n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port,
+					  &cp->caddr, 0);
 		if (!n_cp) {
-			n_cp = ip_vs_conn_new(IPPROTO_TCP,
-					      cp->caddr.ip, 0,
-					      cp->vaddr.ip, port,
-					      from, port,
+			n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+					      &cp->caddr, 0,
+					      &cp->vaddr, port,
+					      &from, port,
 					      IP_VS_CONN_F_NO_CPORT,
 					      cp->dest);
 			if (!n_cp)
@@ -196,9 +197,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		/*
 		 * Replace the old passive address with the new one
 		 */
-		from = n_cp->vaddr.ip;
+		from.ip = n_cp->vaddr.ip;
 		port = n_cp->vport;
-		sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
+		sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
 			(ntohs(port)>>8)&255, ntohs(port)&255);
 		buf_len = strlen(buf);
 
@@ -243,7 +244,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	struct tcphdr *th;
 	char *data, *data_start, *data_limit;
 	char *start, *end;
-	__be32 to;
+	union nf_inet_addr to;
 	__be16 port;
 	struct ip_vs_conn *n_cp;
 
@@ -291,12 +292,12 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	 */
 	if (ip_vs_ftp_get_addrport(data_start, data_limit,
 				   CLIENT_STRING, sizeof(CLIENT_STRING)-1,
-				   '\r', &to, &port,
+				   '\r', &to.ip, &port,
 				   &start, &end) != 1)
 		return 1;
 
 	IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
-		  NIPQUAD(to), ntohs(port));
+		  NIPQUAD(to.ip), ntohs(port));
 
 	/* Passive mode off */
 	cp->app_data = NULL;
@@ -306,16 +307,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	 */
 	IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
 		  ip_vs_proto_name(iph->protocol),
-		  NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
+		  NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
 
-	n_cp = ip_vs_conn_in_get(iph->protocol,
-				 to, port,
-				 cp->vaddr.ip, htons(ntohs(cp->vport)-1));
+	n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol,
+				 &to, port,
+				 &cp->vaddr, htons(ntohs(cp->vport)-1));
 	if (!n_cp) {
-		n_cp = ip_vs_conn_new(IPPROTO_TCP,
-				      to, port,
-				      cp->vaddr.ip, htons(ntohs(cp->vport)-1),
-				      cp->daddr.ip, htons(ntohs(cp->dport)-1),
+		n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+				      &to, port,
+				      &cp->vaddr, htons(ntohs(cp->vport)-1),
+				      &cp->daddr, htons(ntohs(cp->dport)-1),
 				      0,
 				      cp->dest);
 		if (!n_cp)
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
index 4b0b8f268d1..2b18a78d039 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
@@ -46,16 +46,16 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 	struct ip_vs_conn *cp;
 
 	if (likely(!inverse)) {
-		cp = ip_vs_conn_in_get(IPPROTO_UDP,
-				       iph->saddr.ip,
+		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+				       &iph->saddr,
 				       htons(PORT_ISAKMP),
-				       iph->daddr.ip,
+				       &iph->daddr,
 				       htons(PORT_ISAKMP));
 	} else {
-		cp = ip_vs_conn_in_get(IPPROTO_UDP,
-				       iph->daddr.ip,
+		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+				       &iph->daddr,
 				       htons(PORT_ISAKMP),
-				       iph->saddr.ip,
+				       &iph->saddr,
 				       htons(PORT_ISAKMP));
 	}
 
@@ -86,16 +86,16 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
 	struct ip_vs_conn *cp;
 
 	if (likely(!inverse)) {
-		cp = ip_vs_conn_out_get(IPPROTO_UDP,
-					iph->saddr.ip,
+		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+					&iph->saddr,
 					htons(PORT_ISAKMP),
-					iph->daddr.ip,
+					&iph->daddr,
 					htons(PORT_ISAKMP));
 	} else {
-		cp = ip_vs_conn_out_get(IPPROTO_UDP,
-					iph->daddr.ip,
+		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+					&iph->daddr,
 					htons(PORT_ISAKMP),
-					iph->saddr.ip,
+					&iph->saddr,
 					htons(PORT_ISAKMP));
 	}
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 3daae43ae44..3da2bb05ee7 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -36,13 +36,13 @@ tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return NULL;
 
 	if (likely(!inverse)) {
-		return ip_vs_conn_in_get(iph->protocol,
-					 iph->saddr.ip, pptr[0],
-					 iph->daddr.ip, pptr[1]);
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->saddr, pptr[0],
+					 &iph->daddr, pptr[1]);
 	} else {
-		return ip_vs_conn_in_get(iph->protocol,
-					 iph->daddr.ip, pptr[1],
-					 iph->saddr.ip, pptr[0]);
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->daddr, pptr[1],
+					 &iph->saddr, pptr[0]);
 	}
 }
 
@@ -58,13 +58,13 @@ tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return NULL;
 
 	if (likely(!inverse)) {
-		return ip_vs_conn_out_get(iph->protocol,
-					  iph->saddr.ip, pptr[0],
-					  iph->daddr.ip, pptr[1]);
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->saddr, pptr[0],
+					  &iph->daddr, pptr[1]);
 	} else {
-		return ip_vs_conn_out_get(iph->protocol,
-					  iph->daddr.ip, pptr[1],
-					  iph->saddr.ip, pptr[0]);
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->daddr, pptr[1],
+					  &iph->saddr, pptr[0]);
 	}
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 6cca0ad8e32..fd8bd934cc0 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -36,13 +36,13 @@ udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return NULL;
 
 	if (likely(!inverse)) {
-		cp = ip_vs_conn_in_get(iph->protocol,
-				       iph->saddr.ip, pptr[0],
-				       iph->daddr.ip, pptr[1]);
+		cp = ip_vs_conn_in_get(af, iph->protocol,
+				       &iph->saddr, pptr[0],
+				       &iph->daddr, pptr[1]);
 	} else {
-		cp = ip_vs_conn_in_get(iph->protocol,
-				       iph->daddr.ip, pptr[1],
-				       iph->saddr.ip, pptr[0]);
+		cp = ip_vs_conn_in_get(af, iph->protocol,
+				       &iph->daddr, pptr[1],
+				       &iph->saddr, pptr[0]);
 	}
 
 	return cp;
@@ -62,13 +62,13 @@ udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return NULL;
 
 	if (likely(!inverse)) {
-		cp = ip_vs_conn_out_get(iph->protocol,
-					iph->saddr.ip, pptr[0],
-					iph->daddr.ip, pptr[1]);
+		cp = ip_vs_conn_out_get(af, iph->protocol,
+					&iph->saddr, pptr[0],
+					&iph->daddr, pptr[1]);
 	} else {
-		cp = ip_vs_conn_out_get(iph->protocol,
-					iph->daddr.ip, pptr[1],
-					iph->saddr.ip, pptr[0]);
+		cp = ip_vs_conn_out_get(af, iph->protocol,
+					&iph->daddr, pptr[1],
+					&iph->saddr, pptr[0]);
 	}
 
 	return cp;
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 2cf47b2e166..3ce1093e067 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -366,13 +366,17 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 		}
 
 		if (!(flags & IP_VS_CONN_F_TEMPLATE))
-			cp = ip_vs_conn_in_get(s->protocol,
-					       s->caddr, s->cport,
-					       s->vaddr, s->vport);
+			cp = ip_vs_conn_in_get(AF_INET, s->protocol,
+					       (union nf_inet_addr *)&s->caddr,
+					       s->cport,
+					       (union nf_inet_addr *)&s->vaddr,
+					       s->vport);
 		else
-			cp = ip_vs_ct_in_get(s->protocol,
-					       s->caddr, s->cport,
-					       s->vaddr, s->vport);
+			cp = ip_vs_ct_in_get(AF_INET, s->protocol,
+					     (union nf_inet_addr *)&s->caddr,
+					     s->cport,
+					     (union nf_inet_addr *)&s->vaddr,
+					     s->vport);
 		if (!cp) {
 			/*
 			 * Find the appropriate destination for the connection.
@@ -389,10 +393,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 				else
 					flags &= ~IP_VS_CONN_F_INACTIVE;
 			}
-			cp = ip_vs_conn_new(s->protocol,
-					    s->caddr, s->cport,
-					    s->vaddr, s->vport,
-					    s->daddr, s->dport,
+			cp = ip_vs_conn_new(AF_INET, s->protocol,
+					    (union nf_inet_addr *)s->caddr,
+					    s->cport,
+					    (union nf_inet_addr *)s->vaddr,
+					    s->vport,
+					    (union nf_inet_addr *)s->daddr,
+					    s->dport,
 					    flags, dest);
 			if (dest)
 				atomic_dec(&dest->refcnt);
-- 
cgit v1.2.3-70-g09d2


From 38cdcc9a039b92a9972dca3c954fb3d8b3ef13bf Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:44 +0200
Subject: IPVS: Add IPv6 support to xmit() support functions

Add IPv6 support to IP_VS_XMIT() and to the xmit routing cache, introducing
a new function __ip_vs_get_out_rt_v6().

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_xmit.c | 82 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 75 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 88199c9f2d3..fd8342ec7e1 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -20,6 +20,9 @@
 #include <net/udp.h>
 #include <net/icmp.h>                   /* for icmp_send */
 #include <net/route.h>                  /* for ip_route_output */
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/icmpv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
@@ -47,7 +50,8 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
 
 	if (!dst)
 		return NULL;
-	if ((dst->obsolete || rtos != dest->dst_rtos) &&
+	if ((dst->obsolete
+	     || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
 	    dst->ops->check(dst, cookie) == NULL) {
 		dest->dst_cache = NULL;
 		dst_release(dst);
@@ -109,6 +113,70 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 	return rt;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static struct rt6_info *
+__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
+{
+	struct rt6_info *rt;			/* Route to the other host */
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (dest) {
+		spin_lock(&dest->dst_lock);
+		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
+		if (!rt) {
+			struct flowi fl = {
+				.oif = 0,
+				.nl_u = {
+					.ip6_u = {
+						.daddr = dest->addr.in6,
+						.saddr = {
+							.s6_addr32 =
+								{ 0, 0, 0, 0 },
+						},
+					},
+				},
+			};
+
+			rt = (struct rt6_info *)ip6_route_output(&init_net,
+								 NULL, &fl);
+			if (!rt) {
+				spin_unlock(&dest->dst_lock);
+				IP_VS_DBG_RL("ip6_route_output error, "
+					     "dest: " NIP6_FMT "\n",
+					     NIP6(dest->addr.in6));
+				return NULL;
+			}
+			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
+			IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n",
+				  NIP6(dest->addr.in6),
+				  atomic_read(&rt->u.dst.__refcnt));
+		}
+		spin_unlock(&dest->dst_lock);
+	} else {
+		struct flowi fl = {
+			.oif = 0,
+			.nl_u = {
+				.ip6_u = {
+					.daddr = cp->daddr.in6,
+					.saddr = {
+						.s6_addr32 = { 0, 0, 0, 0 },
+					},
+				},
+			},
+		};
+
+		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+		if (!rt) {
+			IP_VS_DBG_RL("ip6_route_output error, dest: "
+				     NIP6_FMT "\n", NIP6(cp->daddr.in6));
+			return NULL;
+		}
+	}
+
+	return rt;
+}
+#endif
+
 
 /*
  *	Release dest->dst_cache before a dest is removed
@@ -123,11 +191,11 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
 	dst_release(old_dst);
 }
 
-#define IP_VS_XMIT(skb, rt)				\
+#define IP_VS_XMIT(pf, skb, rt)				\
 do {							\
 	(skb)->ipvs_property = 1;			\
 	skb_forward_csum(skb);				\
-	NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL,	\
+	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
 		(rt)->u.dst.dev, dst_output);		\
 } while (0)
 
@@ -200,7 +268,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(skb, rt);
+	IP_VS_XMIT(PF_INET, skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -276,7 +344,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(skb, rt);
+	IP_VS_XMIT(PF_INET, skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -467,7 +535,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(skb, rt);
+	IP_VS_XMIT(PF_INET, skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -540,7 +608,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(skb, rt);
+	IP_VS_XMIT(PF_INET, skb, rt);
 
 	rc = NF_STOLEN;
 	goto out;
-- 
cgit v1.2.3-70-g09d2


From b3cdd2a73867d309dca288b8e820c09e3b7f1da1 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:45 +0200
Subject: IPVS: Add and bind IPv6 xmit functions

Add xmit functions for IPv6. Also add the already needed __ip_vs_get_out_rt_v6()
to ip_vs_core.c. Bind the new xmit functions to v6 connections.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h        |  20 ++-
 net/ipv4/ipvs/ip_vs_conn.c |  34 +++-
 net/ipv4/ipvs/ip_vs_core.c |  43 ++++++
 net/ipv4/ipvs/ip_vs_xmit.c | 377 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 472 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 26893499eb6..ac709fa5a79 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -855,6 +855,19 @@ extern int ip_vs_icmp_xmit
 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
 extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
 
+#ifdef CONFIG_IP_VS_IPV6
+extern int ip_vs_bypass_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_nat_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_tunnel_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_dr_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_icmp_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp,
+ int offset);
+#endif
 
 /*
  *	This is a simple mechanism to ignore packets when
@@ -899,7 +912,12 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
 }
 
 extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
-		struct ip_vs_conn *cp, int dir);
+			   struct ip_vs_conn *cp, int dir);
+
+#ifdef CONFIG_IP_VS_IPV6
+extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+			      struct ip_vs_conn *cp, int dir);
+#endif
 
 extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
 
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 15eec282b17..f5dddad6d5e 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -389,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
 	}
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
+{
+	switch (IP_VS_FWD_METHOD(cp)) {
+	case IP_VS_CONN_F_MASQ:
+		cp->packet_xmit = ip_vs_nat_xmit_v6;
+		break;
+
+	case IP_VS_CONN_F_TUNNEL:
+		cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+		break;
+
+	case IP_VS_CONN_F_DROUTE:
+		cp->packet_xmit = ip_vs_dr_xmit_v6;
+		break;
+
+	case IP_VS_CONN_F_LOCALNODE:
+		cp->packet_xmit = ip_vs_null_xmit;
+		break;
+
+	case IP_VS_CONN_F_BYPASS:
+		cp->packet_xmit = ip_vs_bypass_xmit_v6;
+		break;
+	}
+}
+#endif
+
 
 static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
 {
@@ -694,7 +721,12 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
 	cp->timeout = 3*HZ;
 
 	/* Bind its packet transmitter */
-	ip_vs_bind_xmit(cp);
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_bind_xmit_v6(cp);
+	else
+#endif
+		ip_vs_bind_xmit(cp);
 
 	if (unlikely(pp && atomic_read(&pp->appcnt)))
 		ip_vs_bind_app(cp, pp);
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 2d5a4331709..d6f5bf9049a 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -570,6 +570,49 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 			"Forwarding altered incoming ICMP");
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    struct ip_vs_conn *cp, int inout)
+{
+	struct ipv6hdr *iph	 = ipv6_hdr(skb);
+	unsigned int icmp_offset = sizeof(struct ipv6hdr);
+	struct icmp6hdr *icmph	 = (struct icmp6hdr *)(skb_network_header(skb) +
+						      icmp_offset);
+	struct ipv6hdr *ciph	 = (struct ipv6hdr *)(icmph + 1);
+
+	if (inout) {
+		iph->saddr = cp->vaddr.in6;
+		ciph->daddr = cp->vaddr.in6;
+	} else {
+		iph->daddr = cp->daddr.in6;
+		ciph->saddr = cp->daddr.in6;
+	}
+
+	/* the TCP/UDP port */
+	if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
+		__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
+
+		if (inout)
+			ports[1] = cp->vport;
+		else
+			ports[0] = cp->dport;
+	}
+
+	/* And finally the ICMP checksum */
+	icmph->icmp6_cksum = 0;
+	/* TODO IPv6: is this correct for ICMPv6? */
+	ip_vs_checksum_complete(skb, icmp_offset);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (inout)
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered outgoing ICMPv6");
+	else
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered incoming ICMPv6");
+}
+#endif
+
 /*
  *	Handle ICMP messages in the inside-to-outside direction (outgoing).
  *	Find any that might be relevant, check against existing connections,
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index fd8342ec7e1..02ddc2b3ce2 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -281,6 +281,70 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	return NF_STOLEN;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		     struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;			/* Route to the other host */
+	struct ipv6hdr  *iph = ipv6_hdr(skb);
+	int    mtu;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = iph->daddr,
+				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+	};
+
+	EnterFunction(10);
+
+	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+	if (!rt) {
+		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, "
+			     "dest: " NIP6_FMT "\n", NIP6(iph->daddr));
+		goto tx_error_icmp;
+	}
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		dst_release(&rt->u.dst);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(skb == NULL)) {
+		dst_release(&rt->u.dst);
+		return NF_STOLEN;
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+ tx_error_icmp:
+	dst_link_failure(skb);
+ tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
 
 /*
  *      NAT transmitter (only for outside-to-inside nat forwarding)
@@ -360,6 +424,83 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	goto tx_error;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;		/* Route to the other host */
+	int mtu;
+
+	EnterFunction(10);
+
+	/* check if it is a connection of no-client-port */
+	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+		__be16 _pt, *p;
+		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
+				       sizeof(_pt), &_pt);
+		if (p == NULL)
+			goto tx_error;
+		ip_vs_conn_fill_cport(cp, *p);
+		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+	}
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		dst_release(&rt->u.dst);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+				 "ip_vs_nat_xmit_v6(): frag needed for");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* mangle the packet */
+	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+		goto tx_error;
+	ipv6_hdr(skb)->daddr = cp->daddr.in6;
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+	/* FIXME: when application helper enlarges the packet and the length
+	   is larger than the MTU of outgoing device, there will be still
+	   MTU problem. */
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	LeaveFunction(10);
+	kfree_skb(skb);
+	return NF_STOLEN;
+tx_error_put:
+	dst_release(&rt->u.dst);
+	goto tx_error;
+}
+#endif
+
 
 /*
  *   IP Tunneling transmitter
@@ -491,6 +632,112 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	return NF_STOLEN;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		     struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;		/* Route to the other host */
+	struct net_device *tdev;	/* Device to other host */
+	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
+	sk_buff_data_t old_transport_header = skb->transport_header;
+	struct ipv6hdr  *iph;		/* Our new IP header */
+	unsigned int max_headroom;	/* The extra header space needed */
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (skb->protocol != htons(ETH_P_IPV6)) {
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
+			     "ETH_P_IPV6: %d, skb protocol: %d\n",
+			     htons(ETH_P_IPV6), skb->protocol);
+		goto tx_error;
+	}
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	tdev = rt->u.dst.dev;
+
+	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
+	/* TODO IPv6: do we need this check in IPv6? */
+	if (mtu < 1280) {
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
+		goto tx_error;
+	}
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
+
+	if (skb_headroom(skb) < max_headroom
+	    || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb =
+			skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			dst_release(&rt->u.dst);
+			kfree_skb(skb);
+			IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
+			return NF_STOLEN;
+		}
+		kfree_skb(skb);
+		skb = new_skb;
+		old_iph = ipv6_hdr(skb);
+	}
+
+	skb->transport_header = old_transport_header;
+
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+	iph			=	ipv6_hdr(skb);
+	iph->version		=	6;
+	iph->nexthdr		=	IPPROTO_IPV6;
+	iph->payload_len	=	old_iph->payload_len + sizeof(old_iph);
+	iph->priority		=	old_iph->priority;
+	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+	iph->daddr		=	rt->rt6i_dst.addr;
+	iph->saddr		=	cp->vaddr.in6; /* rt->rt6i_src.addr; */
+	iph->hop_limit		=	old_iph->hop_limit;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	ip6_local_out(skb);
+
+	LeaveFunction(10);
+
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
 
 /*
  *      Direct Routing transmitter
@@ -548,6 +795,60 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	return NF_STOLEN;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		 struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;			/* Route to the other host */
+	int    mtu;
+
+	EnterFunction(10);
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(skb == NULL)) {
+		dst_release(&rt->u.dst);
+		return NF_STOLEN;
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
 
 /*
  *	ICMP packet transmitter
@@ -625,3 +926,79 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_rt_put(rt);
 	goto tx_error;
 }
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		struct ip_vs_protocol *pp, int offset)
+{
+	struct rt6_info	*rt;	/* Route to the other host */
+	int mtu;
+	int rc;
+
+	EnterFunction(10);
+
+	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+	   forwarded directly here, because there is no need to
+	   translate address/port back */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+		if (cp->packet_xmit)
+			rc = cp->packet_xmit(skb, cp, pp);
+		else
+			rc = NF_ACCEPT;
+		/* do not touch skb anymore */
+		atomic_inc(&cp->in_pkts);
+		goto out;
+	}
+
+	/*
+	 * mangle and send the packet here (only for VS/NAT)
+	 */
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		dst_release(&rt->u.dst);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, offset))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop the old route when skb is not shared */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	rc = NF_STOLEN;
+	goto out;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	dev_kfree_skb(skb);
+	rc = NF_STOLEN;
+out:
+	LeaveFunction(10);
+	return rc;
+tx_error_put:
+	dst_release(&rt->u.dst);
+	goto tx_error;
+}
+#endif
-- 
cgit v1.2.3-70-g09d2


From cd17f9ed099ed27e9b0d298253e5c05e335ac656 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:46 +0200
Subject: IPVS: Extend scheduling functions for IPv6 support

Convert ip_vs_schedule() and ip_vs_sched_persist() to support scheduling of
IPv6 connections.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_core.c | 56 +++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index d6f5bf9049a..8bfd7c2f0eb 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -176,19 +176,25 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
 	struct ip_vs_conn *ct;
-	__be16  dport;	 /* destination port to forward */
+	__be16  dport;			/* destination port to forward */
 	union nf_inet_addr snet;	/* source network of the client,
 					   after masking */
-	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
 	/* Mask saddr with the netmask to adjust template granularity */
-	snet.ip = iph.saddr.ip & svc->netmask;
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6)
+		ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
+	else
+#endif
+		snet.ip = iph.saddr.ip & svc->netmask;
 
-	IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u "
-		  "mnet %u.%u.%u.%u\n",
-		  NIPQUAD(iph.saddr.ip), ntohs(ports[0]),
-		  NIPQUAD(iph.daddr.ip), ntohs(ports[1]),
-		  NIPQUAD(snet));
+	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
+		      "mnet %s\n",
+		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
+		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+		      IP_VS_DBG_ADDR(svc->af, &snet));
 
 	/*
 	 * As far as we know, FTP is a very complicated network protocol, and
@@ -206,10 +212,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	if (ports[1] == svc->port) {
 		/* Check if a template already exists */
 		if (svc->port != FTPPORT)
-			ct = ip_vs_ct_in_get(AF_INET, iph.protocol, &snet, 0,
+			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
 					     &iph.daddr, ports[1]);
 		else
-			ct = ip_vs_ct_in_get(AF_INET, iph.protocol, &snet, 0,
+			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
 					     &iph.daddr, 0);
 
 		if (!ct || !ip_vs_check_template(ct)) {
@@ -230,7 +236,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			 * for ftp service.
 			 */
 			if (svc->port != FTPPORT)
-				ct = ip_vs_conn_new(AF_INET, iph.protocol,
+				ct = ip_vs_conn_new(svc->af, iph.protocol,
 						    &snet, 0,
 						    &iph.daddr,
 						    ports[1],
@@ -238,7 +244,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			else
-				ct = ip_vs_conn_new(AF_INET, iph.protocol,
+				ct = ip_vs_conn_new(svc->af, iph.protocol,
 						    &snet, 0,
 						    &iph.daddr, 0,
 						    &dest->addr, 0,
@@ -265,10 +271,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 				.all = { 0, 0, 0, htonl(svc->fwmark) }
 			};
 
-			ct = ip_vs_ct_in_get(AF_INET, IPPROTO_IP, &snet, 0,
+			ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
 					     &fwmark, 0);
 		} else
-			ct = ip_vs_ct_in_get(AF_INET, iph.protocol, &snet, 0,
+			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
 					     &iph.daddr, 0);
 
 		if (!ct || !ip_vs_check_template(ct)) {
@@ -293,14 +299,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 					.all = { 0, 0, 0, htonl(svc->fwmark) }
 				};
 
-				ct = ip_vs_conn_new(AF_INET, IPPROTO_IP,
+				ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
 						    &snet, 0,
 						    &fwmark, 0,
 						    &dest->addr, 0,
 						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			} else
-				ct = ip_vs_conn_new(AF_INET, iph.protocol,
+				ct = ip_vs_conn_new(svc->af, iph.protocol,
 						    &snet, 0,
 						    &iph.daddr, 0,
 						    &dest->addr, 0,
@@ -320,7 +326,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/*
 	 *    Create a new connection according to the template
 	 */
-	cp = ip_vs_conn_new(AF_INET, iph.protocol,
+	cp = ip_vs_conn_new(svc->af, iph.protocol,
 			    &iph.saddr, ports[0],
 			    &iph.daddr, ports[1],
 			    &dest->addr, dport,
@@ -387,7 +393,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	/*
 	 *    Create a connection entry.
 	 */
-	cp = ip_vs_conn_new(AF_INET, iph.protocol,
+	cp = ip_vs_conn_new(svc->af, iph.protocol,
 			    &iph.saddr, pptr[0],
 			    &iph.daddr, pptr[1],
 			    &dest->addr, dest->port ? dest->port : pptr[1],
@@ -396,13 +402,13 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	if (cp == NULL)
 		return NULL;
 
-	IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
-		  "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
-		  ip_vs_fwd_tag(cp),
-		  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
-		  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
-		  NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
-		  cp->flags, atomic_read(&cp->refcnt));
+	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
+		      "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+		      ip_vs_fwd_tag(cp),
+		      IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+		      cp->flags, atomic_read(&cp->refcnt));
 
 	ip_vs_conn_stats(cp, svc);
 	return cp;
-- 
cgit v1.2.3-70-g09d2


From 2a3b791e6e1169f374224d164738e9f7be703d77 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:47 +0200
Subject: IPVS: Add/adjust Netfilter hook functions and helpers for v6

Add Netfilter hook functions or modify existing ones, if possible, to
process IPv6 packets. Some support functions are also added/modified for
this. ip_vs_nat_icmp_v6() was already added in the patch that added the v6
xmit functions, as it is called from one of them.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_core.c | 365 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 329 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 8bfd7c2f0eb..035a511e12f 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -39,6 +39,11 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#endif
+
 #include <net/ip_vs.h>
 
 
@@ -60,6 +65,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level);
 
 /* ID used in ICMP lookups */
 #define icmp_id(icmph)          (((icmph)->un).echo.id)
+#define icmpv6_id(icmph)        (icmph->icmp6_dataun.u_echo.identifier)
 
 const char *ip_vs_proto_name(unsigned proto)
 {
@@ -74,6 +80,10 @@ const char *ip_vs_proto_name(unsigned proto)
 		return "TCP";
 	case IPPROTO_ICMP:
 		return "ICMP";
+#ifdef CONFIG_IP_VS_IPV6
+	case IPPROTO_ICMPV6:
+		return "ICMPv6";
+#endif
 	default:
 		sprintf(buf, "IP_%d", proto);
 		return buf;
@@ -425,7 +435,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 {
 	__be16 _ports[2], *pptr;
 	struct ip_vs_iphdr iph;
-	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+	int unicast;
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
 	if (pptr == NULL) {
@@ -433,11 +444,17 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		return NF_DROP;
 	}
 
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6)
+		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
+	else
+#endif
+		unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+
 	/* if it is fwmark-based service, the cache_bypass sysctl is up
-	   and the destination is RTN_UNICAST (and not local), then create
+	   and the destination is a non-local unicast, then create
 	   a cache_bypass connection entry */
-	if (sysctl_ip_vs_cache_bypass && svc->fwmark
-	    && (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST)) {
+	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
 
@@ -445,7 +462,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 
 		/* create a new connection entry */
 		IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
-		cp = ip_vs_conn_new(AF_INET, iph.protocol,
+		cp = ip_vs_conn_new(svc->af, iph.protocol,
 				    &iph.saddr, pptr[0],
 				    &iph.daddr, pptr[1],
 				    0, 0,
@@ -489,7 +506,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	 * created, the TCP RST packet cannot be sent, instead that
 	 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
 	 */
-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6)
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
+			    skb->dev);
+	else
+#endif
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
 	return NF_DROP;
 }
 
@@ -528,6 +552,14 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 	return err;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
+{
+	/* TODO IPv6: Find out what to do here for IPv6 */
+	return 0;
+}
+#endif
+
 /*
  * Packet has been made sufficiently writable in caller
  * - inout: 1=in->out, 0=out->in
@@ -727,11 +759,117 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 	return verdict;
 }
 
-static inline int is_tcp_reset(const struct sk_buff *skb)
+#ifdef CONFIG_IP_VS_IPV6
+static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
+{
+	struct ipv6hdr *iph;
+	struct icmp6hdr	_icmph, *ic;
+	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
+					   within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, verdict;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+		if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
+			return NF_STOLEN;
+	}
+
+	iph = ipv6_hdr(skb);
+	offset = sizeof(struct ipv6hdr);
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
+		  NIP6(iph->saddr), NIP6(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+	    (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+	    (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->nexthdr);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	/* TODO: we don't support fragmentation at the moment anyways */
+	if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
+
+	offset += sizeof(struct ipv6hdr);
+
+	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	if (!cp)
+		return NF_ACCEPT;
+
+	verdict = NF_DROP;
+
+	if (IP_VS_FWD_METHOD(cp) != 0) {
+		IP_VS_ERR("shouldn't reach here, because the box is on the "
+			  "half connection in the tun/dr module.\n");
+	}
+
+	/* Ensure the checksum is correct */
+	if (!skb_csum_unnecessary(skb)
+	    && ip_vs_checksum_complete(skb, sizeof(struct ipv6hdr))) {
+		/* Failed checksum! */
+		IP_VS_DBG(1, "Forward ICMPv6: failed checksum from "
+			  NIP6_FMT "!\n",
+			  NIP6(iph->saddr));
+		goto out;
+	}
+
+	if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+		offset += 2 * sizeof(__u16);
+	if (!skb_make_writable(skb, offset))
+		goto out;
+
+	ip_vs_nat_icmp_v6(skb, pp, cp, 1);
+
+	/* do the statistics and put it back */
+	ip_vs_out_stats(cp, skb);
+
+	skb->ipvs_property = 1;
+	verdict = NF_ACCEPT;
+
+out:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+#endif
+
+static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
 {
 	struct tcphdr _tcph, *th;
 
-	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return 0;
 	return th->rst;
@@ -750,38 +888,64 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
+	int af;
 
 	EnterFunction(11);
 
+	af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
 	if (skb->ipvs_property)
 		return NF_ACCEPT;
 
-	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
-	if (unlikely(iph.protocol == IPPROTO_ICMP)) {
-		int related, verdict = ip_vs_out_icmp(skb, &related);
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+			int related, verdict = ip_vs_out_icmp_v6(skb, &related);
 
-		if (related)
-			return verdict;
-		ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
-	}
+			if (related)
+				return verdict;
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+	} else
+#endif
+		if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+			int related, verdict = ip_vs_out_icmp(skb, &related);
+
+			if (related)
+				return verdict;
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
 
 	pp = ip_vs_proto_get(iph.protocol);
 	if (unlikely(!pp))
 		return NF_ACCEPT;
 
 	/* reassemble IP fragments */
-	if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
-		     !pp->dont_defrag)) {
-		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
-			return NF_STOLEN;
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+			int related, verdict = ip_vs_out_icmp_v6(skb, &related);
 
-		ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
-	}
+			if (related)
+				return verdict;
+
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+	} else
+#endif
+		if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
+			     !pp->dont_defrag)) {
+			if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+				return NF_STOLEN;
+
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
 
 	/*
 	 * Check if the packet belongs to an existing entry
 	 */
-	cp = pp->conn_out_get(AF_INET, skb, pp, &iph, iph.len, 0);
+	cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
 
 	if (unlikely(!cp)) {
 		if (sysctl_ip_vs_nat_icmp_send &&
@@ -794,16 +958,26 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 			if (pptr == NULL)
 				return NF_ACCEPT;	/* Not for me */
 			if (ip_vs_lookup_real_service(iph.protocol,
-						      iph.saddr.ip, pptr[0])) {
+						      iph.saddr.ip,
+						      pptr[0])) {
 				/*
 				 * Notify the real server: there is no
 				 * existing entry if it is not RST
 				 * packet or not TCP packet.
 				 */
 				if (iph.protocol != IPPROTO_TCP
-				    || !is_tcp_reset(skb)) {
-					icmp_send(skb,ICMP_DEST_UNREACH,
-						  ICMP_PORT_UNREACH, 0);
+				    || !is_tcp_reset(skb, iph.len)) {
+#ifdef CONFIG_IP_VS_IPV6
+					if (af == AF_INET6)
+						icmpv6_send(skb,
+							    ICMPV6_DEST_UNREACH,
+							    ICMPV6_PORT_UNREACH,
+							    0, skb->dev);
+					else
+#endif
+						icmp_send(skb,
+							  ICMP_DEST_UNREACH,
+							  ICMP_PORT_UNREACH, 0);
 					return NF_DROP;
 				}
 			}
@@ -821,8 +995,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	/* mangle the packet */
 	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
 		goto drop;
-	ip_hdr(skb)->saddr = cp->vaddr.ip;
-	ip_send_check(ip_hdr(skb));
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+	else
+#endif
+	{
+		ip_hdr(skb)->saddr = cp->vaddr.ip;
+		ip_send_check(ip_hdr(skb));
+	}
 
 	/* For policy routing, packets originating from this
 	 * machine itself may be routed differently to packets
@@ -830,8 +1012,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	 * if it came from this machine itself.  So re-compute
 	 * the routing information.
 	 */
-	if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
-		goto drop;
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (ip6_route_me_harder(skb) != 0)
+			goto drop;
+	} else
+#endif
+		if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+			goto drop;
 
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
 
@@ -949,6 +1137,94 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	return verdict;
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static int
+ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
+{
+	struct ipv6hdr *iph;
+	struct icmp6hdr	_icmph, *ic;
+	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
+					   within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, verdict;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+		if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
+					       IP_DEFRAG_VS_IN :
+					       IP_DEFRAG_VS_FWD))
+			return NF_STOLEN;
+	}
+
+	iph = ipv6_hdr(skb);
+	offset = sizeof(struct ipv6hdr);
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
+		  NIP6(iph->saddr), NIP6(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+	    (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+	    (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->nexthdr);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	/* TODO: we don't support fragmentation at the moment anyways */
+	if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
+
+	offset += sizeof(struct ipv6hdr);
+
+	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	if (!cp)
+		return NF_ACCEPT;
+
+	verdict = NF_DROP;
+
+	/* do the statistics and put it back */
+	ip_vs_in_stats(cp, skb);
+	if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+		offset += 2 * sizeof(__u16);
+	verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
+	/* do not touch skb anymore */
+
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+#endif
+
+
 /*
  *	Check if it's for virtual services, look it up,
  *	and send it on its way...
@@ -961,9 +1237,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
-	int ret, restart;
+	int ret, restart, af;
+
+	af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
 
-	ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
 	/*
 	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
@@ -974,7 +1252,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
 			      skb->pkt_type,
 			      iph.protocol,
-			      IP_VS_DBG_ADDR(AF_INET, &iph.daddr));
+			      IP_VS_DBG_ADDR(af, &iph.daddr));
 		return NF_ACCEPT;
 	}
 
@@ -983,7 +1261,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 
 		if (related)
 			return verdict;
-		ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph);
+		ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 	}
 
 	/* Protocol supported? */
@@ -994,12 +1272,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	/*
 	 * Check if the packet belongs to an existing connection entry
 	 */
-	cp = pp->conn_in_get(AF_INET, skb, pp, &iph, iph.len, 0);
+	cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
 
 	if (unlikely(!cp)) {
 		int v;
 
-		if (!pp->conn_schedule(AF_INET, skb, pp, &v, &cp))
+		if (!pp->conn_schedule(af, skb, pp, &v, &cp))
 			return v;
 	}
 
@@ -1082,6 +1360,21 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 	return ip_vs_in_icmp(skb, &r, hooknum);
 }
 
+#ifdef CONFIG_IP_VS_IPV6
+static unsigned int
+ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+		      const struct net_device *in, const struct net_device *out,
+		      int (*okfn)(struct sk_buff *))
+{
+	int r;
+
+	if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
+		return NF_ACCEPT;
+
+	return ip_vs_in_icmp_v6(skb, &r, hooknum);
+}
+#endif
+
 
 static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* After packet filtering, forward packet through VS/DR, VS/TUN,
-- 
cgit v1.2.3-70-g09d2


From 7937df1564783806c285d34a1c6fd63d8da29d7a Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:48 +0200
Subject: IPVS: Convert real server lookup functions

Convert functions for looking up destinations (real servers) to support
IPv6 services/dests.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h        |  8 +++--
 net/ipv4/ipvs/ip_vs_conn.c |  5 +--
 net/ipv4/ipvs/ip_vs_core.c |  4 +--
 net/ipv4/ipvs/ip_vs_ctl.c  | 80 ++++++++++++++++++++++++++++++----------------
 net/ipv4/ipvs/ip_vs_sync.c |  7 ++--
 5 files changed, 67 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index ac709fa5a79..a719c0ef99e 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -804,14 +804,16 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
 }
 
 extern struct ip_vs_dest *
-ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport);
+ip_vs_lookup_real_service(int af, __u16 protocol,
+			  const union nf_inet_addr *daddr, __be16 dport);
+
 extern int ip_vs_use_count_inc(void);
 extern void ip_vs_use_count_dec(void);
 extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
 extern struct ip_vs_dest *
-ip_vs_find_dest(__be32 daddr, __be16 dport,
-		 __be32 vaddr, __be16 vport, __u16 protocol);
+ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
+		const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
 extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 
 
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index f5dddad6d5e..c2a42a62433 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -491,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 	struct ip_vs_dest *dest;
 
 	if ((cp) && (!cp->dest)) {
-		dest = ip_vs_find_dest(cp->daddr.ip, cp->dport,
-				       cp->vaddr.ip, cp->vport, cp->protocol);
+		dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+				       &cp->vaddr, cp->vport,
+				       cp->protocol);
 		ip_vs_bind_dest(cp, dest);
 		return dest;
 	} else
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 035a511e12f..27bef1d67aa 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -957,8 +957,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 						  sizeof(_ports), _ports);
 			if (pptr == NULL)
 				return NF_ACCEPT;	/* Not for me */
-			if (ip_vs_lookup_real_service(iph.protocol,
-						      iph.saddr.ip,
+			if (ip_vs_lookup_real_service(af, iph.protocol,
+						      &iph.saddr,
 						      pptr[0])) {
 				/*
 				 * Notify the real server: there is no
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 1f3fc66e694..bb0e1e3c885 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -492,11 +492,20 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
 /*
  *	Returns hash value for real service
  */
-static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
+static inline unsigned ip_vs_rs_hashkey(int af,
+					    const union nf_inet_addr *addr,
+					    __be16 port)
 {
 	register unsigned porth = ntohs(port);
+	__be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		addr_fold = addr->ip6[0]^addr->ip6[1]^
+			    addr->ip6[2]^addr->ip6[3];
+#endif
 
-	return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
+	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
 		& IP_VS_RTAB_MASK;
 }
 
@@ -516,7 +525,8 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
 	 *	Hash by proto,addr,port,
 	 *	which are the parameters of the real service.
 	 */
-	hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port);
+	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+
 	list_add(&dest->d_list, &ip_vs_rtable[hash]);
 
 	return 1;
@@ -543,7 +553,9 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
  *	Lookup real service by <proto,addr,port> in the real service table.
  */
 struct ip_vs_dest *
-ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
+ip_vs_lookup_real_service(int af, __u16 protocol,
+			  const union nf_inet_addr *daddr,
+			  __be16 dport)
 {
 	unsigned hash;
 	struct ip_vs_dest *dest;
@@ -552,11 +564,12 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
 	 *	Check for "full" addressed entries
 	 *	Return the first found entry
 	 */
-	hash = ip_vs_rs_hashkey(daddr, dport);
+	hash = ip_vs_rs_hashkey(af, daddr, dport);
 
 	read_lock(&__ip_vs_rs_lock);
 	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
-		if ((dest->addr.ip == daddr)
+		if ((dest->af == af)
+		    && ip_vs_addr_equal(af, &dest->addr, daddr)
 		    && (dest->port == dport)
 		    && ((dest->protocol == protocol) ||
 			dest->vfwmark)) {
@@ -574,7 +587,8 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
  *	Lookup destination by {addr,port} in the given service
  */
 static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
+ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+		  __be16 dport)
 {
 	struct ip_vs_dest *dest;
 
@@ -582,7 +596,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
 	 * Find the destination for the given service
 	 */
 	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if ((dest->addr.ip == daddr) && (dest->port == dport)) {
+		if ((dest->af == svc->af)
+		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
+		    && (dest->port == dport)) {
 			/* HIT */
 			return dest;
 		}
@@ -601,14 +617,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
  * ip_vs_lookup_real_service() looked promissing, but
  * seems not working as expected.
  */
-struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
-				    __be32 vaddr, __be16 vport, __u16 protocol)
+struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+				   __be16 dport,
+				   const union nf_inet_addr *vaddr,
+				   __be16 vport, __u16 protocol)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_service *svc;
-	union nf_inet_addr _vaddr = { .ip = vaddr };
 
-	svc = ip_vs_service_get(AF_INET, 0, protocol, &_vaddr, vport);
+	svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
 	if (!svc)
 		return NULL;
 	dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -629,7 +646,8 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
  *  scheduling.
  */
 static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
+ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+		     __be16 dport)
 {
 	struct ip_vs_dest *dest, *nxt;
 
@@ -637,17 +655,19 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
 	 * Find the destination in trash
 	 */
 	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
-		IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
-			  "dest->refcnt=%d\n",
-			  dest->vfwmark,
-			  NIPQUAD(dest->addr.ip), ntohs(dest->port),
-			  atomic_read(&dest->refcnt));
-		if (dest->addr.ip == daddr &&
+		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
+			      "dest->refcnt=%d\n",
+			      dest->vfwmark,
+			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+			      ntohs(dest->port),
+			      atomic_read(&dest->refcnt));
+		if (dest->af == svc->af &&
+		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
 		    dest->port == dport &&
 		    dest->vfwmark == svc->fwmark &&
 		    dest->protocol == svc->protocol &&
 		    (svc->fwmark ||
-		     (dest->vaddr.ip == svc->addr.ip &&
+		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
 		      dest->vport == svc->port))) {
 			/* HIT */
 			return dest;
@@ -657,10 +677,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
 		 * Try to purge the destination from trash if not referenced
 		 */
 		if (atomic_read(&dest->refcnt) == 1) {
-			IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
-				  "from trash\n",
-				  dest->vfwmark,
-				  NIPQUAD(dest->addr.ip), ntohs(dest->port));
+			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
+				      "from trash\n",
+				      dest->vfwmark,
+				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+				      ntohs(dest->port));
 			list_del(&dest->n_list);
 			ip_vs_dst_reset(dest);
 			__ip_vs_unbind_svc(dest);
@@ -847,7 +868,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	/*
 	 * Check if the dest already exists in the list
 	 */
-	dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
+	dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
 	if (dest != NULL) {
 		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
 		return -EEXIST;
@@ -857,7 +879,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	 * Check if the dest already exists in the trash and
 	 * is from the same service
 	 */
-	dest = ip_vs_trash_get_dest(svc, daddr.ip, dport);
+	dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+
 	if (dest != NULL) {
 		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
 			  "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
@@ -956,7 +979,8 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	/*
 	 *  Lookup the destination list
 	 */
-	dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
+	dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
 	if (dest == NULL) {
 		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
 		return -ENOENT;
@@ -1054,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 
 	EnterFunction(2);
 
-	dest = ip_vs_lookup_dest(svc, udest->addr.ip, dport);
+	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
 
 	if (dest == NULL) {
 		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 3ce1093e067..40647edf102 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -383,8 +383,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 			 * If it is not found the connection will remain unbound
 			 * but still handled.
 			 */
-			dest = ip_vs_find_dest(s->daddr, s->dport,
-					       s->vaddr, s->vport,
+			dest = ip_vs_find_dest(AF_INET,
+					       (union nf_inet_addr *)&s->daddr,
+					       s->dport,
+					       (union nf_inet_addr *)&s->vaddr,
+					       s->vport,
 					       s->protocol);
 			/*  Set the approprite ativity flag */
 			if (s->protocol == IPPROTO_TCP) {
-- 
cgit v1.2.3-70-g09d2


From 667a5f18162e803e30722af46ade1737e3b93198 Mon Sep 17 00:00:00 2001
From: Vince Busam <vbusam@google.com>
Date: Tue, 2 Sep 2008 15:55:49 +0200
Subject: IPVS: Convert procfs files for IPv6 entry output

Correctly output IPv6 connection/service/dest entries in procfs files.

Signed-off-by: Vince Busam <vbusam@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_conn.c | 37 +++++++++++++++++++++++++++----
 net/ipv4/ipvs/ip_vs_ctl.c  | 54 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 73 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index c2a42a62433..e7603d749c0 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -815,8 +815,22 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 	else {
 		const struct ip_vs_conn *cp = v;
 
-		seq_printf(seq,
-			"%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n",
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			seq_printf(seq,
+				"%-3s " NIP6_FMT " %04X " NIP6_FMT
+				" %04X " NIP6_FMT " %04X %-11s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				NIP6(cp->caddr.in6), ntohs(cp->cport),
+				NIP6(cp->vaddr.in6), ntohs(cp->vport),
+				NIP6(cp->daddr.in6), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				(cp->timer.expires-jiffies)/HZ);
+		else
+#endif
+			seq_printf(seq,
+				"%-3s %08X %04X %08X %04X"
+				" %08X %04X %-11s %7lu\n",
 				ip_vs_proto_name(cp->protocol),
 				ntohl(cp->caddr.ip), ntohs(cp->cport),
 				ntohl(cp->vaddr.ip), ntohs(cp->vport),
@@ -864,8 +878,23 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
 	else {
 		const struct ip_vs_conn *cp = v;
 
-		seq_printf(seq,
-			"%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			seq_printf(seq,
+				"%-3s " NIP6_FMT " %04X " NIP6_FMT
+				" %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				NIP6(cp->caddr.in6), ntohs(cp->cport),
+				NIP6(cp->vaddr.in6), ntohs(cp->vport),
+				NIP6(cp->daddr.in6), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_origin_name(cp->flags),
+				(cp->timer.expires-jiffies)/HZ);
+		else
+#endif
+			seq_printf(seq,
+				"%-3s %08X %04X %08X %04X "
+				"%08X %04X %-11s %-6s %7lu\n",
 				ip_vs_proto_name(cp->protocol),
 				ntohl(cp->caddr.ip), ntohs(cp->cport),
 				ntohl(cp->vaddr.ip), ntohs(cp->vport),
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index bb0e1e3c885..25d9e98e31f 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1793,15 +1793,25 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 		const struct ip_vs_iter *iter = seq->private;
 		const struct ip_vs_dest *dest;
 
-		if (iter->table == ip_vs_svc_table)
-			seq_printf(seq, "%s  %08X:%04X %s ",
-				   ip_vs_proto_name(svc->protocol),
-				   ntohl(svc->addr.ip),
-				   ntohs(svc->port),
-				   svc->scheduler->name);
-		else
+		if (iter->table == ip_vs_svc_table) {
+#ifdef CONFIG_IP_VS_IPV6
+			if (svc->af == AF_INET6)
+				seq_printf(seq, "%s  [" NIP6_FMT "]:%04X %s ",
+					   ip_vs_proto_name(svc->protocol),
+					   NIP6(svc->addr.in6),
+					   ntohs(svc->port),
+					   svc->scheduler->name);
+			else
+#endif
+				seq_printf(seq, "%s  %08X:%04X %s ",
+					   ip_vs_proto_name(svc->protocol),
+					   ntohl(svc->addr.ip),
+					   ntohs(svc->port),
+					   svc->scheduler->name);
+		} else {
 			seq_printf(seq, "FWM  %08X %s ",
 				   svc->fwmark, svc->scheduler->name);
+		}
 
 		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
 			seq_printf(seq, "persistent %d %08X\n",
@@ -1811,13 +1821,29 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 			seq_putc(seq, '\n');
 
 		list_for_each_entry(dest, &svc->destinations, n_list) {
-			seq_printf(seq,
-				   "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
-				   ntohl(dest->addr.ip), ntohs(dest->port),
-				   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
-				   atomic_read(&dest->weight),
-				   atomic_read(&dest->activeconns),
-				   atomic_read(&dest->inactconns));
+#ifdef CONFIG_IP_VS_IPV6
+			if (dest->af == AF_INET6)
+				seq_printf(seq,
+					   "  -> [" NIP6_FMT "]:%04X"
+					   "      %-7s %-6d %-10d %-10d\n",
+					   NIP6(dest->addr.in6),
+					   ntohs(dest->port),
+					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+					   atomic_read(&dest->weight),
+					   atomic_read(&dest->activeconns),
+					   atomic_read(&dest->inactconns));
+			else
+#endif
+				seq_printf(seq,
+					   "  -> %08X:%04X      "
+					   "%-7s %-6d %-10d %-10d\n",
+					   ntohl(dest->addr.ip),
+					   ntohs(dest->port),
+					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+					   atomic_read(&dest->weight),
+					   atomic_read(&dest->activeconns),
+					   atomic_read(&dest->inactconns));
+
 		}
 	}
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From c6883f587341a3ed113856de8769d0992b4bbd85 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:50 +0200
Subject: IVPS: Disable sync daemon for IPv6 connections

Disable the sync daemon for IPv6 connections, works only with IPv4 for now.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 27bef1d67aa..5a7a81778b0 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1321,7 +1321,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	 * encorage the standby servers to update the connections timeout
 	 */
 	atomic_inc(&cp->in_pkts);
-	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	if (af == AF_INET &&
+	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
 	    (((cp->protocol != IPPROTO_TCP ||
 	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
 	      (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
-- 
cgit v1.2.3-70-g09d2


From a0eb662f9ec8962928d937a185ad128db12c4637 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:51 +0200
Subject: IPVS: Turn off FTP application helper for IPv6

Immediately return from FTP application helper and do nothing when dealing
with IPv6 packets. IPv6 is not supported by this helper yet.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ftp.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 0c3fbe0de5f..2e7dbd8b73a 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -147,6 +147,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	unsigned buf_len;
 	int ret;
 
+#ifdef CONFIG_IP_VS_IPV6
+	/* This application helper doesn't work with IPv6 yet,
+	 * so turn this into a no-op for IPv6 packets
+	 */
+	if (cp->af == AF_INET6)
+		return 1;
+#endif
+
 	*diff = 0;
 
 	/* Only useful for established sessions */
@@ -248,6 +256,14 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	__be16 port;
 	struct ip_vs_conn *n_cp;
 
+#ifdef CONFIG_IP_VS_IPV6
+	/* This application helper doesn't work with IPv6 yet,
+	 * so turn this into a no-op for IPv6 packets
+	 */
+	if (cp->af == AF_INET6)
+		return 1;
+#endif
+
 	/* no diff required for incoming packets */
 	*diff = 0;
 
-- 
cgit v1.2.3-70-g09d2


From 09571c7ae30865adfa79dccd12a822a65d2c4b5a Mon Sep 17 00:00:00 2001
From: Vince Busam <vbusam@google.com>
Date: Tue, 2 Sep 2008 15:55:52 +0200
Subject: IPVS: Add function to determine if IPv6 address is local

Add __ip_vs_addr_is_local_v6() to find out if an IPv6 address belongs to a
local interface. Use this function to decide whether to set the
IP_VS_CONN_F_LOCALNODE flag for IPv6 destinations.

Signed-off-by: Vince Busam <vbusam@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 56 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 25d9e98e31f..640203a153c 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -35,6 +35,10 @@
 
 #include <net/net_namespace.h>
 #include <net/ip.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#endif
 #include <net/route.h>
 #include <net/sock.h>
 #include <net/genetlink.h>
@@ -91,6 +95,26 @@ int ip_vs_get_debug_level(void)
 }
 #endif
 
+#ifdef CONFIG_IP_VS_IPV6
+/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
+static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+{
+	struct rt6_info *rt;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *addr,
+				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+	};
+
+	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
+			return 1;
+
+	return 0;
+}
+#endif
 /*
  *	update_defense_level is called from keventd and from sysctl,
  *	so it needs to protect itself from softirqs
@@ -751,10 +775,18 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
 	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
 
 	/* check if local node and update the flags */
-	if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
-		conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
-			| IP_VS_CONN_F_LOCALNODE;
-	}
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6) {
+		if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
+			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+				| IP_VS_CONN_F_LOCALNODE;
+		}
+	} else
+#endif
+		if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
+			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+				| IP_VS_CONN_F_LOCALNODE;
+		}
 
 	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
 	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
@@ -803,9 +835,19 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 
 	EnterFunction(2);
 
-	atype = inet_addr_type(&init_net, udest->addr.ip);
-	if (atype != RTN_LOCAL && atype != RTN_UNICAST)
-		return -EINVAL;
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6) {
+		atype = ipv6_addr_type(&udest->addr.in6);
+		if (!(atype & IPV6_ADDR_UNICAST) &&
+			!__ip_vs_addr_is_local_v6(&udest->addr.in6))
+			return -EINVAL;
+	} else
+#endif
+	{
+		atype = inet_addr_type(&init_net, udest->addr.ip);
+		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
+			return -EINVAL;
+	}
 
 	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
 	if (dest == NULL) {
-- 
cgit v1.2.3-70-g09d2


From cfc78c5a09241a3a9561466834996a7fb90c4228 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:53 +0200
Subject: IPVS: Adjust various debug outputs to use new macros

Adjust various debug outputs to use the new *_BUF macro variants for
correct output of v4/v6 addresses.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             | 53 +++++++++++++++++++++++---------------
 net/ipv4/ipvs/ip_vs_conn.c      | 57 ++++++++++++++++++++++-------------------
 net/ipv4/ipvs/ip_vs_ctl.c       | 24 +++++++++--------
 net/ipv4/ipvs/ip_vs_proto_tcp.c | 45 ++++++++++++++++++--------------
 net/ipv4/ipvs/ip_vs_proto_udp.c | 15 ++++++-----
 5 files changed, 111 insertions(+), 83 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a719c0ef99e..1b13cef4b54 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -680,24 +680,32 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp)
 {
 	struct ip_vs_conn *ctl_cp = cp->control;
 	if (!ctl_cp) {
-		IP_VS_ERR("request control DEL for uncontrolled: "
-			  "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
-			  NIPQUAD(cp->caddr),ntohs(cp->cport),
-			  NIPQUAD(cp->vaddr),ntohs(cp->vport));
+		IP_VS_ERR_BUF("request control DEL for uncontrolled: "
+			      "%s:%d to %s:%d\n",
+			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+			      ntohs(cp->cport),
+			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+			      ntohs(cp->vport));
+
 		return;
 	}
 
-	IP_VS_DBG(7, "DELeting control for: "
-		  "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
-		  NIPQUAD(cp->caddr),ntohs(cp->cport),
-		  NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+	IP_VS_DBG_BUF(7, "DELeting control for: "
+		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+		      ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
+		      ntohs(ctl_cp->cport));
 
 	cp->control = NULL;
 	if (atomic_read(&ctl_cp->n_control) == 0) {
-		IP_VS_ERR("BUG control DEL with n=0 : "
-			  "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
-			  NIPQUAD(cp->caddr),ntohs(cp->cport),
-			  NIPQUAD(cp->vaddr),ntohs(cp->vport));
+		IP_VS_ERR_BUF("BUG control DEL with n=0 : "
+			      "%s:%d to %s:%d\n",
+			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+			      ntohs(cp->cport),
+			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+			      ntohs(cp->vport));
+
 		return;
 	}
 	atomic_dec(&ctl_cp->n_control);
@@ -707,17 +715,22 @@ static inline void
 ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 {
 	if (cp->control) {
-		IP_VS_ERR("request control ADD for already controlled: "
-			  "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
-			  NIPQUAD(cp->caddr),ntohs(cp->cport),
-			  NIPQUAD(cp->vaddr),ntohs(cp->vport));
+		IP_VS_ERR_BUF("request control ADD for already controlled: "
+			      "%s:%d to %s:%d\n",
+			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+			      ntohs(cp->cport),
+			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+			      ntohs(cp->vport));
+
 		ip_vs_control_del(cp);
 	}
 
-	IP_VS_DBG(7, "ADDing control for: "
-		  "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
-		  NIPQUAD(cp->caddr),ntohs(cp->cport),
-		  NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+	IP_VS_DBG_BUF(7, "ADDing control for: "
+		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+		      ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
+		      ntohs(ctl_cp->cport));
 
 	cp->control = ctl_cp;
 	atomic_inc(&ctl_cp->n_control);
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index e7603d749c0..9a24332fbed 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -449,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 		cp->flags |= atomic_read(&dest->conn_flags);
 	cp->dest = dest;
 
-	IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-		  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
-		  "dest->refcnt:%d\n",
-		  ip_vs_proto_name(cp->protocol),
-		  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
-		  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
-		  NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
-		  ip_vs_fwd_tag(cp), cp->state,
-		  cp->flags, atomic_read(&cp->refcnt),
-		  atomic_read(&dest->refcnt));
+	IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
+		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		      "dest->refcnt:%d\n",
+		      ip_vs_proto_name(cp->protocol),
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+		      ip_vs_fwd_tag(cp), cp->state,
+		      cp->flags, atomic_read(&cp->refcnt),
+		      atomic_read(&dest->refcnt));
 
 	/* Update the connection counters */
 	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -512,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 	if (!dest)
 		return;
 
-	IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-		  "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
-		  "dest->refcnt:%d\n",
-		  ip_vs_proto_name(cp->protocol),
-		  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
-		  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
-		  NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
-		  ip_vs_fwd_tag(cp), cp->state,
-		  cp->flags, atomic_read(&cp->refcnt),
-		  atomic_read(&dest->refcnt));
+	IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
+		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		      "dest->refcnt:%d\n",
+		      ip_vs_proto_name(cp->protocol),
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+		      ip_vs_fwd_tag(cp), cp->state,
+		      cp->flags, atomic_read(&cp->refcnt),
+		      atomic_read(&dest->refcnt));
 
 	/* Update the connection counters */
 	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -574,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
 	    (sysctl_ip_vs_expire_quiescent_template &&
 	     (atomic_read(&dest->weight) == 0))) {
-		IP_VS_DBG(9, "check_template: dest not available for "
-			  "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
-			  "-> d:%u.%u.%u.%u:%d\n",
-			  ip_vs_proto_name(ct->protocol),
-			  NIPQUAD(ct->caddr.ip), ntohs(ct->cport),
-			  NIPQUAD(ct->vaddr.ip), ntohs(ct->vport),
-			  NIPQUAD(ct->daddr.ip), ntohs(ct->dport));
+		IP_VS_DBG_BUF(9, "check_template: dest not available for "
+			      "protocol %s s:%s:%d v:%s:%d "
+			      "-> d:%s:%d\n",
+			      ip_vs_proto_name(ct->protocol),
+			      IP_VS_DBG_ADDR(ct->af, &ct->caddr),
+			      ntohs(ct->cport),
+			      IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
+			      ntohs(ct->vport),
+			      IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+			      ntohs(ct->dport));
 
 		/*
 		 * Invalidate the connection template
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 640203a153c..6dbc527285f 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -924,13 +924,14 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	dest = ip_vs_trash_get_dest(svc, &daddr, dport);
 
 	if (dest != NULL) {
-		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
-			  "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
-			  NIPQUAD(daddr), ntohs(dport),
-			  atomic_read(&dest->refcnt),
-			  dest->vfwmark,
-			  NIPQUAD(dest->vaddr.ip),
-			  ntohs(dest->vport));
+		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
+			      "dest->refcnt=%d, service %u/%s:%u\n",
+			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+			      atomic_read(&dest->refcnt),
+			      dest->vfwmark,
+			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
+			      ntohs(dest->vport));
+
 		__ip_vs_update_dest(svc, dest, udest);
 
 		/*
@@ -1076,10 +1077,11 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 		atomic_dec(&dest->svc->refcnt);
 		kfree(dest);
 	} else {
-		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
-			  "dest->refcnt=%d\n",
-			  NIPQUAD(dest->addr.ip), ntohs(dest->port),
-			  atomic_read(&dest->refcnt));
+		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
+			      "dest->refcnt=%d\n",
+			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
+			      ntohs(dest->port),
+			      atomic_read(&dest->refcnt));
 		list_add(&dest->n_list, &ip_vs_dest_trash);
 		atomic_inc(&dest->refcnt);
 	}
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 3da2bb05ee7..de8ed73997c 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -490,19 +490,23 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 	if (new_state != cp->state) {
 		struct ip_vs_dest *dest = cp->dest;
 
-		IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
-			  "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
-			  pp->name,
-			  (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
-			  th->syn? 'S' : '.',
-			  th->fin? 'F' : '.',
-			  th->ack? 'A' : '.',
-			  th->rst? 'R' : '.',
-			  NIPQUAD(cp->daddr.ip), ntohs(cp->dport),
-			  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
-			  tcp_state_name(cp->state),
-			  tcp_state_name(new_state),
-			  atomic_read(&cp->refcnt));
+		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
+			      "%s:%d state: %s->%s conn->refcnt:%d\n",
+			      pp->name,
+			      ((state_off == TCP_DIR_OUTPUT) ?
+			       "output " : "input "),
+			      th->syn ? 'S' : '.',
+			      th->fin ? 'F' : '.',
+			      th->ack ? 'A' : '.',
+			      th->rst ? 'R' : '.',
+			      IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+			      ntohs(cp->dport),
+			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+			      ntohs(cp->cport),
+			      tcp_state_name(cp->state),
+			      tcp_state_name(new_state),
+			      atomic_read(&cp->refcnt));
+
 		if (dest) {
 			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
 			    (new_state != IP_VS_TCP_S_ESTABLISHED)) {
@@ -623,12 +627,15 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 				break;
 			spin_unlock(&tcp_app_lock);
 
-			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
-				  "%u.%u.%u.%u:%u to app %s on port %u\n",
-				  __func__,
-				  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
-				  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
-				  inc->name, ntohs(inc->port));
+			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+				      "%s:%u to app %s on port %u\n",
+				      __func__,
+				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+				      ntohs(cp->cport),
+				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+				      ntohs(cp->vport),
+				      inc->name, ntohs(inc->port));
+
 			cp->app = inc;
 			if (inc->init_conn)
 				result = inc->init_conn(inc, cp);
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index fd8bd934cc0..5f2073e41cf 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -408,12 +408,15 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 				break;
 			spin_unlock(&udp_app_lock);
 
-			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
-				  "%u.%u.%u.%u:%u to app %s on port %u\n",
-				  __func__,
-				  NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
-				  NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
-				  inc->name, ntohs(inc->port));
+			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+				      "%s:%u to app %s on port %u\n",
+				      __func__,
+				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+				      ntohs(cp->cport),
+				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+				      ntohs(cp->vport),
+				      inc->name, ntohs(inc->port));
+
 			cp->app = inc;
 			if (inc->init_conn)
 				result = inc->init_conn(inc, cp);
-- 
cgit v1.2.3-70-g09d2


From 473b23d37b697c66ac0bfcfdcc9badf718e25d2a Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:54 +0200
Subject: IPVS: Activate IPv6 Netfilter hooks

Register the previously defined or adapted netfilter hook functions for
IPv6 as PF_INET6 hooks.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_core.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 5a7a81778b0..7d3de9db5ac 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1413,6 +1413,43 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 		.hooknum        = NF_INET_POST_ROUTING,
 		.priority       = NF_IP_PRI_NAT_SRC-1,
 	},
+#ifdef CONFIG_IP_VS_IPV6
+	/* After packet filtering, forward packet through VS/DR, VS/TUN,
+	 * or VS/NAT(change destination), so that filtering rules can be
+	 * applied to IPVS. */
+	{
+		.hook		= ip_vs_in,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_LOCAL_IN,
+		.priority       = 100,
+	},
+	/* After packet filtering, change source only for VS/NAT */
+	{
+		.hook		= ip_vs_out,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 100,
+	},
+	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
+	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+	{
+		.hook		= ip_vs_forward_icmp_v6,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 99,
+	},
+	/* Before the netfilter connection tracking, exit from POST_ROUTING */
+	{
+		.hook		= ip_vs_post_routing,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_POST_ROUTING,
+		.priority       = NF_IP6_PRI_NAT_SRC-1,
+	},
+#endif
 };
 
 
-- 
cgit v1.2.3-70-g09d2


From f94fd041402e4e70d2b4ed00008b9bb857e6ae87 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Tue, 2 Sep 2008 15:55:55 +0200
Subject: IPVS: Allow adding IPv6 services from userspace

Allow adding IPv6 services through the genetlink interface and add checks
to see if the chosen scheduler is supported with IPv6 and whether the
supplied prefix length is sane. Make sure the service count exported via
the sockopt interface only counts IPv4 services.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 53 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 48 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 6dbc527285f..7f89c588e58 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1177,6 +1177,19 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 		goto out_mod_dec;
 	}
 
+#ifdef CONFIG_IP_VS_IPV6
+	if (u->af == AF_INET6) {
+		if (!sched->supports_ipv6) {
+			ret = -EAFNOSUPPORT;
+			goto out_err;
+		}
+		if ((u->netmask < 1) || (u->netmask > 128)) {
+			ret = -EINVAL;
+			goto out_err;
+		}
+	}
+#endif
+
 	svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
 	if (svc == NULL) {
 		IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
@@ -1214,7 +1227,10 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 		atomic_inc(&ip_vs_nullsvc_counter);
 
 	ip_vs_new_estimator(&svc->stats);
-	ip_vs_num_services++;
+
+	/* Count only IPv4 services for old get/setsockopt interface */
+	if (svc->af == AF_INET)
+		ip_vs_num_services++;
 
 	/* Hash the service into the service table */
 	write_lock_bh(&__ip_vs_svc_lock);
@@ -1265,6 +1281,19 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 	}
 	old_sched = sched;
 
+#ifdef CONFIG_IP_VS_IPV6
+	if (u->af == AF_INET6) {
+		if (!sched->supports_ipv6) {
+			ret = EAFNOSUPPORT;
+			goto out;
+		}
+		if ((u->netmask < 1) || (u->netmask > 128)) {
+			ret = EINVAL;
+			goto out;
+		}
+	}
+#endif
+
 	write_lock_bh(&__ip_vs_svc_lock);
 
 	/*
@@ -1329,7 +1358,10 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 	struct ip_vs_dest *dest, *nxt;
 	struct ip_vs_scheduler *old_sched;
 
-	ip_vs_num_services--;
+	/* Count only IPv4 services for old get/setsockopt interface */
+	if (svc->af == AF_INET)
+		ip_vs_num_services--;
+
 	ip_vs_kill_estimator(&svc->stats);
 
 	/* Unbind scheduler */
@@ -2212,6 +2244,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			/* Only expose IPv4 entries to old interface */
+			if (svc->af != AF_INET)
+				continue;
+
 			if (count >= get->num_services)
 				goto out;
 			memset(&entry, 0, sizeof(entry));
@@ -2227,6 +2263,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			/* Only expose IPv4 entries to old interface */
+			if (svc->af != AF_INET)
+				continue;
+
 			if (count >= get->num_services)
 				goto out;
 			memset(&entry, 0, sizeof(entry));
@@ -2584,7 +2624,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
 	if (!nl_service)
 		return -EMSGSIZE;
 
-	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
 
 	if (svc->fwmark) {
 		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
@@ -2691,8 +2731,11 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
 		return -EINVAL;
 
 	usvc->af = nla_get_u16(nla_af);
-	/* For now, only support IPv4 */
-	if (nla_get_u16(nla_af) != AF_INET)
+#ifdef CONFIG_IP_VS_IPV6
+	if (usvc->af != AF_INET && usvc->af != AF_INET6)
+#else
+	if (usvc->af != AF_INET)
+#endif
 		return -EAFNOSUPPORT;
 
 	if (nla_fwmark) {
-- 
cgit v1.2.3-70-g09d2


From 4856c84c1358b79852743ac64e50c1e9d5118f05 Mon Sep 17 00:00:00 2001
From: Malcolm Turnbull <malcolm@loadbalancer.org>
Date: Fri, 5 Sep 2008 11:17:13 +1000
Subject: ipvs: load balance IPv4 connections from a local process
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows IPVS to load balance connections made by a local process.
For example a proxy server running locally.

External client --> pound:443 -> Local:443 --> IPVS:80 --> RealServer

Signed-off-by: Siim Põder <siim@p6drad-teel.net>
Signed-off-by: Malcolm Turnbull <malcolm@loadbalancer.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_core.c      | 224 +++++++++++++++++++++++-----------------
 net/ipv4/ipvs/ip_vs_proto_tcp.c |   4 +-
 2 files changed, 134 insertions(+), 94 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 7d3de9db5ac..26e3d99bbee 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -651,12 +651,53 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
 }
 #endif
 
+/* Handle relevant response ICMP messages - forward to the right
+ * destination host. Used for NAT and local client.
+ */
+static int handle_response_icmp(struct sk_buff *skb, struct iphdr *iph,
+				struct iphdr *cih, struct ip_vs_conn *cp,
+				struct ip_vs_protocol *pp,
+				unsigned int offset, unsigned int ihl)
+{
+	unsigned int verdict = NF_DROP;
+
+	if (IP_VS_FWD_METHOD(cp) != 0) {
+		IP_VS_ERR("shouldn't reach here, because the box is on the "
+			  "half connection in the tun/dr module.\n");
+	}
+
+	/* Ensure the checksum is correct */
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+		/* Failed checksum! */
+		IP_VS_DBG(1,
+			  "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
+			  NIPQUAD(iph->saddr));
+		goto out;
+	}
+
+	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+		offset += 2 * sizeof(__u16);
+	if (!skb_make_writable(skb, offset))
+		goto out;
+
+	ip_vs_nat_icmp(skb, pp, cp, 1);
+
+	/* do the statistics and put it back */
+	ip_vs_out_stats(cp, skb);
+
+	skb->ipvs_property = 1;
+	verdict = NF_ACCEPT;
+
+out:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+
 /*
  *	Handle ICMP messages in the inside-to-outside direction (outgoing).
- *	Find any that might be relevant, check against existing connections,
- *	forward to the right destination host if relevant.
+ *	Find any that might be relevant, check against existing connections.
  *	Currently handles error types - unreachable, quench, ttl exceeded.
- *	(Only used in VS/NAT)
  */
 static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 {
@@ -666,7 +707,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
-	unsigned int offset, ihl, verdict;
+	unsigned int offset, ihl;
 
 	*related = 1;
 
@@ -725,38 +766,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 	if (!cp)
 		return NF_ACCEPT;
 
-	verdict = NF_DROP;
-
-	if (IP_VS_FWD_METHOD(cp) != 0) {
-		IP_VS_ERR("shouldn't reach here, because the box is on the "
-			  "half connection in the tun/dr module.\n");
-	}
-
-	/* Ensure the checksum is correct */
-	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
-		/* Failed checksum! */
-		IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
-			  NIPQUAD(iph->saddr));
-		goto out;
-	}
-
-	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
-		offset += 2 * sizeof(__u16);
-	if (!skb_make_writable(skb, offset))
-		goto out;
-
-	ip_vs_nat_icmp(skb, pp, cp, 1);
-
-	/* do the statistics and put it back */
-	ip_vs_out_stats(cp, skb);
-
-	skb->ipvs_property = 1;
-	verdict = NF_ACCEPT;
-
-  out:
-	__ip_vs_conn_put(cp);
-
-	return verdict;
+	return handle_response_icmp(skb, iph, cih, cp, pp, offset, ihl);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -875,10 +885,76 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
 	return th->rst;
 }
 
+/* Handle response packets: rewrite addresses and send away...
+ * Used for NAT and local client.
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		struct ip_vs_conn *cp, int ihl)
+{
+	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+	if (!skb_make_writable(skb, ihl))
+		goto drop;
+
+	/* mangle the packet */
+	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+		goto drop;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+	else
+#endif
+	{
+		ip_hdr(skb)->saddr = cp->vaddr.ip;
+		ip_send_check(ip_hdr(skb));
+	}
+
+	/* For policy routing, packets originating from this
+	 * machine itself may be routed differently to packets
+	 * passing through.  We want this packet to be routed as
+	 * if it came from this machine itself.  So re-compute
+	 * the routing information.
+	 */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (ip6_route_me_harder(skb) != 0)
+			goto drop;
+	} else
+#endif
+		if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+			goto drop;
+
+	/* For policy routing, packets originating from this
+	 * machine itself may be routed differently to packets
+	 * passing through.  We want this packet to be routed as
+	 * if it came from this machine itself.  So re-compute
+	 * the routing information.
+	 */
+	if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+		goto drop;
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+	ip_vs_out_stats(cp, skb);
+	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+	ip_vs_conn_put(cp);
+
+	skb->ipvs_property = 1;
+
+	LeaveFunction(11);
+	return NF_ACCEPT;
+
+drop:
+	ip_vs_conn_put(cp);
+	kfree_skb(skb);
+	return NF_STOLEN;
+}
+
 /*
  *	It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
- *	Check if outgoing packet belongs to the established ip_vs_conn,
- *      rewrite addresses of the packet and send it on its way...
+ *	Check if outgoing packet belongs to the established ip_vs_conn.
  */
 static unsigned int
 ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
@@ -987,55 +1063,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 
-	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
-
-	if (!skb_make_writable(skb, iph.len))
-		goto drop;
-
-	/* mangle the packet */
-	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
-		goto drop;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		ipv6_hdr(skb)->saddr = cp->vaddr.in6;
-	else
-#endif
-	{
-		ip_hdr(skb)->saddr = cp->vaddr.ip;
-		ip_send_check(ip_hdr(skb));
-	}
-
-	/* For policy routing, packets originating from this
-	 * machine itself may be routed differently to packets
-	 * passing through.  We want this packet to be routed as
-	 * if it came from this machine itself.  So re-compute
-	 * the routing information.
-	 */
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6) {
-		if (ip6_route_me_harder(skb) != 0)
-			goto drop;
-	} else
-#endif
-		if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
-			goto drop;
-
-	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
-
-	ip_vs_out_stats(cp, skb);
-	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
-	ip_vs_conn_put(cp);
-
-	skb->ipvs_property = 1;
-
-	LeaveFunction(11);
-	return NF_ACCEPT;
-
-  drop:
-	ip_vs_conn_put(cp);
-	kfree_skb(skb);
-	return NF_STOLEN;
+	return handle_response(af, skb, pp, cp, iph.len);
 }
 
 
@@ -1111,8 +1139,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
 	cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
-	if (!cp)
+	if (!cp) {
+		/* The packet could also belong to a local client */
+		cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+		if (cp)
+			return handle_response_icmp(skb, iph, cih, cp, pp,
+						    offset, ihl);
 		return NF_ACCEPT;
+	}
 
 	verdict = NF_DROP;
 
@@ -1244,11 +1278,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
 	/*
-	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
-	 *	... don't know why 1st test DOES NOT include 2nd (?)
+	 *	Big tappo: only PACKET_HOST, including loopback for local client
+	 *	Don't handle local packets on IPv6 for now
 	 */
-	if (unlikely(skb->pkt_type != PACKET_HOST
-		     || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
+	if (unlikely(skb->pkt_type != PACKET_HOST ||
+		     (af == AF_INET6 || (skb->dev->flags & IFF_LOOPBACK ||
+					 skb->sk)))) {
 		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
 			      skb->pkt_type,
 			      iph.protocol,
@@ -1277,6 +1312,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	if (unlikely(!cp)) {
 		int v;
 
+		/* For local client packets, it could be a response */
+		cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+		if (cp)
+			return handle_response(af, skb, pp, cp, iph.len);
+
 		if (!pp->conn_schedule(af, skb, pp, &v, &cp))
 			return v;
 	}
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index de8ed73997c..808e8be0280 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -166,7 +166,7 @@ tcp_snat_handler(struct sk_buff *skb,
 	tcph->source = cp->vport;
 
 	/* Adjust TCP checksums */
-	if (!cp->app) {
+	if (!cp->app && (tcph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
@@ -235,7 +235,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 	/*
 	 *	Adjust TCP checksums
 	 */
-	if (!cp->app) {
+	if (!cp->app && (tcph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
-- 
cgit v1.2.3-70-g09d2


From f2428ed5e7bc89c7716ead22748cb5d076e204f0 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Fri, 5 Sep 2008 11:17:14 +1000
Subject: ipvs: load balance ipv6 connections from a local process
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows IPVS to load balance IPv6 connections made by a local process.
For example a proxy server running locally.

External client --> pound:443 -> Local:443 --> IPVS:80 --> RealServer

This is an extenstion to the IPv4 work done in this area
by Siim Põder and Malcolm Turnbull.

Cc: Siim Põder <siim@p6drad-teel.net>
Cc: Malcolm Turnbull <malcolm@loadbalancer.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_core.c | 91 +++++++++++++++++++++-------------------------
 1 file changed, 41 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 26e3d99bbee..05797a5ce15 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -654,8 +654,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
 /* Handle relevant response ICMP messages - forward to the right
  * destination host. Used for NAT and local client.
  */
-static int handle_response_icmp(struct sk_buff *skb, struct iphdr *iph,
-				struct iphdr *cih, struct ip_vs_conn *cp,
+static int handle_response_icmp(int af, struct sk_buff *skb,
+				union nf_inet_addr *snet,
+				__u8 protocol, struct ip_vs_conn *cp,
 				struct ip_vs_protocol *pp,
 				unsigned int offset, unsigned int ihl)
 {
@@ -669,18 +670,22 @@ static int handle_response_icmp(struct sk_buff *skb, struct iphdr *iph,
 	/* Ensure the checksum is correct */
 	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
 		/* Failed checksum! */
-		IP_VS_DBG(1,
-			  "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
-			  NIPQUAD(iph->saddr));
+		IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n",
+			      IP_VS_DBG_ADDR(af, snet));
 		goto out;
 	}
 
-	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+	if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
 		offset += 2 * sizeof(__u16);
 	if (!skb_make_writable(skb, offset))
 		goto out;
 
-	ip_vs_nat_icmp(skb, pp, cp, 1);
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_nat_icmp_v6(skb, pp, cp, 1);
+	else
+#endif
+		ip_vs_nat_icmp(skb, pp, cp, 1);
 
 	/* do the statistics and put it back */
 	ip_vs_out_stats(cp, skb);
@@ -708,6 +713,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
 	unsigned int offset, ihl;
+	union nf_inet_addr snet;
 
 	*related = 1;
 
@@ -766,7 +772,9 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 	if (!cp)
 		return NF_ACCEPT;
 
-	return handle_response_icmp(skb, iph, cih, cp, pp, offset, ihl);
+	snet.ip = iph->saddr;
+	return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
+				    pp, offset, ihl);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -779,7 +787,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
 	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
-	unsigned int offset, verdict;
+	unsigned int offset;
+	union nf_inet_addr snet;
 
 	*related = 1;
 
@@ -838,40 +847,9 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
 	if (!cp)
 		return NF_ACCEPT;
 
-	verdict = NF_DROP;
-
-	if (IP_VS_FWD_METHOD(cp) != 0) {
-		IP_VS_ERR("shouldn't reach here, because the box is on the "
-			  "half connection in the tun/dr module.\n");
-	}
-
-	/* Ensure the checksum is correct */
-	if (!skb_csum_unnecessary(skb)
-	    && ip_vs_checksum_complete(skb, sizeof(struct ipv6hdr))) {
-		/* Failed checksum! */
-		IP_VS_DBG(1, "Forward ICMPv6: failed checksum from "
-			  NIP6_FMT "!\n",
-			  NIP6(iph->saddr));
-		goto out;
-	}
-
-	if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
-		offset += 2 * sizeof(__u16);
-	if (!skb_make_writable(skb, offset))
-		goto out;
-
-	ip_vs_nat_icmp_v6(skb, pp, cp, 1);
-
-	/* do the statistics and put it back */
-	ip_vs_out_stats(cp, skb);
-
-	skb->ipvs_property = 1;
-	verdict = NF_ACCEPT;
-
-out:
-	__ip_vs_conn_put(cp);
-
-	return verdict;
+	snet.in6 = iph->saddr;
+	return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
+				    pp, offset, sizeof(struct ipv6hdr));
 }
 #endif
 
@@ -1055,7 +1033,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 							  ICMP_DEST_UNREACH,
 							  ICMP_PORT_UNREACH, 0);
 					return NF_DROP;
-				}
+			}
 			}
 		}
 		IP_VS_DBG_PKT(12, pp, skb, 0,
@@ -1083,6 +1061,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
 	unsigned int offset, ihl, verdict;
+	union nf_inet_addr snet;
 
 	*related = 1;
 
@@ -1142,9 +1121,12 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	if (!cp) {
 		/* The packet could also belong to a local client */
 		cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
-		if (cp)
-			return handle_response_icmp(skb, iph, cih, cp, pp,
+		if (cp) {
+			snet.ip = iph->saddr;
+			return handle_response_icmp(AF_INET, skb, &snet,
+						    cih->protocol, cp, pp,
 						    offset, ihl);
+		}
 		return NF_ACCEPT;
 	}
 
@@ -1183,6 +1165,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
 	unsigned int offset, verdict;
+	union nf_inet_addr snet;
 
 	*related = 1;
 
@@ -1240,8 +1223,18 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
 	cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
-	if (!cp)
+	if (!cp) {
+		/* The packet could also belong to a local client */
+		cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+		if (cp) {
+			snet.in6 = iph->saddr;
+			return handle_response_icmp(AF_INET6, skb, &snet,
+						    cih->nexthdr,
+						    cp, pp, offset,
+						    sizeof(struct ipv6hdr));
+		}
 		return NF_ACCEPT;
+	}
 
 	verdict = NF_DROP;
 
@@ -1281,9 +1274,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	 *	Big tappo: only PACKET_HOST, including loopback for local client
 	 *	Don't handle local packets on IPv6 for now
 	 */
-	if (unlikely(skb->pkt_type != PACKET_HOST ||
-		     (af == AF_INET6 || (skb->dev->flags & IFF_LOOPBACK ||
-					 skb->sk)))) {
+	if (unlikely(skb->pkt_type != PACKET_HOST)) {
 		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
 			      skb->pkt_type,
 			      iph.protocol,
-- 
cgit v1.2.3-70-g09d2


From bf7394ccc13fe291d9258f01113b4c61214ddeae Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 5 Sep 2008 12:38:09 -0700
Subject: Revert "mac80211: Use IWEVASSOCREQIE instead of IWEVCUSTOM"

This reverts commit 087d833e5a9f67ba933cb32eaf5a2279c1a5b47c, which was
reported to break wireless at least in some combinations with 32bit user
space and a 64bit kernel.  Alex Williamnson bisected it to this commit.

Reported-and-bisected-by: Alex Williamson <alex.williamson@hp.com>
Acked-by: John W. Linville <linville@tuxdriver.com>
Cc: David Miller <davem@davemloft.net>
Cc: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/mac80211/mlme.c | 48 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 9bb68c6a8f4..902cac1bd24 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -478,21 +478,51 @@ int ieee80211_ht_addt_info_ie_to_ht_bss_info(
 static void ieee80211_sta_send_associnfo(struct net_device *dev,
 					 struct ieee80211_if_sta *ifsta)
 {
+	char *buf;
+	size_t len;
+	int i;
 	union iwreq_data wrqu;
 
+	if (!ifsta->assocreq_ies && !ifsta->assocresp_ies)
+		return;
+
+	buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len +
+				ifsta->assocresp_ies_len), GFP_KERNEL);
+	if (!buf)
+		return;
+
+	len = sprintf(buf, "ASSOCINFO(");
 	if (ifsta->assocreq_ies) {
-		memset(&wrqu, 0, sizeof(wrqu));
-		wrqu.data.length = ifsta->assocreq_ies_len;
-		wireless_send_event(dev, IWEVASSOCREQIE, &wrqu,
-				    ifsta->assocreq_ies);
+		len += sprintf(buf + len, "ReqIEs=");
+		for (i = 0; i < ifsta->assocreq_ies_len; i++) {
+			len += sprintf(buf + len, "%02x",
+				       ifsta->assocreq_ies[i]);
+		}
 	}
-
 	if (ifsta->assocresp_ies) {
-		memset(&wrqu, 0, sizeof(wrqu));
-		wrqu.data.length = ifsta->assocresp_ies_len;
-		wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu,
-				    ifsta->assocresp_ies);
+		if (ifsta->assocreq_ies)
+			len += sprintf(buf + len, " ");
+		len += sprintf(buf + len, "RespIEs=");
+		for (i = 0; i < ifsta->assocresp_ies_len; i++) {
+			len += sprintf(buf + len, "%02x",
+				       ifsta->assocresp_ies[i]);
+		}
+	}
+	len += sprintf(buf + len, ")");
+
+	if (len > IW_CUSTOM_MAX) {
+		len = sprintf(buf, "ASSOCRESPIE=");
+		for (i = 0; i < ifsta->assocresp_ies_len; i++) {
+			len += sprintf(buf + len, "%02x",
+				       ifsta->assocresp_ies[i]);
+		}
 	}
+
+	memset(&wrqu, 0, sizeof(wrqu));
+	wrqu.data.length = len;
+	wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
+
+	kfree(buf);
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From f59ac0481660e66cec67f1d6b024e78b9dc715fe Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Fri, 29 Aug 2008 16:26:43 -0700
Subject: cfg80211: keep track of supported interface modes

It is obviously good for userspace to know up front which
interface modes a given piece of hardware might support (even
if adding such an interface might fail later because of
concurrency issues), so let's make cfg80211 aware of that.
For good measure, disallow adding interfaces in all other
modes so drivers don't forget to announce support for one mode
when they add it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Stephen Blackheath <tramp.enshrine.stephen@blacksapphire.com>
Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/adm8211.c              |  1 +
 drivers/net/wireless/ath5k/base.c           |  6 ++++++
 drivers/net/wireless/ath9k/main.c           |  5 +++++
 drivers/net/wireless/b43/main.c             |  7 +++++++
 drivers/net/wireless/b43legacy/main.c       |  5 +++++
 drivers/net/wireless/iwlwifi/iwl-core.c     |  4 ++++
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  5 +++++
 drivers/net/wireless/mac80211_hwsim.c       |  3 +++
 drivers/net/wireless/p54/p54common.c        |  3 +++
 drivers/net/wireless/rt2x00/rt2x00dev.c     |  5 +++++
 drivers/net/wireless/rtl8187_dev.c          |  2 ++
 drivers/net/wireless/zd1211rw/zd_mac.c      |  5 +++++
 include/linux/nl80211.h                     |  6 ++++++
 include/net/wireless.h                      |  3 +++
 net/mac80211/main.c                         |  7 +++++++
 net/wireless/core.c                         |  9 ++++++++-
 net/wireless/nl80211.c                      | 22 ++++++++++++++++++++--
 17 files changed, 95 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index 3333d4596b8..c6a55cd12db 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -1884,6 +1884,7 @@ static int __devinit adm8211_probe(struct pci_dev *pdev,
 	dev->extra_tx_headroom = sizeof(struct adm8211_tx_hdr);
 	/* dev->flags = IEEE80211_HW_RX_INCLUDES_FCS in promisc mode */
 	dev->flags = IEEE80211_HW_SIGNAL_UNSPEC;
+	dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
 
 	dev->channel_change_time = 1000;
 	dev->max_signal = 100;    /* FIXME: find better value */
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 7989ab5c2bb..85260c39aa2 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -485,6 +485,12 @@ ath5k_pci_probe(struct pci_dev *pdev,
 	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
 		    IEEE80211_HW_SIGNAL_DBM |
 		    IEEE80211_HW_NOISE_DBM;
+
+	hw->wiphy->interface_modes =
+		BIT(NL80211_IFTYPE_STATION) |
+		BIT(NL80211_IFTYPE_ADHOC) |
+		BIT(NL80211_IFTYPE_MESH_POINT);
+
 	hw->extra_tx_headroom = 2;
 	hw->channel_change_time = 5000;
 	sc = hw->priv;
diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index dc45eef3289..39a4a70d013 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -1482,6 +1482,11 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		IEEE80211_HW_SIGNAL_DBM |
 		IEEE80211_HW_NOISE_DBM;
 
+	hw->wiphy->interface_modes =
+		BIT(NL80211_IFTYPE_AP) |
+		BIT(NL80211_IFTYPE_STATION) |
+		BIT(NL80211_IFTYPE_ADHOC);
+
 	SET_IEEE80211_DEV(hw, &pdev->dev);
 	pci_set_drvdata(pdev, hw);
 
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 63bafc2f3f0..2d915c1a82a 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4569,6 +4569,13 @@ static int b43_wireless_init(struct ssb_device *dev)
 		    IEEE80211_HW_SIGNAL_DBM |
 		    IEEE80211_HW_NOISE_DBM;
 
+	hw->wiphy->interface_modes =
+		BIT(NL80211_IFTYPE_AP) |
+		BIT(NL80211_IFTYPE_MESH_POINT) |
+		BIT(NL80211_IFTYPE_STATION) |
+		BIT(NL80211_IFTYPE_WDS) |
+		BIT(NL80211_IFTYPE_ADHOC);
+
 	hw->queues = b43_modparam_qos ? 4 : 1;
 	SET_IEEE80211_DEV(hw, dev->dev);
 	if (is_valid_ether_addr(sprom->et1mac))
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 1cb77db5c29..68f63f5093a 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -3704,6 +3704,11 @@ static int b43legacy_wireless_init(struct ssb_device *dev)
 	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
 		    IEEE80211_HW_SIGNAL_DBM |
 		    IEEE80211_HW_NOISE_DBM;
+	hw->wiphy->interface_modes =
+		BIT(NL80211_IFTYPE_AP) |
+		BIT(NL80211_IFTYPE_STATION) |
+		BIT(NL80211_IFTYPE_WDS) |
+		BIT(NL80211_IFTYPE_ADHOC);
 	hw->queues = 1; /* FIXME: hardware has more queues */
 	SET_IEEE80211_DEV(hw, dev->dev);
 	if (is_valid_ether_addr(sprom->et1mac))
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index fbf75a62958..0a511ef8e35 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -819,6 +819,10 @@ int iwl_setup_mac(struct iwl_priv *priv)
 	/* Tell mac80211 our characteristics */
 	hw->flags = IEEE80211_HW_SIGNAL_DBM |
 		    IEEE80211_HW_NOISE_DBM;
+	hw->wiphy->interface_modes =
+		BIT(NL80211_IFTYPE_AP) |
+		BIT(NL80211_IFTYPE_STATION) |
+		BIT(NL80211_IFTYPE_ADHOC);
 	/* Default value; 4 EDCA QOS priorities */
 	hw->queues = 4;
 	/* queues to support 11n aggregation */
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index a622fc33590..cee3045f160 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -7888,6 +7888,11 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e
 	hw->flags = IEEE80211_HW_SIGNAL_DBM |
 		    IEEE80211_HW_NOISE_DBM;
 
+	hw->wiphy->interface_modes =
+		BIT(NL80211_IFTYPE_AP) |
+		BIT(NL80211_IFTYPE_STATION) |
+		BIT(NL80211_IFTYPE_ADHOC);
+
 	/* 4 EDCA QOS priorities */
 	hw->queues = 4;
 
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 732429d4912..6ba50f087f7 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -447,6 +447,9 @@ static int __init init_mac80211_hwsim(void)
 
 		hw->channel_change_time = 1;
 		hw->queues = 4;
+		hw->wiphy->interface_modes =
+			BIT(NL80211_IFTYPE_STATION) |
+			BIT(NL80211_IFTYPE_AP);
 		hw->ampdu_queues = 1;
 
 		memcpy(data->channels, hwsim_channels, sizeof(hwsim_channels));
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 17e06bbc996..6da98e6e6a9 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -1072,6 +1072,9 @@ struct ieee80211_hw *p54_init_common(size_t priv_data_len)
 	dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | /* not sure */
 		     IEEE80211_HW_RX_INCLUDES_FCS |
 		     IEEE80211_HW_SIGNAL_UNSPEC;
+
+	dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
+
 	dev->channel_change_time = 1000;	/* TODO: find actual value */
 	dev->max_signal = 127;
 
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 369b0b2d864..2f3bfc60688 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -1052,6 +1052,11 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev)
 	 */
 	rt2x00dev->hw->vif_data_size = sizeof(struct rt2x00_intf);
 
+	rt2x00dev->hw->wiphy->interface_modes =
+	    BIT(NL80211_IFTYPE_AP) |
+	    BIT(NL80211_IFTYPE_STATION) |
+	    BIT(NL80211_IFTYPE_ADHOC);
+
 	/*
 	 * Let the driver probe the device to detect the capabilities.
 	 */
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index 060a2650535..8a42bfa6d4f 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -1184,6 +1184,8 @@ static int __devinit rtl8187_probe(struct usb_interface *intf,
 		dev->max_signal = 65;
 	}
 
+	dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
+
 	if ((id->driver_info == DEVICE_RTL8187) && priv->is_rtl8187b)
 		printk(KERN_INFO "rtl8187: inconsistency between id with OEM"
 		       " info!\n");
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 4d7b98b0503..e019102b228 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -937,6 +937,11 @@ struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf)
 	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
 		    IEEE80211_HW_SIGNAL_DB;
 
+	hw->wiphy->interface_modes =
+		BIT(NL80211_IFTYPE_MESH_POINT) |
+		BIT(NL80211_IFTYPE_STATION) |
+		BIT(NL80211_IFTYPE_ADHOC);
+
 	hw->max_signal = 100;
 	hw->queues = 1;
 	hw->extra_tx_headroom = sizeof(struct zd_ctrlset);
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 0c1147de3ec..5e51f4e7600 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -210,6 +210,10 @@ enum nl80211_commands {
  * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from
  *	association request when used with NL80211_CMD_NEW_STATION)
  *
+ * @NL80211_ATTR_SUPPORTED_IFTYPES: nested attribute containing all
+ *	supported interface types, each a flag attribute with the number
+ *	of the interface mode.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -259,6 +263,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_HT_CAPABILITY,
 
+	NL80211_ATTR_SUPPORTED_IFTYPES,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 9324f8dd183..1dc8ec3daa2 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -185,6 +185,9 @@ struct wiphy {
 	/* permanent MAC address */
 	u8 perm_addr[ETH_ALEN];
 
+	/* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */
+	u16 interface_modes;
+
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
 	 * know whether it points to a wiphy your driver has registered
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 638b75f36e2..396cfb2d0f4 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1675,6 +1675,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		}
 	}
 
+	/* if low-level driver supports AP, we also support VLAN */
+	if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP))
+		local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN);
+
+	/* mac80211 always supports monitor */
+	local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
+
 	result = wiphy_register(local->hw.wiphy);
 	if (result < 0)
 		return result;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index f1da0b93bc5..7e995ac06a0 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1,7 +1,7 @@
 /*
  * This is the linux wireless configuration interface.
  *
- * Copyright 2006, 2007		Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006-2008		Johannes Berg <johannes@sipsolutions.net>
  */
 
 #include <linux/if.h>
@@ -259,6 +259,13 @@ int wiphy_register(struct wiphy *wiphy)
 	struct ieee80211_supported_band *sband;
 	bool have_band = false;
 	int i;
+	u16 ifmodes = wiphy->interface_modes;
+
+	/* sanity check ifmodes */
+	WARN_ON(!ifmodes);
+	ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1;
+	if (WARN_ON(ifmodes != wiphy->interface_modes))
+		wiphy->interface_modes = ifmodes;
 
 	/* sanity check supported bands/channels */
 	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4d6c02afd6f..77880ba8b61 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -113,10 +113,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	struct nlattr *nl_bands, *nl_band;
 	struct nlattr *nl_freqs, *nl_freq;
 	struct nlattr *nl_rates, *nl_rate;
+	struct nlattr *nl_modes;
 	enum ieee80211_band band;
 	struct ieee80211_channel *chan;
 	struct ieee80211_rate *rate;
 	int i;
+	u16 ifmodes = dev->wiphy.interface_modes;
 
 	hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY);
 	if (!hdr)
@@ -125,6 +127,20 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx);
 	NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy));
 
+	nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
+	if (!nl_modes)
+		goto nla_put_failure;
+
+	i = 0;
+	while (ifmodes) {
+		if (ifmodes & 1)
+			NLA_PUT_FLAG(msg, i);
+		ifmodes >>= 1;
+		i++;
+	}
+
+	nla_nest_end(msg, nl_modes);
+
 	nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
 	if (!nl_bands)
 		goto nla_put_failure;
@@ -415,7 +431,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 	ifindex = dev->ifindex;
 	dev_put(dev);
 
-	if (!drv->ops->change_virtual_intf) {
+	if (!drv->ops->change_virtual_intf ||
+	    !(drv->wiphy.interface_modes & (1 << type))) {
 		err = -EOPNOTSUPP;
 		goto unlock;
 	}
@@ -462,7 +479,8 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(drv))
 		return PTR_ERR(drv);
 
-	if (!drv->ops->add_virtual_intf) {
+	if (!drv->ops->add_virtual_intf ||
+	    !(drv->wiphy.interface_modes & (1 << type))) {
 		err = -EOPNOTSUPP;
 		goto unlock;
 	}
-- 
cgit v1.2.3-70-g09d2


From cd9fe6c4f0afe334862c871bf5d32770daa748ec Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Fri, 5 Sep 2008 13:46:00 +0200
Subject: ipvs: Use pointer to address from sync message

We want a pointer to it, not the value casted to a pointer.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_sync.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 40647edf102..28237a5f62e 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -397,11 +397,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 					flags &= ~IP_VS_CONN_F_INACTIVE;
 			}
 			cp = ip_vs_conn_new(AF_INET, s->protocol,
-					    (union nf_inet_addr *)s->caddr,
+					    (union nf_inet_addr *)&s->caddr,
 					    s->cport,
-					    (union nf_inet_addr *)s->vaddr,
+					    (union nf_inet_addr *)&s->vaddr,
 					    s->vport,
-					    (union nf_inet_addr *)s->daddr,
+					    (union nf_inet_addr *)&s->daddr,
 					    s->dport,
 					    flags, dest);
 			if (dest)
-- 
cgit v1.2.3-70-g09d2


From a5ba4bf2732c85d8c95e0432966f79aa2b159478 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Fri, 5 Sep 2008 13:47:37 +0200
Subject: ipvs: Return negative error values from ip_vs_edit_service()

Like the other code in this function does.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 7f89c588e58..d2dc05a843f 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1284,11 +1284,11 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 #ifdef CONFIG_IP_VS_IPV6
 	if (u->af == AF_INET6) {
 		if (!sched->supports_ipv6) {
-			ret = EAFNOSUPPORT;
+			ret = -EAFNOSUPPORT;
 			goto out;
 		}
 		if ((u->netmask < 1) || (u->netmask > 128)) {
-			ret = EINVAL;
+			ret = -EINVAL;
 			goto out;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From 77eb851630bba8ea9962a1b2f01b23bd5d57c58e Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Fri, 5 Sep 2008 14:43:00 +0200
Subject: ipvs: Mark tcp/udp v4 and v6 debug functions static

They are only used in this file, so they should be static

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_proto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 50f6215beda..b06da1c3445 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -151,7 +151,7 @@ const char * ip_vs_state_name(__u16 proto, int state)
 }
 
 
-void
+static void
 ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
 			     const struct sk_buff *skb,
 			     int offset,
@@ -190,7 +190,7 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
 }
 
 #ifdef CONFIG_IP_VS_IPV6
-void
+static void
 ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
 			     const struct sk_buff *skb,
 			     int offset,
-- 
cgit v1.2.3-70-g09d2


From 3bfb92f4073aa829f8e67e459d54c79306ddbd73 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Fri, 5 Sep 2008 16:53:49 +0200
Subject: ipvs: Reject ipv6 link-local addresses for destinations

We can't use non-local link-local addresses for destinations, without
knowing the interface on which we can reach the address. Reject them for
now.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index d2dc05a843f..e53efe41f01 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -838,7 +838,8 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6) {
 		atype = ipv6_addr_type(&udest->addr.in6);
-		if (!(atype & IPV6_ADDR_UNICAST) &&
+		if ((!(atype & IPV6_ADDR_UNICAST) ||
+			atype & IPV6_ADDR_LINKLOCAL) &&
 			!__ip_vs_addr_is_local_v6(&udest->addr.in6))
 			return -EINVAL;
 	} else
-- 
cgit v1.2.3-70-g09d2


From 5af149cc34143c4e24abcc6355b29b3161eff3b8 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Mon, 8 Sep 2008 09:34:45 +1000
Subject: IPVS: fix bogus indentation

Sorry, this was my error.
Thanks to Julius Volz for pointing it out.

Signed-off-by: Simon Horman <horms@verge.net.au>
Acked-by: Julius Volz <juliusv@google.com>
---
 net/ipv4/ipvs/ip_vs_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 05797a5ce15..1f4f3b94359 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1033,7 +1033,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 							  ICMP_DEST_UNREACH,
 							  ICMP_PORT_UNREACH, 0);
 					return NF_DROP;
-			}
+				}
 			}
 		}
 		IP_VS_DBG_PKT(12, pp, skb, 0,
-- 
cgit v1.2.3-70-g09d2


From 178f5e494e3c0252d06a9b1473016addff71e01e Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Mon, 8 Sep 2008 09:34:46 +1000
Subject: IPVS: use ipv6_addr_copy()

It is standard to use ipv6_addr_copy() to fill in
the in6 element of a union nf_inet_addr snet.

Thanks to Julius Volz for pointing this out.

Cc: Brian Haley <brian.haley@hp.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Acked-by: Julius Volz <juliusv@google.com>
---
 net/ipv4/ipvs/ip_vs_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 1f4f3b94359..f5180ac56be 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -847,7 +847,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
 	if (!cp)
 		return NF_ACCEPT;
 
-	snet.in6 = iph->saddr;
+	ipv6_addr_copy(&snet.in6, &iph->saddr);
 	return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
 				    pp, offset, sizeof(struct ipv6hdr));
 }
@@ -1227,7 +1227,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 		/* The packet could also belong to a local client */
 		cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
 		if (cp) {
-			snet.in6 = iph->saddr;
+			ipv6_addr_copy(&snet.in6, &iph->saddr);
 			return handle_response_icmp(AF_INET6, skb, &snet,
 						    cih->nexthdr,
 						    cp, pp, offset,
-- 
cgit v1.2.3-70-g09d2


From 66bf79182d6531c14c1f9a507b6bbf374a2ae4cd Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 7 Sep 2008 18:19:25 -0700
Subject: netfilter: nf_conntrack_sip: de-static helper pointers

Helper's ->help hook can run concurrently with itself, so iterating over
SIP helpers with static pointer won't work reliably.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_sip.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 2f9bbc058b4..1fa306be60f 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1193,7 +1193,6 @@ static const struct sip_handler sip_handlers[] = {
 static int process_sip_response(struct sk_buff *skb,
 				const char **dptr, unsigned int *datalen)
 {
-	static const struct sip_handler *handler;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	unsigned int matchoff, matchlen;
@@ -1214,6 +1213,8 @@ static int process_sip_response(struct sk_buff *skb,
 	dataoff = matchoff + matchlen + 1;
 
 	for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
+		const struct sip_handler *handler;
+
 		handler = &sip_handlers[i];
 		if (handler->response == NULL)
 			continue;
@@ -1228,13 +1229,14 @@ static int process_sip_response(struct sk_buff *skb,
 static int process_sip_request(struct sk_buff *skb,
 			       const char **dptr, unsigned int *datalen)
 {
-	static const struct sip_handler *handler;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	unsigned int matchoff, matchlen;
 	unsigned int cseq, i;
 
 	for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
+		const struct sip_handler *handler;
+
 		handler = &sip_handlers[i];
 		if (handler->request == NULL)
 			continue;
-- 
cgit v1.2.3-70-g09d2


From 887464a41fde7e9e1e11ca86748338033c502446 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 7 Sep 2008 18:20:08 -0700
Subject: netfilter: nf_conntrack_gre: more locking around keymap list

gre_keymap_list should be protected in all places.
(unless I'm misreading something)

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_gre.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 654a4f7f12c..b308bb4c12b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -97,10 +97,14 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 	kmp = &help->help.ct_pptp_info.keymap[dir];
 	if (*kmp) {
 		/* check whether it's a retransmission */
+		read_lock_bh(&nf_ct_gre_lock);
 		list_for_each_entry(km, &gre_keymap_list, list) {
-			if (gre_key_cmpfn(km, t) && km == *kmp)
+			if (gre_key_cmpfn(km, t) && km == *kmp) {
+				read_unlock_bh(&nf_ct_gre_lock);
 				return 0;
+			}
 		}
+		read_unlock_bh(&nf_ct_gre_lock);
 		pr_debug("trying to override keymap_%s for ct %p\n",
 			 dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
 		return -EEXIST;
-- 
cgit v1.2.3-70-g09d2


From 51807e91a76a531d059ec7ce3395c435e4df52a8 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 7 Sep 2008 18:20:36 -0700
Subject: netfilter: nf_conntrack_gre: nf_ct_gre_keymap_flush() fixlet

It does "kfree(list_head)" which looks wrong because entity that was
allocated is definitely not list_head.

However, this all works because list_head is first item in
struct nf_ct_gre_keymap.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_gre.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index b308bb4c12b..9bd03967fea 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -45,12 +45,12 @@ static LIST_HEAD(gre_keymap_list);
 
 void nf_ct_gre_keymap_flush(void)
 {
-	struct list_head *pos, *n;
+	struct nf_ct_gre_keymap *km, *tmp;
 
 	write_lock_bh(&nf_ct_gre_lock);
-	list_for_each_safe(pos, n, &gre_keymap_list) {
-		list_del(pos);
-		kfree(pos);
+	list_for_each_entry_safe(km, tmp, &gre_keymap_list, list) {
+		list_del(&km->list);
+		kfree(km);
 	}
 	write_unlock_bh(&nf_ct_gre_lock);
 }
-- 
cgit v1.2.3-70-g09d2


From e3b802ba885b54f4050164c3cfd9e0ba9c73173a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 7 Sep 2008 18:21:24 -0700
Subject: netfilter: nf_conntrack_irc: make sure string is terminated before
 calling simple_strtoul

Alexey Dobriyan points out:

1. simple_strtoul() silently accepts all characters for given base even
   if result won't fit into unsigned long. This is amazing stupidity in
   itself, but

2. nf_conntrack_irc helper use simple_strtoul() for DCC request parsing.
   Data first copied into 64KB buffer, so theoretically nothing prevents
   reading past the end of it, since data comes from network given 1).

This is not actually a problem currently since we're guaranteed to have
a 0 byte in skb_shared_info or in the buffer the data is copied to, but
to make this more robust, make sure the string is actually terminated.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_irc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 1b1226d6653..20633fdf7e6 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -68,11 +68,21 @@ static const char *const dccprotos[] = {
 static int parse_dcc(char *data, const char *data_end, u_int32_t *ip,
 		     u_int16_t *port, char **ad_beg_p, char **ad_end_p)
 {
+	char *tmp;
+
 	/* at least 12: "AAAAAAAA P\1\n" */
 	while (*data++ != ' ')
 		if (data > data_end - 12)
 			return -1;
 
+	/* Make sure we have a newline character within the packet boundaries
+	 * because simple_strtoul parses until the first invalid character. */
+	for (tmp = data; tmp <= data_end; tmp++)
+		if (*tmp == '\n')
+			break;
+	if (tmp > data_end || *tmp != '\n')
+		return -1;
+
 	*ad_beg_p = data;
 	*ip = simple_strtoul(data, &data, 10);
 
-- 
cgit v1.2.3-70-g09d2


From e8a83e10d7dfe5d0841062780769b30f65417e15 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sun, 7 Sep 2008 18:41:21 -0700
Subject: pkt_sched: Fix qdisc state in net_tx_action()

net_tx_action() can skip __QDISC_STATE_SCHED bit clearing while qdisc
is neither ran nor rescheduled, which may cause endless loop in
dev_deactivate().

Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Tested-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 60c51f76588..e719ed29310 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1991,8 +1991,13 @@ static void net_tx_action(struct softirq_action *h)
 				spin_unlock(root_lock);
 			} else {
 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
-					      &q->state))
+					      &q->state)) {
 					__netif_reschedule(q);
+				} else {
+					smp_mb__before_clear_bit();
+					clear_bit(__QDISC_STATE_SCHED,
+						  &q->state);
+				}
 			}
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From 9818babc8fd9a542978a235f1c1786f948cbac68 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 3 Sep 2008 23:42:19 +0300
Subject: mac80211: Fix low bit rate in IBSS

This patch fixes regression in iwlwifi IBSS rate scaling caused by patch:

    commit 6bc37c06bc424bcf3f944e6a79e2d5bb537e02ed
    Author: Vladimir Koutny <vlado@work.ksp.sk>
    Date:   Fri Jun 13 16:50:44 2008 +0200

        mac80211: eliminate IBSS warning in rate_lowest_index()

An IBSS station is added in prepare_for_handlers where the rate scaling was
initialized only with single rate matching the received packet.
The correct rate scale information should be updated only in
ieee80211_rx_bss_info function where beacon is parsed. Because
of coding error the rate info was left untouched.
If a beacon has triggered the connection the rate remined 1Mbps.
This patch fixes this coding error

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Cc: Vladimir Koutny <vlado@work.ksp.sk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7d53382f1a5..75510a9f3f1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2595,7 +2595,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 							rx_status->band);
 
 		prev_rates = sta->supp_rates[rx_status->band];
-		sta->supp_rates[rx_status->band] &= supp_rates;
+		sta->supp_rates[rx_status->band] = supp_rates;
 		if (sta->supp_rates[rx_status->band] == 0) {
 			/* No matching rates - this should not really happen.
 			 * Make sure that at least one rate is marked
-- 
cgit v1.2.3-70-g09d2


From 8e1535d51bc93fbe9b3ded6555680044bc571d19 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 3 Sep 2008 23:42:20 +0300
Subject: mac80211: Fix rate scale initialization in IBSS

This patch address some IBSS rate issues introduced or not covered
by "mac80211: eliminate IBSS warning in rate_lowest_index()" and
"cfg80211 API for channels/bitrates, mac80211 and driver conversion".

This patch:
1. Moves addition of IBSS station from
prepare_for_handlers to ieee80211_rx_bss_info when triggered from beacon
eliminating bogus supported rates.
2. Initialize properly supported rates also in IBSS merging
3. Ensure that mandatory rates are always added into supported
rates. This is needed in case when station addition is triggered from
non beacon/probe packet. Some management frames need to be sent
4. Remove initialization of supported rates from self rates. This path
was dead code after 6bc37c06bc4 and in general incorrect.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Cc: Vladimir Koutny <vlado@work.ksp.sk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 90 +++++++++++++++++++++++++++++++++++++----------------
 net/mac80211/rx.c   |  4 ---
 2 files changed, 64 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 75510a9f3f1..c396c3522f7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2557,6 +2557,33 @@ u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
 	return supp_rates;
 }
 
+static u64 ieee80211_sta_get_mandatory_rates(struct ieee80211_local *local,
+					enum ieee80211_band band)
+{
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_rate *bitrates;
+	u64 mandatory_rates;
+	enum ieee80211_rate_flags mandatory_flag;
+	int i;
+
+	sband = local->hw.wiphy->bands[band];
+	if (!sband) {
+		WARN_ON(1);
+		sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+	}
+
+	if (band == IEEE80211_BAND_2GHZ)
+		mandatory_flag = IEEE80211_RATE_MANDATORY_B;
+	else
+		mandatory_flag = IEEE80211_RATE_MANDATORY_A;
+
+	bitrates = sband->bitrates;
+	mandatory_rates = 0;
+	for (i = 0; i < sband->n_bitrates; i++)
+		if (bitrates[i].flags & mandatory_flag)
+			mandatory_rates |= BIT(i);
+	return mandatory_rates;
+}
 
 static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_mgmt *mgmt,
@@ -2568,9 +2595,11 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	int freq, clen;
 	struct ieee80211_sta_bss *bss;
 	struct sta_info *sta;
-	u64 beacon_timestamp, rx_timestamp;
 	struct ieee80211_channel *channel;
+	u64 beacon_timestamp, rx_timestamp;
+	u64 supp_rates = 0;
 	bool beacon = ieee80211_is_beacon(mgmt->frame_control);
+	enum ieee80211_band band = rx_status->band;
 	DECLARE_MAC_BUF(mac);
 	DECLARE_MAC_BUF(mac2);
 
@@ -2578,30 +2607,41 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id &&
 	    elems->mesh_config && mesh_matches_local(elems, sdata)) {
-		u64 rates = ieee80211_sta_get_rates(local, elems,
-						rx_status->band);
+		supp_rates = ieee80211_sta_get_rates(local, elems, band);
 
-		mesh_neighbour_update(mgmt->sa, rates, sdata,
+		mesh_neighbour_update(mgmt->sa, supp_rates, sdata,
 				      mesh_peer_accepts_plinks(elems));
 	}
 
 	rcu_read_lock();
 
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates &&
-	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
-	    (sta = sta_info_get(local, mgmt->sa))) {
-		u64 prev_rates;
-		u64 supp_rates = ieee80211_sta_get_rates(local, elems,
-							rx_status->band);
-
-		prev_rates = sta->supp_rates[rx_status->band];
-		sta->supp_rates[rx_status->band] = supp_rates;
-		if (sta->supp_rates[rx_status->band] == 0) {
-			/* No matching rates - this should not really happen.
-			 * Make sure that at least one rate is marked
-			 * supported to avoid issues with TX rate ctrl. */
-			sta->supp_rates[rx_status->band] =
-				sdata->u.sta.supp_rates_bits[rx_status->band];
+	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
+
+		supp_rates = ieee80211_sta_get_rates(local, elems, band);
+
+		sta = sta_info_get(local, mgmt->sa);
+		if (sta) {
+			u64 prev_rates;
+
+			prev_rates = sta->supp_rates[band];
+			/* make sure mandatory rates are always added */
+			sta->supp_rates[band] = supp_rates |
+				ieee80211_sta_get_mandatory_rates(local, band);
+
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+			if (sta->supp_rates[band] != prev_rates)
+				printk(KERN_DEBUG "%s: updated supp_rates set "
+				    "for %s based on beacon info (0x%llx | "
+				    "0x%llx -> 0x%llx)\n",
+				    sdata->dev->name, print_mac(mac, sta->addr),
+				    (unsigned long long) prev_rates,
+				    (unsigned long long) supp_rates,
+				    (unsigned long long) sta->supp_rates[band]);
+#endif
+		} else {
+			ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid,
+					       mgmt->sa, supp_rates);
 		}
 	}
 
@@ -2683,7 +2723,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		bss->supp_rates_len += clen;
 	}
 
-	bss->band = rx_status->band;
+	bss->band = band;
 
 	bss->timestamp = beacon_timestamp;
 	bss->last_update = jiffies;
@@ -2738,7 +2778,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 			 * e.g: at 1 MBit that means mactime is 192 usec earlier
 			 * (=24 bytes * 8 usecs/byte) than the beacon timestamp.
 			 */
-			int rate = local->hw.wiphy->bands[rx_status->band]->
+			int rate = local->hw.wiphy->bands[band]->
 					bitrates[rx_status->rate_idx].bitrate;
 			rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate);
 		} else if (local && local->ops && local->ops->get_tsf)
@@ -2766,7 +2806,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 			ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss);
 			ieee80211_ibss_add_sta(sdata, NULL,
 					       mgmt->bssid, mgmt->sa,
-					       BIT(rx_status->rate_idx));
+					       supp_rates);
 		}
 	}
 
@@ -3032,7 +3072,6 @@ void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *
 	kfree_skb(skb);
 }
 
-
 static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 					 struct sk_buff *skb)
 {
@@ -4316,10 +4355,9 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 
 	set_sta_flags(sta, WLAN_STA_AUTHORIZED);
 
-	if (supp_rates)
-		sta->supp_rates[band] = supp_rates;
-	else
-		sta->supp_rates[band] = sdata->u.sta.supp_rates_bits[band];
+	/* make sure mandatory rates are always added */
+	sta->supp_rates[band] = supp_rates |
+			ieee80211_sta_get_mandatory_rates(local, band);
 
 	rate_control_rate_init(sta, local);
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fd83ef760a3..7e09b30dd39 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1743,10 +1743,6 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 		if (!bssid)
 			return 0;
 		if (ieee80211_is_beacon(hdr->frame_control)) {
-			if (!rx->sta)
-				rx->sta = ieee80211_ibss_add_sta(sdata,
-						rx->skb, bssid, hdr->addr2,
-						BIT(rx->status->rate_idx));
 			return 1;
 		}
 		else if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) {
-- 
cgit v1.2.3-70-g09d2


From 701b9cb37bae3d50dbe5f345a7cdbd2648bf7df6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 4 Sep 2008 09:24:53 -0700
Subject: mac80211: add missing kernel-doc

Fix mac80211 kernel-doc missing struct field:

Warning(linux-2.6.27-rc1-git2//net/mac80211/sta_info.h:329): No description found for parameter 'tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 109db787ccb..4a581a5b576 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -204,6 +204,7 @@ struct sta_ampdu_mlme {
  * @tx_fragments: number of transmitted MPDUs
  * @txrate_idx: TBD
  * @last_txrate_idx: TBD
+ * @tid_seq: TBD
  * @wme_tx_queue: TBD
  * @ampdu_mlme: TBD
  * @timer_to_tid: identity mapping to ID timers
-- 
cgit v1.2.3-70-g09d2


From d315492b1a6ba29da0fa2860759505ae1b2db857 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Mon, 8 Sep 2008 13:17:27 -0700
Subject: netns : fix kernel panic in timewait socket destruction

How to reproduce ?
 - create a network namespace
 - use tcp protocol and get timewait socket
 - exit the network namespace
 - after a moment (when the timewait socket is destroyed), the kernel
   panics.

# BUG: unable to handle kernel NULL pointer dereference at
0000000000000007
IP: [<ffffffff821e394d>] inet_twdr_do_twkill_work+0x6e/0xb8
PGD 119985067 PUD 11c5c0067 PMD 0
Oops: 0000 [1] SMP
CPU 1
Modules linked in: ipv6 button battery ac loop dm_mod tg3 libphy ext3 jbd
edd fan thermal processor thermal_sys sg sata_svw libata dock serverworks
sd_mod scsi_mod ide_disk ide_core [last unloaded: freq_table]
Pid: 0, comm: swapper Not tainted 2.6.27-rc2 #3
RIP: 0010:[<ffffffff821e394d>] [<ffffffff821e394d>]
inet_twdr_do_twkill_work+0x6e/0xb8
RSP: 0018:ffff88011ff7fed0 EFLAGS: 00010246
RAX: ffffffffffffffff RBX: ffffffff82339420 RCX: ffff88011ff7ff30
RDX: 0000000000000001 RSI: ffff88011a4d03c0 RDI: ffff88011ac2fc00
RBP: ffffffff823392e0 R08: 0000000000000000 R09: ffff88002802a200
R10: ffff8800a5c4b000 R11: ffffffff823e4080 R12: ffff88011ac2fc00
R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000000
FS: 0000000041cbd940(0000) GS:ffff8800bff839c0(0000)
knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000007 CR3: 00000000bd87c000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 0, threadinfo ffff8800bff9e000, task
ffff88011ff76690)
Stack: ffffffff823392e0 0000000000000100 ffffffff821e3a3a
0000000000000008
0000000000000000 ffffffff821e3a61 ffff8800bff7c000 ffffffff8203c7e7
ffff88011ff7ff10 ffff88011ff7ff10 0000000000000021 ffffffff82351108
Call Trace:
<IRQ> [<ffffffff821e3a3a>] ? inet_twdr_hangman+0x0/0x9e
[<ffffffff821e3a61>] ? inet_twdr_hangman+0x27/0x9e
[<ffffffff8203c7e7>] ? run_timer_softirq+0x12c/0x193
[<ffffffff820390d1>] ? __do_softirq+0x5e/0xcd
[<ffffffff8200d08c>] ? call_softirq+0x1c/0x28
[<ffffffff8200e611>] ? do_softirq+0x2c/0x68
[<ffffffff8201a055>] ? smp_apic_timer_interrupt+0x8e/0xa9
[<ffffffff8200cad6>] ? apic_timer_interrupt+0x66/0x70
<EOI> [<ffffffff82011f4c>] ? default_idle+0x27/0x3b
[<ffffffff8200abbd>] ? cpu_idle+0x5f/0x7d


Code: e8 01 00 00 4c 89 e7 41 ff c5 e8 8d fd ff ff 49 8b 44 24 38 4c 89 e7
65 8b 14 25 24 00 00 00 89 d2 48 8b 80 e8 00 00 00 48 f7 d0 <48> 8b 04 d0
48 ff 40 58 e8 fc fc ff ff 48 89 df e8 c0 5f 04 00
RIP [<ffffffff821e394d>] inet_twdr_do_twkill_work+0x6e/0xb8
RSP <ffff88011ff7fed0>
CR2: 0000000000000007

This patch provides a function to purge all timewait sockets related
to a network namespace. The timewait sockets life cycle is not tied with
the network namespace, that means the timewait sockets stay alive while
the network namespace dies. The timewait sockets are for avoiding to
receive a duplicate packet from the network, if the network namespace is
freed, the network stack is removed, so no chance to receive any packets
from the outside world. Furthermore, having a pending destruction timer
on these sockets with a network namespace freed is not safe and will lead
to an oops if the timer callback which try to access data belonging to
the namespace like for example in:
	inet_twdr_do_twkill_work
		-> NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);

Purging the timewait sockets at the network namespace destruction will:
 1) speed up memory freeing for the namespace
 2) fix kernel panic on asynchronous timewait destruction

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Denis V. Lunev <den@openvz.org>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_timewait_sock.h |  3 +++
 net/ipv4/inet_timewait_sock.c    | 35 +++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c              |  1 +
 net/ipv6/tcp_ipv6.c              |  1 +
 4 files changed, 40 insertions(+)

(limited to 'net')

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 95c660c9719..91324908fcc 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -208,6 +208,9 @@ extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
 extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
 				 struct inet_timewait_death_row *twdr);
 
+extern void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
+			    struct inet_timewait_death_row *twdr, int family);
+
 static inline
 struct net *twsk_net(const struct inet_timewait_sock *twsk)
 {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index d985bd613d2..743f011b9a8 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -409,3 +409,38 @@ out:
 }
 
 EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
+
+void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
+		     struct inet_timewait_death_row *twdr, int family)
+{
+	struct inet_timewait_sock *tw;
+	struct sock *sk;
+	struct hlist_node *node;
+	int h;
+
+	local_bh_disable();
+	for (h = 0; h < (hashinfo->ehash_size); h++) {
+		struct inet_ehash_bucket *head =
+			inet_ehash_bucket(hashinfo, h);
+		rwlock_t *lock = inet_ehash_lockp(hashinfo, h);
+restart:
+		write_lock(lock);
+		sk_for_each(sk, node, &head->twchain) {
+
+			tw = inet_twsk(sk);
+			if (!net_eq(twsk_net(tw), net) ||
+			    tw->tw_family != family)
+				continue;
+
+			atomic_inc(&tw->tw_refcnt);
+			write_unlock(lock);
+			inet_twsk_deschedule(tw, twdr);
+			inet_twsk_put(tw);
+
+			goto restart;
+		}
+		write_unlock(lock);
+	}
+	local_bh_enable();
+}
+EXPORT_SYMBOL_GPL(inet_twsk_purge);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 44c1e934824..1b4fee20fc9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2376,6 +2376,7 @@ static int __net_init tcp_sk_init(struct net *net)
 static void __net_exit tcp_sk_exit(struct net *net)
 {
 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
+	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
 }
 
 static struct pernet_operations __net_initdata tcp_sk_ops = {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5b90b369ccb..b585c850a89 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2148,6 +2148,7 @@ static int tcpv6_net_init(struct net *net)
 static void tcpv6_net_exit(struct net *net)
 {
 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
 }
 
 static struct pernet_operations tcpv6_net_ops = {
-- 
cgit v1.2.3-70-g09d2


From 8d4698f7a54a492a1b96c505b30fe750ae3e61d5 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 8 Sep 2008 13:44:40 -0700
Subject: bridge: don't allow setting hello time to zero

Dushan Tcholich reports that on his system ksoftirqd can consume
between %6 to %10 of cpu time, and cause ~200 context switches per
second.

He then correlated this with a report by bdupree@techfinesse.com:

	http://marc.info/?l=linux-kernel&m=119613299024398&w=2

and the culprit cause seems to be starting the bridge interface.
In particular, when starting the bridge interface, his scripts
are specifying a hello timer interval of "0".

The bridge hello time can't be safely set to values less than 1
second, otherwise it is possible to end up with a runaway timer.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_ioctl.c    |  8 +++++++-
 net/bridge/br_sysfs_br.c | 26 ++++++++++++++++++--------
 2 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index eeee218eed8..5bbf0736217 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -188,15 +188,21 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		return 0;
 
 	case BRCTL_SET_BRIDGE_HELLO_TIME:
+	{
+		unsigned long t = clock_t_to_jiffies(args[1]);
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 
+		if (t < HZ)
+			return -EINVAL;
+
 		spin_lock_bh(&br->lock);
-		br->bridge_hello_time = clock_t_to_jiffies(args[1]);
+		br->bridge_hello_time = t;
 		if (br_is_root_bridge(br))
 			br->hello_time = br->bridge_hello_time;
 		spin_unlock_bh(&br->lock);
 		return 0;
+	}
 
 	case BRCTL_SET_BRIDGE_MAX_AGE:
 		if (!capable(CAP_NET_ADMIN))
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 27d6a511c8c..158dee8b496 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -29,11 +29,12 @@
  */
 static ssize_t store_bridge_parm(struct device *d,
 				 const char *buf, size_t len,
-				 void (*set)(struct net_bridge *, unsigned long))
+				 int (*set)(struct net_bridge *, unsigned long))
 {
 	struct net_bridge *br = to_bridge(d);
 	char *endp;
 	unsigned long val;
+	int err;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -43,9 +44,9 @@ static ssize_t store_bridge_parm(struct device *d,
 		return -EINVAL;
 
 	spin_lock_bh(&br->lock);
-	(*set)(br, val);
+	err = (*set)(br, val);
 	spin_unlock_bh(&br->lock);
-	return len;
+	return err ? err : len;
 }
 
 
@@ -56,12 +57,13 @@ static ssize_t show_forward_delay(struct device *d,
 	return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay));
 }
 
-static void set_forward_delay(struct net_bridge *br, unsigned long val)
+static int set_forward_delay(struct net_bridge *br, unsigned long val)
 {
 	unsigned long delay = clock_t_to_jiffies(val);
 	br->forward_delay = delay;
 	if (br_is_root_bridge(br))
 		br->bridge_forward_delay = delay;
+	return 0;
 }
 
 static ssize_t store_forward_delay(struct device *d,
@@ -80,12 +82,17 @@ static ssize_t show_hello_time(struct device *d, struct device_attribute *attr,
 		       jiffies_to_clock_t(to_bridge(d)->hello_time));
 }
 
-static void set_hello_time(struct net_bridge *br, unsigned long val)
+static int set_hello_time(struct net_bridge *br, unsigned long val)
 {
 	unsigned long t = clock_t_to_jiffies(val);
+
+	if (t < HZ)
+		return -EINVAL;
+
 	br->hello_time = t;
 	if (br_is_root_bridge(br))
 		br->bridge_hello_time = t;
+	return 0;
 }
 
 static ssize_t store_hello_time(struct device *d,
@@ -104,12 +111,13 @@ static ssize_t show_max_age(struct device *d, struct device_attribute *attr,
 		       jiffies_to_clock_t(to_bridge(d)->max_age));
 }
 
-static void set_max_age(struct net_bridge *br, unsigned long val)
+static int set_max_age(struct net_bridge *br, unsigned long val)
 {
 	unsigned long t = clock_t_to_jiffies(val);
 	br->max_age = t;
 	if (br_is_root_bridge(br))
 		br->bridge_max_age = t;
+	return 0;
 }
 
 static ssize_t store_max_age(struct device *d, struct device_attribute *attr,
@@ -126,9 +134,10 @@ static ssize_t show_ageing_time(struct device *d,
 	return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time));
 }
 
-static void set_ageing_time(struct net_bridge *br, unsigned long val)
+static int set_ageing_time(struct net_bridge *br, unsigned long val)
 {
 	br->ageing_time = clock_t_to_jiffies(val);
+	return 0;
 }
 
 static ssize_t store_ageing_time(struct device *d,
@@ -180,9 +189,10 @@ static ssize_t show_priority(struct device *d, struct device_attribute *attr,
 		       (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]);
 }
 
-static void set_priority(struct net_bridge *br, unsigned long val)
+static int set_priority(struct net_bridge *br, unsigned long val)
 {
 	br_stp_set_bridge_priority(br, (u16) val);
+	return 0;
 }
 
 static ssize_t store_priority(struct device *d, struct device_attribute *attr,
-- 
cgit v1.2.3-70-g09d2


From e2a6b85247aacc52d6ba0d9b37a99b8d1a3e0d83 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 8 Sep 2008 16:10:02 -0700
Subject: net: Enable TSO if supported by at least one device

As it stands users of netdev_compute_features (e.g., bridges/bonding)
will only enable TSO if all consituent devices support it.  This
is unnecessarily pessimistic since even on devices that do not
support hardware TSO and SG, emulated TSO still performs to a par
with TSO off.

This patch enables TSO if at least on constituent device supports
it in hardware.

The direct beneficiaries will be virtualisation that uses bridging
since this means that TSO will always be enabled for communication
from the host to the guests.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 60c51f76588..abef86ec4cb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4663,6 +4663,12 @@ int netdev_compute_features(unsigned long all, unsigned long one)
 		one |= NETIF_F_GSO_SOFTWARE;
 	one |= NETIF_F_GSO;
 
+	/*
+	 * If even one device supports a GSO protocol with software fallback,
+	 * enable it for all.
+	 */
+	all |= one & NETIF_F_GSO_SOFTWARE;
+
 	/* If even one device supports robust GSO, enable it for all. */
 	if (one & NETIF_F_GSO_ROBUST)
 		all |= NETIF_F_GSO_ROBUST;
-- 
cgit v1.2.3-70-g09d2


From 5337407c673e2c7c66a84b9838d55a45a760ecff Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 8 Sep 2008 16:17:42 -0700
Subject: warn: Turn the netdev timeout WARN_ON() into a WARN()

this patch turns the netdev timeout WARN_ON_ONCE() into a WARN_ONCE(),
so that the device and driver names are inside the warning message.
This helps automated tools like kerneloops.org to collect the data
and do statistics, as well as making it more likely that humans
cut-n-paste the important message as part of a bugreport.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/asm-generic/bug.h | 10 ++++++++++
 net/sched/sch_generic.c   |  3 +--
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index a3f738cffdb..edc6ba82e09 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -97,6 +97,16 @@ extern void warn_slowpath(const char *file, const int line,
 	unlikely(__ret_warn_once);				\
 })
 
+#define WARN_ONCE(condition, format...)	({			\
+	static int __warned;					\
+	int __ret_warn_once = !!(condition);			\
+								\
+	if (unlikely(__ret_warn_once))				\
+		if (WARN(!__warned, format)) 			\
+			__warned = 1;				\
+	unlikely(__ret_warn_once);				\
+})
+
 #define WARN_ON_RATELIMIT(condition, state)			\
 		WARN_ON((condition) && __ratelimit(state))
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9634091ee2f..ec0a0839ce5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -215,10 +215,9 @@ static void dev_watchdog(unsigned long arg)
 			    time_after(jiffies, (dev->trans_start +
 						 dev->watchdog_timeo))) {
 				char drivername[64];
-				printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
+				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 				       dev->name, netdev_drivername(dev, drivername, 64));
 				dev->tx_timeout(dev);
-				WARN_ON_ONCE(1);
 			}
 			if (!mod_timer(&dev->watchdog_timer,
 				       round_jiffies(jiffies +
-- 
cgit v1.2.3-70-g09d2


From 4aa678ba44aa35759c04f300afbc97d3dab5faa2 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 8 Sep 2008 16:19:58 -0700
Subject: netns bridge: allow bridges in netns!

Bridge as netdevice doesn't cross netns boundaries.

Bridge ports and bridge itself live in same netns.

Notifiers are fixed.

netns propagated from userspace socket for setup and teardown.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Stephen Hemminger <shemming@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c   |  3 ++-
 net/bridge/br_if.c       | 11 ++++++-----
 net/bridge/br_ioctl.c    | 20 ++++++++++----------
 net/bridge/br_netlink.c  | 15 +++++----------
 net/bridge/br_notify.c   |  3 ---
 net/bridge/br_private.h  |  4 ++--
 net/bridge/br_stp_bpdu.c |  3 ---
 7 files changed, 25 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 4f52c3d50eb..22ba8632196 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -178,5 +178,6 @@ void br_dev_setup(struct net_device *dev)
 	dev->priv_flags = IFF_EBRIDGE;
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
-			NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX;
+			NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
+			NETIF_F_NETNS_LOCAL;
 }
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 63c18aacde8..66c4f7122cf 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -168,7 +168,7 @@ static void del_br(struct net_bridge *br)
 	unregister_netdevice(br->dev);
 }
 
-static struct net_device *new_bridge_dev(const char *name)
+static struct net_device *new_bridge_dev(struct net *net, const char *name)
 {
 	struct net_bridge *br;
 	struct net_device *dev;
@@ -178,6 +178,7 @@ static struct net_device *new_bridge_dev(const char *name)
 
 	if (!dev)
 		return NULL;
+	dev_net_set(dev, net);
 
 	br = netdev_priv(dev);
 	br->dev = dev;
@@ -262,12 +263,12 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 	return p;
 }
 
-int br_add_bridge(const char *name)
+int br_add_bridge(struct net *net, const char *name)
 {
 	struct net_device *dev;
 	int ret;
 
-	dev = new_bridge_dev(name);
+	dev = new_bridge_dev(net, name);
 	if (!dev)
 		return -ENOMEM;
 
@@ -294,13 +295,13 @@ out_free:
 	goto out;
 }
 
-int br_del_bridge(const char *name)
+int br_del_bridge(struct net *net, const char *name)
 {
 	struct net_device *dev;
 	int ret = 0;
 
 	rtnl_lock();
-	dev = __dev_get_by_name(&init_net, name);
+	dev = __dev_get_by_name(net, name);
 	if (dev == NULL)
 		ret =  -ENXIO; 	/* Could not find device */
 
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index eeee218eed8..3ec1c636e62 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -21,12 +21,12 @@
 #include "br_private.h"
 
 /* called with RTNL */
-static int get_bridge_ifindices(int *indices, int num)
+static int get_bridge_ifindices(struct net *net, int *indices, int num)
 {
 	struct net_device *dev;
 	int i = 0;
 
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		if (i >= num)
 			break;
 		if (dev->priv_flags & IFF_EBRIDGE)
@@ -89,7 +89,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	dev = dev_get_by_index(&init_net, ifindex);
+	dev = dev_get_by_index(dev_net(br->dev), ifindex);
 	if (dev == NULL)
 		return -EINVAL;
 
@@ -309,7 +309,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	return -EOPNOTSUPP;
 }
 
-static int old_deviceless(void __user *uarg)
+static int old_deviceless(struct net *net, void __user *uarg)
 {
 	unsigned long args[3];
 
@@ -331,7 +331,7 @@ static int old_deviceless(void __user *uarg)
 		if (indices == NULL)
 			return -ENOMEM;
 
-		args[2] = get_bridge_ifindices(indices, args[2]);
+		args[2] = get_bridge_ifindices(net, indices, args[2]);
 
 		ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int))
 			? -EFAULT : args[2];
@@ -354,9 +354,9 @@ static int old_deviceless(void __user *uarg)
 		buf[IFNAMSIZ-1] = 0;
 
 		if (args[0] == BRCTL_ADD_BRIDGE)
-			return br_add_bridge(buf);
+			return br_add_bridge(net, buf);
 
-		return br_del_bridge(buf);
+		return br_del_bridge(net, buf);
 	}
 	}
 
@@ -368,7 +368,7 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uar
 	switch (cmd) {
 	case SIOCGIFBR:
 	case SIOCSIFBR:
-		return old_deviceless(uarg);
+		return old_deviceless(net, uarg);
 
 	case SIOCBRADDBR:
 	case SIOCBRDELBR:
@@ -383,9 +383,9 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uar
 
 		buf[IFNAMSIZ-1] = 0;
 		if (cmd == SIOCBRADDBR)
-			return br_add_bridge(buf);
+			return br_add_bridge(net, buf);
 
-		return br_del_bridge(buf);
+		return br_del_bridge(net, buf);
 	}
 	}
 	return -EOPNOTSUPP;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index f155e6ce8a2..ba7be195803 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -82,6 +82,7 @@ nla_put_failure:
  */
 void br_ifinfo_notify(int event, struct net_bridge_port *port)
 {
+	struct net *net = dev_net(port->dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
@@ -97,10 +98,10 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net,0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+	err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
+		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 }
 
 /*
@@ -112,11 +113,8 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net_device *dev;
 	int idx;
 
-	if (net != &init_net)
-		return 0;
-
 	idx = 0;
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		/* not a bridge port */
 		if (dev->br_port == NULL || idx < cb->args[0])
 			goto skip;
@@ -147,9 +145,6 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 	struct net_bridge_port *p;
 	u8 new_state;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	if (nlmsg_len(nlh) < sizeof(*ifm))
 		return -EINVAL;
 
@@ -165,7 +160,7 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 	if (new_state > BR_STATE_BLOCKING)
 		return -EINVAL;
 
-	dev = __dev_get_by_index(&init_net, ifm->ifi_index);
+	dev = __dev_get_by_index(net, ifm->ifi_index);
 	if (!dev)
 		return -ENODEV;
 
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 76340bdd052..763a3ec292e 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -35,9 +35,6 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	struct net_bridge_port *p = dev->br_port;
 	struct net_bridge *br;
 
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
 	/* not a port of a bridge */
 	if (p == NULL)
 		return NOTIFY_DONE;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c3dc18ddc04..51eaeaaa58c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -178,8 +178,8 @@ extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb);
 
 /* br_if.c */
 extern void br_port_carrier_check(struct net_bridge_port *p);
-extern int br_add_bridge(const char *name);
-extern int br_del_bridge(const char *name);
+extern int br_add_bridge(struct net *net, const char *name);
+extern int br_del_bridge(struct net *net, const char *name);
 extern void br_cleanup_bridges(void);
 extern int br_add_if(struct net_bridge *br,
 	      struct net_device *dev);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 8b200f96f72..81ae40b3f65 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -140,9 +140,6 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
 	struct net_bridge *br;
 	const unsigned char *buf;
 
-	if (!net_eq(dev_net(dev), &init_net))
-		goto err;
-
 	if (!p)
 		goto err;
 
-- 
cgit v1.2.3-70-g09d2


From 712d6954e3998d0de2840d8130941e8042541246 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 8 Sep 2008 16:20:18 -0700
Subject: netns bridge: cleanup bridges during netns stop

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Stephen Hemminger <shemming@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br.c         | 22 ++++++++++++++++------
 net/bridge/br_if.c      |  4 ++--
 net/bridge/br_private.h |  2 +-
 3 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br.c b/net/bridge/br.c
index 573acdf6f9f..4d2c1f1cb52 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -28,6 +28,10 @@ static const struct stp_proto br_stp_proto = {
 	.rcv	= br_stp_rcv,
 };
 
+static struct pernet_operations br_net_ops = {
+	.exit	= br_net_exit,
+};
+
 static int __init br_init(void)
 {
 	int err;
@@ -42,18 +46,22 @@ static int __init br_init(void)
 	if (err)
 		goto err_out;
 
-	err = br_netfilter_init();
+	err = register_pernet_subsys(&br_net_ops);
 	if (err)
 		goto err_out1;
 
-	err = register_netdevice_notifier(&br_device_notifier);
+	err = br_netfilter_init();
 	if (err)
 		goto err_out2;
 
-	err = br_netlink_init();
+	err = register_netdevice_notifier(&br_device_notifier);
 	if (err)
 		goto err_out3;
 
+	err = br_netlink_init();
+	if (err)
+		goto err_out4;
+
 	brioctl_set(br_ioctl_deviceless_stub);
 	br_handle_frame_hook = br_handle_frame;
 
@@ -61,10 +69,12 @@ static int __init br_init(void)
 	br_fdb_put_hook = br_fdb_put;
 
 	return 0;
-err_out3:
+err_out4:
 	unregister_netdevice_notifier(&br_device_notifier);
-err_out2:
+err_out3:
 	br_netfilter_fini();
+err_out2:
+	unregister_pernet_subsys(&br_net_ops);
 err_out1:
 	br_fdb_fini();
 err_out:
@@ -80,7 +90,7 @@ static void __exit br_deinit(void)
 	unregister_netdevice_notifier(&br_device_notifier);
 	brioctl_set(NULL);
 
-	br_cleanup_bridges();
+	unregister_pernet_subsys(&br_net_ops);
 
 	synchronize_net();
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 66c4f7122cf..573e20f7dba 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -446,13 +446,13 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 	return 0;
 }
 
-void __exit br_cleanup_bridges(void)
+void br_net_exit(struct net *net)
 {
 	struct net_device *dev;
 
 	rtnl_lock();
 restart:
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		if (dev->priv_flags & IFF_EBRIDGE) {
 			del_br(dev->priv);
 			goto restart;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 51eaeaaa58c..b6c3b71974d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -180,7 +180,7 @@ extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb);
 extern void br_port_carrier_check(struct net_bridge_port *p);
 extern int br_add_bridge(struct net *net, const char *name);
 extern int br_del_bridge(struct net *net, const char *name);
-extern void br_cleanup_bridges(void);
+extern void br_net_exit(struct net *net);
 extern int br_add_if(struct net_bridge *br,
 	      struct net_device *dev);
 extern int br_del_if(struct net_bridge *br,
-- 
cgit v1.2.3-70-g09d2


From 503e81f65adac596a0275ea0230f2ae1fd64c301 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Mon, 8 Sep 2008 12:04:21 +1000
Subject: ipvs: handle PARTIAL_CHECKSUM

Now that LVS can load balance locally generated traffic, packets may come
from the loopback device and thus may have a partial checksum.

The existing code allows for the case where there is no checksum at all for
TCP, however Herbert Xu has confirmed that this is not legal.

Signed-off-by: Simon Horman <horms@verge.net.au>
Acked-by: Julius Volz <juliusv@google.com>
---
 net/ipv4/ipvs/ip_vs_proto_tcp.c | 37 +++++++++++++++++++++++++++++++++++--
 net/ipv4/ipvs/ip_vs_proto_udp.c | 37 +++++++++++++++++++++++++++++++++++--
 2 files changed, 70 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 808e8be0280..537f616776d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -134,12 +134,34 @@ tcp_fast_csum_update(int af, struct tcphdr *tcph,
 }
 
 
+static inline void
+tcp_partial_csum_update(int af, struct tcphdr *tcph,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcph->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(tcph->check))));
+	else
+#endif
+	tcph->check =
+		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(tcph->check))));
+}
+
+
 static int
 tcp_snat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
 	unsigned int tcphoff;
+	int oldlen;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6)
@@ -147,6 +169,7 @@ tcp_snat_handler(struct sk_buff *skb,
 	else
 #endif
 		tcphoff = ip_hdrlen(skb);
+	oldlen = skb->len - tcphoff;
 
 	/* csum_check requires unshared skb */
 	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
@@ -166,7 +189,11 @@ tcp_snat_handler(struct sk_buff *skb,
 	tcph->source = cp->vport;
 
 	/* Adjust TCP checksums */
-	if (!cp->app && (tcph->check != 0)) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - tcphoff));
+	} else if (!cp->app) {
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
@@ -204,6 +231,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 {
 	struct tcphdr *tcph;
 	unsigned int tcphoff;
+	int oldlen;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6)
@@ -211,6 +239,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 	else
 #endif
 		tcphoff = ip_hdrlen(skb);
+	oldlen = skb->len - tcphoff;
 
 	/* csum_check requires unshared skb */
 	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
@@ -235,7 +264,11 @@ tcp_dnat_handler(struct sk_buff *skb,
 	/*
 	 *	Adjust TCP checksums
 	 */
-	if (!cp->app && (tcph->check != 0)) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - tcphoff));
+	} else if (!cp->app) {
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 5f2073e41cf..e3ee26bd1de 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -141,12 +141,34 @@ udp_fast_csum_update(int af, struct udphdr *uhdr,
 		uhdr->check = CSUM_MANGLED_0;
 }
 
+static inline void
+udp_partial_csum_update(int af, struct udphdr *uhdr,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		uhdr->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(uhdr->check))));
+	else
+#endif
+	uhdr->check =
+		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(uhdr->check))));
+}
+
+
 static int
 udp_snat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
 	unsigned int udphoff;
+	int oldlen;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6)
@@ -154,6 +176,7 @@ udp_snat_handler(struct sk_buff *skb,
 	else
 #endif
 		udphoff = ip_hdrlen(skb);
+	oldlen = skb->len - udphoff;
 
 	/* csum_check requires unshared skb */
 	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
@@ -177,7 +200,11 @@ udp_snat_handler(struct sk_buff *skb,
 	/*
 	 *	Adjust UDP checksums
 	 */
-	if (!cp->app && (udph->check != 0)) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - udphoff));
+	} else if (!cp->app && (udph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
@@ -216,6 +243,7 @@ udp_dnat_handler(struct sk_buff *skb,
 {
 	struct udphdr *udph;
 	unsigned int udphoff;
+	int oldlen;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6)
@@ -223,6 +251,7 @@ udp_dnat_handler(struct sk_buff *skb,
 	else
 #endif
 		udphoff = ip_hdrlen(skb);
+	oldlen = skb->len - udphoff;
 
 	/* csum_check requires unshared skb */
 	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
@@ -247,7 +276,11 @@ udp_dnat_handler(struct sk_buff *skb,
 	/*
 	 *	Adjust UDP checksums
 	 */
-	if (!cp->app && (udph->check != 0)) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - udphoff));
+	} else if (!cp->app && (udph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
-- 
cgit v1.2.3-70-g09d2


From 9d7f2a2b1aa9e55537a053c68bdbd119fc479dd3 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Mon, 8 Sep 2008 14:55:42 +0200
Subject: IPVS: Remove incorrect ip_route_me_harder(), fix IPv6

Remove an incorrect ip_route_me_harder() that was probably a result of
merging my IPv6 patches with the local client patches. With this, IPv6+NAT
are working again.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_core.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index f5180ac56be..bdc92d73fbe 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -904,15 +904,6 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
 			goto drop;
 
-	/* For policy routing, packets originating from this
-	 * machine itself may be routed differently to packets
-	 * passing through.  We want this packet to be routed as
-	 * if it came from this machine itself.  So re-compute
-	 * the routing information.
-	 */
-	if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
-		goto drop;
-
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
 
 	ip_vs_out_stats(cp, skb);
-- 
cgit v1.2.3-70-g09d2


From 2206a3f5b75be5dadf11541961bd7c924857eb5d Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Mon, 8 Sep 2008 13:38:11 +0200
Subject: ipvs: Restrict connection table size via Kconfig

Instead of checking the value in include/net/ip_vs.h, we can just
restrict the range in our Kconfig file. This will prevent values outside
of the range early.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Reviewed-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h   | 10 +---------
 net/ipv4/ipvs/Kconfig |  3 ++-
 2 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 1b13cef4b54..38f4f690b18 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -621,16 +621,8 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows);
 #ifndef CONFIG_IP_VS_TAB_BITS
 #define CONFIG_IP_VS_TAB_BITS   12
 #endif
-/* make sure that IP_VS_CONN_TAB_BITS is located in [8, 20] */
-#if CONFIG_IP_VS_TAB_BITS < 8
-#define IP_VS_CONN_TAB_BITS	8
-#endif
-#if CONFIG_IP_VS_TAB_BITS > 20
-#define IP_VS_CONN_TAB_BITS	20
-#endif
-#if 8 <= CONFIG_IP_VS_TAB_BITS && CONFIG_IP_VS_TAB_BITS <= 20
+
 #define IP_VS_CONN_TAB_BITS	CONFIG_IP_VS_TAB_BITS
-#endif
 #define IP_VS_CONN_TAB_SIZE     (1 << IP_VS_CONN_TAB_BITS)
 #define IP_VS_CONN_TAB_MASK     (IP_VS_CONN_TAB_SIZE - 1)
 
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 794cecb249a..de6004de80b 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -41,7 +41,8 @@ config	IP_VS_DEBUG
 
 config	IP_VS_TAB_BITS
 	int "IPVS connection table size (the Nth power of 2)"
-	default "12" 
+	range 8 20
+	default 12
 	---help---
 	  The IPVS connection hash table uses the chaining scheme to handle
 	  hash collisions. Using a big IPVS connection hash table will greatly
-- 
cgit v1.2.3-70-g09d2


From e9c0ce232e7a36daae1ca08282609d7f0c57c567 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Mon, 8 Sep 2008 13:39:04 +0200
Subject: ipvs: Embed user stats structure into kernel stats structure

Instead of duplicating the fields, integrate a user stats structure into
the kernel stats structure. This is more robust when the members are
changed, because they are now automatically kept in sync.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Reviewed-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h        | 21 ++----------------
 net/ipv4/ipvs/ip_vs_core.c | 30 +++++++++++++-------------
 net/ipv4/ipvs/ip_vs_ctl.c  | 53 ++++++++++++++++++----------------------------
 net/ipv4/ipvs/ip_vs_est.c  | 40 +++++++++++++++++-----------------
 4 files changed, 58 insertions(+), 86 deletions(-)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 38f4f690b18..33e2ac6ceb3 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -254,27 +254,10 @@ struct ip_vs_estimator {
 
 struct ip_vs_stats
 {
-	__u32                   conns;          /* connections scheduled */
-	__u32                   inpkts;         /* incoming packets */
-	__u32                   outpkts;        /* outgoing packets */
-	__u64                   inbytes;        /* incoming bytes */
-	__u64                   outbytes;       /* outgoing bytes */
-
-	__u32			cps;		/* current connection rate */
-	__u32			inpps;		/* current in packet rate */
-	__u32			outpps;		/* current out packet rate */
-	__u32			inbps;		/* current in byte rate */
-	__u32			outbps;		/* current out byte rate */
-
-	/*
-	 * Don't add anything before the lock, because we use memcpy() to copy
-	 * the members before the lock to struct ip_vs_stats_user in
-	 * ip_vs_ctl.c.
-	 */
+	struct ip_vs_stats_user	ustats;         /* statistics */
+	struct ip_vs_estimator	est;		/* estimator */
 
 	spinlock_t              lock;           /* spin lock */
-
-	struct ip_vs_estimator	est;		/* estimator */
 };
 
 struct dst_entry;
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index bdc92d73fbe..80a4fcf33a5 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -102,18 +102,18 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 	struct ip_vs_dest *dest = cp->dest;
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		spin_lock(&dest->stats.lock);
-		dest->stats.inpkts++;
-		dest->stats.inbytes += skb->len;
+		dest->stats.ustats.inpkts++;
+		dest->stats.ustats.inbytes += skb->len;
 		spin_unlock(&dest->stats.lock);
 
 		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.inpkts++;
-		dest->svc->stats.inbytes += skb->len;
+		dest->svc->stats.ustats.inpkts++;
+		dest->svc->stats.ustats.inbytes += skb->len;
 		spin_unlock(&dest->svc->stats.lock);
 
 		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.inpkts++;
-		ip_vs_stats.inbytes += skb->len;
+		ip_vs_stats.ustats.inpkts++;
+		ip_vs_stats.ustats.inbytes += skb->len;
 		spin_unlock(&ip_vs_stats.lock);
 	}
 }
@@ -125,18 +125,18 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 	struct ip_vs_dest *dest = cp->dest;
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		spin_lock(&dest->stats.lock);
-		dest->stats.outpkts++;
-		dest->stats.outbytes += skb->len;
+		dest->stats.ustats.outpkts++;
+		dest->stats.ustats.outbytes += skb->len;
 		spin_unlock(&dest->stats.lock);
 
 		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.outpkts++;
-		dest->svc->stats.outbytes += skb->len;
+		dest->svc->stats.ustats.outpkts++;
+		dest->svc->stats.ustats.outbytes += skb->len;
 		spin_unlock(&dest->svc->stats.lock);
 
 		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.outpkts++;
-		ip_vs_stats.outbytes += skb->len;
+		ip_vs_stats.ustats.outpkts++;
+		ip_vs_stats.ustats.outbytes += skb->len;
 		spin_unlock(&ip_vs_stats.lock);
 	}
 }
@@ -146,15 +146,15 @@ static inline void
 ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 {
 	spin_lock(&cp->dest->stats.lock);
-	cp->dest->stats.conns++;
+	cp->dest->stats.ustats.conns++;
 	spin_unlock(&cp->dest->stats.lock);
 
 	spin_lock(&svc->stats.lock);
-	svc->stats.conns++;
+	svc->stats.ustats.conns++;
 	spin_unlock(&svc->stats.lock);
 
 	spin_lock(&ip_vs_stats.lock);
-	ip_vs_stats.conns++;
+	ip_vs_stats.ustats.conns++;
 	spin_unlock(&ip_vs_stats.lock);
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index e53efe41f01..993a83fb0d5 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -744,18 +744,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
 	spin_lock_bh(&stats->lock);
 
-	stats->conns = 0;
-	stats->inpkts = 0;
-	stats->outpkts = 0;
-	stats->inbytes = 0;
-	stats->outbytes = 0;
-
-	stats->cps = 0;
-	stats->inpps = 0;
-	stats->outpps = 0;
-	stats->inbps = 0;
-	stats->outbps = 0;
-
+	memset(&stats->ustats, 0, sizeof(stats->ustats));
 	ip_vs_zero_estimator(stats);
 
 	spin_unlock_bh(&stats->lock);
@@ -1964,20 +1953,20 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
 		   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
 	spin_lock_bh(&ip_vs_stats.lock);
-	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
-		   ip_vs_stats.inpkts, ip_vs_stats.outpkts,
-		   (unsigned long long) ip_vs_stats.inbytes,
-		   (unsigned long long) ip_vs_stats.outbytes);
+	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
+		   ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
+		   (unsigned long long) ip_vs_stats.ustats.inbytes,
+		   (unsigned long long) ip_vs_stats.ustats.outbytes);
 
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
 		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
 	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-			ip_vs_stats.cps,
-			ip_vs_stats.inpps,
-			ip_vs_stats.outpps,
-			ip_vs_stats.inbps,
-			ip_vs_stats.outbps);
+			ip_vs_stats.ustats.cps,
+			ip_vs_stats.ustats.inpps,
+			ip_vs_stats.ustats.outpps,
+			ip_vs_stats.ustats.inbps,
+			ip_vs_stats.ustats.outbps);
 	spin_unlock_bh(&ip_vs_stats.lock);
 
 	return 0;
@@ -2215,7 +2204,7 @@ static void
 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
 {
 	spin_lock_bh(&src->lock);
-	memcpy(dst, src, (char*)&src->lock - (char*)src);
+	memcpy(dst, &src->ustats, sizeof(*dst));
 	spin_unlock_bh(&src->lock);
 }
 
@@ -2591,16 +2580,16 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
 
 	spin_lock_bh(&stats->lock);
 
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
-	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
-	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
 
 	spin_unlock_bh(&stats->lock);
 
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 4fb620ec208..2eb2860dabb 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -65,37 +65,37 @@ static void estimation_timer(unsigned long arg)
 		s = container_of(e, struct ip_vs_stats, est);
 
 		spin_lock(&s->lock);
-		n_conns = s->conns;
-		n_inpkts = s->inpkts;
-		n_outpkts = s->outpkts;
-		n_inbytes = s->inbytes;
-		n_outbytes = s->outbytes;
+		n_conns = s->ustats.conns;
+		n_inpkts = s->ustats.inpkts;
+		n_outpkts = s->ustats.outpkts;
+		n_inbytes = s->ustats.inbytes;
+		n_outbytes = s->ustats.outbytes;
 
 		/* scaled by 2^10, but divided 2 seconds */
 		rate = (n_conns - e->last_conns)<<9;
 		e->last_conns = n_conns;
 		e->cps += ((long)rate - (long)e->cps)>>2;
-		s->cps = (e->cps+0x1FF)>>10;
+		s->ustats.cps = (e->cps+0x1FF)>>10;
 
 		rate = (n_inpkts - e->last_inpkts)<<9;
 		e->last_inpkts = n_inpkts;
 		e->inpps += ((long)rate - (long)e->inpps)>>2;
-		s->inpps = (e->inpps+0x1FF)>>10;
+		s->ustats.inpps = (e->inpps+0x1FF)>>10;
 
 		rate = (n_outpkts - e->last_outpkts)<<9;
 		e->last_outpkts = n_outpkts;
 		e->outpps += ((long)rate - (long)e->outpps)>>2;
-		s->outpps = (e->outpps+0x1FF)>>10;
+		s->ustats.outpps = (e->outpps+0x1FF)>>10;
 
 		rate = (n_inbytes - e->last_inbytes)<<4;
 		e->last_inbytes = n_inbytes;
 		e->inbps += ((long)rate - (long)e->inbps)>>2;
-		s->inbps = (e->inbps+0xF)>>5;
+		s->ustats.inbps = (e->inbps+0xF)>>5;
 
 		rate = (n_outbytes - e->last_outbytes)<<4;
 		e->last_outbytes = n_outbytes;
 		e->outbps += ((long)rate - (long)e->outbps)>>2;
-		s->outbps = (e->outbps+0xF)>>5;
+		s->ustats.outbps = (e->outbps+0xF)>>5;
 		spin_unlock(&s->lock);
 	}
 	spin_unlock(&est_lock);
@@ -108,20 +108,20 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
 
 	INIT_LIST_HEAD(&est->list);
 
-	est->last_conns = stats->conns;
-	est->cps = stats->cps<<10;
+	est->last_conns = stats->ustats.conns;
+	est->cps = stats->ustats.cps<<10;
 
-	est->last_inpkts = stats->inpkts;
-	est->inpps = stats->inpps<<10;
+	est->last_inpkts = stats->ustats.inpkts;
+	est->inpps = stats->ustats.inpps<<10;
 
-	est->last_outpkts = stats->outpkts;
-	est->outpps = stats->outpps<<10;
+	est->last_outpkts = stats->ustats.outpkts;
+	est->outpps = stats->ustats.outpps<<10;
 
-	est->last_inbytes = stats->inbytes;
-	est->inbps = stats->inbps<<5;
+	est->last_inbytes = stats->ustats.inbytes;
+	est->inbps = stats->ustats.inbps<<5;
 
-	est->last_outbytes = stats->outbytes;
-	est->outbps = stats->outbps<<5;
+	est->last_outbytes = stats->ustats.outbytes;
+	est->outbps = stats->ustats.outbps<<5;
 
 	spin_lock_bh(&est_lock);
 	list_add(&est->list, &est_list);
-- 
cgit v1.2.3-70-g09d2


From f1c08ca559387ab30992055596d54061dfa022b1 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 9 Sep 2008 07:19:19 +0200
Subject: [Bluetooth] Fix reference counting during ACL config stage

The ACL config stage keeps holding a reference count on incoming
connections when requesting the extended features. This results in
keeping an ACL link up without any users. The problem here is that
the Bluetooth specification doesn't define an ownership of the ACL
link and thus it can happen that the implementation on the initiator
side doesn't care about disconnecting unused links. In this case the
acceptor needs to take care of this.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0e3db289f4b..ad7a553d771 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1605,14 +1605,11 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 
 		if (conn->state == BT_CONFIG) {
 			if (!ev->status && hdev->ssp_mode > 0 &&
-							conn->ssp_mode > 0) {
-				if (conn->out) {
-					struct hci_cp_auth_requested cp;
-					cp.handle = ev->handle;
-					hci_send_cmd(hdev,
-						HCI_OP_AUTH_REQUESTED,
+					conn->ssp_mode > 0 && conn->out) {
+				struct hci_cp_auth_requested cp;
+				cp.handle = ev->handle;
+				hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
 							sizeof(cp), &cp);
-				}
 			} else {
 				conn->state = BT_CONNECTED;
 				hci_proto_connect_cfm(conn, ev->status);
-- 
cgit v1.2.3-70-g09d2


From 09ab6f4c2376a0fc31abde1e2991513f900ea825 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 9 Sep 2008 07:19:20 +0200
Subject: [Bluetooth] Enforce correct authentication requirements

With the introduction of Security Mode 4 and Simple Pairing from the
Bluetooth 2.1 specification it became mandatory that the initiator
requires authentication and encryption before any L2CAP channel can
be established. The only exception here is PSM 1 for the service
discovery protocol (SDP). It is meant to be used without any encryption
since it contains only public information. This is how Bluetooth 2.0
and before handle connections on PSM 1.

For Bluetooth 2.1 devices the pairing procedure differentiates between
no bonding, general bonding and dedicated bonding. The L2CAP layer
wrongly uses always general bonding when creating new connections, but it
should not do this for SDP connections. In this case the authentication
requirement should be no bonding and the just-works model should be used,
but in case of non-SDP connection it is required to use general bonding.

If the new connection requires man-in-the-middle (MITM) protection, it
also first wrongly creates an unauthenticated link key and then later on
requests an upgrade to an authenticated link key to provide full MITM
protection. With Simple Pairing the link key generation is an expensive
operation (compared to Bluetooth 2.0 and before) and doing this twice
during a connection setup causes a noticeable delay when establishing
a new connection. This should be avoided to not regress from the expected
Bluetooth 2.0 connection times. The authentication requirements are known
up-front and so enforce them.

To fulfill these requirements the hci_connect() function has been extended
with an authentication requirement parameter that will be stored inside
the connection information and can be retrieved by userspace at any
time. This allows the correct IO capabilities exchange and results in
the expected behavior.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  2 +-
 net/bluetooth/hci_conn.c         |  8 +++++---
 net/bluetooth/l2cap.c            | 19 +++++++++++++++++--
 net/bluetooth/sco.c              |  2 +-
 4 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index cbf75109468..5e785b968e7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -325,7 +325,7 @@ int hci_conn_del(struct hci_conn *conn);
 void hci_conn_hash_flush(struct hci_dev *hdev);
 void hci_conn_check_pending(struct hci_dev *hdev);
 
-struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *src);
+struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type);
 int hci_conn_auth(struct hci_conn *conn);
 int hci_conn_encrypt(struct hci_conn *conn);
 int hci_conn_change_link_key(struct hci_conn *conn);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index ca8d05245ca..a2f9efaa336 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -330,7 +330,7 @@ EXPORT_SYMBOL(hci_get_route);
 
 /* Create SCO or ACL connection.
  * Device _must_ be locked */
-struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst)
+struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type)
 {
 	struct hci_conn *acl;
 	struct hci_conn *sco;
@@ -344,8 +344,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst)
 
 	hci_conn_hold(acl);
 
-	if (acl->state == BT_OPEN || acl->state == BT_CLOSED)
+	if (acl->state == BT_OPEN || acl->state == BT_CLOSED) {
+		acl->auth_type = auth_type;
 		hci_acl_connect(acl);
+	}
 
 	if (type == ACL_LINK)
 		return acl;
@@ -381,7 +383,7 @@ int hci_conn_auth(struct hci_conn *conn)
 
 	if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) {
 		if (!(conn->auth_type & 0x01)) {
-			conn->auth_type = HCI_AT_GENERAL_BONDING_MITM;
+			conn->auth_type |= 0x01;
 			conn->link_mode &= ~HCI_LM_AUTH;
 		}
 	}
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 3396d5bdef1..a96d6de80d1 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -55,7 +55,7 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "2.10"
+#define VERSION "2.11"
 
 static u32 l2cap_feat_mask = 0x0000;
 
@@ -778,6 +778,7 @@ static int l2cap_do_connect(struct sock *sk)
 	struct l2cap_conn *conn;
 	struct hci_conn *hcon;
 	struct hci_dev *hdev;
+	__u8 auth_type;
 	int err = 0;
 
 	BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), l2cap_pi(sk)->psm);
@@ -789,7 +790,21 @@ static int l2cap_do_connect(struct sock *sk)
 
 	err = -ENOMEM;
 
-	hcon = hci_connect(hdev, ACL_LINK, dst);
+	if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH ||
+			l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT ||
+				l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) {
+		if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001))
+			auth_type = HCI_AT_NO_BONDING_MITM;
+		else
+			auth_type = HCI_AT_GENERAL_BONDING_MITM;
+	} else {
+		if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001))
+			auth_type = HCI_AT_NO_BONDING;
+		else
+			auth_type = HCI_AT_GENERAL_BONDING;
+	}
+
+	hcon = hci_connect(hdev, ACL_LINK, dst, auth_type);
 	if (!hcon)
 		goto done;
 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index a16011fedc1..0cc91e6da76 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -200,7 +200,7 @@ static int sco_connect(struct sock *sk)
 	else
 		type = SCO_LINK;
 
-	hcon = hci_connect(hdev, type, dst);
+	hcon = hci_connect(hdev, type, dst, HCI_AT_NO_BONDING);
 	if (!hcon)
 		goto done;
 
-- 
cgit v1.2.3-70-g09d2


From e7c29cb16c833441fd2160642bb13025f4e7ac70 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 9 Sep 2008 07:19:20 +0200
Subject: [Bluetooth] Reject L2CAP connections on an insecure ACL link

The Security Mode 4 of the Bluetooth 2.1 specification has strict
authentication and encryption requirements. It is the initiators job
to create a secure ACL link. However in case of malicious devices, the
acceptor has to make sure that the ACL is encrypted before allowing
any kind of L2CAP connection. The only exception here is the PSM 1 for
the service discovery protocol, because that is allowed to run on an
insecure ACL link.

Previously it was enough to reject a L2CAP connection during the
connection setup phase, but with Bluetooth 2.1 it is forbidden to
do any L2CAP protocol exchange on an insecure link (except SDP).

The new hci_conn_check_link_mode() function can be used to check the
integrity of an ACL link. This functions also takes care of the cases
where Security Mode 4 is disabled or one of the devices is based on
an older specification.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/af_bluetooth.c     |  2 +-
 net/bluetooth/hci_conn.c         | 13 +++++++++++++
 net/bluetooth/l2cap.c            | 15 +++++++++++----
 4 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 5e785b968e7..46a43b721dd 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -326,6 +326,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev);
 void hci_conn_check_pending(struct hci_dev *hdev);
 
 struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type);
+int hci_conn_check_link_mode(struct hci_conn *conn);
 int hci_conn_auth(struct hci_conn *conn);
 int hci_conn_encrypt(struct hci_conn *conn);
 int hci_conn_change_link_key(struct hci_conn *conn);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1edfdf4c095..f6348e078aa 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -49,7 +49,7 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "2.12"
+#define VERSION "2.13"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a2f9efaa336..b7002429f15 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -376,6 +376,19 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 }
 EXPORT_SYMBOL(hci_connect);
 
+/* Check link security requirement */
+int hci_conn_check_link_mode(struct hci_conn *conn)
+{
+	BT_DBG("conn %p", conn);
+
+	if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0 &&
+					!(conn->link_mode & HCI_LM_ENCRYPT))
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL(hci_conn_check_link_mode);
+
 /* Authenticate remote device */
 int hci_conn_auth(struct hci_conn *conn)
 {
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index a96d6de80d1..9610a9c85b9 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1568,10 +1568,10 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
 	struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
 	struct l2cap_conn_rsp rsp;
 	struct sock *sk, *parent;
-	int result, status = 0;
+	int result, status = L2CAP_CS_NO_INFO;
 
 	u16 dcid = 0, scid = __le16_to_cpu(req->scid);
-	__le16 psm  = req->psm;
+	__le16 psm = req->psm;
 
 	BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid);
 
@@ -1582,6 +1582,13 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
 		goto sendresp;
 	}
 
+	/* Check if the ACL is secure enough (if not SDP) */
+	if (psm != cpu_to_le16(0x0001) &&
+				!hci_conn_check_link_mode(conn->hcon)) {
+		result = L2CAP_CR_SEC_BLOCK;
+		goto response;
+	}
+
 	result = L2CAP_CR_NO_MEM;
 
 	/* Check for backlog size */
@@ -2239,7 +2246,7 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 			rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
 			rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
 			rsp.result = cpu_to_le16(result);
-			rsp.status = cpu_to_le16(0);
+			rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 		}
@@ -2311,7 +2318,7 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 			rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
 			rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
 			rsp.result = cpu_to_le16(result);
-			rsp.status = cpu_to_le16(0);
+			rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 		}
-- 
cgit v1.2.3-70-g09d2


From 410e27a49bb98bc7fa3ff5fc05cc313817b9f253 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 9 Sep 2008 13:27:22 +0200
Subject: This reverts "Merge branch 'dccp' of
 git://eden-feed.erg.abdn.ac.uk/dccp_exp" as it accentally contained the wrong
 set of patches. These will be submitted separately. Signed-off-by: Gerrit
 Renker <gerrit@erg.abdn.ac.uk>

---
 Documentation/networking/dccp.txt   |   54 +-
 include/linux/dccp.h                |  122 ++-
 include/net/tcp.h                   |   15 -
 net/dccp/Kconfig                    |    3 +
 net/dccp/Makefile                   |    5 +-
 net/dccp/ackvec.c                   |  619 ++++++------
 net/dccp/ackvec.h                   |  204 ++--
 net/dccp/ccid.c                     |  101 +-
 net/dccp/ccid.h                     |  113 +--
 net/dccp/ccids/Kconfig              |   30 +-
 net/dccp/ccids/ccid2.c              |  622 +++++++-----
 net/dccp/ccids/ccid2.h              |   63 +-
 net/dccp/ccids/ccid3.c              |  762 +++++++++------
 net/dccp/ccids/ccid3.h              |  153 +--
 net/dccp/ccids/lib/loss_interval.c  |   30 +-
 net/dccp/ccids/lib/loss_interval.h  |    4 +-
 net/dccp/ccids/lib/packet_history.c |  282 +++---
 net/dccp/ccids/lib/packet_history.h |   78 +-
 net/dccp/ccids/lib/tfrc.h           |   16 -
 net/dccp/ccids/lib/tfrc_equation.c  |   29 +-
 net/dccp/dccp.h                     |  104 +-
 net/dccp/diag.c                     |    2 +-
 net/dccp/feat.c                     | 1805 +++++++++--------------------------
 net/dccp/feat.h                     |  144 +--
 net/dccp/input.c                    |  164 ++--
 net/dccp/ipv4.c                     |    4 +-
 net/dccp/ipv6.c                     |    4 +-
 net/dccp/minisocks.c                |   87 +-
 net/dccp/options.c                  |  341 ++++---
 net/dccp/output.c                   |  279 ++----
 net/dccp/probe.c                    |   75 +-
 net/dccp/proto.c                    |  281 ++----
 net/dccp/qpolicy.c                  |  137 ---
 net/dccp/sysctl.c                   |   64 +-
 net/dccp/timer.c                    |   42 +-
 net/ipv4/tcp_input.c                |   17 +-
 36 files changed, 2884 insertions(+), 3971 deletions(-)
 delete mode 100644 net/dccp/qpolicy.c

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index fcfc1253442..39131a3c78f 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -45,25 +45,6 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
 
 Socket options
 ==============
-DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
-a policy ID as argument and can only be set before the connection (i.e. changes
-during an established connection are not supported). Currently, two policies are
-defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
-and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an
-u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
-a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
-be formatted using a cmsg(3) message header filled in as follows:
-	cmsg->cmsg_level = SOL_DCCP;
-	cmsg->cmsg_type	 = DCCP_SCM_PRIORITY;
-	cmsg->cmsg_len	 = CMSG_LEN(sizeof(uint32_t));	/* or CMSG_LEN(4) */
-
-DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
-value is always interpreted as unbounded queue length. If different from zero,
-the interpretation of this parameter depends on the current dequeuing policy
-(see above): the "simple" policy will enforce a fixed queue size by returning
-EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
-lowest-priority packet first. The default value for this parameter is
-initialised from /proc/sys/net/dccp/default/tx_qlen.
 
 DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
 service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
@@ -76,24 +57,6 @@ can be set before calling bind().
 DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
 size (application payload size) in bytes, see RFC 4340, section 14.
 
-DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
-supported by the endpoint (see include/linux/dccp.h for symbolic constants).
-The caller needs to provide a sufficiently large (> 2) array of type uint8_t.
-
-DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same
-time, combining the operation of the next two socket options. This option is
-preferrable over the latter two, since often applications will use the same
-type of CCID for both directions; and mixed use of CCIDs is not currently well
-understood. This socket option takes as argument at least one uint8_t value, or
-an array of uint8_t values, which must match available CCIDS (see above). CCIDs
-must be registered on the socket before calling connect() or listen().
-
-DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets
-the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID.
-Please note that the getsockopt argument type here is `int', not uint8_t.
-
-DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID.
-
 DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
 timewait state when closing the connection (RFC 4340, 8.3). The usual case is
 that the closing server sends a CloseReq, whereupon the client holds timewait
@@ -152,16 +115,23 @@ retries2
 	importance for retransmitted acknowledgments and feature negotiation,
 	data packets are never retransmitted. Analogue of tcp_retries2.
 
+send_ndp = 1
+	Whether or not to send NDP count options (sec. 7.7.2).
+
+send_ackvec = 1
+	Whether or not to send Ack Vector options (sec. 11.5).
+
+ack_ratio = 2
+	The default Ack Ratio (sec. 11.3) to use.
+
 tx_ccid = 2
-	Default CCID for the sender-receiver half-connection. Depending on the
-	choice of CCID, the Send Ack Vector feature is enabled automatically.
+	Default CCID for the sender-receiver half-connection.
 
 rx_ccid = 2
-	Default CCID for the receiver-sender half-connection; see tx_ccid.
+	Default CCID for the receiver-sender half-connection.
 
 seq_window = 100
-	The initial sequence window (sec. 7.5.2) of the sender. This influences
-	the local ackno validity and the remote seqno validity windows (7.5.1).
+	The initial sequence window (sec. 7.5.2).
 
 tx_qlen = 5
 	The size of the transmit buffer in packets. A value of 0 corresponds
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 010e2d87ed7..6080449fbec 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -165,13 +165,9 @@ enum {
 	DCCPO_TIMESTAMP_ECHO = 42,
 	DCCPO_ELAPSED_TIME = 43,
 	DCCPO_MAX = 45,
-	DCCPO_MIN_RX_CCID_SPECIFIC = 128,	/* from sender to receiver */
-	DCCPO_MAX_RX_CCID_SPECIFIC = 191,
-	DCCPO_MIN_TX_CCID_SPECIFIC = 192,	/* from receiver to sender */
-	DCCPO_MAX_TX_CCID_SPECIFIC = 255,
+	DCCPO_MIN_CCID_SPECIFIC = 128,
+	DCCPO_MAX_CCID_SPECIFIC = 255,
 };
-/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */
-#define DCCP_SINGLE_OPT_MAXLEN	253
 
 /* DCCP CCIDS */
 enum {
@@ -180,36 +176,27 @@ enum {
 };
 
 /* DCCP features (RFC 4340 section 6.4) */
-enum dccp_feature_numbers {
+enum {
 	DCCPF_RESERVED = 0,
 	DCCPF_CCID = 1,
-	DCCPF_SHORT_SEQNOS = 2,
+	DCCPF_SHORT_SEQNOS = 2,		/* XXX: not yet implemented */
 	DCCPF_SEQUENCE_WINDOW = 3,
-	DCCPF_ECN_INCAPABLE = 4,
+	DCCPF_ECN_INCAPABLE = 4,	/* XXX: not yet implemented */
 	DCCPF_ACK_RATIO = 5,
 	DCCPF_SEND_ACK_VECTOR = 6,
 	DCCPF_SEND_NDP_COUNT = 7,
 	DCCPF_MIN_CSUM_COVER = 8,
-	DCCPF_DATA_CHECKSUM = 9,
+	DCCPF_DATA_CHECKSUM = 9,	/* XXX: not yet implemented */
 	/* 10-127 reserved */
 	DCCPF_MIN_CCID_SPECIFIC = 128,
-	DCCPF_SEND_LEV_RATE = 192,	/* RFC 4342, sec. 8.4 */
 	DCCPF_MAX_CCID_SPECIFIC = 255,
 };
 
-/* DCCP socket control message types for cmsg */
-enum dccp_cmsg_type {
-	DCCP_SCM_PRIORITY = 1,
-	DCCP_SCM_QPOLICY_MAX = 0xFFFF,
-	/* ^-- Up to here reserved exclusively for qpolicy parameters */
-	DCCP_SCM_MAX
-};
-
-/* DCCP priorities for outgoing/queued packets */
-enum dccp_packet_dequeueing_policy {
-	DCCPQ_POLICY_SIMPLE,
-	DCCPQ_POLICY_PRIO,
-	DCCPQ_POLICY_MAX
+/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */
+struct dccp_so_feat {
+	__u8 dccpsf_feat;
+	__u8 __user *dccpsf_val;
+	__u8 dccpsf_len;
 };
 
 /* DCCP socket options */
@@ -221,12 +208,6 @@ enum dccp_packet_dequeueing_policy {
 #define DCCP_SOCKOPT_SERVER_TIMEWAIT	6
 #define DCCP_SOCKOPT_SEND_CSCOV		10
 #define DCCP_SOCKOPT_RECV_CSCOV		11
-#define DCCP_SOCKOPT_AVAILABLE_CCIDS	12
-#define DCCP_SOCKOPT_CCID		13
-#define DCCP_SOCKOPT_TX_CCID		14
-#define DCCP_SOCKOPT_RX_CCID		15
-#define DCCP_SOCKOPT_QPOLICY_ID		16
-#define DCCP_SOCKOPT_QPOLICY_TXQLEN	17
 #define DCCP_SOCKOPT_CCID_RX_INFO	128
 #define DCCP_SOCKOPT_CCID_TX_INFO	192
 
@@ -374,13 +355,62 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 	return __dccp_hdr_len(dccp_hdr(skb));
 }
 
+
+/* initial values for each feature */
+#define DCCPF_INITIAL_SEQUENCE_WINDOW		100
+#define DCCPF_INITIAL_ACK_RATIO			2
+#define DCCPF_INITIAL_CCID			DCCPC_CCID2
+#define DCCPF_INITIAL_SEND_ACK_VECTOR		1
+/* FIXME: for now we're default to 1 but it should really be 0 */
+#define DCCPF_INITIAL_SEND_NDP_COUNT		1
+
+/**
+  * struct dccp_minisock - Minimal DCCP connection representation
+  *
+  * Will be used to pass the state from dccp_request_sock to dccp_sock.
+  *
+  * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
+  * @dccpms_ccid - Congestion Control Id (CCID) (section 10)
+  * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
+  * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2)
+  * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3)
+  * @dccpms_pending - List of features being negotiated
+  * @dccpms_conf -
+  */
+struct dccp_minisock {
+	__u64			dccpms_sequence_window;
+	__u8			dccpms_rx_ccid;
+	__u8			dccpms_tx_ccid;
+	__u8			dccpms_send_ack_vector;
+	__u8			dccpms_send_ndp_count;
+	__u8			dccpms_ack_ratio;
+	struct list_head	dccpms_pending;
+	struct list_head	dccpms_conf;
+};
+
+struct dccp_opt_conf {
+	__u8			*dccpoc_val;
+	__u8			dccpoc_len;
+};
+
+struct dccp_opt_pend {
+	struct list_head	dccpop_node;
+	__u8			dccpop_type;
+	__u8			dccpop_feat;
+	__u8		        *dccpop_val;
+	__u8			dccpop_len;
+	int			dccpop_conf;
+	struct dccp_opt_conf    *dccpop_sc;
+};
+
+extern void dccp_minisock_init(struct dccp_minisock *dmsk);
+
 /**
  * struct dccp_request_sock  -  represent DCCP-specific connection request
  * @dreq_inet_rsk: structure inherited from
  * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1)
  * @dreq_isr: initial sequence number received on the Request
  * @dreq_service: service code present on the Request (there is just one)
- * @dreq_featneg: feature negotiation options for this connection
  * The following two fields are analogous to the ones in dccp_sock:
  * @dreq_timestamp_echo: last received timestamp to echo (13.1)
  * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo
@@ -390,7 +420,6 @@ struct dccp_request_sock {
 	__u64			 dreq_iss;
 	__u64			 dreq_isr;
 	__be32			 dreq_service;
-	struct list_head	 dreq_featneg;
 	__u32			 dreq_timestamp_echo;
 	__u32			 dreq_timestamp_time;
 };
@@ -462,28 +491,21 @@ struct dccp_ackvec;
  * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo
  * @dccps_l_ack_ratio - feature-local Ack Ratio
  * @dccps_r_ack_ratio - feature-remote Ack Ratio
- * @dccps_l_seq_win - local Sequence Window (influences ack number validity)
- * @dccps_r_seq_win - remote Sequence Window (influences seq number validity)
  * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
  * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
- * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2)
  * @dccps_ndp_count - number of Non Data Packets since last data packet
  * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
  * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
- * @dccps_featneg - tracks feature-negotiation state (mostly during handshake)
+ * @dccps_minisock - associated minisock (accessed via dccp_msk)
  * @dccps_hc_rx_ackvec - rx half connection ack vector
  * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
  * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
  * @dccps_options_received - parsed set of retrieved options
- * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
- * @dccps_tx_qlen - maximum length of the TX queue
  * @dccps_role - role of this sock, one of %dccp_role
  * @dccps_hc_rx_insert_options - receiver wants to add options when acking
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
  * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
- * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
- * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
- * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
+ * @dccps_xmit_timer - timer for when CCID is not ready to send
  * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
  */
 struct dccp_sock {
@@ -507,26 +529,19 @@ struct dccp_sock {
 	__u32				dccps_timestamp_time;
 	__u16				dccps_l_ack_ratio;
 	__u16				dccps_r_ack_ratio;
-	__u64				dccps_l_seq_win:48;
-	__u64				dccps_r_seq_win:48;
-	__u8				dccps_pcslen:4;
-	__u8				dccps_pcrlen:4;
-	__u8				dccps_send_ndp_count:1;
+	__u16				dccps_pcslen;
+	__u16				dccps_pcrlen;
 	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
-	struct list_head		dccps_featneg;
+	struct dccp_minisock		dccps_minisock;
 	struct dccp_ackvec		*dccps_hc_rx_ackvec;
 	struct ccid			*dccps_hc_rx_ccid;
 	struct ccid			*dccps_hc_tx_ccid;
 	struct dccp_options_received	dccps_options_received;
-	__u8				dccps_qpolicy;
-	__u32				dccps_tx_qlen;
 	enum dccp_role			dccps_role:2;
 	__u8				dccps_hc_rx_insert_options:1;
 	__u8				dccps_hc_tx_insert_options:1;
 	__u8				dccps_server_timewait:1;
-	__u8				dccps_sync_scheduled:1;
-	struct tasklet_struct		dccps_xmitlet;
 	struct timer_list		dccps_xmit_timer;
 };
 
@@ -535,6 +550,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk)
 	return (struct dccp_sock *)sk;
 }
 
+static inline struct dccp_minisock *dccp_msk(const struct sock *sk)
+{
+	return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock;
+}
+
 static inline const char *dccp_role(const struct sock *sk)
 {
 	switch (dccp_sk(sk)->dccps_role) {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6bc4b8148ca..8983386356a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -782,21 +782,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 /* Use define here intentionally to get WARN_ON location shown at the caller */
 #define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)
 
-/*
- * Convert RFC3390 larger initial windows into an equivalent number of packets.
- *
- * John Heffner states:
- *
- *	The RFC specifies a window of no more than 4380 bytes
- *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
- *	is a bit misleading because they use a clamp at 4380 bytes
- *	rather than a multiplier in the relevant range.
- */
-static inline u32 rfc3390_bytes_to_packets(const u32 bytes)
-{
-	return bytes <= 1095 ? 4 : (bytes > 1460 ? 2 : 3);
-}
-
 extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 206c16ad9c3..7aa2a7acc7e 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -25,6 +25,9 @@ config INET_DCCP_DIAG
 	def_tristate y if (IP_DCCP = y && INET_DIAG = y)
 	def_tristate m
 
+config IP_DCCP_ACKVEC
+	bool
+
 source "net/dccp/ccids/Kconfig"
 
 menu "DCCP Kernel Hacking"
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 0c1c9af2bf7..f4f8793aaff 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,6 @@
 obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
 
-dccp-y := ccid.o feat.o input.o minisocks.o options.o \
-	  qpolicy.o output.o proto.o timer.o ackvec.o
+dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
 
 dccp_ipv4-y := ipv4.o
 
@@ -9,6 +8,8 @@ dccp_ipv4-y := ipv4.o
 obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
 dccp_ipv6-y := ipv6.o
 
+dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
+
 obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
 obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
 
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 41819848bdd..1e8be246ad1 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -1,375 +1,445 @@
 /*
  *  net/dccp/ackvec.c
  *
- *  An implementation of Ack Vectors for the DCCP protocol
- *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
+ *  An implementation of the DCCP protocol
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  *
  *      This program is free software; you can redistribute it and/or modify it
  *      under the terms of the GNU General Public License as published by the
  *      Free Software Foundation; version 2 of the License;
  */
+
+#include "ackvec.h"
 #include "dccp.h"
+
+#include <linux/dccp.h>
+#include <linux/init.h>
+#include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 #include <linux/slab.h>
 
+#include <net/sock.h>
+
 static struct kmem_cache *dccp_ackvec_slab;
 static struct kmem_cache *dccp_ackvec_record_slab;
 
-struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
+static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
 {
-	struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
+	struct dccp_ackvec_record *avr =
+			kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 
-	if (av != NULL) {
-		av->av_buf_head	= av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
-		INIT_LIST_HEAD(&av->av_records);
-	}
-	return av;
+	if (avr != NULL)
+		INIT_LIST_HEAD(&avr->avr_node);
+
+	return avr;
 }
 
-static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
+static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
 {
-	struct dccp_ackvec_record *cur, *next;
-
-	list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
-		kmem_cache_free(dccp_ackvec_record_slab, cur);
-	INIT_LIST_HEAD(&av->av_records);
+	if (unlikely(avr == NULL))
+		return;
+	/* Check if deleting a linked record */
+	WARN_ON(!list_empty(&avr->avr_node));
+	kmem_cache_free(dccp_ackvec_record_slab, avr);
 }
 
-void dccp_ackvec_free(struct dccp_ackvec *av)
+static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
+				   struct dccp_ackvec_record *avr)
 {
-	if (likely(av != NULL)) {
-		dccp_ackvec_purge_records(av);
-		kmem_cache_free(dccp_ackvec_slab, av);
+	/*
+	 * AVRs are sorted by seqno. Since we are sending them in order, we
+	 * just add the AVR at the head of the list.
+	 * -sorbo.
+	 */
+	if (!list_empty(&av->av_records)) {
+		const struct dccp_ackvec_record *head =
+					list_entry(av->av_records.next,
+						   struct dccp_ackvec_record,
+						   avr_node);
+		BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
 	}
+
+	list_add(&avr->avr_node, &av->av_records);
 }
 
-/**
- * dccp_ackvec_update_records  -  Record information about sent Ack Vectors
- * @av:		Ack Vector records to update
- * @seqno:	Sequence number of the packet carrying the Ack Vector just sent
- * @nonce_sum:	The sum of all buffer nonces contained in the Ack Vector
- */
-int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
+int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 {
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	/* Figure out how many options do we need to represent the ackvec */
+	const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN);
+	u16 len = av->av_vec_len + 2 * nr_opts, i;
+	u32 elapsed_time;
+	const unsigned char *tail, *from;
+	unsigned char *to;
 	struct dccp_ackvec_record *avr;
+	suseconds_t delta;
+
+	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+		return -1;
+
+	delta = ktime_us_delta(ktime_get_real(), av->av_time);
+	elapsed_time = delta / 10;
 
-	avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
+	if (elapsed_time != 0 &&
+	    dccp_insert_option_elapsed_time(sk, skb, elapsed_time))
+		return -1;
+
+	avr = dccp_ackvec_record_new();
 	if (avr == NULL)
-		return -ENOBUFS;
+		return -1;
+
+	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+	to   = skb_push(skb, len);
+	len  = av->av_vec_len;
+	from = av->av_buf + av->av_buf_head;
+	tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
+
+	for (i = 0; i < nr_opts; ++i) {
+		int copylen = len;
+
+		if (len > DCCP_MAX_ACKVEC_OPT_LEN)
+			copylen = DCCP_MAX_ACKVEC_OPT_LEN;
+
+		*to++ = DCCPO_ACK_VECTOR_0;
+		*to++ = copylen + 2;
+
+		/* Check if buf_head wraps */
+		if (from + copylen > tail) {
+			const u16 tailsize = tail - from;
+
+			memcpy(to, from, tailsize);
+			to	+= tailsize;
+			len	-= tailsize;
+			copylen	-= tailsize;
+			from	= av->av_buf;
+		}
+
+		memcpy(to, from, copylen);
+		from += copylen;
+		to   += copylen;
+		len  -= copylen;
+	}
 
-	avr->avr_ack_seqno  = seqno;
-	avr->avr_ack_ptr    = av->av_buf_head;
-	avr->avr_ack_ackno  = av->av_buf_ackno;
-	avr->avr_ack_nonce  = nonce_sum;
-	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
-	/*
-	 * When the buffer overflows, we keep no more than one record. This is
-	 * the simplest way of disambiguating sender-Acks dating from before the
-	 * overflow from sender-Acks which refer to after the overflow; a simple
-	 * solution is preferable here since we are handling an exception.
-	 */
-	if (av->av_overflow)
-		dccp_ackvec_purge_records(av);
 	/*
-	 * Since GSS is incremented for each packet, the list is automatically
-	 * arranged in descending order of @ack_seqno.
+	 *	From RFC 4340, A.2:
+	 *
+	 *	For each acknowledgement it sends, the HC-Receiver will add an
+	 *	acknowledgement record.  ack_seqno will equal the HC-Receiver
+	 *	sequence number it used for the ack packet; ack_ptr will equal
+	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
+	 *	equal buf_nonce.
 	 */
-	list_add(&avr->avr_node, &av->av_records);
+	avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	avr->avr_ack_ptr   = av->av_buf_head;
+	avr->avr_ack_ackno = av->av_buf_ackno;
+	avr->avr_ack_nonce = av->av_buf_nonce;
+	avr->avr_sent_len  = av->av_vec_len;
 
-	dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
+	dccp_ackvec_insert_avr(av, avr);
+
+	dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
+		      "ack_ackno=%llu\n",
+		      dccp_role(sk), avr->avr_sent_len,
 		      (unsigned long long)avr->avr_ack_seqno,
-		      (unsigned long long)avr->avr_ack_ackno,
-		      avr->avr_ack_runlen);
+		      (unsigned long long)avr->avr_ack_ackno);
 	return 0;
 }
 
-static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
-						     const u64 ackno)
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 {
-	struct dccp_ackvec_record *avr;
-	/*
-	 * Exploit that records are inserted in descending order of sequence
-	 * number, start with the oldest record first. If @ackno is `before'
-	 * the earliest ack_ackno, the packet is too old to be considered.
-	 */
-	list_for_each_entry_reverse(avr, av_list, avr_node) {
-		if (avr->avr_ack_seqno == ackno)
-			return avr;
-		if (before48(ackno, avr->avr_ack_seqno))
-			break;
+	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
+
+	if (av != NULL) {
+		av->av_buf_head	 = DCCP_MAX_ACKVEC_LEN - 1;
+		av->av_buf_ackno = UINT48_MAX + 1;
+		av->av_buf_nonce = 0;
+		av->av_time	 = ktime_set(0, 0);
+		av->av_vec_len	 = 0;
+		INIT_LIST_HEAD(&av->av_records);
 	}
-	return NULL;
+
+	return av;
 }
 
-/*
- * Buffer index and length computation using modulo-buffersize arithmetic.
- * Note that, as pointers move from right to left, head is `before' tail.
- */
-static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
+void dccp_ackvec_free(struct dccp_ackvec *av)
 {
-	return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
+	if (unlikely(av == NULL))
+		return;
+
+	if (!list_empty(&av->av_records)) {
+		struct dccp_ackvec_record *avr, *next;
+
+		list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
+			list_del_init(&avr->avr_node);
+			dccp_ackvec_record_delete(avr);
+		}
+	}
+
+	kmem_cache_free(dccp_ackvec_slab, av);
 }
 
-static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
+static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
+				   const u32 index)
 {
-	return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
+	return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
 }
 
-u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
+static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
+				 const u32 index)
 {
-	if (unlikely(av->av_overflow))
-		return DCCPAV_MAX_ACKVEC_LEN;
-	return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
+	return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
 }
 
-/**
- * dccp_ackvec_update_old  -  Update previous state as per RFC 4340, 11.4.1
- * @av:		non-empty buffer to update
- * @distance:   negative or zero distance of @seqno from buf_ackno downward
- * @seqno:	the (old) sequence number whose record is to be updated
- * @state:	state in which packet carrying @seqno was received
+/*
+ * If several packets are missing, the HC-Receiver may prefer to enter multiple
+ * bytes with run length 0, rather than a single byte with a larger run length;
+ * this simplifies table updates if one of the missing packets arrives.
  */
-static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
-				   u64 seqno, enum dccp_ackvec_states state)
+static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
+						 const unsigned int packets,
+						 const unsigned char state)
 {
-	u16 ptr = av->av_buf_head;
+	unsigned int gap;
+	long new_head;
 
-	BUG_ON(distance > 0);
-	if (unlikely(dccp_ackvec_is_empty(av)))
-		return;
+	if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
+		return -ENOBUFS;
 
-	do {
-		u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
+	gap	 = packets - 1;
+	new_head = av->av_buf_head - packets;
 
-		if (distance + runlen >= 0) {
-			/*
-			 * Only update the state if packet has not been received
-			 * yet. This is OK as per the second table in RFC 4340,
-			 * 11.4.1; i.e. here we are using the following table:
-			 *                     RECEIVED
-			 *                      0   1   3
-			 *              S     +---+---+---+
-			 *              T   0 | 0 | 0 | 0 |
-			 *              O     +---+---+---+
-			 *              R   1 | 1 | 1 | 1 |
-			 *              E     +---+---+---+
-			 *              D   3 | 0 | 1 | 3 |
-			 *                    +---+---+---+
-			 * The "Not Received" state was set by reserve_seats().
-			 */
-			if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
-				av->av_buf[ptr] = state;
-			else
-				dccp_pr_debug("Not changing %llu state to %u\n",
-					      (unsigned long long)seqno, state);
-			break;
+	if (new_head < 0) {
+		if (gap > 0) {
+			memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
+			       gap + new_head + 1);
+			gap = -new_head;
 		}
+		new_head += DCCP_MAX_ACKVEC_LEN;
+	}
 
-		distance += runlen + 1;
-		ptr	  = __ackvec_idx_add(ptr, 1);
+	av->av_buf_head = new_head;
 
-	} while (ptr != av->av_buf_tail);
-}
+	if (gap > 0)
+		memset(av->av_buf + av->av_buf_head + 1,
+		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
 
-/* Mark @num entries after buf_head as "Not yet received". */
-static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
-{
-	u16 start = __ackvec_idx_add(av->av_buf_head, 1),
-	    len	  = DCCPAV_MAX_ACKVEC_LEN - start;
-
-	/* check for buffer wrap-around */
-	if (num > len) {
-		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
-		start = 0;
-		num  -= len;
-	}
-	if (num)
-		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
+	av->av_buf[av->av_buf_head] = state;
+	av->av_vec_len += packets;
+	return 0;
 }
 
-/**
- * dccp_ackvec_add_new  -  Record one or more new entries in Ack Vector buffer
- * @av:		 container of buffer to update (can be empty or non-empty)
- * @num_packets: number of packets to register (must be >= 1)
- * @seqno:	 sequence number of the first packet in @num_packets
- * @state:	 state in which packet carrying @seqno was received
+/*
+ * Implements the RFC 4340, Appendix A
  */
-static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
-				u64 seqno, enum dccp_ackvec_states state)
+int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
+		    const u64 ackno, const u8 state)
 {
-	u32 num_cells = num_packets;
+	/*
+	 * Check at the right places if the buffer is full, if it is, tell the
+	 * caller to start dropping packets till the HC-Sender acks our ACK
+	 * vectors, when we will free up space in av_buf.
+	 *
+	 * We may well decide to do buffer compression, etc, but for now lets
+	 * just drop.
+	 *
+	 * From Appendix A.1.1 (`New Packets'):
+	 *
+	 *	Of course, the circular buffer may overflow, either when the
+	 *	HC-Sender is sending data at a very high rate, when the
+	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
+	 *	or when the HC-Sender is forgetting to acknowledge those acks
+	 *	(so the HC-Receiver is unable to clean up old state). In this
+	 *	case, the HC-Receiver should either compress the buffer (by
+	 *	increasing run lengths when possible), transfer its state to
+	 *	a larger buffer, or, as a last resort, drop all received
+	 *	packets, without processing them whatsoever, until its buffer
+	 *	shrinks again.
+	 */
 
-	if (num_packets > DCCPAV_BURST_THRESH) {
-		u32 lost_packets = num_packets - 1;
+	/* See if this is the first ackno being inserted */
+	if (av->av_vec_len == 0) {
+		av->av_buf[av->av_buf_head] = state;
+		av->av_vec_len = 1;
+	} else if (after48(ackno, av->av_buf_ackno)) {
+		const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
 
-		DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
 		/*
-		 * We received 1 packet and have a loss of size "num_packets-1"
-		 * which we squeeze into num_cells-1 rather than reserving an
-		 * entire byte for each lost packet.
-		 * The reason is that the vector grows in O(burst_length); when
-		 * it grows too large there will no room left for the payload.
-		 * This is a trade-off: if a few packets out of the burst show
-		 * up later, their state will not be changed; it is simply too
-		 * costly to reshuffle/reallocate/copy the buffer each time.
-		 * Should such problems persist, we will need to switch to a
-		 * different underlying data structure.
+		 * Look if the state of this packet is the same as the
+		 * previous ackno and if so if we can bump the head len.
 		 */
-		for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
-			u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);
-
-			av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
-			av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
+		if (delta == 1 &&
+		    dccp_ackvec_state(av, av->av_buf_head) == state &&
+		    dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
+			av->av_buf[av->av_buf_head]++;
+		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
+			return -ENOBUFS;
+	} else {
+		/*
+		 * A.1.2.  Old Packets
+		 *
+		 *	When a packet with Sequence Number S <= buf_ackno
+		 *	arrives, the HC-Receiver will scan the table for
+		 *	the byte corresponding to S. (Indexing structures
+		 *	could reduce the complexity of this scan.)
+		 */
+		u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
+		u32 index = av->av_buf_head;
 
-			lost_packets -= len;
+		while (1) {
+			const u8 len = dccp_ackvec_len(av, index);
+			const u8 av_state = dccp_ackvec_state(av, index);
+			/*
+			 * valid packets not yet in av_buf have a reserved
+			 * entry, with a len equal to 0.
+			 */
+			if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+			    len == 0 && delta == 0) { /* Found our
+							 reserved seat! */
+				dccp_pr_debug("Found %llu reserved seat!\n",
+					      (unsigned long long)ackno);
+				av->av_buf[index] = state;
+				goto out;
+			}
+			/* len == 0 means one packet */
+			if (delta < len + 1)
+				goto out_duplicate;
+
+			delta -= len + 1;
+			if (++index == DCCP_MAX_ACKVEC_LEN)
+				index = 0;
 		}
 	}
 
-	if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
-		DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
-		av->av_overflow = true;
-	}
-
-	av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
-	if (av->av_overflow)
-		av->av_buf_tail = av->av_buf_head;
-
-	av->av_buf[av->av_buf_head] = state;
-	av->av_buf_ackno	    = seqno;
+	av->av_buf_ackno = ackno;
+	av->av_time = ktime_get_real();
+out:
+	return 0;
 
-	if (num_packets > 1)
-		dccp_ackvec_reserve_seats(av, num_packets - 1);
+out_duplicate:
+	/* Duplicate packet */
+	dccp_pr_debug("Received a dup or already considered lost "
+		      "packet: %llu\n", (unsigned long long)ackno);
+	return -EILSEQ;
 }
 
-/**
- * dccp_ackvec_input  -  Register incoming packet in the buffer
- */
-void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
+static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
+				     struct dccp_ackvec_record *avr)
 {
-	u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-	enum dccp_ackvec_states state = DCCPAV_RECEIVED;
+	struct dccp_ackvec_record *next;
 
-	if (dccp_ackvec_is_empty(av)) {
-		dccp_ackvec_add_new(av, 1, seqno, state);
-		av->av_tail_ackno = seqno;
+	/* sort out vector length */
+	if (av->av_buf_head <= avr->avr_ack_ptr)
+		av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
+	else
+		av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
+				 av->av_buf_head + avr->avr_ack_ptr;
 
-	} else {
-		s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
-		u8 *current_head = av->av_buf + av->av_buf_head;
-
-		if (num_packets == 1 &&
-		    dccp_ackvec_state(current_head) == state &&
-		    dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
+	/* free records */
+	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
+		list_del_init(&avr->avr_node);
+		dccp_ackvec_record_delete(avr);
+	}
+}
 
-			*current_head   += 1;
-			av->av_buf_ackno = seqno;
+void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
+				 const u64 ackno)
+{
+	struct dccp_ackvec_record *avr;
 
-		} else if (num_packets > 0) {
-			dccp_ackvec_add_new(av, num_packets, seqno, state);
-		} else {
-			dccp_ackvec_update_old(av, num_packets, seqno, state);
-		}
+	/*
+	 * If we traverse backwards, it should be faster when we have large
+	 * windows. We will be receiving ACKs for stuff we sent a while back
+	 * -sorbo.
+	 */
+	list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
+		if (ackno == avr->avr_ack_seqno) {
+			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
+				      "ack_ackno=%llu, ACKED!\n",
+				      dccp_role(sk), 1,
+				      (unsigned long long)avr->avr_ack_seqno,
+				      (unsigned long long)avr->avr_ack_ackno);
+			dccp_ackvec_throw_record(av, avr);
+			break;
+		} else if (avr->avr_ack_seqno > ackno)
+			break; /* old news */
 	}
 }
 
-/**
- * dccp_ackvec_clear_state  -  Perform house-keeping / garbage-collection
- * This routine is called when the peer acknowledges the receipt of Ack Vectors
- * up to and including @ackno. While based on on section A.3 of RFC 4340, here
- * are additional precautions to prevent corrupted buffer state. In particular,
- * we use tail_ackno to identify outdated records; it always marks the earliest
- * packet of group (2) in 11.4.2.
- */
-void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
- {
-	struct dccp_ackvec_record *avr, *next;
-	u8 runlen_now, eff_runlen;
-	s64 delta;
+static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
+					    struct sock *sk, u64 *ackno,
+					    const unsigned char len,
+					    const unsigned char *vector)
+{
+	unsigned char i;
+	struct dccp_ackvec_record *avr;
 
-	avr = dccp_ackvec_lookup(&av->av_records, ackno);
-	if (avr == NULL)
+	/* Check if we actually sent an ACK vector */
+	if (list_empty(&av->av_records))
 		return;
-	/*
-	 * Deal with outdated acknowledgments: this arises when e.g. there are
-	 * several old records and the acks from the peer come in slowly. In
-	 * that case we may still have records that pre-date tail_ackno.
-	 */
-	delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
-	if (delta < 0)
-		goto free_records;
-	/*
-	 * Deal with overlapping Ack Vectors: don't subtract more than the
-	 * number of packets between tail_ackno and ack_ackno.
-	 */
-	eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
 
-	runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
+	i = len;
 	/*
-	 * The run length of Ack Vector cells does not decrease over time. If
-	 * the run length is the same as at the time the Ack Vector was sent, we
-	 * free the ack_ptr cell. That cell can however not be freed if the run
-	 * length has increased: in this case we need to move the tail pointer
-	 * backwards (towards higher indices), to its next-oldest neighbour.
+	 * XXX
+	 * I think it might be more efficient to work backwards. See comment on
+	 * rcv_ackno. -sorbo.
 	 */
-	if (runlen_now > eff_runlen) {
+	avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
+	while (i--) {
+		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
+		u64 ackno_end_rl;
 
-		av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
-		av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
+		dccp_set_seqno(&ackno_end_rl, *ackno - rl);
 
-		/* This move may not have cleared the overflow flag. */
-		if (av->av_overflow)
-			av->av_overflow = (av->av_buf_head == av->av_buf_tail);
-	} else {
-		av->av_buf_tail	= avr->avr_ack_ptr;
 		/*
-		 * We have made sure that avr points to a valid cell within the
-		 * buffer. This cell is either older than head, or equals head
-		 * (empty buffer): in both cases we no longer have any overflow.
+		 * If our AVR sequence number is greater than the ack, go
+		 * forward in the AVR list until it is not so.
 		 */
-		av->av_overflow	= 0;
-	}
-
-	/*
-	 * The peer has acknowledged up to and including ack_ackno. Hence the
-	 * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
-	 */
-	av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
+		list_for_each_entry_from(avr, &av->av_records, avr_node) {
+			if (!after48(avr->avr_ack_seqno, *ackno))
+				goto found;
+		}
+		/* End of the av_records list, not found, exit */
+		break;
+found:
+		if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
+			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
+			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
+				dccp_pr_debug("%s ACK vector 0, len=%d, "
+					      "ack_seqno=%llu, ack_ackno=%llu, "
+					      "ACKED!\n",
+					      dccp_role(sk), len,
+					      (unsigned long long)
+					      avr->avr_ack_seqno,
+					      (unsigned long long)
+					      avr->avr_ack_ackno);
+				dccp_ackvec_throw_record(av, avr);
+				break;
+			}
+			/*
+			 * If it wasn't received, continue scanning... we might
+			 * find another one.
+			 */
+		}
 
-free_records:
-	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
-		list_del(&avr->avr_node);
-		kmem_cache_free(dccp_ackvec_record_slab, avr);
+		dccp_set_seqno(ackno, ackno_end_rl - 1);
+		++vector;
 	}
 }
 
-/*
- *	Routines to keep track of Ack Vectors received in an skb
- */
-int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
+int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+		      u64 *ackno, const u8 opt, const u8 *value, const u8 len)
 {
-	struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
-
-	if (new == NULL)
-		return -ENOBUFS;
-	new->vec   = vec;
-	new->len   = len;
-	new->nonce = nonce;
+	if (len > DCCP_MAX_ACKVEC_OPT_LEN)
+		return -1;
 
-	list_add_tail(&new->node, head);
+	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
+	dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
+					ackno, len, value);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
-
-void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
-{
-	struct dccp_ackvec_parsed *cur, *next;
-
-	list_for_each_entry_safe(cur, next, parsed_chunks, node)
-		kfree(cur);
-	INIT_LIST_HEAD(parsed_chunks);
-}
-EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
 
 int __init dccp_ackvec_init(void)
 {
@@ -379,9 +449,10 @@ int __init dccp_ackvec_init(void)
 	if (dccp_ackvec_slab == NULL)
 		goto out_err;
 
-	dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
-					     sizeof(struct dccp_ackvec_record),
-					     0, SLAB_HWCACHE_ALIGN, NULL);
+	dccp_ackvec_record_slab =
+			kmem_cache_create("dccp_ackvec_record",
+					  sizeof(struct dccp_ackvec_record),
+					  0, SLAB_HWCACHE_ALIGN, NULL);
 	if (dccp_ackvec_record_slab == NULL)
 		goto out_destroy_slab;
 
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 6cdca79a99f..bcb64fb4ace 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -3,134 +3,156 @@
 /*
  *  net/dccp/ackvec.h
  *
- *  An implementation of Ack Vectors for the DCCP protocol
- *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
+ *  An implementation of the DCCP protocol
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
+ *
  *	This program is free software; you can redistribute it and/or modify it
  *	under the terms of the GNU General Public License version 2 as
  *	published by the Free Software Foundation.
  */
 
-#include <linux/dccp.h>
 #include <linux/compiler.h>
+#include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
-/*
- * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
- * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
- * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
- * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
- * The maximum value is bounded by the u16 types for indices and functions.
- */
-#define DCCPAV_NUM_ACKVECS	2
-#define DCCPAV_MAX_ACKVEC_LEN	(DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
-
-/* Estimated minimum average Ack Vector length - used for updating MPS */
-#define DCCPAV_MIN_OPTLEN	16
-
-/* Threshold for coping with large bursts of losses */
-#define DCCPAV_BURST_THRESH	(DCCPAV_MAX_ACKVEC_LEN / 8)
-
-enum dccp_ackvec_states {
-	DCCPAV_RECEIVED =	0x00,
-	DCCPAV_ECN_MARKED =	0x40,
-	DCCPAV_RESERVED =	0x80,
-	DCCPAV_NOT_RECEIVED =	0xC0
-};
-#define DCCPAV_MAX_RUNLEN	0x3F
+/* Read about the ECN nonce to see why it is 253 */
+#define DCCP_MAX_ACKVEC_OPT_LEN 253
+/* We can spread an ack vector across multiple options */
+#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2)
 
-static inline u8 dccp_ackvec_runlen(const u8 *cell)
-{
-	return *cell & DCCPAV_MAX_RUNLEN;
-}
+#define DCCP_ACKVEC_STATE_RECEIVED	0
+#define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
+#define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)
 
-static inline u8 dccp_ackvec_state(const u8 *cell)
-{
-	return *cell & ~DCCPAV_MAX_RUNLEN;
-}
+#define DCCP_ACKVEC_STATE_MASK		0xC0 /* 11000000 */
+#define DCCP_ACKVEC_LEN_MASK		0x3F /* 00111111 */
 
-/** struct dccp_ackvec - Ack Vector main data structure
+/** struct dccp_ackvec - ack vector
+ *
+ * This data structure is the one defined in RFC 4340, Appendix A.
  *
- * This implements a fixed-size circular buffer within an array and is largely
- * based on Appendix A of RFC 4340.
+ * @av_buf_head - circular buffer head
+ * @av_buf_tail - circular buffer tail
+ * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
+ *		       buffer (i.e. %av_buf_head)
+ * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
+ * 		       by the buffer with State 0
  *
- * @av_buf:	   circular buffer storage area
- * @av_buf_head:   head index; begin of live portion in @av_buf
- * @av_buf_tail:   tail index; first index _after_ the live portion in @av_buf
- * @av_buf_ackno:  highest seqno of acknowledgeable packet recorded in @av_buf
- * @av_tail_ackno: lowest  seqno of acknowledgeable packet recorded in @av_buf
- * @av_buf_nonce:  ECN nonce sums, each covering subsequent segments of up to
- *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
- * @av_overflow:   if 1 then buf_head == buf_tail indicates buffer wraparound
- * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
+ * Additionally, the HC-Receiver must keep some information about the
+ * Ack Vectors it has recently sent. For each packet sent carrying an
+ * Ack Vector, it remembers four variables:
+ *
+ * @av_records - list of dccp_ackvec_record
+ * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ *
+ * @av_time - the time in usecs
+ * @av_buf - circular buffer of acknowledgeable packets
  */
 struct dccp_ackvec {
-	u8			av_buf[DCCPAV_MAX_ACKVEC_LEN];
-	u16			av_buf_head;
-	u16			av_buf_tail;
-	u64			av_buf_ackno:48;
-	u64			av_tail_ackno:48;
-	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
-	u8			av_overflow:1;
+	u64			av_buf_ackno;
 	struct list_head	av_records;
+	ktime_t			av_time;
+	u16			av_buf_head;
+	u16			av_vec_len;
+	u8			av_buf_nonce;
+	u8			av_ack_nonce;
+	u8			av_buf[DCCP_MAX_ACKVEC_LEN];
 };
 
-/** struct dccp_ackvec_record - Records information about sent Ack Vectors
+/** struct dccp_ackvec_record - ack vector record
  *
- * These list entries define the additional information which the HC-Receiver
- * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
+ * ACK vector record as defined in Appendix A of spec.
  *
- * @avr_node:	    the list node in @av_records
- * @avr_ack_seqno:  sequence number of the packet the Ack Vector was sent on
- * @avr_ack_ackno:  the Ack number that this record/Ack Vector refers to
- * @avr_ack_ptr:    pointer into @av_buf where this record starts
- * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
- * @avr_ack_nonce:  the sum of @av_buf_nonce's at the time this record was sent
+ * The list is sorted by avr_ack_seqno
  *
- * The list as a whole is sorted in descending order by @avr_ack_seqno.
+ * @avr_node - node in av_records
+ * @avr_ack_seqno - sequence number of the packet this record was sent on
+ * @avr_ack_ackno - sequence number being acknowledged
+ * @avr_ack_ptr - pointer into av_buf where this record starts
+ * @avr_ack_nonce - av_ack_nonce at the time this record was sent
+ * @avr_sent_len - lenght of the record in av_buf
  */
 struct dccp_ackvec_record {
 	struct list_head avr_node;
-	u64		 avr_ack_seqno:48;
-	u64		 avr_ack_ackno:48;
+	u64		 avr_ack_seqno;
+	u64		 avr_ack_ackno;
 	u16		 avr_ack_ptr;
-	u8		 avr_ack_runlen;
-	u8		 avr_ack_nonce:1;
+	u16		 avr_sent_len;
+	u8		 avr_ack_nonce;
 };
 
-extern int  dccp_ackvec_init(void);
+struct sock;
+struct sk_buff;
+
+#ifdef CONFIG_IP_DCCP_ACKVEC
+extern int dccp_ackvec_init(void);
 extern void dccp_ackvec_exit(void);
 
 extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
 extern void dccp_ackvec_free(struct dccp_ackvec *av);
 
-extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
-extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
-extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
-extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);
+extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
+			   const u64 ackno, const u8 state);
+
+extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
+					struct sock *sk, const u64 ackno);
+extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+			     u64 *ackno, const u8 opt,
+			     const u8 *value, const u8 len);
 
-static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
+extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
+
+static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+{
+	return av->av_vec_len;
+}
+#else /* CONFIG_IP_DCCP_ACKVEC */
+static inline int dccp_ackvec_init(void)
 {
-	return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
+	return 0;
 }
 
-/**
- * struct dccp_ackvec_parsed  -  Record offsets of Ack Vectors in skb
- * @vec:	start of vector (offset into skb)
- * @len:	length of @vec
- * @nonce:	whether @vec had an ECN nonce of 0 or 1
- * @node:	FIFO - arranged in descending order of ack_ackno
- * This structure is used by CCIDs to access Ack Vectors in a received skb.
- */
-struct dccp_ackvec_parsed {
-	u8		 *vec,
-			 len,
-			 nonce:1;
-	struct list_head node;
-};
+static inline void dccp_ackvec_exit(void)
+{
+}
+
+static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
+{
+	return NULL;
+}
+
+static inline void dccp_ackvec_free(struct dccp_ackvec *av)
+{
+}
+
+static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
+				  const u64 ackno, const u8 state)
+{
+	return -1;
+}
 
-extern int dccp_ackvec_parsed_add(struct list_head *head,
-				  u8 *vec, u8 len, u8 nonce);
-extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
+static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
+					       struct sock *sk, const u64 ackno)
+{
+}
+
+static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+				    const u64 *ackno, const u8 opt,
+				    const u8 *value, const u8 len)
+{
+	return -1;
+}
+
+static inline int dccp_insert_option_ackvec(const struct sock *sk,
+					    const struct sk_buff *skb)
+{
+	return -1;
+}
+
+static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+{
+	return 0;
+}
+#endif /* CONFIG_IP_DCCP_ACKVEC */
 #endif /* _ACKVEC_H */
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index e3fb52b4f5c..4809753d12a 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -13,13 +13,6 @@
 
 #include "ccid.h"
 
-static u8 builtin_ccids[] = {
-	DCCPC_CCID2,		/* CCID2 is supported by default */
-#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE)
-	DCCPC_CCID3,
-#endif
-};
-
 static struct ccid_operations *ccids[CCID_MAX];
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
 static atomic_t ccids_lockct = ATOMIC_INIT(0);
@@ -93,47 +86,6 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
 	}
 }
 
-/* check that up to @array_len members in @ccid_array are supported */
-bool ccid_support_check(u8 const *ccid_array, u8 array_len)
-{
-	u8 i, j, found;
-
-	for (i = 0, found = 0; i < array_len; i++, found = 0) {
-		for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++)
-			found = (ccid_array[i] == builtin_ccids[j]);
-		if (!found)
-			return false;
-	}
-	return true;
-}
-
-/**
- * ccid_get_builtin_ccids  -  Provide copy of `builtin' CCID array
- * @ccid_array: pointer to copy into
- * @array_len: value to return length into
- * This function allocates memory - caller must see that it is freed after use.
- */
-int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
-{
-	*ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any());
-	if (*ccid_array == NULL)
-		return -ENOBUFS;
-	*array_len = ARRAY_SIZE(builtin_ccids);
-	return 0;
-}
-
-int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
-				    char __user *optval, int __user *optlen)
-{
-	if (len < sizeof(builtin_ccids))
-		return -EINVAL;
-
-	if (put_user(sizeof(builtin_ccids), optlen) ||
-	    copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids)))
-		return -EFAULT;
-	return 0;
-}
-
 int ccid_register(struct ccid_operations *ccid_ops)
 {
 	int err = -ENOBUFS;
@@ -196,41 +148,22 @@ int ccid_unregister(struct ccid_operations *ccid_ops)
 
 EXPORT_SYMBOL_GPL(ccid_unregister);
 
-/**
- * ccid_request_module  -  Pre-load CCID module for later use
- * This should be called only from process context (e.g. during connection
- * setup) and is necessary for later calls to ccid_new (typically in software
- * interrupt), so that it has the modules available when they are needed.
- */
-static int ccid_request_module(u8 id)
-{
-	if (!in_atomic()) {
-		ccids_read_lock();
-		if (ccids[id] == NULL) {
-			ccids_read_unlock();
-			return request_module("net-dccp-ccid-%d", id);
-		}
-		ccids_read_unlock();
-	}
-	return 0;
-}
-
-int ccid_request_modules(u8 const *ccid_array, u8 array_len)
-{
-#ifdef CONFIG_KMOD
-	while (array_len--)
-		if (ccid_request_module(ccid_array[array_len]))
-			return -1;
-#endif
-	return 0;
-}
-
 struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
 {
 	struct ccid_operations *ccid_ops;
 	struct ccid *ccid = NULL;
 
 	ccids_read_lock();
+#ifdef CONFIG_KMOD
+	if (ccids[id] == NULL) {
+		/* We only try to load if in process context */
+		ccids_read_unlock();
+		if (gfp & GFP_ATOMIC)
+			goto out;
+		request_module("net-dccp-ccid-%d", id);
+		ccids_read_lock();
+	}
+#endif
 	ccid_ops = ccids[id];
 	if (ccid_ops == NULL)
 		goto out_unlock;
@@ -272,6 +205,20 @@ out_module_put:
 
 EXPORT_SYMBOL_GPL(ccid_new);
 
+struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp)
+{
+	return ccid_new(id, sk, 1, gfp);
+}
+
+EXPORT_SYMBOL_GPL(ccid_hc_rx_new);
+
+struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk,  gfp_t gfp)
+{
+	return ccid_new(id, sk, 0, gfp);
+}
+
+EXPORT_SYMBOL_GPL(ccid_hc_tx_new);
+
 static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
 {
 	struct ccid_operations *ccid_ops;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index d27054ba215..fdeae7b5731 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -60,18 +60,22 @@ struct ccid_operations {
 	void		(*ccid_hc_tx_exit)(struct sock *sk);
 	void		(*ccid_hc_rx_packet_recv)(struct sock *sk,
 						  struct sk_buff *skb);
-	int		(*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
-						    u8 opt, u8 *val, u8 len);
+	int		(*ccid_hc_rx_parse_options)(struct sock *sk,
+						    unsigned char option,
+						    unsigned char len, u16 idx,
+						    unsigned char* value);
 	int		(*ccid_hc_rx_insert_options)(struct sock *sk,
 						     struct sk_buff *skb);
 	void		(*ccid_hc_tx_packet_recv)(struct sock *sk,
 						  struct sk_buff *skb);
-	int		(*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
-						    u8 opt, u8 *val, u8 len);
+	int		(*ccid_hc_tx_parse_options)(struct sock *sk,
+						    unsigned char option,
+						    unsigned char len, u16 idx,
+						    unsigned char* value);
 	int		(*ccid_hc_tx_send_packet)(struct sock *sk,
 						  struct sk_buff *skb);
 	void		(*ccid_hc_tx_packet_sent)(struct sock *sk,
-						  unsigned int len);
+						  int more, unsigned int len);
 	void		(*ccid_hc_rx_get_info)(struct sock *sk,
 					       struct tcp_info *info);
 	void		(*ccid_hc_tx_get_info)(struct sock *sk,
@@ -99,78 +103,31 @@ static inline void *ccid_priv(const struct ccid *ccid)
 	return (void *)ccid->ccid_priv;
 }
 
-extern bool ccid_support_check(u8 const *ccid_array, u8 array_len);
-extern int  ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
-extern int  ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
-					  char __user *, int __user *);
-
-extern int    ccid_request_modules(u8 const *ccid_array, u8 array_len);
 extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
 			     gfp_t gfp);
 
-static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
-{
-	struct ccid *ccid = dp->dccps_hc_rx_ccid;
-
-	if (ccid == NULL || ccid->ccid_ops == NULL)
-		return -1;
-	return ccid->ccid_ops->ccid_id;
-}
-
-static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
-{
-	struct ccid *ccid = dp->dccps_hc_tx_ccid;
-
-	if (ccid == NULL || ccid->ccid_ops == NULL)
-		return -1;
-	return ccid->ccid_ops->ccid_id;
-}
+extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk,
+				   gfp_t gfp);
+extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk,
+				   gfp_t gfp);
 
 extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
 extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
 
-/*
- * Congestion control of queued data packets via CCID decision.
- *
- * The TX CCID performs its congestion-control by indicating whether and when a
- * queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
- * The following modes are supported via the symbolic constants below:
- * - timer-based pacing    (CCID returns a delay value in milliseconds);
- * - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
- */
-
-enum ccid_dequeueing_decision {
-	CCID_PACKET_SEND_AT_ONCE =	 0x00000,  /* "green light": no delay */
-	CCID_PACKET_DELAY_MAX =		 0x0FFFF,  /* maximum delay in msecs  */
-	CCID_PACKET_DELAY =		 0x10000,  /* CCID msec-delay mode */
-	CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000,  /* CCID autonomous mode */
-	CCID_PACKET_ERR =		 0xF0000,  /* error condition */
-};
-
-static inline int ccid_packet_dequeue_eval(const int return_code)
-{
-	if (return_code < 0)
-		return CCID_PACKET_ERR;
-	if (return_code == 0)
-		return CCID_PACKET_SEND_AT_ONCE;
-	if (return_code <= CCID_PACKET_DELAY_MAX)
-		return CCID_PACKET_DELAY;
-	return return_code;
-}
-
 static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
 					 struct sk_buff *skb)
 {
+	int rc = 0;
 	if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
-		return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
-	return CCID_PACKET_SEND_AT_ONCE;
+		rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
+	return rc;
 }
 
 static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
-					  unsigned int len)
+					  int more, unsigned int len)
 {
 	if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
-		ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len);
+		ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
 }
 
 static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
@@ -187,31 +144,27 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
 		ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
 }
 
-/**
- * ccid_hc_tx_parse_options  -  Parse CCID-specific options sent by the receiver
- * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
- * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
- * @val: value of @opt
- * @len: length of @val in bytes
- */
 static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
-					   u8 pkt, u8 opt, u8 *val, u8 len)
+					   unsigned char option,
+					   unsigned char len, u16 idx,
+					   unsigned char* value)
 {
-	if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
-		return 0;
-	return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
+	int rc = 0;
+	if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL)
+		rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx,
+						    value);
+	return rc;
 }
 
-/**
- * ccid_hc_rx_parse_options  -  Parse CCID-specific options sent by the sender
- * Arguments are analogous to ccid_hc_tx_parse_options()
- */
 static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
-					   u8 pkt, u8 opt, u8 *val, u8 len)
+					   unsigned char option,
+					   unsigned char len, u16 idx,
+					   unsigned char* value)
 {
-	if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
-		return 0;
-	return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
+	int rc = 0;
+	if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL)
+		rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value);
+	return rc;
 }
 
 static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index fb168be2cb4..12275943eab 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,8 +1,10 @@
 menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
 
 config IP_DCCP_CCID2
-	tristate "CCID2 (TCP-Like)"
+	tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
 	def_tristate IP_DCCP
+	select IP_DCCP_ACKVEC
 	---help---
 	  CCID 2, TCP-like Congestion Control, denotes Additive Increase,
 	  Multiplicative Decrease (AIMD) congestion control with behavior
@@ -34,7 +36,7 @@ config IP_DCCP_CCID2_DEBUG
 	    If in doubt, say N.
 
 config IP_DCCP_CCID3
-	tristate "CCID3 (TCP-Friendly)"
+	tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
 	def_tristate IP_DCCP
 	select IP_DCCP_TFRC_LIB
 	---help---
@@ -62,9 +64,9 @@ config IP_DCCP_CCID3
 
 	  If in doubt, say M.
 
-if IP_DCCP_CCID3
 config IP_DCCP_CCID3_DEBUG
 	  bool "CCID3 debugging messages"
+	  depends on IP_DCCP_CCID3
 	  ---help---
 	    Enable CCID3-specific debugging messages.
 
@@ -74,29 +76,10 @@ config IP_DCCP_CCID3_DEBUG
 
 	    If in doubt, say N.
 
-choice
-	  prompt "Select method for measuring the packet size s"
-	  default IP_DCCP_CCID3_MEASURE_S_AS_MPS
-
-config IP_DCCP_CCID3_MEASURE_S_AS_MPS
-	  bool "Always use MPS in place of s"
-	  ---help---
-	    This use is recommended as it is consistent with the initialisation
-	    of X and suggested when s varies (rfc3448bis, (1) in section 4.1).
-config IP_DCCP_CCID3_MEASURE_S_AS_AVG
-	  bool "Use moving average"
-	  ---help---
-	    An alternative way of tracking s, also supported by rfc3448bis.
-	    This used to be the default for CCID-3 in previous kernels.
-config IP_DCCP_CCID3_MEASURE_S_AS_MAX
-	  bool "Track the maximum payload length"
-	  ---help---
-	    An experimental method based on tracking the maximum packet size.
-endchoice
-
 config IP_DCCP_CCID3_RTO
 	  int "Use higher bound for nofeedback timer"
 	  default 100
+	  depends on IP_DCCP_CCID3 && EXPERIMENTAL
 	  ---help---
 	    Use higher lower bound for nofeedback timer expiration.
 
@@ -123,7 +106,6 @@ config IP_DCCP_CCID3_RTO
 	    The purpose of the nofeedback timer is to slow DCCP down when there
 	    is serious network congestion: experimenting with larger values should
 	    therefore not be performed on WANs.
-endif	# IP_DCCP_CCID3
 
 config IP_DCCP_TFRC_LIB
 	tristate
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index fa713227c66..9a430734530 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,7 +25,7 @@
 /*
  * This implementation should follow RFC 4341
  */
-#include "../feat.h"
+
 #include "../ccid.h"
 #include "../dccp.h"
 #include "ccid2.h"
@@ -34,8 +34,51 @@
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 static int ccid2_debug;
 #define ccid2_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid2_debug, format, ##a)
+
+static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
+{
+	int len = 0;
+	int pipe = 0;
+	struct ccid2_seq *seqp = hctx->ccid2hctx_seqh;
+
+	/* there is data in the chain */
+	if (seqp != hctx->ccid2hctx_seqt) {
+		seqp = seqp->ccid2s_prev;
+		len++;
+		if (!seqp->ccid2s_acked)
+			pipe++;
+
+		while (seqp != hctx->ccid2hctx_seqt) {
+			struct ccid2_seq *prev = seqp->ccid2s_prev;
+
+			len++;
+			if (!prev->ccid2s_acked)
+				pipe++;
+
+			/* packets are sent sequentially */
+			BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
+						prev->ccid2s_seq ) >= 0);
+			BUG_ON(time_before(seqp->ccid2s_sent,
+					   prev->ccid2s_sent));
+
+			seqp = prev;
+		}
+	}
+
+	BUG_ON(pipe != hctx->ccid2hctx_pipe);
+	ccid2_pr_debug("len of chain=%d\n", len);
+
+	do {
+		seqp = seqp->ccid2s_prev;
+		len++;
+	} while (seqp != hctx->ccid2hctx_seqh);
+
+	ccid2_pr_debug("total len=%d\n", len);
+	BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
+}
 #else
 #define ccid2_pr_debug(format, a...)
+#define ccid2_hc_tx_check_sanity(hctx)
 #endif
 
 static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
@@ -44,7 +87,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
 	int i;
 
 	/* check if we have space to preserve the pointer to the buffer */
-	if (hctx->seqbufc >= sizeof(hctx->seqbuf) / sizeof(struct ccid2_seq *))
+	if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) /
+					sizeof(struct ccid2_seq*)))
 		return -ENOMEM;
 
 	/* allocate buffer and initialize linked list */
@@ -60,35 +104,38 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
 	seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
 
 	/* This is the first allocation.  Initiate the head and tail.  */
-	if (hctx->seqbufc == 0)
-		hctx->seqh = hctx->seqt = seqp;
+	if (hctx->ccid2hctx_seqbufc == 0)
+		hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp;
 	else {
 		/* link the existing list with the one we just created */
-		hctx->seqh->ccid2s_next = seqp;
-		seqp->ccid2s_prev = hctx->seqh;
+		hctx->ccid2hctx_seqh->ccid2s_next = seqp;
+		seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
 
-		hctx->seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
-		seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->seqt;
+		hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
+		seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt;
 	}
 
 	/* store the original pointer to the buffer so we can free it */
-	hctx->seqbuf[hctx->seqbufc] = seqp;
-	hctx->seqbufc++;
+	hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp;
+	hctx->ccid2hctx_seqbufc++;
 
 	return 0;
 }
 
 static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
-	if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
-		return CCID_PACKET_WILL_DEQUEUE_LATER;
-	return CCID_PACKET_SEND_AT_ONCE;
+	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+	if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd)
+		return 0;
+
+	return 1; /* XXX CCID should dequeue when ready instead of polling */
 }
 
 static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->cwnd, 2);
+	u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2);
 
 	/*
 	 * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
@@ -100,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 		DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
 		val = max_ratio;
 	}
-	if (val > DCCPF_ACK_RATIO_MAX)
-		val = DCCPF_ACK_RATIO_MAX;
+	if (val > 0xFFFF)		/* RFC 4340, 11.3 */
+		val = 0xFFFF;
 
 	if (val == dp->dccps_l_ack_ratio)
 		return;
@@ -110,77 +157,99 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 	dp->dccps_l_ack_ratio = val;
 }
 
+static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
+{
+	ccid2_pr_debug("change SRTT to %ld\n", val);
+	hctx->ccid2hctx_srtt = val;
+}
+
+static void ccid2_start_rto_timer(struct sock *sk);
+
 static void ccid2_hc_tx_rto_expire(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-	const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
+	long s;
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
-		sk_reset_timer(sk, &hctx->rtotimer, jiffies + HZ / 5);
+		sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
+			       jiffies + HZ / 5);
 		goto out;
 	}
 
 	ccid2_pr_debug("RTO_EXPIRE\n");
 
+	ccid2_hc_tx_check_sanity(hctx);
+
 	/* back-off timer */
-	hctx->rto <<= 1;
-	if (hctx->rto > DCCP_RTO_MAX)
-		hctx->rto = DCCP_RTO_MAX;
+	hctx->ccid2hctx_rto <<= 1;
+
+	s = hctx->ccid2hctx_rto / HZ;
+	if (s > 60)
+		hctx->ccid2hctx_rto = 60 * HZ;
+
+	ccid2_start_rto_timer(sk);
 
 	/* adjust pipe, cwnd etc */
-	hctx->ssthresh = hctx->cwnd / 2;
-	if (hctx->ssthresh < 2)
-		hctx->ssthresh = 2;
-	hctx->cwnd = 1;
-	hctx->pipe = 0;
+	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
+	if (hctx->ccid2hctx_ssthresh < 2)
+		hctx->ccid2hctx_ssthresh = 2;
+	hctx->ccid2hctx_cwnd	 = 1;
+	hctx->ccid2hctx_pipe	 = 0;
 
 	/* clear state about stuff we sent */
-	hctx->seqt = hctx->seqh;
-	hctx->packets_acked = 0;
+	hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
+	hctx->ccid2hctx_packets_acked = 0;
 
 	/* clear ack ratio state. */
-	hctx->rpseq    = 0;
-	hctx->rpdupack = -1;
+	hctx->ccid2hctx_rpseq	 = 0;
+	hctx->ccid2hctx_rpdupack = -1;
 	ccid2_change_l_ack_ratio(sk, 1);
-
-	/* if we were blocked before, we may now send cwnd=1 packet */
-	if (sender_was_blocked)
-		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
-	/* restart backed-off timer */
-	sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
+	ccid2_hc_tx_check_sanity(hctx);
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
 
-static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
+static void ccid2_start_rto_timer(struct sock *sk)
+{
+	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+	ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto);
+
+	BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer));
+	sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
+		       jiffies + hctx->ccid2hctx_rto);
+}
+
+static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 	struct ccid2_seq *next;
 
-	hctx->pipe++;
+	hctx->ccid2hctx_pipe++;
 
-	hctx->seqh->ccid2s_seq   = dp->dccps_gss;
-	hctx->seqh->ccid2s_acked = 0;
-	hctx->seqh->ccid2s_sent  = jiffies;
+	hctx->ccid2hctx_seqh->ccid2s_seq   = dp->dccps_gss;
+	hctx->ccid2hctx_seqh->ccid2s_acked = 0;
+	hctx->ccid2hctx_seqh->ccid2s_sent  = jiffies;
 
-	next = hctx->seqh->ccid2s_next;
+	next = hctx->ccid2hctx_seqh->ccid2s_next;
 	/* check if we need to alloc more space */
-	if (next == hctx->seqt) {
+	if (next == hctx->ccid2hctx_seqt) {
 		if (ccid2_hc_tx_alloc_seq(hctx)) {
 			DCCP_CRIT("packet history - out of memory!");
 			/* FIXME: find a more graceful way to bail out */
 			return;
 		}
-		next = hctx->seqh->ccid2s_next;
-		BUG_ON(next == hctx->seqt);
+		next = hctx->ccid2hctx_seqh->ccid2s_next;
+		BUG_ON(next == hctx->ccid2hctx_seqt);
 	}
-	hctx->seqh = next;
+	hctx->ccid2hctx_seqh = next;
 
-	ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->cwnd, hctx->pipe);
+	ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
+		       hctx->ccid2hctx_pipe);
 
 	/*
 	 * FIXME: The code below is broken and the variables have been removed
@@ -203,12 +272,12 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 	 */
 #if 0
 	/* Ack Ratio.  Need to maintain a concept of how many windows we sent */
-	hctx->arsent++;
+	hctx->ccid2hctx_arsent++;
 	/* We had an ack loss in this window... */
-	if (hctx->ackloss) {
-		if (hctx->arsent >= hctx->cwnd) {
-			hctx->arsent  = 0;
-			hctx->ackloss = 0;
+	if (hctx->ccid2hctx_ackloss) {
+		if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) {
+			hctx->ccid2hctx_arsent	= 0;
+			hctx->ccid2hctx_ackloss	= 0;
 		}
 	} else {
 		/* No acks lost up to now... */
@@ -218,28 +287,28 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 			int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
 				    dp->dccps_l_ack_ratio;
 
-			denom = hctx->cwnd * hctx->cwnd / denom;
+			denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom;
 
-			if (hctx->arsent >= denom) {
+			if (hctx->ccid2hctx_arsent >= denom) {
 				ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
-				hctx->arsent = 0;
+				hctx->ccid2hctx_arsent = 0;
 			}
 		} else {
 			/* we can't increase ack ratio further [1] */
-			hctx->arsent = 0; /* or maybe set it to cwnd*/
+			hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
 		}
 	}
 #endif
 
 	/* setup RTO timer */
-	if (!timer_pending(&hctx->rtotimer))
-		sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
+	if (!timer_pending(&hctx->ccid2hctx_rtotimer))
+		ccid2_start_rto_timer(sk);
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 	do {
-		struct ccid2_seq *seqp = hctx->seqt;
+		struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
 
-		while (seqp != hctx->seqh) {
+		while (seqp != hctx->ccid2hctx_seqh) {
 			ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
 				       (unsigned long long)seqp->ccid2s_seq,
 				       seqp->ccid2s_acked, seqp->ccid2s_sent);
@@ -247,158 +316,205 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 		}
 	} while (0);
 	ccid2_pr_debug("=========\n");
+	ccid2_hc_tx_check_sanity(hctx);
 #endif
 }
 
-/**
- * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
- * This code is almost identical with TCP's tcp_rtt_estimator(), since
- * - it has a higher sampling frequency (recommended by RFC 1323),
- * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
- * - it is simple (cf. more complex proposals such as Eifel timer or research
- *   which suggests that the gain should be set according to window size),
- * - in tests it was found to work well with CCID2 [gerrit].
+/* XXX Lame code duplication!
+ * returns -1 if none was found.
+ * else returns the next offset to use in the function call.
  */
-static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
+static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
+			   unsigned char **vec, unsigned char *veclen)
 {
-	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-	long m = mrtt ? : 1;
-
-	if (hctx->srtt == 0) {
-		/* First measurement m */
-		hctx->srtt = m << 3;
-		hctx->mdev = m << 1;
-
-		hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev);
-		hctx->rttvar   = hctx->mdev_max;
-		hctx->rtt_seq  = dccp_sk(sk)->dccps_gss;
-	} else {
-		/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
-		m -= (hctx->srtt >> 3);
-		hctx->srtt += m;
-
-		/* Similarly, update scaled mdev with regard to |m| */
-		if (m < 0) {
-			m = -m;
-			m -= (hctx->mdev >> 2);
+	const struct dccp_hdr *dh = dccp_hdr(skb);
+	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
+	unsigned char *opt_ptr;
+	const unsigned char *opt_end = (unsigned char *)dh +
+					(dh->dccph_doff * 4);
+	unsigned char opt, len;
+	unsigned char *value;
+
+	BUG_ON(offset < 0);
+	options += offset;
+	opt_ptr = options;
+	if (opt_ptr >= opt_end)
+		return -1;
+
+	while (opt_ptr != opt_end) {
+		opt   = *opt_ptr++;
+		len   = 0;
+		value = NULL;
+
+		/* Check if this isn't a single byte option */
+		if (opt > DCCPO_MAX_RESERVED) {
+			if (opt_ptr == opt_end)
+				goto out_invalid_option;
+
+			len = *opt_ptr++;
+			if (len < 3)
+				goto out_invalid_option;
 			/*
-			 * This neutralises RTO increase when RTT < SRTT - mdev
-			 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
-			 * in Linux TCP", USENIX 2002, pp. 49-62).
+			 * Remove the type and len fields, leaving
+			 * just the value size
 			 */
-			if (m > 0)
-				m >>= 3;
-		} else {
-			m -= (hctx->mdev >> 2);
-		}
-		hctx->mdev += m;
+			len     -= 2;
+			value   = opt_ptr;
+			opt_ptr += len;
 
-		if (hctx->mdev > hctx->mdev_max) {
-			hctx->mdev_max = hctx->mdev;
-			if (hctx->mdev_max > hctx->rttvar)
-				hctx->rttvar = hctx->mdev_max;
+			if (opt_ptr > opt_end)
+				goto out_invalid_option;
 		}
 
-		/*
-		 * Decay RTTVAR at most once per flight, exploiting that
-		 *  1) pipe <= cwnd <= Sequence_Window = W  (RFC 4340, 7.5.2)
-		 *  2) AWL = GSS-W+1 <= GAR <= GSS          (RFC 4340, 7.5.1)
-		 * GAR is a useful bound for FlightSize = pipe, AWL is probably
-		 * too low as it over-estimates pipe.
-		 */
-		if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) {
-			if (hctx->mdev_max < hctx->rttvar)
-				hctx->rttvar -= (hctx->rttvar -
-						 hctx->mdev_max) >> 2;
-			hctx->rtt_seq  = dccp_sk(sk)->dccps_gss;
-			hctx->mdev_max = TCP_RTO_MIN;
+		switch (opt) {
+		case DCCPO_ACK_VECTOR_0:
+		case DCCPO_ACK_VECTOR_1:
+			*vec	= value;
+			*veclen = len;
+			return offset + (opt_ptr - options);
 		}
 	}
 
-	/*
-	 * Set RTO from SRTT and RTTVAR
-	 * Clock granularity is ignored since the minimum error for RTTVAR is
-	 * clamped to 50msec (corresponding to HZ=20). This leads to a minimum
-	 * RTO of 200msec. This agrees with TCP and RFC 4341, 5.: "Because DCCP
-	 * does not retransmit data, DCCP does not require TCP's recommended
-	 * minimum timeout of one second".
-	 */
-	hctx->rto = (hctx->srtt >> 3) + hctx->rttvar;
+	return -1;
 
-	if (hctx->rto > DCCP_RTO_MAX)
-		hctx->rto = DCCP_RTO_MAX;
+out_invalid_option:
+	DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
+	return -1;
 }
 
-static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
-			  unsigned int *maxincr)
+static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	if (hctx->cwnd < hctx->ssthresh) {
-		if (*maxincr > 0 && ++hctx->packets_acked == 2) {
-			hctx->cwnd += 1;
-			*maxincr   -= 1;
-			hctx->packets_acked = 0;
-		}
-	} else if (++hctx->packets_acked >= hctx->cwnd) {
-			hctx->cwnd += 1;
-			hctx->packets_acked = 0;
-	}
-	/*
-	 * FIXME: RTT is sampled several times per acknowledgment (for each
-	 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
-	 * This causes the RTT to be over-estimated, since the older entries
-	 * in the Ack Vector have earlier sending times.
-	 * The cleanest solution is to not use the ccid2s_sent field at all
-	 * and instead use DCCP timestamps - need to be resolved at some time.
-	 */
-	ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent);
+	sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer);
+	ccid2_pr_debug("deleted RTO timer\n");
 }
 
-static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
+static inline void ccid2_new_ack(struct sock *sk,
+				 struct ccid2_seq *seqp,
+				 unsigned int *maxincr)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	if (time_before(seqp->ccid2s_sent, hctx->last_cong)) {
-		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
-		return;
+	if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
+		if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) {
+			hctx->ccid2hctx_cwnd += 1;
+			*maxincr	     -= 1;
+			hctx->ccid2hctx_packets_acked = 0;
+		}
+	} else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) {
+			hctx->ccid2hctx_cwnd += 1;
+			hctx->ccid2hctx_packets_acked = 0;
 	}
 
-	hctx->last_cong = jiffies;
+	/* update RTO */
+	if (hctx->ccid2hctx_srtt == -1 ||
+	    time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
+		unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
+		int s;
+
+		/* first measurement */
+		if (hctx->ccid2hctx_srtt == -1) {
+			ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
+				       r, jiffies,
+				       (unsigned long long)seqp->ccid2s_seq);
+			ccid2_change_srtt(hctx, r);
+			hctx->ccid2hctx_rttvar = r >> 1;
+		} else {
+			/* RTTVAR */
+			long tmp = hctx->ccid2hctx_srtt - r;
+			long srtt;
+
+			if (tmp < 0)
+				tmp *= -1;
+
+			tmp >>= 2;
+			hctx->ccid2hctx_rttvar *= 3;
+			hctx->ccid2hctx_rttvar >>= 2;
+			hctx->ccid2hctx_rttvar += tmp;
+
+			/* SRTT */
+			srtt = hctx->ccid2hctx_srtt;
+			srtt *= 7;
+			srtt >>= 3;
+			tmp = r >> 3;
+			srtt += tmp;
+			ccid2_change_srtt(hctx, srtt);
+		}
+		s = hctx->ccid2hctx_rttvar << 2;
+		/* clock granularity is 1 when based on jiffies */
+		if (!s)
+			s = 1;
+		hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s;
+
+		/* must be at least a second */
+		s = hctx->ccid2hctx_rto / HZ;
+		/* DCCP doesn't require this [but I like it cuz my code sux] */
+#if 1
+		if (s < 1)
+			hctx->ccid2hctx_rto = HZ;
+#endif
+		/* max 60 seconds */
+		if (s > 60)
+			hctx->ccid2hctx_rto = HZ * 60;
 
-	hctx->cwnd     = hctx->cwnd / 2 ? : 1U;
-	hctx->ssthresh = max(hctx->cwnd, 2U);
+		hctx->ccid2hctx_lastrtt = jiffies;
 
-	/* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
-	if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->cwnd)
-		ccid2_change_l_ack_ratio(sk, hctx->cwnd);
+		ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
+			       hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
+			       hctx->ccid2hctx_rto, HZ, r);
+	}
+
+	/* we got a new ack, so re-start RTO timer */
+	ccid2_hc_tx_kill_rto_timer(sk);
+	ccid2_start_rto_timer(sk);
 }
 
-static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
-				     u8 option, u8 *optval, u8 optlen)
+static void ccid2_hc_tx_dec_pipe(struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	switch (option) {
-	case DCCPO_ACK_VECTOR_0:
-	case DCCPO_ACK_VECTOR_1:
-		return dccp_ackvec_parsed_add(&hctx->av_chunks, optval, optlen,
-					      option - DCCPO_ACK_VECTOR_0);
+	if (hctx->ccid2hctx_pipe == 0)
+		DCCP_BUG("pipe == 0");
+	else
+		hctx->ccid2hctx_pipe--;
+
+	if (hctx->ccid2hctx_pipe == 0)
+		ccid2_hc_tx_kill_rto_timer(sk);
+}
+
+static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
+{
+	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+	if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
+		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
+		return;
 	}
-	return 0;
+
+	hctx->ccid2hctx_last_cong = jiffies;
+
+	hctx->ccid2hctx_cwnd     = hctx->ccid2hctx_cwnd / 2 ? : 1U;
+	hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U);
+
+	/* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
+	if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
+		ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd);
 }
 
 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-	const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
-	struct dccp_ackvec_parsed *avp;
 	u64 ackno, seqno;
 	struct ccid2_seq *seqp;
+	unsigned char *vector;
+	unsigned char veclen;
+	int offset = 0;
 	int done = 0;
 	unsigned int maxincr = 0;
 
+	ccid2_hc_tx_check_sanity(hctx);
 	/* check reverse path congestion */
 	seqno = DCCP_SKB_CB(skb)->dccpd_seq;
 
@@ -407,21 +523,21 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	 * -sorbo.
 	 */
 	/* need to bootstrap */
-	if (hctx->rpdupack == -1) {
-		hctx->rpdupack = 0;
-		hctx->rpseq = seqno;
+	if (hctx->ccid2hctx_rpdupack == -1) {
+		hctx->ccid2hctx_rpdupack = 0;
+		hctx->ccid2hctx_rpseq = seqno;
 	} else {
 		/* check if packet is consecutive */
-		if (dccp_delta_seqno(hctx->rpseq, seqno) == 1)
-			hctx->rpseq = seqno;
+		if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1)
+			hctx->ccid2hctx_rpseq = seqno;
 		/* it's a later packet */
-		else if (after48(seqno, hctx->rpseq)) {
-			hctx->rpdupack++;
+		else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
+			hctx->ccid2hctx_rpdupack++;
 
 			/* check if we got enough dupacks */
-			if (hctx->rpdupack >= NUMDUPACK) {
-				hctx->rpdupack = -1; /* XXX lame */
-				hctx->rpseq = 0;
+			if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) {
+				hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
+				hctx->ccid2hctx_rpseq = 0;
 
 				ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
 			}
@@ -429,22 +545,27 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* check forward path congestion */
-	if (dccp_packet_without_ack(skb))
+	/* still didn't send out new data packets */
+	if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt)
 		return;
 
-	/* still didn't send out new data packets */
-	if (hctx->seqh == hctx->seqt)
-		goto done;
+	switch (DCCP_SKB_CB(skb)->dccpd_type) {
+	case DCCP_PKT_ACK:
+	case DCCP_PKT_DATAACK:
+		break;
+	default:
+		return;
+	}
 
 	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
-	if (after48(ackno, hctx->high_ack))
-		hctx->high_ack = ackno;
+	if (after48(ackno, hctx->ccid2hctx_high_ack))
+		hctx->ccid2hctx_high_ack = ackno;
 
-	seqp = hctx->seqt;
+	seqp = hctx->ccid2hctx_seqt;
 	while (before48(seqp->ccid2s_seq, ackno)) {
 		seqp = seqp->ccid2s_next;
-		if (seqp == hctx->seqh) {
-			seqp = hctx->seqh->ccid2s_prev;
+		if (seqp == hctx->ccid2hctx_seqh) {
+			seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
 			break;
 		}
 	}
@@ -454,26 +575,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	 * packets per acknowledgement. Rounding up avoids that cwnd is not
 	 * advanced when Ack Ratio is 1 and gives a slight edge otherwise.
 	 */
-	if (hctx->cwnd < hctx->ssthresh)
+	if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh)
 		maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
 
 	/* go through all ack vectors */
-	list_for_each_entry(avp, &hctx->av_chunks, node) {
+	while ((offset = ccid2_ackvector(sk, skb, offset,
+					 &vector, &veclen)) != -1) {
 		/* go through this ack vector */
-		for (; avp->len--; avp->vec++) {
-			u64 ackno_end_rl = SUB48(ackno,
-						 dccp_ackvec_runlen(avp->vec));
+		while (veclen--) {
+			const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
+			u64 ackno_end_rl = SUB48(ackno, rl);
 
-			ccid2_pr_debug("ackvec %llu |%u,%u|\n",
+			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
 				       (unsigned long long)ackno,
-				       dccp_ackvec_state(avp->vec) >> 6,
-				       dccp_ackvec_runlen(avp->vec));
+				       (unsigned long long)ackno_end_rl);
 			/* if the seqno we are analyzing is larger than the
 			 * current ackno, then move towards the tail of our
 			 * seqnos.
 			 */
 			while (after48(seqp->ccid2s_seq, ackno)) {
-				if (seqp == hctx->seqt) {
+				if (seqp == hctx->ccid2hctx_seqt) {
 					done = 1;
 					break;
 				}
@@ -486,24 +607,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = dccp_ackvec_state(avp->vec);
+				const u8 state = *vector &
+						 DCCP_ACKVEC_STATE_MASK;
 
 				/* new packet received or marked */
-				if (state != DCCPAV_NOT_RECEIVED &&
+				if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
 				    !seqp->ccid2s_acked) {
-					if (state == DCCPAV_ECN_MARKED)
+					if (state ==
+					    DCCP_ACKVEC_STATE_ECN_MARKED) {
 						ccid2_congestion_event(sk,
 								       seqp);
-					else
+					} else
 						ccid2_new_ack(sk, seqp,
 							      &maxincr);
 
 					seqp->ccid2s_acked = 1;
 					ccid2_pr_debug("Got ack for %llu\n",
 						       (unsigned long long)seqp->ccid2s_seq);
-					hctx->pipe--;
+					ccid2_hc_tx_dec_pipe(sk);
 				}
-				if (seqp == hctx->seqt) {
+				if (seqp == hctx->ccid2hctx_seqt) {
 					done = 1;
 					break;
 				}
@@ -513,6 +636,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				break;
 
 			ackno = SUB48(ackno_end_rl, 1);
+			vector++;
 		}
 		if (done)
 			break;
@@ -521,11 +645,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	/* The state about what is acked should be correct now
 	 * Check for NUMDUPACK
 	 */
-	seqp = hctx->seqt;
-	while (before48(seqp->ccid2s_seq, hctx->high_ack)) {
+	seqp = hctx->ccid2hctx_seqt;
+	while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) {
 		seqp = seqp->ccid2s_next;
-		if (seqp == hctx->seqh) {
-			seqp = hctx->seqh->ccid2s_prev;
+		if (seqp == hctx->ccid2hctx_seqh) {
+			seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
 			break;
 		}
 	}
@@ -536,7 +660,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			if (done == NUMDUPACK)
 				break;
 		}
-		if (seqp == hctx->seqt)
+		if (seqp == hctx->ccid2hctx_seqt)
 			break;
 		seqp = seqp->ccid2s_prev;
 	}
@@ -557,34 +681,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				 * one ack vector.
 				 */
 				ccid2_congestion_event(sk, seqp);
-				hctx->pipe--;
+				ccid2_hc_tx_dec_pipe(sk);
 			}
-			if (seqp == hctx->seqt)
+			if (seqp == hctx->ccid2hctx_seqt)
 				break;
 			seqp = seqp->ccid2s_prev;
 		}
 
-		hctx->seqt = last_acked;
+		hctx->ccid2hctx_seqt = last_acked;
 	}
 
 	/* trim acked packets in tail */
-	while (hctx->seqt != hctx->seqh) {
-		if (!hctx->seqt->ccid2s_acked)
+	while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) {
+		if (!hctx->ccid2hctx_seqt->ccid2s_acked)
 			break;
 
-		hctx->seqt = hctx->seqt->ccid2s_next;
+		hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
 	}
 
-	/* restart RTO timer if not all outstanding data has been acked */
-	if (hctx->pipe == 0)
-		sk_stop_timer(sk, &hctx->rtotimer);
-	else
-		sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
-done:
-	/* check if incoming Acks allow pending packets to be sent */
-	if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx))
-		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
-	dccp_ackvec_parsed_cleanup(&hctx->av_chunks);
+	ccid2_hc_tx_check_sanity(hctx);
 }
 
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -594,13 +709,17 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	u32 max_ratio;
 
 	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
-	hctx->ssthresh = ~0U;
+	hctx->ccid2hctx_ssthresh  = ~0U;
 
-	/* Use larger initial windows (RFC 3390, rfc2581bis) */
-	hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
+	/*
+	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
+	 * packets for new connections, following the rules from [RFC3390]".
+	 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
+	 */
+	hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
 
 	/* Make sure that Ack Ratio is enabled and within bounds. */
-	max_ratio = DIV_ROUND_UP(hctx->cwnd, 2);
+	max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2);
 	if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
 		dp->dccps_l_ack_ratio = max_ratio;
 
@@ -608,11 +727,15 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	if (ccid2_hc_tx_alloc_seq(hctx))
 		return -ENOMEM;
 
-	hctx->rto	= DCCP_TIMEOUT_INIT;
-	hctx->rpdupack  = -1;
-	hctx->last_cong = jiffies;
-	setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk);
-	INIT_LIST_HEAD(&hctx->av_chunks);
+	hctx->ccid2hctx_rto	 = 3 * HZ;
+	ccid2_change_srtt(hctx, -1);
+	hctx->ccid2hctx_rttvar	 = -1;
+	hctx->ccid2hctx_rpdupack = -1;
+	hctx->ccid2hctx_last_cong = jiffies;
+	setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire,
+			(unsigned long)sk);
+
+	ccid2_hc_tx_check_sanity(hctx);
 	return 0;
 }
 
@@ -621,11 +744,11 @@ static void ccid2_hc_tx_exit(struct sock *sk)
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 	int i;
 
-	sk_stop_timer(sk, &hctx->rtotimer);
+	ccid2_hc_tx_kill_rto_timer(sk);
 
-	for (i = 0; i < hctx->seqbufc; i++)
-		kfree(hctx->seqbuf[i]);
-	hctx->seqbufc = 0;
+	for (i = 0; i < hctx->ccid2hctx_seqbufc; i++)
+		kfree(hctx->ccid2hctx_seqbuf[i]);
+	hctx->ccid2hctx_seqbufc = 0;
 }
 
 static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -636,28 +759,27 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	switch (DCCP_SKB_CB(skb)->dccpd_type) {
 	case DCCP_PKT_DATA:
 	case DCCP_PKT_DATAACK:
-		hcrx->data++;
-		if (hcrx->data >= dp->dccps_r_ack_ratio) {
+		hcrx->ccid2hcrx_data++;
+		if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) {
 			dccp_send_ack(sk);
-			hcrx->data = 0;
+			hcrx->ccid2hcrx_data = 0;
 		}
 		break;
 	}
 }
 
 static struct ccid_operations ccid2 = {
-	.ccid_id		  = DCCPC_CCID2,
-	.ccid_name		  = "TCP-like",
-	.ccid_owner		  = THIS_MODULE,
-	.ccid_hc_tx_obj_size	  = sizeof(struct ccid2_hc_tx_sock),
-	.ccid_hc_tx_init	  = ccid2_hc_tx_init,
-	.ccid_hc_tx_exit	  = ccid2_hc_tx_exit,
-	.ccid_hc_tx_send_packet	  = ccid2_hc_tx_send_packet,
-	.ccid_hc_tx_packet_sent	  = ccid2_hc_tx_packet_sent,
-	.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
-	.ccid_hc_tx_packet_recv	  = ccid2_hc_tx_packet_recv,
-	.ccid_hc_rx_obj_size	  = sizeof(struct ccid2_hc_rx_sock),
-	.ccid_hc_rx_packet_recv	  = ccid2_hc_rx_packet_recv,
+	.ccid_id		= DCCPC_CCID2,
+	.ccid_name		= "TCP-like",
+	.ccid_owner		= THIS_MODULE,
+	.ccid_hc_tx_obj_size	= sizeof(struct ccid2_hc_tx_sock),
+	.ccid_hc_tx_init	= ccid2_hc_tx_init,
+	.ccid_hc_tx_exit	= ccid2_hc_tx_exit,
+	.ccid_hc_tx_send_packet	= ccid2_hc_tx_send_packet,
+	.ccid_hc_tx_packet_sent	= ccid2_hc_tx_packet_sent,
+	.ccid_hc_tx_packet_recv	= ccid2_hc_tx_packet_recv,
+	.ccid_hc_rx_obj_size	= sizeof(struct ccid2_hc_rx_sock),
+	.ccid_hc_rx_packet_recv	= ccid2_hc_rx_packet_recv,
 };
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 8b7a2dee2f6..2c94ca02901 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -42,49 +42,34 @@ struct ccid2_seq {
 
 /** struct ccid2_hc_tx_sock - CCID2 TX half connection
  *
- * @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
- * @packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
- * @srtt: smoothed RTT estimate, scaled by 2^3
- * @mdev: smoothed RTT variation, scaled by 2^2
- * @mdev_max: maximum of @mdev during one flight
- * @rttvar: moving average/maximum of @mdev_max
- * @rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
- * @rtt_seq: to decay RTTVAR at most once per flight
- * @rpseq: last consecutive seqno
- * @rpdupack: dupacks since rpseq
- * @av_chunks: list of Ack Vectors received on current skb
- */
+ * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
+ * @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465)
+ * @ccid2hctx_lastrtt -time RTT was last measured
+ * @ccid2hctx_rpseq - last consecutive seqno
+ * @ccid2hctx_rpdupack - dupacks since rpseq
+*/
 struct ccid2_hc_tx_sock {
-	u32			cwnd;
-	u32			ssthresh;
-	u32			pipe;
-	u32			packets_acked;
-	struct ccid2_seq	*seqbuf[CCID2_SEQBUF_MAX];
-	int			seqbufc;
-	struct ccid2_seq	*seqh;
-	struct ccid2_seq	*seqt;
-	/* RTT measurement: variables/principles are the same as in TCP */
-	u32			srtt,
-				mdev,
-				mdev_max,
-				rttvar,
-				rto;
-	u64			rtt_seq:48;
-	struct timer_list	rtotimer;
-	u64			rpseq;
-	int			rpdupack;
-	unsigned long		last_cong;
-	u64			high_ack;
-	struct list_head	av_chunks;
+	u32			ccid2hctx_cwnd;
+	u32			ccid2hctx_ssthresh;
+	u32			ccid2hctx_pipe;
+	u32			ccid2hctx_packets_acked;
+	struct ccid2_seq	*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
+	int			ccid2hctx_seqbufc;
+	struct ccid2_seq	*ccid2hctx_seqh;
+	struct ccid2_seq	*ccid2hctx_seqt;
+	long			ccid2hctx_rto;
+	long			ccid2hctx_srtt;
+	long			ccid2hctx_rttvar;
+	unsigned long		ccid2hctx_lastrtt;
+	struct timer_list	ccid2hctx_rtotimer;
+	u64			ccid2hctx_rpseq;
+	int			ccid2hctx_rpdupack;
+	unsigned long		ccid2hctx_last_cong;
+	u64			ccid2hctx_high_ack;
 };
 
-static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hctx)
-{
-	return (hctx->pipe >= hctx->cwnd);
-}
-
 struct ccid2_hc_rx_sock {
-	int			data;
+	int	ccid2hcrx_data;
 };
 
 static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 06cfdad84a6..3b8bd7ca676 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -49,41 +49,75 @@ static int ccid3_debug;
 /*
  *	Transmitter Half-Connection Routines
  */
-/* Oscillation Prevention/Reduction: recommended by rfc3448bis, on by default */
-static int do_osc_prev = true;
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
+static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
+{
+	static char *ccid3_state_names[] = {
+	[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
+	[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
+	[TFRC_SSTATE_FBACK]    = "FBACK",
+	[TFRC_SSTATE_TERM]     = "TERM",
+	};
+
+	return ccid3_state_names[state];
+}
+#endif
+
+static void ccid3_hc_tx_set_state(struct sock *sk,
+				  enum ccid3_hc_tx_states state)
+{
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
+
+	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
+		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
+		       ccid3_tx_state_name(state));
+	WARN_ON(state == oldstate);
+	hctx->ccid3hctx_state = state;
+}
 
 /*
  * Compute the initial sending rate X_init in the manner of RFC 3390:
  *
- *	X_init  =  min(4 * MPS, max(2 * MPS, 4380 bytes)) / RTT
+ *	X_init  =  min(4 * s, max(2 * s, 4380 bytes)) / RTT
  *
+ * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis
+ * (rev-02) clarifies the use of RFC 3390 with regard to the above formula.
  * For consistency with other parts of the code, X_init is scaled by 2^6.
  */
 static inline u64 rfc3390_initial_rate(struct sock *sk)
 {
-	const u32 mps = dccp_sk(sk)->dccps_mss_cache,
-	       w_init = clamp(4380U, 2 * mps, 4 * mps);
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	const __u32 w_init = clamp_t(__u32, 4380U,
+			2 * hctx->ccid3hctx_s, 4 * hctx->ccid3hctx_s);
 
-	return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->rtt);
+	return scaled_div(w_init << 6, hctx->ccid3hctx_rtt);
 }
 
-/**
- * ccid3_update_send_interval  -  Calculate new t_ipi = s / X
- * This respects the granularity of X (64 * bytes/second) and enforces the
- * scaled minimum of s * 64 / t_mbi = `s' bytes/second as per RFC 3448/4342.
+/*
+ * Recalculate t_ipi and delta (should be called whenever X changes)
  */
 static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
 {
-	if (unlikely(hctx->x <= hctx->s))
-		hctx->x	= hctx->s;
-	hctx->t_ipi = scaled_div32(((u64)hctx->s) << 6, hctx->x);
+	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
+	hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
+					     hctx->ccid3hctx_x);
+
+	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
+	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
+					   TFRC_OPSYS_HALF_TIME_GRAN);
+
+	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n",
+		       hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
+		       hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
+
 }
 
 static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
 {
-	u32 delta = ktime_us_delta(now, hctx->t_last_win_count);
+	u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
 
-	return delta / hctx->rtt;
+	return delta / hctx->ccid3hctx_rtt;
 }
 
 /**
@@ -99,8 +133,8 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
 static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	u64 min_rate = 2 * hctx->x_recv;
-	const u64 old_x = hctx->x;
+	__u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
+	const  __u64 old_x = hctx->ccid3hctx_x;
 	ktime_t now = stamp ? *stamp : ktime_get_real();
 
 	/*
@@ -111,44 +145,50 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
 	 */
 	if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) {
 		min_rate = rfc3390_initial_rate(sk);
-		min_rate = max(min_rate, 2 * hctx->x_recv);
+		min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
 	}
 
-	if (hctx->p > 0) {
+	if (hctx->ccid3hctx_p > 0) {
 
-		hctx->x = min(((u64)hctx->x_calc) << 6, min_rate);
+		hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
+					min_rate);
+		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
+					(((__u64)hctx->ccid3hctx_s) << 6) /
+								TFRC_T_MBI);
 
-	} else if (ktime_us_delta(now, hctx->t_ld) - (s64)hctx->rtt >= 0) {
+	} else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld)
+				- (s64)hctx->ccid3hctx_rtt >= 0) {
 
-		hctx->x = min(2 * hctx->x, min_rate);
-		hctx->x = max(hctx->x,
-			      scaled_div(((u64)hctx->s) << 6, hctx->rtt));
-		hctx->t_ld = now;
+		hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate);
+		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
+			    scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
+				       hctx->ccid3hctx_rtt));
+		hctx->ccid3hctx_t_ld = now;
 	}
 
-	if (hctx->x != old_x) {
+	if (hctx->ccid3hctx_x != old_x) {
 		ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
 			       "X_recv=%u\n", (unsigned)(old_x >> 6),
-			       (unsigned)(hctx->x >> 6), hctx->x_calc,
-			       (unsigned)(hctx->x_recv >> 6));
+			       (unsigned)(hctx->ccid3hctx_x >> 6),
+			       hctx->ccid3hctx_x_calc,
+			       (unsigned)(hctx->ccid3hctx_x_recv >> 6));
 
 		ccid3_update_send_interval(hctx);
 	}
 }
 
 /*
- * ccid3_hc_tx_measure_packet_size  -  Measuring the packet size `s' (sec 4.1)
- * @new_len: DCCP payload size in bytes (not used by all methods)
+ *	Track the mean packet size `s' (cf. RFC 4342, 5.3 and  RFC 3448, 4.1)
+ *	@len: DCCP packet payload size in bytes
  */
-static u32 ccid3_hc_tx_measure_packet_size(struct sock *sk, const u16 new_len)
+static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
 {
-#if   defined(CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_AVG)
-	return tfrc_ewma(ccid3_hc_tx_sk(sk)->s, new_len, 9);
-#elif defined(CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_MAX)
-	return max(ccid3_hc_tx_sk(sk)->s, new_len);
-#else /* CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_MPS	*/
-	return dccp_sk(sk)->dccps_mss_cache;
-#endif
+	const u16 old_s = hctx->ccid3hctx_s;
+
+	hctx->ccid3hctx_s = tfrc_ewma(hctx->ccid3hctx_s, len, 9);
+
+	if (hctx->ccid3hctx_s != old_s)
+		ccid3_update_send_interval(hctx);
 }
 
 /*
@@ -158,13 +198,13 @@ static u32 ccid3_hc_tx_measure_packet_size(struct sock *sk, const u16 new_len)
 static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
 						ktime_t now)
 {
-	u32 delta = ktime_us_delta(now, hctx->t_last_win_count),
-	    quarter_rtts = (4 * delta) / hctx->rtt;
+	u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count),
+	    quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt;
 
 	if (quarter_rtts > 0) {
-		hctx->t_last_win_count = now;
-		hctx->last_win_count  += min(quarter_rtts, 5U);
-		hctx->last_win_count  &= 0xF;		/* mod 16 */
+		hctx->ccid3hctx_t_last_win_count = now;
+		hctx->ccid3hctx_last_win_count  += min(quarter_rtts, 5U);
+		hctx->ccid3hctx_last_win_count	&= 0xF;		/* mod 16 */
 	}
 }
 
@@ -181,26 +221,25 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		goto restart_timer;
 	}
 
-	ccid3_pr_debug("%s(%p) entry with%s feedback\n", dccp_role(sk), sk,
-		       hctx->feedback ? "" : "out");
+	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
+		       ccid3_tx_state_name(hctx->ccid3hctx_state));
 
-	/* Ignore and do not restart after leaving the established state */
-	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK)
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+	else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
 		goto out;
 
-	/* Reset feedback state to "no feedback received" */
-	hctx->feedback = false;
-
 	/*
 	 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
-	 * RTO is 0 if and only if no feedback has been received yet.
 	 */
-	if (hctx->t_rto == 0 || hctx->p == 0) {
+	if (hctx->ccid3hctx_t_rto == 0 ||	/* no feedback received yet */
+	    hctx->ccid3hctx_p == 0) {
 
 		/* halve send rate directly */
-		hctx->x /= 2;
+		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
+					(((__u64)hctx->ccid3hctx_s) << 6) /
+								    TFRC_T_MBI);
 		ccid3_update_send_interval(hctx);
-
 	} else {
 		/*
 		 *  Modify the cached value of X_recv
@@ -212,41 +251,44 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		 *
 		 *  Note that X_recv is scaled by 2^6 while X_calc is not
 		 */
-		BUG_ON(hctx->p && !hctx->x_calc);
+		BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
 
-		if (hctx->x_calc > (hctx->x_recv >> 5))
-			hctx->x_recv /= 2;
+		if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))
+			hctx->ccid3hctx_x_recv =
+				max(hctx->ccid3hctx_x_recv / 2,
+				    (((__u64)hctx->ccid3hctx_s) << 6) /
+							      (2 * TFRC_T_MBI));
 		else {
-			hctx->x_recv = hctx->x_calc;
-			hctx->x_recv <<= 4;
+			hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
+			hctx->ccid3hctx_x_recv <<= 4;
 		}
 		ccid3_hc_tx_update_x(sk, NULL);
 	}
 	ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n",
-			(unsigned long long)hctx->x);
+			(unsigned long long)hctx->ccid3hctx_x);
 
 	/*
 	 * Set new timeout for the nofeedback timer.
 	 * See comments in packet_recv() regarding the value of t_RTO.
 	 */
-	if (unlikely(hctx->t_rto == 0))		/* no feedback received yet */
+	if (unlikely(hctx->ccid3hctx_t_rto == 0))	/* no feedback yet */
 		t_nfb = TFRC_INITIAL_TIMEOUT;
 	else
-		t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi);
+		t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
 
 restart_timer:
-	sk_reset_timer(sk, &hctx->no_feedback_timer,
+	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
 			   jiffies + usecs_to_jiffies(t_nfb));
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
 
-/**
- * ccid3_hc_tx_send_packet  -  Delay-based dequeueing of TX packets
- * @skb: next packet candidate to send on @sk
- * This function uses the convention of ccid_packet_dequeue_eval() and
- * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
+/*
+ * returns
+ *   > 0: delay (in msecs) that should pass before actually sending
+ *   = 0: can send immediately
+ *   < 0: error condition; do not send packet
  */
 static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
@@ -263,14 +305,18 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(skb->len == 0))
 		return -EBADMSG;
 
-	if (hctx->s == 0) {
-		sk_reset_timer(sk, &hctx->no_feedback_timer, (jiffies +
+	switch (hctx->ccid3hctx_state) {
+	case TFRC_SSTATE_NO_SENT:
+		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+			       (jiffies +
 				usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
-		hctx->last_win_count   = 0;
-		hctx->t_last_win_count = now;
+		hctx->ccid3hctx_last_win_count	 = 0;
+		hctx->ccid3hctx_t_last_win_count = now;
 
 		/* Set t_0 for initial packet */
-		hctx->t_nom = now;
+		hctx->ccid3hctx_t_nom = now;
+
+		hctx->ccid3hctx_s = skb->len;
 
 		/*
 		 * Use initial RTT sample when available: recommended by erratum
@@ -279,9 +325,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (dp->dccps_syn_rtt) {
 			ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
-			hctx->rtt  = dp->dccps_syn_rtt;
-			hctx->x    = rfc3390_initial_rate(sk);
-			hctx->t_ld = now;
+			hctx->ccid3hctx_rtt  = dp->dccps_syn_rtt;
+			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
+			hctx->ccid3hctx_t_ld = now;
 		} else {
 			/*
 			 * Sender does not have RTT sample:
@@ -289,20 +335,17 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 			 *   is needed in several parts (e.g.  window counter);
 			 * - set sending rate X_pps = 1pps as per RFC 3448, 4.2.
 			 */
-			hctx->rtt = DCCP_FALLBACK_RTT;
-			hctx->x	  = dp->dccps_mss_cache;
-			hctx->x <<= 6;
+			hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT;
+			hctx->ccid3hctx_x   = hctx->ccid3hctx_s;
+			hctx->ccid3hctx_x <<= 6;
 		}
-
-		/* Compute t_ipi = s / X */
-		hctx->s = ccid3_hc_tx_measure_packet_size(sk, skb->len);
 		ccid3_update_send_interval(hctx);
 
-		/* Seed value for Oscillation Prevention (sec. 4.5) */
-		hctx->r_sqmean = tfrc_scaled_sqrt(hctx->rtt);
-
-	} else {
-		delay = ktime_us_delta(hctx->t_nom, now);
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+		break;
+	case TFRC_SSTATE_NO_FBACK:
+	case TFRC_SSTATE_FBACK:
+		delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now);
 		ccid3_pr_debug("delay=%ld\n", (long)delay);
 		/*
 		 *	Scheduling of packet transmissions [RFC 3448, 4.6]
@@ -312,80 +355,99 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		 * else
 		 *       // send the packet in (t_nom - t_now) milliseconds.
 		 */
-		if (delay >= TFRC_T_DELTA)
-			return (u32)delay / USEC_PER_MSEC;
+		if (delay - (s64)hctx->ccid3hctx_delta >= 1000)
+			return (u32)delay / 1000L;
 
 		ccid3_hc_tx_update_win_count(hctx, now);
+		break;
+	case TFRC_SSTATE_TERM:
+		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
+		return -EINVAL;
 	}
 
 	/* prepare to send now (add options etc.) */
 	dp->dccps_hc_tx_insert_options = 1;
-	DCCP_SKB_CB(skb)->dccpd_ccval  = hctx->last_win_count;
+	DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
 
 	/* set the nominal send time for the next following packet */
-	hctx->t_nom = ktime_add_us(hctx->t_nom, hctx->t_ipi);
-	return CCID_PACKET_SEND_AT_ONCE;
+	hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom,
+					     hctx->ccid3hctx_t_ipi);
+	return 0;
 }
 
-static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
+static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
+				    unsigned int len)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
-	/* Changes to s will become effective the next time X is computed */
-	hctx->s = ccid3_hc_tx_measure_packet_size(sk, len);
+	ccid3_hc_tx_update_s(hctx, len);
 
-	if (tfrc_tx_hist_add(&hctx->hist, dccp_sk(sk)->dccps_gss))
+	if (tfrc_tx_hist_add(&hctx->ccid3hctx_hist, dccp_sk(sk)->dccps_gss))
 		DCCP_CRIT("packet history - out of memory!");
 }
 
 static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	struct tfrc_tx_hist_entry *acked;
+	struct ccid3_options_received *opt_recv;
 	ktime_t now;
 	unsigned long t_nfb;
-	u32 r_sample;
+	u32 pinv, r_sample;
 
 	/* we are only interested in ACKs */
 	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
 	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
 		return;
-	/*
-	 * Locate the acknowledged packet in the TX history.
-	 *
-	 * Returning "entry not found" here can for instance happen when
-	 *  - the host has not sent out anything (e.g. a passive server),
-	 *  - the Ack is outdated (packet with higher Ack number was received),
-	 *  - it is a bogus Ack (for a packet not sent on this connection).
-	 */
-	acked = tfrc_tx_hist_find_entry(hctx->hist, dccp_hdr_ack_seq(skb));
-	if (acked == NULL)
+	/* ... and only in the established state */
+	if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK &&
+	    hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+		return;
+
+	opt_recv = &hctx->ccid3hctx_options_received;
+	now = ktime_get_real();
+
+	/* Estimate RTT from history if ACK number is valid */
+	r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist,
+				    DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
+	if (r_sample == 0) {
+		DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
+			  dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
+			  (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
 		return;
-	/* For the sake of RTT sampling, ignore/remove all older entries */
-	tfrc_tx_hist_purge(&acked->next);
+	}
 
-	/* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
-	now	  = ktime_get_real();
-	r_sample  = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
-	hctx->rtt = tfrc_ewma(hctx->rtt, r_sample, 9);
+	/* Update receive rate in units of 64 * bytes/second */
+	hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
+	hctx->ccid3hctx_x_recv <<= 6;
 
+	/* Update loss event rate (which is scaled by 1e6) */
+	pinv = opt_recv->ccid3or_loss_event_rate;
+	if (pinv == ~0U || pinv == 0)	       /* see RFC 4342, 8.5   */
+		hctx->ccid3hctx_p = 0;
+	else				       /* can not exceed 100% */
+		hctx->ccid3hctx_p = scaled_div(1, pinv);
+	/*
+	 * Validate new RTT sample and update moving average
+	 */
+	r_sample = dccp_sample_rtt(sk, r_sample);
+	hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
 	/*
 	 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
 	 */
-	if (!hctx->feedback) {
-		hctx->feedback = true;
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
 
-		if (hctx->t_rto == 0) {
+		if (hctx->ccid3hctx_t_rto == 0) {
 			/*
 			 * Initial feedback packet: Larger Initial Windows (4.2)
 			 */
-			hctx->x    = rfc3390_initial_rate(sk);
-			hctx->t_ld = now;
+			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
+			hctx->ccid3hctx_t_ld = now;
 
 			ccid3_update_send_interval(hctx);
 
 			goto done_computing_x;
-		} else if (hctx->p == 0) {
+		} else if (hctx->ccid3hctx_p == 0) {
 			/*
 			 * First feedback after nofeedback timer expiry (4.3)
 			 */
@@ -394,52 +456,25 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
-	if (hctx->p > 0)
-		hctx->x_calc = tfrc_calc_x(hctx->s, hctx->rtt, hctx->p);
+	if (hctx->ccid3hctx_p > 0)
+		hctx->ccid3hctx_x_calc =
+				tfrc_calc_x(hctx->ccid3hctx_s,
+					    hctx->ccid3hctx_rtt,
+					    hctx->ccid3hctx_p);
 	ccid3_hc_tx_update_x(sk, &now);
 
 done_computing_x:
 	ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
 			       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
-			       dccp_role(sk), sk, hctx->rtt, r_sample,
-			       hctx->s, hctx->p, hctx->x_calc,
-			       (unsigned)(hctx->x_recv >> 6),
-			       (unsigned)(hctx->x >> 6));
-	/*
-	 * Oscillation Reduction (RFC 3448, 4.5) - modifying t_ipi according to
-	 * RTT changes, multiplying by X/X_inst = sqrt(R_sample)/R_sqmean. This
-	 * can be useful if few connections share a link, avoiding that buffer
-	 * fill levels (RTT) oscillate as a result of frequent adjustments to X.
-	 * A useful presentation with background information is in
-	 *    Joerg Widmer, "Equation-Based Congestion Control",
-	 *    MSc Thesis, University of Mannheim, Germany, 2000
-	 * (sec. 3.6.4), who calls this ISM ("Inter-packet Space Modulation").
-	 */
-	if (do_osc_prev) {
-		r_sample = tfrc_scaled_sqrt(r_sample);
-		/*
-		 * The modulation can work in both ways: increase/decrease t_ipi
-		 * according to long-term increases/decreases of the RTT. The
-		 * former is a useful measure, since it works against queue
-		 * build-up. The latter temporarily increases the sending rate,
-		 * so that buffers fill up more quickly. This in turn causes
-		 * the RTT to increase, so that either later reduction becomes
-		 * necessary or the RTT stays at a very high level. Decreasing
-		 * t_ipi is therefore not supported.
-		 * Furthermore, during the initial slow-start phase the RTT
-		 * naturally increases, where using the algorithm would cause
-		 * delays. Hence it is disabled during the initial slow-start.
-		 */
-		if (r_sample > hctx->r_sqmean && hctx->p > 0)
-			hctx->t_ipi = div_u64((u64)hctx->t_ipi * (u64)r_sample,
-					      hctx->r_sqmean);
-		hctx->t_ipi = min_t(u32, hctx->t_ipi, TFRC_T_MBI);
-		/* update R_sqmean _after_ computing the modulation factor */
-		hctx->r_sqmean = tfrc_ewma(hctx->r_sqmean, r_sample, 9);
-	}
+			       dccp_role(sk),
+			       sk, hctx->ccid3hctx_rtt, r_sample,
+			       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
+			       hctx->ccid3hctx_x_calc,
+			       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
+			       (unsigned)(hctx->ccid3hctx_x >> 6));
 
 	/* unschedule no feedback timer */
-	sk_stop_timer(sk, &hctx->no_feedback_timer);
+	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
 
 	/*
 	 * As we have calculated new ipi, delta, t_nom it is possible
@@ -453,66 +488,95 @@ done_computing_x:
 	 * This can help avoid triggering the nofeedback timer too
 	 * often ('spinning') on LANs with small RTTs.
 	 */
-	hctx->t_rto = max_t(u32, 4 * hctx->rtt, (CONFIG_IP_DCCP_CCID3_RTO *
-						 (USEC_PER_SEC / 1000)));
+	hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
+					   (CONFIG_IP_DCCP_CCID3_RTO *
+					    (USEC_PER_SEC / 1000)));
 	/*
 	 * Schedule no feedback timer to expire in
 	 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
 	 */
-	t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi);
+	t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
 
 	ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
 		       "expire in %lu jiffies (%luus)\n",
-		       dccp_role(sk), sk, usecs_to_jiffies(t_nfb), t_nfb);
+		       dccp_role(sk),
+		       sk, usecs_to_jiffies(t_nfb), t_nfb);
 
-	sk_reset_timer(sk, &hctx->no_feedback_timer,
+	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
 			   jiffies + usecs_to_jiffies(t_nfb));
 }
 
-static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
-				     u8 option, u8 *optval, u8 optlen)
+static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
+				     unsigned char len, u16 idx,
+				     unsigned char *value)
 {
+	int rc = 0;
+	const struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	struct ccid3_options_received *opt_recv;
 	__be32 opt_val;
 
-	switch (option) {
-	case TFRC_OPT_RECEIVE_RATE:
-	case TFRC_OPT_LOSS_EVENT_RATE:
-		/* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
-		if (packet_type == DCCP_PKT_DATA)
-			break;
-		if (unlikely(optlen != 4)) {
-			DCCP_WARN("%s(%p), invalid len %d for %u\n",
-				  dccp_role(sk), sk, optlen, option);
-			return -EINVAL;
-		}
-		opt_val = ntohl(get_unaligned((__be32 *)optval));
+	opt_recv = &hctx->ccid3hctx_options_received;
 
-		if (option == TFRC_OPT_RECEIVE_RATE) {
-			/* Receive Rate is kept in units of 64 bytes/second */
-			hctx->x_recv = opt_val;
-			hctx->x_recv <<= 6;
+	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
+		opt_recv->ccid3or_seqno		     = dp->dccps_gsr;
+		opt_recv->ccid3or_loss_event_rate    = ~0;
+		opt_recv->ccid3or_loss_intervals_idx = 0;
+		opt_recv->ccid3or_loss_intervals_len = 0;
+		opt_recv->ccid3or_receive_rate	     = 0;
+	}
 
-			ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
-				       dccp_role(sk), sk, opt_val);
+	switch (option) {
+	case TFRC_OPT_LOSS_EVENT_RATE:
+		if (unlikely(len != 4)) {
+			DCCP_WARN("%s(%p), invalid len %d "
+				  "for TFRC_OPT_LOSS_EVENT_RATE\n",
+				  dccp_role(sk), sk, len);
+			rc = -EINVAL;
 		} else {
-			/* Update the fixpoint Loss Event Rate fraction */
-			hctx->p = tfrc_invert_loss_event_rate(opt_val);
-
+			opt_val = get_unaligned((__be32 *)value);
+			opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
 			ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
-				       dccp_role(sk), sk, opt_val);
+				       dccp_role(sk), sk,
+				       opt_recv->ccid3or_loss_event_rate);
 		}
+		break;
+	case TFRC_OPT_LOSS_INTERVALS:
+		opt_recv->ccid3or_loss_intervals_idx = idx;
+		opt_recv->ccid3or_loss_intervals_len = len;
+		ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n",
+			       dccp_role(sk), sk,
+			       opt_recv->ccid3or_loss_intervals_idx,
+			       opt_recv->ccid3or_loss_intervals_len);
+		break;
+	case TFRC_OPT_RECEIVE_RATE:
+		if (unlikely(len != 4)) {
+			DCCP_WARN("%s(%p), invalid len %d "
+				  "for TFRC_OPT_RECEIVE_RATE\n",
+				  dccp_role(sk), sk, len);
+			rc = -EINVAL;
+		} else {
+			opt_val = get_unaligned((__be32 *)value);
+			opt_recv->ccid3or_receive_rate = ntohl(opt_val);
+			ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
+				       dccp_role(sk), sk,
+				       opt_recv->ccid3or_receive_rate);
+		}
+		break;
 	}
-	return 0;
+
+	return rc;
 }
 
 static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
 
-	hctx->hist  = NULL;
-	setup_timer(&hctx->no_feedback_timer,
-		    ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
+	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
+	hctx->ccid3hctx_hist = NULL;
+	setup_timer(&hctx->ccid3hctx_no_feedback_timer,
+			ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
+
 	return 0;
 }
 
@@ -520,36 +584,42 @@ static void ccid3_hc_tx_exit(struct sock *sk)
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
-	sk_stop_timer(sk, &hctx->no_feedback_timer);
-	tfrc_tx_hist_purge(&hctx->hist);
+	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
+	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+
+	tfrc_tx_hist_purge(&hctx->ccid3hctx_hist);
 }
 
 static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
 {
-	info->tcpi_rto = ccid3_hc_tx_sk(sk)->t_rto;
-	info->tcpi_rtt = ccid3_hc_tx_sk(sk)->rtt;
+	struct ccid3_hc_tx_sock *hctx;
+
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return;
+
+	hctx = ccid3_hc_tx_sk(sk);
+	info->tcpi_rto = hctx->ccid3hctx_t_rto;
+	info->tcpi_rtt = hctx->ccid3hctx_rtt;
 }
 
 static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 				  u32 __user *optval, int __user *optlen)
 {
-	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	struct tfrc_tx_info tfrc;
+	const struct ccid3_hc_tx_sock *hctx;
 	const void *val;
 
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return -EINVAL;
+
+	hctx = ccid3_hc_tx_sk(sk);
 	switch (optname) {
 	case DCCP_SOCKOPT_CCID_TX_INFO:
-		if (len < sizeof(tfrc))
+		if (len < sizeof(hctx->ccid3hctx_tfrc))
 			return -EINVAL;
-		tfrc.tfrctx_x	   = hctx->x;
-		tfrc.tfrctx_x_recv = hctx->x_recv;
-		tfrc.tfrctx_x_calc = hctx->x_calc;
-		tfrc.tfrctx_rtt	   = hctx->rtt;
-		tfrc.tfrctx_p	   = hctx->p;
-		tfrc.tfrctx_rto	   = hctx->t_rto;
-		tfrc.tfrctx_ipi	   = hctx->t_ipi;
-		len = sizeof(tfrc);
-		val = &tfrc;
+		len = sizeof(hctx->ccid3hctx_tfrc);
+		val = &hctx->ccid3hctx_tfrc;
 		break;
 	default:
 		return -ENOPROTOOPT;
@@ -564,82 +634,112 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 /*
  *	Receiver Half-Connection Routines
  */
+
+/* CCID3 feedback types */
+enum ccid3_fback_type {
+	CCID3_FBACK_NONE = 0,
+	CCID3_FBACK_INITIAL,
+	CCID3_FBACK_PERIODIC,
+	CCID3_FBACK_PARAM_CHANGE
+};
+
+#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
+static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
+{
+	static char *ccid3_rx_state_names[] = {
+	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
+	[TFRC_RSTATE_DATA]    = "DATA",
+	[TFRC_RSTATE_TERM]    = "TERM",
+	};
+
+	return ccid3_rx_state_names[state];
+}
+#endif
+
+static void ccid3_hc_rx_set_state(struct sock *sk,
+				  enum ccid3_hc_rx_states state)
+{
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
+
+	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
+		       dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
+		       ccid3_rx_state_name(state));
+	WARN_ON(state == oldstate);
+	hcrx->ccid3hcrx_state = state;
+}
+
 static void ccid3_hc_rx_send_feedback(struct sock *sk,
 				      const struct sk_buff *skb,
 				      enum ccid3_fback_type fbtype)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	struct dccp_sock *dp = dccp_sk(sk);
+	ktime_t now;
+	s64 delta = 0;
+
+	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
+		return;
+
+	now = ktime_get_real();
 
 	switch (fbtype) {
 	case CCID3_FBACK_INITIAL:
-		hcrx->x_recv = 0;
-		hcrx->p_inverse = ~0U;   /* see RFC 4342, 8.5 */
+		hcrx->ccid3hcrx_x_recv = 0;
+		hcrx->ccid3hcrx_pinv   = ~0U;   /* see RFC 4342, 8.5 */
 		break;
 	case CCID3_FBACK_PARAM_CHANGE:
-		if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) {
-			/*
-			 * rfc3448bis-06, 6.3.1: First packet(s) lost or marked
-			 * FIXME: in rfc3448bis the receiver returns X_recv=0
-			 * here as it normally would in the first feedback packet.
-			 * However this is not possible yet, since the code still
-			 * uses RFC 3448, i.e.
-			 *    If (p > 0)
-			 *      Calculate X_calc using the TCP throughput equation.
-			 *      X = max(min(X_calc, 2*X_recv), s/t_mbi);
-			 * would bring X down to s/t_mbi. That is why we return
-			 * X_recv according to rfc3448bis-06 for the moment.
-			 */
-			u32 s = tfrc_rx_hist_packet_size(&hcrx->hist),
-			    rtt = tfrc_rx_hist_rtt(&hcrx->hist);
-
-			hcrx->x_recv = scaled_div32(s, 2 * rtt);
-			break;
-		}
 		/*
 		 * When parameters change (new loss or p > p_prev), we do not
 		 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
-		 * always check whether at least RTT time units were covered.
+		 * need to  reuse the previous value of X_recv. However, when
+		 * X_recv was 0 (due to early loss), this would kill X down to
+		 * s/t_mbi (i.e. one packet in 64 seconds).
+		 * To avoid such drastic reduction, we approximate X_recv as
+		 * the number of bytes since last feedback.
+		 * This is a safe fallback, since X is bounded above by X_calc.
 		 */
-		hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv);
-		break;
+		if (hcrx->ccid3hcrx_x_recv > 0)
+			break;
+		/* fall through */
 	case CCID3_FBACK_PERIODIC:
-		/*
-		 * Step (2) of rfc3448bis-06, 6.2:
-		 * - if no data packets have been received, just restart timer
-		 * - if data packets have been received, re-compute X_recv
-		 */
-		if (hcrx->hist.bytes_recvd == 0)
-			goto prepare_for_next_time;
-		hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv);
+		delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback);
+		if (delta <= 0)
+			DCCP_BUG("delta (%ld) <= 0", (long)delta);
+		else
+			hcrx->ccid3hcrx_x_recv =
+				scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
 		break;
 	default:
 		return;
 	}
 
-	ccid3_pr_debug("X_recv=%u, 1/p=%u\n", hcrx->x_recv, hcrx->p_inverse);
+	ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
+		       hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv);
 
-	dccp_sk(sk)->dccps_hc_rx_insert_options = 1;
-	dccp_send_ack(sk);
+	hcrx->ccid3hcrx_tstamp_last_feedback = now;
+	hcrx->ccid3hcrx_last_counter	     = dccp_hdr(skb)->dccph_ccval;
+	hcrx->ccid3hcrx_bytes_recv	     = 0;
 
-prepare_for_next_time:
-	tfrc_rx_hist_restart_byte_counter(&hcrx->hist);
-	hcrx->last_counter = dccp_hdr(skb)->dccph_ccval;
-	hcrx->feedback	   = fbtype;
+	dp->dccps_hc_rx_insert_options = 1;
+	dccp_send_ack(sk);
 }
 
 static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 {
-	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	const struct ccid3_hc_rx_sock *hcrx;
 	__be32 x_recv, pinv;
 
 	if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
 		return 0;
 
+	hcrx = ccid3_hc_rx_sk(sk);
+
 	if (dccp_packet_without_ack(skb))
 		return 0;
 
-	x_recv = htonl(hcrx->x_recv);
-	pinv   = htonl(hcrx->p_inverse);
+	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
+	pinv   = htonl(hcrx->ccid3hcrx_pinv);
 
 	if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
 			       &pinv, sizeof(pinv)) ||
@@ -662,95 +762,171 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 static u32 ccid3_first_li(struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	u32 s = tfrc_rx_hist_packet_size(&hcrx->hist),
-	    rtt = tfrc_rx_hist_rtt(&hcrx->hist), x_recv, p;
+	u32 x_recv, p, delta;
 	u64 fval;
 
-	/*
-	 * rfc3448bis-06, 6.3.1: First data packet(s) are marked or lost. Set p
-	 * to give the equivalent of X_target = s/(2*R). Thus fval = 2 and so p
-	 * is about 20.64%. This yields an interval length of 4.84 (rounded up).
-	 */
-	if (unlikely(hcrx->feedback == CCID3_FBACK_NONE))
-		return 5;
+	if (hcrx->ccid3hcrx_rtt == 0) {
+		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
+		hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT;
+	}
 
-	x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv);
-	if (x_recv == 0)
-		goto failed;
+	delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback));
+	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+	if (x_recv == 0) {		/* would also trigger divide-by-zero */
+		DCCP_WARN("X_recv==0\n");
+		if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
+			DCCP_BUG("stored value of X_recv is zero");
+			return ~0U;
+		}
+	}
 
-	fval = scaled_div32(scaled_div(s, rtt), x_recv);
+	fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt);
+	fval = scaled_div32(fval, x_recv);
 	p = tfrc_calc_x_reverse_lookup(fval);
 
 	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
 		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
 
-	if (p > 0)
-		return scaled_div(1, p);
-failed:
-	return UINT_MAX;
+	return p == 0 ? ~0U : scaled_div(1, p);
 }
 
 static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
 	const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
 	const bool is_data_packet = dccp_data_packet(skb);
 
+	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
+		if (is_data_packet) {
+			const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+			do_feedback = CCID3_FBACK_INITIAL;
+			ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
+			hcrx->ccid3hcrx_s = payload;
+			/*
+			 * Not necessary to update ccid3hcrx_bytes_recv here,
+			 * since X_recv = 0 for the first feedback packet (cf.
+			 * RFC 3448, 6.3) -- gerrit
+			 */
+		}
+		goto update_records;
+	}
+
+	if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb))
+		return; /* done receiving */
+
+	if (is_data_packet) {
+		const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+		/*
+		 * Update moving-average of s and the sum of received payload bytes
+		 */
+		hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9);
+		hcrx->ccid3hcrx_bytes_recv += payload;
+	}
+
 	/*
 	 * Perform loss detection and handle pending losses
 	 */
-	if (tfrc_rx_congestion_event(&hcrx->hist, &hcrx->li_hist,
-				     skb, ndp, ccid3_first_li, sk))
-		ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PARAM_CHANGE);
+	if (tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist,
+				skb, ndp, ccid3_first_li, sk)) {
+		do_feedback = CCID3_FBACK_PARAM_CHANGE;
+		goto done_receiving;
+	}
+
+	if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist))
+		return; /* done receiving */
+
 	/*
-	 * Feedback for first non-empty data packet (RFC 3448, 6.3)
+	 * Handle data packets: RTT sampling and monitoring p
 	 */
-	else if (unlikely(hcrx->feedback == CCID3_FBACK_NONE && is_data_packet))
-		ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_INITIAL);
+	if (unlikely(!is_data_packet))
+		goto update_records;
+
+	if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) {
+		const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
+		/*
+		 * Empty loss history: no loss so far, hence p stays 0.
+		 * Sample RTT values, since an RTT estimate is required for the
+		 * computation of p when the first loss occurs; RFC 3448, 6.3.1.
+		 */
+		if (sample != 0)
+			hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
+
+	} else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) {
+		/*
+		 * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean
+		 * has decreased (resp. p has increased), send feedback now.
+		 */
+		do_feedback = CCID3_FBACK_PARAM_CHANGE;
+	}
+
 	/*
 	 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
 	 */
-	else if (!tfrc_rx_hist_loss_pending(&hcrx->hist) && is_data_packet &&
-		 SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3)
-		ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PERIODIC);
+	if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3)
+		do_feedback = CCID3_FBACK_PERIODIC;
+
+update_records:
+	tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
+
+done_receiving:
+	if (do_feedback)
+		ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
 }
 
 static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
 
-	tfrc_lh_init(&hcrx->li_hist);
-	return tfrc_rx_hist_init(&hcrx->hist, sk);
+	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
+	tfrc_lh_init(&hcrx->ccid3hcrx_li_hist);
+	return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
 }
 
 static void ccid3_hc_rx_exit(struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 
-	tfrc_rx_hist_purge(&hcrx->hist);
-	tfrc_lh_cleanup(&hcrx->li_hist);
+	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
+
+	tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist);
+	tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist);
 }
 
 static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
 {
+	const struct ccid3_hc_rx_sock *hcrx;
+
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return;
+
+	hcrx = ccid3_hc_rx_sk(sk);
+	info->tcpi_ca_state = hcrx->ccid3hcrx_state;
 	info->tcpi_options  |= TCPI_OPT_TIMESTAMPS;
-	info->tcpi_rcv_rtt  = tfrc_rx_hist_rtt(&ccid3_hc_rx_sk(sk)->hist);
+	info->tcpi_rcv_rtt  = hcrx->ccid3hcrx_rtt;
 }
 
 static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 				  u32 __user *optval, int __user *optlen)
 {
-	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	const struct ccid3_hc_rx_sock *hcrx;
 	struct tfrc_rx_info rx_info;
 	const void *val;
 
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return -EINVAL;
+
+	hcrx = ccid3_hc_rx_sk(sk);
 	switch (optname) {
 	case DCCP_SOCKOPT_CCID_RX_INFO:
 		if (len < sizeof(rx_info))
 			return -EINVAL;
-		rx_info.tfrcrx_x_recv = hcrx->x_recv;
-		rx_info.tfrcrx_rtt    = tfrc_rx_hist_rtt(&hcrx->hist);
-		rx_info.tfrcrx_p      = tfrc_invert_loss_event_rate(hcrx->p_inverse);
+		rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv;
+		rx_info.tfrcrx_rtt    = hcrx->ccid3hcrx_rtt;
+		rx_info.tfrcrx_p      = hcrx->ccid3hcrx_pinv == 0 ? ~0U :
+					   scaled_div(1, hcrx->ccid3hcrx_pinv);
 		len = sizeof(rx_info);
 		val = &rx_info;
 		break;
@@ -786,9 +962,6 @@ static struct ccid_operations ccid3 = {
 	.ccid_hc_tx_getsockopt	   = ccid3_hc_tx_getsockopt,
 };
 
-module_param(do_osc_prev, bool, 0644);
-MODULE_PARM_DESC(do_osc_prev, "Use Oscillation Prevention (RFC 3448, 4.5)");
-
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 module_param(ccid3_debug, bool, 0644);
 MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
@@ -796,19 +969,6 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
 
 static __init int ccid3_module_init(void)
 {
-	struct timespec tp;
-
-	/*
-	 * Without a fine-grained clock resolution, RTTs/X_recv are not sampled
-	 * correctly and feedback is sent either too early or too late.
-	 */
-	hrtimer_get_res(CLOCK_MONOTONIC, &tp);
-	if (tp.tv_sec || tp.tv_nsec > DCCP_TIME_RESOLUTION * NSEC_PER_USEC) {
-		printk(KERN_ERR "%s: Timer too coarse (%ld usec), need %u-usec"
-		       " resolution - check your clocksource.\n", __func__,
-		       tp.tv_nsec/NSEC_PER_USEC, DCCP_TIME_RESOLUTION);
-		return -ESOCKTNOSUPPORT;
-	}
 	return ccid_register(&ccid3);
 }
 module_init(ccid3_module_init);
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index af6e1bf937d..49ca32bd7e7 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -47,22 +47,11 @@
 /* Two seconds as per RFC 3448 4.2 */
 #define TFRC_INITIAL_TIMEOUT	   (2 * USEC_PER_SEC)
 
-/* Maximum backoff interval t_mbi (RFC 3448, 4.3) */
-#define TFRC_T_MBI		   (64 * USEC_PER_SEC)
+/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
+#define TFRC_OPSYS_HALF_TIME_GRAN  (USEC_PER_SEC / (2 * HZ))
 
-/*
- * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are
- * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
- * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
- * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
- * when using the constant t_delta  =  t_gran / 2  =  %USEC_PER_SEC / (2 * HZ).
- */
-#if (HZ >= 500)
-# define TFRC_T_DELTA		   USEC_PER_MSEC
-#else
-# define TFRC_T_DELTA		   (USEC_PER_SEC / (2 * HZ))
-#warning Coarse CONFIG_HZ resolution -- higher value recommended for TFRC.
-#endif
+/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
+#define TFRC_T_MBI		   64
 
 enum ccid3_options {
 	TFRC_OPT_LOSS_EVENT_RATE = 192,
@@ -70,43 +59,62 @@ enum ccid3_options {
 	TFRC_OPT_RECEIVE_RATE	 = 194,
 };
 
+struct ccid3_options_received {
+	u64 ccid3or_seqno:48,
+	    ccid3or_loss_intervals_idx:16;
+	u16 ccid3or_loss_intervals_len;
+	u32 ccid3or_loss_event_rate;
+	u32 ccid3or_receive_rate;
+};
+
+/* TFRC sender states */
+enum ccid3_hc_tx_states {
+	TFRC_SSTATE_NO_SENT = 1,
+	TFRC_SSTATE_NO_FBACK,
+	TFRC_SSTATE_FBACK,
+	TFRC_SSTATE_TERM,
+};
+
 /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
  *
- * @x - Current sending rate in 64 * bytes per second
- * @x_recv - Receive rate    in 64 * bytes per second
- * @x_calc - Calculated rate in bytes per second
- * @rtt - Estimate of current round trip time in usecs
- * @r_sqmean - Estimate of long-term RTT (RFC 3448, 4.5)
- * @p - Current loss event rate (0-1) scaled by 1000000
- * @s - Packet size in bytes
- * @t_rto - Nofeedback Timer setting in usecs
- * @t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
- * @feedback - Whether feedback has been received or not
- * @last_win_count - Last window counter sent
- * @t_last_win_count - Timestamp of earliest packet with
- *                     last_win_count value sent
- * @no_feedback_timer - Handle to no feedback timer
- * @t_ld - Time last doubled during slow start
- * @t_nom - Nominal send time of next packet
- * @hist - Packet history
+ * @ccid3hctx_x - Current sending rate in 64 * bytes per second
+ * @ccid3hctx_x_recv - Receive rate    in 64 * bytes per second
+ * @ccid3hctx_x_calc - Calculated rate in bytes per second
+ * @ccid3hctx_rtt - Estimate of current round trip time in usecs
+ * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
+ * @ccid3hctx_s - Packet size in bytes
+ * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs
+ * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
+ * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
+ * @ccid3hctx_last_win_count - Last window counter sent
+ * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
+ *				 with last_win_count value sent
+ * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
+ * @ccid3hctx_t_ld - Time last doubled during slow start
+ * @ccid3hctx_t_nom - Nominal send time of next packet
+ * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
+ * @ccid3hctx_hist - Packet history
+ * @ccid3hctx_options_received - Parsed set of retrieved options
  */
 struct ccid3_hc_tx_sock {
-	u64				x;
-	u64				x_recv;
-	u32				x_calc;
-	u32				rtt;
-	u16				r_sqmean;
-	u32				p;
-	u32				t_rto;
-	u32				t_ipi;
-	u16				s;
-	bool				feedback:1;
-	u8				last_win_count;
-	ktime_t				t_last_win_count;
-	struct timer_list		no_feedback_timer;
-	ktime_t				t_ld;
-	ktime_t				t_nom;
-	struct tfrc_tx_hist_entry	*hist;
+	struct tfrc_tx_info		ccid3hctx_tfrc;
+#define ccid3hctx_x			ccid3hctx_tfrc.tfrctx_x
+#define ccid3hctx_x_recv		ccid3hctx_tfrc.tfrctx_x_recv
+#define ccid3hctx_x_calc		ccid3hctx_tfrc.tfrctx_x_calc
+#define ccid3hctx_rtt			ccid3hctx_tfrc.tfrctx_rtt
+#define ccid3hctx_p			ccid3hctx_tfrc.tfrctx_p
+#define ccid3hctx_t_rto			ccid3hctx_tfrc.tfrctx_rto
+#define ccid3hctx_t_ipi			ccid3hctx_tfrc.tfrctx_ipi
+	u16				ccid3hctx_s;
+	enum ccid3_hc_tx_states		ccid3hctx_state:8;
+	u8				ccid3hctx_last_win_count;
+	ktime_t				ccid3hctx_t_last_win_count;
+	struct timer_list		ccid3hctx_no_feedback_timer;
+	ktime_t				ccid3hctx_t_ld;
+	ktime_t				ccid3hctx_t_nom;
+	u32				ccid3hctx_delta;
+	struct tfrc_tx_hist_entry	*ccid3hctx_hist;
+	struct ccid3_options_received	ccid3hctx_options_received;
 };
 
 static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
@@ -116,32 +124,41 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
     return hctx;
 }
 
-
-enum ccid3_fback_type {
-	CCID3_FBACK_NONE = 0,
-	CCID3_FBACK_INITIAL,
-	CCID3_FBACK_PERIODIC,
-	CCID3_FBACK_PARAM_CHANGE
+/* TFRC receiver states */
+enum ccid3_hc_rx_states {
+	TFRC_RSTATE_NO_DATA = 1,
+	TFRC_RSTATE_DATA,
+	TFRC_RSTATE_TERM    = 127,
 };
 
 /** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
  *
- *  @last_counter  -  Tracks window counter (RFC 4342, 8.1)
- *  @feedback  -  The type of the feedback last sent
- *  @x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
- *  @tstamp_last_feedback  -  Time at which last feedback was sent
- *  @hist  -  Packet history (loss detection + RTT sampling)
- *  @li_hist  -  Loss Interval database
- *  @p_inverse  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
+ *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448 4.3)
+ *  @ccid3hcrx_rtt  -  Receiver estimate of rtt (non-standard)
+ *  @ccid3hcrx_p  -  Current loss event rate (RFC 3448 5.4)
+ *  @ccid3hcrx_last_counter  -  Tracks window counter (RFC 4342, 8.1)
+ *  @ccid3hcrx_state  -  Receiver state, one of %ccid3_hc_rx_states
+ *  @ccid3hcrx_bytes_recv  -  Total sum of DCCP payload bytes
+ *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
+ *  @ccid3hcrx_rtt  -  Receiver estimate of RTT
+ *  @ccid3hcrx_tstamp_last_feedback  -  Time at which last feedback was sent
+ *  @ccid3hcrx_tstamp_last_ack  -  Time at which last feedback was sent
+ *  @ccid3hcrx_hist  -  Packet history (loss detection + RTT sampling)
+ *  @ccid3hcrx_li_hist  -  Loss Interval database
+ *  @ccid3hcrx_s  -  Received packet size in bytes
+ *  @ccid3hcrx_pinv  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
  */
 struct ccid3_hc_rx_sock {
-	u8				last_counter:4;
-	enum ccid3_fback_type		feedback:4;
-	u32				x_recv;
-	ktime_t				tstamp_last_feedback;
-	struct tfrc_rx_hist		hist;
-	struct tfrc_loss_hist		li_hist;
-#define p_inverse			li_hist.i_mean
+	u8				ccid3hcrx_last_counter:4;
+	enum ccid3_hc_rx_states		ccid3hcrx_state:8;
+	u32				ccid3hcrx_bytes_recv;
+	u32				ccid3hcrx_x_recv;
+	u32				ccid3hcrx_rtt;
+	ktime_t				ccid3hcrx_tstamp_last_feedback;
+	struct tfrc_rx_hist		ccid3hcrx_hist;
+	struct tfrc_loss_hist		ccid3hcrx_li_hist;
+	u16				ccid3hcrx_s;
+#define ccid3hcrx_pinv			ccid3hcrx_li_hist.i_mean
 };
 
 static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index b1ae8f8259e..5b3ce0688c5 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -86,26 +86,21 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 
 /**
  * tfrc_lh_update_i_mean  -  Update the `open' loss interval I_0
- * This updates I_mean as the sequence numbers increase. As a consequence, the
- * open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1)
- * decreases, and thus there is no need to send renewed feedback.
+ * For recomputing p: returns `true' if p > p_prev  <=>  1/p < 1/p_prev
  */
-void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
+u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 {
 	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
+	u32 old_i_mean = lh->i_mean;
 	s64 len;
 
 	if (cur == NULL)			/* not initialised */
-		return;
-
-	/* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */
-	if (!dccp_data_packet(skb))
-		return;
+		return 0;
 
 	len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
 
 	if (len - (s64)cur->li_length <= 0)	/* duplicate or reordered */
-		return;
+		return 0;
 
 	if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
 		/*
@@ -119,11 +114,14 @@ void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 		cur->li_is_closed = 1;
 
 	if (tfrc_lh_length(lh) == 1)		/* due to RFC 3448, 6.3.1 */
-		return;
+		return 0;
 
 	cur->li_length = len;
 	tfrc_lh_calc_i_mean(lh);
+
+	return (lh->i_mean < old_i_mean);
 }
+EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
 
 /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
 static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
@@ -140,18 +138,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
  * @sk:		   Used by @calc_first_li in caller-specific way (subtyping)
  * Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
  */
-bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
-			  u32 (*calc_first_li)(struct sock *), struct sock *sk)
+int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
+			 u32 (*calc_first_li)(struct sock *), struct sock *sk)
 {
 	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
 
 	if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
-		return false;
+		return 0;
 
 	new = tfrc_lh_demand_next(lh);
 	if (unlikely(new == NULL)) {
 		DCCP_CRIT("Cannot allocate/add loss record.");
-		return false;
+		return 0;
 	}
 
 	new->li_seqno	  = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
@@ -169,7 +167,7 @@ bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
 
 		tfrc_lh_calc_i_mean(lh);
 	}
-	return true;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
 
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index d08a226db43..246018a3b26 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
 
 struct tfrc_rx_hist;
 
-extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
+extern int  tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
 				 u32 (*first_li)(struct sock *), struct sock *);
-extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
+extern u8   tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
 extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
 
 #endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index cce9f03bda3..6cc108afdc3 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -40,6 +40,18 @@
 #include "packet_history.h"
 #include "../../dccp.h"
 
+/**
+ *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
+ *  @next:  next oldest entry (LIFO order)
+ *  @seqno: sequence number of this entry
+ *  @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+	struct tfrc_tx_hist_entry *next;
+	u64			  seqno;
+	ktime_t			  stamp;
+};
+
 /*
  * Transmitter History Routines
  */
@@ -61,6 +73,15 @@ void tfrc_tx_packet_history_exit(void)
 	}
 }
 
+static struct tfrc_tx_hist_entry *
+	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
+{
+	while (head != NULL && head->seqno != seqno)
+		head = head->next;
+
+	return head;
+}
+
 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
 	struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -90,6 +111,25 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 }
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
 
+u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
+		     const ktime_t now)
+{
+	u32 rtt = 0;
+	struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
+
+	if (packet != NULL) {
+		rtt = ktime_us_delta(now, packet->stamp);
+		/*
+		 * Garbage-collect older (irrelevant) entries:
+		 */
+		tfrc_tx_hist_purge(&packet->next);
+	}
+
+	return rtt;
+}
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
+
+
 /*
  * 	Receiver History Routines
  */
@@ -151,31 +191,14 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
 
-
-static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
-{
-	struct tfrc_rx_hist_entry *tmp = h->ring[a];
-
-	h->ring[a] = h->ring[b];
-	h->ring[b] = tmp;
-}
-
 static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
 {
-	__tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
-			       tfrc_rx_hist_index(h, b));
-}
+	const u8 idx_a = tfrc_rx_hist_index(h, a),
+		 idx_b = tfrc_rx_hist_index(h, b);
+	struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
 
-/**
- * tfrc_rx_hist_resume_rtt_sampling  -  Prepare RX history for RTT sampling
- * This is called after loss detection has finished, when the history entry
- * with the index of `loss_count' holds the highest-received sequence number.
- * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
- */
-static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
-{
-	__tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
-	h->loss_count = h->loss_start = 0;
+	h->ring[idx_a] = h->ring[idx_b];
+	h->ring[idx_b] = tmp;
 }
 
 /*
@@ -192,8 +215,10 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
 	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
 	    s1 = DCCP_SKB_CB(skb)->dccpd_seq;
 
-	if (!dccp_loss_free(s0, s1, n1))	/* gap between S0 and S1 */
+	if (!dccp_loss_free(s0, s1, n1)) {	/* gap between S0 and S1 */
 		h->loss_count = 1;
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
+	}
 }
 
 static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
@@ -215,7 +240,8 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
 
 		if (dccp_loss_free(s2, s1, n1)) {
 			/* hole is filled: S0, S2, and S1 are consecutive */
-			tfrc_rx_hist_resume_rtt_sampling(h);
+			h->loss_count = 0;
+			h->loss_start = tfrc_rx_hist_index(h, 1);
 		} else
 			/* gap between S2 and S1: just update loss_prev */
 			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
@@ -268,7 +294,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
 
 			if (dccp_loss_free(s1, s2, n2)) {
 				/* entire hole filled by S0, S3, S1, S2 */
-				tfrc_rx_hist_resume_rtt_sampling(h);
+				h->loss_start = tfrc_rx_hist_index(h, 2);
+				h->loss_count = 0;
 			} else {
 				/* gap remains between S1 and S2 */
 				h->loss_start = tfrc_rx_hist_index(h, 1);
@@ -312,7 +339,8 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
 
 		if (dccp_loss_free(s2, s3, n3)) {
 			/* no gap between S2 and S3: entire hole is filled */
-			tfrc_rx_hist_resume_rtt_sampling(h);
+			h->loss_start = tfrc_rx_hist_index(h, 3);
+			h->loss_count = 0;
 		} else {
 			/* gap between S2 and S3 */
 			h->loss_start = tfrc_rx_hist_index(h, 2);
@@ -326,13 +354,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
 }
 
 /**
- *  tfrc_rx_congestion_event  -  Loss detection and further processing
- *  @h:		The non-empty RX history object
- *  @lh:	Loss Intervals database to update
- *  @skb:	Currently received packet
- *  @ndp:	The NDP count belonging to @skb
- *  @first_li:	Caller-dependent computation of first loss interval in @lh
- *  @sk:	Used by @calc_first_li (see tfrc_lh_interval_add)
+ *  tfrc_rx_handle_loss  -  Loss detection and further processing
+ *  @h:		    The non-empty RX history object
+ *  @lh:	    Loss Intervals database to update
+ *  @skb:	    Currently received packet
+ *  @ndp:	    The NDP count belonging to @skb
+ *  @calc_first_li: Caller-dependent computation of first loss interval in @lh
+ *  @sk:	    Used by @calc_first_li (see tfrc_lh_interval_add)
  *  Chooses action according to pending loss, updates LI database when a new
  *  loss was detected, and does required post-processing. Returns 1 when caller
  *  should send feedback, 0 otherwise.
@@ -340,20 +368,15 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
  *  records accordingly, the caller should not perform any more RX history
  *  operations when loss_count is greater than 0 after calling this function.
  */
-bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
-			      struct tfrc_loss_hist *lh,
-			      struct sk_buff *skb, const u64 ndp,
-			      u32 (*first_li)(struct sock *), struct sock *sk)
+int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
+			struct tfrc_loss_hist *lh,
+			struct sk_buff *skb, const u64 ndp,
+			u32 (*calc_first_li)(struct sock *), struct sock *sk)
 {
-	bool new_event = false;
-
-	if (tfrc_rx_hist_duplicate(h, skb))
-		return 0;
+	int is_new_loss = 0;
 
 	if (h->loss_count == 0) {
 		__do_track_loss(h, skb, ndp);
-		tfrc_rx_hist_sample_rtt(h, skb);
-		tfrc_rx_hist_add_packet(h, skb, ndp);
 	} else if (h->loss_count == 1) {
 		__one_after_loss(h, skb, ndp);
 	} else if (h->loss_count != 2) {
@@ -362,57 +385,34 @@ bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
 		/*
 		 * Update Loss Interval database and recycle RX records
 		 */
-		new_event = tfrc_lh_interval_add(lh, h, first_li, sk);
+		is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
 		__three_after_loss(h);
 	}
-
-	/*
-	 * Update moving-average of `s' and the sum of received payload bytes.
-	 */
-	if (dccp_data_packet(skb)) {
-		const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
-
-		h->packet_size = tfrc_ewma(h->packet_size, payload, 9);
-		h->bytes_recvd += payload;
-	}
-
-	/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
-	if (!new_event)
-		tfrc_lh_update_i_mean(lh, skb);
-
-	return new_event;
+	return is_new_loss;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event);
+EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
 
-/* Compute the sending rate X_recv measured between feedback intervals */
-u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv)
+int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
 {
-	u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate;
-	s64 delta = ktime_to_us(net_timedelta(h->bytes_start));
-
-	WARN_ON(delta <= 0);
-	/*
-	 * Ensure that the sampling interval for X_recv is at least one RTT,
-	 * by extending the sampling interval backwards in time, over the last
-	 * R_(m-1) seconds, as per rfc3448bis-06, 6.2.
-	 * To reduce noise (e.g. when the RTT changes often), this is only
-	 * done when delta is smaller than RTT/2.
-	 */
-	if (last_x_recv > 0 && delta < last_rtt/2) {
-		tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n",
-			      (long)delta, (unsigned)last_rtt);
+	int i;
 
-		delta = (bytes ? delta : 0) + last_rtt;
-		bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC);
+	for (i = 0; i <= TFRC_NDUPACK; i++) {
+		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
+		if (h->ring[i] == NULL)
+			goto out_free;
 	}
 
-	if (unlikely(bytes == 0)) {
-		DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv);
-		return last_x_recv;
+	h->loss_count = h->loss_start = 0;
+	return 0;
+
+out_free:
+	while (i-- != 0) {
+		kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+		h->ring[i] = NULL;
 	}
-	return scaled_div32(bytes, delta);
+	return -ENOBUFS;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
 
 void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 {
@@ -426,81 +426,73 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
 
-static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
+/**
+ * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
 {
-	int i;
-
-	memset(h, 0, sizeof(*h));
-
-	for (i = 0; i <= TFRC_NDUPACK; i++) {
-		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
-		if (h->ring[i] == NULL) {
-			tfrc_rx_hist_purge(h);
-			return -ENOBUFS;
-		}
-	}
-	return 0;
+	return h->ring[0];
 }
 
-int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
+/**
+ * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
 {
-	if (tfrc_rx_hist_alloc(h))
-		return -ENOBUFS;
-	/*
-	 * Initialise first entry with GSR to start loss detection as early as
-	 * possible. Code using this must not use any other fields. The entry
-	 * will be overwritten once the CCID updates its received packets.
-	 */
-	tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr;
-	return 0;
+	return h->ring[h->rtt_sample_prev];
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
 
 /**
  * tfrc_rx_hist_sample_rtt  -  Sample RTT from timestamp / CCVal
- * Based on ideas presented in RFC 4342, 8.1. This function expects that no loss
- * is pending and uses the following history entries (via rtt_sample_prev):
- * - h->ring[0]  contains the most recent history entry prior to @skb;
- * - h->ring[1]  is an unused `dummy' entry when the current difference is 0;
+ * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
+ * to compute a sample with given data - calling function should check this.
  */
-void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
 {
-	struct tfrc_rx_hist_entry *last = h->ring[0];
-	u32 sample, delta_v;
-
-	/*
-	 * When not to sample:
-	 * - on non-data packets
-	 *   (RFC 4342, 8.1: CCVal only fully defined for data packets);
-	 * - when no data packets have been received yet
-	 *   (FIXME: using sampled packet size as indicator here);
-	 * - as long as there are gaps in the sequence space (pending loss).
-	 */
-	if (!dccp_data_packet(skb) || h->packet_size == 0 ||
-	    tfrc_rx_hist_loss_pending(h))
-		return;
+	u32 sample = 0,
+	    delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
+			    tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+
+	if (delta_v < 1 || delta_v > 4) {	/* unsuitable CCVal delta */
+		if (h->rtt_sample_prev == 2) {	/* previous candidate stored */
+			sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+				       tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+			if (sample)
+				sample = 4 / sample *
+				         ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
+							tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
+			else    /*
+				 * FIXME: This condition is in principle not
+				 * possible but occurs when CCID is used for
+				 * two-way data traffic. I have tried to trace
+				 * it, but the cause does not seem to be here.
+				 */
+				DCCP_BUG("please report to dccp@vger.kernel.org"
+					 " => prev = %u, last = %u",
+					 tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+					 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+		} else if (delta_v < 1) {
+			h->rtt_sample_prev = 1;
+			goto keep_ref_for_next_time;
+		}
 
-	h->rtt_sample_prev = 0;		/* reset previous candidate */
+	} else if (delta_v == 4) /* optimal match */
+		sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
+	else {			 /* suboptimal match */
+		h->rtt_sample_prev = 2;
+		goto keep_ref_for_next_time;
+	}
 
-	delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval);
-	if (delta_v == 0) {		/* less than RTT/4 difference */
-		h->rtt_sample_prev = 1;
-		return;
+	if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
+		DCCP_WARN("RTT sample %u too large, using max\n", sample);
+		sample = DCCP_SANE_RTT_MAX;
 	}
-	sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp)));
 
-	if (delta_v <= 4)		/* between RTT/4 and RTT */
-		sample *= 4 / delta_v;
-	else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2))
-		/*
-		* Optimisation: CCVal difference is greater than 1 RTT, yet the
-		* sample is less than the local RTT estimate; which means that
-		* the RTT estimate is too high.
-		* To avoid noise, it is not done if the sample is below RTT/2.
-		*/
-		return;
+	h->rtt_sample_prev = 0;	       /* use current entry as next reference */
+keep_ref_for_next_time:
 
-	/* Use a lower weight than usual to increase responsiveness */
-	h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5);
+	return sample;
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 555e65cd73a..461cc91cce8 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,28 +40,12 @@
 #include <linux/slab.h>
 #include "tfrc.h"
 
-/**
- *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
- *  @next:  next oldest entry (LIFO order)
- *  @seqno: sequence number of this entry
- *  @stamp: send time of packet with sequence number @seqno
- */
-struct tfrc_tx_hist_entry {
-	struct tfrc_tx_hist_entry *next;
-	u64			  seqno;
-	ktime_t			  stamp;
-};
-
-static inline struct tfrc_tx_hist_entry *
-	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
-{
-	while (head != NULL && head->seqno != seqno)
-		head = head->next;
-	return head;
-}
+struct tfrc_tx_hist_entry;
 
 extern int  tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
 extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
+extern u32  tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
+			     const u64 seqno, const ktime_t now);
 
 /* Subtraction a-b modulo-16, respects circular wrap-around */
 #define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
@@ -91,22 +75,12 @@ struct tfrc_rx_hist_entry {
  * @loss_count:		Number of entries in circular history
  * @loss_start:		Movable index (for loss detection)
  * @rtt_sample_prev:	Used during RTT sampling, points to candidate entry
- * @rtt_estimate:	Receiver RTT estimate
- * @packet_size:	Packet size in bytes (as per RFC 3448, 3.1)
- * @bytes_recvd:	Number of bytes received since @bytes_start
- * @bytes_start:	Start time for counting @bytes_recvd
  */
 struct tfrc_rx_hist {
 	struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
 	u8			  loss_count:2,
 				  loss_start:2;
-	/* Receiver RTT sampling */
 #define rtt_sample_prev		  loss_start
-	u32			  rtt_estimate;
-	/* Receiver sampling of application payload lengths */
-	u32			  packet_size,
-				  bytes_recvd;
-	ktime_t			  bytes_start;
 };
 
 /**
@@ -150,50 +124,20 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
 	return h->loss_count > 0;
 }
 
-/*
- * Accessor functions to retrieve parameters sampled by the RX history
- */
-static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
-{
-	if (h->packet_size == 0) {
-		DCCP_WARN("No sample for s, using fallback\n");
-		return TCP_MIN_RCVMSS;
-	}
-	return h->packet_size;
-
-}
-static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
-{
-	if (h->rtt_estimate == 0) {
-		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
-		return  DCCP_FALLBACK_RTT;
-	}
-	return h->rtt_estimate;
-}
-
-static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h)
-{
-	h->bytes_recvd = 0;
-	h->bytes_start = ktime_get_real();
-}
-
-extern u32  tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv);
-
-
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 				    const struct sk_buff *skb, const u64 ndp);
 
 extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
 
 struct tfrc_loss_hist;
-extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
-				     struct tfrc_loss_hist *lh,
-				     struct sk_buff *skb, const u64 ndp,
-				     u32 (*first_li)(struct sock *sk),
-				     struct sock *sk);
-extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
-				    const struct sk_buff *skb);
-extern int  tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
+extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
+				struct tfrc_loss_hist *lh,
+				struct sk_buff *skb, const u64 ndp,
+				u32 (*first_li)(struct sock *sk),
+				struct sock *sk);
+extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+				   const struct sk_buff *skb);
+extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
 extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
 
 #endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ede12f53de5..ed9857527ac 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -47,21 +47,6 @@ static inline u32 scaled_div32(u64 a, u64 b)
 	return result;
 }
 
-/**
- * tfrc_scaled_sqrt  -  Compute scaled integer sqrt(x) for 0 < x < 2^22-1
- * Uses scaling to improve accuracy of the integer approximation of sqrt(). The
- * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for
- * clamped RTT samples (dccp_sample_rtt).
- * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the
- * scaling factor is neutralised. For this purpose, it avoids returning zero.
- */
-static inline u16 tfrc_scaled_sqrt(const u32 sample)
-{
-	const unsigned long non_zero_sample = sample ? : 1;
-
-	return int_sqrt(non_zero_sample << 10);
-}
-
 /**
  * tfrc_ewma  -  Exponentially weighted moving average
  * @weight: Weight to be used as damping factor, in units of 1/10
@@ -73,7 +58,6 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
 
 extern u32  tfrc_calc_x(u16 s, u32 R, u32 p);
 extern u32  tfrc_calc_x_reverse_lookup(u32 fvalue);
-extern u32  tfrc_invert_loss_event_rate(u32 loss_event_rate);
 
 extern int  tfrc_tx_packet_history_init(void);
 extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 38239c4d5e1..2f20a29cffe 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -632,16 +632,8 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 
 	if (p <= TFRC_CALC_X_SPLIT) 		{     /* 0.0000 < p <= 0.05   */
 		if (p < TFRC_SMALLEST_P) {	      /* 0.0000 < p <  0.0001 */
-			/*
-			 * In the congestion-avoidance phase p decays towards 0
-			 * when there are no further losses, so this case is
-			 * natural. Truncating to p_min = 0.01% means that the
-			 * maximum achievable throughput is limited to about
-			 * X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g.
-			 * with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps.
-			 */
-			tfrc_pr_debug("Value of p (%d) below resolution. "
-				      "Substituting %d\n", p, TFRC_SMALLEST_P);
+			DCCP_WARN("Value of p (%d) below resolution. "
+				  "Substituting %d\n", p, TFRC_SMALLEST_P);
 			index = 0;
 		} else 				      /* 0.0001 <= p <= 0.05  */
 			index =  p/TFRC_SMALLEST_P - 1;
@@ -666,6 +658,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 	result = scaled_div(s, R);
 	return scaled_div32(result, f);
 }
+
 EXPORT_SYMBOL_GPL(tfrc_calc_x);
 
 /**
@@ -700,19 +693,5 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
 	index = tfrc_binsearch(fvalue, 0);
 	return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
 }
-EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
 
-/**
- * tfrc_invert_loss_event_rate  -  Compute p so that 10^6 corresponds to 100%
- * When @loss_event_rate is large, there is a chance that p is truncated to 0.
- * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
- */
-u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
-{
-	if (loss_event_rate == UINT_MAX)		/* see RFC 4342, 8.5 */
-		return 0;
-	if (unlikely(loss_event_rate == 0))		/* map 1/0 into 100% */
-		return 1000000;
-	return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
-}
-EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate);
+EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5281190aa19..b4bc6e095a0 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -42,11 +42,9 @@
 extern int dccp_debug;
 #define dccp_pr_debug(format, a...)	  DCCP_PR_DEBUG(dccp_debug, format, ##a)
 #define dccp_pr_debug_cat(format, a...)   DCCP_PRINTK(dccp_debug, format, ##a)
-#define dccp_debug(fmt, a...)		  dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
 #else
 #define dccp_pr_debug(format, a...)
 #define dccp_pr_debug_cat(format, a...)
-#define dccp_debug(format, a...)
 #endif
 
 extern struct inet_hashinfo dccp_hashinfo;
@@ -63,14 +61,11 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
  *    - DCCP-Reset    with ACK Subheader and 4 bytes of Reset Code fields
  *  Hence a safe upper bound for the maximum option length is 1020-28 = 992
  */
-#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t))
+#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int))
 #define DCCP_MAX_PACKET_HDR 28
 #define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
 #define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
 
-/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */
-#define DCCP_FEATNEG_OVERHEAD	 (32 * sizeof(uint32_t))
-
 #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
 				     * state, about 60 seconds */
 
@@ -86,13 +81,10 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
  */
 #define DCCP_RTO_MAX ((unsigned)(64 * HZ))
 
-/* DCCP base time resolution - 10 microseconds (RFC 4340, 13.1 ... 13.3) */
-#define DCCP_TIME_RESOLUTION	10
-
 /*
  * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
  */
-#define DCCP_SANE_RTT_MIN	(10 * DCCP_TIME_RESOLUTION)
+#define DCCP_SANE_RTT_MIN	100
 #define DCCP_FALLBACK_RTT	(USEC_PER_SEC / 5)
 #define DCCP_SANE_RTT_MAX	(3 * USEC_PER_SEC)
 
@@ -103,6 +95,12 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 extern int  sysctl_dccp_request_retries;
 extern int  sysctl_dccp_retries1;
 extern int  sysctl_dccp_retries2;
+extern int  sysctl_dccp_feat_sequence_window;
+extern int  sysctl_dccp_feat_rx_ccid;
+extern int  sysctl_dccp_feat_tx_ccid;
+extern int  sysctl_dccp_feat_ack_ratio;
+extern int  sysctl_dccp_feat_send_ack_vector;
+extern int  sysctl_dccp_feat_send_ndp_count;
 extern int  sysctl_dccp_tx_qlen;
 extern int  sysctl_dccp_sync_ratelimit;
 
@@ -237,22 +235,8 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
-/*
- * TX Packet Dequeueing Interface
- */
-extern void		dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
-extern bool		dccp_qpolicy_full(struct sock *sk);
-extern void		dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
-extern struct sk_buff	*dccp_qpolicy_top(struct sock *sk);
-extern struct sk_buff	*dccp_qpolicy_pop(struct sock *sk);
-extern bool		dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
-
-/*
- * TX Packet Output and TX Timers
- */
-extern void dccp_write_xmit(struct sock *sk);
+extern void dccp_write_xmit(struct sock *sk, int block);
 extern void dccp_write_space(struct sock *sk);
-extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
 
 extern void dccp_init_xmit_timers(struct sock *sk);
 static inline void dccp_clear_xmit_timers(struct sock *sk)
@@ -268,8 +252,7 @@ extern const char *dccp_state_name(const int state);
 extern void dccp_set_state(struct sock *sk, const int state);
 extern void dccp_done(struct sock *sk);
 
-extern int  dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
-			    struct sk_buff const *skb);
+extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
 
 extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 
@@ -334,14 +317,7 @@ extern struct sk_buff *dccp_ctl_make_reset(struct sock *sk,
 extern int	   dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
 extern void	   dccp_send_close(struct sock *sk, const int active);
 extern int	   dccp_invalid_packet(struct sk_buff *skb);
-
-static inline u32  dccp_sane_rtt(long usec_sample)
-{
-	if (unlikely(usec_sample <= 0 || usec_sample > DCCP_SANE_RTT_MAX))
-		DCCP_WARN("RTT sample %ld out of bounds!\n", usec_sample);
-	return clamp_val(usec_sample, DCCP_SANE_RTT_MIN, DCCP_SANE_RTT_MAX);
-}
-extern u32 dccp_sample_rtt(struct sock *sk, long delta);
+extern u32	   dccp_sample_rtt(struct sock *sk, long delta);
 
 static inline int dccp_bad_service_code(const struct sock *sk,
 					const __be32 service)
@@ -435,62 +411,36 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
 static inline void dccp_update_gsr(struct sock *sk, u64 seq)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	const struct dccp_minisock *dmsk = dccp_msk(sk);
 
 	dp->dccps_gsr = seq;
-	/* Sequence validity window depends on remote Sequence Window (7.5.1) */
-	dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
-	/*
-	 * Adjust SWL so that it is not below ISR. In contrast to RFC 4340,
-	 * 7.5.1 we perform this check beyond the initial handshake: W/W' are
-	 * always > 32, so for the first W/W' packets in the lifetime of a
-	 * connection we always have to adjust SWL.
-	 * A second reason why we are doing this is that the window depends on
-	 * the feature-remote value of Sequence Window: nothing stops the peer
-	 * from updating this value while we are busy adjusting SWL for the
-	 * first W packets (we would have to count from scratch again then).
-	 * Therefore it is safer to always make sure that the Sequence Window
-	 * is not artificially extended by a peer who grows SWL downwards by
-	 * continually updating the feature-remote Sequence-Window.
-	 * If sequence numbers wrap it is bad luck. But that will take a while
-	 * (48 bit), and this measure prevents Sequence-number attacks.
-	 */
-	if (before48(dp->dccps_swl, dp->dccps_isr))
-		dp->dccps_swl = dp->dccps_isr;
-	dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
+	dccp_set_seqno(&dp->dccps_swl,
+		       dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4));
+	dccp_set_seqno(&dp->dccps_swh,
+		       dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4);
 }
 
 static inline void dccp_update_gss(struct sock *sk, u64 seq)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
-	dp->dccps_gss = seq;
-	/* Ack validity window depends on local Sequence Window value (7.5.1) */
-	dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
-	/* Adjust AWL so that it is not below ISS - see comment above for SWL */
-	if (before48(dp->dccps_awl, dp->dccps_iss))
-		dp->dccps_awl = dp->dccps_iss;
-	dp->dccps_awh = dp->dccps_gss;
-}
-
-static inline int dccp_ackvec_pending(const struct sock *sk)
-{
-	return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL &&
-	       !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec);
+	dp->dccps_awh = dp->dccps_gss = seq;
+	dccp_set_seqno(&dp->dccps_awl,
+		       (dp->dccps_gss -
+			dccp_msk(sk)->dccpms_sequence_window + 1));
 }
 
 static inline int dccp_ack_pending(const struct sock *sk)
 {
-	return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
+	const struct dccp_sock *dp = dccp_sk(sk);
+	return dp->dccps_timestamp_echo != 0 ||
+#ifdef CONFIG_IP_DCCP_ACKVEC
+	       (dccp_msk(sk)->dccpms_send_ack_vector &&
+		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
+#endif
+	       inet_csk_ack_scheduled(sk);
 }
 
-extern int  dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
-extern int  dccp_feat_finalise_settings(struct dccp_sock *dp);
-extern int  dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
-extern int  dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
-				  struct sk_buff *skb);
-extern int  dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
-extern void dccp_feat_list_purge(struct list_head *fn_list);
-
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
 extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
 extern int dccp_insert_option_elapsed_time(struct sock *sk,
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 93aae7c9555..d8a3509b26f 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -29,7 +29,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_backoff	= icsk->icsk_backoff;
 	info->tcpi_pmtu		= icsk->icsk_pmtu_cookie;
 
-	if (dp->dccps_hc_rx_ackvec != NULL)
+	if (dccp_msk(sk)->dccpms_send_ack_vector)
 		info->tcpi_options |= TCPI_OPT_SACK;
 
 	ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index f94c7c9d1a7..933a0ecf8d4 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1,19 +1,11 @@
 /*
  *  net/dccp/feat.c
  *
- *  Feature negotiation for the DCCP protocol (RFC 4340, section 6)
- *
- *  Copyright (c) 2008 The University of Aberdeen, Scotland, UK
- *  Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
- *  Rewrote from scratch, some bits from earlier code by
- *  Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
- *
+ *  An implementation of the DCCP protocol
+ *  Andrea Bittau <a.bittau@cs.ucl.ac.uk>
  *
  *  ASSUMPTIONS
  *  -----------
- *  o Feature negotiation is coordinated with connection setup (as in TCP), wild
- *    changes of parameters of an established connection are not supported.
- *  o Changing NN values (Ack Ratio only) is supported in state OPEN/PARTOPEN.
  *  o All currently known SP features have 1-byte quantities. If in the future
  *    extensions of RFCs 4340..42 define features with item lengths larger than
  *    one byte, a feature-specific extension of the code will be required.
@@ -23,1510 +15,635 @@
  *  as published by the Free Software Foundation; either version
  *  2 of the License, or (at your option) any later version.
  */
+
 #include <linux/module.h>
+
 #include "ccid.h"
 #include "feat.h"
 
-/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */
-unsigned long	sysctl_dccp_sequence_window __read_mostly = 100;
-int		sysctl_dccp_rx_ccid	    __read_mostly = 2,
-		sysctl_dccp_tx_ccid	    __read_mostly = 2;
+#define DCCP_FEAT_SP_NOAGREE (-123)
 
-/*
- * Feature activation handlers.
- *
- * These all use an u64 argument, to provide enough room for NN/SP features. At
- * this stage the negotiated values have been checked to be within their range.
- */
-static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
+int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
+		     u8 *val, u8 len, gfp_t gfp)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid *new_ccid = ccid_new(ccid, sk, rx, gfp_any());
+	struct dccp_opt_pend *opt;
 
-	if (new_ccid == NULL)
-		return -ENOMEM;
+	dccp_feat_debug(type, feature, *val);
 
-	if (rx) {
-		ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
-		dp->dccps_hc_rx_ccid = new_ccid;
-	} else {
-		ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
-		dp->dccps_hc_tx_ccid = new_ccid;
+	if (len > 3) {
+		DCCP_WARN("invalid length %d\n", len);
+		return -EINVAL;
+	}
+	/* XXX add further sanity checks */
+
+	/* check if that feature is already being negotiated */
+	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+		/* ok we found a negotiation for this option already */
+		if (opt->dccpop_feat == feature && opt->dccpop_type == type) {
+			dccp_pr_debug("Replacing old\n");
+			/* replace */
+			BUG_ON(opt->dccpop_val == NULL);
+			kfree(opt->dccpop_val);
+			opt->dccpop_val	 = val;
+			opt->dccpop_len	 = len;
+			opt->dccpop_conf = 0;
+			return 0;
+		}
 	}
-	return 0;
-}
 
-static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
-{
-	struct dccp_sock *dp = dccp_sk(sk);
+	/* negotiation for a new feature */
+	opt = kmalloc(sizeof(*opt), gfp);
+	if (opt == NULL)
+		return -ENOMEM;
 
-	if (rx) {
-		dp->dccps_r_seq_win = seq_win;
-		/* propagate changes to update SWL/SWH */
-		dccp_update_gsr(sk, dp->dccps_gsr);
-	} else {
-		dp->dccps_l_seq_win = seq_win;
-		/* propagate changes to update AWL */
-		dccp_update_gss(sk, dp->dccps_gss);
-	}
-	return 0;
-}
+	opt->dccpop_type = type;
+	opt->dccpop_feat = feature;
+	opt->dccpop_len	 = len;
+	opt->dccpop_val	 = val;
+	opt->dccpop_conf = 0;
+	opt->dccpop_sc	 = NULL;
 
-static int dccp_hdlr_ack_ratio(struct sock *sk, u64 ratio, bool rx)
-{
-#ifndef __CCID2_COPES_GRACEFULLY_WITH_DYNAMIC_ACK_RATIO_UPDATES__
-	/*
-	 * FIXME: This is required until several problems in the CCID-2 code are
-	 * resolved. The CCID-2 code currently does not cope well; using dynamic
-	 * Ack Ratios greater than 1 caused instabilities. These were manifest
-	 * in hangups and long RTO timeouts (1...3 seconds). Until this has been
-	 * stabilised, it is safer not to activate dynamic Ack Ratio changes.
-	 */
-	dccp_pr_debug("Not changing %s Ack Ratio from 1 to %u\n",
-		      rx ? "RX" : "TX", (u16)ratio);
-	ratio = 1;
-#endif
-	if (rx)
-		dccp_sk(sk)->dccps_r_ack_ratio = ratio;
-	else
-		dccp_sk(sk)->dccps_l_ack_ratio = ratio;
+	BUG_ON(opt->dccpop_val == NULL);
+
+	list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending);
 	return 0;
 }
 
-static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx)
+EXPORT_SYMBOL_GPL(dccp_feat_change);
+
+static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_minisock *dmsk = dccp_msk(sk);
+	/* figure out if we are changing our CCID or the peer's */
+	const int rx = type == DCCPO_CHANGE_R;
+	const u8 ccid_nr = rx ? dmsk->dccpms_rx_ccid : dmsk->dccpms_tx_ccid;
+	struct ccid *new_ccid;
+
+	/* Check if nothing is being changed. */
+	if (ccid_nr == new_ccid_nr)
+		return 0;
+
+	new_ccid = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC);
+	if (new_ccid == NULL)
+		return -ENOMEM;
 
 	if (rx) {
-		if (enable && dp->dccps_hc_rx_ackvec == NULL) {
-			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(gfp_any());
-			if (dp->dccps_hc_rx_ackvec == NULL)
-				return -ENOMEM;
-		} else if (!enable) {
-			dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
-			dp->dccps_hc_rx_ackvec = NULL;
-		}
+		ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+		dp->dccps_hc_rx_ccid = new_ccid;
+		dmsk->dccpms_rx_ccid = new_ccid_nr;
+	} else {
+		ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+		dp->dccps_hc_tx_ccid = new_ccid;
+		dmsk->dccpms_tx_ccid = new_ccid_nr;
 	}
-	return 0;
-}
 
-static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx)
-{
-	if (!rx)
-		dccp_sk(sk)->dccps_send_ndp_count = (enable > 0);
 	return 0;
 }
 
-/*
- * Minimum Checksum Coverage is located at the RX side (9.2.1). This means that
- * `rx' holds when the sending peer informs about his partial coverage via a
- * ChangeR() option. In the other case, we are the sender and the receiver
- * announces its coverage via ChangeL() options. The policy here is to honour
- * such communication by enabling the corresponding partial coverage - but only
- * if it has not been set manually before; the warning here means that all
- * packets will be dropped.
- */
-static int dccp_hdlr_min_cscov(struct sock *sk, u64 cscov, bool rx)
+static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
+	dccp_feat_debug(type, feat, val);
 
-	if (rx)
-		dp->dccps_pcrlen = cscov;
-	else {
-		if (dp->dccps_pcslen == 0)
-			dp->dccps_pcslen = cscov;
-		else if (cscov > dp->dccps_pcslen)
-			DCCP_WARN("CsCov %u too small, peer requires >= %u\n",
-				  dp->dccps_pcslen, (u8)cscov);
+	switch (feat) {
+	case DCCPF_CCID:
+		return dccp_feat_update_ccid(sk, type, val);
+	default:
+		dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n",
+			      dccp_feat_typename(type), feat);
+		break;
 	}
 	return 0;
 }
 
-static const struct {
-	u8			feat_num;		/* DCCPF_xxx */
-	enum dccp_feat_type	rxtx;			/* RX or TX  */
-	enum dccp_feat_type	reconciliation;		/* SP or NN  */
-	u8			default_value;		/* as in 6.4 */
-	int (*activation_hdlr)(struct sock *sk, u64 val, bool rx);
-/*
- *    Lookup table for location and type of features (from RFC 4340/4342)
- *  +--------------------------+----+-----+----+----+---------+-----------+
- *  | Feature                  | Location | Reconc. | Initial |  Section  |
- *  |                          | RX | TX  | SP | NN |  Value  | Reference |
- *  +--------------------------+----+-----+----+----+---------+-----------+
- *  | DCCPF_CCID               |    |  X  | X  |    |   2     | 10        |
- *  | DCCPF_SHORT_SEQNOS       |    |  X  | X  |    |   0     |  7.6.1    |
- *  | DCCPF_SEQUENCE_WINDOW    |    |  X  |    | X  | 100     |  7.5.2    |
- *  | DCCPF_ECN_INCAPABLE      | X  |     | X  |    |   0     | 12.1      |
- *  | DCCPF_ACK_RATIO          |    |  X  |    | X  |   2     | 11.3      |
- *  | DCCPF_SEND_ACK_VECTOR    | X  |     | X  |    |   0     | 11.5      |
- *  | DCCPF_SEND_NDP_COUNT     |    |  X  | X  |    |   0     |  7.7.2    |
- *  | DCCPF_MIN_CSUM_COVER     | X  |     | X  |    |   0     |  9.2.1    |
- *  | DCCPF_DATA_CHECKSUM      | X  |     | X  |    |   0     |  9.3.1    |
- *  | DCCPF_SEND_LEV_RATE      | X  |     | X  |    |   0     | 4342/8.4  |
- *  +--------------------------+----+-----+----+----+---------+-----------+
- */
-} dccp_feat_table[] = {
-	{ DCCPF_CCID,		 FEAT_AT_TX, FEAT_SP, 2,   dccp_hdlr_ccid     },
-	{ DCCPF_SHORT_SEQNOS,	 FEAT_AT_TX, FEAT_SP, 0,   NULL },
-	{ DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100, dccp_hdlr_seq_win  },
-	{ DCCPF_ECN_INCAPABLE,	 FEAT_AT_RX, FEAT_SP, 0,   NULL },
-	{ DCCPF_ACK_RATIO,	 FEAT_AT_TX, FEAT_NN, 2,   dccp_hdlr_ack_ratio},
-	{ DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0,   dccp_hdlr_ackvec   },
-	{ DCCPF_SEND_NDP_COUNT,  FEAT_AT_TX, FEAT_SP, 0,   dccp_hdlr_ndp      },
-	{ DCCPF_MIN_CSUM_COVER,  FEAT_AT_RX, FEAT_SP, 0,   dccp_hdlr_min_cscov},
-	{ DCCPF_DATA_CHECKSUM,	 FEAT_AT_RX, FEAT_SP, 0,   NULL },
-	{ DCCPF_SEND_LEV_RATE,	 FEAT_AT_RX, FEAT_SP, 0,   NULL },
-};
-#define DCCP_FEAT_SUPPORTED_MAX		ARRAY_SIZE(dccp_feat_table)
-
-/**
- * dccp_feat_index  -  Hash function to map feature number into array position
- * Returns consecutive array index or -1 if the feature is not understood.
- */
-static int dccp_feat_index(u8 feat_num)
+static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
+			       u8 *rpref, u8 rlen)
 {
-	/* The first 9 entries are occupied by the types from RFC 4340, 6.4 */
-	if (feat_num > DCCPF_RESERVED && feat_num <= DCCPF_DATA_CHECKSUM)
-		return feat_num - 1;
+	struct dccp_sock *dp = dccp_sk(sk);
+	u8 *spref, slen, *res = NULL;
+	int i, j, rc, agree = 1;
 
+	BUG_ON(rpref == NULL);
+
+	/* check if we are the black sheep */
+	if (dp->dccps_role == DCCP_ROLE_CLIENT) {
+		spref = rpref;
+		slen  = rlen;
+		rpref = opt->dccpop_val;
+		rlen  = opt->dccpop_len;
+	} else {
+		spref = opt->dccpop_val;
+		slen  = opt->dccpop_len;
+	}
 	/*
-	 * Other features: add cases for new feature types here after adding
-	 * them to the above table.
+	 * Now we have server preference list in spref and client preference in
+	 * rpref
 	 */
-	switch (feat_num) {
-	case DCCPF_SEND_LEV_RATE:
-			return DCCP_FEAT_SUPPORTED_MAX - 1;
-	}
-	return -1;
-}
-
-static u8 dccp_feat_type(u8 feat_num)
-{
-	int idx = dccp_feat_index(feat_num);
-
-	if (idx < 0)
-		return FEAT_UNKNOWN;
-	return dccp_feat_table[idx].reconciliation;
-}
+	BUG_ON(spref == NULL);
+	BUG_ON(rpref == NULL);
 
-static int dccp_feat_default_value(u8 feat_num)
-{
-	int idx = dccp_feat_index(feat_num);
+	/* FIXME sanity check vals */
 
-	return idx < 0 ? : dccp_feat_table[idx].default_value;
-}
-
-/*
- *	Debugging and verbose-printing section
- */
-static const char *dccp_feat_fname(const u8 feat)
-{
-	static const char *feature_names[] = {
-		[DCCPF_RESERVED]	= "Reserved",
-		[DCCPF_CCID]		= "CCID",
-		[DCCPF_SHORT_SEQNOS]	= "Allow Short Seqnos",
-		[DCCPF_SEQUENCE_WINDOW]	= "Sequence Window",
-		[DCCPF_ECN_INCAPABLE]	= "ECN Incapable",
-		[DCCPF_ACK_RATIO]	= "Ack Ratio",
-		[DCCPF_SEND_ACK_VECTOR]	= "Send ACK Vector",
-		[DCCPF_SEND_NDP_COUNT]	= "Send NDP Count",
-		[DCCPF_MIN_CSUM_COVER]	= "Min. Csum Coverage",
-		[DCCPF_DATA_CHECKSUM]	= "Send Data Checksum",
-	};
-	if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
-		return feature_names[DCCPF_RESERVED];
-
-	if (feat ==  DCCPF_SEND_LEV_RATE)
-		return "Send Loss Event Rate";
-	if (feat >= DCCPF_MIN_CCID_SPECIFIC)
-		return "CCID-specific";
-
-	return feature_names[feat];
-}
-
-static const char *dccp_feat_sname[] = { "DEFAULT", "INITIALISING", "CHANGING",
-					 "UNSTABLE", "STABLE" };
-
-#ifdef CONFIG_IP_DCCP_DEBUG
-static const char *dccp_feat_oname(const u8 opt)
-{
-	switch (opt) {
-	case DCCPO_CHANGE_L:  return "Change_L";
-	case DCCPO_CONFIRM_L: return "Confirm_L";
-	case DCCPO_CHANGE_R:  return "Change_R";
-	case DCCPO_CONFIRM_R: return "Confirm_R";
+	/* Are values in any order?  XXX Lame "algorithm" here */
+	for (i = 0; i < slen; i++) {
+		for (j = 0; j < rlen; j++) {
+			if (spref[i] == rpref[j]) {
+				res = &spref[i];
+				break;
+			}
+		}
+		if (res)
+			break;
 	}
-	return NULL;
-}
 
-static void dccp_feat_printval(u8 feat_num, dccp_feat_val const *val)
-{
-	u8 i, type = dccp_feat_type(feat_num);
-
-	if (val == NULL || (type == FEAT_SP && val->sp.vec == NULL))
-		dccp_pr_debug_cat("(NULL)");
-	else if (type == FEAT_SP)
-		for (i = 0; i < val->sp.len; i++)
-			dccp_pr_debug_cat("%s%u", i ? " " : "", val->sp.vec[i]);
-	else if (type == FEAT_NN)
-		dccp_pr_debug_cat("%llu", (unsigned long long)val->nn);
-	else
-		dccp_pr_debug_cat("unknown type %u", type);
-}
-
-static void dccp_feat_printvals(u8 feat_num, u8 *list, u8 len)
-{
-	u8 type = dccp_feat_type(feat_num);
-	dccp_feat_val fval = { .sp.vec = list, .sp.len = len };
-
-	if (type == FEAT_NN)
-		fval.nn = dccp_decode_value_var(list, len);
-	dccp_feat_printval(feat_num, &fval);
-}
+	/* we didn't agree on anything */
+	if (res == NULL) {
+		/* confirm previous value */
+		switch (opt->dccpop_feat) {
+		case DCCPF_CCID:
+			/* XXX did i get this right? =P */
+			if (opt->dccpop_type == DCCPO_CHANGE_L)
+				res = &dccp_msk(sk)->dccpms_tx_ccid;
+			else
+				res = &dccp_msk(sk)->dccpms_rx_ccid;
+			break;
 
-static void dccp_feat_print_entry(struct dccp_feat_entry const *entry)
-{
-	dccp_debug("   * %s %s = ", entry->is_local ? "local" : "remote",
-				    dccp_feat_fname(entry->feat_num));
-	dccp_feat_printval(entry->feat_num, &entry->val);
-	dccp_pr_debug_cat(", state=%s %s\n", dccp_feat_sname[entry->state],
-			  entry->needs_confirm ? "(Confirm pending)" : "");
-}
+		default:
+			DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat);
+			/* XXX implement res */
+			return -EFAULT;
+		}
 
-#define dccp_feat_print_opt(opt, feat, val, len, mandatory)	do {	      \
-	dccp_pr_debug("%s(%s, ", dccp_feat_oname(opt), dccp_feat_fname(feat));\
-	dccp_feat_printvals(feat, val, len);				      \
-	dccp_pr_debug_cat(") %s\n", mandatory ? "!" : "");	} while (0)
-
-#define dccp_feat_print_fnlist(fn_list)  {		\
-	const struct dccp_feat_entry *___entry;		\
-							\
-	dccp_pr_debug("List Dump:\n");			\
-	list_for_each_entry(___entry, fn_list, node)	\
-		dccp_feat_print_entry(___entry);	\
-}
-#else	/* ! CONFIG_IP_DCCP_DEBUG */
-#define dccp_feat_print_opt(opt, feat, val, len, mandatory)
-#define dccp_feat_print_fnlist(fn_list)
-#endif
+		dccp_pr_debug("Don't agree... reconfirming %d\n", *res);
+		agree = 0; /* this is used for mandatory options... */
+	}
 
-static int __dccp_feat_activate(struct sock *sk, const int idx,
-				const bool is_local, dccp_feat_val const *fval)
-{
-	bool rx;
-	u64 val;
+	/* need to put result and our preference list */
+	rlen = 1 + opt->dccpop_len;
+	rpref = kmalloc(rlen, GFP_ATOMIC);
+	if (rpref == NULL)
+		return -ENOMEM;
 
-	if (idx < 0 || idx >= DCCP_FEAT_SUPPORTED_MAX)
-		return -1;
-	if (dccp_feat_table[idx].activation_hdlr == NULL)
-		return 0;
+	*rpref = *res;
+	memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len);
 
-	if (fval == NULL) {
-		val = dccp_feat_table[idx].default_value;
-	} else if (dccp_feat_table[idx].reconciliation == FEAT_SP) {
-		if (fval->sp.vec == NULL) {
-			/*
-			 * This can happen when an empty Confirm is sent
-			 * for an SP (i.e. known) feature. In this case
-			 * we would be using the default anyway.
-			 */
-			DCCP_CRIT("Feature #%d undefined: using default", idx);
-			val = dccp_feat_table[idx].default_value;
-		} else {
-			val = fval->sp.vec[0];
+	/* put it in the "confirm queue" */
+	if (opt->dccpop_sc == NULL) {
+		opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC);
+		if (opt->dccpop_sc == NULL) {
+			kfree(rpref);
+			return -ENOMEM;
 		}
 	} else {
-		val = fval->nn;
+		/* recycle the confirm slot */
+		BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
+		kfree(opt->dccpop_sc->dccpoc_val);
+		dccp_pr_debug("recycling confirm slot\n");
+	}
+	memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc));
+
+	opt->dccpop_sc->dccpoc_val = rpref;
+	opt->dccpop_sc->dccpoc_len = rlen;
+
+	/* update the option on our side [we are about to send the confirm] */
+	rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res);
+	if (rc) {
+		kfree(opt->dccpop_sc->dccpoc_val);
+		kfree(opt->dccpop_sc);
+		opt->dccpop_sc = NULL;
+		return rc;
 	}
 
-	/* Location is RX if this is a local-RX or remote-TX feature */
-	rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX));
-
-	dccp_debug("   -> activating %s %s, %sval=%llu\n", rx ? "RX" : "TX",
-		   dccp_feat_fname(dccp_feat_table[idx].feat_num),
-		   fval ? "" : "default ",  (unsigned long long)val);
-
-	return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
-}
-
-/**
- * dccp_feat_activate  -  Activate feature value on socket
- * @sk: fully connected DCCP socket (after handshake is complete)
- * @feat_num: feature to activate, one of %dccp_feature_numbers
- * @local: whether local (1) or remote (0) @feat_num is meant
- * @fval: the value (SP or NN) to activate, or NULL to use the default value
- * For general use this function is preferable over __dccp_feat_activate().
- */
-static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local,
-			      dccp_feat_val const *fval)
-{
-	return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval);
-}
-
-/* Test for "Req'd" feature (RFC 4340, 6.4) */
-static inline int dccp_feat_must_be_understood(u8 feat_num)
-{
-	return	feat_num == DCCPF_CCID || feat_num == DCCPF_SHORT_SEQNOS ||
-		feat_num == DCCPF_SEQUENCE_WINDOW;
-}
+	dccp_pr_debug("Will confirm %d\n", *rpref);
 
-/* copy constructor, fval must not already contain allocated memory */
-static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
-{
-	fval->sp.len = len;
-	if (fval->sp.len > 0) {
-		fval->sp.vec = kmemdup(val, len, gfp_any());
-		if (fval->sp.vec == NULL) {
-			fval->sp.len = 0;
-			return -ENOBUFS;
-		}
+	/* say we want to change to X but we just got a confirm X, suppress our
+	 * change
+	 */
+	if (!opt->dccpop_conf) {
+		if (*opt->dccpop_val == *res)
+			opt->dccpop_conf = 1;
+		dccp_pr_debug("won't ask for change of same feature\n");
 	}
-	return 0;
-}
 
-static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val)
-{
-	if (unlikely(val == NULL))
-		return;
-	if (dccp_feat_type(feat_num) == FEAT_SP)
-		kfree(val->sp.vec);
-	memset(val, 0, sizeof(*val));
+	return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */
 }
 
-static struct dccp_feat_entry *
-	      dccp_feat_clone_entry(struct dccp_feat_entry const *original)
+static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
 {
-	struct dccp_feat_entry *new;
-	u8 type = dccp_feat_type(original->feat_num);
-
-	if (type == FEAT_UNKNOWN)
-		return NULL;
+	struct dccp_minisock *dmsk = dccp_msk(sk);
+	struct dccp_opt_pend *opt;
+	int rc = 1;
+	u8 t;
 
-	new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any());
-	if (new == NULL)
-		return NULL;
+	/*
+	 * We received a CHANGE.  We gotta match it against our own preference
+	 * list.  If we got a CHANGE_R it means it's a change for us, so we need
+	 * to compare our CHANGE_L list.
+	 */
+	if (type == DCCPO_CHANGE_L)
+		t = DCCPO_CHANGE_R;
+	else
+		t = DCCPO_CHANGE_L;
 
-	if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val,
-						      original->val.sp.vec,
-						      original->val.sp.len)) {
-		kfree(new);
-		return NULL;
-	}
-	return new;
-}
+	/* find our preference list for this feature */
+	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+		if (opt->dccpop_type != t || opt->dccpop_feat != feature)
+			continue;
 
-static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry)
-{
-	if (entry != NULL) {
-		dccp_feat_val_destructor(entry->feat_num, &entry->val);
-		kfree(entry);
+		/* find the winner from the two preference lists */
+		rc = dccp_feat_reconcile(sk, opt, val, len);
+		break;
 	}
-}
 
-/*
- * List management functions
- *
- * Feature negotiation lists rely on and maintain the following invariants:
- * - each feat_num in the list is known, i.e. we know its type and default value
- * - each feat_num/is_local combination is unique (old entries are overwritten)
- * - SP values are always freshly allocated
- * - list is sorted in increasing order of feature number (faster lookup)
- */
-static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list,
-						     u8 feat_num, bool is_local)
-{
-	struct dccp_feat_entry *entry;
+	/* We didn't deal with the change.  This can happen if we have no
+	 * preference list for the feature.  In fact, it just shouldn't
+	 * happen---if we understand a feature, we should have a preference list
+	 * with at least the default value.
+	 */
+	BUG_ON(rc == 1);
 
-	list_for_each_entry(entry, fn_list, node)
-		if (entry->feat_num == feat_num && entry->is_local == is_local)
-			return entry;
-		else if (entry->feat_num > feat_num)
-			break;
-	return NULL;
+	return rc;
 }
 
-/**
- * dccp_feat_entry_new  -  Central list update routine (called by all others)
- * @head:  list to add to
- * @feat:  feature number
- * @local: whether the local (1) or remote feature with number @feat is meant
- * This is the only constructor and serves to ensure the above invariants.
- */
-static struct dccp_feat_entry *
-	      dccp_feat_entry_new(struct list_head *head, u8 feat, bool local)
+static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
 {
-	struct dccp_feat_entry *entry;
-
-	list_for_each_entry(entry, head, node)
-		if (entry->feat_num == feat && entry->is_local == local) {
-			dccp_feat_val_destructor(entry->feat_num, &entry->val);
-			return entry;
-		} else if (entry->feat_num > feat) {
-			head = &entry->node;
-			break;
-		}
+	struct dccp_opt_pend *opt;
+	struct dccp_minisock *dmsk = dccp_msk(sk);
+	u8 *copy;
+	int rc;
 
-	entry = kmalloc(sizeof(*entry), gfp_any());
-	if (entry != NULL) {
-		entry->feat_num = feat;
-		entry->is_local = local;
-		list_add_tail(&entry->node, head);
+	/* NN features must be Change L (sec. 6.3.2) */
+	if (type != DCCPO_CHANGE_L) {
+		dccp_pr_debug("received %s for NN feature %d\n",
+				dccp_feat_typename(type), feature);
+		return -EFAULT;
 	}
-	return entry;
-}
 
-/**
- * dccp_feat_push_change  -  Add/overwrite a Change option in the list
- * @fn_list: feature-negotiation list to update
- * @feat: one of %dccp_feature_numbers
- * @local: whether local (1) or remote (0) @feat_num is meant
- * @needs_mandatory: whether to use Mandatory feature negotiation options
- * @fval: pointer to NN/SP value to be inserted (will be copied)
- */
-static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local,
-				 u8 mandatory, dccp_feat_val *fval)
-{
-	struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
+	/* XXX sanity check opt val */
 
-	if (new == NULL)
+	/* copy option so we can confirm it */
+	opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
+	if (opt == NULL)
 		return -ENOMEM;
 
-	new->feat_num	     = feat;
-	new->is_local	     = local;
-	new->state	     = FEAT_INITIALISING;
-	new->needs_confirm   = 0;
-	new->empty_confirm   = 0;
-	new->val	     = *fval;
-	new->needs_mandatory = mandatory;
+	copy = kmemdup(val, len, GFP_ATOMIC);
+	if (copy == NULL) {
+		kfree(opt);
+		return -ENOMEM;
+	}
 
-	return 0;
-}
+	opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */
+	opt->dccpop_feat = feature;
+	opt->dccpop_val	 = copy;
+	opt->dccpop_len	 = len;
 
-/**
- * dccp_feat_push_confirm  -  Add a Confirm entry to the FN list
- * @fn_list: feature-negotiation list to add to
- * @feat: one of %dccp_feature_numbers
- * @local: whether local (1) or remote (0) @feat_num is being confirmed
- * @fval: pointer to NN/SP value to be inserted or NULL
- * Returns 0 on success, a Reset code for further processing otherwise.
- */
-static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local,
-				  dccp_feat_val *fval)
-{
-	struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
+	/* change feature */
+	rc = dccp_feat_update(sk, type, feature, *val);
+	if (rc) {
+		kfree(opt->dccpop_val);
+		kfree(opt);
+		return rc;
+	}
 
-	if (new == NULL)
-		return DCCP_RESET_CODE_TOO_BUSY;
+	dccp_feat_debug(type, feature, *copy);
 
-	new->feat_num	     = feat;
-	new->is_local	     = local;
-	new->state	     = FEAT_STABLE;	/* transition in 6.6.2 */
-	new->needs_confirm   = 1;
-	new->empty_confirm   = (fval == NULL);
-	new->val.nn	     = 0;		/* zeroes the whole structure */
-	if (!new->empty_confirm)
-		new->val     = *fval;
-	new->needs_mandatory = 0;
+	list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
 
 	return 0;
 }
 
-static int dccp_push_empty_confirm(struct list_head *fn_list, u8 feat, u8 local)
+static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
+				    u8 type, u8 feature)
 {
-	return dccp_feat_push_confirm(fn_list, feat, local, NULL);
-}
+	/* XXX check if other confirms for that are queued and recycle slot */
+	struct dccp_opt_pend *opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
 
-static inline void dccp_feat_list_pop(struct dccp_feat_entry *entry)
-{
-	list_del(&entry->node);
-	dccp_feat_entry_destructor(entry);
-}
-
-void dccp_feat_list_purge(struct list_head *fn_list)
-{
-	struct dccp_feat_entry *entry, *next;
-
-	list_for_each_entry_safe(entry, next, fn_list, node)
-		dccp_feat_entry_destructor(entry);
-	INIT_LIST_HEAD(fn_list);
-}
-EXPORT_SYMBOL_GPL(dccp_feat_list_purge);
-
-/* generate @to as full clone of @from - @to must not contain any nodes */
-int dccp_feat_clone_list(struct list_head const *from, struct list_head *to)
-{
-	struct dccp_feat_entry *entry, *new;
-
-	INIT_LIST_HEAD(to);
-	list_for_each_entry(entry, from, node) {
-		new = dccp_feat_clone_entry(entry);
-		if (new == NULL)
-			goto cloning_failed;
-		list_add_tail(&new->node, to);
+	if (opt == NULL) {
+		/* XXX what do we do?  Ignoring should be fine.  It's a change
+		 * after all =P
+		 */
+		return;
 	}
-	return 0;
 
-cloning_failed:
-	dccp_feat_list_purge(to);
-	return -ENOMEM;
-}
+	switch (type) {
+	case DCCPO_CHANGE_L:
+		opt->dccpop_type = DCCPO_CONFIRM_R;
+		break;
+	case DCCPO_CHANGE_R:
+		opt->dccpop_type = DCCPO_CONFIRM_L;
+		break;
+	default:
+		DCCP_WARN("invalid type %d\n", type);
+		kfree(opt);
+		return;
+	}
+	opt->dccpop_feat = feature;
+	opt->dccpop_val	 = NULL;
+	opt->dccpop_len	 = 0;
 
-/**
- * dccp_feat_valid_nn_length  -  Enforce length constraints on NN options
- * Length is between 0 and %DCCP_OPTVAL_MAXLEN. Used for outgoing packets only,
- * incoming options are accepted as long as their values are valid.
- */
-static u8 dccp_feat_valid_nn_length(u8 feat_num)
-{
-	if (feat_num == DCCPF_ACK_RATIO)	/* RFC 4340, 11.3 and 6.6.8 */
-		return 2;
-	if (feat_num == DCCPF_SEQUENCE_WINDOW)	/* RFC 4340, 7.5.2 and 6.5  */
-		return 6;
-	return 0;
-}
+	/* change feature */
+	dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature);
 
-static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val)
-{
-	switch (feat_num) {
-	case DCCPF_ACK_RATIO:
-		return val <= DCCPF_ACK_RATIO_MAX;
-	case DCCPF_SEQUENCE_WINDOW:
-		return val >= DCCPF_SEQ_WMIN && val <= DCCPF_SEQ_WMAX;
-	}
-	return 0;	/* feature unknown - so we can't tell */
+	list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
 }
 
-/* check that SP values are within the ranges defined in RFC 4340 */
-static u8 dccp_feat_is_valid_sp_val(u8 feat_num, u8 val)
+static void dccp_feat_flush_confirm(struct sock *sk)
 {
-	switch (feat_num) {
-	case DCCPF_CCID:
-		return val == DCCPC_CCID2 || val == DCCPC_CCID3;
-	/* Type-check Boolean feature values: */
-	case DCCPF_SHORT_SEQNOS:
-	case DCCPF_ECN_INCAPABLE:
-	case DCCPF_SEND_ACK_VECTOR:
-	case DCCPF_SEND_NDP_COUNT:
-	case DCCPF_DATA_CHECKSUM:
-	case DCCPF_SEND_LEV_RATE:
-		return val < 2;
-	case DCCPF_MIN_CSUM_COVER:
-		return val < 16;
-	}
-	return 0;			/* feature unknown */
-}
+	struct dccp_minisock *dmsk = dccp_msk(sk);
+	/* Check if there is anything to confirm in the first place */
+	int yes = !list_empty(&dmsk->dccpms_conf);
 
-static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len)
-{
-	if (sp_list == NULL || sp_len < 1)
-		return 0;
-	while (sp_len--)
-		if (!dccp_feat_is_valid_sp_val(feat_num, *sp_list++))
-			return 0;
-	return 1;
-}
+	if (!yes) {
+		struct dccp_opt_pend *opt;
 
-/**
- * dccp_feat_insert_opts  -  Generate FN options from current list state
- * @skb: next sk_buff to be sent to the peer
- * @dp: for client during handshake and general negotiation
- * @dreq: used by the server only (all Changes/Confirms in LISTEN/RESPOND)
- */
-int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
-			  struct sk_buff *skb)
-{
-	struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
-	struct dccp_feat_entry *pos, *next;
-	u8 opt, type, len, *ptr, nn_in_nbo[DCCP_OPTVAL_MAXLEN];
-	bool rpt;
-
-	/* put entries into @skb in the order they appear in the list */
-	list_for_each_entry_safe_reverse(pos, next, fn, node) {
-		opt  = dccp_feat_genopt(pos);
-		type = dccp_feat_type(pos->feat_num);
-		rpt  = false;
-
-		if (pos->empty_confirm) {
-			len = 0;
-			ptr = NULL;
-		} else {
-			if (type == FEAT_SP) {
-				len = pos->val.sp.len;
-				ptr = pos->val.sp.vec;
-				rpt = pos->needs_confirm;
-			} else if (type == FEAT_NN) {
-				len = dccp_feat_valid_nn_length(pos->feat_num);
-				ptr = nn_in_nbo;
-				dccp_encode_value_var(pos->val.nn, ptr, len);
-			} else {
-				DCCP_BUG("unknown feature %u", pos->feat_num);
-				return -1;
+		list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+			if (opt->dccpop_conf) {
+				yes = 1;
+				break;
 			}
 		}
-		dccp_feat_print_opt(opt, pos->feat_num, ptr, len, 0);
-
-		if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt))
-			return -1;
-		if (pos->needs_mandatory && dccp_insert_option_mandatory(skb))
-			return -1;
-		/*
-		 * Enter CHANGING after transmitting the Change option (6.6.2).
-		 */
-		if (pos->state == FEAT_INITIALISING)
-			pos->state = FEAT_CHANGING;
 	}
-	return 0;
-}
-
-/**
- * __feat_register_nn  -  Register new NN value on socket
- * @fn: feature-negotiation list to register with
- * @feat: an NN feature from %dccp_feature_numbers
- * @mandatory: use Mandatory option if 1
- * @nn_val: value to register (restricted to 4 bytes)
- * Note that NN features are local by definition (RFC 4340, 6.3.2).
- */
-static int __feat_register_nn(struct list_head *fn, u8 feat,
-			      u8 mandatory, u64 nn_val)
-{
-	dccp_feat_val fval = { .nn = nn_val };
-
-	if (dccp_feat_type(feat) != FEAT_NN ||
-	    !dccp_feat_is_valid_nn_val(feat, nn_val))
-		return -EINVAL;
-
-	/* Don't bother with default values, they will be activated anyway. */
-	if (nn_val - (u64)dccp_feat_default_value(feat) == 0)
-		return 0;
-
-	return dccp_feat_push_change(fn, feat, 1, mandatory, &fval);
-}
-
-/**
- * __feat_register_sp  -  Register new SP value/list on socket
- * @fn: feature-negotiation list to register with
- * @feat: an SP feature from %dccp_feature_numbers
- * @is_local: whether the local (1) or the remote (0) @feat is meant
- * @mandatory: use Mandatory option if 1
- * @sp_val: SP value followed by optional preference list
- * @sp_len: length of @sp_val in bytes
- */
-static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
-			      u8 mandatory, u8 const *sp_val, u8 sp_len)
-{
-	dccp_feat_val fval;
 
-	if (dccp_feat_type(feat) != FEAT_SP ||
-	    !dccp_feat_sp_list_ok(feat, sp_val, sp_len))
-		return -EINVAL;
-
-	/* Avoid negotiating alien CCIDs by only advertising supported ones */
-	if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len))
-		return -EOPNOTSUPP;
-
-	if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
-		return -ENOMEM;
+	if (!yes)
+		return;
 
-	return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval);
+	/* OK there is something to confirm... */
+	/* XXX check if packet is in flight?  Send delayed ack?? */
+	if (sk->sk_state == DCCP_OPEN)
+		dccp_send_ack(sk);
 }
 
-/**
- * dccp_feat_register_sp  -  Register requests to change SP feature values
- * @sk: client or listening socket
- * @feat: one of %dccp_feature_numbers
- * @is_local: whether the local (1) or remote (0) @feat is meant
- * @list: array of preferred values, in descending order of preference
- * @len: length of @list in bytes
- */
-int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
-			  u8 const *list, u8 len)
-{	 /* any changes must be registered before establishing the connection */
-	if (sk->sk_state != DCCP_CLOSED)
-		return -EISCONN;
-	if (dccp_feat_type(feat) != FEAT_SP)
-		return -EINVAL;
-	return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local,
-				  0, list, len);
-}
-
-/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */
-int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val)
+int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
 {
-	/* any changes must be registered before establishing the connection */
-	if (sk->sk_state != DCCP_CLOSED)
-		return -EISCONN;
-	if (dccp_feat_type(feat) != FEAT_NN)
-		return -EINVAL;
-	return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val);
-}
+	int rc;
 
-/**
- * dccp_feat_signal_nn_change  -  Update NN values for an established connection
- * @sk: DCCP socket of an established connection
- * @feat: NN feature number from %dccp_feature_numbers
- * @nn_val: the new value to use
- * This function is used to communicate NN updates out-of-band. The difference
- * to feature negotiation during connection setup is that values are activated
- * immediately after validation, i.e. we don't wait for the Confirm: either the
- * value is accepted by the peer (and then the waiting is futile), or it is not
- * (Reset or empty Confirm). We don't accept empty Confirms - transmitted values
- * are validated, and the peer "MUST accept any valid value" (RFC 4340, 6.3.2).
- */
-int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val)
-{
-	struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
-	dccp_feat_val fval = { .nn = nn_val };
-	struct dccp_feat_entry *entry;
+	dccp_feat_debug(type, feature, *val);
 
-	if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
-		return 0;
+	/* figure out if it's SP or NN feature */
+	switch (feature) {
+	/* deal with SP features */
+	case DCCPF_CCID:
+		rc = dccp_feat_sp(sk, type, feature, val, len);
+		break;
 
-	if (dccp_feat_type(feat) != FEAT_NN ||
-	    !dccp_feat_is_valid_nn_val(feat, nn_val))
-		return -EINVAL;
+	/* deal with NN features */
+	case DCCPF_ACK_RATIO:
+		rc = dccp_feat_nn(sk, type, feature, val, len);
+		break;
 
-	entry = dccp_feat_list_lookup(fn, feat, 1);
-	if (entry != NULL) {
-		dccp_pr_debug("Ignoring %llu, entry %llu exists in state %s\n",
-			      (unsigned long long)nn_val,
-			      (unsigned long long)entry->val.nn,
-			      dccp_feat_sname[entry->state]);
-		return 0;
+	/* XXX implement other features */
+	default:
+		dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n",
+			      dccp_feat_typename(type), feature);
+		rc = -EFAULT;
+		break;
 	}
 
-	if (dccp_feat_activate(sk, feat, 1, &fval))
-		return -EADV;
-
-	inet_csk_schedule_ack(sk);
-	return dccp_feat_push_change(fn, feat, 1, 0, &fval);
-}
-EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change);
-
-/*
- *	Tracking features whose value depend on the choice of CCID
- *
- * This is designed with an extension in mind so that a list walk could be done
- * before activating any features. However, the existing framework was found to
- * work satisfactorily up until now, the automatic verification is left open.
- * When adding new CCIDs, add a corresponding dependency table here.
- */
-static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local)
-{
-	static const struct ccid_dependency ccid2_dependencies[2][2] = {
-		/*
-		 * CCID2 mandates Ack Vectors (RFC 4341, 4.): as CCID is a TX
-		 * feature and Send Ack Vector is an RX feature, `is_local'
-		 * needs to be reversed.
+	/* check if there were problems changing features */
+	if (rc) {
+		/* If we don't agree on SP, we sent a confirm for old value.
+		 * However we propagate rc to caller in case option was
+		 * mandatory
 		 */
-		{	/* Dependencies of the receiver-side (remote) CCID2 */
-			{
-				.dependent_feat	= DCCPF_SEND_ACK_VECTOR,
-				.is_local	= true,
-				.is_mandatory	= true,
-				.val		= 1
-			},
-			{ 0, 0, 0, 0 }
-		},
-		{	/* Dependencies of the sender-side (local) CCID2 */
-			{
-				.dependent_feat	= DCCPF_SEND_ACK_VECTOR,
-				.is_local	= false,
-				.is_mandatory	= true,
-				.val		= 1
-			},
-			{ 0, 0, 0, 0 }
-		}
-	};
-	static const struct ccid_dependency ccid3_dependencies[2][5] = {
-		{	/*
-			 * Dependencies of the receiver-side CCID3
-			 */
-			{	/* locally disable Ack Vectors */
-				.dependent_feat	= DCCPF_SEND_ACK_VECTOR,
-				.is_local	= true,
-				.is_mandatory	= false,
-				.val		= 0
-			},
-			{	/* see below why Send Loss Event Rate is on */
-				.dependent_feat	= DCCPF_SEND_LEV_RATE,
-				.is_local	= true,
-				.is_mandatory	= true,
-				.val		= 1
-			},
-			{	/* NDP Count is needed as per RFC 4342, 6.1.1 */
-				.dependent_feat	= DCCPF_SEND_NDP_COUNT,
-				.is_local	= false,
-				.is_mandatory	= true,
-				.val		= 1
-			},
-			{ 0, 0, 0, 0 },
-		},
-		{	/*
-			 * CCID3 at the TX side: we request that the HC-receiver
-			 * will not send Ack Vectors (they will be ignored, so
-			 * Mandatory is not set); we enable Send Loss Event Rate
-			 * (Mandatory since the implementation does not support
-			 * the Loss Intervals option of RFC 4342, 8.6).
-			 * The last two options are for peer's information only.
-			*/
-			{
-				.dependent_feat	= DCCPF_SEND_ACK_VECTOR,
-				.is_local	= false,
-				.is_mandatory	= false,
-				.val		= 0
-			},
-			{
-				.dependent_feat	= DCCPF_SEND_LEV_RATE,
-				.is_local	= false,
-				.is_mandatory	= true,
-				.val		= 1
-			},
-			{	/* this CCID does not support Ack Ratio */
-				.dependent_feat	= DCCPF_ACK_RATIO,
-				.is_local	= true,
-				.is_mandatory	= false,
-				.val		= 0
-			},
-			{	/* tell receiver we are sending NDP counts */
-				.dependent_feat	= DCCPF_SEND_NDP_COUNT,
-				.is_local	= true,
-				.is_mandatory	= false,
-				.val		= 1
-			},
-			{ 0, 0, 0, 0 }
-		}
-	};
-	switch (ccid) {
-	case DCCPC_CCID2:
-		return ccid2_dependencies[is_local];
-	case DCCPC_CCID3:
-		return ccid3_dependencies[is_local];
-	default:
-		return NULL;
+		if (rc != DCCP_FEAT_SP_NOAGREE)
+			dccp_feat_empty_confirm(dccp_msk(sk), type, feature);
 	}
-}
 
-/**
- * dccp_feat_propagate_ccid - Resolve dependencies of features on choice of CCID
- * @fn: feature-negotiation list to update
- * @id: CCID number to track
- * @is_local: whether TX CCID (1) or RX CCID (0) is meant
- * This function needs to be called after registering all other features.
- */
-static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local)
-{
-	const struct ccid_dependency *table = dccp_feat_ccid_deps(id, is_local);
-	int i, rc = (table == NULL);
-
-	for (i = 0; rc == 0 && table[i].dependent_feat != DCCPF_RESERVED; i++)
-		if (dccp_feat_type(table[i].dependent_feat) == FEAT_SP)
-			rc = __feat_register_sp(fn, table[i].dependent_feat,
-						    table[i].is_local,
-						    table[i].is_mandatory,
-						    &table[i].val, 1);
-		else
-			rc = __feat_register_nn(fn, table[i].dependent_feat,
-						    table[i].is_mandatory,
-						    table[i].val);
-	return rc;
-}
-
-/**
- * dccp_feat_finalise_settings  -  Finalise settings before starting negotiation
- * @dp: client or listening socket (settings will be inherited)
- * This is called after all registrations (socket initialisation, sysctls, and
- * sockopt calls), and before sending the first packet containing Change options
- * (ie. client-Request or server-Response), to ensure internal consistency.
- */
-int dccp_feat_finalise_settings(struct dccp_sock *dp)
-{
-	struct list_head *fn = &dp->dccps_featneg;
-	struct dccp_feat_entry *entry;
-	int i = 2, ccids[2] = { -1, -1 };
+	/* generate the confirm [if required] */
+	dccp_feat_flush_confirm(sk);
 
-	/*
-	 * Propagating CCIDs:
-	 * 1) not useful to propagate CCID settings if this host advertises more
-	 *    than one CCID: the choice of CCID  may still change - if this is
-	 *    the client, or if this is the server and the client sends
-	 *    singleton CCID values.
-	 * 2) since is that propagate_ccid changes the list, we defer changing
-	 *    the sorted list until after the traversal.
-	 */
-	list_for_each_entry(entry, fn, node)
-		if (entry->feat_num == DCCPF_CCID && entry->val.sp.len == 1)
-			ccids[entry->is_local] = entry->val.sp.vec[0];
-	while (i--)
-		if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i))
-			return -1;
-	dccp_feat_print_fnlist(fn);
-	return 0;
+	return rc;
 }
 
-/**
- * dccp_feat_server_ccid_dependencies  -  Resolve CCID-dependent features
- * It is the server which resolves the dependencies once the CCID has been
- * fully negotiated. If no CCID has been negotiated, it uses the default CCID.
- */
-int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq)
-{
-	struct list_head *fn = &dreq->dreq_featneg;
-	struct dccp_feat_entry *entry;
-	u8 is_local, ccid;
-
-	for (is_local = 0; is_local <= 1; is_local++) {
-		entry = dccp_feat_list_lookup(fn, DCCPF_CCID, is_local);
-
-		if (entry != NULL && !entry->empty_confirm)
-			ccid = entry->val.sp.vec[0];
-		else
-			ccid = dccp_feat_default_value(DCCPF_CCID);
-
-		if (dccp_feat_propagate_ccid(fn, ccid, is_local))
-			return -1;
-	}
-	return 0;
-}
+EXPORT_SYMBOL_GPL(dccp_feat_change_recv);
 
-/* Select the first entry in @servlist that also occurs in @clilist (6.3.1) */
-static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen)
+int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
+			   u8 *val, u8 len)
 {
-	u8 c, s;
+	u8 t;
+	struct dccp_opt_pend *opt;
+	struct dccp_minisock *dmsk = dccp_msk(sk);
+	int found = 0;
+	int all_confirmed = 1;
 
-	for (s = 0; s < slen; s++)
-		for (c = 0; c < clen; c++)
-			if (servlist[s] == clilist[c])
-				return servlist[s];
-	return -1;
-}
+	dccp_feat_debug(type, feature, *val);
 
-/**
- * dccp_feat_prefer  -  Move preferred entry to the start of array
- * Reorder the @array_len elements in @array so that @preferred_value comes
- * first. Returns >0 to indicate that @preferred_value does occur in @array.
- */
-static u8 dccp_feat_prefer(u8 preferred_value, u8 *array, u8 array_len)
-{
-	u8 i, does_occur = 0;
+	/* locate our change request */
+	switch (type) {
+	case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break;
+	case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break;
+	default:	      DCCP_WARN("invalid type %d\n", type);
+			      return 1;
 
-	if (array != NULL) {
-		for (i = 0; i < array_len; i++)
-			if (array[i] == preferred_value) {
-				array[i] = array[0];
-				does_occur++;
-			}
-		if (does_occur)
-			array[0] = preferred_value;
 	}
-	return does_occur;
-}
+	/* XXX sanity check feature value */
 
-/**
- * dccp_feat_reconcile  -  Reconcile SP preference lists
- *  @fval: SP list to reconcile into
- *  @arr: received SP preference list
- *  @len: length of @arr in bytes
- *  @is_server: whether this side is the server (and @fv is the server's list)
- *  @reorder: whether to reorder the list in @fv after reconciling with @arr
- * When successful, > 0 is returned and the reconciled list is in @fval.
- * A value of 0 means that negotiation failed (no shared entry).
- */
-static int dccp_feat_reconcile(dccp_feat_val *fv, u8 *arr, u8 len,
-			       bool is_server, bool reorder)
-{
-	int rc;
+	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+		if (!opt->dccpop_conf && opt->dccpop_type == t &&
+		    opt->dccpop_feat == feature) {
+			found = 1;
+			dccp_pr_debug("feature %d found\n", opt->dccpop_feat);
 
-	if (!fv->sp.vec || !arr) {
-		DCCP_CRIT("NULL feature value or array");
-		return 0;
-	}
+			/* XXX do sanity check */
 
-	if (is_server)
-		rc = dccp_feat_preflist_match(fv->sp.vec, fv->sp.len, arr, len);
-	else
-		rc = dccp_feat_preflist_match(arr, len, fv->sp.vec, fv->sp.len);
-
-	if (!reorder)
-		return rc;
-	if (rc < 0)
-		return 0;
+			opt->dccpop_conf = 1;
 
-	/*
-	 * Reorder list: used for activating features and in dccp_insert_fn_opt.
-	 */
-	return dccp_feat_prefer(rc, fv->sp.vec, fv->sp.len);
-}
+			/* We got a confirmation---change the option */
+			dccp_feat_update(sk, opt->dccpop_type,
+					 opt->dccpop_feat, *val);
 
-/**
- * dccp_feat_change_recv  -  Process incoming ChangeL/R options
- * @fn: feature-negotiation list to update
- * @is_mandatory: whether the Change was preceded by a Mandatory option
- * @opt: %DCCPO_CHANGE_L or %DCCPO_CHANGE_R
- * @feat: one of %dccp_feature_numbers
- * @val: NN value or SP value/preference list
- * @len: length of @val in bytes
- * @server: whether this node is the server (1) or the client (0)
- */
-static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
-				u8 feat, u8 *val, u8 len, const bool server)
-{
-	u8 defval, type = dccp_feat_type(feat);
-	const bool local = (opt == DCCPO_CHANGE_R);
-	struct dccp_feat_entry *entry;
-	dccp_feat_val fval;
-
-	if (len == 0 || type == FEAT_UNKNOWN)		/* 6.1 and 6.6.8 */
-		goto unknown_feature_or_value;
-
-	dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
-
-	/*
-	 *	Negotiation of NN features: Change R is invalid, so there is no
-	 *	simultaneous negotiation; hence we do not look up in the list.
-	 */
-	if (type == FEAT_NN) {
-		if (local || len > sizeof(fval.nn))
-			goto unknown_feature_or_value;
-
-		/* 6.3.2: "The feature remote MUST accept any valid value..." */
-		fval.nn = dccp_decode_value_var(val, len);
-		if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
-			goto unknown_feature_or_value;
+			/* XXX check the return value of dccp_feat_update */
+			break;
+		}
 
-		return dccp_feat_push_confirm(fn, feat, local, &fval);
+		if (!opt->dccpop_conf)
+			all_confirmed = 0;
 	}
 
-	/*
-	 *	Unidirectional/simultaneous negotiation of SP features (6.3.1)
+	/* fix re-transmit timer */
+	/* XXX gotta make sure that no option negotiation occurs during
+	 * connection shutdown.  Consider that the CLOSEREQ is sent and timer is
+	 * on.  if all options are confirmed it might kill timer which should
+	 * remain alive until close is received.
 	 */
-	entry = dccp_feat_list_lookup(fn, feat, local);
-	if (entry == NULL) {
-		/*
-		 * No particular preferences have been registered. We deal with
-		 * this situation by assuming that all valid values are equally
-		 * acceptable, and apply the following checks:
-		 * - if the peer's list is a singleton, we accept a valid value;
-		 * - if we are the server, we first try to see if the peer (the
-		 *   client) advertises the default value. If yes, we use it,
-		 *   otherwise we accept the preferred value;
-		 * - else if we are the client, we use the first list element.
-		 */
-		if (dccp_feat_clone_sp_val(&fval, val, 1))
-			return DCCP_RESET_CODE_TOO_BUSY;
-
-		if (len > 1 && server) {
-			defval = dccp_feat_default_value(feat);
-			if (dccp_feat_preflist_match(&defval, 1, val, len) > -1)
-				fval.sp.vec[0] = defval;
-		} else if (!dccp_feat_is_valid_sp_val(feat, fval.sp.vec[0])) {
-			kfree(fval.sp.vec);
-			goto unknown_feature_or_value;
-		}
-
-		/* Treat unsupported CCIDs like invalid values */
-		if (feat == DCCPF_CCID && !ccid_support_check(fval.sp.vec, 1)) {
-			kfree(fval.sp.vec);
-			goto not_valid_or_not_known;
-		}
-
-		return dccp_feat_push_confirm(fn, feat, local, &fval);
-
-	} else if (entry->state == FEAT_UNSTABLE) {	/* 6.6.2 */
-		return 0;
+	if (all_confirmed) {
+		dccp_pr_debug("clear feat negotiation timer %p\n", sk);
+		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 	}
 
-	if (dccp_feat_reconcile(&entry->val, val, len, server, true)) {
-		entry->empty_confirm = 0;
-	} else if (is_mandatory) {
-		return DCCP_RESET_CODE_MANDATORY_ERROR;
-	} else if (entry->state == FEAT_INITIALISING) {
-		/*
-		 * Failed simultaneous negotiation (server only): try to `save'
-		 * the connection by checking whether entry contains the default
-		 * value for @feat. If yes, send an empty Confirm to signal that
-		 * the received Change was not understood - which implies using
-		 * the default value.
-		 * If this also fails, we use Reset as the last resort.
-		 */
-		WARN_ON(!server);
-		defval = dccp_feat_default_value(feat);
-		if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true))
-			return DCCP_RESET_CODE_OPTION_ERROR;
-		entry->empty_confirm = 1;
-	}
-	entry->needs_confirm   = 1;
-	entry->needs_mandatory = 0;
-	entry->state	       = FEAT_STABLE;
+	if (!found)
+		dccp_pr_debug("%s(%d, ...) never requested\n",
+			      dccp_feat_typename(type), feature);
 	return 0;
-
-unknown_feature_or_value:
-	if (!is_mandatory)
-		return dccp_push_empty_confirm(fn, feat, local);
-
-not_valid_or_not_known:
-	return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
-			    : DCCP_RESET_CODE_OPTION_ERROR;
 }
 
-/**
- * dccp_feat_confirm_recv  -  Process received Confirm options
- * @fn: feature-negotiation list to update
- * @is_mandatory: whether @opt was preceded by a Mandatory option
- * @opt: %DCCPO_CONFIRM_L or %DCCPO_CONFIRM_R
- * @feat: one of %dccp_feature_numbers
- * @val: NN value or SP value/preference list
- * @len: length of @val in bytes
- * @server: whether this node is server (1) or client (0)
- */
-static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
-				 u8 feat, u8 *val, u8 len, const bool server)
-{
-	u8 *plist, plen, type = dccp_feat_type(feat);
-	const bool local = (opt == DCCPO_CONFIRM_R);
-	struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local);
-
-	dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
-
-	if (entry == NULL) {	/* nothing queued: ignore or handle error */
-		if (is_mandatory && type == FEAT_UNKNOWN)
-			return DCCP_RESET_CODE_MANDATORY_ERROR;
-
-		if (!local && type == FEAT_NN)		/* 6.3.2 */
-			goto confirmation_failed;
-		return 0;
-	}
-
-	if (entry->state != FEAT_CHANGING)		/* 6.6.2 */
-		return 0;
-
-	if (len == 0) {
-		if (dccp_feat_must_be_understood(feat))	/* 6.6.7 */
-			goto confirmation_failed;
-		/*
-		 * Empty Confirm during connection setup: this means reverting
-		 * to the `old' value, which in this case is the default. Since
-		 * we handle default values automatically when no other values
-		 * have been set, we revert to the old value by removing this
-		 * entry from the list.
-		 */
-		dccp_feat_list_pop(entry);
-		return 0;
-	}
+EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv);
 
-	if (type == FEAT_NN) {
-		if (len > sizeof(entry->val.nn))
-			goto confirmation_failed;
+void dccp_feat_clean(struct dccp_minisock *dmsk)
+{
+	struct dccp_opt_pend *opt, *next;
 
-		if (entry->val.nn == dccp_decode_value_var(val, len))
-			goto confirmation_succeeded;
+	list_for_each_entry_safe(opt, next, &dmsk->dccpms_pending,
+				 dccpop_node) {
+		BUG_ON(opt->dccpop_val == NULL);
+		kfree(opt->dccpop_val);
 
-		DCCP_WARN("Bogus Confirm for non-existing value\n");
-		goto confirmation_failed;
-	}
+		if (opt->dccpop_sc != NULL) {
+			BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
+			kfree(opt->dccpop_sc->dccpoc_val);
+			kfree(opt->dccpop_sc);
+		}
 
-	/*
-	 * Parsing SP Confirms: the first element of @val is the preferred
-	 * SP value which the peer confirms, the remainder depends on @len.
-	 * Note that only the confirmed value need to be a valid SP value.
-	 */
-	if (!dccp_feat_is_valid_sp_val(feat, *val))
-		goto confirmation_failed;
-
-	if (len == 1) {		/* peer didn't supply a preference list */
-		plist = val;
-		plen  = len;
-	} else {		/* preferred value + preference list */
-		plist = val + 1;
-		plen  = len - 1;
+		kfree(opt);
 	}
+	INIT_LIST_HEAD(&dmsk->dccpms_pending);
 
-	/* Check whether the peer got the reconciliation right (6.6.8) */
-	if (dccp_feat_reconcile(&entry->val, plist, plen, server, 0) != *val) {
-		DCCP_WARN("Confirm selected the wrong value %u\n", *val);
-		return DCCP_RESET_CODE_OPTION_ERROR;
+	list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
+		BUG_ON(opt == NULL);
+		if (opt->dccpop_val != NULL)
+			kfree(opt->dccpop_val);
+		kfree(opt);
 	}
-	entry->val.sp.vec[0] = *val;
-
-confirmation_succeeded:
-	entry->state = FEAT_STABLE;
-	return 0;
-
-confirmation_failed:
-	DCCP_WARN("Confirmation failed\n");
-	return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
-			    : DCCP_RESET_CODE_OPTION_ERROR;
+	INIT_LIST_HEAD(&dmsk->dccpms_conf);
 }
 
-/**
- * dccp_feat_handle_nn_established  -  Fast-path reception of NN options
- * @sk:		socket of an established DCCP connection
- * @mandatory:	whether @opt was preceded by a Mandatory option
- * @opt:	%DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only)
- * @feat:	NN number, one of %dccp_feature_numbers
- * @val:	NN value
- * @len:	length of @val in bytes
- * This function combines the functionality of change_recv/confirm_recv, with
- * the following differences (reset codes are the same):
- *    - cleanup after receiving the Confirm;
- *    - values are directly activated after successful parsing;
- *    - deliberately restricted to NN features.
- * The restriction to NN features is essential since SP features can have non-
- * predictable outcomes (depending on the remote configuration), and are inter-
- * dependent (CCIDs for instance cause further dependencies).
+EXPORT_SYMBOL_GPL(dccp_feat_clean);
+
+/* this is to be called only when a listening sock creates its child.  It is
+ * assumed by the function---the confirm is not duplicated, but rather it is
+ * "passed on".
  */
-static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt,
-					  u8 feat, u8 *val, u8 len)
+int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
 {
-	struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
-	const bool local = (opt == DCCPO_CONFIRM_R);
-	struct dccp_feat_entry *entry;
-	u8 type = dccp_feat_type(feat);
-	dccp_feat_val fval;
+	struct dccp_minisock *olddmsk = dccp_msk(oldsk);
+	struct dccp_minisock *newdmsk = dccp_msk(newsk);
+	struct dccp_opt_pend *opt;
+	int rc = 0;
 
-	dccp_feat_print_opt(opt, feat, val, len, mandatory);
+	INIT_LIST_HEAD(&newdmsk->dccpms_pending);
+	INIT_LIST_HEAD(&newdmsk->dccpms_conf);
 
-	/* Ignore non-mandatory unknown and non-NN features */
-	if (type == FEAT_UNKNOWN) {
-		if (local && !mandatory)
-			return 0;
-		goto fast_path_unknown;
-	} else if (type != FEAT_NN) {
-		return 0;
-	}
-
-	/*
-	 * We don't accept empty Confirms, since in fast-path feature
-	 * negotiation the values are enabled immediately after sending
-	 * the Change option.
-	 * Empty Changes on the other hand are invalid (RFC 4340, 6.1).
-	 */
-	if (len == 0 || len > sizeof(fval.nn))
-		goto fast_path_unknown;
-
-	if (opt == DCCPO_CHANGE_L) {
-		fval.nn = dccp_decode_value_var(val, len);
-		if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
-			goto fast_path_unknown;
+	list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) {
+		struct dccp_opt_pend *newopt;
+		/* copy the value of the option */
+		u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC);
 
-		if (dccp_feat_push_confirm(fn, feat, local, &fval) ||
-		    dccp_feat_activate(sk, feat, local, &fval))
-			return DCCP_RESET_CODE_TOO_BUSY;
+		if (val == NULL)
+			goto out_clean;
 
-		/* set the `Ack Pending' flag to piggyback a Confirm */
-		inet_csk_schedule_ack(sk);
-
-	} else if (opt == DCCPO_CONFIRM_R) {
-		entry = dccp_feat_list_lookup(fn, feat, local);
-		if (entry == NULL || entry->state != FEAT_CHANGING)
-			return 0;
-
-		fval.nn = dccp_decode_value_var(val, len);
-		if (fval.nn != entry->val.nn) {
-			DCCP_WARN("Bogus Confirm for non-existing value\n");
-			goto fast_path_failed;
+		newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC);
+		if (newopt == NULL) {
+			kfree(val);
+			goto out_clean;
 		}
 
-		/* It has been confirmed - so remove the entry */
-		dccp_feat_list_pop(entry);
+		/* insert the option */
+		newopt->dccpop_val = val;
+		list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending);
 
-	} else {
-		DCCP_WARN("Received illegal option %u\n", opt);
-		goto fast_path_failed;
+		/* XXX what happens with backlogs and multiple connections at
+		 * once...
+		 */
+		/* the master socket no longer needs to worry about confirms */
+		opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */
+
+		/* reset state for a new socket */
+		opt->dccpop_conf = 0;
 	}
-	return 0;
 
-fast_path_unknown:
-	if (!mandatory)
-		return dccp_push_empty_confirm(fn, feat, local);
+	/* XXX not doing anything about the conf queue */
+
+out:
+	return rc;
 
-fast_path_failed:
-	return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
-			 : DCCP_RESET_CODE_OPTION_ERROR;
+out_clean:
+	dccp_feat_clean(newdmsk);
+	rc = -ENOMEM;
+	goto out;
 }
 
-/**
- * dccp_feat_parse_options  -  Process Feature-Negotiation Options
- * @sk: for general use and used by the client during connection setup
- * @dreq: used by the server during connection setup
- * @mandatory: whether @opt was preceded by a Mandatory option
- * @opt: %DCCPO_CHANGE_L | %DCCPO_CHANGE_R | %DCCPO_CONFIRM_L | %DCCPO_CONFIRM_R
- * @feat: one of %dccp_feature_numbers
- * @val: value contents of @opt
- * @len: length of @val in bytes
- * Returns 0 on success, a Reset code for ending the connection otherwise.
- */
-int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
-			    u8 mandatory, u8 opt, u8 feat, u8 *val, u8 len)
+EXPORT_SYMBOL_GPL(dccp_feat_clone);
+
+static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat,
+			    u8 *val, u8 len)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
-	bool server = false;
+	int rc = -ENOMEM;
+	u8 *copy = kmemdup(val, len, GFP_KERNEL);
 
-	switch (sk->sk_state) {
-	/*
-	 *	Negotiation during connection setup
-	 */
-	case DCCP_LISTEN:
-		server = true;			/* fall through */
-	case DCCP_REQUESTING:
-		switch (opt) {
-		case DCCPO_CHANGE_L:
-		case DCCPO_CHANGE_R:
-			return dccp_feat_change_recv(fn, mandatory, opt, feat,
-						     val, len, server);
-		case DCCPO_CONFIRM_R:
-		case DCCPO_CONFIRM_L:
-			return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
-						      val, len, server);
-		}
-		break;
-	/*
-	 *	Support for exchanging NN options on an established connection
-	 *	This is currently restricted to Ack Ratio (RFC 4341, 6.1.2)
-	 */
-	case DCCP_OPEN:
-	case DCCP_PARTOPEN:
-		return dccp_feat_handle_nn_established(sk, mandatory, opt, feat,
-						       val, len);
+	if (copy != NULL) {
+		rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL);
+		if (rc)
+			kfree(copy);
 	}
-	return 0;	/* ignore FN options in all other states */
+	return rc;
 }
 
-/**
- * dccp_feat_init  -  Seed feature negotiation with host-specific defaults
- * This initialises global defaults, depending on the value of the sysctls.
- * These can later be overridden by registering changes via setsockopt calls.
- * The last link in the chain is finalise_settings, to make sure that between
- * here and the start of actual feature negotiation no inconsistencies enter.
- *
- * All features not appearing below use either defaults or are otherwise
- * later adjusted through dccp_feat_finalise_settings().
- */
-int dccp_feat_init(struct sock *sk)
+int dccp_feat_init(struct dccp_minisock *dmsk)
 {
-	struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
-	u8 on = 1, off = 0;
 	int rc;
-	struct {
-		u8 *val;
-		u8 len;
-	} tx, rx;
-
-	/* Non-negotiable (NN) features */
-	rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0,
-				    sysctl_dccp_sequence_window);
-	if (rc)
-		return rc;
 
-	/* Server-priority (SP) features */
-
-	/* Advertise that short seqnos are not supported (7.6.1) */
-	rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1);
-	if (rc)
-		return rc;
+	INIT_LIST_HEAD(&dmsk->dccpms_pending);
+	INIT_LIST_HEAD(&dmsk->dccpms_conf);
 
-	/* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */
-	rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1);
+	/* CCID L */
+	rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID,
+			      &dmsk->dccpms_tx_ccid, 1);
 	if (rc)
-		return rc;
-
-	/*
-	 * We advertise the available list of CCIDs and reorder according to
-	 * preferences, to avoid failure resulting from negotiating different
-	 * singleton values (which always leads to failure).
-	 * These settings can still (later) be overridden via sockopts.
-	 */
-	if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
-	    ccid_get_builtin_ccids(&rx.val, &rx.len))
-		return -ENOBUFS;
-
-	/* Pre-load all CCID modules that are going to be advertised */
-	rc = -EUNATCH;
-	if (ccid_request_modules(tx.val, tx.len))
-		goto free_ccid_lists;
-
-	if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
-	    !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
-		goto free_ccid_lists;
+		goto out;
 
-	rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len);
+	/* CCID R */
+	rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID,
+			      &dmsk->dccpms_rx_ccid, 1);
 	if (rc)
-		goto free_ccid_lists;
+		goto out;
 
-	rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len);
-
-free_ccid_lists:
-	kfree(tx.val);
-	kfree(rx.val);
+	/* Ack ratio */
+	rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO,
+			      &dmsk->dccpms_ack_ratio, 1);
+out:
 	return rc;
 }
 
-int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
-{
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_feat_entry *cur, *next;
-	int idx;
-	dccp_feat_val *fvals[DCCP_FEAT_SUPPORTED_MAX][2] = {
-		 [0 ... DCCP_FEAT_SUPPORTED_MAX-1] = { NULL, NULL }
-	};
-
-	list_for_each_entry(cur, fn_list, node) {
-		/*
-		 * An empty Confirm means that either an unknown feature type
-		 * or an invalid value was present. In the first case there is
-		 * nothing to activate, in the other the default value is used.
-		 */
-		if (cur->empty_confirm)
-			continue;
+EXPORT_SYMBOL_GPL(dccp_feat_init);
 
-		idx = dccp_feat_index(cur->feat_num);
-		if (idx < 0) {
-			DCCP_BUG("Unknown feature %u", cur->feat_num);
-			goto activation_failed;
-		}
-		if (cur->state != FEAT_STABLE) {
-			DCCP_CRIT("Negotiation of %s %s failed in state %s",
-				  cur->is_local ? "local" : "remote",
-				  dccp_feat_fname(cur->feat_num),
-				  dccp_feat_sname[cur->state]);
-			goto activation_failed;
-		}
-		fvals[idx][cur->is_local] = &cur->val;
+#ifdef CONFIG_IP_DCCP_DEBUG
+const char *dccp_feat_typename(const u8 type)
+{
+	switch(type) {
+	case DCCPO_CHANGE_L:  return("ChangeL");
+	case DCCPO_CONFIRM_L: return("ConfirmL");
+	case DCCPO_CHANGE_R:  return("ChangeR");
+	case DCCPO_CONFIRM_R: return("ConfirmR");
+	/* the following case must not appear in feature negotation  */
+	default:	      dccp_pr_debug("unknown type %d [BUG!]\n", type);
 	}
+	return NULL;
+}
 
-	/*
-	 * Activate in decreasing order of index, so that the CCIDs are always
-	 * activated as the last feature. This avoids the case where a CCID
-	 * relies on the initialisation of one or more features that it depends
-	 * on (e.g. Send NDP Count, Send Ack Vector, and Ack Ratio features).
-	 */
-	for (idx = DCCP_FEAT_SUPPORTED_MAX; --idx >= 0;)
-		if (__dccp_feat_activate(sk, idx, 0, fvals[idx][0]) ||
-		    __dccp_feat_activate(sk, idx, 1, fvals[idx][1])) {
-			DCCP_CRIT("Could not activate %d", idx);
-			goto activation_failed;
-		}
+EXPORT_SYMBOL_GPL(dccp_feat_typename);
 
-	/* Clean up Change options which have been confirmed already */
-	list_for_each_entry_safe(cur, next, fn_list, node)
-		if (!cur->needs_confirm)
-			dccp_feat_list_pop(cur);
+const char *dccp_feat_name(const u8 feat)
+{
+	static const char *feature_names[] = {
+		[DCCPF_RESERVED]	= "Reserved",
+		[DCCPF_CCID]		= "CCID",
+		[DCCPF_SHORT_SEQNOS]	= "Allow Short Seqnos",
+		[DCCPF_SEQUENCE_WINDOW]	= "Sequence Window",
+		[DCCPF_ECN_INCAPABLE]	= "ECN Incapable",
+		[DCCPF_ACK_RATIO]	= "Ack Ratio",
+		[DCCPF_SEND_ACK_VECTOR]	= "Send ACK Vector",
+		[DCCPF_SEND_NDP_COUNT]	= "Send NDP Count",
+		[DCCPF_MIN_CSUM_COVER]	= "Min. Csum Coverage",
+		[DCCPF_DATA_CHECKSUM]	= "Send Data Checksum",
+	};
+	if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
+		return feature_names[DCCPF_RESERVED];
 
-	dccp_pr_debug("Activation OK\n");
-	return 0;
+	if (feat >= DCCPF_MIN_CCID_SPECIFIC)
+		return "CCID-specific";
 
-activation_failed:
-	/*
-	 * We clean up everything that may have been allocated, since
-	 * it is difficult to track at which stage negotiation failed.
-	 * This is ok, since all allocation functions below are robust
-	 * against NULL arguments.
-	 */
-	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
-	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
-	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
-	dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
-	dp->dccps_hc_rx_ackvec = NULL;
-	return -1;
+	return feature_names[feat];
 }
+
+EXPORT_SYMBOL_GPL(dccp_feat_name);
+#endif /* CONFIG_IP_DCCP_DEBUG */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 2217066e22d..e272222c7ac 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -3,134 +3,38 @@
 /*
  *  net/dccp/feat.h
  *
- *  Feature negotiation for the DCCP protocol (RFC 4340, section 6)
- *  Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
+ *  An implementation of the DCCP protocol
  *  Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
  *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License version 2 as
+ *	published by the Free Software Foundation.
  */
+
 #include <linux/types.h>
 #include "dccp.h"
 
-/*
- * Known limit values
- */
-/* Ack Ratio takes 2-byte integer values (11.3) */
-#define DCCPF_ACK_RATIO_MAX	0xFFFF
-/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */
-#define DCCPF_SEQ_WMIN		32
-#define DCCPF_SEQ_WMAX		0x3FFFFFFFFFFFull
-/* Maximum number of SP values that fit in a single (Confirm) option */
-#define DCCP_FEAT_MAX_SP_VALS	(DCCP_SINGLE_OPT_MAXLEN - 2)
-
-enum dccp_feat_type {
-	FEAT_AT_RX   = 1,	/* located at RX side of half-connection  */
-	FEAT_AT_TX   = 2,	/* located at TX side of half-connection  */
-	FEAT_SP      = 4,	/* server-priority reconciliation (6.3.1) */
-	FEAT_NN	     = 8,	/* non-negotiable reconciliation (6.3.2)  */
-	FEAT_UNKNOWN = 0xFF	/* not understood or invalid feature	  */
-};
-
-enum dccp_feat_state {
-	FEAT_DEFAULT = 0,	/* using default values from 6.4 */
-	FEAT_INITIALISING,	/* feature is being initialised  */
-	FEAT_CHANGING,		/* Change sent but not confirmed yet */
-	FEAT_UNSTABLE,		/* local modification in state CHANGING */
-	FEAT_STABLE		/* both ends (think they) agree */
-};
+#ifdef CONFIG_IP_DCCP_DEBUG
+extern const char *dccp_feat_typename(const u8 type);
+extern const char *dccp_feat_name(const u8 feat);
 
-/**
- * dccp_feat_val  -  Container for SP or NN feature values
- * @nn:     single NN value
- * @sp.vec: single SP value plus optional preference list
- * @sp.len: length of @sp.vec in bytes
- */
-typedef union {
-	u64 nn;
-	struct {
-		u8	*vec;
-		u8	len;
-	}   sp;
-} dccp_feat_val;
-
-/**
- * struct feat_entry  -  Data structure to perform feature negotiation
- * @feat_num: one of %dccp_feature_numbers
- * @val: feature's current value (SP features may have preference list)
- * @state: feature's current state
- * @needs_mandatory: whether Mandatory options should be sent
- * @needs_confirm: whether to send a Confirm instead of a Change
- * @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm)
- * @is_local: feature location (1) or feature-remote (0)
- * @node: list pointers, entries arranged in FIFO order
- */
-struct dccp_feat_entry {
-	u8                      feat_num;
-	dccp_feat_val           val;
-	enum dccp_feat_state    state:8;
-	bool			needs_mandatory:1,
-				needs_confirm:1,
-				empty_confirm:1,
-				is_local:1;
-
-	struct list_head	node;
-};
-
-static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry)
+static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
 {
-	if (entry->needs_confirm)
-		return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R;
-	return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R;
+	dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type),
+					   dccp_feat_name(feat), feat, val);
 }
+#else
+#define dccp_feat_debug(type, feat, val)
+#endif /* CONFIG_IP_DCCP_DEBUG */
+
+extern int  dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
+			     u8 *val, u8 len, gfp_t gfp);
+extern int  dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature,
+				  u8 *val, u8 len);
+extern int  dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
+				   u8 *val, u8 len);
+extern void dccp_feat_clean(struct dccp_minisock *dmsk);
+extern int  dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
+extern int  dccp_feat_init(struct dccp_minisock *dmsk);
 
-/**
- * struct ccid_dependency  -  Track changes resulting from choosing a CCID
- * @dependent_feat: one of %dccp_feature_numbers
- * @is_local: local (1) or remote (0) @dependent_feat
- * @is_mandatory: whether presence of @dependent_feat is mission-critical or not
- * @val: corresponding default value for @dependent_feat (u8 is sufficient here)
- */
-struct ccid_dependency {
-	u8	dependent_feat;
-	bool	is_local:1,
-		is_mandatory:1;
-	u8	val;
-};
-
-/*
- * Sysctls to seed defaults for feature negotiation
- */
-extern unsigned long sysctl_dccp_sequence_window;
-extern int	     sysctl_dccp_rx_ccid;
-extern int	     sysctl_dccp_tx_ccid;
-
-extern int  dccp_feat_init(struct sock *sk);
-extern void dccp_feat_initialise_sysctls(void);
-extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
-				  u8 const *list, u8 len);
-extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
-extern int  dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
-				    u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
-extern int  dccp_feat_clone_list(struct list_head const *, struct list_head *);
-
-/*
- * Encoding variable-length options and their maximum length.
- *
- * This affects NN options (SP options are all u8) and other variable-length
- * options (see table 3 in RFC 4340). The limit is currently given the Sequence
- * Window NN value (sec. 7.5.2) and the NDP count (sec. 7.7) option, all other
- * options consume less than 6 bytes (timestamps are 4 bytes).
- * When updating this constant (e.g. due to new internet drafts / RFCs), make
- * sure that you also update all code which refers to it.
- */
-#define DCCP_OPTVAL_MAXLEN	6
-
-extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
-extern u64  dccp_decode_value_var(const u8 *bf, const u8 len);
-
-extern int  dccp_insert_option_mandatory(struct sk_buff *skb);
-extern int  dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
-			       u8 *val, u8 len, bool repeat_first);
 #endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index df0e6714aa1..779d0ed9ae9 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -159,15 +159,13 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
 	dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
 }
 
-static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb)
+static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
 {
-	struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec;
+	struct dccp_sock *dp = dccp_sk(sk);
 
-	if (av == NULL)
-		return;
-	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
-		dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq);
-	dccp_ackvec_input(av, skb);
+	if (dccp_msk(sk)->dccpms_send_ack_vector)
+		dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
+					    DCCP_SKB_CB(skb)->dccpd_ack_seq);
 }
 
 static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
@@ -366,13 +364,22 @@ discard:
 int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			 const struct dccp_hdr *dh, const unsigned len)
 {
+	struct dccp_sock *dp = dccp_sk(sk);
+
 	if (dccp_check_seqno(sk, skb))
 		goto discard;
 
 	if (dccp_parse_options(sk, NULL, skb))
 		return 1;
 
-	dccp_handle_ackvec_processing(sk, skb);
+	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+		dccp_event_ack_recv(sk, skb);
+
+	if (dccp_msk(sk)->dccpms_send_ack_vector &&
+	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
+			    DCCP_SKB_CB(skb)->dccpd_seq,
+			    DCCP_ACKVEC_STATE_RECEIVED))
+		goto discard;
 	dccp_deliver_input_to_ccids(sk, skb);
 
 	return __dccp_rcv_established(sk, skb, dh, len);
@@ -414,33 +421,40 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 			goto out_invalid_packet;
 		}
 
-		/*
-		 * If option processing (Step 8) failed, return 1 here so that
-		 * dccp_v4_do_rcv() sends a Reset. The Reset code depends on
-		 * the option type and is set in dccp_parse_options().
-		 */
 		if (dccp_parse_options(sk, NULL, skb))
-			return 1;
+			goto out_invalid_packet;
 
 		/* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
 		if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
 			dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
 			    dp->dccps_options_received.dccpor_timestamp_echo));
 
+		if (dccp_msk(sk)->dccpms_send_ack_vector &&
+		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
+				    DCCP_SKB_CB(skb)->dccpd_seq,
+				    DCCP_ACKVEC_STATE_RECEIVED))
+			goto out_invalid_packet; /* FIXME: change error code */
+
 		/* Stop the REQUEST timer */
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 		WARN_ON(sk->sk_send_head == NULL);
 		kfree_skb(sk->sk_send_head);
 		sk->sk_send_head = NULL;
 
+		dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+		dccp_update_gsr(sk, dp->dccps_isr);
 		/*
-		 * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect
-		 * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH
-		 * is done as part of activating the feature values below, since
-		 * these settings depend on the local/remote Sequence Window
-		 * features, which were undefined or not confirmed until now.
+		 * SWL and AWL are initially adjusted so that they are not less than
+		 * the initial Sequence Numbers received and sent, respectively:
+		 *	SWL := max(GSR + 1 - floor(W/4), ISR),
+		 *	AWL := max(GSS - W' + 1, ISS).
+		 * These adjustments MUST be applied only at the beginning of the
+		 * connection.
+		 *
+		 * AWL was adjusted in dccp_v4_connect -acme
 		 */
-		dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+		dccp_set_seqno(&dp->dccps_swl,
+			       max48(dp->dccps_swl, dp->dccps_isr));
 
 		dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 
@@ -461,15 +475,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		 */
 		dccp_set_state(sk, DCCP_PARTOPEN);
 
-		/*
-		 * If feature negotiation was successful, activate features now;
-		 * an activation failure means that this host could not activate
-		 * one ore more features (e.g. insufficient memory), which would
-		 * leave at least one feature in an undefined state.
-		 */
-		if (dccp_feat_activate_values(sk, &dp->dccps_featneg))
-			goto unable_to_proceed;
-
 		/* Make sure socket is routed, for correct metrics. */
 		icsk->icsk_af_ops->rebuild_header(sk);
 
@@ -504,16 +509,6 @@ out_invalid_packet:
 	/* dccp_v4_do_rcv will send a reset */
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
 	return 1;
-
-unable_to_proceed:
-	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED;
-	/*
-	 * We mark this socket as no longer usable, so that the loop in
-	 * dccp_sendmsg() terminates and the application gets notified.
-	 */
-	dccp_set_state(sk, DCCP_CLOSED);
-	sk->sk_err = ECOMM;
-	return 1;
 }
 
 static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -595,6 +590,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
 								    skb) < 0)
 				return 1;
+
+			/* FIXME: do congestion control initialization */
 			goto discard;
 		}
 		if (dh->dccph_type == DCCP_PKT_RESET)
@@ -603,35 +600,29 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/* Caller (dccp_v4_do_rcv) will send Reset */
 		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		return 1;
-	} else if (sk->sk_state == DCCP_CLOSED) {
-		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
-		return 1;
 	}
 
-	/* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */
-	if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb))
-		goto discard;
+	if (sk->sk_state != DCCP_REQUESTING) {
+		if (dccp_check_seqno(sk, skb))
+			goto discard;
 
-	/*
-	 *   Step 7: Check for unexpected packet types
-	 *      If (S.is_server and P.type == Response)
-	 *	    or (S.is_client and P.type == Request)
-	 *	    or (S.state == RESPOND and P.type == Data),
-	 *	  Send Sync packet acknowledging P.seqno
-	 *	  Drop packet and return
-	 */
-	if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
-	     dh->dccph_type == DCCP_PKT_RESPONSE) ||
-	    (dp->dccps_role == DCCP_ROLE_CLIENT &&
-	     dh->dccph_type == DCCP_PKT_REQUEST) ||
-	    (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
-		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
-		goto discard;
-	}
+		/*
+		 * Step 8: Process options and mark acknowledgeable
+		 */
+		if (dccp_parse_options(sk, NULL, skb))
+			return 1;
 
-	/*  Step 8: Process options */
-	if (dccp_parse_options(sk, NULL, skb))
-		return 1;
+		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+			dccp_event_ack_recv(sk, skb);
+
+		if (dccp_msk(sk)->dccpms_send_ack_vector &&
+		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
+				    DCCP_SKB_CB(skb)->dccpd_seq,
+				    DCCP_ACKVEC_STATE_RECEIVED))
+			goto discard;
+
+		dccp_deliver_input_to_ccids(sk, skb);
+	}
 
 	/*
 	 *  Step 9: Process Reset
@@ -640,22 +631,44 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	 *		S.state := TIMEWAIT
 	 *		Set TIMEWAIT timer
 	 *		Drop packet and return
-	 */
+	*/
 	if (dh->dccph_type == DCCP_PKT_RESET) {
 		dccp_rcv_reset(sk, skb);
 		return 0;
-	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {	/* Step 13 */
+		/*
+		 *   Step 7: Check for unexpected packet types
+		 *      If (S.is_server and P.type == Response)
+		 *	    or (S.is_client and P.type == Request)
+		 *	    or (S.state == RESPOND and P.type == Data),
+		 *	  Send Sync packet acknowledging P.seqno
+		 *	  Drop packet and return
+		 */
+	} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
+		    dh->dccph_type == DCCP_PKT_RESPONSE) ||
+		    (dp->dccps_role == DCCP_ROLE_CLIENT &&
+		     dh->dccph_type == DCCP_PKT_REQUEST) ||
+		    (sk->sk_state == DCCP_RESPOND &&
+		     dh->dccph_type == DCCP_PKT_DATA)) {
+		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
+		goto discard;
+	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
 		if (dccp_rcv_closereq(sk, skb))
 			return 0;
 		goto discard;
-	} else if (dh->dccph_type == DCCP_PKT_CLOSE) {		/* Step 14 */
+	} else if (dh->dccph_type == DCCP_PKT_CLOSE) {
 		if (dccp_rcv_close(sk, skb))
 			return 0;
 		goto discard;
 	}
 
 	switch (sk->sk_state) {
+	case DCCP_CLOSED:
+		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+		return 1;
+
 	case DCCP_REQUESTING:
+		/* FIXME: do congestion control initialization */
+
 		queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
 		if (queued >= 0)
 			return queued;
@@ -663,12 +676,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		__kfree_skb(skb);
 		return 0;
 
-	case DCCP_PARTOPEN:
-		/* Step 8: if using Ack Vectors, mark packet acknowledgeable */
-		dccp_handle_ackvec_processing(sk, skb);
-		dccp_deliver_input_to_ccids(sk, skb);
-		/* fall through */
 	case DCCP_RESPOND:
+	case DCCP_PARTOPEN:
 		queued = dccp_rcv_respond_partopen_state_process(sk, skb,
 								 dh, len);
 		break;
@@ -707,7 +716,16 @@ u32 dccp_sample_rtt(struct sock *sk, long delta)
 	/* dccpor_elapsed_time is either zeroed out or set and > 0 */
 	delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10;
 
-	return dccp_sane_rtt(delta);
+	if (unlikely(delta <= 0)) {
+		DCCP_WARN("unusable RTT sample %ld, using min\n", delta);
+		return DCCP_SANE_RTT_MIN;
+	}
+	if (unlikely(delta > DCCP_SANE_RTT_MAX)) {
+		DCCP_WARN("RTT sample %ld too large, using max\n", delta);
+		return DCCP_SANE_RTT_MAX;
+	}
+
+	return delta;
 }
 
 EXPORT_SYMBOL_GPL(dccp_sample_rtt);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b623f6b2548..882c5c4de69 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -545,7 +545,6 @@ out:
 
 static void dccp_v4_reqsk_destructor(struct request_sock *req)
 {
-	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
 	kfree(inet_rsk(req)->opt);
 }
 
@@ -596,8 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (req == NULL)
 		goto drop;
 
-	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
-		goto drop_and_free;
+	dccp_reqsk_init(req, skb);
 
 	dreq = dccp_rsk(req);
 	if (dccp_parse_options(sk, dreq, skb))
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index ad6212e0043..5e1ee0da2c4 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -302,7 +302,6 @@ done:
 
 static void dccp_v6_reqsk_destructor(struct request_sock *req)
 {
-	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
 	if (inet6_rsk(req)->pktopts != NULL)
 		kfree_skb(inet6_rsk(req)->pktopts);
 }
@@ -425,8 +424,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (req == NULL)
 		goto drop;
 
-	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
-		goto drop_and_free;
+	dccp_reqsk_init(req, skb);
 
 	dreq = dccp_rsk(req);
 	if (dccp_parse_options(sk, dreq, skb))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index f4d9c8f60ed..b2804e2d1b8 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -42,6 +42,16 @@ struct inet_timewait_death_row dccp_death_row = {
 
 EXPORT_SYMBOL_GPL(dccp_death_row);
 
+void dccp_minisock_init(struct dccp_minisock *dmsk)
+{
+	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
+	dmsk->dccpms_rx_ccid	     = sysctl_dccp_feat_rx_ccid;
+	dmsk->dccpms_tx_ccid	     = sysctl_dccp_feat_tx_ccid;
+	dmsk->dccpms_ack_ratio	     = sysctl_dccp_feat_ack_ratio;
+	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
+	dmsk->dccpms_send_ndp_count  = sysctl_dccp_feat_send_ndp_count;
+}
+
 void dccp_time_wait(struct sock *sk, int state, int timeo)
 {
 	struct inet_timewait_sock *tw = NULL;
@@ -102,9 +112,10 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 	struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
 
 	if (newsk != NULL) {
-		struct dccp_request_sock *dreq = dccp_rsk(req);
+		const struct dccp_request_sock *dreq = dccp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 		struct dccp_sock *newdp = dccp_sk(newsk);
+		struct dccp_minisock *newdmsk = dccp_msk(newsk);
 
 		newdp->dccps_role	    = DCCP_ROLE_SERVER;
 		newdp->dccps_hc_rx_ackvec   = NULL;
@@ -114,32 +125,65 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
 		newicsk->icsk_rto	    = DCCP_TIMEOUT_INIT;
 
-		INIT_LIST_HEAD(&newdp->dccps_featneg);
+		if (dccp_feat_clone(sk, newsk))
+			goto out_free;
+
+		if (newdmsk->dccpms_send_ack_vector) {
+			newdp->dccps_hc_rx_ackvec =
+						dccp_ackvec_alloc(GFP_ATOMIC);
+			if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
+				goto out_free;
+		}
+
+		newdp->dccps_hc_rx_ccid =
+			    ccid_hc_rx_new(newdmsk->dccpms_rx_ccid,
+					   newsk, GFP_ATOMIC);
+		newdp->dccps_hc_tx_ccid =
+			    ccid_hc_tx_new(newdmsk->dccpms_tx_ccid,
+					   newsk, GFP_ATOMIC);
+		if (unlikely(newdp->dccps_hc_rx_ccid == NULL ||
+			     newdp->dccps_hc_tx_ccid == NULL)) {
+			dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
+			ccid_hc_rx_delete(newdp->dccps_hc_rx_ccid, newsk);
+			ccid_hc_tx_delete(newdp->dccps_hc_tx_ccid, newsk);
+out_free:
+			/* It is still raw copy of parent, so invalidate
+			 * destructor and make plain sk_free() */
+			newsk->sk_destruct = NULL;
+			sk_free(newsk);
+			return NULL;
+		}
+
 		/*
 		 * Step 3: Process LISTEN state
 		 *
 		 *    Choose S.ISS (initial seqno) or set from Init Cookies
 		 *    Initialize S.GAR := S.ISS
-		 *    Set S.ISR, S.GSR from packet (or Init Cookies)
-		 *
-		 *    Setting AWL/AWH and SWL/SWH happens as part of the feature
-		 *    activation below, as these windows all depend on the local
-		 *    and remote Sequence Window feature values (7.5.2).
+		 *    Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
 		 */
-		newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss;
-		newdp->dccps_gar = newdp->dccps_iss;
-		newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr;
+
+		/* See dccp_v4_conn_request */
+		newdmsk->dccpms_sequence_window = req->rcv_wnd;
+
+		newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
+		dccp_update_gss(newsk, dreq->dreq_iss);
+
+		newdp->dccps_isr = dreq->dreq_isr;
+		dccp_update_gsr(newsk, dreq->dreq_isr);
 
 		/*
-		 * Activate features: initialise CCIDs, sequence windows etc.
+		 * SWL and AWL are initially adjusted so that they are not less than
+		 * the initial Sequence Numbers received and sent, respectively:
+		 *	SWL := max(GSR + 1 - floor(W/4), ISR),
+		 *	AWL := max(GSS - W' + 1, ISS).
+		 * These adjustments MUST be applied only at the beginning of the
+		 * connection.
 		 */
-		if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
-			/* It is still raw copy of parent, so invalidate
-			 * destructor and make plain sk_free() */
-			newsk->sk_destruct = NULL;
-			sk_free(newsk);
-			return NULL;
-		}
+		dccp_set_seqno(&newdp->dccps_swl,
+			       max48(newdp->dccps_swl, newdp->dccps_isr));
+		dccp_set_seqno(&newdp->dccps_awl,
+			       max48(newdp->dccps_awl, newdp->dccps_iss));
+
 		dccp_init_xmit_timers(newsk);
 
 		DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
@@ -260,17 +304,14 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 
 EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
 
-int dccp_reqsk_init(struct request_sock *req,
-		    struct dccp_sock const *dp, struct sk_buff const *skb)
+void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
 {
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
 	inet_rsk(req)->rmt_port	  = dccp_hdr(skb)->dccph_sport;
 	inet_rsk(req)->acked	  = 0;
+	req->rcv_wnd		  = sysctl_dccp_feat_sequence_window;
 	dreq->dreq_timestamp_echo = 0;
-
-	/* inherit feature negotiation options from listening socket */
-	return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
 }
 
 EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index e5a32979d7d..0809b63cb05 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -23,20 +23,23 @@
 #include "dccp.h"
 #include "feat.h"
 
-u64 dccp_decode_value_var(const u8 *bf, const u8 len)
+int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
+int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
+int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
+int sysctl_dccp_feat_ack_ratio	      = DCCPF_INITIAL_ACK_RATIO;
+int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
+int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;
+
+static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
 {
-	u64 value = 0;
+	u32 value = 0;
 
-	if (len >= DCCP_OPTVAL_MAXLEN)
-		value += ((u64)*bf++) << 40;
-	if (len > 4)
-		value += ((u64)*bf++) << 32;
 	if (len > 3)
-		value += ((u64)*bf++) << 24;
+		value += *bf++ << 24;
 	if (len > 2)
-		value += ((u64)*bf++) << 16;
+		value += *bf++ << 16;
 	if (len > 1)
-		value += ((u64)*bf++) << 8;
+		value += *bf++ << 8;
 	if (len > 0)
 		value += *bf;
 
@@ -54,6 +57,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 	struct dccp_sock *dp = dccp_sk(sk);
 	const struct dccp_hdr *dh = dccp_hdr(skb);
 	const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
+	u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
 	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
 	unsigned char *opt_ptr = options;
 	const unsigned char *opt_end = (unsigned char *)dh +
@@ -95,11 +99,18 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		}
 
 		/*
+		 * CCID-Specific Options (from RFC 4340, sec. 10.3):
+		 *
+		 * Option numbers 128 through 191 are for options sent from the
+		 * HC-Sender to the HC-Receiver; option numbers 192 through 255
+		 * are for options sent from the HC-Receiver to	the HC-Sender.
+		 *
 		 * CCID-specific options are ignored during connection setup, as
 		 * negotiation may still be in progress (see RFC 4340, 10.3).
 		 * The same applies to Ack Vectors, as these depend on the CCID.
+		 *
 		 */
-		if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC ||
+		if (dreq != NULL && (opt >= 128 ||
 		    opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
 			goto ignore_option;
 
@@ -120,13 +131,43 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk),
 				      (unsigned long long)opt_recv->dccpor_ndp);
 			break;
-		case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R:
-			if (pkt_type == DCCP_PKT_DATA)      /* RFC 4340, 6 */
+		case DCCPO_CHANGE_L:
+			/* fall through */
+		case DCCPO_CHANGE_R:
+			if (pkt_type == DCCP_PKT_DATA)
 				break;
-			rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
-						    *value, value + 1, len - 1);
-			if (rc)
-				goto out_featneg_failed;
+			if (len < 2)
+				goto out_invalid_option;
+			rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
+						   len - 1);
+			/*
+			 * When there is a change error, change_recv is
+			 * responsible for dealing with it.  i.e. reply with an
+			 * empty confirm.
+			 * If the change was mandatory, then we need to die.
+			 */
+			if (rc && mandatory)
+				goto out_invalid_option;
+			break;
+		case DCCPO_CONFIRM_L:
+			/* fall through */
+		case DCCPO_CONFIRM_R:
+			if (pkt_type == DCCP_PKT_DATA)
+				break;
+			if (len < 2)	/* FIXME this disallows empty confirm */
+				goto out_invalid_option;
+			if (dccp_feat_confirm_recv(sk, opt, *value,
+						   value + 1, len - 1))
+				goto out_invalid_option;
+			break;
+		case DCCPO_ACK_VECTOR_0:
+		case DCCPO_ACK_VECTOR_1:
+			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
+				break;
+
+			if (dccp_msk(sk)->dccpms_send_ack_vector &&
+			    dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
+				goto out_invalid_option;
 			break;
 		case DCCPO_TIMESTAMP:
 			if (len != 4)
@@ -154,8 +195,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 				      dccp_role(sk), ntohl(opt_val),
 				      (unsigned long long)
 				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
-			/* schedule an Ack in case this sender is quiescent */
-			inet_csk_schedule_ack(sk);
 			break;
 		case DCCPO_TIMESTAMP_ECHO:
 			if (len != 4 && len != 6 && len != 8)
@@ -212,25 +251,23 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
 				      dccp_role(sk), elapsed_time);
 			break;
-		case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
+		case 128 ... 191: {
+			const u16 idx = value - options;
+
 			if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
-						     pkt_type, opt, value, len))
+						     opt, len, idx,
+						     value) != 0)
 				goto out_invalid_option;
+		}
 			break;
-		case DCCPO_ACK_VECTOR_0:
-		case DCCPO_ACK_VECTOR_1:
-			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
-				break;
-			/*
-			 * Ack vectors are processed by the TX CCID if it is
-			 * interested. The RX CCID need not parse Ack Vectors,
-			 * since it is only interested in clearing old state.
-			 * Fall through.
-			 */
-		case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
+		case 192 ... 255: {
+			const u16 idx = value - options;
+
 			if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
-						     pkt_type, opt, value, len))
+						     opt, len, idx,
+						     value) != 0)
 				goto out_invalid_option;
+		}
 			break;
 		default:
 			DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
@@ -252,10 +289,8 @@ out_nonsensical_length:
 
 out_invalid_option:
 	DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
-	rc = DCCP_RESET_CODE_OPTION_ERROR;
-out_featneg_failed:
-	DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
-	DCCP_SKB_CB(skb)->dccpd_reset_code = rc;
+	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
+	DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
 	DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt;
 	DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0;
 	DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0;
@@ -264,12 +299,9 @@ out_featneg_failed:
 
 EXPORT_SYMBOL_GPL(dccp_parse_options);
 
-void dccp_encode_value_var(const u64 value, u8 *to, const u8 len)
+static void dccp_encode_value_var(const u32 value, unsigned char *to,
+				  const unsigned int len)
 {
-	if (len >= DCCP_OPTVAL_MAXLEN)
-		*to++ = (value & 0xFF0000000000ull) >> 40;
-	if (len > 4)
-		*to++ = (value & 0xFF00000000ull) >> 32;
 	if (len > 3)
 		*to++ = (value & 0xFF000000) >> 24;
 	if (len > 2)
@@ -429,140 +461,92 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
 	return 0;
 }
 
-static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
+				u8 *val, u8 len)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
-	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-	const u16 buflen = dccp_ackvec_buflen(av);
-	/* Figure out how many options do we need to represent the ackvec */
-	const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = buflen + 2 * nr_opts;
-	u8 i, nonce = 0;
-	const unsigned char *tail, *from;
-	unsigned char *to;
+	u8 *to;
 
-	if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
-		DCCP_WARN("Lacking space for %u bytes on %s packet\n", len,
-			  dccp_packet_name(dcb->dccpd_type));
+	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) {
+		DCCP_WARN("packet too small for feature %d option!\n", feat);
 		return -1;
 	}
-	/*
-	 * Since Ack Vectors are variable-length, we can not always predict
-	 * their size. To catch exception cases where the space is running out
-	 * on the skb, a separate Sync is scheduled to carry the Ack Vector.
-	 */
-	if (len > DCCPAV_MIN_OPTLEN &&
-	    len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) {
-		DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), "
-			  "MPS=%u ==> reduce payload size?\n", len, skb->len,
-			  dcb->dccpd_opt_len, dp->dccps_mss_cache);
-		dp->dccps_sync_scheduled = 1;
-		return 0;
-	}
-	dcb->dccpd_opt_len += len;
 
-	to   = skb_push(skb, len);
-	len  = buflen;
-	from = av->av_buf + av->av_buf_head;
-	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
+	DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3;
 
-	for (i = 0; i < nr_opts; ++i) {
-		int copylen = len;
-
-		if (len > DCCP_SINGLE_OPT_MAXLEN)
-			copylen = DCCP_SINGLE_OPT_MAXLEN;
-
-		/*
-		 * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
-		 * its type; ack_nonce is the sum of all individual buf_nonce's.
-		 */
-		nonce ^= av->av_buf_nonce[i];
-
-		*to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
-		*to++ = copylen + 2;
-
-		/* Check if buf_head wraps */
-		if (from + copylen > tail) {
-			const u16 tailsize = tail - from;
-
-			memcpy(to, from, tailsize);
-			to	+= tailsize;
-			len	-= tailsize;
-			copylen	-= tailsize;
-			from	= av->av_buf;
-		}
-
-		memcpy(to, from, copylen);
-		from += copylen;
-		to   += copylen;
-		len  -= copylen;
-	}
-	/*
-	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
-	 */
-	if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce))
-		return -ENOBUFS;
-	return 0;
-}
+	to    = skb_push(skb, len + 3);
+	*to++ = type;
+	*to++ = len + 3;
+	*to++ = feat;
 
-/**
- * dccp_insert_option_mandatory  -  Mandatory option (5.8.2)
- * Note that since we are using skb_push, this function needs to be called
- * _after_ inserting the option it is supposed to influence (stack order).
- */
-int dccp_insert_option_mandatory(struct sk_buff *skb)
-{
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN)
-		return -1;
+	if (len)
+		memcpy(to, val, len);
 
-	DCCP_SKB_CB(skb)->dccpd_opt_len++;
-	*skb_push(skb, 1) = DCCPO_MANDATORY;
+	dccp_pr_debug("%s(%s (%d), ...), length %d\n",
+		      dccp_feat_typename(type),
+		      dccp_feat_name(feat), feat, len);
 	return 0;
 }
 
-/**
- * dccp_insert_fn_opt  -  Insert single Feature-Negotiation option into @skb
- * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R
- * @feat: one out of %dccp_feature_numbers
- * @val: NN value or SP array (preferred element first) to copy
- * @len: true length of @val in bytes (excluding first element repetition)
- * @repeat_first: whether to copy the first element of @val twice
- * The last argument is used to construct Confirm options, where the preferred
- * value and the preference list appear separately (RFC 4340, 6.3.1). Preference
- * lists are kept such that the preferred entry is always first, so we only need
- * to copy twice, and avoid the overhead of cloning into a bigger array.
- */
-int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
-		       u8 *val, u8 len, bool repeat_first)
+static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
 {
-	u8 tot_len, *to;
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_minisock *dmsk = dccp_msk(sk);
+	struct dccp_opt_pend *opt, *next;
+	int change = 0;
+
+	/* confirm any options [NN opts] */
+	list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
+		dccp_insert_feat_opt(skb, opt->dccpop_type,
+				     opt->dccpop_feat, opt->dccpop_val,
+				     opt->dccpop_len);
+		/* fear empty confirms */
+		if (opt->dccpop_val)
+			kfree(opt->dccpop_val);
+		kfree(opt);
+	}
+	INIT_LIST_HEAD(&dmsk->dccpms_conf);
+
+	/* see which features we need to send */
+	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+		/* see if we need to send any confirm */
+		if (opt->dccpop_sc) {
+			dccp_insert_feat_opt(skb, opt->dccpop_type + 1,
+					     opt->dccpop_feat,
+					     opt->dccpop_sc->dccpoc_val,
+					     opt->dccpop_sc->dccpoc_len);
+
+			BUG_ON(!opt->dccpop_sc->dccpoc_val);
+			kfree(opt->dccpop_sc->dccpoc_val);
+			kfree(opt->dccpop_sc);
+			opt->dccpop_sc = NULL;
+		}
 
-	/* take the `Feature' field and possible repetition into account */
-	if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) {
-		DCCP_WARN("length %u for feature %u too large\n", len, feat);
-		return -1;
+		/* any option not confirmed, re-send it */
+		if (!opt->dccpop_conf) {
+			dccp_insert_feat_opt(skb, opt->dccpop_type,
+					     opt->dccpop_feat, opt->dccpop_val,
+					     opt->dccpop_len);
+			change++;
+		}
 	}
 
-	if (unlikely(val == NULL || len == 0))
-		len = repeat_first = 0;
-	tot_len = 3 + repeat_first + len;
+	/* Retransmit timer.
+	 * If this is the master listening sock, we don't set a timer on it.  It
+	 * should be fine because if the dude doesn't receive our RESPONSE
+	 * [which will contain the CHANGE] he will send another REQUEST which
+	 * will "retrnasmit" the change.
+	 */
+	if (change && dp->dccps_role != DCCP_ROLE_LISTEN) {
+		dccp_pr_debug("reset feat negotiation timer %p\n", sk);
 
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) {
-		DCCP_WARN("packet too small for feature %d option!\n", feat);
-		return -1;
+		/* XXX don't reset the timer on re-transmissions.  I.e. reset it
+		 * only when sending new stuff i guess.  Currently the timer
+		 * never backs off because on re-transmission it just resets it!
+		 */
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+					  inet_csk(sk)->icsk_rto, DCCP_RTO_MAX);
 	}
-	DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len;
-
-	to    = skb_push(skb, tot_len);
-	*to++ = type;
-	*to++ = tot_len;
-	*to++ = feat;
 
-	if (repeat_first)
-		*to++ = *val;
-	if (len)
-		memcpy(to, val, len);
 	return 0;
 }
 
@@ -581,30 +565,19 @@ static void dccp_insert_option_padding(struct sk_buff *skb)
 int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_minisock *dmsk = dccp_msk(sk);
 
 	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
 
-	if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb))
+	if (dmsk->dccpms_send_ndp_count &&
+	    dccp_insert_option_ndp(sk, skb))
 		return -1;
 
-	if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
-
-		/* Feature Negotiation */
-		if (dccp_feat_insert_opts(dp, NULL, skb))
+	if (!dccp_packet_without_ack(skb)) {
+		if (dmsk->dccpms_send_ack_vector &&
+		    dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
+		    dccp_insert_option_ackvec(sk, skb))
 			return -1;
-
-		if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) {
-			/*
-			 * Obtain RTT sample from Request/Response exchange.
-			 * This is currently used in CCID 3 initialisation.
-			 */
-			if (dccp_insert_option_timestamp(sk, skb))
-				return -1;
-
-		} else if (dccp_ackvec_pending(sk) &&
-			   dccp_insert_option_ackvec(sk, skb)) {
-				return -1;
-		}
 	}
 
 	if (dp->dccps_hc_rx_insert_options) {
@@ -613,6 +586,21 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 		dp->dccps_hc_rx_insert_options = 0;
 	}
 
+	/* Feature negotiation */
+	/* Data packets can't do feat negotiation */
+	if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA &&
+	    DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK &&
+	    dccp_insert_options_feat(sk, skb))
+		return -1;
+
+	/*
+	 * Obtain RTT sample from Request/Response exchange.
+	 * This is currently used in CCID 3 initialisation.
+	 */
+	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
+	    dccp_insert_option_timestamp(sk, skb))
+		return -1;
+
 	if (dp->dccps_timestamp_echo != 0 &&
 	    dccp_insert_option_timestamp_echo(dp, NULL, skb))
 		return -1;
@@ -625,9 +613,6 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
 {
 	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
 
-	if (dccp_feat_insert_opts(NULL, dreq, skb))
-		return -1;
-
 	if (dreq->dreq_timestamp_echo != 0 &&
 	    dccp_insert_option_timestamp_echo(NULL, dreq, skb))
 		return -1;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 2532797a800..d06945c7d3d 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -26,13 +26,11 @@ static inline void dccp_event_ack_sent(struct sock *sk)
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
 
-/* enqueue @skb on sk_send_head for retransmission, return clone to send now */
-static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
+static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
 {
 	skb_set_owner_w(skb, sk);
 	WARN_ON(sk->sk_send_head);
 	sk->sk_send_head = skb;
-	return skb_clone(sk->sk_send_head, gfp_any());
 }
 
 /*
@@ -163,27 +161,21 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
 	u32 ccmps = dccp_determine_ccmps(dp);
-	u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
+	int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
 
 	/* Account for header lengths and IPv4/v6 option overhead */
 	cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
 		    sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
 
 	/*
-	 * Leave enough headroom for common DCCP header options.
-	 * This only considers options which may appear on DCCP-Data packets, as
-	 * per table 3 in RFC 4340, 5.8. When running out of space for other
-	 * options (eg. Ack Vector which can take up to 255 bytes), it is better
-	 * to schedule a separate Ack. Thus we leave headroom for the following:
-	 *  - 1 byte for Slow Receiver (11.6)
-	 *  - 6 bytes for Timestamp (13.1)
-	 *  - 10 bytes for Timestamp Echo (13.3)
-	 *  - 8 bytes for NDP count (7.7, when activated)
-	 *  - 6 bytes for Data Checksum (9.3)
-	 *  - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
+	 * FIXME: this should come from the CCID infrastructure, where, say,
+	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
+	 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
+	 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
+	 * make it a multiple of 4
 	 */
-	cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
-			   (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
+
+	cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
 
 	/* And store cached results */
 	icsk->icsk_pmtu_cookie = pmtu;
@@ -208,158 +200,95 @@ void dccp_write_space(struct sock *sk)
 }
 
 /**
- * dccp_wait_for_ccid  -  Await CCID send permission
+ * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
  * @sk:    socket to wait for
- * @delay: timeout in jiffies
- * This is used by CCIDs which need to delay the send time in process context.
+ * @skb:   current skb to pass on for waiting
+ * @delay: sleep timeout in milliseconds (> 0)
+ * This function is called by default when the socket is closed, and
+ * when a non-zero linger time is set on the socket. For consistency
  */
-static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
+static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
 {
+	struct dccp_sock *dp = dccp_sk(sk);
 	DEFINE_WAIT(wait);
-	long remaining;
-
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
-	sk->sk_write_pending++;
-	release_sock(sk);
+	unsigned long jiffdelay;
+	int rc;
 
-	remaining = schedule_timeout(delay);
-
-	lock_sock(sk);
-	sk->sk_write_pending--;
-	finish_wait(sk->sk_sleep, &wait);
+	do {
+		dccp_pr_debug("delayed send by %d msec\n", delay);
+		jiffdelay = msecs_to_jiffies(delay);
 
-	if (signal_pending(current) || sk->sk_err)
-		return -1;
-	return remaining;
-}
-
-/**
- * dccp_xmit_packet  -  Send data packet under control of CCID
- * Transmits next-queued payload and informs CCID to account for the packet.
- */
-static void dccp_xmit_packet(struct sock *sk)
-{
-	int err, len;
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct sk_buff *skb = dccp_qpolicy_pop(sk);
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
-	if (unlikely(skb == NULL))
-		return;
-	len = skb->len;
+		sk->sk_write_pending++;
+		release_sock(sk);
+		schedule_timeout(jiffdelay);
+		lock_sock(sk);
+		sk->sk_write_pending--;
 
-	if (sk->sk_state == DCCP_PARTOPEN) {
-		const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
-		/*
-		 * See 8.1.5 - Handshake Completion.
-		 *
-		 * For robustness we resend Confirm options until the client has
-		 * entered OPEN. During the initial feature negotiation, the MPS
-		 * is smaller than usual, reduced by the Change/Confirm options.
-		 */
-		if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
-			DCCP_WARN("Payload too large (%d) for featneg.\n", len);
-			dccp_send_ack(sk);
-			dccp_feat_list_purge(&dp->dccps_featneg);
-		}
+		if (sk->sk_err)
+			goto do_error;
+		if (signal_pending(current))
+			goto do_interrupted;
 
-		inet_csk_schedule_ack(sk);
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-					      inet_csk(sk)->icsk_rto,
-					      DCCP_RTO_MAX);
-		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
-	} else if (dccp_ack_pending(sk)) {
-		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
-	} else {
-		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
-	}
-
-	err = dccp_transmit_skb(sk, skb);
-	if (err)
-		dccp_pr_debug("transmit_skb() returned err=%d\n", err);
-	/*
-	 * Register this one as sent even if an error occurred. To the remote
-	 * end a local packet drop is indistinguishable from network loss, i.e.
-	 * any local drop will eventually be reported via receiver feedback.
-	 */
-	ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
-
-	/*
-	 * If the CCID needs to transfer additional header options out-of-band
-	 * (e.g. Ack Vectors or feature-negotiation options), it activates this
-	 * flag to schedule a Sync. The Sync will automatically incorporate all
-	 * currently pending header options, thus clearing the backlog.
-	 */
-	if (dp->dccps_sync_scheduled)
-		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
+		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
+	} while ((delay = rc) > 0);
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	return rc;
+
+do_error:
+	rc = -EPIPE;
+	goto out;
+do_interrupted:
+	rc = -EINTR;
+	goto out;
 }
 
-/**
- * dccp_flush_write_queue  -  Drain queue at end of connection
- * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
- * happen that the TX queue is not empty at the end of a connection. We give the
- * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
- * returns with a non-empty write queue, it will be purged later.
- */
-void dccp_flush_write_queue(struct sock *sk, long *time_budget)
+void dccp_write_xmit(struct sock *sk, int block)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
-	long delay, rc;
-
-	while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
-		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 
-		switch (ccid_packet_dequeue_eval(rc)) {
-		case CCID_PACKET_WILL_DEQUEUE_LATER:
-			/*
-			 * If the CCID determines when to send, the next sending
-			 * time is unknown or the CCID may not even send again
-			 * (e.g. remote host crashes or lost Ack packets).
-			 */
-			DCCP_WARN("CCID did not manage to send all packets\n");
-			return;
-		case CCID_PACKET_DELAY:
-			delay = msecs_to_jiffies(rc);
-			if (delay > *time_budget)
-				return;
-			rc = dccp_wait_for_ccid(sk, delay);
-			if (rc < 0)
-				return;
-			*time_budget -= (delay - rc);
-			/* check again if we can send now */
-			break;
-		case CCID_PACKET_SEND_AT_ONCE:
-			dccp_xmit_packet(sk);
-			break;
-		case CCID_PACKET_ERR:
-			skb_dequeue(&sk->sk_write_queue);
-			kfree_skb(skb);
-			dccp_pr_debug("packet discarded due to err=%ld\n", rc);
+	while ((skb = skb_peek(&sk->sk_write_queue))) {
+		int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
+
+		if (err > 0) {
+			if (!block) {
+				sk_reset_timer(sk, &dp->dccps_xmit_timer,
+						msecs_to_jiffies(err)+jiffies);
+				break;
+			} else
+				err = dccp_wait_for_ccid(sk, skb, err);
+			if (err && err != -EINTR)
+				DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
 		}
-	}
-}
 
-void dccp_write_xmit(struct sock *sk)
-{
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct sk_buff *skb;
+		skb_dequeue(&sk->sk_write_queue);
+		if (err == 0) {
+			struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+			const int len = skb->len;
 
-	while ((skb = dccp_qpolicy_top(sk))) {
-		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-
-		switch (ccid_packet_dequeue_eval(rc)) {
-		case CCID_PACKET_WILL_DEQUEUE_LATER:
-			return;
-		case CCID_PACKET_DELAY:
-			sk_reset_timer(sk, &dp->dccps_xmit_timer,
-				       jiffies + msecs_to_jiffies(rc));
-			return;
-		case CCID_PACKET_SEND_AT_ONCE:
-			dccp_xmit_packet(sk);
-			break;
-		case CCID_PACKET_ERR:
-			dccp_qpolicy_drop(sk, skb);
-			dccp_pr_debug("packet discarded due to err=%d\n", rc);
+			if (sk->sk_state == DCCP_PARTOPEN) {
+				/* See 8.1.5.  Handshake Completion */
+				inet_csk_schedule_ack(sk);
+				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+						  inet_csk(sk)->icsk_rto,
+						  DCCP_RTO_MAX);
+				dcb->dccpd_type = DCCP_PKT_DATAACK;
+			} else if (dccp_ack_pending(sk))
+				dcb->dccpd_type = DCCP_PKT_DATAACK;
+			else
+				dcb->dccpd_type = DCCP_PKT_DATA;
+
+			err = dccp_transmit_skb(sk, skb);
+			ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+			if (err)
+				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
+					 err);
+		} else {
+			dccp_pr_debug("packet discarded due to err=%d\n", err);
+			kfree_skb(skb);
 		}
 	}
 }
@@ -410,12 +339,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
 	DCCP_SKB_CB(skb)->dccpd_seq  = dreq->dreq_iss;
 
-	/* Resolve feature dependencies resulting from choice of CCID */
-	if (dccp_feat_server_ccid_dependencies(dreq))
-		goto response_failed;
-
-	if (dccp_insert_options_rsk(dreq, skb))
-		goto response_failed;
+	if (dccp_insert_options_rsk(dreq, skb)) {
+		kfree_skb(skb);
+		return NULL;
+	}
 
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
@@ -436,9 +363,6 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	inet_rsk(req)->acked = 1;
 	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
 	return skb;
-response_failed:
-	kfree_skb(skb);
-	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(dccp_make_response);
@@ -523,9 +447,8 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
 /*
  * Do all connect socket setups that can be done AF independent.
  */
-int dccp_connect(struct sock *sk)
+static inline void dccp_connect_init(struct sock *sk)
 {
-	struct sk_buff *skb;
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -535,13 +458,19 @@ int dccp_connect(struct sock *sk)
 
 	dccp_sync_mss(sk, dst_mtu(dst));
 
-	/* do not connect if feature negotiation setup fails */
-	if (dccp_feat_finalise_settings(dccp_sk(sk)))
-		return -EPROTO;
-
 	/* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
 	dp->dccps_gar = dp->dccps_iss;
 
+	icsk->icsk_retransmits = 0;
+}
+
+int dccp_connect(struct sock *sk)
+{
+	struct sk_buff *skb;
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	dccp_connect_init(sk);
+
 	skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
 	if (unlikely(skb == NULL))
 		return -ENOBUFS;
@@ -551,11 +480,11 @@ int dccp_connect(struct sock *sk)
 
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
 
-	dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
+	dccp_skb_entail(sk, skb);
+	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
 	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the REQUEST until an answer. */
-	icsk->icsk_retransmits = 0;
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 				  icsk->icsk_rto, DCCP_RTO_MAX);
 	return 0;
@@ -642,12 +571,6 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
 	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
 	DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
 
-	/*
-	 * Clear the flag in case the Sync was scheduled for out-of-band data,
-	 * such as carrying a long Ack Vector.
-	 */
-	dccp_sk(sk)->dccps_sync_scheduled = 0;
-
 	dccp_transmit_skb(sk, skb);
 }
 
@@ -676,7 +599,9 @@ void dccp_send_close(struct sock *sk, const int active)
 		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
 
 	if (active) {
-		skb = dccp_skb_entail(sk, skb);
+		dccp_write_xmit(sk, 1);
+		dccp_skb_entail(sk, skb);
+		dccp_transmit_skb(sk, skb_clone(skb, prio));
 		/*
 		 * Retransmission timer for active-close: RFC 4340, 8.3 requires
 		 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
@@ -689,6 +614,6 @@ void dccp_send_close(struct sock *sk, const int active)
 		 */
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
-	}
-	dccp_transmit_skb(sk, skb);
+	} else
+		dccp_transmit_skb(sk, skb);
 }
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index eaa59d82ab0..81368a7f537 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -46,54 +46,75 @@ static struct {
 	struct kfifo	  *fifo;
 	spinlock_t	  lock;
 	wait_queue_head_t wait;
-	ktime_t		  start;
+	struct timespec	  tstart;
 } dccpw;
 
-static void jdccp_write_xmit(struct sock *sk)
+static void printl(const char *fmt, ...)
 {
-	const struct inet_sock *inet = inet_sk(sk);
-	struct ccid3_hc_tx_sock *hctx = NULL;
-	struct timespec tv;
-	char buf[256];
-	int len, ccid = ccid_get_current_tx_ccid(dccp_sk(sk));
+	va_list args;
+	int len;
+	struct timespec now;
+	char tbuf[256];
 
-	if (ccid == DCCPC_CCID3)
-		hctx = ccid3_hc_tx_sk(sk);
+	va_start(args, fmt);
+	getnstimeofday(&now);
 
-	if (!port || ntohs(inet->dport) == port || ntohs(inet->sport) == port) {
+	now = timespec_sub(now, dccpw.tstart);
 
-		tv  = ktime_to_timespec(ktime_sub(ktime_get(), dccpw.start));
-		len = sprintf(buf, "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u %d",
-			       (unsigned long)tv.tv_sec,
-			       (unsigned long)tv.tv_nsec,
-			       NIPQUAD(inet->saddr), ntohs(inet->sport),
-			       NIPQUAD(inet->daddr), ntohs(inet->dport), ccid);
+	len = sprintf(tbuf, "%lu.%06lu ",
+		      (unsigned long) now.tv_sec,
+		      (unsigned long) now.tv_nsec / NSEC_PER_USEC);
+	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
+	va_end(args);
 
+	kfifo_put(dccpw.fifo, tbuf, len);
+	wake_up(&dccpw.wait);
+}
+
+static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
+			 struct msghdr *msg, size_t size)
+{
+	const struct dccp_minisock *dmsk = dccp_msk(sk);
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ccid3_hc_tx_sock *hctx;
+
+	if (dmsk->dccpms_tx_ccid == DCCPC_CCID3)
+		hctx = ccid3_hc_tx_sk(sk);
+	else
+		hctx = NULL;
+
+	if (port == 0 || ntohs(inet->dport) == port ||
+	    ntohs(inet->sport) == port) {
 		if (hctx)
-			len += sprintf(buf + len, " %d %d %d %u %u %u %d",
-			       hctx->s, hctx->rtt, hctx->p, hctx->x_calc,
-			       (unsigned)(hctx->x_recv >> 6),
-			       (unsigned)(hctx->x >> 6), hctx->t_ipi);
-
-		len += sprintf(buf + len, "\n");
-		kfifo_put(dccpw.fifo, buf, len);
-		wake_up(&dccpw.wait);
+			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
+			       "%llu %llu %d\n",
+			       NIPQUAD(inet->saddr), ntohs(inet->sport),
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+			       hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
+			       hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
+			       hctx->ccid3hctx_x_recv >> 6,
+			       hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
+		else
+			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
+			       NIPQUAD(inet->saddr), ntohs(inet->sport),
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), size);
 	}
 
 	jprobe_return();
+	return 0;
 }
 
 static struct jprobe dccp_send_probe = {
 	.kp	= {
-		.symbol_name = "dccp_write_xmit",
+		.symbol_name = "dccp_sendmsg",
 	},
-	.entry	= jdccp_write_xmit,
+	.entry	= jdccp_sendmsg,
 };
 
 static int dccpprobe_open(struct inode *inode, struct file *file)
 {
 	kfifo_reset(dccpw.fifo);
-	dccpw.start = ktime_get();
+	getnstimeofday(&dccpw.tstart);
 	return 0;
 }
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ecf3be961e1..d0bd3481976 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -67,9 +67,6 @@ void dccp_set_state(struct sock *sk, const int state)
 	case DCCP_OPEN:
 		if (oldstate != DCCP_OPEN)
 			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
-		/* Client retransmits all Confirm options until entering OPEN */
-		if (oldstate == DCCP_PARTOPEN)
-			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
 		break;
 
 	case DCCP_CLOSED:
@@ -178,25 +175,63 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_minisock *dmsk = dccp_msk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	dccp_minisock_init(&dp->dccps_minisock);
+
 	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
 	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
 	sk->sk_state		= DCCP_CLOSED;
 	sk->sk_write_space	= dccp_write_space;
 	icsk->icsk_sync_mss	= dccp_sync_mss;
-	dp->dccps_mss_cache	= TCP_MIN_RCVMSS;
+	dp->dccps_mss_cache	= 536;
 	dp->dccps_rate_last	= jiffies;
 	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
 	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
-	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;
+	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;
 
 	dccp_init_xmit_timers(sk);
 
-	INIT_LIST_HEAD(&dp->dccps_featneg);
-	/* control socket doesn't need feat nego */
-	if (likely(ctl_sock_initialized))
-		return dccp_feat_init(sk);
+	/*
+	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
+	 * the listening (master) sock get CCID control blocks, which is not
+	 * necessary, but for now, to not mess with the test userspace apps,
+	 * lets leave it here, later the real solution is to do this in a
+	 * setsockopt(CCIDs-I-want/accept). -acme
+	 */
+	if (likely(ctl_sock_initialized)) {
+		int rc = dccp_feat_init(dmsk);
+
+		if (rc)
+			return rc;
+
+		if (dmsk->dccpms_send_ack_vector) {
+			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
+			if (dp->dccps_hc_rx_ackvec == NULL)
+				return -ENOMEM;
+		}
+		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
+						      sk, GFP_KERNEL);
+		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
+						      sk, GFP_KERNEL);
+		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
+			     dp->dccps_hc_tx_ccid == NULL)) {
+			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+			if (dmsk->dccpms_send_ack_vector) {
+				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+				dp->dccps_hc_rx_ackvec = NULL;
+			}
+			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+			return -ENOMEM;
+		}
+	} else {
+		/* control socket doesn't need feat nego */
+		INIT_LIST_HEAD(&dmsk->dccpms_pending);
+		INIT_LIST_HEAD(&dmsk->dccpms_conf);
+	}
+
 	return 0;
 }
 
@@ -205,6 +240,7 @@ EXPORT_SYMBOL_GPL(dccp_init_sock);
 void dccp_destroy_sock(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_minisock *dmsk = dccp_msk(sk);
 
 	/*
 	 * DCCP doesn't use sk_write_queue, just sk_send_head
@@ -222,7 +258,7 @@ void dccp_destroy_sock(struct sock *sk)
 	kfree(dp->dccps_service_list);
 	dp->dccps_service_list = NULL;
 
-	if (dp->dccps_hc_rx_ackvec != NULL) {
+	if (dmsk->dccpms_send_ack_vector) {
 		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
 		dp->dccps_hc_rx_ackvec = NULL;
 	}
@@ -231,7 +267,7 @@ void dccp_destroy_sock(struct sock *sk)
 	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 
 	/* clean up feature negotiation state */
-	dccp_feat_list_purge(&dp->dccps_featneg);
+	dccp_feat_clean(dmsk);
 }
 
 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
@@ -241,9 +277,6 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	dp->dccps_role = DCCP_ROLE_LISTEN;
-	/* do not start to listen if feature negotiation setup fails */
-	if (dccp_feat_finalise_settings(dp))
-		return -EPROTO;
 	return inet_csk_listen_start(sk, backlog);
 }
 
@@ -433,70 +466,42 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
 	return 0;
 }
 
-static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
+/* byte 1 is feature.  the rest is the preference list */
+static int dccp_setsockopt_change(struct sock *sk, int type,
+				  struct dccp_so_feat __user *optval)
 {
-	u8 *list, len;
-	int i, rc;
+	struct dccp_so_feat opt;
+	u8 *val;
+	int rc;
 
-	if (cscov < 0 || cscov > 15)
-		return -EINVAL;
+	if (copy_from_user(&opt, optval, sizeof(opt)))
+		return -EFAULT;
 	/*
-	 * Populate a list of permissible values, in the range cscov...15. This
-	 * is necessary since feature negotiation of single values only works if
-	 * both sides incidentally choose the same value. Since the list starts
-	 * lowest-value first, negotiation will pick the smallest shared value.
+	 * rfc4340: 6.1. Change Options
 	 */
-	if (cscov == 0)
-		return 0;
-	len = 16 - cscov;
-
-	list = kmalloc(len, GFP_KERNEL);
-	if (list == NULL)
-		return -ENOBUFS;
-
-	for (i = 0; i < len; i++)
-		list[i] = cscov++;
-
-	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
-
-	if (rc == 0) {
-		if (rx)
-			dccp_sk(sk)->dccps_pcrlen = cscov;
-		else
-			dccp_sk(sk)->dccps_pcslen = cscov;
-	}
-	kfree(list);
-	return rc;
-}
-
-static int dccp_setsockopt_ccid(struct sock *sk, int type,
-				char __user *optval, int optlen)
-{
-	u8 *val;
-	int rc = 0;
-
-	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
+	if (opt.dccpsf_len < 1)
 		return -EINVAL;
 
-	val = kmalloc(optlen, GFP_KERNEL);
-	if (val == NULL)
+	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
+	if (!val)
 		return -ENOMEM;
 
-	if (copy_from_user(val, optval, optlen)) {
-		kfree(val);
-		return -EFAULT;
+	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
+		rc = -EFAULT;
+		goto out_free_val;
 	}
 
-	lock_sock(sk);
-	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
-		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
+	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
+			      val, opt.dccpsf_len, GFP_KERNEL);
+	if (rc)
+		goto out_free_val;
 
-	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
-		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
-	release_sock(sk);
+out:
+	return rc;
 
+out_free_val:
 	kfree(val);
-	return rc;
+	goto out;
 }
 
 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
@@ -505,21 +510,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 	struct dccp_sock *dp = dccp_sk(sk);
 	int val, err = 0;
 
-	switch (optname) {
-	case DCCP_SOCKOPT_PACKET_SIZE:
-		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
-		return 0;
-	case DCCP_SOCKOPT_CHANGE_L:
-	case DCCP_SOCKOPT_CHANGE_R:
-		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
-		return 0;
-	case DCCP_SOCKOPT_CCID:
-	case DCCP_SOCKOPT_RX_CCID:
-	case DCCP_SOCKOPT_TX_CCID:
-		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
-	}
-
-	if (optlen < (int)sizeof(int))
+	if (optlen < sizeof(int))
 		return -EINVAL;
 
 	if (get_user(val, (int __user *)optval))
@@ -530,38 +521,53 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 
 	lock_sock(sk);
 	switch (optname) {
+	case DCCP_SOCKOPT_PACKET_SIZE:
+		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+		err = 0;
+		break;
+	case DCCP_SOCKOPT_CHANGE_L:
+		if (optlen != sizeof(struct dccp_so_feat))
+			err = -EINVAL;
+		else
+			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
+						     (struct dccp_so_feat __user *)
+						     optval);
+		break;
+	case DCCP_SOCKOPT_CHANGE_R:
+		if (optlen != sizeof(struct dccp_so_feat))
+			err = -EINVAL;
+		else
+			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
+						     (struct dccp_so_feat __user *)
+						     optval);
+		break;
 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 		if (dp->dccps_role != DCCP_ROLE_SERVER)
 			err = -EOPNOTSUPP;
 		else
 			dp->dccps_server_timewait = (val != 0);
 		break;
-	case DCCP_SOCKOPT_SEND_CSCOV:
-		err = dccp_setsockopt_cscov(sk, val, false);
-		break;
-	case DCCP_SOCKOPT_RECV_CSCOV:
-		err = dccp_setsockopt_cscov(sk, val, true);
-		break;
-	case DCCP_SOCKOPT_QPOLICY_ID:
-		if (sk->sk_state != DCCP_CLOSED)
-			err = -EISCONN;
-		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
+	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
+		if (val < 0 || val > 15)
 			err = -EINVAL;
 		else
-			dp->dccps_qpolicy = val;
+			dp->dccps_pcslen = val;
 		break;
-	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
-		if (val < 0)
+	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
+		if (val < 0 || val > 15)
 			err = -EINVAL;
-		else
-			dp->dccps_tx_qlen = val;
+		else {
+			dp->dccps_pcrlen = val;
+			/* FIXME: add feature negotiation,
+			 * ChangeL(MinimumChecksumCoverage, val) */
+		}
 		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
 	}
-	release_sock(sk);
 
+	release_sock(sk);
 	return err;
 }
 
@@ -642,18 +648,6 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_GET_CUR_MPS:
 		val = dp->dccps_mss_cache;
 		break;
-	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
-		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
-	case DCCP_SOCKOPT_TX_CCID:
-		val = ccid_get_current_tx_ccid(dp);
-		if (val < 0)
-			return -ENOPROTOOPT;
-		break;
-	case DCCP_SOCKOPT_RX_CCID:
-		val = ccid_get_current_rx_ccid(dp);
-		if (val < 0)
-			return -ENOPROTOOPT;
-		break;
 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 		val = dp->dccps_server_timewait;
 		break;
@@ -663,12 +657,6 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_RECV_CSCOV:
 		val = dp->dccps_pcrlen;
 		break;
-	case DCCP_SOCKOPT_QPOLICY_ID:
-		val = dp->dccps_qpolicy;
-		break;
-	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
-		val = dp->dccps_tx_qlen;
-		break;
 	case 128 ... 191:
 		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
 					     len, (u32 __user *)optval, optlen);
@@ -711,47 +699,6 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 #endif
 
-static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
-{
-	struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
-
-	/*
-	 * Assign an (opaque) qpolicy priority value to skb->priority.
-	 *
-	 * We are overloading this skb field for use with the qpolicy subystem.
-	 * The skb->priority is normally used for the SO_PRIORITY option, which
-	 * is initialised from sk_priority. Since the assignment of sk_priority
-	 * to skb->priority happens later (on layer 3), we overload this field
-	 * for use with queueing priorities as long as the skb is on layer 4.
-	 * The default priority value (if nothing is set) is 0.
-	 */
-	skb->priority = 0;
-
-	for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
-
-		if (!CMSG_OK(msg, cmsg))
-			return -EINVAL;
-
-		if (cmsg->cmsg_level != SOL_DCCP)
-			continue;
-
-		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
-		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
-			return -EINVAL;
-
-		switch (cmsg->cmsg_type) {
-		case DCCP_SCM_PRIORITY:
-			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
-				return -EINVAL;
-			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-
 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		 size_t len)
 {
@@ -767,7 +714,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	lock_sock(sk);
 
-	if (dccp_qpolicy_full(sk)) {
+	if (sysctl_dccp_tx_qlen &&
+	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
 		rc = -EAGAIN;
 		goto out_release;
 	}
@@ -795,12 +743,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (rc != 0)
 		goto out_discard;
 
-	rc = dccp_msghdr_parse(msg, skb);
-	if (rc != 0)
-		goto out_discard;
-
-	dccp_qpolicy_push(sk, skb);
-	dccp_write_xmit(sk);
+	skb_queue_tail(&sk->sk_write_queue, skb);
+	dccp_write_xmit(sk,0);
 out_release:
 	release_sock(sk);
 	return rc ? : len;
@@ -1023,22 +967,9 @@ void dccp_close(struct sock *sk, long timeout)
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
 	} else if (sk->sk_state != DCCP_CLOSED) {
-		/*
-		 * Normal connection termination. May need to wait if there are
-		 * still packets in the TX queue that are delayed by the CCID.
-		 */
-		dccp_flush_write_queue(sk, &timeout);
 		dccp_terminate_connection(sk);
 	}
 
-	/*
-	 * Flush write queue. This may be necessary in several cases:
-	 * - we have been closed by the peer but still have application data;
-	 * - abortive termination (unread data or zero linger time),
-	 * - normal termination but queue could not be flushed within time limit
-	 */
-	__skb_queue_purge(&sk->sk_write_queue);
-
 	sk_stream_wait_close(sk, timeout);
 
 adjudge_to_death:
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
deleted file mode 100644
index 27383f88c75..00000000000
--- a/net/dccp/qpolicy.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- *  net/dccp/qpolicy.c
- *
- *  Policy-based packet dequeueing interface for DCCP.
- *
- *  Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License v2
- *  as published by the Free Software Foundation.
- */
-#include "dccp.h"
-
-/*
- *	Simple Dequeueing Policy:
- *	If tx_qlen is different from 0, enqueue up to tx_qlen elements.
- */
-static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
-{
-	skb_queue_tail(&sk->sk_write_queue, skb);
-}
-
-static bool qpolicy_simple_full(struct sock *sk)
-{
-	return dccp_sk(sk)->dccps_tx_qlen &&
-	       sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
-}
-
-static struct sk_buff *qpolicy_simple_top(struct sock *sk)
-{
-	return skb_peek(&sk->sk_write_queue);
-}
-
-/*
- *	Priority-based Dequeueing Policy:
- *	If tx_qlen is different from 0 and the queue has reached its upper bound
- *	of tx_qlen elements, replace older packets lowest-priority-first.
- */
-static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
-{
-	struct sk_buff *skb, *best = NULL;
-
-	skb_queue_walk(&sk->sk_write_queue, skb)
-		if (best == NULL || skb->priority > best->priority)
-			best = skb;
-	return best;
-}
-
-static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
-{
-	struct sk_buff *skb, *worst = NULL;
-
-	skb_queue_walk(&sk->sk_write_queue, skb)
-		if (worst == NULL || skb->priority < worst->priority)
-			worst = skb;
-	return worst;
-}
-
-static bool qpolicy_prio_full(struct sock *sk)
-{
-	if (qpolicy_simple_full(sk))
-		dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
-	return false;
-}
-
-/**
- * struct dccp_qpolicy_operations  -  TX Packet Dequeueing Interface
- * @push: add a new @skb to the write queue
- * @full: indicates that no more packets will be admitted
- * @top:  peeks at whatever the queueing policy defines as its `top'
- */
-static struct dccp_qpolicy_operations {
-	void		(*push)	(struct sock *sk, struct sk_buff *skb);
-	bool		(*full) (struct sock *sk);
-	struct sk_buff*	(*top)  (struct sock *sk);
-	__be32		params;
-
-} qpol_table[DCCPQ_POLICY_MAX] = {
-	[DCCPQ_POLICY_SIMPLE] = {
-		.push   = qpolicy_simple_push,
-		.full   = qpolicy_simple_full,
-		.top    = qpolicy_simple_top,
-		.params = 0,
-	},
-	[DCCPQ_POLICY_PRIO] = {
-		.push   = qpolicy_simple_push,
-		.full   = qpolicy_prio_full,
-		.top    = qpolicy_prio_best_skb,
-		.params = DCCP_SCM_PRIORITY,
-	},
-};
-
-/*
- *	Externally visible interface
- */
-void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
-{
-	qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
-}
-
-bool dccp_qpolicy_full(struct sock *sk)
-{
-	return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
-}
-
-void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
-{
-	if (skb != NULL) {
-		skb_unlink(skb, &sk->sk_write_queue);
-		kfree_skb(skb);
-	}
-}
-
-struct sk_buff *dccp_qpolicy_top(struct sock *sk)
-{
-	return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
-}
-
-struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
-{
-	struct sk_buff *skb = dccp_qpolicy_top(sk);
-
-	/* Clear any skb fields that we used internally */
-	skb->priority = 0;
-
-	if (skb)
-		skb_unlink(skb, &sk->sk_write_queue);
-	return skb;
-}
-
-bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param)
-{
-	/* check if exactly one bit is set */
-	if (!param || (param & (param - 1)))
-		return false;
-	return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param;
-}
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index a5a1856234e..21295993fdb 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -18,72 +18,76 @@
 #error This file should not be compiled without CONFIG_SYSCTL defined
 #endif
 
-/* Boundary values */
-static int		zero     = 0,
-			u8_max   = 0xFF;
-static unsigned long	seqw_min = 32;
-
 static struct ctl_table dccp_default_table[] = {
 	{
 		.procname	= "seq_window",
-		.data		= &sysctl_dccp_sequence_window,
-		.maxlen		= sizeof(sysctl_dccp_sequence_window),
+		.data		= &sysctl_dccp_feat_sequence_window,
+		.maxlen		= sizeof(sysctl_dccp_feat_sequence_window),
 		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &seqw_min,		/* RFC 4340, 7.5.2 */
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "rx_ccid",
-		.data		= &sysctl_dccp_rx_ccid,
-		.maxlen		= sizeof(sysctl_dccp_rx_ccid),
+		.data		= &sysctl_dccp_feat_rx_ccid,
+		.maxlen		= sizeof(sysctl_dccp_feat_rx_ccid),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &u8_max,		/* RFC 4340, 10. */
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "tx_ccid",
-		.data		= &sysctl_dccp_tx_ccid,
-		.maxlen		= sizeof(sysctl_dccp_tx_ccid),
+		.data		= &sysctl_dccp_feat_tx_ccid,
+		.maxlen		= sizeof(sysctl_dccp_feat_tx_ccid),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "ack_ratio",
+		.data		= &sysctl_dccp_feat_ack_ratio,
+		.maxlen		= sizeof(sysctl_dccp_feat_ack_ratio),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "send_ackvec",
+		.data		= &sysctl_dccp_feat_send_ack_vector,
+		.maxlen		= sizeof(sysctl_dccp_feat_send_ack_vector),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "send_ndp",
+		.data		= &sysctl_dccp_feat_send_ndp_count,
+		.maxlen		= sizeof(sysctl_dccp_feat_send_ndp_count),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &u8_max,		/* RFC 4340, 10. */
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "request_retries",
 		.data		= &sysctl_dccp_request_retries,
 		.maxlen		= sizeof(sysctl_dccp_request_retries),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &u8_max,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "retries1",
 		.data		= &sysctl_dccp_retries1,
 		.maxlen		= sizeof(sysctl_dccp_retries1),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &u8_max,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "retries2",
 		.data		= &sysctl_dccp_retries2,
 		.maxlen		= sizeof(sysctl_dccp_retries2),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &u8_max,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "tx_qlen",
 		.data		= &sysctl_dccp_tx_qlen,
 		.maxlen		= sizeof(sysctl_dccp_tx_qlen),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "sync_ratelimit",
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 16359e29e7f..54b3c7e9e01 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -87,6 +87,17 @@ static void dccp_retransmit_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	/* retransmit timer is used for feature negotiation throughout
+	 * connection.  In this case, no packet is re-transmitted, but rather an
+	 * ack is generated and pending changes are placed into its options.
+	 */
+	if (sk->sk_send_head == NULL) {
+		dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk);
+		if (sk->sk_state == DCCP_OPEN)
+			dccp_send_ack(sk);
+		goto backoff;
+	}
+
 	/*
 	 * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
 	 * sent, no need to retransmit, this sock is dead.
@@ -115,6 +126,7 @@ static void dccp_retransmit_timer(struct sock *sk)
 		return;
 	}
 
+backoff:
 	icsk->icsk_backoff++;
 
 	icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
@@ -237,35 +249,32 @@ out:
 	sock_put(sk);
 }
 
-/**
- * dccp_write_xmitlet  -  Workhorse for CCID packet dequeueing interface
- * See the comments above %ccid_dequeueing_decision for supported modes.
- */
-static void dccp_write_xmitlet(unsigned long data)
+/* Transmit-delay timer: used by the CCIDs to delay actual send time */
+static void dccp_write_xmit_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
+	struct dccp_sock *dp = dccp_sk(sk);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk))
-		sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
+		sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
 	else
-		dccp_write_xmit(sk);
+		dccp_write_xmit(sk, 0);
 	bh_unlock_sock(sk);
+	sock_put(sk);
 }
 
-static void dccp_write_xmit_timer(unsigned long data)
+static void dccp_init_write_xmit_timer(struct sock *sk)
 {
-	dccp_write_xmitlet(data);
-	sock_put((struct sock *)data);
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
+			(unsigned long)sk);
 }
 
 void dccp_init_xmit_timers(struct sock *sk)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-
-	tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
-	setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
-							     (unsigned long)sk);
+	dccp_init_write_xmit_timer(sk);
 	inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
 				  &dccp_keepalive_timer);
 }
@@ -281,7 +290,8 @@ u32 dccp_timestamp(void)
 {
 	s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
 
-	return div_u64(delta, DCCP_TIME_RESOLUTION);
+	do_div(delta, 10);
+	return delta;
 }
 EXPORT_SYMBOL_GPL(dccp_timestamp);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9da9f19ece8..f79a5160729 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -811,12 +811,25 @@ void tcp_update_metrics(struct sock *sk)
 	}
 }
 
+/* Numbers are taken from RFC3390.
+ *
+ * John Heffner states:
+ *
+ *	The RFC specifies a window of no more than 4380 bytes
+ *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
+ *	is a bit misleading because they use a clamp at 4380 bytes
+ *	rather than use a multiplier in the relevant range.
+ */
 __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
-	if (!cwnd)
-		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
+	if (!cwnd) {
+		if (tp->mss_cache > 1460)
+			cwnd = 2;
+		else
+			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
+	}
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 225f40055f779032974a9fce7b2f9c9eda04ff58 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 9 Sep 2008 05:23:37 -0700
Subject: ipsec: Restore larval states and socket policies in dump

The commit commit 4c563f7669c10a12354b72b518c2287ffc6ebfb3 ("[XFRM]:
Speed up xfrm_policy and xfrm_state walking") inadvertently removed
larval states and socket policies from netlink dumps.  This patch
restores them.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 1 +
 net/xfrm/xfrm_state.c  | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 46914b79d85..b7754b1b73a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1077,6 +1077,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
 						     pol->family, dir);
 
+	list_add_tail(&pol->bytype, &xfrm_policy_bytype[pol->type]);
 	hlist_add_head(&pol->bydst, chain);
 	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
 	xfrm_policy_count[dir]++;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7bd62f61593..0a8f09c3144 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -858,6 +858,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
+			list_add_tail(&x->all, &xfrm_state_all);
 			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 			h = xfrm_src_hash(daddr, saddr, family);
 			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -1055,6 +1056,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		xfrm_state_hold(x);
 		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
 		add_timer(&x->timer);
+		list_add_tail(&x->all, &xfrm_state_all);
 		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 		h = xfrm_src_hash(daddr, saddr, family);
 		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
-- 
cgit v1.2.3-70-g09d2


From e550dfb0c2c31b6363aa463a035fc9f8dcaa3c9b Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 9 Sep 2008 13:51:35 -0700
Subject: ipv6: Fix OOPS in ip6_dst_lookup_tail().

This fixes kernel bugzilla 11469: "TUN with 1024 neighbours:
ip6_dst_lookup_tail NULL crash"

dst->neighbour is not necessarily hooked up at this point
in the processing path, so blindly dereferencing it is
the wrong thing to do.  This NULL check exists in other
similar paths and this case was just an oversight.

Also fix the completely wrong and confusing indentation
here while we're at it.

Based upon a patch by Evgeniy Polyakov.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 64 +++++++++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0e844c2736a..3df2c442d90 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -943,39 +943,39 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 	}
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-		/*
-		 * Here if the dst entry we've looked up
-		 * has a neighbour entry that is in the INCOMPLETE
-		 * state and the src address from the flow is
-		 * marked as OPTIMISTIC, we release the found
-		 * dst entry and replace it instead with the
-		 * dst entry of the nexthop router
-		 */
-		if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
-			struct inet6_ifaddr *ifp;
-			struct flowi fl_gw;
-			int redirect;
-
-			ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
-					      (*dst)->dev, 1);
-
-			redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
-			if (ifp)
-				in6_ifa_put(ifp);
-
-			if (redirect) {
-				/*
-				 * We need to get the dst entry for the
-				 * default router instead
-				 */
-				dst_release(*dst);
-				memcpy(&fl_gw, fl, sizeof(struct flowi));
-				memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
-				*dst = ip6_route_output(net, sk, &fl_gw);
-				if ((err = (*dst)->error))
-					goto out_err_release;
-			}
+	/*
+	 * Here if the dst entry we've looked up
+	 * has a neighbour entry that is in the INCOMPLETE
+	 * state and the src address from the flow is
+	 * marked as OPTIMISTIC, we release the found
+	 * dst entry and replace it instead with the
+	 * dst entry of the nexthop router
+	 */
+	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
+		struct inet6_ifaddr *ifp;
+		struct flowi fl_gw;
+		int redirect;
+
+		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
+				      (*dst)->dev, 1);
+
+		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
+		if (ifp)
+			in6_ifa_put(ifp);
+
+		if (redirect) {
+			/*
+			 * We need to get the dst entry for the
+			 * default router instead
+			 */
+			dst_release(*dst);
+			memcpy(&fl_gw, fl, sizeof(struct flowi));
+			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
+			*dst = ip6_route_output(net, sk, &fl_gw);
+			if ((err = (*dst)->error))
+				goto out_err_release;
 		}
+	}
 #endif
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 28faa979746b2352cd78a376bf9f52db953bda46 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 9 Sep 2008 16:08:51 -0700
Subject: ipsec: Make xfrm_larval_drop default to 1.

The previous default behavior is definitely the least user
friendly.  Hanging there forever just because the keying
daemon is wedged or the refreshing of the policy can't move
forward is anti-social to say the least.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 46914b79d85..638bb5ff99a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -34,7 +34,7 @@
 
 #include "xfrm_hash.h"
 
-int sysctl_xfrm_larval_drop __read_mostly;
+int sysctl_xfrm_larval_drop __read_mostly = 1;
 
 #ifdef CONFIG_XFRM_STATISTICS
 DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
-- 
cgit v1.2.3-70-g09d2


From abb81c4f3cb9b8d421f1e5474811ef1d461d341c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 9 Sep 2008 19:58:29 -0700
Subject: ipsec: Use RCU-like construct for saved state within a walk

Now that we save states within a walk we need synchronisation
so that the list the saved state is on doesn't disappear from
under us.

As it stands this is done by keeping the state on the list which
is bad because it gets in the way of the management of the state
life-cycle.

An alternative is to make our own pseudo-RCU system where we use
counters to indicate which state can't be freed immediately as
it may be referenced by an ongoing walk when that resumes.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    | 15 +++++----------
 net/xfrm/xfrm_state.c | 52 ++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 44 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2933d7474a7..4bb94992b5f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -120,9 +120,11 @@ extern struct mutex xfrm_cfg_mutex;
 /* Full description of state of transformer. */
 struct xfrm_state
 {
-	/* Note: bydst is re-used during gc */
 	struct list_head	all;
-	struct hlist_node	bydst;
+	union {
+		struct list_head	gclist;
+		struct hlist_node	bydst;
+	};
 	struct hlist_node	bysrc;
 	struct hlist_node	byspi;
 
@@ -1286,16 +1288,9 @@ static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
 	walk->count = 0;
 }
 
-static inline void xfrm_state_walk_done(struct xfrm_state_walk *walk)
-{
-	if (walk->state != NULL) {
-		xfrm_state_put(walk->state);
-		walk->state = NULL;
-	}
-}
-
 extern int xfrm_state_walk(struct xfrm_state_walk *walk,
 			   int (*func)(struct xfrm_state *, int, void*), void *);
+extern void xfrm_state_walk_done(struct xfrm_state_walk *walk);
 extern struct xfrm_state *xfrm_state_alloc(void);
 extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
 					  struct flowi *fl, struct xfrm_tmpl *tmpl,
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0a8f09c3144..aaafcee02fc 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -59,6 +59,11 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
 static unsigned int xfrm_state_genid;
 
+/* Counter indicating ongoing walk, protected by xfrm_state_lock. */
+static unsigned long xfrm_state_walk_ongoing;
+/* Counter indicating walk completion, protected by xfrm_cfg_mutex. */
+static unsigned long xfrm_state_walk_completed;
+
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
@@ -191,7 +196,8 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
 static struct work_struct xfrm_state_gc_work;
-static HLIST_HEAD(xfrm_state_gc_list);
+static LIST_HEAD(xfrm_state_gc_leftovers);
+static LIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
 int __xfrm_state_delete(struct xfrm_state *x);
@@ -403,17 +409,22 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 
 static void xfrm_state_gc_task(struct work_struct *data)
 {
-	struct xfrm_state *x;
-	struct hlist_node *entry, *tmp;
-	struct hlist_head gc_list;
+	struct xfrm_state *x, *tmp;
+	unsigned long completed;
 
+	mutex_lock(&xfrm_cfg_mutex);
 	spin_lock_bh(&xfrm_state_gc_lock);
-	gc_list.first = xfrm_state_gc_list.first;
-	INIT_HLIST_HEAD(&xfrm_state_gc_list);
+	list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
-	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
+	completed = xfrm_state_walk_completed;
+	mutex_unlock(&xfrm_cfg_mutex);
+
+	list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) {
+		if ((long)(x->lastused - completed) > 0)
+			break;
 		xfrm_state_gc_destroy(x);
+	}
 
 	wake_up(&km_waitq);
 }
@@ -540,12 +551,8 @@ void __xfrm_state_destroy(struct xfrm_state *x)
 {
 	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
-	spin_lock_bh(&xfrm_state_lock);
-	list_del(&x->all);
-	spin_unlock_bh(&xfrm_state_lock);
-
 	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
+	list_add_tail(&x->gclist, &xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 	schedule_work(&xfrm_state_gc_work);
 }
@@ -558,6 +565,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&xfrm_state_lock);
+		x->lastused = xfrm_state_walk_ongoing;
+		list_del_rcu(&x->all);
 		hlist_del(&x->bydst);
 		hlist_del(&x->bysrc);
 		if (x->id.spi)
@@ -1574,6 +1583,7 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
 			if (err) {
 				xfrm_state_hold(last);
 				walk->state = last;
+				xfrm_state_walk_ongoing++;
 				goto out;
 			}
 		}
@@ -1588,12 +1598,28 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
 		err = func(last, 0, data);
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	if (old != NULL)
+	if (old != NULL) {
 		xfrm_state_put(old);
+		xfrm_state_walk_completed++;
+		if (!list_empty(&xfrm_state_gc_leftovers))
+			schedule_work(&xfrm_state_gc_work);
+	}
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_walk);
 
+void xfrm_state_walk_done(struct xfrm_state_walk *walk)
+{
+	if (walk->state != NULL) {
+		xfrm_state_put(walk->state);
+		walk->state = NULL;
+		xfrm_state_walk_completed++;
+		if (!list_empty(&xfrm_state_gc_leftovers))
+			schedule_work(&xfrm_state_gc_work);
+	}
+}
+EXPORT_SYMBOL(xfrm_state_walk_done);
+
 
 void xfrm_replay_notify(struct xfrm_state *x, int event)
 {
-- 
cgit v1.2.3-70-g09d2


From 08569908fffec3625e29eec7cf7577eaa512e719 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 9 Sep 2008 22:13:28 -0700
Subject: ipsec: Add missing list_del() in xfrm_state_gc_task().

Otherwise entries stay on the GC todo list forever, even after we free
them.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index aaafcee02fc..abbe2702c40 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -423,6 +423,7 @@ static void xfrm_state_gc_task(struct work_struct *data)
 	list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) {
 		if ((long)(x->lastused - completed) > 0)
 			break;
+		list_del(&x->gclist);
 		xfrm_state_gc_destroy(x);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 1e493d1946a0b26b79001c18d7312d536156ff5a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 10 Sep 2008 17:27:15 -0700
Subject: ipv6: On interface down/unregister, purge icmp routes too.

Johannes Berg reported that occaisionally, bringing an interface
down or unregistering it would hang for up to 30 seconds.  Using
debugging output he provided it became clear that ICMP6 routes
were the culprit.

The problem is that ICMP6 routes live in their own world totally
separate from normal ipv6 routes.  So there are all kinds of special
cases throughout the ipv6 code to handle this.

While we should really try to unify all of this stuff somehow,
for the time being let's fix this by purging the ICMP6 routes
that match the device in question during rt6_ifdown().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9af6115f0f5..776871ee228 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1003,6 +1003,25 @@ int icmp6_dst_gc(void)
 	return more;
 }
 
+static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
+			    void *arg)
+{
+	struct dst_entry *dst, **pprev;
+
+	spin_lock_bh(&icmp6_dst_lock);
+	pprev = &icmp6_dst_gc_list;
+	while ((dst = *pprev) != NULL) {
+		struct rt6_info *rt = (struct rt6_info *) dst;
+		if (func(rt, arg)) {
+			*pprev = dst->next;
+			dst_free(dst);
+		} else {
+			pprev = &dst->next;
+		}
+	}
+	spin_unlock_bh(&icmp6_dst_lock);
+}
+
 static int ip6_dst_gc(struct dst_ops *ops)
 {
 	unsigned long now = jiffies;
@@ -1930,6 +1949,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
 	};
 
 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
+	icmp6_clean_all(fib6_ifdown, &adn);
 }
 
 struct rt6_mtu_change_arg
-- 
cgit v1.2.3-70-g09d2


From a40c24a13366e324bc0ff8c3bb107db89312c984 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 11 Sep 2008 04:51:14 -0700
Subject: net: Add SKB DMA mapping helper functions.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  8 ++++++
 net/core/Makefile      |  1 +
 net/core/skb_dma_map.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+)
 create mode 100644 net/core/skb_dma_map.c

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4b2be23903c..aa80ad9cbc8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -359,6 +359,14 @@ struct sk_buff {
 
 #include <asm/system.h>
 
+#ifdef CONFIG_HAS_DMA
+#include <linux/dma-mapping.h>
+extern int skb_dma_map(struct device *dev, struct sk_buff *skb,
+		       enum dma_data_direction dir);
+extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
+			  enum dma_data_direction dir);
+#endif
+
 extern void kfree_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *__alloc_skb(unsigned int size,
diff --git a/net/core/Makefile b/net/core/Makefile
index b1332f6d004..26a37cb3192 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -6,6 +6,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 	 gen_stats.o gen_estimator.o net_namespace.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
+obj-$(CONFIG_HAS_DMA) += skb_dma_map.o
 
 obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
new file mode 100644
index 00000000000..1f49afcd8e8
--- /dev/null
+++ b/net/core/skb_dma_map.c
@@ -0,0 +1,66 @@
+/* skb_dma_map.c: DMA mapping helpers for socket buffers.
+ *
+ * Copyright (C) David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <linux/skbuff.h>
+
+int skb_dma_map(struct device *dev, struct sk_buff *skb,
+		enum dma_data_direction dir)
+{
+	struct skb_shared_info *sp = skb_shinfo(skb);
+	dma_addr_t map;
+	int i;
+
+	map = dma_map_single(dev, skb->data,
+			     skb_headlen(skb), dir);
+	if (dma_mapping_error(dev, map))
+		goto out_err;
+
+	sp->dma_maps[0] = map;
+	for (i = 0; i < sp->nr_frags; i++) {
+		skb_frag_t *fp = &sp->frags[i];
+
+		map = dma_map_page(dev, fp->page, fp->page_offset,
+				   fp->size, dir);
+		if (dma_mapping_error(dev, map))
+			goto unwind;
+		sp->dma_maps[i + 1] = map;
+	}
+	sp->num_dma_maps = i + 1;
+
+	return 0;
+
+unwind:
+	while (i-- >= 0) {
+		skb_frag_t *fp = &sp->frags[i];
+
+		dma_unmap_page(dev, sp->dma_maps[i + 1],
+			       fp->size, dir);
+	}
+	dma_unmap_single(dev, sp->dma_maps[0],
+			 skb_headlen(skb), dir);
+out_err:
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(skb_dma_map);
+
+void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
+		   enum dma_data_direction dir)
+{
+	struct skb_shared_info *sp = skb_shinfo(skb);
+	int i;
+
+	dma_unmap_single(dev, sp->dma_maps[0],
+			 skb_headlen(skb), dir);
+	for (i = 0; i < sp->nr_frags; i++) {
+		skb_frag_t *fp = &sp->frags[i];
+
+		dma_unmap_page(dev, sp->dma_maps[i + 1],
+			       fp->size, dir);
+	}
+}
+EXPORT_SYMBOL(skb_dma_unmap);
-- 
cgit v1.2.3-70-g09d2


From 00c5ae2fa0f8191a1b204e71f0ee11359e3b2c06 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 3 Sep 2008 11:26:42 +0800
Subject: mac80211: change MIMO_PS to SM_PS

This patch follows 11n spec naming more rigorously replacing MIMO_PS
with SM_PS (Spatial Multiplexing Power Save).

(Originally submitted as 4 patches, "mac80211: change MIMO_PS to SM_PS",
"iwlwifi: change MIMO_PS to SM_PS", "ath9k: change MIMO_PS to SM_PS",
and "iwlwifi: remove double definition of SM PS". -- JWL)

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/main.c         |  2 +-
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c |  4 ++--
 drivers/net/wireless/iwlwifi/iwl-agn.c    |  2 +-
 drivers/net/wireless/iwlwifi/iwl-core.c   | 16 ++++++++--------
 drivers/net/wireless/iwlwifi/iwl-dev.h    |  5 -----
 drivers/net/wireless/iwlwifi/iwl-sta.c    |  8 ++++----
 include/linux/ieee80211.h                 | 12 ++++++------
 net/mac80211/main.c                       |  4 ++--
 8 files changed, 24 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 245b7308a9a..57d7cc87cb0 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -224,7 +224,7 @@ static void setup_ht_cap(struct ieee80211_ht_info *ht_info)
 
 	ht_info->ht_supported = 1;
 	ht_info->cap = (u16)IEEE80211_HT_CAP_SUP_WIDTH
-			|(u16)IEEE80211_HT_CAP_MIMO_PS
+			|(u16)IEEE80211_HT_CAP_SM_PS
 			|(u16)IEEE80211_HT_CAP_SGI_40
 			|(u16)IEEE80211_HT_CAP_DSSSCCK40;
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index 98f2c843b99..4fc3a0f1d8f 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -1153,8 +1153,8 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv,
 	    !sta->ht_info.ht_supported)
 		return -1;
 
-	if (((sta->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS) >> 2)
-						== IWL_MIMO_PS_STATIC)
+	if (((sta->ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2)
+						== WLAN_HT_CAP_SM_PS_STATIC)
 		return -1;
 
 	/* Need both Tx chains/antennas to support MIMO */
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index 1547122e66f..27ddf6cb1c6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -564,7 +564,7 @@ static void iwl4965_ht_conf(struct iwl_priv *priv,
 	if (!iwl_conf->is_ht)
 		return;
 
-	priv->ps_mode = (u8)((ht_conf->cap & IEEE80211_HT_CAP_MIMO_PS) >> 2);
+	priv->ps_mode = (u8)((ht_conf->cap & IEEE80211_HT_CAP_SM_PS) >> 2);
 
 	if (ht_conf->cap & IEEE80211_HT_CAP_SGI_20)
 		iwl_conf->sgf |= HT_SHORT_GI_20MHZ;
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index a0b86af25c8..789556db684 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -399,8 +399,8 @@ static void iwlcore_init_ht_hw_capab(const struct iwl_priv *priv,
 
 	ht_info->cap |= (u16)IEEE80211_HT_CAP_GRN_FLD;
 	ht_info->cap |= (u16)IEEE80211_HT_CAP_SGI_20;
-	ht_info->cap |= (u16)(IEEE80211_HT_CAP_MIMO_PS &
-			     (IWL_MIMO_PS_NONE << 2));
+	ht_info->cap |= (u16)(IEEE80211_HT_CAP_SM_PS &
+			     (WLAN_HT_CAP_SM_PS_DISABLED << 2));
 
 	max_bit_rate = MAX_BIT_RATE_20_MHZ;
 	if (priv->hw_params.fat_channel & BIT(band)) {
@@ -709,7 +709,7 @@ static int iwl_get_active_rx_chain_count(struct iwl_priv *priv)
 	bool is_cam = !test_bit(STATUS_POWER_PMI, &priv->status);
 
 	/* # of Rx chains to use when expecting MIMO. */
-	if (is_single || (!is_cam && (priv->ps_mode == IWL_MIMO_PS_STATIC)))
+	if (is_single || (!is_cam && (priv->ps_mode == WLAN_HT_CAP_SM_PS_STATIC)))
 		return 2;
 	else
 		return 3;
@@ -721,14 +721,14 @@ static int iwl_get_idle_rx_chain_count(struct iwl_priv *priv, int active_cnt)
 	bool is_cam = !test_bit(STATUS_POWER_PMI, &priv->status);
 	/* # Rx chains when idling and maybe trying to save power */
 	switch (priv->ps_mode) {
-	case IWL_MIMO_PS_STATIC:
-	case IWL_MIMO_PS_DYNAMIC:
+	case WLAN_HT_CAP_SM_PS_STATIC:
+	case WLAN_HT_CAP_SM_PS_DYNAMIC:
 		idle_cnt = (is_cam) ? 2 : 1;
 		break;
-	case IWL_MIMO_PS_NONE:
+	case WLAN_HT_CAP_SM_PS_DISABLED:
 		idle_cnt = (is_cam) ? active_cnt : 1;
 		break;
-	case IWL_MIMO_PS_INVALID:
+	case WLAN_HT_CAP_SM_PS_INVALID:
 	default:
 		IWL_ERROR("invalide mimo ps mode %d\n", priv->ps_mode);
 		WARN_ON(1);
@@ -912,7 +912,7 @@ int iwl_init_drv(struct iwl_priv *priv)
 	priv->iw_mode = IEEE80211_IF_TYPE_STA;
 
 	priv->use_ant_b_for_management_frame = 1; /* start with ant B */
-	priv->ps_mode = IWL_MIMO_PS_NONE;
+	priv->ps_mode = WLAN_HT_CAP_SM_PS_DISABLED;
 
 	/* Choose which receivers/antennas to use */
 	iwl_set_rxon_chain(priv);
diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h
index 640ceea913c..f302e93b779 100644
--- a/drivers/net/wireless/iwlwifi/iwl-dev.h
+++ b/drivers/net/wireless/iwlwifi/iwl-dev.h
@@ -644,11 +644,6 @@ struct iwl_kw {
 #define IWL_CHANNEL_WIDTH_20MHZ   0
 #define IWL_CHANNEL_WIDTH_40MHZ   1
 
-#define IWL_MIMO_PS_STATIC        0
-#define IWL_MIMO_PS_NONE          3
-#define IWL_MIMO_PS_DYNAMIC       1
-#define IWL_MIMO_PS_INVALID       2
-
 #define IWL_OPERATION_MODE_AUTO     0
 #define IWL_OPERATION_MODE_HT_ONLY  1
 #define IWL_OPERATION_MODE_MIXED    2
diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c
index 5b7b05c8773..a72569f1acb 100644
--- a/drivers/net/wireless/iwlwifi/iwl-sta.c
+++ b/drivers/net/wireless/iwlwifi/iwl-sta.c
@@ -191,20 +191,20 @@ static void iwl_set_ht_add_station(struct iwl_priv *priv, u8 index,
 	if (!sta_ht_inf || !sta_ht_inf->ht_supported)
 		goto done;
 
-	mimo_ps_mode = (sta_ht_inf->cap & IEEE80211_HT_CAP_MIMO_PS) >> 2;
+	mimo_ps_mode = (sta_ht_inf->cap & IEEE80211_HT_CAP_SM_PS) >> 2;
 
 	sta_flags = priv->stations[index].sta.station_flags;
 
 	sta_flags &= ~(STA_FLG_RTS_MIMO_PROT_MSK | STA_FLG_MIMO_DIS_MSK);
 
 	switch (mimo_ps_mode) {
-	case WLAN_HT_CAP_MIMO_PS_STATIC:
+	case WLAN_HT_CAP_SM_PS_STATIC:
 		sta_flags |= STA_FLG_MIMO_DIS_MSK;
 		break;
-	case WLAN_HT_CAP_MIMO_PS_DYNAMIC:
+	case WLAN_HT_CAP_SM_PS_DYNAMIC:
 		sta_flags |= STA_FLG_RTS_MIMO_PROT_MSK;
 		break;
-	case WLAN_HT_CAP_MIMO_PS_DISABLED:
+	case WLAN_HT_CAP_SM_PS_DISABLED:
 		break;
 	default:
 		IWL_WARNING("Invalid MIMO PS mode %d\n", mimo_ps_mode);
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index be456450cd2..333d3ae7688 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -708,7 +708,7 @@ struct ieee80211_ht_addt_info {
 
 /* 802.11n HT capabilities masks */
 #define IEEE80211_HT_CAP_SUP_WIDTH		0x0002
-#define IEEE80211_HT_CAP_MIMO_PS		0x000C
+#define IEEE80211_HT_CAP_SM_PS			0x000C
 #define IEEE80211_HT_CAP_GRN_FLD		0x0010
 #define IEEE80211_HT_CAP_SGI_20			0x0020
 #define IEEE80211_HT_CAP_SGI_40			0x0040
@@ -737,11 +737,11 @@ struct ieee80211_ht_addt_info {
 #define IEEE80211_HT_IE_NON_GF_STA_PRSNT	0x0004
 #define IEEE80211_HT_IE_NON_HT_STA_PRSNT	0x0010
 
-/* MIMO Power Save Modes */
-#define WLAN_HT_CAP_MIMO_PS_STATIC	0
-#define WLAN_HT_CAP_MIMO_PS_DYNAMIC	1
-#define WLAN_HT_CAP_MIMO_PS_INVALID	2
-#define WLAN_HT_CAP_MIMO_PS_DISABLED	3
+/* Spatial Multiplexing Power Save Modes */
+#define WLAN_HT_CAP_SM_PS_STATIC	0
+#define WLAN_HT_CAP_SM_PS_DYNAMIC	1
+#define WLAN_HT_CAP_SM_PS_INVALID	2
+#define WLAN_HT_CAP_SM_PS_DISABLED	3
 
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 396cfb2d0f4..7dc06319725 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1140,8 +1140,8 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 	ht_conf.ht_supported = 1;
 
 	ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap;
-	ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS);
-	ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS;
+	ht_conf.cap &= ~(IEEE80211_HT_CAP_SM_PS);
+	ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_SM_PS;
 	ht_bss_conf.primary_channel = req_bss_cap->primary_channel;
 	ht_bss_conf.bss_cap = req_bss_cap->bss_cap;
 	ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode;
-- 
cgit v1.2.3-70-g09d2


From 69e6c010fd5f5015d3cc64718fbe266face93770 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 11:05:08 +0200
Subject: mac80211: move some RCU locking into an if branch

The if itself doesn't need to be protected, so move in the RCU
locking to avoid doing anything at all when the condition isn't
true.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index df12e746b03..0abd5a4fe38 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2612,13 +2612,12 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				      mesh_peer_accepts_plinks(elems));
 	}
 
-	rcu_read_lock();
-
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates &&
 	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
-
 		supp_rates = ieee80211_sta_get_rates(local, elems, band);
 
+		rcu_read_lock();
+
 		sta = sta_info_get(local, mgmt->sa);
 		if (sta) {
 			u64 prev_rates;
@@ -2642,9 +2641,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 			ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid,
 					       mgmt->sa, supp_rates);
 		}
-	}
 
-	rcu_read_unlock();
+		rcu_read_unlock();
+	}
 
 	if (elems->ds_params && elems->ds_params_len == 1)
 		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
-- 
cgit v1.2.3-70-g09d2


From fe3fa827314b877486c515a001c3e6f604f6f16f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 11:05:09 +0200
Subject: mac80211: make conf_tx non-atomic

The conf_tx callback currently needs to be atomic, this requirement
is just because it can be called from scanning. This rearranges it
slightly to only update while not scanning (which is fine, we'll be
getting beacons when associated) and thus removes the atomic
requirement.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h |  2 +-
 net/mac80211/mlme.c    | 13 ++++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7c399a9c11d..fb9e62211c3 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1142,7 +1142,7 @@ enum ieee80211_ampdu_mlme_action {
  *	of assocaited station or AP.
  *
  * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max),
- *	bursting) for a hardware TX queue. Must be atomic.
+ *	bursting) for a hardware TX queue.
  *
  * @get_tx_stats: Get statistics of the current TX queue status. This is used
  *	to get number of currently queued packets (queue length), maximum queue
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0abd5a4fe38..a03245255ed 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2872,15 +2872,18 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	    memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0)
 		return;
 
-	ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param,
-				 elems.wmm_param_len);
-
 	/* Do not send changes to driver if we are scanning. This removes
-	 * requirement that driver's bss_info_changed function needs to be
-	 * atomic. */
+	 * requirement that a driver's bss_info_changed/conf_tx functions
+	 * need to be atomic.
+	 * This is really ugly code, we should rewrite scanning and make
+	 * all this more understandable for humans.
+	 */
 	if (local->sta_sw_scanning || local->sta_hw_scanning)
 		return;
 
+	ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param,
+				 elems.wmm_param_len);
+
 	if (elems.erp_info && elems.erp_info_len >= 1)
 		changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]);
 	else {
-- 
cgit v1.2.3-70-g09d2


From 5bda617576e58c7213aef5ab90383f303727b5b1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 11:05:10 +0200
Subject: mac80211: BSS info: check channel first

When we receive information about a BSS we check at some point
whether or not we think we're allowed to use the channel it is
on, but we do that fairly late. I don't think we should do it
that late, so do it earlier to avoid doing IBSS/mesh stuff on
that channel and then getting confused because it's disabled.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a03245255ed..ae97d7e9945 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2602,7 +2602,15 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	DECLARE_MAC_BUF(mac);
 	DECLARE_MAC_BUF(mac2);
 
-	beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
+	if (elems->ds_params && elems->ds_params_len == 1)
+		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
+	else
+		freq = rx_status->freq;
+
+	channel = ieee80211_get_channel(local->hw.wiphy, freq);
+
+	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+		return;
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id &&
 	    elems->mesh_config && mesh_matches_local(elems, sdata)) {
@@ -2645,16 +2653,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		rcu_read_unlock();
 	}
 
-	if (elems->ds_params && elems->ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
-	else
-		freq = rx_status->freq;
-
-	channel = ieee80211_get_channel(local->hw.wiphy, freq);
-
-	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
-		return;
-
 #ifdef CONFIG_MAC80211_MESH
 	if (elems->mesh_config)
 		bss = ieee80211_rx_mesh_bss_get(local, elems->mesh_id,
@@ -2723,6 +2721,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 
 	bss->band = band;
 
+	beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
+
 	bss->timestamp = beacon_timestamp;
 	bss->last_update = jiffies;
 	bss->signal = rx_status->signal;
-- 
cgit v1.2.3-70-g09d2


From 9c80d3dc272ec5ce44a7564e5392f950ad38357a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 15:41:59 +0200
Subject: mac80211: fix action frame length checks

The action frame length checks are one too small, there's not just
an action code as the comment makes you believe, there's a category
code too, and the category code is required in each action frame
(hence part of IEEE80211_MIN_ACTION_SIZE).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh_hwmp.c  | 4 ++++
 net/mac80211/mesh_plink.c | 4 ++++
 net/mac80211/mlme.c       | 5 +++--
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index eeb0ce2d5d3..59fd7fe377e 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -581,6 +581,10 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
 	size_t baselen;
 	u32 last_hop_metric;
 
+	/* need action_code */
+	if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+		return;
+
 	baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt;
 	ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
 			len - baselen, &elems);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 7714b0e6e4d..74983cfa729 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -421,6 +421,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 	DECLARE_MAC_BUF(mac);
 #endif
 
+	/* need action_code, aux */
+	if (len < IEEE80211_MIN_ACTION_SIZE + 3)
+		return;
+
 	if (is_multicast_ether_addr(mgmt->da)) {
 		mpl_dbg("Mesh plink: ignore frame from multicast address");
 		return;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ae97d7e9945..eb1832aa1fe 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -60,7 +60,7 @@
 
 #define ERP_INFO_USE_PROTECTION BIT(1)
 
-/* mgmt header + 1 byte action code */
+/* mgmt header + 1 byte category code */
 #define IEEE80211_MIN_ACTION_SIZE (24 + 1)
 
 #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002
@@ -2988,7 +2988,8 @@ static void ieee80211_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 
-	if (len < IEEE80211_MIN_ACTION_SIZE)
+	/* all categories we currently handle have action_code */
+	if (len < IEEE80211_MIN_ACTION_SIZE + 1)
 		return;
 
 	switch (mgmt->u.action.category) {
-- 
cgit v1.2.3-70-g09d2


From 5fd12d4da198647e834f93f163e20bfcdd33bad8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 16:31:48 +0200
Subject: mac80211: fix typo in action frame handling

This says chan_switch.action_code but really means
measurement.action_code, of course the actual offset in
the frame is the same, it's just harder to understand
this way.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index eb1832aa1fe..2341e5b3c24 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2996,7 +2996,7 @@ static void ieee80211_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 	case WLAN_CATEGORY_SPECTRUM_MGMT:
 		if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ)
 			break;
-		switch (mgmt->u.action.u.chan_switch.action_code) {
+		switch (mgmt->u.action.u.measurement.action_code) {
 		case WLAN_ACTION_SPCT_MSR_REQ:
 			if (len < (IEEE80211_MIN_ACTION_SIZE +
 				   sizeof(mgmt->u.action.u.measurement)))
-- 
cgit v1.2.3-70-g09d2


From 37ffc8da803a1151e887f2a80f08f0c49d1dc1d5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 16:40:36 +0200
Subject: mac80211: move IE parsing to util file

Since IE parsing is required for the mlme and mesh code, it's
not a static function anyway, and it's much better to have it
in util rather than the overly large mlme.c

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 145 ----------------------------------------------------
 net/mac80211/util.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 145 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2341e5b3c24..b03f1f3ef2e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -91,151 +91,6 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 static void sta_rx_agg_session_timer_expired(unsigned long data);
 
 
-void ieee802_11_parse_elems(u8 *start, size_t len,
-			    struct ieee802_11_elems *elems)
-{
-	size_t left = len;
-	u8 *pos = start;
-
-	memset(elems, 0, sizeof(*elems));
-	elems->ie_start = start;
-	elems->total_len = len;
-
-	while (left >= 2) {
-		u8 id, elen;
-
-		id = *pos++;
-		elen = *pos++;
-		left -= 2;
-
-		if (elen > left)
-			return;
-
-		switch (id) {
-		case WLAN_EID_SSID:
-			elems->ssid = pos;
-			elems->ssid_len = elen;
-			break;
-		case WLAN_EID_SUPP_RATES:
-			elems->supp_rates = pos;
-			elems->supp_rates_len = elen;
-			break;
-		case WLAN_EID_FH_PARAMS:
-			elems->fh_params = pos;
-			elems->fh_params_len = elen;
-			break;
-		case WLAN_EID_DS_PARAMS:
-			elems->ds_params = pos;
-			elems->ds_params_len = elen;
-			break;
-		case WLAN_EID_CF_PARAMS:
-			elems->cf_params = pos;
-			elems->cf_params_len = elen;
-			break;
-		case WLAN_EID_TIM:
-			elems->tim = pos;
-			elems->tim_len = elen;
-			break;
-		case WLAN_EID_IBSS_PARAMS:
-			elems->ibss_params = pos;
-			elems->ibss_params_len = elen;
-			break;
-		case WLAN_EID_CHALLENGE:
-			elems->challenge = pos;
-			elems->challenge_len = elen;
-			break;
-		case WLAN_EID_WPA:
-			if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
-			    pos[2] == 0xf2) {
-				/* Microsoft OUI (00:50:F2) */
-				if (pos[3] == 1) {
-					/* OUI Type 1 - WPA IE */
-					elems->wpa = pos;
-					elems->wpa_len = elen;
-				} else if (elen >= 5 && pos[3] == 2) {
-					if (pos[4] == 0) {
-						elems->wmm_info = pos;
-						elems->wmm_info_len = elen;
-					} else if (pos[4] == 1) {
-						elems->wmm_param = pos;
-						elems->wmm_param_len = elen;
-					}
-				}
-			}
-			break;
-		case WLAN_EID_RSN:
-			elems->rsn = pos;
-			elems->rsn_len = elen;
-			break;
-		case WLAN_EID_ERP_INFO:
-			elems->erp_info = pos;
-			elems->erp_info_len = elen;
-			break;
-		case WLAN_EID_EXT_SUPP_RATES:
-			elems->ext_supp_rates = pos;
-			elems->ext_supp_rates_len = elen;
-			break;
-		case WLAN_EID_HT_CAPABILITY:
-			elems->ht_cap_elem = pos;
-			elems->ht_cap_elem_len = elen;
-			break;
-		case WLAN_EID_HT_EXTRA_INFO:
-			elems->ht_info_elem = pos;
-			elems->ht_info_elem_len = elen;
-			break;
-		case WLAN_EID_MESH_ID:
-			elems->mesh_id = pos;
-			elems->mesh_id_len = elen;
-			break;
-		case WLAN_EID_MESH_CONFIG:
-			elems->mesh_config = pos;
-			elems->mesh_config_len = elen;
-			break;
-		case WLAN_EID_PEER_LINK:
-			elems->peer_link = pos;
-			elems->peer_link_len = elen;
-			break;
-		case WLAN_EID_PREQ:
-			elems->preq = pos;
-			elems->preq_len = elen;
-			break;
-		case WLAN_EID_PREP:
-			elems->prep = pos;
-			elems->prep_len = elen;
-			break;
-		case WLAN_EID_PERR:
-			elems->perr = pos;
-			elems->perr_len = elen;
-			break;
-		case WLAN_EID_CHANNEL_SWITCH:
-			elems->ch_switch_elem = pos;
-			elems->ch_switch_elem_len = elen;
-			break;
-		case WLAN_EID_QUIET:
-			if (!elems->quiet_elem) {
-				elems->quiet_elem = pos;
-				elems->quiet_elem_len = elen;
-			}
-			elems->num_of_quiet_elem++;
-			break;
-		case WLAN_EID_COUNTRY:
-			elems->country_elem = pos;
-			elems->country_elem_len = elen;
-			break;
-		case WLAN_EID_PWR_CONSTRAINT:
-			elems->pwr_constr_elem = pos;
-			elems->pwr_constr_elem_len = elen;
-			break;
-		default:
-			break;
-		}
-
-		left -= elen;
-		pos += elen;
-	}
-}
-
-
 static u8 * ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
 {
 	u8 *end, *pos;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index f40c060341a..e19c74cada3 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -428,3 +428,147 @@ void ieee80211_iterate_active_interfaces_atomic(
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
+
+void ieee802_11_parse_elems(u8 *start, size_t len,
+			    struct ieee802_11_elems *elems)
+{
+	size_t left = len;
+	u8 *pos = start;
+
+	memset(elems, 0, sizeof(*elems));
+	elems->ie_start = start;
+	elems->total_len = len;
+
+	while (left >= 2) {
+		u8 id, elen;
+
+		id = *pos++;
+		elen = *pos++;
+		left -= 2;
+
+		if (elen > left)
+			return;
+
+		switch (id) {
+		case WLAN_EID_SSID:
+			elems->ssid = pos;
+			elems->ssid_len = elen;
+			break;
+		case WLAN_EID_SUPP_RATES:
+			elems->supp_rates = pos;
+			elems->supp_rates_len = elen;
+			break;
+		case WLAN_EID_FH_PARAMS:
+			elems->fh_params = pos;
+			elems->fh_params_len = elen;
+			break;
+		case WLAN_EID_DS_PARAMS:
+			elems->ds_params = pos;
+			elems->ds_params_len = elen;
+			break;
+		case WLAN_EID_CF_PARAMS:
+			elems->cf_params = pos;
+			elems->cf_params_len = elen;
+			break;
+		case WLAN_EID_TIM:
+			elems->tim = pos;
+			elems->tim_len = elen;
+			break;
+		case WLAN_EID_IBSS_PARAMS:
+			elems->ibss_params = pos;
+			elems->ibss_params_len = elen;
+			break;
+		case WLAN_EID_CHALLENGE:
+			elems->challenge = pos;
+			elems->challenge_len = elen;
+			break;
+		case WLAN_EID_WPA:
+			if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
+			    pos[2] == 0xf2) {
+				/* Microsoft OUI (00:50:F2) */
+				if (pos[3] == 1) {
+					/* OUI Type 1 - WPA IE */
+					elems->wpa = pos;
+					elems->wpa_len = elen;
+				} else if (elen >= 5 && pos[3] == 2) {
+					if (pos[4] == 0) {
+						elems->wmm_info = pos;
+						elems->wmm_info_len = elen;
+					} else if (pos[4] == 1) {
+						elems->wmm_param = pos;
+						elems->wmm_param_len = elen;
+					}
+				}
+			}
+			break;
+		case WLAN_EID_RSN:
+			elems->rsn = pos;
+			elems->rsn_len = elen;
+			break;
+		case WLAN_EID_ERP_INFO:
+			elems->erp_info = pos;
+			elems->erp_info_len = elen;
+			break;
+		case WLAN_EID_EXT_SUPP_RATES:
+			elems->ext_supp_rates = pos;
+			elems->ext_supp_rates_len = elen;
+			break;
+		case WLAN_EID_HT_CAPABILITY:
+			elems->ht_cap_elem = pos;
+			elems->ht_cap_elem_len = elen;
+			break;
+		case WLAN_EID_HT_EXTRA_INFO:
+			elems->ht_info_elem = pos;
+			elems->ht_info_elem_len = elen;
+			break;
+		case WLAN_EID_MESH_ID:
+			elems->mesh_id = pos;
+			elems->mesh_id_len = elen;
+			break;
+		case WLAN_EID_MESH_CONFIG:
+			elems->mesh_config = pos;
+			elems->mesh_config_len = elen;
+			break;
+		case WLAN_EID_PEER_LINK:
+			elems->peer_link = pos;
+			elems->peer_link_len = elen;
+			break;
+		case WLAN_EID_PREQ:
+			elems->preq = pos;
+			elems->preq_len = elen;
+			break;
+		case WLAN_EID_PREP:
+			elems->prep = pos;
+			elems->prep_len = elen;
+			break;
+		case WLAN_EID_PERR:
+			elems->perr = pos;
+			elems->perr_len = elen;
+			break;
+		case WLAN_EID_CHANNEL_SWITCH:
+			elems->ch_switch_elem = pos;
+			elems->ch_switch_elem_len = elen;
+			break;
+		case WLAN_EID_QUIET:
+			if (!elems->quiet_elem) {
+				elems->quiet_elem = pos;
+				elems->quiet_elem_len = elen;
+			}
+			elems->num_of_quiet_elem++;
+			break;
+		case WLAN_EID_COUNTRY:
+			elems->country_elem = pos;
+			elems->country_elem_len = elen;
+			break;
+		case WLAN_EID_PWR_CONSTRAINT:
+			elems->pwr_constr_elem = pos;
+			elems->pwr_constr_elem_len = elen;
+			break;
+		default:
+			break;
+		}
+
+		left -= elen;
+		pos += elen;
+	}
+}
-- 
cgit v1.2.3-70-g09d2


From aa458d1737c3cc9a7c90ea9c5ef1ee6d663fba71 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Tue, 9 Sep 2008 00:32:12 +0300
Subject: mac80211: restructure disassoc/deauth flows

This patch restructure the flow of disassociation and deauthentication
flows to be consistent under all circumstances.
It ensures that BA session is treated down before deauthentication or disassociation,
adds the removal of the obsolete sta form station table and fixes a related bug (sta_info_destroy
without sta_info_unlink) in ieee80211_associated()
and reduce some code duplication

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 85 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b03f1f3ef2e..f7a390ff967 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -415,8 +415,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 		memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN);
 		ieee80211_sta_send_associnfo(sdata, ifsta);
 	} else {
-		netif_carrier_off(sdata->dev);
-		ieee80211_sta_tear_down_BA_sessions(sdata, ifsta->bssid);
 		ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
 		changed |= ieee80211_reset_erp_info(sdata);
 
@@ -439,18 +437,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
 }
 
-static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
-				   struct ieee80211_if_sta *ifsta, int deauth)
-{
-	if (deauth) {
-		ifsta->direct_probe_tries = 0;
-		ifsta->auth_tries = 0;
-	}
-	ifsta->assoc_scan_tries = 0;
-	ifsta->assoc_tries = 0;
-	ieee80211_set_associated(sdata, ifsta, 0);
-}
-
 void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		      int encrypt)
 {
@@ -844,6 +830,50 @@ static void ieee80211_send_disassoc(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_tx(sdata, skb, 0);
 }
 
+static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
+				   struct ieee80211_if_sta *ifsta, bool deauth,
+				   bool self_disconnected, u16 reason)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sta_info *sta;
+
+	rcu_read_lock();
+
+	sta = sta_info_get(local, ifsta->bssid);
+	if (!sta) {
+		rcu_read_unlock();
+		return;
+	}
+
+	if (deauth) {
+		ifsta->direct_probe_tries = 0;
+		ifsta->auth_tries = 0;
+	}
+	ifsta->assoc_scan_tries = 0;
+	ifsta->assoc_tries = 0;
+
+	netif_carrier_off(sdata->dev);
+
+	ieee80211_sta_tear_down_BA_sessions(sdata, sta->addr);
+
+	if (self_disconnected) {
+		if (deauth)
+			ieee80211_send_deauth(sdata, ifsta, reason);
+		else
+			ieee80211_send_disassoc(sdata, ifsta, reason);
+	}
+
+	ieee80211_set_associated(sdata, ifsta, 0);
+
+	if (self_disconnected)
+		ifsta->state = IEEE80211_STA_MLME_DISABLED;
+
+	sta_info_unlink(&sta);
+
+	rcu_read_unlock();
+
+	sta_info_destroy(sta);
+}
 
 static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_if_sta *ifsta)
@@ -938,7 +968,6 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata,
 				       "range\n",
 				       sdata->dev->name, print_mac(mac, ifsta->bssid));
 				disassoc = 1;
-				sta_info_unlink(&sta);
 			} else
 				ieee80211_send_probe_req(sdata, ifsta->bssid,
 							 local->scan_ssid,
@@ -958,16 +987,12 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata,
 
 	rcu_read_unlock();
 
-	if (disassoc && sta)
-		sta_info_destroy(sta);
-
-	if (disassoc) {
-		ifsta->state = IEEE80211_STA_MLME_DISABLED;
-		ieee80211_set_associated(sdata, ifsta, 0);
-	} else {
+	if (disassoc)
+		ieee80211_set_disassoc(sdata, ifsta, true, true,
+					WLAN_REASON_PREV_AUTH_NOT_VALID);
+	else
 		mod_timer(&ifsta->timer, jiffies +
 				      IEEE80211_MONITORING_INTERVAL);
-	}
 }
 
 
@@ -1832,7 +1857,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 				      IEEE80211_RETRY_AUTH_INTERVAL);
 	}
 
-	ieee80211_set_disassoc(sdata, ifsta, 1);
+	ieee80211_set_disassoc(sdata, ifsta, true, false, 0);
 	ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED;
 }
 
@@ -1862,7 +1887,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 				      IEEE80211_RETRY_AUTH_INTERVAL);
 	}
 
-	ieee80211_set_disassoc(sdata, ifsta, 0);
+	ieee80211_set_disassoc(sdata, ifsta, false, false, 0);
 }
 
 
@@ -3200,8 +3225,8 @@ void ieee80211_sta_work(struct work_struct *work)
 		printk(KERN_DEBUG "%s: privacy configuration mismatch and "
 		       "mixed-cell disabled - disassociate\n", sdata->dev->name);
 
-		ieee80211_send_disassoc(sdata, ifsta, WLAN_REASON_UNSPECIFIED);
-		ieee80211_set_disassoc(sdata, ifsta, 0);
+		ieee80211_set_disassoc(sdata, ifsta, false, true,
+					WLAN_REASON_UNSPECIFIED);
 	}
 }
 
@@ -4236,8 +4261,7 @@ int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason
 	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
 		return -EINVAL;
 
-	ieee80211_send_deauth(sdata, ifsta, reason);
-	ieee80211_set_disassoc(sdata, ifsta, 1);
+	ieee80211_set_disassoc(sdata, ifsta, true, true, reason);
 	return 0;
 }
 
@@ -4255,8 +4279,7 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
 	if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED))
 		return -1;
 
-	ieee80211_send_disassoc(sdata, ifsta, reason);
-	ieee80211_set_disassoc(sdata, ifsta, 0);
+	ieee80211_set_disassoc(sdata, ifsta, false, true, reason);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 3b7ee69d0caefbdb85a606a98bff841b8c63b97e Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 8 Sep 2008 17:33:38 +0200
Subject: mac80211: disassociate when moving to new BSS

This patch makes the MLME cleanly disassociate from the current BSS
when leaving it for a new one. This is not just nicer to the old AP
(we're leaving it, might as well tell it!) but also required for some
drivers that keep track of the station we're associated with, they'd
get confused because they'd think we are associated with two APs.

Signed-off-by: Ron Rindjunsky <ron.rindjunsky@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f7a390ff967..eababf320b8 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3271,9 +3271,14 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	if ((ifsta->flags & (IEEE80211_STA_BSSID_SET |
-				IEEE80211_STA_AUTO_BSSID_SEL)) &&
+			     IEEE80211_STA_AUTO_BSSID_SEL)) &&
 	    (ifsta->flags & (IEEE80211_STA_SSID_SET |
-				IEEE80211_STA_AUTO_SSID_SEL))) {
+			     IEEE80211_STA_AUTO_SSID_SEL))) {
+
+		if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED)
+			ieee80211_set_disassoc(sdata, ifsta, true, true,
+					       WLAN_REASON_DEAUTH_LEAVING);
+
 		set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
 		queue_work(local->hw.workqueue, &ifsta->work);
 	}
-- 
cgit v1.2.3-70-g09d2


From f5e5bf258b399f74b606e532ae0a2599522fd7bf Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 8 Sep 2008 17:33:39 +0200
Subject: mac80211: remove disassociation code from ieee80211_set_associated

This patch moves disassociation code from ieee80211_set_associated
to ieee80211_set_disassoc. To reduce code duplication, it introduces
the ieee80211_sta_send_apinfo function. Additionally, it fixes a lapse
where BSS_CHANGED_HT wasn't set when notifying the driver of changes
due to disassociation.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 102 ++++++++++++++++++++++++++++------------------------
 1 file changed, 56 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index eababf320b8..0e9bd840e7d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -352,6 +352,17 @@ int ieee80211_ht_addt_info_ie_to_ht_bss_info(
 	return 0;
 }
 
+static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata,
+					struct ieee80211_if_sta *ifsta)
+{
+	union iwreq_data wrqu;
+	memset(&wrqu, 0, sizeof(wrqu));
+	if (ifsta->flags & IEEE80211_STA_ASSOCIATED)
+		memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN);
+	wrqu.ap_addr.sa_family = ARPHRD_ETHER;
+	wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
+}
+
 static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_if_sta *ifsta)
 {
@@ -373,68 +384,53 @@ static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata,
 
 
 static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
-				     struct ieee80211_if_sta *ifsta,
-				     bool assoc)
+				     struct ieee80211_if_sta *ifsta)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_conf *conf = &local_to_hw(local)->conf;
-	union iwreq_data wrqu;
 	u32 changed = BSS_CHANGED_ASSOC;
 
-	if (assoc) {
-		struct ieee80211_sta_bss *bss;
-
-		ifsta->flags |= IEEE80211_STA_ASSOCIATED;
+	struct ieee80211_sta_bss *bss;
 
-		if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
-			return;
+	ifsta->flags |= IEEE80211_STA_ASSOCIATED;
 
-		bss = ieee80211_rx_bss_get(local, ifsta->bssid,
-					   conf->channel->center_freq,
-					   ifsta->ssid, ifsta->ssid_len);
-		if (bss) {
-			/* set timing information */
-			sdata->bss_conf.beacon_int = bss->beacon_int;
-			sdata->bss_conf.timestamp = bss->timestamp;
-			sdata->bss_conf.dtim_period = bss->dtim_period;
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+		return;
 
-			changed |= ieee80211_handle_bss_capability(sdata, bss);
+	bss = ieee80211_rx_bss_get(local, ifsta->bssid,
+				   conf->channel->center_freq,
+				   ifsta->ssid, ifsta->ssid_len);
+	if (bss) {
+		/* set timing information */
+		sdata->bss_conf.beacon_int = bss->beacon_int;
+		sdata->bss_conf.timestamp = bss->timestamp;
+		sdata->bss_conf.dtim_period = bss->dtim_period;
 
-			ieee80211_rx_bss_put(local, bss);
-		}
+		changed |= ieee80211_handle_bss_capability(sdata, bss);
 
-		if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
-			changed |= BSS_CHANGED_HT;
-			sdata->bss_conf.assoc_ht = 1;
-			sdata->bss_conf.ht_conf = &conf->ht_conf;
-			sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf;
-		}
+		ieee80211_rx_bss_put(local, bss);
+	}
 
-		ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET;
-		memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN);
-		memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN);
-		ieee80211_sta_send_associnfo(sdata, ifsta);
-	} else {
-		ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
-		changed |= ieee80211_reset_erp_info(sdata);
+	if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
+		changed |= BSS_CHANGED_HT;
+		sdata->bss_conf.assoc_ht = 1;
+		sdata->bss_conf.ht_conf = &conf->ht_conf;
+		sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf;
+	}
 
-		sdata->bss_conf.assoc_ht = 0;
-		sdata->bss_conf.ht_conf = NULL;
-		sdata->bss_conf.ht_bss_conf = NULL;
+	ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET;
+	memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN);
+	ieee80211_sta_send_associnfo(sdata, ifsta);
 
-		memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN);
-	}
 	ifsta->last_probe = jiffies;
-	ieee80211_led_assoc(local, assoc);
+	ieee80211_led_assoc(local, 1);
 
-	sdata->bss_conf.assoc = assoc;
+	sdata->bss_conf.assoc = 1;
 	ieee80211_bss_info_change_notify(sdata, changed);
 
-	if (assoc)
-		netif_carrier_on(sdata->dev);
+	netif_carrier_on(sdata->dev);
 
-	wrqu.ap_addr.sa_family = ARPHRD_ETHER;
-	wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
+	ieee80211_sta_send_apinfo(sdata, ifsta);
 }
 
 void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
@@ -836,6 +832,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
+	u32 changed = BSS_CHANGED_ASSOC;
 
 	rcu_read_lock();
 
@@ -863,7 +860,20 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 			ieee80211_send_disassoc(sdata, ifsta, reason);
 	}
 
-	ieee80211_set_associated(sdata, ifsta, 0);
+	ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
+	changed |= ieee80211_reset_erp_info(sdata);
+
+	if (sdata->bss_conf.assoc_ht)
+		changed |= BSS_CHANGED_HT;
+
+	sdata->bss_conf.assoc_ht = 0;
+	sdata->bss_conf.ht_conf = NULL;
+	sdata->bss_conf.ht_bss_conf = NULL;
+
+	ieee80211_led_assoc(local, 0);
+	sdata->bss_conf.assoc = 0;
+
+	ieee80211_sta_send_apinfo(sdata, ifsta);
 
 	if (self_disconnected)
 		ifsta->state = IEEE80211_STA_MLME_DISABLED;
@@ -2081,7 +2091,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	 * ieee80211_set_associated() will tell the driver */
 	bss_conf->aid = aid;
 	bss_conf->assoc_capability = capab_info;
-	ieee80211_set_associated(sdata, ifsta, 1);
+	ieee80211_set_associated(sdata, ifsta);
 
 	ieee80211_associated(sdata, ifsta);
 }
-- 
cgit v1.2.3-70-g09d2


From 24e64622c3f3143c801850897ab0cea8f3c69445 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 8 Sep 2008 17:33:40 +0200
Subject: mac80211: stop queues before carrier off

During testing of the disassociation fixes, Tomas noticed that it
was possible to run into a situation where you'd suddenly get a
few "wlan0: dropped frame to <AP> (unauthorized port)" messages
and I found this to be due to the AP's sta_info having been
removed but netif_carrier_off not having removed/stopped traffic
yet. To avoid that, stop the queue for the interface (and avoid
bringing them up when another vif scans when they weren't up.)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0e9bd840e7d..d889e2a89e4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -428,6 +428,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	sdata->bss_conf.assoc = 1;
 	ieee80211_bss_info_change_notify(sdata, changed);
 
+	netif_tx_start_all_queues(sdata->dev);
 	netif_carrier_on(sdata->dev);
 
 	ieee80211_sta_send_apinfo(sdata, ifsta);
@@ -849,6 +850,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	ifsta->assoc_scan_tries = 0;
 	ifsta->assoc_tries = 0;
 
+	netif_tx_stop_all_queues(sdata->dev);
 	netif_carrier_off(sdata->dev);
 
 	ieee80211_sta_tear_down_BA_sessions(sdata, sta->addr);
@@ -3268,6 +3270,7 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata,
 	ifsta->direct_probe_tries = 0;
 	ifsta->auth_tries = 0;
 	ifsta->assoc_tries = 0;
+	netif_tx_stop_all_queues(sdata->dev);
 	netif_carrier_off(sdata->dev);
 }
 
@@ -3744,13 +3747,15 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		/* Tell AP we're back */
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
-		    sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)
-			ieee80211_send_nullfunc(local, sdata, 0);
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+			if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) {
+				ieee80211_send_nullfunc(local, sdata, 0);
+				netif_tx_wake_all_queues(sdata->dev);
+			}
+		} else
+			netif_tx_wake_all_queues(sdata->dev);
 
 		ieee80211_restart_sta_timer(sdata);
-
-		netif_wake_queue(sdata->dev);
 	}
 	rcu_read_unlock();
 
@@ -3908,10 +3913,13 @@ static int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-		netif_stop_queue(sdata->dev);
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
-		    (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED))
-			ieee80211_send_nullfunc(local, sdata, 1);
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+			if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) {
+				netif_tx_stop_all_queues(sdata->dev);
+				ieee80211_send_nullfunc(local, sdata, 1);
+			}
+		} else
+			netif_tx_stop_all_queues(sdata->dev);
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3-70-g09d2


From 60f8b39c9406752ea5e0d3bbf5df6f903d61cacf Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:44:22 +0200
Subject: mac80211: reorder mlme code

This reorders the mlme code a bit so we don't need all the forward
function declarations. It also removes the ERP_INFO_USE_PROTECTION
define that is unused, but otherwise contains no real changes.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 1224 +++++++++++++++++++++++++--------------------------
 1 file changed, 597 insertions(+), 627 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d889e2a89e4..67c38235a3c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -26,9 +26,8 @@
 #include <linux/etherdevice.h>
 #include <linux/rtnetlink.h>
 #include <net/iw_handler.h>
-#include <asm/types.h>
-
 #include <net/mac80211.h>
+
 #include "ieee80211_i.h"
 #include "rate.h"
 #include "led.h"
@@ -58,8 +57,6 @@
 #define IEEE80211_IBSS_MAX_STA_ENTRIES 128
 
 
-#define ERP_INFO_USE_PROTECTION BIT(1)
-
 /* mgmt header + 1 byte category code */
 #define IEEE80211_MIN_ACTION_SIZE (24 + 1)
 
@@ -74,24 +71,199 @@
 #define IEEE80211_MIN_AMPDU_BUF 0x8
 #define IEEE80211_MAX_AMPDU_BUF 0x40
 
-static void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
-				     u8 *ssid, size_t ssid_len);
+/* BSS handling */
 static struct ieee80211_sta_bss *
 ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
-		     u8 *ssid, u8 ssid_len);
+		     u8 *ssid, u8 ssid_len)
+{
+	struct ieee80211_sta_bss *bss;
+
+	spin_lock_bh(&local->sta_bss_lock);
+	bss = local->sta_bss_hash[STA_HASH(bssid)];
+	while (bss) {
+		if (!bss_mesh_cfg(bss) &&
+		    !memcmp(bss->bssid, bssid, ETH_ALEN) &&
+		    bss->freq == freq &&
+		    bss->ssid_len == ssid_len &&
+		    (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) {
+			atomic_inc(&bss->users);
+			break;
+		}
+		bss = bss->hnext;
+	}
+	spin_unlock_bh(&local->sta_bss_lock);
+	return bss;
+}
+
+/* Caller must hold local->sta_bss_lock */
+static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local,
+					struct ieee80211_sta_bss *bss)
+{
+	u8 hash_idx;
+
+	if (bss_mesh_cfg(bss))
+		hash_idx = mesh_id_hash(bss_mesh_id(bss),
+					bss_mesh_id_len(bss));
+	else
+		hash_idx = STA_HASH(bss->bssid);
+
+	bss->hnext = local->sta_bss_hash[hash_idx];
+	local->sta_bss_hash[hash_idx] = bss;
+}
+
+/* Caller must hold local->sta_bss_lock */
+static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
+					struct ieee80211_sta_bss *bss)
+{
+	struct ieee80211_sta_bss *b, *prev = NULL;
+	b = local->sta_bss_hash[STA_HASH(bss->bssid)];
+	while (b) {
+		if (b == bss) {
+			if (!prev)
+				local->sta_bss_hash[STA_HASH(bss->bssid)] =
+					bss->hnext;
+			else
+				prev->hnext = bss->hnext;
+			break;
+		}
+		prev = b;
+		b = b->hnext;
+	}
+}
+
+static struct ieee80211_sta_bss *
+ieee80211_rx_bss_add(struct ieee80211_sub_if_data *sdata, u8 *bssid, int freq,
+		     u8 *ssid, u8 ssid_len)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_sta_bss *bss;
+
+	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
+	if (!bss)
+		return NULL;
+	atomic_inc(&bss->users);
+	atomic_inc(&bss->users);
+	memcpy(bss->bssid, bssid, ETH_ALEN);
+	bss->freq = freq;
+	if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) {
+		memcpy(bss->ssid, ssid, ssid_len);
+		bss->ssid_len = ssid_len;
+	}
+
+	spin_lock_bh(&local->sta_bss_lock);
+	/* TODO: order by RSSI? */
+	list_add_tail(&bss->list, &local->sta_bss_list);
+	__ieee80211_rx_bss_hash_add(local, bss);
+	spin_unlock_bh(&local->sta_bss_lock);
+	return bss;
+}
+
+#ifdef CONFIG_MAC80211_MESH
+static struct ieee80211_sta_bss *
+ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
+			  u8 *mesh_cfg, int freq)
+{
+	struct ieee80211_sta_bss *bss;
+
+	spin_lock_bh(&local->sta_bss_lock);
+	bss = local->sta_bss_hash[mesh_id_hash(mesh_id, mesh_id_len)];
+	while (bss) {
+		if (bss_mesh_cfg(bss) &&
+		    !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) &&
+		    bss->freq == freq &&
+		    mesh_id_len == bss->mesh_id_len &&
+		    (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id,
+						 mesh_id_len))) {
+			atomic_inc(&bss->users);
+			break;
+		}
+		bss = bss->hnext;
+	}
+	spin_unlock_bh(&local->sta_bss_lock);
+	return bss;
+}
+
+static struct ieee80211_sta_bss *
+ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
+			  u8 *mesh_cfg, int mesh_config_len, int freq)
+{
+	struct ieee80211_sta_bss *bss;
+
+	if (mesh_config_len != MESH_CFG_LEN)
+		return NULL;
+
+	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
+	if (!bss)
+		return NULL;
+
+	bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC);
+	if (!bss->mesh_cfg) {
+		kfree(bss);
+		return NULL;
+	}
+
+	if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) {
+		bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC);
+		if (!bss->mesh_id) {
+			kfree(bss->mesh_cfg);
+			kfree(bss);
+			return NULL;
+		}
+		memcpy(bss->mesh_id, mesh_id, mesh_id_len);
+	}
+
+	atomic_inc(&bss->users);
+	atomic_inc(&bss->users);
+	memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN);
+	bss->mesh_id_len = mesh_id_len;
+	bss->freq = freq;
+	spin_lock_bh(&local->sta_bss_lock);
+	/* TODO: order by RSSI? */
+	list_add_tail(&bss->list, &local->sta_bss_list);
+	__ieee80211_rx_bss_hash_add(local, bss);
+	spin_unlock_bh(&local->sta_bss_lock);
+	return bss;
+}
+#endif
+
+static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
+{
+	kfree(bss->ies);
+	kfree(bss_mesh_id(bss));
+	kfree(bss_mesh_cfg(bss));
+	kfree(bss);
+}
+
 static void ieee80211_rx_bss_put(struct ieee80211_local *local,
-				 struct ieee80211_sta_bss *bss);
-static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata,
-				   struct ieee80211_if_sta *ifsta);
-static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata);
-static int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *sdata,
-				    u8 *ssid, size_t ssid_len);
-static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
-				     struct ieee80211_if_sta *ifsta);
-static void sta_rx_agg_session_timer_expired(unsigned long data);
+				 struct ieee80211_sta_bss *bss)
+{
+	local_bh_disable();
+	if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) {
+		local_bh_enable();
+		return;
+	}
+
+	__ieee80211_rx_bss_hash_del(local, bss);
+	list_del(&bss->list);
+	spin_unlock_bh(&local->sta_bss_lock);
+	ieee80211_rx_bss_free(bss);
+}
 
+void ieee80211_rx_bss_list_init(struct ieee80211_local *local)
+{
+	spin_lock_init(&local->sta_bss_lock);
+	INIT_LIST_HEAD(&local->sta_bss_list);
+}
+
+void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local)
+{
+	struct ieee80211_sta_bss *bss, *tmp;
 
-static u8 * ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
+	list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list)
+		ieee80211_rx_bss_put(local, bss);
+}
+
+static u8 *ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
 {
 	u8 *end, *pos;
 
@@ -111,13 +283,125 @@ static u8 * ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
 	return NULL;
 }
 
-
+/* utils */
 static int ecw2cw(int ecw)
 {
 	return (1 << ecw) - 1;
 }
 
+/* frame sending functions */
+void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
+		      int encrypt)
+{
+	skb->dev = sdata->local->mdev;
+	skb_set_mac_header(skb, 0);
+	skb_set_network_header(skb, 0);
+	skb_set_transport_header(skb, 0);
 
+	skb->iif = sdata->dev->ifindex;
+	skb->do_not_encrypt = !encrypt;
+
+	dev_queue_xmit(skb);
+}
+
+static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
+				struct ieee80211_if_sta *ifsta,
+				int transaction, u8 *extra, size_t extra_len,
+				int encrypt)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+			    sizeof(*mgmt) + 6 + extra_len);
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
+		       "frame\n", sdata->dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
+	memset(mgmt, 0, 24 + 6);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_AUTH);
+	if (encrypt)
+		mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+	mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg);
+	mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
+	ifsta->auth_transaction = transaction + 1;
+	mgmt->u.auth.status_code = cpu_to_le16(0);
+	if (extra)
+		memcpy(skb_put(skb, extra_len), extra, extra_len);
+
+	ieee80211_sta_tx(sdata, skb, encrypt);
+}
+
+static void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
+				     u8 *ssid, size_t ssid_len)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_supported_band *sband;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+	u8 *pos, *supp_rates, *esupp_rates = NULL;
+	int i;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200);
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
+		       "request\n", sdata->dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_PROBE_REQ);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	if (dst) {
+		memcpy(mgmt->da, dst, ETH_ALEN);
+		memcpy(mgmt->bssid, dst, ETH_ALEN);
+	} else {
+		memset(mgmt->da, 0xff, ETH_ALEN);
+		memset(mgmt->bssid, 0xff, ETH_ALEN);
+	}
+	pos = skb_put(skb, 2 + ssid_len);
+	*pos++ = WLAN_EID_SSID;
+	*pos++ = ssid_len;
+	memcpy(pos, ssid, ssid_len);
+
+	supp_rates = skb_put(skb, 2);
+	supp_rates[0] = WLAN_EID_SUPP_RATES;
+	supp_rates[1] = 0;
+	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+
+	for (i = 0; i < sband->n_bitrates; i++) {
+		struct ieee80211_rate *rate = &sband->bitrates[i];
+		if (esupp_rates) {
+			pos = skb_put(skb, 1);
+			esupp_rates[1]++;
+		} else if (supp_rates[1] == 8) {
+			esupp_rates = skb_put(skb, 3);
+			esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES;
+			esupp_rates[1] = 1;
+			pos = &esupp_rates[2];
+		} else {
+			pos = skb_put(skb, 1);
+			supp_rates[1]++;
+		}
+		*pos = rate->bitrate / 5;
+	}
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+/* MLME */
 static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_sta_bss *bss,
 					 int ibss)
@@ -434,58 +718,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_send_apinfo(sdata, ifsta);
 }
 
-void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
-		      int encrypt)
-{
-	skb->dev = sdata->local->mdev;
-	skb_set_mac_header(skb, 0);
-	skb_set_network_header(skb, 0);
-	skb_set_transport_header(skb, 0);
-
-	skb->iif = sdata->dev->ifindex;
-	skb->do_not_encrypt = !encrypt;
-
-	dev_queue_xmit(skb);
-}
-
-
-static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
-				struct ieee80211_if_sta *ifsta,
-				int transaction, u8 *extra, size_t extra_len,
-				int encrypt)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
-
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
-			    sizeof(*mgmt) + 6 + extra_len);
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
-		       "frame\n", sdata->dev->name);
-		return;
-	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
-	memset(mgmt, 0, 24 + 6);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_AUTH);
-	if (encrypt)
-		mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
-	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg);
-	mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
-	ifsta->auth_transaction = transaction + 1;
-	mgmt->u.auth.status_code = cpu_to_le16(0);
-	if (extra)
-		memcpy(skb_put(skb, extra_len), extra, extra_len);
-
-	ieee80211_sta_tx(sdata, skb, encrypt);
-}
-
 static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta)
 {
@@ -798,6 +1030,13 @@ static void ieee80211_send_deauth(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_tx(sdata, skb, 0);
 }
 
+static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata)
+{
+	if (!sdata || !sdata->default_key ||
+	    sdata->default_key->conf.alg != ALG_WEP)
+		return 0;
+	return 1;
+}
 
 static void ieee80211_send_disassoc(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_if_sta *ifsta, u16 reason)
@@ -917,7 +1156,6 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata,
 	return 1;
 }
 
-
 static void ieee80211_associate(struct ieee80211_sub_if_data *sdata,
 				struct ieee80211_if_sta *ifsta)
 {
@@ -999,82 +1237,12 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata,
 
 	rcu_read_unlock();
 
-	if (disassoc)
-		ieee80211_set_disassoc(sdata, ifsta, true, true,
-					WLAN_REASON_PREV_AUTH_NOT_VALID);
-	else
-		mod_timer(&ifsta->timer, jiffies +
-				      IEEE80211_MONITORING_INTERVAL);
-}
-
-
-static void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
-				     u8 *ssid, size_t ssid_len)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_supported_band *sband;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
-	u8 *pos, *supp_rates, *esupp_rates = NULL;
-	int i;
-
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200);
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
-		       "request\n", sdata->dev->name);
-		return;
-	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_PROBE_REQ);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	if (dst) {
-		memcpy(mgmt->da, dst, ETH_ALEN);
-		memcpy(mgmt->bssid, dst, ETH_ALEN);
-	} else {
-		memset(mgmt->da, 0xff, ETH_ALEN);
-		memset(mgmt->bssid, 0xff, ETH_ALEN);
-	}
-	pos = skb_put(skb, 2 + ssid_len);
-	*pos++ = WLAN_EID_SSID;
-	*pos++ = ssid_len;
-	memcpy(pos, ssid, ssid_len);
-
-	supp_rates = skb_put(skb, 2);
-	supp_rates[0] = WLAN_EID_SUPP_RATES;
-	supp_rates[1] = 0;
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-
-	for (i = 0; i < sband->n_bitrates; i++) {
-		struct ieee80211_rate *rate = &sband->bitrates[i];
-		if (esupp_rates) {
-			pos = skb_put(skb, 1);
-			esupp_rates[1]++;
-		} else if (supp_rates[1] == 8) {
-			esupp_rates = skb_put(skb, 3);
-			esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES;
-			esupp_rates[1] = 1;
-			pos = &esupp_rates[2];
-		} else {
-			pos = skb_put(skb, 1);
-			supp_rates[1]++;
-		}
-		*pos = rate->bitrate / 5;
-	}
-
-	ieee80211_sta_tx(sdata, skb, 0);
-}
-
-
-static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata)
-{
-	if (!sdata || !sdata->default_key ||
-	    sdata->default_key->conf.alg != ALG_WEP)
-		return 0;
-	return 1;
+	if (disassoc)
+		ieee80211_set_disassoc(sdata, ifsta, true, true,
+					WLAN_REASON_PREV_AUTH_NOT_VALID);
+	else
+		mod_timer(&ifsta->timer, jiffies +
+				      IEEE80211_MONITORING_INTERVAL);
 }
 
 
@@ -1200,6 +1368,30 @@ void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, const u8
 	ieee80211_sta_tx(sdata, skb, 0);
 }
 
+/*
+ * After accepting the AddBA Request we activated a timer,
+ * resetting it after each frame that arrives from the originator.
+ * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
+ */
+static void sta_rx_agg_session_timer_expired(unsigned long data)
+{
+	/* not an elegant detour, but there is no choice as the timer passes
+	 * only one argument, and various sta_info are needed here, so init
+	 * flow in sta_info_create gives the TID as data, while the timer_to_id
+	 * array gives the sta through container_of */
+	u8 *ptid = (u8 *)data;
+	u8 *timer_to_id = ptid - *ptid;
+	struct sta_info *sta = container_of(timer_to_id, struct sta_info,
+					 timer_to_tid[0]);
+
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
+#endif
+	ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr,
+					 (u16)*ptid, WLAN_BACK_TIMER,
+					 WLAN_REASON_QSTA_TIMEOUT);
+}
+
 static void ieee80211_sta_process_addba_request(struct ieee80211_local *local,
 						struct ieee80211_mgmt *mgmt,
 						size_t len)
@@ -1646,30 +1838,6 @@ timer_expired_exit:
 	rcu_read_unlock();
 }
 
-/*
- * After accepting the AddBA Request we activated a timer,
- * resetting it after each frame that arrives from the originator.
- * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
- */
-static void sta_rx_agg_session_timer_expired(unsigned long data)
-{
-	/* not an elegant detour, but there is no choice as the timer passes
-	 * only one argument, and various sta_info are needed here, so init
-	 * flow in sta_info_create gives the TID as data, while the timer_to_id
-	 * array gives the sta through container_of */
-	u8 *ptid = (u8 *)data;
-	u8 *timer_to_id = ptid - *ptid;
-	struct sta_info *sta = container_of(timer_to_id, struct sta_info,
-					 timer_to_tid[0]);
-
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
-#endif
-	ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr,
-					 (u16)*ptid, WLAN_BACK_TIMER,
-					 WLAN_REASON_QSTA_TIMEOUT);
-}
-
 void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr)
 {
 	struct ieee80211_local *local = sdata->local;
@@ -1991,308 +2159,111 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 			rcu_read_unlock();
 			return;
 		}
-		bss = ieee80211_rx_bss_get(local, ifsta->bssid,
-					   local->hw.conf.channel->center_freq,
-					   ifsta->ssid, ifsta->ssid_len);
-		if (bss) {
-			sta->last_signal = bss->signal;
-			sta->last_qual = bss->qual;
-			sta->last_noise = bss->noise;
-			ieee80211_rx_bss_put(local, bss);
-		}
-
-		err = sta_info_insert(sta);
-		if (err) {
-			printk(KERN_DEBUG "%s: failed to insert STA entry for"
-			       " the AP (error %d)\n", sdata->dev->name, err);
-			rcu_read_unlock();
-			return;
-		}
-		/* update new sta with its last rx activity */
-		sta->last_rx = jiffies;
-	}
-
-	/*
-	 * FIXME: Do we really need to update the sta_info's information here?
-	 *	  We already know about the AP (we found it in our list) so it
-	 *	  should already be filled with the right info, no?
-	 *	  As is stands, all this is racy because typically we assume
-	 *	  the information that is filled in here (except flags) doesn't
-	 *	  change while a STA structure is alive. As such, it should move
-	 *	  to between the sta_info_alloc() and sta_info_insert() above.
-	 */
-
-	set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP |
-			   WLAN_STA_AUTHORIZED);
-
-	rates = 0;
-	basic_rates = 0;
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-
-	for (i = 0; i < elems.supp_rates_len; i++) {
-		int rate = (elems.supp_rates[i] & 0x7f) * 5;
-
-		if (rate > 110)
-			have_higher_than_11mbit = true;
-
-		for (j = 0; j < sband->n_bitrates; j++) {
-			if (sband->bitrates[j].bitrate == rate)
-				rates |= BIT(j);
-			if (elems.supp_rates[i] & 0x80)
-				basic_rates |= BIT(j);
-		}
-	}
-
-	for (i = 0; i < elems.ext_supp_rates_len; i++) {
-		int rate = (elems.ext_supp_rates[i] & 0x7f) * 5;
-
-		if (rate > 110)
-			have_higher_than_11mbit = true;
-
-		for (j = 0; j < sband->n_bitrates; j++) {
-			if (sband->bitrates[j].bitrate == rate)
-				rates |= BIT(j);
-			if (elems.ext_supp_rates[i] & 0x80)
-				basic_rates |= BIT(j);
-		}
-	}
-
-	sta->supp_rates[local->hw.conf.channel->band] = rates;
-	sdata->basic_rates = basic_rates;
-
-	/* cf. IEEE 802.11 9.2.12 */
-	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
-	    have_higher_than_11mbit)
-		sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
-	else
-		sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
-
-	if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
-	    (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
-		struct ieee80211_ht_bss_info bss_info;
-		ieee80211_ht_cap_ie_to_ht_info(
-				(struct ieee80211_ht_cap *)
-				elems.ht_cap_elem, &sta->ht_info);
-		ieee80211_ht_addt_info_ie_to_ht_bss_info(
-				(struct ieee80211_ht_addt_info *)
-				elems.ht_info_elem, &bss_info);
-		ieee80211_handle_ht(local, 1, &sta->ht_info, &bss_info);
-	}
-
-	rate_control_rate_init(sta, local);
-
-	if (elems.wmm_param) {
-		set_sta_flags(sta, WLAN_STA_WME);
-		rcu_read_unlock();
-		ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param,
-					 elems.wmm_param_len);
-	} else
-		rcu_read_unlock();
-
-	/* set AID and assoc capability,
-	 * ieee80211_set_associated() will tell the driver */
-	bss_conf->aid = aid;
-	bss_conf->assoc_capability = capab_info;
-	ieee80211_set_associated(sdata, ifsta);
-
-	ieee80211_associated(sdata, ifsta);
-}
-
-
-/* Caller must hold local->sta_bss_lock */
-static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local,
-					struct ieee80211_sta_bss *bss)
-{
-	u8 hash_idx;
-
-	if (bss_mesh_cfg(bss))
-		hash_idx = mesh_id_hash(bss_mesh_id(bss),
-					bss_mesh_id_len(bss));
-	else
-		hash_idx = STA_HASH(bss->bssid);
-
-	bss->hnext = local->sta_bss_hash[hash_idx];
-	local->sta_bss_hash[hash_idx] = bss;
-}
-
-
-/* Caller must hold local->sta_bss_lock */
-static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
-					struct ieee80211_sta_bss *bss)
-{
-	struct ieee80211_sta_bss *b, *prev = NULL;
-	b = local->sta_bss_hash[STA_HASH(bss->bssid)];
-	while (b) {
-		if (b == bss) {
-			if (!prev)
-				local->sta_bss_hash[STA_HASH(bss->bssid)] =
-					bss->hnext;
-			else
-				prev->hnext = bss->hnext;
-			break;
-		}
-		prev = b;
-		b = b->hnext;
-	}
-}
-
-
-static struct ieee80211_sta_bss *
-ieee80211_rx_bss_add(struct ieee80211_sub_if_data *sdata, u8 *bssid, int freq,
-		     u8 *ssid, u8 ssid_len)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_sta_bss *bss;
-
-	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
-	if (!bss)
-		return NULL;
-	atomic_inc(&bss->users);
-	atomic_inc(&bss->users);
-	memcpy(bss->bssid, bssid, ETH_ALEN);
-	bss->freq = freq;
-	if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) {
-		memcpy(bss->ssid, ssid, ssid_len);
-		bss->ssid_len = ssid_len;
-	}
-
-	spin_lock_bh(&local->sta_bss_lock);
-	/* TODO: order by RSSI? */
-	list_add_tail(&bss->list, &local->sta_bss_list);
-	__ieee80211_rx_bss_hash_add(local, bss);
-	spin_unlock_bh(&local->sta_bss_lock);
-	return bss;
-}
-
-static struct ieee80211_sta_bss *
-ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
-		     u8 *ssid, u8 ssid_len)
-{
-	struct ieee80211_sta_bss *bss;
-
-	spin_lock_bh(&local->sta_bss_lock);
-	bss = local->sta_bss_hash[STA_HASH(bssid)];
-	while (bss) {
-		if (!bss_mesh_cfg(bss) &&
-		    !memcmp(bss->bssid, bssid, ETH_ALEN) &&
-		    bss->freq == freq &&
-		    bss->ssid_len == ssid_len &&
-		    (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) {
-			atomic_inc(&bss->users);
-			break;
-		}
-		bss = bss->hnext;
-	}
-	spin_unlock_bh(&local->sta_bss_lock);
-	return bss;
-}
-
-#ifdef CONFIG_MAC80211_MESH
-static struct ieee80211_sta_bss *
-ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
-			  u8 *mesh_cfg, int freq)
-{
-	struct ieee80211_sta_bss *bss;
+		bss = ieee80211_rx_bss_get(local, ifsta->bssid,
+					   local->hw.conf.channel->center_freq,
+					   ifsta->ssid, ifsta->ssid_len);
+		if (bss) {
+			sta->last_signal = bss->signal;
+			sta->last_qual = bss->qual;
+			sta->last_noise = bss->noise;
+			ieee80211_rx_bss_put(local, bss);
+		}
 
-	spin_lock_bh(&local->sta_bss_lock);
-	bss = local->sta_bss_hash[mesh_id_hash(mesh_id, mesh_id_len)];
-	while (bss) {
-		if (bss_mesh_cfg(bss) &&
-		    !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) &&
-		    bss->freq == freq &&
-		    mesh_id_len == bss->mesh_id_len &&
-		    (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id,
-						 mesh_id_len))) {
-			atomic_inc(&bss->users);
-			break;
+		err = sta_info_insert(sta);
+		if (err) {
+			printk(KERN_DEBUG "%s: failed to insert STA entry for"
+			       " the AP (error %d)\n", sdata->dev->name, err);
+			rcu_read_unlock();
+			return;
 		}
-		bss = bss->hnext;
+		/* update new sta with its last rx activity */
+		sta->last_rx = jiffies;
 	}
-	spin_unlock_bh(&local->sta_bss_lock);
-	return bss;
-}
 
-static struct ieee80211_sta_bss *
-ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
-			  u8 *mesh_cfg, int mesh_config_len, int freq)
-{
-	struct ieee80211_sta_bss *bss;
+	/*
+	 * FIXME: Do we really need to update the sta_info's information here?
+	 *	  We already know about the AP (we found it in our list) so it
+	 *	  should already be filled with the right info, no?
+	 *	  As is stands, all this is racy because typically we assume
+	 *	  the information that is filled in here (except flags) doesn't
+	 *	  change while a STA structure is alive. As such, it should move
+	 *	  to between the sta_info_alloc() and sta_info_insert() above.
+	 */
 
-	if (mesh_config_len != MESH_CFG_LEN)
-		return NULL;
+	set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP |
+			   WLAN_STA_AUTHORIZED);
 
-	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
-	if (!bss)
-		return NULL;
+	rates = 0;
+	basic_rates = 0;
+	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
-	bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC);
-	if (!bss->mesh_cfg) {
-		kfree(bss);
-		return NULL;
-	}
+	for (i = 0; i < elems.supp_rates_len; i++) {
+		int rate = (elems.supp_rates[i] & 0x7f) * 5;
 
-	if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) {
-		bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC);
-		if (!bss->mesh_id) {
-			kfree(bss->mesh_cfg);
-			kfree(bss);
-			return NULL;
+		if (rate > 110)
+			have_higher_than_11mbit = true;
+
+		for (j = 0; j < sband->n_bitrates; j++) {
+			if (sband->bitrates[j].bitrate == rate)
+				rates |= BIT(j);
+			if (elems.supp_rates[i] & 0x80)
+				basic_rates |= BIT(j);
 		}
-		memcpy(bss->mesh_id, mesh_id, mesh_id_len);
 	}
 
-	atomic_inc(&bss->users);
-	atomic_inc(&bss->users);
-	memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN);
-	bss->mesh_id_len = mesh_id_len;
-	bss->freq = freq;
-	spin_lock_bh(&local->sta_bss_lock);
-	/* TODO: order by RSSI? */
-	list_add_tail(&bss->list, &local->sta_bss_list);
-	__ieee80211_rx_bss_hash_add(local, bss);
-	spin_unlock_bh(&local->sta_bss_lock);
-	return bss;
-}
-#endif
-
-static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
-{
-	kfree(bss->ies);
-	kfree(bss_mesh_id(bss));
-	kfree(bss_mesh_cfg(bss));
-	kfree(bss);
-}
+	for (i = 0; i < elems.ext_supp_rates_len; i++) {
+		int rate = (elems.ext_supp_rates[i] & 0x7f) * 5;
 
+		if (rate > 110)
+			have_higher_than_11mbit = true;
 
-static void ieee80211_rx_bss_put(struct ieee80211_local *local,
-				 struct ieee80211_sta_bss *bss)
-{
-	local_bh_disable();
-	if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) {
-		local_bh_enable();
-		return;
+		for (j = 0; j < sband->n_bitrates; j++) {
+			if (sband->bitrates[j].bitrate == rate)
+				rates |= BIT(j);
+			if (elems.ext_supp_rates[i] & 0x80)
+				basic_rates |= BIT(j);
+		}
 	}
 
-	__ieee80211_rx_bss_hash_del(local, bss);
-	list_del(&bss->list);
-	spin_unlock_bh(&local->sta_bss_lock);
-	ieee80211_rx_bss_free(bss);
-}
+	sta->supp_rates[local->hw.conf.channel->band] = rates;
+	sdata->basic_rates = basic_rates;
+
+	/* cf. IEEE 802.11 9.2.12 */
+	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
+	    have_higher_than_11mbit)
+		sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
+	else
+		sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
 
+	if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
+	    (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
+		struct ieee80211_ht_bss_info bss_info;
+		ieee80211_ht_cap_ie_to_ht_info(
+				(struct ieee80211_ht_cap *)
+				elems.ht_cap_elem, &sta->ht_info);
+		ieee80211_ht_addt_info_ie_to_ht_bss_info(
+				(struct ieee80211_ht_addt_info *)
+				elems.ht_info_elem, &bss_info);
+		ieee80211_handle_ht(local, 1, &sta->ht_info, &bss_info);
+	}
 
-void ieee80211_rx_bss_list_init(struct ieee80211_local *local)
-{
-	spin_lock_init(&local->sta_bss_lock);
-	INIT_LIST_HEAD(&local->sta_bss_list);
-}
+	rate_control_rate_init(sta, local);
 
+	if (elems.wmm_param) {
+		set_sta_flags(sta, WLAN_STA_WME);
+		rcu_read_unlock();
+		ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param,
+					 elems.wmm_param_len);
+	} else
+		rcu_read_unlock();
 
-void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local)
-{
-	struct ieee80211_sta_bss *bss, *tmp;
+	/* set AID and assoc capability,
+	 * ieee80211_set_associated() will tell the driver */
+	bss_conf->aid = aid;
+	bss_conf->assoc_capability = capab_info;
+	ieee80211_set_associated(sdata, ifsta);
 
-	list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list)
-		ieee80211_rx_bss_put(local, bss);
+	ieee80211_associated(sdata, ifsta);
 }
 
 
@@ -3145,104 +3116,15 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 
 void ieee80211_sta_timer(unsigned long data)
 {
-	struct ieee80211_sub_if_data *sdata =
-		(struct ieee80211_sub_if_data *) data;
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	struct ieee80211_local *local = sdata->local;
-
-	set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request);
-	queue_work(local->hw.workqueue, &ifsta->work);
-}
-
-void ieee80211_sta_work(struct work_struct *work)
-{
-	struct ieee80211_sub_if_data *sdata =
-		container_of(work, struct ieee80211_sub_if_data, u.sta.work);
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_if_sta *ifsta;
-	struct sk_buff *skb;
-
-	if (!netif_running(sdata->dev))
-		return;
-
-	if (local->sta_sw_scanning || local->sta_hw_scanning)
-		return;
-
-	if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-		    sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-		    sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT))
-		return;
-	ifsta = &sdata->u.sta;
-
-	while ((skb = skb_dequeue(&ifsta->skb_queue)))
-		ieee80211_sta_rx_queued_mgmt(sdata, skb);
-
-#ifdef CONFIG_MAC80211_MESH
-	if (ifsta->preq_queue_len &&
-	    time_after(jiffies,
-		       ifsta->last_preq + msecs_to_jiffies(ifsta->mshcfg.dot11MeshHWMPpreqMinInterval)))
-		mesh_path_start_discovery(sdata);
-#endif
-
-	if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE &&
-	    ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
-	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
-	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
-		if (ifsta->scan_ssid_len)
-			ieee80211_sta_start_scan(sdata, ifsta->scan_ssid, ifsta->scan_ssid_len);
-		else
-			ieee80211_sta_start_scan(sdata, NULL, 0);
-		return;
-	}
-
-	if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) {
-		if (ieee80211_sta_config_auth(sdata, ifsta))
-			return;
-		clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request);
-	} else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request))
-		return;
-
-	switch (ifsta->state) {
-	case IEEE80211_STA_MLME_DISABLED:
-		break;
-	case IEEE80211_STA_MLME_DIRECT_PROBE:
-		ieee80211_direct_probe(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_AUTHENTICATE:
-		ieee80211_authenticate(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_ASSOCIATE:
-		ieee80211_associate(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_ASSOCIATED:
-		ieee80211_associated(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_IBSS_SEARCH:
-		ieee80211_sta_find_ibss(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_IBSS_JOINED:
-		ieee80211_sta_merge_ibss(sdata, ifsta);
-		break;
-#ifdef CONFIG_MAC80211_MESH
-	case IEEE80211_STA_MLME_MESH_UP:
-		ieee80211_mesh_housekeeping(sdata, ifsta);
-		break;
-#endif
-	default:
-		WARN_ON(1);
-		break;
-	}
-
-	if (ieee80211_privacy_mismatch(sdata, ifsta)) {
-		printk(KERN_DEBUG "%s: privacy configuration mismatch and "
-		       "mixed-cell disabled - disassociate\n", sdata->dev->name);
+	struct ieee80211_sub_if_data *sdata =
+		(struct ieee80211_sub_if_data *) data;
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_local *local = sdata->local;
 
-		ieee80211_set_disassoc(sdata, ifsta, false, true,
-					WLAN_REASON_UNSPECIFIED);
-	}
+	set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request);
+	queue_work(local->hw.workqueue, &ifsta->work);
 }
 
-
 static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
@@ -3327,85 +3209,6 @@ static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta,
 	return 0;
 }
 
-static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
-				     struct ieee80211_if_sta *ifsta)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_sta_bss *bss, *selected = NULL;
-	int top_rssi = 0, freq;
-
-	spin_lock_bh(&local->sta_bss_lock);
-	freq = local->oper_channel->center_freq;
-	list_for_each_entry(bss, &local->sta_bss_list, list) {
-		if (!(bss->capability & WLAN_CAPABILITY_ESS))
-			continue;
-
-		if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
-			IEEE80211_STA_AUTO_BSSID_SEL |
-			IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
-		    (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
-		     !!sdata->default_key))
-			continue;
-
-		if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
-		    bss->freq != freq)
-			continue;
-
-		if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) &&
-		    memcmp(bss->bssid, ifsta->bssid, ETH_ALEN))
-			continue;
-
-		if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) &&
-		    !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len))
-			continue;
-
-		if (!selected || top_rssi < bss->signal) {
-			selected = bss;
-			top_rssi = bss->signal;
-		}
-	}
-	if (selected)
-		atomic_inc(&selected->users);
-	spin_unlock_bh(&local->sta_bss_lock);
-
-	if (selected) {
-		ieee80211_set_freq(sdata, selected->freq);
-		if (!(ifsta->flags & IEEE80211_STA_SSID_SET))
-			ieee80211_sta_set_ssid(sdata, selected->ssid,
-					       selected->ssid_len);
-		ieee80211_sta_set_bssid(sdata, selected->bssid);
-		ieee80211_sta_def_wmm_params(sdata, selected, 0);
-
-		/* Send out direct probe if no probe resp was received or
-		 * the one we have is outdated
-		 */
-		if (!selected->last_probe_resp ||
-		    time_after(jiffies, selected->last_probe_resp
-					+ IEEE80211_SCAN_RESULT_EXPIRE))
-			ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE;
-		else
-			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
-
-		ieee80211_rx_bss_put(local, selected);
-		ieee80211_sta_reset_auth(sdata, ifsta);
-		return 0;
-	} else {
-		if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) {
-			ifsta->assoc_scan_tries++;
-			if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL)
-				ieee80211_sta_start_scan(sdata, NULL, 0);
-			else
-				ieee80211_sta_start_scan(sdata, ifsta->ssid,
-							 ifsta->ssid_len);
-			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
-			set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
-		} else
-			ifsta->state = IEEE80211_STA_MLME_DISABLED;
-	}
-	return -1;
-}
-
-
 static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
@@ -4273,6 +4076,85 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 }
 
 
+static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
+				     struct ieee80211_if_sta *ifsta)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_sta_bss *bss, *selected = NULL;
+	int top_rssi = 0, freq;
+
+	spin_lock_bh(&local->sta_bss_lock);
+	freq = local->oper_channel->center_freq;
+	list_for_each_entry(bss, &local->sta_bss_list, list) {
+		if (!(bss->capability & WLAN_CAPABILITY_ESS))
+			continue;
+
+		if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
+			IEEE80211_STA_AUTO_BSSID_SEL |
+			IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
+		    (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
+		     !!sdata->default_key))
+			continue;
+
+		if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
+		    bss->freq != freq)
+			continue;
+
+		if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) &&
+		    memcmp(bss->bssid, ifsta->bssid, ETH_ALEN))
+			continue;
+
+		if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) &&
+		    !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len))
+			continue;
+
+		if (!selected || top_rssi < bss->signal) {
+			selected = bss;
+			top_rssi = bss->signal;
+		}
+	}
+	if (selected)
+		atomic_inc(&selected->users);
+	spin_unlock_bh(&local->sta_bss_lock);
+
+	if (selected) {
+		ieee80211_set_freq(sdata, selected->freq);
+		if (!(ifsta->flags & IEEE80211_STA_SSID_SET))
+			ieee80211_sta_set_ssid(sdata, selected->ssid,
+					       selected->ssid_len);
+		ieee80211_sta_set_bssid(sdata, selected->bssid);
+		ieee80211_sta_def_wmm_params(sdata, selected, 0);
+
+		/* Send out direct probe if no probe resp was received or
+		 * the one we have is outdated
+		 */
+		if (!selected->last_probe_resp ||
+		    time_after(jiffies, selected->last_probe_resp
+					+ IEEE80211_SCAN_RESULT_EXPIRE))
+			ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE;
+		else
+			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
+
+		ieee80211_rx_bss_put(local, selected);
+		ieee80211_sta_reset_auth(sdata, ifsta);
+		return 0;
+	} else {
+		if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) {
+			ifsta->assoc_scan_tries++;
+			if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL)
+				ieee80211_sta_start_scan(sdata, NULL, 0);
+			else
+				ieee80211_sta_start_scan(sdata, ifsta->ssid,
+							 ifsta->ssid_len);
+			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
+			set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
+		} else
+			ifsta->state = IEEE80211_STA_MLME_DISABLED;
+	}
+	return -1;
+}
+
+
 int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason)
 {
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
@@ -4326,3 +4208,91 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw,
 	}
 }
 EXPORT_SYMBOL(ieee80211_notify_mac);
+
+void ieee80211_sta_work(struct work_struct *work)
+{
+	struct ieee80211_sub_if_data *sdata =
+		container_of(work, struct ieee80211_sub_if_data, u.sta.work);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_sta *ifsta;
+	struct sk_buff *skb;
+
+	if (!netif_running(sdata->dev))
+		return;
+
+	if (local->sta_sw_scanning || local->sta_hw_scanning)
+		return;
+
+	if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA &&
+		    sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
+		    sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT))
+		return;
+	ifsta = &sdata->u.sta;
+
+	while ((skb = skb_dequeue(&ifsta->skb_queue)))
+		ieee80211_sta_rx_queued_mgmt(sdata, skb);
+
+#ifdef CONFIG_MAC80211_MESH
+	if (ifsta->preq_queue_len &&
+	    time_after(jiffies,
+		       ifsta->last_preq + msecs_to_jiffies(ifsta->mshcfg.dot11MeshHWMPpreqMinInterval)))
+		mesh_path_start_discovery(sdata);
+#endif
+
+	if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE &&
+	    ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
+	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
+	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
+		if (ifsta->scan_ssid_len)
+			ieee80211_sta_start_scan(sdata, ifsta->scan_ssid, ifsta->scan_ssid_len);
+		else
+			ieee80211_sta_start_scan(sdata, NULL, 0);
+		return;
+	}
+
+	if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) {
+		if (ieee80211_sta_config_auth(sdata, ifsta))
+			return;
+		clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request);
+	} else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request))
+		return;
+
+	switch (ifsta->state) {
+	case IEEE80211_STA_MLME_DISABLED:
+		break;
+	case IEEE80211_STA_MLME_DIRECT_PROBE:
+		ieee80211_direct_probe(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_AUTHENTICATE:
+		ieee80211_authenticate(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_ASSOCIATE:
+		ieee80211_associate(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_ASSOCIATED:
+		ieee80211_associated(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_IBSS_SEARCH:
+		ieee80211_sta_find_ibss(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_IBSS_JOINED:
+		ieee80211_sta_merge_ibss(sdata, ifsta);
+		break;
+#ifdef CONFIG_MAC80211_MESH
+	case IEEE80211_STA_MLME_MESH_UP:
+		ieee80211_mesh_housekeeping(sdata, ifsta);
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	if (ieee80211_privacy_mismatch(sdata, ifsta)) {
+		printk(KERN_DEBUG "%s: privacy configuration mismatch and "
+		       "mixed-cell disabled - disassociate\n", sdata->dev->name);
+
+		ieee80211_set_disassoc(sdata, ifsta, false, true,
+					WLAN_REASON_UNSPECIFIED);
+	}
+}
-- 
cgit v1.2.3-70-g09d2


From 491775a50787b9fbb09b5735be3d111c65935f5c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:44:23 +0200
Subject: mac80211: use sdata pointer for scan interface

Since we now use sdata pointers most of the time, using a netdev
pointer here is somewhat artificial, use an sdata pointer instead.
Replace a netdev-prefix in a few messages by a wiphy-prefix.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/main.c        |  2 +-
 net/mac80211/mlme.c        | 24 +++++++++++-------------
 3 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c68d4df1119..a33bbd1ca2b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -636,7 +636,7 @@ struct ieee80211_local {
 	enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state;
 	unsigned long last_scan_completed;
 	struct delayed_work scan_work;
-	struct net_device *scan_dev;
+	struct ieee80211_sub_if_data *scan_sdata;
 	struct ieee80211_channel *oper_channel, *scan_channel;
 	u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
 	size_t scan_ssid_len;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 7dc06319725..6df4a2e1509 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -551,7 +551,7 @@ static int ieee80211_stop(struct net_device *dev)
 		synchronize_rcu();
 		skb_queue_purge(&sdata->u.sta.skb_queue);
 
-		if (local->scan_dev == sdata->dev) {
+		if (local->scan_sdata == sdata) {
 			if (!local->ops->hw_scan) {
 				local->sta_sw_scanning = 0;
 				cancel_delayed_work(&local->scan_work);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 67c38235a3c..f60212b7500 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3507,19 +3507,18 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
 void ieee80211_scan_completed(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct net_device *dev = local->scan_dev;
 	struct ieee80211_sub_if_data *sdata;
 	union iwreq_data wrqu;
 
 	local->last_scan_completed = jiffies;
 	memset(&wrqu, 0, sizeof(wrqu));
-	wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
+	wireless_send_event(local->scan_sdata->dev, SIOCGIWSCAN, &wrqu, NULL);
 
 	if (local->sta_hw_scanning) {
 		local->sta_hw_scanning = 0;
 		if (ieee80211_hw_config(local))
 			printk(KERN_DEBUG "%s: failed to restore operational "
-			       "channel after scan\n", dev->name);
+			       "channel after scan\n", wiphy_name(local->hw.wiphy));
 		/* Restart STA timer for HW scan case */
 		rcu_read_lock();
 		list_for_each_entry_rcu(sdata, &local->interfaces, list)
@@ -3532,7 +3531,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 	local->sta_sw_scanning = 0;
 	if (ieee80211_hw_config(local))
 		printk(KERN_DEBUG "%s: failed to restore operational "
-		       "channel after scan\n", dev->name);
+		       "channel after scan\n", wiphy_name(local->hw.wiphy));
 
 
 	netif_tx_lock_bh(local->mdev);
@@ -3562,8 +3561,8 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 	}
 	rcu_read_unlock();
 
-done:
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ done:
+	sdata = local->scan_sdata;
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
@@ -3578,8 +3577,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 {
 	struct ieee80211_local *local =
 		container_of(work, struct ieee80211_local, scan_work.work);
-	struct net_device *dev = local->scan_dev;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_channel *chan;
 	int skip;
@@ -3627,7 +3625,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 			local->scan_channel = chan;
 			if (ieee80211_hw_config(local)) {
 				printk(KERN_DEBUG "%s: failed to set freq to "
-				       "%d MHz for scan\n", dev->name,
+				       "%d MHz for scan\n", wiphy_name(local->hw.wiphy),
 				       chan->center_freq);
 				skip = 1;
 			}
@@ -3697,7 +3695,7 @@ static int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 	 */
 
 	if (local->sta_sw_scanning || local->sta_hw_scanning) {
-		if (local->scan_dev == scan_sdata->dev)
+		if (local->scan_sdata == scan_sdata)
 			return 0;
 		return -EBUSY;
 	}
@@ -3707,7 +3705,7 @@ static int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 					     ssid, ssid_len);
 		if (!rc) {
 			local->sta_hw_scanning = 1;
-			local->scan_dev = scan_sdata->dev;
+			local->scan_sdata = scan_sdata;
 		}
 		return rc;
 	}
@@ -3734,7 +3732,7 @@ static int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 	local->scan_state = SCAN_SET_CHANNEL;
 	local->scan_channel_idx = 0;
 	local->scan_band = IEEE80211_BAND_2GHZ;
-	local->scan_dev = scan_sdata->dev;
+	local->scan_sdata = scan_sdata;
 
 	netif_addr_lock_bh(local->mdev);
 	local->filter_flags |= FIF_BCN_PRBRESP_PROMISC;
@@ -3762,7 +3760,7 @@ int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t
 		return ieee80211_sta_start_scan(sdata, ssid, ssid_len);
 
 	if (local->sta_sw_scanning || local->sta_hw_scanning) {
-		if (local->scan_dev == sdata->dev)
+		if (local->scan_sdata == sdata)
 			return 0;
 		return -EBUSY;
 	}
-- 
cgit v1.2.3-70-g09d2


From ee96d6ef82cc29421569b7cb7f7c7ee90168ec50 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:44:24 +0200
Subject: mac80211: remove useless non-NULL tests from scan results code

I'm surprised nobody complained about these before. What a waste.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 40 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f60212b7500..ba502ce132d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3822,6 +3822,7 @@ ieee80211_sta_scan_result(struct ieee80211_local *local,
 			  char *current_ev, char *end_buf)
 {
 	struct iw_event iwe;
+	char *buf;
 
 	if (time_after(jiffies,
 		       bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE))
@@ -3896,7 +3897,7 @@ ieee80211_sta_scan_result(struct ieee80211_local *local,
 
 	ieee80211_sta_add_scan_ies(info, bss, &current_ev, end_buf);
 
-	if (bss && bss->supp_rates_len > 0) {
+	if (bss->supp_rates_len > 0) {
 		/* display all supported rates in readable format */
 		char *p = current_ev + iwe_stream_lcp_len(info);
 		int i;
@@ -3915,30 +3916,25 @@ ieee80211_sta_scan_result(struct ieee80211_local *local,
 		current_ev = p;
 	}
 
-	if (bss) {
-		char *buf;
-		buf = kmalloc(30, GFP_ATOMIC);
-		if (buf) {
-			memset(&iwe, 0, sizeof(iwe));
-			iwe.cmd = IWEVCUSTOM;
-			sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp));
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			memset(&iwe, 0, sizeof(iwe));
-			iwe.cmd = IWEVCUSTOM;
-			sprintf(buf, " Last beacon: %dms ago",
-				jiffies_to_msecs(jiffies - bss->last_update));
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf, &iwe, buf);
-			kfree(buf);
-		}
+	buf = kmalloc(30, GFP_ATOMIC);
+	if (buf) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVCUSTOM;
+		sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp));
+		iwe.u.data.length = strlen(buf);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, buf);
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVCUSTOM;
+		sprintf(buf, " Last beacon: %dms ago",
+			jiffies_to_msecs(jiffies - bss->last_update));
+		iwe.u.data.length = strlen(buf);
+		current_ev = iwe_stream_add_point(info, current_ev,
+						  end_buf, &iwe, buf);
+		kfree(buf);
 	}
 
 	if (bss_mesh_cfg(bss)) {
-		char *buf;
 		u8 *cfg = bss_mesh_cfg(bss);
 		buf = kmalloc(50, GFP_ATOMIC);
 		if (buf) {
-- 
cgit v1.2.3-70-g09d2


From 0a51b27e956bd9580296c48191b78175ed8b5971 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:44:25 +0200
Subject: mac80211: start moving scan code from mlme

Here's a first patch to move some code from mlme.c to a
new file called scan.c. The end result will hopefully be
a more manageable mlme.c.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Makefile      |   1 +
 net/mac80211/ieee80211_i.h |  13 +-
 net/mac80211/mlme.c        | 559 ++-------------------------------------------
 net/mac80211/scan.c        | 553 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 582 insertions(+), 544 deletions(-)
 create mode 100644 net/mac80211/scan.c

(limited to 'net')

diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index a169b0201d6..376280a420a 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -7,6 +7,7 @@ mac80211-y := \
 	sta_info.o \
 	wep.o \
 	wpa.o \
+	scan.o \
 	mlme.o \
 	iface.o \
 	rate.o \
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a33bbd1ca2b..25dccd5cb2f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -53,6 +53,12 @@ struct ieee80211_local;
  * increased memory use (about 2 kB of RAM per entry). */
 #define IEEE80211_FRAGMENT_MAX 4
 
+/*
+ * Time after which we ignore scan results and no longer report/use
+ * them in any way.
+ */
+#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
+
 struct ieee80211_fragment_entry {
 	unsigned long first_frag_time;
 	unsigned int seq;
@@ -924,8 +930,13 @@ u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
 			    enum ieee80211_band band);
 void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		int encrypt);
+void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
+			      u8 *ssid, size_t ssid_len);
 void ieee802_11_parse_elems(u8 *start, size_t len,
-				   struct ieee802_11_elems *elems);
+			    struct ieee802_11_elems *elems);
+void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local);
+int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
+			     u8 *ssid, size_t ssid_len);
 
 #ifdef CONFIG_MAC80211_MESH
 void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ba502ce132d..2caea9759b7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -46,10 +46,6 @@
 #define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ)
 #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
 
-#define IEEE80211_PROBE_DELAY (HZ / 33)
-#define IEEE80211_CHANNEL_TIME (HZ / 33)
-#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5)
-#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
 #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
 #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
 #define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ)
@@ -341,8 +337,8 @@ static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_tx(sdata, skb, encrypt);
 }
 
-static void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
-				     u8 *ssid, size_t ssid_len)
+void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
+			      u8 *ssid, size_t ssid_len)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_supported_band *sband;
@@ -3466,543 +3462,6 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
 }
 
 
-static void ieee80211_send_nullfunc(struct ieee80211_local *local,
-				    struct ieee80211_sub_if_data *sdata,
-				    int powersave)
-{
-	struct sk_buff *skb;
-	struct ieee80211_hdr *nullfunc;
-	__le16 fc;
-
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24);
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc "
-		       "frame\n", sdata->dev->name);
-		return;
-	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-
-	nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24);
-	memset(nullfunc, 0, 24);
-	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
-			 IEEE80211_FCTL_TODS);
-	if (powersave)
-		fc |= cpu_to_le16(IEEE80211_FCTL_PM);
-	nullfunc->frame_control = fc;
-	memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN);
-	memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN);
-	memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN);
-
-	ieee80211_sta_tx(sdata, skb, 0);
-}
-
-
-static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
-{
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    ieee80211_vif_is_mesh(&sdata->vif))
-		ieee80211_sta_timer((unsigned long)sdata);
-}
-
-void ieee80211_scan_completed(struct ieee80211_hw *hw)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_sub_if_data *sdata;
-	union iwreq_data wrqu;
-
-	local->last_scan_completed = jiffies;
-	memset(&wrqu, 0, sizeof(wrqu));
-	wireless_send_event(local->scan_sdata->dev, SIOCGIWSCAN, &wrqu, NULL);
-
-	if (local->sta_hw_scanning) {
-		local->sta_hw_scanning = 0;
-		if (ieee80211_hw_config(local))
-			printk(KERN_DEBUG "%s: failed to restore operational "
-			       "channel after scan\n", wiphy_name(local->hw.wiphy));
-		/* Restart STA timer for HW scan case */
-		rcu_read_lock();
-		list_for_each_entry_rcu(sdata, &local->interfaces, list)
-			ieee80211_restart_sta_timer(sdata);
-		rcu_read_unlock();
-
-		goto done;
-	}
-
-	local->sta_sw_scanning = 0;
-	if (ieee80211_hw_config(local))
-		printk(KERN_DEBUG "%s: failed to restore operational "
-		       "channel after scan\n", wiphy_name(local->hw.wiphy));
-
-
-	netif_tx_lock_bh(local->mdev);
-	netif_addr_lock(local->mdev);
-	local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC;
-	local->ops->configure_filter(local_to_hw(local),
-				     FIF_BCN_PRBRESP_PROMISC,
-				     &local->filter_flags,
-				     local->mdev->mc_count,
-				     local->mdev->mc_list);
-
-	netif_addr_unlock(local->mdev);
-	netif_tx_unlock_bh(local->mdev);
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-		/* Tell AP we're back */
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
-			if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) {
-				ieee80211_send_nullfunc(local, sdata, 0);
-				netif_tx_wake_all_queues(sdata->dev);
-			}
-		} else
-			netif_tx_wake_all_queues(sdata->dev);
-
-		ieee80211_restart_sta_timer(sdata);
-	}
-	rcu_read_unlock();
-
- done:
-	sdata = local->scan_sdata;
-	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
-		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
-		    (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) &&
-		    !ieee80211_sta_active_ibss(sdata)))
-			ieee80211_sta_find_ibss(sdata, ifsta);
-	}
-}
-EXPORT_SYMBOL(ieee80211_scan_completed);
-
-void ieee80211_sta_scan_work(struct work_struct *work)
-{
-	struct ieee80211_local *local =
-		container_of(work, struct ieee80211_local, scan_work.work);
-	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_channel *chan;
-	int skip;
-	unsigned long next_delay = 0;
-
-	if (!local->sta_sw_scanning)
-		return;
-
-	switch (local->scan_state) {
-	case SCAN_SET_CHANNEL:
-		/*
-		 * Get current scan band. scan_band may be IEEE80211_NUM_BANDS
-		 * after we successfully scanned the last channel of the last
-		 * band (and the last band is supported by the hw)
-		 */
-		if (local->scan_band < IEEE80211_NUM_BANDS)
-			sband = local->hw.wiphy->bands[local->scan_band];
-		else
-			sband = NULL;
-
-		/*
-		 * If we are at an unsupported band and have more bands
-		 * left to scan, advance to the next supported one.
-		 */
-		while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) {
-			local->scan_band++;
-			sband = local->hw.wiphy->bands[local->scan_band];
-			local->scan_channel_idx = 0;
-		}
-
-		/* if no more bands/channels left, complete scan */
-		if (!sband || local->scan_channel_idx >= sband->n_channels) {
-			ieee80211_scan_completed(local_to_hw(local));
-			return;
-		}
-		skip = 0;
-		chan = &sband->channels[local->scan_channel_idx];
-
-		if (chan->flags & IEEE80211_CHAN_DISABLED ||
-		    (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
-		     chan->flags & IEEE80211_CHAN_NO_IBSS))
-			skip = 1;
-
-		if (!skip) {
-			local->scan_channel = chan;
-			if (ieee80211_hw_config(local)) {
-				printk(KERN_DEBUG "%s: failed to set freq to "
-				       "%d MHz for scan\n", wiphy_name(local->hw.wiphy),
-				       chan->center_freq);
-				skip = 1;
-			}
-		}
-
-		/* advance state machine to next channel/band */
-		local->scan_channel_idx++;
-		if (local->scan_channel_idx >= sband->n_channels) {
-			/*
-			 * scan_band may end up == IEEE80211_NUM_BANDS, but
-			 * we'll catch that case above and complete the scan
-			 * if that is the case.
-			 */
-			local->scan_band++;
-			local->scan_channel_idx = 0;
-		}
-
-		if (skip)
-			break;
-
-		next_delay = IEEE80211_PROBE_DELAY +
-			     usecs_to_jiffies(local->hw.channel_change_time);
-		local->scan_state = SCAN_SEND_PROBE;
-		break;
-	case SCAN_SEND_PROBE:
-		next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
-		local->scan_state = SCAN_SET_CHANNEL;
-
-		if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN)
-			break;
-		ieee80211_send_probe_req(sdata, NULL, local->scan_ssid,
-					 local->scan_ssid_len);
-		next_delay = IEEE80211_CHANNEL_TIME;
-		break;
-	}
-
-	if (local->sta_sw_scanning)
-		queue_delayed_work(local->hw.workqueue, &local->scan_work,
-				   next_delay);
-}
-
-
-static int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
-				    u8 *ssid, size_t ssid_len)
-{
-	struct ieee80211_local *local = scan_sdata->local;
-	struct ieee80211_sub_if_data *sdata;
-
-	if (ssid_len > IEEE80211_MAX_SSID_LEN)
-		return -EINVAL;
-
-	/* MLME-SCAN.request (page 118)  page 144 (11.1.3.1)
-	 * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS
-	 * BSSID: MACAddress
-	 * SSID
-	 * ScanType: ACTIVE, PASSIVE
-	 * ProbeDelay: delay (in microseconds) to be used prior to transmitting
-	 *    a Probe frame during active scanning
-	 * ChannelList
-	 * MinChannelTime (>= ProbeDelay), in TU
-	 * MaxChannelTime: (>= MinChannelTime), in TU
-	 */
-
-	 /* MLME-SCAN.confirm
-	  * BSSDescriptionSet
-	  * ResultCode: SUCCESS, INVALID_PARAMETERS
-	 */
-
-	if (local->sta_sw_scanning || local->sta_hw_scanning) {
-		if (local->scan_sdata == scan_sdata)
-			return 0;
-		return -EBUSY;
-	}
-
-	if (local->ops->hw_scan) {
-		int rc = local->ops->hw_scan(local_to_hw(local),
-					     ssid, ssid_len);
-		if (!rc) {
-			local->sta_hw_scanning = 1;
-			local->scan_sdata = scan_sdata;
-		}
-		return rc;
-	}
-
-	local->sta_sw_scanning = 1;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
-			if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) {
-				netif_tx_stop_all_queues(sdata->dev);
-				ieee80211_send_nullfunc(local, sdata, 1);
-			}
-		} else
-			netif_tx_stop_all_queues(sdata->dev);
-	}
-	rcu_read_unlock();
-
-	if (ssid) {
-		local->scan_ssid_len = ssid_len;
-		memcpy(local->scan_ssid, ssid, ssid_len);
-	} else
-		local->scan_ssid_len = 0;
-	local->scan_state = SCAN_SET_CHANNEL;
-	local->scan_channel_idx = 0;
-	local->scan_band = IEEE80211_BAND_2GHZ;
-	local->scan_sdata = scan_sdata;
-
-	netif_addr_lock_bh(local->mdev);
-	local->filter_flags |= FIF_BCN_PRBRESP_PROMISC;
-	local->ops->configure_filter(local_to_hw(local),
-				     FIF_BCN_PRBRESP_PROMISC,
-				     &local->filter_flags,
-				     local->mdev->mc_count,
-				     local->mdev->mc_list);
-	netif_addr_unlock_bh(local->mdev);
-
-	/* TODO: start scan as soon as all nullfunc frames are ACKed */
-	queue_delayed_work(local->hw.workqueue, &local->scan_work,
-			   IEEE80211_CHANNEL_TIME);
-
-	return 0;
-}
-
-
-int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t ssid_len)
-{
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	struct ieee80211_local *local = sdata->local;
-
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
-		return ieee80211_sta_start_scan(sdata, ssid, ssid_len);
-
-	if (local->sta_sw_scanning || local->sta_hw_scanning) {
-		if (local->scan_sdata == sdata)
-			return 0;
-		return -EBUSY;
-	}
-
-	ifsta->scan_ssid_len = ssid_len;
-	if (ssid_len)
-		memcpy(ifsta->scan_ssid, ssid, ssid_len);
-	set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request);
-	queue_work(local->hw.workqueue, &ifsta->work);
-	return 0;
-}
-
-
-static void ieee80211_sta_add_scan_ies(struct iw_request_info *info,
-				       struct ieee80211_sta_bss *bss,
-				       char **current_ev, char *end_buf)
-{
-	u8 *pos, *end, *next;
-	struct iw_event iwe;
-
-	if (bss == NULL || bss->ies == NULL)
-		return;
-
-	/*
-	 * If needed, fragment the IEs buffer (at IE boundaries) into short
-	 * enough fragments to fit into IW_GENERIC_IE_MAX octet messages.
-	 */
-	pos = bss->ies;
-	end = pos + bss->ies_len;
-
-	while (end - pos > IW_GENERIC_IE_MAX) {
-		next = pos + 2 + pos[1];
-		while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX)
-			next = next + 2 + next[1];
-
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVGENIE;
-		iwe.u.data.length = next - pos;
-		*current_ev = iwe_stream_add_point(info, *current_ev,
-						   end_buf, &iwe, pos);
-
-		pos = next;
-	}
-
-	if (end > pos) {
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVGENIE;
-		iwe.u.data.length = end - pos;
-		*current_ev = iwe_stream_add_point(info, *current_ev,
-						   end_buf, &iwe, pos);
-	}
-}
-
-
-static char *
-ieee80211_sta_scan_result(struct ieee80211_local *local,
-			  struct iw_request_info *info,
-			  struct ieee80211_sta_bss *bss,
-			  char *current_ev, char *end_buf)
-{
-	struct iw_event iwe;
-	char *buf;
-
-	if (time_after(jiffies,
-		       bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE))
-		return current_ev;
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWAP;
-	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
-	memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN);
-	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
-					  IW_EV_ADDR_LEN);
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWESSID;
-	if (bss_mesh_cfg(bss)) {
-		iwe.u.data.length = bss_mesh_id_len(bss);
-		iwe.u.data.flags = 1;
-		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-						  &iwe, bss_mesh_id(bss));
-	} else {
-		iwe.u.data.length = bss->ssid_len;
-		iwe.u.data.flags = 1;
-		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-						  &iwe, bss->ssid);
-	}
-
-	if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)
-	    || bss_mesh_cfg(bss)) {
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = SIOCGIWMODE;
-		if (bss_mesh_cfg(bss))
-			iwe.u.mode = IW_MODE_MESH;
-		else if (bss->capability & WLAN_CAPABILITY_ESS)
-			iwe.u.mode = IW_MODE_MASTER;
-		else
-			iwe.u.mode = IW_MODE_ADHOC;
-		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
-						  &iwe, IW_EV_UINT_LEN);
-	}
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWFREQ;
-	iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq);
-	iwe.u.freq.e = 0;
-	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
-					  IW_EV_FREQ_LEN);
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWFREQ;
-	iwe.u.freq.m = bss->freq;
-	iwe.u.freq.e = 6;
-	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
-					  IW_EV_FREQ_LEN);
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = IWEVQUAL;
-	iwe.u.qual.qual = bss->qual;
-	iwe.u.qual.level = bss->signal;
-	iwe.u.qual.noise = bss->noise;
-	iwe.u.qual.updated = local->wstats_flags;
-	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
-					  IW_EV_QUAL_LEN);
-
-	memset(&iwe, 0, sizeof(iwe));
-	iwe.cmd = SIOCGIWENCODE;
-	if (bss->capability & WLAN_CAPABILITY_PRIVACY)
-		iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
-	else
-		iwe.u.data.flags = IW_ENCODE_DISABLED;
-	iwe.u.data.length = 0;
-	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-					  &iwe, "");
-
-	ieee80211_sta_add_scan_ies(info, bss, &current_ev, end_buf);
-
-	if (bss->supp_rates_len > 0) {
-		/* display all supported rates in readable format */
-		char *p = current_ev + iwe_stream_lcp_len(info);
-		int i;
-
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = SIOCGIWRATE;
-		/* Those two flags are ignored... */
-		iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
-
-		for (i = 0; i < bss->supp_rates_len; i++) {
-			iwe.u.bitrate.value = ((bss->supp_rates[i] &
-							0x7f) * 500000);
-			p = iwe_stream_add_value(info, current_ev, p,
-					end_buf, &iwe, IW_EV_PARAM_LEN);
-		}
-		current_ev = p;
-	}
-
-	buf = kmalloc(30, GFP_ATOMIC);
-	if (buf) {
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVCUSTOM;
-		sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp));
-		iwe.u.data.length = strlen(buf);
-		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-						  &iwe, buf);
-		memset(&iwe, 0, sizeof(iwe));
-		iwe.cmd = IWEVCUSTOM;
-		sprintf(buf, " Last beacon: %dms ago",
-			jiffies_to_msecs(jiffies - bss->last_update));
-		iwe.u.data.length = strlen(buf);
-		current_ev = iwe_stream_add_point(info, current_ev,
-						  end_buf, &iwe, buf);
-		kfree(buf);
-	}
-
-	if (bss_mesh_cfg(bss)) {
-		u8 *cfg = bss_mesh_cfg(bss);
-		buf = kmalloc(50, GFP_ATOMIC);
-		if (buf) {
-			memset(&iwe, 0, sizeof(iwe));
-			iwe.cmd = IWEVCUSTOM;
-			sprintf(buf, "Mesh network (version %d)", cfg[0]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			sprintf(buf, "Path Selection Protocol ID: "
-				"0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3],
-							cfg[4]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			sprintf(buf, "Path Selection Metric ID: "
-				"0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7],
-							cfg[8]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			sprintf(buf, "Congestion Control Mode ID: "
-				"0x%02X%02X%02X%02X", cfg[9], cfg[10],
-							cfg[11], cfg[12]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			sprintf(buf, "Channel Precedence: "
-				"0x%02X%02X%02X%02X", cfg[13], cfg[14],
-							cfg[15], cfg[16]);
-			iwe.u.data.length = strlen(buf);
-			current_ev = iwe_stream_add_point(info, current_ev,
-							  end_buf,
-							  &iwe, buf);
-			kfree(buf);
-		}
-	}
-
-	return current_ev;
-}
-
-
-int ieee80211_sta_scan_results(struct ieee80211_local *local,
-			       struct iw_request_info *info,
-			       char *buf, size_t len)
-{
-	char *current_ev = buf;
-	char *end_buf = buf + len;
-	struct ieee80211_sta_bss *bss;
-
-	spin_lock_bh(&local->sta_bss_lock);
-	list_for_each_entry(bss, &local->sta_bss_list, list) {
-		if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
-			spin_unlock_bh(&local->sta_bss_lock);
-			return -E2BIG;
-		}
-		current_ev = ieee80211_sta_scan_result(local, info, bss,
-						       current_ev, end_buf);
-	}
-	spin_unlock_bh(&local->sta_bss_lock);
-	return current_ev - buf;
-}
-
-
 int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len)
 {
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
@@ -4290,3 +3749,17 @@ void ieee80211_sta_work(struct work_struct *work)
 					WLAN_REASON_UNSPECIFIED);
 	}
 }
+
+void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
+	struct ieee80211_if_sta *ifsta;
+
+	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+		ifsta = &sdata->u.sta;
+		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
+		    (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) &&
+		    !ieee80211_sta_active_ibss(sdata)))
+			ieee80211_sta_find_ibss(sdata, ifsta);
+	}
+}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
new file mode 100644
index 00000000000..68fa782acd7
--- /dev/null
+++ b/net/mac80211/scan.c
@@ -0,0 +1,553 @@
+/*
+ * BSS client mode implementation
+ * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright 2004, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/wireless.h>
+#include <linux/if_arp.h>
+#include <net/mac80211.h>
+#include <net/iw_handler.h>
+
+#include "ieee80211_i.h"
+
+#define IEEE80211_PROBE_DELAY (HZ / 33)
+#define IEEE80211_CHANNEL_TIME (HZ / 33)
+#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5)
+
+
+static void ieee80211_send_nullfunc(struct ieee80211_local *local,
+				    struct ieee80211_sub_if_data *sdata,
+				    int powersave)
+{
+	struct sk_buff *skb;
+	struct ieee80211_hdr *nullfunc;
+	__le16 fc;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24);
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc "
+		       "frame\n", sdata->dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24);
+	memset(nullfunc, 0, 24);
+	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
+			 IEEE80211_FCTL_TODS);
+	if (powersave)
+		fc |= cpu_to_le16(IEEE80211_FCTL_PM);
+	nullfunc->frame_control = fc;
+	memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN);
+	memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN);
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
+{
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    ieee80211_vif_is_mesh(&sdata->vif))
+		ieee80211_sta_timer((unsigned long)sdata);
+}
+
+void ieee80211_scan_completed(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_sub_if_data *sdata;
+	union iwreq_data wrqu;
+
+	local->last_scan_completed = jiffies;
+	memset(&wrqu, 0, sizeof(wrqu));
+	wireless_send_event(local->scan_sdata->dev, SIOCGIWSCAN, &wrqu, NULL);
+
+	if (local->sta_hw_scanning) {
+		local->sta_hw_scanning = 0;
+		if (ieee80211_hw_config(local))
+			printk(KERN_DEBUG "%s: failed to restore operational "
+			       "channel after scan\n", wiphy_name(local->hw.wiphy));
+		/* Restart STA timer for HW scan case */
+		rcu_read_lock();
+		list_for_each_entry_rcu(sdata, &local->interfaces, list)
+			ieee80211_restart_sta_timer(sdata);
+		rcu_read_unlock();
+
+		goto done;
+	}
+
+	local->sta_sw_scanning = 0;
+	if (ieee80211_hw_config(local))
+		printk(KERN_DEBUG "%s: failed to restore operational "
+		       "channel after scan\n", wiphy_name(local->hw.wiphy));
+
+
+	netif_tx_lock_bh(local->mdev);
+	netif_addr_lock(local->mdev);
+	local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC;
+	local->ops->configure_filter(local_to_hw(local),
+				     FIF_BCN_PRBRESP_PROMISC,
+				     &local->filter_flags,
+				     local->mdev->mc_count,
+				     local->mdev->mc_list);
+
+	netif_addr_unlock(local->mdev);
+	netif_tx_unlock_bh(local->mdev);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		/* Tell AP we're back */
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+			if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) {
+				ieee80211_send_nullfunc(local, sdata, 0);
+				netif_tx_wake_all_queues(sdata->dev);
+			}
+		} else
+			netif_tx_wake_all_queues(sdata->dev);
+
+		ieee80211_restart_sta_timer(sdata);
+	}
+	rcu_read_unlock();
+
+ done:
+	ieee80211_mlme_notify_scan_completed(local);
+}
+EXPORT_SYMBOL(ieee80211_scan_completed);
+
+
+void ieee80211_sta_scan_work(struct work_struct *work)
+{
+	struct ieee80211_local *local =
+		container_of(work, struct ieee80211_local, scan_work.work);
+	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_channel *chan;
+	int skip;
+	unsigned long next_delay = 0;
+
+	if (!local->sta_sw_scanning)
+		return;
+
+	switch (local->scan_state) {
+	case SCAN_SET_CHANNEL:
+		/*
+		 * Get current scan band. scan_band may be IEEE80211_NUM_BANDS
+		 * after we successfully scanned the last channel of the last
+		 * band (and the last band is supported by the hw)
+		 */
+		if (local->scan_band < IEEE80211_NUM_BANDS)
+			sband = local->hw.wiphy->bands[local->scan_band];
+		else
+			sband = NULL;
+
+		/*
+		 * If we are at an unsupported band and have more bands
+		 * left to scan, advance to the next supported one.
+		 */
+		while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) {
+			local->scan_band++;
+			sband = local->hw.wiphy->bands[local->scan_band];
+			local->scan_channel_idx = 0;
+		}
+
+		/* if no more bands/channels left, complete scan */
+		if (!sband || local->scan_channel_idx >= sband->n_channels) {
+			ieee80211_scan_completed(local_to_hw(local));
+			return;
+		}
+		skip = 0;
+		chan = &sband->channels[local->scan_channel_idx];
+
+		if (chan->flags & IEEE80211_CHAN_DISABLED ||
+		    (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
+		     chan->flags & IEEE80211_CHAN_NO_IBSS))
+			skip = 1;
+
+		if (!skip) {
+			local->scan_channel = chan;
+			if (ieee80211_hw_config(local)) {
+				printk(KERN_DEBUG "%s: failed to set freq to "
+				       "%d MHz for scan\n", wiphy_name(local->hw.wiphy),
+				       chan->center_freq);
+				skip = 1;
+			}
+		}
+
+		/* advance state machine to next channel/band */
+		local->scan_channel_idx++;
+		if (local->scan_channel_idx >= sband->n_channels) {
+			/*
+			 * scan_band may end up == IEEE80211_NUM_BANDS, but
+			 * we'll catch that case above and complete the scan
+			 * if that is the case.
+			 */
+			local->scan_band++;
+			local->scan_channel_idx = 0;
+		}
+
+		if (skip)
+			break;
+
+		next_delay = IEEE80211_PROBE_DELAY +
+			     usecs_to_jiffies(local->hw.channel_change_time);
+		local->scan_state = SCAN_SEND_PROBE;
+		break;
+	case SCAN_SEND_PROBE:
+		next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
+		local->scan_state = SCAN_SET_CHANNEL;
+
+		if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN)
+			break;
+		ieee80211_send_probe_req(sdata, NULL, local->scan_ssid,
+					 local->scan_ssid_len);
+		next_delay = IEEE80211_CHANNEL_TIME;
+		break;
+	}
+
+	if (local->sta_sw_scanning)
+		queue_delayed_work(local->hw.workqueue, &local->scan_work,
+				   next_delay);
+}
+
+
+int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
+			     u8 *ssid, size_t ssid_len)
+{
+	struct ieee80211_local *local = scan_sdata->local;
+	struct ieee80211_sub_if_data *sdata;
+
+	if (ssid_len > IEEE80211_MAX_SSID_LEN)
+		return -EINVAL;
+
+	/* MLME-SCAN.request (page 118)  page 144 (11.1.3.1)
+	 * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS
+	 * BSSID: MACAddress
+	 * SSID
+	 * ScanType: ACTIVE, PASSIVE
+	 * ProbeDelay: delay (in microseconds) to be used prior to transmitting
+	 *    a Probe frame during active scanning
+	 * ChannelList
+	 * MinChannelTime (>= ProbeDelay), in TU
+	 * MaxChannelTime: (>= MinChannelTime), in TU
+	 */
+
+	 /* MLME-SCAN.confirm
+	  * BSSDescriptionSet
+	  * ResultCode: SUCCESS, INVALID_PARAMETERS
+	 */
+
+	if (local->sta_sw_scanning || local->sta_hw_scanning) {
+		if (local->scan_sdata == scan_sdata)
+			return 0;
+		return -EBUSY;
+	}
+
+	if (local->ops->hw_scan) {
+		int rc = local->ops->hw_scan(local_to_hw(local),
+					     ssid, ssid_len);
+		if (!rc) {
+			local->sta_hw_scanning = 1;
+			local->scan_sdata = scan_sdata;
+		}
+		return rc;
+	}
+
+	local->sta_sw_scanning = 1;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+			if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) {
+				netif_tx_stop_all_queues(sdata->dev);
+				ieee80211_send_nullfunc(local, sdata, 1);
+			}
+		} else
+			netif_tx_stop_all_queues(sdata->dev);
+	}
+	rcu_read_unlock();
+
+	if (ssid) {
+		local->scan_ssid_len = ssid_len;
+		memcpy(local->scan_ssid, ssid, ssid_len);
+	} else
+		local->scan_ssid_len = 0;
+	local->scan_state = SCAN_SET_CHANNEL;
+	local->scan_channel_idx = 0;
+	local->scan_band = IEEE80211_BAND_2GHZ;
+	local->scan_sdata = scan_sdata;
+
+	netif_addr_lock_bh(local->mdev);
+	local->filter_flags |= FIF_BCN_PRBRESP_PROMISC;
+	local->ops->configure_filter(local_to_hw(local),
+				     FIF_BCN_PRBRESP_PROMISC,
+				     &local->filter_flags,
+				     local->mdev->mc_count,
+				     local->mdev->mc_list);
+	netif_addr_unlock_bh(local->mdev);
+
+	/* TODO: start scan as soon as all nullfunc frames are ACKed */
+	queue_delayed_work(local->hw.workqueue, &local->scan_work,
+			   IEEE80211_CHANNEL_TIME);
+
+	return 0;
+}
+
+
+int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t ssid_len)
+{
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_local *local = sdata->local;
+
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+		return ieee80211_sta_start_scan(sdata, ssid, ssid_len);
+
+	if (local->sta_sw_scanning || local->sta_hw_scanning) {
+		if (local->scan_sdata == sdata)
+			return 0;
+		return -EBUSY;
+	}
+
+	ifsta->scan_ssid_len = ssid_len;
+	if (ssid_len)
+		memcpy(ifsta->scan_ssid, ssid, ssid_len);
+	set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request);
+	queue_work(local->hw.workqueue, &ifsta->work);
+	return 0;
+}
+
+
+static void ieee80211_sta_add_scan_ies(struct iw_request_info *info,
+				       struct ieee80211_sta_bss *bss,
+				       char **current_ev, char *end_buf)
+{
+	u8 *pos, *end, *next;
+	struct iw_event iwe;
+
+	if (bss == NULL || bss->ies == NULL)
+		return;
+
+	/*
+	 * If needed, fragment the IEs buffer (at IE boundaries) into short
+	 * enough fragments to fit into IW_GENERIC_IE_MAX octet messages.
+	 */
+	pos = bss->ies;
+	end = pos + bss->ies_len;
+
+	while (end - pos > IW_GENERIC_IE_MAX) {
+		next = pos + 2 + pos[1];
+		while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX)
+			next = next + 2 + next[1];
+
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVGENIE;
+		iwe.u.data.length = next - pos;
+		*current_ev = iwe_stream_add_point(info, *current_ev,
+						   end_buf, &iwe, pos);
+
+		pos = next;
+	}
+
+	if (end > pos) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVGENIE;
+		iwe.u.data.length = end - pos;
+		*current_ev = iwe_stream_add_point(info, *current_ev,
+						   end_buf, &iwe, pos);
+	}
+}
+
+
+static char *
+ieee80211_sta_scan_result(struct ieee80211_local *local,
+			  struct iw_request_info *info,
+			  struct ieee80211_sta_bss *bss,
+			  char *current_ev, char *end_buf)
+{
+	struct iw_event iwe;
+	char *buf;
+
+	if (time_after(jiffies,
+		       bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE))
+		return current_ev;
+
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = SIOCGIWAP;
+	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
+	memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN);
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
+					  IW_EV_ADDR_LEN);
+
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = SIOCGIWESSID;
+	if (bss_mesh_cfg(bss)) {
+		iwe.u.data.length = bss_mesh_id_len(bss);
+		iwe.u.data.flags = 1;
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, bss_mesh_id(bss));
+	} else {
+		iwe.u.data.length = bss->ssid_len;
+		iwe.u.data.flags = 1;
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, bss->ssid);
+	}
+
+	if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)
+	    || bss_mesh_cfg(bss)) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = SIOCGIWMODE;
+		if (bss_mesh_cfg(bss))
+			iwe.u.mode = IW_MODE_MESH;
+		else if (bss->capability & WLAN_CAPABILITY_ESS)
+			iwe.u.mode = IW_MODE_MASTER;
+		else
+			iwe.u.mode = IW_MODE_ADHOC;
+		current_ev = iwe_stream_add_event(info, current_ev, end_buf,
+						  &iwe, IW_EV_UINT_LEN);
+	}
+
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = SIOCGIWFREQ;
+	iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq);
+	iwe.u.freq.e = 0;
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
+					  IW_EV_FREQ_LEN);
+
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = SIOCGIWFREQ;
+	iwe.u.freq.m = bss->freq;
+	iwe.u.freq.e = 6;
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
+					  IW_EV_FREQ_LEN);
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = IWEVQUAL;
+	iwe.u.qual.qual = bss->qual;
+	iwe.u.qual.level = bss->signal;
+	iwe.u.qual.noise = bss->noise;
+	iwe.u.qual.updated = local->wstats_flags;
+	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
+					  IW_EV_QUAL_LEN);
+
+	memset(&iwe, 0, sizeof(iwe));
+	iwe.cmd = SIOCGIWENCODE;
+	if (bss->capability & WLAN_CAPABILITY_PRIVACY)
+		iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
+	else
+		iwe.u.data.flags = IW_ENCODE_DISABLED;
+	iwe.u.data.length = 0;
+	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+					  &iwe, "");
+
+	ieee80211_sta_add_scan_ies(info, bss, &current_ev, end_buf);
+
+	if (bss->supp_rates_len > 0) {
+		/* display all supported rates in readable format */
+		char *p = current_ev + iwe_stream_lcp_len(info);
+		int i;
+
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = SIOCGIWRATE;
+		/* Those two flags are ignored... */
+		iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
+
+		for (i = 0; i < bss->supp_rates_len; i++) {
+			iwe.u.bitrate.value = ((bss->supp_rates[i] &
+							0x7f) * 500000);
+			p = iwe_stream_add_value(info, current_ev, p,
+					end_buf, &iwe, IW_EV_PARAM_LEN);
+		}
+		current_ev = p;
+	}
+
+	buf = kmalloc(30, GFP_ATOMIC);
+	if (buf) {
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVCUSTOM;
+		sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp));
+		iwe.u.data.length = strlen(buf);
+		current_ev = iwe_stream_add_point(info, current_ev, end_buf,
+						  &iwe, buf);
+		memset(&iwe, 0, sizeof(iwe));
+		iwe.cmd = IWEVCUSTOM;
+		sprintf(buf, " Last beacon: %dms ago",
+			jiffies_to_msecs(jiffies - bss->last_update));
+		iwe.u.data.length = strlen(buf);
+		current_ev = iwe_stream_add_point(info, current_ev,
+						  end_buf, &iwe, buf);
+		kfree(buf);
+	}
+
+	if (bss_mesh_cfg(bss)) {
+		u8 *cfg = bss_mesh_cfg(bss);
+		buf = kmalloc(50, GFP_ATOMIC);
+		if (buf) {
+			memset(&iwe, 0, sizeof(iwe));
+			iwe.cmd = IWEVCUSTOM;
+			sprintf(buf, "Mesh network (version %d)", cfg[0]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			sprintf(buf, "Path Selection Protocol ID: "
+				"0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3],
+							cfg[4]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			sprintf(buf, "Path Selection Metric ID: "
+				"0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7],
+							cfg[8]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			sprintf(buf, "Congestion Control Mode ID: "
+				"0x%02X%02X%02X%02X", cfg[9], cfg[10],
+							cfg[11], cfg[12]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			sprintf(buf, "Channel Precedence: "
+				"0x%02X%02X%02X%02X", cfg[13], cfg[14],
+							cfg[15], cfg[16]);
+			iwe.u.data.length = strlen(buf);
+			current_ev = iwe_stream_add_point(info, current_ev,
+							  end_buf,
+							  &iwe, buf);
+			kfree(buf);
+		}
+	}
+
+	return current_ev;
+}
+
+
+int ieee80211_sta_scan_results(struct ieee80211_local *local,
+			       struct iw_request_info *info,
+			       char *buf, size_t len)
+{
+	char *current_ev = buf;
+	char *end_buf = buf + len;
+	struct ieee80211_sta_bss *bss;
+
+	spin_lock_bh(&local->sta_bss_lock);
+	list_for_each_entry(bss, &local->sta_bss_list, list) {
+		if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
+			spin_unlock_bh(&local->sta_bss_lock);
+			return -E2BIG;
+		}
+		current_ev = ieee80211_sta_scan_result(local, info, bss,
+						       current_ev, end_buf);
+	}
+	spin_unlock_bh(&local->sta_bss_lock);
+	return current_ev - buf;
+}
-- 
cgit v1.2.3-70-g09d2


From 98c8fccfaea838e62ffde2f2e44568844e0e5472 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:44:26 +0200
Subject: mac80211: refactor and move scan RX code

This patch refactors some code and moves the scan RX function
to scan.c. More importantly, however, it changes it so that the
MLME's beacon/probe_resp functions aren't invoked when scanning
so that we can remove a "if (scanning)" conditions from two
places.

There's a very slight behavioural change in this patch: now,
when scanning, IBSS and mesh aren't updated even on the same
channel.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |   9 ++
 net/mac80211/mlme.c        | 253 +++++++++++++++++++++------------------------
 net/mac80211/scan.c        |  68 ++++++++++++
 3 files changed, 194 insertions(+), 136 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 25dccd5cb2f..4753ed3f3f1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -937,6 +937,15 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
 void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local);
 int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 			     u8 *ssid, size_t ssid_len);
+struct ieee80211_sta_bss *
+ieee80211_bss_info_update(struct ieee80211_local *local,
+			  struct ieee80211_rx_status *rx_status,
+			  struct ieee80211_mgmt *mgmt,
+			  size_t len,
+			  struct ieee802_11_elems *elems,
+			  int freq, bool beacon);
+void ieee80211_rx_bss_put(struct ieee80211_local *local,
+			  struct ieee80211_sta_bss *bss);
 
 #ifdef CONFIG_MAC80211_MESH
 void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2caea9759b7..1708a3d1cd3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -128,10 +128,9 @@ static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
 }
 
 static struct ieee80211_sta_bss *
-ieee80211_rx_bss_add(struct ieee80211_sub_if_data *sdata, u8 *bssid, int freq,
+ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len)
 {
-	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_sta_bss *bss;
 
 	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
@@ -230,8 +229,8 @@ static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
 	kfree(bss);
 }
 
-static void ieee80211_rx_bss_put(struct ieee80211_local *local,
-				 struct ieee80211_sta_bss *bss)
+void ieee80211_rx_bss_put(struct ieee80211_local *local,
+			  struct ieee80211_sta_bss *bss)
 {
 	local_bh_disable();
 	if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) {
@@ -2443,74 +2442,16 @@ static u64 ieee80211_sta_get_mandatory_rates(struct ieee80211_local *local,
 	return mandatory_rates;
 }
 
-static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
-				  struct ieee80211_mgmt *mgmt,
-				  size_t len,
-				  struct ieee80211_rx_status *rx_status,
-				  struct ieee802_11_elems *elems)
+struct ieee80211_sta_bss *
+ieee80211_bss_info_update(struct ieee80211_local *local,
+			  struct ieee80211_rx_status *rx_status,
+			  struct ieee80211_mgmt *mgmt,
+			  size_t len,
+			  struct ieee802_11_elems *elems,
+			  int freq, bool beacon)
 {
-	struct ieee80211_local *local = sdata->local;
-	int freq, clen;
 	struct ieee80211_sta_bss *bss;
-	struct sta_info *sta;
-	struct ieee80211_channel *channel;
-	u64 beacon_timestamp, rx_timestamp;
-	u64 supp_rates = 0;
-	bool beacon = ieee80211_is_beacon(mgmt->frame_control);
-	enum ieee80211_band band = rx_status->band;
-	DECLARE_MAC_BUF(mac);
-	DECLARE_MAC_BUF(mac2);
-
-	if (elems->ds_params && elems->ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
-	else
-		freq = rx_status->freq;
-
-	channel = ieee80211_get_channel(local->hw.wiphy, freq);
-
-	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
-		return;
-
-	if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id &&
-	    elems->mesh_config && mesh_matches_local(elems, sdata)) {
-		supp_rates = ieee80211_sta_get_rates(local, elems, band);
-
-		mesh_neighbour_update(mgmt->sa, supp_rates, sdata,
-				      mesh_peer_accepts_plinks(elems));
-	}
-
-	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates &&
-	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
-		supp_rates = ieee80211_sta_get_rates(local, elems, band);
-
-		rcu_read_lock();
-
-		sta = sta_info_get(local, mgmt->sa);
-		if (sta) {
-			u64 prev_rates;
-
-			prev_rates = sta->supp_rates[band];
-			/* make sure mandatory rates are always added */
-			sta->supp_rates[band] = supp_rates |
-				ieee80211_sta_get_mandatory_rates(local, band);
-
-#ifdef CONFIG_MAC80211_IBSS_DEBUG
-			if (sta->supp_rates[band] != prev_rates)
-				printk(KERN_DEBUG "%s: updated supp_rates set "
-				    "for %s based on beacon info (0x%llx | "
-				    "0x%llx -> 0x%llx)\n",
-				    sdata->dev->name, print_mac(mac, sta->addr),
-				    (unsigned long long) prev_rates,
-				    (unsigned long long) supp_rates,
-				    (unsigned long long) sta->supp_rates[band]);
-#endif
-		} else {
-			ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid,
-					       mgmt->sa, supp_rates);
-		}
-
-		rcu_read_unlock();
-	}
+	int clen;
 
 #ifdef CONFIG_MAC80211_MESH
 	if (elems->mesh_config)
@@ -2528,10 +2469,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				elems->mesh_config_len, freq);
 		else
 #endif
-			bss = ieee80211_rx_bss_add(sdata, mgmt->bssid, freq,
+			bss = ieee80211_rx_bss_add(local, mgmt->bssid, freq,
 						  elems->ssid, elems->ssid_len);
 		if (!bss)
-			return;
+			return NULL;
 	} else {
 #if 0
 		/* TODO: order by RSSI? */
@@ -2578,17 +2519,114 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		bss->supp_rates_len += clen;
 	}
 
-	bss->band = band;
+	bss->band = rx_status->band;
 
-	beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
-
-	bss->timestamp = beacon_timestamp;
+	bss->timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
 	bss->last_update = jiffies;
 	bss->signal = rx_status->signal;
 	bss->noise = rx_status->noise;
 	bss->qual = rx_status->qual;
+	bss->wmm_used = elems->wmm_param || elems->wmm_info;
+
 	if (!beacon)
 		bss->last_probe_resp = jiffies;
+
+	/*
+	 * For probe responses, or if we don't have any information yet,
+	 * use the IEs from the beacon.
+	 */
+	if (!bss->ies || !beacon) {
+		if (bss->ies == NULL || bss->ies_len < elems->total_len) {
+			kfree(bss->ies);
+			bss->ies = kmalloc(elems->total_len, GFP_ATOMIC);
+		}
+		if (bss->ies) {
+			memcpy(bss->ies, elems->ie_start, elems->total_len);
+			bss->ies_len = elems->total_len;
+		} else
+			bss->ies_len = 0;
+	}
+
+	return bss;
+}
+
+static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
+				  struct ieee80211_mgmt *mgmt,
+				  size_t len,
+				  struct ieee80211_rx_status *rx_status,
+				  struct ieee802_11_elems *elems,
+				  bool beacon)
+{
+	struct ieee80211_local *local = sdata->local;
+	int freq;
+	struct ieee80211_sta_bss *bss;
+	struct sta_info *sta;
+	struct ieee80211_channel *channel;
+	u64 beacon_timestamp, rx_timestamp;
+	u64 supp_rates = 0;
+	enum ieee80211_band band = rx_status->band;
+	DECLARE_MAC_BUF(mac);
+	DECLARE_MAC_BUF(mac2);
+
+	if (elems->ds_params && elems->ds_params_len == 1)
+		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
+	else
+		freq = rx_status->freq;
+
+	channel = ieee80211_get_channel(local->hw.wiphy, freq);
+
+	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+		return;
+
+	if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id &&
+	    elems->mesh_config && mesh_matches_local(elems, sdata)) {
+		supp_rates = ieee80211_sta_get_rates(local, elems, band);
+
+		mesh_neighbour_update(mgmt->sa, supp_rates, sdata,
+				      mesh_peer_accepts_plinks(elems));
+	}
+
+	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates &&
+	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
+		supp_rates = ieee80211_sta_get_rates(local, elems, band);
+
+		rcu_read_lock();
+
+		sta = sta_info_get(local, mgmt->sa);
+		if (sta) {
+			u64 prev_rates;
+
+			prev_rates = sta->supp_rates[band];
+			/* make sure mandatory rates are always added */
+			sta->supp_rates[band] = supp_rates |
+				ieee80211_sta_get_mandatory_rates(local, band);
+
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+			if (sta->supp_rates[band] != prev_rates)
+				printk(KERN_DEBUG "%s: updated supp_rates set "
+				    "for %s based on beacon info (0x%llx | "
+				    "0x%llx -> 0x%llx)\n",
+				    sdata->dev->name, print_mac(mac, sta->addr),
+				    (unsigned long long) prev_rates,
+				    (unsigned long long) supp_rates,
+				    (unsigned long long) sta->supp_rates[band]);
+#endif
+		} else {
+			ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid,
+					       mgmt->sa, supp_rates);
+		}
+
+		rcu_read_unlock();
+	}
+
+	bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
+					freq, beacon);
+	if (!bss)
+		return;
+
+	/* was just updated in ieee80211_bss_info_update */
+	beacon_timestamp = bss->timestamp;
+
 	/*
 	 * In STA mode, the remaining parameters should not be overridden
 	 * by beacons because they're not necessarily accurate there.
@@ -2599,21 +2637,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
-	if (bss->ies == NULL || bss->ies_len < elems->total_len) {
-		kfree(bss->ies);
-		bss->ies = kmalloc(elems->total_len, GFP_ATOMIC);
-	}
-	if (bss->ies) {
-		memcpy(bss->ies, elems->ie_start, elems->total_len);
-		bss->ies_len = elems->total_len;
-	} else
-		bss->ies_len = 0;
-
-	bss->wmm_used = elems->wmm_param || elems->wmm_info;
-
 	/* check if we need to merge IBSS */
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && beacon &&
-	    !local->sta_sw_scanning && !local->sta_hw_scanning &&
 	    bss->capability & WLAN_CAPABILITY_IBSS &&
 	    bss->freq == local->oper_channel->center_freq &&
 	    elems->ssid_len == sdata->u.sta.ssid_len &&
@@ -2690,7 +2715,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
 				&elems);
 
-	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
+	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false);
 
 	/* direct probe may be part of the association flow */
 	if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE,
@@ -2721,7 +2746,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
-	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
+	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true);
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return;
@@ -2731,15 +2756,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	    memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0)
 		return;
 
-	/* Do not send changes to driver if we are scanning. This removes
-	 * requirement that a driver's bss_info_changed/conf_tx functions
-	 * need to be atomic.
-	 * This is really ugly code, we should rewrite scanning and make
-	 * all this more understandable for humans.
-	 */
-	if (local->sta_sw_scanning || local->sta_hw_scanning)
-		return;
-
 	ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param,
 				 elems.wmm_param_len);
 
@@ -2982,41 +2998,6 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 }
 
 
-ieee80211_rx_result
-ieee80211_sta_rx_scan(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
-		      struct ieee80211_rx_status *rx_status)
-{
-	struct ieee80211_mgmt *mgmt;
-	__le16 fc;
-
-	if (skb->len < 2)
-		return RX_DROP_UNUSABLE;
-
-	mgmt = (struct ieee80211_mgmt *) skb->data;
-	fc = mgmt->frame_control;
-
-	if (ieee80211_is_ctl(fc))
-		return RX_CONTINUE;
-
-	if (skb->len < 24)
-		return RX_DROP_MONITOR;
-
-	if (ieee80211_is_probe_resp(fc)) {
-		ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, rx_status);
-		dev_kfree_skb(skb);
-		return RX_QUEUED;
-	}
-
-	if (ieee80211_is_beacon(fc)) {
-		ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status);
-		dev_kfree_skb(skb);
-		return RX_QUEUED;
-	}
-
-	return RX_CONTINUE;
-}
-
-
 static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
@@ -3233,7 +3214,7 @@ static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n",
 	       sdata->dev->name, print_mac(mac, bssid));
 
-	bss = ieee80211_rx_bss_add(sdata, bssid,
+	bss = ieee80211_rx_bss_add(local, bssid,
 				   local->hw.conf.channel->center_freq,
 				   sdata->u.sta.ssid, sdata->u.sta.ssid_len);
 	if (!bss)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 68fa782acd7..2848ba3a08e 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -23,6 +23,74 @@
 #define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5)
 
 
+ieee80211_rx_result
+ieee80211_sta_rx_scan(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
+		      struct ieee80211_rx_status *rx_status)
+{
+	struct ieee80211_mgmt *mgmt;
+	struct ieee80211_sta_bss *bss;
+	u8 *elements;
+	struct ieee80211_channel *channel;
+	size_t baselen;
+	int freq;
+	__le16 fc;
+	bool presp, beacon = false;
+	struct ieee802_11_elems elems;
+
+	if (skb->len < 2)
+		return RX_DROP_UNUSABLE;
+
+	mgmt = (struct ieee80211_mgmt *) skb->data;
+	fc = mgmt->frame_control;
+
+	if (ieee80211_is_ctl(fc))
+		return RX_CONTINUE;
+
+	if (skb->len < 24)
+		return RX_DROP_MONITOR;
+
+	presp = ieee80211_is_probe_resp(fc);
+	if (presp) {
+		/* ignore ProbeResp to foreign address */
+		if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN))
+			return RX_DROP_MONITOR;
+
+		presp = true;
+		elements = mgmt->u.probe_resp.variable;
+		baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable);
+	} else {
+		beacon = ieee80211_is_beacon(fc);
+		baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable);
+		elements = mgmt->u.beacon.variable;
+	}
+
+	if (!presp && !beacon)
+		return RX_CONTINUE;
+
+	if (baselen > skb->len)
+		return RX_DROP_MONITOR;
+
+	ieee802_11_parse_elems(elements, skb->len - baselen, &elems);
+
+	if (elems.ds_params && elems.ds_params_len == 1)
+		freq = ieee80211_channel_to_frequency(elems.ds_params[0]);
+	else
+		freq = rx_status->freq;
+
+	channel = ieee80211_get_channel(sdata->local->hw.wiphy, freq);
+
+	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+		return RX_DROP_MONITOR;
+
+	bss = ieee80211_bss_info_update(sdata->local, rx_status,
+					mgmt, skb->len, &elems,
+					freq, beacon);
+	ieee80211_rx_bss_put(sdata->local, bss);
+
+	dev_kfree_skb(skb);
+	return RX_QUEUED;
+}
+
 static void ieee80211_send_nullfunc(struct ieee80211_local *local,
 				    struct ieee80211_sub_if_data *sdata,
 				    int powersave)
-- 
cgit v1.2.3-70-g09d2


From 5484e23749e78d5a4f56928efaf3c4b0d862b7a6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:44:27 +0200
Subject: mac80211: move BSS handling to scan code

This moves all the BSS list handling out of mlme.c to scan.c,
no further changes except fixing kzalloc/atomic_inc/atomic_inc
to kzalloc/atomic_set(2).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |   6 +
 net/mac80211/mlme.c        | 310 +--------------------------------------------
 net/mac80211/scan.c        | 305 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 313 insertions(+), 308 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 4753ed3f3f1..792c09ca08e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -944,6 +944,12 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 			  size_t len,
 			  struct ieee802_11_elems *elems,
 			  int freq, bool beacon);
+struct ieee80211_sta_bss *
+ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
+		     u8 *ssid, u8 ssid_len);
+struct ieee80211_sta_bss *
+ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
+		     u8 *ssid, u8 ssid_len);
 void ieee80211_rx_bss_put(struct ieee80211_local *local,
 			  struct ieee80211_sta_bss *bss);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1708a3d1cd3..be3292bfabe 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -11,11 +11,6 @@
  * published by the Free Software Foundation.
  */
 
-/* TODO:
- * order BSS list by RSSI(?) ("quality of AP")
- * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE,
- *    SSID)
- */
 #include <linux/delay.h>
 #include <linux/if_ether.h>
 #include <linux/skbuff.h>
@@ -67,195 +62,10 @@
 #define IEEE80211_MIN_AMPDU_BUF 0x8
 #define IEEE80211_MAX_AMPDU_BUF 0x40
 
-/* BSS handling */
-static struct ieee80211_sta_bss *
-ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
-		     u8 *ssid, u8 ssid_len)
-{
-	struct ieee80211_sta_bss *bss;
-
-	spin_lock_bh(&local->sta_bss_lock);
-	bss = local->sta_bss_hash[STA_HASH(bssid)];
-	while (bss) {
-		if (!bss_mesh_cfg(bss) &&
-		    !memcmp(bss->bssid, bssid, ETH_ALEN) &&
-		    bss->freq == freq &&
-		    bss->ssid_len == ssid_len &&
-		    (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) {
-			atomic_inc(&bss->users);
-			break;
-		}
-		bss = bss->hnext;
-	}
-	spin_unlock_bh(&local->sta_bss_lock);
-	return bss;
-}
-
-/* Caller must hold local->sta_bss_lock */
-static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local,
-					struct ieee80211_sta_bss *bss)
-{
-	u8 hash_idx;
-
-	if (bss_mesh_cfg(bss))
-		hash_idx = mesh_id_hash(bss_mesh_id(bss),
-					bss_mesh_id_len(bss));
-	else
-		hash_idx = STA_HASH(bss->bssid);
-
-	bss->hnext = local->sta_bss_hash[hash_idx];
-	local->sta_bss_hash[hash_idx] = bss;
-}
-
-/* Caller must hold local->sta_bss_lock */
-static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
-					struct ieee80211_sta_bss *bss)
-{
-	struct ieee80211_sta_bss *b, *prev = NULL;
-	b = local->sta_bss_hash[STA_HASH(bss->bssid)];
-	while (b) {
-		if (b == bss) {
-			if (!prev)
-				local->sta_bss_hash[STA_HASH(bss->bssid)] =
-					bss->hnext;
-			else
-				prev->hnext = bss->hnext;
-			break;
-		}
-		prev = b;
-		b = b->hnext;
-	}
-}
-
-static struct ieee80211_sta_bss *
-ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
-		     u8 *ssid, u8 ssid_len)
-{
-	struct ieee80211_sta_bss *bss;
-
-	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
-	if (!bss)
-		return NULL;
-	atomic_inc(&bss->users);
-	atomic_inc(&bss->users);
-	memcpy(bss->bssid, bssid, ETH_ALEN);
-	bss->freq = freq;
-	if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) {
-		memcpy(bss->ssid, ssid, ssid_len);
-		bss->ssid_len = ssid_len;
-	}
-
-	spin_lock_bh(&local->sta_bss_lock);
-	/* TODO: order by RSSI? */
-	list_add_tail(&bss->list, &local->sta_bss_list);
-	__ieee80211_rx_bss_hash_add(local, bss);
-	spin_unlock_bh(&local->sta_bss_lock);
-	return bss;
-}
-
-#ifdef CONFIG_MAC80211_MESH
-static struct ieee80211_sta_bss *
-ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
-			  u8 *mesh_cfg, int freq)
-{
-	struct ieee80211_sta_bss *bss;
-
-	spin_lock_bh(&local->sta_bss_lock);
-	bss = local->sta_bss_hash[mesh_id_hash(mesh_id, mesh_id_len)];
-	while (bss) {
-		if (bss_mesh_cfg(bss) &&
-		    !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) &&
-		    bss->freq == freq &&
-		    mesh_id_len == bss->mesh_id_len &&
-		    (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id,
-						 mesh_id_len))) {
-			atomic_inc(&bss->users);
-			break;
-		}
-		bss = bss->hnext;
-	}
-	spin_unlock_bh(&local->sta_bss_lock);
-	return bss;
-}
-
-static struct ieee80211_sta_bss *
-ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
-			  u8 *mesh_cfg, int mesh_config_len, int freq)
-{
-	struct ieee80211_sta_bss *bss;
-
-	if (mesh_config_len != MESH_CFG_LEN)
-		return NULL;
-
-	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
-	if (!bss)
-		return NULL;
-
-	bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC);
-	if (!bss->mesh_cfg) {
-		kfree(bss);
-		return NULL;
-	}
-
-	if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) {
-		bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC);
-		if (!bss->mesh_id) {
-			kfree(bss->mesh_cfg);
-			kfree(bss);
-			return NULL;
-		}
-		memcpy(bss->mesh_id, mesh_id, mesh_id_len);
-	}
-
-	atomic_inc(&bss->users);
-	atomic_inc(&bss->users);
-	memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN);
-	bss->mesh_id_len = mesh_id_len;
-	bss->freq = freq;
-	spin_lock_bh(&local->sta_bss_lock);
-	/* TODO: order by RSSI? */
-	list_add_tail(&bss->list, &local->sta_bss_list);
-	__ieee80211_rx_bss_hash_add(local, bss);
-	spin_unlock_bh(&local->sta_bss_lock);
-	return bss;
-}
-#endif
-
-static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
-{
-	kfree(bss->ies);
-	kfree(bss_mesh_id(bss));
-	kfree(bss_mesh_cfg(bss));
-	kfree(bss);
-}
-
-void ieee80211_rx_bss_put(struct ieee80211_local *local,
-			  struct ieee80211_sta_bss *bss)
-{
-	local_bh_disable();
-	if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) {
-		local_bh_enable();
-		return;
-	}
-
-	__ieee80211_rx_bss_hash_del(local, bss);
-	list_del(&bss->list);
-	spin_unlock_bh(&local->sta_bss_lock);
-	ieee80211_rx_bss_free(bss);
-}
-
-void ieee80211_rx_bss_list_init(struct ieee80211_local *local)
-{
-	spin_lock_init(&local->sta_bss_lock);
-	INIT_LIST_HEAD(&local->sta_bss_list);
-}
-
-void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local)
+/* utils */
+static int ecw2cw(int ecw)
 {
-	struct ieee80211_sta_bss *bss, *tmp;
-
-	list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list)
-		ieee80211_rx_bss_put(local, bss);
+	return (1 << ecw) - 1;
 }
 
 static u8 *ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
@@ -278,12 +88,6 @@ static u8 *ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
 	return NULL;
 }
 
-/* utils */
-static int ecw2cw(int ecw)
-{
-	return (1 << ecw) - 1;
-}
-
 /* frame sending functions */
 void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		      int encrypt)
@@ -2442,114 +2246,6 @@ static u64 ieee80211_sta_get_mandatory_rates(struct ieee80211_local *local,
 	return mandatory_rates;
 }
 
-struct ieee80211_sta_bss *
-ieee80211_bss_info_update(struct ieee80211_local *local,
-			  struct ieee80211_rx_status *rx_status,
-			  struct ieee80211_mgmt *mgmt,
-			  size_t len,
-			  struct ieee802_11_elems *elems,
-			  int freq, bool beacon)
-{
-	struct ieee80211_sta_bss *bss;
-	int clen;
-
-#ifdef CONFIG_MAC80211_MESH
-	if (elems->mesh_config)
-		bss = ieee80211_rx_mesh_bss_get(local, elems->mesh_id,
-				elems->mesh_id_len, elems->mesh_config, freq);
-	else
-#endif
-		bss = ieee80211_rx_bss_get(local, mgmt->bssid, freq,
-					   elems->ssid, elems->ssid_len);
-	if (!bss) {
-#ifdef CONFIG_MAC80211_MESH
-		if (elems->mesh_config)
-			bss = ieee80211_rx_mesh_bss_add(local, elems->mesh_id,
-				elems->mesh_id_len, elems->mesh_config,
-				elems->mesh_config_len, freq);
-		else
-#endif
-			bss = ieee80211_rx_bss_add(local, mgmt->bssid, freq,
-						  elems->ssid, elems->ssid_len);
-		if (!bss)
-			return NULL;
-	} else {
-#if 0
-		/* TODO: order by RSSI? */
-		spin_lock_bh(&local->sta_bss_lock);
-		list_move_tail(&bss->list, &local->sta_bss_list);
-		spin_unlock_bh(&local->sta_bss_lock);
-#endif
-	}
-
-	/* save the ERP value so that it is available at association time */
-	if (elems->erp_info && elems->erp_info_len >= 1) {
-		bss->erp_value = elems->erp_info[0];
-		bss->has_erp_value = 1;
-	}
-
-	bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
-	bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
-
-	if (elems->tim) {
-		struct ieee80211_tim_ie *tim_ie =
-			(struct ieee80211_tim_ie *)elems->tim;
-		bss->dtim_period = tim_ie->dtim_period;
-	}
-
-	/* set default value for buggy APs */
-	if (!elems->tim || bss->dtim_period == 0)
-		bss->dtim_period = 1;
-
-	bss->supp_rates_len = 0;
-	if (elems->supp_rates) {
-		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
-		if (clen > elems->supp_rates_len)
-			clen = elems->supp_rates_len;
-		memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates,
-		       clen);
-		bss->supp_rates_len += clen;
-	}
-	if (elems->ext_supp_rates) {
-		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
-		if (clen > elems->ext_supp_rates_len)
-			clen = elems->ext_supp_rates_len;
-		memcpy(&bss->supp_rates[bss->supp_rates_len],
-		       elems->ext_supp_rates, clen);
-		bss->supp_rates_len += clen;
-	}
-
-	bss->band = rx_status->band;
-
-	bss->timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
-	bss->last_update = jiffies;
-	bss->signal = rx_status->signal;
-	bss->noise = rx_status->noise;
-	bss->qual = rx_status->qual;
-	bss->wmm_used = elems->wmm_param || elems->wmm_info;
-
-	if (!beacon)
-		bss->last_probe_resp = jiffies;
-
-	/*
-	 * For probe responses, or if we don't have any information yet,
-	 * use the IEs from the beacon.
-	 */
-	if (!bss->ies || !beacon) {
-		if (bss->ies == NULL || bss->ies_len < elems->total_len) {
-			kfree(bss->ies);
-			bss->ies = kmalloc(elems->total_len, GFP_ATOMIC);
-		}
-		if (bss->ies) {
-			memcpy(bss->ies, elems->ie_start, elems->total_len);
-			bss->ies_len = elems->total_len;
-		} else
-			bss->ies_len = 0;
-	}
-
-	return bss;
-}
-
 static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_mgmt *mgmt,
 				  size_t len,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 2848ba3a08e..1beefb5ad6f 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -1,5 +1,6 @@
 /*
- * BSS client mode implementation
+ * Scanning implementation
+ *
  * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
  * Copyright 2004, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
@@ -11,17 +12,319 @@
  * published by the Free Software Foundation.
  */
 
+/* TODO:
+ * order BSS list by RSSI(?) ("quality of AP")
+ * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE,
+ *    SSID)
+ */
+
 #include <linux/wireless.h>
 #include <linux/if_arp.h>
 #include <net/mac80211.h>
 #include <net/iw_handler.h>
 
 #include "ieee80211_i.h"
+#include "mesh.h"
 
 #define IEEE80211_PROBE_DELAY (HZ / 33)
 #define IEEE80211_CHANNEL_TIME (HZ / 33)
 #define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5)
 
+void ieee80211_rx_bss_list_init(struct ieee80211_local *local)
+{
+	spin_lock_init(&local->sta_bss_lock);
+	INIT_LIST_HEAD(&local->sta_bss_list);
+}
+
+void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local)
+{
+	struct ieee80211_sta_bss *bss, *tmp;
+
+	list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list)
+		ieee80211_rx_bss_put(local, bss);
+}
+
+struct ieee80211_sta_bss *
+ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
+		     u8 *ssid, u8 ssid_len)
+{
+	struct ieee80211_sta_bss *bss;
+
+	spin_lock_bh(&local->sta_bss_lock);
+	bss = local->sta_bss_hash[STA_HASH(bssid)];
+	while (bss) {
+		if (!bss_mesh_cfg(bss) &&
+		    !memcmp(bss->bssid, bssid, ETH_ALEN) &&
+		    bss->freq == freq &&
+		    bss->ssid_len == ssid_len &&
+		    (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) {
+			atomic_inc(&bss->users);
+			break;
+		}
+		bss = bss->hnext;
+	}
+	spin_unlock_bh(&local->sta_bss_lock);
+	return bss;
+}
+
+/* Caller must hold local->sta_bss_lock */
+static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local,
+					struct ieee80211_sta_bss *bss)
+{
+	u8 hash_idx;
+
+	if (bss_mesh_cfg(bss))
+		hash_idx = mesh_id_hash(bss_mesh_id(bss),
+					bss_mesh_id_len(bss));
+	else
+		hash_idx = STA_HASH(bss->bssid);
+
+	bss->hnext = local->sta_bss_hash[hash_idx];
+	local->sta_bss_hash[hash_idx] = bss;
+}
+
+/* Caller must hold local->sta_bss_lock */
+static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
+					struct ieee80211_sta_bss *bss)
+{
+	struct ieee80211_sta_bss *b, *prev = NULL;
+	b = local->sta_bss_hash[STA_HASH(bss->bssid)];
+	while (b) {
+		if (b == bss) {
+			if (!prev)
+				local->sta_bss_hash[STA_HASH(bss->bssid)] =
+					bss->hnext;
+			else
+				prev->hnext = bss->hnext;
+			break;
+		}
+		prev = b;
+		b = b->hnext;
+	}
+}
+
+struct ieee80211_sta_bss *
+ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
+		     u8 *ssid, u8 ssid_len)
+{
+	struct ieee80211_sta_bss *bss;
+
+	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
+	if (!bss)
+		return NULL;
+	atomic_set(&bss->users, 2);
+	memcpy(bss->bssid, bssid, ETH_ALEN);
+	bss->freq = freq;
+	if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) {
+		memcpy(bss->ssid, ssid, ssid_len);
+		bss->ssid_len = ssid_len;
+	}
+
+	spin_lock_bh(&local->sta_bss_lock);
+	/* TODO: order by RSSI? */
+	list_add_tail(&bss->list, &local->sta_bss_list);
+	__ieee80211_rx_bss_hash_add(local, bss);
+	spin_unlock_bh(&local->sta_bss_lock);
+	return bss;
+}
+
+#ifdef CONFIG_MAC80211_MESH
+static struct ieee80211_sta_bss *
+ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
+			  u8 *mesh_cfg, int freq)
+{
+	struct ieee80211_sta_bss *bss;
+
+	spin_lock_bh(&local->sta_bss_lock);
+	bss = local->sta_bss_hash[mesh_id_hash(mesh_id, mesh_id_len)];
+	while (bss) {
+		if (bss_mesh_cfg(bss) &&
+		    !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) &&
+		    bss->freq == freq &&
+		    mesh_id_len == bss->mesh_id_len &&
+		    (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id,
+						 mesh_id_len))) {
+			atomic_inc(&bss->users);
+			break;
+		}
+		bss = bss->hnext;
+	}
+	spin_unlock_bh(&local->sta_bss_lock);
+	return bss;
+}
+
+static struct ieee80211_sta_bss *
+ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
+			  u8 *mesh_cfg, int mesh_config_len, int freq)
+{
+	struct ieee80211_sta_bss *bss;
+
+	if (mesh_config_len != MESH_CFG_LEN)
+		return NULL;
+
+	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
+	if (!bss)
+		return NULL;
+
+	bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC);
+	if (!bss->mesh_cfg) {
+		kfree(bss);
+		return NULL;
+	}
+
+	if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) {
+		bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC);
+		if (!bss->mesh_id) {
+			kfree(bss->mesh_cfg);
+			kfree(bss);
+			return NULL;
+		}
+		memcpy(bss->mesh_id, mesh_id, mesh_id_len);
+	}
+
+	atomic_set(&bss->users, 2);
+	memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN);
+	bss->mesh_id_len = mesh_id_len;
+	bss->freq = freq;
+	spin_lock_bh(&local->sta_bss_lock);
+	/* TODO: order by RSSI? */
+	list_add_tail(&bss->list, &local->sta_bss_list);
+	__ieee80211_rx_bss_hash_add(local, bss);
+	spin_unlock_bh(&local->sta_bss_lock);
+	return bss;
+}
+#endif
+
+static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
+{
+	kfree(bss->ies);
+	kfree(bss_mesh_id(bss));
+	kfree(bss_mesh_cfg(bss));
+	kfree(bss);
+}
+
+void ieee80211_rx_bss_put(struct ieee80211_local *local,
+			  struct ieee80211_sta_bss *bss)
+{
+	local_bh_disable();
+	if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) {
+		local_bh_enable();
+		return;
+	}
+
+	__ieee80211_rx_bss_hash_del(local, bss);
+	list_del(&bss->list);
+	spin_unlock_bh(&local->sta_bss_lock);
+	ieee80211_rx_bss_free(bss);
+}
+
+struct ieee80211_sta_bss *
+ieee80211_bss_info_update(struct ieee80211_local *local,
+			  struct ieee80211_rx_status *rx_status,
+			  struct ieee80211_mgmt *mgmt,
+			  size_t len,
+			  struct ieee802_11_elems *elems,
+			  int freq, bool beacon)
+{
+	struct ieee80211_sta_bss *bss;
+	int clen;
+
+#ifdef CONFIG_MAC80211_MESH
+	if (elems->mesh_config)
+		bss = ieee80211_rx_mesh_bss_get(local, elems->mesh_id,
+				elems->mesh_id_len, elems->mesh_config, freq);
+	else
+#endif
+		bss = ieee80211_rx_bss_get(local, mgmt->bssid, freq,
+					   elems->ssid, elems->ssid_len);
+	if (!bss) {
+#ifdef CONFIG_MAC80211_MESH
+		if (elems->mesh_config)
+			bss = ieee80211_rx_mesh_bss_add(local, elems->mesh_id,
+				elems->mesh_id_len, elems->mesh_config,
+				elems->mesh_config_len, freq);
+		else
+#endif
+			bss = ieee80211_rx_bss_add(local, mgmt->bssid, freq,
+						  elems->ssid, elems->ssid_len);
+		if (!bss)
+			return NULL;
+	} else {
+#if 0
+		/* TODO: order by RSSI? */
+		spin_lock_bh(&local->sta_bss_lock);
+		list_move_tail(&bss->list, &local->sta_bss_list);
+		spin_unlock_bh(&local->sta_bss_lock);
+#endif
+	}
+
+	/* save the ERP value so that it is available at association time */
+	if (elems->erp_info && elems->erp_info_len >= 1) {
+		bss->erp_value = elems->erp_info[0];
+		bss->has_erp_value = 1;
+	}
+
+	bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
+	bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
+
+	if (elems->tim) {
+		struct ieee80211_tim_ie *tim_ie =
+			(struct ieee80211_tim_ie *)elems->tim;
+		bss->dtim_period = tim_ie->dtim_period;
+	}
+
+	/* set default value for buggy APs */
+	if (!elems->tim || bss->dtim_period == 0)
+		bss->dtim_period = 1;
+
+	bss->supp_rates_len = 0;
+	if (elems->supp_rates) {
+		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
+		if (clen > elems->supp_rates_len)
+			clen = elems->supp_rates_len;
+		memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates,
+		       clen);
+		bss->supp_rates_len += clen;
+	}
+	if (elems->ext_supp_rates) {
+		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
+		if (clen > elems->ext_supp_rates_len)
+			clen = elems->ext_supp_rates_len;
+		memcpy(&bss->supp_rates[bss->supp_rates_len],
+		       elems->ext_supp_rates, clen);
+		bss->supp_rates_len += clen;
+	}
+
+	bss->band = rx_status->band;
+
+	bss->timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
+	bss->last_update = jiffies;
+	bss->signal = rx_status->signal;
+	bss->noise = rx_status->noise;
+	bss->qual = rx_status->qual;
+	bss->wmm_used = elems->wmm_param || elems->wmm_info;
+
+	if (!beacon)
+		bss->last_probe_resp = jiffies;
+
+	/*
+	 * For probe responses, or if we don't have any information yet,
+	 * use the IEs from the beacon.
+	 */
+	if (!bss->ies || !beacon) {
+		if (bss->ies == NULL || bss->ies_len < elems->total_len) {
+			kfree(bss->ies);
+			bss->ies = kmalloc(elems->total_len, GFP_ATOMIC);
+		}
+		if (bss->ies) {
+			memcpy(bss->ies, elems->ie_start, elems->total_len);
+			bss->ies_len = elems->total_len;
+		} else
+			bss->ies_len = 0;
+	}
+
+	return bss;
+}
 
 ieee80211_rx_result
 ieee80211_sta_rx_scan(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
-- 
cgit v1.2.3-70-g09d2


From 44d414dbff9d5bf46fc09f2e68567b5848cbbfd3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:44:28 +0200
Subject: mac80211: move some HT code out of mlme.c

Some of the HT code in mlme.c is misplaced:
 * constants/definitions belong to the ieee80211.h header
 * code being used in other modes as well shouldn't be there

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |  18 +++
 net/mac80211/Makefile     |   1 +
 net/mac80211/ht.c         | 328 ++++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/mlme.c       | 321 ---------------------------------------------
 4 files changed, 347 insertions(+), 321 deletions(-)
 create mode 100644 net/mac80211/ht.c

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 333d3ae7688..abc1abc63bf 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -643,6 +643,9 @@ struct ieee80211_mgmt {
 	} u;
 } __attribute__ ((packed));
 
+/* mgmt header + 1 byte category code */
+#define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u)
+
 
 /* Control frames */
 struct ieee80211_rts {
@@ -737,6 +740,21 @@ struct ieee80211_ht_addt_info {
 #define IEEE80211_HT_IE_NON_GF_STA_PRSNT	0x0004
 #define IEEE80211_HT_IE_NON_HT_STA_PRSNT	0x0010
 
+/* block-ack parameters */
+#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002
+#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C
+#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0
+#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000
+#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800
+
+/*
+ * A-PMDU buffer sizes
+ * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
+ */
+#define IEEE80211_MIN_AMPDU_BUF 0x8
+#define IEEE80211_MAX_AMPDU_BUF 0x40
+
+
 /* Spatial Multiplexing Power Save Modes */
 #define WLAN_HT_CAP_SM_PS_STATIC	0
 #define WLAN_HT_CAP_SM_PS_DYNAMIC	1
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 376280a420a..1a7ac50910c 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -8,6 +8,7 @@ mac80211-y := \
 	wep.o \
 	wpa.o \
 	scan.o \
+	ht.o \
 	mlme.o \
 	iface.o \
 	rate.o \
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
new file mode 100644
index 00000000000..5ccf1bc1746
--- /dev/null
+++ b/net/mac80211/ht.c
@@ -0,0 +1,328 @@
+/*
+ * HT handling
+ *
+ * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright 2004, Instant802 Networks, Inc.
+ * Copyright 2005, Devicescape Software, Inc.
+ * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2007-2008, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ieee80211.h>
+#include <net/wireless.h>
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "sta_info.h"
+
+int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
+				   struct ieee80211_ht_info *ht_info)
+{
+
+	if (ht_info == NULL)
+		return -EINVAL;
+
+	memset(ht_info, 0, sizeof(*ht_info));
+
+	if (ht_cap_ie) {
+		u8 ampdu_info = ht_cap_ie->ampdu_params_info;
+
+		ht_info->ht_supported = 1;
+		ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info);
+		ht_info->ampdu_factor =
+			ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR;
+		ht_info->ampdu_density =
+			(ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2;
+		memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16);
+	} else
+		ht_info->ht_supported = 0;
+
+	return 0;
+}
+
+int ieee80211_ht_addt_info_ie_to_ht_bss_info(
+			struct ieee80211_ht_addt_info *ht_add_info_ie,
+			struct ieee80211_ht_bss_info *bss_info)
+{
+	if (bss_info == NULL)
+		return -EINVAL;
+
+	memset(bss_info, 0, sizeof(*bss_info));
+
+	if (ht_add_info_ie) {
+		u16 op_mode;
+		op_mode = le16_to_cpu(ht_add_info_ie->operation_mode);
+
+		bss_info->primary_channel = ht_add_info_ie->control_chan;
+		bss_info->bss_cap = ht_add_info_ie->ht_param;
+		bss_info->bss_op_mode = (u8)(op_mode & 0xff);
+	}
+
+	return 0;
+}
+
+void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, const u8 *da,
+				u16 tid, u8 dialog_token, u16 start_seq_num,
+				u16 agg_size, u16 timeout)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+	u16 capab;
+
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+
+	if (!skb) {
+		printk(KERN_ERR "%s: failed to allocate buffer "
+				"for addba request frame\n", sdata->dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, da, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
+		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
+	else
+		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
+
+	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
+
+	mgmt->u.action.category = WLAN_CATEGORY_BACK;
+	mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ;
+
+	mgmt->u.action.u.addba_req.dialog_token = dialog_token;
+	capab = (u16)(1 << 1);		/* bit 1 aggregation policy */
+	capab |= (u16)(tid << 2); 	/* bit 5:2 TID number */
+	capab |= (u16)(agg_size << 6);	/* bit 15:6 max size of aggergation */
+
+	mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab);
+
+	mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout);
+	mgmt->u.action.u.addba_req.start_seq_num =
+					cpu_to_le16(start_seq_num << 4);
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid,
+			  u16 initiator, u16 reason_code)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+	u16 params;
+
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+
+	if (!skb) {
+		printk(KERN_ERR "%s: failed to allocate buffer "
+					"for delba frame\n", sdata->dev->name);
+		return;
+	}
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, da, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
+		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
+	else
+		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
+
+	skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
+
+	mgmt->u.action.category = WLAN_CATEGORY_BACK;
+	mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA;
+	params = (u16)(initiator << 11); 	/* bit 11 initiator */
+	params |= (u16)(tid << 12); 		/* bit 15:12 TID number */
+
+	mgmt->u.action.u.delba.params = cpu_to_le16(params);
+	mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_bar *bar;
+	u16 bar_control = 0;
+
+	skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom);
+	if (!skb) {
+		printk(KERN_ERR "%s: failed to allocate buffer for "
+			"bar frame\n", sdata->dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar));
+	memset(bar, 0, sizeof(*bar));
+	bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
+					 IEEE80211_STYPE_BACK_REQ);
+	memcpy(bar->ra, ra, ETH_ALEN);
+	memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN);
+	bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL;
+	bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA;
+	bar_control |= (u16)(tid << 12);
+	bar->control = cpu_to_le16(bar_control);
+	bar->start_seq_num = cpu_to_le16(ssn);
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid,
+					u16 initiator, u16 reason)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_hw *hw = &local->hw;
+	struct sta_info *sta;
+	int ret, i;
+	DECLARE_MAC_BUF(mac);
+
+	rcu_read_lock();
+
+	sta = sta_info_get(local, ra);
+	if (!sta) {
+		rcu_read_unlock();
+		return;
+	}
+
+	/* check if TID is in operational state */
+	spin_lock_bh(&sta->lock);
+	if (sta->ampdu_mlme.tid_state_rx[tid]
+				!= HT_AGG_STATE_OPERATIONAL) {
+		spin_unlock_bh(&sta->lock);
+		rcu_read_unlock();
+		return;
+	}
+	sta->ampdu_mlme.tid_state_rx[tid] =
+		HT_AGG_STATE_REQ_STOP_BA_MSK |
+		(initiator << HT_AGG_STATE_INITIATOR_SHIFT);
+	spin_unlock_bh(&sta->lock);
+
+	/* stop HW Rx aggregation. ampdu_action existence
+	 * already verified in session init so we add the BUG_ON */
+	BUG_ON(!local->ops->ampdu_action);
+
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "Rx BA session stop requested for %s tid %u\n",
+				print_mac(mac, ra), tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+
+	ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
+					ra, tid, NULL);
+	if (ret)
+		printk(KERN_DEBUG "HW problem - can not stop rx "
+				"aggregation for tid %d\n", tid);
+
+	/* shutdown timer has not expired */
+	if (initiator != WLAN_BACK_TIMER)
+		del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
+
+	/* check if this is a self generated aggregation halt */
+	if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
+		ieee80211_send_delba(sdata, ra, tid, 0, reason);
+
+	/* free the reordering buffer */
+	for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
+		if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) {
+			/* release the reordered frames */
+			dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]);
+			sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--;
+			sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL;
+		}
+	}
+	/* free resources */
+	kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf);
+	kfree(sta->ampdu_mlme.tid_rx[tid]);
+	sta->ampdu_mlme.tid_rx[tid] = NULL;
+	sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE;
+
+	rcu_read_unlock();
+}
+
+
+/*
+ * After sending add Block Ack request we activated a timer until
+ * add Block Ack response will arrive from the recipient.
+ * If this timer expires sta_addba_resp_timer_expired will be executed.
+ */
+void sta_addba_resp_timer_expired(unsigned long data)
+{
+	/* not an elegant detour, but there is no choice as the timer passes
+	 * only one argument, and both sta_info and TID are needed, so init
+	 * flow in sta_info_create gives the TID as data, while the timer_to_id
+	 * array gives the sta through container_of */
+	u16 tid = *(u8 *)data;
+	struct sta_info *temp_sta = container_of((void *)data,
+		struct sta_info, timer_to_tid[tid]);
+
+	struct ieee80211_local *local = temp_sta->local;
+	struct ieee80211_hw *hw = &local->hw;
+	struct sta_info *sta;
+	u8 *state;
+
+	rcu_read_lock();
+
+	sta = sta_info_get(local, temp_sta->addr);
+	if (!sta) {
+		rcu_read_unlock();
+		return;
+	}
+
+	state = &sta->ampdu_mlme.tid_state_tx[tid];
+	/* check if the TID waits for addBA response */
+	spin_lock_bh(&sta->lock);
+	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
+		spin_unlock_bh(&sta->lock);
+		*state = HT_AGG_STATE_IDLE;
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "timer expired on tid %d but we are not "
+				"expecting addBA response there", tid);
+#endif
+		goto timer_expired_exit;
+	}
+
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid);
+#endif
+
+	/* go through the state check in stop_BA_session */
+	*state = HT_AGG_STATE_OPERATIONAL;
+	spin_unlock_bh(&sta->lock);
+	ieee80211_stop_tx_ba_session(hw, temp_sta->addr, tid,
+				     WLAN_BACK_INITIATOR);
+
+timer_expired_exit:
+	rcu_read_unlock();
+}
+
+void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr)
+{
+	struct ieee80211_local *local = sdata->local;
+	int i;
+
+	for (i = 0; i <  STA_TID_NUM; i++) {
+		ieee80211_stop_tx_ba_session(&local->hw, addr, i,
+					     WLAN_BACK_INITIATOR);
+		ieee80211_sta_stop_rx_ba_session(sdata, addr, i,
+						 WLAN_BACK_RECIPIENT,
+						 WLAN_REASON_QSTA_LEAVE_QBSS);
+	}
+}
+
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index be3292bfabe..f43ca7b88d5 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -48,20 +48,6 @@
 #define IEEE80211_IBSS_MAX_STA_ENTRIES 128
 
 
-/* mgmt header + 1 byte category code */
-#define IEEE80211_MIN_ACTION_SIZE (24 + 1)
-
-#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002
-#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C
-#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0
-#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000
-#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800
-
-/* next values represent the buffer size for A-MPDU frame.
- * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) */
-#define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
-
 /* utils */
 static int ecw2cw(int ecw)
 {
@@ -389,52 +375,6 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
 	return changed;
 }
 
-int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
-				   struct ieee80211_ht_info *ht_info)
-{
-
-	if (ht_info == NULL)
-		return -EINVAL;
-
-	memset(ht_info, 0, sizeof(*ht_info));
-
-	if (ht_cap_ie) {
-		u8 ampdu_info = ht_cap_ie->ampdu_params_info;
-
-		ht_info->ht_supported = 1;
-		ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info);
-		ht_info->ampdu_factor =
-			ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR;
-		ht_info->ampdu_density =
-			(ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2;
-		memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16);
-	} else
-		ht_info->ht_supported = 0;
-
-	return 0;
-}
-
-int ieee80211_ht_addt_info_ie_to_ht_bss_info(
-			struct ieee80211_ht_addt_info *ht_add_info_ie,
-			struct ieee80211_ht_bss_info *bss_info)
-{
-	if (bss_info == NULL)
-		return -EINVAL;
-
-	memset(bss_info, 0, sizeof(*bss_info));
-
-	if (ht_add_info_ie) {
-		u16 op_mode;
-		op_mode = le16_to_cpu(ht_add_info_ie->operation_mode);
-
-		bss_info->primary_channel = ht_add_info_ie->control_chan;
-		bss_info->bss_cap = ht_add_info_ie->ht_param;
-		bss_info->bss_op_mode = (u8)(op_mode & 0xff);
-	}
-
-	return 0;
-}
-
 static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata,
 					struct ieee80211_if_sta *ifsta)
 {
@@ -1118,55 +1058,6 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
 	return;
 }
 
-void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, const u8 *da,
-				u16 tid, u8 dialog_token, u16 start_seq_num,
-				u16 agg_size, u16 timeout)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
-	u16 capab;
-
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
-
-	if (!skb) {
-		printk(KERN_ERR "%s: failed to allocate buffer "
-				"for addba request frame\n", sdata->dev->name);
-		return;
-	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	memcpy(mgmt->da, da, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
-		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
-	else
-		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_ACTION);
-
-	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
-
-	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ;
-
-	mgmt->u.action.u.addba_req.dialog_token = dialog_token;
-	capab = (u16)(1 << 1);		/* bit 1 aggregation policy */
-	capab |= (u16)(tid << 2); 	/* bit 5:2 TID number */
-	capab |= (u16)(agg_size << 6);	/* bit 15:6 max size of aggergation */
-
-	mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab);
-
-	mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout);
-	mgmt->u.action.u.addba_req.start_seq_num =
-					cpu_to_le16(start_seq_num << 4);
-
-	ieee80211_sta_tx(sdata, skb, 0);
-}
-
 /*
  * After accepting the AddBA Request we activated a timer,
  * resetting it after each frame that arrives from the originator.
@@ -1396,149 +1287,6 @@ addba_resp_exit:
 	rcu_read_unlock();
 }
 
-void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid,
-			  u16 initiator, u16 reason_code)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
-	u16 params;
-
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
-
-	if (!skb) {
-		printk(KERN_ERR "%s: failed to allocate buffer "
-					"for delba frame\n", sdata->dev->name);
-		return;
-	}
-
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	memcpy(mgmt->da, da, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
-		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
-	else
-		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_ACTION);
-
-	skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
-
-	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA;
-	params = (u16)(initiator << 11); 	/* bit 11 initiator */
-	params |= (u16)(tid << 12); 		/* bit 15:12 TID number */
-
-	mgmt->u.action.u.delba.params = cpu_to_le16(params);
-	mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
-
-	ieee80211_sta_tx(sdata, skb, 0);
-}
-
-void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_bar *bar;
-	u16 bar_control = 0;
-
-	skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom);
-	if (!skb) {
-		printk(KERN_ERR "%s: failed to allocate buffer for "
-			"bar frame\n", sdata->dev->name);
-		return;
-	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-	bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar));
-	memset(bar, 0, sizeof(*bar));
-	bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
-					 IEEE80211_STYPE_BACK_REQ);
-	memcpy(bar->ra, ra, ETH_ALEN);
-	memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN);
-	bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL;
-	bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA;
-	bar_control |= (u16)(tid << 12);
-	bar->control = cpu_to_le16(bar_control);
-	bar->start_seq_num = cpu_to_le16(ssn);
-
-	ieee80211_sta_tx(sdata, skb, 0);
-}
-
-void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid,
-					u16 initiator, u16 reason)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_hw *hw = &local->hw;
-	struct sta_info *sta;
-	int ret, i;
-	DECLARE_MAC_BUF(mac);
-
-	rcu_read_lock();
-
-	sta = sta_info_get(local, ra);
-	if (!sta) {
-		rcu_read_unlock();
-		return;
-	}
-
-	/* check if TID is in operational state */
-	spin_lock_bh(&sta->lock);
-	if (sta->ampdu_mlme.tid_state_rx[tid]
-				!= HT_AGG_STATE_OPERATIONAL) {
-		spin_unlock_bh(&sta->lock);
-		rcu_read_unlock();
-		return;
-	}
-	sta->ampdu_mlme.tid_state_rx[tid] =
-		HT_AGG_STATE_REQ_STOP_BA_MSK |
-		(initiator << HT_AGG_STATE_INITIATOR_SHIFT);
-	spin_unlock_bh(&sta->lock);
-
-	/* stop HW Rx aggregation. ampdu_action existence
-	 * already verified in session init so we add the BUG_ON */
-	BUG_ON(!local->ops->ampdu_action);
-
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "Rx BA session stop requested for %s tid %u\n",
-				print_mac(mac, ra), tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-
-	ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
-					ra, tid, NULL);
-	if (ret)
-		printk(KERN_DEBUG "HW problem - can not stop rx "
-				"aggregation for tid %d\n", tid);
-
-	/* shutdown timer has not expired */
-	if (initiator != WLAN_BACK_TIMER)
-		del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
-
-	/* check if this is a self generated aggregation halt */
-	if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
-		ieee80211_send_delba(sdata, ra, tid, 0, reason);
-
-	/* free the reordering buffer */
-	for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
-		if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) {
-			/* release the reordered frames */
-			dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]);
-			sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--;
-			sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL;
-		}
-	}
-	/* free resources */
-	kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf);
-	kfree(sta->ampdu_mlme.tid_rx[tid]);
-	sta->ampdu_mlme.tid_rx[tid] = NULL;
-	sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE;
-
-	rcu_read_unlock();
-}
-
-
 static void ieee80211_sta_process_delba(struct ieee80211_sub_if_data *sdata,
 			struct ieee80211_mgmt *mgmt, size_t len)
 {
@@ -1582,75 +1330,6 @@ static void ieee80211_sta_process_delba(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 }
 
-/*
- * After sending add Block Ack request we activated a timer until
- * add Block Ack response will arrive from the recipient.
- * If this timer expires sta_addba_resp_timer_expired will be executed.
- */
-void sta_addba_resp_timer_expired(unsigned long data)
-{
-	/* not an elegant detour, but there is no choice as the timer passes
-	 * only one argument, and both sta_info and TID are needed, so init
-	 * flow in sta_info_create gives the TID as data, while the timer_to_id
-	 * array gives the sta through container_of */
-	u16 tid = *(u8 *)data;
-	struct sta_info *temp_sta = container_of((void *)data,
-		struct sta_info, timer_to_tid[tid]);
-
-	struct ieee80211_local *local = temp_sta->local;
-	struct ieee80211_hw *hw = &local->hw;
-	struct sta_info *sta;
-	u8 *state;
-
-	rcu_read_lock();
-
-	sta = sta_info_get(local, temp_sta->addr);
-	if (!sta) {
-		rcu_read_unlock();
-		return;
-	}
-
-	state = &sta->ampdu_mlme.tid_state_tx[tid];
-	/* check if the TID waits for addBA response */
-	spin_lock_bh(&sta->lock);
-	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
-		spin_unlock_bh(&sta->lock);
-		*state = HT_AGG_STATE_IDLE;
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "timer expired on tid %d but we are not "
-				"expecting addBA response there", tid);
-#endif
-		goto timer_expired_exit;
-	}
-
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid);
-#endif
-
-	/* go through the state check in stop_BA_session */
-	*state = HT_AGG_STATE_OPERATIONAL;
-	spin_unlock_bh(&sta->lock);
-	ieee80211_stop_tx_ba_session(hw, temp_sta->addr, tid,
-				     WLAN_BACK_INITIATOR);
-
-timer_expired_exit:
-	rcu_read_unlock();
-}
-
-void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr)
-{
-	struct ieee80211_local *local = sdata->local;
-	int i;
-
-	for (i = 0; i <  STA_TID_NUM; i++) {
-		ieee80211_stop_tx_ba_session(&local->hw, addr, i,
-					     WLAN_BACK_INITIATOR);
-		ieee80211_sta_stop_rx_ba_session(sdata, addr, i,
-						 WLAN_BACK_RECIPIENT,
-						 WLAN_REASON_QSTA_LEAVE_QBSS);
-	}
-}
-
 static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata,
 					struct ieee80211_msrment_ie *request_ie,
 					const u8 *da, const u8 *bssid,
-- 
cgit v1.2.3-70-g09d2


From bacac545f10f2bf6e5ceff0d8e2b82dfc493602a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:44:29 +0200
Subject: mac80211: move some HT code out of main.c

Now that I've created ht.c, I can move the aggregation
code from main.c into it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ht.c   | 377 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/mac80211/main.c | 373 ---------------------------------------------------
 2 files changed, 375 insertions(+), 375 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 5ccf1bc1746..c72b3fe3ccd 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -2,8 +2,8 @@
  * HT handling
  *
  * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
- * Copyright 2004, Instant802 Networks, Inc.
- * Copyright 2005, Devicescape Software, Inc.
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2008, Intel Corporation
@@ -18,6 +18,7 @@
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
 #include "sta_info.h"
+#include "wme.h"
 
 int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
 				   struct ieee80211_ht_info *ht_info)
@@ -326,3 +327,375 @@ void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8
 	}
 }
 
+int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct sta_info *sta;
+	struct ieee80211_sub_if_data *sdata;
+	u16 start_seq_num;
+	u8 *state;
+	int ret;
+	DECLARE_MAC_BUF(mac);
+
+	if (tid >= STA_TID_NUM)
+		return -EINVAL;
+
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "Open BA session requested for %s tid %u\n",
+				print_mac(mac, ra), tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+
+	rcu_read_lock();
+
+	sta = sta_info_get(local, ra);
+	if (!sta) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "Could not find the station\n");
+#endif
+		ret = -ENOENT;
+		goto exit;
+	}
+
+	spin_lock_bh(&sta->lock);
+
+	/* we have tried too many times, receiver does not want A-MPDU */
+	if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
+		ret = -EBUSY;
+		goto err_unlock_sta;
+	}
+
+	state = &sta->ampdu_mlme.tid_state_tx[tid];
+	/* check if the TID is not in aggregation flow already */
+	if (*state != HT_AGG_STATE_IDLE) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "BA request denied - session is not "
+				 "idle on tid %u\n", tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+		ret = -EAGAIN;
+		goto err_unlock_sta;
+	}
+
+	/* prepare A-MPDU MLME for Tx aggregation */
+	sta->ampdu_mlme.tid_tx[tid] =
+			kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
+	if (!sta->ampdu_mlme.tid_tx[tid]) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		if (net_ratelimit())
+			printk(KERN_ERR "allocate tx mlme to tid %d failed\n",
+					tid);
+#endif
+		ret = -ENOMEM;
+		goto err_unlock_sta;
+	}
+	/* Tx timer */
+	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function =
+			sta_addba_resp_timer_expired;
+	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data =
+			(unsigned long)&sta->timer_to_tid[tid];
+	init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
+
+	/* create a new queue for this aggregation */
+	ret = ieee80211_ht_agg_queue_add(local, sta, tid);
+
+	/* case no queue is available to aggregation
+	 * don't switch to aggregation */
+	if (ret) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "BA request denied - queue unavailable for"
+					" tid %d\n", tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+		goto err_unlock_queue;
+	}
+	sdata = sta->sdata;
+
+	/* Ok, the Addba frame hasn't been sent yet, but if the driver calls the
+	 * call back right away, it must see that the flow has begun */
+	*state |= HT_ADDBA_REQUESTED_MSK;
+
+	/* This is slightly racy because the queue isn't stopped */
+	start_seq_num = sta->tid_seq[tid];
+
+	if (local->ops->ampdu_action)
+		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START,
+						ra, tid, &start_seq_num);
+
+	if (ret) {
+		/* No need to requeue the packets in the agg queue, since we
+		 * held the tx lock: no packet could be enqueued to the newly
+		 * allocated queue */
+		ieee80211_ht_agg_queue_remove(local, sta, tid, 0);
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "BA request denied - HW unavailable for"
+					" tid %d\n", tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+		*state = HT_AGG_STATE_IDLE;
+		goto err_unlock_queue;
+	}
+
+	/* Will put all the packets in the new SW queue */
+	ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
+	spin_unlock_bh(&sta->lock);
+
+	/* send an addBA request */
+	sta->ampdu_mlme.dialog_token_allocator++;
+	sta->ampdu_mlme.tid_tx[tid]->dialog_token =
+			sta->ampdu_mlme.dialog_token_allocator;
+	sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
+
+
+	ieee80211_send_addba_request(sta->sdata, ra, tid,
+			 sta->ampdu_mlme.tid_tx[tid]->dialog_token,
+			 sta->ampdu_mlme.tid_tx[tid]->ssn,
+			 0x40, 5000);
+	/* activate the timer for the recipient's addBA response */
+	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
+				jiffies + ADDBA_RESP_INTERVAL;
+	add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
+#endif
+	goto exit;
+
+err_unlock_queue:
+	kfree(sta->ampdu_mlme.tid_tx[tid]);
+	sta->ampdu_mlme.tid_tx[tid] = NULL;
+	ret = -EBUSY;
+err_unlock_sta:
+	spin_unlock_bh(&sta->lock);
+exit:
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
+
+int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
+				 u8 *ra, u16 tid,
+				 enum ieee80211_back_parties initiator)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct sta_info *sta;
+	u8 *state;
+	int ret = 0;
+	DECLARE_MAC_BUF(mac);
+
+	if (tid >= STA_TID_NUM)
+		return -EINVAL;
+
+	rcu_read_lock();
+	sta = sta_info_get(local, ra);
+	if (!sta) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
+
+	/* check if the TID is in aggregation */
+	state = &sta->ampdu_mlme.tid_state_tx[tid];
+	spin_lock_bh(&sta->lock);
+
+	if (*state != HT_AGG_STATE_OPERATIONAL) {
+		ret = -ENOENT;
+		goto stop_BA_exit;
+	}
+
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "Tx BA session stop requested for %s tid %u\n",
+				print_mac(mac, ra), tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+
+	ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]);
+
+	*state = HT_AGG_STATE_REQ_STOP_BA_MSK |
+		(initiator << HT_AGG_STATE_INITIATOR_SHIFT);
+
+	if (local->ops->ampdu_action)
+		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP,
+						ra, tid, NULL);
+
+	/* case HW denied going back to legacy */
+	if (ret) {
+		WARN_ON(ret != -EBUSY);
+		*state = HT_AGG_STATE_OPERATIONAL;
+		ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
+		goto stop_BA_exit;
+	}
+
+stop_BA_exit:
+	spin_unlock_bh(&sta->lock);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
+
+void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct sta_info *sta;
+	u8 *state;
+	DECLARE_MAC_BUF(mac);
+
+	if (tid >= STA_TID_NUM) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
+				tid, STA_TID_NUM);
+#endif
+		return;
+	}
+
+	rcu_read_lock();
+	sta = sta_info_get(local, ra);
+	if (!sta) {
+		rcu_read_unlock();
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "Could not find station: %s\n",
+				print_mac(mac, ra));
+#endif
+		return;
+	}
+
+	state = &sta->ampdu_mlme.tid_state_tx[tid];
+	spin_lock_bh(&sta->lock);
+
+	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "addBA was not requested yet, state is %d\n",
+				*state);
+#endif
+		spin_unlock_bh(&sta->lock);
+		rcu_read_unlock();
+		return;
+	}
+
+	WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK);
+
+	*state |= HT_ADDBA_DRV_READY_MSK;
+
+	if (*state == HT_AGG_STATE_OPERATIONAL) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
+#endif
+		ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
+	}
+	spin_unlock_bh(&sta->lock);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
+
+void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct sta_info *sta;
+	u8 *state;
+	int agg_queue;
+	DECLARE_MAC_BUF(mac);
+
+	if (tid >= STA_TID_NUM) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
+				tid, STA_TID_NUM);
+#endif
+		return;
+	}
+
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "Stopping Tx BA session for %s tid %d\n",
+				print_mac(mac, ra), tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+
+	rcu_read_lock();
+	sta = sta_info_get(local, ra);
+	if (!sta) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "Could not find station: %s\n",
+				print_mac(mac, ra));
+#endif
+		rcu_read_unlock();
+		return;
+	}
+	state = &sta->ampdu_mlme.tid_state_tx[tid];
+
+	/* NOTE: no need to use sta->lock in this state check, as
+	 * ieee80211_stop_tx_ba_session will let only one stop call to
+	 * pass through per sta/tid
+	 */
+	if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
+#endif
+		rcu_read_unlock();
+		return;
+	}
+
+	if (*state & HT_AGG_STATE_INITIATOR_MSK)
+		ieee80211_send_delba(sta->sdata, ra, tid,
+			WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
+
+	agg_queue = sta->tid_to_tx_q[tid];
+
+	ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
+
+	/* We just requeued the all the frames that were in the
+	 * removed queue, and since we might miss a softirq we do
+	 * netif_schedule_queue.  ieee80211_wake_queue is not used
+	 * here as this queue is not necessarily stopped
+	 */
+	netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue));
+	spin_lock_bh(&sta->lock);
+	*state = HT_AGG_STATE_IDLE;
+	sta->ampdu_mlme.addba_req_num[tid] = 0;
+	kfree(sta->ampdu_mlme.tid_tx[tid]);
+	sta->ampdu_mlme.tid_tx[tid] = NULL;
+	spin_unlock_bh(&sta->lock);
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb);
+
+void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
+				      const u8 *ra, u16 tid)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_ra_tid *ra_tid;
+	struct sk_buff *skb = dev_alloc_skb(0);
+
+	if (unlikely(!skb)) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		if (net_ratelimit())
+			printk(KERN_WARNING "%s: Not enough memory, "
+			       "dropping start BA session", skb->dev->name);
+#endif
+		return;
+	}
+	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
+	memcpy(&ra_tid->ra, ra, ETH_ALEN);
+	ra_tid->tid = tid;
+
+	skb->pkt_type = IEEE80211_ADDBA_MSG;
+	skb_queue_tail(&local->skb_queue, skb);
+	tasklet_schedule(&local->tasklet);
+}
+EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
+
+void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
+				     const u8 *ra, u16 tid)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_ra_tid *ra_tid;
+	struct sk_buff *skb = dev_alloc_skb(0);
+
+	if (unlikely(!skb)) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		if (net_ratelimit())
+			printk(KERN_WARNING "%s: Not enough memory, "
+			       "dropping stop BA session", skb->dev->name);
+#endif
+		return;
+	}
+	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
+	memcpy(&ra_tid->ra, ra, ETH_ALEN);
+	ra_tid->tid = tid;
+
+	skb->pkt_type = IEEE80211_DELBA_MSG;
+	skb_queue_tail(&local->skb_queue, skb);
+	tasklet_schedule(&local->tasklet);
+}
+EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6df4a2e1509..f90254a5948 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -593,379 +593,6 @@ static int ieee80211_stop(struct net_device *dev)
 	return 0;
 }
 
-int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct sta_info *sta;
-	struct ieee80211_sub_if_data *sdata;
-	u16 start_seq_num;
-	u8 *state;
-	int ret;
-	DECLARE_MAC_BUF(mac);
-
-	if (tid >= STA_TID_NUM)
-		return -EINVAL;
-
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "Open BA session requested for %s tid %u\n",
-				print_mac(mac, ra), tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-
-	rcu_read_lock();
-
-	sta = sta_info_get(local, ra);
-	if (!sta) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "Could not find the station\n");
-#endif
-		ret = -ENOENT;
-		goto exit;
-	}
-
-	spin_lock_bh(&sta->lock);
-
-	/* we have tried too many times, receiver does not want A-MPDU */
-	if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
-		ret = -EBUSY;
-		goto err_unlock_sta;
-	}
-
-	state = &sta->ampdu_mlme.tid_state_tx[tid];
-	/* check if the TID is not in aggregation flow already */
-	if (*state != HT_AGG_STATE_IDLE) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "BA request denied - session is not "
-				 "idle on tid %u\n", tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-		ret = -EAGAIN;
-		goto err_unlock_sta;
-	}
-
-	/* prepare A-MPDU MLME for Tx aggregation */
-	sta->ampdu_mlme.tid_tx[tid] =
-			kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
-	if (!sta->ampdu_mlme.tid_tx[tid]) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		if (net_ratelimit())
-			printk(KERN_ERR "allocate tx mlme to tid %d failed\n",
-					tid);
-#endif
-		ret = -ENOMEM;
-		goto err_unlock_sta;
-	}
-	/* Tx timer */
-	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function =
-			sta_addba_resp_timer_expired;
-	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data =
-			(unsigned long)&sta->timer_to_tid[tid];
-	init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
-
-	/* create a new queue for this aggregation */
-	ret = ieee80211_ht_agg_queue_add(local, sta, tid);
-
-	/* case no queue is available to aggregation
-	 * don't switch to aggregation */
-	if (ret) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "BA request denied - queue unavailable for"
-					" tid %d\n", tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-		goto err_unlock_queue;
-	}
-	sdata = sta->sdata;
-
-	/* Ok, the Addba frame hasn't been sent yet, but if the driver calls the
-	 * call back right away, it must see that the flow has begun */
-	*state |= HT_ADDBA_REQUESTED_MSK;
-
-	/* This is slightly racy because the queue isn't stopped */
-	start_seq_num = sta->tid_seq[tid];
-
-	if (local->ops->ampdu_action)
-		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START,
-						ra, tid, &start_seq_num);
-
-	if (ret) {
-		/* No need to requeue the packets in the agg queue, since we
-		 * held the tx lock: no packet could be enqueued to the newly
-		 * allocated queue */
-		ieee80211_ht_agg_queue_remove(local, sta, tid, 0);
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "BA request denied - HW unavailable for"
-					" tid %d\n", tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-		*state = HT_AGG_STATE_IDLE;
-		goto err_unlock_queue;
-	}
-
-	/* Will put all the packets in the new SW queue */
-	ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
-	spin_unlock_bh(&sta->lock);
-
-	/* send an addBA request */
-	sta->ampdu_mlme.dialog_token_allocator++;
-	sta->ampdu_mlme.tid_tx[tid]->dialog_token =
-			sta->ampdu_mlme.dialog_token_allocator;
-	sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
-
-
-	ieee80211_send_addba_request(sta->sdata, ra, tid,
-			 sta->ampdu_mlme.tid_tx[tid]->dialog_token,
-			 sta->ampdu_mlme.tid_tx[tid]->ssn,
-			 0x40, 5000);
-	/* activate the timer for the recipient's addBA response */
-	sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
-				jiffies + ADDBA_RESP_INTERVAL;
-	add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
-#endif
-	goto exit;
-
-err_unlock_queue:
-	kfree(sta->ampdu_mlme.tid_tx[tid]);
-	sta->ampdu_mlme.tid_tx[tid] = NULL;
-	ret = -EBUSY;
-err_unlock_sta:
-	spin_unlock_bh(&sta->lock);
-exit:
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
-
-int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
-				 u8 *ra, u16 tid,
-				 enum ieee80211_back_parties initiator)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct sta_info *sta;
-	u8 *state;
-	int ret = 0;
-	DECLARE_MAC_BUF(mac);
-
-	if (tid >= STA_TID_NUM)
-		return -EINVAL;
-
-	rcu_read_lock();
-	sta = sta_info_get(local, ra);
-	if (!sta) {
-		rcu_read_unlock();
-		return -ENOENT;
-	}
-
-	/* check if the TID is in aggregation */
-	state = &sta->ampdu_mlme.tid_state_tx[tid];
-	spin_lock_bh(&sta->lock);
-
-	if (*state != HT_AGG_STATE_OPERATIONAL) {
-		ret = -ENOENT;
-		goto stop_BA_exit;
-	}
-
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "Tx BA session stop requested for %s tid %u\n",
-				print_mac(mac, ra), tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-
-	ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]);
-
-	*state = HT_AGG_STATE_REQ_STOP_BA_MSK |
-		(initiator << HT_AGG_STATE_INITIATOR_SHIFT);
-
-	if (local->ops->ampdu_action)
-		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP,
-						ra, tid, NULL);
-
-	/* case HW denied going back to legacy */
-	if (ret) {
-		WARN_ON(ret != -EBUSY);
-		*state = HT_AGG_STATE_OPERATIONAL;
-		ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
-		goto stop_BA_exit;
-	}
-
-stop_BA_exit:
-	spin_unlock_bh(&sta->lock);
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
-
-void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct sta_info *sta;
-	u8 *state;
-	DECLARE_MAC_BUF(mac);
-
-	if (tid >= STA_TID_NUM) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
-				tid, STA_TID_NUM);
-#endif
-		return;
-	}
-
-	rcu_read_lock();
-	sta = sta_info_get(local, ra);
-	if (!sta) {
-		rcu_read_unlock();
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "Could not find station: %s\n",
-				print_mac(mac, ra));
-#endif
-		return;
-	}
-
-	state = &sta->ampdu_mlme.tid_state_tx[tid];
-	spin_lock_bh(&sta->lock);
-
-	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "addBA was not requested yet, state is %d\n",
-				*state);
-#endif
-		spin_unlock_bh(&sta->lock);
-		rcu_read_unlock();
-		return;
-	}
-
-	WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK);
-
-	*state |= HT_ADDBA_DRV_READY_MSK;
-
-	if (*state == HT_AGG_STATE_OPERATIONAL) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
-#endif
-		ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
-	}
-	spin_unlock_bh(&sta->lock);
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
-
-void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct sta_info *sta;
-	u8 *state;
-	int agg_queue;
-	DECLARE_MAC_BUF(mac);
-
-	if (tid >= STA_TID_NUM) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
-				tid, STA_TID_NUM);
-#endif
-		return;
-	}
-
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "Stopping Tx BA session for %s tid %d\n",
-				print_mac(mac, ra), tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-
-	rcu_read_lock();
-	sta = sta_info_get(local, ra);
-	if (!sta) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "Could not find station: %s\n",
-				print_mac(mac, ra));
-#endif
-		rcu_read_unlock();
-		return;
-	}
-	state = &sta->ampdu_mlme.tid_state_tx[tid];
-
-	/* NOTE: no need to use sta->lock in this state check, as
-	 * ieee80211_stop_tx_ba_session will let only one stop call to
-	 * pass through per sta/tid
-	 */
-	if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
-#endif
-		rcu_read_unlock();
-		return;
-	}
-
-	if (*state & HT_AGG_STATE_INITIATOR_MSK)
-		ieee80211_send_delba(sta->sdata, ra, tid,
-			WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
-
-	agg_queue = sta->tid_to_tx_q[tid];
-
-	ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
-
-	/* We just requeued the all the frames that were in the
-	 * removed queue, and since we might miss a softirq we do
-	 * netif_schedule_queue.  ieee80211_wake_queue is not used
-	 * here as this queue is not necessarily stopped
-	 */
-	netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue));
-	spin_lock_bh(&sta->lock);
-	*state = HT_AGG_STATE_IDLE;
-	sta->ampdu_mlme.addba_req_num[tid] = 0;
-	kfree(sta->ampdu_mlme.tid_tx[tid]);
-	sta->ampdu_mlme.tid_tx[tid] = NULL;
-	spin_unlock_bh(&sta->lock);
-
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb);
-
-void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
-				      const u8 *ra, u16 tid)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_ra_tid *ra_tid;
-	struct sk_buff *skb = dev_alloc_skb(0);
-
-	if (unlikely(!skb)) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		if (net_ratelimit())
-			printk(KERN_WARNING "%s: Not enough memory, "
-			       "dropping start BA session", skb->dev->name);
-#endif
-		return;
-	}
-	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
-	memcpy(&ra_tid->ra, ra, ETH_ALEN);
-	ra_tid->tid = tid;
-
-	skb->pkt_type = IEEE80211_ADDBA_MSG;
-	skb_queue_tail(&local->skb_queue, skb);
-	tasklet_schedule(&local->tasklet);
-}
-EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
-
-void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
-				     const u8 *ra, u16 tid)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_ra_tid *ra_tid;
-	struct sk_buff *skb = dev_alloc_skb(0);
-
-	if (unlikely(!skb)) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		if (net_ratelimit())
-			printk(KERN_WARNING "%s: Not enough memory, "
-			       "dropping stop BA session", skb->dev->name);
-#endif
-		return;
-	}
-	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
-	memcpy(&ra_tid->ra, ra, ETH_ALEN);
-	ra_tid->tid = tid;
-
-	skb->pkt_type = IEEE80211_DELBA_MSG;
-	skb_queue_tail(&local->skb_queue, skb);
-	tasklet_schedule(&local->tasklet);
-}
-EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
-
 static void ieee80211_set_multicast_list(struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-- 
cgit v1.2.3-70-g09d2


From 9116dd01120e249dc2e84e6edecd7ad7f828680f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:47:23 +0200
Subject: mac80211: clarify scan request

When a scan is requested for non-STA interfaces, we simply fire
off a scan, but for STA interfaces we shouldn't because they
could be in the middle of an association. This clarifies the
corresponding code.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/scan.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 1beefb5ad6f..9f612015012 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -674,23 +674,32 @@ int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 
 int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t ssid_len)
 {
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_sta *ifsta;
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
 		return ieee80211_sta_start_scan(sdata, ssid, ssid_len);
 
+	/*
+	 * STA has a state machine that might need to defer scanning
+	 * while it's trying to associate/authenticate, therefore we
+	 * queue it up to the state machine in that case.
+	 */
+
 	if (local->sta_sw_scanning || local->sta_hw_scanning) {
 		if (local->scan_sdata == sdata)
 			return 0;
 		return -EBUSY;
 	}
 
+	ifsta = &sdata->u.sta;
+
 	ifsta->scan_ssid_len = ssid_len;
 	if (ssid_len)
 		memcpy(ifsta->scan_ssid, ssid, ssid_len);
 	set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request);
 	queue_work(local->hw.workqueue, &ifsta->work);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From a0fe8b3349bdee27065b57cdceb2ca53c1487866 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Sep 2008 17:58:23 +0200
Subject: mac80211: simplify scan start

ieee80211_sta_start_scan() can very well take a non-NULL
ssid pointer with a zero ssid_len.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f43ca7b88d5..f4cbe5cc56c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3052,10 +3052,7 @@ void ieee80211_sta_work(struct work_struct *work)
 	    ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
 	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
 	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
-		if (ifsta->scan_ssid_len)
-			ieee80211_sta_start_scan(sdata, ifsta->scan_ssid, ifsta->scan_ssid_len);
-		else
-			ieee80211_sta_start_scan(sdata, NULL, 0);
+		ieee80211_sta_start_scan(sdata, ifsta->scan_ssid, ifsta->scan_ssid_len);
 		return;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From b079ada7dd11cf82c3157a51c205c3d88321c704 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Sep 2008 09:32:59 +0200
Subject: mac80211: remove useless 'ibss' parameter

Ever since we refactored beaconing to not be controlled by a
fake queue this parameter to ieee80211_sta_def_wmm_params
has been unused.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f4cbe5cc56c..b1815c1ee6d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -188,8 +188,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 
 /* MLME */
 static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
-					 struct ieee80211_sta_bss *bss,
-					 int ibss)
+					 struct ieee80211_sta_bss *bss)
 {
 	struct ieee80211_local *local = sdata->local;
 	int i, have_higher_than_11mbit = 0;
@@ -1849,7 +1848,7 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	}
 	ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates;
 
-	ieee80211_sta_def_wmm_params(sdata, bss, 1);
+	ieee80211_sta_def_wmm_params(sdata, bss);
 
 	ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED;
 	mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
@@ -2932,7 +2931,7 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 			ieee80211_sta_set_ssid(sdata, selected->ssid,
 					       selected->ssid_len);
 		ieee80211_sta_set_bssid(sdata, selected->bssid);
-		ieee80211_sta_def_wmm_params(sdata, selected, 0);
+		ieee80211_sta_def_wmm_params(sdata, selected);
 
 		/* Send out direct probe if no probe resp was received or
 		 * the one we have is outdated
-- 
cgit v1.2.3-70-g09d2


From 9ac19a9084001695479a6d6dd67443cc5fb1df2f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Sep 2008 10:57:09 +0200
Subject: mac80211: reorder frame code in mlme

This reorders all frame sending functions to be at the top of the
file. When reading the file, I tend to be looking at either the
frame code or the state machine, and having them mixed in the file
is confusing. When all frame sending is at the top the remainder
of the file is more readable, in my opinion.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 884 ++++++++++++++++++++++++++--------------------------
 1 file changed, 441 insertions(+), 443 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b1815c1ee6d..e917e1b7263 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -74,6 +74,27 @@ static u8 *ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
 	return NULL;
 }
 
+static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss,
+				      struct ieee80211_supported_band *sband,
+				      u64 *rates)
+{
+	int i, j, count;
+	*rates = 0;
+	count = 0;
+	for (i = 0; i < bss->supp_rates_len; i++) {
+		int rate = (bss->supp_rates[i] & 0x7F) * 5;
+
+		for (j = 0; j < sband->n_bitrates; j++)
+			if (sband->bitrates[j].bitrate == rate) {
+				*rates |= BIT(j);
+				count++;
+				break;
+			}
+	}
+
+	return count;
+}
+
 /* frame sending functions */
 void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		      int encrypt)
@@ -186,6 +207,364 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 	ieee80211_sta_tx(sdata, skb, 0);
 }
 
+static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
+				 struct ieee80211_if_sta *ifsta)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+	u8 *pos, *ies, *ht_add_ie;
+	int i, len, count, rates_len, supp_rates_len;
+	u16 capab;
+	struct ieee80211_sta_bss *bss;
+	int wmm = 0;
+	struct ieee80211_supported_band *sband;
+	u64 rates = 0;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+			    sizeof(*mgmt) + 200 + ifsta->extra_ie_len +
+			    ifsta->ssid_len);
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
+		       "frame\n", sdata->dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+
+	capab = ifsta->capab;
+
+	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) {
+		if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
+			capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
+		if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
+			capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
+	}
+
+	bss = ieee80211_rx_bss_get(local, ifsta->bssid,
+				   local->hw.conf.channel->center_freq,
+				   ifsta->ssid, ifsta->ssid_len);
+	if (bss) {
+		if (bss->capability & WLAN_CAPABILITY_PRIVACY)
+			capab |= WLAN_CAPABILITY_PRIVACY;
+		if (bss->wmm_used)
+			wmm = 1;
+
+		/* get all rates supported by the device and the AP as
+		 * some APs don't like getting a superset of their rates
+		 * in the association request (e.g. D-Link DAP 1353 in
+		 * b-only mode) */
+		rates_len = ieee80211_compatible_rates(bss, sband, &rates);
+
+		if ((bss->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
+		    (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
+			capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
+
+		ieee80211_rx_bss_put(local, bss);
+	} else {
+		rates = ~0;
+		rates_len = sband->n_bitrates;
+	}
+
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+
+	if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) {
+		skb_put(skb, 10);
+		mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+						  IEEE80211_STYPE_REASSOC_REQ);
+		mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
+		mgmt->u.reassoc_req.listen_interval =
+				cpu_to_le16(local->hw.conf.listen_interval);
+		memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid,
+		       ETH_ALEN);
+	} else {
+		skb_put(skb, 4);
+		mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+						  IEEE80211_STYPE_ASSOC_REQ);
+		mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
+		mgmt->u.reassoc_req.listen_interval =
+				cpu_to_le16(local->hw.conf.listen_interval);
+	}
+
+	/* SSID */
+	ies = pos = skb_put(skb, 2 + ifsta->ssid_len);
+	*pos++ = WLAN_EID_SSID;
+	*pos++ = ifsta->ssid_len;
+	memcpy(pos, ifsta->ssid, ifsta->ssid_len);
+
+	/* add all rates which were marked to be used above */
+	supp_rates_len = rates_len;
+	if (supp_rates_len > 8)
+		supp_rates_len = 8;
+
+	len = sband->n_bitrates;
+	pos = skb_put(skb, supp_rates_len + 2);
+	*pos++ = WLAN_EID_SUPP_RATES;
+	*pos++ = supp_rates_len;
+
+	count = 0;
+	for (i = 0; i < sband->n_bitrates; i++) {
+		if (BIT(i) & rates) {
+			int rate = sband->bitrates[i].bitrate;
+			*pos++ = (u8) (rate / 5);
+			if (++count == 8)
+				break;
+		}
+	}
+
+	if (rates_len > count) {
+		pos = skb_put(skb, rates_len - count + 2);
+		*pos++ = WLAN_EID_EXT_SUPP_RATES;
+		*pos++ = rates_len - count;
+
+		for (i++; i < sband->n_bitrates; i++) {
+			if (BIT(i) & rates) {
+				int rate = sband->bitrates[i].bitrate;
+				*pos++ = (u8) (rate / 5);
+			}
+		}
+	}
+
+	if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) {
+		/* 1. power capabilities */
+		pos = skb_put(skb, 4);
+		*pos++ = WLAN_EID_PWR_CAPABILITY;
+		*pos++ = 2;
+		*pos++ = 0; /* min tx power */
+		*pos++ = local->hw.conf.channel->max_power; /* max tx power */
+
+		/* 2. supported channels */
+		/* TODO: get this in reg domain format */
+		pos = skb_put(skb, 2 * sband->n_channels + 2);
+		*pos++ = WLAN_EID_SUPPORTED_CHANNELS;
+		*pos++ = 2 * sband->n_channels;
+		for (i = 0; i < sband->n_channels; i++) {
+			*pos++ = ieee80211_frequency_to_channel(
+					sband->channels[i].center_freq);
+			*pos++ = 1; /* one channel in the subband*/
+		}
+	}
+
+	if (ifsta->extra_ie) {
+		pos = skb_put(skb, ifsta->extra_ie_len);
+		memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len);
+	}
+
+	if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
+		pos = skb_put(skb, 9);
+		*pos++ = WLAN_EID_VENDOR_SPECIFIC;
+		*pos++ = 7; /* len */
+		*pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
+		*pos++ = 0x50;
+		*pos++ = 0xf2;
+		*pos++ = 2; /* WME */
+		*pos++ = 0; /* WME info */
+		*pos++ = 1; /* WME ver */
+		*pos++ = 0;
+	}
+
+	/* wmm support is a must to HT */
+	if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) &&
+	    sband->ht_info.ht_supported &&
+	    (ht_add_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_EXTRA_INFO))) {
+		struct ieee80211_ht_addt_info *ht_add_info =
+			(struct ieee80211_ht_addt_info *)ht_add_ie;
+		u16 cap = sband->ht_info.cap;
+		__le16 tmp;
+		u32 flags = local->hw.conf.channel->flags;
+
+		switch (ht_add_info->ht_param & IEEE80211_HT_IE_CHA_SEC_OFFSET) {
+		case IEEE80211_HT_IE_CHA_SEC_ABOVE:
+			if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) {
+				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH;
+				cap &= ~IEEE80211_HT_CAP_SGI_40;
+			}
+			break;
+		case IEEE80211_HT_IE_CHA_SEC_BELOW:
+			if (flags & IEEE80211_CHAN_NO_FAT_BELOW) {
+				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH;
+				cap &= ~IEEE80211_HT_CAP_SGI_40;
+			}
+			break;
+		}
+
+		tmp = cpu_to_le16(cap);
+		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2);
+		*pos++ = WLAN_EID_HT_CAPABILITY;
+		*pos++ = sizeof(struct ieee80211_ht_cap);
+		memset(pos, 0, sizeof(struct ieee80211_ht_cap));
+		memcpy(pos, &tmp, sizeof(u16));
+		pos += sizeof(u16);
+		/* TODO: needs a define here for << 2 */
+		*pos++ = sband->ht_info.ampdu_factor |
+			 (sband->ht_info.ampdu_density << 2);
+		memcpy(pos, sband->ht_info.supp_mcs_set, 16);
+	}
+
+	kfree(ifsta->assocreq_ies);
+	ifsta->assocreq_ies_len = (skb->data + skb->len) - ies;
+	ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL);
+	if (ifsta->assocreq_ies)
+		memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len);
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+
+static void ieee80211_send_deauth(struct ieee80211_sub_if_data *sdata,
+				  struct ieee80211_if_sta *ifsta, u16 reason)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for deauth "
+		       "frame\n", sdata->dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_DEAUTH);
+	skb_put(skb, 2);
+	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+static void ieee80211_send_disassoc(struct ieee80211_sub_if_data *sdata,
+				    struct ieee80211_if_sta *ifsta, u16 reason)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for disassoc "
+		       "frame\n", sdata->dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_DISASSOC);
+	skb_put(skb, 2);
+	mgmt->u.disassoc.reason_code = cpu_to_le16(reason);
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
+					u8 dialog_token, u16 status, u16 policy,
+					u16 buf_size, u16 timeout)
+{
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+	u16 capab;
+
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer "
+		       "for addba resp frame\n", sdata->dev->name);
+		return;
+	}
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, da, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
+		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
+	else
+		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
+
+	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
+	mgmt->u.action.category = WLAN_CATEGORY_BACK;
+	mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
+	mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
+
+	capab = (u16)(policy << 1);	/* bit 1 aggregation policy */
+	capab |= (u16)(tid << 2); 	/* bit 5:2 TID number */
+	capab |= (u16)(buf_size << 6);	/* bit 15:6 max size of aggregation */
+
+	mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
+	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
+	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata,
+					struct ieee80211_msrment_ie *request_ie,
+					const u8 *da, const u8 *bssid,
+					u8 dialog_token)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *msr_report;
+
+	skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom +
+				sizeof(struct ieee80211_msrment_ie));
+
+	if (!skb) {
+		printk(KERN_ERR "%s: failed to allocate buffer for "
+				"measurement report frame\n", sdata->dev->name);
+		return;
+	}
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24);
+	memset(msr_report, 0, 24);
+	memcpy(msr_report->da, da, ETH_ALEN);
+	memcpy(msr_report->sa, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(msr_report->bssid, bssid, ETH_ALEN);
+	msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+						IEEE80211_STYPE_ACTION);
+
+	skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement));
+	msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT;
+	msr_report->u.action.u.measurement.action_code =
+				WLAN_ACTION_SPCT_MSR_RPRT;
+	msr_report->u.action.u.measurement.dialog_token = dialog_token;
+
+	msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT;
+	msr_report->u.action.u.measurement.length =
+			sizeof(struct ieee80211_msrment_ie);
+
+	memset(&msr_report->u.action.u.measurement.msr_elem, 0,
+		sizeof(struct ieee80211_msrment_ie));
+	msr_report->u.action.u.measurement.msr_elem.token = request_ie->token;
+	msr_report->u.action.u.measurement.msr_elem.mode |=
+			IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
+	msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
 /* MLME */
 static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_sta_bss *bss)
@@ -429,379 +808,85 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 		sdata->bss_conf.dtim_period = bss->dtim_period;
 
 		changed |= ieee80211_handle_bss_capability(sdata, bss);
-
-		ieee80211_rx_bss_put(local, bss);
-	}
-
-	if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
-		changed |= BSS_CHANGED_HT;
-		sdata->bss_conf.assoc_ht = 1;
-		sdata->bss_conf.ht_conf = &conf->ht_conf;
-		sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf;
-	}
-
-	ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET;
-	memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN);
-	ieee80211_sta_send_associnfo(sdata, ifsta);
-
-	ifsta->last_probe = jiffies;
-	ieee80211_led_assoc(local, 1);
-
-	sdata->bss_conf.assoc = 1;
-	ieee80211_bss_info_change_notify(sdata, changed);
-
-	netif_tx_start_all_queues(sdata->dev);
-	netif_carrier_on(sdata->dev);
-
-	ieee80211_sta_send_apinfo(sdata, ifsta);
-}
-
-static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata,
-				   struct ieee80211_if_sta *ifsta)
-{
-	DECLARE_MAC_BUF(mac);
-
-	ifsta->direct_probe_tries++;
-	if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) {
-		printk(KERN_DEBUG "%s: direct probe to AP %s timed out\n",
-		       sdata->dev->name, print_mac(mac, ifsta->bssid));
-		ifsta->state = IEEE80211_STA_MLME_DISABLED;
-		return;
-	}
-
-	printk(KERN_DEBUG "%s: direct probe to AP %s try %d\n",
-			sdata->dev->name, print_mac(mac, ifsta->bssid),
-			ifsta->direct_probe_tries);
-
-	ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE;
-
-	set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifsta->request);
-
-	/* Direct probe is sent to broadcast address as some APs
-	 * will not answer to direct packet in unassociated state.
-	 */
-	ieee80211_send_probe_req(sdata, NULL,
-				 ifsta->ssid, ifsta->ssid_len);
-
-	mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
-}
-
-
-static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata,
-				   struct ieee80211_if_sta *ifsta)
-{
-	DECLARE_MAC_BUF(mac);
-
-	ifsta->auth_tries++;
-	if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) {
-		printk(KERN_DEBUG "%s: authentication with AP %s"
-		       " timed out\n",
-		       sdata->dev->name, print_mac(mac, ifsta->bssid));
-		ifsta->state = IEEE80211_STA_MLME_DISABLED;
-		return;
-	}
-
-	ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
-	printk(KERN_DEBUG "%s: authenticate with AP %s\n",
-	       sdata->dev->name, print_mac(mac, ifsta->bssid));
-
-	ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0);
-
-	mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
-}
-
-static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss,
-				      struct ieee80211_supported_band *sband,
-				      u64 *rates)
-{
-	int i, j, count;
-	*rates = 0;
-	count = 0;
-	for (i = 0; i < bss->supp_rates_len; i++) {
-		int rate = (bss->supp_rates[i] & 0x7F) * 5;
-
-		for (j = 0; j < sband->n_bitrates; j++)
-			if (sband->bitrates[j].bitrate == rate) {
-				*rates |= BIT(j);
-				count++;
-				break;
-			}
-	}
-
-	return count;
-}
-
-static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
-				 struct ieee80211_if_sta *ifsta)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
-	u8 *pos, *ies, *ht_add_ie;
-	int i, len, count, rates_len, supp_rates_len;
-	u16 capab;
-	struct ieee80211_sta_bss *bss;
-	int wmm = 0;
-	struct ieee80211_supported_band *sband;
-	u64 rates = 0;
-
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
-			    sizeof(*mgmt) + 200 + ifsta->extra_ie_len +
-			    ifsta->ssid_len);
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
-		       "frame\n", sdata->dev->name);
-		return;
-	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-
-	capab = ifsta->capab;
-
-	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) {
-		if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
-			capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
-		if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
-			capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
-	}
-
-	bss = ieee80211_rx_bss_get(local, ifsta->bssid,
-				   local->hw.conf.channel->center_freq,
-				   ifsta->ssid, ifsta->ssid_len);
-	if (bss) {
-		if (bss->capability & WLAN_CAPABILITY_PRIVACY)
-			capab |= WLAN_CAPABILITY_PRIVACY;
-		if (bss->wmm_used)
-			wmm = 1;
-
-		/* get all rates supported by the device and the AP as
-		 * some APs don't like getting a superset of their rates
-		 * in the association request (e.g. D-Link DAP 1353 in
-		 * b-only mode) */
-		rates_len = ieee80211_compatible_rates(bss, sband, &rates);
-
-		if ((bss->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
-		    (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
-			capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
-
-		ieee80211_rx_bss_put(local, bss);
-	} else {
-		rates = ~0;
-		rates_len = sband->n_bitrates;
-	}
-
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-
-	if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) {
-		skb_put(skb, 10);
-		mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-						  IEEE80211_STYPE_REASSOC_REQ);
-		mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
-		mgmt->u.reassoc_req.listen_interval =
-				cpu_to_le16(local->hw.conf.listen_interval);
-		memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid,
-		       ETH_ALEN);
-	} else {
-		skb_put(skb, 4);
-		mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-						  IEEE80211_STYPE_ASSOC_REQ);
-		mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
-		mgmt->u.reassoc_req.listen_interval =
-				cpu_to_le16(local->hw.conf.listen_interval);
-	}
-
-	/* SSID */
-	ies = pos = skb_put(skb, 2 + ifsta->ssid_len);
-	*pos++ = WLAN_EID_SSID;
-	*pos++ = ifsta->ssid_len;
-	memcpy(pos, ifsta->ssid, ifsta->ssid_len);
-
-	/* add all rates which were marked to be used above */
-	supp_rates_len = rates_len;
-	if (supp_rates_len > 8)
-		supp_rates_len = 8;
-
-	len = sband->n_bitrates;
-	pos = skb_put(skb, supp_rates_len + 2);
-	*pos++ = WLAN_EID_SUPP_RATES;
-	*pos++ = supp_rates_len;
-
-	count = 0;
-	for (i = 0; i < sband->n_bitrates; i++) {
-		if (BIT(i) & rates) {
-			int rate = sband->bitrates[i].bitrate;
-			*pos++ = (u8) (rate / 5);
-			if (++count == 8)
-				break;
-		}
-	}
-
-	if (rates_len > count) {
-		pos = skb_put(skb, rates_len - count + 2);
-		*pos++ = WLAN_EID_EXT_SUPP_RATES;
-		*pos++ = rates_len - count;
-
-		for (i++; i < sband->n_bitrates; i++) {
-			if (BIT(i) & rates) {
-				int rate = sband->bitrates[i].bitrate;
-				*pos++ = (u8) (rate / 5);
-			}
-		}
-	}
-
-	if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) {
-		/* 1. power capabilities */
-		pos = skb_put(skb, 4);
-		*pos++ = WLAN_EID_PWR_CAPABILITY;
-		*pos++ = 2;
-		*pos++ = 0; /* min tx power */
-		*pos++ = local->hw.conf.channel->max_power; /* max tx power */
-
-		/* 2. supported channels */
-		/* TODO: get this in reg domain format */
-		pos = skb_put(skb, 2 * sband->n_channels + 2);
-		*pos++ = WLAN_EID_SUPPORTED_CHANNELS;
-		*pos++ = 2 * sband->n_channels;
-		for (i = 0; i < sband->n_channels; i++) {
-			*pos++ = ieee80211_frequency_to_channel(
-					sband->channels[i].center_freq);
-			*pos++ = 1; /* one channel in the subband*/
-		}
-	}
-
-	if (ifsta->extra_ie) {
-		pos = skb_put(skb, ifsta->extra_ie_len);
-		memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len);
+
+		ieee80211_rx_bss_put(local, bss);
 	}
 
-	if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
-		pos = skb_put(skb, 9);
-		*pos++ = WLAN_EID_VENDOR_SPECIFIC;
-		*pos++ = 7; /* len */
-		*pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
-		*pos++ = 0x50;
-		*pos++ = 0xf2;
-		*pos++ = 2; /* WME */
-		*pos++ = 0; /* WME info */
-		*pos++ = 1; /* WME ver */
-		*pos++ = 0;
+	if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
+		changed |= BSS_CHANGED_HT;
+		sdata->bss_conf.assoc_ht = 1;
+		sdata->bss_conf.ht_conf = &conf->ht_conf;
+		sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf;
 	}
 
-	/* wmm support is a must to HT */
-	if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) &&
-	    sband->ht_info.ht_supported &&
-	    (ht_add_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_EXTRA_INFO))) {
-		struct ieee80211_ht_addt_info *ht_add_info =
-			(struct ieee80211_ht_addt_info *)ht_add_ie;
-		u16 cap = sband->ht_info.cap;
-		__le16 tmp;
-		u32 flags = local->hw.conf.channel->flags;
+	ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET;
+	memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN);
+	ieee80211_sta_send_associnfo(sdata, ifsta);
 
-		switch (ht_add_info->ht_param & IEEE80211_HT_IE_CHA_SEC_OFFSET) {
-		case IEEE80211_HT_IE_CHA_SEC_ABOVE:
-			if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) {
-				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH;
-				cap &= ~IEEE80211_HT_CAP_SGI_40;
-			}
-			break;
-		case IEEE80211_HT_IE_CHA_SEC_BELOW:
-			if (flags & IEEE80211_CHAN_NO_FAT_BELOW) {
-				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH;
-				cap &= ~IEEE80211_HT_CAP_SGI_40;
-			}
-			break;
-		}
+	ifsta->last_probe = jiffies;
+	ieee80211_led_assoc(local, 1);
 
-		tmp = cpu_to_le16(cap);
-		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2);
-		*pos++ = WLAN_EID_HT_CAPABILITY;
-		*pos++ = sizeof(struct ieee80211_ht_cap);
-		memset(pos, 0, sizeof(struct ieee80211_ht_cap));
-		memcpy(pos, &tmp, sizeof(u16));
-		pos += sizeof(u16);
-		/* TODO: needs a define here for << 2 */
-		*pos++ = sband->ht_info.ampdu_factor |
-			 (sband->ht_info.ampdu_density << 2);
-		memcpy(pos, sband->ht_info.supp_mcs_set, 16);
-	}
+	sdata->bss_conf.assoc = 1;
+	ieee80211_bss_info_change_notify(sdata, changed);
 
-	kfree(ifsta->assocreq_ies);
-	ifsta->assocreq_ies_len = (skb->data + skb->len) - ies;
-	ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL);
-	if (ifsta->assocreq_ies)
-		memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len);
+	netif_tx_start_all_queues(sdata->dev);
+	netif_carrier_on(sdata->dev);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_sta_send_apinfo(sdata, ifsta);
 }
 
-
-static void ieee80211_send_deauth(struct ieee80211_sub_if_data *sdata,
-				  struct ieee80211_if_sta *ifsta, u16 reason)
+static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata,
+				   struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
+	DECLARE_MAC_BUF(mac);
 
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer for deauth "
-		       "frame\n", sdata->dev->name);
+	ifsta->direct_probe_tries++;
+	if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) {
+		printk(KERN_DEBUG "%s: direct probe to AP %s timed out\n",
+		       sdata->dev->name, print_mac(mac, ifsta->bssid));
+		ifsta->state = IEEE80211_STA_MLME_DISABLED;
 		return;
 	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_DEAUTH);
-	skb_put(skb, 2);
-	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
+	printk(KERN_DEBUG "%s: direct probe to AP %s try %d\n",
+			sdata->dev->name, print_mac(mac, ifsta->bssid),
+			ifsta->direct_probe_tries);
 
-	ieee80211_sta_tx(sdata, skb, 0);
-}
+	ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE;
 
-static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata)
-{
-	if (!sdata || !sdata->default_key ||
-	    sdata->default_key->conf.alg != ALG_WEP)
-		return 0;
-	return 1;
+	set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifsta->request);
+
+	/* Direct probe is sent to broadcast address as some APs
+	 * will not answer to direct packet in unassociated state.
+	 */
+	ieee80211_send_probe_req(sdata, NULL,
+				 ifsta->ssid, ifsta->ssid_len);
+
+	mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
 }
 
-static void ieee80211_send_disassoc(struct ieee80211_sub_if_data *sdata,
-				    struct ieee80211_if_sta *ifsta, u16 reason)
+
+static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata,
+				   struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
+	DECLARE_MAC_BUF(mac);
 
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer for disassoc "
-		       "frame\n", sdata->dev->name);
+	ifsta->auth_tries++;
+	if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) {
+		printk(KERN_DEBUG "%s: authentication with AP %s"
+		       " timed out\n",
+		       sdata->dev->name, print_mac(mac, ifsta->bssid));
+		ifsta->state = IEEE80211_STA_MLME_DISABLED;
 		return;
 	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
 
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_DISASSOC);
-	skb_put(skb, 2);
-	mgmt->u.disassoc.reason_code = cpu_to_le16(reason);
+	ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
+	printk(KERN_DEBUG "%s: authenticate with AP %s\n",
+	       sdata->dev->name, print_mac(mac, ifsta->bssid));
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0);
+
+	mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
 }
 
 static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
@@ -864,6 +949,14 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	sta_info_destroy(sta);
 }
 
+static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata)
+{
+	if (!sdata || !sdata->default_key ||
+	    sdata->default_key->conf.alg != ALG_WEP)
+		return 0;
+	return 1;
+}
+
 static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_if_sta *ifsta)
 {
@@ -1009,54 +1102,6 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
 			    elems.challenge_len + 2, 1);
 }
 
-static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
-					u8 dialog_token, u16 status, u16 policy,
-					u16 buf_size, u16 timeout)
-{
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
-	u16 capab;
-
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
-
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer "
-		       "for addba resp frame\n", sdata->dev->name);
-		return;
-	}
-
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	memcpy(mgmt->da, da, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
-		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
-	else
-		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_ACTION);
-
-	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
-	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
-	mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
-
-	capab = (u16)(policy << 1);	/* bit 1 aggregation policy */
-	capab |= (u16)(tid << 2); 	/* bit 5:2 TID number */
-	capab |= (u16)(buf_size << 6);	/* bit 15:6 max size of aggregation */
-
-	mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
-	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
-	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
-
-	ieee80211_sta_tx(sdata, skb, 0);
-
-	return;
-}
-
 /*
  * After accepting the AddBA Request we activated a timer,
  * resetting it after each frame that arrives from the originator.
@@ -1329,53 +1374,6 @@ static void ieee80211_sta_process_delba(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 }
 
-static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata,
-					struct ieee80211_msrment_ie *request_ie,
-					const u8 *da, const u8 *bssid,
-					u8 dialog_token)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *msr_report;
-
-	skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom +
-				sizeof(struct ieee80211_msrment_ie));
-
-	if (!skb) {
-		printk(KERN_ERR "%s: failed to allocate buffer for "
-				"measurement report frame\n", sdata->dev->name);
-		return;
-	}
-
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-	msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24);
-	memset(msr_report, 0, 24);
-	memcpy(msr_report->da, da, ETH_ALEN);
-	memcpy(msr_report->sa, sdata->dev->dev_addr, ETH_ALEN);
-	memcpy(msr_report->bssid, bssid, ETH_ALEN);
-	msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-						IEEE80211_STYPE_ACTION);
-
-	skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement));
-	msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT;
-	msr_report->u.action.u.measurement.action_code =
-				WLAN_ACTION_SPCT_MSR_RPRT;
-	msr_report->u.action.u.measurement.dialog_token = dialog_token;
-
-	msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT;
-	msr_report->u.action.u.measurement.length =
-			sizeof(struct ieee80211_msrment_ie);
-
-	memset(&msr_report->u.action.u.measurement.msr_elem, 0,
-		sizeof(struct ieee80211_msrment_ie));
-	msr_report->u.action.u.measurement.msr_elem.token = request_ie->token;
-	msr_report->u.action.u.measurement.msr_elem.mode |=
-			IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
-	msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
-
-	ieee80211_sta_tx(sdata, skb, 0);
-}
-
 static void ieee80211_sta_process_measurement_req(struct ieee80211_sub_if_data *sdata,
 						struct ieee80211_mgmt *mgmt,
 						size_t len)
-- 
cgit v1.2.3-70-g09d2


From ef422bc0ae934e6a46dfa63f0e27cad83b94234f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Sep 2008 10:58:25 +0200
Subject: mac80211: consolidate deauth/disassoc

deauth and disassoc frames are completely identical so there's
little point in having two functions to send them rather than
one that gets a parameter. This same a bit of code size.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 47 +++++++++++------------------------------------
 1 file changed, 11 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e917e1b7263..62357a20885 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -416,17 +416,18 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 }
 
 
-static void ieee80211_send_deauth(struct ieee80211_sub_if_data *sdata,
-				  struct ieee80211_if_sta *ifsta, u16 reason)
+static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
+					   u16 stype, u16 reason)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
 	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer for deauth "
-		       "frame\n", sdata->dev->name);
+		printk(KERN_DEBUG "%s: failed to allocate buffer for "
+		       "deauth/disassoc frame\n", sdata->dev->name);
 		return;
 	}
 	skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -436,42 +437,14 @@ static void ieee80211_send_deauth(struct ieee80211_sub_if_data *sdata,
 	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_DEAUTH);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype);
 	skb_put(skb, 2);
+	/* u.deauth.reason_code == u.disassoc.reason_code */
 	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
 
 	ieee80211_sta_tx(sdata, skb, 0);
 }
 
-static void ieee80211_send_disassoc(struct ieee80211_sub_if_data *sdata,
-				    struct ieee80211_if_sta *ifsta, u16 reason)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
-
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt));
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer for disassoc "
-		       "frame\n", sdata->dev->name);
-		return;
-	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_DISASSOC);
-	skb_put(skb, 2);
-	mgmt->u.disassoc.reason_code = cpu_to_le16(reason);
-
-	ieee80211_sta_tx(sdata, skb, 0);
-}
-
 static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
 					u8 dialog_token, u16 status, u16 policy,
 					u16 buf_size, u16 timeout)
@@ -919,9 +892,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	if (self_disconnected) {
 		if (deauth)
-			ieee80211_send_deauth(sdata, ifsta, reason);
+			ieee80211_send_deauth_disassoc(sdata,
+				IEEE80211_STYPE_DEAUTH, reason);
 		else
-			ieee80211_send_disassoc(sdata, ifsta, reason);
+			ieee80211_send_deauth_disassoc(sdata,
+				IEEE80211_STYPE_DISASSOC, reason);
 	}
 
 	ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
-- 
cgit v1.2.3-70-g09d2


From 3d35f7c6874d83063d19de0cdb4e503ff4471098 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Sep 2008 12:54:11 +0200
Subject: mac80211: split ieee80211_sta_def_wmm_params

Cleans up the code a bit and prepares for the next patch
that will use the function elsewhere.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 48 +++++++++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 62357a20885..f754ad273f9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -539,13 +539,38 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da
 }
 
 /* MLME */
+static void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_tx_queue_params qparam;
+	int i;
+
+	if (!local->ops->conf_tx)
+		return;
+
+	memset(&qparam, 0, sizeof(qparam));
+
+	qparam.aifs = 2;
+
+	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
+	    !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE))
+		qparam.cw_min = 31;
+	else
+		qparam.cw_min = 15;
+
+	qparam.cw_max = 1023;
+	qparam.txop = 0;
+
+	for (i = 0; i < local_to_hw(local)->queues; i++)
+		local->ops->conf_tx(local_to_hw(local), i, &qparam);
+}
+
 static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_sta_bss *bss)
 {
 	struct ieee80211_local *local = sdata->local;
 	int i, have_higher_than_11mbit = 0;
 
-
 	/* cf. IEEE 802.11 9.2.12 */
 	for (i = 0; i < bss->supp_rates_len; i++)
 		if ((bss->supp_rates[i] & 0x7f) * 5 > 110)
@@ -557,26 +582,7 @@ static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
 	else
 		sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
 
-
-	if (local->ops->conf_tx) {
-		struct ieee80211_tx_queue_params qparam;
-
-		memset(&qparam, 0, sizeof(qparam));
-
-		qparam.aifs = 2;
-
-		if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
-		    !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE))
-			qparam.cw_min = 31;
-		else
-			qparam.cw_min = 15;
-
-		qparam.cw_max = 1023;
-		qparam.txop = 0;
-
-		for (i = 0; i < local_to_hw(local)->queues; i++)
-			local->ops->conf_tx(local_to_hw(local), i, &qparam);
-	}
+	ieee80211_set_wmm_default(sdata);
 }
 
 static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
-- 
cgit v1.2.3-70-g09d2


From 5825fe100d654fff89aa67a1e202af1f8a7f0ad0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Sep 2008 12:56:01 +0200
Subject: mac80211: initialise queue QoS parameters at hw start

When hardware is started it might be in a confused state with
respect to queue QoS parameters. This patch changes mac80211
to set sane defaults right after the hardware is brought up.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/main.c        |  9 ++++++++-
 net/mac80211/mlme.c        | 26 --------------------------
 net/mac80211/util.c        | 26 ++++++++++++++++++++++++++
 4 files changed, 35 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 792c09ca08e..b10e70715db 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -987,6 +987,7 @@ int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 			     int rate, int erp, int short_preamble);
 void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
 				     struct ieee80211_hdr *hdr);
+void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata);
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f90254a5948..6a7f4fae18c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -399,8 +399,15 @@ static int ieee80211_open(struct net_device *dev)
 		atomic_inc(&local->iff_promiscs);
 
 	local->open_count++;
-	if (need_hw_reconfig)
+	if (need_hw_reconfig) {
 		ieee80211_hw_config(local);
+		/*
+		 * set default queue parameters so drivers don't
+		 * need to initialise the hardware if the hardware
+		 * doesn't start up with sane defaults
+		 */
+		ieee80211_set_wmm_default(sdata);
+	}
 
 	/*
 	 * ieee80211_sta_work is disabled while network interface
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f754ad273f9..c22dcd605e4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -539,32 +539,6 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da
 }
 
 /* MLME */
-static void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_tx_queue_params qparam;
-	int i;
-
-	if (!local->ops->conf_tx)
-		return;
-
-	memset(&qparam, 0, sizeof(qparam));
-
-	qparam.aifs = 2;
-
-	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
-	    !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE))
-		qparam.cw_min = 31;
-	else
-		qparam.cw_min = 15;
-
-	qparam.cw_max = 1023;
-	qparam.txop = 0;
-
-	for (i = 0; i < local_to_hw(local)->queues; i++)
-		local->ops->conf_tx(local_to_hw(local), i, &qparam);
-}
-
 static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_sta_bss *bss)
 {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index e19c74cada3..55be3ef5c75 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -572,3 +572,29 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
 		pos += elen;
 	}
 }
+
+void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_tx_queue_params qparam;
+	int i;
+
+	if (!local->ops->conf_tx)
+		return;
+
+	memset(&qparam, 0, sizeof(qparam));
+
+	qparam.aifs = 2;
+
+	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
+	    !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE))
+		qparam.cw_min = 31;
+	else
+		qparam.cw_min = 15;
+
+	qparam.cw_max = 1023;
+	qparam.txop = 0;
+
+	for (i = 0; i < local_to_hw(local)->queues; i++)
+		local->ops->conf_tx(local_to_hw(local), i, &qparam);
+}
-- 
cgit v1.2.3-70-g09d2


From de1ede7ac3bd300f9aa565d0f93f6cf9ba74bb1a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Sep 2008 14:42:50 +0200
Subject: mac80211: make BA session handling independent of STA mode

The aggregation handling isn't dependent on anything related to our
STA-mode implementation, and doesn't need to depend on it for frame
processing. This patch moves the relevant code to ht.c and adds a
hook in rx.c. For now, the relevant action frames are only processed
in STA/IBSS modes, but that's now something we can easily change.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ht.c          | 303 +++++++++++++++++++++++++++++++++++++++-
 net/mac80211/ieee80211_i.h |  39 +++---
 net/mac80211/mlme.c        | 340 ---------------------------------------------
 net/mac80211/rx.c          |  60 ++++++++
 4 files changed, 380 insertions(+), 362 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index c72b3fe3ccd..7e93e1079ad 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -66,9 +66,10 @@ int ieee80211_ht_addt_info_ie_to_ht_bss_info(
 	return 0;
 }
 
-void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, const u8 *da,
-				u16 tid, u8 dialog_token, u16 start_seq_num,
-				u16 agg_size, u16 timeout)
+static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
+					 const u8 *da, u16 tid,
+					 u8 dialog_token, u16 start_seq_num,
+					 u16 agg_size, u16 timeout)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
@@ -115,8 +116,55 @@ void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, const u8
 	ieee80211_sta_tx(sdata, skb, 0);
 }
 
-void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid,
-			  u16 initiator, u16 reason_code)
+static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
+				      u8 dialog_token, u16 status, u16 policy,
+				      u16 buf_size, u16 timeout)
+{
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+	u16 capab;
+
+	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer "
+		       "for addba resp frame\n", sdata->dev->name);
+		return;
+	}
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
+	memset(mgmt, 0, 24);
+	memcpy(mgmt->da, da, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
+		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
+	else
+		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_ACTION);
+
+	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
+	mgmt->u.action.category = WLAN_CATEGORY_BACK;
+	mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
+	mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
+
+	capab = (u16)(policy << 1);	/* bit 1 aggregation policy */
+	capab |= (u16)(tid << 2); 	/* bit 5:2 TID number */
+	capab |= (u16)(buf_size << 6);	/* bit 15:6 max size of aggregation */
+
+	mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
+	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
+	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
+				 const u8 *da, u16 tid,
+				 u16 initiator, u16 reason_code)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
@@ -263,7 +311,7 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r
  * add Block Ack response will arrive from the recipient.
  * If this timer expires sta_addba_resp_timer_expired will be executed.
  */
-void sta_addba_resp_timer_expired(unsigned long data)
+static void sta_addba_resp_timer_expired(unsigned long data)
 {
 	/* not an elegant detour, but there is no choice as the timer passes
 	 * only one argument, and both sta_info and TID are needed, so init
@@ -699,3 +747,246 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
 	tasklet_schedule(&local->tasklet);
 }
 EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
+
+/*
+ * After accepting the AddBA Request we activated a timer,
+ * resetting it after each frame that arrives from the originator.
+ * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
+ */
+static void sta_rx_agg_session_timer_expired(unsigned long data)
+{
+	/* not an elegant detour, but there is no choice as the timer passes
+	 * only one argument, and various sta_info are needed here, so init
+	 * flow in sta_info_create gives the TID as data, while the timer_to_id
+	 * array gives the sta through container_of */
+	u8 *ptid = (u8 *)data;
+	u8 *timer_to_id = ptid - *ptid;
+	struct sta_info *sta = container_of(timer_to_id, struct sta_info,
+					 timer_to_tid[0]);
+
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
+#endif
+	ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr,
+					 (u16)*ptid, WLAN_BACK_TIMER,
+					 WLAN_REASON_QSTA_TIMEOUT);
+}
+
+void ieee80211_process_addba_request(struct ieee80211_local *local,
+				     struct sta_info *sta,
+				     struct ieee80211_mgmt *mgmt,
+				     size_t len)
+{
+	struct ieee80211_hw *hw = &local->hw;
+	struct ieee80211_conf *conf = &hw->conf;
+	struct tid_ampdu_rx *tid_agg_rx;
+	u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
+	u8 dialog_token;
+	int ret = -EOPNOTSUPP;
+	DECLARE_MAC_BUF(mac);
+
+	/* extract session parameters from addba request frame */
+	dialog_token = mgmt->u.action.u.addba_req.dialog_token;
+	timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
+	start_seq_num =
+		le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
+
+	capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
+	ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
+	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
+	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
+
+	status = WLAN_STATUS_REQUEST_DECLINED;
+
+	/* sanity check for incoming parameters:
+	 * check if configuration can support the BA policy
+	 * and if buffer size does not exceeds max value */
+	if (((ba_policy != 1)
+		&& (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA)))
+		|| (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
+		status = WLAN_STATUS_INVALID_QOS_PARAM;
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		if (net_ratelimit())
+			printk(KERN_DEBUG "AddBA Req with bad params from "
+				"%s on tid %u. policy %d, buffer size %d\n",
+				print_mac(mac, mgmt->sa), tid, ba_policy,
+				buf_size);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+		goto end_no_lock;
+	}
+	/* determine default buffer size */
+	if (buf_size == 0) {
+		struct ieee80211_supported_band *sband;
+
+		sband = local->hw.wiphy->bands[conf->channel->band];
+		buf_size = IEEE80211_MIN_AMPDU_BUF;
+		buf_size = buf_size << sband->ht_info.ampdu_factor;
+	}
+
+
+	/* examine state machine */
+	spin_lock_bh(&sta->lock);
+
+	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		if (net_ratelimit())
+			printk(KERN_DEBUG "unexpected AddBA Req from "
+				"%s on tid %u\n",
+				print_mac(mac, mgmt->sa), tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+		goto end;
+	}
+
+	/* prepare A-MPDU MLME for Rx aggregation */
+	sta->ampdu_mlme.tid_rx[tid] =
+			kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC);
+	if (!sta->ampdu_mlme.tid_rx[tid]) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		if (net_ratelimit())
+			printk(KERN_ERR "allocate rx mlme to tid %d failed\n",
+					tid);
+#endif
+		goto end;
+	}
+	/* rx timer */
+	sta->ampdu_mlme.tid_rx[tid]->session_timer.function =
+				sta_rx_agg_session_timer_expired;
+	sta->ampdu_mlme.tid_rx[tid]->session_timer.data =
+				(unsigned long)&sta->timer_to_tid[tid];
+	init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
+
+	tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
+
+	/* prepare reordering buffer */
+	tid_agg_rx->reorder_buf =
+		kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC);
+	if (!tid_agg_rx->reorder_buf) {
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		if (net_ratelimit())
+			printk(KERN_ERR "can not allocate reordering buffer "
+			       "to tid %d\n", tid);
+#endif
+		kfree(sta->ampdu_mlme.tid_rx[tid]);
+		goto end;
+	}
+	memset(tid_agg_rx->reorder_buf, 0,
+		buf_size * sizeof(struct sk_buff *));
+
+	if (local->ops->ampdu_action)
+		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
+					       sta->addr, tid, &start_seq_num);
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+
+	if (ret) {
+		kfree(tid_agg_rx->reorder_buf);
+		kfree(tid_agg_rx);
+		sta->ampdu_mlme.tid_rx[tid] = NULL;
+		goto end;
+	}
+
+	/* change state and send addba resp */
+	sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL;
+	tid_agg_rx->dialog_token = dialog_token;
+	tid_agg_rx->ssn = start_seq_num;
+	tid_agg_rx->head_seq_num = start_seq_num;
+	tid_agg_rx->buf_size = buf_size;
+	tid_agg_rx->timeout = timeout;
+	tid_agg_rx->stored_mpdu_num = 0;
+	status = WLAN_STATUS_SUCCESS;
+end:
+	spin_unlock_bh(&sta->lock);
+
+end_no_lock:
+	ieee80211_send_addba_resp(sta->sdata, sta->addr, tid,
+				  dialog_token, status, 1, buf_size, timeout);
+}
+
+void ieee80211_process_addba_resp(struct ieee80211_local *local,
+				  struct sta_info *sta,
+				  struct ieee80211_mgmt *mgmt,
+				  size_t len)
+{
+	struct ieee80211_hw *hw = &local->hw;
+	u16 capab;
+	u16 tid;
+	u8 *state;
+
+	capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
+	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
+
+	state = &sta->ampdu_mlme.tid_state_tx[tid];
+
+	spin_lock_bh(&sta->lock);
+
+	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
+		spin_unlock_bh(&sta->lock);
+		return;
+	}
+
+	if (mgmt->u.action.u.addba_resp.dialog_token !=
+		sta->ampdu_mlme.tid_tx[tid]->dialog_token) {
+		spin_unlock_bh(&sta->lock);
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+		return;
+	}
+
+	del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+	if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
+			== WLAN_STATUS_SUCCESS) {
+		*state |= HT_ADDBA_RECEIVED_MSK;
+		sta->ampdu_mlme.addba_req_num[tid] = 0;
+
+		if (*state == HT_AGG_STATE_OPERATIONAL)
+			ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
+
+		spin_unlock_bh(&sta->lock);
+	} else {
+		sta->ampdu_mlme.addba_req_num[tid]++;
+		/* this will allow the state check in stop_BA_session */
+		*state = HT_AGG_STATE_OPERATIONAL;
+		spin_unlock_bh(&sta->lock);
+		ieee80211_stop_tx_ba_session(hw, sta->addr, tid,
+					     WLAN_BACK_INITIATOR);
+	}
+}
+
+void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
+			     struct sta_info *sta,
+			     struct ieee80211_mgmt *mgmt, size_t len)
+{
+	struct ieee80211_local *local = sdata->local;
+	u16 tid, params;
+	u16 initiator;
+	DECLARE_MAC_BUF(mac);
+
+	params = le16_to_cpu(mgmt->u.action.u.delba.params);
+	tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12;
+	initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11;
+
+#ifdef CONFIG_MAC80211_HT_DEBUG
+	if (net_ratelimit())
+		printk(KERN_DEBUG "delba from %s (%s) tid %d reason code %d\n",
+			print_mac(mac, mgmt->sa),
+			initiator ? "initiator" : "recipient", tid,
+			mgmt->u.action.u.delba.reason_code);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
+
+	if (initiator == WLAN_BACK_INITIATOR)
+		ieee80211_sta_stop_rx_ba_session(sdata, sta->addr, tid,
+						 WLAN_BACK_INITIATOR, 0);
+	else { /* WLAN_BACK_RECIPIENT */
+		spin_lock_bh(&sta->lock);
+		sta->ampdu_mlme.tid_state_tx[tid] =
+				HT_AGG_STATE_OPERATIONAL;
+		spin_unlock_bh(&sta->lock);
+		ieee80211_stop_tx_ba_session(&local->hw, sta->addr, tid,
+					     WLAN_BACK_RECIPIENT);
+	}
+}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b10e70715db..60ec7ad2764 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -909,22 +909,6 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason);
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 				      u32 changed);
 u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
-int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
-				   struct ieee80211_ht_info *ht_info);
-int ieee80211_ht_addt_info_ie_to_ht_bss_info(
-			struct ieee80211_ht_addt_info *ht_add_info_ie,
-			struct ieee80211_ht_bss_info *bss_info);
-void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, const u8 *da,
-				  u16 tid, u8 dialog_token, u16 start_seq_num,
-				  u16 agg_size, u16 timeout);
-void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid,
-				u16 initiator, u16 reason_code);
-void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn);
-
-void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da,
-				u16 tid, u16 initiator, u16 reason);
-void sta_addba_resp_timer_expired(unsigned long data);
-void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr);
 u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
 			    struct ieee802_11_elems *elems,
 			    enum ieee80211_band band);
@@ -977,6 +961,29 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev);
 
+/* HT */
+int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
+				   struct ieee80211_ht_info *ht_info);
+int ieee80211_ht_addt_info_ie_to_ht_bss_info(
+			struct ieee80211_ht_addt_info *ht_add_info_ie,
+			struct ieee80211_ht_bss_info *bss_info);
+void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn);
+
+void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da,
+				u16 tid, u16 initiator, u16 reason);
+void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr);
+void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
+			     struct sta_info *sta,
+			     struct ieee80211_mgmt *mgmt, size_t len);
+void ieee80211_process_addba_resp(struct ieee80211_local *local,
+				  struct sta_info *sta,
+				  struct ieee80211_mgmt *mgmt,
+				  size_t len);
+void ieee80211_process_addba_request(struct ieee80211_local *local,
+				     struct sta_info *sta,
+				     struct ieee80211_mgmt *mgmt,
+				     size_t len);
+
 /* utility functions/constants */
 extern void *mac80211_wiphy_privid; /* for wiphy privid */
 extern const unsigned char rfc1042_header[6];
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c22dcd605e4..25f90f7ccf3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -445,52 +445,6 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_tx(sdata, skb, 0);
 }
 
-static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
-					u8 dialog_token, u16 status, u16 policy,
-					u16 buf_size, u16 timeout)
-{
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
-	u16 capab;
-
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
-
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer "
-		       "for addba resp frame\n", sdata->dev->name);
-		return;
-	}
-
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
-	memset(mgmt, 0, 24);
-	memcpy(mgmt->da, da, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
-		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
-	else
-		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_ACTION);
-
-	skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
-	mgmt->u.action.category = WLAN_CATEGORY_BACK;
-	mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
-	mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
-
-	capab = (u16)(policy << 1);	/* bit 1 aggregation policy */
-	capab |= (u16)(tid << 2); 	/* bit 5:2 TID number */
-	capab |= (u16)(buf_size << 6);	/* bit 15:6 max size of aggregation */
-
-	mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
-	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
-	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
-
-	ieee80211_sta_tx(sdata, skb, 0);
-}
-
 static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata,
 					struct ieee80211_msrment_ie *request_ie,
 					const u8 *da, const u8 *bssid,
@@ -1057,278 +1011,6 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
 			    elems.challenge_len + 2, 1);
 }
 
-/*
- * After accepting the AddBA Request we activated a timer,
- * resetting it after each frame that arrives from the originator.
- * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
- */
-static void sta_rx_agg_session_timer_expired(unsigned long data)
-{
-	/* not an elegant detour, but there is no choice as the timer passes
-	 * only one argument, and various sta_info are needed here, so init
-	 * flow in sta_info_create gives the TID as data, while the timer_to_id
-	 * array gives the sta through container_of */
-	u8 *ptid = (u8 *)data;
-	u8 *timer_to_id = ptid - *ptid;
-	struct sta_info *sta = container_of(timer_to_id, struct sta_info,
-					 timer_to_tid[0]);
-
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
-#endif
-	ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr,
-					 (u16)*ptid, WLAN_BACK_TIMER,
-					 WLAN_REASON_QSTA_TIMEOUT);
-}
-
-static void ieee80211_sta_process_addba_request(struct ieee80211_local *local,
-						struct ieee80211_mgmt *mgmt,
-						size_t len)
-{
-	struct ieee80211_hw *hw = &local->hw;
-	struct ieee80211_conf *conf = &hw->conf;
-	struct sta_info *sta;
-	struct tid_ampdu_rx *tid_agg_rx;
-	u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
-	u8 dialog_token;
-	int ret = -EOPNOTSUPP;
-	DECLARE_MAC_BUF(mac);
-
-	rcu_read_lock();
-
-	sta = sta_info_get(local, mgmt->sa);
-	if (!sta) {
-		rcu_read_unlock();
-		return;
-	}
-
-	/* extract session parameters from addba request frame */
-	dialog_token = mgmt->u.action.u.addba_req.dialog_token;
-	timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
-	start_seq_num =
-		le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
-
-	capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
-	ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
-	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
-	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
-
-	status = WLAN_STATUS_REQUEST_DECLINED;
-
-	/* sanity check for incoming parameters:
-	 * check if configuration can support the BA policy
-	 * and if buffer size does not exceeds max value */
-	if (((ba_policy != 1)
-		&& (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA)))
-		|| (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
-		status = WLAN_STATUS_INVALID_QOS_PARAM;
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		if (net_ratelimit())
-			printk(KERN_DEBUG "AddBA Req with bad params from "
-				"%s on tid %u. policy %d, buffer size %d\n",
-				print_mac(mac, mgmt->sa), tid, ba_policy,
-				buf_size);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-		goto end_no_lock;
-	}
-	/* determine default buffer size */
-	if (buf_size == 0) {
-		struct ieee80211_supported_band *sband;
-
-		sband = local->hw.wiphy->bands[conf->channel->band];
-		buf_size = IEEE80211_MIN_AMPDU_BUF;
-		buf_size = buf_size << sband->ht_info.ampdu_factor;
-	}
-
-
-	/* examine state machine */
-	spin_lock_bh(&sta->lock);
-
-	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		if (net_ratelimit())
-			printk(KERN_DEBUG "unexpected AddBA Req from "
-				"%s on tid %u\n",
-				print_mac(mac, mgmt->sa), tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-		goto end;
-	}
-
-	/* prepare A-MPDU MLME for Rx aggregation */
-	sta->ampdu_mlme.tid_rx[tid] =
-			kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC);
-	if (!sta->ampdu_mlme.tid_rx[tid]) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		if (net_ratelimit())
-			printk(KERN_ERR "allocate rx mlme to tid %d failed\n",
-					tid);
-#endif
-		goto end;
-	}
-	/* rx timer */
-	sta->ampdu_mlme.tid_rx[tid]->session_timer.function =
-				sta_rx_agg_session_timer_expired;
-	sta->ampdu_mlme.tid_rx[tid]->session_timer.data =
-				(unsigned long)&sta->timer_to_tid[tid];
-	init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
-
-	tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
-
-	/* prepare reordering buffer */
-	tid_agg_rx->reorder_buf =
-		kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC);
-	if (!tid_agg_rx->reorder_buf) {
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		if (net_ratelimit())
-			printk(KERN_ERR "can not allocate reordering buffer "
-			       "to tid %d\n", tid);
-#endif
-		kfree(sta->ampdu_mlme.tid_rx[tid]);
-		goto end;
-	}
-	memset(tid_agg_rx->reorder_buf, 0,
-		buf_size * sizeof(struct sk_buff *));
-
-	if (local->ops->ampdu_action)
-		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
-					       sta->addr, tid, &start_seq_num);
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-
-	if (ret) {
-		kfree(tid_agg_rx->reorder_buf);
-		kfree(tid_agg_rx);
-		sta->ampdu_mlme.tid_rx[tid] = NULL;
-		goto end;
-	}
-
-	/* change state and send addba resp */
-	sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL;
-	tid_agg_rx->dialog_token = dialog_token;
-	tid_agg_rx->ssn = start_seq_num;
-	tid_agg_rx->head_seq_num = start_seq_num;
-	tid_agg_rx->buf_size = buf_size;
-	tid_agg_rx->timeout = timeout;
-	tid_agg_rx->stored_mpdu_num = 0;
-	status = WLAN_STATUS_SUCCESS;
-end:
-	spin_unlock_bh(&sta->lock);
-
-end_no_lock:
-	ieee80211_send_addba_resp(sta->sdata, sta->addr, tid,
-				  dialog_token, status, 1, buf_size, timeout);
-	rcu_read_unlock();
-}
-
-static void ieee80211_sta_process_addba_resp(struct ieee80211_local *local,
-					     struct ieee80211_mgmt *mgmt,
-					     size_t len)
-{
-	struct ieee80211_hw *hw = &local->hw;
-	struct sta_info *sta;
-	u16 capab;
-	u16 tid;
-	u8 *state;
-
-	rcu_read_lock();
-
-	sta = sta_info_get(local, mgmt->sa);
-	if (!sta) {
-		rcu_read_unlock();
-		return;
-	}
-
-	capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
-	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
-
-	state = &sta->ampdu_mlme.tid_state_tx[tid];
-
-	spin_lock_bh(&sta->lock);
-
-	if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
-		spin_unlock_bh(&sta->lock);
-		goto addba_resp_exit;
-	}
-
-	if (mgmt->u.action.u.addba_resp.dialog_token !=
-		sta->ampdu_mlme.tid_tx[tid]->dialog_token) {
-		spin_unlock_bh(&sta->lock);
-#ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-		goto addba_resp_exit;
-	}
-
-	del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-	if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
-			== WLAN_STATUS_SUCCESS) {
-		*state |= HT_ADDBA_RECEIVED_MSK;
-		sta->ampdu_mlme.addba_req_num[tid] = 0;
-
-		if (*state == HT_AGG_STATE_OPERATIONAL)
-			ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
-
-		spin_unlock_bh(&sta->lock);
-	} else {
-		sta->ampdu_mlme.addba_req_num[tid]++;
-		/* this will allow the state check in stop_BA_session */
-		*state = HT_AGG_STATE_OPERATIONAL;
-		spin_unlock_bh(&sta->lock);
-		ieee80211_stop_tx_ba_session(hw, sta->addr, tid,
-					     WLAN_BACK_INITIATOR);
-	}
-
-addba_resp_exit:
-	rcu_read_unlock();
-}
-
-static void ieee80211_sta_process_delba(struct ieee80211_sub_if_data *sdata,
-			struct ieee80211_mgmt *mgmt, size_t len)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct sta_info *sta;
-	u16 tid, params;
-	u16 initiator;
-	DECLARE_MAC_BUF(mac);
-
-	rcu_read_lock();
-
-	sta = sta_info_get(local, mgmt->sa);
-	if (!sta) {
-		rcu_read_unlock();
-		return;
-	}
-
-	params = le16_to_cpu(mgmt->u.action.u.delba.params);
-	tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12;
-	initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11;
-
-#ifdef CONFIG_MAC80211_HT_DEBUG
-	if (net_ratelimit())
-		printk(KERN_DEBUG "delba from %s (%s) tid %d reason code %d\n",
-			print_mac(mac, mgmt->sa),
-			initiator ? "initiator" : "recipient", tid,
-			mgmt->u.action.u.delba.reason_code);
-#endif /* CONFIG_MAC80211_HT_DEBUG */
-
-	if (initiator == WLAN_BACK_INITIATOR)
-		ieee80211_sta_stop_rx_ba_session(sdata, sta->addr, tid,
-						 WLAN_BACK_INITIATOR, 0);
-	else { /* WLAN_BACK_RECIPIENT */
-		spin_lock_bh(&sta->lock);
-		sta->ampdu_mlme.tid_state_tx[tid] =
-				HT_AGG_STATE_OPERATIONAL;
-		spin_unlock_bh(&sta->lock);
-		ieee80211_stop_tx_ba_session(&local->hw, sta->addr, tid,
-					     WLAN_BACK_RECIPIENT);
-	}
-	rcu_read_unlock();
-}
-
 static void ieee80211_sta_process_measurement_req(struct ieee80211_sub_if_data *sdata,
 						struct ieee80211_mgmt *mgmt,
 						size_t len)
@@ -2207,28 +1889,6 @@ static void ieee80211_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 			break;
 		}
 		break;
-	case WLAN_CATEGORY_BACK:
-		switch (mgmt->u.action.u.addba_req.action_code) {
-		case WLAN_ACTION_ADDBA_REQ:
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.addba_req)))
-				break;
-			ieee80211_sta_process_addba_request(local, mgmt, len);
-			break;
-		case WLAN_ACTION_ADDBA_RESP:
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.addba_resp)))
-				break;
-			ieee80211_sta_process_addba_resp(local, mgmt, len);
-			break;
-		case WLAN_ACTION_DELBA:
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.delba)))
-				break;
-			ieee80211_sta_process_delba(sdata, mgmt, len);
-			break;
-		}
-		break;
 	case PLINK_CATEGORY:
 		if (ieee80211_vif_is_mesh(&sdata->vif))
 			mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7e09b30dd39..71cce0bcc5b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1510,6 +1510,65 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
 	return RX_CONTINUE;
 }
 
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
+{
+	struct ieee80211_local *local = rx->local;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
+	int len = rx->skb->len;
+
+	if (!ieee80211_is_action(mgmt->frame_control))
+		return RX_CONTINUE;
+
+	if (!rx->sta)
+		return RX_DROP_MONITOR;
+
+	if (!(rx->flags & IEEE80211_RX_RA_MATCH))
+		return RX_DROP_MONITOR;
+
+	/* all categories we currently handle have action_code */
+	if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+		return RX_DROP_MONITOR;
+
+	/*
+	 * FIXME: revisit this, I'm sure we should handle most
+	 *	  of these frames in other modes as well!
+	 */
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
+	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
+		return RX_DROP_MONITOR;
+
+	switch (mgmt->u.action.category) {
+	case WLAN_CATEGORY_BACK:
+		switch (mgmt->u.action.u.addba_req.action_code) {
+		case WLAN_ACTION_ADDBA_REQ:
+			if (len < (IEEE80211_MIN_ACTION_SIZE +
+				   sizeof(mgmt->u.action.u.addba_req)))
+				return RX_DROP_MONITOR;
+			ieee80211_process_addba_request(local, rx->sta, mgmt, len);
+			break;
+		case WLAN_ACTION_ADDBA_RESP:
+			if (len < (IEEE80211_MIN_ACTION_SIZE +
+				   sizeof(mgmt->u.action.u.addba_resp)))
+				return RX_DROP_MONITOR;
+			ieee80211_process_addba_resp(local, rx->sta, mgmt, len);
+			break;
+		case WLAN_ACTION_DELBA:
+			if (len < (IEEE80211_MIN_ACTION_SIZE +
+				   sizeof(mgmt->u.action.u.delba)))
+				return RX_DROP_MONITOR;
+			ieee80211_process_delba(sdata, rx->sta, mgmt, len);
+			break;
+		}
+		rx->sta->rx_packets++;
+		dev_kfree_skb(rx->skb);
+		return RX_QUEUED;
+	}
+
+	return RX_CONTINUE;
+}
+
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
 {
@@ -1689,6 +1748,7 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
 		CALL_RXH(ieee80211_rx_h_mesh_fwding);
 	CALL_RXH(ieee80211_rx_h_data)
 	CALL_RXH(ieee80211_rx_h_ctrl)
+	CALL_RXH(ieee80211_rx_h_action)
 	CALL_RXH(ieee80211_rx_h_mgmt)
 
 #undef CALL_RXH
-- 
cgit v1.2.3-70-g09d2


From 39192c0bcf556c8521dcf0203714e9d48ac0b9f6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Sep 2008 14:49:03 +0200
Subject: mac80211: move spectrum management code out

Like the HT code, this doesn't depend on the STA-mode implementation
and can be handled entirely independently. There's only stub code
for now, but when it gets filled having it in its own file will be
beneficial.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Makefile      |  1 +
 net/mac80211/ieee80211_i.h |  5 +++
 net/mac80211/mlme.c        | 89 +++-------------------------------------------
 net/mac80211/rx.c          | 22 +++++++++---
 net/mac80211/spectmgmt.c   | 86 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 114 insertions(+), 89 deletions(-)
 create mode 100644 net/mac80211/spectmgmt.c

(limited to 'net')

diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 1a7ac50910c..2dc8f2bff27 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -17,6 +17,7 @@ mac80211-y := \
 	aes_ccm.o \
 	cfg.o \
 	rx.o \
+	spectmgmt.o \
 	tx.o \
 	key.o \
 	util.o \
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 60ec7ad2764..b2ca9e6122c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -984,6 +984,11 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 				     struct ieee80211_mgmt *mgmt,
 				     size_t len);
 
+/* Spectrum management */
+void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
+				       struct ieee80211_mgmt *mgmt,
+				       size_t len);
+
 /* utility functions/constants */
 extern void *mac80211_wiphy_privid; /* for wiphy privid */
 extern const unsigned char rfc1042_header[6];
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 25f90f7ccf3..f1ee9d22cf4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -445,53 +445,6 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_tx(sdata, skb, 0);
 }
 
-static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata,
-					struct ieee80211_msrment_ie *request_ie,
-					const u8 *da, const u8 *bssid,
-					u8 dialog_token)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *msr_report;
-
-	skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom +
-				sizeof(struct ieee80211_msrment_ie));
-
-	if (!skb) {
-		printk(KERN_ERR "%s: failed to allocate buffer for "
-				"measurement report frame\n", sdata->dev->name);
-		return;
-	}
-
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-	msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24);
-	memset(msr_report, 0, 24);
-	memcpy(msr_report->da, da, ETH_ALEN);
-	memcpy(msr_report->sa, sdata->dev->dev_addr, ETH_ALEN);
-	memcpy(msr_report->bssid, bssid, ETH_ALEN);
-	msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-						IEEE80211_STYPE_ACTION);
-
-	skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement));
-	msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT;
-	msr_report->u.action.u.measurement.action_code =
-				WLAN_ACTION_SPCT_MSR_RPRT;
-	msr_report->u.action.u.measurement.dialog_token = dialog_token;
-
-	msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT;
-	msr_report->u.action.u.measurement.length =
-			sizeof(struct ieee80211_msrment_ie);
-
-	memset(&msr_report->u.action.u.measurement.msr_elem, 0,
-		sizeof(struct ieee80211_msrment_ie));
-	msr_report->u.action.u.measurement.msr_elem.token = request_ie->token;
-	msr_report->u.action.u.measurement.msr_elem.mode |=
-			IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
-	msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
-
-	ieee80211_sta_tx(sdata, skb, 0);
-}
-
 /* MLME */
 static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_sta_bss *bss)
@@ -1011,24 +964,6 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
 			    elems.challenge_len + 2, 1);
 }
 
-static void ieee80211_sta_process_measurement_req(struct ieee80211_sub_if_data *sdata,
-						struct ieee80211_mgmt *mgmt,
-						size_t len)
-{
-	/*
-	 * Ignoring measurement request is spec violation.
-	 * Mandatory measurements must be reported optional
-	 * measurements might be refused or reported incapable
-	 * For now just refuse
-	 * TODO: Answer basic measurement as unmeasured
-	 */
-	ieee80211_send_refuse_measurement_request(sdata,
-			&mgmt->u.action.u.measurement.msr_elem,
-			mgmt->sa, mgmt->bssid,
-			mgmt->u.action.u.measurement.dialog_token);
-}
-
-
 static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta,
 				   struct ieee80211_mgmt *mgmt,
@@ -1870,32 +1805,16 @@ static void ieee80211_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 				     size_t len,
 				     struct ieee80211_rx_status *rx_status)
 {
-	struct ieee80211_local *local = sdata->local;
-
-	/* all categories we currently handle have action_code */
-	if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+	/* currently we only handle mesh interface action frames here */
+	if (!ieee80211_vif_is_mesh(&sdata->vif))
 		return;
 
 	switch (mgmt->u.action.category) {
-	case WLAN_CATEGORY_SPECTRUM_MGMT:
-		if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ)
-			break;
-		switch (mgmt->u.action.u.measurement.action_code) {
-		case WLAN_ACTION_SPCT_MSR_REQ:
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.measurement)))
-				break;
-			ieee80211_sta_process_measurement_req(sdata, mgmt, len);
-			break;
-		}
-		break;
 	case PLINK_CATEGORY:
-		if (ieee80211_vif_is_mesh(&sdata->vif))
-			mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
+		mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
 		break;
 	case MESH_PATH_SEL_CATEGORY:
-		if (ieee80211_vif_is_mesh(&sdata->vif))
-			mesh_rx_path_sel_frame(sdata, mgmt, len);
+		mesh_rx_path_sel_frame(sdata, mgmt, len);
 		break;
 	}
 }
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 71cce0bcc5b..d00ace78bf8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1561,12 +1561,26 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			ieee80211_process_delba(sdata, rx->sta, mgmt, len);
 			break;
 		}
-		rx->sta->rx_packets++;
-		dev_kfree_skb(rx->skb);
-		return RX_QUEUED;
+		break;
+	case WLAN_CATEGORY_SPECTRUM_MGMT:
+		if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ)
+			return RX_DROP_MONITOR;
+		switch (mgmt->u.action.u.measurement.action_code) {
+		case WLAN_ACTION_SPCT_MSR_REQ:
+			if (len < (IEEE80211_MIN_ACTION_SIZE +
+				   sizeof(mgmt->u.action.u.measurement)))
+				return RX_DROP_MONITOR;
+			ieee80211_process_measurement_req(sdata, mgmt, len);
+			break;
+		}
+		break;
+	default:
+		return RX_CONTINUE;
 	}
 
-	return RX_CONTINUE;
+	rx->sta->rx_packets++;
+	dev_kfree_skb(rx->skb);
+	return RX_QUEUED;
 }
 
 static ieee80211_rx_result debug_noinline
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
new file mode 100644
index 00000000000..b7129f4ae0c
--- /dev/null
+++ b/net/mac80211/spectmgmt.c
@@ -0,0 +1,86 @@
+/*
+ * spectrum management
+ *
+ * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright 2002-2005, Instant802 Networks, Inc.
+ * Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright 2006-2007  Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2007-2008, Intel Corporation
+ * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ieee80211.h>
+#include <net/wireless.h>
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "sta_info.h"
+#include "wme.h"
+
+static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata,
+					struct ieee80211_msrment_ie *request_ie,
+					const u8 *da, const u8 *bssid,
+					u8 dialog_token)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *msr_report;
+
+	skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom +
+				sizeof(struct ieee80211_msrment_ie));
+
+	if (!skb) {
+		printk(KERN_ERR "%s: failed to allocate buffer for "
+				"measurement report frame\n", sdata->dev->name);
+		return;
+	}
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24);
+	memset(msr_report, 0, 24);
+	memcpy(msr_report->da, da, ETH_ALEN);
+	memcpy(msr_report->sa, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(msr_report->bssid, bssid, ETH_ALEN);
+	msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+						IEEE80211_STYPE_ACTION);
+
+	skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement));
+	msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT;
+	msr_report->u.action.u.measurement.action_code =
+				WLAN_ACTION_SPCT_MSR_RPRT;
+	msr_report->u.action.u.measurement.dialog_token = dialog_token;
+
+	msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT;
+	msr_report->u.action.u.measurement.length =
+			sizeof(struct ieee80211_msrment_ie);
+
+	memset(&msr_report->u.action.u.measurement.msr_elem, 0,
+		sizeof(struct ieee80211_msrment_ie));
+	msr_report->u.action.u.measurement.msr_elem.token = request_ie->token;
+	msr_report->u.action.u.measurement.msr_elem.mode |=
+			IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
+	msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
+
+	ieee80211_sta_tx(sdata, skb, 0);
+}
+
+void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
+				       struct ieee80211_mgmt *mgmt,
+				       size_t len)
+{
+	/*
+	 * Ignoring measurement request is spec violation.
+	 * Mandatory measurements must be reported optional
+	 * measurements might be refused or reported incapable
+	 * For now just refuse
+	 * TODO: Answer basic measurement as unmeasured
+	 */
+	ieee80211_send_refuse_measurement_request(sdata,
+			&mgmt->u.action.u.measurement.msr_elem,
+			mgmt->sa, mgmt->bssid,
+			mgmt->u.action.u.measurement.dialog_token);
+}
-- 
cgit v1.2.3-70-g09d2


From 759ef3eb1eeba8ff7411771e7b9cf6bfd6bb9cfe Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Sep 2008 14:55:09 +0200
Subject: mac80211: make ieee80211_rx_h_mgmt more readable

That function isn't exactly easy to read especially since it
does something in an if branch that continues after the if
because the else returns. Express it in a more readable way.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d00ace78bf8..d0803797902 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1586,20 +1586,20 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
 {
-	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
 
 	if (!(rx->flags & IEEE80211_RX_RA_MATCH))
 		return RX_DROP_MONITOR;
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
-	if ((sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	     sdata->vif.type == IEEE80211_IF_TYPE_IBSS ||
-	     sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) &&
-	    !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
-		ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status);
-	else
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
+	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
+	    sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
+		return RX_DROP_MONITOR;
+
+	if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
 		return RX_DROP_MONITOR;
 
+	ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status);
 	return RX_QUEUED;
 }
 
-- 
cgit v1.2.3-70-g09d2


From e50db65c0dad109aae77c353305853b31555b228 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Sep 2008 15:07:09 +0200
Subject: mac80211: move frame TX function

The ieee80211_sta_tx function isn't MLME code any more,
it's getting used by a lot of code. Move it to utils and
rename it to ieee80211_tx_skb.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ht.c          |  8 ++++----
 net/mac80211/ieee80211_i.h |  4 ++--
 net/mac80211/mesh_hwmp.c   |  4 ++--
 net/mac80211/mesh_plink.c  |  2 +-
 net/mac80211/mlme.c        | 24 +++++-------------------
 net/mac80211/scan.c        |  2 +-
 net/mac80211/spectmgmt.c   |  2 +-
 net/mac80211/util.c        | 14 ++++++++++++++
 8 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 7e93e1079ad..4dc35c9dabc 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -113,7 +113,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
 	mgmt->u.action.u.addba_req.start_seq_num =
 					cpu_to_le16(start_seq_num << 4);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
@@ -159,7 +159,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
 	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
 	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
@@ -202,7 +202,7 @@ static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 	mgmt->u.action.u.delba.params = cpu_to_le16(params);
 	mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn)
@@ -231,7 +231,7 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1
 	bar->control = cpu_to_le16(bar_control);
 	bar->start_seq_num = cpu_to_le16(ssn);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b2ca9e6122c..6f334e4c3d6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -912,8 +912,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
 u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
 			    struct ieee802_11_elems *elems,
 			    enum ieee80211_band band);
-void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
-		int encrypt);
 void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 			      u8 *ssid, size_t ssid_len);
 void ieee802_11_parse_elems(u8 *start, size_t len,
@@ -1000,6 +998,8 @@ int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
 				     struct ieee80211_hdr *hdr);
 void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata);
+void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
+		      int encrypt);
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 59fd7fe377e..210d6b85240 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -149,7 +149,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	pos += ETH_ALEN;
 	memcpy(pos, &dst_dsn, 4);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 	return 0;
 }
 
@@ -198,7 +198,7 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra,
 	pos += ETH_ALEN;
 	memcpy(pos, &dst_dsn, 4);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 	return 0;
 }
 
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 74983cfa729..7356462dee9 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -217,7 +217,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 		memcpy(pos, &reason, 2);
 	}
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 	return 0;
 }
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f1ee9d22cf4..2c06f6965b7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -96,20 +96,6 @@ static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss,
 }
 
 /* frame sending functions */
-void ieee80211_sta_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
-		      int encrypt)
-{
-	skb->dev = sdata->local->mdev;
-	skb_set_mac_header(skb, 0);
-	skb_set_network_header(skb, 0);
-	skb_set_transport_header(skb, 0);
-
-	skb->iif = sdata->dev->ifindex;
-	skb->do_not_encrypt = !encrypt;
-
-	dev_queue_xmit(skb);
-}
-
 static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 				struct ieee80211_if_sta *ifsta,
 				int transaction, u8 *extra, size_t extra_len,
@@ -144,7 +130,7 @@ static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	if (extra)
 		memcpy(skb_put(skb, extra_len), extra, extra_len);
 
-	ieee80211_sta_tx(sdata, skb, encrypt);
+	ieee80211_tx_skb(sdata, skb, encrypt);
 }
 
 void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
@@ -204,7 +190,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 		*pos = rate->bitrate / 5;
 	}
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
@@ -412,7 +398,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 	if (ifsta->assocreq_ies)
 		memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 
@@ -442,7 +428,7 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	/* u.deauth.reason_code == u.disassoc.reason_code */
 	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 /* MLME */
@@ -1796,7 +1782,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: Sending ProbeResp to %s\n",
 	       sdata->dev->name, print_mac(mac, resp->da));
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 static void ieee80211_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 9f612015012..010781b806f 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -421,7 +421,7 @@ static void ieee80211_send_nullfunc(struct ieee80211_local *local,
 	memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN);
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index b7129f4ae0c..f72bad636d8 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -65,7 +65,7 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da
 			IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
 	msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
 
-	ieee80211_sta_tx(sdata, skb, 0);
+	ieee80211_tx_skb(sdata, skb, 0);
 }
 
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 55be3ef5c75..c3a22ab2ad2 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -598,3 +598,17 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
 	for (i = 0; i < local_to_hw(local)->queues; i++)
 		local->ops->conf_tx(local_to_hw(local), i, &qparam);
 }
+
+void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
+		      int encrypt)
+{
+	skb->dev = sdata->local->mdev;
+	skb_set_mac_header(skb, 0);
+	skb_set_network_header(skb, 0);
+	skb_set_transport_header(skb, 0);
+
+	skb->iif = sdata->dev->ifindex;
+	skb->do_not_encrypt = !encrypt;
+
+	dev_queue_xmit(skb);
+}
-- 
cgit v1.2.3-70-g09d2


From aee14ceb5230afb5c17a4e28222ab9734ffd5002 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Tue, 9 Sep 2008 16:33:15 +0200
Subject: mac80211: Reorder debugfs calls during netdev deinit

ieee80211_free_keys() must be called before
ieee80211_debugfs_remove_netdev() in order to make sure that the
possible default_key symlink is removed before attempting to
remove the netdev debugfs directory.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 4a623b8e91f..672cec60a2f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -31,11 +31,11 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 	int flushed;
 	int i;
 
-	ieee80211_debugfs_remove_netdev(sdata);
-
 	/* free extra data */
 	ieee80211_free_keys(sdata);
 
+	ieee80211_debugfs_remove_netdev(sdata);
+
 	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
 		__skb_queue_purge(&sdata->fragments[i].skb_list);
 	sdata->fragment_next = 0;
-- 
cgit v1.2.3-70-g09d2


From 7c6a329e444725f24c02192ac493d8a7cd9fa638 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 12 Sep 2008 03:11:54 +0200
Subject: [Bluetooth] Fix regression from using default link policy

To speed up the Simple Pairing connection setup, the support for the
default link policy has been enabled. This is in contrast to settings
the link policy on every connection setup. Using the default link policy
is the preferred way since there is no need to dynamically change it for
every connection.

For backward compatibility reason and to support old userspace the
HCISETLINKPOL ioctl has been switched over to using hci_request() to
issue the HCI command for setting the default link policy instead of
just storing it in the HCI device structure.

However the hci_request() can only be issued when the device is
brought up. If used on a device that is registered, but still down
it will timeout and fail. This is problematic since the command is
put on the TX queue and the Bluetooth core tries to submit it to
hardware that is not ready yet. The timeout for these requests is
10 seconds and this causes a significant regression when setting up
a new device.

The userspace can perfectly handle a failure of the HCISETLINKPOL
ioctl and will re-submit it later, but the 10 seconds delay causes
a problem. So in case hci_request() is called on a device that is
still down, just fail it with ENETDOWN to indicate what happens.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_core.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index f5b21cb9369..278a3ace14f 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -164,6 +164,9 @@ static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *
 {
 	int ret;
 
+	if (!test_bit(HCI_UP, &hdev->flags))
+		return -ENETDOWN;
+
 	/* Serialize all requests */
 	hci_req_lock(hdev);
 	ret = __hci_request(hdev, req, opt, timeout);
-- 
cgit v1.2.3-70-g09d2


From 1ae4be22f64326a6784acd7083b9590c9f4215a2 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 11 Sep 2008 20:17:05 -0700
Subject: vlan: vlan device not reading gso max size of parent.

The vlan devices are not reading the gso max size of the parent device.  As
a result devices that do not support 64K max gso size are currently
failing.

This issue is seen on 2.6.26 kernels as well and the same patch should be
able to be applied without any issues.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     | 1 +
 net/8021q/vlan_dev.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index b661f47bf10..f0e335aa20d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -394,6 +394,7 @@ static void vlan_transfer_features(struct net_device *dev,
 
 	vlandev->features &= ~dev->vlan_features;
 	vlandev->features |= dev->features & dev->vlan_features;
+	vlandev->gso_max_size = dev->gso_max_size;
 
 	if (old_features != vlandev->features)
 		netdev_features_change(vlandev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4bf014e51f8..97688cdb550 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -607,6 +607,7 @@ static int vlan_dev_init(struct net_device *dev)
 		      (1<<__LINK_STATE_PRESENT);
 
 	dev->features |= real_dev->features & real_dev->vlan_features;
+	dev->gso_max_size = real_dev->gso_max_size;
 
 	/* ipv6 shared card related stuff */
 	dev->dev_id = real_dev->dev_id;
-- 
cgit v1.2.3-70-g09d2


From f262b59becc3f557da6460232abac13706402849 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Fri, 12 Sep 2008 16:16:37 -0700
Subject: net: fix scheduling of dst_gc_task by __dst_free

The dst garbage collector dst_gc_task() may not be scheduled as we
expect it to be in __dst_free().

Indeed, when the dst_gc_timer was replaced by the delayed_work
dst_gc_work, the mod_timer() call used to schedule the garbage
collector at an earlier date was replaced by a schedule_delayed_work()
(see commit 86bba269d08f0c545ae76c90b56727f65d62d57f).

But, the behaviour of mod_timer() and schedule_delayed_work() is
different in the way they handle the delay.

mod_timer() stops the timer and re-arm it with the new given delay,
whereas schedule_delayed_work() only check if the work is already
queued in the workqueue (and queue it (with delay) if it is not)
BUT it does NOT take into account the new delay (even if the new delay
is earlier in time).
schedule_delayed_work() returns 0 if it didn't queue the work,
but we don't check the return code in __dst_free().

If I understand the code in __dst_free() correctly, we want dst_gc_task
to be queued after DST_GC_INC jiffies if we pass the test (and not in
some undetermined time in the future), so I think we should add a call
to cancel_delayed_work() before schedule_delayed_work(). Patch below.

Or we should at least test the return code of schedule_delayed_work(),
and reset the values of dst_garbage.timer_inc and dst_garbage.timer_expires
back to their former values if schedule_delayed_work() failed.
Otherwise the subsequent calls to __dst_free will test the wrong values
and assume wrong thing about when the garbage collector is supposed to
be scheduled.

dst_gc_task() also calls schedule_delayed_work() without checking
its return code (or calling cancel_scheduled_work() first), but it
should fine there: dst_gc_task is the routine of the delayed_work, so
no dst_gc_work should be pending in the queue when it's running.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index fe03266130b..09c1530f468 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -203,6 +203,7 @@ void __dst_free(struct dst_entry * dst)
 	if (dst_garbage.timer_inc > DST_GC_INC) {
 		dst_garbage.timer_inc = DST_GC_INC;
 		dst_garbage.timer_expires = DST_GC_MIN;
+		cancel_delayed_work(&dst_gc_work);
 		schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires);
 	}
 	spin_unlock_bh(&dst_garbage.lock);
-- 
cgit v1.2.3-70-g09d2


From 78d15e82754945ee9821fb491b57faf43abfb9d7 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 12 Sep 2008 16:17:43 -0700
Subject: tcp_ipv6: fix use of uninitialized memory

inet6_rsk() is called on a struct request_sock * before we
have checked whether the socket is an ipv6 socket or a ipv6-
mapped ipv4 socket. The access that triggers this is the
inet_rsk(rsk)->inet6_rsk_offset dereference in inet6_rsk().

This is arguably not a critical error as the inet6_rsk_offset
is only used to compute a pointer which is never really used
(in the code path in question) anyway. But it might be a
latent error, so let's fix it.

Spotted by kmemcheck.

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b585c850a89..e85f377a8f8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1286,7 +1286,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 					  struct request_sock *req,
 					  struct dst_entry *dst)
 {
-	struct inet6_request_sock *treq = inet6_rsk(req);
+	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
@@ -1350,6 +1350,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		return newsk;
 	}
 
+	treq = inet6_rsk(req);
 	opt = np->opt;
 
 	if (sk_acceptq_is_full(sk))
-- 
cgit v1.2.3-70-g09d2


From 92651940ab00dbe64722e908f70d816713d677b7 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 12 Sep 2008 16:29:34 -0700
Subject: pkt_sched: Add multiqueue scheduler support

This patch is intended to add a qdisc to support the new tx multiqueue
architecture by providing a band for each hardware queue.  By doing
this it is possible to support a different qdisc per physical hardware
queue.

This qdisc uses the skb->queue_mapping to select which band to place
the traffic onto.  It then uses a round robin w/ a check to see if the
subqueue is stopped to determine which band to dequeue the packet from.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/multiqueue.txt |  47 +++-
 include/linux/pkt_sched.h               |   7 +
 net/sched/Kconfig                       |   9 +
 net/sched/Makefile                      |   1 +
 net/sched/sch_multiq.c                  | 467 ++++++++++++++++++++++++++++++++
 5 files changed, 530 insertions(+), 1 deletion(-)
 create mode 100644 net/sched/sch_multiq.c

(limited to 'net')

diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
index d391ea63114..5787ee6eca4 100644
--- a/Documentation/networking/multiqueue.txt
+++ b/Documentation/networking/multiqueue.txt
@@ -24,4 +24,49 @@ netif_{start|stop|wake}_subqueue() functions to manage each queue while the
 device is still operational.  netdev->queue_lock is still used when the device
 comes online or when it's completely shut down (unregister_netdev(), etc.).
 
-Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
+
+Section 2: Qdisc support for multiqueue devices
+
+-----------------------------------------------
+
+Currently two qdiscs support multiqueue devices.  The first is the default
+pfifo_fast qdisc.  This qdisc supports one qdisc per hardware queue.  A new
+round-robin qdisc, sch_multiq also supports multiple hardware queues. The
+qdisc is responsible for classifying the skb's and then directing the skb's to
+bands and queues based on the value in skb->queue_mapping.  Use this field in
+the base driver to determine which queue to send the skb to.
+
+sch_multiq has been added for hardware that wishes to avoid unnecessary
+requeuing.  It will cycle though the bands and verify that the hardware queue
+associated with the band is not stopped prior to dequeuing a packet.
+
+On qdisc load, the number of bands is based on the number of queues on the
+hardware.  Once the association is made, any skb with skb->queue_mapping set,
+will be queued to the band associated with the hardware queue.
+
+
+Section 3: Brief howto using MULTIQ for multiqueue devices
+---------------------------------------------------------------
+
+The userspace command 'tc,' part of the iproute2 package, is used to configure
+qdiscs.  To add the MULTIQ qdisc to your network device, assuming the device
+is called eth0, run the following command:
+
+# tc qdisc add dev eth0 root handle 1: multiq
+
+The qdisc will allocate the number of bands to equal the number of queues that
+the device reports, and bring the qdisc online.  Assuming eth0 has 4 Tx
+queues, the band mapping would look like:
+
+band 0 => queue 0
+band 1 => queue 1
+band 2 => queue 2
+band 3 => queue 3
+
+Traffic will begin flowing through each queue if your base device has either
+the default simple_tx_hash or a custom netdev->select_queue() defined.
+
+The behavior of tc filters remains the same.
+
+Author: Alexander Duyck <alexander.h.duyck@intel.com>
+Original Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index e5de421ac7b..5d921fa91a5 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -123,6 +123,13 @@ struct tc_prio_qopt
 	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
 };
 
+/* MULTIQ section */
+
+struct tc_multiq_qopt {
+	__u16	bands;			/* Number of bands */
+	__u16	max_bands;		/* Maximum number of queues */
+};
+
 /* TBF section */
 
 struct tc_tbf_qopt
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9437b27ff84..efaa7a75e7f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -106,6 +106,15 @@ config NET_SCH_PRIO
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_prio.
 
+config NET_SCH_MULTIQ
+	tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
+	---help---
+	  Say Y here if you want to use an n-band queue packet scheduler
+	  to support devices that have multiple hardware transmit queues.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_multiq.
+
 config NET_SCH_RED
 	tristate "Random Early Detection (RED)"
 	---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7df84..3d9b953f7f6 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
 obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
 obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
 obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
+obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
new file mode 100644
index 00000000000..49a8b67ed3b
--- /dev/null
+++ b/net/sched/sch_multiq.c
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+struct multiq_sched_data {
+	u16 bands;
+	u16 max_bands;
+	u16 curband;
+	struct tcf_proto *filter_list;
+	struct Qdisc **queues;
+};
+
+
+static struct Qdisc *
+multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	u32 band;
+	struct tcf_result res;
+	int err;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	err = tc_classify(skb, q->filter_list, &res);
+#ifdef CONFIG_NET_CLS_ACT
+	switch (err) {
+	case TC_ACT_STOLEN:
+	case TC_ACT_QUEUED:
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+	case TC_ACT_SHOT:
+		return NULL;
+	}
+#endif
+	band = skb_get_queue_mapping(skb);
+
+	if (band >= q->bands)
+		return q->queues[0];
+
+	return q->queues[band];
+}
+
+static int
+multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+
+		if (ret & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	ret = qdisc_enqueue(skb, qdisc);
+	if (ret == NET_XMIT_SUCCESS) {
+		sch->bstats.bytes += qdisc_pkt_len(skb);
+		sch->bstats.packets++;
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
+	return ret;
+}
+
+
+static int
+multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+		if (ret & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	ret = qdisc->ops->requeue(skb, qdisc);
+	if (ret == NET_XMIT_SUCCESS) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+		return NET_XMIT_SUCCESS;
+	}
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
+	return ret;
+}
+
+
+static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *qdisc;
+	struct sk_buff *skb;
+	int band;
+
+	for (band = 0; band < q->bands; band++) {
+		/* cycle through bands to ensure fairness */
+		q->curband++;
+		if (q->curband >= q->bands)
+			q->curband = 0;
+
+		/* Check that target subqueue is available before
+		 * pulling an skb to avoid excessive requeues
+		 */
+		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+			qdisc = q->queues[q->curband];
+			skb = qdisc->dequeue(qdisc);
+			if (skb) {
+				sch->q.qlen--;
+				return skb;
+			}
+		}
+	}
+	return NULL;
+
+}
+
+static unsigned int multiq_drop(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int band;
+	unsigned int len;
+	struct Qdisc *qdisc;
+
+	for (band = q->bands-1; band >= 0; band--) {
+		qdisc = q->queues[band];
+		if (qdisc->ops->drop) {
+			len = qdisc->ops->drop(qdisc);
+			if (len != 0) {
+				sch->q.qlen--;
+				return len;
+			}
+		}
+	}
+	return 0;
+}
+
+
+static void
+multiq_reset(struct Qdisc *sch)
+{
+	u16 band;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	for (band = 0; band < q->bands; band++)
+		qdisc_reset(q->queues[band]);
+	sch->q.qlen = 0;
+	q->curband = 0;
+}
+
+static void
+multiq_destroy(struct Qdisc *sch)
+{
+	int band;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(&q->filter_list);
+	for (band = 0; band < q->bands; band++)
+		qdisc_destroy(q->queues[band]);
+
+	kfree(q->queues);
+}
+
+static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct tc_multiq_qopt *qopt;
+	int i;
+
+	if (!netif_is_multiqueue(qdisc_dev(sch)))
+		return -EINVAL;
+	if (nla_len(opt) < sizeof(*qopt))
+		return -EINVAL;
+
+	qopt = nla_data(opt);
+
+	qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
+
+	sch_tree_lock(sch);
+	q->bands = qopt->bands;
+	for (i = q->bands; i < q->max_bands; i++) {
+		struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+		if (child != &noop_qdisc) {
+			qdisc_tree_decrease_qlen(child, child->q.qlen);
+			qdisc_destroy(child);
+		}
+	}
+
+	sch_tree_unlock(sch);
+
+	for (i = 0; i < q->bands; i++) {
+		if (q->queues[i] == &noop_qdisc) {
+			struct Qdisc *child;
+			child = qdisc_create_dflt(qdisc_dev(sch),
+						  sch->dev_queue,
+						  &pfifo_qdisc_ops,
+						  TC_H_MAKE(sch->handle,
+							    i + 1));
+			if (child) {
+				sch_tree_lock(sch);
+				child = xchg(&q->queues[i], child);
+
+				if (child != &noop_qdisc) {
+					qdisc_tree_decrease_qlen(child,
+								 child->q.qlen);
+					qdisc_destroy(child);
+				}
+				sch_tree_unlock(sch);
+			}
+		}
+	}
+	return 0;
+}
+
+static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	q->queues = NULL;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	q->max_bands = qdisc_dev(sch)->num_tx_queues;
+
+	q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
+	if (!q->queues)
+		return -ENOBUFS;
+	for (i = 0; i < q->max_bands; i++)
+		q->queues[i] = &noop_qdisc;
+
+	return multiq_tune(sch, opt);
+}
+
+static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tc_multiq_qopt opt;
+
+	opt.bands = q->bands;
+	opt.max_bands = q->max_bands;
+
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		      struct Qdisc **old)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return -EINVAL;
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = q->queues[band];
+	q->queues[band] = new;
+	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+	qdisc_reset(*old);
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *
+multiq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return NULL;
+
+	return q->queues[band];
+}
+
+static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = TC_H_MIN(classid);
+
+	if (band - 1 >= q->bands)
+		return 0;
+	return band;
+}
+
+static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
+				 u32 classid)
+{
+	return multiq_get(sch, classid);
+}
+
+
+static void multiq_put(struct Qdisc *q, unsigned long cl)
+{
+	return;
+}
+
+static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent,
+			 struct nlattr **tca, unsigned long *arg)
+{
+	unsigned long cl = *arg;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+static int multiq_delete(struct Qdisc *sch, unsigned long cl)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+
+static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	if (q->queues[cl-1])
+		tcm->tcm_info = q->queues[cl-1]->handle;
+	return 0;
+}
+
+static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				 struct gnet_dump *d)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *cl_q;
+
+	cl_q = q->queues[cl - 1];
+	if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
+	    gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
+		return -1;
+
+	return 0;
+}
+
+static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int band;
+
+	if (arg->stop)
+		return;
+
+	for (band = 0; band < q->bands; band++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, band+1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static const struct Qdisc_class_ops multiq_class_ops = {
+	.graft		=	multiq_graft,
+	.leaf		=	multiq_leaf,
+	.get		=	multiq_get,
+	.put		=	multiq_put,
+	.change		=	multiq_change,
+	.delete		=	multiq_delete,
+	.walk		=	multiq_walk,
+	.tcf_chain	=	multiq_find_tcf,
+	.bind_tcf	=	multiq_bind,
+	.unbind_tcf	=	multiq_put,
+	.dump		=	multiq_dump_class,
+	.dump_stats	=	multiq_dump_class_stats,
+};
+
+static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
+	.next		=	NULL,
+	.cl_ops		=	&multiq_class_ops,
+	.id		=	"multiq",
+	.priv_size	=	sizeof(struct multiq_sched_data),
+	.enqueue	=	multiq_enqueue,
+	.dequeue	=	multiq_dequeue,
+	.requeue	=	multiq_requeue,
+	.drop		=	multiq_drop,
+	.init		=	multiq_init,
+	.reset		=	multiq_reset,
+	.destroy	=	multiq_destroy,
+	.change		=	multiq_tune,
+	.dump		=	multiq_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init multiq_module_init(void)
+{
+	return register_qdisc(&multiq_qdisc_ops);
+}
+
+static void __exit multiq_module_exit(void)
+{
+	unregister_qdisc(&multiq_qdisc_ops);
+}
+
+module_init(multiq_module_init)
+module_exit(multiq_module_exit)
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-70-g09d2


From ca9b0e27e072be4cef2f5f0cbc0b0fd94eae3520 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 12 Sep 2008 16:30:20 -0700
Subject: pkt_action: add new action skbedit

This new action will have the ability to change the priority and/or
queue_mapping fields on an sk_buff.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/multiqueue.txt |   9 +-
 include/linux/tc_act/Kbuild             |   1 +
 include/linux/tc_act/tc_skbedit.h       |  44 +++++++
 include/net/tc_act/tc_skbedit.h         |  34 ++++++
 net/sched/Kconfig                       |  11 ++
 net/sched/Makefile                      |   1 +
 net/sched/act_skbedit.c                 | 203 ++++++++++++++++++++++++++++++++
 7 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/tc_act/tc_skbedit.h
 create mode 100644 include/net/tc_act/tc_skbedit.h
 create mode 100644 net/sched/act_skbedit.c

(limited to 'net')

diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
index 5787ee6eca4..10113ffa807 100644
--- a/Documentation/networking/multiqueue.txt
+++ b/Documentation/networking/multiqueue.txt
@@ -66,7 +66,14 @@ band 3 => queue 3
 Traffic will begin flowing through each queue if your base device has either
 the default simple_tx_hash or a custom netdev->select_queue() defined.
 
-The behavior of tc filters remains the same.
+The behavior of tc filters remains the same.  However a new tc action,
+skbedit, has been added.  Assuming you wanted to route all traffic to a
+specific host, for example 192.168.0.3, though a specific queue you could use
+this action and establish a filter such as:
+
+tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
+	match ip dst 192.168.0.3 \
+	action skbedit queue_mapping 3
 
 Author: Alexander Duyck <alexander.h.duyck@intel.com>
 Original Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild
index 6dac0d7365c..76990937f4c 100644
--- a/include/linux/tc_act/Kbuild
+++ b/include/linux/tc_act/Kbuild
@@ -3,3 +3,4 @@ header-y += tc_ipt.h
 header-y += tc_mirred.h
 header-y += tc_pedit.h
 header-y += tc_nat.h
+header-y += tc_skbedit.h
diff --git a/include/linux/tc_act/tc_skbedit.h b/include/linux/tc_act/tc_skbedit.h
new file mode 100644
index 00000000000..a14e461a7af
--- /dev/null
+++ b/include/linux/tc_act/tc_skbedit.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#ifndef __LINUX_TC_SKBEDIT_H
+#define __LINUX_TC_SKBEDIT_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_SKBEDIT 11
+
+#define SKBEDIT_F_PRIORITY		0x1
+#define SKBEDIT_F_QUEUE_MAPPING		0x2
+
+struct tc_skbedit {
+	tc_gen;
+};
+
+enum {
+	TCA_SKBEDIT_UNSPEC,
+	TCA_SKBEDIT_TM,
+	TCA_SKBEDIT_PARMS,
+	TCA_SKBEDIT_PRIORITY,
+	TCA_SKBEDIT_QUEUE_MAPPING,
+	__TCA_SKBEDIT_MAX
+};
+#define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
+
+#endif
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
new file mode 100644
index 00000000000..6abb3ed3ebf
--- /dev/null
+++ b/include/net/tc_act/tc_skbedit.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#ifndef __NET_TC_SKBEDIT_H
+#define __NET_TC_SKBEDIT_H
+
+#include <net/act_api.h>
+
+struct tcf_skbedit {
+	struct tcf_common	common;
+	u32			flags;
+	u32     		priority;
+	u16			queue_mapping;
+};
+#define to_skbedit(pc) \
+	container_of(pc, struct tcf_skbedit, common)
+
+#endif /* __NET_TC_SKBEDIT_H */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index efaa7a75e7f..6767e54155d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -485,6 +485,17 @@ config NET_ACT_SIMP
 	  To compile this code as a module, choose M here: the
 	  module will be called simple.
 
+config NET_ACT_SKBEDIT
+        tristate "SKB Editing"
+        depends on NET_CLS_ACT
+        ---help---
+	  Say Y here to change skb priority or queue_mapping settings.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called skbedit.
+
 config NET_CLS_IND
 	bool "Incoming device classification"
 	depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 3d9b953f7f6..e60c9925b26 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT)	+= act_ipt.o
 obj-$(CONFIG_NET_ACT_NAT)	+= act_nat.o
 obj-$(CONFIG_NET_ACT_PEDIT)	+= act_pedit.o
 obj-$(CONFIG_NET_ACT_SIMP)	+= act_simple.o
+obj-$(CONFIG_NET_ACT_SKBEDIT)	+= act_skbedit.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
new file mode 100644
index 00000000000..fe9777e77f3
--- /dev/null
+++ b/net/sched/act_skbedit.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_skbedit.h>
+
+#define SKBEDIT_TAB_MASK     15
+static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
+static u32 skbedit_idx_gen;
+static DEFINE_RWLOCK(skbedit_lock);
+
+static struct tcf_hashinfo skbedit_hash_info = {
+	.htab	=	tcf_skbedit_ht,
+	.hmask	=	SKBEDIT_TAB_MASK,
+	.lock	=	&skbedit_lock,
+};
+
+static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
+		       struct tcf_result *res)
+{
+	struct tcf_skbedit *d = a->priv;
+
+	spin_lock(&d->tcf_lock);
+	d->tcf_tm.lastuse = jiffies;
+	d->tcf_bstats.bytes += qdisc_pkt_len(skb);
+	d->tcf_bstats.packets++;
+
+	if (d->flags & SKBEDIT_F_PRIORITY)
+		skb->priority = d->priority;
+	if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
+	    skb->dev->real_num_tx_queues > d->queue_mapping)
+		skb_set_queue_mapping(skb, d->queue_mapping);
+
+	spin_unlock(&d->tcf_lock);
+	return d->tcf_action;
+}
+
+static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
+	[TCA_SKBEDIT_PARMS]		= { .len = sizeof(struct tc_skbedit) },
+	[TCA_SKBEDIT_PRIORITY]		= { .len = sizeof(u32) },
+	[TCA_SKBEDIT_QUEUE_MAPPING]	= { .len = sizeof(u16) },
+};
+
+static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
+			 struct tc_action *a, int ovr, int bind)
+{
+	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+	struct tc_skbedit *parm;
+	struct tcf_skbedit *d;
+	struct tcf_common *pc;
+	u32 flags = 0, *priority = NULL;
+	u16 *queue_mapping = NULL;
+	int ret = 0, err;
+
+	if (nla == NULL)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_SKBEDIT_PARMS] == NULL)
+		return -EINVAL;
+
+	if (tb[TCA_SKBEDIT_PRIORITY] != NULL) {
+		flags |= SKBEDIT_F_PRIORITY;
+		priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]);
+	}
+
+	if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) {
+		flags |= SKBEDIT_F_QUEUE_MAPPING;
+		queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
+	}
+	if (!flags)
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
+
+	pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info);
+	if (!pc) {
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+				     &skbedit_idx_gen, &skbedit_hash_info);
+		if (unlikely(!pc))
+			return -ENOMEM;
+
+		d = to_skbedit(pc);
+		ret = ACT_P_CREATED;
+	} else {
+		d = to_skbedit(pc);
+		if (!ovr) {
+			tcf_hash_release(pc, bind, &skbedit_hash_info);
+			return -EEXIST;
+		}
+	}
+
+	spin_lock_bh(&d->tcf_lock);
+
+	d->flags = flags;
+	if (flags & SKBEDIT_F_PRIORITY)
+		d->priority = *priority;
+	if (flags & SKBEDIT_F_QUEUE_MAPPING)
+		d->queue_mapping = *queue_mapping;
+	d->tcf_action = parm->action;
+
+	spin_unlock_bh(&d->tcf_lock);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(pc, &skbedit_hash_info);
+	return ret;
+}
+
+static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_skbedit *d = a->priv;
+
+	if (d)
+		return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
+	return 0;
+}
+
+static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+				int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_skbedit *d = a->priv;
+	struct tc_skbedit opt;
+	struct tcf_t t;
+
+	opt.index = d->tcf_index;
+	opt.refcnt = d->tcf_refcnt - ref;
+	opt.bindcnt = d->tcf_bindcnt - bind;
+	opt.action = d->tcf_action;
+	NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
+	if (d->flags & SKBEDIT_F_PRIORITY)
+		NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
+			&d->priority);
+	if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
+		NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING,
+			sizeof(d->queue_mapping), &d->queue_mapping);
+	t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+	NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t);
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct tc_action_ops act_skbedit_ops = {
+	.kind		=	"skbedit",
+	.hinfo		=	&skbedit_hash_info,
+	.type		=	TCA_ACT_SKBEDIT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_skbedit,
+	.dump		=	tcf_skbedit_dump,
+	.cleanup	=	tcf_skbedit_cleanup,
+	.init		=	tcf_skbedit_init,
+	.walk		=	tcf_generic_walker,
+};
+
+MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
+MODULE_DESCRIPTION("SKB Editing");
+MODULE_LICENSE("GPL");
+
+static int __init skbedit_init_module(void)
+{
+	return tcf_register_action(&act_skbedit_ops);
+}
+
+static void __exit skbedit_cleanup_module(void)
+{
+	tcf_unregister_action(&act_skbedit_ops);
+}
+
+module_init(skbedit_init_module);
+module_exit(skbedit_cleanup_module);
-- 
cgit v1.2.3-70-g09d2


From f07d1501292b3b0d3276ee0e537005526a45e242 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 12 Sep 2008 17:57:23 -0700
Subject: multiq: Further multiqueue cleanup

This patch resolves a few issues found with multiq including wording
suggestions and a problem seen in the allocation of queues.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/multiqueue.txt | 14 +++++++-------
 net/sched/sch_multiq.c                  | 13 +++++++++----
 2 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
index 8c2b06b77f6..4caa0e314cc 100644
--- a/Documentation/networking/multiqueue.txt
+++ b/Documentation/networking/multiqueue.txt
@@ -29,15 +29,15 @@ Section 2: Qdisc support for multiqueue devices
 
 -----------------------------------------------
 
-Currently two qdiscs support multiqueue devices.  The first is the default
-pfifo_fast qdisc.  This qdisc supports one qdisc per hardware queue.  A new
-round-robin qdisc, sch_multiq also supports multiple hardware queues. The
+Currently two qdiscs are optimized for multiqueue devices.  The first is the
+default pfifo_fast qdisc.  This qdisc supports one qdisc per hardware queue.
+A new round-robin qdisc, sch_multiq also supports multiple hardware queues. The
 qdisc is responsible for classifying the skb's and then directing the skb's to
 bands and queues based on the value in skb->queue_mapping.  Use this field in
 the base driver to determine which queue to send the skb to.
 
-sch_multiq has been added for hardware that wishes to avoid unnecessary
-requeuing.  It will cycle though the bands and verify that the hardware queue
+sch_multiq has been added for hardware that wishes to avoid head-of-line
+blocking.  It will cycle though the bands and verify that the hardware queue
 associated with the band is not stopped prior to dequeuing a packet.
 
 On qdisc load, the number of bands is based on the number of queues on the
@@ -63,8 +63,8 @@ band 1 => queue 1
 band 2 => queue 2
 band 3 => queue 3
 
-Traffic will begin flowing through each queue if your base device has either
-the default simple_tx_hash or a custom netdev->select_queue() defined.
+Traffic will begin flowing through each queue based on either the simple_tx_hash
+function or based on netdev->select_queue() if you have it defined.
 
 The behavior of tc filters remains the same.  However a new tc action,
 skbedit, has been added.  Assuming you wanted to route all traffic to a
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 49a8b67ed3b..5d9cd68e91d 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -214,8 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 	sch_tree_lock(sch);
 	q->bands = qopt->bands;
 	for (i = q->bands; i < q->max_bands; i++) {
-		struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
-		if (child != &noop_qdisc) {
+		if (q->queues[i] != &noop_qdisc) {
+			struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
 			qdisc_tree_decrease_qlen(child, child->q.qlen);
 			qdisc_destroy(child);
 		}
@@ -250,7 +250,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
-	int i;
+	int i, err;
 
 	q->queues = NULL;
 
@@ -265,7 +265,12 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 	for (i = 0; i < q->max_bands; i++)
 		q->queues[i] = &noop_qdisc;
 
-	return multiq_tune(sch, opt);
+	err = multiq_tune(sch,opt);
+
+	if (err)
+		kfree(q->queues);
+
+	return err;
 }
 
 static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
-- 
cgit v1.2.3-70-g09d2


From 63f2c0464875b6ef2132cecb19b2a5abbf061227 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 12 Sep 2008 23:23:50 -0700
Subject: net: ip_vs_proto_{tcp,udp} build fix

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_proto_tcp.c | 1 +
 net/ipv4/ipvs/ip_vs_proto_udp.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 537f616776d..dd4566ea2bf 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -18,6 +18,7 @@
 #include <linux/tcp.h>                  /* for tcphdr */
 #include <net/ip.h>
 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <net/ip6_checksum.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index e3ee26bd1de..6eb6039d634 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -22,6 +22,7 @@
 
 #include <net/ip_vs.h>
 #include <net/ip.h>
+#include <net/ip6_checksum.h>
 
 static struct ip_vs_conn *
 udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-- 
cgit v1.2.3-70-g09d2


From 93821778def10ec1e69aa3ac10adee975dad4ff3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Sep 2008 11:48:46 -0700
Subject: udp: Fix rcv socket locking

The previous patch in response to the recursive locking on IPsec
reception is broken as it tries to drop the BH socket lock while in
user context.

This patch fixes it by shrinking the section protected by the
socket lock to sock_queue_rcv_skb only.  The only reason we added
the lock is for the accounting which happens in that function.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 62 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 33 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8e42fbbd576..57e26fa6618 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -951,6 +951,27 @@ int udp_disconnect(struct sock *sk, int flags)
 	return 0;
 }
 
+static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int is_udplite = IS_UDPLITE(sk);
+	int rc;
+
+	if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+					 is_udplite);
+		goto drop;
+	}
+
+	return 0;
+
+drop:
+	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+	kfree_skb(skb);
+	return -1;
+}
+
 /* returns:
  *  -1: error
  *   0: success
@@ -989,9 +1010,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		    up->encap_rcv != NULL) {
 			int ret;
 
-			bh_unlock_sock(sk);
 			ret = (*up->encap_rcv)(sk, skb);
-			bh_lock_sock(sk);
 			if (ret <= 0) {
 				UDP_INC_STATS_BH(sock_net(sk),
 						 UDP_MIB_INDATAGRAMS,
@@ -1044,17 +1063,16 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 			goto drop;
 	}
 
-	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
-		/* Note that an ENOMEM error is charged twice */
-		if (rc == -ENOMEM) {
-			UDP_INC_STATS_BH(sock_net(sk),
-					UDP_MIB_RCVBUFERRORS, is_udplite);
-			atomic_inc(&sk->sk_drops);
-		}
-		goto drop;
-	}
+	rc = 0;
 
-	return 0;
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk))
+		rc = __udp_queue_rcv_skb(sk, skb);
+	else
+		sk_add_backlog(sk, skb);
+	bh_unlock_sock(sk);
+
+	return rc;
 
 drop:
 	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
@@ -1092,15 +1110,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
 			if (skb1) {
-				int ret = 0;
-
-				bh_lock_sock(sk);
-				if (!sock_owned_by_user(sk))
-					ret = udp_queue_rcv_skb(sk, skb1);
-				else
-					sk_add_backlog(sk, skb1);
-				bh_unlock_sock(sk);
-
+				int ret = udp_queue_rcv_skb(sk, skb1);
 				if (ret > 0)
 					/* we should probably re-process instead
 					 * of dropping packets here. */
@@ -1195,13 +1205,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 			uh->dest, inet_iif(skb), udptable);
 
 	if (sk != NULL) {
-		int ret = 0;
-		bh_lock_sock(sk);
-		if (!sock_owned_by_user(sk))
-			ret = udp_queue_rcv_skb(sk, skb);
-		else
-			sk_add_backlog(sk, skb);
-		bh_unlock_sock(sk);
+		int ret = udp_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
 		/* a return value > 0 means to resubmit the input, but
@@ -1494,7 +1498,7 @@ struct proto udp_prot = {
 	.sendmsg	   = udp_sendmsg,
 	.recvmsg	   = udp_recvmsg,
 	.sendpage	   = udp_sendpage,
-	.backlog_rcv	   = udp_queue_rcv_skb,
+	.backlog_rcv	   = __udp_queue_rcv_skb,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v4_get_port,
-- 
cgit v1.2.3-70-g09d2


From b2e1b30290539b344cbaff0d9da38012e03aa347 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Tue, 9 Sep 2008 23:19:48 -0700
Subject: cfg80211: Add new wireless regulatory infrastructure

This adds the new wireless regulatory infrastructure. The
main motiviation behind this was to centralize regulatory
code as each driver was implementing their own regulatory solution,
and to replace the initial centralized code we have where:

* only 3 regulatory domains are supported: US, JP and EU
* regulatory domains can only be changed through module parameter
* all rules were built statically in the kernel

We now have support for regulatory domains for many countries
and regulatory domains are now queried through a userspace agent
through udev allowing distributions to update regulatory rules
without updating the kernel.

Each driver can regulatory_hint() a regulatory domain
based on either their EEPROM mapped regulatory domain value to a
respective ISO/IEC 3166-1 country code or pass an internally built
regulatory domain. We also add support to let the user set the
regulatory domain through userspace in case of faulty EEPROMs to
further help compliance.

Support for world roaming will be added soon for cards capable of
this.

For more information see:

http://wireless.kernel.org/en/developers/Regulatory/CRDA

For now we leave an option to enable the old module parameter,
ieee80211_regdom, and to build the 3 old regdomains statically
(US, JP and EU). This option is CONFIG_WIRELESS_OLD_REGULATORY.
These old static definitions and the module parameter is being
scheduled for removal for 2.6.29. Note that if you use this
you won't make use of a world regulatory domain as its pointless.
If you leave this option enabled and if CRDA is present and you
use US or JP we will try to ask CRDA to update us a regulatory
domain for us.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/feature-removal-schedule.txt |  18 +
 Documentation/networking/regulatory.txt    | 194 +++++++
 include/linux/nl80211.h                    |  96 +++-
 include/net/cfg80211.h                     |  60 +++
 include/net/mac80211.h                     |   2 +
 include/net/wireless.h                     |  58 +++
 net/mac80211/cfg.c                         |   7 +
 net/wireless/Kconfig                       |  32 ++
 net/wireless/core.c                        | 162 +++++-
 net/wireless/core.h                        |   2 +-
 net/wireless/nl80211.c                     | 151 ++++++
 net/wireless/reg.c                         | 805 +++++++++++++++++++++++++----
 net/wireless/reg.h                         |  44 ++
 13 files changed, 1513 insertions(+), 118 deletions(-)
 create mode 100644 Documentation/networking/regulatory.txt
 create mode 100644 net/wireless/reg.h

(limited to 'net')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index eb1a47b9742..c93fcdec246 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -6,6 +6,24 @@ be removed from this file.
 
 ---------------------------
 
+What:	old static regulatory information and ieee80211_regdom module parameter
+When:	2.6.29
+Why:	The old regulatory infrastructure has been replaced with a new one
+	which does not require statically defined regulatory domains. We do
+	not want to keep static regulatory domains in the kernel due to the
+	the dynamic nature of regulatory law and localization. We kept around
+	the old static definitions for the regulatory domains of:
+		* US
+		* JP
+		* EU
+	and used by default the US when CONFIG_WIRELESS_OLD_REGULATORY was
+	set. We also kept around the ieee80211_regdom module parameter in case
+	some applications were relying on it. Changing regulatory domains
+	can now be done instead by using nl80211, as is done with iw.
+Who:	Luis R. Rodriguez <lrodriguez@atheros.com>
+
+---------------------------
+
 What:	dev->power.power_state
 When:	July 2007
 Why:	Broken design for runtime control over driver power states, confusing
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
new file mode 100644
index 00000000000..a96989a8ff3
--- /dev/null
+++ b/Documentation/networking/regulatory.txt
@@ -0,0 +1,194 @@
+Linux wireless regulatory documentation
+---------------------------------------
+
+This document gives a brief review over how the Linux wireless
+regulatory infrastructure works.
+
+More up to date information can be obtained at the project's web page:
+
+http://wireless.kernel.org/en/developers/Regulatory
+
+Keeping regulatory domains in userspace
+---------------------------------------
+
+Due to the dynamic nature of regulatory domains we keep them
+in userspace and provide a framework for userspace to upload
+to the kernel one regulatory domain to be used as the central
+core regulatory domain all wireless devices should adhere to.
+
+How to get regulatory domains to the kernel
+-------------------------------------------
+
+Userspace gets a regulatory domain in the kernel by having
+a userspace agent build it and send it via nl80211. Only
+expected regulatory domains will be respected by the kernel.
+
+A currently available userspace agent which can accomplish this
+is CRDA - central regulatory domain agent. Its documented here:
+
+http://wireless.kernel.org/en/developers/Regulatory/CRDA
+
+Essentially the kernel will send a udev event when it knows
+it needs a new regulatory domain. A udev rule can be put in place
+to trigger crda to send the respective regulatory domain for a
+specific ISO/IEC 3166 alpha2.
+
+Below is an example udev rule which can be used:
+
+# Example file, should be put in /etc/udev/rules.d/regulatory.rules
+KERNEL=="regulatory*", ACTION=="change", SUBSYSTEM=="platform", RUN+="/sbin/crda"
+
+The alpha2 is passed as an environment variable under the variable COUNTRY.
+
+Who asks for regulatory domains?
+--------------------------------
+
+* Users
+
+Users can use iw:
+
+http://wireless.kernel.org/en/users/Documentation/iw
+
+An example:
+
+  # set regulatory domain to "Costa Rica"
+  iw reg set CR
+
+This will request the kernel to set the regulatory domain to
+the specificied alpha2. The kernel in turn will then ask userspace
+to provide a regulatory domain for the alpha2 specified by the user
+by sending a uevent.
+
+* Wireless subsystems for Country Information elements
+
+The kernel will send a uevent to inform userspace a new
+regulatory domain is required. More on this to be added
+as its integration is added.
+
+* Drivers
+
+If drivers determine they need a specific regulatory domain
+set they can inform the wireless core using regulatory_hint().
+They have two options -- they either provide an alpha2 so that
+crda can provide back a regulatory domain for that country or
+they can build their own regulatory domain based on internal
+custom knowledge so the wireless core can respect it.
+
+*Most* drivers will rely on the first mechanism of providing a
+regulatory hint with an alpha2. For these drivers there is an additional
+check that can be used to ensure compliance based on custom EEPROM
+regulatory data. This additional check can be used by drivers by
+registering on its struct wiphy a reg_notifier() callback. This notifier
+is called when the core's regulatory domain has been changed. The driver
+can use this to review the changes made and also review who made them
+(driver, user, country IE) and determine what to allow based on its
+internal EEPROM data. Devices drivers wishing to be capable of world
+roaming should use this callback. More on world roaming will be
+added to this document when its support is enabled.
+
+Device drivers who provide their own built regulatory domain
+do not need a callback as the channels registered by them are
+the only ones that will be allowed and therefore *additional*
+cannels cannot be enabled.
+
+Example code - drivers hinting an alpha2:
+------------------------------------------
+
+This example comes from the zd1211rw device driver. You can start
+by having a mapping of your device's EEPROM country/regulatory
+domain value to to a specific alpha2 as follows:
+
+static struct zd_reg_alpha2_map reg_alpha2_map[] = {
+	{ ZD_REGDOMAIN_FCC, "US" },
+	{ ZD_REGDOMAIN_IC, "CA" },
+	{ ZD_REGDOMAIN_ETSI, "DE" }, /* Generic ETSI, use most restrictive */
+	{ ZD_REGDOMAIN_JAPAN, "JP" },
+	{ ZD_REGDOMAIN_JAPAN_ADD, "JP" },
+	{ ZD_REGDOMAIN_SPAIN, "ES" },
+	{ ZD_REGDOMAIN_FRANCE, "FR" },
+
+Then you can define a routine to map your read EEPROM value to an alpha2,
+as follows:
+
+static int zd_reg2alpha2(u8 regdomain, char *alpha2)
+{
+	unsigned int i;
+	struct zd_reg_alpha2_map *reg_map;
+		for (i = 0; i < ARRAY_SIZE(reg_alpha2_map); i++) {
+			reg_map = &reg_alpha2_map[i];
+			if (regdomain == reg_map->reg) {
+			alpha2[0] = reg_map->alpha2[0];
+			alpha2[1] = reg_map->alpha2[1];
+			return 0;
+		}
+	}
+	return 1;
+}
+
+Lastly, you can then hint to the core of your discovered alpha2, if a match
+was found. You need to do this after you have registered your wiphy. You
+are expected to do this during initialization.
+
+	r = zd_reg2alpha2(mac->regdomain, alpha2);
+	if (!r)
+		regulatory_hint(hw->wiphy, alpha2, NULL);
+
+Example code - drivers providing a built in regulatory domain:
+--------------------------------------------------------------
+
+If you have regulatory information you can obtain from your
+driver and you *need* to use this we let you build a regulatory domain
+structure and pass it to the wireless core. To do this you should
+kmalloc() a structure big enough to hold your regulatory domain
+structure and you should then fill it with your data. Finally you simply
+call regulatory_hint() with the regulatory domain structure in it.
+
+Bellow is a simple example, with a regulatory domain cached using the stack.
+Your implementation may vary (read EEPROM cache instead, for example).
+
+Example cache of some regulatory domain
+
+struct ieee80211_regdomain mydriver_jp_regdom = {
+	.n_reg_rules = 3,
+	.alpha2 =  "JP",
+	//.alpha2 =  "99", /* If I have no alpha2 to map it to */
+	.reg_rules = {
+		/* IEEE 802.11b/g, channels 1..14 */
+		REG_RULE(2412-20, 2484+20, 40, 6, 20, 0),
+		/* IEEE 802.11a, channels 34..48 */
+		REG_RULE(5170-20, 5240+20, 40, 6, 20,
+			NL80211_RRF_PASSIVE_SCAN),
+		/* IEEE 802.11a, channels 52..64 */
+		REG_RULE(5260-20, 5320+20, 40, 6, 20,
+			NL80211_RRF_NO_IBSS |
+			NL80211_RRF_DFS),
+	}
+};
+
+Then in some part of your code after your wiphy has been registered:
+
+	int r;
+	struct ieee80211_regdomain *rd;
+	int size_of_regd;
+	int num_rules = mydriver_jp_regdom.n_reg_rules;
+	unsigned int i;
+
+	size_of_regd = sizeof(struct ieee80211_regdomain) +
+		(num_rules * sizeof(struct ieee80211_reg_rule));
+
+	rd = kzalloc(size_of_regd, GFP_KERNEL);
+	if (!rd)
+	return -ENOMEM;
+
+	memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain));
+
+	for (i=0; i < num_rules; i++) {
+		memcpy(&rd->reg_rules[i], &mydriver_jp_regdom.reg_rules[i],
+			sizeof(struct ieee80211_reg_rule));
+	}
+	r = regulatory_hint(hw->wiphy, NULL, rd);
+	if (r) {
+		kfree(rd);
+		return r;
+	}
+
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 5e51f4e7600..9bad65400fb 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -92,6 +92,20 @@
  * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by
  *	%NL80211_ATTR_IFINDEX.
  *
+ * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command
+ *	after being queried by the kernel. CRDA replies by sending a regulatory
+ *	domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our
+ *	current alpha2 if it found a match. It also provides
+ * 	NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each
+ * 	regulatory rule is a nested set of attributes  given by
+ * 	%NL80211_ATTR_REG_RULE_FREQ_[START|END] and
+ * 	%NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by
+ * 	%NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and
+ * 	%NL80211_ATTR_REG_RULE_POWER_MAX_EIRP.
+ * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain
+ * 	to the the specified ISO/IEC 3166-1 alpha2 country code. The core will
+ * 	store this as a valid request and then query userspace for it.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -131,7 +145,10 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_BSS,
 
-	/* add commands here */
+	NL80211_CMD_SET_REG,
+	NL80211_CMD_REQ_SET_REG,
+
+	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
 	__NL80211_CMD_AFTER_LAST,
@@ -197,10 +214,21 @@ enum nl80211_commands {
  * 	info given for %NL80211_CMD_GET_MPATH, nested attribute described at
  *	&enum nl80211_mpath_info.
  *
- *
  * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of
  *      &enum nl80211_mntr_flags.
  *
+ * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the
+ * 	current regulatory domain should be set to or is already set to.
+ * 	For example, 'CR', for Costa Rica. This attribute is used by the kernel
+ * 	to query the CRDA to retrieve one regulatory domain. This attribute can
+ * 	also be used by userspace to query the kernel for the currently set
+ * 	regulatory domain. We chose an alpha2 as that is also used by the
+ * 	IEEE-802.11d country information element to identify a country.
+ * 	Users can also simply ask the wireless core to set regulatory domain
+ * 	to a specific alpha2.
+ * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory
+ *	rules.
+ *
  * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1)
  * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled
  *	(u8, 0 or 1)
@@ -265,6 +293,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SUPPORTED_IFTYPES,
 
+	NL80211_ATTR_REG_ALPHA2,
+	NL80211_ATTR_REG_RULES,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -278,6 +309,7 @@ enum nl80211_attrs {
 #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY
 
 #define NL80211_MAX_SUPP_RATES			32
+#define NL80211_MAX_SUPP_REG_RULES		32
 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY	0
 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY	16
 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY	24
@@ -472,6 +504,66 @@ enum nl80211_bitrate_attr {
 	NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_reg_rule_attr - regulatory rule attributes
+ * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional
+ * 	considerations for a given frequency range. These are the
+ * 	&enum nl80211_reg_rule_flags.
+ * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory
+ * 	rule in KHz. This is not a center of frequency but an actual regulatory
+ * 	band edge.
+ * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule
+ * 	in KHz. This is not a center a frequency but an actual regulatory
+ * 	band edge.
+ * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this
+ * 	frequency range, in KHz.
+ * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain
+ * 	for a given frequency range. The value is in mBi (100 * dBi).
+ * 	If you don't have one then don't send this.
+ * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for
+ * 	a given frequency range. The value is in mBm (100 * dBm).
+ */
+enum nl80211_reg_rule_attr {
+	__NL80211_REG_RULE_ATTR_INVALID,
+	NL80211_ATTR_REG_RULE_FLAGS,
+
+	NL80211_ATTR_FREQ_RANGE_START,
+	NL80211_ATTR_FREQ_RANGE_END,
+	NL80211_ATTR_FREQ_RANGE_MAX_BW,
+
+	NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
+	NL80211_ATTR_POWER_RULE_MAX_EIRP,
+
+	/* keep last */
+	__NL80211_REG_RULE_ATTR_AFTER_LAST,
+	NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_reg_rule_flags - regulatory rule flags
+ *
+ * @NL80211_RRF_NO_OFDM: OFDM modulation not allowed
+ * @NL80211_RRF_NO_CCK: CCK modulation not allowed
+ * @NL80211_RRF_NO_INDOOR: indoor operation not allowed
+ * @NL80211_RRF_NO_OUTDOOR: outdoor operation not allowed
+ * @NL80211_RRF_DFS: DFS support is required to be used
+ * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links
+ * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links
+ * @NL80211_RRF_PASSIVE_SCAN: passive scan is required
+ * @NL80211_RRF_NO_IBSS: no IBSS is allowed
+ */
+enum nl80211_reg_rule_flags {
+	NL80211_RRF_NO_OFDM		= 1<<0,
+	NL80211_RRF_NO_CCK		= 1<<1,
+	NL80211_RRF_NO_INDOOR		= 1<<2,
+	NL80211_RRF_NO_OUTDOOR		= 1<<3,
+	NL80211_RRF_DFS			= 1<<4,
+	NL80211_RRF_PTP_ONLY		= 1<<5,
+	NL80211_RRF_PTMP_ONLY		= 1<<6,
+	NL80211_RRF_PASSIVE_SCAN	= 1<<7,
+	NL80211_RRF_NO_IBSS		= 1<<8,
+};
+
 /**
  * enum nl80211_mntr_flags - monitor configuration flags
  *
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0a72d1e3d3a..9f40c4d417d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -287,6 +287,66 @@ struct bss_parameters {
 	int use_short_slot_time;
 };
 
+/**
+ * enum reg_set_by - Indicates who is trying to set the regulatory domain
+ * @REGDOM_SET_BY_INIT: regulatory domain was set by initialization. We will be
+ * 	using a static world regulatory domain by default.
+ * @REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world regulatory domain.
+ * @REGDOM_SET_BY_USER: User asked the wireless core to set the
+ * 	regulatory domain.
+ * @REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the wireless core
+ *	it thinks its knows the regulatory domain we should be in.
+ * @REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an 802.11 country
+ *	information element with regulatory information it thinks we
+ *	should consider.
+ */
+enum reg_set_by {
+	REGDOM_SET_BY_INIT,
+	REGDOM_SET_BY_CORE,
+	REGDOM_SET_BY_USER,
+	REGDOM_SET_BY_DRIVER,
+	REGDOM_SET_BY_COUNTRY_IE,
+};
+
+struct ieee80211_freq_range {
+	u32 start_freq_khz;
+	u32 end_freq_khz;
+	u32 max_bandwidth_khz;
+};
+
+struct ieee80211_power_rule {
+	u32 max_antenna_gain;
+	u32 max_eirp;
+};
+
+struct ieee80211_reg_rule {
+	struct ieee80211_freq_range freq_range;
+	struct ieee80211_power_rule power_rule;
+	u32 flags;
+};
+
+struct ieee80211_regdomain {
+	u32 n_reg_rules;
+	char alpha2[2];
+	struct ieee80211_reg_rule reg_rules[];
+};
+
+#define MHZ_TO_KHZ(freq) (freq * 1000)
+#define KHZ_TO_MHZ(freq) (freq / 1000)
+#define DBI_TO_MBI(gain) (gain * 100)
+#define MBI_TO_DBI(gain) (gain / 100)
+#define DBM_TO_MBM(gain) (gain * 100)
+#define MBM_TO_DBM(gain) (gain / 100)
+
+#define REG_RULE(start, end, bw, gain, eirp, reg_flags) { \
+	.freq_range.start_freq_khz = (start) * 1000, \
+	.freq_range.end_freq_khz = (end) * 1000, \
+	.freq_range.max_bandwidth_khz = (bw) * 1000, \
+	.power_rule.max_antenna_gain = (gain) * 100, \
+	.power_rule.max_eirp = (eirp) * 100, \
+	.flags = reg_flags, \
+	}
+
 /* from net/wireless.h */
 struct wiphy;
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index fb9e62211c3..f504e3eca7d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -833,6 +833,8 @@ struct ieee80211_hw {
 	s8 max_signal;
 };
 
+struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy);
+
 /**
  * SET_IEEE80211_DEV - set device for 802.11 hardware
  *
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 1dc8ec3daa2..e4378cc6bf8 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -60,6 +60,7 @@ enum ieee80211_channel_flags {
  * with cfg80211.
  *
  * @center_freq: center frequency in MHz
+ * @max_bandwidth: maximum allowed bandwidth for this channel, in MHz
  * @hw_value: hardware-specific value for the channel
  * @flags: channel flags from &enum ieee80211_channel_flags.
  * @orig_flags: channel flags at registration time, used by regulatory
@@ -73,6 +74,7 @@ enum ieee80211_channel_flags {
 struct ieee80211_channel {
 	enum ieee80211_band band;
 	u16 center_freq;
+	u8 max_bandwidth;
 	u16 hw_value;
 	u32 flags;
 	int max_antenna_gain;
@@ -178,6 +180,7 @@ struct ieee80211_supported_band {
  * struct wiphy - wireless hardware description
  * @idx: the wiphy index assigned to this item
  * @class_dev: the class device representing /sys/class/ieee80211/<wiphy-name>
+ * @reg_notifier: the driver's regulatory notification callback
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -197,6 +200,9 @@ struct wiphy {
 
 	struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS];
 
+	/* Lets us get back the wiphy on the callback */
+	int (*reg_notifier)(struct wiphy *wiphy, enum reg_set_by setby);
+
 	/* fields below are read-only, assigned by cfg80211 */
 
 	/* the item in /sys/class/ieee80211/ points to this,
@@ -322,6 +328,58 @@ extern int ieee80211_frequency_to_channel(int freq);
  */
 extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy,
 							 int freq);
+/**
+ * __regulatory_hint - hint to the wireless core a regulatory domain
+ * @wiphy: if a driver is providing the hint this is the driver's very
+ * 	own &struct wiphy
+ * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain
+ * 	should be in. If @rd is set this should be NULL
+ * @rd: a complete regulatory domain, if passed the caller need not worry
+ * 	about freeing it
+ *
+ * The Wireless subsystem can use this function to hint to the wireless core
+ * what it believes should be the current regulatory domain by
+ * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory
+ * domain should be in or by providing a completely build regulatory domain.
+ *
+ * Returns -EALREADY if *a regulatory domain* has already been set. Note that
+ * this could be by another driver. It is safe for drivers to continue if
+ * -EALREADY is returned, if drivers are not capable of world roaming they
+ * should not register more channels than they support. Right now we only
+ * support listening to the first driver hint. If the driver is capable
+ * of world roaming but wants to respect its own EEPROM mappings for
+ * specific regulatory domains it should register the @reg_notifier callback
+ * on the &struct wiphy. Returns 0 if the hint went through fine or through an
+ * intersection operation. Otherwise a standard error code is returned.
+ *
+ */
+extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
+		const char *alpha2, struct ieee80211_regdomain *rd);
+/**
+ * regulatory_hint - driver hint to the wireless core a regulatory domain
+ * @wiphy: the driver's very own &struct wiphy
+ * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain
+ * 	should be in. If @rd is set this should be NULL. Note that if you
+ * 	set this to NULL you should still set rd->alpha2 to some accepted
+ * 	alpha2.
+ * @rd: a complete regulatory domain provided by the driver. If passed
+ * 	the driver does not need to worry about freeing it.
+ *
+ * Wireless drivers can use this function to hint to the wireless core
+ * what it believes should be the current regulatory domain by
+ * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory
+ * domain should be in or by providing a completely build regulatory domain.
+ * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried
+ * for a regulatory domain structure for the respective country. If
+ * a regulatory domain is build and passed you should set the alpha2
+ * if possible, otherwise set it to the special value of "99" which tells
+ * the wireless core it is unknown. If you pass a built regulatory domain
+ * and we return non zero you are in charge of kfree()'ing the structure.
+ *
+ * See __regulatory_hint() documentation for possible return values.
+ */
+extern int regulatory_hint(struct wiphy *wiphy,
+		const char *alpha2, struct ieee80211_regdomain *rd);
 
 /**
  * ieee80211_get_channel - get channel struct from wiphy for specified frequency
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 928813ce08e..5a3bdaad6c1 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -17,6 +17,13 @@
 #include "rate.h"
 #include "mesh.h"
 
+struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	return &local->hw;
+}
+EXPORT_SYMBOL(wiphy_to_hw);
+
 static enum ieee80211_if_types
 nl80211_type_to_mac80211_type(enum nl80211_iftype type)
 {
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 833b024f8f6..b97bd9fe6b7 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -14,6 +14,38 @@ config NL80211
 
 	  If unsure, say Y.
 
+config WIRELESS_OLD_REGULATORY
+	bool "Old wireless static regulatory defintions"
+	default n
+	---help---
+	  This option enables the old static regulatory information
+	  and uses it within the new framework. This is available
+	  temporarily as an option to help prevent immediate issues
+	  due to the switch to the new regulatory framework which
+	  does require a new userspace application which has the
+	  database of regulatory information (CRDA) and another for
+	  setting regulatory domains (iw).
+
+	  For more information see:
+
+	  http://wireless.kernel.org/en/developers/Regulatory/CRDA
+	  http://wireless.kernel.org/en/users/Documentation/iw
+
+	  It is important to note though that if you *do* have CRDA present
+	  and if this option is enabled CRDA *will* be called to update the
+	  regulatory domain (for US and JP only). Support for letting the user
+	  set the regulatory domain through iw is also supported. This option
+	  mainly exists to leave around for a kernel release some old static
+	  regulatory domains that were defined and to keep around the old
+	  ieee80211_regdom module parameter. This is being phased out and you
+	  should stop using them ASAP.
+
+	  Say N unless you cannot install a new userspace application
+	  or have one currently depending on the ieee80211_regdom module
+	  parameter and cannot port it to use the new userspace interfaces.
+
+	  This is scheduled for removal for 2.6.29.
+
 config WIRELESS_EXT
 	bool "Wireless extensions"
 	default n
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7e995ac06a0..a910cd2d0fd 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -13,12 +13,14 @@
 #include <linux/debugfs.h>
 #include <linux/notifier.h>
 #include <linux/device.h>
+#include <linux/list.h>
 #include <net/genetlink.h>
 #include <net/cfg80211.h>
 #include <net/wireless.h>
 #include "nl80211.h"
 #include "core.h"
 #include "sysfs.h"
+#include "reg.h"
 
 /* name for sysfs, %d is appended */
 #define PHY_NAME "phy"
@@ -27,6 +29,107 @@ MODULE_AUTHOR("Johannes Berg");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("wireless configuration support");
 
+struct list_head regulatory_requests;
+
+/* Central wireless core regulatory domains, we only need two,
+ * the current one and a world regulatory domain in case we have no
+ * information to give us an alpha2 */
+struct ieee80211_regdomain *cfg80211_regdomain;
+
+/* We keep a static world regulatory domain in case of the absence of CRDA */
+const struct ieee80211_regdomain world_regdom = {
+	.n_reg_rules = 1,
+	.alpha2 =  "00",
+	.reg_rules = {
+		REG_RULE(2402, 2472, 40, 6, 20,
+			NL80211_RRF_PASSIVE_SCAN |
+			NL80211_RRF_NO_IBSS),
+	}
+};
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+/* All this fucking static junk will be removed soon, so
+ * don't fucking count on it !@#$ */
+
+static char *ieee80211_regdom = "US";
+module_param(ieee80211_regdom, charp, 0444);
+MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
+
+/* We assume 40 MHz bandwidth for the old regulatory work.
+ * We make emphasis we are using the exact same frequencies
+ * as before */
+
+const struct ieee80211_regdomain us_regdom = {
+	.n_reg_rules = 6,
+	.alpha2 =  "US",
+	.reg_rules = {
+		/* IEEE 802.11b/g, channels 1..11 */
+		REG_RULE(2412-20, 2462+20, 40, 6, 27, 0),
+		/* IEEE 802.11a, channel 36 */
+		REG_RULE(5180-20, 5180+20, 40, 6, 23, 0),
+		/* IEEE 802.11a, channel 40 */
+		REG_RULE(5200-20, 5200+20, 40, 6, 23, 0),
+		/* IEEE 802.11a, channel 44 */
+		REG_RULE(5220-20, 5220+20, 40, 6, 23, 0),
+		/* IEEE 802.11a, channels 48..64 */
+		REG_RULE(5240-20, 5320+20, 40, 6, 23, 0),
+		/* IEEE 802.11a, channels 149..165, outdoor */
+		REG_RULE(5745-20, 5825+20, 40, 6, 30, 0),
+	}
+};
+
+const struct ieee80211_regdomain jp_regdom = {
+	.n_reg_rules = 3,
+	.alpha2 =  "JP",
+	.reg_rules = {
+		/* IEEE 802.11b/g, channels 1..14 */
+		REG_RULE(2412-20, 2484+20, 40, 6, 20, 0),
+		/* IEEE 802.11a, channels 34..48 */
+		REG_RULE(5170-20, 5240+20, 40, 6, 20,
+			NL80211_RRF_PASSIVE_SCAN),
+		/* IEEE 802.11a, channels 52..64 */
+		REG_RULE(5260-20, 5320+20, 40, 6, 20,
+			NL80211_RRF_NO_IBSS |
+			NL80211_RRF_DFS),
+	}
+};
+
+const struct ieee80211_regdomain eu_regdom = {
+	.n_reg_rules = 6,
+	/* This alpha2 is bogus, we leave it here just for stupid
+	 * backward compatibility */
+	.alpha2 =  "EU",
+	.reg_rules = {
+		/* IEEE 802.11b/g, channels 1..13 */
+		REG_RULE(2412-20, 2472+20, 40, 6, 20, 0),
+		/* IEEE 802.11a, channel 36 */
+		REG_RULE(5180-20, 5180+20, 40, 6, 23,
+			NL80211_RRF_PASSIVE_SCAN),
+		/* IEEE 802.11a, channel 40 */
+		REG_RULE(5200-20, 5200+20, 40, 6, 23,
+			NL80211_RRF_PASSIVE_SCAN),
+		/* IEEE 802.11a, channel 44 */
+		REG_RULE(5220-20, 5220+20, 40, 6, 23,
+			NL80211_RRF_PASSIVE_SCAN),
+		/* IEEE 802.11a, channels 48..64 */
+		REG_RULE(5240-20, 5320+20, 40, 6, 20,
+			NL80211_RRF_NO_IBSS |
+			NL80211_RRF_DFS),
+		/* IEEE 802.11a, channels 100..140 */
+		REG_RULE(5500-20, 5700+20, 40, 6, 30,
+			NL80211_RRF_NO_IBSS |
+			NL80211_RRF_DFS),
+	}
+};
+
+#endif
+
+struct ieee80211_regdomain *cfg80211_world_regdom =
+	(struct ieee80211_regdomain *) &world_regdom;
+
+LIST_HEAD(regulatory_requests);
+DEFINE_MUTEX(cfg80211_reg_mutex);
+
 /* RCU might be appropriate here since we usually
  * only read the list, and that can happen quite
  * often because we need to do it for each command */
@@ -302,7 +405,9 @@ int wiphy_register(struct wiphy *wiphy)
 	ieee80211_set_bitrate_flags(wiphy);
 
 	/* set up regulatory info */
-	wiphy_update_regulatory(wiphy);
+	mutex_lock(&cfg80211_reg_mutex);
+	wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE);
+	mutex_unlock(&cfg80211_reg_mutex);
 
 	mutex_lock(&cfg80211_drv_mutex);
 
@@ -409,9 +514,35 @@ static struct notifier_block cfg80211_netdev_notifier = {
 	.notifier_call = cfg80211_netdev_notifier_call,
 };
 
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+const struct ieee80211_regdomain *static_regdom(char *alpha2)
+{
+	if (alpha2[0] == 'U' && alpha2[1] == 'S')
+		return &us_regdom;
+	if (alpha2[0] == 'J' && alpha2[1] == 'P')
+		return &jp_regdom;
+	if (alpha2[0] == 'E' && alpha2[1] == 'U')
+		return &eu_regdom;
+	/* Default, as per the old rules */
+	return &us_regdom;
+}
+#endif
+
 static int cfg80211_init(void)
 {
-	int err = wiphy_sysfs_init();
+	int err;
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+	cfg80211_regdomain =
+		(struct ieee80211_regdomain *) static_regdom(ieee80211_regdom);
+	/* Used during reset_regdomains_static() */
+	cfg80211_world_regdom = cfg80211_regdomain;
+#else
+	cfg80211_regdomain =
+		(struct ieee80211_regdomain *) cfg80211_world_regdom;
+#endif
+
+	err = wiphy_sysfs_init();
 	if (err)
 		goto out_fail_sysfs;
 
@@ -425,8 +556,33 @@ static int cfg80211_init(void)
 
 	ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL);
 
+	err = regulatory_init();
+	if (err)
+		goto out_fail_reg;
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+	printk(KERN_INFO "cfg80211: Using old static regulatory domain:\n");
+	print_regdomain_info(cfg80211_regdomain);
+	/* The old code still requests for a new regdomain and if
+	 * you have CRDA you get it updated, otherwise you get
+	 * stuck with the static values. We ignore "EU" code as
+	 * that is not a valid ISO / IEC 3166 alpha2 */
+	if (ieee80211_regdom[0] != 'E' &&
+			ieee80211_regdom[1] != 'U')
+		err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE,
+			ieee80211_regdom, NULL);
+#else
+	err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL);
+	if (err)
+		printk(KERN_ERR "cfg80211: calling CRDA failed - "
+			"unable to update world regulatory domain, "
+			"using static definition\n");
+#endif
+
 	return 0;
 
+out_fail_reg:
+	debugfs_remove(ieee80211_debugfs_dir);
 out_fail_nl80211:
 	unregister_netdevice_notifier(&cfg80211_netdev_notifier);
 out_fail_notifier:
@@ -434,6 +590,7 @@ out_fail_notifier:
 out_fail_sysfs:
 	return err;
 }
+
 subsys_initcall(cfg80211_init);
 
 static void cfg80211_exit(void)
@@ -442,5 +599,6 @@ static void cfg80211_exit(void)
 	nl80211_exit();
 	unregister_netdevice_notifier(&cfg80211_netdev_notifier);
 	wiphy_sysfs_exit();
+	regulatory_exit();
 }
 module_exit(cfg80211_exit);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 7a02c356d63..771cc5cc765 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -79,6 +79,6 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv,
 			       char *newname);
 
 void ieee80211_set_bitrate_flags(struct wiphy *wiphy);
-void wiphy_update_regulatory(struct wiphy *wiphy);
+void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby);
 
 #endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 77880ba8b61..1221d726ed5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -18,6 +18,7 @@
 #include <net/cfg80211.h>
 #include "core.h"
 #include "nl80211.h"
+#include "reg.h"
 
 /* the netlink family */
 static struct genl_family nl80211_fam = {
@@ -88,6 +89,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 				.len = IEEE80211_MAX_MESH_ID_LEN },
 	[NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 },
 
+	[NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
+	[NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED },
+
 	[NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 },
 	[NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 },
 	[NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 },
@@ -1599,6 +1603,141 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static const struct nla_policy
+	reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = {
+	[NL80211_ATTR_REG_RULE_FLAGS]		= { .type = NLA_U32 },
+	[NL80211_ATTR_FREQ_RANGE_START]		= { .type = NLA_U32 },
+	[NL80211_ATTR_FREQ_RANGE_END]		= { .type = NLA_U32 },
+	[NL80211_ATTR_FREQ_RANGE_MAX_BW]	= { .type = NLA_U32 },
+	[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]	= { .type = NLA_U32 },
+	[NL80211_ATTR_POWER_RULE_MAX_EIRP]	= { .type = NLA_U32 },
+};
+
+static int parse_reg_rule(struct nlattr *tb[],
+	struct ieee80211_reg_rule *reg_rule)
+{
+	struct ieee80211_freq_range *freq_range = &reg_rule->freq_range;
+	struct ieee80211_power_rule *power_rule = &reg_rule->power_rule;
+
+	if (!tb[NL80211_ATTR_REG_RULE_FLAGS])
+		return -EINVAL;
+	if (!tb[NL80211_ATTR_FREQ_RANGE_START])
+		return -EINVAL;
+	if (!tb[NL80211_ATTR_FREQ_RANGE_END])
+		return -EINVAL;
+	if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
+		return -EINVAL;
+	if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP])
+		return -EINVAL;
+
+	reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]);
+
+	freq_range->start_freq_khz =
+		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]);
+	freq_range->end_freq_khz =
+		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]);
+	freq_range->max_bandwidth_khz =
+		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
+
+	power_rule->max_eirp =
+		nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]);
+
+	if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN])
+		power_rule->max_antenna_gain =
+			nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]);
+
+	return 0;
+}
+
+static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
+{
+	int r;
+	char *data = NULL;
+
+	if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
+		return -EINVAL;
+
+	data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+	/* We ignore world regdom requests with the old regdom setup */
+	if (is_world_regdom(data))
+		return -EINVAL;
+#endif
+	mutex_lock(&cfg80211_drv_mutex);
+	r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, NULL);
+	mutex_unlock(&cfg80211_drv_mutex);
+	return r;
+}
+
+static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1];
+	struct nlattr *nl_reg_rule;
+	char *alpha2 = NULL;
+	int rem_reg_rules = 0, r = 0;
+	u32 num_rules = 0, rule_idx = 0, size_of_regd;
+	struct ieee80211_regdomain *rd = NULL;
+
+	if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_REG_RULES])
+		return -EINVAL;
+
+	alpha2 = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
+
+	nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
+			rem_reg_rules) {
+		num_rules++;
+		if (num_rules > NL80211_MAX_SUPP_REG_RULES)
+			goto bad_reg;
+	}
+
+	if (!reg_is_valid_request(alpha2))
+		return -EINVAL;
+
+	size_of_regd = sizeof(struct ieee80211_regdomain) +
+		(num_rules * sizeof(struct ieee80211_reg_rule));
+
+	rd = kzalloc(size_of_regd, GFP_KERNEL);
+	if (!rd)
+		return -ENOMEM;
+
+	rd->n_reg_rules = num_rules;
+	rd->alpha2[0] = alpha2[0];
+	rd->alpha2[1] = alpha2[1];
+
+	nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
+			rem_reg_rules) {
+		nla_parse(tb, NL80211_REG_RULE_ATTR_MAX,
+			nla_data(nl_reg_rule), nla_len(nl_reg_rule),
+			reg_rule_policy);
+		r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]);
+		if (r)
+			goto bad_reg;
+
+		rule_idx++;
+
+		if (rule_idx > NL80211_MAX_SUPP_REG_RULES)
+			goto bad_reg;
+	}
+
+	BUG_ON(rule_idx != num_rules);
+
+	mutex_lock(&cfg80211_drv_mutex);
+	r = set_regdom(rd);
+	mutex_unlock(&cfg80211_drv_mutex);
+	if (r)
+		goto bad_reg;
+
+	return r;
+
+bad_reg:
+	kfree(rd);
+	return -EINVAL;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -1736,6 +1875,18 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_SET_REG,
+		.doit = nl80211_set_reg,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_REQ_SET_REG,
+		.doit = nl80211_req_set_reg,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 /* multicast groups */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 855bff4b325..592b2e391d4 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2,179 +2,758 @@
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2008	Luis R. Rodriguez <lrodriguz@atheros.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
-/*
- * This regulatory domain control implementation is highly incomplete, it
- * only exists for the purpose of not regressing mac80211.
- *
- * For now, drivers can restrict the set of allowed channels by either
- * not registering those channels or setting the IEEE80211_CHAN_DISABLED
- * flag; that flag will only be *set* by this code, never *cleared.
+/**
+ * DOC: Wireless regulatory infrastructure
  *
  * The usual implementation is for a driver to read a device EEPROM to
  * determine which regulatory domain it should be operating under, then
  * looking up the allowable channels in a driver-local table and finally
  * registering those channels in the wiphy structure.
  *
- * Alternatively, drivers that trust the regulatory domain control here
- * will register a complete set of capabilities and the control code
- * will restrict the set by setting the IEEE80211_CHAN_* flags.
+ * Another set of compliance enforcement is for drivers to use their
+ * own compliance limits which can be stored on the EEPROM. The host
+ * driver or firmware may ensure these are used.
+ *
+ * In addition to all this we provide an extra layer of regulatory
+ * conformance. For drivers which do not have any regulatory
+ * information CRDA provides the complete regulatory solution.
+ * For others it provides a community effort on further restrictions
+ * to enhance compliance.
+ *
+ * Note: When number of rules --> infinity we will not be able to
+ * index on alpha2 any more, instead we'll probably have to
+ * rely on some SHA1 checksum of the regdomain for example.
+ *
  */
 #include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/random.h>
+#include <linux/nl80211.h>
+#include <linux/platform_device.h>
 #include <net/wireless.h>
+#include <net/cfg80211.h>
 #include "core.h"
+#include "reg.h"
 
-static char *ieee80211_regdom = "US";
-module_param(ieee80211_regdom, charp, 0444);
-MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
+/* To trigger userspace events */
+static struct platform_device *reg_pdev;
 
-struct ieee80211_channel_range {
-	short start_freq;
-	short end_freq;
-	int max_power;
-	int max_antenna_gain;
-	u32 flags;
+/* Keep the ordering from large to small */
+static u32 supported_bandwidths[] = {
+	MHZ_TO_KHZ(40),
+	MHZ_TO_KHZ(20),
 };
 
-struct ieee80211_regdomain {
-	const char *code;
-	const struct ieee80211_channel_range *ranges;
-	int n_ranges;
-};
+bool is_world_regdom(char *alpha2)
+{
+	if (!alpha2)
+		return false;
+	if (alpha2[0] == '0' && alpha2[1] == '0')
+		return true;
+	return false;
+}
 
-#define RANGE_PWR(_start, _end, _pwr, _ag, _flags)	\
-	{ _start, _end, _pwr, _ag, _flags }
+static bool is_alpha2_set(char *alpha2)
+{
+	if (!alpha2)
+		return false;
+	if (alpha2[0] != 0 && alpha2[1] != 0)
+		return true;
+	return false;
+}
 
+static bool is_alpha_upper(char letter)
+{
+	/* ASCII A - Z */
+	if (letter >= 65 && letter <= 90)
+		return true;
+	return false;
+}
 
-/*
- * Ideally, in the future, these definitions will be loaded from a
- * userspace table via some daemon.
- */
-static const struct ieee80211_channel_range ieee80211_US_channels[] = {
-	/* IEEE 802.11b/g, channels 1..11 */
-	RANGE_PWR(2412, 2462, 27, 6, 0),
-	/* IEEE 802.11a, channel 36*/
-	RANGE_PWR(5180, 5180, 23, 6, 0),
-	/* IEEE 802.11a, channel 40*/
-	RANGE_PWR(5200, 5200, 23, 6, 0),
-	/* IEEE 802.11a, channel 44*/
-	RANGE_PWR(5220, 5220, 23, 6, 0),
-	/* IEEE 802.11a, channels 48..64 */
-	RANGE_PWR(5240, 5320, 23, 6, 0),
-	/* IEEE 802.11a, channels 149..165, outdoor */
-	RANGE_PWR(5745, 5825, 30, 6, 0),
-};
+static bool is_unknown_alpha2(char *alpha2)
+{
+	if (!alpha2)
+		return false;
+	/* Special case where regulatory domain was built by driver
+	 * but a specific alpha2 cannot be determined */
+	if (alpha2[0] == '9' && alpha2[1] == '9')
+		return true;
+	return false;
+}
 
-static const struct ieee80211_channel_range ieee80211_JP_channels[] = {
-	/* IEEE 802.11b/g, channels 1..14 */
-	RANGE_PWR(2412, 2484, 20, 6, 0),
-	/* IEEE 802.11a, channels 34..48 */
-	RANGE_PWR(5170, 5240, 20, 6, IEEE80211_CHAN_PASSIVE_SCAN),
-	/* IEEE 802.11a, channels 52..64 */
-	RANGE_PWR(5260, 5320, 20, 6, IEEE80211_CHAN_NO_IBSS |
-				     IEEE80211_CHAN_RADAR),
-};
+static bool is_an_alpha2(char *alpha2)
+{
+	if (!alpha2)
+		return false;
+	if (is_alpha_upper(alpha2[0]) && is_alpha_upper(alpha2[1]))
+		return true;
+	return false;
+}
 
-static const struct ieee80211_channel_range ieee80211_EU_channels[] = {
-	/* IEEE 802.11b/g, channels 1..13 */
-	RANGE_PWR(2412, 2472, 20, 6, 0),
-	/* IEEE 802.11a, channel 36*/
-	RANGE_PWR(5180, 5180, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN),
-	/* IEEE 802.11a, channel 40*/
-	RANGE_PWR(5200, 5200, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN),
-	/* IEEE 802.11a, channel 44*/
-	RANGE_PWR(5220, 5220, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN),
-	/* IEEE 802.11a, channels 48..64 */
-	RANGE_PWR(5240, 5320, 23, 6, IEEE80211_CHAN_NO_IBSS |
-				     IEEE80211_CHAN_RADAR),
-	/* IEEE 802.11a, channels 100..140 */
-	RANGE_PWR(5500, 5700, 30, 6, IEEE80211_CHAN_NO_IBSS |
-				     IEEE80211_CHAN_RADAR),
-};
+static bool alpha2_equal(char *alpha2_x, char *alpha2_y)
+{
+	if (!alpha2_x || !alpha2_y)
+		return false;
+	if (alpha2_x[0] == alpha2_y[0] &&
+		alpha2_x[1] == alpha2_y[1])
+		return true;
+	return false;
+}
+
+static bool regdom_changed(char *alpha2)
+{
+	if (!cfg80211_regdomain)
+		return true;
+	if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2))
+		return false;
+	return true;
+}
+
+/* This lets us keep regulatory code which is updated on a regulatory
+ * basis in userspace. */
+static int call_crda(const char *alpha2)
+{
+	char country_env[9 + 2] = "COUNTRY=";
+	char *envp[] = {
+		country_env,
+		NULL
+	};
+
+	if (!is_world_regdom((char *) alpha2))
+		printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n",
+			alpha2[0], alpha2[1]);
+	else
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+		return -EINVAL;
+#else
+		printk(KERN_INFO "cfg80211: Calling CRDA to update world "
+			"regulatory domain\n");
+#endif
+
+	country_env[8] = alpha2[0];
+	country_env[9] = alpha2[1];
+
+	return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, envp);
+}
+
+/* This has the logic which determines when a new request
+ * should be ignored. */
+static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by,
+	char *alpha2, struct ieee80211_regdomain *rd)
+{
+	struct regulatory_request *last_request = NULL;
 
-#define REGDOM(_code)							\
-	{								\
-		.code = __stringify(_code),				\
-		.ranges = ieee80211_ ##_code## _channels,		\
-		.n_ranges = ARRAY_SIZE(ieee80211_ ##_code## _channels),	\
+	/* All initial requests are respected */
+	if (list_empty(&regulatory_requests))
+		return 0;
+
+	last_request = list_first_entry(&regulatory_requests,
+		struct regulatory_request, list);
+
+	switch (set_by) {
+	case REGDOM_SET_BY_INIT:
+		return -EINVAL;
+	case REGDOM_SET_BY_CORE:
+		/* Always respect new wireless core hints, should only
+		 * come in for updating the world regulatory domain at init
+		 * anyway */
+		return 0;
+	case REGDOM_SET_BY_COUNTRY_IE:
+		if (last_request->initiator == set_by) {
+			if (last_request->wiphy != wiphy) {
+				/* Two cards with two APs claiming different
+				 * different Country IE alpha2s!
+				 * You're special!! */
+				if (!alpha2_equal(last_request->alpha2,
+						cfg80211_regdomain->alpha2)) {
+					/* XXX: Deal with conflict, consider
+					 * building a new one out of the
+					 * intersection */
+					WARN_ON(1);
+					return -EOPNOTSUPP;
+				}
+				return -EALREADY;
+			}
+			/* Two consecutive Country IE hints on the same wiphy */
+			if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2))
+				return 0;
+			return -EALREADY;
+		}
+		if (WARN_ON(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2)),
+				"Invalid Country IE regulatory hint passed "
+				"to the wireless core\n")
+			return -EINVAL;
+		/* We ignore Country IE hints for now, as we haven't yet
+		 * added the dot11MultiDomainCapabilityEnabled flag
+		 * for wiphys */
+		return 1;
+	case REGDOM_SET_BY_DRIVER:
+		BUG_ON(!wiphy);
+		if (last_request->initiator == set_by) {
+			/* Two separate drivers hinting different things,
+			 * this is possible if you have two devices present
+			 * on a system with different EEPROM regulatory
+			 * readings. XXX: Do intersection, we support only
+			 * the first regulatory hint for now */
+			if (last_request->wiphy != wiphy)
+				return -EALREADY;
+			if (rd)
+				return -EALREADY;
+			/* Driver should not be trying to hint different
+			 * regulatory domains! */
+			BUG_ON(!alpha2_equal(alpha2,
+					cfg80211_regdomain->alpha2));
+			return -EALREADY;
+		}
+		if (last_request->initiator == REGDOM_SET_BY_CORE)
+			return 0;
+		/* XXX: Handle intersection, and add the
+		 * dot11MultiDomainCapabilityEnabled flag to wiphy. For now
+		 * we assume the driver has this set to false, following the
+		 * 802.11d dot11MultiDomainCapabilityEnabled documentation */
+		if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE)
+			return 0;
+		return 0;
+	case REGDOM_SET_BY_USER:
+		if (last_request->initiator == set_by ||
+				last_request->initiator == REGDOM_SET_BY_CORE)
+			return 0;
+		/* Drivers can use their wiphy's reg_notifier()
+		 * to override any information */
+		if (last_request->initiator == REGDOM_SET_BY_DRIVER)
+			return 0;
+		/* XXX: Handle intersection */
+		if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE)
+			return -EOPNOTSUPP;
+		return 0;
+	default:
+		return -EINVAL;
 	}
+}
 
-static const struct ieee80211_regdomain ieee80211_regdoms[] = {
-	REGDOM(US),
-	REGDOM(JP),
-	REGDOM(EU),
-};
+static bool __reg_is_valid_request(char *alpha2,
+	struct regulatory_request **request)
+{
+	struct regulatory_request *req;
+	if (list_empty(&regulatory_requests))
+		return false;
+	list_for_each_entry(req, &regulatory_requests, list) {
+		if (alpha2_equal(req->alpha2, alpha2)) {
+			*request = req;
+			return true;
+		}
+	}
+	return false;
+}
 
+/* Used by nl80211 before kmalloc'ing our regulatory domain */
+bool reg_is_valid_request(char *alpha2)
+{
+	struct regulatory_request *request = NULL;
+	return  __reg_is_valid_request(alpha2, &request);
+}
 
-static const struct ieee80211_regdomain *get_regdom(void)
+/* Sanity check on a regulatory rule */
+static bool is_valid_reg_rule(struct ieee80211_reg_rule *rule)
 {
-	static const struct ieee80211_channel_range
-	ieee80211_world_channels[] = {
-		/* IEEE 802.11b/g, channels 1..11 */
-		RANGE_PWR(2412, 2462, 27, 6, 0),
-	};
-	static const struct ieee80211_regdomain regdom_world = REGDOM(world);
-	int i;
+	struct ieee80211_freq_range *freq_range = &rule->freq_range;
+	u32 freq_diff;
+
+	if (freq_range->start_freq_khz == 0 || freq_range->end_freq_khz == 0)
+		return false;
+
+	if (freq_range->start_freq_khz > freq_range->end_freq_khz)
+		return false;
+
+	freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz;
+
+	if (freq_range->max_bandwidth_khz > freq_diff)
+		return false;
+
+	return true;
+}
+
+static bool is_valid_rd(struct ieee80211_regdomain *rd)
+{
+	struct ieee80211_reg_rule *reg_rule = NULL;
+	unsigned int i;
 
-	for (i = 0; i < ARRAY_SIZE(ieee80211_regdoms); i++)
-		if (strcmp(ieee80211_regdom, ieee80211_regdoms[i].code) == 0)
-			return &ieee80211_regdoms[i];
+	if (!rd->n_reg_rules)
+		return false;
 
-	return &regdom_world;
+	for (i = 0; i < rd->n_reg_rules; i++) {
+		reg_rule = &rd->reg_rules[i];
+		if (!is_valid_reg_rule(reg_rule))
+			return false;
+	}
+
+	return true;
 }
 
+/* Returns value in KHz */
+static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range,
+	u32 freq)
+{
+	unsigned int i;
+	for (i = 0; i < ARRAY_SIZE(supported_bandwidths); i++) {
+		u32 start_freq_khz = freq - supported_bandwidths[i]/2;
+		u32 end_freq_khz = freq + supported_bandwidths[i]/2;
+		if (start_freq_khz >= freq_range->start_freq_khz &&
+			end_freq_khz <= freq_range->end_freq_khz)
+			return supported_bandwidths[i];
+	}
+	return 0;
+}
 
-static void handle_channel(struct ieee80211_channel *chan,
-			   const struct ieee80211_regdomain *rd)
+/* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may
+ * want to just have the channel structure use these */
+static u32 map_regdom_flags(u32 rd_flags)
+{
+	u32 channel_flags = 0;
+	if (rd_flags & NL80211_RRF_PASSIVE_SCAN)
+		channel_flags |= IEEE80211_CHAN_PASSIVE_SCAN;
+	if (rd_flags & NL80211_RRF_NO_IBSS)
+		channel_flags |= IEEE80211_CHAN_NO_IBSS;
+	if (rd_flags & NL80211_RRF_DFS)
+		channel_flags |= IEEE80211_CHAN_RADAR;
+	return channel_flags;
+}
+
+/**
+ * freq_reg_info - get regulatory information for the given frequency
+ * @center_freq: Frequency in KHz for which we want regulatory information for
+ * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one
+ * 	you can set this to 0. If this frequency is allowed we then set
+ * 	this value to the maximum allowed bandwidth.
+ * @reg_rule: the regulatory rule which we have for this frequency
+ *
+ * Use this function to get the regulatory rule for a specific frequency.
+ */
+static int freq_reg_info(u32 center_freq, u32 *bandwidth,
+			 const struct ieee80211_reg_rule **reg_rule)
 {
 	int i;
-	u32 flags = chan->orig_flags;
-	const struct ieee80211_channel_range *rg = NULL;
+	u32 max_bandwidth = 0;
 
-	for (i = 0; i < rd->n_ranges; i++) {
-		if (rd->ranges[i].start_freq <= chan->center_freq &&
-		    chan->center_freq <= rd->ranges[i].end_freq) {
-			rg = &rd->ranges[i];
+	if (!cfg80211_regdomain)
+		return -EINVAL;
+
+	for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) {
+		const struct ieee80211_reg_rule *rr;
+		const struct ieee80211_freq_range *fr = NULL;
+		const struct ieee80211_power_rule *pr = NULL;
+
+		rr = &cfg80211_regdomain->reg_rules[i];
+		fr = &rr->freq_range;
+		pr = &rr->power_rule;
+		max_bandwidth = freq_max_bandwidth(fr, center_freq);
+		if (max_bandwidth && *bandwidth <= max_bandwidth) {
+			*reg_rule = rr;
+			*bandwidth = max_bandwidth;
 			break;
 		}
 	}
 
-	if (!rg) {
-		/* not found */
+	return !max_bandwidth;
+}
+
+static void handle_channel(struct ieee80211_channel *chan)
+{
+	int r;
+	u32 flags = chan->orig_flags;
+	u32 max_bandwidth = 0;
+	const struct ieee80211_reg_rule *reg_rule = NULL;
+	const struct ieee80211_power_rule *power_rule = NULL;
+
+	r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq),
+		&max_bandwidth, &reg_rule);
+
+	if (r) {
 		flags |= IEEE80211_CHAN_DISABLED;
 		chan->flags = flags;
 		return;
 	}
 
-	chan->flags = flags;
+	power_rule = &reg_rule->power_rule;
+
+	chan->flags = flags | map_regdom_flags(reg_rule->flags);
 	chan->max_antenna_gain = min(chan->orig_mag,
-					 rg->max_antenna_gain);
+		(int) MBI_TO_DBI(power_rule->max_antenna_gain));
+	chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth);
 	if (chan->orig_mpwr)
-		chan->max_power = min(chan->orig_mpwr, rg->max_power);
+		chan->max_power = min(chan->orig_mpwr,
+			(int) MBM_TO_DBM(power_rule->max_eirp));
 	else
-		chan->max_power = rg->max_power;
+		chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp);
 }
 
-static void handle_band(struct ieee80211_supported_band *sband,
-			const struct ieee80211_regdomain *rd)
+static void handle_band(struct ieee80211_supported_band *sband)
 {
 	int i;
 
 	for (i = 0; i < sband->n_channels; i++)
-		handle_channel(&sband->channels[i], rd);
+		handle_channel(&sband->channels[i]);
 }
 
-void wiphy_update_regulatory(struct wiphy *wiphy)
+static void update_all_wiphy_regulatory(enum reg_set_by setby)
 {
-	enum ieee80211_band band;
-	const struct ieee80211_regdomain *rd = get_regdom();
+	struct cfg80211_registered_device *drv;
 
-	for (band = 0; band < IEEE80211_NUM_BANDS; band++)
+	list_for_each_entry(drv, &cfg80211_drv_list, list)
+		wiphy_update_regulatory(&drv->wiphy, setby);
+}
+
+void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby)
+{
+	enum ieee80211_band band;
+	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 		if (wiphy->bands[band])
-			handle_band(wiphy->bands[band], rd);
+			handle_band(wiphy->bands[band]);
+		if (wiphy->reg_notifier)
+			wiphy->reg_notifier(wiphy, setby);
+	}
+}
+
+/* Caller must hold &cfg80211_drv_mutex */
+int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
+		      const char *alpha2, struct ieee80211_regdomain *rd)
+{
+	struct regulatory_request *request;
+	char *rd_alpha2;
+	int r = 0;
+
+	r = ignore_request(wiphy, set_by, (char *) alpha2, rd);
+	if (r)
+		return r;
+
+	if (rd)
+		rd_alpha2 = rd->alpha2;
+	else
+		rd_alpha2 = (char *) alpha2;
+
+	switch (set_by) {
+	case REGDOM_SET_BY_CORE:
+	case REGDOM_SET_BY_COUNTRY_IE:
+	case REGDOM_SET_BY_DRIVER:
+	case REGDOM_SET_BY_USER:
+		request = kzalloc(sizeof(struct regulatory_request),
+			GFP_KERNEL);
+		if (!request)
+			return -ENOMEM;
+
+		request->alpha2[0] = rd_alpha2[0];
+		request->alpha2[1] = rd_alpha2[1];
+		request->initiator = set_by;
+		request->wiphy = wiphy;
+
+		list_add_tail(&request->list, &regulatory_requests);
+		if (rd)
+			break;
+		r = call_crda(alpha2);
+#ifndef CONFIG_WIRELESS_OLD_REGULATORY
+		if (r)
+			printk(KERN_ERR "cfg80211: Failed calling CRDA\n");
+#endif
+		break;
+	default:
+		r = -ENOTSUPP;
+		break;
+	}
+
+	return r;
+}
+
+/* If rd is not NULL and if this call fails the caller must free it */
+int regulatory_hint(struct wiphy *wiphy, const char *alpha2,
+	struct ieee80211_regdomain *rd)
+{
+	int r;
+	BUG_ON(!rd && !alpha2);
+
+	mutex_lock(&cfg80211_drv_mutex);
+
+	r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, rd);
+	if (r || !rd)
+		goto unlock_and_exit;
+
+	/* If the driver passed a regulatory domain we skipped asking
+	 * userspace for one so we can now go ahead and set it */
+	r = set_regdom(rd);
+
+unlock_and_exit:
+	mutex_unlock(&cfg80211_drv_mutex);
+	return r;
+}
+EXPORT_SYMBOL(regulatory_hint);
+
+
+static void print_rd_rules(struct ieee80211_regdomain *rd)
+{
+	unsigned int i;
+	struct ieee80211_reg_rule *reg_rule = NULL;
+	struct ieee80211_freq_range *freq_range = NULL;
+	struct ieee80211_power_rule *power_rule = NULL;
+
+	printk(KERN_INFO "\t(start_freq - end_freq @ bandwidth), "
+		"(max_antenna_gain, max_eirp)\n");
+
+	for (i = 0; i < rd->n_reg_rules; i++) {
+		reg_rule = &rd->reg_rules[i];
+		freq_range = &reg_rule->freq_range;
+		power_rule = &reg_rule->power_rule;
+
+		/* There may not be documentation for max antenna gain
+		 * in certain regions */
+		if (power_rule->max_antenna_gain)
+			printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), "
+				"(%d mBi, %d mBm)\n",
+				freq_range->start_freq_khz,
+				freq_range->end_freq_khz,
+				freq_range->max_bandwidth_khz,
+				power_rule->max_antenna_gain,
+				power_rule->max_eirp);
+		else
+			printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), "
+				"(N/A, %d mBm)\n",
+				freq_range->start_freq_khz,
+				freq_range->end_freq_khz,
+				freq_range->max_bandwidth_khz,
+				power_rule->max_eirp);
+	}
+}
+
+static void print_regdomain(struct ieee80211_regdomain *rd)
+{
+
+	if (is_world_regdom(rd->alpha2))
+		printk(KERN_INFO "cfg80211: World regulatory "
+			"domain updated:\n");
+	else {
+		if (is_unknown_alpha2(rd->alpha2))
+			printk(KERN_INFO "cfg80211: Regulatory domain "
+				"changed to driver built-in settings "
+				"(unknown country)\n");
+		else
+			printk(KERN_INFO "cfg80211: Regulatory domain "
+				"changed to country: %c%c\n",
+				rd->alpha2[0], rd->alpha2[1]);
+	}
+	print_rd_rules(rd);
+}
+
+void print_regdomain_info(struct ieee80211_regdomain *rd)
+{
+	printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n",
+		rd->alpha2[0], rd->alpha2[1]);
+	print_rd_rules(rd);
+}
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+
+static bool is_old_static_regdom(struct ieee80211_regdomain *rd)
+{
+	if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom)
+		return true;
+	return false;
+}
+
+/* The old crap never deals with a world regulatory domain, it only
+ * deals with the static regulatory domain passed and if possible
+ * an updated "US" or "JP" regulatory domain. We do however store the
+ * old static regulatory domain in cfg80211_world_regdom for convenience
+ * of use here */
+static void reset_regdomains_static(void)
+{
+	if (!is_old_static_regdom(cfg80211_regdomain))
+		kfree(cfg80211_regdomain);
+	/* This is setting the regdom to the old static regdom */
+	cfg80211_regdomain =
+		(struct ieee80211_regdomain *) cfg80211_world_regdom;
+}
+#else
+static void reset_regdomains(void)
+{
+	if (cfg80211_world_regdom && cfg80211_world_regdom != &world_regdom) {
+		if (cfg80211_world_regdom == cfg80211_regdomain) {
+			kfree(cfg80211_regdomain);
+		} else {
+			kfree(cfg80211_world_regdom);
+			kfree(cfg80211_regdomain);
+		}
+	} else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom)
+		kfree(cfg80211_regdomain);
+
+	cfg80211_world_regdom = (struct ieee80211_regdomain *) &world_regdom;
+	cfg80211_regdomain = NULL;
+}
+
+/* Dynamic world regulatory domain requested by the wireless
+ * core upon initialization */
+static void update_world_regdomain(struct ieee80211_regdomain *rd)
+{
+	BUG_ON(list_empty(&regulatory_requests));
+
+	reset_regdomains();
+
+	cfg80211_world_regdom = rd;
+	cfg80211_regdomain = rd;
+}
+#endif
+
+static int __set_regdom(struct ieee80211_regdomain *rd)
+{
+	struct regulatory_request *request = NULL;
+
+	/* Some basic sanity checks first */
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+	/* We ignore the world regdom with the old static regdomains setup
+	 * as there is no point to it with satic regulatory definitions :(
+	 * Don't worry this shit will be removed soon... */
+	if (is_world_regdom(rd->alpha2))
+		return -EINVAL;
+#else
+	if (is_world_regdom(rd->alpha2)) {
+		if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request)))
+			return -EINVAL;
+		update_world_regdomain(rd);
+		return 0;
+	}
+#endif
+
+	if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) &&
+			!is_unknown_alpha2(rd->alpha2))
+		return -EINVAL;
+
+	if (list_empty(&regulatory_requests))
+		return -EINVAL;
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+	/* Static "US" and "JP" will be overridden, but just once */
+	if (!is_old_static_regdom(cfg80211_regdomain) &&
+			!regdom_changed(rd->alpha2))
+		return -EINVAL;
+#else
+	if (!regdom_changed(rd->alpha2))
+		return -EINVAL;
+#endif
+
+	/* Now lets set the regulatory domain, update all driver channels
+	 * and finally inform them of what we have done, in case they want
+	 * to review or adjust their own settings based on their own
+	 * internal EEPROM data */
+
+	if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request)))
+		return -EINVAL;
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+	reset_regdomains_static();
+#else
+	reset_regdomains();
+#endif
+
+	/* Country IE parsing coming soon */
+	switch (request->initiator) {
+	case REGDOM_SET_BY_CORE:
+	case REGDOM_SET_BY_DRIVER:
+	case REGDOM_SET_BY_USER:
+		if (!is_valid_rd(rd)) {
+			printk(KERN_ERR "cfg80211: Invalid "
+				"regulatory domain detected:\n");
+			print_regdomain_info(rd);
+			return -EINVAL;
+		}
+		break;
+	case REGDOM_SET_BY_COUNTRY_IE: /* Not yet */
+		WARN_ON(1);
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* Tada! */
+	cfg80211_regdomain = rd;
+	request->granted = 1;
+
+	return 0;
+}
+
+
+/* Use this call to set the current regulatory domain. Conflicts with
+ * multiple drivers can be ironed out later. Caller must've already
+ * kmalloc'd the rd structure. If this calls fails you should kfree()
+ * the passed rd. Caller must hold cfg80211_drv_mutex */
+int set_regdom(struct ieee80211_regdomain *rd)
+{
+	struct regulatory_request *this_request = NULL, *prev_request = NULL;
+	int r;
+
+	if (!list_empty(&regulatory_requests))
+		prev_request = list_first_entry(&regulatory_requests,
+			struct regulatory_request, list);
+
+	/* Note that this doesn't update the wiphys, this is done below */
+	r = __set_regdom(rd);
+	if (r)
+		return r;
+
+	BUG_ON((!__reg_is_valid_request(rd->alpha2, &this_request)));
+
+	/* The initial standard core update of the world regulatory domain, no
+	 * need to keep that request info around if it didn't fail. */
+	if (is_world_regdom(rd->alpha2) &&
+			this_request->initiator == REGDOM_SET_BY_CORE &&
+			this_request->granted) {
+		list_del(&this_request->list);
+		kfree(this_request);
+		this_request = NULL;
+	}
+
+	/* Remove old requests, we only leave behind the last one */
+	if (prev_request) {
+		list_del(&prev_request->list);
+		kfree(prev_request);
+		prev_request = NULL;
+	}
+
+	/* This would make this whole thing pointless */
+	BUG_ON(rd != cfg80211_regdomain);
+
+	/* update all wiphys now with the new established regulatory domain */
+	update_all_wiphy_regulatory(this_request->initiator);
+
+	print_regdomain(rd);
+
+	return r;
+}
+
+int regulatory_init(void)
+{
+	reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
+	if (IS_ERR(reg_pdev))
+		return PTR_ERR(reg_pdev);
+	return 0;
+}
+
+void regulatory_exit(void)
+{
+	struct regulatory_request *req, *req_tmp;
+	mutex_lock(&cfg80211_drv_mutex);
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+	reset_regdomains_static();
+#else
+	reset_regdomains();
+#endif
+	list_for_each_entry_safe(req, req_tmp, &regulatory_requests, list) {
+		list_del(&req->list);
+		kfree(req);
+	}
+	platform_device_unregister(reg_pdev);
+	mutex_unlock(&cfg80211_drv_mutex);
 }
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
new file mode 100644
index 00000000000..d75fd023297
--- /dev/null
+++ b/net/wireless/reg.h
@@ -0,0 +1,44 @@
+#ifndef __NET_WIRELESS_REG_H
+#define __NET_WIRELESS_REG_H
+
+extern const struct ieee80211_regdomain world_regdom;
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+extern const struct ieee80211_regdomain us_regdom;
+extern const struct ieee80211_regdomain jp_regdom;
+extern const struct ieee80211_regdomain eu_regdom;
+#endif
+
+extern struct ieee80211_regdomain *cfg80211_regdomain;
+extern struct ieee80211_regdomain *cfg80211_world_regdom;
+extern struct list_head regulatory_requests;
+
+struct regdom_last_setby {
+	struct wiphy *wiphy;
+	u8 initiator;
+};
+
+/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */
+struct regulatory_request {
+	struct list_head list;
+	struct wiphy *wiphy;
+	int granted;
+	enum reg_set_by initiator;
+	char alpha2[2];
+};
+
+bool is_world_regdom(char *alpha2);
+bool reg_is_valid_request(char *alpha2);
+
+int set_regdom(struct ieee80211_regdomain *rd);
+int __regulatory_hint_alpha2(struct wiphy *wiphy, enum reg_set_by set_by,
+		      const char *alpha2);
+
+int regulatory_init(void);
+void regulatory_exit(void);
+
+void print_regdomain_info(struct ieee80211_regdomain *);
+
+/* If a char is A-Z */
+#define IS_ALPHA(letter) (letter >= 65 && letter <= 90)
+
+#endif  /* __NET_WIRELESS_REG_H */
-- 
cgit v1.2.3-70-g09d2


From 24723d1bc9da79a53d0495b9cf9ee18747121b03 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:46 +0200
Subject: mac80211: move ieee80211_sta_expire

ieee80211_sta_expire uses the internal __sta_info_unlink
function which can become static if this function is moved
to sta_info.c.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c     | 26 --------------------------
 net/mac80211/sta_info.c | 28 +++++++++++++++++++++++++++-
 net/mac80211/sta_info.h |  3 ++-
 3 files changed, 29 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2c06f6965b7..ffc47c81a16 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1913,32 +1913,6 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
 }
 
 
-static void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct sta_info *sta, *tmp;
-	LIST_HEAD(tmp_list);
-	DECLARE_MAC_BUF(mac);
-	unsigned long flags;
-
-	spin_lock_irqsave(&local->sta_lock, flags);
-	list_for_each_entry_safe(sta, tmp, &local->sta_list, list)
-		if (time_after(jiffies, sta->last_rx + exp_time)) {
-#ifdef CONFIG_MAC80211_IBSS_DEBUG
-			printk(KERN_DEBUG "%s: expiring inactive STA %s\n",
-			       sdata->dev->name, print_mac(mac, sta->addr));
-#endif
-			__sta_info_unlink(&sta);
-			if (sta)
-				list_add(&sta->list, &tmp_list);
-		}
-	spin_unlock_irqrestore(&local->sta_lock, flags);
-
-	list_for_each_entry_safe(sta, tmp, &tmp_list, list)
-		sta_info_destroy(sta);
-}
-
-
 static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f2ba653b9d6..3370b262563 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -424,7 +424,7 @@ void sta_info_clear_tim_bit(struct sta_info *sta)
 	spin_unlock_irqrestore(&sta->local->sta_lock, flags);
 }
 
-void __sta_info_unlink(struct sta_info **sta)
+static void __sta_info_unlink(struct sta_info **sta)
 {
 	struct ieee80211_local *local = (*sta)->local;
 	struct ieee80211_sub_if_data *sdata = (*sta)->sdata;
@@ -802,3 +802,29 @@ void sta_info_flush_delayed(struct ieee80211_sub_if_data *sdata)
 		schedule_work(&local->sta_flush_work);
 	spin_unlock_irqrestore(&local->sta_lock, flags);
 }
+
+void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
+			  unsigned long exp_time)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sta_info *sta, *tmp;
+	LIST_HEAD(tmp_list);
+	DECLARE_MAC_BUF(mac);
+	unsigned long flags;
+
+	spin_lock_irqsave(&local->sta_lock, flags);
+	list_for_each_entry_safe(sta, tmp, &local->sta_list, list)
+		if (time_after(jiffies, sta->last_rx + exp_time)) {
+#ifdef CONFIG_MAC80211_IBSS_DEBUG
+			printk(KERN_DEBUG "%s: expiring inactive STA %s\n",
+			       sdata->dev->name, print_mac(mac, sta->addr));
+#endif
+			__sta_info_unlink(&sta);
+			if (sta)
+				list_add(&sta->list, &tmp_list);
+		}
+	spin_unlock_irqrestore(&local->sta_lock, flags);
+
+	list_for_each_entry_safe(sta, tmp, &tmp_list, list)
+		sta_info_destroy(sta);
+}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 4a581a5b576..22007990099 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -452,7 +452,6 @@ int sta_info_insert(struct sta_info *sta);
  * has already unlinked it.
  */
 void sta_info_unlink(struct sta_info **sta);
-void __sta_info_unlink(struct sta_info **sta);
 
 void sta_info_destroy(struct sta_info *sta);
 void sta_info_set_tim_bit(struct sta_info *sta);
@@ -464,5 +463,7 @@ void sta_info_stop(struct ieee80211_local *local);
 int sta_info_flush(struct ieee80211_local *local,
 		    struct ieee80211_sub_if_data *sdata);
 void sta_info_flush_delayed(struct ieee80211_sub_if_data *sdata);
+void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
+			  unsigned long exp_time);
 
 #endif /* STA_INFO_H */
-- 
cgit v1.2.3-70-g09d2


From a1678f84bff9b20807f7f6a45ebfb56a0c02b353 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:47 +0200
Subject: mac80211: move STA timer restart

This I shouldn't have moved to the scan implementation, move
it back to the MLME where it belongs, to the notification.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 13 +++++++++++++
 net/mac80211/scan.c | 14 --------------
 2 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ffc47c81a16..809fb916089 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2594,6 +2594,13 @@ void ieee80211_sta_work(struct work_struct *work)
 	}
 }
 
+static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
+{
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
+	    ieee80211_vif_is_mesh(&sdata->vif))
+		ieee80211_sta_timer((unsigned long)sdata);
+}
+
 void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
@@ -2606,4 +2613,10 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
 		    !ieee80211_sta_active_ibss(sdata)))
 			ieee80211_sta_find_ibss(sdata, ifsta);
 	}
+
+	/* Restart STA timers */
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &local->interfaces, list)
+		ieee80211_restart_sta_timer(sdata);
+	rcu_read_unlock();
 }
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 010781b806f..f8b296bed0b 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -424,13 +424,6 @@ static void ieee80211_send_nullfunc(struct ieee80211_local *local,
 	ieee80211_tx_skb(sdata, skb, 0);
 }
 
-static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
-{
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    ieee80211_vif_is_mesh(&sdata->vif))
-		ieee80211_sta_timer((unsigned long)sdata);
-}
-
 void ieee80211_scan_completed(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
@@ -446,11 +439,6 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 		if (ieee80211_hw_config(local))
 			printk(KERN_DEBUG "%s: failed to restore operational "
 			       "channel after scan\n", wiphy_name(local->hw.wiphy));
-		/* Restart STA timer for HW scan case */
-		rcu_read_lock();
-		list_for_each_entry_rcu(sdata, &local->interfaces, list)
-			ieee80211_restart_sta_timer(sdata);
-		rcu_read_unlock();
 
 		goto done;
 	}
@@ -483,8 +471,6 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 			}
 		} else
 			netif_tx_wake_all_queues(sdata->dev);
-
-		ieee80211_restart_sta_timer(sdata);
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3-70-g09d2


From 7c95069522d02ff144cd421be6618dce619caf7e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:48 +0200
Subject: mac80211: dont set REQ_RUN when scan finishes

The timer restart is done wrongly, we shouldn't set the REQ_RUN
bit when the scan has finished if it hadn't been set before the
scan started. If the timer fires during the scan, it will set
REQ_RUN and then we can run the work for it, if it didn't fire
then we shouldn't run its work either.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 809fb916089..ef73f895372 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2598,7 +2598,8 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
 {
 	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
 	    ieee80211_vif_is_mesh(&sdata->vif))
-		ieee80211_sta_timer((unsigned long)sdata);
+		queue_work(sdata->local->hw.workqueue,
+			   &sdata->u.sta.work);
 }
 
 void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
-- 
cgit v1.2.3-70-g09d2


From 472dbc45dc1966284de72d7de15690c17ed2cf33 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:49 +0200
Subject: mac80211: split off mesh handling entirely

This patch splits off mesh handling from the STA/IBSS.
Unfortunately it increases mesh code size a bit, but I
think it makes things clearer. The patch also reduces
per-interface run-time memory usage.

Also clean up a few places where ifdef is not required.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c              |   6 +-
 net/mac80211/debugfs_netdev.c   |  38 ++---
 net/mac80211/ieee80211_i.h      | 101 +++++++------
 net/mac80211/iface.c            |  13 +-
 net/mac80211/main.c             |  13 +-
 net/mac80211/mesh.c             | 328 +++++++++++++++++++++++++++++++++-------
 net/mac80211/mesh.h             |  15 +-
 net/mac80211/mesh_hwmp.c        |  98 ++++++------
 net/mac80211/mesh_pathtbl.c     |   8 +-
 net/mac80211/mesh_plink.c       |  18 +--
 net/mac80211/mlme.c             |  83 +---------
 net/mac80211/rc80211_pid_algo.c |   7 +-
 net/mac80211/rx.c               |   8 +-
 net/mac80211/scan.c             |   1 +
 net/mac80211/tx.c               |  15 +-
 15 files changed, 457 insertions(+), 295 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 5a3bdaad6c1..6ec2127f9a6 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -116,9 +116,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 		return ret;
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len)
-		ieee80211_if_sta_set_mesh_id(&sdata->u.sta,
-					     params->mesh_id_len,
-					     params->mesh_id);
+		ieee80211_sdata_set_mesh_id(sdata,
+					    params->mesh_id_len,
+					    params->mesh_id);
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR || !flags)
 		return 0;
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 8165df578c9..0fa7681a3d2 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -207,37 +207,37 @@ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC);
 
 #ifdef CONFIG_MAC80211_MESH
 /* Mesh stats attributes */
-IEEE80211_IF_FILE(fwded_frames, u.sta.mshstats.fwded_frames, DEC);
-IEEE80211_IF_FILE(dropped_frames_ttl, u.sta.mshstats.dropped_frames_ttl, DEC);
+IEEE80211_IF_FILE(fwded_frames, u.mesh.mshstats.fwded_frames, DEC);
+IEEE80211_IF_FILE(dropped_frames_ttl, u.mesh.mshstats.dropped_frames_ttl, DEC);
 IEEE80211_IF_FILE(dropped_frames_no_route,
-		u.sta.mshstats.dropped_frames_no_route, DEC);
-IEEE80211_IF_FILE(estab_plinks, u.sta.mshstats.estab_plinks, ATOMIC);
+		u.mesh.mshstats.dropped_frames_no_route, DEC);
+IEEE80211_IF_FILE(estab_plinks, u.mesh.mshstats.estab_plinks, ATOMIC);
 
 /* Mesh parameters */
 IEEE80211_IF_WFILE(dot11MeshMaxRetries,
-		u.sta.mshcfg.dot11MeshMaxRetries, DEC, u8);
+		u.mesh.mshcfg.dot11MeshMaxRetries, DEC, u8);
 IEEE80211_IF_WFILE(dot11MeshRetryTimeout,
-		u.sta.mshcfg.dot11MeshRetryTimeout, DEC, u16);
+		u.mesh.mshcfg.dot11MeshRetryTimeout, DEC, u16);
 IEEE80211_IF_WFILE(dot11MeshConfirmTimeout,
-		u.sta.mshcfg.dot11MeshConfirmTimeout, DEC, u16);
+		u.mesh.mshcfg.dot11MeshConfirmTimeout, DEC, u16);
 IEEE80211_IF_WFILE(dot11MeshHoldingTimeout,
-		u.sta.mshcfg.dot11MeshHoldingTimeout, DEC, u16);
-IEEE80211_IF_WFILE(dot11MeshTTL, u.sta.mshcfg.dot11MeshTTL, DEC, u8);
-IEEE80211_IF_WFILE(auto_open_plinks, u.sta.mshcfg.auto_open_plinks, DEC, u8);
+		u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC, u16);
+IEEE80211_IF_WFILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC, u8);
+IEEE80211_IF_WFILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC, u8);
 IEEE80211_IF_WFILE(dot11MeshMaxPeerLinks,
-		u.sta.mshcfg.dot11MeshMaxPeerLinks, DEC, u16);
+		u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC, u16);
 IEEE80211_IF_WFILE(dot11MeshHWMPactivePathTimeout,
-		u.sta.mshcfg.dot11MeshHWMPactivePathTimeout, DEC, u32);
+		u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC, u32);
 IEEE80211_IF_WFILE(dot11MeshHWMPpreqMinInterval,
-		u.sta.mshcfg.dot11MeshHWMPpreqMinInterval, DEC, u16);
+		u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC, u16);
 IEEE80211_IF_WFILE(dot11MeshHWMPnetDiameterTraversalTime,
-		u.sta.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC, u16);
+		u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC, u16);
 IEEE80211_IF_WFILE(dot11MeshHWMPmaxPREQretries,
-		u.sta.mshcfg.dot11MeshHWMPmaxPREQretries, DEC, u8);
+		u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries, DEC, u8);
 IEEE80211_IF_WFILE(path_refresh_time,
-		u.sta.mshcfg.path_refresh_time, DEC, u32);
+		u.mesh.mshcfg.path_refresh_time, DEC, u32);
 IEEE80211_IF_WFILE(min_discovery_timeout,
-		u.sta.mshcfg.min_discovery_timeout, DEC, u16);
+		u.mesh.mshcfg.min_discovery_timeout, DEC, u16);
 #endif
 
 
@@ -350,7 +350,7 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
 		add_mesh_stats(sdata);
 		add_mesh_config(sdata);
 #endif
-		/* fall through */
+		break;
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
 		add_sta_files(sdata);
@@ -487,7 +487,7 @@ static void del_files(struct ieee80211_sub_if_data *sdata)
 		del_mesh_stats(sdata);
 		del_mesh_config(sdata);
 #endif
-		/* fall through */
+		break;
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
 		del_sta_files(sdata);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6f334e4c3d6..cac0b133454 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -308,7 +308,6 @@ enum ieee80211_sta_mlme_state {
 	IEEE80211_STA_MLME_ASSOCIATED,
 	IEEE80211_STA_MLME_IBSS_SEARCH,
 	IEEE80211_STA_MLME_IBSS_JOINED,
-	IEEE80211_STA_MLME_MESH_UP
 };
 
 /* bitfield of allowed auth algs */
@@ -325,34 +324,6 @@ struct ieee80211_if_sta {
 	size_t ssid_len;
 	u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
 	size_t scan_ssid_len;
-#ifdef CONFIG_MAC80211_MESH
-	struct timer_list mesh_path_timer;
-	u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN];
-	size_t mesh_id_len;
-	/* Active Path Selection Protocol Identifier */
-	u8 mesh_pp_id[4];
-	/* Active Path Selection Metric Identifier */
-	u8 mesh_pm_id[4];
-	/* Congestion Control Mode Identifier */
-	u8 mesh_cc_id[4];
-	/* Local mesh Destination Sequence Number */
-	u32 dsn;
-	/* Last used PREQ ID */
-	u32 preq_id;
-	atomic_t mpaths;
-	/* Timestamp of last DSN update */
-	unsigned long last_dsn_update;
-	/* Timestamp of last DSN sent */
-	unsigned long last_preq;
-	struct mesh_rmc *rmc;
-	spinlock_t mesh_preq_queue_lock;
-	struct mesh_preq_queue preq_queue;
-	int preq_queue_len;
-	struct mesh_stats mshstats;
-	struct mesh_config mshcfg;
-	u32 mesh_seqnum;
-	bool accepting_plinks;
-#endif
 	u16 aid;
 	u16 ap_capab, capab;
 	u8 *extra_ie; /* to be added to the end of AssocReq */
@@ -387,20 +358,47 @@ struct ieee80211_if_sta {
 	int num_beacons; /* number of TXed beacon frames by this STA */
 };
 
-static inline void ieee80211_if_sta_set_mesh_id(struct ieee80211_if_sta *ifsta,
-						u8 mesh_id_len, u8 *mesh_id)
-{
-#ifdef CONFIG_MAC80211_MESH
-	ifsta->mesh_id_len = mesh_id_len;
-	memcpy(ifsta->mesh_id, mesh_id, mesh_id_len);
-#endif
-}
+struct ieee80211_if_mesh {
+	struct work_struct work;
+	struct timer_list housekeeping_timer;
+	struct timer_list mesh_path_timer;
+	struct sk_buff_head skb_queue;
+
+	bool housekeeping;
+
+	u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN];
+	size_t mesh_id_len;
+	/* Active Path Selection Protocol Identifier */
+	u8 mesh_pp_id[4];
+	/* Active Path Selection Metric Identifier */
+	u8 mesh_pm_id[4];
+	/* Congestion Control Mode Identifier */
+	u8 mesh_cc_id[4];
+	/* Local mesh Destination Sequence Number */
+	u32 dsn;
+	/* Last used PREQ ID */
+	u32 preq_id;
+	atomic_t mpaths;
+	/* Timestamp of last DSN update */
+	unsigned long last_dsn_update;
+	/* Timestamp of last DSN sent */
+	unsigned long last_preq;
+	struct mesh_rmc *rmc;
+	spinlock_t mesh_preq_queue_lock;
+	struct mesh_preq_queue preq_queue;
+	int preq_queue_len;
+	struct mesh_stats mshstats;
+	struct mesh_config mshcfg;
+	u32 mesh_seqnum;
+	bool accepting_plinks;
+	int num_beacons;
+};
 
 #ifdef CONFIG_MAC80211_MESH
-#define IEEE80211_IFSTA_MESH_CTR_INC(sta, name)	\
-	do { (sta)->mshstats.name++; } while (0)
+#define IEEE80211_IFSTA_MESH_CTR_INC(msh, name)	\
+	do { (msh)->mshstats.name++; } while (0)
 #else
-#define IEEE80211_IFSTA_MESH_CTR_INC(sta, name) \
+#define IEEE80211_IFSTA_MESH_CTR_INC(msh, name) \
 	do { } while (0)
 #endif
 
@@ -455,6 +453,9 @@ struct ieee80211_sub_if_data {
 		struct ieee80211_if_wds wds;
 		struct ieee80211_if_vlan vlan;
 		struct ieee80211_if_sta sta;
+#ifdef CONFIG_MAC80211_MESH
+		struct ieee80211_if_mesh mesh;
+#endif
 		u32 mntr_flags;
 	} u;
 
@@ -548,6 +549,19 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
 	return container_of(p, struct ieee80211_sub_if_data, vif);
 }
 
+static inline void
+ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata,
+			    u8 mesh_id_len, u8 *mesh_id)
+{
+#ifdef CONFIG_MAC80211_MESH
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	ifmsh->mesh_id_len = mesh_id_len;
+	memcpy(ifmsh->mesh_id, mesh_id, mesh_id_len);
+#else
+	WARN_ON(1);
+#endif
+}
+
 enum {
 	IEEE80211_RX_MSG	= 1,
 	IEEE80211_TX_STATUS_MSG	= 2,
@@ -935,13 +949,6 @@ ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
 void ieee80211_rx_bss_put(struct ieee80211_local *local,
 			  struct ieee80211_sta_bss *bss);
 
-#ifdef CONFIG_MAC80211_MESH
-void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata);
-#else
-static inline void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
-{}
-#endif
-
 /* interface handling */
 void ieee80211_if_setup(struct net_device *dev);
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 672cec60a2f..ddbaa417e2e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -54,10 +54,9 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 
 		break;
 	case IEEE80211_IF_TYPE_MESH_POINT:
-		/* Allow compiler to elide mesh_rmc_free call. */
 		if (ieee80211_vif_is_mesh(&sdata->vif))
 			mesh_rmc_free(sdata);
-		/* fall through */
+		break;
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
 		kfree(sdata->u.sta.extra_ie);
@@ -100,7 +99,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 		skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
 		INIT_LIST_HEAD(&sdata->u.ap.vlans);
 		break;
-	case IEEE80211_IF_TYPE_MESH_POINT:
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
 		ifsta = &sdata->u.sta;
@@ -117,7 +115,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 			IEEE80211_STA_AUTO_CHANNEL_SEL;
 		if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4)
 			ifsta->flags |= IEEE80211_STA_WMM_ENABLED;
-
+		break;
+	case IEEE80211_IF_TYPE_MESH_POINT:
 		if (ieee80211_vif_is_mesh(&sdata->vif))
 			ieee80211_mesh_init_sdata(sdata);
 		break;
@@ -225,9 +224,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) &&
 	    params && params->mesh_id_len)
-		ieee80211_if_sta_set_mesh_id(&sdata->u.sta,
-					     params->mesh_id_len,
-					     params->mesh_id);
+		ieee80211_sdata_set_mesh_id(sdata,
+					    params->mesh_id_len,
+					    params->mesh_id);
 
 	list_add_tail_rcu(&sdata->list, &local->interfaces);
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6a7f4fae18c..522fe617648 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -252,6 +252,8 @@ static int ieee80211_open(struct net_device *dev)
 		sdata->bss = &sdata->u.ap;
 		break;
 	case IEEE80211_IF_TYPE_MESH_POINT:
+		if (!ieee80211_vif_is_mesh(&sdata->vif))
+			break;
 		/* mesh ifaces must set allmulti to forward mcast traffic */
 		atomic_inc(&local->iff_allmultis);
 		break;
@@ -540,10 +542,6 @@ static int ieee80211_stop(struct net_device *dev)
 		ieee80211_configure_filter(local);
 		netif_addr_unlock_bh(local->mdev);
 		break;
-	case IEEE80211_IF_TYPE_MESH_POINT:
-		/* allmulti is always set on mesh ifaces */
-		atomic_dec(&local->iff_allmultis);
-		/* fall through */
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
 		sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED;
@@ -571,6 +569,13 @@ static int ieee80211_stop(struct net_device *dev)
 		sdata->u.sta.extra_ie = NULL;
 		sdata->u.sta.extra_ie_len = 0;
 		/* fall through */
+	case IEEE80211_IF_TYPE_MESH_POINT:
+		if (ieee80211_vif_is_mesh(&sdata->vif)) {
+			/* allmulti is always set on mesh ifaces */
+			atomic_dec(&local->iff_allmultis);
+			ieee80211_stop_mesh(sdata);
+		}
+		/* fall through */
 	default:
 		conf.vif = &sdata->vif;
 		conf.type = sdata->vif.type;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 3ccb3599c04..9e47725cc59 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -12,6 +12,9 @@
 #include "ieee80211_i.h"
 #include "mesh.h"
 
+#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ)
+#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ)
+
 #define PP_OFFSET 	1		/* Path Selection Protocol */
 #define PM_OFFSET	5		/* Path Selection Metric   */
 #define CC_OFFSET	9		/* Congestion Control Mode */
@@ -35,6 +38,16 @@ void ieee80211s_stop(void)
 	kmem_cache_destroy(rm_cache);
 }
 
+static void ieee80211_mesh_housekeeping_timer(unsigned long data)
+{
+	struct ieee80211_sub_if_data *sdata = (void *) data;
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+
+	ifmsh->housekeeping = true;
+	queue_work(local->hw.workqueue, &ifmsh->work);
+}
+
 /**
  * mesh_matches_local - check if the config of a mesh point matches ours
  *
@@ -46,7 +59,7 @@ void ieee80211s_stop(void)
  */
 bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_if_sta *sta = &sdata->u.sta;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 
 	/*
 	 * As support for each feature is added, check for matching
@@ -58,11 +71,11 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat
 	 *   - MDA enabled
 	 * - Power management control on fc
 	 */
-	if (sta->mesh_id_len == ie->mesh_id_len &&
-		memcmp(sta->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 &&
-		memcmp(sta->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 &&
-		memcmp(sta->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 &&
-		memcmp(sta->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0)
+	if (ifmsh->mesh_id_len == ie->mesh_id_len &&
+		memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 &&
+		memcmp(ifmsh->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 &&
+		memcmp(ifmsh->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 &&
+		memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0)
 		return true;
 
 	return false;
@@ -95,11 +108,11 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
 	 */
 	free_plinks = mesh_plink_availables(sdata);
 
-	if (free_plinks != sdata->u.sta.accepting_plinks)
-		ieee80211_sta_timer((unsigned long) sdata);
+	if (free_plinks != sdata->u.mesh.accepting_plinks)
+		ieee80211_mesh_housekeeping_timer((unsigned long) sdata);
 }
 
-void mesh_ids_set_default(struct ieee80211_if_sta *sta)
+void mesh_ids_set_default(struct ieee80211_if_mesh *sta)
 {
 	u8 def_id[4] = {0x00, 0x0F, 0xAC, 0xff};
 
@@ -112,22 +125,22 @@ int mesh_rmc_init(struct ieee80211_sub_if_data *sdata)
 {
 	int i;
 
-	sdata->u.sta.rmc = kmalloc(sizeof(struct mesh_rmc), GFP_KERNEL);
-	if (!sdata->u.sta.rmc)
+	sdata->u.mesh.rmc = kmalloc(sizeof(struct mesh_rmc), GFP_KERNEL);
+	if (!sdata->u.mesh.rmc)
 		return -ENOMEM;
-	sdata->u.sta.rmc->idx_mask = RMC_BUCKETS - 1;
+	sdata->u.mesh.rmc->idx_mask = RMC_BUCKETS - 1;
 	for (i = 0; i < RMC_BUCKETS; i++)
-		INIT_LIST_HEAD(&sdata->u.sta.rmc->bucket[i].list);
+		INIT_LIST_HEAD(&sdata->u.mesh.rmc->bucket[i].list);
 	return 0;
 }
 
 void mesh_rmc_free(struct ieee80211_sub_if_data *sdata)
 {
-	struct mesh_rmc *rmc = sdata->u.sta.rmc;
+	struct mesh_rmc *rmc = sdata->u.mesh.rmc;
 	struct rmc_entry *p, *n;
 	int i;
 
-	if (!sdata->u.sta.rmc)
+	if (!sdata->u.mesh.rmc)
 		return;
 
 	for (i = 0; i < RMC_BUCKETS; i++)
@@ -137,7 +150,7 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata)
 		}
 
 	kfree(rmc);
-	sdata->u.sta.rmc = NULL;
+	sdata->u.mesh.rmc = NULL;
 }
 
 /**
@@ -155,7 +168,7 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata)
 int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr,
 		   struct ieee80211_sub_if_data *sdata)
 {
-	struct mesh_rmc *rmc = sdata->u.sta.rmc;
+	struct mesh_rmc *rmc = sdata->u.mesh.rmc;
 	u32 seqnum = 0;
 	int entries = 0;
 	u8 idx;
@@ -217,11 +230,11 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 		}
 	}
 
-	pos = skb_put(skb, 2 + sdata->u.sta.mesh_id_len);
+	pos = skb_put(skb, 2 + sdata->u.mesh.mesh_id_len);
 	*pos++ = WLAN_EID_MESH_ID;
-	*pos++ = sdata->u.sta.mesh_id_len;
-	if (sdata->u.sta.mesh_id_len)
-		memcpy(pos, sdata->u.sta.mesh_id, sdata->u.sta.mesh_id_len);
+	*pos++ = sdata->u.mesh.mesh_id_len;
+	if (sdata->u.mesh.mesh_id_len)
+		memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len);
 
 	pos = skb_put(skb, 21);
 	*pos++ = WLAN_EID_MESH_CONFIG;
@@ -230,15 +243,15 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	*pos++ = 1;
 
 	/* Active path selection protocol ID */
-	memcpy(pos, sdata->u.sta.mesh_pp_id, 4);
+	memcpy(pos, sdata->u.mesh.mesh_pp_id, 4);
 	pos += 4;
 
 	/* Active path selection metric ID   */
-	memcpy(pos, sdata->u.sta.mesh_pm_id, 4);
+	memcpy(pos, sdata->u.mesh.mesh_pm_id, 4);
 	pos += 4;
 
 	/* Congestion control mode identifier */
-	memcpy(pos, sdata->u.sta.mesh_cc_id, 4);
+	memcpy(pos, sdata->u.mesh.mesh_cc_id, 4);
 	pos += 4;
 
 	/* Channel precedence:
@@ -248,8 +261,8 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	pos += 4;
 
 	/* Mesh capability */
-	sdata->u.sta.accepting_plinks = mesh_plink_availables(sdata);
-	*pos++ = sdata->u.sta.accepting_plinks ? ACCEPT_PLINKS : 0x00;
+	sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata);
+	*pos++ = sdata->u.mesh.accepting_plinks ? ACCEPT_PLINKS : 0x00;
 	*pos++ = 0x00;
 
 	return;
@@ -337,10 +350,10 @@ static void ieee80211_mesh_path_timer(unsigned long data)
 {
 	struct ieee80211_sub_if_data *sdata =
 		(struct ieee80211_sub_if_data *) data;
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_local *local = wdev_priv(&sdata->wdev);
 
-	queue_work(local->hw.workqueue, &ifsta->work);
+	queue_work(local->hw.workqueue, &ifmsh->work);
 }
 
 struct mesh_table *mesh_table_grow(struct mesh_table *tbl)
@@ -392,50 +405,255 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
 		struct ieee80211_sub_if_data *sdata)
 {
 	meshhdr->flags = 0;
-	meshhdr->ttl = sdata->u.sta.mshcfg.dot11MeshTTL;
-	put_unaligned(cpu_to_le32(sdata->u.sta.mesh_seqnum), &meshhdr->seqnum);
-	sdata->u.sta.mesh_seqnum++;
+	meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL;
+	put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum);
+	sdata->u.mesh.mesh_seqnum++;
 
 	return 6;
 }
 
+static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata,
+			   struct ieee80211_if_mesh *ifmsh)
+{
+	bool free_plinks;
+
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+	printk(KERN_DEBUG "%s: running mesh housekeeping\n",
+	       sdata->dev->name);
+#endif
+
+	ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT);
+	mesh_path_expire(sdata);
+
+	free_plinks = mesh_plink_availables(sdata);
+	if (free_plinks != sdata->u.mesh.accepting_plinks)
+		ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
+
+	ifmsh->housekeeping = false;
+	mod_timer(&ifmsh->housekeeping_timer,
+		  round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL));
+}
+
+
+void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	struct ieee80211_local *local = sdata->local;
+
+	ifmsh->housekeeping = true;
+	queue_work(local->hw.workqueue, &ifmsh->work);
+	ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
+}
+
+void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
+{
+	del_timer_sync(&sdata->u.mesh.housekeeping_timer);
+	/*
+	 * When we get here, the interface is marked down.
+	 * Call synchronize_rcu() to wait for the RX path
+	 * should it be using the interface and enqueuing
+	 * frames at this very time on another CPU.
+	 */
+	synchronize_rcu();
+	skb_queue_purge(&sdata->u.mesh.skb_queue);
+}
+
+static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
+					u16 stype,
+					struct ieee80211_mgmt *mgmt,
+					size_t len,
+					struct ieee80211_rx_status *rx_status)
+{
+	struct ieee80211_local *local= sdata->local;
+	struct ieee802_11_elems elems;
+	struct ieee80211_channel *channel;
+	u64 supp_rates = 0;
+	size_t baselen;
+	int freq;
+	enum ieee80211_band band = rx_status->band;
+
+	/* ignore ProbeResp to foreign address */
+	if (stype == IEEE80211_STYPE_PROBE_RESP &&
+	    compare_ether_addr(mgmt->da, sdata->dev->dev_addr))
+		return;
+
+	baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt;
+	if (baselen > len)
+		return;
+
+	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
+			       &elems);
+
+	if (elems.ds_params && elems.ds_params_len == 1)
+		freq = ieee80211_channel_to_frequency(elems.ds_params[0]);
+	else
+		freq = rx_status->freq;
+
+	channel = ieee80211_get_channel(local->hw.wiphy, freq);
+
+	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+		return;
+
+	if (elems.mesh_id && elems.mesh_config &&
+	    mesh_matches_local(&elems, sdata)) {
+		supp_rates = ieee80211_sta_get_rates(local, &elems, band);
+
+		mesh_neighbour_update(mgmt->sa, supp_rates, sdata,
+				      mesh_peer_accepts_plinks(&elems));
+	}
+}
+
+static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
+					  struct ieee80211_mgmt *mgmt,
+					  size_t len,
+					  struct ieee80211_rx_status *rx_status)
+{
+	switch (mgmt->u.action.category) {
+	case PLINK_CATEGORY:
+		mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
+		break;
+	case MESH_PATH_SEL_CATEGORY:
+		mesh_rx_path_sel_frame(sdata, mgmt, len);
+		break;
+	}
+}
+
+static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+					  struct sk_buff *skb)
+{
+	struct ieee80211_rx_status *rx_status;
+	struct ieee80211_if_mesh *ifmsh;
+	struct ieee80211_mgmt *mgmt;
+	u16 stype;
+
+	ifmsh = &sdata->u.mesh;
+
+	rx_status = (struct ieee80211_rx_status *) skb->cb;
+	mgmt = (struct ieee80211_mgmt *) skb->data;
+	stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
+
+	switch (stype) {
+	case IEEE80211_STYPE_PROBE_RESP:
+	case IEEE80211_STYPE_BEACON:
+		ieee80211_mesh_rx_bcn_presp(sdata, stype, mgmt, skb->len,
+					    rx_status);
+		break;
+	case IEEE80211_STYPE_ACTION:
+		ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status);
+		break;
+	}
+
+	kfree_skb(skb);
+}
+
+static void ieee80211_mesh_work(struct work_struct *work)
+{
+	struct ieee80211_sub_if_data *sdata =
+		container_of(work, struct ieee80211_sub_if_data, u.mesh.work);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	struct sk_buff *skb;
+
+	if (!netif_running(sdata->dev))
+		return;
+
+	if (local->sta_sw_scanning || local->sta_hw_scanning)
+		return;
+
+	while ((skb = skb_dequeue(&ifmsh->skb_queue)))
+		ieee80211_mesh_rx_queued_mgmt(sdata, skb);
+
+	if (ifmsh->preq_queue_len &&
+	    time_after(jiffies,
+		       ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval)))
+		mesh_path_start_discovery(sdata);
+
+	if (ifmsh->housekeeping)
+		ieee80211_mesh_housekeeping(sdata, ifmsh);
+}
+
+void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &local->interfaces, list)
+		if (ieee80211_vif_is_mesh(&sdata->vif))
+			queue_work(local->hw.workqueue, &sdata->u.mesh.work);
+	rcu_read_unlock();
+}
+
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-
-	ifsta->mshcfg.dot11MeshRetryTimeout = MESH_RET_T;
-	ifsta->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T;
-	ifsta->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T;
-	ifsta->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR;
-	ifsta->mshcfg.dot11MeshTTL = MESH_TTL;
-	ifsta->mshcfg.auto_open_plinks = true;
-	ifsta->mshcfg.dot11MeshMaxPeerLinks =
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+
+	INIT_WORK(&ifmsh->work, ieee80211_mesh_work);
+	setup_timer(&ifmsh->housekeeping_timer,
+		    ieee80211_mesh_housekeeping_timer,
+		    (unsigned long) sdata);
+	skb_queue_head_init(&sdata->u.mesh.skb_queue);
+
+	ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T;
+	ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T;
+	ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T;
+	ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR;
+	ifmsh->mshcfg.dot11MeshTTL = MESH_TTL;
+	ifmsh->mshcfg.auto_open_plinks = true;
+	ifmsh->mshcfg.dot11MeshMaxPeerLinks =
 		MESH_MAX_ESTAB_PLINKS;
-	ifsta->mshcfg.dot11MeshHWMPactivePathTimeout =
+	ifmsh->mshcfg.dot11MeshHWMPactivePathTimeout =
 		MESH_PATH_TIMEOUT;
-	ifsta->mshcfg.dot11MeshHWMPpreqMinInterval =
+	ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval =
 		MESH_PREQ_MIN_INT;
-	ifsta->mshcfg.dot11MeshHWMPnetDiameterTraversalTime =
+	ifmsh->mshcfg.dot11MeshHWMPnetDiameterTraversalTime =
 		MESH_DIAM_TRAVERSAL_TIME;
-	ifsta->mshcfg.dot11MeshHWMPmaxPREQretries =
+	ifmsh->mshcfg.dot11MeshHWMPmaxPREQretries =
 		MESH_MAX_PREQ_RETRIES;
-	ifsta->mshcfg.path_refresh_time =
+	ifmsh->mshcfg.path_refresh_time =
 		MESH_PATH_REFRESH_TIME;
-	ifsta->mshcfg.min_discovery_timeout =
+	ifmsh->mshcfg.min_discovery_timeout =
 		MESH_MIN_DISCOVERY_TIMEOUT;
-	ifsta->accepting_plinks = true;
-	ifsta->preq_id = 0;
-	ifsta->dsn = 0;
-	atomic_set(&ifsta->mpaths, 0);
+	ifmsh->accepting_plinks = true;
+	ifmsh->preq_id = 0;
+	ifmsh->dsn = 0;
+	atomic_set(&ifmsh->mpaths, 0);
 	mesh_rmc_init(sdata);
-	ifsta->last_preq = jiffies;
+	ifmsh->last_preq = jiffies;
 	/* Allocate all mesh structures when creating the first mesh interface. */
 	if (!mesh_allocated)
 		ieee80211s_init();
-	mesh_ids_set_default(ifsta);
-	setup_timer(&ifsta->mesh_path_timer,
+	mesh_ids_set_default(ifmsh);
+	setup_timer(&ifmsh->mesh_path_timer,
 		    ieee80211_mesh_path_timer,
 		    (unsigned long) sdata);
-	INIT_LIST_HEAD(&ifsta->preq_queue.list);
-	spin_lock_init(&ifsta->mesh_preq_queue_lock);
+	INIT_LIST_HEAD(&ifmsh->preq_queue.list);
+	spin_lock_init(&ifmsh->mesh_preq_queue_lock);
+}
+
+ieee80211_rx_result
+ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
+		       struct ieee80211_rx_status *rx_status)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	struct ieee80211_mgmt *mgmt;
+	u16 fc;
+
+	if (skb->len < 24)
+		return RX_DROP_MONITOR;
+
+	mgmt = (struct ieee80211_mgmt *) skb->data;
+	fc = le16_to_cpu(mgmt->frame_control);
+
+	switch (fc & IEEE80211_FCTL_STYPE) {
+	case IEEE80211_STYPE_PROBE_RESP:
+	case IEEE80211_STYPE_BEACON:
+	case IEEE80211_STYPE_ACTION:
+		memcpy(skb->cb, rx_status, sizeof(*rx_status));
+		skb_queue_tail(&ifmsh->skb_queue, skb);
+		queue_work(local->hw.workqueue, &ifmsh->work);
+		return RX_QUEUED;
+	}
+
+	return RX_CONTINUE;
 }
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 84ff5d828fd..8ee414a0447 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -206,7 +206,7 @@ int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr,
 		struct ieee80211_sub_if_data *sdata);
 bool mesh_matches_local(struct ieee802_11_elems *ie,
 		struct ieee80211_sub_if_data *sdata);
-void mesh_ids_set_default(struct ieee80211_if_sta *sta);
+void mesh_ids_set_default(struct ieee80211_if_mesh *mesh);
 void mesh_mgmt_ies_add(struct sk_buff *skb,
 		struct ieee80211_sub_if_data *sdata);
 void mesh_rmc_free(struct ieee80211_sub_if_data *sdata);
@@ -214,6 +214,11 @@ int mesh_rmc_init(struct ieee80211_sub_if_data *sdata);
 void ieee80211s_init(void);
 void ieee80211s_stop(void);
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata);
+ieee80211_rx_result
+ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
+		       struct ieee80211_rx_status *rx_status);
+void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata);
+void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata);
 
 /* Mesh paths */
 int mesh_nexthop_lookup(struct sk_buff *skb,
@@ -269,8 +274,8 @@ extern int mesh_allocated;
 
 static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata)
 {
-	return sdata->u.sta.mshcfg.dot11MeshMaxPeerLinks -
-	       atomic_read(&sdata->u.sta.mshstats.estab_plinks);
+	return sdata->u.mesh.mshcfg.dot11MeshMaxPeerLinks -
+	       atomic_read(&sdata->u.mesh.mshstats.estab_plinks);
 }
 
 static inline bool mesh_plink_availables(struct ieee80211_sub_if_data *sdata)
@@ -288,8 +293,12 @@ static inline void mesh_path_activate(struct mesh_path *mpath)
 	for (i = 0; i <= x->hash_mask; i++) \
 		hlist_for_each_entry_rcu(node, p, &x->hash_buckets[i], list)
 
+void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
+
 #else
 #define mesh_allocated	0
+static inline void
+ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
 #endif
 
 #endif /* IEEE80211S_H */
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 210d6b85240..1fad792ad25 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -64,14 +64,14 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae)
 #define DSN_LT(x, y) ((long) (x) - (long) (y) < 0)
 
 #define net_traversal_jiffies(s) \
-	msecs_to_jiffies(s->u.sta.mshcfg.dot11MeshHWMPnetDiameterTraversalTime)
+	msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime)
 #define default_lifetime(s) \
-	MSEC_TO_TU(s->u.sta.mshcfg.dot11MeshHWMPactivePathTimeout)
+	MSEC_TO_TU(s->u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout)
 #define min_preq_int_jiff(s) \
-	(msecs_to_jiffies(s->u.sta.mshcfg.dot11MeshHWMPpreqMinInterval))
-#define max_preq_retries(s) (s->u.sta.mshcfg.dot11MeshHWMPmaxPREQretries)
+	(msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval))
+#define max_preq_retries(s) (s->u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries)
 #define disc_timeout_jiff(s) \
-	msecs_to_jiffies(sdata->u.sta.mshcfg.min_discovery_timeout)
+	msecs_to_jiffies(sdata->u.mesh.mshcfg.min_discovery_timeout)
 
 enum mpath_frame_type {
 	MPATH_PREQ = 0,
@@ -395,7 +395,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_mgmt *mgmt,
 				    u8 *preq_elem, u32 metric) {
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_path *mpath;
 	u8 *dst_addr, *orig_addr;
 	u8 dst_flags, ttl;
@@ -414,11 +414,11 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 		forward = false;
 		reply = true;
 		metric = 0;
-		if (time_after(jiffies, ifsta->last_dsn_update +
+		if (time_after(jiffies, ifmsh->last_dsn_update +
 					net_traversal_jiffies(sdata)) ||
-		    time_before(jiffies, ifsta->last_dsn_update)) {
-			dst_dsn = ++ifsta->dsn;
-			ifsta->last_dsn_update = jiffies;
+		    time_before(jiffies, ifmsh->last_dsn_update)) {
+			dst_dsn = ++ifmsh->dsn;
+			ifmsh->last_dsn_update = jiffies;
 		}
 	} else {
 		rcu_read_lock();
@@ -444,7 +444,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 
 	if (reply) {
 		lifetime = PREQ_IE_LIFETIME(preq_elem);
-		ttl = ifsta->mshcfg.dot11MeshTTL;
+		ttl = ifmsh->mshcfg.dot11MeshTTL;
 		if (ttl != 0)
 			mesh_path_sel_frame_tx(MPATH_PREP, 0, dst_addr,
 				cpu_to_le32(dst_dsn), 0, orig_addr,
@@ -452,7 +452,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 				cpu_to_le32(lifetime), cpu_to_le32(metric),
 				0, sdata);
 		else
-			ifsta->mshstats.dropped_frames_ttl++;
+			ifmsh->mshstats.dropped_frames_ttl++;
 	}
 
 	if (forward) {
@@ -462,7 +462,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 		ttl = PREQ_IE_TTL(preq_elem);
 		lifetime = PREQ_IE_LIFETIME(preq_elem);
 		if (ttl <= 1) {
-			ifsta->mshstats.dropped_frames_ttl++;
+			ifmsh->mshstats.dropped_frames_ttl++;
 			return;
 		}
 		--ttl;
@@ -475,7 +475,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 				hopcount, ttl, cpu_to_le32(lifetime),
 				cpu_to_le32(metric), cpu_to_le32(preq_id),
 				sdata);
-		ifsta->mshstats.fwded_frames++;
+		ifmsh->mshstats.fwded_frames++;
 	}
 }
 
@@ -503,7 +503,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 
 	ttl = PREP_IE_TTL(prep_elem);
 	if (ttl <= 1) {
-		sdata->u.sta.mshstats.dropped_frames_ttl++;
+		sdata->u.mesh.mshstats.dropped_frames_ttl++;
 		return;
 	}
 
@@ -533,12 +533,12 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 		cpu_to_le32(lifetime), cpu_to_le32(metric),
 		0, sdata);
 	rcu_read_unlock();
-	sdata->u.sta.mshstats.fwded_frames++;
+	sdata->u.mesh.mshstats.fwded_frames++;
 	return;
 
 fail:
 	rcu_read_unlock();
-	sdata->u.sta.mshstats.dropped_frames_no_route++;
+	sdata->u.mesh.mshstats.dropped_frames_no_route++;
 	return;
 }
 
@@ -631,7 +631,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
 static void mesh_queue_preq(struct mesh_path *mpath, u8 flags)
 {
 	struct ieee80211_sub_if_data *sdata = mpath->sdata;
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_preq_queue *preq_node;
 
 	preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_KERNEL);
@@ -640,9 +640,9 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags)
 		return;
 	}
 
-	spin_lock(&ifsta->mesh_preq_queue_lock);
-	if (ifsta->preq_queue_len == MAX_PREQ_QUEUE_LEN) {
-		spin_unlock(&ifsta->mesh_preq_queue_lock);
+	spin_lock(&ifmsh->mesh_preq_queue_lock);
+	if (ifmsh->preq_queue_len == MAX_PREQ_QUEUE_LEN) {
+		spin_unlock(&ifmsh->mesh_preq_queue_lock);
 		kfree(preq_node);
 		if (printk_ratelimit())
 			printk(KERN_DEBUG "Mesh HWMP: PREQ node queue full\n");
@@ -652,21 +652,21 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags)
 	memcpy(preq_node->dst, mpath->dst, ETH_ALEN);
 	preq_node->flags = flags;
 
-	list_add_tail(&preq_node->list, &ifsta->preq_queue.list);
-	++ifsta->preq_queue_len;
-	spin_unlock(&ifsta->mesh_preq_queue_lock);
+	list_add_tail(&preq_node->list, &ifmsh->preq_queue.list);
+	++ifmsh->preq_queue_len;
+	spin_unlock(&ifmsh->mesh_preq_queue_lock);
 
-	if (time_after(jiffies, ifsta->last_preq + min_preq_int_jiff(sdata)))
-		queue_work(sdata->local->hw.workqueue, &ifsta->work);
+	if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata)))
+		queue_work(sdata->local->hw.workqueue, &ifmsh->work);
 
-	else if (time_before(jiffies, ifsta->last_preq)) {
+	else if (time_before(jiffies, ifmsh->last_preq)) {
 		/* avoid long wait if did not send preqs for a long time
 		 * and jiffies wrapped around
 		 */
-		ifsta->last_preq = jiffies - min_preq_int_jiff(sdata) - 1;
-		queue_work(sdata->local->hw.workqueue, &ifsta->work);
+		ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1;
+		queue_work(sdata->local->hw.workqueue, &ifmsh->work);
 	} else
-		mod_timer(&ifsta->mesh_path_timer, ifsta->last_preq +
+		mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq +
 						min_preq_int_jiff(sdata));
 }
 
@@ -677,25 +677,25 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags)
  */
 void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_preq_queue *preq_node;
 	struct mesh_path *mpath;
 	u8 ttl, dst_flags;
 	u32 lifetime;
 
-	spin_lock(&ifsta->mesh_preq_queue_lock);
-	if (!ifsta->preq_queue_len ||
-		time_before(jiffies, ifsta->last_preq +
+	spin_lock(&ifmsh->mesh_preq_queue_lock);
+	if (!ifmsh->preq_queue_len ||
+		time_before(jiffies, ifmsh->last_preq +
 				min_preq_int_jiff(sdata))) {
-		spin_unlock(&ifsta->mesh_preq_queue_lock);
+		spin_unlock(&ifmsh->mesh_preq_queue_lock);
 		return;
 	}
 
-	preq_node = list_first_entry(&ifsta->preq_queue.list,
+	preq_node = list_first_entry(&ifmsh->preq_queue.list,
 			struct mesh_preq_queue, list);
 	list_del(&preq_node->list);
-	--ifsta->preq_queue_len;
-	spin_unlock(&ifsta->mesh_preq_queue_lock);
+	--ifmsh->preq_queue_len;
+	spin_unlock(&ifmsh->mesh_preq_queue_lock);
 
 	rcu_read_lock();
 	mpath = mesh_path_lookup(preq_node->dst, sdata);
@@ -720,18 +720,18 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
 		goto enddiscovery;
 	}
 
-	ifsta->last_preq = jiffies;
+	ifmsh->last_preq = jiffies;
 
-	if (time_after(jiffies, ifsta->last_dsn_update +
+	if (time_after(jiffies, ifmsh->last_dsn_update +
 				net_traversal_jiffies(sdata)) ||
-	    time_before(jiffies, ifsta->last_dsn_update)) {
-		++ifsta->dsn;
-		sdata->u.sta.last_dsn_update = jiffies;
+	    time_before(jiffies, ifmsh->last_dsn_update)) {
+		++ifmsh->dsn;
+		sdata->u.mesh.last_dsn_update = jiffies;
 	}
 	lifetime = default_lifetime(sdata);
-	ttl = sdata->u.sta.mshcfg.dot11MeshTTL;
+	ttl = sdata->u.mesh.mshcfg.dot11MeshTTL;
 	if (ttl == 0) {
-		sdata->u.sta.mshstats.dropped_frames_ttl++;
+		sdata->u.mesh.mshstats.dropped_frames_ttl++;
 		spin_unlock_bh(&mpath->state_lock);
 		goto enddiscovery;
 	}
@@ -743,10 +743,10 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
 
 	spin_unlock_bh(&mpath->state_lock);
 	mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->dev->dev_addr,
-			cpu_to_le32(ifsta->dsn), dst_flags, mpath->dst,
+			cpu_to_le32(ifmsh->dsn), dst_flags, mpath->dst,
 			cpu_to_le32(mpath->dsn), sdata->dev->broadcast, 0,
 			ttl, cpu_to_le32(lifetime), 0,
-			cpu_to_le32(ifsta->preq_id++), sdata);
+			cpu_to_le32(ifmsh->preq_id++), sdata);
 	mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout);
 
 enddiscovery:
@@ -783,7 +783,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
 		mpath = mesh_path_lookup(dst_addr, sdata);
 		if (!mpath) {
 			dev_kfree_skb(skb);
-			sdata->u.sta.mshstats.dropped_frames_no_route++;
+			sdata->u.mesh.mshstats.dropped_frames_no_route++;
 			err = -ENOSPC;
 			goto endlookup;
 		}
@@ -791,7 +791,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
 
 	if (mpath->flags & MESH_PATH_ACTIVE) {
 		if (time_after(jiffies, mpath->exp_time -
-			msecs_to_jiffies(sdata->u.sta.mshcfg.path_refresh_time))
+			msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time))
 				&& !memcmp(sdata->dev->dev_addr, hdr->addr4,
 					   ETH_ALEN)
 				&& !(mpath->flags & MESH_PATH_RESOLVING)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 0a60f55f32a..e4fa2905fad 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -153,7 +153,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
 	if (is_multicast_ether_addr(dst))
 		return -ENOTSUPP;
 
-	if (atomic_add_unless(&sdata->u.sta.mpaths, 1, MESH_MAX_MPATHS) == 0)
+	if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0)
 		return -ENOSPC;
 
 	err = -ENOMEM;
@@ -221,7 +221,7 @@ err_exists:
 err_node_alloc:
 	kfree(new_mpath);
 err_path_alloc:
-	atomic_dec(&sdata->u.sta.mpaths);
+	atomic_dec(&sdata->u.mesh.mpaths);
 	return err;
 }
 
@@ -306,7 +306,7 @@ static void mesh_path_node_reclaim(struct rcu_head *rp)
 	struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
 
 	del_timer_sync(&node->mpath->timer);
-	atomic_dec(&sdata->u.sta.mpaths);
+	atomic_dec(&sdata->u.mesh.mpaths);
 	kfree(node->mpath);
 	kfree(node);
 }
@@ -401,7 +401,7 @@ void mesh_path_discard_frame(struct sk_buff *skb,
 	}
 
 	kfree_skb(skb);
-	sdata->u.sta.mshstats.dropped_frames_no_route++;
+	sdata->u.mesh.mshstats.dropped_frames_no_route++;
 }
 
 /**
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 7356462dee9..990a4b7f6bc 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -36,11 +36,11 @@
 #define MESH_SECURITY_AUTHENTICATION_IMPOSSIBLE	9
 #define MESH_SECURITY_FAILED_VERIFICATION	10
 
-#define dot11MeshMaxRetries(s) (s->u.sta.mshcfg.dot11MeshMaxRetries)
-#define dot11MeshRetryTimeout(s) (s->u.sta.mshcfg.dot11MeshRetryTimeout)
-#define dot11MeshConfirmTimeout(s) (s->u.sta.mshcfg.dot11MeshConfirmTimeout)
-#define dot11MeshHoldingTimeout(s) (s->u.sta.mshcfg.dot11MeshHoldingTimeout)
-#define dot11MeshMaxPeerLinks(s) (s->u.sta.mshcfg.dot11MeshMaxPeerLinks)
+#define dot11MeshMaxRetries(s) (s->u.mesh.mshcfg.dot11MeshMaxRetries)
+#define dot11MeshRetryTimeout(s) (s->u.mesh.mshcfg.dot11MeshRetryTimeout)
+#define dot11MeshConfirmTimeout(s) (s->u.mesh.mshcfg.dot11MeshConfirmTimeout)
+#define dot11MeshHoldingTimeout(s) (s->u.mesh.mshcfg.dot11MeshHoldingTimeout)
+#define dot11MeshMaxPeerLinks(s) (s->u.mesh.mshcfg.dot11MeshMaxPeerLinks)
 
 enum plink_frame_type {
 	PLINK_OPEN = 0,
@@ -63,14 +63,14 @@ enum plink_event {
 static inline
 void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata)
 {
-	atomic_inc(&sdata->u.sta.mshstats.estab_plinks);
+	atomic_inc(&sdata->u.mesh.mshstats.estab_plinks);
 	mesh_accept_plinks_update(sdata);
 }
 
 static inline
 void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
 {
-	atomic_dec(&sdata->u.sta.mshstats.estab_plinks);
+	atomic_dec(&sdata->u.mesh.mshstats.estab_plinks);
 	mesh_accept_plinks_update(sdata);
 }
 
@@ -245,8 +245,8 @@ void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct ieee80211_sub_if_data
 	sta->last_rx = jiffies;
 	sta->supp_rates[local->hw.conf.channel->band] = rates;
 	if (peer_accepting_plinks && sta->plink_state == PLINK_LISTEN &&
-			sdata->u.sta.accepting_plinks &&
-			sdata->u.sta.mshcfg.auto_open_plinks)
+			sdata->u.mesh.accepting_plinks &&
+			sdata->u.mesh.mshcfg.auto_open_plinks)
 		mesh_plink_open(sta);
 
 	rcu_read_unlock();
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ef73f895372..9e20a0c20a4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -22,11 +22,11 @@
 #include <linux/rtnetlink.h>
 #include <net/iw_handler.h>
 #include <net/mac80211.h>
+#include <asm/unaligned.h>
 
 #include "ieee80211_i.h"
 #include "rate.h"
 #include "led.h"
-#include "mesh.h"
 
 #define IEEE80211_ASSOC_SCANS_MAX_TRIES 2
 #define IEEE80211_AUTH_TIMEOUT (HZ / 5)
@@ -34,7 +34,6 @@
 #define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
 #define IEEE80211_ASSOC_MAX_TRIES 3
 #define IEEE80211_MONITORING_INTERVAL (2 * HZ)
-#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ)
 #define IEEE80211_PROBE_INTERVAL (60 * HZ)
 #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ)
 #define IEEE80211_SCAN_INTERVAL (2 * HZ)
@@ -43,7 +42,6 @@
 
 #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
 #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
-#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ)
 
 #define IEEE80211_IBSS_MAX_STA_ENTRIES 128
 
@@ -1508,14 +1506,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
 		return;
 
-	if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id &&
-	    elems->mesh_config && mesh_matches_local(elems, sdata)) {
-		supp_rates = ieee80211_sta_get_rates(local, elems, band);
-
-		mesh_neighbour_update(mgmt->sa, supp_rates, sdata,
-				      mesh_peer_accepts_plinks(elems));
-	}
-
 	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates &&
 	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
 		supp_rates = ieee80211_sta_get_rates(local, elems, band);
@@ -1785,26 +1775,6 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 	ieee80211_tx_skb(sdata, skb, 0);
 }
 
-static void ieee80211_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
-				     struct ieee80211_if_sta *ifsta,
-				     struct ieee80211_mgmt *mgmt,
-				     size_t len,
-				     struct ieee80211_rx_status *rx_status)
-{
-	/* currently we only handle mesh interface action frames here */
-	if (!ieee80211_vif_is_mesh(&sdata->vif))
-		return;
-
-	switch (mgmt->u.action.category) {
-	case PLINK_CATEGORY:
-		mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
-		break;
-	case MESH_PATH_SEL_CATEGORY:
-		mesh_rx_path_sel_frame(sdata, mgmt, len);
-		break;
-	}
-}
-
 void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 			   struct ieee80211_rx_status *rx_status)
 {
@@ -1825,7 +1795,6 @@ void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *
 	case IEEE80211_STYPE_PROBE_REQ:
 	case IEEE80211_STYPE_PROBE_RESP:
 	case IEEE80211_STYPE_BEACON:
-	case IEEE80211_STYPE_ACTION:
 		memcpy(skb->cb, rx_status, sizeof(*rx_status));
 	case IEEE80211_STYPE_AUTH:
 	case IEEE80211_STYPE_ASSOC_RESP:
@@ -1881,9 +1850,6 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 	case IEEE80211_STYPE_DISASSOC:
 		ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, skb->len);
 		break;
-	case IEEE80211_STYPE_ACTION:
-		ieee80211_rx_mgmt_action(sdata, ifsta, mgmt, skb->len, rx_status);
-		break;
 	}
 
 	kfree_skb(skb);
@@ -1928,35 +1894,6 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata,
 }
 
 
-#ifdef CONFIG_MAC80211_MESH
-static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata,
-			   struct ieee80211_if_sta *ifsta)
-{
-	bool free_plinks;
-
-	ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT);
-	mesh_path_expire(sdata);
-
-	free_plinks = mesh_plink_availables(sdata);
-	if (free_plinks != sdata->u.sta.accepting_plinks)
-		ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
-
-	mod_timer(&ifsta->timer, jiffies +
-			IEEE80211_MESH_HOUSEKEEPING_INTERVAL);
-}
-
-
-void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
-{
-	struct ieee80211_if_sta *ifsta;
-	ifsta = &sdata->u.sta;
-	ifsta->state = IEEE80211_STA_MLME_MESH_UP;
-	ieee80211_sta_timer((unsigned long)sdata);
-	ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
-}
-#endif
-
-
 void ieee80211_sta_timer(unsigned long data)
 {
 	struct ieee80211_sub_if_data *sdata =
@@ -2524,21 +2461,13 @@ void ieee80211_sta_work(struct work_struct *work)
 		return;
 
 	if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-		    sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-		    sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT))
+		    sdata->vif.type != IEEE80211_IF_TYPE_IBSS))
 		return;
 	ifsta = &sdata->u.sta;
 
 	while ((skb = skb_dequeue(&ifsta->skb_queue)))
 		ieee80211_sta_rx_queued_mgmt(sdata, skb);
 
-#ifdef CONFIG_MAC80211_MESH
-	if (ifsta->preq_queue_len &&
-	    time_after(jiffies,
-		       ifsta->last_preq + msecs_to_jiffies(ifsta->mshcfg.dot11MeshHWMPpreqMinInterval)))
-		mesh_path_start_discovery(sdata);
-#endif
-
 	if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE &&
 	    ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
 	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
@@ -2575,11 +2504,6 @@ void ieee80211_sta_work(struct work_struct *work)
 	case IEEE80211_STA_MLME_IBSS_JOINED:
 		ieee80211_sta_merge_ibss(sdata, ifsta);
 		break;
-#ifdef CONFIG_MAC80211_MESH
-	case IEEE80211_STA_MLME_MESH_UP:
-		ieee80211_mesh_housekeeping(sdata, ifsta);
-		break;
-#endif
 	default:
 		WARN_ON(1);
 		break;
@@ -2596,8 +2520,7 @@ void ieee80211_sta_work(struct work_struct *work)
 
 static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
 {
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    ieee80211_vif_is_mesh(&sdata->vif))
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
 		queue_work(sdata->local->hw.workqueue,
 			   &sdata->u.sta.work);
 }
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index a914ba73ccf..21e1942ea97 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -148,9 +148,7 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 				    struct ieee80211_local *local,
 				    struct sta_info *sta)
 {
-#ifdef CONFIG_MAC80211_MESH
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
-#endif
 	struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv;
 	struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
 	struct ieee80211_supported_band *sband;
@@ -181,11 +179,8 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 		pf = spinfo->last_pf;
 	else {
 		pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit;
-#ifdef CONFIG_MAC80211_MESH
-		if (pf == 100 &&
-		    sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT)
+		if (ieee80211_vif_is_mesh(&sdata->vif) && pf == 100)
 			mesh_plink_broken(sta);
-#endif
 		pf <<= RC_PID_ARITH_SHIFT;
 		sta->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9)
 					>> RC_PID_ARITH_SHIFT;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d0803797902..208563a27bc 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1404,7 +1404,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 
 	if (rx->flags & IEEE80211_RX_RA_MATCH) {
 		if (!mesh_hdr->ttl)
-			IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.sta,
+			IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh,
 						     dropped_frames_ttl);
 		else {
 			struct ieee80211_hdr *fwd_hdr;
@@ -1591,9 +1591,11 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
 	if (!(rx->flags & IEEE80211_RX_RA_MATCH))
 		return RX_DROP_MONITOR;
 
+	if (ieee80211_vif_is_mesh(&sdata->vif))
+		return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status);
+
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
+	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
 		return RX_DROP_MONITOR;
 
 	if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index f8b296bed0b..f4399e9ac92 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -476,6 +476,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 
  done:
 	ieee80211_mlme_notify_scan_completed(local);
+	ieee80211_mesh_notify_scan_completed(local);
 }
 EXPORT_SYMBOL(ieee80211_scan_completed);
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c413d4836af..1059b17c83b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1330,7 +1330,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 				if (mesh_nexthop_lookup(skb, osdata))
 					return  0;
 			if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0)
-				IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.sta,
+				IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh,
 							     fwded_frames);
 		}
 	}
@@ -1483,9 +1483,9 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN);
 		memcpy(hdr.addr3, skb->data, ETH_ALEN);
 		memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
-		if (!sdata->u.sta.mshcfg.dot11MeshTTL) {
+		if (!sdata->u.mesh.mshcfg.dot11MeshTTL) {
 			/* Do not send frames with mesh_ttl == 0 */
-			sdata->u.sta.mshstats.dropped_frames_ttl++;
+			sdata->u.mesh.mshstats.dropped_frames_ttl++;
 			ret = 0;
 			goto fail;
 		}
@@ -1815,10 +1815,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	struct rate_selection rsel;
 	struct beacon_data *beacon;
 	struct ieee80211_supported_band *sband;
-	struct ieee80211_mgmt *mgmt;
 	int *num_beacons;
 	enum ieee80211_band band = local->hw.conf.channel->band;
-	u8 *pos;
 
 	sband = local->hw.wiphy->bands[band];
 
@@ -1885,7 +1883,11 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 						 IEEE80211_STYPE_BEACON);
 
 		num_beacons = &ifsta->num_beacons;
+#ifdef CONFIG_MAC80211_MESH
 	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		struct ieee80211_mgmt *mgmt;
+		u8 *pos;
+
 		/* headroom, head length, tail length and maximum TIM length */
 		skb = dev_alloc_skb(local->tx_headroom + 400);
 		if (!skb)
@@ -1910,7 +1912,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 
 		mesh_mgmt_ies_add(skb, sdata);
 
-		num_beacons = &sdata->u.sta.num_beacons;
+		num_beacons = &sdata->u.mesh.num_beacons;
+#endif
 	} else {
 		WARN_ON(1);
 		goto out;
-- 
cgit v1.2.3-70-g09d2


From b7413430d4d2a6168e68231d9f93763047b6d60c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:50 +0200
Subject: mac80211: fix work race

When we stop an interface, the work on it may still be pending
or running. We do cancel the timer, but we do not currently
protect against the work struct. The race is very unlikely to
hit -- it'll happen only when the driver is using mac80211's
workqueue to run long-running tasks and the sta/mesh works are
delayed for quite a bit.

This patch fixes it by cancelling the work explicitly.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 8 ++++++++
 net/mac80211/mesh.c | 9 +++++++++
 2 files changed, 17 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 522fe617648..ebdec7106d6 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -547,6 +547,14 @@ static int ieee80211_stop(struct net_device *dev)
 		sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED;
 		memset(sdata->u.sta.bssid, 0, ETH_ALEN);
 		del_timer_sync(&sdata->u.sta.timer);
+		/*
+		 * If the timer fired while we waited for it, it will have
+		 * requeued the work. Now the work will be running again
+		 * but will not rearm the timer again because it checks
+		 * whether the interface is running, which, at this point,
+		 * it no longer is.
+		 */
+		cancel_work_sync(&sdata->u.sta.work);
 		/*
 		 * When we get here, the interface is marked down.
 		 * Call synchronize_rcu() to wait for the RX path
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 9e47725cc59..a0141f5ff18 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -448,6 +448,15 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 {
 	del_timer_sync(&sdata->u.mesh.housekeeping_timer);
+	/*
+	 * If the timer fired while we waited for it, it will have
+	 * requeued the work. Now the work will be running again
+	 * but will not rearm the timer again because it checks
+	 * whether the interface is running, which, at this point,
+	 * it no longer is.
+	 */
+	cancel_work_sync(&sdata->u.mesh.work);
+
 	/*
 	 * When we get here, the interface is marked down.
 	 * Call synchronize_rcu() to wait for the RX path
-- 
cgit v1.2.3-70-g09d2


From 5bc75728fd43bb15b46f16ef465bcf9d487393cf Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:51 +0200
Subject: mac80211: fix scan vs. interface removal race

When we remove an interface, we can currently end up having
a pointer to it left in local->scan_sdata after it has been
set down, and then with a hardware scan the scan completion
can try to access it which is a bug. Alternatively, a scan
that started as a hardware scan may terminate as though it
was a software scan, if the timing is just right.

On SMP systems, software scan also has a similar problem,
just canceling the delayed work and setting a flag isn't
enough since it may be running concurrently; in this case
we would also never restore state of other interfaces.

This patch hopefully fixes the problems by always invoking
ieee80211_scan_completed or requiring it to be invoked by
the driver, I suspect the drivers that have ->hw_scan() are
buggy. The bug will not manifest itself unless you remove
the interface while hw-scanning which will also turn off
the hw, and then add a new interface which will be unusable
until you scan once.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h |  4 +++-
 net/mac80211/main.c    | 33 +++++++++++++++++++++++++--------
 net/mac80211/mlme.c    |  2 +-
 net/mac80211/scan.c    | 38 +++++++++++++++++++++++++++-----------
 4 files changed, 56 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f504e3eca7d..d67882dd360 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1124,7 +1124,9 @@ enum ieee80211_ampdu_mlme_action {
  * @hw_scan: Ask the hardware to service the scan request, no need to start
  *	the scan state machine in stack. The scan must honour the channel
  *	configuration done by the regulatory agent in the wiphy's registered
- *	bands.
+ *	bands. When the scan finishes, ieee80211_scan_completed() must be
+ *	called; note that it also must be called when the scan cannot finish
+ *	because the hardware is turned off! Anything else is a bug!
  *
  * @get_stats: return low-level statistics
  *
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ebdec7106d6..4bfac4b41c5 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -564,14 +564,6 @@ static int ieee80211_stop(struct net_device *dev)
 		synchronize_rcu();
 		skb_queue_purge(&sdata->u.sta.skb_queue);
 
-		if (local->scan_sdata == sdata) {
-			if (!local->ops->hw_scan) {
-				local->sta_sw_scanning = 0;
-				cancel_delayed_work(&local->scan_work);
-			} else
-				local->sta_hw_scanning = 0;
-		}
-
 		sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
 		kfree(sdata->u.sta.extra_ie);
 		sdata->u.sta.extra_ie = NULL;
@@ -585,6 +577,31 @@ static int ieee80211_stop(struct net_device *dev)
 		}
 		/* fall through */
 	default:
+		if (local->scan_sdata == sdata) {
+			if (!local->ops->hw_scan)
+				cancel_delayed_work_sync(&local->scan_work);
+			/*
+			 * The software scan can no longer run now, so we can
+			 * clear out the scan_sdata reference. However, the
+			 * hardware scan may still be running. The complete
+			 * function must be prepared to handle a NULL value.
+			 */
+			local->scan_sdata = NULL;
+			/*
+			 * The memory barrier guarantees that another CPU
+			 * that is hardware-scanning will now see the fact
+			 * that this interface is gone.
+			 */
+			smp_mb();
+			/*
+			 * If software scanning, complete the scan but since
+			 * the scan_sdata is NULL already don't send out a
+			 * scan event to userspace -- the scan is incomplete.
+			 */
+			if (local->sta_sw_scanning)
+				ieee80211_scan_completed(&local->hw);
+		}
+
 		conf.vif = &sdata->vif;
 		conf.type = sdata->vif.type;
 		conf.mac_addr = dev->dev_addr;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 9e20a0c20a4..19c7f21e49d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2530,7 +2530,7 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
 	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
 	struct ieee80211_if_sta *ifsta;
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata && sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
 		ifsta = &sdata->u.sta;
 		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
 		    (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) &&
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index f4399e9ac92..27727027d76 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -430,9 +430,20 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 	struct ieee80211_sub_if_data *sdata;
 	union iwreq_data wrqu;
 
+	if (WARN_ON(!local->sta_hw_scanning && !local->sta_sw_scanning))
+		return;
+
 	local->last_scan_completed = jiffies;
 	memset(&wrqu, 0, sizeof(wrqu));
-	wireless_send_event(local->scan_sdata->dev, SIOCGIWSCAN, &wrqu, NULL);
+
+	/*
+	 * local->scan_sdata could have been NULLed by the interface
+	 * down code in case we were scanning on an interface that is
+	 * being taken down.
+	 */
+	sdata = local->scan_sdata;
+	if (sdata)
+		wireless_send_event(sdata->dev, SIOCGIWSCAN, &wrqu, NULL);
 
 	if (local->sta_hw_scanning) {
 		local->sta_hw_scanning = 0;
@@ -491,7 +502,10 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 	int skip;
 	unsigned long next_delay = 0;
 
-	if (!local->sta_sw_scanning)
+	/*
+	 * Avoid re-scheduling when the sdata is going away.
+	 */
+	if (!netif_running(sdata->dev))
 		return;
 
 	switch (local->scan_state) {
@@ -570,9 +584,8 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 		break;
 	}
 
-	if (local->sta_sw_scanning)
-		queue_delayed_work(local->hw.workqueue, &local->scan_work,
-				   next_delay);
+	queue_delayed_work(local->hw.workqueue, &local->scan_work,
+			   next_delay);
 }
 
 
@@ -609,13 +622,16 @@ int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 	}
 
 	if (local->ops->hw_scan) {
-		int rc = local->ops->hw_scan(local_to_hw(local),
-					     ssid, ssid_len);
-		if (!rc) {
-			local->sta_hw_scanning = 1;
-			local->scan_sdata = scan_sdata;
+		int rc;
+
+		local->sta_hw_scanning = 1;
+		rc = local->ops->hw_scan(local_to_hw(local), ssid, ssid_len);
+		if (rc) {
+			local->sta_hw_scanning = 0;
+			return rc;
 		}
-		return rc;
+		local->scan_sdata = scan_sdata;
+		return 0;
 	}
 
 	local->sta_sw_scanning = 1;
-- 
cgit v1.2.3-70-g09d2


From 9c6bd79011b14a8bfe58aad0acfb51e4dca05eed Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:52 +0200
Subject: mac80211: reorder MLME code more

This way all the utility functions are at the top, then the
state machine and externally callable functions are moved to
the bottom. Also clean up ieee80211_i.h a bit and add a few
comments about which functions are called from where.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  39 +--
 net/mac80211/iface.c       |  17 +-
 net/mac80211/mlme.c        | 700 +++++++++++++++++++++++----------------------
 3 files changed, 384 insertions(+), 372 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index cac0b133454..442a43a3400 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -882,54 +882,53 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
 }
 
 
-/* ieee80211.c */
 int ieee80211_hw_config(struct ieee80211_local *local);
 int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed);
 void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
 u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 			struct ieee80211_ht_info *req_ht_cap,
 			struct ieee80211_ht_bss_info *req_bss_cap);
+void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
+				      u32 changed);
 
-/* ieee80211_ioctl.c */
+/* wireless extensions */
 extern const struct iw_handler_def ieee80211_iw_handler_def;
 int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq);
 
-/* ieee80211_sta.c */
-void ieee80211_sta_timer(unsigned long data);
-void ieee80211_sta_work(struct work_struct *work);
+/* STA/IBSS code */
+void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_scan_work(struct work_struct *work);
 void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 			   struct ieee80211_rx_status *rx_status);
 int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len);
 int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len);
 int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid);
-int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t ssid_len);
 void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata,
 			    struct ieee80211_if_sta *ifsta);
-int ieee80211_sta_scan_results(struct ieee80211_local *local,
-			       struct iw_request_info *info,
-			       char *buf, size_t len);
-ieee80211_rx_result ieee80211_sta_rx_scan(
-	struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
-	struct ieee80211_rx_status *rx_status);
-void ieee80211_rx_bss_list_init(struct ieee80211_local *local);
-void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local);
-int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len);
 struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 					struct sk_buff *skb, u8 *bssid,
 					u8 *addr, u64 supp_rates);
 int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason);
 int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason);
-void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
-				      u32 changed);
 u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
 u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
 			    struct ieee802_11_elems *elems,
 			    enum ieee80211_band band);
 void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 			      u8 *ssid, size_t ssid_len);
-void ieee802_11_parse_elems(u8 *start, size_t len,
-			    struct ieee802_11_elems *elems);
+
+/* scan/BSS handling */
+int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t ssid_len);
+int ieee80211_sta_scan_results(struct ieee80211_local *local,
+			       struct iw_request_info *info,
+			       char *buf, size_t len);
+ieee80211_rx_result ieee80211_sta_rx_scan(
+	struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
+	struct ieee80211_rx_status *rx_status);
+void ieee80211_rx_bss_list_init(struct ieee80211_local *local);
+void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local);
+int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len);
+
 void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local);
 int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 			     u8 *ssid, size_t ssid_len);
@@ -1007,6 +1006,8 @@ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int ke
 void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata);
 void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		      int encrypt);
+void ieee802_11_parse_elems(u8 *start, size_t len,
+			    struct ieee802_11_elems *elems);
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ddbaa417e2e..61b19340488 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -83,8 +83,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 				  enum ieee80211_if_types type)
 {
-	struct ieee80211_if_sta *ifsta;
-
 	/* clear type-dependent union */
 	memset(&sdata->u, 0, sizeof(sdata->u));
 
@@ -101,20 +99,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 		break;
 	case IEEE80211_IF_TYPE_STA:
 	case IEEE80211_IF_TYPE_IBSS:
-		ifsta = &sdata->u.sta;
-		INIT_WORK(&ifsta->work, ieee80211_sta_work);
-		setup_timer(&ifsta->timer, ieee80211_sta_timer,
-			    (unsigned long) sdata);
-		skb_queue_head_init(&ifsta->skb_queue);
-
-		ifsta->capab = WLAN_CAPABILITY_ESS;
-		ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN |
-			IEEE80211_AUTH_ALG_SHARED_KEY;
-		ifsta->flags |= IEEE80211_STA_CREATE_IBSS |
-			IEEE80211_STA_AUTO_BSSID_SEL |
-			IEEE80211_STA_AUTO_CHANNEL_SEL;
-		if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4)
-			ifsta->flags |= IEEE80211_STA_WMM_ENABLED;
+		ieee80211_sta_setup_sdata(sdata);
 		break;
 	case IEEE80211_IF_TYPE_MESH_POINT:
 		if (ieee80211_vif_is_mesh(&sdata->vif))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 19c7f21e49d..e1483010652 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -93,44 +93,46 @@ static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss,
 	return count;
 }
 
-/* frame sending functions */
-static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
-				struct ieee80211_if_sta *ifsta,
-				int transaction, u8 *extra, size_t extra_len,
-				int encrypt)
+/* also used by mesh code */
+u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
+			    struct ieee802_11_elems *elems,
+			    enum ieee80211_band band)
 {
-	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	struct ieee80211_mgmt *mgmt;
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_rate *bitrates;
+	size_t num_rates;
+	u64 supp_rates;
+	int i, j;
+	sband = local->hw.wiphy->bands[band];
 
-	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
-			    sizeof(*mgmt) + 6 + extra_len);
-	if (!skb) {
-		printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
-		       "frame\n", sdata->dev->name);
-		return;
+	if (!sband) {
+		WARN_ON(1);
+		sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 	}
-	skb_reserve(skb, local->hw.extra_tx_headroom);
-
-	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
-	memset(mgmt, 0, 24 + 6);
-	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-					  IEEE80211_STYPE_AUTH);
-	if (encrypt)
-		mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
-	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
-	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
-	mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg);
-	mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
-	ifsta->auth_transaction = transaction + 1;
-	mgmt->u.auth.status_code = cpu_to_le16(0);
-	if (extra)
-		memcpy(skb_put(skb, extra_len), extra, extra_len);
 
-	ieee80211_tx_skb(sdata, skb, encrypt);
+	bitrates = sband->bitrates;
+	num_rates = sband->n_bitrates;
+	supp_rates = 0;
+	for (i = 0; i < elems->supp_rates_len +
+		     elems->ext_supp_rates_len; i++) {
+		u8 rate = 0;
+		int own_rate;
+		if (i < elems->supp_rates_len)
+			rate = elems->supp_rates[i];
+		else if (elems->ext_supp_rates)
+			rate = elems->ext_supp_rates
+				[i - elems->supp_rates_len];
+		own_rate = 5 * (rate & 0x7f);
+		for (j = 0; j < num_rates; j++)
+			if (bitrates[j].bitrate == own_rate)
+				supp_rates |= BIT(j);
+	}
+	return supp_rates;
 }
 
+/* frame sending functions */
+
+/* also used by scanning code */
 void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 			      u8 *ssid, size_t ssid_len)
 {
@@ -191,6 +193,43 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 	ieee80211_tx_skb(sdata, skb, 0);
 }
 
+static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
+				struct ieee80211_if_sta *ifsta,
+				int transaction, u8 *extra, size_t extra_len,
+				int encrypt)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *mgmt;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+			    sizeof(*mgmt) + 6 + extra_len);
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
+		       "frame\n", sdata->dev->name);
+		return;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
+	memset(mgmt, 0, 24 + 6);
+	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+					  IEEE80211_STYPE_AUTH);
+	if (encrypt)
+		mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+	memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
+	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
+	mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg);
+	mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
+	ifsta->auth_transaction = transaction + 1;
+	mgmt->u.auth.status_code = cpu_to_le16(0);
+	if (extra)
+		memcpy(skb_put(skb, extra_len), extra, extra_len);
+
+	ieee80211_tx_skb(sdata, skb, encrypt);
+}
+
 static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 				 struct ieee80211_if_sta *ifsta)
 {
@@ -1414,42 +1453,6 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	return res;
 }
 
-u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
-			    struct ieee802_11_elems *elems,
-			    enum ieee80211_band band)
-{
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_rate *bitrates;
-	size_t num_rates;
-	u64 supp_rates;
-	int i, j;
-	sband = local->hw.wiphy->bands[band];
-
-	if (!sband) {
-		WARN_ON(1);
-		sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-	}
-
-	bitrates = sband->bitrates;
-	num_rates = sband->n_bitrates;
-	supp_rates = 0;
-	for (i = 0; i < elems->supp_rates_len +
-		     elems->ext_supp_rates_len; i++) {
-		u8 rate = 0;
-		int own_rate;
-		if (i < elems->supp_rates_len)
-			rate = elems->supp_rates[i];
-		else if (elems->ext_supp_rates)
-			rate = elems->ext_supp_rates
-				[i - elems->supp_rates_len];
-		own_rate = 5 * (rate & 0x7f);
-		for (j = 0; j < num_rates; j++)
-			if (bitrates[j].bitrate == own_rate)
-				supp_rates |= BIT(j);
-	}
-	return supp_rates;
-}
-
 static u64 ieee80211_sta_get_mandatory_rates(struct ieee80211_local *local,
 					enum ieee80211_band band)
 {
@@ -1894,7 +1897,7 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata,
 }
 
 
-void ieee80211_sta_timer(unsigned long data)
+static void ieee80211_sta_timer(unsigned long data)
 {
 	struct ieee80211_sub_if_data *sdata =
 		(struct ieee80211_sub_if_data *) data;
@@ -1937,28 +1940,6 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata,
 }
 
 
-void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata,
-			    struct ieee80211_if_sta *ifsta)
-{
-	struct ieee80211_local *local = sdata->local;
-
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
-		return;
-
-	if ((ifsta->flags & (IEEE80211_STA_BSSID_SET |
-			     IEEE80211_STA_AUTO_BSSID_SEL)) &&
-	    (ifsta->flags & (IEEE80211_STA_SSID_SET |
-			     IEEE80211_STA_AUTO_SSID_SEL))) {
-
-		if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED)
-			ieee80211_set_disassoc(sdata, ifsta, true, true,
-					       WLAN_REASON_DEAUTH_LEAVING);
-
-		set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
-		queue_work(local->hw.workqueue, &ifsta->work);
-	}
-}
-
 static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta,
 				    const char *ssid, int ssid_len)
 {
@@ -2160,113 +2141,190 @@ dont_join:
 }
 
 
-int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len)
+static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
+				     struct ieee80211_if_sta *ifsta)
 {
-	struct ieee80211_if_sta *ifsta;
-	int res;
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_sta_bss *bss, *selected = NULL;
+	int top_rssi = 0, freq;
 
-	if (len > IEEE80211_MAX_SSID_LEN)
-		return -EINVAL;
+	spin_lock_bh(&local->sta_bss_lock);
+	freq = local->oper_channel->center_freq;
+	list_for_each_entry(bss, &local->sta_bss_list, list) {
+		if (!(bss->capability & WLAN_CAPABILITY_ESS))
+			continue;
 
-	ifsta = &sdata->u.sta;
+		if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
+			IEEE80211_STA_AUTO_BSSID_SEL |
+			IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
+		    (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
+		     !!sdata->default_key))
+			continue;
 
-	if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) {
-		memset(ifsta->ssid, 0, sizeof(ifsta->ssid));
-		memcpy(ifsta->ssid, ssid, len);
-		ifsta->ssid_len = len;
-		ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
+		if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
+		    bss->freq != freq)
+			continue;
 
-		res = 0;
-		/*
-		 * Hack! MLME code needs to be cleaned up to have different
-		 * entry points for configuration and internal selection change
-		 */
-		if (netif_running(sdata->dev))
-			res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID);
-		if (res) {
-			printk(KERN_DEBUG "%s: Failed to config new SSID to "
-			       "the low-level driver\n", sdata->dev->name);
-			return res;
-		}
-	}
+		if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) &&
+		    memcmp(bss->bssid, ifsta->bssid, ETH_ALEN))
+			continue;
 
-	if (len)
-		ifsta->flags |= IEEE80211_STA_SSID_SET;
-	else
-		ifsta->flags &= ~IEEE80211_STA_SSID_SET;
+		if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) &&
+		    !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len))
+			continue;
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
-	    !(ifsta->flags & IEEE80211_STA_BSSID_SET)) {
-		ifsta->ibss_join_req = jiffies;
-		ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH;
-		return ieee80211_sta_find_ibss(sdata, ifsta);
+		if (!selected || top_rssi < bss->signal) {
+			selected = bss;
+			top_rssi = bss->signal;
+		}
 	}
+	if (selected)
+		atomic_inc(&selected->users);
+	spin_unlock_bh(&local->sta_bss_lock);
 
-	return 0;
-}
-
-
-int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len)
-{
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-	memcpy(ssid, ifsta->ssid, ifsta->ssid_len);
-	*len = ifsta->ssid_len;
-	return 0;
-}
-
+	if (selected) {
+		ieee80211_set_freq(sdata, selected->freq);
+		if (!(ifsta->flags & IEEE80211_STA_SSID_SET))
+			ieee80211_sta_set_ssid(sdata, selected->ssid,
+					       selected->ssid_len);
+		ieee80211_sta_set_bssid(sdata, selected->bssid);
+		ieee80211_sta_def_wmm_params(sdata, selected);
 
-int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
-{
-	struct ieee80211_if_sta *ifsta;
-	int res;
+		/* Send out direct probe if no probe resp was received or
+		 * the one we have is outdated
+		 */
+		if (!selected->last_probe_resp ||
+		    time_after(jiffies, selected->last_probe_resp
+					+ IEEE80211_SCAN_RESULT_EXPIRE))
+			ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE;
+		else
+			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
 
+		ieee80211_rx_bss_put(local, selected);
+		ieee80211_sta_reset_auth(sdata, ifsta);
+		return 0;
+	} else {
+		if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) {
+			ifsta->assoc_scan_tries++;
+			if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL)
+				ieee80211_sta_start_scan(sdata, NULL, 0);
+			else
+				ieee80211_sta_start_scan(sdata, ifsta->ssid,
+							 ifsta->ssid_len);
+			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
+			set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
+		} else
+			ifsta->state = IEEE80211_STA_MLME_DISABLED;
+	}
+	return -1;
+}
+
+
+static void ieee80211_sta_work(struct work_struct *work)
+{
+	struct ieee80211_sub_if_data *sdata =
+		container_of(work, struct ieee80211_sub_if_data, u.sta.work);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_sta *ifsta;
+	struct sk_buff *skb;
+
+	if (!netif_running(sdata->dev))
+		return;
+
+	if (local->sta_sw_scanning || local->sta_hw_scanning)
+		return;
+
+	if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA &&
+		    sdata->vif.type != IEEE80211_IF_TYPE_IBSS))
+		return;
 	ifsta = &sdata->u.sta;
 
-	if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) {
-		memcpy(ifsta->bssid, bssid, ETH_ALEN);
-		res = 0;
-		/*
-		 * Hack! See also ieee80211_sta_set_ssid.
-		 */
-		if (netif_running(sdata->dev))
-			res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
-		if (res) {
-			printk(KERN_DEBUG "%s: Failed to config new BSSID to "
-			       "the low-level driver\n", sdata->dev->name);
-			return res;
-		}
+	while ((skb = skb_dequeue(&ifsta->skb_queue)))
+		ieee80211_sta_rx_queued_mgmt(sdata, skb);
+
+	if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE &&
+	    ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
+	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
+	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
+		ieee80211_sta_start_scan(sdata, ifsta->scan_ssid, ifsta->scan_ssid_len);
+		return;
 	}
 
-	if (is_valid_ether_addr(bssid))
-		ifsta->flags |= IEEE80211_STA_BSSID_SET;
-	else
-		ifsta->flags &= ~IEEE80211_STA_BSSID_SET;
+	if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) {
+		if (ieee80211_sta_config_auth(sdata, ifsta))
+			return;
+		clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request);
+	} else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request))
+		return;
 
-	return 0;
+	switch (ifsta->state) {
+	case IEEE80211_STA_MLME_DISABLED:
+		break;
+	case IEEE80211_STA_MLME_DIRECT_PROBE:
+		ieee80211_direct_probe(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_AUTHENTICATE:
+		ieee80211_authenticate(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_ASSOCIATE:
+		ieee80211_associate(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_ASSOCIATED:
+		ieee80211_associated(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_IBSS_SEARCH:
+		ieee80211_sta_find_ibss(sdata, ifsta);
+		break;
+	case IEEE80211_STA_MLME_IBSS_JOINED:
+		ieee80211_sta_merge_ibss(sdata, ifsta);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	if (ieee80211_privacy_mismatch(sdata, ifsta)) {
+		printk(KERN_DEBUG "%s: privacy configuration mismatch and "
+		       "mixed-cell disabled - disassociate\n", sdata->dev->name);
+
+		ieee80211_set_disassoc(sdata, ifsta, false, true,
+					WLAN_REASON_UNSPECIFIED);
+	}
 }
 
+static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
+{
+	if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
+		queue_work(sdata->local->hw.workqueue,
+			   &sdata->u.sta.work);
+}
 
-int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len)
+/* interface setup */
+void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	struct ieee80211_if_sta *ifsta;
 
-	kfree(ifsta->extra_ie);
-	if (len == 0) {
-		ifsta->extra_ie = NULL;
-		ifsta->extra_ie_len = 0;
-		return 0;
-	}
-	ifsta->extra_ie = kmalloc(len, GFP_KERNEL);
-	if (!ifsta->extra_ie) {
-		ifsta->extra_ie_len = 0;
-		return -ENOMEM;
-	}
-	memcpy(ifsta->extra_ie, ie, len);
-	ifsta->extra_ie_len = len;
-	return 0;
+	ifsta = &sdata->u.sta;
+	INIT_WORK(&ifsta->work, ieee80211_sta_work);
+	setup_timer(&ifsta->timer, ieee80211_sta_timer,
+		    (unsigned long) sdata);
+	skb_queue_head_init(&ifsta->skb_queue);
+
+	ifsta->capab = WLAN_CAPABILITY_ESS;
+	ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN |
+		IEEE80211_AUTH_ALG_SHARED_KEY;
+	ifsta->flags |= IEEE80211_STA_CREATE_IBSS |
+		IEEE80211_STA_AUTO_BSSID_SEL |
+		IEEE80211_STA_AUTO_CHANNEL_SEL;
+	if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4)
+		ifsta->flags |= IEEE80211_STA_WMM_ENABLED;
 }
 
-
+/*
+ * Add a new IBSS station, will also be called by the RX code when,
+ * in IBSS mode, receiving a frame from a yet-unknown station, hence
+ * must be callable in atomic context.
+ */
 struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 					struct sk_buff *skb, u8 *bssid,
 					u8 *addr, u64 supp_rates)
@@ -2312,86 +2370,132 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 	return sta;
 }
 
-
-static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
-				     struct ieee80211_if_sta *ifsta)
+/* configuration hooks */
+void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata,
+			    struct ieee80211_if_sta *ifsta)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_sta_bss *bss, *selected = NULL;
-	int top_rssi = 0, freq;
 
-	spin_lock_bh(&local->sta_bss_lock);
-	freq = local->oper_channel->center_freq;
-	list_for_each_entry(bss, &local->sta_bss_list, list) {
-		if (!(bss->capability & WLAN_CAPABILITY_ESS))
-			continue;
+	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+		return;
 
-		if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
-			IEEE80211_STA_AUTO_BSSID_SEL |
-			IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
-		    (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
-		     !!sdata->default_key))
-			continue;
+	if ((ifsta->flags & (IEEE80211_STA_BSSID_SET |
+			     IEEE80211_STA_AUTO_BSSID_SEL)) &&
+	    (ifsta->flags & (IEEE80211_STA_SSID_SET |
+			     IEEE80211_STA_AUTO_SSID_SEL))) {
 
-		if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
-		    bss->freq != freq)
-			continue;
+		if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED)
+			ieee80211_set_disassoc(sdata, ifsta, true, true,
+					       WLAN_REASON_DEAUTH_LEAVING);
 
-		if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) &&
-		    memcmp(bss->bssid, ifsta->bssid, ETH_ALEN))
-			continue;
+		set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
+		queue_work(local->hw.workqueue, &ifsta->work);
+	}
+}
 
-		if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) &&
-		    !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len))
-			continue;
+int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len)
+{
+	struct ieee80211_if_sta *ifsta;
+	int res;
 
-		if (!selected || top_rssi < bss->signal) {
-			selected = bss;
-			top_rssi = bss->signal;
+	if (len > IEEE80211_MAX_SSID_LEN)
+		return -EINVAL;
+
+	ifsta = &sdata->u.sta;
+
+	if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) {
+		memset(ifsta->ssid, 0, sizeof(ifsta->ssid));
+		memcpy(ifsta->ssid, ssid, len);
+		ifsta->ssid_len = len;
+		ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
+
+		res = 0;
+		/*
+		 * Hack! MLME code needs to be cleaned up to have different
+		 * entry points for configuration and internal selection change
+		 */
+		if (netif_running(sdata->dev))
+			res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID);
+		if (res) {
+			printk(KERN_DEBUG "%s: Failed to config new SSID to "
+			       "the low-level driver\n", sdata->dev->name);
+			return res;
 		}
 	}
-	if (selected)
-		atomic_inc(&selected->users);
-	spin_unlock_bh(&local->sta_bss_lock);
 
-	if (selected) {
-		ieee80211_set_freq(sdata, selected->freq);
-		if (!(ifsta->flags & IEEE80211_STA_SSID_SET))
-			ieee80211_sta_set_ssid(sdata, selected->ssid,
-					       selected->ssid_len);
-		ieee80211_sta_set_bssid(sdata, selected->bssid);
-		ieee80211_sta_def_wmm_params(sdata, selected);
+	if (len)
+		ifsta->flags |= IEEE80211_STA_SSID_SET;
+	else
+		ifsta->flags &= ~IEEE80211_STA_SSID_SET;
 
-		/* Send out direct probe if no probe resp was received or
-		 * the one we have is outdated
+	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
+	    !(ifsta->flags & IEEE80211_STA_BSSID_SET)) {
+		ifsta->ibss_join_req = jiffies;
+		ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH;
+		return ieee80211_sta_find_ibss(sdata, ifsta);
+	}
+
+	return 0;
+}
+
+int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len)
+{
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+	memcpy(ssid, ifsta->ssid, ifsta->ssid_len);
+	*len = ifsta->ssid_len;
+	return 0;
+}
+
+int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
+{
+	struct ieee80211_if_sta *ifsta;
+	int res;
+
+	ifsta = &sdata->u.sta;
+
+	if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) {
+		memcpy(ifsta->bssid, bssid, ETH_ALEN);
+		res = 0;
+		/*
+		 * Hack! See also ieee80211_sta_set_ssid.
 		 */
-		if (!selected->last_probe_resp ||
-		    time_after(jiffies, selected->last_probe_resp
-					+ IEEE80211_SCAN_RESULT_EXPIRE))
-			ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE;
-		else
-			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
+		if (netif_running(sdata->dev))
+			res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
+		if (res) {
+			printk(KERN_DEBUG "%s: Failed to config new BSSID to "
+			       "the low-level driver\n", sdata->dev->name);
+			return res;
+		}
+	}
 
-		ieee80211_rx_bss_put(local, selected);
-		ieee80211_sta_reset_auth(sdata, ifsta);
+	if (is_valid_ether_addr(bssid))
+		ifsta->flags |= IEEE80211_STA_BSSID_SET;
+	else
+		ifsta->flags &= ~IEEE80211_STA_BSSID_SET;
+
+	return 0;
+}
+
+int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len)
+{
+	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+
+	kfree(ifsta->extra_ie);
+	if (len == 0) {
+		ifsta->extra_ie = NULL;
+		ifsta->extra_ie_len = 0;
 		return 0;
-	} else {
-		if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) {
-			ifsta->assoc_scan_tries++;
-			if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL)
-				ieee80211_sta_start_scan(sdata, NULL, 0);
-			else
-				ieee80211_sta_start_scan(sdata, ifsta->ssid,
-							 ifsta->ssid_len);
-			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
-			set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
-		} else
-			ifsta->state = IEEE80211_STA_MLME_DISABLED;
 	}
-	return -1;
+	ifsta->extra_ie = kmalloc(len, GFP_KERNEL);
+	if (!ifsta->extra_ie) {
+		ifsta->extra_ie_len = 0;
+		return -ENOMEM;
+	}
+	memcpy(ifsta->extra_ie, ie, len);
+	ifsta->extra_ie_len = len;
+	return 0;
 }
 
-
 int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason)
 {
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
@@ -2407,7 +2511,6 @@ int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason
 	return 0;
 }
 
-
 int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
 {
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
@@ -2425,6 +2528,28 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
 	return 0;
 }
 
+/* scan finished notification */
+void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
+	struct ieee80211_if_sta *ifsta;
+
+	if (sdata && sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+		ifsta = &sdata->u.sta;
+		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
+		    (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) &&
+		    !ieee80211_sta_active_ibss(sdata)))
+			ieee80211_sta_find_ibss(sdata, ifsta);
+	}
+
+	/* Restart STA timers */
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &local->interfaces, list)
+		ieee80211_restart_sta_timer(sdata);
+	rcu_read_unlock();
+}
+
+/* driver notification call */
 void ieee80211_notify_mac(struct ieee80211_hw *hw,
 			  enum ieee80211_notification_types  notif_type)
 {
@@ -2445,102 +2570,3 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw,
 	}
 }
 EXPORT_SYMBOL(ieee80211_notify_mac);
-
-void ieee80211_sta_work(struct work_struct *work)
-{
-	struct ieee80211_sub_if_data *sdata =
-		container_of(work, struct ieee80211_sub_if_data, u.sta.work);
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_if_sta *ifsta;
-	struct sk_buff *skb;
-
-	if (!netif_running(sdata->dev))
-		return;
-
-	if (local->sta_sw_scanning || local->sta_hw_scanning)
-		return;
-
-	if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-		    sdata->vif.type != IEEE80211_IF_TYPE_IBSS))
-		return;
-	ifsta = &sdata->u.sta;
-
-	while ((skb = skb_dequeue(&ifsta->skb_queue)))
-		ieee80211_sta_rx_queued_mgmt(sdata, skb);
-
-	if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE &&
-	    ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
-	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
-	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
-		ieee80211_sta_start_scan(sdata, ifsta->scan_ssid, ifsta->scan_ssid_len);
-		return;
-	}
-
-	if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) {
-		if (ieee80211_sta_config_auth(sdata, ifsta))
-			return;
-		clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request);
-	} else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request))
-		return;
-
-	switch (ifsta->state) {
-	case IEEE80211_STA_MLME_DISABLED:
-		break;
-	case IEEE80211_STA_MLME_DIRECT_PROBE:
-		ieee80211_direct_probe(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_AUTHENTICATE:
-		ieee80211_authenticate(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_ASSOCIATE:
-		ieee80211_associate(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_ASSOCIATED:
-		ieee80211_associated(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_IBSS_SEARCH:
-		ieee80211_sta_find_ibss(sdata, ifsta);
-		break;
-	case IEEE80211_STA_MLME_IBSS_JOINED:
-		ieee80211_sta_merge_ibss(sdata, ifsta);
-		break;
-	default:
-		WARN_ON(1);
-		break;
-	}
-
-	if (ieee80211_privacy_mismatch(sdata, ifsta)) {
-		printk(KERN_DEBUG "%s: privacy configuration mismatch and "
-		       "mixed-cell disabled - disassociate\n", sdata->dev->name);
-
-		ieee80211_set_disassoc(sdata, ifsta, false, true,
-					WLAN_REASON_UNSPECIFIED);
-	}
-}
-
-static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
-{
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
-		queue_work(sdata->local->hw.workqueue,
-			   &sdata->u.sta.work);
-}
-
-void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
-{
-	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
-	struct ieee80211_if_sta *ifsta;
-
-	if (sdata && sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
-		ifsta = &sdata->u.sta;
-		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
-		    (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) &&
-		    !ieee80211_sta_active_ibss(sdata)))
-			ieee80211_sta_find_ibss(sdata, ifsta);
-	}
-
-	/* Restart STA timers */
-	rcu_read_lock();
-	list_for_each_entry_rcu(sdata, &local->interfaces, list)
-		ieee80211_restart_sta_timer(sdata);
-	rcu_read_unlock();
-}
-- 
cgit v1.2.3-70-g09d2


From e16751c3178add97c4f83dcf92e59b536537b22f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:53 +0200
Subject: mac80211: move ieee80211_set_freq to utils

It really doesn't belong into the wireless extensions code.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/util.c        | 28 ++++++++++++++++++++++++++++
 net/mac80211/wext.c        | 28 ----------------------------
 3 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 442a43a3400..199d64143b4 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -893,7 +893,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 
 /* wireless extensions */
 extern const struct iw_handler_def ieee80211_iw_handler_def;
-int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq);
 
 /* STA/IBSS code */
 void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
@@ -1008,6 +1007,7 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		      int encrypt);
 void ieee802_11_parse_elems(u8 *start, size_t len,
 			    struct ieee802_11_elems *elems);
+int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq);
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c3a22ab2ad2..cf0b820a0ea 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -612,3 +612,31 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 
 	dev_queue_xmit(skb);
 }
+
+int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
+{
+	int ret = -EINVAL;
+	struct ieee80211_channel *chan;
+	struct ieee80211_local *local = sdata->local;
+
+	chan = ieee80211_get_channel(local->hw.wiphy, freqMHz);
+
+	if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) {
+		if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
+		    chan->flags & IEEE80211_CHAN_NO_IBSS) {
+			printk(KERN_DEBUG "%s: IBSS not allowed on frequency "
+				"%d MHz\n", sdata->dev->name, chan->center_freq);
+			return ret;
+		}
+		local->oper_channel = chan;
+
+		if (local->sta_sw_scanning || local->sta_hw_scanning)
+			ret = 0;
+		else
+			ret = ieee80211_hw_config(local);
+
+		rate_control_clear(local);
+	}
+
+	return ret;
+}
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index beae664ab48..97d132811c8 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -330,34 +330,6 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev,
 	return 0;
 }
 
-int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
-{
-	int ret = -EINVAL;
-	struct ieee80211_channel *chan;
-	struct ieee80211_local *local = sdata->local;
-
-	chan = ieee80211_get_channel(local->hw.wiphy, freqMHz);
-
-	if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) {
-		if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
-		    chan->flags & IEEE80211_CHAN_NO_IBSS) {
-			printk(KERN_DEBUG "%s: IBSS not allowed on frequency "
-				"%d MHz\n", sdata->dev->name, chan->center_freq);
-			return ret;
-		}
-		local->oper_channel = chan;
-
-		if (local->sta_sw_scanning || local->sta_hw_scanning)
-			ret = 0;
-		else
-			ret = ieee80211_hw_config(local);
-
-		rate_control_clear(local);
-	}
-
-	return ret;
-}
-
 static int ieee80211_ioctl_siwfreq(struct net_device *dev,
 				   struct iw_request_info *info,
 				   struct iw_freq *freq, char *extra)
-- 
cgit v1.2.3-70-g09d2


From 213cd118cbb88b76ae48f92cfb7dbef9a83cca62 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:54 +0200
Subject: mac80211: make bridge_packets a virtual interface option

The bridge_packets configuration really should be per virtual
interface (theoretically per AP/VLAN, but this is much easier);
there currently is no way to set it yet though. Also invert
the option to "NO_BRIDGE_PACKETS" so the default is to bridge.

While at it, also document the flags properly.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs.c     |  4 ----
 net/mac80211/ieee80211_i.h | 29 +++++++++++++++++++----------
 net/mac80211/main.c        |  2 --
 net/mac80211/rx.c          |  5 +++--
 4 files changed, 22 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index ee509f1109e..24ce5446331 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -51,8 +51,6 @@ DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d",
 		      local->hw.conf.antenna_sel_tx);
 DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d",
 		      local->hw.conf.antenna_sel_rx);
-DEBUGFS_READONLY_FILE(bridge_packets, 20, "%d",
-		      local->bridge_packets);
 DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d",
 		      local->rts_threshold);
 DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d",
@@ -206,7 +204,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	DEBUGFS_ADD(frequency);
 	DEBUGFS_ADD(antenna_sel_tx);
 	DEBUGFS_ADD(antenna_sel_rx);
-	DEBUGFS_ADD(bridge_packets);
 	DEBUGFS_ADD(rts_threshold);
 	DEBUGFS_ADD(fragmentation_threshold);
 	DEBUGFS_ADD(short_retry_limit);
@@ -263,7 +260,6 @@ void debugfs_hw_del(struct ieee80211_local *local)
 	DEBUGFS_DEL(frequency);
 	DEBUGFS_DEL(antenna_sel_tx);
 	DEBUGFS_DEL(antenna_sel_rx);
-	DEBUGFS_DEL(bridge_packets);
 	DEBUGFS_DEL(rts_threshold);
 	DEBUGFS_DEL(fragmentation_threshold);
 	DEBUGFS_DEL(short_retry_limit);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 199d64143b4..52f36ab1ee5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -402,11 +402,25 @@ struct ieee80211_if_mesh {
 	do { } while (0)
 #endif
 
-/* flags used in struct ieee80211_sub_if_data.flags */
-#define IEEE80211_SDATA_ALLMULTI	BIT(0)
-#define IEEE80211_SDATA_PROMISC		BIT(1)
-#define IEEE80211_SDATA_USERSPACE_MLME	BIT(2)
-#define IEEE80211_SDATA_OPERATING_GMODE	BIT(3)
+/**
+ * enum ieee80211_sub_if_data_flags - virtual interface flags
+ *
+ * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets
+ * @IEEE80211_SDATA_PROMISC: interface is promisc
+ * @IEEE80211_SDATA_USERSPACE_MLME: userspace MLME is active
+ * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode
+ * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between
+ *	associated stations and deliver multicast frames both
+ *	back to wireless media and to the local net stack.
+ */
+enum ieee80211_sub_if_data_flags {
+	IEEE80211_SDATA_ALLMULTI		= BIT(0),
+	IEEE80211_SDATA_PROMISC			= BIT(1),
+	IEEE80211_SDATA_USERSPACE_MLME		= BIT(2),
+	IEEE80211_SDATA_OPERATING_GMODE		= BIT(3),
+	IEEE80211_SDATA_DONT_BRIDGE_PACKETS	= BIT(4),
+};
+
 struct ieee80211_sub_if_data {
 	struct list_head list;
 
@@ -635,10 +649,6 @@ struct ieee80211_local {
 	struct crypto_blkcipher *wep_rx_tfm;
 	u32 wep_iv;
 
-	int bridge_packets; /* bridge packets between associated stations and
-			     * deliver multicast frames both back to wireless
-			     * media and to the local net stack */
-
 	struct list_head interfaces;
 
 	/*
@@ -726,7 +736,6 @@ struct ieee80211_local {
 		struct dentry *frequency;
 		struct dentry *antenna_sel_tx;
 		struct dentry *antenna_sel_rx;
-		struct dentry *bridge_packets;
 		struct dentry *rts_threshold;
 		struct dentry *fragmentation_threshold;
 		struct dentry *short_retry_limit;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4bfac4b41c5..72e3f5574e9 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1280,8 +1280,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	local->hw.queues = 1; /* default */
 
-	local->bridge_packets = 1;
-
 	local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
 	local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
 	local->short_retry_limit = 7;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 208563a27bc..93f2cda9926 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1221,8 +1221,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 	skb = rx->skb;
 	xmit_skb = NULL;
 
-	if (local->bridge_packets && (sdata->vif.type == IEEE80211_IF_TYPE_AP ||
-				      sdata->vif.type == IEEE80211_IF_TYPE_VLAN) &&
+	if ((sdata->vif.type == IEEE80211_IF_TYPE_AP ||
+	     sdata->vif.type == IEEE80211_IF_TYPE_VLAN) &&
+	    !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
 	    (rx->flags & IEEE80211_RX_RA_MATCH)) {
 		if (is_multicast_ether_addr(ehdr->h_dest)) {
 			/*
-- 
cgit v1.2.3-70-g09d2


From c2b13452b283f9c4a5b02a6b53ed6416ebf4c03c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:55 +0200
Subject: mac80211: clean up scan namespace

Most of the scan functions are called ieee80211_sta_scan_*
or similar, make clean it up so they are all just called
ieee80211_scan_*.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  53 ++++++++--------
 net/mac80211/main.c        |   6 +-
 net/mac80211/mesh.c        |   2 +-
 net/mac80211/mlme.c        |  51 +++++++--------
 net/mac80211/rx.c          |  10 +--
 net/mac80211/scan.c        | 153 +++++++++++++++++++++++----------------------
 net/mac80211/tx.c          |   2 +-
 net/mac80211/util.c        |   2 +-
 net/mac80211/wext.c        |   6 +-
 9 files changed, 145 insertions(+), 140 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 52f36ab1ee5..5c38ea08454 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -71,9 +71,9 @@ struct ieee80211_fragment_entry {
 };
 
 
-struct ieee80211_sta_bss {
+struct ieee80211_bss {
 	struct list_head list;
-	struct ieee80211_sta_bss *hnext;
+	struct ieee80211_bss *hnext;
 	size_t ssid_len;
 
 	atomic_t users;
@@ -112,7 +112,7 @@ struct ieee80211_sta_bss {
 	u8 erp_value;
 };
 
-static inline u8 *bss_mesh_cfg(struct ieee80211_sta_bss *bss)
+static inline u8 *bss_mesh_cfg(struct ieee80211_bss *bss)
 {
 #ifdef CONFIG_MAC80211_MESH
 	return bss->mesh_cfg;
@@ -120,7 +120,7 @@ static inline u8 *bss_mesh_cfg(struct ieee80211_sta_bss *bss)
 	return NULL;
 }
 
-static inline u8 *bss_mesh_id(struct ieee80211_sta_bss *bss)
+static inline u8 *bss_mesh_id(struct ieee80211_bss *bss)
 {
 #ifdef CONFIG_MAC80211_MESH
 	return bss->mesh_id;
@@ -128,7 +128,7 @@ static inline u8 *bss_mesh_id(struct ieee80211_sta_bss *bss)
 	return NULL;
 }
 
-static inline u8 bss_mesh_id_len(struct ieee80211_sta_bss *bss)
+static inline u8 bss_mesh_id_len(struct ieee80211_bss *bss)
 {
 #ifdef CONFIG_MAC80211_MESH
 	return bss->mesh_id_len;
@@ -658,8 +658,8 @@ struct ieee80211_local {
 	spinlock_t key_lock;
 
 
-	bool sta_sw_scanning;
-	bool sta_hw_scanning;
+	/* Scanning and BSS list */
+	bool sw_scanning, hw_scanning;
 	int scan_channel_idx;
 	enum ieee80211_band scan_band;
 
@@ -670,9 +670,9 @@ struct ieee80211_local {
 	struct ieee80211_channel *oper_channel, *scan_channel;
 	u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
 	size_t scan_ssid_len;
-	struct list_head sta_bss_list;
-	struct ieee80211_sta_bss *sta_bss_hash[STA_HASH_SIZE];
-	spinlock_t sta_bss_lock;
+	struct list_head bss_list;
+	struct ieee80211_bss *bss_hash[STA_HASH_SIZE];
+	spinlock_t bss_lock;
 
 	/* SNMP counters */
 	/* dot11CountersTable */
@@ -905,7 +905,7 @@ extern const struct iw_handler_def ieee80211_iw_handler_def;
 
 /* STA/IBSS code */
 void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
-void ieee80211_sta_scan_work(struct work_struct *work);
+void ieee80211_scan_work(struct work_struct *work);
 void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 			   struct ieee80211_rx_status *rx_status);
 int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len);
@@ -926,35 +926,38 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 			      u8 *ssid, size_t ssid_len);
 
 /* scan/BSS handling */
-int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t ssid_len);
-int ieee80211_sta_scan_results(struct ieee80211_local *local,
-			       struct iw_request_info *info,
-			       char *buf, size_t len);
-ieee80211_rx_result ieee80211_sta_rx_scan(
-	struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
-	struct ieee80211_rx_status *rx_status);
+int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
+			   u8 *ssid, size_t ssid_len);
+int ieee80211_scan_results(struct ieee80211_local *local,
+			   struct iw_request_info *info,
+			   char *buf, size_t len);
+ieee80211_rx_result
+ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata,
+		  struct sk_buff *skb,
+		  struct ieee80211_rx_status *rx_status);
 void ieee80211_rx_bss_list_init(struct ieee80211_local *local);
 void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local);
-int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len);
+int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata,
+			       char *ie, size_t len);
 
 void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local);
-int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
-			     u8 *ssid, size_t ssid_len);
-struct ieee80211_sta_bss *
+int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
+			 u8 *ssid, size_t ssid_len);
+struct ieee80211_bss *
 ieee80211_bss_info_update(struct ieee80211_local *local,
 			  struct ieee80211_rx_status *rx_status,
 			  struct ieee80211_mgmt *mgmt,
 			  size_t len,
 			  struct ieee802_11_elems *elems,
 			  int freq, bool beacon);
-struct ieee80211_sta_bss *
+struct ieee80211_bss *
 ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len);
-struct ieee80211_sta_bss *
+struct ieee80211_bss *
 ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len);
 void ieee80211_rx_bss_put(struct ieee80211_local *local,
-			  struct ieee80211_sta_bss *bss);
+			  struct ieee80211_bss *bss);
 
 /* interface handling */
 void ieee80211_if_setup(struct net_device *dev);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 72e3f5574e9..4c424acc01a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -598,7 +598,7 @@ static int ieee80211_stop(struct net_device *dev)
 			 * the scan_sdata is NULL already don't send out a
 			 * scan event to userspace -- the scan is incomplete.
 			 */
-			if (local->sta_sw_scanning)
+			if (local->sw_scanning)
 				ieee80211_scan_completed(&local->hw);
 		}
 
@@ -732,7 +732,7 @@ int ieee80211_hw_config(struct ieee80211_local *local)
 	struct ieee80211_channel *chan;
 	int ret = 0;
 
-	if (local->sta_sw_scanning)
+	if (local->sw_scanning)
 		chan = local->scan_channel;
 	else
 		chan = local->oper_channel;
@@ -1290,7 +1290,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	spin_lock_init(&local->key_lock);
 
-	INIT_DELAYED_WORK(&local->scan_work, ieee80211_sta_scan_work);
+	INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work);
 
 	sta_info_init(local);
 
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index a0141f5ff18..30cf891fd3a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -566,7 +566,7 @@ static void ieee80211_mesh_work(struct work_struct *work)
 	if (!netif_running(sdata->dev))
 		return;
 
-	if (local->sta_sw_scanning || local->sta_hw_scanning)
+	if (local->sw_scanning || local->hw_scanning)
 		return;
 
 	while ((skb = skb_dequeue(&ifmsh->skb_queue)))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e1483010652..5b748447eb7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -52,7 +52,7 @@ static int ecw2cw(int ecw)
 	return (1 << ecw) - 1;
 }
 
-static u8 *ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
+static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie)
 {
 	u8 *end, *pos;
 
@@ -72,7 +72,7 @@ static u8 *ieee80211_bss_get_ie(struct ieee80211_sta_bss *bss, u8 ie)
 	return NULL;
 }
 
-static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss,
+static int ieee80211_compatible_rates(struct ieee80211_bss *bss,
 				      struct ieee80211_supported_band *sband,
 				      u64 *rates)
 {
@@ -239,7 +239,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 	u8 *pos, *ies, *ht_add_ie;
 	int i, len, count, rates_len, supp_rates_len;
 	u16 capab;
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 	int wmm = 0;
 	struct ieee80211_supported_band *sband;
 	u64 rates = 0;
@@ -470,7 +470,7 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 
 /* MLME */
 static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
-					 struct ieee80211_sta_bss *bss)
+					 struct ieee80211_bss *bss)
 {
 	struct ieee80211_local *local = sdata->local;
 	int i, have_higher_than_11mbit = 0;
@@ -621,7 +621,7 @@ static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata,
 }
 
 static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
-					   struct ieee80211_sta_bss *bss)
+					   struct ieee80211_bss *bss)
 {
 	u32 changed = 0;
 
@@ -674,7 +674,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_conf *conf = &local_to_hw(local)->conf;
 	u32 changed = BSS_CHANGED_ASSOC;
 
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 
 	ifsta->flags |= IEEE80211_STA_ASSOCIATED;
 
@@ -846,7 +846,7 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_if_sta *ifsta)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 	int bss_privacy;
 	int wep_privacy;
 	int privacy_invoked;
@@ -1219,7 +1219,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	/* Add STA entry for the AP */
 	sta = sta_info_get(local, ifsta->bssid);
 	if (!sta) {
-		struct ieee80211_sta_bss *bss;
+		struct ieee80211_bss *bss;
 		int err;
 
 		sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC);
@@ -1339,7 +1339,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 
 static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta,
-				   struct ieee80211_sta_bss *bss)
+				   struct ieee80211_bss *bss)
 {
 	struct ieee80211_local *local = sdata->local;
 	int res, rates, i, j;
@@ -1490,7 +1490,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 	int freq;
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 	struct sta_info *sta;
 	struct ieee80211_channel *channel;
 	u64 beacon_timestamp, rx_timestamp;
@@ -1893,7 +1893,7 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata,
 
 	printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
 	       "IBSS networks with same SSID (merge)\n", sdata->dev->name);
-	ieee80211_sta_req_scan(sdata, ifsta->ssid, ifsta->ssid_len);
+	ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len);
 }
 
 
@@ -1974,7 +1974,7 @@ static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 	struct ieee80211_supported_band *sband;
 	u8 bssid[ETH_ALEN], *pos;
 	int i;
@@ -2035,7 +2035,7 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata,
 				   struct ieee80211_if_sta *ifsta)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 	int found = 0;
 	u8 bssid[ETH_ALEN];
 	int active_ibss;
@@ -2050,8 +2050,8 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n",
 	       sdata->dev->name, active_ibss);
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
-	spin_lock_bh(&local->sta_bss_lock);
-	list_for_each_entry(bss, &local->sta_bss_list, list) {
+	spin_lock_bh(&local->bss_lock);
+	list_for_each_entry(bss, &local->bss_list, list) {
 		if (ifsta->ssid_len != bss->ssid_len ||
 		    memcmp(ifsta->ssid, bss->ssid, bss->ssid_len) != 0
 		    || !(bss->capability & WLAN_CAPABILITY_IBSS))
@@ -2065,7 +2065,7 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata,
 		if (active_ibss || memcmp(bssid, ifsta->bssid, ETH_ALEN) != 0)
 			break;
 	}
-	spin_unlock_bh(&local->sta_bss_lock);
+	spin_unlock_bh(&local->bss_lock);
 
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 	if (found)
@@ -2110,7 +2110,7 @@ dont_join:
 			      IEEE80211_SCAN_INTERVAL)) {
 		printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
 		       "join\n", sdata->dev->name);
-		return ieee80211_sta_req_scan(sdata, ifsta->ssid,
+		return ieee80211_request_scan(sdata, ifsta->ssid,
 					      ifsta->ssid_len);
 	} else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) {
 		int interval = IEEE80211_SCAN_INTERVAL;
@@ -2145,12 +2145,12 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_if_sta *ifsta)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_sta_bss *bss, *selected = NULL;
+	struct ieee80211_bss *bss, *selected = NULL;
 	int top_rssi = 0, freq;
 
-	spin_lock_bh(&local->sta_bss_lock);
+	spin_lock_bh(&local->bss_lock);
 	freq = local->oper_channel->center_freq;
-	list_for_each_entry(bss, &local->sta_bss_list, list) {
+	list_for_each_entry(bss, &local->bss_list, list) {
 		if (!(bss->capability & WLAN_CAPABILITY_ESS))
 			continue;
 
@@ -2180,7 +2180,7 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 	}
 	if (selected)
 		atomic_inc(&selected->users);
-	spin_unlock_bh(&local->sta_bss_lock);
+	spin_unlock_bh(&local->bss_lock);
 
 	if (selected) {
 		ieee80211_set_freq(sdata, selected->freq);
@@ -2207,9 +2207,9 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
 		if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) {
 			ifsta->assoc_scan_tries++;
 			if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL)
-				ieee80211_sta_start_scan(sdata, NULL, 0);
+				ieee80211_start_scan(sdata, NULL, 0);
 			else
-				ieee80211_sta_start_scan(sdata, ifsta->ssid,
+				ieee80211_start_scan(sdata, ifsta->ssid,
 							 ifsta->ssid_len);
 			ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE;
 			set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request);
@@ -2231,7 +2231,7 @@ static void ieee80211_sta_work(struct work_struct *work)
 	if (!netif_running(sdata->dev))
 		return;
 
-	if (local->sta_sw_scanning || local->sta_hw_scanning)
+	if (local->sw_scanning || local->hw_scanning)
 		return;
 
 	if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA &&
@@ -2246,7 +2246,8 @@ static void ieee80211_sta_work(struct work_struct *work)
 	    ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
 	    ifsta->state != IEEE80211_STA_MLME_ASSOCIATE &&
 	    test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) {
-		ieee80211_sta_start_scan(sdata, ifsta->scan_ssid, ifsta->scan_ssid_len);
+		ieee80211_start_scan(sdata, ifsta->scan_ssid,
+				     ifsta->scan_ssid_len);
 		return;
 	}
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 93f2cda9926..582396a4fdb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -403,12 +403,12 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
 	struct ieee80211_local *local = rx->local;
 	struct sk_buff *skb = rx->skb;
 
-	if (unlikely(local->sta_hw_scanning))
-		return ieee80211_sta_rx_scan(rx->sdata, skb, rx->status);
+	if (unlikely(local->hw_scanning))
+		return ieee80211_scan_rx(rx->sdata, skb, rx->status);
 
-	if (unlikely(local->sta_sw_scanning)) {
+	if (unlikely(local->sw_scanning)) {
 		/* drop all the other packets during a software scan anyway */
-		if (ieee80211_sta_rx_scan(rx->sdata, skb, rx->status)
+		if (ieee80211_scan_rx(rx->sdata, skb, rx->status)
 		    != RX_QUEUED)
 			dev_kfree_skb(skb);
 		return RX_QUEUED;
@@ -1918,7 +1918,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 		return;
 	}
 
-	if (unlikely(local->sta_sw_scanning || local->sta_hw_scanning))
+	if (unlikely(local->sw_scanning || local->hw_scanning))
 		rx.flags |= IEEE80211_RX_IN_SCAN;
 
 	ieee80211_parse_qos(&rx);
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 27727027d76..5e719e7b720 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -32,26 +32,26 @@
 
 void ieee80211_rx_bss_list_init(struct ieee80211_local *local)
 {
-	spin_lock_init(&local->sta_bss_lock);
-	INIT_LIST_HEAD(&local->sta_bss_list);
+	spin_lock_init(&local->bss_lock);
+	INIT_LIST_HEAD(&local->bss_list);
 }
 
 void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local)
 {
-	struct ieee80211_sta_bss *bss, *tmp;
+	struct ieee80211_bss *bss, *tmp;
 
-	list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list)
+	list_for_each_entry_safe(bss, tmp, &local->bss_list, list)
 		ieee80211_rx_bss_put(local, bss);
 }
 
-struct ieee80211_sta_bss *
+struct ieee80211_bss *
 ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len)
 {
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 
-	spin_lock_bh(&local->sta_bss_lock);
-	bss = local->sta_bss_hash[STA_HASH(bssid)];
+	spin_lock_bh(&local->bss_lock);
+	bss = local->bss_hash[STA_HASH(bssid)];
 	while (bss) {
 		if (!bss_mesh_cfg(bss) &&
 		    !memcmp(bss->bssid, bssid, ETH_ALEN) &&
@@ -63,13 +63,13 @@ ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
 		}
 		bss = bss->hnext;
 	}
-	spin_unlock_bh(&local->sta_bss_lock);
+	spin_unlock_bh(&local->bss_lock);
 	return bss;
 }
 
-/* Caller must hold local->sta_bss_lock */
+/* Caller must hold local->bss_lock */
 static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local,
-					struct ieee80211_sta_bss *bss)
+					struct ieee80211_bss *bss)
 {
 	u8 hash_idx;
 
@@ -79,20 +79,20 @@ static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local,
 	else
 		hash_idx = STA_HASH(bss->bssid);
 
-	bss->hnext = local->sta_bss_hash[hash_idx];
-	local->sta_bss_hash[hash_idx] = bss;
+	bss->hnext = local->bss_hash[hash_idx];
+	local->bss_hash[hash_idx] = bss;
 }
 
-/* Caller must hold local->sta_bss_lock */
+/* Caller must hold local->bss_lock */
 static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
-					struct ieee80211_sta_bss *bss)
+					struct ieee80211_bss *bss)
 {
-	struct ieee80211_sta_bss *b, *prev = NULL;
-	b = local->sta_bss_hash[STA_HASH(bss->bssid)];
+	struct ieee80211_bss *b, *prev = NULL;
+	b = local->bss_hash[STA_HASH(bss->bssid)];
 	while (b) {
 		if (b == bss) {
 			if (!prev)
-				local->sta_bss_hash[STA_HASH(bss->bssid)] =
+				local->bss_hash[STA_HASH(bss->bssid)] =
 					bss->hnext;
 			else
 				prev->hnext = bss->hnext;
@@ -103,11 +103,11 @@ static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
 	}
 }
 
-struct ieee80211_sta_bss *
+struct ieee80211_bss *
 ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
 		     u8 *ssid, u8 ssid_len)
 {
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 
 	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
 	if (!bss)
@@ -120,23 +120,23 @@ ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
 		bss->ssid_len = ssid_len;
 	}
 
-	spin_lock_bh(&local->sta_bss_lock);
+	spin_lock_bh(&local->bss_lock);
 	/* TODO: order by RSSI? */
-	list_add_tail(&bss->list, &local->sta_bss_list);
+	list_add_tail(&bss->list, &local->bss_list);
 	__ieee80211_rx_bss_hash_add(local, bss);
-	spin_unlock_bh(&local->sta_bss_lock);
+	spin_unlock_bh(&local->bss_lock);
 	return bss;
 }
 
 #ifdef CONFIG_MAC80211_MESH
-static struct ieee80211_sta_bss *
+static struct ieee80211_bss *
 ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
 			  u8 *mesh_cfg, int freq)
 {
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 
-	spin_lock_bh(&local->sta_bss_lock);
-	bss = local->sta_bss_hash[mesh_id_hash(mesh_id, mesh_id_len)];
+	spin_lock_bh(&local->bss_lock);
+	bss = local->bss_hash[mesh_id_hash(mesh_id, mesh_id_len)];
 	while (bss) {
 		if (bss_mesh_cfg(bss) &&
 		    !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) &&
@@ -149,15 +149,15 @@ ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_i
 		}
 		bss = bss->hnext;
 	}
-	spin_unlock_bh(&local->sta_bss_lock);
+	spin_unlock_bh(&local->bss_lock);
 	return bss;
 }
 
-static struct ieee80211_sta_bss *
+static struct ieee80211_bss *
 ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
 			  u8 *mesh_cfg, int mesh_config_len, int freq)
 {
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 
 	if (mesh_config_len != MESH_CFG_LEN)
 		return NULL;
@@ -186,16 +186,16 @@ ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_i
 	memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN);
 	bss->mesh_id_len = mesh_id_len;
 	bss->freq = freq;
-	spin_lock_bh(&local->sta_bss_lock);
+	spin_lock_bh(&local->bss_lock);
 	/* TODO: order by RSSI? */
-	list_add_tail(&bss->list, &local->sta_bss_list);
+	list_add_tail(&bss->list, &local->bss_list);
 	__ieee80211_rx_bss_hash_add(local, bss);
-	spin_unlock_bh(&local->sta_bss_lock);
+	spin_unlock_bh(&local->bss_lock);
 	return bss;
 }
 #endif
 
-static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
+static void ieee80211_rx_bss_free(struct ieee80211_bss *bss)
 {
 	kfree(bss->ies);
 	kfree(bss_mesh_id(bss));
@@ -204,21 +204,21 @@ static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
 }
 
 void ieee80211_rx_bss_put(struct ieee80211_local *local,
-			  struct ieee80211_sta_bss *bss)
+			  struct ieee80211_bss *bss)
 {
 	local_bh_disable();
-	if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) {
+	if (!atomic_dec_and_lock(&bss->users, &local->bss_lock)) {
 		local_bh_enable();
 		return;
 	}
 
 	__ieee80211_rx_bss_hash_del(local, bss);
 	list_del(&bss->list);
-	spin_unlock_bh(&local->sta_bss_lock);
+	spin_unlock_bh(&local->bss_lock);
 	ieee80211_rx_bss_free(bss);
 }
 
-struct ieee80211_sta_bss *
+struct ieee80211_bss *
 ieee80211_bss_info_update(struct ieee80211_local *local,
 			  struct ieee80211_rx_status *rx_status,
 			  struct ieee80211_mgmt *mgmt,
@@ -226,7 +226,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 			  struct ieee802_11_elems *elems,
 			  int freq, bool beacon)
 {
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 	int clen;
 
 #ifdef CONFIG_MAC80211_MESH
@@ -252,9 +252,9 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 	} else {
 #if 0
 		/* TODO: order by RSSI? */
-		spin_lock_bh(&local->sta_bss_lock);
-		list_move_tail(&bss->list, &local->sta_bss_list);
-		spin_unlock_bh(&local->sta_bss_lock);
+		spin_lock_bh(&local->bss_lock);
+		list_move_tail(&bss->list, &local->bss_list);
+		spin_unlock_bh(&local->bss_lock);
 #endif
 	}
 
@@ -327,11 +327,11 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 }
 
 ieee80211_rx_result
-ieee80211_sta_rx_scan(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
-		      struct ieee80211_rx_status *rx_status)
+ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
+		  struct ieee80211_rx_status *rx_status)
 {
 	struct ieee80211_mgmt *mgmt;
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 	u8 *elements;
 	struct ieee80211_channel *channel;
 	size_t baselen;
@@ -430,7 +430,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 	struct ieee80211_sub_if_data *sdata;
 	union iwreq_data wrqu;
 
-	if (WARN_ON(!local->sta_hw_scanning && !local->sta_sw_scanning))
+	if (WARN_ON(!local->hw_scanning && !local->sw_scanning))
 		return;
 
 	local->last_scan_completed = jiffies;
@@ -445,8 +445,8 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 	if (sdata)
 		wireless_send_event(sdata->dev, SIOCGIWSCAN, &wrqu, NULL);
 
-	if (local->sta_hw_scanning) {
-		local->sta_hw_scanning = 0;
+	if (local->hw_scanning) {
+		local->hw_scanning = false;
 		if (ieee80211_hw_config(local))
 			printk(KERN_DEBUG "%s: failed to restore operational "
 			       "channel after scan\n", wiphy_name(local->hw.wiphy));
@@ -454,7 +454,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 		goto done;
 	}
 
-	local->sta_sw_scanning = 0;
+	local->sw_scanning = false;
 	if (ieee80211_hw_config(local))
 		printk(KERN_DEBUG "%s: failed to restore operational "
 		       "channel after scan\n", wiphy_name(local->hw.wiphy));
@@ -492,7 +492,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 EXPORT_SYMBOL(ieee80211_scan_completed);
 
 
-void ieee80211_sta_scan_work(struct work_struct *work)
+void ieee80211_scan_work(struct work_struct *work)
 {
 	struct ieee80211_local *local =
 		container_of(work, struct ieee80211_local, scan_work.work);
@@ -589,8 +589,8 @@ void ieee80211_sta_scan_work(struct work_struct *work)
 }
 
 
-int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
-			     u8 *ssid, size_t ssid_len)
+int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
+			 u8 *ssid, size_t ssid_len)
 {
 	struct ieee80211_local *local = scan_sdata->local;
 	struct ieee80211_sub_if_data *sdata;
@@ -615,7 +615,7 @@ int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 	  * ResultCode: SUCCESS, INVALID_PARAMETERS
 	 */
 
-	if (local->sta_sw_scanning || local->sta_hw_scanning) {
+	if (local->sw_scanning || local->hw_scanning) {
 		if (local->scan_sdata == scan_sdata)
 			return 0;
 		return -EBUSY;
@@ -624,17 +624,17 @@ int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 	if (local->ops->hw_scan) {
 		int rc;
 
-		local->sta_hw_scanning = 1;
+		local->hw_scanning = true;
 		rc = local->ops->hw_scan(local_to_hw(local), ssid, ssid_len);
 		if (rc) {
-			local->sta_hw_scanning = 0;
+			local->hw_scanning = false;
 			return rc;
 		}
 		local->scan_sdata = scan_sdata;
 		return 0;
 	}
 
-	local->sta_sw_scanning = 1;
+	local->sw_scanning = true;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -675,13 +675,14 @@ int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 }
 
 
-int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t ssid_len)
+int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
+			   u8 *ssid, size_t ssid_len)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_sta *ifsta;
 
 	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
-		return ieee80211_sta_start_scan(sdata, ssid, ssid_len);
+		return ieee80211_start_scan(sdata, ssid, ssid_len);
 
 	/*
 	 * STA has a state machine that might need to defer scanning
@@ -689,7 +690,7 @@ int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t
 	 * queue it up to the state machine in that case.
 	 */
 
-	if (local->sta_sw_scanning || local->sta_hw_scanning) {
+	if (local->sw_scanning || local->hw_scanning) {
 		if (local->scan_sdata == sdata)
 			return 0;
 		return -EBUSY;
@@ -707,9 +708,9 @@ int ieee80211_sta_req_scan(struct ieee80211_sub_if_data *sdata, u8 *ssid, size_t
 }
 
 
-static void ieee80211_sta_add_scan_ies(struct iw_request_info *info,
-				       struct ieee80211_sta_bss *bss,
-				       char **current_ev, char *end_buf)
+static void ieee80211_scan_add_ies(struct iw_request_info *info,
+				   struct ieee80211_bss *bss,
+				   char **current_ev, char *end_buf)
 {
 	u8 *pos, *end, *next;
 	struct iw_event iwe;
@@ -749,10 +750,10 @@ static void ieee80211_sta_add_scan_ies(struct iw_request_info *info,
 
 
 static char *
-ieee80211_sta_scan_result(struct ieee80211_local *local,
-			  struct iw_request_info *info,
-			  struct ieee80211_sta_bss *bss,
-			  char *current_ev, char *end_buf)
+ieee80211_scan_result(struct ieee80211_local *local,
+		      struct iw_request_info *info,
+		      struct ieee80211_bss *bss,
+		      char *current_ev, char *end_buf)
 {
 	struct iw_event iwe;
 	char *buf;
@@ -828,7 +829,7 @@ ieee80211_sta_scan_result(struct ieee80211_local *local,
 	current_ev = iwe_stream_add_point(info, current_ev, end_buf,
 					  &iwe, "");
 
-	ieee80211_sta_add_scan_ies(info, bss, &current_ev, end_buf);
+	ieee80211_scan_add_ies(info, bss, &current_ev, end_buf);
 
 	if (bss->supp_rates_len > 0) {
 		/* display all supported rates in readable format */
@@ -914,23 +915,23 @@ ieee80211_sta_scan_result(struct ieee80211_local *local,
 }
 
 
-int ieee80211_sta_scan_results(struct ieee80211_local *local,
-			       struct iw_request_info *info,
-			       char *buf, size_t len)
+int ieee80211_scan_results(struct ieee80211_local *local,
+			   struct iw_request_info *info,
+			   char *buf, size_t len)
 {
 	char *current_ev = buf;
 	char *end_buf = buf + len;
-	struct ieee80211_sta_bss *bss;
+	struct ieee80211_bss *bss;
 
-	spin_lock_bh(&local->sta_bss_lock);
-	list_for_each_entry(bss, &local->sta_bss_list, list) {
+	spin_lock_bh(&local->bss_lock);
+	list_for_each_entry(bss, &local->bss_list, list) {
 		if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
-			spin_unlock_bh(&local->sta_bss_lock);
+			spin_unlock_bh(&local->bss_lock);
 			return -E2BIG;
 		}
-		current_ev = ieee80211_sta_scan_result(local, info, bss,
+		current_ev = ieee80211_scan_result(local, info, bss,
 						       current_ev, end_buf);
 	}
-	spin_unlock_bh(&local->sta_bss_lock);
+	spin_unlock_bh(&local->bss_lock);
 	return current_ev - buf;
 }
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1059b17c83b..e606ba08ddd 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -222,7 +222,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 	if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED))
 		return TX_CONTINUE;
 
-	if (unlikely(tx->local->sta_sw_scanning) &&
+	if (unlikely(tx->local->sw_scanning) &&
 	    !ieee80211_is_probe_req(hdr->frame_control))
 		return TX_DROP;
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index cf0b820a0ea..a7968df9dac 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -630,7 +630,7 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
 		}
 		local->oper_channel = chan;
 
-		if (local->sta_sw_scanning || local->sta_hw_scanning)
+		if (local->sw_scanning || local->hw_scanning)
 			ret = 0;
 		else
 			ret = ieee80211_hw_config(local);
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 97d132811c8..77b68ed8b83 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -552,7 +552,7 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
 		ssid_len = req->essid_len;
 	}
 
-	return ieee80211_sta_req_scan(sdata, ssid, ssid_len);
+	return ieee80211_request_scan(sdata, ssid, ssid_len);
 }
 
 
@@ -566,10 +566,10 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (local->sta_sw_scanning || local->sta_hw_scanning)
+	if (local->sw_scanning || local->hw_scanning)
 		return -EAGAIN;
 
-	res = ieee80211_sta_scan_results(local, info, extra, data->length);
+	res = ieee80211_scan_results(local, info, extra, data->length);
 	if (res >= 0) {
 		data->length = res;
 		return 0;
-- 
cgit v1.2.3-70-g09d2


From ccd7b36286f8c42b3fa95c5a8d402162ffab41df Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:56 +0200
Subject: mac80211: clean up some comments

Some comments refer to 80211.o or similar; also remove
a comment about implementing fragments better, we really
have better things to do.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  7 ++-----
 net/mac80211/rx.c          |  4 ++--
 net/mac80211/tx.c          | 15 +--------------
 net/mac80211/wme.h         |  1 -
 4 files changed, 5 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5c38ea08454..1f9336a30e3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -29,9 +29,6 @@
 #include "key.h"
 #include "sta_info.h"
 
-/* ieee80211.o internal definitions, etc. These are not included into
- * low-level drivers. */
-
 struct ieee80211_local;
 
 /* Maximum number of broadcast/multicast frames to buffer when some of the
@@ -293,13 +290,13 @@ struct mesh_config {
 #define IEEE80211_STA_AUTO_BSSID_SEL	BIT(11)
 #define IEEE80211_STA_AUTO_CHANNEL_SEL	BIT(12)
 #define IEEE80211_STA_PRIVACY_INVOKED	BIT(13)
-/* flags for  MLME request*/
+/* flags for MLME request */
 #define IEEE80211_STA_REQ_SCAN 0
 #define IEEE80211_STA_REQ_DIRECT_PROBE 1
 #define IEEE80211_STA_REQ_AUTH 2
 #define IEEE80211_STA_REQ_RUN  3
 
-/* flags used for setting mlme state */
+/* STA/IBSS MLME states */
 enum ieee80211_sta_mlme_state {
 	IEEE80211_STA_MLME_DISABLED,
 	IEEE80211_STA_MLME_DIRECT_PROBE,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 582396a4fdb..33530b29c42 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -501,8 +501,8 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 	/* Drop disallowed frame classes based on STA auth/assoc state;
 	 * IEEE 802.11, Chap 5.5.
 	 *
-	 * 80211.o does filtering only based on association state, i.e., it
-	 * drops Class 3 frames from not associated stations. hostapd sends
+	 * mac80211 filters only based on association state, i.e. it drops
+	 * Class 3 frames from not associated stations. hostapd sends
 	 * deauth/disassoc frames when needed. In addition, hostapd is
 	 * responsible for filtering on both auth and assoc states.
 	 */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e606ba08ddd..1bed3be01c2 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -111,7 +111,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 	hdr = (struct ieee80211_hdr *)tx->skb->data;
 	if (ieee80211_is_ctl(hdr->frame_control)) {
 		/* TODO: These control frames are not currently sent by
-		 * 80211.o, but should they be implemented, this function
+		 * mac80211, but should they be implemented, this function
 		 * needs to be updated to support duration field calculation.
 		 *
 		 * RTS: time needed to transmit pending data/mgmt frame plus
@@ -1580,19 +1580,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	nh_pos -= skip_header_bytes;
 	h_pos -= skip_header_bytes;
 
-	/* TODO: implement support for fragments so that there is no need to
-	 * reallocate and copy payload; it might be enough to support one
-	 * extra fragment that would be copied in the beginning of the frame
-	 * data.. anyway, it would be nice to include this into skb structure
-	 * somehow
-	 *
-	 * There are few options for this:
-	 * use skb->cb as an extra space for 802.11 header
-	 * allocate new buffer if not enough headroom
-	 * make sure that there is enough headroom in every skb by increasing
-	 * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and
-	 * alloc_skb() (net/core/skbuff.c)
-	 */
 	head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
 
 	/*
diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h
index 465e274df7c..bc62f28a4d3 100644
--- a/net/mac80211/wme.h
+++ b/net/mac80211/wme.h
@@ -1,5 +1,4 @@
 /*
- * IEEE 802.11 driver (80211.o) - QoS datatypes
  * Copyright 2004, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
  *
-- 
cgit v1.2.3-70-g09d2


From 96dd22ac06b0dbfb069fdf530c72046a941e9694 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:57 +0200
Subject: mac80211: inform driver of basic rateset

Drivers need to know the basic rateset to be able to configure
the ACK/CTS programming in hardware correctly.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     |  6 ++++++
 net/mac80211/ieee80211_i.h |  7 ++-----
 net/mac80211/iface.c       |  4 +++-
 net/mac80211/mlme.c        | 40 +++++++++-------------------------------
 net/mac80211/tx.c          |  4 ++--
 net/mac80211/util.c        | 28 ++++++++++++++++++++++++++++
 6 files changed, 50 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d67882dd360..c81e579c17f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -160,6 +160,7 @@ struct ieee80211_low_level_stats {
  * @BSS_CHANGED_ERP_PREAMBLE: preamble changed
  * @BSS_CHANGED_ERP_SLOT: slot timing changed
  * @BSS_CHANGED_HT: 802.11n parameters changed
+ * @BSS_CHANGED_BASIC_RATES: Basic rateset changed
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -167,6 +168,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_ERP_PREAMBLE	= 1<<2,
 	BSS_CHANGED_ERP_SLOT		= 1<<3,
 	BSS_CHANGED_HT                  = 1<<4,
+	BSS_CHANGED_BASIC_RATES		= 1<<5,
 };
 
 /**
@@ -187,6 +189,9 @@ enum ieee80211_bss_change {
  * @assoc_ht: association in HT mode
  * @ht_conf: ht capabilities
  * @ht_bss_conf: ht extended capabilities
+ * @basic_rates: bitmap of basic rates, each bit stands for an
+ *	index into the rate table configured by the driver in
+ *	the current band.
  */
 struct ieee80211_bss_conf {
 	/* association related data */
@@ -200,6 +205,7 @@ struct ieee80211_bss_conf {
 	u16 beacon_int;
 	u16 assoc_capability;
 	u64 timestamp;
+	u64 basic_rates;
 	/* ht related data */
 	bool assoc_ht;
 	struct ieee80211_ht_info *ht_conf;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 1f9336a30e3..21cc6d07b02 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -433,11 +433,6 @@ struct ieee80211_sub_if_data {
 
 	int drop_unencrypted;
 
-	/*
-	 * basic rates of this AP or the AP we're associated to
-	 */
-	u64 basic_rates;
-
 	/* Fragment table for host-based reassembly */
 	struct ieee80211_fragment_entry	fragments[IEEE80211_FRAGMENT_MAX];
 	unsigned int fragment_next;
@@ -1017,6 +1012,8 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 void ieee802_11_parse_elems(u8 *start, size_t len,
 			    struct ieee802_11_elems *elems);
 int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq);
+u64 ieee80211_mandatory_rates(struct ieee80211_local *local,
+			      enum ieee80211_band band);
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 61b19340488..dab8eba2602 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -144,7 +144,9 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 	ieee80211_setup_sdata(sdata, type);
 
 	/* reset some values that shouldn't be kept across type changes */
-	sdata->basic_rates = 0;
+	sdata->bss_conf.basic_rates =
+		ieee80211_mandatory_rates(sdata->local,
+			sdata->local->hw.conf.channel->band);
 	sdata->drop_unencrypted = 0;
 
 	return 0;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5b748447eb7..55bc6071393 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -710,6 +710,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	ieee80211_led_assoc(local, 1);
 
 	sdata->bss_conf.assoc = 1;
+	/*
+	 * For now just always ask the driver to update the basic rateset
+	 * when we have associated, we aren't checking whether it actually
+	 * changed or not.
+	 */
+	changed |= BSS_CHANGED_BASIC_RATES;
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	netif_tx_start_all_queues(sdata->dev);
@@ -1296,7 +1302,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	}
 
 	sta->supp_rates[local->hw.conf.channel->band] = rates;
-	sdata->basic_rates = basic_rates;
+	sdata->bss_conf.basic_rates = basic_rates;
 
 	/* cf. IEEE 802.11 9.2.12 */
 	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
@@ -1453,34 +1459,6 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	return res;
 }
 
-static u64 ieee80211_sta_get_mandatory_rates(struct ieee80211_local *local,
-					enum ieee80211_band band)
-{
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_rate *bitrates;
-	u64 mandatory_rates;
-	enum ieee80211_rate_flags mandatory_flag;
-	int i;
-
-	sband = local->hw.wiphy->bands[band];
-	if (!sband) {
-		WARN_ON(1);
-		sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-	}
-
-	if (band == IEEE80211_BAND_2GHZ)
-		mandatory_flag = IEEE80211_RATE_MANDATORY_B;
-	else
-		mandatory_flag = IEEE80211_RATE_MANDATORY_A;
-
-	bitrates = sband->bitrates;
-	mandatory_rates = 0;
-	for (i = 0; i < sband->n_bitrates; i++)
-		if (bitrates[i].flags & mandatory_flag)
-			mandatory_rates |= BIT(i);
-	return mandatory_rates;
-}
-
 static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_mgmt *mgmt,
 				  size_t len,
@@ -1522,7 +1500,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 			prev_rates = sta->supp_rates[band];
 			/* make sure mandatory rates are always added */
 			sta->supp_rates[band] = supp_rates |
-				ieee80211_sta_get_mandatory_rates(local, band);
+				ieee80211_mandatory_rates(local, band);
 
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 			if (sta->supp_rates[band] != prev_rates)
@@ -2361,7 +2339,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 
 	/* make sure mandatory rates are always added */
 	sta->supp_rates[band] = supp_rates |
-			ieee80211_sta_get_mandatory_rates(local, band);
+			ieee80211_mandatory_rates(local, band);
 
 	rate_control_rate_init(sta, local);
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1bed3be01c2..a523189f10c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -153,7 +153,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 		if (r->bitrate > txrate->bitrate)
 			break;
 
-		if (tx->sdata->basic_rates & BIT(i))
+		if (tx->sdata->bss_conf.basic_rates & BIT(i))
 			rate = r->bitrate;
 
 		switch (sband->band) {
@@ -594,7 +594,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 		for (idx = 0; idx < sband->n_bitrates; idx++) {
 			if (sband->bitrates[idx].bitrate > rate->bitrate)
 				continue;
-			if (tx->sdata->basic_rates & BIT(idx) &&
+			if (tx->sdata->bss_conf.basic_rates & BIT(idx) &&
 			    (baserate < 0 ||
 			     (sband->bitrates[baserate].bitrate
 			      < sband->bitrates[idx].bitrate)))
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index a7968df9dac..d6aca91e612 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -640,3 +640,31 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
 
 	return ret;
 }
+
+u64 ieee80211_mandatory_rates(struct ieee80211_local *local,
+			      enum ieee80211_band band)
+{
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_rate *bitrates;
+	u64 mandatory_rates;
+	enum ieee80211_rate_flags mandatory_flag;
+	int i;
+
+	sband = local->hw.wiphy->bands[band];
+	if (!sband) {
+		WARN_ON(1);
+		sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+	}
+
+	if (band == IEEE80211_BAND_2GHZ)
+		mandatory_flag = IEEE80211_RATE_MANDATORY_B;
+	else
+		mandatory_flag = IEEE80211_RATE_MANDATORY_A;
+
+	bitrates = sband->bitrates;
+	mandatory_rates = 0;
+	for (i = 0; i < sband->n_bitrates; i++)
+		if (bitrates[i].flags & mandatory_flag)
+			mandatory_rates |= BIT(i);
+	return mandatory_rates;
+}
-- 
cgit v1.2.3-70-g09d2


From 05c914fe330fa8e1cc67870dc0d3809dfd96c107 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:58 +0200
Subject: mac80211: use nl80211 interface types

There's really no reason for mac80211 to be using its
own interface type defines. Use the nl80211 types and
simplify the configuration code a bit: there's no need
to translate them any more now.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/adm8211.c              | 22 +++----
 drivers/net/wireless/ath5k/ath5k.h          |  4 +-
 drivers/net/wireless/ath5k/attach.c         |  2 +-
 drivers/net/wireless/ath5k/base.c           | 50 ++++++++--------
 drivers/net/wireless/ath5k/base.h           |  2 +-
 drivers/net/wireless/ath5k/pcu.c            | 12 ++--
 drivers/net/wireless/ath5k/reset.c          |  2 +-
 drivers/net/wireless/ath9k/main.c           | 28 ++++-----
 drivers/net/wireless/b43/main.c             | 38 ++++++------
 drivers/net/wireless/b43/phy_common.c       |  4 +-
 drivers/net/wireless/b43legacy/main.c       | 30 +++++-----
 drivers/net/wireless/b43legacy/phy.c        |  4 +-
 drivers/net/wireless/iwlwifi/iwl-3945-rs.c  |  2 +-
 drivers/net/wireless/iwlwifi/iwl-3945.c     |  6 +-
 drivers/net/wireless/iwlwifi/iwl-3945.h     |  2 +-
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c   |  8 +--
 drivers/net/wireless/iwlwifi/iwl-agn.c      | 64 ++++++++++-----------
 drivers/net/wireless/iwlwifi/iwl-core.c     |  8 +--
 drivers/net/wireless/iwlwifi/iwl-dev.h      |  2 +-
 drivers/net/wireless/iwlwifi/iwl-power.c    |  2 +-
 drivers/net/wireless/iwlwifi/iwl-rx.c       |  8 +--
 drivers/net/wireless/iwlwifi/iwl-scan.c     |  4 +-
 drivers/net/wireless/iwlwifi/iwl-sta.c      | 20 +++----
 drivers/net/wireless/iwlwifi/iwl-tx.c       |  4 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c | 84 +++++++++++++--------------
 drivers/net/wireless/libertas_tf/main.c     | 18 +++---
 drivers/net/wireless/mac80211_hwsim.c       |  2 +-
 drivers/net/wireless/p54/p54common.c        | 14 ++---
 drivers/net/wireless/p54/p54pci.c           |  4 +-
 drivers/net/wireless/rt2x00/rt2500usb.c     |  2 +-
 drivers/net/wireless/rt2x00/rt2x00.h        |  2 +-
 drivers/net/wireless/rt2x00/rt2x00config.c  |  8 +--
 drivers/net/wireless/rt2x00/rt2x00dev.c     |  8 +--
 drivers/net/wireless/rt2x00/rt2x00lib.h     |  2 +-
 drivers/net/wireless/rt2x00/rt2x00mac.c     | 18 +++---
 drivers/net/wireless/rtl8180_dev.c          | 10 ++--
 drivers/net/wireless/rtl8187_dev.c          |  8 +--
 drivers/net/wireless/zd1211rw/zd_mac.c      | 20 +++----
 include/net/mac80211.h                      | 35 ++----------
 net/mac80211/cfg.c                          | 57 ++++++++----------
 net/mac80211/debugfs_netdev.c               | 28 ++++-----
 net/mac80211/ht.c                           |  6 +-
 net/mac80211/ieee80211_i.h                  |  6 +-
 net/mac80211/iface.c                        | 40 +++++++------
 net/mac80211/key.c                          |  6 +-
 net/mac80211/main.c                         | 89 ++++++++++++++---------------
 net/mac80211/mlme.c                         | 40 ++++++-------
 net/mac80211/rx.c                           | 65 ++++++++++-----------
 net/mac80211/scan.c                         |  8 +--
 net/mac80211/sta_info.c                     |  4 +-
 net/mac80211/tx.c                           | 28 ++++-----
 net/mac80211/util.c                         | 44 +++++++-------
 net/mac80211/wext.c                         | 84 +++++++++++++--------------
 53 files changed, 517 insertions(+), 551 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index c6a55cd12db..b2c050b6889 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -765,11 +765,11 @@ static void adm8211_update_mode(struct ieee80211_hw *dev)
 
 	priv->soft_rx_crc = 0;
 	switch (priv->mode) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		priv->nar &= ~(ADM8211_NAR_PR | ADM8211_NAR_EA);
 		priv->nar |= ADM8211_NAR_ST | ADM8211_NAR_SR;
 		break;
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 		priv->nar &= ~ADM8211_NAR_PR;
 		priv->nar |= ADM8211_NAR_EA | ADM8211_NAR_ST | ADM8211_NAR_SR;
 
@@ -777,7 +777,7 @@ static void adm8211_update_mode(struct ieee80211_hw *dev)
 		if (priv->pdev->revision >= ADM8211_REV_BA)
 			priv->soft_rx_crc = 1;
 		break;
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		priv->nar &= ~(ADM8211_NAR_EA | ADM8211_NAR_ST);
 		priv->nar |= ADM8211_NAR_PR | ADM8211_NAR_SR;
 		break;
@@ -1410,11 +1410,11 @@ static int adm8211_add_interface(struct ieee80211_hw *dev,
 				 struct ieee80211_if_init_conf *conf)
 {
 	struct adm8211_priv *priv = dev->priv;
-	if (priv->mode != IEEE80211_IF_TYPE_MNTR)
+	if (priv->mode != NL80211_IFTYPE_MONITOR)
 		return -EOPNOTSUPP;
 
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		priv->mode = conf->type;
 		break;
 	default:
@@ -1437,7 +1437,7 @@ static void adm8211_remove_interface(struct ieee80211_hw *dev,
 				     struct ieee80211_if_init_conf *conf)
 {
 	struct adm8211_priv *priv = dev->priv;
-	priv->mode = IEEE80211_IF_TYPE_MNTR;
+	priv->mode = NL80211_IFTYPE_MONITOR;
 }
 
 static int adm8211_init_rings(struct ieee80211_hw *dev)
@@ -1556,7 +1556,7 @@ static int adm8211_start(struct ieee80211_hw *dev)
 	ADM8211_CSR_WRITE(IER, ADM8211_IER_NIE | ADM8211_IER_AIE |
 			       ADM8211_IER_RCIE | ADM8211_IER_TCIE |
 			       ADM8211_IER_TDUIE | ADM8211_IER_GPTIE);
-	priv->mode = IEEE80211_IF_TYPE_MNTR;
+	priv->mode = NL80211_IFTYPE_MONITOR;
 	adm8211_update_mode(dev);
 	ADM8211_CSR_WRITE(RDR, 0);
 
@@ -1571,7 +1571,7 @@ static void adm8211_stop(struct ieee80211_hw *dev)
 {
 	struct adm8211_priv *priv = dev->priv;
 
-	priv->mode = IEEE80211_IF_TYPE_INVALID;
+	priv->mode = NL80211_IFTYPE_UNSPECIFIED;
 	priv->nar = 0;
 	ADM8211_CSR_WRITE(NAR, 0);
 	ADM8211_CSR_WRITE(IER, 0);
@@ -1896,7 +1896,7 @@ static int __devinit adm8211_probe(struct pci_dev *pdev,
 	priv->tx_power = 0x40;
 	priv->lpf_cutoff = 0xFF;
 	priv->lnags_threshold = 0xFF;
-	priv->mode = IEEE80211_IF_TYPE_INVALID;
+	priv->mode = NL80211_IFTYPE_UNSPECIFIED;
 
 	/* Power-on issue. EEPROM won't read correctly without */
 	if (pdev->revision >= ADM8211_REV_BA) {
@@ -1986,7 +1986,7 @@ static int adm8211_suspend(struct pci_dev *pdev, pm_message_t state)
 	struct ieee80211_hw *dev = pci_get_drvdata(pdev);
 	struct adm8211_priv *priv = dev->priv;
 
-	if (priv->mode != IEEE80211_IF_TYPE_INVALID) {
+	if (priv->mode != NL80211_IFTYPE_UNSPECIFIED) {
 		ieee80211_stop_queues(dev);
 		adm8211_stop(dev);
 	}
@@ -2004,7 +2004,7 @@ static int adm8211_resume(struct pci_dev *pdev)
 	pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 
-	if (priv->mode != IEEE80211_IF_TYPE_INVALID) {
+	if (priv->mode != NL80211_IFTYPE_UNSPECIFIED) {
 		adm8211_start(dev);
 		ieee80211_wake_queues(dev);
 	}
diff --git a/drivers/net/wireless/ath5k/ath5k.h b/drivers/net/wireless/ath5k/ath5k.h
index 4c0211798a7..20018869051 100644
--- a/drivers/net/wireless/ath5k/ath5k.h
+++ b/drivers/net/wireless/ath5k/ath5k.h
@@ -1008,7 +1008,7 @@ struct ath5k_hw {
 
 	enum ath5k_int		ah_imr;
 
-	enum ieee80211_if_types	ah_op_mode;
+	enum nl80211_iftype	ah_op_mode;
 	enum ath5k_power_mode	ah_power_mode;
 	struct ieee80211_channel ah_current_channel;
 	bool			ah_turbo;
@@ -1117,7 +1117,7 @@ extern void ath5k_hw_detach(struct ath5k_hw *ah);
 
 /* Reset Functions */
 extern int ath5k_hw_nic_wakeup(struct ath5k_hw *ah, int flags, bool initial);
-extern int ath5k_hw_reset(struct ath5k_hw *ah, enum ieee80211_if_types op_mode, struct ieee80211_channel *channel, bool change_channel);
+extern int ath5k_hw_reset(struct ath5k_hw *ah, enum nl80211_iftype op_mode, struct ieee80211_channel *channel, bool change_channel);
 /* Power management functions */
 extern int ath5k_hw_set_power(struct ath5k_hw *ah, enum ath5k_power_mode mode, bool set_chip, u16 sleep_duration);
 
diff --git a/drivers/net/wireless/ath5k/attach.c b/drivers/net/wireless/ath5k/attach.c
index 0eb2511fe14..153c4111fab 100644
--- a/drivers/net/wireless/ath5k/attach.c
+++ b/drivers/net/wireless/ath5k/attach.c
@@ -124,7 +124,7 @@ struct ath5k_hw *ath5k_hw_attach(struct ath5k_softc *sc, u8 mac_version)
 	/*
 	 * HW information
 	 */
-	ah->ah_op_mode = IEEE80211_IF_TYPE_STA;
+	ah->ah_op_mode = NL80211_IFTYPE_STATION;
 	ah->ah_radar.r_enabled = AR5K_TUNE_RADAR_ALERT;
 	ah->ah_turbo = false;
 	ah->ah_txpower.txp_tpc = AR5K_TUNE_TPC_TXPOWER;
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 85260c39aa2..e09ed2ce675 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -507,7 +507,7 @@ ath5k_pci_probe(struct pci_dev *pdev,
 
 	sc->iobase = mem; /* So we can unmap it on detach */
 	sc->cachelsz = csz * sizeof(u32); /* convert to bytes */
-	sc->opmode = IEEE80211_IF_TYPE_STA;
+	sc->opmode = NL80211_IFTYPE_STATION;
 	mutex_init(&sc->lock);
 	spin_lock_init(&sc->rxbuflock);
 	spin_lock_init(&sc->txbuflock);
@@ -1377,8 +1377,8 @@ ath5k_beaconq_config(struct ath5k_softc *sc)
 	ret = ath5k_hw_get_tx_queueprops(ah, sc->bhalq, &qi);
 	if (ret)
 		return ret;
-	if (sc->opmode == IEEE80211_IF_TYPE_AP ||
-		sc->opmode == IEEE80211_IF_TYPE_MESH_POINT) {
+	if (sc->opmode == NL80211_IFTYPE_AP ||
+		sc->opmode == NL80211_IFTYPE_MESH_POINT) {
 		/*
 		 * Always burst out beacon and CAB traffic
 		 * (aifs = cwmin = cwmax = 0)
@@ -1386,7 +1386,7 @@ ath5k_beaconq_config(struct ath5k_softc *sc)
 		qi.tqi_aifs = 0;
 		qi.tqi_cw_min = 0;
 		qi.tqi_cw_max = 0;
-	} else if (sc->opmode == IEEE80211_IF_TYPE_IBSS) {
+	} else if (sc->opmode == NL80211_IFTYPE_ADHOC) {
 		/*
 		 * Adhoc mode; backoff between 0 and (2 * cw_min).
 		 */
@@ -1714,7 +1714,7 @@ ath5k_tasklet_rx(unsigned long data)
 			/* let crypto-error packets fall through in MNTR */
 			if ((rs.rs_status &
 				~(AR5K_RXERR_DECRYPT|AR5K_RXERR_MIC)) ||
-					sc->opmode != IEEE80211_IF_TYPE_MNTR)
+					sc->opmode != NL80211_IFTYPE_MONITOR)
 				goto next;
 		}
 accept:
@@ -1777,7 +1777,7 @@ accept:
 		ath5k_debug_dump_skb(sc, skb, "RX  ", 0);
 
 		/* check beacons in IBSS mode */
-		if (sc->opmode == IEEE80211_IF_TYPE_IBSS)
+		if (sc->opmode == NL80211_IFTYPE_ADHOC)
 			ath5k_check_ibss_tsf(sc, skb, &rxs);
 
 		__ieee80211_rx(sc->hw, skb, &rxs);
@@ -1892,7 +1892,7 @@ ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 	ds = bf->desc;
 
 	flags = AR5K_TXDESC_NOACK;
-	if (sc->opmode == IEEE80211_IF_TYPE_IBSS && ath5k_hw_hasveol(ah)) {
+	if (sc->opmode == NL80211_IFTYPE_ADHOC && ath5k_hw_hasveol(ah)) {
 		ds->ds_link = bf->daddr;	/* self-linked */
 		flags |= AR5K_TXDESC_VEOL;
 		/*
@@ -1941,8 +1941,8 @@ ath5k_beacon_send(struct ath5k_softc *sc)
 
 	ATH5K_DBG_UNLIMIT(sc, ATH5K_DEBUG_BEACON, "in beacon_send\n");
 
-	if (unlikely(bf->skb == NULL || sc->opmode == IEEE80211_IF_TYPE_STA ||
-			sc->opmode == IEEE80211_IF_TYPE_MNTR)) {
+	if (unlikely(bf->skb == NULL || sc->opmode == NL80211_IFTYPE_STATION ||
+			sc->opmode == NL80211_IFTYPE_MONITOR)) {
 		ATH5K_WARN(sc, "bf=%p bf_skb=%p\n", bf, bf ? bf->skb : NULL);
 		return;
 	}
@@ -2116,9 +2116,9 @@ ath5k_beacon_config(struct ath5k_softc *sc)
 	sc->bmisscount = 0;
 	sc->imask &= ~(AR5K_INT_BMISS | AR5K_INT_SWBA);
 
-	if (sc->opmode == IEEE80211_IF_TYPE_STA) {
+	if (sc->opmode == NL80211_IFTYPE_STATION) {
 		sc->imask |= AR5K_INT_BMISS;
-	} else if (sc->opmode == IEEE80211_IF_TYPE_IBSS) {
+	} else if (sc->opmode == NL80211_IFTYPE_ADHOC) {
 		/*
 		 * In IBSS mode we use a self-linked tx descriptor and let the
 		 * hardware send the beacons automatically. We have to load it
@@ -2323,7 +2323,7 @@ ath5k_intr(int irq, void *dev_id)
 				* transmission time) in order to detect wether
 				* automatic TSF updates happened.
 				*/
-				if (sc->opmode == IEEE80211_IF_TYPE_IBSS) {
+				if (sc->opmode == NL80211_IFTYPE_ADHOC) {
 					 /* XXX: only if VEOL suppported */
 					u64 tsf = ath5k_hw_get_tsf64(ah);
 					sc->nexttbtt += sc->bintval;
@@ -2553,7 +2553,7 @@ ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 
 	ath5k_debug_dump_skb(sc, skb, "TX  ", 1);
 
-	if (sc->opmode == IEEE80211_IF_TYPE_MNTR)
+	if (sc->opmode == NL80211_IFTYPE_MONITOR)
 		ATH5K_DBG(sc, ATH5K_DEBUG_XMIT, "tx in monitor (scan?)\n");
 
 	/*
@@ -2688,9 +2688,9 @@ static int ath5k_add_interface(struct ieee80211_hw *hw,
 	sc->vif = conf->vif;
 
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
+	case NL80211_IFTYPE_MONITOR:
 		sc->opmode = conf->type;
 		break;
 	default:
@@ -2761,7 +2761,7 @@ ath5k_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	}
 
 	if (conf->changed & IEEE80211_IFCC_BEACON &&
-	    vif->type == IEEE80211_IF_TYPE_IBSS) {
+	    vif->type == NL80211_IFTYPE_ADHOC) {
 		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
 		if (!beacon) {
 			ret = -ENOMEM;
@@ -2880,17 +2880,17 @@ static void ath5k_configure_filter(struct ieee80211_hw *hw,
 
 	/* XXX move these to mac80211, and add a beacon IFF flag to mac80211 */
 
-	if (sc->opmode == IEEE80211_IF_TYPE_MNTR)
+	if (sc->opmode == NL80211_IFTYPE_MONITOR)
 		rfilt |= AR5K_RX_FILTER_CONTROL | AR5K_RX_FILTER_BEACON |
 			AR5K_RX_FILTER_PROBEREQ | AR5K_RX_FILTER_PROM;
-	if (sc->opmode != IEEE80211_IF_TYPE_STA)
+	if (sc->opmode != NL80211_IFTYPE_STATION)
 		rfilt |= AR5K_RX_FILTER_PROBEREQ;
-	if (sc->opmode != IEEE80211_IF_TYPE_AP &&
-		sc->opmode != IEEE80211_IF_TYPE_MESH_POINT &&
+	if (sc->opmode != NL80211_IFTYPE_AP &&
+		sc->opmode != NL80211_IFTYPE_MESH_POINT &&
 		test_bit(ATH_STAT_PROMISC, sc->status))
 		rfilt |= AR5K_RX_FILTER_PROM;
-	if (sc->opmode == IEEE80211_IF_TYPE_STA ||
-		sc->opmode == IEEE80211_IF_TYPE_IBSS) {
+	if (sc->opmode == NL80211_IFTYPE_STATION ||
+		sc->opmode == NL80211_IFTYPE_ADHOC) {
 		rfilt |= AR5K_RX_FILTER_BEACON;
 	}
 
@@ -2995,7 +2995,7 @@ ath5k_reset_tsf(struct ieee80211_hw *hw)
 	 * in IBSS mode we need to update the beacon timers too.
 	 * this will also reset the TSF if we call it with 0
 	 */
-	if (sc->opmode == IEEE80211_IF_TYPE_IBSS)
+	if (sc->opmode == NL80211_IFTYPE_ADHOC)
 		ath5k_beacon_update_timers(sc, 0);
 	else
 		ath5k_hw_reset_tsf(sc->ah);
@@ -3010,7 +3010,7 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
 
 	ath5k_debug_dump_skb(sc, skb, "BC  ", 1);
 
-	if (sc->opmode != IEEE80211_IF_TYPE_IBSS) {
+	if (sc->opmode != NL80211_IFTYPE_ADHOC) {
 		ret = -EIO;
 		goto end;
 	}
diff --git a/drivers/net/wireless/ath5k/base.h b/drivers/net/wireless/ath5k/base.h
index 1549b63d613..9d0b728928e 100644
--- a/drivers/net/wireless/ath5k/base.h
+++ b/drivers/net/wireless/ath5k/base.h
@@ -113,7 +113,7 @@ struct ath5k_softc {
 	struct ieee80211_channel channels[ATH_CHAN_MAX];
 	struct ieee80211_rate	rates[IEEE80211_NUM_BANDS][AR5K_MAX_RATES];
 	u8			rate_idx[IEEE80211_NUM_BANDS][AR5K_MAX_RATES];
-	enum ieee80211_if_types	opmode;
+	enum nl80211_iftype	opmode;
 	struct ath5k_hw		*ah;		/* Atheros HW */
 
 	struct ieee80211_supported_band		*curband;
diff --git a/drivers/net/wireless/ath5k/pcu.c b/drivers/net/wireless/ath5k/pcu.c
index 5a896d1e2a2..c77cee2a558 100644
--- a/drivers/net/wireless/ath5k/pcu.c
+++ b/drivers/net/wireless/ath5k/pcu.c
@@ -52,26 +52,26 @@ int ath5k_hw_set_opmode(struct ath5k_hw *ah)
 	ATH5K_TRACE(ah->ah_sc);
 
 	switch (ah->ah_op_mode) {
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 		pcu_reg |= AR5K_STA_ID1_ADHOC | AR5K_STA_ID1_DESC_ANTENNA |
 			(ah->ah_version == AR5K_AR5210 ?
 				AR5K_STA_ID1_NO_PSPOLL : 0);
 		beacon_reg |= AR5K_BCR_ADHOC;
 		break;
 
-	case IEEE80211_IF_TYPE_AP:
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_MESH_POINT:
 		pcu_reg |= AR5K_STA_ID1_AP | AR5K_STA_ID1_RTS_DEF_ANTENNA |
 			(ah->ah_version == AR5K_AR5210 ?
 				AR5K_STA_ID1_NO_PSPOLL : 0);
 		beacon_reg |= AR5K_BCR_AP;
 		break;
 
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		pcu_reg |= AR5K_STA_ID1_DEFAULT_ANTENNA |
 			(ah->ah_version == AR5K_AR5210 ?
 				AR5K_STA_ID1_PWR_SV : 0);
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		pcu_reg |= AR5K_STA_ID1_DEFAULT_ANTENNA |
 			(ah->ah_version == AR5K_AR5210 ?
 				AR5K_STA_ID1_NO_PSPOLL : 0);
@@ -649,7 +649,7 @@ void ath5k_hw_init_beacon(struct ath5k_hw *ah, u32 next_beacon, u32 interval)
 	 * Set the additional timers by mode
 	 */
 	switch (ah->ah_op_mode) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		if (ah->ah_version == AR5K_AR5210) {
 			timer1 = 0xffffffff;
 			timer2 = 0xffffffff;
diff --git a/drivers/net/wireless/ath5k/reset.c b/drivers/net/wireless/ath5k/reset.c
index d260fba0180..f5c3de890cd 100644
--- a/drivers/net/wireless/ath5k/reset.c
+++ b/drivers/net/wireless/ath5k/reset.c
@@ -399,7 +399,7 @@ int ath5k_hw_nic_wakeup(struct ath5k_hw *ah, int flags, bool initial)
 /*
  * Main reset function
  */
-int ath5k_hw_reset(struct ath5k_hw *ah, enum ieee80211_if_types op_mode,
+int ath5k_hw_reset(struct ath5k_hw *ah, enum nl80211_iftype op_mode,
 	struct ieee80211_channel *channel, bool change_channel)
 {
 	struct ath5k_eeprom_info *ee = &ah->ah_capabilities.cap_eeprom;
diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index b493dff5643..2a6e089062f 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -140,7 +140,7 @@ static int ath_key_config(struct ath_softc *sc,
 	struct ath9k_keyval hk;
 	const u8 *mac = NULL;
 	int ret = 0;
-	enum ieee80211_if_types opmode;
+	enum nl80211_iftype opmode;
 
 	memset(&hk, 0, sizeof(hk));
 
@@ -179,14 +179,14 @@ static int ath_key_config(struct ath_softc *sc,
 	 */
 	if (is_broadcast_ether_addr(addr)) {
 		switch (opmode) {
-		case IEEE80211_IF_TYPE_STA:
+		case NL80211_IFTYPE_STATION:
 			/* default key:  could be group WPA key
 			 * or could be static WEP key */
 			mac = NULL;
 			break;
-		case IEEE80211_IF_TYPE_IBSS:
+		case NL80211_IFTYPE_ADHOC:
 			break;
-		case IEEE80211_IF_TYPE_AP:
+		case NL80211_IFTYPE_AP:
 			break;
 		default:
 			ASSERT(0);
@@ -1147,13 +1147,13 @@ static int ath9k_add_interface(struct ieee80211_hw *hw,
 		return -ENOBUFS;
 
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		ic_opmode = ATH9K_M_STA;
 		break;
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 		ic_opmode = ATH9K_M_IBSS;
 		break;
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		ic_opmode = ATH9K_M_HOSTAP;
 		break;
 	default:
@@ -1275,7 +1275,7 @@ static int ath9k_config_interface(struct ieee80211_hw *hw,
 
 	/* TODO: Need to decide which hw opmode to use for multi-interface
 	 * cases */
-	if (vif->type == IEEE80211_IF_TYPE_AP &&
+	if (vif->type == NL80211_IFTYPE_AP &&
 	    ah->ah_opmode != ATH9K_M_HOSTAP) {
 		ah->ah_opmode = ATH9K_M_HOSTAP;
 		ath9k_hw_setopmode(ah);
@@ -1287,8 +1287,8 @@ static int ath9k_config_interface(struct ieee80211_hw *hw,
 	if ((conf->changed & IEEE80211_IFCC_BSSID) &&
 	    !is_zero_ether_addr(conf->bssid)) {
 		switch (vif->type) {
-		case IEEE80211_IF_TYPE_STA:
-		case IEEE80211_IF_TYPE_IBSS:
+		case NL80211_IFTYPE_STATION:
+		case NL80211_IFTYPE_ADHOC:
 			/* Update ratectrl about the new state */
 			ath_rate_newstate(sc, avp);
 
@@ -1333,8 +1333,8 @@ static int ath9k_config_interface(struct ieee80211_hw *hw,
 	}
 
 	if ((conf->changed & IEEE80211_IFCC_BEACON) &&
-	    ((vif->type == IEEE80211_IF_TYPE_IBSS) ||
-	     (vif->type == IEEE80211_IF_TYPE_AP))) {
+	    ((vif->type == NL80211_IFTYPE_ADHOC) ||
+	     (vif->type == NL80211_IFTYPE_AP))) {
 		/*
 		 * Allocate and setup the beacon frame.
 		 *
@@ -1353,7 +1353,7 @@ static int ath9k_config_interface(struct ieee80211_hw *hw,
 	}
 
 	/* Check for WLAN_CAPABILITY_PRIVACY ? */
-	if ((avp->av_opmode != IEEE80211_IF_TYPE_STA)) {
+	if ((avp->av_opmode != NL80211_IFTYPE_STATION)) {
 		for (i = 0; i < IEEE80211_WEP_NKID; i++)
 			if (ath9k_hw_keyisvalid(sc->sc_ah, (u16)i))
 				ath9k_hw_keysetmac(sc->sc_ah,
@@ -1362,7 +1362,7 @@ static int ath9k_config_interface(struct ieee80211_hw *hw,
 	}
 
 	/* Only legacy IBSS for now */
-	if (vif->type == IEEE80211_IF_TYPE_IBSS)
+	if (vif->type == NL80211_IFTYPE_ADHOC)
 		ath_update_chainmask(sc, 0);
 
 	return 0;
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index d4a356b1163..df7a1e7f4a5 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -1244,13 +1244,13 @@ generate_new:
 
 static void handle_irq_tbtt_indication(struct b43_wldev *dev)
 {
-	if (b43_is_mode(dev->wl, IEEE80211_IF_TYPE_AP)) {
+	if (b43_is_mode(dev->wl, NL80211_IFTYPE_AP)) {
 		///TODO: PS TBTT
 	} else {
 		if (1 /*FIXME: the last PSpoll frame was sent successfully */ )
 			b43_power_saving_ctl_bits(dev, 0);
 	}
-	if (b43_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS))
+	if (b43_is_mode(dev->wl, NL80211_IFTYPE_ADHOC))
 		dev->dfq_valid = 1;
 }
 
@@ -1599,8 +1599,8 @@ static void handle_irq_beacon(struct b43_wldev *dev)
 	struct b43_wl *wl = dev->wl;
 	u32 cmd, beacon0_valid, beacon1_valid;
 
-	if (!b43_is_mode(wl, IEEE80211_IF_TYPE_AP) &&
-	    !b43_is_mode(wl, IEEE80211_IF_TYPE_MESH_POINT))
+	if (!b43_is_mode(wl, NL80211_IFTYPE_AP) &&
+	    !b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT))
 		return;
 
 	/* This is the bottom half of the asynchronous beacon update. */
@@ -2568,10 +2568,10 @@ static void b43_adjust_opmode(struct b43_wldev *dev)
 	ctl &= ~B43_MACCTL_BEACPROMISC;
 	ctl |= B43_MACCTL_INFRA;
 
-	if (b43_is_mode(wl, IEEE80211_IF_TYPE_AP) ||
-	    b43_is_mode(wl, IEEE80211_IF_TYPE_MESH_POINT))
+	if (b43_is_mode(wl, NL80211_IFTYPE_AP) ||
+	    b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT))
 		ctl |= B43_MACCTL_AP;
-	else if (b43_is_mode(wl, IEEE80211_IF_TYPE_IBSS))
+	else if (b43_is_mode(wl, NL80211_IFTYPE_ADHOC))
 		ctl &= ~B43_MACCTL_INFRA;
 
 	if (wl->filter_flags & FIF_CONTROL)
@@ -3406,8 +3406,8 @@ static int b43_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
 		phy->ops->set_rx_antenna(dev, antenna);
 
 	/* Update templates for AP/mesh mode. */
-	if (b43_is_mode(wl, IEEE80211_IF_TYPE_AP) ||
-	    b43_is_mode(wl, IEEE80211_IF_TYPE_MESH_POINT))
+	if (b43_is_mode(wl, NL80211_IFTYPE_AP) ||
+	    b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT))
 		b43_set_beacon_int(dev, conf->beacon_int);
 
 	if (!!conf->radio_enabled != phy->radio_on) {
@@ -3595,14 +3595,14 @@ static int b43_op_config_interface(struct ieee80211_hw *hw,
 	else
 		memset(wl->bssid, 0, ETH_ALEN);
 	if (b43_status(dev) >= B43_STAT_INITIALIZED) {
-		if (b43_is_mode(wl, IEEE80211_IF_TYPE_AP) ||
-		    b43_is_mode(wl, IEEE80211_IF_TYPE_MESH_POINT)) {
+		if (b43_is_mode(wl, NL80211_IFTYPE_AP) ||
+		    b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) {
 			B43_WARN_ON(vif->type != wl->if_type);
 			if (conf->changed & IEEE80211_IFCC_SSID)
 				b43_set_ssid(dev, conf->ssid, conf->ssid_len);
 			if (conf->changed & IEEE80211_IFCC_BEACON)
 				b43_update_templates(wl);
-		} else if (b43_is_mode(wl, IEEE80211_IF_TYPE_IBSS)) {
+		} else if (b43_is_mode(wl, NL80211_IFTYPE_ADHOC)) {
 			if (conf->changed & IEEE80211_IFCC_BEACON)
 				b43_update_templates(wl);
 		}
@@ -3903,7 +3903,7 @@ static void b43_set_synth_pu_delay(struct b43_wldev *dev, bool idle)
 		pu_delay = 3700;
 	else
 		pu_delay = 1050;
-	if (b43_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS) || idle)
+	if (b43_is_mode(dev->wl, NL80211_IFTYPE_ADHOC) || idle)
 		pu_delay = 500;
 	if ((dev->phy.radio_ver == 0x2050) && (dev->phy.radio_rev == 8))
 		pu_delay = max(pu_delay, (u16)2400);
@@ -3917,7 +3917,7 @@ static void b43_set_pretbtt(struct b43_wldev *dev)
 	u16 pretbtt;
 
 	/* The time value is in microseconds. */
-	if (b43_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS)) {
+	if (b43_is_mode(dev->wl, NL80211_IFTYPE_ADHOC)) {
 		pretbtt = 2;
 	} else {
 		if (dev->phy.type == B43_PHYTYPE_A)
@@ -4084,11 +4084,11 @@ static int b43_op_add_interface(struct ieee80211_hw *hw,
 
 	/* TODO: allow WDS/AP devices to coexist */
 
-	if (conf->type != IEEE80211_IF_TYPE_AP &&
-	    conf->type != IEEE80211_IF_TYPE_MESH_POINT &&
-	    conf->type != IEEE80211_IF_TYPE_STA &&
-	    conf->type != IEEE80211_IF_TYPE_WDS &&
-	    conf->type != IEEE80211_IF_TYPE_IBSS)
+	if (conf->type != NL80211_IFTYPE_AP &&
+	    conf->type != NL80211_IFTYPE_MESH_POINT &&
+	    conf->type != NL80211_IFTYPE_STATION &&
+	    conf->type != NL80211_IFTYPE_WDS &&
+	    conf->type != NL80211_IFTYPE_ADHOC)
 		return -EOPNOTSUPP;
 
 	mutex_lock(&wl->mutex);
diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c
index 4d4345d76ab..af37abccccb 100644
--- a/drivers/net/wireless/b43/phy_common.c
+++ b/drivers/net/wireless/b43/phy_common.c
@@ -162,7 +162,7 @@ void b43_phy_lock(struct b43_wldev *dev)
 #endif
 	B43_WARN_ON(dev->dev->id.revision < 3);
 
-	if (!b43_is_mode(dev->wl, IEEE80211_IF_TYPE_AP))
+	if (!b43_is_mode(dev->wl, NL80211_IFTYPE_AP))
 		b43_power_saving_ctl_bits(dev, B43_PS_AWAKE);
 }
 
@@ -174,7 +174,7 @@ void b43_phy_unlock(struct b43_wldev *dev)
 #endif
 	B43_WARN_ON(dev->dev->id.revision < 3);
 
-	if (!b43_is_mode(dev->wl, IEEE80211_IF_TYPE_AP))
+	if (!b43_is_mode(dev->wl, NL80211_IFTYPE_AP))
 		b43_power_saving_ctl_bits(dev, 0);
 }
 
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 68f63f5093a..6e425410c99 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -888,13 +888,13 @@ generate_new:
 
 static void handle_irq_tbtt_indication(struct b43legacy_wldev *dev)
 {
-	if (b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_AP)) {
+	if (b43legacy_is_mode(dev->wl, NL80211_IFTYPE_AP)) {
 		/* TODO: PS TBTT */
 	} else {
 		if (1/*FIXME: the last PSpoll frame was sent successfully */)
 			b43legacy_power_saving_ctl_bits(dev, -1, -1);
 	}
-	if (b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS))
+	if (b43legacy_is_mode(dev->wl, NL80211_IFTYPE_ADHOC))
 		dev->dfq_valid = 1;
 }
 
@@ -1201,7 +1201,7 @@ static void handle_irq_beacon(struct b43legacy_wldev *dev)
 	struct b43legacy_wl *wl = dev->wl;
 	u32 cmd;
 
-	if (!b43legacy_is_mode(wl, IEEE80211_IF_TYPE_AP))
+	if (!b43legacy_is_mode(wl, NL80211_IFTYPE_AP))
 		return;
 
 	/* This is the bottom half of the asynchronous beacon update. */
@@ -1936,9 +1936,9 @@ static void b43legacy_adjust_opmode(struct b43legacy_wldev *dev)
 	ctl &= ~B43legacy_MACCTL_BEACPROMISC;
 	ctl |= B43legacy_MACCTL_INFRA;
 
-	if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_AP))
+	if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP))
 		ctl |= B43legacy_MACCTL_AP;
-	else if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_IBSS))
+	else if (b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC))
 		ctl &= ~B43legacy_MACCTL_INFRA;
 
 	if (wl->filter_flags & FIF_CONTROL)
@@ -2646,7 +2646,7 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw,
 	b43legacy_mgmtframe_txantenna(dev, antenna_tx);
 
 	/* Update templates for AP mode. */
-	if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_AP))
+	if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP))
 		b43legacy_set_beacon_int(dev, conf->beacon_int);
 
 
@@ -2733,12 +2733,12 @@ static int b43legacy_op_config_interface(struct ieee80211_hw *hw,
 	else
 		memset(wl->bssid, 0, ETH_ALEN);
 	if (b43legacy_status(dev) >= B43legacy_STAT_INITIALIZED) {
-		if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_AP)) {
-			B43legacy_WARN_ON(vif->type != IEEE80211_IF_TYPE_AP);
+		if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) {
+			B43legacy_WARN_ON(vif->type != NL80211_IFTYPE_AP);
 			b43legacy_set_ssid(dev, conf->ssid, conf->ssid_len);
 			if (conf->changed & IEEE80211_IFCC_BEACON)
 				b43legacy_update_templates(wl);
-		} else if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_IBSS)) {
+		} else if (b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC)) {
 			if (conf->changed & IEEE80211_IFCC_BEACON)
 				b43legacy_update_templates(wl);
 		}
@@ -3020,7 +3020,7 @@ static void b43legacy_set_synth_pu_delay(struct b43legacy_wldev *dev,
 					  bool idle) {
 	u16 pu_delay = 1050;
 
-	if (b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS) || idle)
+	if (b43legacy_is_mode(dev->wl, NL80211_IFTYPE_ADHOC) || idle)
 		pu_delay = 500;
 	if ((dev->phy.radio_ver == 0x2050) && (dev->phy.radio_rev == 8))
 		pu_delay = max(pu_delay, (u16)2400);
@@ -3035,7 +3035,7 @@ static void b43legacy_set_pretbtt(struct b43legacy_wldev *dev)
 	u16 pretbtt;
 
 	/* The time value is in microseconds. */
-	if (b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS))
+	if (b43legacy_is_mode(dev->wl, NL80211_IFTYPE_ADHOC))
 		pretbtt = 2;
 	else
 		pretbtt = 250;
@@ -3259,10 +3259,10 @@ static int b43legacy_op_add_interface(struct ieee80211_hw *hw,
 
 	/* TODO: allow WDS/AP devices to coexist */
 
-	if (conf->type != IEEE80211_IF_TYPE_AP &&
-	    conf->type != IEEE80211_IF_TYPE_STA &&
-	    conf->type != IEEE80211_IF_TYPE_WDS &&
-	    conf->type != IEEE80211_IF_TYPE_IBSS)
+	if (conf->type != NL80211_IFTYPE_AP &&
+	    conf->type != NL80211_IFTYPE_STATION &&
+	    conf->type != NL80211_IFTYPE_WDS &&
+	    conf->type != NL80211_IFTYPE_ADHOC)
 		return -EOPNOTSUPP;
 
 	mutex_lock(&wl->mutex);
diff --git a/drivers/net/wireless/b43legacy/phy.c b/drivers/net/wireless/b43legacy/phy.c
index bed67a54250..4c9442b16f3 100644
--- a/drivers/net/wireless/b43legacy/phy.c
+++ b/drivers/net/wireless/b43legacy/phy.c
@@ -103,7 +103,7 @@ void b43legacy_phy_lock(struct b43legacy_wldev *dev)
 	if (dev->dev->id.revision < 3) {
 		b43legacy_mac_suspend(dev);
 	} else {
-		if (!b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_AP))
+		if (!b43legacy_is_mode(dev->wl, NL80211_IFTYPE_AP))
 			b43legacy_power_saving_ctl_bits(dev, -1, 1);
 	}
 }
@@ -118,7 +118,7 @@ void b43legacy_phy_unlock(struct b43legacy_wldev *dev)
 	if (dev->dev->id.revision < 3) {
 		b43legacy_mac_enable(dev);
 	} else {
-		if (!b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_AP))
+		if (!b43legacy_is_mode(dev->wl, NL80211_IFTYPE_AP))
 			b43legacy_power_saving_ctl_bits(dev, -1, -1);
 	}
 }
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
index 10c64bdb314..da23c927380 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
@@ -682,7 +682,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 
 	rs_sta = (void *)sta->rate_ctrl_priv;
 
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
 	    !rs_sta->ibss_sta_added) {
 		u8 sta_id = iwl3945_hw_find_station(priv, hdr->addr1);
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index 3d100e89824..7ca5627cc07 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -520,10 +520,10 @@ static int iwl3945_is_network_packet(struct iwl3945_priv *priv,
 	/* Filter incoming packets to determine if they are targeted toward
 	 * this network, discarding packets coming from ourselves */
 	switch (priv->iw_mode) {
-	case IEEE80211_IF_TYPE_IBSS: /* Header: Dest. | Source    | BSSID */
+	case NL80211_IFTYPE_ADHOC: /* Header: Dest. | Source    | BSSID */
 		/* packets to our IBSS update information */
 		return !compare_ether_addr(header->addr3, priv->bssid);
-	case IEEE80211_IF_TYPE_STA: /* Header: Dest. | AP{BSSID} | Source */
+	case NL80211_IFTYPE_STATION: /* Header: Dest. | AP{BSSID} | Source */
 		/* packets to our IBSS update information */
 		return !compare_ether_addr(header->addr2, priv->bssid);
 	default:
@@ -807,7 +807,7 @@ void iwl3945_hw_build_tx_cmd_rate(struct iwl3945_priv *priv,
 
 	priv->stations[sta_id].current_rate.rate_n_flags = rate;
 
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
 	    (sta_id != priv->hw_setting.bcast_sta_id) &&
 		(sta_id != IWL_MULTICAST_ID))
 		priv->stations[IWL_STA_ID].current_rate.rate_n_flags = rate;
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h
index 9bbbc9d7c0e..2a4933b5fb6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.h
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.h
@@ -851,7 +851,7 @@ struct iwl3945_priv {
 	/* eeprom */
 	struct iwl3945_eeprom eeprom;
 
-	enum ieee80211_if_types iw_mode;
+	enum nl80211_iftype iw_mode;
 
 	struct sk_buff *ibss_beacon;
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index c293e5b6cbb..700da67ac28 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -821,7 +821,7 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 
 	lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv;
 
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
 	    !lq_sta->ibss_sta_added)
 		goto out;
 
@@ -2093,7 +2093,7 @@ static void rs_initialize_lq(struct iwl_priv *priv,
 	i = sta->last_txrate_idx;
 
 	if ((lq_sta->lq.sta_id == 0xff) &&
-	    (priv->iw_mode == IEEE80211_IF_TYPE_IBSS))
+	    (priv->iw_mode == NL80211_IFTYPE_ADHOC))
 		goto out;
 
 	valid_tx_ant = priv->hw_params.valid_tx_ant;
@@ -2163,7 +2163,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 	lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv;
 	i = sta->last_txrate_idx;
 
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
 	    !lq_sta->ibss_sta_added) {
 		u8 sta_id = iwl_find_station(priv, hdr->addr1);
 		DECLARE_MAC_BUF(mac);
@@ -2243,7 +2243,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 	 * after assoc.. */
 
 	lq_sta->ibss_sta_added = 0;
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode == NL80211_IFTYPE_AP) {
 		u8 sta_id = iwl_find_station(priv, sta->addr);
 		DECLARE_MAC_BUF(mac);
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index 31ea2885889..e8db33bf5e5 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -337,7 +337,7 @@ static int iwl4965_commit_rxon(struct iwl_priv *priv)
 	/* If we have set the ASSOC_MSK and we are in BSS mode then
 	 * add the IWL_AP_ID to the station rate table */
 	if (new_assoc) {
-		if (priv->iw_mode == IEEE80211_IF_TYPE_STA) {
+		if (priv->iw_mode == NL80211_IFTYPE_STATION) {
 			ret = iwl_rxon_add_station(priv,
 					   priv->active_rxon.bssid_addr, 1);
 			if (ret == IWL_INVALID_STATION) {
@@ -448,8 +448,8 @@ static unsigned int iwl_fill_beacon_frame(struct iwl_priv *priv,
 					  const u8 *dest, int left)
 {
 	if (!iwl_is_associated(priv) || !priv->ibss_beacon ||
-	    ((priv->iw_mode != IEEE80211_IF_TYPE_IBSS) &&
-	     (priv->iw_mode != IEEE80211_IF_TYPE_AP)))
+	    ((priv->iw_mode != NL80211_IFTYPE_ADHOC) &&
+	     (priv->iw_mode != NL80211_IFTYPE_AP)))
 		return 0;
 
 	if (priv->ibss_beacon->len > left)
@@ -672,7 +672,7 @@ static void iwl4965_setup_rxon_timing(struct iwl_priv *priv)
 	beacon_int = priv->beacon_int;
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_STA) {
+	if (priv->iw_mode == NL80211_IFTYPE_STATION) {
 		if (beacon_int == 0) {
 			priv->rxon_timing.beacon_interval = cpu_to_le16(100);
 			priv->rxon_timing.beacon_init_val = cpu_to_le32(102400);
@@ -721,7 +721,7 @@ static void iwl_set_flags_for_band(struct iwl_priv *priv,
 		else
 			priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK;
 
-		if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS)
+		if (priv->iw_mode == NL80211_IFTYPE_ADHOC)
 			priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK;
 
 		priv->staging_rxon.flags |= RXON_FLG_BAND_24G_MSK;
@@ -740,23 +740,23 @@ static void iwl4965_connection_init_rx_config(struct iwl_priv *priv)
 	memset(&priv->staging_rxon, 0, sizeof(priv->staging_rxon));
 
 	switch (priv->iw_mode) {
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		priv->staging_rxon.dev_type = RXON_DEV_TYPE_AP;
 		break;
 
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		priv->staging_rxon.dev_type = RXON_DEV_TYPE_ESS;
 		priv->staging_rxon.filter_flags = RXON_FILTER_ACCEPT_GRP_MSK;
 		break;
 
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 		priv->staging_rxon.dev_type = RXON_DEV_TYPE_IBSS;
 		priv->staging_rxon.flags = RXON_FLG_SHORT_PREAMBLE_MSK;
 		priv->staging_rxon.filter_flags = RXON_FILTER_BCON_AWARE_MSK |
 						  RXON_FILTER_ACCEPT_GRP_MSK;
 		break;
 
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		priv->staging_rxon.dev_type = RXON_DEV_TYPE_SNIFFER;
 		priv->staging_rxon.filter_flags = RXON_FILTER_PROMISC_MSK |
 		    RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_ACCEPT_GRP_MSK;
@@ -785,7 +785,7 @@ static void iwl4965_connection_init_rx_config(struct iwl_priv *priv)
 	 * in some case A channels are all non IBSS
 	 * in this case force B/G channel
 	 */
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
 	    !(is_channel_ibss(ch_info)))
 		ch_info = &priv->channel_info[0];
 
@@ -1182,7 +1182,7 @@ static void iwl4965_rx_beacon_notif(struct iwl_priv *priv,
 		le32_to_cpu(beacon->low_tsf), rate);
 #endif
 
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_AP) &&
 	    (!test_bit(STATUS_EXIT_PENDING, &priv->status)))
 		queue_work(priv->workqueue, &priv->beacon_update);
 }
@@ -2388,7 +2388,7 @@ static void iwl4965_bg_set_monitor(struct work_struct *work)
 
 	mutex_lock(&priv->mutex);
 
-	ret = iwl4965_set_mode(priv, IEEE80211_IF_TYPE_MNTR);
+	ret = iwl4965_set_mode(priv, NL80211_IFTYPE_MONITOR);
 
 	if (ret) {
 		if (ret == -EAGAIN)
@@ -2469,7 +2469,7 @@ static void iwl4965_post_associate(struct iwl_priv *priv)
 	DECLARE_MAC_BUF(mac);
 	unsigned long flags;
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode == NL80211_IFTYPE_AP) {
 		IWL_ERROR("%s Should not be called in AP mode\n", __func__);
 		return;
 	}
@@ -2524,7 +2524,7 @@ static void iwl4965_post_associate(struct iwl_priv *priv)
 		else
 			priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK;
 
-		if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS)
+		if (priv->iw_mode == NL80211_IFTYPE_ADHOC)
 			priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK;
 
 	}
@@ -2532,10 +2532,10 @@ static void iwl4965_post_associate(struct iwl_priv *priv)
 	iwl4965_commit_rxon(priv);
 
 	switch (priv->iw_mode) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		break;
 
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 
 		/* assume default assoc id */
 		priv->assoc_id = 1;
@@ -2551,7 +2551,7 @@ static void iwl4965_post_associate(struct iwl_priv *priv)
 		break;
 	}
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS)
+	if (priv->iw_mode == NL80211_IFTYPE_ADHOC)
 		priv->assoc_station_added = 1;
 
 	spin_lock_irqsave(&priv->lock, flags);
@@ -2828,7 +2828,7 @@ static int iwl4965_mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *co
 		goto out;
 	}
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS &&
+	if (priv->iw_mode == NL80211_IFTYPE_ADHOC &&
 	    !is_channel_ibss(ch_info)) {
 		IWL_ERROR("channel %d in band %d not IBSS channel\n",
 			conf->channel->hw_value, conf->channel->band);
@@ -2943,7 +2943,7 @@ static void iwl4965_config_ap(struct iwl_priv *priv)
 				priv->staging_rxon.flags &=
 					~RXON_FLG_SHORT_SLOT_MSK;
 
-			if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS)
+			if (priv->iw_mode == NL80211_IFTYPE_ADHOC)
 				priv->staging_rxon.flags &=
 					~RXON_FLG_SHORT_SLOT_MSK;
 		}
@@ -2982,7 +2982,7 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw,
 		return 0;
 	}
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS &&
+	if (priv->iw_mode == NL80211_IFTYPE_ADHOC &&
 	    conf->changed & IEEE80211_IFCC_BEACON) {
 		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
 		if (!beacon)
@@ -2992,7 +2992,7 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw,
 			return rc;
 	}
 
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_AP) &&
 	    (!conf->ssid_len)) {
 		IWL_DEBUG_MAC80211
 		    ("Leaving in AP mode because HostAPD is not ready.\n");
@@ -3015,7 +3015,7 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw,
 	    !(priv->hw->flags & IEEE80211_HW_NO_PROBE_FILTERING)) {
  */
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode == NL80211_IFTYPE_AP) {
 		if (!conf->bssid) {
 			conf->bssid = priv->mac_addr;
 			memcpy(priv->bssid, priv->mac_addr, ETH_ALEN);
@@ -3050,11 +3050,11 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw,
 		 * to verify) - jpk */
 		memcpy(priv->bssid, conf->bssid, ETH_ALEN);
 
-		if (priv->iw_mode == IEEE80211_IF_TYPE_AP)
+		if (priv->iw_mode == NL80211_IFTYPE_AP)
 			iwl4965_config_ap(priv);
 		else {
 			rc = iwl4965_commit_rxon(priv);
-			if ((priv->iw_mode == IEEE80211_IF_TYPE_STA) && rc)
+			if ((priv->iw_mode == NL80211_IFTYPE_STATION) && rc)
 				iwl_rxon_add_station(
 					priv, priv->active_rxon.bssid_addr, 1);
 		}
@@ -3090,7 +3090,7 @@ static void iwl4965_configure_filter(struct ieee80211_hw *hw,
 
 	if (changed_flags & (*total_flags) & FIF_OTHER_BSS) {
 		IWL_DEBUG_MAC80211("Enter: type %d (0x%x, 0x%x)\n",
-				   IEEE80211_IF_TYPE_MNTR,
+				   NL80211_IFTYPE_MONITOR,
 				   changed_flags, *total_flags);
 		/* queue work 'cuz mac80211 is holding a lock which
 		 * prevents us from issuing (synchronous) f/w cmds */
@@ -3204,7 +3204,7 @@ static int iwl_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t ssid_len)
 		goto out_unlock;
 	}
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {	/* APs don't scan */
+	if (priv->iw_mode == NL80211_IFTYPE_AP) {	/* APs don't scan */
 		ret = -EIO;
 		IWL_ERROR("ERROR: APs don't scan\n");
 		goto out_unlock;
@@ -3329,7 +3329,7 @@ static int iwl4965_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	 * in 1X mode.
 	 * In legacy wep mode, we use another host command to the uCode */
 	if (key->alg == ALG_WEP && sta_id == priv->hw_params.bcast_sta_id &&
-		priv->iw_mode != IEEE80211_IF_TYPE_AP) {
+		priv->iw_mode != NL80211_IFTYPE_AP) {
 		if (cmd == SET_KEY)
 			is_default_wep_key = !priv->key_mapping_key;
 		else
@@ -3400,7 +3400,7 @@ static int iwl4965_mac_conf_tx(struct ieee80211_hw *hw, u16 queue,
 	priv->qos_data.def_qos_parm.ac[q].reserved1 = 0;
 	priv->qos_data.qos_active = 1;
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP)
+	if (priv->iw_mode == NL80211_IFTYPE_AP)
 		iwl_activate_qos(priv, 1);
 	else if (priv->assoc_id && iwl_is_associated(priv))
 		iwl_activate_qos(priv, 0);
@@ -3518,7 +3518,7 @@ static void iwl4965_mac_reset_tsf(struct ieee80211_hw *hw)
 
 	priv->beacon_int = priv->hw->conf.beacon_int;
 	priv->timestamp = 0;
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_STA))
+	if ((priv->iw_mode == NL80211_IFTYPE_STATION))
 		priv->beacon_int = 0;
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -3532,7 +3532,7 @@ static void iwl4965_mac_reset_tsf(struct ieee80211_hw *hw)
 	/* we are restarting association process
 	 * clear RXON_FILTER_ASSOC_MSK bit
 	 */
-	if (priv->iw_mode != IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode != NL80211_IFTYPE_AP) {
 		iwl_scan_cancel_timeout(priv, 100);
 		priv->staging_rxon.filter_flags &= ~RXON_FILTER_ASSOC_MSK;
 		iwl4965_commit_rxon(priv);
@@ -3541,7 +3541,7 @@ static void iwl4965_mac_reset_tsf(struct ieee80211_hw *hw)
 	iwl_power_update_mode(priv, 0);
 
 	/* Per mac80211.h: This is only used in IBSS mode... */
-	if (priv->iw_mode != IEEE80211_IF_TYPE_IBSS) {
+	if (priv->iw_mode != NL80211_IFTYPE_ADHOC) {
 
 		/* switch to CAM during association period.
 		 * the ucode will block any association/authentication
@@ -3580,7 +3580,7 @@ static int iwl4965_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *sk
 		return -EIO;
 	}
 
-	if (priv->iw_mode != IEEE80211_IF_TYPE_IBSS) {
+	if (priv->iw_mode != NL80211_IFTYPE_ADHOC) {
 		IWL_DEBUG_MAC80211("leave - not IBSS\n");
 		mutex_unlock(&priv->mutex);
 		return -EIO;
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 36d08b0eec4..d80184ee911 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -306,14 +306,14 @@ void iwl_reset_qos(struct iwl_priv *priv)
 	spin_lock_irqsave(&priv->lock, flags);
 	priv->qos_data.qos_active = 0;
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) {
+	if (priv->iw_mode == NL80211_IFTYPE_ADHOC) {
 		if (priv->qos_data.qos_enable)
 			priv->qos_data.qos_active = 1;
 		if (!(priv->active_rate & 0xfff0)) {
 			cw_min = 31;
 			is_legacy = 1;
 		}
-	} else if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {
+	} else if (priv->iw_mode == NL80211_IFTYPE_AP) {
 		if (priv->qos_data.qos_enable)
 			priv->qos_data.qos_active = 1;
 	} else if (!(priv->staging_rxon.flags & RXON_FLG_SHORT_SLOT_MSK)) {
@@ -932,7 +932,7 @@ int iwl_init_drv(struct iwl_priv *priv)
 	priv->ieee_rates = NULL;
 	priv->band = IEEE80211_BAND_2GHZ;
 
-	priv->iw_mode = IEEE80211_IF_TYPE_STA;
+	priv->iw_mode = NL80211_IFTYPE_STATION;
 
 	priv->use_ant_b_for_management_frame = 1; /* start with ant B */
 	priv->current_ht_config.sm_ps = WLAN_HT_CAP_SM_PS_DISABLED;
@@ -1396,7 +1396,7 @@ void iwl_radio_kill_sw_disable_radio(struct iwl_priv *priv)
 
 	iwl_scan_cancel(priv);
 	/* FIXME: This is a workaround for AP */
-	if (priv->iw_mode != IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode != NL80211_IFTYPE_AP) {
 		spin_lock_irqsave(&priv->lock, flags);
 		iwl_write32(priv, CSR_UCODE_DRV_GP1_SET,
 			    CSR_UCODE_SW_BIT_RFKILL);
diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h
index 1823687e582..c018121085e 100644
--- a/drivers/net/wireless/iwlwifi/iwl-dev.h
+++ b/drivers/net/wireless/iwlwifi/iwl-dev.h
@@ -954,7 +954,7 @@ struct iwl_priv {
 	u8 *eeprom;
 	struct iwl_eeprom_calib_info *calib_info;
 
-	enum ieee80211_if_types iw_mode;
+	enum nl80211_iftype iw_mode;
 
 	struct sk_buff *ibss_beacon;
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-power.c b/drivers/net/wireless/iwlwifi/iwl-power.c
index 55ec31ec9e1..60a03d2d2d0 100644
--- a/drivers/net/wireless/iwlwifi/iwl-power.c
+++ b/drivers/net/wireless/iwlwifi/iwl-power.c
@@ -290,7 +290,7 @@ int iwl_power_update_mode(struct iwl_priv *priv, bool force)
 		final_mode = setting->critical_power_setting;
 
 	/* driver only support CAM for non STA network */
-	if (priv->iw_mode != IEEE80211_IF_TYPE_STA)
+	if (priv->iw_mode != NL80211_IFTYPE_STATION)
 		final_mode = IWL_POWER_MODE_CAM;
 
 	if (!iwl_is_rfkill(priv) && !setting->power_disabled &&
diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c
index faad4d3f56e..38b2946b1d8 100644
--- a/drivers/net/wireless/iwlwifi/iwl-rx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-rx.c
@@ -1026,10 +1026,10 @@ static int iwl_is_network_packet(struct iwl_priv *priv,
 	/* Filter incoming packets to determine if they are targeted toward
 	 * this network, discarding packets coming from ourselves */
 	switch (priv->iw_mode) {
-	case IEEE80211_IF_TYPE_IBSS: /* Header: Dest. | Source    | BSSID */
+	case NL80211_IFTYPE_ADHOC: /* Header: Dest. | Source    | BSSID */
 		/* packets to our IBSS update information */
 		return !compare_ether_addr(header->addr3, priv->bssid);
-	case IEEE80211_IF_TYPE_STA: /* Header: Dest. | AP{BSSID} | Source */
+	case NL80211_IFTYPE_STATION: /* Header: Dest. | AP{BSSID} | Source */
 		/* packets to our IBSS update information */
 		return !compare_ether_addr(header->addr2, priv->bssid);
 	default:
@@ -1169,7 +1169,7 @@ void iwl_rx_reply_rx(struct iwl_priv *priv,
 		rx_status.flag |= RX_FLAG_SHORTPRE;
 
 	/* Take shortcut when only in monitor mode */
-	if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR) {
+	if (priv->iw_mode == NL80211_IFTYPE_MONITOR) {
 		iwl_pass_packet_to_mac80211(priv, include_phy,
 						 rxb, &rx_status);
 		return;
@@ -1186,7 +1186,7 @@ void iwl_rx_reply_rx(struct iwl_priv *priv,
 	switch (fc & IEEE80211_FCTL_FTYPE) {
 	case IEEE80211_FTYPE_MGMT:
 	case IEEE80211_FTYPE_DATA:
-		if (priv->iw_mode == IEEE80211_IF_TYPE_AP)
+		if (priv->iw_mode == NL80211_IFTYPE_AP)
 			iwl_update_ps_mode(priv, fc  & IEEE80211_FCTL_PM,
 						header->addr2);
 		/* fall through */
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index d026aaf6233..09c264be049 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -463,7 +463,7 @@ void iwl_init_scan_params(struct iwl_priv *priv)
 
 int iwl_scan_initiate(struct iwl_priv *priv)
 {
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode == NL80211_IFTYPE_AP) {
 		IWL_ERROR("APs don't scan.\n");
 		return 0;
 	}
@@ -868,7 +868,7 @@ static void iwl_bg_request_scan(struct work_struct *data)
 
 	scan->tx_cmd.len = cpu_to_le16(cmd_len);
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR)
+	if (priv->iw_mode == NL80211_IFTYPE_MONITOR)
 		scan->filter_flags = RXON_FILTER_PROMISC_MSK;
 
 	scan->filter_flags |= (RXON_FILTER_ACCEPT_GRP_MSK |
diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c
index a72569f1acb..61797f3f8d5 100644
--- a/drivers/net/wireless/iwlwifi/iwl-sta.c
+++ b/drivers/net/wireless/iwlwifi/iwl-sta.c
@@ -47,8 +47,8 @@ u8 iwl_find_station(struct iwl_priv *priv, const u8 *addr)
 	unsigned long flags;
 	DECLARE_MAC_BUF(mac);
 
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) ||
-	    (priv->iw_mode == IEEE80211_IF_TYPE_AP))
+	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) ||
+	    (priv->iw_mode == NL80211_IFTYPE_AP))
 		start = IWL_STA_ID;
 
 	if (is_broadcast_ether_addr(addr))
@@ -74,7 +74,7 @@ EXPORT_SYMBOL(iwl_find_station);
 
 int iwl_get_ra_sta_id(struct iwl_priv *priv, struct ieee80211_hdr *hdr)
 {
-	if (priv->iw_mode == IEEE80211_IF_TYPE_STA) {
+	if (priv->iw_mode == NL80211_IFTYPE_STATION) {
 		return IWL_AP_ID;
 	} else {
 		u8 *da = ieee80211_get_DA(hdr);
@@ -286,7 +286,7 @@ u8 iwl_add_station_flags(struct iwl_priv *priv, const u8 *addr, int is_ap,
 
 	/* BCAST station and IBSS stations do not work in HT mode */
 	if (sta_id != priv->hw_params.bcast_sta_id &&
-	    priv->iw_mode != IEEE80211_IF_TYPE_IBSS)
+	    priv->iw_mode != NL80211_IFTYPE_ADHOC)
 		iwl_set_ht_add_station(priv, sta_id, ht_info);
 
 	spin_unlock_irqrestore(&priv->sta_lock, flags_spin);
@@ -817,7 +817,7 @@ int iwl_send_lq_cmd(struct iwl_priv *priv,
 	};
 
 	if ((lq->sta_id == 0xFF) &&
-	    (priv->iw_mode == IEEE80211_IF_TYPE_IBSS))
+	    (priv->iw_mode == NL80211_IFTYPE_ADHOC))
 		return -EINVAL;
 
 	if (lq->sta_id == 0xFF)
@@ -904,7 +904,7 @@ int iwl_rxon_add_station(struct iwl_priv *priv, const u8 *addr, int is_ap)
 
 	if ((is_ap) &&
 	    (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) &&
-	    (priv->iw_mode == IEEE80211_IF_TYPE_STA))
+	    (priv->iw_mode == NL80211_IFTYPE_STATION))
 		sta_id = iwl_add_station_flags(priv, addr, is_ap,
 						   0, cur_ht_config);
 	else
@@ -938,11 +938,11 @@ int iwl_get_sta_id(struct iwl_priv *priv, struct ieee80211_hdr *hdr)
 
 	/* If we are a client station in a BSS network, use the special
 	 * AP station entry (that's the only station we communicate with) */
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		return IWL_AP_ID;
 
 	/* If we are an AP, then find the station, or use BCAST */
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		sta_id = iwl_find_station(priv, hdr->addr1);
 		if (sta_id != IWL_INVALID_STATION)
 			return sta_id;
@@ -950,7 +950,7 @@ int iwl_get_sta_id(struct iwl_priv *priv, struct ieee80211_hdr *hdr)
 
 	/* If this frame is going out to an IBSS network, find the station,
 	 * or create a new station table entry */
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 		sta_id = iwl_find_station(priv, hdr->addr1);
 		if (sta_id != IWL_INVALID_STATION)
 			return sta_id;
@@ -970,7 +970,7 @@ int iwl_get_sta_id(struct iwl_priv *priv, struct ieee80211_hdr *hdr)
 
 	/* If we are in monitor mode, use BCAST. This is required for
 	 * packet injection. */
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		return priv->hw_params.bcast_sta_id;
 
 	default:
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 9d5bcf46cbe..e9feca4033f 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -814,10 +814,10 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
 
 	/* drop all data frame if we are not associated */
 	if (ieee80211_is_data(fc) &&
-	    (priv->iw_mode != IEEE80211_IF_TYPE_MNTR ||
+	    (priv->iw_mode != NL80211_IFTYPE_MONITOR ||
 	    !(info->flags & IEEE80211_TX_CTL_INJECTED)) && /* packet injection */
 	    (!iwl_is_associated(priv) ||
-	     ((priv->iw_mode == IEEE80211_IF_TYPE_STA) && !priv->assoc_id) ||
+	     ((priv->iw_mode == NL80211_IFTYPE_STATION) && !priv->assoc_id) ||
 	     !priv->assoc_station_added)) {
 		IWL_DEBUG_DROP("Dropping - !iwl_is_associated\n");
 		goto drop_unlock;
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index cbbe73a1288..e6c6ef6ad62 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -1160,7 +1160,7 @@ static int iwl3945_commit_rxon(struct iwl3945_priv *priv)
 	/* If we have set the ASSOC_MSK and we are in BSS mode then
 	 * add the IWL_AP_ID to the station rate table */
 	if (iwl3945_is_associated(priv) &&
-	    (priv->iw_mode == IEEE80211_IF_TYPE_STA))
+	    (priv->iw_mode == NL80211_IFTYPE_STATION))
 		if (iwl3945_add_station(priv, priv->active_rxon.bssid_addr, 1, 0)
 		    == IWL_INVALID_STATION) {
 			IWL_ERROR("Error adding AP address for transmit.\n");
@@ -1447,8 +1447,8 @@ unsigned int iwl3945_fill_beacon_frame(struct iwl3945_priv *priv,
 {
 
 	if (!iwl3945_is_associated(priv) || !priv->ibss_beacon ||
-	    ((priv->iw_mode != IEEE80211_IF_TYPE_IBSS) &&
-	     (priv->iw_mode != IEEE80211_IF_TYPE_AP)))
+	    ((priv->iw_mode != NL80211_IFTYPE_ADHOC) &&
+	     (priv->iw_mode != NL80211_IFTYPE_AP)))
 		return 0;
 
 	if (priv->ibss_beacon->len > left)
@@ -1746,14 +1746,14 @@ static void iwl3945_reset_qos(struct iwl3945_priv *priv)
 	spin_lock_irqsave(&priv->lock, flags);
 	priv->qos_data.qos_active = 0;
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) {
+	if (priv->iw_mode == NL80211_IFTYPE_ADHOC) {
 		if (priv->qos_data.qos_enable)
 			priv->qos_data.qos_active = 1;
 		if (!(priv->active_rate & 0xfff0)) {
 			cw_min = 31;
 			is_legacy = 1;
 		}
-	} else if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {
+	} else if (priv->iw_mode == NL80211_IFTYPE_AP) {
 		if (priv->qos_data.qos_enable)
 			priv->qos_data.qos_active = 1;
 	} else if (!(priv->staging_rxon.flags & RXON_FLG_SHORT_SLOT_MSK)) {
@@ -2120,7 +2120,7 @@ static void iwl3945_setup_rxon_timing(struct iwl3945_priv *priv)
 	beacon_int = priv->beacon_int;
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_STA) {
+	if (priv->iw_mode == NL80211_IFTYPE_STATION) {
 		if (beacon_int == 0) {
 			priv->rxon_timing.beacon_interval = cpu_to_le16(100);
 			priv->rxon_timing.beacon_init_val = cpu_to_le32(102400);
@@ -2156,7 +2156,7 @@ static void iwl3945_setup_rxon_timing(struct iwl3945_priv *priv)
 
 static int iwl3945_scan_initiate(struct iwl3945_priv *priv)
 {
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode == NL80211_IFTYPE_AP) {
 		IWL_ERROR("APs don't scan.\n");
 		return 0;
 	}
@@ -2218,7 +2218,7 @@ static void iwl3945_set_flags_for_phymode(struct iwl3945_priv *priv,
 		else
 			priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK;
 
-		if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS)
+		if (priv->iw_mode == NL80211_IFTYPE_ADHOC)
 			priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK;
 
 		priv->staging_rxon.flags |= RXON_FLG_BAND_24G_MSK;
@@ -2237,23 +2237,23 @@ static void iwl3945_connection_init_rx_config(struct iwl3945_priv *priv)
 	memset(&priv->staging_rxon, 0, sizeof(priv->staging_rxon));
 
 	switch (priv->iw_mode) {
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		priv->staging_rxon.dev_type = RXON_DEV_TYPE_AP;
 		break;
 
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		priv->staging_rxon.dev_type = RXON_DEV_TYPE_ESS;
 		priv->staging_rxon.filter_flags = RXON_FILTER_ACCEPT_GRP_MSK;
 		break;
 
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 		priv->staging_rxon.dev_type = RXON_DEV_TYPE_IBSS;
 		priv->staging_rxon.flags = RXON_FLG_SHORT_PREAMBLE_MSK;
 		priv->staging_rxon.filter_flags = RXON_FILTER_BCON_AWARE_MSK |
 						  RXON_FILTER_ACCEPT_GRP_MSK;
 		break;
 
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		priv->staging_rxon.dev_type = RXON_DEV_TYPE_SNIFFER;
 		priv->staging_rxon.filter_flags = RXON_FILTER_PROMISC_MSK |
 		    RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_ACCEPT_GRP_MSK;
@@ -2282,7 +2282,7 @@ static void iwl3945_connection_init_rx_config(struct iwl3945_priv *priv)
 	 * in some case A channels are all non IBSS
 	 * in this case force B/G channel
 	 */
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
 	    !(is_channel_ibss(ch_info)))
 		ch_info = &priv->channel_info[0];
 
@@ -2302,7 +2302,7 @@ static void iwl3945_connection_init_rx_config(struct iwl3945_priv *priv)
 
 static int iwl3945_set_mode(struct iwl3945_priv *priv, int mode)
 {
-	if (mode == IEEE80211_IF_TYPE_IBSS) {
+	if (mode == NL80211_IFTYPE_ADHOC) {
 		const struct iwl3945_channel_info *ch_info;
 
 		ch_info = iwl3945_get_channel_info(priv,
@@ -2469,11 +2469,11 @@ static int iwl3945_get_sta_id(struct iwl3945_priv *priv, struct ieee80211_hdr *h
 
 	/* If we are a client station in a BSS network, use the special
 	 * AP station entry (that's the only station we communicate with) */
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		return IWL_AP_ID;
 
 	/* If we are an AP, then find the station, or use BCAST */
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		sta_id = iwl3945_hw_find_station(priv, hdr->addr1);
 		if (sta_id != IWL_INVALID_STATION)
 			return sta_id;
@@ -2481,7 +2481,7 @@ static int iwl3945_get_sta_id(struct iwl3945_priv *priv, struct ieee80211_hdr *h
 
 	/* If this frame is going out to an IBSS network, find the station,
 	 * or create a new station table entry */
-	case IEEE80211_IF_TYPE_IBSS: {
+	case NL80211_IFTYPE_ADHOC: {
 		DECLARE_MAC_BUF(mac);
 
 		/* Create new station table entry */
@@ -2502,7 +2502,7 @@ static int iwl3945_get_sta_id(struct iwl3945_priv *priv, struct ieee80211_hdr *h
 	}
 	/* If we are in monitor mode, use BCAST. This is required for
 	 * packet injection. */
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		return priv->hw_setting.bcast_sta_id;
 
 	default:
@@ -2565,9 +2565,9 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb)
 
 	/* drop all data frame if we are not associated */
 	if (ieee80211_is_data(fc) &&
-	    (priv->iw_mode != IEEE80211_IF_TYPE_MNTR) && /* packet injection */
+	    (priv->iw_mode != NL80211_IFTYPE_MONITOR) && /* packet injection */
 	    (!iwl3945_is_associated(priv) ||
-	     ((priv->iw_mode == IEEE80211_IF_TYPE_STA) && !priv->assoc_id))) {
+	     ((priv->iw_mode == NL80211_IFTYPE_STATION) && !priv->assoc_id))) {
 		IWL_DEBUG_DROP("Dropping - !iwl3945_is_associated\n");
 		goto drop_unlock;
 	}
@@ -2806,7 +2806,7 @@ static void iwl3945_radio_kill_sw(struct iwl3945_priv *priv, int disable_radio)
 	if (disable_radio) {
 		iwl3945_scan_cancel(priv);
 		/* FIXME: This is a workaround for AP */
-		if (priv->iw_mode != IEEE80211_IF_TYPE_AP) {
+		if (priv->iw_mode != NL80211_IFTYPE_AP) {
 			spin_lock_irqsave(&priv->lock, flags);
 			iwl3945_write32(priv, CSR_UCODE_DRV_GP1_SET,
 				    CSR_UCODE_SW_BIT_RFKILL);
@@ -3161,7 +3161,7 @@ static void iwl3945_rx_beacon_notif(struct iwl3945_priv *priv,
 		le32_to_cpu(beacon->low_tsf), rate);
 #endif
 
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_AP) &&
 	    (!test_bit(STATUS_EXIT_PENDING, &priv->status)))
 		queue_work(priv->workqueue, &priv->beacon_update);
 }
@@ -6059,7 +6059,7 @@ static void iwl3945_bg_set_monitor(struct work_struct *work)
 	if (!iwl3945_is_ready(priv))
 		IWL_DEBUG(IWL_DL_STATE, "leave - not ready\n");
 	else
-		if (iwl3945_set_mode(priv, IEEE80211_IF_TYPE_MNTR) != 0)
+		if (iwl3945_set_mode(priv, NL80211_IFTYPE_MONITOR) != 0)
 			IWL_ERROR("iwl3945_set_mode() failed\n");
 
 	mutex_unlock(&priv->mutex);
@@ -6248,7 +6248,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data)
 	/* select Rx antennas */
 	scan->flags |= iwl3945_get_antenna_flags(priv);
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR)
+	if (priv->iw_mode == NL80211_IFTYPE_MONITOR)
 		scan->filter_flags = RXON_FILTER_PROMISC_MSK;
 
 	scan->channel_count =
@@ -6323,7 +6323,7 @@ static void iwl3945_post_associate(struct iwl3945_priv *priv)
 	struct ieee80211_conf *conf = NULL;
 	DECLARE_MAC_BUF(mac);
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode == NL80211_IFTYPE_AP) {
 		IWL_ERROR("%s Should not be called in AP mode\n", __func__);
 		return;
 	}
@@ -6372,7 +6372,7 @@ static void iwl3945_post_associate(struct iwl3945_priv *priv)
 		else
 			priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK;
 
-		if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS)
+		if (priv->iw_mode == NL80211_IFTYPE_ADHOC)
 			priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK;
 
 	}
@@ -6380,11 +6380,11 @@ static void iwl3945_post_associate(struct iwl3945_priv *priv)
 	iwl3945_commit_rxon(priv);
 
 	switch (priv->iw_mode) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		iwl3945_rate_scale_init(priv->hw, IWL_AP_ID);
 		break;
 
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 
 		/* clear out the station table */
 		iwl3945_clear_stations_table(priv);
@@ -6754,7 +6754,7 @@ static void iwl3945_config_ap(struct iwl3945_priv *priv)
 				priv->staging_rxon.flags &=
 					~RXON_FLG_SHORT_SLOT_MSK;
 
-			if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS)
+			if (priv->iw_mode == NL80211_IFTYPE_ADHOC)
 				priv->staging_rxon.flags &=
 					~RXON_FLG_SHORT_SLOT_MSK;
 		}
@@ -6791,7 +6791,7 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw,
 	}
 
 	/* handle this temporarily here */
-	if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS &&
+	if (priv->iw_mode == NL80211_IFTYPE_ADHOC &&
 	    conf->changed & IEEE80211_IFCC_BEACON) {
 		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
 		if (!beacon)
@@ -6803,7 +6803,7 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw,
 
 	/* XXX: this MUST use conf->mac_addr */
 
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) &&
+	if ((priv->iw_mode == NL80211_IFTYPE_AP) &&
 	    (!conf->ssid_len)) {
 		IWL_DEBUG_MAC80211
 		    ("Leaving in AP mode because HostAPD is not ready.\n");
@@ -6826,7 +6826,7 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw,
 	    !(priv->hw->flags & IEEE80211_HW_NO_PROBE_FILTERING)) {
  */
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode == NL80211_IFTYPE_AP) {
 		if (!conf->bssid) {
 			conf->bssid = priv->mac_addr;
 			memcpy(priv->bssid, priv->mac_addr, ETH_ALEN);
@@ -6861,11 +6861,11 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw,
 		 * to verify) - jpk */
 		memcpy(priv->bssid, conf->bssid, ETH_ALEN);
 
-		if (priv->iw_mode == IEEE80211_IF_TYPE_AP)
+		if (priv->iw_mode == NL80211_IFTYPE_AP)
 			iwl3945_config_ap(priv);
 		else {
 			rc = iwl3945_commit_rxon(priv);
-			if ((priv->iw_mode == IEEE80211_IF_TYPE_STA) && rc)
+			if ((priv->iw_mode == NL80211_IFTYPE_STATION) && rc)
 				iwl3945_add_station(priv,
 					priv->active_rxon.bssid_addr, 1, 0);
 		}
@@ -6901,7 +6901,7 @@ static void iwl3945_configure_filter(struct ieee80211_hw *hw,
 
 	if (changed_flags & (*total_flags) & FIF_OTHER_BSS) {
 		IWL_DEBUG_MAC80211("Enter: type %d (0x%x, 0x%x)\n",
-				   IEEE80211_IF_TYPE_MNTR,
+				   NL80211_IFTYPE_MONITOR,
 				   changed_flags, *total_flags);
 		/* queue work 'cuz mac80211 is holding a lock which
 		 * prevents us from issuing (synchronous) f/w cmds */
@@ -7010,7 +7010,7 @@ static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len)
 		goto out_unlock;
 	}
 
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP) {	/* APs don't scan */
+	if (priv->iw_mode == NL80211_IFTYPE_AP) {	/* APs don't scan */
 		rc = -EIO;
 		IWL_ERROR("ERROR: APs don't scan\n");
 		goto out_unlock;
@@ -7152,7 +7152,7 @@ static int iwl3945_mac_conf_tx(struct ieee80211_hw *hw, u16 queue,
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	mutex_lock(&priv->mutex);
-	if (priv->iw_mode == IEEE80211_IF_TYPE_AP)
+	if (priv->iw_mode == NL80211_IFTYPE_AP)
 		iwl3945_activate_qos(priv, 1);
 	else if (priv->assoc_id && iwl3945_is_associated(priv))
 		iwl3945_activate_qos(priv, 0);
@@ -7239,7 +7239,7 @@ static void iwl3945_mac_reset_tsf(struct ieee80211_hw *hw)
 	priv->beacon_int = priv->hw->conf.beacon_int;
 	priv->timestamp1 = 0;
 	priv->timestamp0 = 0;
-	if ((priv->iw_mode == IEEE80211_IF_TYPE_STA))
+	if ((priv->iw_mode == NL80211_IFTYPE_STATION))
 		priv->beacon_int = 0;
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -7253,14 +7253,14 @@ static void iwl3945_mac_reset_tsf(struct ieee80211_hw *hw)
 	/* we are restarting association process
 	 * clear RXON_FILTER_ASSOC_MSK bit
 	*/
-	if (priv->iw_mode != IEEE80211_IF_TYPE_AP) {
+	if (priv->iw_mode != NL80211_IFTYPE_AP) {
 		iwl3945_scan_cancel_timeout(priv, 100);
 		priv->staging_rxon.filter_flags &= ~RXON_FILTER_ASSOC_MSK;
 		iwl3945_commit_rxon(priv);
 	}
 
 	/* Per mac80211.h: This is only used in IBSS mode... */
-	if (priv->iw_mode != IEEE80211_IF_TYPE_IBSS) {
+	if (priv->iw_mode != NL80211_IFTYPE_ADHOC) {
 
 		IWL_DEBUG_MAC80211("leave - not in IBSS\n");
 		mutex_unlock(&priv->mutex);
@@ -7289,7 +7289,7 @@ static int iwl3945_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *sk
 		return -EIO;
 	}
 
-	if (priv->iw_mode != IEEE80211_IF_TYPE_IBSS) {
+	if (priv->iw_mode != NL80211_IFTYPE_ADHOC) {
 		IWL_DEBUG_MAC80211("leave - not IBSS\n");
 		mutex_unlock(&priv->mutex);
 		return -EIO;
@@ -7996,7 +7996,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e
 		IWL_DEBUG_INFO("Radio disabled.\n");
 	}
 
-	priv->iw_mode = IEEE80211_IF_TYPE_STA;
+	priv->iw_mode = NL80211_IFTYPE_STATION;
 
 	printk(KERN_INFO DRV_NAME
 		": Detected Intel Wireless WiFi Link %s\n", priv->cfg->name);
diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c
index c948021bff6..feff945ad85 100644
--- a/drivers/net/wireless/libertas_tf/main.c
+++ b/drivers/net/wireless/libertas_tf/main.c
@@ -219,7 +219,7 @@ static void lbtf_tx_work(struct work_struct *work)
 	struct sk_buff *skb = NULL;
 	int err;
 
-	if ((priv->vif->type == IEEE80211_IF_TYPE_AP) &&
+	if ((priv->vif->type == NL80211_IFTYPE_AP) &&
 	    (!skb_queue_empty(&priv->bc_ps_buf)))
 		skb = skb_dequeue(&priv->bc_ps_buf);
 	else if (priv->skb_to_tx) {
@@ -326,11 +326,11 @@ static int lbtf_op_add_interface(struct ieee80211_hw *hw,
 
 	priv->vif = conf->vif;
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_MESH_POINT:
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_MESH_POINT:
+	case NL80211_IFTYPE_AP:
 		lbtf_set_mode(priv, LBTF_AP_MODE);
 		break;
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		lbtf_set_mode(priv, LBTF_STA_MODE);
 		break;
 	default:
@@ -346,8 +346,8 @@ static void lbtf_op_remove_interface(struct ieee80211_hw *hw,
 {
 	struct lbtf_private *priv = hw->priv;
 
-	if (priv->vif->type == IEEE80211_IF_TYPE_AP ||
-	    priv->vif->type == IEEE80211_IF_TYPE_MESH_POINT)
+	if (priv->vif->type == NL80211_IFTYPE_AP ||
+	    priv->vif->type == NL80211_IFTYPE_MESH_POINT)
 		lbtf_beacon_ctrl(priv, 0, 0);
 	lbtf_set_mode(priv, LBTF_PASSIVE_MODE);
 	lbtf_set_bssid(priv, 0, NULL);
@@ -372,8 +372,8 @@ static int lbtf_op_config_interface(struct ieee80211_hw *hw,
 	struct sk_buff *beacon;
 
 	switch (priv->vif->type) {
-	case IEEE80211_IF_TYPE_AP:
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_MESH_POINT:
 		beacon = ieee80211_beacon_get(hw, vif);
 		if (beacon) {
 			lbtf_beacon_set(priv, beacon);
@@ -614,7 +614,7 @@ void lbtf_bcn_sent(struct lbtf_private *priv)
 {
 	struct sk_buff *skb = NULL;
 
-	if (priv->vif->type != IEEE80211_IF_TYPE_AP)
+	if (priv->vif->type != NL80211_IFTYPE_AP)
 		return;
 
 	if (skb_queue_empty(&priv->bc_ps_buf)) {
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 6ba50f087f7..e855211a90f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -267,7 +267,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac,
 	struct sk_buff *skb;
 	struct ieee80211_tx_info *info;
 
-	if (vif->type != IEEE80211_IF_TYPE_AP)
+	if (vif->type != NL80211_IFTYPE_AP)
 		return;
 
 	skb = ieee80211_beacon_get(hw, vif);
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index da51786254d..bac58ed03e5 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -1139,7 +1139,7 @@ static int p54_start(struct ieee80211_hw *dev)
 
 	err = priv->open(dev);
 	if (!err)
-		priv->mode = IEEE80211_IF_TYPE_MNTR;
+		priv->mode = NL80211_IFTYPE_MONITOR;
 
 	p54_init_vdcf(dev);
 
@@ -1157,7 +1157,7 @@ static void p54_stop(struct ieee80211_hw *dev)
 		kfree_skb(skb);
 	priv->stop(dev);
 	priv->tsf_high32 = priv->tsf_low32 = 0;
-	priv->mode = IEEE80211_IF_TYPE_INVALID;
+	priv->mode = NL80211_IFTYPE_UNSPECIFIED;
 }
 
 static int p54_add_interface(struct ieee80211_hw *dev,
@@ -1165,11 +1165,11 @@ static int p54_add_interface(struct ieee80211_hw *dev,
 {
 	struct p54_common *priv = dev->priv;
 
-	if (priv->mode != IEEE80211_IF_TYPE_MNTR)
+	if (priv->mode != NL80211_IFTYPE_MONITOR)
 		return -EOPNOTSUPP;
 
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		priv->mode = conf->type;
 		break;
 	default:
@@ -1181,7 +1181,7 @@ static int p54_add_interface(struct ieee80211_hw *dev,
 	p54_set_filter(dev, 0, NULL);
 
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		p54_set_filter(dev, 1, NULL);
 		break;
 	default:
@@ -1198,7 +1198,7 @@ static void p54_remove_interface(struct ieee80211_hw *dev,
 				 struct ieee80211_if_init_conf *conf)
 {
 	struct p54_common *priv = dev->priv;
-	priv->mode = IEEE80211_IF_TYPE_MNTR;
+	priv->mode = NL80211_IFTYPE_MONITOR;
 	memset(priv->mac_addr, 0, ETH_ALEN);
 	p54_set_filter(dev, 0, NULL);
 }
@@ -1380,7 +1380,7 @@ struct ieee80211_hw *p54_init_common(size_t priv_data_len)
 		return NULL;
 
 	priv = dev->priv;
-	priv->mode = IEEE80211_IF_TYPE_INVALID;
+	priv->mode = NL80211_IFTYPE_UNSPECIFIED;
 	skb_queue_head_init(&priv->tx_queue);
 	dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | /* not sure */
 		     IEEE80211_HW_RX_INCLUDES_FCS |
diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
index 1594786205f..1c2a02a741a 100644
--- a/drivers/net/wireless/p54/p54pci.c
+++ b/drivers/net/wireless/p54/p54pci.c
@@ -616,7 +616,7 @@ static int p54p_suspend(struct pci_dev *pdev, pm_message_t state)
 	struct ieee80211_hw *dev = pci_get_drvdata(pdev);
 	struct p54p_priv *priv = dev->priv;
 
-	if (priv->common.mode != IEEE80211_IF_TYPE_INVALID) {
+	if (priv->common.mode != NL80211_IFTYPE_UNSPECIFIED) {
 		ieee80211_stop_queues(dev);
 		p54p_stop(dev);
 	}
@@ -634,7 +634,7 @@ static int p54p_resume(struct pci_dev *pdev)
 	pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 
-	if (priv->common.mode != IEEE80211_IF_TYPE_INVALID) {
+	if (priv->common.mode != NL80211_IFTYPE_UNSPECIFIED) {
 		p54p_open(dev);
 		ieee80211_wake_queues(dev);
 	}
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index cb5f2d01a9c..d3bf7bba611 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -384,7 +384,7 @@ static void rt2500usb_config_intf(struct rt2x00_dev *rt2x00dev,
 		rt2500usb_register_read(rt2x00dev, TXRX_CSR20, &reg);
 		rt2x00_set_field16(&reg, TXRX_CSR20_OFFSET, bcn_preload >> 6);
 		rt2x00_set_field16(&reg, TXRX_CSR20_BCN_EXPECT_WINDOW,
-				   2 * (conf->type != IEEE80211_IF_TYPE_STA));
+				   2 * (conf->type != NL80211_IFTYPE_STATION));
 		rt2500usb_register_write(rt2x00dev, TXRX_CSR20, reg);
 
 		/*
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 6f296cef76a..1359a376840 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -483,7 +483,7 @@ struct rt2x00intf_conf {
 	/*
 	 * Interface type
 	 */
-	enum ieee80211_if_types type;
+	enum nl80211_iftype type;
 
 	/*
 	 * TSF sync value, this is dependant on the operation type.
diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c
index ca051f50ef1..4d5e87b015a 100644
--- a/drivers/net/wireless/rt2x00/rt2x00config.c
+++ b/drivers/net/wireless/rt2x00/rt2x00config.c
@@ -31,7 +31,7 @@
 
 void rt2x00lib_config_intf(struct rt2x00_dev *rt2x00dev,
 			   struct rt2x00_intf *intf,
-			   enum ieee80211_if_types type,
+			   enum nl80211_iftype type,
 			   u8 *mac, u8 *bssid)
 {
 	struct rt2x00intf_conf conf;
@@ -40,11 +40,11 @@ void rt2x00lib_config_intf(struct rt2x00_dev *rt2x00dev,
 	conf.type = type;
 
 	switch (type) {
-	case IEEE80211_IF_TYPE_IBSS:
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_ADHOC:
+	case NL80211_IFTYPE_AP:
 		conf.sync = TSF_SYNC_BEACON;
 		break;
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		conf.sync = TSF_SYNC_INFRA;
 		break;
 	default:
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 2f3bfc60688..86840e3585e 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -467,8 +467,8 @@ static void rt2x00lib_beacondone_iter(void *data, u8 *mac,
 	struct rt2x00_dev *rt2x00dev = data;
 	struct rt2x00_intf *intf = vif_to_intf(vif);
 
-	if (vif->type != IEEE80211_IF_TYPE_AP &&
-	    vif->type != IEEE80211_IF_TYPE_IBSS)
+	if (vif->type != NL80211_IFTYPE_AP &&
+	    vif->type != NL80211_IFTYPE_ADHOC)
 		return;
 
 	/*
@@ -1212,8 +1212,8 @@ static void rt2x00lib_resume_intf(void *data, u8 *mac,
 	/*
 	 * Master or Ad-hoc mode require a new beacon update.
 	 */
-	if (vif->type == IEEE80211_IF_TYPE_AP ||
-	    vif->type == IEEE80211_IF_TYPE_IBSS)
+	if (vif->type == NL80211_IFTYPE_AP ||
+	    vif->type == NL80211_IFTYPE_ADHOC)
 		intf->delayed_flags |= DELAYED_UPDATE_BEACON;
 
 	spin_unlock(&intf->lock);
diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h
index 7bbc16b1b6c..797eb619aa0 100644
--- a/drivers/net/wireless/rt2x00/rt2x00lib.h
+++ b/drivers/net/wireless/rt2x00/rt2x00lib.h
@@ -88,7 +88,7 @@ void rt2x00lib_stop(struct rt2x00_dev *rt2x00dev);
  */
 void rt2x00lib_config_intf(struct rt2x00_dev *rt2x00dev,
 			   struct rt2x00_intf *intf,
-			   enum ieee80211_if_types type,
+			   enum nl80211_iftype type,
 			   u8 *mac, u8 *bssid);
 void rt2x00lib_config_erp(struct rt2x00_dev *rt2x00dev,
 			  struct rt2x00_intf *intf,
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 56829fad347..485c40de5cc 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -211,7 +211,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw,
 		return -ENODEV;
 
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		/*
 		 * We don't support mixed combinations of
 		 * sta and ap interfaces.
@@ -227,8 +227,8 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw,
 			return -ENOBUFS;
 
 		break;
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
 		/*
 		 * We don't support mixed combinations of
 		 * sta and ap interfaces.
@@ -268,7 +268,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw,
 	 * increase interface count and start initialization.
 	 */
 
-	if (conf->type == IEEE80211_IF_TYPE_AP)
+	if (conf->type == NL80211_IFTYPE_AP)
 		rt2x00dev->intf_ap_count++;
 	else
 		rt2x00dev->intf_sta_count++;
@@ -277,7 +277,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw,
 	spin_lock_init(&intf->seqlock);
 	intf->beacon = entry;
 
-	if (conf->type == IEEE80211_IF_TYPE_AP)
+	if (conf->type == NL80211_IFTYPE_AP)
 		memcpy(&intf->bssid, conf->mac_addr, ETH_ALEN);
 	memcpy(&intf->mac, conf->mac_addr, ETH_ALEN);
 
@@ -311,11 +311,11 @@ void rt2x00mac_remove_interface(struct ieee80211_hw *hw,
 	 * no interface is present.
 	 */
 	if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags) ||
-	    (conf->type == IEEE80211_IF_TYPE_AP && !rt2x00dev->intf_ap_count) ||
-	    (conf->type != IEEE80211_IF_TYPE_AP && !rt2x00dev->intf_sta_count))
+	    (conf->type == NL80211_IFTYPE_AP && !rt2x00dev->intf_ap_count) ||
+	    (conf->type != NL80211_IFTYPE_AP && !rt2x00dev->intf_sta_count))
 		return;
 
-	if (conf->type == IEEE80211_IF_TYPE_AP)
+	if (conf->type == NL80211_IFTYPE_AP)
 		rt2x00dev->intf_ap_count--;
 	else
 		rt2x00dev->intf_sta_count--;
@@ -331,7 +331,7 @@ void rt2x00mac_remove_interface(struct ieee80211_hw *hw,
 	 * are cleared to prevent false ACKing of frames.
 	 */
 	rt2x00lib_config_intf(rt2x00dev, intf,
-			      IEEE80211_IF_TYPE_INVALID, NULL, NULL);
+			      NL80211_IFTYPE_UNSPECIFIED, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(rt2x00mac_remove_interface);
 
diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c
index 861c76a65d6..abcd641c54b 100644
--- a/drivers/net/wireless/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl8180_dev.c
@@ -615,7 +615,7 @@ static int rtl8180_start(struct ieee80211_hw *dev)
 	reg |= RTL818X_CMD_TX_ENABLE;
 	rtl818x_iowrite8(priv, &priv->map->CMD, reg);
 
-	priv->mode = IEEE80211_IF_TYPE_MNTR;
+	priv->mode = NL80211_IFTYPE_MONITOR;
 	return 0;
 
  err_free_rings:
@@ -633,7 +633,7 @@ static void rtl8180_stop(struct ieee80211_hw *dev)
 	u8 reg;
 	int i;
 
-	priv->mode = IEEE80211_IF_TYPE_INVALID;
+	priv->mode = NL80211_IFTYPE_UNSPECIFIED;
 
 	rtl818x_iowrite16(priv, &priv->map->INT_MASK, 0);
 
@@ -661,11 +661,11 @@ static int rtl8180_add_interface(struct ieee80211_hw *dev,
 {
 	struct rtl8180_priv *priv = dev->priv;
 
-	if (priv->mode != IEEE80211_IF_TYPE_MNTR)
+	if (priv->mode != NL80211_IFTYPE_MONITOR)
 		return -EOPNOTSUPP;
 
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		priv->mode = conf->type;
 		break;
 	default:
@@ -688,7 +688,7 @@ static void rtl8180_remove_interface(struct ieee80211_hw *dev,
 				     struct ieee80211_if_init_conf *conf)
 {
 	struct rtl8180_priv *priv = dev->priv;
-	priv->mode = IEEE80211_IF_TYPE_MNTR;
+	priv->mode = NL80211_IFTYPE_MONITOR;
 	priv->vif = NULL;
 }
 
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index 8a42bfa6d4f..e9902613e2e 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -836,11 +836,11 @@ static int rtl8187_add_interface(struct ieee80211_hw *dev,
 	struct rtl8187_priv *priv = dev->priv;
 	int i;
 
-	if (priv->mode != IEEE80211_IF_TYPE_MNTR)
+	if (priv->mode != NL80211_IFTYPE_MONITOR)
 		return -EOPNOTSUPP;
 
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		priv->mode = conf->type;
 		break;
 	default:
@@ -865,7 +865,7 @@ static void rtl8187_remove_interface(struct ieee80211_hw *dev,
 {
 	struct rtl8187_priv *priv = dev->priv;
 	mutex_lock(&priv->conf_mutex);
-	priv->mode = IEEE80211_IF_TYPE_MNTR;
+	priv->mode = NL80211_IFTYPE_MONITOR;
 	priv->vif = NULL;
 	mutex_unlock(&priv->conf_mutex);
 }
@@ -1057,7 +1057,7 @@ static int __devinit rtl8187_probe(struct usb_interface *intf,
 	dev->wiphy->bands[IEEE80211_BAND_2GHZ] = &priv->band;
 
 
-	priv->mode = IEEE80211_IF_TYPE_MNTR;
+	priv->mode = NL80211_IFTYPE_MONITOR;
 	dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
 		     IEEE80211_HW_RX_INCLUDES_FCS;
 
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 3005dd1fde4..a3da014f928 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -716,15 +716,15 @@ static int zd_op_add_interface(struct ieee80211_hw *hw,
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
 
-	/* using IEEE80211_IF_TYPE_INVALID to indicate no mode selected */
-	if (mac->type != IEEE80211_IF_TYPE_INVALID)
+	/* using NL80211_IFTYPE_UNSPECIFIED to indicate no mode selected */
+	if (mac->type != NL80211_IFTYPE_UNSPECIFIED)
 		return -EOPNOTSUPP;
 
 	switch (conf->type) {
-	case IEEE80211_IF_TYPE_MNTR:
-	case IEEE80211_IF_TYPE_MESH_POINT:
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_MESH_POINT:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
 		mac->type = conf->type;
 		break;
 	default:
@@ -738,7 +738,7 @@ static void zd_op_remove_interface(struct ieee80211_hw *hw,
 				    struct ieee80211_if_init_conf *conf)
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
-	mac->type = IEEE80211_IF_TYPE_INVALID;
+	mac->type = NL80211_IFTYPE_UNSPECIFIED;
 	zd_set_beacon_interval(&mac->chip, 0);
 	zd_write_mac_addr(&mac->chip, NULL);
 }
@@ -757,8 +757,8 @@ static int zd_op_config_interface(struct ieee80211_hw *hw,
 	int associated;
 	int r;
 
-	if (mac->type == IEEE80211_IF_TYPE_MESH_POINT ||
-	    mac->type == IEEE80211_IF_TYPE_IBSS) {
+	if (mac->type == NL80211_IFTYPE_MESH_POINT ||
+	    mac->type == NL80211_IFTYPE_ADHOC) {
 		associated = true;
 		if (conf->changed & IEEE80211_IFCC_BEACON) {
 			struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
@@ -955,7 +955,7 @@ struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf)
 	spin_lock_init(&mac->lock);
 	mac->hw = hw;
 
-	mac->type = IEEE80211_IF_TYPE_INVALID;
+	mac->type = NL80211_IFTYPE_UNSPECIFIED;
 
 	memcpy(mac->channels, zd_channels, sizeof(zd_channels));
 	memcpy(mac->rates, zd_rates, sizeof(zd_rates));
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c81e579c17f..7f5ea55e00f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -477,33 +477,6 @@ struct ieee80211_conf {
 	struct ieee80211_ht_bss_info ht_bss_conf;
 };
 
-/**
- * enum ieee80211_if_types - types of 802.11 network interfaces
- *
- * @IEEE80211_IF_TYPE_INVALID: invalid interface type, not used
- *	by mac80211 itself
- * @IEEE80211_IF_TYPE_AP: interface in AP mode.
- * @IEEE80211_IF_TYPE_MGMT: special interface for communication with hostap
- *	daemon. Drivers should never see this type.
- * @IEEE80211_IF_TYPE_STA: interface in STA (client) mode.
- * @IEEE80211_IF_TYPE_IBSS: interface in IBSS (ad-hoc) mode.
- * @IEEE80211_IF_TYPE_MNTR: interface in monitor (rfmon) mode.
- * @IEEE80211_IF_TYPE_WDS: interface in WDS mode.
- * @IEEE80211_IF_TYPE_VLAN: VLAN interface bound to an AP, drivers
- *	will never see this type.
- * @IEEE80211_IF_TYPE_MESH_POINT: 802.11s mesh point
- */
-enum ieee80211_if_types {
-	IEEE80211_IF_TYPE_INVALID,
-	IEEE80211_IF_TYPE_AP,
-	IEEE80211_IF_TYPE_STA,
-	IEEE80211_IF_TYPE_IBSS,
-	IEEE80211_IF_TYPE_MESH_POINT,
-	IEEE80211_IF_TYPE_MNTR,
-	IEEE80211_IF_TYPE_WDS,
-	IEEE80211_IF_TYPE_VLAN,
-};
-
 /**
  * struct ieee80211_vif - per-interface data
  *
@@ -515,7 +488,7 @@ enum ieee80211_if_types {
  *	sizeof(void *).
  */
 struct ieee80211_vif {
-	enum ieee80211_if_types type;
+	enum nl80211_iftype type;
 	/* must be last */
 	u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *))));
 };
@@ -523,7 +496,7 @@ struct ieee80211_vif {
 static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
 {
 #ifdef CONFIG_MAC80211_MESH
-	return vif->type == IEEE80211_IF_TYPE_MESH_POINT;
+	return vif->type == NL80211_IFTYPE_MESH_POINT;
 #endif
 	return false;
 }
@@ -534,7 +507,7 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
  * @vif: pointer to a driver-use per-interface structure. The pointer
  *	itself is also used for various functions including
  *	ieee80211_beacon_get() and ieee80211_get_buffered_bc().
- * @type: one of &enum ieee80211_if_types constants. Determines the type of
+ * @type: one of &enum nl80211_iftype constants. Determines the type of
  *	added/removed interface.
  * @mac_addr: pointer to MAC address of the interface. This pointer is valid
  *	until the interface is removed (i.e. it cannot be used after
@@ -550,7 +523,7 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
  * in pure monitor mode.
  */
 struct ieee80211_if_init_conf {
-	enum ieee80211_if_types type;
+	enum nl80211_iftype type;
 	struct ieee80211_vif *vif;
 	void *mac_addr;
 };
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 6ec2127f9a6..d004351050c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -24,26 +24,19 @@ struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy)
 }
 EXPORT_SYMBOL(wiphy_to_hw);
 
-static enum ieee80211_if_types
-nl80211_type_to_mac80211_type(enum nl80211_iftype type)
+static bool nl80211_type_check(enum nl80211_iftype type)
 {
 	switch (type) {
-	case NL80211_IFTYPE_UNSPECIFIED:
-		return IEEE80211_IF_TYPE_STA;
 	case NL80211_IFTYPE_ADHOC:
-		return IEEE80211_IF_TYPE_IBSS;
 	case NL80211_IFTYPE_STATION:
-		return IEEE80211_IF_TYPE_STA;
 	case NL80211_IFTYPE_MONITOR:
-		return IEEE80211_IF_TYPE_MNTR;
 #ifdef CONFIG_MAC80211_MESH
 	case NL80211_IFTYPE_MESH_POINT:
-		return IEEE80211_IF_TYPE_MESH_POINT;
 #endif
 	case NL80211_IFTYPE_WDS:
-		return IEEE80211_IF_TYPE_WDS;
+		return true;
 	default:
-		return IEEE80211_IF_TYPE_INVALID;
+		return false;
 	}
 }
 
@@ -52,17 +45,15 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
 			       struct vif_params *params)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
-	enum ieee80211_if_types itype;
 	struct net_device *dev;
 	struct ieee80211_sub_if_data *sdata;
 	int err;
 
-	itype = nl80211_type_to_mac80211_type(type);
-	if (itype == IEEE80211_IF_TYPE_INVALID)
+	if (!nl80211_type_check(type))
 		return -EINVAL;
 
-	err = ieee80211_if_add(local, name, &dev, itype, params);
-	if (err || itype != IEEE80211_IF_TYPE_MNTR || !flags)
+	err = ieee80211_if_add(local, name, &dev, type, params);
+	if (err || type != NL80211_IFTYPE_MONITOR || !flags)
 		return err;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -93,7 +84,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct net_device *dev;
-	enum ieee80211_if_types itype;
 	struct ieee80211_sub_if_data *sdata;
 	int ret;
 
@@ -102,8 +92,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 	if (!dev)
 		return -ENODEV;
 
-	itype = nl80211_type_to_mac80211_type(type);
-	if (itype == IEEE80211_IF_TYPE_INVALID)
+	if (!nl80211_type_check(type))
 		return -EINVAL;
 
 	if (dev == local->mdev)
@@ -111,7 +100,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	ret = ieee80211_if_change_type(sdata, itype);
+	ret = ieee80211_if_change_type(sdata, type);
 	if (ret)
 		return ret;
 
@@ -120,7 +109,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 					    params->mesh_id_len,
 					    params->mesh_id);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR || !flags)
+	if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags)
 		return 0;
 
 	sdata->u.mntr_flags = *flags;
@@ -516,7 +505,7 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type != NL80211_IFTYPE_AP)
 		return -EINVAL;
 
 	old = sdata->u.ap.beacon;
@@ -539,7 +528,7 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type != NL80211_IFTYPE_AP)
 		return -EINVAL;
 
 	old = sdata->u.ap.beacon;
@@ -561,7 +550,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type != NL80211_IFTYPE_AP)
 		return -EINVAL;
 
 	old = sdata->u.ap.beacon;
@@ -716,8 +705,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	if (params->vlan) {
 		sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
-		if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN &&
-		    sdata->vif.type != IEEE80211_IF_TYPE_AP)
+		if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+		    sdata->vif.type != NL80211_IFTYPE_AP)
 			return -EINVAL;
 	} else
 		sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -747,8 +736,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 		return err;
 	}
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN ||
-	    sdata->vif.type == IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+	    sdata->vif.type == NL80211_IFTYPE_AP)
 		ieee80211_send_layer2_update(sta);
 
 	rcu_read_unlock();
@@ -812,8 +801,8 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 	if (params->vlan && params->vlan != sta->sdata->dev) {
 		vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
-		if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN &&
-		    vlansdata->vif.type != IEEE80211_IF_TYPE_AP) {
+		if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+		    vlansdata->vif.type != NL80211_IFTYPE_AP) {
 			rcu_read_unlock();
 			return -EINVAL;
 		}
@@ -847,7 +836,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
+	if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
 		return -ENOTSUPP;
 
 	rcu_read_lock();
@@ -903,7 +892,7 @@ static int ieee80211_change_mpath(struct wiphy *wiphy,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
+	if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
 		return -ENOTSUPP;
 
 	rcu_read_lock();
@@ -978,7 +967,7 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
+	if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
 		return -ENOTSUPP;
 
 	rcu_read_lock();
@@ -1006,7 +995,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)
+	if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
 		return -ENOTSUPP;
 
 	rcu_read_lock();
@@ -1035,7 +1024,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type != NL80211_IFTYPE_AP)
 		return -EINVAL;
 
 	if (params->use_cts_prot >= 0) {
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 0fa7681a3d2..1b33cad24ab 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -345,26 +345,26 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
 		return;
 
 	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_MESH_POINT:
 #ifdef CONFIG_MAC80211_MESH
 		add_mesh_stats(sdata);
 		add_mesh_config(sdata);
 #endif
 		break;
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
 		add_sta_files(sdata);
 		break;
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		add_ap_files(sdata);
 		break;
-	case IEEE80211_IF_TYPE_WDS:
+	case NL80211_IFTYPE_WDS:
 		add_wds_files(sdata);
 		break;
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		add_monitor_files(sdata);
 		break;
-	case IEEE80211_IF_TYPE_VLAN:
+	case NL80211_IFTYPE_AP_VLAN:
 		add_vlan_files(sdata);
 		break;
 	default:
@@ -482,26 +482,26 @@ static void del_files(struct ieee80211_sub_if_data *sdata)
 		return;
 
 	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_MESH_POINT:
 #ifdef CONFIG_MAC80211_MESH
 		del_mesh_stats(sdata);
 		del_mesh_config(sdata);
 #endif
 		break;
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
 		del_sta_files(sdata);
 		break;
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		del_ap_files(sdata);
 		break;
-	case IEEE80211_IF_TYPE_WDS:
+	case NL80211_IFTYPE_WDS:
 		del_wds_files(sdata);
 		break;
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		del_monitor_files(sdata);
 		break;
-	case IEEE80211_IF_TYPE_VLAN:
+	case NL80211_IFTYPE_AP_VLAN:
 		del_vlan_files(sdata);
 		break;
 	default:
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 4dc35c9dabc..bc3c71ad7ae 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -89,7 +89,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type == NL80211_IFTYPE_AP)
 		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
@@ -139,7 +139,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type == NL80211_IFTYPE_AP)
 		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
@@ -185,7 +185,7 @@ static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 	memset(mgmt, 0, 24);
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type == NL80211_IFTYPE_AP)
 		memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 21cc6d07b02..80d88f5ff90 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -954,10 +954,10 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local,
 /* interface handling */
 void ieee80211_if_setup(struct net_device *dev);
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
-		     struct net_device **new_dev, enum ieee80211_if_types type,
+		     struct net_device **new_dev, enum nl80211_iftype type,
 		     struct vif_params *params);
 int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
-			     enum ieee80211_if_types type);
+			     enum nl80211_iftype type);
 void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata);
 void ieee80211_remove_interfaces(struct ieee80211_local *local);
 
@@ -1001,7 +1001,7 @@ extern void *mac80211_wiphy_privid; /* for wiphy privid */
 extern const unsigned char rfc1042_header[6];
 extern const unsigned char bridge_tunnel_header[6];
 u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
-			enum ieee80211_if_types type);
+			enum nl80211_iftype type);
 int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 			     int rate, int erp, int short_preamble);
 void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index dab8eba2602..004fb23241d 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -41,7 +41,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 	sdata->fragment_next = 0;
 
 	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		beacon = sdata->u.ap.beacon;
 		rcu_assign_pointer(sdata->u.ap.beacon, NULL);
 		synchronize_rcu();
@@ -53,22 +53,23 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 		}
 
 		break;
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_MESH_POINT:
 		if (ieee80211_vif_is_mesh(&sdata->vif))
 			mesh_rmc_free(sdata);
 		break;
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
 		kfree(sdata->u.sta.extra_ie);
 		kfree(sdata->u.sta.assocreq_ies);
 		kfree(sdata->u.sta.assocresp_ies);
 		kfree_skb(sdata->u.sta.probe_resp);
 		break;
-	case IEEE80211_IF_TYPE_WDS:
-	case IEEE80211_IF_TYPE_VLAN:
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_WDS:
+	case NL80211_IFTYPE_AP_VLAN:
+	case NL80211_IFTYPE_MONITOR:
 		break;
-	case IEEE80211_IF_TYPE_INVALID:
+	case NL80211_IFTYPE_UNSPECIFIED:
+	case __NL80211_IFTYPE_AFTER_LAST:
 		BUG();
 		break;
 	}
@@ -81,7 +82,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
  * Helper function to initialise an interface to a specific type.
  */
 static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
-				  enum ieee80211_if_types type)
+				  enum nl80211_iftype type)
 {
 	/* clear type-dependent union */
 	memset(&sdata->u, 0, sizeof(sdata->u));
@@ -93,28 +94,29 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	sdata->dev->type = ARPHRD_ETHER;
 
 	switch (type) {
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
 		INIT_LIST_HEAD(&sdata->u.ap.vlans);
 		break;
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
 		ieee80211_sta_setup_sdata(sdata);
 		break;
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_MESH_POINT:
 		if (ieee80211_vif_is_mesh(&sdata->vif))
 			ieee80211_mesh_init_sdata(sdata);
 		break;
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP;
 		sdata->dev->hard_start_xmit = ieee80211_monitor_start_xmit;
 		sdata->u.mntr_flags = MONITOR_FLAG_CONTROL |
 				      MONITOR_FLAG_OTHER_BSS;
 		break;
-	case IEEE80211_IF_TYPE_WDS:
-	case IEEE80211_IF_TYPE_VLAN:
+	case NL80211_IFTYPE_WDS:
+	case NL80211_IFTYPE_AP_VLAN:
 		break;
-	case IEEE80211_IF_TYPE_INVALID:
+	case NL80211_IFTYPE_UNSPECIFIED:
+	case __NL80211_IFTYPE_AFTER_LAST:
 		BUG();
 		break;
 	}
@@ -123,7 +125,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 }
 
 int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
-			     enum ieee80211_if_types type)
+			     enum nl80211_iftype type)
 {
 	ASSERT_RTNL();
 
@@ -153,7 +155,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 }
 
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
-		     struct net_device **new_dev, enum ieee80211_if_types type,
+		     struct net_device **new_dev, enum nl80211_iftype type,
 		     struct vif_params *params)
 {
 	struct net_device *ndev;
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 6597c779e35..d5b95748db2 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -118,8 +118,8 @@ static const u8 *get_mac_for_key(struct ieee80211_key *key)
 	 * address to indicate a transmit-only key.
 	 */
 	if (key->conf.alg != ALG_WEP &&
-	    (key->sdata->vif.type == IEEE80211_IF_TYPE_AP ||
-	     key->sdata->vif.type == IEEE80211_IF_TYPE_VLAN))
+	    (key->sdata->vif.type == NL80211_IFTYPE_AP ||
+	     key->sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
 		addr = zero_addr;
 
 	if (key->sta)
@@ -331,7 +331,7 @@ void ieee80211_key_link(struct ieee80211_key *key,
 		 */
 		key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE;
 	} else {
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
 			struct sta_info *ap;
 
 			/*
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4c424acc01a..584a75bd6cf 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -146,7 +146,7 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
 	int meshhdrlen;
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	meshhdrlen = (sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) ? 5 : 0;
+	meshhdrlen = (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ? 5 : 0;
 
 	/* FIX: what would be proper limits for MTU?
 	 * This interface uses 802.3 frames. */
@@ -164,18 +164,16 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
 
 static inline int identical_mac_addr_allowed(int type1, int type2)
 {
-	return (type1 == IEEE80211_IF_TYPE_MNTR ||
-		type2 == IEEE80211_IF_TYPE_MNTR ||
-		(type1 == IEEE80211_IF_TYPE_AP &&
-		 type2 == IEEE80211_IF_TYPE_WDS) ||
-		(type1 == IEEE80211_IF_TYPE_WDS &&
-		 (type2 == IEEE80211_IF_TYPE_WDS ||
-		  type2 == IEEE80211_IF_TYPE_AP)) ||
-		(type1 == IEEE80211_IF_TYPE_AP &&
-		 type2 == IEEE80211_IF_TYPE_VLAN) ||
-		(type1 == IEEE80211_IF_TYPE_VLAN &&
-		 (type2 == IEEE80211_IF_TYPE_AP ||
-		  type2 == IEEE80211_IF_TYPE_VLAN)));
+	return type1 == NL80211_IFTYPE_MONITOR ||
+		type2 == NL80211_IFTYPE_MONITOR ||
+		(type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) ||
+		(type1 == NL80211_IFTYPE_WDS &&
+			(type2 == NL80211_IFTYPE_WDS ||
+			 type2 == NL80211_IFTYPE_AP)) ||
+		(type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_AP_VLAN) ||
+		(type1 == NL80211_IFTYPE_AP_VLAN &&
+			(type2 == NL80211_IFTYPE_AP ||
+			 type2 == NL80211_IFTYPE_AP_VLAN));
 }
 
 static int ieee80211_open(struct net_device *dev)
@@ -211,8 +209,8 @@ static int ieee80211_open(struct net_device *dev)
 			 * belonging to the same hardware. Then, however, we're
 			 * faced with having to adopt two different TSF timers...
 			 */
-			if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
-			    nsdata->vif.type == IEEE80211_IF_TYPE_IBSS)
+			if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
+			    nsdata->vif.type == NL80211_IFTYPE_ADHOC)
 				return -EBUSY;
 
 			/*
@@ -232,37 +230,38 @@ static int ieee80211_open(struct net_device *dev)
 			/*
 			 * can only add VLANs to enabled APs
 			 */
-			if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN &&
-			    nsdata->vif.type == IEEE80211_IF_TYPE_AP)
+			if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+			    nsdata->vif.type == NL80211_IFTYPE_AP)
 				sdata->bss = &nsdata->u.ap;
 		}
 	}
 
 	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_WDS:
+	case NL80211_IFTYPE_WDS:
 		if (!is_valid_ether_addr(sdata->u.wds.remote_addr))
 			return -ENOLINK;
 		break;
-	case IEEE80211_IF_TYPE_VLAN:
+	case NL80211_IFTYPE_AP_VLAN:
 		if (!sdata->bss)
 			return -ENOLINK;
 		list_add(&sdata->u.vlan.list, &sdata->bss->vlans);
 		break;
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		sdata->bss = &sdata->u.ap;
 		break;
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_MESH_POINT:
 		if (!ieee80211_vif_is_mesh(&sdata->vif))
 			break;
 		/* mesh ifaces must set allmulti to forward mcast traffic */
 		atomic_inc(&local->iff_allmultis);
 		break;
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_MNTR:
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_ADHOC:
 		/* no special treatment */
 		break;
-	case IEEE80211_IF_TYPE_INVALID:
+	case NL80211_IFTYPE_UNSPECIFIED:
+	case __NL80211_IFTYPE_AFTER_LAST:
 		/* cannot happen */
 		WARN_ON(1);
 		break;
@@ -309,10 +308,10 @@ static int ieee80211_open(struct net_device *dev)
 	}
 
 	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_VLAN:
+	case NL80211_IFTYPE_AP_VLAN:
 		/* no need to tell driver */
 		break;
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) {
 			local->cooked_mntrs++;
 			break;
@@ -336,8 +335,8 @@ static int ieee80211_open(struct net_device *dev)
 		ieee80211_configure_filter(local);
 		netif_addr_unlock_bh(local->mdev);
 		break;
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
 		sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET;
 		/* fall through */
 	default:
@@ -354,14 +353,14 @@ static int ieee80211_open(struct net_device *dev)
 		ieee80211_bss_info_change_notify(sdata, changed);
 		ieee80211_enable_keys(sdata);
 
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
+		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
 		    !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
 			netif_carrier_off(dev);
 		else
 			netif_carrier_on(dev);
 	}
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
+	if (sdata->vif.type == NL80211_IFTYPE_WDS) {
 		/* Create STA entry for the WDS peer */
 		sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr,
 				     GFP_KERNEL);
@@ -417,8 +416,8 @@ static int ieee80211_open(struct net_device *dev)
 	 * yet be effective. Trigger execution of ieee80211_sta_work
 	 * to fix this.
 	 */
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 		queue_work(local->hw.workqueue, &ifsta->work);
 	}
@@ -433,7 +432,7 @@ static int ieee80211_open(struct net_device *dev)
 		local->ops->stop(local_to_hw(local));
  err_del_bss:
 	sdata->bss = NULL;
-	if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		list_del(&sdata->u.vlan.list);
 	return res;
 }
@@ -496,7 +495,7 @@ static int ieee80211_stop(struct net_device *dev)
 	dev_mc_unsync(local->mdev, dev);
 
 	/* APs need special treatment */
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
+	if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		struct ieee80211_sub_if_data *vlan, *tmp;
 		struct beacon_data *old_beacon = sdata->u.ap.beacon;
 
@@ -515,11 +514,11 @@ static int ieee80211_stop(struct net_device *dev)
 	local->open_count--;
 
 	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_VLAN:
+	case NL80211_IFTYPE_AP_VLAN:
 		list_del(&sdata->u.vlan.list);
 		/* no need to tell driver */
 		break;
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) {
 			local->cooked_mntrs--;
 			break;
@@ -542,8 +541,8 @@ static int ieee80211_stop(struct net_device *dev)
 		ieee80211_configure_filter(local);
 		netif_addr_unlock_bh(local->mdev);
 		break;
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
 		sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED;
 		memset(sdata->u.sta.bssid, 0, ETH_ALEN);
 		del_timer_sync(&sdata->u.sta.timer);
@@ -569,7 +568,7 @@ static int ieee80211_stop(struct net_device *dev)
 		sdata->u.sta.extra_ie = NULL;
 		sdata->u.sta.extra_ie_len = 0;
 		/* fall through */
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_MESH_POINT:
 		if (ieee80211_vif_is_mesh(&sdata->vif)) {
 			/* allmulti is always set on mesh ifaces */
 			atomic_dec(&local->iff_allmultis);
@@ -698,12 +697,12 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
 	memset(&conf, 0, sizeof(conf));
 	conf.changed = changed;
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		conf.bssid = sdata->u.sta.bssid;
 		conf.ssid = sdata->u.sta.ssid;
 		conf.ssid_len = sdata->u.sta.ssid_len;
-	} else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
+	} else if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		conf.bssid = sdata->dev->dev_addr;
 		conf.ssid = sdata->u.ap.ssid;
 		conf.ssid_len = sdata->u.ap.ssid_len;
@@ -1204,7 +1203,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-		if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) {
+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
 			if (!netif_running(sdata->dev))
 				continue;
 
@@ -1450,7 +1449,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	/* add one default STA interface */
 	result = ieee80211_if_add(local, "wlan%d", NULL,
-				  IEEE80211_IF_TYPE_STA, NULL);
+				  NL80211_IFTYPE_STATION, NULL);
 	if (result)
 		printk(KERN_WARNING "%s: Failed to add default virtual iface\n",
 		       wiphy_name(local->hw.wiphy));
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 55bc6071393..8a2cfd3609b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -678,7 +678,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 
 	ifsta->flags |= IEEE80211_STA_ASSOCIATED;
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return;
 
 	bss = ieee80211_rx_bss_get(local, ifsta->bssid,
@@ -1002,17 +1002,17 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	DECLARE_MAC_BUF(mac);
 
 	if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
+	    sdata->vif.type != NL80211_IFTYPE_ADHOC)
 		return;
 
 	if (len < 24 + 6)
 		return;
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
+	if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
 	    memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0)
 		return;
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
+	if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
 	    memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0)
 		return;
 
@@ -1020,7 +1020,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
 	status_code = le16_to_cpu(mgmt->u.auth.status_code);
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		/*
 		 * IEEE 802.11 standard does not require authentication in IBSS
 		 * networks and most implementations do not seem to use it.
@@ -1487,7 +1487,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
 		return;
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates &&
+	if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates &&
 	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
 		supp_rates = ieee80211_sta_get_rates(local, elems, band);
 
@@ -1532,14 +1532,14 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	 * In STA mode, the remaining parameters should not be overridden
 	 * by beacons because they're not necessarily accurate there.
 	 */
-	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
+	if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
 	    bss->last_probe_resp && beacon) {
 		ieee80211_rx_bss_put(local, bss);
 		return;
 	}
 
 	/* check if we need to merge IBSS */
-	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && beacon &&
+	if (sdata->vif.type == NL80211_IFTYPE_ADHOC && beacon &&
 	    bss->capability & WLAN_CAPABILITY_IBSS &&
 	    bss->freq == local->oper_channel->center_freq &&
 	    elems->ssid_len == sdata->u.sta.ssid_len &&
@@ -1649,7 +1649,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return;
 	ifsta = &sdata->u.sta;
 
@@ -1700,7 +1700,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 	DECLARE_MAC_BUF(mac3);
 #endif
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS ||
+	if (sdata->vif.type != NL80211_IFTYPE_ADHOC ||
 	    ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED ||
 	    len < 24 + 2 || !ifsta->probe_resp)
 		return;
@@ -2212,8 +2212,8 @@ static void ieee80211_sta_work(struct work_struct *work)
 	if (local->sw_scanning || local->hw_scanning)
 		return;
 
-	if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-		    sdata->vif.type != IEEE80211_IF_TYPE_IBSS))
+	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION &&
+		    sdata->vif.type != NL80211_IFTYPE_ADHOC))
 		return;
 	ifsta = &sdata->u.sta;
 
@@ -2273,7 +2273,7 @@ static void ieee80211_sta_work(struct work_struct *work)
 
 static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
 {
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type == NL80211_IFTYPE_STATION)
 		queue_work(sdata->local->hw.workqueue,
 			   &sdata->u.sta.work);
 }
@@ -2355,7 +2355,7 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return;
 
 	if ((ifsta->flags & (IEEE80211_STA_BSSID_SET |
@@ -2407,7 +2407,7 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size
 	else
 		ifsta->flags &= ~IEEE80211_STA_SSID_SET;
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
+	if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
 	    !(ifsta->flags & IEEE80211_STA_BSSID_SET)) {
 		ifsta->ibss_join_req = jiffies;
 		ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH;
@@ -2482,8 +2482,8 @@ int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason
 	printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n",
 	       sdata->dev->name, reason);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+	    sdata->vif.type != NL80211_IFTYPE_ADHOC)
 		return -EINVAL;
 
 	ieee80211_set_disassoc(sdata, ifsta, true, true, reason);
@@ -2497,7 +2497,7 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
 	printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n",
 	       sdata->dev->name, reason);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return -EINVAL;
 
 	if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED))
@@ -2513,7 +2513,7 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
 	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
 	struct ieee80211_if_sta *ifsta;
 
-	if (sdata && sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		ifsta = &sdata->u.sta;
 		if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
 		    (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) &&
@@ -2539,7 +2539,7 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw,
 	case IEEE80211_NOTIFY_RE_ASSOC:
 		rcu_read_lock();
 		list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-			if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+			if (sdata->vif.type != NL80211_IFTYPE_STATION)
 				continue;
 
 			ieee80211_sta_req_auth(sdata, &sdata->u.sta);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 33530b29c42..8c3dda5f00b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -295,7 +295,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		if (!netif_running(sdata->dev))
 			continue;
 
-		if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR)
+		if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
 			continue;
 
 		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)
@@ -512,7 +512,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 
 	if (unlikely((ieee80211_is_data(hdr->frame_control) ||
 		      ieee80211_is_pspoll(hdr->frame_control)) &&
-		     rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
+		     rx->sdata->vif.type != NL80211_IFTYPE_ADHOC &&
 		     (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) {
 		if ((!ieee80211_has_fromds(hdr->frame_control) &&
 		     !ieee80211_has_tods(hdr->frame_control) &&
@@ -724,14 +724,14 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	/* Update last_rx only for IBSS packets which are for the current
 	 * BSSID to avoid keeping the current IBSS network alive in cases where
 	 * other STAs are using different BSSID. */
-	if (rx->sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
-						IEEE80211_IF_TYPE_IBSS);
+						NL80211_IFTYPE_ADHOC);
 		if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0)
 			sta->last_rx = jiffies;
 	} else
 	if (!is_multicast_ether_addr(hdr->addr1) ||
-	    rx->sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+	    rx->sdata->vif.type == NL80211_IFTYPE_STATION) {
 		/* Update last_rx only for unicast frames in order to prevent
 		 * the Probe Request frames (the only broadcast frames from a
 		 * STA in infrastructure mode) from keeping a connection alive.
@@ -751,8 +751,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->last_noise = rx->status->noise;
 
 	if (!ieee80211_has_morefrags(hdr->frame_control) &&
-	    (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP ||
-	     rx->sdata->vif.type == IEEE80211_IF_TYPE_VLAN)) {
+	    (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
+	     rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) {
 		/* Change STA power saving mode only in the end of a frame
 		 * exchange sequence */
 		if (test_sta_flags(sta, WLAN_STA_PS) &&
@@ -982,8 +982,8 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
 		   !(rx->flags & IEEE80211_RX_RA_MATCH)))
 		return RX_CONTINUE;
 
-	if ((sdata->vif.type != IEEE80211_IF_TYPE_AP) &&
-	    (sdata->vif.type != IEEE80211_IF_TYPE_VLAN))
+	if ((sdata->vif.type != NL80211_IFTYPE_AP) &&
+	    (sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
 		return RX_DROP_UNUSABLE;
 
 	skb = skb_dequeue(&rx->sta->tx_filtered);
@@ -1131,23 +1131,23 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 	switch (hdr->frame_control &
 		cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
 	case __constant_cpu_to_le16(IEEE80211_FCTL_TODS):
-		if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP &&
-			     sdata->vif.type != IEEE80211_IF_TYPE_VLAN))
+		if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP &&
+			     sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
 			return -1;
 		break;
 	case __constant_cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
-		if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS &&
-			     sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT))
+		if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS &&
+			     sdata->vif.type != NL80211_IFTYPE_MESH_POINT))
 			return -1;
 		break;
 	case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS):
-		if (sdata->vif.type != IEEE80211_IF_TYPE_STA ||
+		if (sdata->vif.type != NL80211_IFTYPE_STATION ||
 		    (is_multicast_ether_addr(dst) &&
 		     !compare_ether_addr(src, dev->dev_addr)))
 			return -1;
 		break;
 	case __constant_cpu_to_le16(0):
-		if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
+		if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
 			return -1;
 		break;
 	}
@@ -1221,8 +1221,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 	skb = rx->skb;
 	xmit_skb = NULL;
 
-	if ((sdata->vif.type == IEEE80211_IF_TYPE_AP ||
-	     sdata->vif.type == IEEE80211_IF_TYPE_VLAN) &&
+	if ((sdata->vif.type == NL80211_IFTYPE_AP ||
+	     sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
 	    !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
 	    (rx->flags & IEEE80211_RX_RA_MATCH)) {
 		if (is_multicast_ether_addr(ehdr->h_dest)) {
@@ -1536,8 +1536,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 	 * FIXME: revisit this, I'm sure we should handle most
 	 *	  of these frames in other modes as well!
 	 */
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+	    sdata->vif.type != NL80211_IFTYPE_ADHOC)
 		return RX_DROP_MONITOR;
 
 	switch (mgmt->u.action.category) {
@@ -1595,8 +1595,8 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+	    sdata->vif.type != NL80211_IFTYPE_ADHOC)
 		return RX_DROP_MONITOR;
 
 	if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
@@ -1632,7 +1632,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
 	if (!ieee80211_has_protected(hdr->frame_control))
 		goto ignore;
 
-	if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) {
+	if (rx->sdata->vif.type == NL80211_IFTYPE_AP && keyidx) {
 		/*
 		 * APs with pairwise keys should never receive Michael MIC
 		 * errors for non-zero keyidx because these are reserved for
@@ -1702,7 +1702,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx)
 		if (!netif_running(sdata->dev))
 			continue;
 
-		if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR ||
+		if (sdata->vif.type != NL80211_IFTYPE_MONITOR ||
 		    !(sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES))
 			continue;
 
@@ -1801,7 +1801,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 	int multicast = is_multicast_ether_addr(hdr->addr1);
 
 	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		if (!bssid)
 			return 0;
 		if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) {
@@ -1816,7 +1816,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			rx->flags &= ~IEEE80211_RX_RA_MATCH;
 		}
 		break;
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 		if (!bssid)
 			return 0;
 		if (ieee80211_is_beacon(hdr->frame_control)) {
@@ -1837,7 +1837,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 						bssid, hdr->addr2,
 						BIT(rx->status->rate_idx));
 		break;
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_MESH_POINT:
 		if (!multicast &&
 		    compare_ether_addr(sdata->dev->dev_addr,
 				       hdr->addr1) != 0) {
@@ -1847,8 +1847,8 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			rx->flags &= ~IEEE80211_RX_RA_MATCH;
 		}
 		break;
-	case IEEE80211_IF_TYPE_VLAN:
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+	case NL80211_IFTYPE_AP:
 		if (!bssid) {
 			if (compare_ether_addr(sdata->dev->dev_addr,
 					       hdr->addr1))
@@ -1860,16 +1860,17 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			rx->flags &= ~IEEE80211_RX_RA_MATCH;
 		}
 		break;
-	case IEEE80211_IF_TYPE_WDS:
+	case NL80211_IFTYPE_WDS:
 		if (bssid || !ieee80211_is_data(hdr->frame_control))
 			return 0;
 		if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2))
 			return 0;
 		break;
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		/* take everything */
 		break;
-	case IEEE80211_IF_TYPE_INVALID:
+	case NL80211_IFTYPE_UNSPECIFIED:
+	case __NL80211_IFTYPE_AFTER_LAST:
 		/* should never get here */
 		WARN_ON(1);
 		break;
@@ -1930,7 +1931,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 		if (!netif_running(sdata->dev))
 			continue;
 
-		if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR)
+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
 			continue;
 
 		bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 5e719e7b720..8e6685e7ae8 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -475,7 +475,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		/* Tell AP we're back */
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
 			if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) {
 				ieee80211_send_nullfunc(local, sdata, 0);
 				netif_tx_wake_all_queues(sdata->dev);
@@ -539,7 +539,7 @@ void ieee80211_scan_work(struct work_struct *work)
 		chan = &sband->channels[local->scan_channel_idx];
 
 		if (chan->flags & IEEE80211_CHAN_DISABLED ||
-		    (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
+		    (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
 		     chan->flags & IEEE80211_CHAN_NO_IBSS))
 			skip = 1;
 
@@ -638,7 +638,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
 			if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) {
 				netif_tx_stop_all_queues(sdata->dev);
 				ieee80211_send_nullfunc(local, sdata, 1);
@@ -681,7 +681,7 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_sta *ifsta;
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return ieee80211_start_scan(sdata, ssid, ssid_len);
 
 	/*
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 3370b262563..31246d8e532 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -319,7 +319,7 @@ int sta_info_insert(struct sta_info *sta)
 
 	/* notify driver */
 	if (local->ops->sta_notify) {
-		if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
+		if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 			sdata = container_of(sdata->bss,
 					     struct ieee80211_sub_if_data,
 					     u.ap);
@@ -456,7 +456,7 @@ static void __sta_info_unlink(struct sta_info **sta)
 	local->num_sta--;
 
 	if (local->ops->sta_notify) {
-		if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
+		if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 			sdata = container_of(sdata->bss,
 					     struct ieee80211_sub_if_data,
 					     u.ap);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index a523189f10c..f4bcc589d67 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -226,7 +226,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 	    !ieee80211_is_probe_req(hdr->frame_control))
 		return TX_DROP;
 
-	if (tx->sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT)
+	if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
 		return TX_CONTINUE;
 
 	if (tx->flags & IEEE80211_TX_PS_BUFFERED)
@@ -236,7 +236,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 
 	if (likely(tx->flags & IEEE80211_TX_UNICAST)) {
 		if (unlikely(!(sta_flags & WLAN_STA_ASSOC) &&
-			     tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
+			     tx->sdata->vif.type != NL80211_IFTYPE_ADHOC &&
 			     ieee80211_is_data(hdr->frame_control))) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 			DECLARE_MAC_BUF(mac);
@@ -250,7 +250,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 	} else {
 		if (unlikely(ieee80211_is_data(hdr->frame_control) &&
 			     tx->local->num_sta == 0 &&
-			     tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS)) {
+			     tx->sdata->vif.type != NL80211_IFTYPE_ADHOC)) {
 			/*
 			 * No associated STAs - no need to send multicast
 			 * frames.
@@ -281,7 +281,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		struct ieee80211_if_ap *ap;
-		if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
+		if (sdata->vif.type != NL80211_IFTYPE_AP)
 			continue;
 		ap = &sdata->u.ap;
 		skb = skb_dequeue(&ap->ps_bc_buf);
@@ -979,7 +979,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 
 	/* process and remove the injection radiotap header */
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (unlikely(sdata->vif.type == IEEE80211_IF_TYPE_MNTR)) {
+	if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) {
 		if (__ieee80211_parse_tx_radiotap(tx, skb) == TX_DROP)
 			return TX_DROP;
 
@@ -1457,8 +1457,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
 
 	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_AP:
-	case IEEE80211_IF_TYPE_VLAN:
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
 		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
 		/* DA BSSID SA */
 		memcpy(hdr.addr1, skb->data, ETH_ALEN);
@@ -1466,7 +1466,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
 		hdrlen = 24;
 		break;
-	case IEEE80211_IF_TYPE_WDS:
+	case NL80211_IFTYPE_WDS:
 		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
 		/* RA TA DA SA */
 		memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN);
@@ -1476,7 +1476,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		hdrlen = 30;
 		break;
 #ifdef CONFIG_MAC80211_MESH
-	case IEEE80211_IF_TYPE_MESH_POINT:
+	case NL80211_IFTYPE_MESH_POINT:
 		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
 		/* RA TA DA SA */
 		memset(hdr.addr1, 0, ETH_ALEN);
@@ -1493,7 +1493,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		hdrlen = 30;
 		break;
 #endif
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
 		/* BSSID SA DA */
 		memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN);
@@ -1501,7 +1501,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		memcpy(hdr.addr3, skb->data, ETH_ALEN);
 		hdrlen = 24;
 		break;
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 		/* DA SA BSSID */
 		memcpy(hdr.addr1, skb->data, ETH_ALEN);
 		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
@@ -1812,7 +1812,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	sdata = vif_to_sdata(vif);
 	bdev = sdata->dev;
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
+	if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		ap = &sdata->u.ap;
 		beacon = rcu_dereference(ap->beacon);
 		if (ap && beacon) {
@@ -1854,7 +1854,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 			num_beacons = &ap->num_beacons;
 		} else
 			goto out;
-	} else if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		struct ieee80211_hdr *hdr;
 		ifsta = &sdata->u.sta;
 
@@ -1999,7 +1999,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	rcu_read_lock();
 	beacon = rcu_dereference(bss->beacon);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon || !beacon->head)
+	if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head)
 		goto out;
 
 	if (bss->dtim_count != 0)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d6aca91e612..6eb222369bc 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -43,7 +43,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) =
 
 
 u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
-			enum ieee80211_if_types type)
+			enum nl80211_iftype type)
 {
 	__le16 fc = hdr->frame_control;
 
@@ -77,10 +77,10 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
 
 		if (ieee80211_is_back_req(fc)) {
 			switch (type) {
-			case IEEE80211_IF_TYPE_STA:
+			case NL80211_IFTYPE_STATION:
 				return hdr->addr2;
-			case IEEE80211_IF_TYPE_AP:
-			case IEEE80211_IF_TYPE_VLAN:
+			case NL80211_IFTYPE_AP:
+			case NL80211_IFTYPE_AP_VLAN:
 				return hdr->addr1;
 			default:
 				break; /* fall through to the return */
@@ -376,15 +376,16 @@ void ieee80211_iterate_active_interfaces(
 
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		switch (sdata->vif.type) {
-		case IEEE80211_IF_TYPE_INVALID:
-		case IEEE80211_IF_TYPE_MNTR:
-		case IEEE80211_IF_TYPE_VLAN:
+		case __NL80211_IFTYPE_AFTER_LAST:
+		case NL80211_IFTYPE_UNSPECIFIED:
+		case NL80211_IFTYPE_MONITOR:
+		case NL80211_IFTYPE_AP_VLAN:
 			continue;
-		case IEEE80211_IF_TYPE_AP:
-		case IEEE80211_IF_TYPE_STA:
-		case IEEE80211_IF_TYPE_IBSS:
-		case IEEE80211_IF_TYPE_WDS:
-		case IEEE80211_IF_TYPE_MESH_POINT:
+		case NL80211_IFTYPE_AP:
+		case NL80211_IFTYPE_STATION:
+		case NL80211_IFTYPE_ADHOC:
+		case NL80211_IFTYPE_WDS:
+		case NL80211_IFTYPE_MESH_POINT:
 			break;
 		}
 		if (netif_running(sdata->dev))
@@ -409,15 +410,16 @@ void ieee80211_iterate_active_interfaces_atomic(
 
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		switch (sdata->vif.type) {
-		case IEEE80211_IF_TYPE_INVALID:
-		case IEEE80211_IF_TYPE_MNTR:
-		case IEEE80211_IF_TYPE_VLAN:
+		case __NL80211_IFTYPE_AFTER_LAST:
+		case NL80211_IFTYPE_UNSPECIFIED:
+		case NL80211_IFTYPE_MONITOR:
+		case NL80211_IFTYPE_AP_VLAN:
 			continue;
-		case IEEE80211_IF_TYPE_AP:
-		case IEEE80211_IF_TYPE_STA:
-		case IEEE80211_IF_TYPE_IBSS:
-		case IEEE80211_IF_TYPE_WDS:
-		case IEEE80211_IF_TYPE_MESH_POINT:
+		case NL80211_IFTYPE_AP:
+		case NL80211_IFTYPE_STATION:
+		case NL80211_IFTYPE_ADHOC:
+		case NL80211_IFTYPE_WDS:
+		case NL80211_IFTYPE_MESH_POINT:
 			break;
 		}
 		if (netif_running(sdata->dev))
@@ -622,7 +624,7 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
 	chan = ieee80211_get_channel(local->hw.wiphy, freqMHz);
 
 	if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) {
-		if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
+		if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
 		    chan->flags & IEEE80211_CHAN_NO_IBSS) {
 			printk(KERN_DEBUG "%s: IBSS not allowed on frequency "
 				"%d MHz\n", sdata->dev->name, chan->center_freq);
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 77b68ed8b83..aef9707700f 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -122,8 +122,8 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
 	if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
 		return -EOPNOTSUPP;
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length);
 		if (ret)
 			return ret;
@@ -273,21 +273,21 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	int type;
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		return -EOPNOTSUPP;
 
 	switch (*mode) {
 	case IW_MODE_INFRA:
-		type = IEEE80211_IF_TYPE_STA;
+		type = NL80211_IFTYPE_STATION;
 		break;
 	case IW_MODE_ADHOC:
-		type = IEEE80211_IF_TYPE_IBSS;
+		type = NL80211_IFTYPE_ADHOC;
 		break;
 	case IW_MODE_REPEAT:
-		type = IEEE80211_IF_TYPE_WDS;
+		type = NL80211_IFTYPE_WDS;
 		break;
 	case IW_MODE_MONITOR:
-		type = IEEE80211_IF_TYPE_MNTR;
+		type = NL80211_IFTYPE_MONITOR;
 		break;
 	default:
 		return -EINVAL;
@@ -305,22 +305,22 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	switch (sdata->vif.type) {
-	case IEEE80211_IF_TYPE_AP:
+	case NL80211_IFTYPE_AP:
 		*mode = IW_MODE_MASTER;
 		break;
-	case IEEE80211_IF_TYPE_STA:
+	case NL80211_IFTYPE_STATION:
 		*mode = IW_MODE_INFRA;
 		break;
-	case IEEE80211_IF_TYPE_IBSS:
+	case NL80211_IFTYPE_ADHOC:
 		*mode = IW_MODE_ADHOC;
 		break;
-	case IEEE80211_IF_TYPE_MNTR:
+	case NL80211_IFTYPE_MONITOR:
 		*mode = IW_MODE_MONITOR;
 		break;
-	case IEEE80211_IF_TYPE_WDS:
+	case NL80211_IFTYPE_WDS:
 		*mode = IW_MODE_REPEAT;
 		break;
-	case IEEE80211_IF_TYPE_VLAN:
+	case NL80211_IFTYPE_AP_VLAN:
 		*mode = IW_MODE_SECOND;		/* FIXME */
 		break;
 	default:
@@ -336,13 +336,13 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev,
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type == NL80211_IFTYPE_STATION)
 		sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL;
 
 	/* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */
 	if (freq->e == 0) {
 		if (freq->m < 0) {
-			if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
+			if (sdata->vif.type == NL80211_IFTYPE_STATION)
 				sdata->u.sta.flags |=
 					IEEE80211_STA_AUTO_CHANNEL_SEL;
 			return 0;
@@ -386,8 +386,8 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
 		len--;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		int ret;
 		if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
 			if (len > IEEE80211_MAX_SSID_LEN)
@@ -407,7 +407,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
 		return 0;
 	}
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
+	if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		memcpy(sdata->u.ap.ssid, ssid, len);
 		memset(sdata->u.ap.ssid + len, 0,
 		       IEEE80211_MAX_SSID_LEN - len);
@@ -426,8 +426,8 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
 
 	struct ieee80211_sub_if_data *sdata;
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		int res = ieee80211_sta_get_ssid(sdata, ssid, &len);
 		if (res == 0) {
 			data->length = len;
@@ -437,7 +437,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
 		return res;
 	}
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
+	if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		len = sdata->u.ap.ssid_len;
 		if (len > IW_ESSID_MAX_SIZE)
 			len = IW_ESSID_MAX_SIZE;
@@ -457,8 +457,8 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		int ret;
 		if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
 			memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data,
@@ -477,7 +477,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
 			return ret;
 		ieee80211_sta_req_auth(sdata, &sdata->u.sta);
 		return 0;
-	} else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
+	} else if (sdata->vif.type == NL80211_IFTYPE_WDS) {
 		/*
 		 * If it is necessary to update the WDS peer address
 		 * while the interface is running, then we need to do
@@ -505,8 +505,8 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
+	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATED ||
 		    sdata->u.sta.state == IEEE80211_STA_MLME_IBSS_JOINED) {
 			ap_addr->sa_family = ARPHRD_ETHER;
@@ -516,7 +516,7 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
 			memset(&ap_addr->sa_data, 0, ETH_ALEN);
 			return 0;
 		}
-	} else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
+	} else if (sdata->vif.type == NL80211_IFTYPE_WDS) {
 		ap_addr->sa_family = ARPHRD_ETHER;
 		memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN);
 		return 0;
@@ -538,10 +538,10 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_AP)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+	    sdata->vif.type != NL80211_IFTYPE_ADHOC &&
+	    sdata->vif.type != NL80211_IFTYPE_MESH_POINT &&
+	    sdata->vif.type != NL80211_IFTYPE_AP)
 		return -EOPNOTSUPP;
 
 	/* if SSID was specified explicitly then use that */
@@ -627,7 +627,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return -EOPNOTSUPP;
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
@@ -858,8 +858,8 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev,
 	struct iw_mlme *mlme = (struct iw_mlme *) extra;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
-	    sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
+	if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+	    sdata->vif.type != NL80211_IFTYPE_ADHOC)
 		return -EINVAL;
 
 	switch (mlme->cmd) {
@@ -954,7 +954,7 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev,
 	erq->length = sdata->keys[idx]->conf.keylen;
 	erq->flags |= IW_ENCODE_ENABLED;
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
+	if (sdata->vif.type == NL80211_IFTYPE_STATION) {
 		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 		switch (ifsta->auth_alg) {
 		case WLAN_AUTH_OPEN:
@@ -1028,7 +1028,7 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
 		sdata->drop_unencrypted = !!data->value;
 		break;
 	case IW_AUTH_PRIVACY_INVOKED:
-		if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
+		if (sdata->vif.type != NL80211_IFTYPE_STATION)
 			ret = -EINVAL;
 		else {
 			sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
@@ -1043,8 +1043,8 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
 		}
 		break;
 	case IW_AUTH_80211_AUTH_ALG:
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-		    sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
+		if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+		    sdata->vif.type == NL80211_IFTYPE_ADHOC)
 			sdata->u.sta.auth_algs = data->value;
 		else
 			ret = -EOPNOTSUPP;
@@ -1066,8 +1066,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev
 
 	rcu_read_lock();
 
-	if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-	    sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
+	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC)
 		sta = sta_info_get(local, sdata->u.sta.bssid);
 	if (!sta) {
 		wstats->discard.fragment = 0;
@@ -1097,8 +1097,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev,
 
 	switch (data->flags & IW_AUTH_INDEX) {
 	case IW_AUTH_80211_AUTH_ALG:
-		if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
-		    sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
+		if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+		    sdata->vif.type == NL80211_IFTYPE_ADHOC)
 			data->value = sdata->u.sta.auth_algs;
 		else
 			ret = -EOPNOTSUPP;
-- 
cgit v1.2.3-70-g09d2


From 0d143fe1e2efc084fa730d2dfa22d0d1ca2ee5f1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:01:59 +0200
Subject: mac80211: move regular interface handling

Move the code to handle regular interfaces out of main.c and
into iface.c, keep only the master interface stuff in main.c.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |   2 +-
 net/mac80211/iface.c       | 534 ++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/main.c        | 565 ++-------------------------------------------
 3 files changed, 550 insertions(+), 551 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 80d88f5ff90..6bd6a6306da 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -891,6 +891,7 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
 			struct ieee80211_ht_bss_info *req_bss_cap);
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 				      u32 changed);
+void ieee80211_configure_filter(struct ieee80211_local *local);
 
 /* wireless extensions */
 extern const struct iw_handler_def ieee80211_iw_handler_def;
@@ -952,7 +953,6 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local,
 			  struct ieee80211_bss *bss);
 
 /* interface handling */
-void ieee80211_if_setup(struct net_device *dev);
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		     struct net_device **new_dev, enum nl80211_iftype type,
 		     struct vif_params *params);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 004fb23241d..f528962b13e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1,4 +1,6 @@
 /*
+ * Interface handling (except master interface)
+ *
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
@@ -17,7 +19,539 @@
 #include "sta_info.h"
 #include "debugfs_netdev.h"
 #include "mesh.h"
+#include "led.h"
+
+static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
+{
+	int meshhdrlen;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	meshhdrlen = (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ? 5 : 0;
+
+	/* FIX: what would be proper limits for MTU?
+	 * This interface uses 802.3 frames. */
+	if (new_mtu < 256 ||
+	    new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) {
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+	printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu);
+#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static inline int identical_mac_addr_allowed(int type1, int type2)
+{
+	return type1 == NL80211_IFTYPE_MONITOR ||
+		type2 == NL80211_IFTYPE_MONITOR ||
+		(type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) ||
+		(type1 == NL80211_IFTYPE_WDS &&
+			(type2 == NL80211_IFTYPE_WDS ||
+			 type2 == NL80211_IFTYPE_AP)) ||
+		(type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_AP_VLAN) ||
+		(type1 == NL80211_IFTYPE_AP_VLAN &&
+			(type2 == NL80211_IFTYPE_AP ||
+			 type2 == NL80211_IFTYPE_AP_VLAN));
+}
+
+static int ieee80211_open(struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata, *nsdata;
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct sta_info *sta;
+	struct ieee80211_if_init_conf conf;
+	u32 changed = 0;
+	int res;
+	bool need_hw_reconfig = 0;
+	u8 null_addr[ETH_ALEN] = {0};
+
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	/* fail early if user set an invalid address */
+	if (compare_ether_addr(dev->dev_addr, null_addr) &&
+	    !is_valid_ether_addr(dev->dev_addr))
+		return -EADDRNOTAVAIL;
+
+	/* we hold the RTNL here so can safely walk the list */
+	list_for_each_entry(nsdata, &local->interfaces, list) {
+		struct net_device *ndev = nsdata->dev;
+
+		if (ndev != dev && netif_running(ndev)) {
+			/*
+			 * Allow only a single IBSS interface to be up at any
+			 * time. This is restricted because beacon distribution
+			 * cannot work properly if both are in the same IBSS.
+			 *
+			 * To remove this restriction we'd have to disallow them
+			 * from setting the same SSID on different IBSS interfaces
+			 * belonging to the same hardware. Then, however, we're
+			 * faced with having to adopt two different TSF timers...
+			 */
+			if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
+			    nsdata->vif.type == NL80211_IFTYPE_ADHOC)
+				return -EBUSY;
+
+			/*
+			 * The remaining checks are only performed for interfaces
+			 * with the same MAC address.
+			 */
+			if (compare_ether_addr(dev->dev_addr, ndev->dev_addr))
+				continue;
+
+			/*
+			 * check whether it may have the same address
+			 */
+			if (!identical_mac_addr_allowed(sdata->vif.type,
+							nsdata->vif.type))
+				return -ENOTUNIQ;
+
+			/*
+			 * can only add VLANs to enabled APs
+			 */
+			if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+			    nsdata->vif.type == NL80211_IFTYPE_AP)
+				sdata->bss = &nsdata->u.ap;
+		}
+	}
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_WDS:
+		if (!is_valid_ether_addr(sdata->u.wds.remote_addr))
+			return -ENOLINK;
+		break;
+	case NL80211_IFTYPE_AP_VLAN:
+		if (!sdata->bss)
+			return -ENOLINK;
+		list_add(&sdata->u.vlan.list, &sdata->bss->vlans);
+		break;
+	case NL80211_IFTYPE_AP:
+		sdata->bss = &sdata->u.ap;
+		break;
+	case NL80211_IFTYPE_MESH_POINT:
+		if (!ieee80211_vif_is_mesh(&sdata->vif))
+			break;
+		/* mesh ifaces must set allmulti to forward mcast traffic */
+		atomic_inc(&local->iff_allmultis);
+		break;
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_ADHOC:
+		/* no special treatment */
+		break;
+	case NL80211_IFTYPE_UNSPECIFIED:
+	case __NL80211_IFTYPE_AFTER_LAST:
+		/* cannot happen */
+		WARN_ON(1);
+		break;
+	}
+
+	if (local->open_count == 0) {
+		res = 0;
+		if (local->ops->start)
+			res = local->ops->start(local_to_hw(local));
+		if (res)
+			goto err_del_bss;
+		need_hw_reconfig = 1;
+		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
+	}
 
+	/*
+	 * Check all interfaces and copy the hopefully now-present
+	 * MAC address to those that have the special null one.
+	 */
+	list_for_each_entry(nsdata, &local->interfaces, list) {
+		struct net_device *ndev = nsdata->dev;
+
+		/*
+		 * No need to check netif_running since we do not allow
+		 * it to start up with this invalid address.
+		 */
+		if (compare_ether_addr(null_addr, ndev->dev_addr) == 0)
+			memcpy(ndev->dev_addr,
+			       local->hw.wiphy->perm_addr,
+			       ETH_ALEN);
+	}
+
+	if (compare_ether_addr(null_addr, local->mdev->dev_addr) == 0)
+		memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr,
+		       ETH_ALEN);
+
+	/*
+	 * Validate the MAC address for this device.
+	 */
+	if (!is_valid_ether_addr(dev->dev_addr)) {
+		if (!local->open_count && local->ops->stop)
+			local->ops->stop(local_to_hw(local));
+		return -EADDRNOTAVAIL;
+	}
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_AP_VLAN:
+		/* no need to tell driver */
+		break;
+	case NL80211_IFTYPE_MONITOR:
+		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) {
+			local->cooked_mntrs++;
+			break;
+		}
+
+		/* must be before the call to ieee80211_configure_filter */
+		local->monitors++;
+		if (local->monitors == 1)
+			local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP;
+
+		if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL)
+			local->fif_fcsfail++;
+		if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL)
+			local->fif_plcpfail++;
+		if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL)
+			local->fif_control++;
+		if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
+			local->fif_other_bss++;
+
+		netif_addr_lock_bh(local->mdev);
+		ieee80211_configure_filter(local);
+		netif_addr_unlock_bh(local->mdev);
+		break;
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
+		sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET;
+		/* fall through */
+	default:
+		conf.vif = &sdata->vif;
+		conf.type = sdata->vif.type;
+		conf.mac_addr = dev->dev_addr;
+		res = local->ops->add_interface(local_to_hw(local), &conf);
+		if (res)
+			goto err_stop;
+
+		if (ieee80211_vif_is_mesh(&sdata->vif))
+			ieee80211_start_mesh(sdata);
+		changed |= ieee80211_reset_erp_info(sdata);
+		ieee80211_bss_info_change_notify(sdata, changed);
+		ieee80211_enable_keys(sdata);
+
+		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+		    !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
+			netif_carrier_off(dev);
+		else
+			netif_carrier_on(dev);
+	}
+
+	if (sdata->vif.type == NL80211_IFTYPE_WDS) {
+		/* Create STA entry for the WDS peer */
+		sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr,
+				     GFP_KERNEL);
+		if (!sta) {
+			res = -ENOMEM;
+			goto err_del_interface;
+		}
+
+		/* no locking required since STA is not live yet */
+		sta->flags |= WLAN_STA_AUTHORIZED;
+
+		res = sta_info_insert(sta);
+		if (res) {
+			/* STA has been freed */
+			goto err_del_interface;
+		}
+	}
+
+	if (local->open_count == 0) {
+		res = dev_open(local->mdev);
+		WARN_ON(res);
+		if (res)
+			goto err_del_interface;
+		tasklet_enable(&local->tx_pending_tasklet);
+		tasklet_enable(&local->tasklet);
+	}
+
+	/*
+	 * set_multicast_list will be invoked by the networking core
+	 * which will check whether any increments here were done in
+	 * error and sync them down to the hardware as filter flags.
+	 */
+	if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
+		atomic_inc(&local->iff_allmultis);
+
+	if (sdata->flags & IEEE80211_SDATA_PROMISC)
+		atomic_inc(&local->iff_promiscs);
+
+	local->open_count++;
+	if (need_hw_reconfig) {
+		ieee80211_hw_config(local);
+		/*
+		 * set default queue parameters so drivers don't
+		 * need to initialise the hardware if the hardware
+		 * doesn't start up with sane defaults
+		 */
+		ieee80211_set_wmm_default(sdata);
+	}
+
+	/*
+	 * ieee80211_sta_work is disabled while network interface
+	 * is down. Therefore, some configuration changes may not
+	 * yet be effective. Trigger execution of ieee80211_sta_work
+	 * to fix this.
+	 */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
+		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
+		queue_work(local->hw.workqueue, &ifsta->work);
+	}
+
+	netif_tx_start_all_queues(dev);
+
+	return 0;
+ err_del_interface:
+	local->ops->remove_interface(local_to_hw(local), &conf);
+ err_stop:
+	if (!local->open_count && local->ops->stop)
+		local->ops->stop(local_to_hw(local));
+ err_del_bss:
+	sdata->bss = NULL;
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+		list_del(&sdata->u.vlan.list);
+	return res;
+}
+
+static int ieee80211_stop(struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_init_conf conf;
+	struct sta_info *sta;
+
+	/*
+	 * Stop TX on this interface first.
+	 */
+	netif_tx_stop_all_queues(dev);
+
+	/*
+	 * Now delete all active aggregation sessions.
+	 */
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(sta, &local->sta_list, list) {
+		if (sta->sdata == sdata)
+			ieee80211_sta_tear_down_BA_sessions(sdata, sta->addr);
+	}
+
+	rcu_read_unlock();
+
+	/*
+	 * Remove all stations associated with this interface.
+	 *
+	 * This must be done before calling ops->remove_interface()
+	 * because otherwise we can later invoke ops->sta_notify()
+	 * whenever the STAs are removed, and that invalidates driver
+	 * assumptions about always getting a vif pointer that is valid
+	 * (because if we remove a STA after ops->remove_interface()
+	 * the driver will have removed the vif info already!)
+	 *
+	 * We could relax this and only unlink the stations from the
+	 * hash table and list but keep them on a per-sdata list that
+	 * will be inserted back again when the interface is brought
+	 * up again, but I don't currently see a use case for that,
+	 * except with WDS which gets a STA entry created when it is
+	 * brought up.
+	 */
+	sta_info_flush(local, sdata);
+
+	/*
+	 * Don't count this interface for promisc/allmulti while it
+	 * is down. dev_mc_unsync() will invoke set_multicast_list
+	 * on the master interface which will sync these down to the
+	 * hardware as filter flags.
+	 */
+	if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
+		atomic_dec(&local->iff_allmultis);
+
+	if (sdata->flags & IEEE80211_SDATA_PROMISC)
+		atomic_dec(&local->iff_promiscs);
+
+	dev_mc_unsync(local->mdev, dev);
+
+	/* APs need special treatment */
+	if (sdata->vif.type == NL80211_IFTYPE_AP) {
+		struct ieee80211_sub_if_data *vlan, *tmp;
+		struct beacon_data *old_beacon = sdata->u.ap.beacon;
+
+		/* remove beacon */
+		rcu_assign_pointer(sdata->u.ap.beacon, NULL);
+		synchronize_rcu();
+		kfree(old_beacon);
+
+		/* down all dependent devices, that is VLANs */
+		list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans,
+					 u.vlan.list)
+			dev_close(vlan->dev);
+		WARN_ON(!list_empty(&sdata->u.ap.vlans));
+	}
+
+	local->open_count--;
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_AP_VLAN:
+		list_del(&sdata->u.vlan.list);
+		/* no need to tell driver */
+		break;
+	case NL80211_IFTYPE_MONITOR:
+		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) {
+			local->cooked_mntrs--;
+			break;
+		}
+
+		local->monitors--;
+		if (local->monitors == 0)
+			local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP;
+
+		if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL)
+			local->fif_fcsfail--;
+		if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL)
+			local->fif_plcpfail--;
+		if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL)
+			local->fif_control--;
+		if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
+			local->fif_other_bss--;
+
+		netif_addr_lock_bh(local->mdev);
+		ieee80211_configure_filter(local);
+		netif_addr_unlock_bh(local->mdev);
+		break;
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
+		sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED;
+		memset(sdata->u.sta.bssid, 0, ETH_ALEN);
+		del_timer_sync(&sdata->u.sta.timer);
+		/*
+		 * If the timer fired while we waited for it, it will have
+		 * requeued the work. Now the work will be running again
+		 * but will not rearm the timer again because it checks
+		 * whether the interface is running, which, at this point,
+		 * it no longer is.
+		 */
+		cancel_work_sync(&sdata->u.sta.work);
+		/*
+		 * When we get here, the interface is marked down.
+		 * Call synchronize_rcu() to wait for the RX path
+		 * should it be using the interface and enqueuing
+		 * frames at this very time on another CPU.
+		 */
+		synchronize_rcu();
+		skb_queue_purge(&sdata->u.sta.skb_queue);
+
+		sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
+		kfree(sdata->u.sta.extra_ie);
+		sdata->u.sta.extra_ie = NULL;
+		sdata->u.sta.extra_ie_len = 0;
+		/* fall through */
+	case NL80211_IFTYPE_MESH_POINT:
+		if (ieee80211_vif_is_mesh(&sdata->vif)) {
+			/* allmulti is always set on mesh ifaces */
+			atomic_dec(&local->iff_allmultis);
+			ieee80211_stop_mesh(sdata);
+		}
+		/* fall through */
+	default:
+		if (local->scan_sdata == sdata) {
+			if (!local->ops->hw_scan)
+				cancel_delayed_work_sync(&local->scan_work);
+			/*
+			 * The software scan can no longer run now, so we can
+			 * clear out the scan_sdata reference. However, the
+			 * hardware scan may still be running. The complete
+			 * function must be prepared to handle a NULL value.
+			 */
+			local->scan_sdata = NULL;
+			/*
+			 * The memory barrier guarantees that another CPU
+			 * that is hardware-scanning will now see the fact
+			 * that this interface is gone.
+			 */
+			smp_mb();
+			/*
+			 * If software scanning, complete the scan but since
+			 * the scan_sdata is NULL already don't send out a
+			 * scan event to userspace -- the scan is incomplete.
+			 */
+			if (local->sw_scanning)
+				ieee80211_scan_completed(&local->hw);
+		}
+
+		conf.vif = &sdata->vif;
+		conf.type = sdata->vif.type;
+		conf.mac_addr = dev->dev_addr;
+		/* disable all keys for as long as this netdev is down */
+		ieee80211_disable_keys(sdata);
+		local->ops->remove_interface(local_to_hw(local), &conf);
+	}
+
+	sdata->bss = NULL;
+
+	if (local->open_count == 0) {
+		if (netif_running(local->mdev))
+			dev_close(local->mdev);
+
+		if (local->ops->stop)
+			local->ops->stop(local_to_hw(local));
+
+		ieee80211_led_radio(local, 0);
+
+		flush_workqueue(local->hw.workqueue);
+
+		tasklet_disable(&local->tx_pending_tasklet);
+		tasklet_disable(&local->tasklet);
+	}
+
+	return 0;
+}
+
+static void ieee80211_set_multicast_list(struct net_device *dev)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	int allmulti, promisc, sdata_allmulti, sdata_promisc;
+
+	allmulti = !!(dev->flags & IFF_ALLMULTI);
+	promisc = !!(dev->flags & IFF_PROMISC);
+	sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI);
+	sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC);
+
+	if (allmulti != sdata_allmulti) {
+		if (dev->flags & IFF_ALLMULTI)
+			atomic_inc(&local->iff_allmultis);
+		else
+			atomic_dec(&local->iff_allmultis);
+		sdata->flags ^= IEEE80211_SDATA_ALLMULTI;
+	}
+
+	if (promisc != sdata_promisc) {
+		if (dev->flags & IFF_PROMISC)
+			atomic_inc(&local->iff_promiscs);
+		else
+			atomic_dec(&local->iff_promiscs);
+		sdata->flags ^= IEEE80211_SDATA_PROMISC;
+	}
+
+	dev_mc_sync(local->mdev, dev);
+}
+
+static void ieee80211_if_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+	dev->hard_start_xmit = ieee80211_subif_start_xmit;
+	dev->wireless_handlers = &ieee80211_iw_handler_def;
+	dev->set_multicast_list = ieee80211_set_multicast_list;
+	dev->change_mtu = ieee80211_change_mtu;
+	dev->open = ieee80211_open;
+	dev->stop = ieee80211_stop;
+	dev->destructor = free_netdev;
+	/* we will validate the address ourselves in ->open */
+	dev->validate_addr = NULL;
+}
 /*
  * Called when the netdev is removed or, by the code below, before
  * the interface type changes.
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 584a75bd6cf..c532043c1a1 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -45,16 +45,9 @@ struct ieee80211_tx_status_rtap_hdr {
 	u8 data_retries;
 } __attribute__ ((packed));
 
-/* common interface routines */
-
-static int header_parse_80211(const struct sk_buff *skb, unsigned char *haddr)
-{
-	memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */
-	return ETH_ALEN;
-}
 
 /* must be called under mdev tx lock */
-static void ieee80211_configure_filter(struct ieee80211_local *local)
+void ieee80211_configure_filter(struct ieee80211_local *local)
 {
 	unsigned int changed_flags;
 	unsigned int new_flags = 0;
@@ -97,6 +90,20 @@ static void ieee80211_configure_filter(struct ieee80211_local *local)
 
 /* master interface */
 
+static int header_parse_80211(const struct sk_buff *skb, unsigned char *haddr)
+{
+	memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */
+	return ETH_ALEN;
+}
+
+static const struct header_ops ieee80211_header_ops = {
+	.create		= eth_header,
+	.parse		= header_parse_80211,
+	.rebuild	= eth_rebuild_header,
+	.cache		= eth_header_cache,
+	.cache_update	= eth_header_cache_update,
+};
+
 static int ieee80211_master_open(struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
@@ -139,548 +146,6 @@ static void ieee80211_master_set_multicast_list(struct net_device *dev)
 	ieee80211_configure_filter(local);
 }
 
-/* regular interfaces */
-
-static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
-{
-	int meshhdrlen;
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	meshhdrlen = (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ? 5 : 0;
-
-	/* FIX: what would be proper limits for MTU?
-	 * This interface uses 802.3 frames. */
-	if (new_mtu < 256 ||
-	    new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) {
-		return -EINVAL;
-	}
-
-#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu);
-#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
-	dev->mtu = new_mtu;
-	return 0;
-}
-
-static inline int identical_mac_addr_allowed(int type1, int type2)
-{
-	return type1 == NL80211_IFTYPE_MONITOR ||
-		type2 == NL80211_IFTYPE_MONITOR ||
-		(type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) ||
-		(type1 == NL80211_IFTYPE_WDS &&
-			(type2 == NL80211_IFTYPE_WDS ||
-			 type2 == NL80211_IFTYPE_AP)) ||
-		(type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_AP_VLAN) ||
-		(type1 == NL80211_IFTYPE_AP_VLAN &&
-			(type2 == NL80211_IFTYPE_AP ||
-			 type2 == NL80211_IFTYPE_AP_VLAN));
-}
-
-static int ieee80211_open(struct net_device *dev)
-{
-	struct ieee80211_sub_if_data *sdata, *nsdata;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct sta_info *sta;
-	struct ieee80211_if_init_conf conf;
-	u32 changed = 0;
-	int res;
-	bool need_hw_reconfig = 0;
-	u8 null_addr[ETH_ALEN] = {0};
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	/* fail early if user set an invalid address */
-	if (compare_ether_addr(dev->dev_addr, null_addr) &&
-	    !is_valid_ether_addr(dev->dev_addr))
-		return -EADDRNOTAVAIL;
-
-	/* we hold the RTNL here so can safely walk the list */
-	list_for_each_entry(nsdata, &local->interfaces, list) {
-		struct net_device *ndev = nsdata->dev;
-
-		if (ndev != dev && netif_running(ndev)) {
-			/*
-			 * Allow only a single IBSS interface to be up at any
-			 * time. This is restricted because beacon distribution
-			 * cannot work properly if both are in the same IBSS.
-			 *
-			 * To remove this restriction we'd have to disallow them
-			 * from setting the same SSID on different IBSS interfaces
-			 * belonging to the same hardware. Then, however, we're
-			 * faced with having to adopt two different TSF timers...
-			 */
-			if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
-			    nsdata->vif.type == NL80211_IFTYPE_ADHOC)
-				return -EBUSY;
-
-			/*
-			 * The remaining checks are only performed for interfaces
-			 * with the same MAC address.
-			 */
-			if (compare_ether_addr(dev->dev_addr, ndev->dev_addr))
-				continue;
-
-			/*
-			 * check whether it may have the same address
-			 */
-			if (!identical_mac_addr_allowed(sdata->vif.type,
-							nsdata->vif.type))
-				return -ENOTUNIQ;
-
-			/*
-			 * can only add VLANs to enabled APs
-			 */
-			if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
-			    nsdata->vif.type == NL80211_IFTYPE_AP)
-				sdata->bss = &nsdata->u.ap;
-		}
-	}
-
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_WDS:
-		if (!is_valid_ether_addr(sdata->u.wds.remote_addr))
-			return -ENOLINK;
-		break;
-	case NL80211_IFTYPE_AP_VLAN:
-		if (!sdata->bss)
-			return -ENOLINK;
-		list_add(&sdata->u.vlan.list, &sdata->bss->vlans);
-		break;
-	case NL80211_IFTYPE_AP:
-		sdata->bss = &sdata->u.ap;
-		break;
-	case NL80211_IFTYPE_MESH_POINT:
-		if (!ieee80211_vif_is_mesh(&sdata->vif))
-			break;
-		/* mesh ifaces must set allmulti to forward mcast traffic */
-		atomic_inc(&local->iff_allmultis);
-		break;
-	case NL80211_IFTYPE_STATION:
-	case NL80211_IFTYPE_MONITOR:
-	case NL80211_IFTYPE_ADHOC:
-		/* no special treatment */
-		break;
-	case NL80211_IFTYPE_UNSPECIFIED:
-	case __NL80211_IFTYPE_AFTER_LAST:
-		/* cannot happen */
-		WARN_ON(1);
-		break;
-	}
-
-	if (local->open_count == 0) {
-		res = 0;
-		if (local->ops->start)
-			res = local->ops->start(local_to_hw(local));
-		if (res)
-			goto err_del_bss;
-		need_hw_reconfig = 1;
-		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
-	}
-
-	/*
-	 * Check all interfaces and copy the hopefully now-present
-	 * MAC address to those that have the special null one.
-	 */
-	list_for_each_entry(nsdata, &local->interfaces, list) {
-		struct net_device *ndev = nsdata->dev;
-
-		/*
-		 * No need to check netif_running since we do not allow
-		 * it to start up with this invalid address.
-		 */
-		if (compare_ether_addr(null_addr, ndev->dev_addr) == 0)
-			memcpy(ndev->dev_addr,
-			       local->hw.wiphy->perm_addr,
-			       ETH_ALEN);
-	}
-
-	if (compare_ether_addr(null_addr, local->mdev->dev_addr) == 0)
-		memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr,
-		       ETH_ALEN);
-
-	/*
-	 * Validate the MAC address for this device.
-	 */
-	if (!is_valid_ether_addr(dev->dev_addr)) {
-		if (!local->open_count && local->ops->stop)
-			local->ops->stop(local_to_hw(local));
-		return -EADDRNOTAVAIL;
-	}
-
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_AP_VLAN:
-		/* no need to tell driver */
-		break;
-	case NL80211_IFTYPE_MONITOR:
-		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) {
-			local->cooked_mntrs++;
-			break;
-		}
-
-		/* must be before the call to ieee80211_configure_filter */
-		local->monitors++;
-		if (local->monitors == 1)
-			local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP;
-
-		if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL)
-			local->fif_fcsfail++;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL)
-			local->fif_plcpfail++;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL)
-			local->fif_control++;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
-			local->fif_other_bss++;
-
-		netif_addr_lock_bh(local->mdev);
-		ieee80211_configure_filter(local);
-		netif_addr_unlock_bh(local->mdev);
-		break;
-	case NL80211_IFTYPE_STATION:
-	case NL80211_IFTYPE_ADHOC:
-		sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET;
-		/* fall through */
-	default:
-		conf.vif = &sdata->vif;
-		conf.type = sdata->vif.type;
-		conf.mac_addr = dev->dev_addr;
-		res = local->ops->add_interface(local_to_hw(local), &conf);
-		if (res)
-			goto err_stop;
-
-		if (ieee80211_vif_is_mesh(&sdata->vif))
-			ieee80211_start_mesh(sdata);
-		changed |= ieee80211_reset_erp_info(sdata);
-		ieee80211_bss_info_change_notify(sdata, changed);
-		ieee80211_enable_keys(sdata);
-
-		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
-		    !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
-			netif_carrier_off(dev);
-		else
-			netif_carrier_on(dev);
-	}
-
-	if (sdata->vif.type == NL80211_IFTYPE_WDS) {
-		/* Create STA entry for the WDS peer */
-		sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr,
-				     GFP_KERNEL);
-		if (!sta) {
-			res = -ENOMEM;
-			goto err_del_interface;
-		}
-
-		/* no locking required since STA is not live yet */
-		sta->flags |= WLAN_STA_AUTHORIZED;
-
-		res = sta_info_insert(sta);
-		if (res) {
-			/* STA has been freed */
-			goto err_del_interface;
-		}
-	}
-
-	if (local->open_count == 0) {
-		res = dev_open(local->mdev);
-		WARN_ON(res);
-		if (res)
-			goto err_del_interface;
-		tasklet_enable(&local->tx_pending_tasklet);
-		tasklet_enable(&local->tasklet);
-	}
-
-	/*
-	 * set_multicast_list will be invoked by the networking core
-	 * which will check whether any increments here were done in
-	 * error and sync them down to the hardware as filter flags.
-	 */
-	if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
-		atomic_inc(&local->iff_allmultis);
-
-	if (sdata->flags & IEEE80211_SDATA_PROMISC)
-		atomic_inc(&local->iff_promiscs);
-
-	local->open_count++;
-	if (need_hw_reconfig) {
-		ieee80211_hw_config(local);
-		/*
-		 * set default queue parameters so drivers don't
-		 * need to initialise the hardware if the hardware
-		 * doesn't start up with sane defaults
-		 */
-		ieee80211_set_wmm_default(sdata);
-	}
-
-	/*
-	 * ieee80211_sta_work is disabled while network interface
-	 * is down. Therefore, some configuration changes may not
-	 * yet be effective. Trigger execution of ieee80211_sta_work
-	 * to fix this.
-	 */
-	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
-	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
-		struct ieee80211_if_sta *ifsta = &sdata->u.sta;
-		queue_work(local->hw.workqueue, &ifsta->work);
-	}
-
-	netif_tx_start_all_queues(dev);
-
-	return 0;
- err_del_interface:
-	local->ops->remove_interface(local_to_hw(local), &conf);
- err_stop:
-	if (!local->open_count && local->ops->stop)
-		local->ops->stop(local_to_hw(local));
- err_del_bss:
-	sdata->bss = NULL;
-	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
-		list_del(&sdata->u.vlan.list);
-	return res;
-}
-
-static int ieee80211_stop(struct net_device *dev)
-{
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_if_init_conf conf;
-	struct sta_info *sta;
-
-	/*
-	 * Stop TX on this interface first.
-	 */
-	netif_tx_stop_all_queues(dev);
-
-	/*
-	 * Now delete all active aggregation sessions.
-	 */
-	rcu_read_lock();
-
-	list_for_each_entry_rcu(sta, &local->sta_list, list) {
-		if (sta->sdata == sdata)
-			ieee80211_sta_tear_down_BA_sessions(sdata, sta->addr);
-	}
-
-	rcu_read_unlock();
-
-	/*
-	 * Remove all stations associated with this interface.
-	 *
-	 * This must be done before calling ops->remove_interface()
-	 * because otherwise we can later invoke ops->sta_notify()
-	 * whenever the STAs are removed, and that invalidates driver
-	 * assumptions about always getting a vif pointer that is valid
-	 * (because if we remove a STA after ops->remove_interface()
-	 * the driver will have removed the vif info already!)
-	 *
-	 * We could relax this and only unlink the stations from the
-	 * hash table and list but keep them on a per-sdata list that
-	 * will be inserted back again when the interface is brought
-	 * up again, but I don't currently see a use case for that,
-	 * except with WDS which gets a STA entry created when it is
-	 * brought up.
-	 */
-	sta_info_flush(local, sdata);
-
-	/*
-	 * Don't count this interface for promisc/allmulti while it
-	 * is down. dev_mc_unsync() will invoke set_multicast_list
-	 * on the master interface which will sync these down to the
-	 * hardware as filter flags.
-	 */
-	if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
-		atomic_dec(&local->iff_allmultis);
-
-	if (sdata->flags & IEEE80211_SDATA_PROMISC)
-		atomic_dec(&local->iff_promiscs);
-
-	dev_mc_unsync(local->mdev, dev);
-
-	/* APs need special treatment */
-	if (sdata->vif.type == NL80211_IFTYPE_AP) {
-		struct ieee80211_sub_if_data *vlan, *tmp;
-		struct beacon_data *old_beacon = sdata->u.ap.beacon;
-
-		/* remove beacon */
-		rcu_assign_pointer(sdata->u.ap.beacon, NULL);
-		synchronize_rcu();
-		kfree(old_beacon);
-
-		/* down all dependent devices, that is VLANs */
-		list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans,
-					 u.vlan.list)
-			dev_close(vlan->dev);
-		WARN_ON(!list_empty(&sdata->u.ap.vlans));
-	}
-
-	local->open_count--;
-
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_AP_VLAN:
-		list_del(&sdata->u.vlan.list);
-		/* no need to tell driver */
-		break;
-	case NL80211_IFTYPE_MONITOR:
-		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) {
-			local->cooked_mntrs--;
-			break;
-		}
-
-		local->monitors--;
-		if (local->monitors == 0)
-			local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP;
-
-		if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL)
-			local->fif_fcsfail--;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL)
-			local->fif_plcpfail--;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL)
-			local->fif_control--;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
-			local->fif_other_bss--;
-
-		netif_addr_lock_bh(local->mdev);
-		ieee80211_configure_filter(local);
-		netif_addr_unlock_bh(local->mdev);
-		break;
-	case NL80211_IFTYPE_STATION:
-	case NL80211_IFTYPE_ADHOC:
-		sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED;
-		memset(sdata->u.sta.bssid, 0, ETH_ALEN);
-		del_timer_sync(&sdata->u.sta.timer);
-		/*
-		 * If the timer fired while we waited for it, it will have
-		 * requeued the work. Now the work will be running again
-		 * but will not rearm the timer again because it checks
-		 * whether the interface is running, which, at this point,
-		 * it no longer is.
-		 */
-		cancel_work_sync(&sdata->u.sta.work);
-		/*
-		 * When we get here, the interface is marked down.
-		 * Call synchronize_rcu() to wait for the RX path
-		 * should it be using the interface and enqueuing
-		 * frames at this very time on another CPU.
-		 */
-		synchronize_rcu();
-		skb_queue_purge(&sdata->u.sta.skb_queue);
-
-		sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
-		kfree(sdata->u.sta.extra_ie);
-		sdata->u.sta.extra_ie = NULL;
-		sdata->u.sta.extra_ie_len = 0;
-		/* fall through */
-	case NL80211_IFTYPE_MESH_POINT:
-		if (ieee80211_vif_is_mesh(&sdata->vif)) {
-			/* allmulti is always set on mesh ifaces */
-			atomic_dec(&local->iff_allmultis);
-			ieee80211_stop_mesh(sdata);
-		}
-		/* fall through */
-	default:
-		if (local->scan_sdata == sdata) {
-			if (!local->ops->hw_scan)
-				cancel_delayed_work_sync(&local->scan_work);
-			/*
-			 * The software scan can no longer run now, so we can
-			 * clear out the scan_sdata reference. However, the
-			 * hardware scan may still be running. The complete
-			 * function must be prepared to handle a NULL value.
-			 */
-			local->scan_sdata = NULL;
-			/*
-			 * The memory barrier guarantees that another CPU
-			 * that is hardware-scanning will now see the fact
-			 * that this interface is gone.
-			 */
-			smp_mb();
-			/*
-			 * If software scanning, complete the scan but since
-			 * the scan_sdata is NULL already don't send out a
-			 * scan event to userspace -- the scan is incomplete.
-			 */
-			if (local->sw_scanning)
-				ieee80211_scan_completed(&local->hw);
-		}
-
-		conf.vif = &sdata->vif;
-		conf.type = sdata->vif.type;
-		conf.mac_addr = dev->dev_addr;
-		/* disable all keys for as long as this netdev is down */
-		ieee80211_disable_keys(sdata);
-		local->ops->remove_interface(local_to_hw(local), &conf);
-	}
-
-	sdata->bss = NULL;
-
-	if (local->open_count == 0) {
-		if (netif_running(local->mdev))
-			dev_close(local->mdev);
-
-		if (local->ops->stop)
-			local->ops->stop(local_to_hw(local));
-
-		ieee80211_led_radio(local, 0);
-
-		flush_workqueue(local->hw.workqueue);
-
-		tasklet_disable(&local->tx_pending_tasklet);
-		tasklet_disable(&local->tasklet);
-	}
-
-	return 0;
-}
-
-static void ieee80211_set_multicast_list(struct net_device *dev)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	int allmulti, promisc, sdata_allmulti, sdata_promisc;
-
-	allmulti = !!(dev->flags & IFF_ALLMULTI);
-	promisc = !!(dev->flags & IFF_PROMISC);
-	sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI);
-	sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC);
-
-	if (allmulti != sdata_allmulti) {
-		if (dev->flags & IFF_ALLMULTI)
-			atomic_inc(&local->iff_allmultis);
-		else
-			atomic_dec(&local->iff_allmultis);
-		sdata->flags ^= IEEE80211_SDATA_ALLMULTI;
-	}
-
-	if (promisc != sdata_promisc) {
-		if (dev->flags & IFF_PROMISC)
-			atomic_inc(&local->iff_promiscs);
-		else
-			atomic_dec(&local->iff_promiscs);
-		sdata->flags ^= IEEE80211_SDATA_PROMISC;
-	}
-
-	dev_mc_sync(local->mdev, dev);
-}
-
-static const struct header_ops ieee80211_header_ops = {
-	.create		= eth_header,
-	.parse		= header_parse_80211,
-	.rebuild	= eth_rebuild_header,
-	.cache		= eth_header_cache,
-	.cache_update	= eth_header_cache_update,
-};
-
-void ieee80211_if_setup(struct net_device *dev)
-{
-	ether_setup(dev);
-	dev->hard_start_xmit = ieee80211_subif_start_xmit;
-	dev->wireless_handlers = &ieee80211_iw_handler_def;
-	dev->set_multicast_list = ieee80211_set_multicast_list;
-	dev->change_mtu = ieee80211_change_mtu;
-	dev->open = ieee80211_open;
-	dev->stop = ieee80211_stop;
-	dev->destructor = free_netdev;
-	/* we will validate the address ourselves in ->open */
-	dev->validate_addr = NULL;
-}
-
 /* everything else */
 
 int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
-- 
cgit v1.2.3-70-g09d2


From 7a725f73403e874ec52c58741e9b98cd604dbd03 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:02:00 +0200
Subject: mac80211: warn on some invalid vlan operations

These should never happen, but better warn about them than
crashing a driver, the fact that they never happen is rather
subtle throughout mac80211.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index c532043c1a1..dd838b725af 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -156,6 +156,9 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
 	if (WARN_ON(!netif_running(sdata->dev)))
 		return 0;
 
+	if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
+		return -EINVAL;
+
 	if (!local->ops->config_interface)
 		return 0;
 
@@ -321,6 +324,9 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 
+	if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
+		return;
+
 	if (!changed)
 		return;
 
-- 
cgit v1.2.3-70-g09d2


From 17741cdc264e4d768167766a252210e201c1519a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 00:02:02 +0200
Subject: mac80211: share STA information with driver

This patch changes mac80211 to share some more data about
stations with drivers. Should help iwlwifi and ath9k when
 they get around to updating, and might also help with
implementing rate control algorithms without internals.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Sujith Manoharan <Sujith.Manoharan@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/main.c         | 30 ++++++++--------
 drivers/net/wireless/b43/main.c           |  5 +--
 drivers/net/wireless/b43legacy/main.c     |  2 +-
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 12 +++----
 drivers/net/wireless/iwlwifi/iwl-agn.c    | 12 +++----
 drivers/net/wireless/mac80211_hwsim.c     |  3 +-
 include/net/mac80211.h                    | 53 ++++++++++++++++++++++++---
 net/mac80211/cfg.c                        | 12 +++----
 net/mac80211/debugfs_key.c                |  3 +-
 net/mac80211/debugfs_sta.c                |  6 ++--
 net/mac80211/ht.c                         | 22 ++++++------
 net/mac80211/iface.c                      |  3 +-
 net/mac80211/key.c                        |  2 +-
 net/mac80211/mesh_hwmp.c                  |  8 ++---
 net/mac80211/mesh_plink.c                 | 44 +++++++++++------------
 net/mac80211/mlme.c                       |  5 +--
 net/mac80211/rx.c                         | 12 +++----
 net/mac80211/sta_info.c                   | 59 +++++++++++++++++++------------
 net/mac80211/sta_info.h                   |  7 ++--
 net/mac80211/tkip.c                       |  2 +-
 net/mac80211/tx.c                         | 10 +++---
 net/mac80211/wme.c                        |  2 +-
 net/mac80211/wpa.c                        |  2 +-
 23 files changed, 189 insertions(+), 127 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 2a6e089062f..1ba18006f47 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -1405,7 +1405,7 @@ static void ath9k_configure_filter(struct ieee80211_hw *hw,
 static void ath9k_sta_notify(struct ieee80211_hw *hw,
 			     struct ieee80211_vif *vif,
 			     enum sta_notify_cmd cmd,
-			     const u8 *addr)
+			     struct ieee80211_sta *sta)
 {
 	struct ath_softc *sc = hw->priv;
 	struct ath_node *an;
@@ -1413,19 +1413,18 @@ static void ath9k_sta_notify(struct ieee80211_hw *hw,
 	DECLARE_MAC_BUF(mac);
 
 	spin_lock_irqsave(&sc->node_lock, flags);
-	an = ath_node_find(sc, (u8 *) addr);
+	an = ath_node_find(sc, sta->addr);
 	spin_unlock_irqrestore(&sc->node_lock, flags);
 
 	switch (cmd) {
 	case STA_NOTIFY_ADD:
 		spin_lock_irqsave(&sc->node_lock, flags);
 		if (!an) {
-			ath_node_attach(sc, (u8 *)addr, 0);
+			ath_node_attach(sc, sta->addr, 0);
 			DPRINTF(sc, ATH_DBG_CONFIG, "%s: Attach a node: %s\n",
-				__func__,
-				print_mac(mac, addr));
+				__func__, print_mac(mac, sta->addr));
 		} else {
-			ath_node_get(sc, (u8 *)addr);
+			ath_node_get(sc, sta->addr);
 		}
 		spin_unlock_irqrestore(&sc->node_lock, flags);
 		break;
@@ -1438,7 +1437,7 @@ static void ath9k_sta_notify(struct ieee80211_hw *hw,
 			ath_node_put(sc, an, ATH9K_BH_STATUS_INTACT);
 			DPRINTF(sc, ATH_DBG_CONFIG, "%s: Put a node: %s\n",
 				__func__,
-				print_mac(mac, addr));
+				print_mac(mac, sta->addr));
 		}
 		break;
 	default:
@@ -1581,45 +1580,44 @@ static void ath9k_reset_tsf(struct ieee80211_hw *hw)
 
 static int ath9k_ampdu_action(struct ieee80211_hw *hw,
 		       enum ieee80211_ampdu_mlme_action action,
-		       const u8 *addr,
-		       u16 tid,
-		       u16 *ssn)
+		       struct ieee80211_sta *sta,
+		       u16 tid, u16 *ssn)
 {
 	struct ath_softc *sc = hw->priv;
 	int ret = 0;
 
 	switch (action) {
 	case IEEE80211_AMPDU_RX_START:
-		ret = ath_rx_aggr_start(sc, addr, tid, ssn);
+		ret = ath_rx_aggr_start(sc, sta->addr, tid, ssn);
 		if (ret < 0)
 			DPRINTF(sc, ATH_DBG_FATAL,
 				"%s: Unable to start RX aggregation\n",
 				__func__);
 		break;
 	case IEEE80211_AMPDU_RX_STOP:
-		ret = ath_rx_aggr_stop(sc, addr, tid);
+		ret = ath_rx_aggr_stop(sc, sta->addr, tid);
 		if (ret < 0)
 			DPRINTF(sc, ATH_DBG_FATAL,
 				"%s: Unable to stop RX aggregation\n",
 				__func__);
 		break;
 	case IEEE80211_AMPDU_TX_START:
-		ret = ath_tx_aggr_start(sc, addr, tid, ssn);
+		ret = ath_tx_aggr_start(sc, sta->addr, tid, ssn);
 		if (ret < 0)
 			DPRINTF(sc, ATH_DBG_FATAL,
 				"%s: Unable to start TX aggregation\n",
 				__func__);
 		else
-			ieee80211_start_tx_ba_cb_irqsafe(hw, (u8 *)addr, tid);
+			ieee80211_start_tx_ba_cb_irqsafe(hw, sta->addr, tid);
 		break;
 	case IEEE80211_AMPDU_TX_STOP:
-		ret = ath_tx_aggr_stop(sc, addr, tid);
+		ret = ath_tx_aggr_stop(sc, sta->addr, tid);
 		if (ret < 0)
 			DPRINTF(sc, ATH_DBG_FATAL,
 				"%s: Unable to stop TX aggregation\n",
 				__func__);
 
-		ieee80211_stop_tx_ba_cb_irqsafe(hw, (u8 *)addr, tid);
+		ieee80211_stop_tx_ba_cb_irqsafe(hw, sta->addr, tid);
 		break;
 	default:
 		DPRINTF(sc, ATH_DBG_FATAL,
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index df7a1e7f4a5..0f628a29d83 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4234,7 +4234,8 @@ out_unlock:
 	return err;
 }
 
-static int b43_op_beacon_set_tim(struct ieee80211_hw *hw, int aid, int set)
+static int b43_op_beacon_set_tim(struct ieee80211_hw *hw,
+				 struct ieee80211_sta *sta, bool set)
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
 	unsigned long flags;
@@ -4249,7 +4250,7 @@ static int b43_op_beacon_set_tim(struct ieee80211_hw *hw, int aid, int set)
 static void b43_op_sta_notify(struct ieee80211_hw *hw,
 			      struct ieee80211_vif *vif,
 			      enum sta_notify_cmd notify_cmd,
-			      const u8 *addr)
+			      struct ieee80211_sta *sta)
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
 
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 6e425410c99..9fb1421cbec 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -3403,7 +3403,7 @@ out_unlock:
 }
 
 static int b43legacy_op_beacon_set_tim(struct ieee80211_hw *hw,
-				       int aid, int set)
+				       struct ieee80211_sta *sta, bool set)
 {
 	struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw);
 	unsigned long flags;
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index 700da67ac28..af4e0b994e4 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -366,8 +366,8 @@ static void rs_tl_turn_on_agg_for_tid(struct iwl_priv *priv,
 	if (state == HT_AGG_STATE_IDLE &&
 	    rs_tl_get_load(lq_data, tid) > IWL_AGG_LOAD_THRESHOLD) {
 		IWL_DEBUG_HT("Starting Tx agg: STA: %s tid: %d\n",
-				print_mac(mac, sta->addr), tid);
-		ieee80211_start_tx_ba_session(priv->hw, sta->addr, tid);
+				print_mac(mac, sta->sta.addr), tid);
+		ieee80211_start_tx_ba_session(priv->hw, sta->sta.addr, tid);
 	}
 }
 
@@ -2244,17 +2244,17 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 
 	lq_sta->ibss_sta_added = 0;
 	if (priv->iw_mode == NL80211_IFTYPE_AP) {
-		u8 sta_id = iwl_find_station(priv, sta->addr);
+		u8 sta_id = iwl_find_station(priv, sta->sta.addr);
 		DECLARE_MAC_BUF(mac);
 
 		/* for IBSS the call are from tasklet */
 		IWL_DEBUG_RATE("LQ: ADD station %s\n",
-			     print_mac(mac, sta->addr));
+			     print_mac(mac, sta->sta.addr));
 
 		if (sta_id == IWL_INVALID_STATION) {
 			IWL_DEBUG_RATE("LQ: ADD station %s\n",
-				       print_mac(mac, sta->addr));
-			sta_id = iwl_add_station_flags(priv, sta->addr,
+				       print_mac(mac, sta->sta.addr));
+			sta_id = iwl_add_station_flags(priv, sta->sta.addr,
 							0, CMD_ASYNC, NULL);
 		}
 		if ((sta_id != IWL_INVALID_STATION)) {
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index e8db33bf5e5..5eeffb41d8c 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -3413,13 +3413,13 @@ static int iwl4965_mac_conf_tx(struct ieee80211_hw *hw, u16 queue,
 
 static int iwl4965_mac_ampdu_action(struct ieee80211_hw *hw,
 			     enum ieee80211_ampdu_mlme_action action,
-			     const u8 *addr, u16 tid, u16 *ssn)
+			     struct ieee80211_sta *sta, u16 tid, u16 *ssn)
 {
 	struct iwl_priv *priv = hw->priv;
 	DECLARE_MAC_BUF(mac);
 
 	IWL_DEBUG_HT("A-MPDU action on addr %s tid %d\n",
-		     print_mac(mac, addr), tid);
+		     print_mac(mac, sta->addr), tid);
 
 	if (!(priv->cfg->sku & IWL_SKU_N))
 		return -EACCES;
@@ -3427,16 +3427,16 @@ static int iwl4965_mac_ampdu_action(struct ieee80211_hw *hw,
 	switch (action) {
 	case IEEE80211_AMPDU_RX_START:
 		IWL_DEBUG_HT("start Rx\n");
-		return iwl_rx_agg_start(priv, addr, tid, *ssn);
+		return iwl_rx_agg_start(priv, sta->addr, tid, *ssn);
 	case IEEE80211_AMPDU_RX_STOP:
 		IWL_DEBUG_HT("stop Rx\n");
-		return iwl_rx_agg_stop(priv, addr, tid);
+		return iwl_rx_agg_stop(priv, sta->addr, tid);
 	case IEEE80211_AMPDU_TX_START:
 		IWL_DEBUG_HT("start Tx\n");
-		return iwl_tx_agg_start(priv, addr, tid, ssn);
+		return iwl_tx_agg_start(priv, sta->addr, tid, ssn);
 	case IEEE80211_AMPDU_TX_STOP:
 		IWL_DEBUG_HT("stop Tx\n");
-		return iwl_tx_agg_stop(priv, addr, tid);
+		return iwl_tx_agg_stop(priv, sta->addr, tid);
 	default:
 		IWL_DEBUG_HT("unknown\n");
 		return -EINVAL;
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index bdedf10fc86..173dd5d2c62 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -390,7 +390,8 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 
 static void mac80211_hwsim_sta_notify(struct ieee80211_hw *hw,
 				      struct ieee80211_vif *vif,
-				      enum sta_notify_cmd cmd, const u8 *addr)
+				      enum sta_notify_cmd cmd,
+				      struct ieee80211_sta *sta)
 {
 	hwsim_check_magic(vif);
 }
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7f5ea55e00f..5a6a029da4d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -300,6 +300,9 @@ enum mac80211_tx_control_flags {
  *  (2) driver internal use (if applicable)
  *  (3) TX status information - driver tells mac80211 what happened
  *
+ * The TX control's sta pointer is only valid during the ->tx call,
+ * it may be NULL.
+ *
  * @flags: transmit info flags, defined above
  * @band: TBD
  * @tx_rate_idx: TBD
@@ -329,8 +332,8 @@ struct ieee80211_tx_info {
 		struct {
 			struct ieee80211_vif *vif;
 			struct ieee80211_key_conf *hw_key;
+			struct ieee80211_sta *sta;
 			unsigned long jiffies;
-			u16 aid;
 			s8 rts_cts_rate_idx, alt_retry_rate_idx;
 			u8 retry_limit;
 			u8 icv_len;
@@ -651,6 +654,29 @@ enum set_key_cmd {
 	SET_KEY, DISABLE_KEY,
 };
 
+/**
+ * struct ieee80211_sta - station table entry
+ *
+ * A station table entry represents a station we are possibly
+ * communicating with. Since stations are RCU-managed in
+ * mac80211, any ieee80211_sta pointer you get access to must
+ * either be protected by rcu_read_lock() explicitly or implicitly,
+ * or you must take good care to not use such a pointer after a
+ * call to your sta_notify callback that removed it.
+ *
+ * @addr: MAC address
+ * @aid: AID we assigned to the station if we're an AP
+ * @drv_priv: data area for driver use, will always be aligned to
+ *	sizeof(void *), size is determined in hw information.
+ */
+struct ieee80211_sta {
+	u8 addr[ETH_ALEN];
+	u16 aid;
+
+	/* must be last */
+	u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *))));
+};
+
 /**
  * enum sta_notify_cmd - sta notify command
  *
@@ -795,6 +821,8 @@ enum ieee80211_hw_flags {
  *
  * @vif_data_size: size (in bytes) of the drv_priv data area
  *	within &struct ieee80211_vif.
+ * @sta_data_size: size (in bytes) of the drv_priv data area
+ *	within &struct ieee80211_sta.
  */
 struct ieee80211_hw {
 	struct ieee80211_conf conf;
@@ -806,6 +834,7 @@ struct ieee80211_hw {
 	unsigned int extra_tx_headroom;
 	int channel_change_time;
 	int vif_data_size;
+	int sta_data_size;
 	u16 queues;
 	u16 ampdu_queues;
 	u16 max_listen_interval;
@@ -1089,7 +1118,7 @@ enum ieee80211_ampdu_mlme_action {
  *	This callback must be implemented and atomic.
  *
  * @set_tim: Set TIM bit. mac80211 calls this function when a TIM bit
- * 	must be set or cleared for a given AID. Must be atomic.
+ * 	must be set or cleared for a given STA. Must be atomic.
  *
  * @set_key: See the section "Hardware crypto acceleration"
  *	This callback can sleep, and is only called between add_interface
@@ -1175,7 +1204,8 @@ struct ieee80211_ops {
 				 unsigned int changed_flags,
 				 unsigned int *total_flags,
 				 int mc_count, struct dev_addr_list *mc_list);
-	int (*set_tim)(struct ieee80211_hw *hw, int aid, int set);
+	int (*set_tim)(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
+		       bool set);
 	int (*set_key)(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		       const u8 *local_address, const u8 *address,
 		       struct ieee80211_key_conf *key);
@@ -1192,7 +1222,7 @@ struct ieee80211_ops {
 	int (*set_retry_limit)(struct ieee80211_hw *hw,
 			       u32 short_retry, u32 long_retr);
 	void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
-			enum sta_notify_cmd, const u8 *addr);
+			enum sta_notify_cmd, struct ieee80211_sta *sta);
 	int (*conf_tx)(struct ieee80211_hw *hw, u16 queue,
 		       const struct ieee80211_tx_queue_params *params);
 	int (*get_tx_stats)(struct ieee80211_hw *hw,
@@ -1202,7 +1232,7 @@ struct ieee80211_ops {
 	int (*tx_last_beacon)(struct ieee80211_hw *hw);
 	int (*ampdu_action)(struct ieee80211_hw *hw,
 			    enum ieee80211_ampdu_mlme_action action,
-			    const u8 *addr, u16 tid, u16 *ssn);
+			    struct ieee80211_sta *sta, u16 tid, u16 *ssn);
 };
 
 /**
@@ -1752,4 +1782,17 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra,
  */
 void ieee80211_notify_mac(struct ieee80211_hw *hw,
 			  enum ieee80211_notification_types  notif_type);
+
+/**
+ * ieee80211_find_sta - find a station
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @addr: station's address
+ *
+ * This function must be called under RCU lock and the
+ * resulting pointer is only valid under RCU lock as well.
+ */
+struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw,
+					 const u8 *addr);
+
 #endif /* MAC80211_H */
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d004351050c..ed5e77ce627 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -364,7 +364,7 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
 	sta = sta_info_get_by_idx(local, idx, dev);
 	if (sta) {
 		ret = 0;
-		memcpy(mac, sta->addr, ETH_ALEN);
+		memcpy(mac, sta->sta.addr, ETH_ALEN);
 		sta_set_sinfo(sta, sinfo);
 	}
 
@@ -593,7 +593,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta)
 	 * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
 
 	memset(msg->da, 0xff, ETH_ALEN);
-	memcpy(msg->sa, sta->addr, ETH_ALEN);
+	memcpy(msg->sa, sta->sta.addr, ETH_ALEN);
 	msg->len = htons(6);
 	msg->dsap = 0;
 	msg->ssap = 0x01;	/* NULL LSAP, CR Bit: Response */
@@ -648,9 +648,9 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 	 */
 
 	if (params->aid) {
-		sta->aid = params->aid;
-		if (sta->aid > IEEE80211_MAX_AID)
-			sta->aid = 0; /* XXX: should this be an error? */
+		sta->sta.aid = params->aid;
+		if (sta->sta.aid > IEEE80211_MAX_AID)
+			sta->sta.aid = 0; /* XXX: should this be an error? */
 	}
 
 	if (params->listen_interval >= 0)
@@ -919,7 +919,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
 			    struct mpath_info *pinfo)
 {
 	if (mpath->next_hop)
-		memcpy(next_hop, mpath->next_hop->addr, ETH_ALEN);
+		memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN);
 	else
 		memset(next_hop, 0, ETH_ALEN);
 
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index cf82acec913..a3294d10932 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -206,7 +206,8 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key)
 	rcu_read_lock();
 	sta = rcu_dereference(key->sta);
 	if (sta)
-		sprintf(buf, "../../stations/%s", print_mac(mac, sta->addr));
+		sprintf(buf, "../../stations/%s",
+			print_mac(mac, sta->sta.addr));
 	rcu_read_unlock();
 
 	/* using sta as a boolean is fine outside RCU lock */
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 6abe5427752..81f350eaf8a 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -50,7 +50,7 @@ static const struct file_operations sta_ ##name## _ops = {		\
 		STA_READ_##format(name, field)				\
 		STA_OPS(name)
 
-STA_FILE(aid, aid, D);
+STA_FILE(aid, sta.aid, D);
 STA_FILE(dev, sdata->dev->name, S);
 STA_FILE(rx_packets, rx_packets, LU);
 STA_FILE(tx_packets, tx_packets, LU);
@@ -176,7 +176,7 @@ static ssize_t sta_agg_status_write(struct file *file,
 	struct net_device *dev = sta->sdata->dev;
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hw *hw = &local->hw;
-	u8 *da = sta->addr;
+	u8 *da = sta->sta.addr;
 	static int tid_static_tx[16] = {0, 0, 0, 0, 0, 0, 0, 0,
 					0, 0, 0, 0, 0, 0, 0, 0};
 	static int tid_static_rx[16] = {1, 1, 1, 1, 1, 1, 1, 1,
@@ -253,7 +253,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
 	if (!stations_dir)
 		return;
 
-	mac = print_mac(mbuf, sta->addr);
+	mac = print_mac(mbuf, sta->sta.addr);
 
 	sta->debugfs.dir = debugfs_create_dir(mac, stations_dir);
 	if (!sta->debugfs.dir)
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index bc3c71ad7ae..dc7d9a3d70d 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -274,7 +274,7 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
 	ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
-					ra, tid, NULL);
+				       &sta->sta, tid, NULL);
 	if (ret)
 		printk(KERN_DEBUG "HW problem - can not stop rx "
 				"aggregation for tid %d\n", tid);
@@ -328,7 +328,7 @@ static void sta_addba_resp_timer_expired(unsigned long data)
 
 	rcu_read_lock();
 
-	sta = sta_info_get(local, temp_sta->addr);
+	sta = sta_info_get(local, temp_sta->sta.addr);
 	if (!sta) {
 		rcu_read_unlock();
 		return;
@@ -354,7 +354,7 @@ static void sta_addba_resp_timer_expired(unsigned long data)
 	/* go through the state check in stop_BA_session */
 	*state = HT_AGG_STATE_OPERATIONAL;
 	spin_unlock_bh(&sta->lock);
-	ieee80211_stop_tx_ba_session(hw, temp_sta->addr, tid,
+	ieee80211_stop_tx_ba_session(hw, temp_sta->sta.addr, tid,
 				     WLAN_BACK_INITIATOR);
 
 timer_expired_exit:
@@ -465,7 +465,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 
 	if (local->ops->ampdu_action)
 		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START,
-						ra, tid, &start_seq_num);
+					       &sta->sta, tid, &start_seq_num);
 
 	if (ret) {
 		/* No need to requeue the packets in the agg queue, since we
@@ -557,7 +557,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
 
 	if (local->ops->ampdu_action)
 		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP,
-						ra, tid, NULL);
+					       &sta->sta, tid, NULL);
 
 	/* case HW denied going back to legacy */
 	if (ret) {
@@ -767,7 +767,7 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
 #endif
-	ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr,
+	ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr,
 					 (u16)*ptid, WLAN_BACK_TIMER,
 					 WLAN_REASON_QSTA_TIMEOUT);
 }
@@ -874,7 +874,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 
 	if (local->ops->ampdu_action)
 		ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
-					       sta->addr, tid, &start_seq_num);
+					       &sta->sta, tid, &start_seq_num);
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
@@ -899,7 +899,7 @@ end:
 	spin_unlock_bh(&sta->lock);
 
 end_no_lock:
-	ieee80211_send_addba_resp(sta->sdata, sta->addr, tid,
+	ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
 				  dialog_token, status, 1, buf_size, timeout);
 }
 
@@ -952,7 +952,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 		/* this will allow the state check in stop_BA_session */
 		*state = HT_AGG_STATE_OPERATIONAL;
 		spin_unlock_bh(&sta->lock);
-		ieee80211_stop_tx_ba_session(hw, sta->addr, tid,
+		ieee80211_stop_tx_ba_session(hw, sta->sta.addr, tid,
 					     WLAN_BACK_INITIATOR);
 	}
 }
@@ -979,14 +979,14 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
 	if (initiator == WLAN_BACK_INITIATOR)
-		ieee80211_sta_stop_rx_ba_session(sdata, sta->addr, tid,
+		ieee80211_sta_stop_rx_ba_session(sdata, sta->sta.addr, tid,
 						 WLAN_BACK_INITIATOR, 0);
 	else { /* WLAN_BACK_RECIPIENT */
 		spin_lock_bh(&sta->lock);
 		sta->ampdu_mlme.tid_state_tx[tid] =
 				HT_AGG_STATE_OPERATIONAL;
 		spin_unlock_bh(&sta->lock);
-		ieee80211_stop_tx_ba_session(&local->hw, sta->addr, tid,
+		ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, tid,
 					     WLAN_BACK_RECIPIENT);
 	}
 }
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f528962b13e..a7ef0289fbd 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -336,7 +336,8 @@ static int ieee80211_stop(struct net_device *dev)
 
 	list_for_each_entry_rcu(sta, &local->sta_list, list) {
 		if (sta->sdata == sdata)
-			ieee80211_sta_tear_down_BA_sessions(sdata, sta->addr);
+			ieee80211_sta_tear_down_BA_sessions(sdata,
+							    sta->sta.addr);
 	}
 
 	rcu_read_unlock();
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index d5b95748db2..57afcd38cd9 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -123,7 +123,7 @@ static const u8 *get_mac_for_key(struct ieee80211_key *key)
 		addr = zero_addr;
 
 	if (key->sta)
-		addr = key->sta->addr;
+		addr = key->sta->sta.addr;
 
 	return addr;
 }
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 1fad792ad25..15a5c99270a 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -517,7 +517,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 		spin_unlock_bh(&mpath->state_lock);
 		goto fail;
 	}
-	memcpy(next_hop, mpath->next_hop->addr, ETH_ALEN);
+	memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN);
 	spin_unlock_bh(&mpath->state_lock);
 	--ttl;
 	flags = PREP_IE_FLAGS(prep_elem);
@@ -529,7 +529,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 
 	mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr,
 		cpu_to_le32(orig_dsn), 0, dst_addr,
-		cpu_to_le32(dst_dsn), mpath->next_hop->addr, hopcount, ttl,
+		cpu_to_le32(dst_dsn), mpath->next_hop->sta.addr, hopcount, ttl,
 		cpu_to_le32(lifetime), cpu_to_le32(metric),
 		0, sdata);
 	rcu_read_unlock();
@@ -557,7 +557,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
 	if (mpath) {
 		spin_lock_bh(&mpath->state_lock);
 		if (mpath->flags & MESH_PATH_ACTIVE &&
-		    memcmp(ta, mpath->next_hop->addr, ETH_ALEN) == 0 &&
+		    memcmp(ta, mpath->next_hop->sta.addr, ETH_ALEN) == 0 &&
 		    (!(mpath->flags & MESH_PATH_DSN_VALID) ||
 		    DSN_GT(dst_dsn, mpath->dsn))) {
 			mpath->flags &= ~MESH_PATH_ACTIVE;
@@ -799,7 +799,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
 			mesh_queue_preq(mpath,
 					PREQ_Q_F_START | PREQ_Q_F_REFRESH);
 		}
-		memcpy(hdr->addr1, mpath->next_hop->addr,
+		memcpy(hdr->addr1, mpath->next_hop->sta.addr,
 				ETH_ALEN);
 	} else {
 		if (!(mpath->flags & MESH_PATH_RESOLVING)) {
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 990a4b7f6bc..debf7834dbc 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -275,7 +275,7 @@ static void mesh_plink_timer(unsigned long data)
 		return;
 	}
 	mpl_dbg("Mesh plink timer for %s fired on state %d\n",
-			print_mac(mac, sta->addr), sta->plink_state);
+			print_mac(mac, sta->sta.addr), sta->plink_state);
 	reason = 0;
 	llid = sta->llid;
 	plid = sta->plid;
@@ -288,7 +288,7 @@ static void mesh_plink_timer(unsigned long data)
 		if (sta->plink_retries < dot11MeshMaxRetries(sdata)) {
 			u32 rand;
 			mpl_dbg("Mesh plink for %s (retry, timeout): %d %d\n",
-					print_mac(mac, sta->addr),
+					print_mac(mac, sta->sta.addr),
 					sta->plink_retries, sta->plink_timeout);
 			get_random_bytes(&rand, sizeof(u32));
 			sta->plink_timeout = sta->plink_timeout +
@@ -296,7 +296,7 @@ static void mesh_plink_timer(unsigned long data)
 			++sta->plink_retries;
 			mod_plink_timer(sta, sta->plink_timeout);
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid,
 					    0, 0);
 			break;
 		}
@@ -309,7 +309,7 @@ static void mesh_plink_timer(unsigned long data)
 		sta->plink_state = PLINK_HOLDING;
 		mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
 		spin_unlock_bh(&sta->lock);
-		mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid, plid,
+		mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid,
 				    reason);
 		break;
 	case PLINK_HOLDING:
@@ -352,10 +352,10 @@ int mesh_plink_open(struct sta_info *sta)
 	mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata));
 	spin_unlock_bh(&sta->lock);
 	mpl_dbg("Mesh plink: starting establishment with %s\n",
-		print_mac(mac, sta->addr));
+		print_mac(mac, sta->sta.addr));
 
 	return mesh_plink_frame_tx(sdata, PLINK_OPEN,
-				   sta->addr, llid, 0, 0);
+				   sta->sta.addr, llid, 0, 0);
 }
 
 void mesh_plink_block(struct sta_info *sta)
@@ -379,7 +379,7 @@ int mesh_plink_close(struct sta_info *sta)
 #endif
 
 	mpl_dbg("Mesh plink: closing link with %s\n",
-			print_mac(mac, sta->addr));
+			print_mac(mac, sta->sta.addr));
 	spin_lock_bh(&sta->lock);
 	sta->reason = cpu_to_le16(MESH_LINK_CANCELLED);
 	reason = sta->reason;
@@ -400,7 +400,7 @@ int mesh_plink_close(struct sta_info *sta)
 	llid = sta->llid;
 	plid = sta->plid;
 	spin_unlock_bh(&sta->lock);
-	mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->addr, llid,
+	mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->sta.addr, llid,
 			    plid, reason);
 	return 0;
 }
@@ -577,9 +577,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			sta->llid = llid;
 			mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata));
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid,
 					    0, 0);
-			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr,
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr,
 					    llid, plid, 0);
 			break;
 		default:
@@ -604,7 +604,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
@@ -613,7 +613,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			sta->plid = plid;
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid,
 					    plid, 0);
 			break;
 		case CNF_ACPT:
@@ -646,13 +646,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid,
 					    plid, 0);
 			break;
 		case CNF_ACPT:
@@ -661,7 +661,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			mesh_plink_inc_estab_count(sdata);
 			spin_unlock_bh(&sta->lock);
 			mpl_dbg("Mesh plink with %s ESTABLISHED\n",
-					print_mac(mac, sta->addr));
+					print_mac(mac, sta->sta.addr));
 			break;
 		default:
 			spin_unlock_bh(&sta->lock);
@@ -685,7 +685,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
@@ -694,8 +694,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			mesh_plink_inc_estab_count(sdata);
 			spin_unlock_bh(&sta->lock);
 			mpl_dbg("Mesh plink with %s ESTABLISHED\n",
-					print_mac(mac, sta->addr));
-			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid,
+					print_mac(mac, sta->sta.addr));
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid,
 					    plid, 0);
 			break;
 		default:
@@ -714,13 +714,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			llid = sta->llid;
 			mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid,
 					    plid, reason);
 			break;
 		case OPN_ACPT:
 			llid = sta->llid;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid,
+			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid,
 					    plid, 0);
 			break;
 		default:
@@ -743,8 +743,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			llid = sta->llid;
 			reason = sta->reason;
 			spin_unlock_bh(&sta->lock);
-			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid,
-					    plid, reason);
+			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr,
+					    llid, plid, reason);
 			break;
 		default:
 			spin_unlock_bh(&sta->lock);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 8a2cfd3609b..35c421b89dd 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -804,7 +804,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	netif_tx_stop_all_queues(sdata->dev);
 	netif_carrier_off(sdata->dev);
 
-	ieee80211_sta_tear_down_BA_sessions(sdata, sta->addr);
+	ieee80211_sta_tear_down_BA_sessions(sdata, sta->sta.addr);
 
 	if (self_disconnected) {
 		if (deauth)
@@ -1507,7 +1507,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				printk(KERN_DEBUG "%s: updated supp_rates set "
 				    "for %s based on beacon info (0x%llx | "
 				    "0x%llx -> 0x%llx)\n",
-				    sdata->dev->name, print_mac(mac, sta->addr),
+				    sdata->dev->name,
+				    print_mac(mac, sta->sta.addr),
 				    (unsigned long long) prev_rates,
 				    (unsigned long long) supp_rates,
 				    (unsigned long long) sta->supp_rates[band]);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8c3dda5f00b..92d898b901e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -661,7 +661,7 @@ static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta)
 	set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL);
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n",
-	       dev->name, print_mac(mac, sta->addr), sta->aid);
+	       dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid);
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 }
 
@@ -685,7 +685,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %s aid %d exits power save mode\n",
-	       dev->name, print_mac(mac, sta->addr), sta->aid);
+	       dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid);
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 
 	/* Send all buffered frames to the station */
@@ -702,7 +702,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 		printk(KERN_DEBUG "%s: STA %s aid %d send PS frame "
 		       "since STA not sleeping anymore\n", dev->name,
-		       print_mac(mac, sta->addr), sta->aid);
+		       print_mac(mac, sta->sta.addr), sta->sta.aid);
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 		info->flags |= IEEE80211_TX_CTL_REQUEUE;
 		dev_queue_xmit(skb);
@@ -1007,7 +1007,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
 
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 		printk(KERN_DEBUG "STA %s aid %d: PS Poll (entries after %d)\n",
-		       print_mac(mac, rx->sta->addr), rx->sta->aid,
+		       print_mac(mac, rx->sta->sta.addr), rx->sta->sta.aid,
 		       skb_queue_len(&rx->sta->ps_tx_buf));
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 
@@ -1032,7 +1032,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
 		 */
 		printk(KERN_DEBUG "%s: STA %s sent PS Poll even "
 		       "though there are no buffered frames for it\n",
-		       rx->dev->name, print_mac(mac, rx->sta->addr));
+		       rx->dev->name, print_mac(mac, rx->sta->sta.addr));
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 	}
 
@@ -2140,7 +2140,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
 	/* if this mpdu is fragmented - terminate rx aggregation session */
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	if (sc & IEEE80211_SCTL_FRAG) {
-		ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr,
+		ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr,
 			tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP);
 		ret = 1;
 		goto end_reorder;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 31246d8e532..d9774ac2e0f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -73,11 +73,11 @@ static int sta_info_hash_del(struct ieee80211_local *local,
 {
 	struct sta_info *s;
 
-	s = local->sta_hash[STA_HASH(sta->addr)];
+	s = local->sta_hash[STA_HASH(sta->sta.addr)];
 	if (!s)
 		return -ENOENT;
 	if (s == sta) {
-		rcu_assign_pointer(local->sta_hash[STA_HASH(sta->addr)],
+		rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)],
 				   s->hnext);
 		return 0;
 	}
@@ -94,13 +94,13 @@ static int sta_info_hash_del(struct ieee80211_local *local,
 
 /* protected by RCU */
 static struct sta_info *__sta_info_find(struct ieee80211_local *local,
-					u8 *addr)
+					const u8 *addr)
 {
 	struct sta_info *sta;
 
 	sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]);
 	while (sta) {
-		if (compare_ether_addr(sta->addr, addr) == 0)
+		if (compare_ether_addr(sta->sta.addr, addr) == 0)
 			break;
 		sta = rcu_dereference(sta->hnext);
 	}
@@ -151,7 +151,7 @@ static void __sta_info_free(struct ieee80211_local *local,
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	printk(KERN_DEBUG "%s: Destroyed STA %s\n",
-	       wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->addr));
+	       wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr));
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 
 	kfree(sta);
@@ -219,8 +219,8 @@ void sta_info_destroy(struct sta_info *sta)
 static void sta_info_hash_add(struct ieee80211_local *local,
 			      struct sta_info *sta)
 {
-	sta->hnext = local->sta_hash[STA_HASH(sta->addr)];
-	rcu_assign_pointer(local->sta_hash[STA_HASH(sta->addr)], sta);
+	sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)];
+	rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta);
 }
 
 struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
@@ -231,14 +231,14 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	int i;
 	DECLARE_MAC_BUF(mbuf);
 
-	sta = kzalloc(sizeof(*sta), gfp);
+	sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp);
 	if (!sta)
 		return NULL;
 
 	spin_lock_init(&sta->lock);
 	spin_lock_init(&sta->flaglock);
 
-	memcpy(sta->addr, addr, ETH_ALEN);
+	memcpy(sta->sta.addr, addr, ETH_ALEN);
 	sta->local = local;
 	sta->sdata = sdata;
 
@@ -271,7 +271,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	printk(KERN_DEBUG "%s: Allocated STA %s\n",
-	       wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->addr));
+	       wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr));
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 
 #ifdef CONFIG_MAC80211_MESH
@@ -300,15 +300,15 @@ int sta_info_insert(struct sta_info *sta)
 		goto out_free;
 	}
 
-	if (WARN_ON(compare_ether_addr(sta->addr, sdata->dev->dev_addr) == 0 ||
-	            is_multicast_ether_addr(sta->addr))) {
+	if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->dev->dev_addr) == 0 ||
+	            is_multicast_ether_addr(sta->sta.addr))) {
 		err = -EINVAL;
 		goto out_free;
 	}
 
 	spin_lock_irqsave(&local->sta_lock, flags);
 	/* check if STA exists already */
-	if (__sta_info_find(local, sta->addr)) {
+	if (__sta_info_find(local, sta->sta.addr)) {
 		spin_unlock_irqrestore(&local->sta_lock, flags);
 		err = -EEXIST;
 		goto out_free;
@@ -325,12 +325,12 @@ int sta_info_insert(struct sta_info *sta)
 					     u.ap);
 
 		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
-				       STA_NOTIFY_ADD, sta->addr);
+				       STA_NOTIFY_ADD, &sta->sta);
 	}
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	printk(KERN_DEBUG "%s: Inserted STA %s\n",
-	       wiphy_name(local->hw.wiphy), print_mac(mac, sta->addr));
+	       wiphy_name(local->hw.wiphy), print_mac(mac, sta->sta.addr));
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 
 	spin_unlock_irqrestore(&local->sta_lock, flags);
@@ -379,11 +379,12 @@ static void __sta_info_set_tim_bit(struct ieee80211_if_ap *bss,
 {
 	BUG_ON(!bss);
 
-	__bss_tim_set(bss, sta->aid);
+	__bss_tim_set(bss, sta->sta.aid);
 
 	if (sta->local->ops->set_tim) {
 		sta->local->tim_in_locked_section = true;
-		sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 1);
+		sta->local->ops->set_tim(local_to_hw(sta->local),
+					 &sta->sta, true);
 		sta->local->tim_in_locked_section = false;
 	}
 }
@@ -404,11 +405,12 @@ static void __sta_info_clear_tim_bit(struct ieee80211_if_ap *bss,
 {
 	BUG_ON(!bss);
 
-	__bss_tim_clear(bss, sta->aid);
+	__bss_tim_clear(bss, sta->sta.aid);
 
 	if (sta->local->ops->set_tim) {
 		sta->local->tim_in_locked_section = true;
-		sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 0);
+		sta->local->ops->set_tim(local_to_hw(sta->local),
+					 &sta->sta, false);
 		sta->local->tim_in_locked_section = false;
 	}
 }
@@ -462,7 +464,7 @@ static void __sta_info_unlink(struct sta_info **sta)
 					     u.ap);
 
 		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
-				       STA_NOTIFY_REMOVE, (*sta)->addr);
+				       STA_NOTIFY_REMOVE, &(*sta)->sta);
 	}
 
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
@@ -474,7 +476,7 @@ static void __sta_info_unlink(struct sta_info **sta)
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	printk(KERN_DEBUG "%s: Removed STA %s\n",
-	       wiphy_name(local->hw.wiphy), print_mac(mbuf, (*sta)->addr));
+	       wiphy_name(local->hw.wiphy), print_mac(mbuf, (*sta)->sta.addr));
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 
 	/*
@@ -570,7 +572,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
 		local->total_ps_buffered--;
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 		printk(KERN_DEBUG "Buffered frame expired (STA "
-		       "%s)\n", print_mac(mac, sta->addr));
+		       "%s)\n", print_mac(mac, sta->sta.addr));
 #endif
 		dev_kfree_skb(skb);
 
@@ -817,7 +819,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
 		if (time_after(jiffies, sta->last_rx + exp_time)) {
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 			printk(KERN_DEBUG "%s: expiring inactive STA %s\n",
-			       sdata->dev->name, print_mac(mac, sta->addr));
+			       sdata->dev->name, print_mac(mac, sta->sta.addr));
 #endif
 			__sta_info_unlink(&sta);
 			if (sta)
@@ -828,3 +830,14 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
 	list_for_each_entry_safe(sta, tmp, &tmp_list, list)
 		sta_info_destroy(sta);
 }
+
+struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw,
+                                         const u8 *addr)
+{
+	struct sta_info *sta = __sta_info_find(hw_to_local(hw), addr);
+
+	if (!sta)
+		return NULL;
+	return &sta->sta;
+}
+EXPORT_SYMBOL(ieee80211_find_sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 22007990099..e7ce12dbf27 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -218,6 +218,7 @@ struct sta_ampdu_mlme {
  * @plink_timeout: TBD
  * @plink_timer: TBD
  * @debugfs: debug filesystem info
+ * @sta: station information we share with the driver
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -232,8 +233,7 @@ struct sta_info {
 	spinlock_t flaglock;
 	struct ieee80211_ht_info ht_info;
 	u64 supp_rates[IEEE80211_NUM_BANDS];
-	u8 addr[ETH_ALEN];
-	u16 aid;
+
 	u16 listen_interval;
 
 	/*
@@ -327,6 +327,9 @@ struct sta_info {
 		struct dentry *agg_status;
 	} debugfs;
 #endif
+
+	/* keep last! */
+	struct ieee80211_sta sta;
 };
 
 static inline enum plink_state sta_plink_state(struct sta_info *sta)
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 995f7af3d25..34b32bc8f60 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -304,7 +304,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm,
 			key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
 			u8 bcast[ETH_ALEN] =
 				{0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-			u8 *sta_addr = key->sta->addr;
+			u8 *sta_addr = key->sta->sta.addr;
 
 			if (is_multicast_ether_addr(ra))
 				sta_addr = bcast;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f4bcc589d67..07bf228d0b1 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -381,7 +381,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 		printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries "
 		       "before %d)\n",
-		       print_mac(mac, sta->addr), sta->aid,
+		       print_mac(mac, sta->sta.addr), sta->sta.aid,
 		       skb_queue_len(&sta->ps_tx_buf));
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 		if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
@@ -392,7 +392,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 			if (net_ratelimit()) {
 				printk(KERN_DEBUG "%s: STA %s TX "
 				       "buffer full - dropping oldest frame\n",
-				       tx->dev->name, print_mac(mac, sta->addr));
+				       tx->dev->name, print_mac(mac, sta->sta.addr));
 			}
 #endif
 			dev_kfree_skb(old);
@@ -411,7 +411,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 	else if (unlikely(test_sta_flags(sta, WLAN_STA_PS))) {
 		printk(KERN_DEBUG "%s: STA %s in PS mode, but pspoll "
 		       "set -> send frame\n", tx->dev->name,
-		       print_mac(mac, sta->addr));
+		       print_mac(mac, sta->sta.addr));
 	}
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 	clear_sta_flags(sta, WLAN_STA_PSPOLL);
@@ -528,7 +528,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
 	if (tx->sta)
-		info->control.aid = tx->sta->aid;
+		info->control.sta = &tx->sta->sta;
 
 	if (!info->control.retry_limit) {
 		if (!is_multicast_ether_addr(hdr->addr1)) {
@@ -608,7 +608,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	}
 
 	if (tx->sta)
-		info->control.aid = tx->sta->aid;
+		info->control.sta = &tx->sta->sta;
 
 	return TX_CONTINUE;
 }
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 7229e958879..6748dedcab5 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -210,7 +210,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 				DECLARE_MAC_BUF(mac);
 				printk(KERN_DEBUG "allocated aggregation queue"
 					" %d tid %d addr %s pool=0x%lX\n",
-					i, tid, print_mac(mac, sta->addr),
+					i, tid, print_mac(mac, sta->sta.addr),
 					local->queue_pool[0]);
 			}
 #endif /* CONFIG_MAC80211_HT_DEBUG */
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 78021780b88..37ae9a959f6 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -256,7 +256,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 
 	res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
 					  key, skb->data + hdrlen,
-					  skb->len - hdrlen, rx->sta->addr,
+					  skb->len - hdrlen, rx->sta->sta.addr,
 					  hdr->addr1, hwaccel, rx->queue,
 					  &rx->tkip_iv32,
 					  &rx->tkip_iv16);
-- 
cgit v1.2.3-70-g09d2


From 95dac040041723d0c0ab245642c1b9802f12cc8d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 02:03:28 +0200
Subject: mac80211: small rate control changes

This patch fixes mac80211 to not rely on the rate control
algorithm to update sta->tx_retry_failed and sta->tx_retry_count
(even if we don't currently use them), removes a number of
completely unused values we don't even show in debugfs and
changes the code in ieee80211_tx_status() to not look up the
sta_info repeatedly.

The only behaviour change here would be not calling the rate
control function rate_control_tx_status() when no sta_info is
found, but all rate control algorithms ignore such calls anyway.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c             | 50 ++++++++++++++++++++---------------------
 net/mac80211/rc80211_pid_algo.c | 11 ---------
 net/mac80211/sta_info.h         |  7 ------
 3 files changed, 25 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index dd838b725af..c307dba7ec0 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -546,29 +546,27 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 
 	rcu_read_lock();
 
-	if (info->status.excessive_retries) {
-		sta = sta_info_get(local, hdr->addr1);
-		if (sta) {
-			if (test_sta_flags(sta, WLAN_STA_PS)) {
-				/*
-				 * The STA is in power save mode, so assume
-				 * that this TX packet failed because of that.
-				 */
-				ieee80211_handle_filtered_frame(local, sta, skb);
-				rcu_read_unlock();
-				return;
-			}
+	sta = sta_info_get(local, hdr->addr1);
+
+	if (sta) {
+		if (info->status.excessive_retries &&
+		    test_sta_flags(sta, WLAN_STA_PS)) {
+			/*
+			 * The STA is in power save mode, so assume
+			 * that this TX packet failed because of that.
+			 */
+			ieee80211_handle_filtered_frame(local, sta, skb);
+			rcu_read_unlock();
+			return;
 		}
-	}
 
-	fc = hdr->frame_control;
+		fc = hdr->frame_control;
+
+		if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) &&
+		    (ieee80211_is_data_qos(fc))) {
+			u16 tid, ssn;
+			u8 *qc;
 
-	if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) &&
-	    (ieee80211_is_data_qos(fc))) {
-		u16 tid, ssn;
-		u8 *qc;
-		sta = sta_info_get(local, hdr->addr1);
-		if (sta) {
 			qc = ieee80211_get_qos_ctl(hdr);
 			tid = qc[0] & 0xf;
 			ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10)
@@ -576,17 +574,19 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			ieee80211_send_bar(sta->sdata, hdr->addr1,
 					   tid, ssn);
 		}
-	}
 
-	if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) {
-		sta = sta_info_get(local, hdr->addr1);
-		if (sta) {
+		if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) {
 			ieee80211_handle_filtered_frame(local, sta, skb);
 			rcu_read_unlock();
 			return;
+		} else {
+			if (info->status.excessive_retries)
+				sta->tx_retry_failed++;
+			sta->tx_retry_count += info->status.retry_count;
 		}
-	} else
+
 		rate_control_tx_status(local->mdev, skb);
+	}
 
 	rcu_read_unlock();
 
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 21e1942ea97..94867860c3e 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -282,17 +282,6 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 		spinfo->tx_num_xmit++;
 	}
 
-	if (info->status.excessive_retries) {
-		sta->tx_retry_failed++;
-		sta->tx_num_consecutive_failures++;
-		sta->tx_num_mpdu_fail++;
-	} else {
-		sta->tx_num_consecutive_failures = 0;
-		sta->tx_num_mpdu_ok++;
-	}
-	sta->tx_retry_count += info->status.retry_count;
-	sta->tx_num_mpdu_fail += info->status.retry_count;
-
 	/* Update PID controller state. */
 	period = (HZ * pinfo->sampling_period + 500) / 1000;
 	if (!period)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index e7ce12dbf27..4a9b96eeb68 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -195,9 +195,6 @@ struct sta_ampdu_mlme {
  * @tx_filtered_count: TBD
  * @tx_retry_failed: TBD
  * @tx_retry_count: TBD
- * @tx_num_consecutive_failures: TBD
- * @tx_num_mpdu_ok: TBD
- * @tx_num_mpdu_fail: TBD
  * @fail_avg: moving percentage of failed MSDUs
  * @tx_packets: number of RX/TX MSDUs
  * @tx_bytes: TBD
@@ -273,10 +270,6 @@ struct sta_info {
 	/* Updated from TX status path only, no locking requirements */
 	unsigned long tx_filtered_count;
 	unsigned long tx_retry_failed, tx_retry_count;
-	/* TODO: update in generic code not rate control? */
-	u32 tx_num_consecutive_failures;
-	u32 tx_num_mpdu_ok;
-	u32 tx_num_mpdu_fail;
 	/* moving percentage of failed MSDUs */
 	unsigned int fail_avg;
 
-- 
cgit v1.2.3-70-g09d2


From b7e35008815a1c39123f4dd53b430788e2e18da4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 02:22:58 +0200
Subject: mac80211: move last_txrate_idx into RC algorithms

This variable in sta_info is only used in a meaningful way
by the Intel RC algorithms, so move it into those.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 20 ++++++++++++--------
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c  | 15 +++++++++------
 net/mac80211/rc80211_pid_algo.c            |  2 --
 net/mac80211/sta_info.h                    |  2 --
 4 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
index da23c927380..46b672c3458 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
@@ -65,6 +65,9 @@ struct iwl3945_rs_sta {
 	u8 ibss_sta_added;
 	struct timer_list rate_scale_flush;
 	struct iwl3945_rate_scale_data win[IWL_RATE_COUNT];
+
+	/* used to be in sta_info */
+	int last_txrate_idx;
 };
 
 static s32 iwl3945_expected_tpt_g[IWL_RATE_COUNT] = {
@@ -319,6 +322,7 @@ static void iwl3945_collect_tx_data(struct iwl3945_rs_sta *rs_sta,
 static void rs_rate_init(void *priv_rate, void *priv_sta,
 			 struct ieee80211_local *local, struct sta_info *sta)
 {
+	struct iwl3945_rs_sta *rs_sta = (void *)sta->rate_ctrl_priv;
 	int i;
 
 	IWL_DEBUG_RATE("enter\n");
@@ -335,11 +339,11 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 		}
 	}
 
-	sta->last_txrate_idx = sta->txrate_idx;
+	rs_sta->last_txrate_idx = sta->txrate_idx;
 
 	/* For 5 GHz band it start at IWL_FIRST_OFDM_RATE */
 	if (local->hw.conf.channel->band == IEEE80211_BAND_5GHZ)
-		sta->last_txrate_idx += IWL_FIRST_OFDM_RATE;
+		rs_sta->last_txrate_idx += IWL_FIRST_OFDM_RATE;
 
 	IWL_DEBUG_RATE("leave\n");
 }
@@ -674,14 +678,14 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 		return;
 	}
 
+	rs_sta = (void *)sta->rate_ctrl_priv;
+
 	rate_mask = sta->supp_rates[sband->band];
-	index = min(sta->last_txrate_idx & 0xffff, IWL_RATE_COUNT - 1);
+	index = min(rs_sta->last_txrate_idx & 0xffff, IWL_RATE_COUNT - 1);
 
 	if (sband->band == IEEE80211_BAND_5GHZ)
 		rate_mask = rate_mask << IWL_FIRST_OFDM_RATE;
 
-	rs_sta = (void *)sta->rate_ctrl_priv;
-
 	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
 	    !rs_sta->ibss_sta_added) {
 		u8 sta_id = iwl3945_hw_find_station(priv, hdr->addr1);
@@ -803,11 +807,11 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 
  out:
 
-	sta->last_txrate_idx = index;
+	rs_sta->last_txrate_idx = index;
 	if (sband->band == IEEE80211_BAND_5GHZ)
-		sta->txrate_idx = sta->last_txrate_idx - IWL_FIRST_OFDM_RATE;
+		sta->txrate_idx = rs_sta->last_txrate_idx - IWL_FIRST_OFDM_RATE;
 	else
-		sta->txrate_idx = sta->last_txrate_idx;
+		sta->txrate_idx = rs_sta->last_txrate_idx;
 
 	rcu_read_unlock();
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index af4e0b994e4..54f076bb202 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -163,6 +163,9 @@ struct iwl_lq_sta {
 	u32 dbg_fixed_rate;
 #endif
 	struct iwl_priv *drv;
+
+	/* used to be in sta_info */
+	int last_txrate_idx;
 };
 
 static void rs_rate_scale_perform(struct iwl_priv *priv,
@@ -1746,7 +1749,7 @@ static void rs_rate_scale_perform(struct iwl_priv *priv,
 	is_green = lq_sta->is_green;
 
 	/* current tx rate */
-	index = sta->last_txrate_idx;
+	index = lq_sta->last_txrate_idx;
 
 	IWL_DEBUG_RATE("Rate scale index %d for type %d\n", index,
 		       tbl->lq_type);
@@ -2059,7 +2062,7 @@ lq_update:
 out:
 	tbl->current_rate = rate_n_flags_from_tbl(tbl, index, is_green);
 	i = index;
-	sta->last_txrate_idx = i;
+	lq_sta->last_txrate_idx = i;
 
 	/* sta->txrate_idx is an index to A mode rates which start
 	 * at IWL_FIRST_OFDM_RATE
@@ -2090,7 +2093,7 @@ static void rs_initialize_lq(struct iwl_priv *priv,
 		goto out;
 
 	lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv;
-	i = sta->last_txrate_idx;
+	i = lq_sta->last_txrate_idx;
 
 	if ((lq_sta->lq.sta_id == 0xff) &&
 	    (priv->iw_mode == NL80211_IFTYPE_ADHOC))
@@ -2161,7 +2164,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 	}
 
 	lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv;
-	i = sta->last_txrate_idx;
+	i = lq_sta->last_txrate_idx;
 
 	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
 	    !lq_sta->ibss_sta_added) {
@@ -2270,10 +2273,10 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 		if (sta->supp_rates[sband->band] & BIT(i))
 			sta->txrate_idx = i;
 
-	sta->last_txrate_idx = sta->txrate_idx;
+	lq_sta->last_txrate_idx = sta->txrate_idx;
 	/* For MODE_IEEE80211A, skip over cck rates in global rate table */
 	if (local->hw.conf.channel->band == IEEE80211_BAND_5GHZ)
-		sta->last_txrate_idx += IWL_FIRST_OFDM_RATE;
+		lq_sta->last_txrate_idx += IWL_FIRST_OFDM_RATE;
 
 	lq_sta->is_dup = 0;
 	lq_sta->is_green = rs_use_green(priv, conf);
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 94867860c3e..24e44f52130 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -329,8 +329,6 @@ static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
 	if (rateidx >= sband->n_bitrates)
 		rateidx = sband->n_bitrates - 1;
 
-	sta->last_txrate_idx = rateidx;
-
 	rcu_read_unlock();
 
 	sel->rate_idx = rateidx;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 4a9b96eeb68..df42d181545 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -200,7 +200,6 @@ struct sta_ampdu_mlme {
  * @tx_bytes: TBD
  * @tx_fragments: number of transmitted MPDUs
  * @txrate_idx: TBD
- * @last_txrate_idx: TBD
  * @tid_seq: TBD
  * @wme_tx_queue: TBD
  * @ampdu_mlme: TBD
@@ -278,7 +277,6 @@ struct sta_info {
 	unsigned long tx_bytes;
 	unsigned long tx_fragments;
 	int txrate_idx;
-	int last_txrate_idx;
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 #ifdef CONFIG_MAC80211_DEBUG_COUNTERS
 	unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES];
-- 
cgit v1.2.3-70-g09d2


From 323ce79a9cdbf838ea577677b1ddace8e0b4d4c6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 02:45:11 +0200
Subject: mac80211: share sta->supp_rates

As more preparation for a saner rate control algorithm API,
share the supported rates bitmap in the public API.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/rc.c            |  2 +-
 drivers/net/wireless/iwlwifi/iwl-3945-rs.c |  4 ++--
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c  |  6 +++---
 include/net/mac80211.h                     |  2 ++
 net/mac80211/cfg.c                         |  2 +-
 net/mac80211/mesh_plink.c                  |  4 ++--
 net/mac80211/mlme.c                        | 12 ++++++------
 net/mac80211/rate.h                        |  2 +-
 net/mac80211/sta_info.h                    |  2 --
 9 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c
index 390019ed398..226d70c73fe 100644
--- a/drivers/net/wireless/ath9k/rc.c
+++ b/drivers/net/wireless/ath9k/rc.c
@@ -1825,7 +1825,7 @@ static void ath_setup_rates(struct ieee80211_local *local, struct sta_info *sta)
 
 	sband =  local->hw.wiphy->bands[local->hw.conf.channel->band];
 	for (i = 0; i < sband->n_bitrates; i++) {
-		if (sta->supp_rates[local->hw.conf.channel->band] & BIT(i)) {
+		if (sta->sta.supp_rates[local->hw.conf.channel->band] & BIT(i)) {
 			rc_priv->neg_rates.rs_rates[j]
 				= (sband->bitrates[i].bitrate * 2) / 10;
 			j++;
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
index 46b672c3458..f751b909759 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
@@ -333,7 +333,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 	 * after assoc.. */
 
 	for (i = IWL_RATE_COUNT - 1; i >= 0; i--) {
-		if (sta->supp_rates[local->hw.conf.channel->band] & (1 << i)) {
+		if (sta->sta.supp_rates[local->hw.conf.channel->band] & (1 << i)) {
 			sta->txrate_idx = i;
 			break;
 		}
@@ -680,7 +680,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 
 	rs_sta = (void *)sta->rate_ctrl_priv;
 
-	rate_mask = sta->supp_rates[sband->band];
+	rate_mask = sta->sta.supp_rates[sband->band];
 	index = min(rs_sta->last_txrate_idx & 0xffff, IWL_RATE_COUNT - 1);
 
 	if (sband->band == IEEE80211_BAND_5GHZ)
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index 54f076bb202..f7191a9a7fb 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -1731,7 +1731,7 @@ static void rs_rate_scale_perform(struct iwl_priv *priv,
 		return;
 
 	lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv;
-	lq_sta->supp_rates = sta->supp_rates[lq_sta->band];
+	lq_sta->supp_rates = sta->sta.supp_rates[lq_sta->band];
 
 	tid = rs_tl_add_packet(lq_sta, hdr);
 
@@ -2233,7 +2233,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
 	lq_sta->flush_timer = 0;
-	lq_sta->supp_rates = sta->supp_rates[sband->band];
+	lq_sta->supp_rates = sta->sta.supp_rates[sband->band];
 	sta->txrate_idx = 3;
 	for (j = 0; j < LQ_SIZE; j++)
 		for (i = 0; i < IWL_RATE_COUNT; i++)
@@ -2270,7 +2270,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 
 	/* Find highest tx rate supported by hardware and destination station */
 	for (i = 0; i < sband->n_bitrates; i++)
-		if (sta->supp_rates[sband->band] & BIT(i))
+		if (sta->sta.supp_rates[sband->band] & BIT(i))
 			sta->txrate_idx = i;
 
 	lq_sta->last_txrate_idx = sta->txrate_idx;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5a6a029da4d..ef8e4cc32c2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -666,10 +666,12 @@ enum set_key_cmd {
  *
  * @addr: MAC address
  * @aid: AID we assigned to the station if we're an AP
+ * @supp_rates: Bitmap of supported rates (per band)
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void *), size is determined in hw information.
  */
 struct ieee80211_sta {
+	u64 supp_rates[IEEE80211_NUM_BANDS];
 	u8 addr[ETH_ALEN];
 	u16 aid;
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ed5e77ce627..47988d2eb15 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -667,7 +667,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 					rates |= BIT(j);
 			}
 		}
-		sta->supp_rates[local->oper_channel->band] = rates;
+		sta->sta.supp_rates[local->oper_channel->band] = rates;
 	}
 
 	if (params->ht_capa) {
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index debf7834dbc..faac101c0f8 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -106,7 +106,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata,
 		return NULL;
 
 	sta->flags = WLAN_STA_AUTHORIZED;
-	sta->supp_rates[local->hw.conf.channel->band] = rates;
+	sta->sta.supp_rates[local->hw.conf.channel->band] = rates;
 
 	return sta;
 }
@@ -243,7 +243,7 @@ void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct ieee80211_sub_if_data
 	}
 
 	sta->last_rx = jiffies;
-	sta->supp_rates[local->hw.conf.channel->band] = rates;
+	sta->sta.supp_rates[local->hw.conf.channel->band] = rates;
 	if (peer_accepting_plinks && sta->plink_state == PLINK_LISTEN &&
 			sdata->u.mesh.accepting_plinks &&
 			sdata->u.mesh.mshcfg.auto_open_plinks)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 35c421b89dd..c049f336e58 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1301,7 +1301,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	sta->supp_rates[local->hw.conf.channel->band] = rates;
+	sta->sta.supp_rates[local->hw.conf.channel->band] = rates;
 	sdata->bss_conf.basic_rates = basic_rates;
 
 	/* cf. IEEE 802.11 9.2.12 */
@@ -1497,13 +1497,13 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		if (sta) {
 			u64 prev_rates;
 
-			prev_rates = sta->supp_rates[band];
+			prev_rates = sta->sta.supp_rates[band];
 			/* make sure mandatory rates are always added */
-			sta->supp_rates[band] = supp_rates |
+			sta->sta.supp_rates[band] = supp_rates |
 				ieee80211_mandatory_rates(local, band);
 
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
-			if (sta->supp_rates[band] != prev_rates)
+			if (sta->sta.supp_rates[band] != prev_rates)
 				printk(KERN_DEBUG "%s: updated supp_rates set "
 				    "for %s based on beacon info (0x%llx | "
 				    "0x%llx -> 0x%llx)\n",
@@ -1511,7 +1511,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				    print_mac(mac, sta->sta.addr),
 				    (unsigned long long) prev_rates,
 				    (unsigned long long) supp_rates,
-				    (unsigned long long) sta->supp_rates[band]);
+				    (unsigned long long) sta->sta.supp_rates[band]);
 #endif
 		} else {
 			ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid,
@@ -2339,7 +2339,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 	set_sta_flags(sta, WLAN_STA_AUTHORIZED);
 
 	/* make sure mandatory rates are always added */
-	sta->supp_rates[band] = supp_rates |
+	sta->sta.supp_rates[band] = supp_rates |
 			ieee80211_mandatory_rates(local, band);
 
 	rate_control_rate_init(sta, local);
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index ede7ab56f65..5f18c27eb90 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -134,7 +134,7 @@ static inline int rate_supported(struct sta_info *sta,
 				 enum ieee80211_band band,
 				 int index)
 {
-	return (sta == NULL || sta->supp_rates[band] & BIT(index));
+	return (sta == NULL || sta->sta.supp_rates[band] & BIT(index));
 }
 
 static inline s8
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index df42d181545..4dafa044b2f 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -168,7 +168,6 @@ struct sta_ampdu_mlme {
  *	in the header file.
  * @flaglock: spinlock for flags accesses
  * @ht_info: HT capabilities of this STA
- * @supp_rates: Bitmap of supported rates (per band)
  * @addr: MAC address of this STA
  * @aid: STA's unique AID (1..2007, 0 = not assigned yet),
  *	only used in AP (and IBSS?) mode
@@ -228,7 +227,6 @@ struct sta_info {
 	spinlock_t lock;
 	spinlock_t flaglock;
 	struct ieee80211_ht_info ht_info;
-	u64 supp_rates[IEEE80211_NUM_BANDS];
 
 	u16 listen_interval;
 
-- 
cgit v1.2.3-70-g09d2


From ae17e986091637e7ef5a8224c7b689029b105131 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 03:04:36 +0200
Subject: mac80211: move txrate_idx into RC algorithms

The sta_info->txrate_idx member isn't used by all RC algorithms
in the way it was intended to be used, move it into those that
require it (only PID) and keep track in the core code of which
rate was last used for reporting to userspace and the mesh MLME.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/rc.c            |  1 -
 drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 10 +++------
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c  | 13 ++----------
 net/mac80211/mesh_hwmp.c                   |  2 +-
 net/mac80211/rc80211_pid.h                 |  2 ++
 net/mac80211/rc80211_pid_algo.c            | 33 +++++++++++++++++-------------
 net/mac80211/sta_info.h                    |  2 +-
 net/mac80211/tx.c                          |  2 ++
 net/mac80211/wext.c                        |  4 ++--
 9 files changed, 32 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c
index 226d70c73fe..1cc9daf4455 100644
--- a/drivers/net/wireless/ath9k/rc.c
+++ b/drivers/net/wireless/ath9k/rc.c
@@ -2039,7 +2039,6 @@ static void ath_rate_init(void *priv, void *priv_sta,
 	DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__);
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-	sta->txrate_idx = rate_lowest_index(local, sband, sta);
 
 	ath_setup_rates(local, sta);
 	if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
index f751b909759..a279bf1dc9b 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
@@ -334,13 +334,11 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 
 	for (i = IWL_RATE_COUNT - 1; i >= 0; i--) {
 		if (sta->sta.supp_rates[local->hw.conf.channel->band] & (1 << i)) {
-			sta->txrate_idx = i;
+			rs_sta->last_txrate_idx = i;
 			break;
 		}
 	}
 
-	rs_sta->last_txrate_idx = sta->txrate_idx;
-
 	/* For 5 GHz band it start at IWL_FIRST_OFDM_RATE */
 	if (local->hw.conf.channel->band == IEEE80211_BAND_5GHZ)
 		rs_sta->last_txrate_idx += IWL_FIRST_OFDM_RATE;
@@ -809,15 +807,13 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 
 	rs_sta->last_txrate_idx = index;
 	if (sband->band == IEEE80211_BAND_5GHZ)
-		sta->txrate_idx = rs_sta->last_txrate_idx - IWL_FIRST_OFDM_RATE;
+		sel->rate_idx = rs_sta->last_txrate_idx - IWL_FIRST_OFDM_RATE;
 	else
-		sta->txrate_idx = rs_sta->last_txrate_idx;
+		sel->rate_idx = rs_sta->last_txrate_idx;
 
 	rcu_read_unlock();
 
 	IWL_DEBUG_RATE("leave: %d\n", index);
-
-	sel->rate_idx = sta->txrate_idx;
 }
 
 static struct rate_control_ops rs_ops = {
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index f7191a9a7fb..a8711c314e6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -2064,14 +2064,6 @@ out:
 	i = index;
 	lq_sta->last_txrate_idx = i;
 
-	/* sta->txrate_idx is an index to A mode rates which start
-	 * at IWL_FIRST_OFDM_RATE
-	 */
-	if (lq_sta->band == IEEE80211_BAND_5GHZ)
-		sta->txrate_idx = i - IWL_FIRST_OFDM_RATE;
-	else
-		sta->txrate_idx = i;
-
 	return;
 }
 
@@ -2234,7 +2226,6 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 
 	lq_sta->flush_timer = 0;
 	lq_sta->supp_rates = sta->sta.supp_rates[sband->band];
-	sta->txrate_idx = 3;
 	for (j = 0; j < LQ_SIZE; j++)
 		for (i = 0; i < IWL_RATE_COUNT; i++)
 			rs_rate_scale_clear_window(&lq_sta->lq_info[j].win[i]);
@@ -2269,11 +2260,11 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 	}
 
 	/* Find highest tx rate supported by hardware and destination station */
+	lq_sta->last_txrate_idx = 3;
 	for (i = 0; i < sband->n_bitrates; i++)
 		if (sta->sta.supp_rates[sband->band] & BIT(i))
-			sta->txrate_idx = i;
+			lq_sta->last_txrate_idx = i;
 
-	lq_sta->last_txrate_idx = sta->txrate_idx;
 	/* For MODE_IEEE80211A, skip over cck rates in global rate table */
 	if (local->hw.conf.channel->band == IEEE80211_BAND_5GHZ)
 		lq_sta->last_txrate_idx += IWL_FIRST_OFDM_RATE;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 15a5c99270a..501c7831adb 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -223,7 +223,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
 	/* bitrate is in units of 100 Kbps, while we need rate in units of
 	 * 1Mbps. This will be corrected on tx_time computation.
 	 */
-	rate = sband->bitrates[sta->txrate_idx].bitrate;
+	rate = sband->bitrates[sta->last_txrate_idx].bitrate;
 	tx_time = (device_constant + 10 * test_frame_len / rate);
 	estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
 	result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ;
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 0a9135b974b..ffafc5da572 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -180,6 +180,8 @@ struct rc_pid_sta_info {
 	u32 tx_num_failed;
 	u32 tx_num_xmit;
 
+	int txrate_idx;
+
 	/* Average failed frames percentage error (i.e. actual vs. target
 	 * percentage), scaled by RC_PID_SMOOTHING. This value is computed
 	 * using using an exponential weighted average technique:
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 24e44f52130..bc1c4569caa 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -75,7 +75,8 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_supported_band *sband;
 	int cur_sorted, new_sorted, probe, tmp, n_bitrates, band;
-	int cur = sta->txrate_idx;
+	struct rc_pid_sta_info *spinfo = (void *)sta->rate_ctrl_priv;
+	int cur = spinfo->txrate_idx;
 
 	sdata = sta->sdata;
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
@@ -111,7 +112,7 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
 	/* Fit the rate found to the nearest supported rate. */
 	do {
 		if (rate_supported(sta, band, rinfo[tmp].index)) {
-			sta->txrate_idx = rinfo[tmp].index;
+			spinfo->txrate_idx = rinfo[tmp].index;
 			break;
 		}
 		if (adj < 0)
@@ -121,9 +122,9 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
 	} while (tmp < n_bitrates && tmp >= 0);
 
 #ifdef CONFIG_MAC80211_DEBUGFS
-	rate_control_pid_event_rate_change(
-		&((struct rc_pid_sta_info *)sta->rate_ctrl_priv)->events,
-		sta->txrate_idx, sband->bitrates[sta->txrate_idx].bitrate);
+	rate_control_pid_event_rate_change(&spinfo->events,
+		spinfo->txrate_idx,
+		sband->bitrates[spinfo->txrate_idx].bitrate);
 #endif
 }
 
@@ -190,16 +191,16 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 	spinfo->tx_num_failed = 0;
 
 	/* If we just switched rate, update the rate behaviour info. */
-	if (pinfo->oldrate != sta->txrate_idx) {
+	if (pinfo->oldrate != spinfo->txrate_idx) {
 
 		i = rinfo[pinfo->oldrate].rev_index;
-		j = rinfo[sta->txrate_idx].rev_index;
+		j = rinfo[spinfo->txrate_idx].rev_index;
 
 		tmp = (pf - spinfo->last_pf);
 		tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT);
 
 		rinfo[j].diff = rinfo[i].diff + tmp;
-		pinfo->oldrate = sta->txrate_idx;
+		pinfo->oldrate = spinfo->txrate_idx;
 	}
 	rate_control_pid_normalize(pinfo, sband->n_bitrates);
 
@@ -252,19 +253,20 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 	if (!sta)
 		goto unlock;
 
+	spinfo = sta->rate_ctrl_priv;
+
 	/* Don't update the state if we're not controlling the rate. */
 	sdata = sta->sdata;
 	if (sdata->force_unicast_rateidx > -1) {
-		sta->txrate_idx = sdata->max_ratectrl_rateidx;
+		spinfo->txrate_idx = sdata->max_ratectrl_rateidx;
 		goto unlock;
 	}
 
 	/* Ignore all frames that were sent with a different rate than the rate
 	 * we currently advise mac80211 to use. */
-	if (info->tx_rate_idx != sta->txrate_idx)
+	if (info->tx_rate_idx != spinfo->txrate_idx)
 		goto unlock;
 
-	spinfo = sta->rate_ctrl_priv;
 	spinfo->tx_num_xmit++;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
@@ -301,6 +303,7 @@ static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_sub_if_data *sdata;
+	struct rc_pid_sta_info *spinfo;
 	struct sta_info *sta;
 	int rateidx;
 	u16 fc;
@@ -321,10 +324,11 @@ static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
 
 	/* If a forced rate is in effect, select it. */
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	spinfo = (struct rc_pid_sta_info *)sta->rate_ctrl_priv;
 	if (sdata->force_unicast_rateidx > -1)
-		sta->txrate_idx = sdata->force_unicast_rateidx;
+		spinfo->txrate_idx = sdata->force_unicast_rateidx;
 
-	rateidx = sta->txrate_idx;
+	rateidx = spinfo->txrate_idx;
 
 	if (rateidx >= sband->n_bitrates)
 		rateidx = sband->n_bitrates - 1;
@@ -349,9 +353,10 @@ static void rate_control_pid_rate_init(void *priv, void *priv_sta,
 	 * Until that method is implemented, we will use the lowest supported
 	 * rate as a workaround. */
 	struct ieee80211_supported_band *sband;
+	struct rc_pid_sta_info *spinfo = (void *)sta->rate_ctrl_priv;
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-	sta->txrate_idx = rate_lowest_index(local, sband, sta);
+	spinfo->txrate_idx = rate_lowest_index(local, sband, sta);
 	sta->fail_avg = 0;
 }
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 4dafa044b2f..5d8fabf7a68 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -274,7 +274,7 @@ struct sta_info {
 	unsigned long tx_packets;
 	unsigned long tx_bytes;
 	unsigned long tx_fragments;
-	int txrate_idx;
+	unsigned int last_txrate_idx;
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 #ifdef CONFIG_MAC80211_DEBUG_COUNTERS
 	unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES];
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 07bf228d0b1..7468495d6f9 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -485,6 +485,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 
 	if (likely(tx->rate_idx < 0)) {
 		rate_control_get_rate(tx->dev, sband, tx->skb, &rsel);
+		if (tx->sta)
+			tx->sta->last_txrate_idx = rsel.rate_idx;
 		tx->rate_idx = rsel.rate_idx;
 		if (unlikely(rsel.probe_idx >= 0)) {
 			info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index aef9707700f..7e0d53abde2 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -636,8 +636,8 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev,
 
 	sta = sta_info_get(local, sdata->u.sta.bssid);
 
-	if (sta && sta->txrate_idx < sband->n_bitrates)
-		rate->value = sband->bitrates[sta->txrate_idx].bitrate;
+	if (sta && sta->last_txrate_idx < sband->n_bitrates)
+		rate->value = sband->bitrates[sta->last_txrate_idx].bitrate;
 	else
 		rate->value = 0;
 
-- 
cgit v1.2.3-70-g09d2


From 687c7c0807371aeaa94ff2fff511eeb326b5c5de Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 03:14:11 +0200
Subject: mac80211: share sta_info->ht_info

Rate control algorithms may need access to a station's
HT capabilities, so share the ht_info struct in the
public station API.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 6 +++---
 include/net/mac80211.h                    | 2 ++
 net/mac80211/cfg.c                        | 2 +-
 net/mac80211/mlme.c                       | 4 ++--
 net/mac80211/sta_info.h                   | 2 --
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index a8711c314e6..f45a752e93c 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -1154,10 +1154,10 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv,
 	s8 is_green = lq_sta->is_green;
 
 	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) ||
-	    !sta->ht_info.ht_supported)
+	    !sta->sta.ht_info.ht_supported)
 		return -1;
 
-	if (((sta->ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2)
+	if (((sta->sta.ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2)
 						== WLAN_HT_CAP_SM_PS_STATIC)
 		return -1;
 
@@ -1222,7 +1222,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv,
 	s32 rate;
 
 	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) ||
-	    !sta->ht_info.ht_supported)
+	    !sta->sta.ht_info.ht_supported)
 		return -1;
 
 	IWL_DEBUG_RATE("LQ: try to switch to SISO\n");
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ef8e4cc32c2..d6669fd3ffa 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -667,6 +667,7 @@ enum set_key_cmd {
  * @addr: MAC address
  * @aid: AID we assigned to the station if we're an AP
  * @supp_rates: Bitmap of supported rates (per band)
+ * @ht_info: HT capabilities of this STA
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void *), size is determined in hw information.
  */
@@ -674,6 +675,7 @@ struct ieee80211_sta {
 	u64 supp_rates[IEEE80211_NUM_BANDS];
 	u8 addr[ETH_ALEN];
 	u16 aid;
+	struct ieee80211_ht_info ht_info;
 
 	/* must be last */
 	u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *))));
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 47988d2eb15..e2574885db4 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -672,7 +672,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 
 	if (params->ht_capa) {
 		ieee80211_ht_cap_ie_to_ht_info(params->ht_capa,
-					       &sta->ht_info);
+					       &sta->sta.ht_info);
 	}
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c049f336e58..8611a8318c9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1316,11 +1316,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		struct ieee80211_ht_bss_info bss_info;
 		ieee80211_ht_cap_ie_to_ht_info(
 				(struct ieee80211_ht_cap *)
-				elems.ht_cap_elem, &sta->ht_info);
+				elems.ht_cap_elem, &sta->sta.ht_info);
 		ieee80211_ht_addt_info_ie_to_ht_bss_info(
 				(struct ieee80211_ht_addt_info *)
 				elems.ht_info_elem, &bss_info);
-		ieee80211_handle_ht(local, 1, &sta->ht_info, &bss_info);
+		ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info);
 	}
 
 	rate_control_rate_init(sta, local);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5d8fabf7a68..b773c7b8d29 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -167,7 +167,6 @@ struct sta_ampdu_mlme {
  * @lock: used for locking all fields that require locking, see comments
  *	in the header file.
  * @flaglock: spinlock for flags accesses
- * @ht_info: HT capabilities of this STA
  * @addr: MAC address of this STA
  * @aid: STA's unique AID (1..2007, 0 = not assigned yet),
  *	only used in AP (and IBSS?) mode
@@ -226,7 +225,6 @@ struct sta_info {
 	void *rate_ctrl_priv;
 	spinlock_t lock;
 	spinlock_t flaglock;
-	struct ieee80211_ht_info ht_info;
 
 	u16 listen_interval;
 
-- 
cgit v1.2.3-70-g09d2


From 3061307013267c2c75efae3925f461858d832101 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 11 Sep 2008 05:27:40 +0200
Subject: mac80211: pass AP vif pointer for VLANs

We cannot pass a VLAN vif pointer to the driver since those are
entirely virtual and we never tell the driver.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7468495d6f9..698c8233e6b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1351,6 +1351,10 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 		return 0;
 	}
 
+	if (osdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+		osdata = container_of(osdata->bss,
+				      struct ieee80211_sub_if_data,
+				      u.ap);
 	info->control.vif = &osdata->vif;
 	ret = ieee80211_tx(odev, skb);
 	dev_put(odev);
-- 
cgit v1.2.3-70-g09d2


From bed7aac9416f50425d2200df32bcc9bf248ff8cb Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Tue, 26 Aug 2008 11:58:01 -0300
Subject: rfkill: remove transmitter blocking on suspend

Currently, rfkill would stand in the way of properly supporting wireless
devices that are capable of waking the system up from sleep or hibernation
when they receive a special wireless message.  It would also get in the way
of mesh devices that need to remain operational even during platform
suspend.

To avoid that, stop trying to block the transmitters on the rfkill class
suspend handler.

Drivers that need rfkill's older behaviour will have to implement it by
themselves in their own suspend handling.

Do note that rfkill *will* attempt to restore the transmitter state on
resume in any situation.  This happens after the driver's resume method is
called by the suspend core (class devices resume after the devices they are
attached to have been resumed).

The following drivers need to check if they need to explicitly block
their transmitters in their own suspend handlers (maintainers Cc'd):
	arch/arm/mach-pxa/tosa-bt.c
	drivers/net/usb/hso.c
	drivers/net/wireless/rt2x00/* (USB might need it?)
	drivers/net/wireless/b43/ (SSB over USB might need it?)
	drivers/misc/hp-wmi.c
	eeepc-laptop w/rfkill support (not in mainline yet)
	Compal laptop w/rfkill support (not in mainline yet)
	toshiba-acpi w/rfkill support (not in mainline yet)

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Andrew Bird <ajb@spheresystems.co.uk>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Cezary Jackiewicz <cezary.jackiewicz@gmail.com>
Cc: Philip Langdale <philipl@overt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/rfkill.txt | 32 ++++++++++++++++++++++++++++----
 net/rfkill/rfkill.c      | 16 ++--------------
 2 files changed, 30 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index 6fcb3060dec..b65f0799df4 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -341,6 +341,8 @@ key that does nothing by itself, as well as any hot key that is type-specific
 3.1 Guidelines for wireless device drivers
 ------------------------------------------
 
+(in this text, rfkill->foo means the foo field of struct rfkill).
+
 1. Each independent transmitter in a wireless device (usually there is only one
 transmitter per device) should have a SINGLE rfkill class attached to it.
 
@@ -363,10 +365,32 @@ This rule exists because users of the rfkill subsystem expect to get (and set,
 when possible) the overall transmitter rfkill state, not of a particular rfkill
 line.
 
-5. During suspend, the rfkill class will attempt to soft-block the radio
-through a call to rfkill->toggle_radio, and will try to restore its previous
-state during resume.  After a rfkill class is suspended, it will *not* call
-rfkill->toggle_radio until it is resumed.
+5. The wireless device driver MUST NOT leave the transmitter enabled during
+suspend and hibernation unless:
+
+	5.1. The transmitter has to be enabled for some sort of functionality
+	like wake-on-wireless-packet or autonomous packed forwarding in a mesh
+	network, and that functionality is enabled for this suspend/hibernation
+	cycle.
+
+AND
+
+	5.2. The device was not on a user-requested BLOCKED state before
+	the suspend (i.e. the driver must NOT unblock a device, not even
+	to support wake-on-wireless-packet or remain in the mesh).
+
+In other words, there is absolutely no allowed scenario where a driver can
+automatically take action to unblock a rfkill controller (obviously, this deals
+with scenarios where soft-blocking or both soft and hard blocking is happening.
+Scenarios where hardware rfkill lines are the only ones blocking the
+transmitter are outside of this rule, since the wireless device driver does not
+control its input hardware rfkill lines in the first place).
+
+6. During resume, rfkill will try to restore its previous state.
+
+7. After a rfkill class is suspended, it will *not* call rfkill->toggle_radio
+until it is resumed.
+
 
 Example of a WLAN wireless driver connected to the rfkill subsystem:
 --------------------------------------------------------------------
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index d5735799ccd..ea0dc04b3c7 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -512,21 +512,9 @@ static void rfkill_release(struct device *dev)
 #ifdef CONFIG_PM
 static int rfkill_suspend(struct device *dev, pm_message_t state)
 {
-	struct rfkill *rfkill = to_rfkill(dev);
-
-	if (dev->power.power_state.event != state.event) {
-		if (state.event & PM_EVENT_SLEEP) {
-			/* Stop transmitter, keep state, no notifies */
-			update_rfkill_state(rfkill);
-
-			mutex_lock(&rfkill->mutex);
-			rfkill->toggle_radio(rfkill->data,
-						RFKILL_STATE_SOFT_BLOCKED);
-			mutex_unlock(&rfkill->mutex);
-		}
-
+	/* mark class device as suspended */
+	if (dev->power.power_state.event != state.event)
 		dev->power.power_state = state;
-	}
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 25d834e16294c8dfd923dae6bdb8a055391a99a5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 12 Sep 2008 22:52:47 +0200
Subject: mac80211: fix virtual interfaces vs. injection

Currently, virtual interface pointers passed to drivers might be
from monitor interfaces and as such completely uninitialised
because we do not tell the driver about monitor interfaces when
those are created. Instead of passing them, we should therefore
indicate to the driver that there is no information; do that by
passing a NULL value and adjust drivers to cope with it.

As a result, some mac80211 API functions also need to cope with
a NULL vif pointer so drivers can still call them unconditionally.

Also, when injecting frames we really don't want to pass NULL all
the time, if we know we are the source address of a frame and have
a local interface for that address, we can to use that interface.
This also helps with processing the frame correctly for that
interface which will help the 802.11w implementation. It's not
entirely correct for VLANs or WDS interfaces because there the MAC
address isn't unique, but it's already a lot better than what we
do now.

Finally, when injecting without a matching local interface, don't
assign sequence numbers at all.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/mac80211_hwsim.c     |  3 +-
 drivers/net/wireless/rt2x00/rt2x00queue.c | 21 ++++++----
 include/net/mac80211.h                    |  1 +
 net/mac80211/tx.c                         | 64 +++++++++++++++++++++++++++++--
 net/mac80211/util.c                       | 37 ++++++++++++------
 5 files changed, 101 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index da8aa83481f..631d65b73ed 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -258,7 +258,8 @@ static int mac80211_hwsim_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 
 	txi = IEEE80211_SKB_CB(skb);
 
-	hwsim_check_magic(txi->control.vif);
+	if (txi->control.vif)
+		hwsim_check_magic(txi->control.vif);
 	if (txi->control.sta)
 		hwsim_check_sta_magic(txi->control.sta);
 
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index a5e965068c8..b7f4fe8fba6 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -155,7 +155,6 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
 {
 	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb);
-	struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data;
 	struct ieee80211_rate *rate =
 	    ieee80211_get_tx_rate(rt2x00dev->hw, tx_info);
@@ -278,16 +277,22 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
 	 * sequence counter given by mac80211.
 	 */
 	if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
-		spin_lock_irqsave(&intf->seqlock, irqflags);
+		if (likely(tx_info->control.vif)) {
+			struct rt2x00_intf *intf;
 
-		if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags))
-			intf->seqno += 0x10;
-		hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
-		hdr->seq_ctrl |= cpu_to_le16(intf->seqno);
+			intf = vif_to_intf(tx_info->control.vif);
 
-		spin_unlock_irqrestore(&intf->seqlock, irqflags);
+			spin_lock_irqsave(&intf->seqlock, irqflags);
 
-		__set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
+			if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags))
+				intf->seqno += 0x10;
+			hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
+			hdr->seq_ctrl |= cpu_to_le16(intf->seqno);
+
+			spin_unlock_irqrestore(&intf->seqlock, irqflags);
+
+			__set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
+		}
 	}
 
 	/*
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d6669fd3ffa..003e4a03874 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -330,6 +330,7 @@ struct ieee80211_tx_info {
 
 	union {
 		struct {
+			/* NB: vif can be NULL for injected frames */
 			struct ieee80211_vif *vif;
 			struct ieee80211_key_conf *hw_key;
 			struct ieee80211_sta *sta;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 698c8233e6b..c12f361d718 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -624,7 +624,14 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
 	u8 *qc;
 	int tid;
 
-	/* only for injected frames */
+	/*
+	 * Packet injection may want to control the sequence
+	 * number, if we have no matching interface then we
+	 * neither assign one ourselves nor ask the driver to.
+	 */
+	if (unlikely(!info->control.vif))
+		return TX_CONTINUE;
+
 	if (unlikely(ieee80211_is_ctl(hdr->frame_control)))
 		return TX_CONTINUE;
 
@@ -849,7 +856,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
 	skb->do_not_encrypt = 1;
-	info->flags |= IEEE80211_TX_CTL_INJECTED;
 	tx->flags &= ~IEEE80211_TX_FRAGMENTED;
 
 	/*
@@ -981,7 +987,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 
 	/* process and remove the injection radiotap header */
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) {
+	if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) {
 		if (__ieee80211_parse_tx_radiotap(tx, skb) == TX_DROP)
 			return TX_DROP;
 
@@ -1300,6 +1306,11 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 	struct ieee80211_sub_if_data *osdata;
 	int headroom;
 	bool may_encrypt;
+	enum {
+		NOT_MONITOR,
+		FOUND_SDATA,
+		UNKNOWN_ADDRESS,
+	} monitor_iface = NOT_MONITOR;
 	int ret;
 
 	if (skb->iif)
@@ -1335,6 +1346,50 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 				IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh,
 							     fwded_frames);
 		}
+	} else if (unlikely(osdata->vif.type == NL80211_IFTYPE_MONITOR)) {
+		struct ieee80211_sub_if_data *sdata;
+		struct ieee80211_local *local = osdata->local;
+		struct ieee80211_hdr *hdr;
+		int hdrlen;
+		u16 len_rthdr;
+
+		info->flags |= IEEE80211_TX_CTL_INJECTED;
+		monitor_iface = UNKNOWN_ADDRESS;
+
+		len_rthdr = ieee80211_get_radiotap_len(skb->data);
+		hdr = (struct ieee80211_hdr *)skb->data + len_rthdr;
+		hdrlen = ieee80211_hdrlen(hdr->frame_control);
+
+		/* check the header is complete in the frame */
+		if (likely(skb->len >= len_rthdr + hdrlen)) {
+			/*
+			 * We process outgoing injected frames that have a
+			 * local address we handle as though they are our
+			 * own frames.
+			 * This code here isn't entirely correct, the local
+			 * MAC address is not necessarily enough to find
+			 * the interface to use; for that proper VLAN/WDS
+			 * support we will need a different mechanism.
+			 */
+
+			rcu_read_lock();
+			list_for_each_entry_rcu(sdata, &local->interfaces,
+						list) {
+				if (!netif_running(sdata->dev))
+					continue;
+				if (compare_ether_addr(sdata->dev->dev_addr,
+						       hdr->addr2)) {
+					dev_hold(sdata->dev);
+					dev_put(odev);
+					osdata = sdata;
+					odev = osdata->dev;
+					skb->iif = sdata->dev->ifindex;
+					monitor_iface = FOUND_SDATA;
+					break;
+				}
+			}
+			rcu_read_unlock();
+		}
 	}
 
 	may_encrypt = !skb->do_not_encrypt;
@@ -1355,7 +1410,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 		osdata = container_of(osdata->bss,
 				      struct ieee80211_sub_if_data,
 				      u.ap);
-	info->control.vif = &osdata->vif;
+	if (likely(monitor_iface != UNKNOWN_ADDRESS))
+		info->control.vif = &osdata->vif;
 	ret = ieee80211_tx(odev, skb);
 	dev_put(odev);
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6eb222369bc..f32561ec224 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -231,16 +231,21 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
 					struct ieee80211_rate *rate)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_sub_if_data *sdata;
 	u16 dur;
 	int erp;
+	bool short_preamble = false;
 
 	erp = 0;
-	if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
-		erp = rate->flags & IEEE80211_RATE_ERP_G;
+	if (vif) {
+		sdata = vif_to_sdata(vif);
+		short_preamble = sdata->bss_conf.use_short_preamble;
+		if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
+			erp = rate->flags & IEEE80211_RATE_ERP_G;
+	}
 
 	dur = ieee80211_frame_duration(local, frame_len, rate->bitrate, erp,
-				       sdata->bss_conf.use_short_preamble);
+				       short_preamble);
 
 	return cpu_to_le16(dur);
 }
@@ -252,7 +257,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate;
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_sub_if_data *sdata;
 	bool short_preamble;
 	int erp;
 	u16 dur;
@@ -260,13 +265,17 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
-	short_preamble = sdata->bss_conf.use_short_preamble;
+	short_preamble = false;
 
 	rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx];
 
 	erp = 0;
-	if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
-		erp = rate->flags & IEEE80211_RATE_ERP_G;
+	if (vif) {
+		sdata = vif_to_sdata(vif);
+		short_preamble = sdata->bss_conf.use_short_preamble;
+		if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
+			erp = rate->flags & IEEE80211_RATE_ERP_G;
+	}
 
 	/* CTS duration */
 	dur = ieee80211_frame_duration(local, 10, rate->bitrate,
@@ -289,7 +298,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate;
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_sub_if_data *sdata;
 	bool short_preamble;
 	int erp;
 	u16 dur;
@@ -297,12 +306,16 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
-	short_preamble = sdata->bss_conf.use_short_preamble;
+	short_preamble = false;
 
 	rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx];
 	erp = 0;
-	if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
-		erp = rate->flags & IEEE80211_RATE_ERP_G;
+	if (vif) {
+		sdata = vif_to_sdata(vif);
+		short_preamble = sdata->bss_conf.use_short_preamble;
+		if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
+			erp = rate->flags & IEEE80211_RATE_ERP_G;
+	}
 
 	/* Data frame duration */
 	dur = ieee80211_frame_duration(local, frame_len, rate->bitrate,
-- 
cgit v1.2.3-70-g09d2


From 9222963f7c81e2897833a43066c9db75350c8586 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 12 Sep 2008 09:53:27 +0200
Subject: mac80211: fix sta_info kernel-doc warning

Sorry, forgot to run kernel-doc and just got the output from the nightly
run by email, this fixes a warning which I introduced when doing the
first RC API cleanups.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index b773c7b8d29..daedfa9e1c6 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -197,7 +197,7 @@ struct sta_ampdu_mlme {
  * @tx_packets: number of RX/TX MSDUs
  * @tx_bytes: TBD
  * @tx_fragments: number of transmitted MPDUs
- * @txrate_idx: TBD
+ * @last_txrate_idx: Index of the last used transmit rate
  * @tid_seq: TBD
  * @wme_tx_queue: TBD
  * @ampdu_mlme: TBD
-- 
cgit v1.2.3-70-g09d2


From c1b6cf4ee0fb8a3698c563e101a60f9ee4910de0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 12 Sep 2008 11:05:39 +0200
Subject: mac80211: remove beacon counters

The beacon counters mac80211 keeps are only used for debugfs,
unfortunately, they are incorrect for many hardware designs,
namely any design that has a beacon template. Hence, remove the
counters so we don't create the impression they are usable.

This also allows removing the beacon MESH #ifdef again.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs_netdev.c | 6 ------
 net/mac80211/ieee80211_i.h    | 5 -----
 net/mac80211/tx.c             | 9 ---------
 3 files changed, 20 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 1b33cad24ab..2a451562377 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -173,7 +173,6 @@ IEEE80211_IF_FILE(assoc_tries, u.sta.assoc_tries, DEC);
 IEEE80211_IF_FILE(auth_algs, u.sta.auth_algs, HEX);
 IEEE80211_IF_FILE(auth_alg, u.sta.auth_alg, DEC);
 IEEE80211_IF_FILE(auth_transaction, u.sta.auth_transaction, DEC);
-IEEE80211_IF_FILE(num_beacons_sta, u.sta.num_beacons, DEC);
 
 static ssize_t ieee80211_if_fmt_flags(
 	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
@@ -192,7 +191,6 @@ __IEEE80211_IF_FILE(flags);
 /* AP attributes */
 IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
 IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
-IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC);
 
 static ssize_t ieee80211_if_fmt_num_buffered_multicast(
 	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
@@ -265,7 +263,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 	DEBUGFS_ADD(auth_alg, sta);
 	DEBUGFS_ADD(auth_transaction, sta);
 	DEBUGFS_ADD(flags, sta);
-	DEBUGFS_ADD(num_beacons_sta, sta);
 }
 
 static void add_ap_files(struct ieee80211_sub_if_data *sdata)
@@ -276,7 +273,6 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
 
 	DEBUGFS_ADD(num_sta_ps, ap);
 	DEBUGFS_ADD(dtim_count, ap);
-	DEBUGFS_ADD(num_beacons, ap);
 	DEBUGFS_ADD(num_buffered_multicast, ap);
 }
 
@@ -398,7 +394,6 @@ static void del_sta_files(struct ieee80211_sub_if_data *sdata)
 	DEBUGFS_DEL(auth_alg, sta);
 	DEBUGFS_DEL(auth_transaction, sta);
 	DEBUGFS_DEL(flags, sta);
-	DEBUGFS_DEL(num_beacons_sta, sta);
 }
 
 static void del_ap_files(struct ieee80211_sub_if_data *sdata)
@@ -409,7 +404,6 @@ static void del_ap_files(struct ieee80211_sub_if_data *sdata)
 
 	DEBUGFS_DEL(num_sta_ps, ap);
 	DEBUGFS_DEL(dtim_count, ap);
-	DEBUGFS_DEL(num_beacons, ap);
 	DEBUGFS_DEL(num_buffered_multicast, ap);
 }
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6bd6a6306da..3912fba6d3d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -229,7 +229,6 @@ struct ieee80211_if_ap {
 	struct sk_buff_head ps_bc_buf;
 	atomic_t num_sta_ps; /* number of stations in PS mode */
 	int dtim_count;
-	int num_beacons; /* number of TXed beacon frames for this BSS */
 };
 
 struct ieee80211_if_wds {
@@ -352,7 +351,6 @@ struct ieee80211_if_sta {
 	u32 supp_rates_bits[IEEE80211_NUM_BANDS];
 
 	int wmm_last_param_set;
-	int num_beacons; /* number of TXed beacon frames by this STA */
 };
 
 struct ieee80211_if_mesh {
@@ -388,7 +386,6 @@ struct ieee80211_if_mesh {
 	struct mesh_config mshcfg;
 	u32 mesh_seqnum;
 	bool accepting_plinks;
-	int num_beacons;
 };
 
 #ifdef CONFIG_MAC80211_MESH
@@ -484,7 +481,6 @@ struct ieee80211_sub_if_data {
 			struct dentry *auth_alg;
 			struct dentry *auth_transaction;
 			struct dentry *flags;
-			struct dentry *num_beacons_sta;
 			struct dentry *force_unicast_rateidx;
 			struct dentry *max_ratectrl_rateidx;
 		} sta;
@@ -492,7 +488,6 @@ struct ieee80211_sub_if_data {
 			struct dentry *drop_unencrypted;
 			struct dentry *num_sta_ps;
 			struct dentry *dtim_count;
-			struct dentry *num_beacons;
 			struct dentry *force_unicast_rateidx;
 			struct dentry *max_ratectrl_rateidx;
 			struct dentry *num_buffered_multicast;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c12f361d718..d136a371e6b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1864,7 +1864,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	struct rate_selection rsel;
 	struct beacon_data *beacon;
 	struct ieee80211_supported_band *sband;
-	int *num_beacons;
 	enum ieee80211_band band = local->hw.conf.channel->band;
 
 	sband = local->hw.wiphy->bands[band];
@@ -1912,8 +1911,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 			if (beacon->tail)
 				memcpy(skb_put(skb, beacon->tail_len),
 				       beacon->tail, beacon->tail_len);
-
-			num_beacons = &ap->num_beacons;
 		} else
 			goto out;
 	} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
@@ -1931,8 +1928,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 		hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 						 IEEE80211_STYPE_BEACON);
 
-		num_beacons = &ifsta->num_beacons;
-#ifdef CONFIG_MAC80211_MESH
 	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
 		struct ieee80211_mgmt *mgmt;
 		u8 *pos;
@@ -1960,9 +1955,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 		*pos++ = 0x0;
 
 		mesh_mgmt_ies_add(skb, sdata);
-
-		num_beacons = &sdata->u.mesh.num_beacons;
-#endif
 	} else {
 		WARN_ON(1);
 		goto out;
@@ -1999,7 +1991,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	info->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
 	info->control.retry_limit = 1;
 
-	(*num_beacons)++;
 out:
 	rcu_read_unlock();
 	return skb;
-- 
cgit v1.2.3-70-g09d2


From 538df283c185c477dbdafafa9652c33e9742de75 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 12 Sep 2008 15:22:53 +0200
Subject: mac80211: remove debug frame dumping

You can just pull up a monitor interface to get much more
detailed information, or, when debugging a driver, insert
dump code into the driver (which usually you will have to
do anyway to dump the driver-specific information). Hence
this option is useless.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Kconfig | 13 -------------
 net/mac80211/tx.c    | 42 ------------------------------------------
 2 files changed, 55 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 80d693392b0..8427518e4f2 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -179,19 +179,6 @@ config MAC80211_VERBOSE_MPL_DEBUG
 
 	  Do not select this option.
 
-config MAC80211_LOWTX_FRAME_DUMP
-	bool "Debug frame dumping"
-	depends on MAC80211_DEBUG_MENU
-	---help---
-	  Selecting this option will cause the stack to
-	  print a message for each frame that is handed
-	  to the lowlevel driver for transmission. This
-	  message includes all MAC addresses and the
-	  frame control field.
-
-	  If unsure, say N and insert the debugging code
-	  you require into the driver you are debugging.
-
 config MAC80211_DEBUG_COUNTERS
 	bool "Extra statistics for TX/RX debugging"
 	depends on MAC80211_DEBUG_MENU
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d136a371e6b..20d683641b4 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -38,43 +38,6 @@
 
 /* misc utils */
 
-#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP
-static void ieee80211_dump_frame(const char *ifname, const char *title,
-				 const struct sk_buff *skb)
-{
-	const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	unsigned int hdrlen;
-	DECLARE_MAC_BUF(mac);
-
-	printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len);
-	if (skb->len < 4) {
-		printk("\n");
-		return;
-	}
-
-	hdrlen = ieee80211_hdrlen(hdr->frame_control);
-	if (hdrlen > skb->len)
-		hdrlen = skb->len;
-	if (hdrlen >= 4)
-		printk(" FC=0x%04x DUR=0x%04x",
-		    le16_to_cpu(hdr->frame_control), le16_to_cpu(hdr->duration_id));
-	if (hdrlen >= 10)
-		printk(" A1=%s", print_mac(mac, hdr->addr1));
-	if (hdrlen >= 16)
-		printk(" A2=%s", print_mac(mac, hdr->addr2));
-	if (hdrlen >= 24)
-		printk(" A3=%s", print_mac(mac, hdr->addr3));
-	if (hdrlen >= 30)
-		printk(" A4=%s", print_mac(mac, hdr->addr4));
-	printk("\n");
-}
-#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */
-static inline void ieee80211_dump_frame(const char *ifname, const char *title,
-					struct sk_buff *skb)
-{
-}
-#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */
-
 static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 				 int next_frag_len)
 {
@@ -1068,8 +1031,6 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 			return IEEE80211_TX_AGAIN;
 		info =  IEEE80211_SKB_CB(skb);
 
-		ieee80211_dump_frame(wiphy_name(local->hw.wiphy),
-				     "TX to low-level driver", skb);
 		ret = local->ops->tx(local_to_hw(local), skb);
 		if (ret)
 			return IEEE80211_TX_AGAIN;
@@ -1099,9 +1060,6 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 						~IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 			}
 
-			ieee80211_dump_frame(wiphy_name(local->hw.wiphy),
-					     "TX to low-level driver",
-					     tx->extra_frag[i]);
 			ret = local->ops->tx(local_to_hw(local),
 					    tx->extra_frag[i]);
 			if (ret)
-- 
cgit v1.2.3-70-g09d2


From e16ce63c893ff7ccb314d2fbdafbbc915b64d173 Mon Sep 17 00:00:00 2001
From: Abhijeet Kolekar <abhijeet.kolekar@intel.com>
Date: Fri, 12 Sep 2008 13:44:08 -0700
Subject: mac80211 : Fix mode change hard_start_xmit function

When monitor mode is changed to BSS or IBSS, data trasnfer can not happen
because proper transmit function is not assigend for BSS ,IBSS mode.
This patch fixes this problem by assigning the ieee80211_subif_start_xmit
to device's hard_start_xmit function.

Signed-off-by: Abhijeet Kolekar <abhijeet.kolekar@intel.com>
Acked-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index a7ef0289fbd..a72fbebb8ea 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -624,6 +624,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 
 	/* and set some type-dependent values */
 	sdata->vif.type = type;
+	sdata->dev->hard_start_xmit = ieee80211_subif_start_xmit;
 
 	/* only monitor differs */
 	sdata->dev->type = ARPHRD_ETHER;
-- 
cgit v1.2.3-70-g09d2


From 9e691ed68d94ab3047e028736641445b4cf74d67 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Wed, 17 Sep 2008 10:10:41 +1000
Subject: ipvs: only unlock in ip_vs_edit_service() if already locked

Jumping to out unlocks __ip_vs_svc_lock, but that lock is not taken until
after code that may jump to out.

This problem was detected by sparse.

make C=1
  CHECK   net/ipv4/ipvs/ip_vs_ctl.c
net/ipv4/ipvs/ip_vs_ctl.c:1332:2: warning: context imbalance in 'ip_vs_edit_service' - unexpected unlock

Acked-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 993a83fb0d5..60ca24b9ec0 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1305,7 +1305,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 		 */
 		if ((ret = ip_vs_unbind_scheduler(svc))) {
 			old_sched = sched;
-			goto out;
+			goto out_unlock;
 		}
 
 		/*
@@ -1324,12 +1324,13 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 			 */
 			ip_vs_bind_scheduler(svc, old_sched);
 			old_sched = sched;
-			goto out;
+			goto out_unlock;
 		}
 	}
 
-  out:
+  out_unlock:
 	write_unlock_bh(&__ip_vs_svc_lock);
+  out:
 
 	if (old_sched)
 		ip_vs_scheduler_put(old_sched);
-- 
cgit v1.2.3-70-g09d2


From dff630ddad3884b99fae3ad92f5eccbf26618679 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Wed, 17 Sep 2008 10:10:42 +1000
Subject: ipvs: supply a valid 0 address to ip_vs_conn_new()

ip_vs_conn_new expects a union nf_inet_addr as the type for its address
parameters, not a plain integer.

This problem was detected by sparse.

make C=1
  CHECK   net/ipv4/ipvs/ip_vs_core.c
net/ipv4/ipvs/ip_vs_core.c:469:9: warning: Using plain integer as NULL pointer

Acked-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 80a4fcf33a5..ece748dbd0c 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -457,6 +457,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
+		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
 
 		ip_vs_service_put(svc);
 
@@ -465,7 +466,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		cp = ip_vs_conn_new(svc->af, iph.protocol,
 				    &iph.saddr, pptr[0],
 				    &iph.daddr, pptr[1],
-				    0, 0,
+				    &daddr, 0,
 				    IP_VS_CONN_F_BYPASS,
 				    NULL);
 		if (cp == NULL)
-- 
cgit v1.2.3-70-g09d2


From 563e94f072714657a82a59a3bf81a719a6a25591 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Wed, 17 Sep 2008 10:10:42 +1000
Subject: ipvs: add __aquire/__release annotations to
 ip_vs_info_seq_start/ip_vs_info_seq_stop

This teaches sparse that the following are not problems:

make C=1
  CHECK   net/ipv4/ipvs/ip_vs_ctl.c
net/ipv4/ipvs/ip_vs_ctl.c:1793:14: warning: context imbalance in 'ip_vs_info_seq_start' - wrong count at exit
net/ipv4/ipvs/ip_vs_ctl.c:1842:13: warning: context imbalance in 'ip_vs_info_seq_stop' - unexpected unlock

Acked-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 60ca24b9ec0..771551d8fba 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1787,6 +1787,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 }
 
 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
+__acquires(__ip_vs_svc_lock)
 {
 
 	read_lock_bh(&__ip_vs_svc_lock);
@@ -1840,6 +1841,7 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
+__releases(__ip_vs_svc_lock)
 {
 	read_unlock_bh(&__ip_vs_svc_lock);
 }
-- 
cgit v1.2.3-70-g09d2


From d286600e199aa2f1058a1f883d234e73626304d2 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Tue, 16 Sep 2008 11:11:11 -0400
Subject: ipvs: change some __constant_htons() to htons()

Change __contant_htons() to htons() in the IPVS code when not in an
initializer.

-Brian

Signed-off-by: Brian Haley <brian.haley@hp.com>
Acked-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_proto.c        | 2 +-
 net/ipv4/ipvs/ip_vs_proto_ah_esp.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index b06da1c3445..0791f9e08fe 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -237,7 +237,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
 			  const char *msg)
 {
 #ifdef CONFIG_IP_VS_IPV6
-	if (skb->protocol == __constant_htons(ETH_P_IPV6))
+	if (skb->protocol == htons(ETH_P_IPV6))
 		ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
 	else
 #endif
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
index 2b18a78d039..80ab0c8e5b4 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
@@ -167,7 +167,7 @@ ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
 		    int offset, const char *msg)
 {
 #ifdef CONFIG_IP_VS_IPV6
-	if (skb->protocol == __constant_htons(ETH_P_IPV6))
+	if (skb->protocol == htons(ETH_P_IPV6))
 		ah_esp_debug_packet_v6(pp, skb, offset, msg);
 	else
 #endif
-- 
cgit v1.2.3-70-g09d2


From 45e9c0de2e86485f8b6633fd64ab19cfbff167f6 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 15 Sep 2008 16:43:18 -0700
Subject: warn: Turn the netdev timeout WARN_ON() into a WARN()

this patch turns the netdev timeout WARN_ON_ONCE() into a WARN_ONCE(),
so that the device and driver names are inside the warning message.
This helps automated tools like kerneloops.org to collect the data
and do statistics, as well as making it more likely that humans
cut-n-paste the important message as part of a bugreport.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 10 ++++++++++
 net/sched/sch_generic.c   |  3 +--
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index a3f738cffdb..edc6ba82e09 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -97,6 +97,16 @@ extern void warn_slowpath(const char *file, const int line,
 	unlikely(__ret_warn_once);				\
 })
 
+#define WARN_ONCE(condition, format...)	({			\
+	static int __warned;					\
+	int __ret_warn_once = !!(condition);			\
+								\
+	if (unlikely(__ret_warn_once))				\
+		if (WARN(!__warned, format)) 			\
+			__warned = 1;				\
+	unlikely(__ret_warn_once);				\
+})
+
 #define WARN_ON_RATELIMIT(condition, state)			\
 		WARN_ON((condition) && __ratelimit(state))
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9634091ee2f..ec0a0839ce5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -215,10 +215,9 @@ static void dev_watchdog(unsigned long arg)
 			    time_after(jiffies, (dev->trans_start +
 						 dev->watchdog_timeo))) {
 				char drivername[64];
-				printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
+				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 				       dev->name, netdev_drivername(dev, drivername, 64));
 				dev->tx_timeout(dev);
-				WARN_ON_ONCE(1);
 			}
 			if (!mod_timer(&dev->watchdog_timer,
 				       round_jiffies(jiffies +
-- 
cgit v1.2.3-70-g09d2


From a3028b8ed1e1e9930bfa70ce4555fb7f9fad3dcc Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 18 Sep 2008 02:48:25 -0700
Subject: sctp: set the skb->ip_summed correctly when sending over loopback.

Loopback used to clobber the ip_summed filed which sctp then used
to figure out if it needed to do checksumming or not.  Now that
loopback doesn't do that any more, sctp needs to set the ip_summed
field correctly.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0dc4a7dfb23..225c7123c41 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -533,7 +533,8 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	if (!(dst->dev->features & NETIF_F_NO_CSUM)) {
 		crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
 		crc32 = sctp_end_cksum(crc32);
-	}
+	} else
+		nskb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	/* 3) Put the resultant value into the checksum field in the
 	 *    common header, and leave the rest of the bits unchanged.
-- 
cgit v1.2.3-70-g09d2


From 0ef46e285c062cbe35d60c0adbff96f530d31c86 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 18 Sep 2008 16:27:38 -0700
Subject: sctp: do not enable peer features if we can't do them.

Do not enable peer features like addip and auth, if they
are administratively disabled localy.  If the peer resports
that he supports something that we don't, neither end can
use it so enabling it is pointless.  This solves a problem
when talking to a peer that has auth and addip enabled while
we do not.  Found by Andrei Pelinescu-Onciul <andrei@iptel.org>.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index e8ca4e54981..fe94f42fa06 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1886,11 +1886,13 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
 			    /* if the peer reports AUTH, assume that he
 			     * supports AUTH.
 			     */
-			    asoc->peer.auth_capable = 1;
+			    if (sctp_auth_enable)
+				    asoc->peer.auth_capable = 1;
 			    break;
 		    case SCTP_CID_ASCONF:
 		    case SCTP_CID_ASCONF_ACK:
-			    asoc->peer.asconf_capable = 1;
+			    if (sctp_addip_enable)
+				    asoc->peer.asconf_capable = 1;
 			    break;
 		    default:
 			    break;
@@ -2460,6 +2462,9 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
+		if (!sctp_addip_enable)
+			goto fall_through;
+
 		addr_param = param.v + sizeof(sctp_addip_param_t);
 
 		af = sctp_get_af_specific(param_type2af(param.p->type));
-- 
cgit v1.2.3-70-g09d2


From add52379dde2e5300e2d574b172e62c6cf43b3d3 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 18 Sep 2008 16:28:27 -0700
Subject: sctp: Fix oops when INIT-ACK indicates that peer doesn't support AUTH

If INIT-ACK is received with SupportedExtensions parameter which
indicates that the peer does not support AUTH, the packet will be
silently ignore, and sctp_process_init() do cleanup all of the
transports in the association.
When T1-Init timer is expires, OOPS happen while we try to choose
a different init transport.

The solution is to only clean up the non-active transports, i.e
the ones that the peer added.  However, that introduces a problem
with sctp_connectx(), because we don't mark the proper state for
the transports provided by the user.  So, we'll simply mark
user-provided transports as ACTIVE.  That will allow INIT
retransmissions to work properly in the sctp_connectx() context
and prevent the crash.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c     | 9 +++++----
 net/sctp/sm_make_chunk.c | 6 ++----
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 8472b8b349c..abd51cef241 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -599,11 +599,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	/* Check to see if this is a duplicate. */
 	peer = sctp_assoc_lookup_paddr(asoc, addr);
 	if (peer) {
+		/* An UNKNOWN state is only set on transports added by
+		 * user in sctp_connectx() call.  Such transports should be
+		 * considered CONFIRMED per RFC 4960, Section 5.4.
+		 */
 		if (peer->state == SCTP_UNKNOWN) {
-			if (peer_state == SCTP_ACTIVE)
-				peer->state = SCTP_ACTIVE;
-			if (peer_state == SCTP_UNCONFIRMED)
-				peer->state = SCTP_UNCONFIRMED;
+			peer->state = SCTP_ACTIVE;
 		}
 		return peer;
 	}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fe94f42fa06..b599cbba4fb 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2321,12 +2321,10 @@ clean_up:
 	/* Release the transport structures. */
 	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
 		transport = list_entry(pos, struct sctp_transport, transports);
-		list_del_init(pos);
-		sctp_transport_free(transport);
+		if (transport->state != SCTP_ACTIVE)
+			sctp_assoc_rm_peer(asoc, transport);
 	}
 
-	asoc->peer.transport_count = 0;
-
 nomem:
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 821c92f258bd9b01eb900992969803645b6ba9d6 Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Thu, 18 Sep 2008 16:44:31 -0700
Subject: ISDN sockets: add missing lockdep strings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 91f8bbc9352..23b8b9da36b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
-  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
+  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_MAX"
 };
 static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
@@ -168,7 +168,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
   "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
-  "slock-AF_RXRPC" , "slock-AF_MAX"
+  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_MAX"
 };
 static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
@@ -182,7 +182,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
   "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
-  "clock-AF_RXRPC" , "clock-AF_MAX"
+  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_MAX"
 };
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 64edc2736e23994e0334b70c5ff08dc33e2ebbd9 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:18:32 -0700
Subject: tcp: Partial hint clearing has again become meaningless
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ie., the difference between partial and all clearing doesn't
exists anymore since the SACK optimizations got dropped by
an sacktag rewrite.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     | 7 +------
 net/ipv4/tcp_input.c  | 5 ++---
 net/ipv4/tcp_output.c | 4 ++--
 3 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8983386356a..b7167632695 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1035,7 +1035,7 @@ static inline void tcp_mib_init(struct net *net)
 }
 
 /* from STCP */
-static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
+static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
 {
 	tp->lost_skb_hint = NULL;
 	tp->scoreboard_skb_hint = NULL;
@@ -1043,11 +1043,6 @@ static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
 	tp->forward_skb_hint = NULL;
 }
 
-static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
-{
-	tcp_clear_retrans_hints_partial(tp);
-}
-
 /* MD5 Signature */
 struct crypto_hash;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f79a5160729..7306bfb16cd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1883,7 +1883,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
 
-	tcp_clear_retrans_hints_partial(tp);
+	tcp_clear_all_retrans_hints(tp);
 }
 
 static void tcp_clear_retrans_partial(struct tcp_sock *tp)
@@ -1934,12 +1934,11 @@ void tcp_enter_loss(struct sock *sk, int how)
 		/* Push undo marker, if it was plain RTO and nothing
 		 * was retransmitted. */
 		tp->undo_marker = tp->snd_una;
-		tcp_clear_retrans_hints_partial(tp);
 	} else {
 		tp->sacked_out = 0;
 		tp->fackets_out = 0;
-		tcp_clear_all_retrans_hints(tp);
 	}
+	tcp_clear_all_retrans_hints(tp);
 
 	tcp_for_write_queue(skb, sk) {
 		if (skb == tcp_send_head(sk))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8165f5aa8c7..11490958a09 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -750,7 +750,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	BUG_ON(len > skb->len);
 
-	tcp_clear_retrans_hints_partial(tp);
+	tcp_clear_all_retrans_hints(tp);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -1823,7 +1823,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
 	tp->packets_out -= tcp_skb_pcount(next_skb);
 
 	/* changed transmit queue under us so clear hints */
-	tcp_clear_retrans_hints_partial(tp);
+	tcp_clear_all_retrans_hints(tp);
 
 	sk_wmem_free_skb(sk, next_skb);
 }
-- 
cgit v1.2.3-70-g09d2


From c8c213f20ce97c66fe2ff86f33814d1ca0f9d7ea Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:18:55 -0700
Subject: tcp: move tcp_verify_retransmit_hint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7306bfb16cd..9e95ad637db 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -979,6 +979,19 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 	}
 }
 
+/* RFC: This is from the original, I doubt that this is necessary at all:
+ * clear xmit_retrans hint if seq of this skb is beyond hint. How could we
+ * retransmitted past LOST markings in the first place? I'm not fully sure
+ * about undo and end of connection cases, which can cause R without L?
+ */
+static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	if ((tp->retransmit_skb_hint != NULL) &&
+	    before(TCP_SKB_CB(skb)->seq,
+		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+		tp->retransmit_skb_hint = NULL;
+}
+
 /* This procedure tags the retransmission queue when SACKs arrive.
  *
  * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -2156,19 +2169,6 @@ static int tcp_time_to_recover(struct sock *sk)
 	return 0;
 }
 
-/* RFC: This is from the original, I doubt that this is necessary at all:
- * clear xmit_retrans hint if seq of this skb is beyond hint. How could we
- * retransmitted past LOST markings in the first place? I'm not fully sure
- * about undo and end of connection cases, which can cause R without L?
- */
-static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
-{
-	if ((tp->retransmit_skb_hint != NULL) &&
-	    before(TCP_SKB_CB(skb)->seq,
-		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
-		tp->retransmit_skb_hint = NULL;
-}
-
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
-- 
cgit v1.2.3-70-g09d2


From 41ea36e35a0daa75377b3e70680e5c3a3f83fe27 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:19:22 -0700
Subject: tcp: add helper for lost bit toggling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This useful because we'd need to verifying soon in many places
which makes things slightly more complex than it used to be.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9e95ad637db..12512336dbd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -992,6 +992,16 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
 		tp->retransmit_skb_hint = NULL;
 }
 
+static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
+		tcp_verify_retransmit_hint(tp, skb);
+
+		tp->lost_out += tcp_skb_pcount(skb);
+		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+	}
+}
+
 /* This procedure tags the retransmission queue when SACKs arrive.
  *
  * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -2216,11 +2226,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 			cnt = packets;
 		}
 
-		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
-			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			tp->lost_out += tcp_skb_pcount(skb);
-			tcp_verify_retransmit_hint(tp, skb);
-		}
+		tcp_skb_mark_lost(tp, skb);
 	}
 	tcp_verify_left_out(tp);
 }
@@ -2262,11 +2268,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 			if (!tcp_skb_timedout(sk, skb))
 				break;
 
-			if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
-				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-				tp->lost_out += tcp_skb_pcount(skb);
-				tcp_verify_retransmit_hint(tp, skb);
-			}
+			tcp_skb_mark_lost(tp, skb);
 		}
 
 		tp->scoreboard_skb_hint = skb;
-- 
cgit v1.2.3-70-g09d2


From 006f582c73f4eda35e06fd323193c3df43fb3459 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:20:20 -0700
Subject: tcp: convert retransmit_cnt_hint to seqno
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Main benefit in this is that we can then freely point
the retransmit_skb_hint to anywhere we want to because
there's no longer need to know what would be the count
changes involve, and since this is really used only as a
terminator, unnecessary work is one time walk at most,
and if some retransmissions are necessary after that
point later on, the walk is not full waste of time
anyway.

Since retransmit_high must be kept valid, all lost
markers must ensure that.

Now I also have learned how those "holes" in the
rexmittable skbs can appear, mtu probe does them. So
I removed the misleading comment as well.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  2 +-
 include/net/tcp.h     |  2 ++
 net/ipv4/tcp_input.c  | 34 ++++++++++++++++++++--------------
 net/ipv4/tcp_output.c | 25 +++++++------------------
 4 files changed, 30 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2e2557388e3..d7637c4b284 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -358,7 +358,7 @@ struct tcp_sock {
 					 */
 
 	int     lost_cnt_hint;
-	int     retransmit_cnt_hint;
+	u32     retransmit_high;	/* L-bits may be on up to this seqno */
 
 	u32	lost_retrans_low;	/* Sent seq after any rxmit (lowest) */
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b7167632695..d0e90c50722 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -472,6 +472,8 @@ extern void tcp_send_delayed_ack(struct sock *sk);
 
 /* tcp_input.c */
 extern void tcp_cwnd_application_limited(struct sock *sk);
+extern void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
+					    struct sk_buff *skb);
 
 /* tcp_timer.c */
 extern void tcp_init_xmit_timers(struct sock *);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 12512336dbd..d271cc82500 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -979,17 +979,17 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 	}
 }
 
-/* RFC: This is from the original, I doubt that this is necessary at all:
- * clear xmit_retrans hint if seq of this skb is beyond hint. How could we
- * retransmitted past LOST markings in the first place? I'm not fully sure
- * about undo and end of connection cases, which can cause R without L?
- */
+/* This must be called before lost_out is incremented */
 static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	if ((tp->retransmit_skb_hint != NULL) &&
+	if ((tp->retransmit_skb_hint == NULL) ||
 	    before(TCP_SKB_CB(skb)->seq,
 		   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
-		tp->retransmit_skb_hint = NULL;
+		tp->retransmit_skb_hint = skb;
+
+	if (!tp->lost_out ||
+	    after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high))
+		tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
 }
 
 static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
@@ -1002,6 +1002,16 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
 	}
 }
 
+void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	tcp_verify_retransmit_hint(tp, skb);
+
+	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
+		tp->lost_out += tcp_skb_pcount(skb);
+		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+	}
+}
+
 /* This procedure tags the retransmission queue when SACKs arrive.
  *
  * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -1178,13 +1188,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 			tp->retrans_out -= tcp_skb_pcount(skb);
 
-			/* clear lost hint */
-			tp->retransmit_skb_hint = NULL;
-
-			if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
-				tp->lost_out += tcp_skb_pcount(skb);
-				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			}
+			tcp_skb_mark_lost_uncond_verify(tp, skb);
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
 		} else {
 			if (before(ack_seq, new_low_seq))
@@ -1890,6 +1894,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
+			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
 		}
 	}
 	tcp_verify_left_out(tp);
@@ -1974,6 +1979,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
+			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
 		}
 	}
 	tcp_verify_left_out(tp);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 11490958a09..cfae61b40c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1838,7 +1838,7 @@ void tcp_simple_retransmit(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	unsigned int mss = tcp_current_mss(sk, 0);
-	int lost = 0;
+	u32 prior_lost = tp->lost_out;
 
 	tcp_for_write_queue(skb, sk) {
 		if (skb == tcp_send_head(sk))
@@ -1849,17 +1849,13 @@ void tcp_simple_retransmit(struct sock *sk)
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
 			}
-			if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) {
-				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-				tp->lost_out += tcp_skb_pcount(skb);
-				lost = 1;
-			}
+			tcp_skb_mark_lost_uncond_verify(tp, skb);
 		}
 	}
 
 	tcp_clear_all_retrans_hints(tp);
 
-	if (!lost)
+	if (prior_lost == tp->lost_out)
 		return;
 
 	if (tcp_is_reno(tp))
@@ -2009,15 +2005,11 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int packet_cnt;
 
-	if (tp->retransmit_skb_hint) {
+	if (tp->retransmit_skb_hint)
 		skb = tp->retransmit_skb_hint;
-		packet_cnt = tp->retransmit_cnt_hint;
-	} else {
+	else
 		skb = tcp_write_queue_head(sk);
-		packet_cnt = 0;
-	}
 
 	/* First pass: retransmit lost packets. */
 	if (tp->lost_out) {
@@ -2028,7 +2020,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 				break;
 			/* we could do better than to assign each time */
 			tp->retransmit_skb_hint = skb;
-			tp->retransmit_cnt_hint = packet_cnt;
 
 			/* Assume this retransmit will generate
 			 * only one packet for congestion window
@@ -2039,6 +2030,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			 */
 			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
 				return;
+			if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high))
+				break;
 
 			if (sacked & TCPCB_LOST) {
 				if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
@@ -2059,10 +2052,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 									  inet_csk(sk)->icsk_rto,
 									  TCP_RTO_MAX);
 				}
-
-				packet_cnt += tcp_skb_pcount(skb);
-				if (packet_cnt >= tp->lost_out)
-					break;
 			}
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From f09142eddb75005e41b0af3e5214979d8b534b1d Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:20:50 -0700
Subject: tcp: Kill precaution that's very likely obsolete
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I suspect it might have been related to the changed amount
of lost skbs, which was counted by retransmit_cnt_hint that
got changed.

The place for this clearing was very illogical anyway,
it should have been after the LOST-bit clearing loop to
make any sense.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d271cc82500..28e93f1e421 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2385,10 +2385,6 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 	}
 	tcp_moderate_cwnd(tp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
-
-	/* There is something screwy going on with the retrans hints after
-	   an undo */
-	tcp_clear_all_retrans_hints(tp);
 }
 
 static inline int tcp_may_undo(struct tcp_sock *tp)
-- 
cgit v1.2.3-70-g09d2


From 184d68b2b0b836587f92887b14baea41033ffeef Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:21:16 -0700
Subject: tcp: No need to clear retransmit_skb_hint when SACKing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because lost counter no longer requires tuning, this is
trivial to remove (the tuning wouldn't have been too
hard either) because no "new" retransmittable skb appeared
below retransmit_skb_hint when SACKing for sure.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 28e93f1e421..d017aed6edd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1298,9 +1298,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 					~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
 				tp->lost_out -= tcp_skb_pcount(skb);
 				tp->retrans_out -= tcp_skb_pcount(skb);
-
-				/* clear lost hint */
-				tp->retransmit_skb_hint = NULL;
 			}
 		} else {
 			if (!(sacked & TCPCB_RETRANS)) {
@@ -1319,9 +1316,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 			if (sacked & TCPCB_LOST) {
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 				tp->lost_out -= tcp_skb_pcount(skb);
-
-				/* clear lost hint */
-				tp->retransmit_skb_hint = NULL;
 			}
 		}
 
@@ -1351,7 +1345,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 	if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) {
 		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 		tp->retrans_out -= tcp_skb_pcount(skb);
-		tp->retransmit_skb_hint = NULL;
 	}
 
 	return flag;
-- 
cgit v1.2.3-70-g09d2


From b5afe7bc71a1689376c9b547376d17568469f3b3 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:21:54 -0700
Subject: tcp: add tcp_can_forward_retransmit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 46 ++++++++++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cfae61b40c4..957c4e3d217 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1992,6 +1992,33 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	return err;
 }
 
+static int tcp_can_forward_retransmit(struct sock *sk)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Forward retransmissions are possible only during Recovery. */
+	if (icsk->icsk_ca_state != TCP_CA_Recovery)
+		return 0;
+
+	/* No forward retransmissions in Reno are possible. */
+	if (tcp_is_reno(tp))
+		return 0;
+
+	/* Yeah, we have to make difficult choice between forward transmission
+	 * and retransmission... Both ways have their merits...
+	 *
+	 * For now we do not retransmit anything, while we have some new
+	 * segments to send. In the other cases, follow rule 3 for
+	 * NextSeg() specified in RFC3517.
+	 */
+
+	if (tcp_may_send_now(sk))
+		return 0;
+
+	return 1;
+}
+
 /* This gets called after a retransmit timeout, and the initially
  * retransmitted data is acknowledged.  It tries to continue
  * resending the rest of the retransmit queue, until either
@@ -2057,24 +2084,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	}
 
 	/* OK, demanded retransmission is finished. */
-
-	/* Forward retransmissions are possible only during Recovery. */
-	if (icsk->icsk_ca_state != TCP_CA_Recovery)
-		return;
-
-	/* No forward retransmissions in Reno are possible. */
-	if (tcp_is_reno(tp))
-		return;
-
-	/* Yeah, we have to make difficult choice between forward transmission
-	 * and retransmission... Both ways have their merits...
-	 *
-	 * For now we do not retransmit anything, while we have some new
-	 * segments to send. In the other cases, follow rule 3 for
-	 * NextSeg() specified in RFC3517.
-	 */
-
-	if (tcp_may_send_now(sk))
+	if (!tcp_can_forward_retransmit(sk))
 		return;
 
 	/* If nothing is SACKed, highest_sack in the loop won't be valid */
-- 
cgit v1.2.3-70-g09d2


From 34638570b58290e8cb875fb24dcbe836ffeb6cb8 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:22:17 -0700
Subject: tcp: remove obsolete validity concern
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 957c4e3d217..6f2a3f4a1af 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2087,10 +2087,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	if (!tcp_can_forward_retransmit(sk))
 		return;
 
-	/* If nothing is SACKed, highest_sack in the loop won't be valid */
-	if (!tp->sacked_out)
-		return;
-
 	if (tp->forward_skb_hint)
 		skb = tp->forward_skb_hint;
 	else
-- 
cgit v1.2.3-70-g09d2


From 61eb55f4db7eaf5fb2d5ec12981a8cda755bb0e1 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:22:59 -0700
Subject: tcp: Reorganize skb tagbit checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6f2a3f4a1af..2f24ecc3706 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2032,6 +2032,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
+	int mib_idx;
 
 	if (tp->retransmit_skb_hint)
 		skb = tp->retransmit_skb_hint;
@@ -2059,27 +2060,26 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 				return;
 			if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high))
 				break;
+			if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
+				continue;
 
-			if (sacked & TCPCB_LOST) {
-				if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
-					int mib_idx;
-
-					if (tcp_retransmit_skb(sk, skb)) {
-						tp->retransmit_skb_hint = NULL;
-						return;
-					}
-					if (icsk->icsk_ca_state != TCP_CA_Loss)
-						mib_idx = LINUX_MIB_TCPFASTRETRANS;
-					else
-						mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
-					NET_INC_STATS_BH(sock_net(sk), mib_idx);
-
-					if (skb == tcp_write_queue_head(sk))
-						inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-									  inet_csk(sk)->icsk_rto,
-									  TCP_RTO_MAX);
-				}
+			if (!(sacked & TCPCB_LOST))
+				continue;
+
+			if (tcp_retransmit_skb(sk, skb)) {
+				tp->retransmit_skb_hint = NULL;
+				return;
 			}
+			if (icsk->icsk_ca_state != TCP_CA_Loss)
+				mib_idx = LINUX_MIB_TCPFASTRETRANS;
+			else
+				mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
+			NET_INC_STATS_BH(sock_net(sk), mib_idx);
+
+			if (skb == tcp_write_queue_head(sk))
+				inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+							  inet_csk(sk)->icsk_rto,
+							  TCP_RTO_MAX);
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 08ebd1721ab8fd362e90ae17b461c07b23fa2824 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:23:49 -0700
Subject: tcp: remove tp->lost_out guard to make joining diff nicer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The validity of the retransmit_high must then be ensured
if no L'ed skb exits!

This makes a minor change to behavior, we now have to
iterate the head to find out that the loop terminates.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 75 ++++++++++++++++++++++++++-------------------------
 1 file changed, 38 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2f24ecc3706..9f44be633ef 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2034,53 +2034,54 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	struct sk_buff *skb;
 	int mib_idx;
 
+	if (!tp->lost_out)
+		tp->retransmit_high = tp->snd_una;
+
 	if (tp->retransmit_skb_hint)
 		skb = tp->retransmit_skb_hint;
 	else
 		skb = tcp_write_queue_head(sk);
 
 	/* First pass: retransmit lost packets. */
-	if (tp->lost_out) {
-		tcp_for_write_queue_from(skb, sk) {
-			__u8 sacked = TCP_SKB_CB(skb)->sacked;
+	tcp_for_write_queue_from(skb, sk) {
+		__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
-			if (skb == tcp_send_head(sk))
-				break;
-			/* we could do better than to assign each time */
-			tp->retransmit_skb_hint = skb;
-
-			/* Assume this retransmit will generate
-			 * only one packet for congestion window
-			 * calculation purposes.  This works because
-			 * tcp_retransmit_skb() will chop up the
-			 * packet to be MSS sized and all the
-			 * packet counting works out.
-			 */
-			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
-				return;
-			if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high))
-				break;
-			if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
-				continue;
+		if (skb == tcp_send_head(sk))
+			break;
+		/* we could do better than to assign each time */
+		tp->retransmit_skb_hint = skb;
+
+		/* Assume this retransmit will generate
+		 * only one packet for congestion window
+		 * calculation purposes.  This works because
+		 * tcp_retransmit_skb() will chop up the
+		 * packet to be MSS sized and all the
+		 * packet counting works out.
+		 */
+		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+			return;
+		if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high))
+			break;
+		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
+			continue;
 
-			if (!(sacked & TCPCB_LOST))
-				continue;
+		if (!(sacked & TCPCB_LOST))
+			continue;
 
-			if (tcp_retransmit_skb(sk, skb)) {
-				tp->retransmit_skb_hint = NULL;
-				return;
-			}
-			if (icsk->icsk_ca_state != TCP_CA_Loss)
-				mib_idx = LINUX_MIB_TCPFASTRETRANS;
-			else
-				mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
-			NET_INC_STATS_BH(sock_net(sk), mib_idx);
-
-			if (skb == tcp_write_queue_head(sk))
-				inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-							  inet_csk(sk)->icsk_rto,
-							  TCP_RTO_MAX);
+		if (tcp_retransmit_skb(sk, skb)) {
+			tp->retransmit_skb_hint = NULL;
+			return;
 		}
+		if (icsk->icsk_ca_state != TCP_CA_Loss)
+			mib_idx = LINUX_MIB_TCPFASTRETRANS;
+		else
+			mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
+		NET_INC_STATS_BH(sock_net(sk), mib_idx);
+
+		if (skb == tcp_write_queue_head(sk))
+			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+						  inet_csk(sk)->icsk_rto,
+						  TCP_RTO_MAX);
 	}
 
 	/* OK, demanded retransmission is finished. */
-- 
cgit v1.2.3-70-g09d2


From 0e1c54c2a405494281e0639aacc90db03b50ae77 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:24:21 -0700
Subject: tcp: reorganize retransmit code loops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both loops are quite similar, so they can be combined
with little effort. As a result, forward_skb_hint becomes
obsolete as well.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 -
 include/net/tcp.h     |  1 -
 net/ipv4/tcp_output.c | 79 +++++++++++++++++++++------------------------------
 3 files changed, 33 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index d7637c4b284..76729062829 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -342,7 +342,6 @@ struct tcp_sock {
 	struct sk_buff* lost_skb_hint;
 	struct sk_buff *scoreboard_skb_hint;
 	struct sk_buff *retransmit_skb_hint;
-	struct sk_buff *forward_skb_hint;
 
 	struct sk_buff_head	out_of_order_queue; /* Out of order segments go here */
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d0e90c50722..220f54cf42e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1042,7 +1042,6 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
 	tp->lost_skb_hint = NULL;
 	tp->scoreboard_skb_hint = NULL;
 	tp->retransmit_skb_hint = NULL;
-	tp->forward_skb_hint = NULL;
 }
 
 /* MD5 Signature */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9f44be633ef..b5b4ddcdda4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2032,7 +2032,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
+	struct sk_buff *hole = NULL;
 	int mib_idx;
+	int fwd_rexmitting = 0;
 
 	if (!tp->lost_out)
 		tp->retransmit_high = tp->snd_una;
@@ -2049,7 +2051,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		if (skb == tcp_send_head(sk))
 			break;
 		/* we could do better than to assign each time */
-		tp->retransmit_skb_hint = skb;
+		if (hole == NULL)
+			tp->retransmit_skb_hint = skb;
 
 		/* Assume this retransmit will generate
 		 * only one packet for congestion window
@@ -2060,65 +2063,49 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		 */
 		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
 			return;
-		if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high))
-			break;
-		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
-			continue;
-
-		if (!(sacked & TCPCB_LOST))
-			continue;
-
-		if (tcp_retransmit_skb(sk, skb)) {
-			tp->retransmit_skb_hint = NULL;
-			return;
-		}
-		if (icsk->icsk_ca_state != TCP_CA_Loss)
-			mib_idx = LINUX_MIB_TCPFASTRETRANS;
-		else
-			mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
-		NET_INC_STATS_BH(sock_net(sk), mib_idx);
-
-		if (skb == tcp_write_queue_head(sk))
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-						  inet_csk(sk)->icsk_rto,
-						  TCP_RTO_MAX);
-	}
-
-	/* OK, demanded retransmission is finished. */
-	if (!tcp_can_forward_retransmit(sk))
-		return;
 
-	if (tp->forward_skb_hint)
-		skb = tp->forward_skb_hint;
-	else
-		skb = tcp_write_queue_head(sk);
+		if (fwd_rexmitting) {
+begin_fwd:
+			if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+				break;
+			mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
 
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-		tp->forward_skb_hint = skb;
+		} else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
+			if (!tcp_can_forward_retransmit(sk))
+				break;
+			/* Backtrack if necessary to non-L'ed skb */
+			if (hole != NULL) {
+				skb = hole;
+				hole = NULL;
+			}
+			fwd_rexmitting = 1;
+			goto begin_fwd;
 
-		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-			break;
+		} else if (!(sacked & TCPCB_LOST)) {
+			if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS))
+				hole = skb;
+			continue;
 
-		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
-			break;
+		} else {
+			if (icsk->icsk_ca_state != TCP_CA_Loss)
+				mib_idx = LINUX_MIB_TCPFASTRETRANS;
+			else
+				mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
+		}
 
-		if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
+		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
 			continue;
 
-		/* Ok, retransmit it. */
 		if (tcp_retransmit_skb(sk, skb)) {
-			tp->forward_skb_hint = NULL;
-			break;
+			tp->retransmit_skb_hint = NULL;
+			return;
 		}
+		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 		if (skb == tcp_write_queue_head(sk))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
 						  TCP_RTO_MAX);
-
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From f0ceb0ed86b4792a4ed9d3438f5f7572e48f9803 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:24:49 -0700
Subject: tcp: remove retransmit_skb_hint clearing from failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This doesn't much sense here afaict, probably never has. Since
fragmenting and collapsing deal the hints by themselves, there
should be very little reason for the rexmit loop to do that.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b5b4ddcdda4..f900fae8b87 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2096,10 +2096,8 @@ begin_fwd:
 		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
 			continue;
 
-		if (tcp_retransmit_skb(sk, skb)) {
-			tp->retransmit_skb_hint = NULL;
+		if (tcp_retransmit_skb(sk, skb))
 			return;
-		}
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 		if (skb == tcp_write_queue_head(sk))
-- 
cgit v1.2.3-70-g09d2


From ef9da47c7cc64d69526331f315e76b5680d4048f Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:25:15 -0700
Subject: tcp: don't clear retransmit_skb_hint when not necessary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most importantly avoid doing it with cumulative ACK. Not clearing
means that we no longer need n^2 processing in resolution of each
fast recovery.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     | 7 ++++++-
 net/ipv4/tcp_input.c  | 4 +++-
 net/ipv4/tcp_output.c | 8 +++++---
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 220f54cf42e..ea815723d41 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1037,10 +1037,15 @@ static inline void tcp_mib_init(struct net *net)
 }
 
 /* from STCP */
-static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
+static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
 {
 	tp->lost_skb_hint = NULL;
 	tp->scoreboard_skb_hint = NULL;
+}
+
+static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
+{
+	tcp_clear_retrans_hints_partial(tp);
 	tp->retransmit_skb_hint = NULL;
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d017aed6edd..44a4fffc2cc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2925,7 +2925,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 
 		tcp_unlink_write_queue(skb, sk);
 		sk_wmem_free_skb(sk, skb);
-		tcp_clear_all_retrans_hints(tp);
+		tcp_clear_retrans_hints_partial(tp);
+		if (skb == tp->retransmit_skb_hint)
+			tp->retransmit_skb_hint = NULL;
 	}
 
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f900fae8b87..239cea7b6c0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -750,7 +750,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	BUG_ON(len > skb->len);
 
-	tcp_clear_all_retrans_hints(tp);
+	tcp_clear_retrans_hints_partial(tp);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -1823,7 +1823,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
 	tp->packets_out -= tcp_skb_pcount(next_skb);
 
 	/* changed transmit queue under us so clear hints */
-	tcp_clear_all_retrans_hints(tp);
+	tcp_clear_retrans_hints_partial(tp);
+	if (next_skb == tp->retransmit_skb_hint)
+		tp->retransmit_skb_hint = skb;
 
 	sk_wmem_free_skb(sk, next_skb);
 }
@@ -1853,7 +1855,7 @@ void tcp_simple_retransmit(struct sock *sk)
 		}
 	}
 
-	tcp_clear_all_retrans_hints(tp);
+	tcp_clear_retrans_hints_partial(tp);
 
 	if (prior_lost == tp->lost_out)
 		return;
-- 
cgit v1.2.3-70-g09d2


From 90638a04ad8484b6b6c567656fb3f6d0689e23da Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:25:52 -0700
Subject: tcp: don't clear lost_skb_hint when not necessary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most importantly avoid doing it with cumulative ACK. However,
since we have lost_cnt_hint in the picture as well needing
adjustments, it's not as trivial as dealing with
retransmit_skb_hint (and cannot be done in the all place we
could trivially leave retransmit_skb_hint untouched).

With the previous patch, this should mostly remove O(n^2)
behavior while cumulative ACKs start flowing once rexmit
after a lossy round-trip made it through.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 44a4fffc2cc..85627f83665 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2844,6 +2844,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 	int flag = 0;
 	u32 pkts_acked = 0;
 	u32 reord = tp->packets_out;
+	u32 prior_sacked = tp->sacked_out;
 	s32 seq_rtt = -1;
 	s32 ca_seq_rtt = -1;
 	ktime_t last_ackt = net_invalid_timestamp();
@@ -2925,9 +2926,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 
 		tcp_unlink_write_queue(skb, sk);
 		sk_wmem_free_skb(sk, skb);
-		tcp_clear_retrans_hints_partial(tp);
+		tp->scoreboard_skb_hint = NULL;
 		if (skb == tp->retransmit_skb_hint)
 			tp->retransmit_skb_hint = NULL;
+		if (skb == tp->lost_skb_hint)
+			tp->lost_skb_hint = NULL;
 	}
 
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
@@ -2946,6 +2949,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 			/* Non-retransmitted hole got filled? That's reordering */
 			if (reord < prior_fackets)
 				tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+
+			/* No need to care for underflows here because
+			 * the lost_skb_hint gets NULLed if we're past it
+			 * (or something non-trivial happened)
+			 */
+			if (tcp_is_fack(tp))
+				tp->lost_cnt_hint -= pkts_acked;
+			else
+				tp->lost_cnt_hint -= prior_sacked - tp->sacked_out;
 		}
 
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-- 
cgit v1.2.3-70-g09d2


From 618d9f25548ba6fc3a9cd2ce5cd56f4f015b0635 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 20 Sep 2008 21:26:22 -0700
Subject: tcp: back retransmit_high when it over-estimated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If lost skb is sacked, we might have nothing to retransmit
as high as the retransmit_high is pointing to, so place
it lower to avoid unnecessary walking.

This is mainly for the case where high L'ed skbs gets sacked.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 239cea7b6c0..8f9793a37b6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2035,16 +2035,22 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	struct sk_buff *hole = NULL;
+	u32 last_lost;
 	int mib_idx;
 	int fwd_rexmitting = 0;
 
 	if (!tp->lost_out)
 		tp->retransmit_high = tp->snd_una;
 
-	if (tp->retransmit_skb_hint)
+	if (tp->retransmit_skb_hint) {
 		skb = tp->retransmit_skb_hint;
-	else
+		last_lost = TCP_SKB_CB(skb)->end_seq;
+		if (after(last_lost, tp->retransmit_high))
+			last_lost = tp->retransmit_high;
+	} else {
 		skb = tcp_write_queue_head(sk);
+		last_lost = tp->snd_una;
+	}
 
 	/* First pass: retransmit lost packets. */
 	tcp_for_write_queue_from(skb, sk) {
@@ -2073,6 +2079,7 @@ begin_fwd:
 			mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
 
 		} else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
+			tp->retransmit_high = last_lost;
 			if (!tcp_can_forward_retransmit(sk))
 				break;
 			/* Backtrack if necessary to non-L'ed skb */
@@ -2089,6 +2096,7 @@ begin_fwd:
 			continue;
 
 		} else {
+			last_lost = TCP_SKB_CB(skb)->end_seq;
 			if (icsk->icsk_ca_state != TCP_CA_Loss)
 				mib_idx = LINUX_MIB_TCPFASTRETRANS;
 			else
-- 
cgit v1.2.3-70-g09d2


From ad55dcaff0e34269f86975ce2ea0da22e9eb74a1 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Sat, 20 Sep 2008 22:05:50 -0700
Subject: netdev: simple_tx_hash shouldn't hash inside fragments

Currently simple_tx_hash is hashing inside of udp fragments.  As a result
packets are getting getting sent to all queues when they shouldn't be.
This causes a serious performance regression which can be seen by sending
UDP frames larger than mtu on multiqueue devices.  This change will make
it so that fragments are hashed only as IP datagrams w/o any protocol
information.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index e719ed29310..e8eb2b47834 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -122,6 +122,7 @@
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/ipv6.h>
 #include <linux/in.h>
 #include <linux/jhash.h>
@@ -1667,7 +1668,7 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 {
 	u32 addr1, addr2, ports;
 	u32 hash, ihl;
-	u8 ip_proto;
+	u8 ip_proto = 0;
 
 	if (unlikely(!simple_tx_hashrnd_initialized)) {
 		get_random_bytes(&simple_tx_hashrnd, 4);
@@ -1676,7 +1677,8 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
-		ip_proto = ip_hdr(skb)->protocol;
+		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
+			ip_proto = ip_hdr(skb)->protocol;
 		addr1 = ip_hdr(skb)->saddr;
 		addr2 = ip_hdr(skb)->daddr;
 		ihl = ip_hdr(skb)->ihl;
-- 
cgit v1.2.3-70-g09d2


From a574420ff46cff0245e6b0fce2e961aa2717743d Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Sat, 20 Sep 2008 22:07:34 -0700
Subject: multiq: requeue should rewind the current_band

Currently dequeueing a packet and requeueing the same packet will cause a
different packet to be pulled on the next dequeue.  This change forces
requeue to rewind the current_band.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_multiq.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 5d9cd68e91d..915f3149dde 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -97,6 +97,7 @@ static int
 multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct Qdisc *qdisc;
+	struct multiq_sched_data *q = qdisc_priv(sch);
 	int ret;
 
 	qdisc = multiq_classify(skb, sch, &ret);
@@ -113,6 +114,10 @@ multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	if (ret == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
 		sch->qstats.requeues++;
+		if (q->curband)
+			q->curband--;
+		else
+			q->curband = q->bands - 1;
 		return NET_XMIT_SUCCESS;
 	}
 	if (net_xmit_drop_count(ret))
-- 
cgit v1.2.3-70-g09d2


From 6067804047b64dde89f4f133fc7eba48ee44107d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 20 Sep 2008 22:20:49 -0700
Subject: net: Use hton[sl]() instead of __constant_hton[sl]() where applicable

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c           |  2 +-
 net/atm/br2684.c               |  8 ++++----
 net/core/dev.c                 |  4 ++--
 net/ethernet/eth.c             |  2 +-
 net/ipv4/ipvs/ip_vs_core.c     |  4 ++--
 net/ipv6/ip6_tunnel.c          |  4 ++--
 net/mac80211/wme.c             |  2 +-
 net/sched/cls_flow.c           | 28 ++++++++++++++--------------
 net/sched/sch_dsmark.c         |  8 ++++----
 net/sched/sch_sfq.c            |  4 ++--
 net/sunrpc/xprtrdma/rpc_rdma.c |  4 ++--
 11 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 97688cdb550..8883e9c8a22 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -48,7 +48,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 
 	switch (veth->h_vlan_encapsulated_proto) {
 #ifdef CONFIG_INET
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 
 		/* TODO:  Confirm this will work with VLAN headers... */
 		return arp_find(veth->h_dest, skb);
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 8d9a6f15888..280de481edc 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -375,11 +375,11 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			if (memcmp
 			    (skb->data + 6, ethertype_ipv6,
 			     sizeof(ethertype_ipv6)) == 0)
-				skb->protocol = __constant_htons(ETH_P_IPV6);
+				skb->protocol = htons(ETH_P_IPV6);
 			else if (memcmp
 				 (skb->data + 6, ethertype_ipv4,
 				  sizeof(ethertype_ipv4)) == 0)
-				skb->protocol = __constant_htons(ETH_P_IP);
+				skb->protocol = htons(ETH_P_IP);
 			else
 				goto error;
 			skb_pull(skb, sizeof(llc_oui_ipv4));
@@ -404,9 +404,9 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			skb_reset_network_header(skb);
 			iph = ip_hdr(skb);
 			if (iph->version == 4)
-				skb->protocol = __constant_htons(ETH_P_IP);
+				skb->protocol = htons(ETH_P_IP);
 			else if (iph->version == 6)
-				skb->protocol = __constant_htons(ETH_P_IPV6);
+				skb->protocol = htons(ETH_P_IPV6);
 			else
 				goto error;
 			skb->pkt_type = PACKET_HOST;
diff --git a/net/core/dev.c b/net/core/dev.c
index f48d1b24f9c..fdfc4b6a644 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1675,13 +1675,13 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 	}
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		ip_proto = ip_hdr(skb)->protocol;
 		addr1 = ip_hdr(skb)->saddr;
 		addr2 = ip_hdr(skb)->daddr;
 		ihl = ip_hdr(skb)->ihl;
 		break;
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		ip_proto = ipv6_hdr(skb)->nexthdr;
 		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
 		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index a80839b02e3..647a9edee37 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -129,7 +129,7 @@ int eth_rebuild_header(struct sk_buff *skb)
 
 	switch (eth->h_proto) {
 #ifdef CONFIG_INET
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return arp_find(eth->h_dest, skb);
 #endif
 	default:
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index ece748dbd0c..958abf3e5f8 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -938,7 +938,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 
 	EnterFunction(11);
 
-	af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
 
 	if (skb->ipvs_property)
 		return NF_ACCEPT;
@@ -1258,7 +1258,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	struct ip_vs_conn *cp;
 	int ret, restart, af;
 
-	af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
 
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 17c7b098cdb..64ce3d33d9c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1050,10 +1050,10 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		ret = ip4ip6_tnl_xmit(skb, dev);
 		break;
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		ret = ip6ip6_tnl_xmit(skb, dev);
 		break;
 	default:
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 6748dedcab5..c703f8b44e9 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -39,7 +39,7 @@ static unsigned int classify_1d(struct sk_buff *skb)
 		return skb->priority - 256;
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		dscp = ip_hdr(skb)->tos & 0xfc;
 		break;
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 8f63a1a9401..0ebaff637e3 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -67,9 +67,9 @@ static inline u32 addr_fold(void *addr)
 static u32 flow_get_src(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ntohl(ip_hdr(skb)->saddr);
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
 	default:
 		return addr_fold(skb->sk);
@@ -79,9 +79,9 @@ static u32 flow_get_src(const struct sk_buff *skb)
 static u32 flow_get_dst(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ntohl(ip_hdr(skb)->daddr);
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
 	default:
 		return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
@@ -91,9 +91,9 @@ static u32 flow_get_dst(const struct sk_buff *skb)
 static u32 flow_get_proto(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ip_hdr(skb)->protocol;
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ipv6_hdr(skb)->nexthdr;
 	default:
 		return 0;
@@ -120,7 +120,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb)
 	u32 res = 0;
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP): {
+	case htons(ETH_P_IP): {
 		struct iphdr *iph = ip_hdr(skb);
 
 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -128,7 +128,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb)
 			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
 		break;
 	}
-	case __constant_htons(ETH_P_IPV6): {
+	case htons(ETH_P_IPV6): {
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		if (has_ports(iph->nexthdr))
@@ -147,7 +147,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
 	u32 res = 0;
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP): {
+	case htons(ETH_P_IP): {
 		struct iphdr *iph = ip_hdr(skb);
 
 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -155,7 +155,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
 			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
 		break;
 	}
-	case __constant_htons(ETH_P_IPV6): {
+	case htons(ETH_P_IPV6): {
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		if (has_ports(iph->nexthdr))
@@ -213,9 +213,9 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
 static u32 flow_get_nfct_src(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ntohl(CTTUPLE(skb, src.u3.ip));
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
 	}
 fallback:
@@ -225,9 +225,9 @@ fallback:
 static u32 flow_get_nfct_dst(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		return ntohl(CTTUPLE(skb, dst.u3.ip));
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
 	}
 fallback:
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index edd1298f85f..ba43aab3a85 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -202,7 +202,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	if (p->set_tc_index) {
 		switch (skb->protocol) {
-		case __constant_htons(ETH_P_IP):
+		case htons(ETH_P_IP):
 			if (skb_cow_head(skb, sizeof(struct iphdr)))
 				goto drop;
 
@@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				& ~INET_ECN_MASK;
 			break;
 
-		case __constant_htons(ETH_P_IPV6):
+		case htons(ETH_P_IPV6):
 			if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
 				goto drop;
 
@@ -289,11 +289,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 	pr_debug("index %d->%d\n", skb->tc_index, index);
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 		ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
 				    p->value[index]);
 			break;
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 		ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
 				    p->value[index]);
 			break;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 6e041d10dbd..fe1508ef0d3 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -119,7 +119,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	u32 h, h2;
 
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_IP):
+	case htons(ETH_P_IP):
 	{
 		const struct iphdr *iph = ip_hdr(skb);
 		h = iph->daddr;
@@ -134,7 +134,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 			h2 ^= *(((u32*)iph) + iph->ihl);
 		break;
 	}
-	case __constant_htons(ETH_P_IPV6):
+	case htons(ETH_P_IPV6):
 	{
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 		h = iph->daddr.s6_addr32[3];
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e55427f73df..5c1954d28d0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -769,7 +769,7 @@ repost:
 	/* check for expected message types */
 	/* The order of some of these tests is important. */
 	switch (headerp->rm_type) {
-	case __constant_htonl(RDMA_MSG):
+	case htonl(RDMA_MSG):
 		/* never expect read chunks */
 		/* never expect reply chunks (two ways to check) */
 		/* never expect write chunks without having offered RDMA */
@@ -802,7 +802,7 @@ repost:
 		rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
 		break;
 
-	case __constant_htonl(RDMA_NOMSG):
+	case htonl(RDMA_NOMSG):
 		/* never expect read or write chunks, always reply chunks */
 		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
 		    headerp->rm_body.rm_chunks[1] != xdr_zero ||
-- 
cgit v1.2.3-70-g09d2


From f5fff5dc8a7a3f395b0525c02ba92c95d42b7390 Mon Sep 17 00:00:00 2001
From: Tom Quetchenbach <virtualphtn@gmail.com>
Date: Sun, 21 Sep 2008 00:21:51 -0700
Subject: tcp: advertise MSS requested by user

I'm trying to use the TCP_MAXSEG option to setsockopt() to set the MSS
for both sides of a bidirectional connection.

man tcp says: "If this option is set before connection establishment, it
also changes the MSS value announced to the other end in the initial
packet."

However, the kernel only uses the MTU/route cache to set the advertised
MSS. That means if I set the MSS to, say, 500 before calling connect(),
I will send at most 500-byte packets, but I will still receive 1500-byte
packets in reply.

This is a bug, either in the kernel or the documentation.

This patch (applies to latest net-2.6) reduces the advertised value to
that requested by the user as long as setsockopt() is called before
connect() or accept(). This seems like the behavior that one would
expect as well as that which is documented.

I've tried to make sure that things that depend on the advertised MSS
are set correctly.

Signed-off-by: Tom Quetchenbach <virtualphtn@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c   |  4 ++++
 net/ipv4/tcp_output.c | 13 ++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3dfbc21e555..44aef1c1f37 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1364,6 +1364,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	tcp_mtup_init(newsk);
 	tcp_sync_mss(newsk, dst_mtu(dst));
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (tcp_sk(sk)->rx_opt.user_mss &&
+	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
+		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+
 	tcp_initialize_rcv_mss(newsk);
 
 #ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8f9793a37b6..c3d58ee3e16 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2232,6 +2232,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	struct sk_buff *skb;
 	struct tcp_md5sig_key *md5;
 	__u8 *md5_hash_location;
+	int mss;
 
 	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
 	if (skb == NULL)
@@ -2242,13 +2243,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	skb->dst = dst_clone(dst);
 
+	mss = dst_metric(dst, RTAX_ADVMSS);
+	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
+		mss = tp->rx_opt.user_mss;
+
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
 		__u8 rcv_wscale;
 		/* Set this up on the first call only */
 		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
 		/* tcp_full_space because it is guaranteed to be the first packet */
 		tcp_select_initial_window(tcp_full_space(sk),
-			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
 			&req->rcv_wnd,
 			&req->window_clamp,
 			ireq->wscale_ok,
@@ -2258,8 +2263,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	memset(&opts, 0, sizeof(opts));
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_header_size = tcp_synack_options(sk, req,
-					     dst_metric(dst, RTAX_ADVMSS),
+	tcp_header_size = tcp_synack_options(sk, req, mss,
 					     skb, &opts, &md5) +
 			  sizeof(struct tcphdr);
 
@@ -2333,6 +2337,9 @@ static void tcp_connect_init(struct sock *sk)
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
 	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
+		tp->advmss = tp->rx_opt.user_mss;
+
 	tcp_initialize_rcv_mss(sk);
 
 	tcp_select_initial_window(tcp_full_space(sk),
-- 
cgit v1.2.3-70-g09d2


From e6f225ebb7c35fe30fdf8608927c5cf8fce6de7d Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Fri, 19 Sep 2008 20:41:56 +0200
Subject: ipvs: Restrict sync message to 255 connections

The nr_conns variable in the sync message header is only eight bits wide
and will overflow on interfaces with a large MTU. As a result the backup
won't parse all connections contained in the sync buffer. On regular
ethernet with an MTU of 1500 this isn't a problem, because we can't
overflow the value, but consider jumbo frames being used on a cross-over
connection between both directors.

We now restrict the size of the sync buffer, so that we never put more
than 255 connections into a single sync buffer.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_sync.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 28237a5f62e..de5e7e118ee 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -30,6 +30,7 @@
 #include <linux/err.h>
 #include <linux/kthread.h>
 #include <linux/wait.h>
+#include <linux/kernel.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
@@ -99,6 +100,7 @@ struct ip_vs_sync_thread_data {
 */
 
 #define SYNC_MESG_HEADER_LEN	4
+#define MAX_CONNS_PER_SYNCBUFF	255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
 
 struct ip_vs_sync_mesg {
 	__u8                    nr_conns;
@@ -516,8 +518,8 @@ static int set_sync_mesg_maxlen(int sync_state)
 		num = (dev->mtu - sizeof(struct iphdr) -
 		       sizeof(struct udphdr) -
 		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
-		sync_send_mesg_maxlen =
-			SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * num;
+		sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
 		IP_VS_DBG(7, "setting the maximum length of sync sending "
 			  "message %d.\n", sync_send_mesg_maxlen);
 	} else if (sync_state == IP_VS_STATE_BACKUP) {
-- 
cgit v1.2.3-70-g09d2


From 8d5803bf6fbe5264000afc8c34bff08e8ecc023b Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sat, 20 Sep 2008 11:48:33 +0200
Subject: ipvs: Fix unused label warning

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/ipvs/ip_vs_ctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 771551d8fba..0302cf3e503 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1330,7 +1330,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 
   out_unlock:
 	write_unlock_bh(&__ip_vs_svc_lock);
+#ifdef CONFIG_IP_VS_IPV6
   out:
+#endif
 
 	if (old_sched)
 		ip_vs_scheduler_put(old_sched);
-- 
cgit v1.2.3-70-g09d2


From 43f59c89399fd76883a06c551f24794e98409432 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 21 Sep 2008 21:28:51 -0700
Subject: net: Remove __skb_insert() calls outside of skbuff internals.

This minor cleanup simplifies later changes which will convert
struct sk_buff and friends over to using struct list_head.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/cassini.c     | 2 +-
 drivers/net/ppp_generic.c | 2 +-
 drivers/net/pppol2tp.c    | 2 +-
 include/net/tcp.h         | 4 ++--
 net/ipv4/tcp_input.c      | 4 ++--
 net/sctp/ulpqueue.c       | 5 ++---
 6 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index f1936d51b45..40ff6a90d0d 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2182,7 +2182,7 @@ static inline void cas_rx_flow_pkt(struct cas *cp, const u64 *words,
 	 * do any additional locking here. stick the buffer
 	 * at the end.
 	 */
-	__skb_insert(skb, flow->prev, (struct sk_buff *) flow, flow);
+	__skb_queue_tail(flow, skb);
 	if (words[0] & RX_COMP1_RELEASE_FLOW) {
 		while ((skb = __skb_dequeue(flow))) {
 			cas_skb_release(skb);
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index ddccc074a76..98e04958fef 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1864,7 +1864,7 @@ ppp_mp_insert(struct ppp *ppp, struct sk_buff *skb)
 	for (p = list->next; p != (struct sk_buff *)list; p = p->next)
 		if (seq_before(seq, p->sequence))
 			break;
-	__skb_insert(skb, p->prev, p, list);
+	__skb_queue_before(list, p, skb);
 }
 
 /*
diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index ff175e8f36b..185b1dff10a 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -353,7 +353,7 @@ static void pppol2tp_recv_queue_skb(struct pppol2tp_session *session, struct sk_
 	spin_lock_bh(&session->reorder_q.lock);
 	skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
 		if (PPPOL2TP_SKB_CB(skbp)->ns > ns) {
-			__skb_insert(skb, skbp->prev, skbp, &session->reorder_q);
+			__skb_queue_before(&session->reorder_q, skbp, skb);
 			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
 			       "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
 			       session->name, ns, PPPOL2TP_SKB_CB(skbp)->ns,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ea815723d41..f857c3eff71 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1268,12 +1268,12 @@ static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
 	__skb_queue_after(&sk->sk_write_queue, skb, buff);
 }
 
-/* Insert skb between prev and next on the write queue of sk.  */
+/* Insert new before skb on the write queue of sk.  */
 static inline void tcp_insert_write_queue_before(struct sk_buff *new,
 						  struct sk_buff *skb,
 						  struct sock *sk)
 {
-	__skb_insert(new, skb->prev, skb, &sk->sk_write_queue);
+	__skb_queue_before(&sk->sk_write_queue, skb, new);
 
 	if (sk->sk_send_head == skb)
 		sk->sk_send_head = new;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 85627f83665..cbfe13d5f42 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4156,7 +4156,7 @@ drop:
 				skb1 = skb1->prev;
 			}
 		}
-		__skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);
+		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
 
 		/* And clean segments covered by new one as whole. */
 		while ((skb1 = skb->next) !=
@@ -4254,7 +4254,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		memcpy(nskb->head, skb->head, header);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
-		__skb_insert(nskb, skb->prev, skb, list);
+		__skb_queue_before(list, skb, nskb);
 		skb_set_owner_r(nskb, sk);
 
 		/* Copy data, releasing collapsed skbs. */
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 5061a26c502..7b23803343c 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -317,7 +317,7 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
 	}
 
 	/* Insert before pos. */
-	__skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->reasm);
+	__skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event));
 
 }
 
@@ -825,8 +825,7 @@ static void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq,
 
 
 	/* Insert before pos. */
-	__skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->lobby);
-
+	__skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event));
 }
 
 static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
-- 
cgit v1.2.3-70-g09d2


From 2cdc55751c33829f00510e0104562d0f8d8a9b85 Mon Sep 17 00:00:00 2001
From: Kaihui Luo <kaih.luo@gmail.com>
Date: Mon, 22 Sep 2008 19:02:36 -0700
Subject: netfilter: xt_time gives a wrong monthday in a leap year

The function localtime_3 in xt_time.c gives a wrong monthday in a leap
year after 28th 2.  calculating monthday should use the array
days_since_leapyear[] not days_since_year[] in a leap year.

Signed-off-by: Kaihui Luo <kaih.luo@gmail.com>
Acked-by: Jan Engelhardt <jengelh@computergmbh.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_time.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 9f328593287..307a2c3c2df 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -136,17 +136,19 @@ static void localtime_3(struct xtm *r, time_t time)
 	 * from w repeatedly while counting.)
 	 */
 	if (is_leap(year)) {
+		/* use days_since_leapyear[] in a leap year */
 		for (i = ARRAY_SIZE(days_since_leapyear) - 1;
-		    i > 0 && days_since_year[i] > w; --i)
+		    i > 0 && days_since_leapyear[i] > w; --i)
 			/* just loop */;
+		r->monthday = w - days_since_leapyear[i] + 1;
 	} else {
 		for (i = ARRAY_SIZE(days_since_year) - 1;
 		    i > 0 && days_since_year[i] > w; --i)
 			/* just loop */;
+		r->monthday = w - days_since_year[i] + 1;
 	}
 
 	r->month    = i + 1;
-	r->monthday = w - days_since_year[i] + 1;
 	return;
 }
 
-- 
cgit v1.2.3-70-g09d2


From d48abfecea8513cfd2fd7e341439c1b8a28e9ff4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Mon, 22 Sep 2008 19:20:51 -0700
Subject: net: em_cmp.c use unaligned access helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/em_cmp.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index cc49c932641..bc450397487 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/tc_ematch/tc_em_cmp.h>
+#include <asm/unaligned.h>
 #include <net/pkt_cls.h>
 
 static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
@@ -37,8 +38,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
 			break;
 
 		case TCF_EM_ALIGN_U16:
-			val = *ptr << 8;
-			val |= *(ptr+1);
+			val = get_unaligned_be16(ptr);
 
 			if (cmp_needs_transformation(cmp))
 				val = be16_to_cpu(val);
@@ -47,10 +47,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
 		case TCF_EM_ALIGN_U32:
 			/* Worth checking boundries? The branching seems
 			 * to get worse. Visit again. */
-			val = *ptr << 24;
-			val |= *(ptr+1) << 16;
-			val |= *(ptr+2) << 8;
-			val |= *(ptr+3);
+			val = get_unaligned_be32(ptr);
 
 			if (cmp_needs_transformation(cmp))
 				val = be32_to_cpu(val);
-- 
cgit v1.2.3-70-g09d2


From 5e687220a047dc4f0c2fb9ce886359a23075ddbc Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Mon, 22 Sep 2008 19:24:45 -0700
Subject: net/atm/lec.c: drop code after return

The break after the return serves no purpose.

Signed-off-by: Julia Lawall <julia@diku.dk>
Reviewed-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/lec.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5799fb52365..8f701cde594 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1931,7 +1931,6 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
 		switch (priv->lane_version) {
 		case 1:
 			return priv->mcast_vcc;
-			break;
 		case 2:	/* LANE2 wants arp for multicast addresses */
 			if (!compare_ether_addr(mac_to_find, bus_mac))
 				return priv->mcast_vcc;
-- 
cgit v1.2.3-70-g09d2


From 5c1824587f0797373c95719a196f6098f7c6d20c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 22 Sep 2008 19:48:19 -0700
Subject: ipsec: Fix xfrm_state_walk race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As discovered by Timo Teräs, the currently xfrm_state_walk scheme
is racy because if a second dump finishes before the first, we
may free xfrm states that the first dump would walk over later.

This patch fixes this by storing the dumps in a list in order
to calculate the correct completion counter which cures this
problem.

I've expanded netlink_cb in order to accomodate the extra state
related to this.  It shouldn't be a big deal since netlink_cb
is kmalloced for each dump and we're just increasing it by 4 or
8 bytes.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h |  2 +-
 include/net/xfrm.h      | 10 +++-------
 net/xfrm/xfrm_state.c   | 39 ++++++++++++++++++++++++++++++---------
 3 files changed, 34 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 9ff1b54908f..cbba7760545 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -220,7 +220,7 @@ struct netlink_callback
 	int		(*dump)(struct sk_buff * skb, struct netlink_callback *cb);
 	int		(*done)(struct netlink_callback *cb);
 	int		family;
-	long		args[6];
+	long		args[7];
 };
 
 struct netlink_notify
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4bb94992b5f..48630b26659 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1246,6 +1246,8 @@ struct xfrm6_tunnel {
 };
 
 struct xfrm_state_walk {
+	struct list_head list;
+	unsigned long genid;
 	struct xfrm_state *state;
 	int count;
 	u8 proto;
@@ -1281,13 +1283,7 @@ static inline void xfrm6_fini(void)
 extern int xfrm_proc_init(void);
 #endif
 
-static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
-{
-	walk->proto = proto;
-	walk->state = NULL;
-	walk->count = 0;
-}
-
+extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
 extern int xfrm_state_walk(struct xfrm_state_walk *walk,
 			   int (*func)(struct xfrm_state *, int, void*), void *);
 extern void xfrm_state_walk_done(struct xfrm_state_walk *walk);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index abbe2702c40..053970e8765 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -64,6 +64,9 @@ static unsigned long xfrm_state_walk_ongoing;
 /* Counter indicating walk completion, protected by xfrm_cfg_mutex. */
 static unsigned long xfrm_state_walk_completed;
 
+/* List of outstanding state walks used to set the completed counter.  */
+static LIST_HEAD(xfrm_state_walks);
+
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
@@ -1584,7 +1587,6 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
 			if (err) {
 				xfrm_state_hold(last);
 				walk->state = last;
-				xfrm_state_walk_ongoing++;
 				goto out;
 			}
 		}
@@ -1599,25 +1601,44 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
 		err = func(last, 0, data);
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	if (old != NULL) {
+	if (old != NULL)
 		xfrm_state_put(old);
-		xfrm_state_walk_completed++;
-		if (!list_empty(&xfrm_state_gc_leftovers))
-			schedule_work(&xfrm_state_gc_work);
-	}
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_walk);
 
+void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
+{
+	walk->proto = proto;
+	walk->state = NULL;
+	walk->count = 0;
+	list_add_tail(&walk->list, &xfrm_state_walks);
+	walk->genid = ++xfrm_state_walk_ongoing;
+}
+EXPORT_SYMBOL(xfrm_state_walk_init);
+
 void xfrm_state_walk_done(struct xfrm_state_walk *walk)
 {
+	struct list_head *prev;
+
 	if (walk->state != NULL) {
 		xfrm_state_put(walk->state);
 		walk->state = NULL;
-		xfrm_state_walk_completed++;
-		if (!list_empty(&xfrm_state_gc_leftovers))
-			schedule_work(&xfrm_state_gc_work);
 	}
+
+	prev = walk->list.prev;
+	list_del(&walk->list);
+
+	if (prev != &xfrm_state_walks) {
+		list_entry(prev, struct xfrm_state_walk, list)->genid =
+			walk->genid;
+		return;
+	}
+
+	xfrm_state_walk_completed = walk->genid;
+
+	if (!list_empty(&xfrm_state_gc_leftovers))
+		schedule_work(&xfrm_state_gc_work);
 }
 EXPORT_SYMBOL(xfrm_state_walk_done);
 
-- 
cgit v1.2.3-70-g09d2


From bce7b15426cac3000bf6a9cf59d9356ef0be2dec Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 19:51:15 -0700
Subject: Phonet: global definitions

Signed-off-by: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h  |   1 +
 include/linux/if_phonet.h |  14 ++++++
 include/linux/phonet.h    | 125 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rtnetlink.h |   4 ++
 include/linux/socket.h    |   4 +-
 net/core/sock.c           |   9 ++--
 6 files changed, 153 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/if_phonet.h
 create mode 100644 include/linux/phonet.h

(limited to 'net')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 5028e0b6082..723a1c5fbc6 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -100,6 +100,7 @@
 #define ETH_P_ECONET	0x0018		/* Acorn Econet			*/
 #define ETH_P_HDLC	0x0019		/* HDLC frames			*/
 #define ETH_P_ARCNET	0x001A		/* 1A for ArcNet :-)            */
+#define ETH_P_PHONET	0x00F5		/* Nokia Phonet frames          */
 
 /*
  *	This is an Ethernet frame header.
diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h
new file mode 100644
index 00000000000..22df25fbc4e
--- /dev/null
+++ b/include/linux/if_phonet.h
@@ -0,0 +1,14 @@
+/*
+ * File: if_phonet.h
+ *
+ * Phonet interface kernel definitions
+ *
+ * Copyright (C) 2008 Nokia Corporation. All rights reserved.
+ */
+
+#define PHONET_HEADER_LEN	8	/* Phonet header length */
+
+#define PHONET_MIN_MTU		6
+/* 6 bytes header + 65535 bytes payload */
+#define PHONET_MAX_MTU		65541
+#define PHONET_DEV_MTU		PHONET_MAX_MTU
diff --git a/include/linux/phonet.h b/include/linux/phonet.h
new file mode 100644
index 00000000000..6a764f8584a
--- /dev/null
+++ b/include/linux/phonet.h
@@ -0,0 +1,125 @@
+/**
+ * file phonet.h
+ *
+ * Phonet sockets kernel interface
+ *
+ * Copyright (C) 2008 Nokia Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef LINUX_PHONET_H
+#define LINUX_PHONET_H
+
+/* Automatic protocol selection */
+#define PN_PROTO_TRANSPORT	0
+/* Phonet datagram socket */
+#define PN_PROTO_PHONET		1
+#define PHONET_NPROTO		2
+
+#define PNADDR_ANY		0
+#define PNPORT_RESOURCE_ROUTING	0
+
+/* Phonet protocol header */
+struct phonethdr {
+	__u8	pn_rdev;
+	__u8	pn_sdev;
+	__u8	pn_res;
+	__be16	pn_length;
+	__u8	pn_robj;
+	__u8	pn_sobj;
+} __attribute__((packed));
+
+/* Phonet socket address structure */
+struct sockaddr_pn {
+	sa_family_t spn_family;
+	__u8 spn_obj;
+	__u8 spn_dev;
+	__u8 spn_resource;
+	__u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3];
+} __attribute__ ((packed));
+
+static inline __u16 pn_object(__u8 addr, __u16 port)
+{
+	return (addr << 8) | (port & 0x3ff);
+}
+
+static inline __u8 pn_obj(__u16 handle)
+{
+	return handle & 0xff;
+}
+
+static inline __u8 pn_dev(__u16 handle)
+{
+	return handle >> 8;
+}
+
+static inline __u16 pn_port(__u16 handle)
+{
+	return handle & 0x3ff;
+}
+
+static inline __u8 pn_addr(__u16 handle)
+{
+	return (handle >> 8) & 0xfc;
+}
+
+static inline void pn_sockaddr_set_addr(struct sockaddr_pn *spn, __u8 addr)
+{
+	spn->spn_dev &= 0x03;
+	spn->spn_dev |= addr & 0xfc;
+}
+
+static inline void pn_sockaddr_set_port(struct sockaddr_pn *spn, __u16 port)
+{
+	spn->spn_dev &= 0xfc;
+	spn->spn_dev |= (port >> 8) & 0x03;
+	spn->spn_obj = port & 0xff;
+}
+
+static inline void pn_sockaddr_set_object(struct sockaddr_pn *spn,
+						__u16 handle)
+{
+	spn->spn_dev = pn_dev(handle);
+	spn->spn_obj = pn_obj(handle);
+}
+
+static inline void pn_sockaddr_set_resource(struct sockaddr_pn *spn,
+						__u8 resource)
+{
+	spn->spn_resource = resource;
+}
+
+static inline __u8 pn_sockaddr_get_addr(const struct sockaddr_pn *spn)
+{
+	return spn->spn_dev & 0xfc;
+}
+
+static inline __u16 pn_sockaddr_get_port(const struct sockaddr_pn *spn)
+{
+	return ((spn->spn_dev & 0x03) << 8) | spn->spn_obj;
+}
+
+static inline __u16 pn_sockaddr_get_object(const struct sockaddr_pn *spn)
+{
+	return pn_object(spn->spn_dev, spn->spn_obj);
+}
+
+static inline __u8 pn_sockaddr_get_resource(const struct sockaddr_pn *spn)
+{
+	return spn->spn_resource;
+}
+
+#endif
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index ca643b13b02..2b3d51c6ec9 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -582,6 +582,10 @@ enum rtnetlink_groups {
 #define RTNLGRP_IPV6_RULE	RTNLGRP_IPV6_RULE
 	RTNLGRP_ND_USEROPT,
 #define RTNLGRP_ND_USEROPT	RTNLGRP_ND_USEROPT
+	RTNLGRP_PHONET_IFADDR,
+#define RTNLGRP_PHONET_IFADDR	RTNLGRP_PHONET_IFADDR
+	RTNLGRP_PHONET_ROUTE,
+#define RTNLGRP_PHONET_ROUTE	RTNLGRP_PHONET_ROUTE
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/include/linux/socket.h b/include/linux/socket.h
index dc5086fe773..818ca33bf79 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -190,7 +190,8 @@ struct ucred {
 #define AF_IUCV		32	/* IUCV sockets			*/
 #define AF_RXRPC	33	/* RxRPC sockets 		*/
 #define AF_ISDN		34	/* mISDN sockets 		*/
-#define AF_MAX		35	/* For now.. */
+#define AF_PHONET	35	/* Phonet sockets		*/
+#define AF_MAX		36	/* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -227,6 +228,7 @@ struct ucred {
 #define PF_IUCV		AF_IUCV
 #define PF_RXRPC	AF_RXRPC
 #define PF_ISDN		AF_ISDN
+#define PF_PHONET	AF_PHONET
 #define PF_MAX		AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
diff --git a/net/core/sock.c b/net/core/sock.c
index 23b8b9da36b..2d358dd8a03 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,8 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
-  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_MAX"
+  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
+  "sk_lock-AF_MAX"
 };
 static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
@@ -168,7 +169,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
   "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
-  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_MAX"
+  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
+  "slock-AF_MAX"
 };
 static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
@@ -182,7 +184,8 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
   "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
-  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_MAX"
+  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
+  "clock-AF_MAX"
 };
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 4b07b3f69a8471cdc142c51461a331226fef248a Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 20:02:10 -0700
Subject: Phonet: PF_PHONET protocol family support

This is the basis for the Phonet protocol families, and introduces
the ETH_P_PHONET packet type and the PF_PHONET socket family.

Signed-off-by: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/phonet.h |  74 +++++++++++++++
 net/phonet/af_phonet.c      | 216 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 290 insertions(+)
 create mode 100644 include/net/phonet/phonet.h
 create mode 100644 net/phonet/af_phonet.c

(limited to 'net')

diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
new file mode 100644
index 00000000000..c53f2abc059
--- /dev/null
+++ b/include/net/phonet/phonet.h
@@ -0,0 +1,74 @@
+/*
+ * File: af_phonet.h
+ *
+ * Phonet sockets kernel definitions
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef AF_PHONET_H
+#define AF_PHONET_H
+
+/*
+ * The lower layers may not require more space, ever. Make sure it's
+ * enough.
+ */
+#define MAX_PHONET_HEADER	8
+
+static inline struct phonethdr *pn_hdr(struct sk_buff *skb)
+{
+	return (struct phonethdr *)skb_network_header(skb);
+}
+
+/*
+ * Get the other party's sockaddr from received skb. The skb begins
+ * with a Phonet header.
+ */
+static inline
+void pn_skb_get_src_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa)
+{
+	struct phonethdr *ph = pn_hdr(skb);
+	u16 obj = pn_object(ph->pn_sdev, ph->pn_sobj);
+
+	sa->spn_family = AF_PHONET;
+	pn_sockaddr_set_object(sa, obj);
+	pn_sockaddr_set_resource(sa, ph->pn_res);
+	memset(sa->spn_zero, 0, sizeof(sa->spn_zero));
+}
+
+static inline
+void pn_skb_get_dst_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa)
+{
+	struct phonethdr *ph = pn_hdr(skb);
+	u16 obj = pn_object(ph->pn_rdev, ph->pn_robj);
+
+	sa->spn_family = AF_PHONET;
+	pn_sockaddr_set_object(sa, obj);
+	pn_sockaddr_set_resource(sa, ph->pn_res);
+	memset(sa->spn_zero, 0, sizeof(sa->spn_zero));
+}
+
+/* Protocols in Phonet protocol family. */
+struct phonet_protocol {
+	struct proto		*prot;
+	int			sock_type;
+};
+
+int phonet_proto_register(int protocol, struct phonet_protocol *pp);
+void phonet_proto_unregister(int protocol, struct phonet_protocol *pp);
+
+#endif
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
new file mode 100644
index 00000000000..0cfea9bc994
--- /dev/null
+++ b/net/phonet/af_phonet.c
@@ -0,0 +1,216 @@
+/*
+ * File: af_phonet.c
+ *
+ * Phonet protocols family
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
+ * Original author: Sakari Ailus <sakari.ailus@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <net/sock.h>
+
+#include <linux/if_phonet.h>
+#include <linux/phonet.h>
+#include <net/phonet/phonet.h>
+
+static struct net_proto_family phonet_proto_family;
+static struct phonet_protocol *phonet_proto_get(int protocol);
+static inline void phonet_proto_put(struct phonet_protocol *pp);
+
+/* protocol family functions */
+
+static int pn_socket_create(struct net *net, struct socket *sock, int protocol)
+{
+	struct phonet_protocol *pnp;
+	int err;
+
+	if (net != &init_net)
+		return -EAFNOSUPPORT;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (protocol == 0) {
+		/* Default protocol selection */
+		switch (sock->type) {
+		case SOCK_DGRAM:
+			protocol = PN_PROTO_PHONET;
+			break;
+		default:
+			return -EPROTONOSUPPORT;
+		}
+	}
+
+	pnp = phonet_proto_get(protocol);
+	if (pnp == NULL)
+		return -EPROTONOSUPPORT;
+	if (sock->type != pnp->sock_type) {
+		err = -EPROTONOSUPPORT;
+		goto out;
+	}
+
+	/* TODO: create and init the struct sock */
+	err = -EPROTONOSUPPORT;
+
+out:
+	phonet_proto_put(pnp);
+	return err;
+}
+
+static struct net_proto_family phonet_proto_family = {
+	.family = AF_PHONET,
+	.create = pn_socket_create,
+	.owner = THIS_MODULE,
+};
+
+/* packet type functions */
+
+/*
+ * Stuff received packets to associated sockets.
+ * On error, returns non-zero and releases the skb.
+ */
+static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
+			struct packet_type *pkttype,
+			struct net_device *orig_dev)
+{
+	struct phonethdr *ph;
+	struct sockaddr_pn sa;
+	u16 len;
+
+	if (dev_net(dev) != &init_net)
+		goto out;
+
+	/* check we have at least a full Phonet header */
+	if (!pskb_pull(skb, sizeof(struct phonethdr)))
+		goto out;
+
+	/* check that the advertised length is correct */
+	ph = pn_hdr(skb);
+	len = get_unaligned_be16(&ph->pn_length);
+	if (len < 2)
+		goto out;
+	len -= 2;
+	if ((len > skb->len) || pskb_trim(skb, len))
+		goto out;
+	skb_reset_transport_header(skb);
+
+	pn_skb_get_dst_sockaddr(skb, &sa);
+	if (pn_sockaddr_get_addr(&sa) == 0)
+		goto out; /* currently, we cannot be device 0 */
+
+	/* TODO: put packets to sockets backlog */
+
+out:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+static struct packet_type phonet_packet_type = {
+	.type = __constant_htons(ETH_P_PHONET),
+	.dev = NULL,
+	.func = phonet_rcv,
+};
+
+/* Transport protocol registration */
+static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
+static DEFINE_SPINLOCK(proto_tab_lock);
+
+int __init_or_module phonet_proto_register(int protocol,
+						struct phonet_protocol *pp)
+{
+	int err = 0;
+
+	if (protocol >= PHONET_NPROTO)
+		return -EINVAL;
+
+	err = proto_register(pp->prot, 1);
+	if (err)
+		return err;
+
+	spin_lock(&proto_tab_lock);
+	if (proto_tab[protocol])
+		err = -EBUSY;
+	else
+		proto_tab[protocol] = pp;
+	spin_unlock(&proto_tab_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(phonet_proto_register);
+
+void phonet_proto_unregister(int protocol, struct phonet_protocol *pp)
+{
+	spin_lock(&proto_tab_lock);
+	BUG_ON(proto_tab[protocol] != pp);
+	proto_tab[protocol] = NULL;
+	spin_unlock(&proto_tab_lock);
+	proto_unregister(pp->prot);
+}
+EXPORT_SYMBOL(phonet_proto_unregister);
+
+static struct phonet_protocol *phonet_proto_get(int protocol)
+{
+	struct phonet_protocol *pp;
+
+	if (protocol >= PHONET_NPROTO)
+		return NULL;
+
+	spin_lock(&proto_tab_lock);
+	pp = proto_tab[protocol];
+	if (pp && !try_module_get(pp->prot->owner))
+		pp = NULL;
+	spin_unlock(&proto_tab_lock);
+
+	return pp;
+}
+
+static inline void phonet_proto_put(struct phonet_protocol *pp)
+{
+	module_put(pp->prot->owner);
+}
+
+/* Module registration */
+static int __init phonet_init(void)
+{
+	int err;
+
+	err = sock_register(&phonet_proto_family);
+	if (err) {
+		printk(KERN_ALERT
+			"phonet protocol family initialization failed\n");
+		return err;
+	}
+
+	dev_add_pack(&phonet_packet_type);
+	return 0;
+}
+
+static void __exit phonet_exit(void)
+{
+	sock_unregister(AF_PHONET);
+	dev_remove_pack(&phonet_packet_type);
+}
+
+module_init(phonet_init);
+module_exit(phonet_exit);
+MODULE_DESCRIPTION("Phonet protocol stack for Linux");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-70-g09d2


From 8ead536dec142f27d5b5f72c3994eb39f4741717 Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 20:03:00 -0700
Subject: Phonet: add CONFIG_PHONET

Signed-off-by: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Kconfig         |  1 +
 net/Makefile        |  1 +
 net/phonet/Kconfig  | 16 ++++++++++++++++
 net/phonet/Makefile |  4 ++++
 4 files changed, 22 insertions(+)
 create mode 100644 net/phonet/Kconfig
 create mode 100644 net/phonet/Makefile

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index d87de48ba65..9103a16a77b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,7 @@ source "net/can/Kconfig"
 source "net/irda/Kconfig"
 source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
+source "net/phonet/Kconfig"
 
 config FIB_RULES
 	bool
diff --git a/net/Makefile b/net/Makefile
index 4f43e7f874f..acaf819f24a 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_DECNET)		+= decnet/
 obj-$(CONFIG_ECONET)		+= econet/
+obj-$(CONFIG_PHONET)		+= phonet/
 ifneq ($(CONFIG_VLAN_8021Q),)
 obj-y				+= 8021q/
 endif
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
new file mode 100644
index 00000000000..51a5669573f
--- /dev/null
+++ b/net/phonet/Kconfig
@@ -0,0 +1,16 @@
+#
+# Phonet protocol
+#
+
+config PHONET
+	tristate "Phonet protocols family"
+	help
+	  The Phone Network protocol (PhoNet) is a packet-oriented
+	  communication protocol developped by Nokia for use with its modems.
+
+	  This is required for Maemo to use cellular data connectivity (if
+	  supported). It can also be used to control Nokia phones
+	  from a Linux computer, although AT commands may be easier to use.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called phonet. If unsure, say N.
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
new file mode 100644
index 00000000000..5dbff68a6f3
--- /dev/null
+++ b/net/phonet/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_PHONET) += phonet.o
+
+phonet-objs := \
+	af_phonet.o
-- 
cgit v1.2.3-70-g09d2


From f8ff60283de2b6775d7a14619056a08e3083bd40 Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 20:03:44 -0700
Subject: Phonet: network device and address handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This provides support for adding Phonet addresses to and removing
Phonet addresses from network devices.

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/pn_dev.h |  50 +++++++++++
 net/phonet/Makefile         |   1 +
 net/phonet/af_phonet.c      |   3 +
 net/phonet/pn_dev.c         | 208 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 262 insertions(+)
 create mode 100644 include/net/phonet/pn_dev.h
 create mode 100644 net/phonet/pn_dev.c

(limited to 'net')

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
new file mode 100644
index 00000000000..bbd2a836e04
--- /dev/null
+++ b/include/net/phonet/pn_dev.h
@@ -0,0 +1,50 @@
+/*
+ * File: pn_dev.h
+ *
+ * Phonet network device
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef PN_DEV_H
+#define PN_DEV_H
+
+struct phonet_device_list {
+	struct list_head list;
+	spinlock_t lock;
+};
+
+extern struct phonet_device_list pndevs;
+
+struct phonet_device {
+	struct list_head list;
+	struct net_device *netdev;
+	DECLARE_BITMAP(addrs, 64);
+};
+
+void phonet_device_init(void);
+void phonet_device_exit(void);
+struct net_device *phonet_device_get(struct net *net);
+
+int phonet_address_add(struct net_device *dev, u8 addr);
+int phonet_address_del(struct net_device *dev, u8 addr);
+u8 phonet_address_get(struct net_device *dev, u8 addr);
+int phonet_address_lookup(u8 addr);
+
+#define PN_NO_ADDR	0xff
+
+#endif
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index 5dbff68a6f3..980a3866c9a 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_PHONET) += phonet.o
 
 phonet-objs := \
+	pn_dev.o \
 	af_phonet.o
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 0cfea9bc994..a8ba6f177b2 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -31,6 +31,7 @@
 #include <linux/if_phonet.h>
 #include <linux/phonet.h>
 #include <net/phonet/phonet.h>
+#include <net/phonet/pn_dev.h>
 
 static struct net_proto_family phonet_proto_family;
 static struct phonet_protocol *phonet_proto_get(int protocol);
@@ -200,6 +201,7 @@ static int __init phonet_init(void)
 		return err;
 	}
 
+	phonet_device_init();
 	dev_add_pack(&phonet_packet_type);
 	return 0;
 }
@@ -208,6 +210,7 @@ static void __exit phonet_exit(void)
 {
 	sock_unregister(AF_PHONET);
 	dev_remove_pack(&phonet_packet_type);
+	phonet_device_exit();
 }
 
 module_init(phonet_init);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
new file mode 100644
index 00000000000..53be9fc82aa
--- /dev/null
+++ b/net/phonet/pn_dev.c
@@ -0,0 +1,208 @@
+/*
+ * File: pn_dev.c
+ *
+ * Phonet network device
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
+ * Original author: Sakari Ailus <sakari.ailus@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/phonet.h>
+#include <net/sock.h>
+#include <net/phonet/pn_dev.h>
+
+/* when accessing, remember to lock with spin_lock(&pndevs.lock); */
+struct phonet_device_list pndevs = {
+	.list = LIST_HEAD_INIT(pndevs.list),
+	.lock = __SPIN_LOCK_UNLOCKED(pndevs.lock),
+};
+
+/* Allocate new Phonet device. */
+static struct phonet_device *__phonet_device_alloc(struct net_device *dev)
+{
+	struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC);
+	if (pnd == NULL)
+		return NULL;
+	pnd->netdev = dev;
+	bitmap_zero(pnd->addrs, 64);
+
+	list_add(&pnd->list, &pndevs.list);
+	return pnd;
+}
+
+static struct phonet_device *__phonet_get(struct net_device *dev)
+{
+	struct phonet_device *pnd;
+
+	list_for_each_entry(pnd, &pndevs.list, list) {
+		if (pnd->netdev == dev)
+			return pnd;
+	}
+	return NULL;
+}
+
+static void __phonet_device_free(struct phonet_device *pnd)
+{
+	list_del(&pnd->list);
+	kfree(pnd);
+}
+
+struct net_device *phonet_device_get(struct net *net)
+{
+	struct phonet_device *pnd;
+	struct net_device *dev;
+
+	spin_lock_bh(&pndevs.lock);
+	list_for_each_entry(pnd, &pndevs.list, list) {
+		dev = pnd->netdev;
+		BUG_ON(!dev);
+
+		if (dev_net(dev) == net &&
+			(dev->reg_state == NETREG_REGISTERED) &&
+			((pnd->netdev->flags & IFF_UP)) == IFF_UP)
+			break;
+		dev = NULL;
+	}
+	if (dev)
+		dev_hold(dev);
+	spin_unlock_bh(&pndevs.lock);
+	return dev;
+}
+
+int phonet_address_add(struct net_device *dev, u8 addr)
+{
+	struct phonet_device *pnd;
+	int err = 0;
+
+	spin_lock_bh(&pndevs.lock);
+	/* Find or create Phonet-specific device data */
+	pnd = __phonet_get(dev);
+	if (pnd == NULL)
+		pnd = __phonet_device_alloc(dev);
+	if (unlikely(pnd == NULL))
+		err = -ENOMEM;
+	else if (test_and_set_bit(addr >> 2, pnd->addrs))
+		err = -EEXIST;
+	spin_unlock_bh(&pndevs.lock);
+	return err;
+}
+
+int phonet_address_del(struct net_device *dev, u8 addr)
+{
+	struct phonet_device *pnd;
+	int err = 0;
+
+	spin_lock_bh(&pndevs.lock);
+	pnd = __phonet_get(dev);
+	if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs))
+		err = -EADDRNOTAVAIL;
+	if (bitmap_empty(pnd->addrs, 64))
+		__phonet_device_free(pnd);
+	spin_unlock_bh(&pndevs.lock);
+	return err;
+}
+
+/* Gets a source address toward a destination, through a interface. */
+u8 phonet_address_get(struct net_device *dev, u8 addr)
+{
+	struct phonet_device *pnd;
+
+	spin_lock_bh(&pndevs.lock);
+	pnd = __phonet_get(dev);
+	if (pnd) {
+		BUG_ON(bitmap_empty(pnd->addrs, 64));
+
+		/* Use same source address as destination, if possible */
+		if (!test_bit(addr >> 2, pnd->addrs))
+			addr = find_first_bit(pnd->addrs, 64) << 2;
+	} else
+		addr = PN_NO_ADDR;
+	spin_unlock_bh(&pndevs.lock);
+	return addr;
+}
+
+int phonet_address_lookup(u8 addr)
+{
+	struct phonet_device *pnd;
+
+	spin_lock_bh(&pndevs.lock);
+	list_for_each_entry(pnd, &pndevs.list, list) {
+		/* Don't allow unregistering devices! */
+		if ((pnd->netdev->reg_state != NETREG_REGISTERED) ||
+				((pnd->netdev->flags & IFF_UP)) != IFF_UP)
+			continue;
+
+		if (test_bit(addr >> 2, pnd->addrs)) {
+			spin_unlock_bh(&pndevs.lock);
+			return 0;
+		}
+	}
+	spin_unlock_bh(&pndevs.lock);
+	return -EADDRNOTAVAIL;
+}
+
+/* notify Phonet of device events */
+static int phonet_device_notify(struct notifier_block *me, unsigned long what,
+				void *arg)
+{
+	struct net_device *dev = arg;
+
+	if (what == NETDEV_UNREGISTER) {
+		struct phonet_device *pnd;
+
+		/* Destroy phonet-specific device data */
+		spin_lock_bh(&pndevs.lock);
+		pnd = __phonet_get(dev);
+		if (pnd)
+			__phonet_device_free(pnd);
+		spin_unlock_bh(&pndevs.lock);
+	}
+	return 0;
+
+}
+
+static struct notifier_block phonet_device_notifier = {
+	.notifier_call = phonet_device_notify,
+	.priority = 0,
+};
+
+/* Initialize Phonet devices list */
+void phonet_device_init(void)
+{
+	register_netdevice_notifier(&phonet_device_notifier);
+}
+
+void phonet_device_exit(void)
+{
+	struct phonet_device *pnd, *n;
+
+	rtnl_unregister_all(PF_PHONET);
+	rtnl_lock();
+	spin_lock_bh(&pndevs.lock);
+
+	list_for_each_entry_safe(pnd, n, &pndevs.list, list)
+		__phonet_device_free(pnd);
+
+	spin_unlock_bh(&pndevs.lock);
+	rtnl_unlock();
+	unregister_netdevice_notifier(&phonet_device_notifier);
+}
-- 
cgit v1.2.3-70-g09d2


From 8fb397406f6470f79040c41eec49af20900a9e3b Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 20:04:30 -0700
Subject: Phonet: Netlink interface

This provides support for configuring Phonet addresses, notifying
Phonet configuration changes, and dumping the configuration.

Signed-off-by: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/phonet.h |   1 +
 net/phonet/Makefile         |   1 +
 net/phonet/af_phonet.c      |   1 +
 net/phonet/pn_netlink.c     | 186 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 189 insertions(+)
 create mode 100644 net/phonet/pn_netlink.c

(limited to 'net')

diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index c53f2abc059..8b777943d20 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -71,4 +71,5 @@ struct phonet_protocol {
 int phonet_proto_register(int protocol, struct phonet_protocol *pp);
 void phonet_proto_unregister(int protocol, struct phonet_protocol *pp);
 
+void phonet_netlink_register(void);
 #endif
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index 980a3866c9a..4143c3e1dfd 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -2,4 +2,5 @@ obj-$(CONFIG_PHONET) += phonet.o
 
 phonet-objs := \
 	pn_dev.o \
+	pn_netlink.o \
 	af_phonet.o
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index a8ba6f177b2..5c729ba5693 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -203,6 +203,7 @@ static int __init phonet_init(void)
 
 	phonet_device_init();
 	dev_add_pack(&phonet_packet_type);
+	phonet_netlink_register();
 	return 0;
 }
 
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
new file mode 100644
index 00000000000..b1ea19a230d
--- /dev/null
+++ b/net/phonet/pn_netlink.c
@@ -0,0 +1,186 @@
+/*
+ * File: pn_netlink.c
+ *
+ * Phonet netlink interface
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
+ * Original author: Sakari Ailus <sakari.ailus@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/phonet.h>
+#include <net/sock.h>
+#include <net/phonet/pn_dev.h>
+
+static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
+		     u32 pid, u32 seq, int event);
+
+static void rtmsg_notify(int event, struct net_device *dev, u8 addr)
+{
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+			nla_total_size(1), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+	err = fill_addr(skb, dev, addr, 0, 0, event);
+	if (err < 0) {
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	err = rtnl_notify(skb, dev_net(dev), 0,
+			  RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err);
+}
+
+static int newaddr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr)
+{
+	struct rtattr **rta = attr;
+	struct ifaddrmsg *ifm = NLMSG_DATA(nlm);
+	struct net_device *dev;
+	int err;
+	u8 pnaddr;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ASSERT_RTNL();
+
+	if (rta[IFA_LOCAL - 1] == NULL)
+		return -EINVAL;
+
+	dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+	if (dev == NULL)
+		return -ENODEV;
+
+	if (ifm->ifa_prefixlen > 0)
+		return -EINVAL;
+
+	memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1);
+
+	err = phonet_address_add(dev, pnaddr);
+	if (!err)
+		rtmsg_notify(RTM_NEWADDR, dev, pnaddr);
+	return err;
+}
+
+static int deladdr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr)
+{
+	struct rtattr **rta = attr;
+	struct ifaddrmsg *ifm = NLMSG_DATA(nlm);
+	struct net_device *dev;
+	int err;
+	u8 pnaddr;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ASSERT_RTNL();
+
+	if (rta[IFA_LOCAL - 1] == NULL)
+		return -EINVAL;
+
+	dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+	if (dev == NULL)
+		return -ENODEV;
+
+	if (ifm->ifa_prefixlen > 0)
+		return -EADDRNOTAVAIL;
+
+	memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1);
+
+	err = phonet_address_del(dev, pnaddr);
+	if (!err)
+		rtmsg_notify(RTM_DELADDR, dev, pnaddr);
+	return err;
+}
+
+static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
+			u32 pid, u32 seq, int event)
+{
+	struct ifaddrmsg *ifm;
+	struct nlmsghdr *nlh;
+	unsigned int orig_len = skb->len;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct ifaddrmsg));
+	ifm = NLMSG_DATA(nlh);
+	ifm->ifa_family = AF_PHONET;
+	ifm->ifa_prefixlen = 0;
+	ifm->ifa_flags = IFA_F_PERMANENT;
+	ifm->ifa_scope = RT_SCOPE_HOST;
+	ifm->ifa_index = dev->ifindex;
+	RTA_PUT(skb, IFA_LOCAL, 1, &addr);
+	nlh->nlmsg_len = skb->len - orig_len;
+
+	return 0;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, orig_len);
+
+	return -1;
+}
+
+static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct phonet_device *pnd;
+	int dev_idx = 0, dev_start_idx = cb->args[0];
+	int addr_idx = 0, addr_start_idx = cb->args[1];
+
+	spin_lock_bh(&pndevs.lock);
+	list_for_each_entry(pnd, &pndevs.list, list) {
+		u8 addr;
+
+		if (dev_idx > dev_start_idx)
+			addr_start_idx = 0;
+		if (dev_idx++ < dev_start_idx)
+			continue;
+
+		addr_idx = 0;
+		for (addr = find_first_bit(pnd->addrs, 64); addr < 64;
+			addr = find_next_bit(pnd->addrs, 64, 1+addr)) {
+			if (addr_idx++ < addr_start_idx)
+				continue;
+
+			if (fill_addr(skb, pnd->netdev, addr << 2,
+					 NETLINK_CB(cb->skb).pid,
+					cb->nlh->nlmsg_seq, RTM_NEWADDR))
+				goto out;
+		}
+	}
+
+out:
+	spin_unlock_bh(&pndevs.lock);
+	cb->args[0] = dev_idx;
+	cb->args[1] = addr_idx;
+
+	return skb->len;
+}
+
+void __init phonet_netlink_register(void)
+{
+	rtnl_register(PF_PHONET, RTM_NEWADDR, newaddr_doit, NULL);
+	rtnl_register(PF_PHONET, RTM_DELADDR, deladdr_doit, NULL);
+	rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit);
+}
-- 
cgit v1.2.3-70-g09d2


From ba113a94b7503ee23ffe819e7045134b0c1d31de Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 20:05:19 -0700
Subject: Phonet: common socket glue

This provides the socket API for the Phonet protocols family.

Signed-off-by: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phonet.h      |   3 +
 include/net/phonet/phonet.h |  23 ++++
 net/phonet/Makefile         |   1 +
 net/phonet/af_phonet.c      |  28 +++-
 net/phonet/socket.c         | 311 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 363 insertions(+), 3 deletions(-)
 create mode 100644 net/phonet/socket.c

(limited to 'net')

diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index 6a764f8584a..001c0e67909 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -32,6 +32,9 @@
 #define PNADDR_ANY		0
 #define PNPORT_RESOURCE_ROUTING	0
 
+/* ioctls */
+#define SIOCPNGETOBJECT		(SIOCPROTOPRIVATE + 0)
+
 /* Phonet protocol header */
 struct phonethdr {
 	__u8	pn_rdev;
diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index 8b777943d20..2ae5cbb59b6 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -29,6 +29,28 @@
  */
 #define MAX_PHONET_HEADER	8
 
+/*
+ * Every Phonet* socket has this structure first in its
+ * protocol-specific structure under name c.
+ */
+struct pn_sock {
+	struct sock	sk;
+	u16		sobject;
+	u8		resource;
+};
+
+static inline struct pn_sock *pn_sk(struct sock *sk)
+{
+	return (struct pn_sock *)sk;
+}
+
+extern const struct proto_ops phonet_dgram_ops;
+
+struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *sa);
+void pn_sock_hash(struct sock *sk);
+void pn_sock_unhash(struct sock *sk);
+int pn_sock_get_port(struct sock *sk, unsigned short sport);
+
 static inline struct phonethdr *pn_hdr(struct sk_buff *skb)
 {
 	return (struct phonethdr *)skb_network_header(skb);
@@ -64,6 +86,7 @@ void pn_skb_get_dst_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa)
 
 /* Protocols in Phonet protocol family. */
 struct phonet_protocol {
+	const struct proto_ops	*ops;
 	struct proto		*prot;
 	int			sock_type;
 };
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index 4143c3e1dfd..c1d671de783 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -3,4 +3,5 @@ obj-$(CONFIG_PHONET) += phonet.o
 phonet-objs := \
 	pn_dev.o \
 	pn_netlink.o \
+	socket.o \
 	af_phonet.o
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 5c729ba5693..ba54d53020f 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -41,6 +41,8 @@ static inline void phonet_proto_put(struct phonet_protocol *pp);
 
 static int pn_socket_create(struct net *net, struct socket *sock, int protocol)
 {
+	struct sock *sk;
+	struct pn_sock *pn;
 	struct phonet_protocol *pnp;
 	int err;
 
@@ -69,8 +71,22 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol)
 		goto out;
 	}
 
-	/* TODO: create and init the struct sock */
-	err = -EPROTONOSUPPORT;
+	sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot);
+	if (sk == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	sock_init_data(sock, sk);
+	sock->state = SS_UNCONNECTED;
+	sock->ops = pnp->ops;
+	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+	sk->sk_protocol = protocol;
+	pn = pn_sk(sk);
+	pn->sobject = 0;
+	pn->resource = 0;
+	sk->sk_prot->init(sk);
+	err = 0;
 
 out:
 	phonet_proto_put(pnp);
@@ -94,6 +110,7 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
 			struct net_device *orig_dev)
 {
 	struct phonethdr *ph;
+	struct sock *sk;
 	struct sockaddr_pn sa;
 	u16 len;
 
@@ -118,7 +135,12 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (pn_sockaddr_get_addr(&sa) == 0)
 		goto out; /* currently, we cannot be device 0 */
 
-	/* TODO: put packets to sockets backlog */
+	sk = pn_find_sock_by_sa(&sa);
+	if (sk == NULL)
+		goto out;
+
+	/* Push data to the socket (or other sockets connected to it). */
+	return sk_receive_skb(sk, skb, 0);
 
 out:
 	kfree_skb(skb);
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
new file mode 100644
index 00000000000..99a4945d565
--- /dev/null
+++ b/net/phonet/socket.c
@@ -0,0 +1,311 @@
+/*
+ * File: socket.c
+ *
+ * Phonet sockets
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
+ * Original author: Sakari Ailus <sakari.ailus@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+
+#include <linux/phonet.h>
+#include <net/phonet/phonet.h>
+#include <net/phonet/pn_dev.h>
+
+static int pn_socket_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk) {
+		sock->sk = NULL;
+		sk->sk_prot->close(sk, 0);
+	}
+	return 0;
+}
+
+static struct  {
+	struct hlist_head hlist;
+	spinlock_t lock;
+} pnsocks = {
+	.hlist = HLIST_HEAD_INIT,
+	.lock = __SPIN_LOCK_UNLOCKED(pnsocks.lock),
+};
+
+/*
+ * Find address based on socket address, match only certain fields.
+ * Also grab sock if it was found. Remember to sock_put it later.
+ */
+struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn)
+{
+	struct hlist_node *node;
+	struct sock *sknode;
+	struct sock *rval = NULL;
+	u16 obj = pn_sockaddr_get_object(spn);
+	u8 res = spn->spn_resource;
+
+	spin_lock_bh(&pnsocks.lock);
+
+	sk_for_each(sknode, node, &pnsocks.hlist) {
+		struct pn_sock *pn = pn_sk(sknode);
+		BUG_ON(!pn->sobject); /* unbound socket */
+
+		if (pn_port(obj)) {
+			/* Look up socket by port */
+			if (pn_port(pn->sobject) != pn_port(obj))
+				continue;
+		} else {
+			/* If port is zero, look up by resource */
+			if (pn->resource != res)
+				continue;
+		}
+		if (pn_addr(pn->sobject)
+		 && pn_addr(pn->sobject) != pn_addr(obj))
+			continue;
+
+		rval = sknode;
+		sock_hold(sknode);
+		break;
+	}
+
+	spin_unlock_bh(&pnsocks.lock);
+
+	return rval;
+
+}
+
+void pn_sock_hash(struct sock *sk)
+{
+	spin_lock_bh(&pnsocks.lock);
+	sk_add_node(sk, &pnsocks.hlist);
+	spin_unlock_bh(&pnsocks.lock);
+}
+EXPORT_SYMBOL(pn_sock_hash);
+
+void pn_sock_unhash(struct sock *sk)
+{
+	spin_lock_bh(&pnsocks.lock);
+	sk_del_node_init(sk);
+	spin_unlock_bh(&pnsocks.lock);
+}
+EXPORT_SYMBOL(pn_sock_unhash);
+
+static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
+{
+	struct sock *sk = sock->sk;
+	struct pn_sock *pn = pn_sk(sk);
+	struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
+	int err;
+	u16 handle;
+	u8 saddr;
+
+	if (sk->sk_prot->bind)
+		return sk->sk_prot->bind(sk, addr, len);
+
+	if (len < sizeof(struct sockaddr_pn))
+		return -EINVAL;
+	if (spn->spn_family != AF_PHONET)
+		return -EAFNOSUPPORT;
+
+	handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr);
+	saddr = pn_addr(handle);
+	if (saddr && phonet_address_lookup(saddr))
+		return -EADDRNOTAVAIL;
+
+	lock_sock(sk);
+	if (sk->sk_state != TCP_CLOSE || pn_port(pn->sobject)) {
+		err = -EINVAL; /* attempt to rebind */
+		goto out;
+	}
+	err = sk->sk_prot->get_port(sk, pn_port(handle));
+	if (err)
+		goto out;
+
+	/* get_port() sets the port, bind() sets the address if applicable */
+	pn->sobject = pn_object(saddr, pn_port(pn->sobject));
+	pn->resource = spn->spn_resource;
+
+	/* Enable RX on the socket */
+	sk->sk_prot->hash(sk);
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int pn_socket_autobind(struct socket *sock)
+{
+	struct sockaddr_pn sa;
+	int err;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.spn_family = AF_PHONET;
+	err = pn_socket_bind(sock, (struct sockaddr *)&sa,
+				sizeof(struct sockaddr_pn));
+	if (err != -EINVAL)
+		return err;
+	BUG_ON(!pn_port(pn_sk(sock->sk)->sobject));
+	return 0; /* socket was already bound */
+}
+
+static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
+				int *sockaddr_len, int peer)
+{
+	struct sock *sk = sock->sk;
+	struct pn_sock *pn = pn_sk(sk);
+
+	memset(addr, 0, sizeof(struct sockaddr_pn));
+	addr->sa_family = AF_PHONET;
+	if (!peer) /* Race with bind() here is userland's problem. */
+		pn_sockaddr_set_object((struct sockaddr_pn *)addr,
+					pn->sobject);
+
+	*sockaddr_len = sizeof(struct sockaddr_pn);
+	return 0;
+}
+
+static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
+				unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct pn_sock *pn = pn_sk(sk);
+
+	if (cmd == SIOCPNGETOBJECT) {
+		struct net_device *dev;
+		u16 handle;
+		u8 saddr;
+
+		if (get_user(handle, (__u16 __user *)arg))
+			return -EFAULT;
+
+		lock_sock(sk);
+		if (sk->sk_bound_dev_if)
+			dev = dev_get_by_index(sock_net(sk),
+						sk->sk_bound_dev_if);
+		else
+			dev = phonet_device_get(sock_net(sk));
+		if (dev && (dev->flags & IFF_UP))
+			saddr = phonet_address_get(dev, pn_addr(handle));
+		else
+			saddr = PN_NO_ADDR;
+		release_sock(sk);
+
+		if (dev)
+			dev_put(dev);
+		if (saddr == PN_NO_ADDR)
+			return -EHOSTUNREACH;
+
+		handle = pn_object(saddr, pn_port(pn->sobject));
+		return put_user(handle, (__u16 __user *)arg);
+	}
+
+	return sk->sk_prot->ioctl(sk, cmd, arg);
+}
+
+static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock,
+				struct msghdr *m, size_t total_len)
+{
+	struct sock *sk = sock->sk;
+
+	if (pn_socket_autobind(sock))
+		return -EAGAIN;
+
+	return sk->sk_prot->sendmsg(iocb, sk, m, total_len);
+}
+
+const struct proto_ops phonet_dgram_ops = {
+	.family		= AF_PHONET,
+	.owner		= THIS_MODULE,
+	.release	= pn_socket_release,
+	.bind		= pn_socket_bind,
+	.connect	= sock_no_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= pn_socket_getname,
+	.poll		= datagram_poll,
+	.ioctl		= pn_socket_ioctl,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= sock_no_setsockopt,
+	.getsockopt	= sock_no_getsockopt,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = sock_no_setsockopt,
+	.compat_getsockopt = sock_no_getsockopt,
+#endif
+	.sendmsg	= pn_socket_sendmsg,
+	.recvmsg	= sock_common_recvmsg,
+	.mmap		= sock_no_mmap,
+	.sendpage	= sock_no_sendpage,
+};
+
+static DEFINE_MUTEX(port_mutex);
+
+/* allocate port for a socket */
+int pn_sock_get_port(struct sock *sk, unsigned short sport)
+{
+	static int port_cur;
+	struct pn_sock *pn = pn_sk(sk);
+	struct sockaddr_pn try_sa;
+	struct sock *tmpsk;
+
+	memset(&try_sa, 0, sizeof(struct sockaddr_pn));
+	try_sa.spn_family = AF_PHONET;
+
+	mutex_lock(&port_mutex);
+
+	if (!sport) {
+		/* search free port */
+		int port, pmin = 0x40, pmax = 0x7f;
+
+		for (port = pmin; port <= pmax; port++) {
+			port_cur++;
+			if (port_cur < pmin || port_cur > pmax)
+				port_cur = pmin;
+
+			pn_sockaddr_set_port(&try_sa, port_cur);
+			tmpsk = pn_find_sock_by_sa(&try_sa);
+			if (tmpsk == NULL) {
+				sport = port_cur;
+				goto found;
+			} else
+				sock_put(tmpsk);
+		}
+	} else {
+		/* try to find specific port */
+		pn_sockaddr_set_port(&try_sa, sport);
+		tmpsk = pn_find_sock_by_sa(&try_sa);
+		if (tmpsk == NULL)
+			/* No sock there! We can use that port... */
+			goto found;
+		else
+			sock_put(tmpsk);
+	}
+	mutex_unlock(&port_mutex);
+
+	/* the port must be in use already */
+	return -EADDRINUSE;
+
+found:
+	mutex_unlock(&port_mutex);
+	pn->sobject = pn_object(pn_addr(pn->sobject), sport);
+	return 0;
+}
+EXPORT_SYMBOL(pn_sock_get_port);
-- 
cgit v1.2.3-70-g09d2


From 107d0d9b8d9a236883db72841fb61cedd5be845e Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 20:05:57 -0700
Subject: Phonet: Phonet datagram transport protocol

This provides the basic SOCK_DGRAM transport protocol for Phonet.

Signed-off-by: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/phonet.h |   6 ++
 net/phonet/Makefile         |   1 +
 net/phonet/af_phonet.c      | 106 ++++++++++++++++++++++++
 net/phonet/datagram.c       | 197 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 310 insertions(+)
 create mode 100644 net/phonet/datagram.c

(limited to 'net')

diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index 2ae5cbb59b6..d3957d3be0f 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -51,6 +51,9 @@ void pn_sock_hash(struct sock *sk);
 void pn_sock_unhash(struct sock *sk);
 int pn_sock_get_port(struct sock *sk, unsigned short sport);
 
+int pn_skb_send(struct sock *sk, struct sk_buff *skb,
+		const struct sockaddr_pn *target);
+
 static inline struct phonethdr *pn_hdr(struct sk_buff *skb)
 {
 	return (struct phonethdr *)skb_network_header(skb);
@@ -95,4 +98,7 @@ int phonet_proto_register(int protocol, struct phonet_protocol *pp);
 void phonet_proto_unregister(int protocol, struct phonet_protocol *pp);
 
 void phonet_netlink_register(void);
+int isi_register(void);
+void isi_unregister(void);
+
 #endif
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index c1d671de783..d218abc3f06 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -4,4 +4,5 @@ phonet-objs := \
 	pn_dev.o \
 	pn_netlink.o \
 	socket.o \
+	datagram.o \
 	af_phonet.o
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index ba54d53020f..e6771d3961c 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -99,6 +99,101 @@ static struct net_proto_family phonet_proto_family = {
 	.owner = THIS_MODULE,
 };
 
+/*
+ * Prepends an ISI header and sends a datagram.
+ */
+static int pn_send(struct sk_buff *skb, struct net_device *dev,
+			u16 dst, u16 src, u8 res)
+{
+	struct phonethdr *ph;
+	int err;
+
+	if (skb->len + 2 > 0xffff) {
+		/* Phonet length field would overflow */
+		err = -EMSGSIZE;
+		goto drop;
+	}
+
+	skb_reset_transport_header(skb);
+	WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */
+	skb_push(skb, sizeof(struct phonethdr));
+	skb_reset_network_header(skb);
+	ph = pn_hdr(skb);
+	ph->pn_rdev = pn_dev(dst);
+	ph->pn_sdev = pn_dev(src);
+	ph->pn_res = res;
+	ph->pn_length = __cpu_to_be16(skb->len + 2 - sizeof(*ph));
+	ph->pn_robj = pn_obj(dst);
+	ph->pn_sobj = pn_obj(src);
+
+	skb->protocol = htons(ETH_P_PHONET);
+	skb->priority = 0;
+	skb->dev = dev;
+
+	if (pn_addr(src) == pn_addr(dst)) {
+		skb_reset_mac_header(skb);
+		skb->pkt_type = PACKET_LOOPBACK;
+		skb_orphan(skb);
+		netif_rx_ni(skb);
+		err = 0;
+	} else {
+		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+					NULL, NULL, skb->len);
+		if (err < 0) {
+			err = -EHOSTUNREACH;
+			goto drop;
+		}
+		err = dev_queue_xmit(skb);
+	}
+
+	return err;
+drop:
+	kfree_skb(skb);
+	return err;
+}
+
+/*
+ * Create a Phonet header for the skb and send it out. Returns
+ * non-zero error code if failed. The skb is freed then.
+ */
+int pn_skb_send(struct sock *sk, struct sk_buff *skb,
+		const struct sockaddr_pn *target)
+{
+	struct net_device *dev;
+	struct pn_sock *pn = pn_sk(sk);
+	int err;
+	u16 src;
+	u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR;
+
+	err = -EHOSTUNREACH;
+	if (sk->sk_bound_dev_if)
+		dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if);
+	else
+		dev = phonet_device_get(sock_net(sk));
+	if (!dev || !(dev->flags & IFF_UP))
+		goto drop;
+
+	saddr = phonet_address_get(dev, daddr);
+	if (saddr == PN_NO_ADDR)
+		goto drop;
+
+	src = pn->sobject;
+	if (!pn_addr(src))
+		src = pn_object(saddr, pn_obj(src));
+
+	err = pn_send(skb, dev, pn_sockaddr_get_object(target),
+			src, pn_sockaddr_get_resource(target));
+	dev_put(dev);
+	return err;
+
+drop:
+	kfree_skb(skb);
+	if (dev)
+		dev_put(dev);
+	return err;
+}
+EXPORT_SYMBOL(pn_skb_send);
+
 /* packet type functions */
 
 /*
@@ -226,11 +321,22 @@ static int __init phonet_init(void)
 	phonet_device_init();
 	dev_add_pack(&phonet_packet_type);
 	phonet_netlink_register();
+
+	err = isi_register();
+	if (err)
+		goto err;
 	return 0;
+
+err:
+	sock_unregister(AF_PHONET);
+	dev_remove_pack(&phonet_packet_type);
+	phonet_device_exit();
+	return err;
 }
 
 static void __exit phonet_exit(void)
 {
+	isi_unregister();
 	sock_unregister(AF_PHONET);
 	dev_remove_pack(&phonet_packet_type);
 	phonet_device_exit();
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
new file mode 100644
index 00000000000..e087862ed7e
--- /dev/null
+++ b/net/phonet/datagram.c
@@ -0,0 +1,197 @@
+/*
+ * File: datagram.c
+ *
+ * Datagram (ISI) Phonet sockets
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
+ * Original author: Sakari Ailus <sakari.ailus@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <asm/ioctls.h>
+#include <net/sock.h>
+
+#include <linux/phonet.h>
+#include <net/phonet/phonet.h>
+
+static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+
+/* associated socket ceases to exist */
+static void pn_sock_close(struct sock *sk, long timeout)
+{
+	sk_common_release(sk);
+}
+
+static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	struct sk_buff *skb;
+	int answ;
+
+	switch (cmd) {
+	case SIOCINQ:
+		lock_sock(sk);
+		skb = skb_peek(&sk->sk_receive_queue);
+		answ = skb ? skb->len : 0;
+		release_sock(sk);
+		return put_user(answ, (int __user *)arg);
+	}
+
+	return -ENOIOCTLCMD;
+}
+
+/* Destroy socket. All references are gone. */
+static void pn_destruct(struct sock *sk)
+{
+	skb_queue_purge(&sk->sk_receive_queue);
+}
+
+static int pn_init(struct sock *sk)
+{
+	sk->sk_destruct = pn_destruct;
+	return 0;
+}
+
+static int pn_sendmsg(struct kiocb *iocb, struct sock *sk,
+			struct msghdr *msg, size_t len)
+{
+	struct sockaddr_pn *target;
+	struct sk_buff *skb;
+	int err;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	if (msg->msg_name == NULL)
+		return -EDESTADDRREQ;
+
+	if (msg->msg_namelen < sizeof(struct sockaddr_pn))
+		return -EINVAL;
+
+	target = (struct sockaddr_pn *)msg->msg_name;
+	if (target->spn_family != AF_PHONET)
+		return -EAFNOSUPPORT;
+
+	skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len,
+					msg->msg_flags & MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		return err;
+	skb_reserve(skb, MAX_PHONET_HEADER);
+
+	err = memcpy_fromiovec((void *)skb_put(skb, len), msg->msg_iov, len);
+	if (err < 0) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	/*
+	 * Fill in the Phonet header and
+	 * finally pass the packet forwards.
+	 */
+	err = pn_skb_send(sk, skb, target);
+
+	/* If ok, return len. */
+	return (err >= 0) ? len : err;
+}
+
+static int pn_recvmsg(struct kiocb *iocb, struct sock *sk,
+			struct msghdr *msg, size_t len, int noblock,
+			int flags, int *addr_len)
+{
+	struct sk_buff *skb = NULL;
+	struct sockaddr_pn sa;
+	int rval = -EOPNOTSUPP;
+	int copylen;
+
+	if (flags & MSG_OOB)
+		goto out_nofree;
+
+	if (addr_len)
+		*addr_len = sizeof(sa);
+
+	skb = skb_recv_datagram(sk, flags, noblock, &rval);
+	if (skb == NULL)
+		goto out_nofree;
+
+	pn_skb_get_src_sockaddr(skb, &sa);
+
+	copylen = skb->len;
+	if (len < copylen) {
+		msg->msg_flags |= MSG_TRUNC;
+		copylen = len;
+	}
+
+	rval = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copylen);
+	if (rval) {
+		rval = -EFAULT;
+		goto out;
+	}
+
+	rval = (flags & MSG_TRUNC) ? skb->len : copylen;
+
+	if (msg->msg_name != NULL)
+		memcpy(msg->msg_name, &sa, sizeof(struct sockaddr_pn));
+
+out:
+	skb_free_datagram(sk, skb);
+
+out_nofree:
+	return rval;
+}
+
+/* Queue an skb for a sock. */
+static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int err = sock_queue_rcv_skb(sk, skb);
+	if (err < 0)
+		kfree_skb(skb);
+	return err ? NET_RX_DROP : NET_RX_SUCCESS;
+}
+
+/* Module registration */
+static struct proto pn_proto = {
+	.close		= pn_sock_close,
+	.ioctl		= pn_ioctl,
+	.init		= pn_init,
+	.sendmsg	= pn_sendmsg,
+	.recvmsg	= pn_recvmsg,
+	.backlog_rcv	= pn_backlog_rcv,
+	.hash		= pn_sock_hash,
+	.unhash		= pn_sock_unhash,
+	.get_port	= pn_sock_get_port,
+	.obj_size	= sizeof(struct pn_sock),
+	.owner		= THIS_MODULE,
+	.name		= "PHONET",
+};
+
+static struct phonet_protocol pn_dgram_proto = {
+	.ops		= &phonet_dgram_ops,
+	.prot		= &pn_proto,
+	.sock_type	= SOCK_DGRAM,
+};
+
+int __init isi_register(void)
+{
+	return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto);
+}
+
+void __exit isi_unregister(void)
+{
+	phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto);
+}
-- 
cgit v1.2.3-70-g09d2


From 5f77076d75d35c9f5619e1f9d7e7428a627f65e6 Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 20:08:04 -0700
Subject: Phonet: provide MAC header operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_phonet.h |  4 ++++
 net/phonet/af_phonet.c    | 29 +++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'net')

diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h
index 22df25fbc4e..7e989216ec1 100644
--- a/include/linux/if_phonet.h
+++ b/include/linux/if_phonet.h
@@ -12,3 +12,7 @@
 /* 6 bytes header + 65535 bytes payload */
 #define PHONET_MAX_MTU		65541
 #define PHONET_DEV_MTU		PHONET_MAX_MTU
+
+#ifdef __KERNEL__
+extern struct header_ops phonet_header_ops;
+#endif
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index e6771d3961c..51397ff308b 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -99,6 +99,35 @@ static struct net_proto_family phonet_proto_family = {
 	.owner = THIS_MODULE,
 };
 
+/* Phonet device header operations */
+static int pn_header_create(struct sk_buff *skb, struct net_device *dev,
+				unsigned short type, const void *daddr,
+				const void *saddr, unsigned len)
+{
+	u8 *media = skb_push(skb, 1);
+
+	if (type != ETH_P_PHONET)
+		return -1;
+
+	if (!saddr)
+		saddr = dev->dev_addr;
+	*media = *(const u8 *)saddr;
+	return 1;
+}
+
+static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr)
+{
+	const u8 *media = skb_mac_header(skb);
+	*haddr = *media;
+	return 1;
+}
+
+struct header_ops phonet_header_ops = {
+	.create = pn_header_create,
+	.parse = pn_header_parse,
+};
+EXPORT_SYMBOL(phonet_header_ops);
+
 /*
  * Prepends an ISI header and sends a datagram.
  */
-- 
cgit v1.2.3-70-g09d2


From 87ab4e20b445c6d2d2727ab4f96fa17f7259511e Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 20:08:39 -0700
Subject: Phonet: proc interface for port range

Phonet endpoints are bound to individual ports.
This provides a /proc/sys/net/phonet (or sysctl) interface for
selecting the range of automatically allocated ports (much like the
ip_local_port_range with IPv4).

Signed-off-by: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/phonet.h |   3 ++
 net/phonet/Makefile         |   1 +
 net/phonet/af_phonet.c      |   3 ++
 net/phonet/socket.c         |   3 +-
 net/phonet/sysctl.c         | 113 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 122 insertions(+), 1 deletion(-)
 create mode 100644 net/phonet/sysctl.c

(limited to 'net')

diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index d3957d3be0f..1c6f7e7d5fe 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -47,6 +47,7 @@ static inline struct pn_sock *pn_sk(struct sock *sk)
 extern const struct proto_ops phonet_dgram_ops;
 
 struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *sa);
+void phonet_get_local_port_range(int *min, int *max);
 void pn_sock_hash(struct sock *sk);
 void pn_sock_unhash(struct sock *sk);
 int pn_sock_get_port(struct sock *sk, unsigned short sport);
@@ -97,6 +98,8 @@ struct phonet_protocol {
 int phonet_proto_register(int protocol, struct phonet_protocol *pp);
 void phonet_proto_unregister(int protocol, struct phonet_protocol *pp);
 
+int phonet_sysctl_init(void);
+void phonet_sysctl_exit(void);
 void phonet_netlink_register(void);
 int isi_register(void);
 void isi_unregister(void);
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index d218abc3f06..ae9c3ed5be8 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -5,4 +5,5 @@ phonet-objs := \
 	pn_netlink.o \
 	socket.o \
 	datagram.o \
+	sysctl.o \
 	af_phonet.o
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 51397ff308b..50dc258d5aa 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -350,6 +350,7 @@ static int __init phonet_init(void)
 	phonet_device_init();
 	dev_add_pack(&phonet_packet_type);
 	phonet_netlink_register();
+	phonet_sysctl_init();
 
 	err = isi_register();
 	if (err)
@@ -357,6 +358,7 @@ static int __init phonet_init(void)
 	return 0;
 
 err:
+	phonet_sysctl_exit();
 	sock_unregister(AF_PHONET);
 	dev_remove_pack(&phonet_packet_type);
 	phonet_device_exit();
@@ -366,6 +368,7 @@ err:
 static void __exit phonet_exit(void)
 {
 	isi_unregister();
+	phonet_sysctl_exit();
 	sock_unregister(AF_PHONET);
 	dev_remove_pack(&phonet_packet_type);
 	phonet_device_exit();
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 99a4945d565..dfd4061646d 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -273,8 +273,9 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport)
 
 	if (!sport) {
 		/* search free port */
-		int port, pmin = 0x40, pmax = 0x7f;
+		int port, pmin, pmax;
 
+		phonet_get_local_port_range(&pmin, &pmax);
 		for (port = pmin; port <= pmax; port++) {
 			port_cur++;
 			if (port_cur < pmin || port_cur > pmax)
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c
new file mode 100644
index 00000000000..600a4309b8c
--- /dev/null
+++ b/net/phonet/sysctl.c
@@ -0,0 +1,113 @@
+/*
+ * File: sysctl.c
+ *
+ * Phonet /proc/sys/net/phonet interface implementation
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/seqlock.h>
+#include <linux/sysctl.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#define DYNAMIC_PORT_MIN	0x40
+#define DYNAMIC_PORT_MAX	0x7f
+
+static DEFINE_SEQLOCK(local_port_range_lock);
+static int local_port_range_min[2] = {0, 0};
+static int local_port_range_max[2] = {1023, 1023};
+static int local_port_range[2] = {DYNAMIC_PORT_MIN, DYNAMIC_PORT_MAX};
+static struct ctl_table_header *phonet_table_hrd;
+
+static void set_local_port_range(int range[2])
+{
+	write_seqlock(&local_port_range_lock);
+	local_port_range[0] = range[0];
+	local_port_range[1] = range[1];
+	write_sequnlock(&local_port_range_lock);
+}
+
+void phonet_get_local_port_range(int *min, int *max)
+{
+	unsigned seq;
+	do {
+		seq = read_seqbegin(&local_port_range_lock);
+		if (min)
+			*min = local_port_range[0];
+		if (max)
+			*max = local_port_range[1];
+	} while (read_seqretry(&local_port_range_lock, seq));
+}
+
+static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
+				void __user *buffer,
+				size_t *lenp, loff_t *ppos)
+{
+	int ret;
+	int range[2] = {local_port_range[0], local_port_range[1]};
+	ctl_table tmp = {
+		.data = &range,
+		.maxlen = sizeof(range),
+		.mode = table->mode,
+		.extra1 = &local_port_range_min,
+		.extra2 = &local_port_range_max,
+	};
+
+	ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+
+	if (write && ret == 0) {
+		if (range[1] < range[0])
+			ret = -EINVAL;
+		else
+			set_local_port_range(range);
+	}
+
+	return ret;
+}
+
+static struct ctl_table phonet_table[] = {
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "local_port_range",
+		.data		= &local_port_range,
+		.maxlen		= sizeof(local_port_range),
+		.mode		= 0644,
+		.proc_handler	= &proc_local_port_range,
+		.strategy	= NULL,
+	},
+	{ .ctl_name = 0 }
+};
+
+struct ctl_path phonet_ctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "phonet", .ctl_name = CTL_UNNUMBERED, },
+	{ },
+};
+
+int __init phonet_sysctl_init(void)
+{
+	phonet_table_hrd = register_sysctl_paths(phonet_ctl_path, phonet_table);
+	return phonet_table_hrd == NULL ? -ENOMEM : 0;
+}
+
+void phonet_sysctl_exit(void)
+{
+	unregister_sysctl_table(phonet_table_hrd);
+}
-- 
cgit v1.2.3-70-g09d2


From be0c52bfed7f7828494fa00060efd5d758e92580 Mon Sep 17 00:00:00 2001
From: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Mon, 22 Sep 2008 20:09:13 -0700
Subject: Phonet: emit errors when a packet cannot be delivered locally

When there is no listener socket for a received packet, send an error
back to the sender.

Signed-off-by: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phonet.h      | 32 +++++++++++++++
 include/net/phonet/phonet.h |  5 +++
 net/phonet/af_phonet.c      | 96 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 129 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index 001c0e67909..3a027f588a4 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -45,6 +45,38 @@ struct phonethdr {
 	__u8	pn_sobj;
 } __attribute__((packed));
 
+/* Common Phonet payload header */
+struct phonetmsg {
+	__u8	pn_trans_id;	/* transaction ID */
+	__u8	pn_msg_id;	/* message type */
+	union {
+		struct {
+			__u8	pn_submsg_id;	/* message subtype */
+			__u8	pn_data[5];
+		} base;
+		struct {
+			__u16	pn_e_res_id;	/* extended resource ID */
+			__u8	pn_e_submsg_id;	/* message subtype */
+			__u8	pn_e_data[3];
+		} ext;
+	} pn_msg_u;
+};
+#define PN_COMMON_MESSAGE	0xF0
+#define PN_PREFIX		0xE0 /* resource for extended messages */
+#define pn_submsg_id		pn_msg_u.base.pn_submsg_id
+#define pn_e_submsg_id		pn_msg_u.ext.pn_e_submsg_id
+#define pn_e_res_id		pn_msg_u.ext.pn_e_res_id
+#define pn_data			pn_msg_u.base.pn_data
+#define pn_e_data		pn_msg_u.ext.pn_e_data
+
+/* data for unreachable errors */
+#define PN_COMM_SERVICE_NOT_IDENTIFIED_RESP	0x01
+#define PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP	0x14
+#define pn_orig_msg_id		pn_data[0]
+#define pn_status		pn_data[1]
+#define pn_e_orig_msg_id	pn_e_data[0]
+#define pn_e_status		pn_e_data[1]
+
 /* Phonet socket address structure */
 struct sockaddr_pn {
 	sa_family_t spn_family;
diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index 1c6f7e7d5fe..d4e72508e14 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -60,6 +60,11 @@ static inline struct phonethdr *pn_hdr(struct sk_buff *skb)
 	return (struct phonethdr *)skb_network_header(skb);
 }
 
+static inline struct phonetmsg *pn_msg(struct sk_buff *skb)
+{
+	return (struct phonetmsg *)skb_transport_header(skb);
+}
+
 /*
  * Get the other party's sockaddr from received skb. The skb begins
  * with a Phonet header.
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 50dc258d5aa..1d8df6b7e3d 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -132,7 +132,7 @@ EXPORT_SYMBOL(phonet_header_ops);
  * Prepends an ISI header and sends a datagram.
  */
 static int pn_send(struct sk_buff *skb, struct net_device *dev,
-			u16 dst, u16 src, u8 res)
+			u16 dst, u16 src, u8 res, u8 irq)
 {
 	struct phonethdr *ph;
 	int err;
@@ -163,7 +163,10 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
 		skb_reset_mac_header(skb);
 		skb->pkt_type = PACKET_LOOPBACK;
 		skb_orphan(skb);
-		netif_rx_ni(skb);
+		if (irq)
+			netif_rx(skb);
+		else
+			netif_rx_ni(skb);
 		err = 0;
 	} else {
 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
@@ -181,6 +184,19 @@ drop:
 	return err;
 }
 
+static int pn_raw_send(const void *data, int len, struct net_device *dev,
+			u16 dst, u16 src, u8 res)
+{
+	struct sk_buff *skb = alloc_skb(MAX_PHONET_HEADER + len, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_reserve(skb, MAX_PHONET_HEADER);
+	__skb_put(skb, len);
+	skb_copy_to_linear_data(skb, data, len);
+	return pn_send(skb, dev, dst, src, res, 1);
+}
+
 /*
  * Create a Phonet header for the skb and send it out. Returns
  * non-zero error code if failed. The skb is freed then.
@@ -211,7 +227,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 		src = pn_object(saddr, pn_obj(src));
 
 	err = pn_send(skb, dev, pn_sockaddr_get_object(target),
-			src, pn_sockaddr_get_resource(target));
+			src, pn_sockaddr_get_resource(target), 0);
 	dev_put(dev);
 	return err;
 
@@ -223,6 +239,73 @@ drop:
 }
 EXPORT_SYMBOL(pn_skb_send);
 
+/* Do not send an error message in response to an error message */
+static inline int can_respond(struct sk_buff *skb)
+{
+	const struct phonethdr *ph;
+	const struct phonetmsg *pm;
+	u8 submsg_id;
+
+	if (!pskb_may_pull(skb, 3))
+		return 0;
+
+	ph = pn_hdr(skb);
+	if (phonet_address_get(skb->dev, ph->pn_rdev) != ph->pn_rdev)
+		return 0; /* we are not the destination */
+	if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5))
+		return 0;
+
+	ph = pn_hdr(skb); /* re-acquires the pointer */
+	pm = pn_msg(skb);
+	if (pm->pn_msg_id != PN_COMMON_MESSAGE)
+		return 1;
+	submsg_id = (ph->pn_res == PN_PREFIX)
+		? pm->pn_e_submsg_id : pm->pn_submsg_id;
+	if (submsg_id != PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP &&
+		pm->pn_e_submsg_id != PN_COMM_SERVICE_NOT_IDENTIFIED_RESP)
+		return 1;
+	return 0;
+}
+
+static int send_obj_unreachable(struct sk_buff *rskb)
+{
+	const struct phonethdr *oph = pn_hdr(rskb);
+	const struct phonetmsg *opm = pn_msg(rskb);
+	struct phonetmsg resp;
+
+	memset(&resp, 0, sizeof(resp));
+	resp.pn_trans_id = opm->pn_trans_id;
+	resp.pn_msg_id = PN_COMMON_MESSAGE;
+	if (oph->pn_res == PN_PREFIX) {
+		resp.pn_e_res_id = opm->pn_e_res_id;
+		resp.pn_e_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP;
+		resp.pn_e_orig_msg_id = opm->pn_msg_id;
+		resp.pn_e_status = 0;
+	} else {
+		resp.pn_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP;
+		resp.pn_orig_msg_id = opm->pn_msg_id;
+		resp.pn_status = 0;
+	}
+	return pn_raw_send(&resp, sizeof(resp), rskb->dev,
+				pn_object(oph->pn_sdev, oph->pn_sobj),
+				pn_object(oph->pn_rdev, oph->pn_robj),
+				oph->pn_res);
+}
+
+static int send_reset_indications(struct sk_buff *rskb)
+{
+	struct phonethdr *oph = pn_hdr(rskb);
+	static const u8 data[4] = {
+		0x00 /* trans ID */, 0x10 /* subscribe msg */,
+		0x00 /* subscription count */, 0x00 /* dummy */
+	};
+
+	return pn_raw_send(data, sizeof(data), rskb->dev,
+				pn_object(oph->pn_sdev, 0x00),
+				pn_object(oph->pn_rdev, oph->pn_robj), 0x10);
+}
+
+
 /* packet type functions */
 
 /*
@@ -260,8 +343,13 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
 		goto out; /* currently, we cannot be device 0 */
 
 	sk = pn_find_sock_by_sa(&sa);
-	if (sk == NULL)
+	if (sk == NULL) {
+		if (can_respond(skb)) {
+			send_obj_unreachable(skb);
+			send_reset_indications(skb);
+		}
 		goto out;
+	}
 
 	/* Push data to the socket (or other sockets connected to it). */
 	return sk_receive_skb(sk, skb, 0);
-- 
cgit v1.2.3-70-g09d2


From 0b815a1a6d43ab498674b8430c8c35ab08487a16 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 22 Sep 2008 21:28:11 -0700
Subject: net: network device name ifalias support

This patch add support for keeping an additional character alias
associated with an network interface. This is useful for maintaining
the SNMP ifAlias value which is a user defined value. Routers use this
to hold information like which circuit or line it is connected to. It
is just an arbitrary text label on the network device.

There are two exposed interfaces with this patch, the value can be
read/written either via netlink or sysfs.

This could be maintained just by the snmp daemon, but it is more
generally useful for other management tools, and the kernel is good
place to act as an agreed upon interface to store it.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if.h        |  1 +
 include/linux/if_link.h   |  1 +
 include/linux/netdevice.h |  3 +++
 net/core/dev.c            | 23 +++++++++++++++++++++++
 net/core/net-sysfs.c      | 36 ++++++++++++++++++++++++++++++++++++
 net/core/rtnetlink.c      | 13 +++++++++++++
 6 files changed, 77 insertions(+)

(limited to 'net')

diff --git a/include/linux/if.h b/include/linux/if.h
index 5c9d1fa93fe..65246846c84 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -24,6 +24,7 @@
 #include <linux/compiler.h>		/* for "__user" et al           */
 
 #define	IFNAMSIZ	16
+#define	IFALIASZ	256
 #include <linux/hdlc/ioctl.h>
 
 /* Standard interface flags (netdevice->flags). */
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 84c3492ae5c..f9032c88716 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -79,6 +79,7 @@ enum
 	IFLA_LINKINFO,
 #define IFLA_LINKINFO IFLA_LINKINFO
 	IFLA_NET_NS_PID,
+	IFLA_IFALIAS,
 	__IFLA_MAX
 };
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 488c56e649b..d675df08b94 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -471,6 +471,8 @@ struct net_device
 	char			name[IFNAMSIZ];
 	/* device name hash chain */
 	struct hlist_node	name_hlist;
+	/* snmp alias */
+	char 			*ifalias;
 
 	/*
 	 *	I/O specific fields
@@ -1224,6 +1226,7 @@ extern int		dev_ethtool(struct net *net, struct ifreq *);
 extern unsigned		dev_get_flags(const struct net_device *);
 extern int		dev_change_flags(struct net_device *, unsigned);
 extern int		dev_change_name(struct net_device *, char *);
+extern int		dev_set_alias(struct net_device *, const char *, size_t);
 extern int		dev_change_net_namespace(struct net_device *,
 						 struct net *, const char *);
 extern int		dev_set_mtu(struct net_device *, int);
diff --git a/net/core/dev.c b/net/core/dev.c
index fdfc4b6a644..e9139053399 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -953,6 +953,29 @@ rollback:
 	return err;
 }
 
+/**
+ *	dev_set_alias - change ifalias of a device
+ *	@dev: device
+ *	@alias: name up to IFALIASZ
+ *
+ *	Set ifalias for a device,
+ */
+int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
+{
+	ASSERT_RTNL();
+
+	if (len >= IFALIASZ)
+		return -EINVAL;
+
+	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
+	if (!dev->ifalias)
+		return -ENOMEM;
+
+	strlcpy(dev->ifalias, alias, len+1);
+	return len;
+}
+
+
 /**
  *	netdev_features_change - device changes features
  *	@dev: device to cause notification
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c1f4e0d428c..92d6b946731 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -209,9 +209,44 @@ static ssize_t store_tx_queue_len(struct device *dev,
 	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
 }
 
+static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t len)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	size_t count = len;
+	ssize_t ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	/* ignore trailing newline */
+	if (len >  0 && buf[len - 1] == '\n')
+		--count;
+
+	rtnl_lock();
+	ret = dev_set_alias(netdev, buf, count);
+	rtnl_unlock();
+
+	return ret < 0 ? ret : len;
+}
+
+static ssize_t show_ifalias(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	const struct net_device *netdev = to_net_dev(dev);
+	ssize_t ret = 0;
+
+	rtnl_lock();
+	if (netdev->ifalias)
+		ret = sprintf(buf, "%s\n", netdev->ifalias);
+	rtnl_unlock();
+	return ret;
+}
+
 static struct device_attribute net_class_attributes[] = {
 	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
 	__ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
+	__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
 	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
 	__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
 	__ATTR(features, S_IRUGO, show_features, NULL),
@@ -418,6 +453,7 @@ static void netdev_release(struct device *d)
 
 	BUG_ON(dev->reg_state != NETREG_RELEASED);
 
+	kfree(dev->ifalias);
 	kfree((char *)dev - dev->padded);
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 71edb8b3634..8862498fd4a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -586,6 +586,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
 	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+	       + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */
 	       + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
 	       + nla_total_size(sizeof(struct rtnl_link_ifmap))
 	       + nla_total_size(sizeof(struct rtnl_link_stats))
@@ -640,6 +641,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (txq->qdisc_sleeping)
 		NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id);
 
+	if (dev->ifalias)
+		NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias);
+
 	if (1) {
 		struct rtnl_link_ifmap map = {
 			.mem_start   = dev->mem_start,
@@ -713,6 +717,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKMODE]		= { .type = NLA_U8 },
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
+	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -853,6 +858,14 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		modified = 1;
 	}
 
+	if (tb[IFLA_IFALIAS]) {
+		err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]),
+				    nla_len(tb[IFLA_IFALIAS]));
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+
 	if (tb[IFLA_BROADCAST]) {
 		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
 		send_addr_notify = 1;
-- 
cgit v1.2.3-70-g09d2


From 242f8bfefe4bed626df4e4727ac8f315d80b567a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 22 Sep 2008 22:15:30 -0700
Subject: pkt_sched: Make qdisc->gso_skb a list.

The idea is that we can use this to get rid of
->requeue().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  2 +-
 net/sched/sch_generic.c   | 12 +++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e5569625d2a..3b983e8a055 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -52,7 +52,7 @@ struct Qdisc
 	u32			parent;
 	atomic_t		refcnt;
 	unsigned long		state;
-	struct sk_buff		*gso_skb;
+	struct sk_buff_head	requeue;
 	struct sk_buff_head	q;
 	struct netdev_queue	*dev_queue;
 	struct Qdisc		*next_sched;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ec0a0839ce5..5961536be60 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -45,7 +45,7 @@ static inline int qdisc_qlen(struct Qdisc *q)
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	if (unlikely(skb->next))
-		q->gso_skb = skb;
+		__skb_queue_head(&q->requeue, skb);
 	else
 		q->ops->requeue(skb, q);
 
@@ -57,9 +57,8 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 {
 	struct sk_buff *skb;
 
-	if ((skb = q->gso_skb))
-		q->gso_skb = NULL;
-	else
+	skb = __skb_dequeue(&q->requeue);
+	if (!skb)
 		skb = q->dequeue(q);
 
 	return skb;
@@ -327,6 +326,7 @@ struct Qdisc noop_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noop_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
+	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
 };
@@ -352,6 +352,7 @@ static struct Qdisc noqueue_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noqueue_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
+	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.dev_queue	=	&noqueue_netdev_queue,
 };
@@ -472,6 +473,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	sch->padded = (char *) sch - (char *) p;
 
 	INIT_LIST_HEAD(&sch->list);
+	skb_queue_head_init(&sch->requeue);
 	skb_queue_head_init(&sch->q);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
@@ -539,7 +541,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	module_put(ops->owner);
 	dev_put(qdisc_dev(qdisc));
 
-	kfree_skb(qdisc->gso_skb);
+	__skb_queue_purge(&qdisc->requeue);
 
 	kfree((char *) qdisc - qdisc->padded);
 }
-- 
cgit v1.2.3-70-g09d2


From f0876520b0b721bedafd9cec3b1b0624ae566eee Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 22 Sep 2008 22:15:58 -0700
Subject: pkt_sched: Always use q->requeue in dev_requeue_skb().

There is no reason to call into the complicated qdiscs
just to remember the last SKB where we found the device
blocked.

The SKB is outside of the qdiscs realm at this point.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5961536be60..1b508bd1c06 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,10 +44,7 @@ static inline int qdisc_qlen(struct Qdisc *q)
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	if (unlikely(skb->next))
-		__skb_queue_head(&q->requeue, skb);
-	else
-		q->ops->requeue(skb, q);
+	__skb_queue_head(&q->requeue, skb);
 
 	__netif_schedule(q);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From ebf059821ed8a36acd706484b719d14d212ada32 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 22 Sep 2008 22:16:23 -0700
Subject: pkt_sched: Check the state of tx_queue in dequeue_skb()

Check in dequeue_skb() the state of tx_queue for requeued skb to save
on locking and re-requeuing, and possibly remove the current check in
qdisc_run(). Based on the idea of Alexander Duyck.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1b508bd1c06..5e7e0bd38fe 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -52,11 +52,21 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 {
-	struct sk_buff *skb;
-
-	skb = __skb_dequeue(&q->requeue);
-	if (!skb)
+	struct sk_buff *skb = skb_peek(&q->requeue);
+
+	if (unlikely(skb)) {
+		struct net_device *dev = qdisc_dev(q);
+		struct netdev_queue *txq;
+
+		/* check the reason of requeuing without tx lock first */
+		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+			__skb_unlink(skb, &q->requeue);
+		else
+			skb = NULL;
+	} else {
 		skb = q->dequeue(q);
+	}
 
 	return skb;
 }
-- 
cgit v1.2.3-70-g09d2


From f72051b0674f36c960698653a0583edaec1e495e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 23 Sep 2008 01:11:18 -0700
Subject: neigh: Remove by-hand SKB queue handling.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 9d92e41826e..1dc728b3858 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -927,8 +927,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 			if (skb_queue_len(&neigh->arp_queue) >=
 			    neigh->parms->queue_len) {
 				struct sk_buff *buff;
-				buff = neigh->arp_queue.next;
-				__skb_unlink(buff, &neigh->arp_queue);
+				buff = __skb_dequeue(&neigh->arp_queue);
 				kfree_skb(buff);
 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
 			}
@@ -1259,24 +1258,20 @@ static void neigh_proxy_process(unsigned long arg)
 	struct neigh_table *tbl = (struct neigh_table *)arg;
 	long sched_next = 0;
 	unsigned long now = jiffies;
-	struct sk_buff *skb;
+	struct sk_buff *skb, *n;
 
 	spin_lock(&tbl->proxy_queue.lock);
 
-	skb = tbl->proxy_queue.next;
-
-	while (skb != (struct sk_buff *)&tbl->proxy_queue) {
-		struct sk_buff *back = skb;
-		long tdif = NEIGH_CB(back)->sched_next - now;
+	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
+		long tdif = NEIGH_CB(skb)->sched_next - now;
 
-		skb = skb->next;
 		if (tdif <= 0) {
-			struct net_device *dev = back->dev;
-			__skb_unlink(back, &tbl->proxy_queue);
+			struct net_device *dev = skb->dev;
+			__skb_unlink(skb, &tbl->proxy_queue);
 			if (tbl->proxy_redo && netif_running(dev))
-				tbl->proxy_redo(back);
+				tbl->proxy_redo(skb);
 			else
-				kfree_skb(back);
+				kfree_skb(skb);
 
 			dev_put(dev);
 		} else if (!sched_next || tdif < sched_next)
-- 
cgit v1.2.3-70-g09d2


From 77d40a0952b16e020ce07c4cf9fb22024448275b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 23 Sep 2008 01:29:23 -0700
Subject: tcp: Fix order of tests in tcp_retransmit_skb()

tcp_write_queue_next() must only be made if we know that
tcp_skb_is_last() evaluates to false.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c3d58ee3e16..a8499ef3234 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1932,8 +1932,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	/* Collapse two adjacent packets if worthwhile and we can. */
 	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
 	    (skb->len < (cur_mss >> 1)) &&
-	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
 	    (!tcp_skb_is_last(sk, skb)) &&
+	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
 	    (skb_shinfo(skb)->nr_frags == 0 &&
 	     skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
 	    (tcp_skb_pcount(skb) == 1 &&
-- 
cgit v1.2.3-70-g09d2


From 28e3487b7dd8a9791baac924bc887140ec747bed Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 23 Sep 2008 02:51:41 -0700
Subject: tcp: Fix queue traversal in tcp_use_frto().

We must check tcp_skb_is_last() before doing a tcp_write_queue_next().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cbfe13d5f42..3b76bce769d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1746,6 +1746,8 @@ int tcp_use_frto(struct sock *sk)
 		return 0;
 
 	skb = tcp_write_queue_head(sk);
+	if (tcp_skb_is_last(sk, skb))
+		return 1;
 	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
 	tcp_for_write_queue_from(skb, sk) {
 		if (skb == tcp_send_head(sk))
-- 
cgit v1.2.3-70-g09d2


From 2d4c8266774188cda7f7e612e6dfb8ad12c579d5 Mon Sep 17 00:00:00 2001
From: Michael Kerrisk <mtk.manpages@googlemail.com>
Date: Mon, 22 Sep 2008 13:57:49 -0700
Subject: sys_paccept: disable paccept() until API design is resolved

The reasons for disabling paccept() are as follows:

* The API is more complex than needed.  There is AFAICS no demonstrated
  use case that the sigset argument of this syscall serves that couldn't
  equally be served by the use of pselect/ppoll/epoll_pwait + traditional
  accept().  Roland seems to concur with this opinion
  (http://thread.gmane.org/gmane.linux.kernel/723953/focus=732255).  I
  have (more than once) asked Ulrich to explain otherwise
  (http://thread.gmane.org/gmane.linux.kernel/723952/focus=731018), but he
  does not respond, so one is left to assume that he doesn't know of such
  a case.

* The use of a sigset argument is not consistent with other I/O APIs
  that can block on a single file descriptor (e.g., read(), recv(),
  connect()).

* The behavior of paccept() when interrupted by a signal is IMO strange:
  the kernel restarts the system call if SA_RESTART was set for the
  handler.  I think that it should not do this -- that it should behave
  consistently with paccept()/ppoll()/epoll_pwait(), which never restart,
  regardless of SA_RESTART.  The reasoning here is that the very purpose
  of paccept() is to wait for a connection or a signal, and that
  restarting in the latter case is probably never useful.  (Note: Roland
  disagrees on this point, believing that rather paccept() should be
  consistent with accept() in its behavior wrt EINTR
  (http://thread.gmane.org/gmane.linux.kernel/723953/focus=732255).)

I believe that instead, a simpler API, consistent with Ulrich's other
recent additions, is preferable:

accept4(int fd, struct sockaddr *sa, socklen_t *salen, ind flags);

(This simpler API was originally proposed by Ulrich:
http://thread.gmane.org/gmane.linux.network/92072)

If this simpler API is added, then if we later decide that the sigset
argument really is required, then a suitable bit in 'flags' could be added
to indicate the presence of the sigset argument.

At this point, I am hoping we either will get a counter-argument from
Ulrich about why we really do need paccept()'s sigset argument, or that he
will resubmit the original accept4() patch.

Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Alan Cox <alan@redhat.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/socket.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 8ef8ba81b9e..3e8d4e35c08 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1511,6 +1511,7 @@ out_fd:
 	goto out_put;
 }
 
+#if 0
 #ifdef HAVE_SET_RESTORE_SIGMASK
 asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 			    int __user *upeer_addrlen,
@@ -1564,6 +1565,7 @@ asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 	return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
 }
 #endif
+#endif
 
 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 			   int __user *upeer_addrlen)
-- 
cgit v1.2.3-70-g09d2


From 96ca4a2cc1454cf633a1e0796b7ef39d937b87ec Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver@hartkopp.net>
Date: Tue, 23 Sep 2008 21:23:19 -0700
Subject: net: remove ifalias on empty given alias

This patch removes the potentially allocated ifalias when the (new) given alias is empty.

E.g. when setting

echo "" > /sys/class/net/eth0/ifalias

Signed-off-by: Oliver Hartkopp <oliver@hartkopp.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index e9139053399..a90737fe247 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -967,6 +967,14 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 	if (len >= IFALIASZ)
 		return -EINVAL;
 
+	if (!len) {
+		if (dev->ifalias) {
+			kfree(dev->ifalias);
+			dev->ifalias = NULL;
+		}
+		return 0;
+	}
+
 	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
 	if (!dev->ifalias)
 		return -ENOMEM;
-- 
cgit v1.2.3-70-g09d2


From 762af43bda3d8281a2738d3920ae5ded170aaf39 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 15 Sep 2008 10:30:34 +0200
Subject: cfg80211: fix static regdomains

When Luis added the static regdomains back he used +/-20
of the centre frequencies to account for 40MHz bandwidth
neglecting the fact that 40MHz bandwidth cannot be used
on the channels close to the allowed band edges.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/core.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index a910cd2d0fd..59e4d7debf0 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -41,7 +41,7 @@ const struct ieee80211_regdomain world_regdom = {
 	.n_reg_rules = 1,
 	.alpha2 =  "00",
 	.reg_rules = {
-		REG_RULE(2402, 2472, 40, 6, 20,
+		REG_RULE(2412-10, 2462+10, 40, 6, 20,
 			NL80211_RRF_PASSIVE_SCAN |
 			NL80211_RRF_NO_IBSS),
 	}
@@ -64,17 +64,17 @@ const struct ieee80211_regdomain us_regdom = {
 	.alpha2 =  "US",
 	.reg_rules = {
 		/* IEEE 802.11b/g, channels 1..11 */
-		REG_RULE(2412-20, 2462+20, 40, 6, 27, 0),
+		REG_RULE(2412-10, 2462+10, 40, 6, 27, 0),
 		/* IEEE 802.11a, channel 36 */
-		REG_RULE(5180-20, 5180+20, 40, 6, 23, 0),
+		REG_RULE(5180-10, 5180+10, 40, 6, 23, 0),
 		/* IEEE 802.11a, channel 40 */
-		REG_RULE(5200-20, 5200+20, 40, 6, 23, 0),
+		REG_RULE(5200-10, 5200+10, 40, 6, 23, 0),
 		/* IEEE 802.11a, channel 44 */
-		REG_RULE(5220-20, 5220+20, 40, 6, 23, 0),
+		REG_RULE(5220-10, 5220+10, 40, 6, 23, 0),
 		/* IEEE 802.11a, channels 48..64 */
-		REG_RULE(5240-20, 5320+20, 40, 6, 23, 0),
+		REG_RULE(5240-10, 5320+10, 40, 6, 23, 0),
 		/* IEEE 802.11a, channels 149..165, outdoor */
-		REG_RULE(5745-20, 5825+20, 40, 6, 30, 0),
+		REG_RULE(5745-10, 5825+10, 40, 6, 30, 0),
 	}
 };
 
@@ -83,12 +83,12 @@ const struct ieee80211_regdomain jp_regdom = {
 	.alpha2 =  "JP",
 	.reg_rules = {
 		/* IEEE 802.11b/g, channels 1..14 */
-		REG_RULE(2412-20, 2484+20, 40, 6, 20, 0),
+		REG_RULE(2412-10, 2484+10, 40, 6, 20, 0),
 		/* IEEE 802.11a, channels 34..48 */
-		REG_RULE(5170-20, 5240+20, 40, 6, 20,
+		REG_RULE(5170-10, 5240+10, 40, 6, 20,
 			NL80211_RRF_PASSIVE_SCAN),
 		/* IEEE 802.11a, channels 52..64 */
-		REG_RULE(5260-20, 5320+20, 40, 6, 20,
+		REG_RULE(5260-10, 5320+10, 40, 6, 20,
 			NL80211_RRF_NO_IBSS |
 			NL80211_RRF_DFS),
 	}
@@ -101,22 +101,22 @@ const struct ieee80211_regdomain eu_regdom = {
 	.alpha2 =  "EU",
 	.reg_rules = {
 		/* IEEE 802.11b/g, channels 1..13 */
-		REG_RULE(2412-20, 2472+20, 40, 6, 20, 0),
+		REG_RULE(2412-10, 2472+10, 40, 6, 20, 0),
 		/* IEEE 802.11a, channel 36 */
-		REG_RULE(5180-20, 5180+20, 40, 6, 23,
+		REG_RULE(5180-10, 5180+10, 40, 6, 23,
 			NL80211_RRF_PASSIVE_SCAN),
 		/* IEEE 802.11a, channel 40 */
-		REG_RULE(5200-20, 5200+20, 40, 6, 23,
+		REG_RULE(5200-10, 5200+10, 40, 6, 23,
 			NL80211_RRF_PASSIVE_SCAN),
 		/* IEEE 802.11a, channel 44 */
-		REG_RULE(5220-20, 5220+20, 40, 6, 23,
+		REG_RULE(5220-10, 5220+10, 40, 6, 23,
 			NL80211_RRF_PASSIVE_SCAN),
 		/* IEEE 802.11a, channels 48..64 */
-		REG_RULE(5240-20, 5320+20, 40, 6, 20,
+		REG_RULE(5240-10, 5320+10, 40, 6, 20,
 			NL80211_RRF_NO_IBSS |
 			NL80211_RRF_DFS),
 		/* IEEE 802.11a, channels 100..140 */
-		REG_RULE(5500-20, 5700+20, 40, 6, 30,
+		REG_RULE(5500-10, 5700+10, 40, 6, 30,
 			NL80211_RRF_NO_IBSS |
 			NL80211_RRF_DFS),
 	}
-- 
cgit v1.2.3-70-g09d2


From 734366deaee05b1a5842d977960b4cc574d7551d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 15 Sep 2008 10:56:48 +0200
Subject: cfg80211: clean up regulatory mess

The recent code from Luis is an #ifdef hell and contains lots of
code that's stuffed into the wrong file making a whole bunch of
things needlessly non-static, and besides, what is it doing in
core.c??

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/core.c | 144 -----------------------------
 net/wireless/reg.c  | 254 +++++++++++++++++++++++++++++++++++++++++-----------
 net/wireless/reg.h  |  35 +-------
 3 files changed, 203 insertions(+), 230 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 59e4d7debf0..88cb7339486 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -29,107 +29,6 @@ MODULE_AUTHOR("Johannes Berg");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("wireless configuration support");
 
-struct list_head regulatory_requests;
-
-/* Central wireless core regulatory domains, we only need two,
- * the current one and a world regulatory domain in case we have no
- * information to give us an alpha2 */
-struct ieee80211_regdomain *cfg80211_regdomain;
-
-/* We keep a static world regulatory domain in case of the absence of CRDA */
-const struct ieee80211_regdomain world_regdom = {
-	.n_reg_rules = 1,
-	.alpha2 =  "00",
-	.reg_rules = {
-		REG_RULE(2412-10, 2462+10, 40, 6, 20,
-			NL80211_RRF_PASSIVE_SCAN |
-			NL80211_RRF_NO_IBSS),
-	}
-};
-
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-/* All this fucking static junk will be removed soon, so
- * don't fucking count on it !@#$ */
-
-static char *ieee80211_regdom = "US";
-module_param(ieee80211_regdom, charp, 0444);
-MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
-
-/* We assume 40 MHz bandwidth for the old regulatory work.
- * We make emphasis we are using the exact same frequencies
- * as before */
-
-const struct ieee80211_regdomain us_regdom = {
-	.n_reg_rules = 6,
-	.alpha2 =  "US",
-	.reg_rules = {
-		/* IEEE 802.11b/g, channels 1..11 */
-		REG_RULE(2412-10, 2462+10, 40, 6, 27, 0),
-		/* IEEE 802.11a, channel 36 */
-		REG_RULE(5180-10, 5180+10, 40, 6, 23, 0),
-		/* IEEE 802.11a, channel 40 */
-		REG_RULE(5200-10, 5200+10, 40, 6, 23, 0),
-		/* IEEE 802.11a, channel 44 */
-		REG_RULE(5220-10, 5220+10, 40, 6, 23, 0),
-		/* IEEE 802.11a, channels 48..64 */
-		REG_RULE(5240-10, 5320+10, 40, 6, 23, 0),
-		/* IEEE 802.11a, channels 149..165, outdoor */
-		REG_RULE(5745-10, 5825+10, 40, 6, 30, 0),
-	}
-};
-
-const struct ieee80211_regdomain jp_regdom = {
-	.n_reg_rules = 3,
-	.alpha2 =  "JP",
-	.reg_rules = {
-		/* IEEE 802.11b/g, channels 1..14 */
-		REG_RULE(2412-10, 2484+10, 40, 6, 20, 0),
-		/* IEEE 802.11a, channels 34..48 */
-		REG_RULE(5170-10, 5240+10, 40, 6, 20,
-			NL80211_RRF_PASSIVE_SCAN),
-		/* IEEE 802.11a, channels 52..64 */
-		REG_RULE(5260-10, 5320+10, 40, 6, 20,
-			NL80211_RRF_NO_IBSS |
-			NL80211_RRF_DFS),
-	}
-};
-
-const struct ieee80211_regdomain eu_regdom = {
-	.n_reg_rules = 6,
-	/* This alpha2 is bogus, we leave it here just for stupid
-	 * backward compatibility */
-	.alpha2 =  "EU",
-	.reg_rules = {
-		/* IEEE 802.11b/g, channels 1..13 */
-		REG_RULE(2412-10, 2472+10, 40, 6, 20, 0),
-		/* IEEE 802.11a, channel 36 */
-		REG_RULE(5180-10, 5180+10, 40, 6, 23,
-			NL80211_RRF_PASSIVE_SCAN),
-		/* IEEE 802.11a, channel 40 */
-		REG_RULE(5200-10, 5200+10, 40, 6, 23,
-			NL80211_RRF_PASSIVE_SCAN),
-		/* IEEE 802.11a, channel 44 */
-		REG_RULE(5220-10, 5220+10, 40, 6, 23,
-			NL80211_RRF_PASSIVE_SCAN),
-		/* IEEE 802.11a, channels 48..64 */
-		REG_RULE(5240-10, 5320+10, 40, 6, 20,
-			NL80211_RRF_NO_IBSS |
-			NL80211_RRF_DFS),
-		/* IEEE 802.11a, channels 100..140 */
-		REG_RULE(5500-10, 5700+10, 40, 6, 30,
-			NL80211_RRF_NO_IBSS |
-			NL80211_RRF_DFS),
-	}
-};
-
-#endif
-
-struct ieee80211_regdomain *cfg80211_world_regdom =
-	(struct ieee80211_regdomain *) &world_regdom;
-
-LIST_HEAD(regulatory_requests);
-DEFINE_MUTEX(cfg80211_reg_mutex);
-
 /* RCU might be appropriate here since we usually
  * only read the list, and that can happen quite
  * often because we need to do it for each command */
@@ -514,34 +413,10 @@ static struct notifier_block cfg80211_netdev_notifier = {
 	.notifier_call = cfg80211_netdev_notifier_call,
 };
 
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-const struct ieee80211_regdomain *static_regdom(char *alpha2)
-{
-	if (alpha2[0] == 'U' && alpha2[1] == 'S')
-		return &us_regdom;
-	if (alpha2[0] == 'J' && alpha2[1] == 'P')
-		return &jp_regdom;
-	if (alpha2[0] == 'E' && alpha2[1] == 'U')
-		return &eu_regdom;
-	/* Default, as per the old rules */
-	return &us_regdom;
-}
-#endif
-
 static int cfg80211_init(void)
 {
 	int err;
 
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-	cfg80211_regdomain =
-		(struct ieee80211_regdomain *) static_regdom(ieee80211_regdom);
-	/* Used during reset_regdomains_static() */
-	cfg80211_world_regdom = cfg80211_regdomain;
-#else
-	cfg80211_regdomain =
-		(struct ieee80211_regdomain *) cfg80211_world_regdom;
-#endif
-
 	err = wiphy_sysfs_init();
 	if (err)
 		goto out_fail_sysfs;
@@ -560,25 +435,6 @@ static int cfg80211_init(void)
 	if (err)
 		goto out_fail_reg;
 
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-	printk(KERN_INFO "cfg80211: Using old static regulatory domain:\n");
-	print_regdomain_info(cfg80211_regdomain);
-	/* The old code still requests for a new regdomain and if
-	 * you have CRDA you get it updated, otherwise you get
-	 * stuck with the static values. We ignore "EU" code as
-	 * that is not a valid ISO / IEC 3166 alpha2 */
-	if (ieee80211_regdom[0] != 'E' &&
-			ieee80211_regdom[1] != 'U')
-		err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE,
-			ieee80211_regdom, NULL);
-#else
-	err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL);
-	if (err)
-		printk(KERN_ERR "cfg80211: calling CRDA failed - "
-			"unable to update world regulatory domain, "
-			"using static definition\n");
-#endif
-
 	return 0;
 
 out_fail_reg:
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 592b2e391d4..5fbeab50996 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -42,6 +42,18 @@
 #include "core.h"
 #include "reg.h"
 
+/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */
+struct regulatory_request {
+	struct list_head list;
+	struct wiphy *wiphy;
+	int granted;
+	enum reg_set_by initiator;
+	char alpha2[2];
+};
+
+static LIST_HEAD(regulatory_requests);
+DEFINE_MUTEX(cfg80211_reg_mutex);
+
 /* To trigger userspace events */
 static struct platform_device *reg_pdev;
 
@@ -51,6 +63,161 @@ static u32 supported_bandwidths[] = {
 	MHZ_TO_KHZ(20),
 };
 
+static struct list_head regulatory_requests;
+
+/* Central wireless core regulatory domains, we only need two,
+ * the current one and a world regulatory domain in case we have no
+ * information to give us an alpha2 */
+static struct ieee80211_regdomain *cfg80211_regdomain;
+
+/* We keep a static world regulatory domain in case of the absence of CRDA */
+static const struct ieee80211_regdomain world_regdom = {
+	.n_reg_rules = 1,
+	.alpha2 =  "00",
+	.reg_rules = {
+		REG_RULE(2412-10, 2462+10, 40, 6, 20,
+			NL80211_RRF_PASSIVE_SCAN |
+			NL80211_RRF_NO_IBSS),
+	}
+};
+
+static struct ieee80211_regdomain *cfg80211_world_regdom =
+	(struct ieee80211_regdomain *) &world_regdom;
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+static char *ieee80211_regdom = "US";
+module_param(ieee80211_regdom, charp, 0444);
+MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
+
+/* We assume 40 MHz bandwidth for the old regulatory work.
+ * We make emphasis we are using the exact same frequencies
+ * as before */
+
+static const struct ieee80211_regdomain us_regdom = {
+	.n_reg_rules = 6,
+	.alpha2 =  "US",
+	.reg_rules = {
+		/* IEEE 802.11b/g, channels 1..11 */
+		REG_RULE(2412-10, 2462+10, 40, 6, 27, 0),
+		/* IEEE 802.11a, channel 36 */
+		REG_RULE(5180-10, 5180+10, 40, 6, 23, 0),
+		/* IEEE 802.11a, channel 40 */
+		REG_RULE(5200-10, 5200+10, 40, 6, 23, 0),
+		/* IEEE 802.11a, channel 44 */
+		REG_RULE(5220-10, 5220+10, 40, 6, 23, 0),
+		/* IEEE 802.11a, channels 48..64 */
+		REG_RULE(5240-10, 5320+10, 40, 6, 23, 0),
+		/* IEEE 802.11a, channels 149..165, outdoor */
+		REG_RULE(5745-10, 5825+10, 40, 6, 30, 0),
+	}
+};
+
+static const struct ieee80211_regdomain jp_regdom = {
+	.n_reg_rules = 3,
+	.alpha2 =  "JP",
+	.reg_rules = {
+		/* IEEE 802.11b/g, channels 1..14 */
+		REG_RULE(2412-10, 2484+10, 40, 6, 20, 0),
+		/* IEEE 802.11a, channels 34..48 */
+		REG_RULE(5170-10, 5240+10, 40, 6, 20,
+			NL80211_RRF_PASSIVE_SCAN),
+		/* IEEE 802.11a, channels 52..64 */
+		REG_RULE(5260-10, 5320+10, 40, 6, 20,
+			NL80211_RRF_NO_IBSS |
+			NL80211_RRF_DFS),
+	}
+};
+
+static const struct ieee80211_regdomain eu_regdom = {
+	.n_reg_rules = 6,
+	/* This alpha2 is bogus, we leave it here just for stupid
+	 * backward compatibility */
+	.alpha2 =  "EU",
+	.reg_rules = {
+		/* IEEE 802.11b/g, channels 1..13 */
+		REG_RULE(2412-10, 2472+10, 40, 6, 20, 0),
+		/* IEEE 802.11a, channel 36 */
+		REG_RULE(5180-10, 5180+10, 40, 6, 23,
+			NL80211_RRF_PASSIVE_SCAN),
+		/* IEEE 802.11a, channel 40 */
+		REG_RULE(5200-10, 5200+10, 40, 6, 23,
+			NL80211_RRF_PASSIVE_SCAN),
+		/* IEEE 802.11a, channel 44 */
+		REG_RULE(5220-10, 5220+10, 40, 6, 23,
+			NL80211_RRF_PASSIVE_SCAN),
+		/* IEEE 802.11a, channels 48..64 */
+		REG_RULE(5240-10, 5320+10, 40, 6, 20,
+			NL80211_RRF_NO_IBSS |
+			NL80211_RRF_DFS),
+		/* IEEE 802.11a, channels 100..140 */
+		REG_RULE(5500-10, 5700+10, 40, 6, 30,
+			NL80211_RRF_NO_IBSS |
+			NL80211_RRF_DFS),
+	}
+};
+
+static const struct ieee80211_regdomain *static_regdom(char *alpha2)
+{
+	if (alpha2[0] == 'U' && alpha2[1] == 'S')
+		return &us_regdom;
+	if (alpha2[0] == 'J' && alpha2[1] == 'P')
+		return &jp_regdom;
+	if (alpha2[0] == 'E' && alpha2[1] == 'U')
+		return &eu_regdom;
+	/* Default, as per the old rules */
+	return &us_regdom;
+}
+
+static bool is_old_static_regdom(struct ieee80211_regdomain *rd)
+{
+	if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom)
+		return true;
+	return false;
+}
+
+/* The old crap never deals with a world regulatory domain, it only
+ * deals with the static regulatory domain passed and if possible
+ * an updated "US" or "JP" regulatory domain. We do however store the
+ * old static regulatory domain in cfg80211_world_regdom for convenience
+ * of use here */
+static void reset_regdomains_static(void)
+{
+	if (!is_old_static_regdom(cfg80211_regdomain))
+		kfree(cfg80211_regdomain);
+	/* This is setting the regdom to the old static regdom */
+	cfg80211_regdomain =
+		(struct ieee80211_regdomain *) cfg80211_world_regdom;
+}
+#else
+static void reset_regdomains(void)
+{
+	if (cfg80211_world_regdom && cfg80211_world_regdom != &world_regdom) {
+		if (cfg80211_world_regdom == cfg80211_regdomain) {
+			kfree(cfg80211_regdomain);
+		} else {
+			kfree(cfg80211_world_regdom);
+			kfree(cfg80211_regdomain);
+		}
+	} else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom)
+		kfree(cfg80211_regdomain);
+
+	cfg80211_world_regdom = (struct ieee80211_regdomain *) &world_regdom;
+	cfg80211_regdomain = NULL;
+}
+
+/* Dynamic world regulatory domain requested by the wireless
+ * core upon initialization */
+static void update_world_regdomain(struct ieee80211_regdomain *rd)
+{
+	BUG_ON(list_empty(&regulatory_requests));
+
+	reset_regdomains();
+
+	cfg80211_world_regdom = rd;
+	cfg80211_regdomain = rd;
+}
+#endif
+
 bool is_world_regdom(char *alpha2)
 {
 	if (!alpha2)
@@ -555,58 +722,6 @@ void print_regdomain_info(struct ieee80211_regdomain *rd)
 	print_rd_rules(rd);
 }
 
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-
-static bool is_old_static_regdom(struct ieee80211_regdomain *rd)
-{
-	if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom)
-		return true;
-	return false;
-}
-
-/* The old crap never deals with a world regulatory domain, it only
- * deals with the static regulatory domain passed and if possible
- * an updated "US" or "JP" regulatory domain. We do however store the
- * old static regulatory domain in cfg80211_world_regdom for convenience
- * of use here */
-static void reset_regdomains_static(void)
-{
-	if (!is_old_static_regdom(cfg80211_regdomain))
-		kfree(cfg80211_regdomain);
-	/* This is setting the regdom to the old static regdom */
-	cfg80211_regdomain =
-		(struct ieee80211_regdomain *) cfg80211_world_regdom;
-}
-#else
-static void reset_regdomains(void)
-{
-	if (cfg80211_world_regdom && cfg80211_world_regdom != &world_regdom) {
-		if (cfg80211_world_regdom == cfg80211_regdomain) {
-			kfree(cfg80211_regdomain);
-		} else {
-			kfree(cfg80211_world_regdom);
-			kfree(cfg80211_regdomain);
-		}
-	} else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom)
-		kfree(cfg80211_regdomain);
-
-	cfg80211_world_regdom = (struct ieee80211_regdomain *) &world_regdom;
-	cfg80211_regdomain = NULL;
-}
-
-/* Dynamic world regulatory domain requested by the wireless
- * core upon initialization */
-static void update_world_regdomain(struct ieee80211_regdomain *rd)
-{
-	BUG_ON(list_empty(&regulatory_requests));
-
-	reset_regdomains();
-
-	cfg80211_world_regdom = rd;
-	cfg80211_regdomain = rd;
-}
-#endif
-
 static int __set_regdom(struct ieee80211_regdomain *rd)
 {
 	struct regulatory_request *request = NULL;
@@ -615,7 +730,7 @@ static int __set_regdom(struct ieee80211_regdomain *rd)
 
 #ifdef CONFIG_WIRELESS_OLD_REGULATORY
 	/* We ignore the world regdom with the old static regdomains setup
-	 * as there is no point to it with satic regulatory definitions :(
+	 * as there is no point to it with static regulatory definitions :(
 	 * Don't worry this shit will be removed soon... */
 	if (is_world_regdom(rd->alpha2))
 		return -EINVAL;
@@ -735,25 +850,58 @@ int set_regdom(struct ieee80211_regdomain *rd)
 
 int regulatory_init(void)
 {
+	int err;
+
 	reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
 	if (IS_ERR(reg_pdev))
 		return PTR_ERR(reg_pdev);
+
+#ifdef CONFIG_WIRELESS_OLD_REGULATORY
+	cfg80211_regdomain =
+		(struct ieee80211_regdomain *) static_regdom(ieee80211_regdom);
+	/* Used during reset_regdomains_static() */
+	cfg80211_world_regdom = cfg80211_regdomain;
+
+	printk(KERN_INFO "cfg80211: Using old static regulatory domain:\n");
+	print_regdomain_info(cfg80211_regdomain);
+	/* The old code still requests for a new regdomain and if
+	 * you have CRDA you get it updated, otherwise you get
+	 * stuck with the static values. We ignore "EU" code as
+	 * that is not a valid ISO / IEC 3166 alpha2 */
+	if (ieee80211_regdom[0] != 'E' && ieee80211_regdom[1] != 'U')
+		err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE,
+					ieee80211_regdom, NULL);
+#else
+	cfg80211_regdomain =
+		(struct ieee80211_regdomain *) cfg80211_world_regdom;
+
+	err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL);
+	if (err)
+		printk(KERN_ERR "cfg80211: calling CRDA failed - "
+		       "unable to update world regulatory domain, "
+		       "using static definition\n");
+#endif
+
 	return 0;
 }
 
 void regulatory_exit(void)
 {
 	struct regulatory_request *req, *req_tmp;
+
 	mutex_lock(&cfg80211_drv_mutex);
+
 #ifdef CONFIG_WIRELESS_OLD_REGULATORY
 	reset_regdomains_static();
 #else
 	reset_regdomains();
 #endif
+
 	list_for_each_entry_safe(req, req_tmp, &regulatory_requests, list) {
 		list_del(&req->list);
 		kfree(req);
 	}
 	platform_device_unregister(reg_pdev);
+
 	mutex_unlock(&cfg80211_drv_mutex);
 }
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index d75fd023297..b169815987f 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,44 +1,13 @@
 #ifndef __NET_WIRELESS_REG_H
 #define __NET_WIRELESS_REG_H
 
-extern const struct ieee80211_regdomain world_regdom;
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-extern const struct ieee80211_regdomain us_regdom;
-extern const struct ieee80211_regdomain jp_regdom;
-extern const struct ieee80211_regdomain eu_regdom;
-#endif
-
-extern struct ieee80211_regdomain *cfg80211_regdomain;
-extern struct ieee80211_regdomain *cfg80211_world_regdom;
-extern struct list_head regulatory_requests;
-
-struct regdom_last_setby {
-	struct wiphy *wiphy;
-	u8 initiator;
-};
-
-/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */
-struct regulatory_request {
-	struct list_head list;
-	struct wiphy *wiphy;
-	int granted;
-	enum reg_set_by initiator;
-	char alpha2[2];
-};
-
+extern struct mutex cfg80211_reg_mutex;
 bool is_world_regdom(char *alpha2);
 bool reg_is_valid_request(char *alpha2);
 
-int set_regdom(struct ieee80211_regdomain *rd);
-int __regulatory_hint_alpha2(struct wiphy *wiphy, enum reg_set_by set_by,
-		      const char *alpha2);
-
 int regulatory_init(void);
 void regulatory_exit(void);
 
-void print_regdomain_info(struct ieee80211_regdomain *);
-
-/* If a char is A-Z */
-#define IS_ALPHA(letter) (letter >= 65 && letter <= 90)
+int set_regdom(struct ieee80211_regdomain *rd);
 
 #endif  /* __NET_WIRELESS_REG_H */
-- 
cgit v1.2.3-70-g09d2


From a3d2eaf0dcad6dfdf44f3093aef688dfca714b6c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 15 Sep 2008 11:10:52 +0200
Subject: cfg80211: fix regulatory code const

A few pointers and structures in the regulatory code are const,
but because it wasn't done properly a whole bunch of bogus
casts were needed to compile without warning. Mark everything
const properly to avoid that kind of junk code.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 58 ++++++++++++++++++++++++++----------------------------
 net/wireless/reg.h |  6 +++---
 2 files changed, 31 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 5fbeab50996..7aba46efc7d 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -68,7 +68,7 @@ static struct list_head regulatory_requests;
 /* Central wireless core regulatory domains, we only need two,
  * the current one and a world regulatory domain in case we have no
  * information to give us an alpha2 */
-static struct ieee80211_regdomain *cfg80211_regdomain;
+static const struct ieee80211_regdomain *cfg80211_regdomain;
 
 /* We keep a static world regulatory domain in case of the absence of CRDA */
 static const struct ieee80211_regdomain world_regdom = {
@@ -81,8 +81,8 @@ static const struct ieee80211_regdomain world_regdom = {
 	}
 };
 
-static struct ieee80211_regdomain *cfg80211_world_regdom =
-	(struct ieee80211_regdomain *) &world_regdom;
+static const struct ieee80211_regdomain *cfg80211_world_regdom =
+	&world_regdom;
 
 #ifdef CONFIG_WIRELESS_OLD_REGULATORY
 static char *ieee80211_regdom = "US";
@@ -168,7 +168,7 @@ static const struct ieee80211_regdomain *static_regdom(char *alpha2)
 	return &us_regdom;
 }
 
-static bool is_old_static_regdom(struct ieee80211_regdomain *rd)
+static bool is_old_static_regdom(const struct ieee80211_regdomain *rd)
 {
 	if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom)
 		return true;
@@ -201,13 +201,13 @@ static void reset_regdomains(void)
 	} else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom)
 		kfree(cfg80211_regdomain);
 
-	cfg80211_world_regdom = (struct ieee80211_regdomain *) &world_regdom;
+	cfg80211_world_regdom = &world_regdom;
 	cfg80211_regdomain = NULL;
 }
 
 /* Dynamic world regulatory domain requested by the wireless
  * core upon initialization */
-static void update_world_regdomain(struct ieee80211_regdomain *rd)
+static void update_world_regdomain(const struct ieee80211_regdomain *rd)
 {
 	BUG_ON(list_empty(&regulatory_requests));
 
@@ -218,7 +218,7 @@ static void update_world_regdomain(struct ieee80211_regdomain *rd)
 }
 #endif
 
-bool is_world_regdom(char *alpha2)
+bool is_world_regdom(const char *alpha2)
 {
 	if (!alpha2)
 		return false;
@@ -227,7 +227,7 @@ bool is_world_regdom(char *alpha2)
 	return false;
 }
 
-static bool is_alpha2_set(char *alpha2)
+static bool is_alpha2_set(const char *alpha2)
 {
 	if (!alpha2)
 		return false;
@@ -244,7 +244,7 @@ static bool is_alpha_upper(char letter)
 	return false;
 }
 
-static bool is_unknown_alpha2(char *alpha2)
+static bool is_unknown_alpha2(const char *alpha2)
 {
 	if (!alpha2)
 		return false;
@@ -255,7 +255,7 @@ static bool is_unknown_alpha2(char *alpha2)
 	return false;
 }
 
-static bool is_an_alpha2(char *alpha2)
+static bool is_an_alpha2(const char *alpha2)
 {
 	if (!alpha2)
 		return false;
@@ -264,7 +264,7 @@ static bool is_an_alpha2(char *alpha2)
 	return false;
 }
 
-static bool alpha2_equal(char *alpha2_x, char *alpha2_y)
+static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y)
 {
 	if (!alpha2_x || !alpha2_y)
 		return false;
@@ -274,7 +274,7 @@ static bool alpha2_equal(char *alpha2_x, char *alpha2_y)
 	return false;
 }
 
-static bool regdom_changed(char *alpha2)
+static bool regdom_changed(const char *alpha2)
 {
 	if (!cfg80211_regdomain)
 		return true;
@@ -405,7 +405,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by,
 	}
 }
 
-static bool __reg_is_valid_request(char *alpha2,
+static bool __reg_is_valid_request(const char *alpha2,
 	struct regulatory_request **request)
 {
 	struct regulatory_request *req;
@@ -421,16 +421,16 @@ static bool __reg_is_valid_request(char *alpha2,
 }
 
 /* Used by nl80211 before kmalloc'ing our regulatory domain */
-bool reg_is_valid_request(char *alpha2)
+bool reg_is_valid_request(const char *alpha2)
 {
 	struct regulatory_request *request = NULL;
 	return  __reg_is_valid_request(alpha2, &request);
 }
 
 /* Sanity check on a regulatory rule */
-static bool is_valid_reg_rule(struct ieee80211_reg_rule *rule)
+static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule)
 {
-	struct ieee80211_freq_range *freq_range = &rule->freq_range;
+	const struct ieee80211_freq_range *freq_range = &rule->freq_range;
 	u32 freq_diff;
 
 	if (freq_range->start_freq_khz == 0 || freq_range->end_freq_khz == 0)
@@ -447,9 +447,9 @@ static bool is_valid_reg_rule(struct ieee80211_reg_rule *rule)
 	return true;
 }
 
-static bool is_valid_rd(struct ieee80211_regdomain *rd)
+static bool is_valid_rd(const struct ieee80211_regdomain *rd)
 {
-	struct ieee80211_reg_rule *reg_rule = NULL;
+	const struct ieee80211_reg_rule *reg_rule = NULL;
 	unsigned int i;
 
 	if (!rd->n_reg_rules)
@@ -661,12 +661,12 @@ unlock_and_exit:
 EXPORT_SYMBOL(regulatory_hint);
 
 
-static void print_rd_rules(struct ieee80211_regdomain *rd)
+static void print_rd_rules(const struct ieee80211_regdomain *rd)
 {
 	unsigned int i;
-	struct ieee80211_reg_rule *reg_rule = NULL;
-	struct ieee80211_freq_range *freq_range = NULL;
-	struct ieee80211_power_rule *power_rule = NULL;
+	const struct ieee80211_reg_rule *reg_rule = NULL;
+	const struct ieee80211_freq_range *freq_range = NULL;
+	const struct ieee80211_power_rule *power_rule = NULL;
 
 	printk(KERN_INFO "\t(start_freq - end_freq @ bandwidth), "
 		"(max_antenna_gain, max_eirp)\n");
@@ -696,7 +696,7 @@ static void print_rd_rules(struct ieee80211_regdomain *rd)
 	}
 }
 
-static void print_regdomain(struct ieee80211_regdomain *rd)
+static void print_regdomain(const struct ieee80211_regdomain *rd)
 {
 
 	if (is_world_regdom(rd->alpha2))
@@ -715,14 +715,14 @@ static void print_regdomain(struct ieee80211_regdomain *rd)
 	print_rd_rules(rd);
 }
 
-void print_regdomain_info(struct ieee80211_regdomain *rd)
+void print_regdomain_info(const struct ieee80211_regdomain *rd)
 {
 	printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n",
 		rd->alpha2[0], rd->alpha2[1]);
 	print_rd_rules(rd);
 }
 
-static int __set_regdom(struct ieee80211_regdomain *rd)
+static int __set_regdom(const struct ieee80211_regdomain *rd)
 {
 	struct regulatory_request *request = NULL;
 
@@ -804,7 +804,7 @@ static int __set_regdom(struct ieee80211_regdomain *rd)
  * multiple drivers can be ironed out later. Caller must've already
  * kmalloc'd the rd structure. If this calls fails you should kfree()
  * the passed rd. Caller must hold cfg80211_drv_mutex */
-int set_regdom(struct ieee80211_regdomain *rd)
+int set_regdom(const struct ieee80211_regdomain *rd)
 {
 	struct regulatory_request *this_request = NULL, *prev_request = NULL;
 	int r;
@@ -857,8 +857,7 @@ int regulatory_init(void)
 		return PTR_ERR(reg_pdev);
 
 #ifdef CONFIG_WIRELESS_OLD_REGULATORY
-	cfg80211_regdomain =
-		(struct ieee80211_regdomain *) static_regdom(ieee80211_regdom);
+	cfg80211_regdomain = static_regdom(ieee80211_regdom);
 	/* Used during reset_regdomains_static() */
 	cfg80211_world_regdom = cfg80211_regdomain;
 
@@ -872,8 +871,7 @@ int regulatory_init(void)
 		err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE,
 					ieee80211_regdom, NULL);
 #else
-	cfg80211_regdomain =
-		(struct ieee80211_regdomain *) cfg80211_world_regdom;
+	cfg80211_regdomain = cfg80211_world_regdom;
 
 	err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL);
 	if (err)
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index b169815987f..a33362872f3 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -2,12 +2,12 @@
 #define __NET_WIRELESS_REG_H
 
 extern struct mutex cfg80211_reg_mutex;
-bool is_world_regdom(char *alpha2);
-bool reg_is_valid_request(char *alpha2);
+bool is_world_regdom(const char *alpha2);
+bool reg_is_valid_request(const char *alpha2);
 
 int regulatory_init(void);
 void regulatory_exit(void);
 
-int set_regdom(struct ieee80211_regdomain *rd);
+int set_regdom(const struct ieee80211_regdomain *rd);
 
 #endif  /* __NET_WIRELESS_REG_H */
-- 
cgit v1.2.3-70-g09d2


From 942b25cf9028e7c2f6446ee7c6618bd70dafec5f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 15 Sep 2008 11:26:47 +0200
Subject: cfg80211: clean up static regdomain mess

The statically defined regdomains are used in a very convoluted
way, use them instead to prime the information we have and then
continue operating normally.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 73 +++++++++++++++---------------------------------------
 1 file changed, 20 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 7aba46efc7d..626dbb68849 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -174,32 +174,27 @@ static bool is_old_static_regdom(const struct ieee80211_regdomain *rd)
 		return true;
 	return false;
 }
-
-/* The old crap never deals with a world regulatory domain, it only
- * deals with the static regulatory domain passed and if possible
- * an updated "US" or "JP" regulatory domain. We do however store the
- * old static regulatory domain in cfg80211_world_regdom for convenience
- * of use here */
-static void reset_regdomains_static(void)
+#else
+static inline bool is_old_static_regdom(const struct ieee80211_regdomain *rd)
 {
-	if (!is_old_static_regdom(cfg80211_regdomain))
-		kfree(cfg80211_regdomain);
-	/* This is setting the regdom to the old static regdom */
-	cfg80211_regdomain =
-		(struct ieee80211_regdomain *) cfg80211_world_regdom;
+	return false;
 }
-#else
+#endif
+
 static void reset_regdomains(void)
 {
-	if (cfg80211_world_regdom && cfg80211_world_regdom != &world_regdom) {
-		if (cfg80211_world_regdom == cfg80211_regdomain) {
-			kfree(cfg80211_regdomain);
-		} else {
-			kfree(cfg80211_world_regdom);
-			kfree(cfg80211_regdomain);
-		}
-	} else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom)
-		kfree(cfg80211_regdomain);
+	/* avoid freeing static information or freeing something twice */
+	if (cfg80211_regdomain == cfg80211_world_regdom)
+		cfg80211_regdomain = NULL;
+	if (cfg80211_world_regdom == &world_regdom)
+		cfg80211_world_regdom = NULL;
+	if (cfg80211_regdomain == &world_regdom)
+		cfg80211_regdomain = NULL;
+	if (is_old_static_regdom(cfg80211_regdomain))
+		cfg80211_regdomain = NULL;
+
+	kfree(cfg80211_regdomain);
+	kfree(cfg80211_world_regdom);
 
 	cfg80211_world_regdom = &world_regdom;
 	cfg80211_regdomain = NULL;
@@ -216,7 +211,6 @@ static void update_world_regdomain(const struct ieee80211_regdomain *rd)
 	cfg80211_world_regdom = rd;
 	cfg80211_regdomain = rd;
 }
-#endif
 
 bool is_world_regdom(const char *alpha2)
 {
@@ -297,12 +291,8 @@ static int call_crda(const char *alpha2)
 		printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n",
 			alpha2[0], alpha2[1]);
 	else
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-		return -EINVAL;
-#else
 		printk(KERN_INFO "cfg80211: Calling CRDA to update world "
 			"regulatory domain\n");
-#endif
 
 	country_env[8] = alpha2[0];
 	country_env[9] = alpha2[1];
@@ -728,20 +718,12 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 
 	/* Some basic sanity checks first */
 
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-	/* We ignore the world regdom with the old static regdomains setup
-	 * as there is no point to it with static regulatory definitions :(
-	 * Don't worry this shit will be removed soon... */
-	if (is_world_regdom(rd->alpha2))
-		return -EINVAL;
-#else
 	if (is_world_regdom(rd->alpha2)) {
 		if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request)))
 			return -EINVAL;
 		update_world_regdomain(rd);
 		return 0;
 	}
-#endif
 
 	if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) &&
 			!is_unknown_alpha2(rd->alpha2))
@@ -750,15 +732,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 	if (list_empty(&regulatory_requests))
 		return -EINVAL;
 
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-	/* Static "US" and "JP" will be overridden, but just once */
+	/* allow overriding the static definitions if CRDA is present */
 	if (!is_old_static_regdom(cfg80211_regdomain) &&
-			!regdom_changed(rd->alpha2))
-		return -EINVAL;
-#else
-	if (!regdom_changed(rd->alpha2))
+	    !regdom_changed(rd->alpha2))
 		return -EINVAL;
-#endif
 
 	/* Now lets set the regulatory domain, update all driver channels
 	 * and finally inform them of what we have done, in case they want
@@ -768,11 +745,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 	if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request)))
 		return -EINVAL;
 
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-	reset_regdomains_static();
-#else
 	reset_regdomains();
-#endif
 
 	/* Country IE parsing coming soon */
 	switch (request->initiator) {
@@ -858,10 +831,8 @@ int regulatory_init(void)
 
 #ifdef CONFIG_WIRELESS_OLD_REGULATORY
 	cfg80211_regdomain = static_regdom(ieee80211_regdom);
-	/* Used during reset_regdomains_static() */
-	cfg80211_world_regdom = cfg80211_regdomain;
 
-	printk(KERN_INFO "cfg80211: Using old static regulatory domain:\n");
+	printk(KERN_INFO "cfg80211: Using static regulatory domain info\n");
 	print_regdomain_info(cfg80211_regdomain);
 	/* The old code still requests for a new regdomain and if
 	 * you have CRDA you get it updated, otherwise you get
@@ -889,11 +860,7 @@ void regulatory_exit(void)
 
 	mutex_lock(&cfg80211_drv_mutex);
 
-#ifdef CONFIG_WIRELESS_OLD_REGULATORY
-	reset_regdomains_static();
-#else
 	reset_regdomains();
-#endif
 
 	list_for_each_entry_safe(req, req_tmp, &regulatory_requests, list) {
 		list_del(&req->list);
-- 
cgit v1.2.3-70-g09d2


From 133b822638ff01eb1e32e1917b197c40ed095ddd Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 16 Sep 2008 14:18:59 +0200
Subject: mac80211: make master iface not wireless

There's no need to register the master netdev with cfg80211,
in fact, this is quite dangerous and lead to having to add
checks for the master interface all over the config handlers.
This patch removes the "ieee80211_ptr" from the master iface
in favour of having a small netdev_priv() associated with
the master interface that stores the ieee80211_local pointer.
Because of this, a lot of code in the configuration handlers
can go away. To make this patch easier to verify I have also
removed a number of wiphy_priv() calls in favour of getting
the sdata first and then the local pointer from that.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c         | 59 ----------------------------------------------
 net/mac80211/debugfs_sta.c |  3 +--
 net/mac80211/ieee80211_i.h |  4 ++++
 net/mac80211/main.c        | 19 ++++++++-------
 net/mac80211/mesh.c        |  2 +-
 net/mac80211/rx.c          | 25 ++++++++------------
 net/mac80211/tx.c          | 26 ++++++++++----------
 7 files changed, 39 insertions(+), 99 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e2574885db4..a8501f14b16 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -82,7 +82,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 				  enum nl80211_iftype type, u32 *flags,
 				  struct vif_params *params)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct net_device *dev;
 	struct ieee80211_sub_if_data *sdata;
 	int ret;
@@ -95,9 +94,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 	if (!nl80211_type_check(type))
 		return -EINVAL;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	ret = ieee80211_if_change_type(sdata, type);
@@ -120,16 +116,12 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 			     u8 key_idx, u8 *mac_addr,
 			     struct key_params *params)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta = NULL;
 	enum ieee80211_key_alg alg;
 	struct ieee80211_key *key;
 	int err;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	switch (params->cipher) {
@@ -174,14 +166,10 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
 			     u8 key_idx, u8 *mac_addr)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
 	int ret;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	rcu_read_lock();
@@ -222,7 +210,6 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
 			     void (*callback)(void *cookie,
 					      struct key_params *params))
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta = NULL;
 	u8 seq[6] = {0};
@@ -232,9 +219,6 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
 	u16 iv16;
 	int err = -ENOENT;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	rcu_read_lock();
@@ -310,12 +294,8 @@ static int ieee80211_config_default_key(struct wiphy *wiphy,
 					struct net_device *dev,
 					u8 key_idx)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	rcu_read_lock();
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -496,13 +476,9 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
 static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
 				struct beacon_parameters *params)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct beacon_data *old;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	if (sdata->vif.type != NL80211_IFTYPE_AP)
@@ -519,13 +495,9 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
 				struct beacon_parameters *params)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct beacon_data *old;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	if (sdata->vif.type != NL80211_IFTYPE_AP)
@@ -541,13 +513,9 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
 
 static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct beacon_data *old;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	if (sdata->vif.type != NL80211_IFTYPE_AP)
@@ -695,9 +663,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata;
 	int err;
 
-	if (dev == local->mdev || params->vlan == local->mdev)
-		return -EOPNOTSUPP;
-
 	/* Prevent a race with changing the rate control algorithm */
 	if (!netif_running(dev))
 		return -ENETDOWN;
@@ -752,9 +717,6 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	if (mac) {
@@ -786,9 +748,6 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *vlansdata;
 
-	if (dev == local->mdev || params->vlan == local->mdev)
-		return -EOPNOTSUPP;
-
 	rcu_read_lock();
 
 	/* XXX: get sta belonging to dev */
@@ -828,9 +787,6 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
 	struct sta_info *sta;
 	int err;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
@@ -884,9 +840,6 @@ static int ieee80211_change_mpath(struct wiphy *wiphy,
 	struct mesh_path *mpath;
 	struct sta_info *sta;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
@@ -958,13 +911,9 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
 			       u8 *dst, u8 *next_hop, struct mpath_info *pinfo)
 
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
@@ -986,13 +935,9 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
 				 int idx, u8 *dst, u8 *next_hop,
 				 struct mpath_info *pinfo)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
@@ -1015,13 +960,9 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 				struct net_device *dev,
 				struct bss_parameters *params)
 {
-	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata;
 	u32 changed = 0;
 
-	if (dev == local->mdev)
-		return -EOPNOTSUPP;
-
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	if (sdata->vif.type != NL80211_IFTYPE_AP)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 81f350eaf8a..b9902e425f0 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -173,8 +173,7 @@ static ssize_t sta_agg_status_write(struct file *file,
 		const char __user *user_buf, size_t count, loff_t *ppos)
 {
 	struct sta_info *sta = file->private_data;
-	struct net_device *dev = sta->sdata->dev;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_local *local = sta->sdata->local;
 	struct ieee80211_hw *hw = &local->hw;
 	u8 *da = sta->sta.addr;
 	static int tid_static_tx[16] = {0, 0, 0, 0, 0, 0, 0, 0,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3912fba6d3d..0b25b0f46b1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -573,6 +573,10 @@ enum {
 /* maximum number of hardware queues we support. */
 #define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES)
 
+struct ieee80211_master_priv {
+	struct ieee80211_local *local;
+};
+
 struct ieee80211_local {
 	/* embed the driver visible part.
 	 * don't cast (use the static inlines below), but we keep
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index c307dba7ec0..7d2d5a041e2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -106,7 +106,8 @@ static const struct header_ops ieee80211_header_ops = {
 
 static int ieee80211_master_open(struct net_device *dev)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_master_priv *mpriv = netdev_priv(dev);
+	struct ieee80211_local *local = mpriv->local;
 	struct ieee80211_sub_if_data *sdata;
 	int res = -EOPNOTSUPP;
 
@@ -128,7 +129,8 @@ static int ieee80211_master_open(struct net_device *dev)
 
 static int ieee80211_master_stop(struct net_device *dev)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_master_priv *mpriv = netdev_priv(dev);
+	struct ieee80211_local *local = mpriv->local;
 	struct ieee80211_sub_if_data *sdata;
 
 	/* we hold the RTNL here so can safely walk the list */
@@ -141,7 +143,8 @@ static int ieee80211_master_stop(struct net_device *dev)
 
 static void ieee80211_master_set_multicast_list(struct net_device *dev)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_master_priv *mpriv = netdev_priv(dev);
+	struct ieee80211_local *local = mpriv->local;
 
 	ieee80211_configure_filter(local);
 }
@@ -787,7 +790,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	int result;
 	enum ieee80211_band band;
 	struct net_device *mdev;
-	struct wireless_dev *mwdev;
+	struct ieee80211_master_priv *mpriv;
 
 	/*
 	 * generic code guarantees at least one band,
@@ -829,16 +832,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (hw->queues < 4)
 		hw->ampdu_queues = 0;
 
-	mdev = alloc_netdev_mq(sizeof(struct wireless_dev),
+	mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv),
 			       "wmaster%d", ether_setup,
 			       ieee80211_num_queues(hw));
 	if (!mdev)
 		goto fail_mdev_alloc;
 
-	mwdev = netdev_priv(mdev);
-	mdev->ieee80211_ptr = mwdev;
-	mwdev->wiphy = local->hw.wiphy;
-
+	mpriv = netdev_priv(mdev);
+	mpriv->local = local;
 	local->mdev = mdev;
 
 	ieee80211_rx_bss_list_init(local);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 30cf891fd3a..8013277924f 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -351,7 +351,7 @@ static void ieee80211_mesh_path_timer(unsigned long data)
 	struct ieee80211_sub_if_data *sdata =
 		(struct ieee80211_sub_if_data *) data;
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
-	struct ieee80211_local *local = wdev_priv(&sdata->wdev);
+	struct ieee80211_local *local = sdata->local;
 
 	queue_work(local->hw.workqueue, &ifmsh->work);
 }
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 92d898b901e..3ab9670f180 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -650,32 +650,28 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	return result;
 }
 
-static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta)
+static void ap_sta_ps_start(struct sta_info *sta)
 {
-	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	DECLARE_MAC_BUF(mac);
 
-	sdata = sta->sdata;
-
 	atomic_inc(&sdata->bss->num_sta_ps);
 	set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL);
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n",
-	       dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid);
+	       sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid);
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 }
 
-static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
+static int ap_sta_ps_end(struct sta_info *sta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	int sent = 0;
-	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_tx_info *info;
 	DECLARE_MAC_BUF(mac);
 
-	sdata = sta->sdata;
-
 	atomic_dec(&sdata->bss->num_sta_ps);
 
 	clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL);
@@ -685,7 +681,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %s aid %d exits power save mode\n",
-	       dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid);
+	       sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid);
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 
 	/* Send all buffered frames to the station */
@@ -701,7 +697,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta)
 		sent++;
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 		printk(KERN_DEBUG "%s: STA %s aid %d send PS frame "
-		       "since STA not sleeping anymore\n", dev->name,
+		       "since STA not sleeping anymore\n", sdata->dev->name,
 		       print_mac(mac, sta->sta.addr), sta->sta.aid);
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 		info->flags |= IEEE80211_TX_CTL_REQUEUE;
@@ -715,7 +711,6 @@ static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 {
 	struct sta_info *sta = rx->sta;
-	struct net_device *dev = rx->dev;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
 
 	if (!sta)
@@ -757,10 +752,10 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 		 * exchange sequence */
 		if (test_sta_flags(sta, WLAN_STA_PS) &&
 		    !ieee80211_has_pm(hdr->frame_control))
-			rx->sent_ps_buffered += ap_sta_ps_end(dev, sta);
+			rx->sent_ps_buffered += ap_sta_ps_end(sta);
 		else if (!test_sta_flags(sta, WLAN_STA_PS) &&
 			 ieee80211_has_pm(hdr->frame_control))
-			ap_sta_ps_start(dev, sta);
+			ap_sta_ps_start(sta);
 	}
 
 	/* Drop data::nullfunc frames silently, since they are used only to
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 20d683641b4..00d798cc9e0 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -165,11 +165,10 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 	return cpu_to_le16(dur);
 }
 
-static int inline is_ieee80211_device(struct net_device *dev,
-				      struct net_device *master)
+static int inline is_ieee80211_device(struct ieee80211_local *local,
+				      struct net_device *dev)
 {
-	return (wdev_priv(dev->ieee80211_ptr) ==
-		wdev_priv(master->ieee80211_ptr));
+	return local == wdev_priv(dev->ieee80211_ptr);
 }
 
 /* tx handlers */
@@ -1001,14 +1000,14 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 /*
  * NB: @tx is uninitialised when passed in here
  */
-static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
-				struct sk_buff *skb,
-				struct net_device *mdev)
+static int ieee80211_tx_prepare(struct ieee80211_local *local,
+				struct ieee80211_tx_data *tx,
+				struct sk_buff *skb)
 {
 	struct net_device *dev;
 
 	dev = dev_get_by_index(&init_net, skb->iif);
-	if (unlikely(dev && !is_ieee80211_device(dev, mdev))) {
+	if (unlikely(dev && !is_ieee80211_device(local, dev))) {
 		dev_put(dev);
 		dev = NULL;
 	}
@@ -1258,6 +1257,8 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
 int ieee80211_master_start_xmit(struct sk_buff *skb,
 				struct net_device *dev)
 {
+	struct ieee80211_master_priv *mpriv = netdev_priv(dev);
+	struct ieee80211_local *local = mpriv->local;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct net_device *odev = NULL;
@@ -1273,7 +1274,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 
 	if (skb->iif)
 		odev = dev_get_by_index(&init_net, skb->iif);
-	if (unlikely(odev && !is_ieee80211_device(odev, dev))) {
+	if (unlikely(odev && !is_ieee80211_device(local, odev))) {
 		dev_put(odev);
 		odev = NULL;
 	}
@@ -1449,8 +1450,8 @@ fail:
 int ieee80211_subif_start_xmit(struct sk_buff *skb,
 			       struct net_device *dev)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 	int ret = 1, head_need;
 	u16 ethertype, hdrlen,  meshhdrlen = 0;
 	__le16 fc;
@@ -1462,7 +1463,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	struct sta_info *sta;
 	u32 sta_flags = 0;
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	if (unlikely(skb->len < ETH_HLEN)) {
 		ret = 0;
 		goto fail;
@@ -2032,7 +2032,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 				cpu_to_le16(IEEE80211_FCTL_MOREDATA);
 		}
 
-		if (!ieee80211_tx_prepare(&tx, skb, local->mdev))
+		if (!ieee80211_tx_prepare(local, &tx, skb))
 			break;
 		dev_kfree_skb_any(skb);
 	}
-- 
cgit v1.2.3-70-g09d2


From 60719ffd721f6764b7d07ca188c0d944a4330b69 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 16 Sep 2008 14:55:09 +0200
Subject: cfg80211: show interface type

This patch makes cfg80211 show the interface in the nl80211
information about a specific interface. API users are required
to keep the type updated (everything else is fairly complicated)
but you will get a warning if you fail to keep it updated.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 6 ++++--
 include/net/wireless.h | 2 ++
 net/mac80211/iface.c   | 1 +
 net/wireless/core.c    | 2 ++
 net/wireless/nl80211.c | 6 +++++-
 5 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9f40c4d417d..0e85ec39b63 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -363,11 +363,13 @@ struct wiphy;
  * wireless extensions but this is subject to reevaluation as soon as this
  * code is used more widely and we have a first user without wext.
  *
- * @add_virtual_intf: create a new virtual interface with the given name
+ * @add_virtual_intf: create a new virtual interface with the given name,
+ *	must set the struct wireless_dev's iftype.
  *
  * @del_virtual_intf: remove the virtual interface determined by ifindex.
  *
- * @change_virtual_intf: change type of virtual interface
+ * @change_virtual_intf: change type/configuration of virtual interface,
+ *	keep the struct wireless_dev's iftype updated.
  *
  * @add_key: add a key with the given parameters. @mac_addr will be %NULL
  *	when adding a group key.
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 0de7fde9f33..721efb363db 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -223,9 +223,11 @@ struct wiphy {
  * the netdev.)
  *
  * @wiphy: pointer to hardware description
+ * @iftype: interface type
  */
 struct wireless_dev {
 	struct wiphy *wiphy;
+	enum nl80211_iftype iftype;
 
 	/* private to the generic wireless code */
 	struct list_head list;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index a72fbebb8ea..b5cd91e8971 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -625,6 +625,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	/* and set some type-dependent values */
 	sdata->vif.type = type;
 	sdata->dev->hard_start_xmit = ieee80211_subif_start_xmit;
+	sdata->wdev.iftype = type;
 
 	/* only monitor differs */
 	sdata->dev->type = ARPHRD_ETHER;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 88cb7339486..d6940085d59 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -384,6 +384,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 
 	rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
 
+	WARN_ON(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_UNSPECIFIED);
+
 	switch (state) {
 	case NETDEV_REGISTER:
 		mutex_lock(&rdev->devlist_mtx);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1221d726ed5..44771a690d5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -299,7 +299,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
 	NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, dev->name);
-	/* TODO: interface type */
+	NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, dev->ieee80211_ptr->iftype);
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
@@ -453,6 +453,10 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 				  &flags);
 	err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex,
 					    type, err ? NULL : &flags, &params);
+
+	dev = __dev_get_by_index(&init_net, ifindex);
+	WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type));
+
 	rtnl_unlock();
 
  unlock:
-- 
cgit v1.2.3-70-g09d2


From 723b038def23ce0606754c4f598cbb96bae9a102 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 16 Sep 2008 20:22:09 +0200
Subject: cfg80211: allow set_interface without type

Which then causes no type change.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/nl80211.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 44771a690d5..a745932d137 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -422,19 +422,20 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 
 	memset(&params, 0, sizeof(params));
 
-	if (info->attrs[NL80211_ATTR_IFTYPE]) {
-		type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
-		if (type > NL80211_IFTYPE_MAX)
-			return -EINVAL;
-	} else
-		return -EINVAL;
-
 	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
 		return err;
 	ifindex = dev->ifindex;
+	type = dev->ieee80211_ptr->iftype;
 	dev_put(dev);
 
+	err = -EINVAL;
+	if (info->attrs[NL80211_ATTR_IFTYPE]) {
+		type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
+		if (type > NL80211_IFTYPE_MAX)
+			goto unlock;
+	}
+
 	if (!drv->ops->change_virtual_intf ||
 	    !(drv->wiphy.interface_modes & (1 << type))) {
 		err = -EOPNOTSUPP;
-- 
cgit v1.2.3-70-g09d2


From f8b25cdad719cddceb9cf0d350065b3e59e74219 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 16 Sep 2008 20:22:21 +0200
Subject: mac80211: allow interface settings changes only when down

We currently allow monitor flags changes and mesh ID changes when
the interface is up, which can lead to trouble. Change it to only
allow when down.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a8501f14b16..89a183c2327 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -100,6 +100,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 	if (ret)
 		return ret;
 
+	if (netif_running(sdata->dev))
+		return -EBUSY;
+
 	if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len)
 		ieee80211_sdata_set_mesh_id(sdata,
 					    params->mesh_id_len,
-- 
cgit v1.2.3-70-g09d2


From 92ffe055c3ea45856183bebed62f8880f75fef3b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 16 Sep 2008 20:39:36 +0200
Subject: cfg80211: reject invalid configuration items

Reject configuring mesh-id for non-mesh, monitor flags for non-monitor.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/nl80211.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a745932d137..572793c8c7a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -418,7 +418,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 	int err, ifindex;
 	enum nl80211_iftype type;
 	struct net_device *dev;
-	u32 flags;
+	u32 _flags, *flags = NULL;
 
 	memset(&params, 0, sizeof(params));
 
@@ -442,18 +442,28 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 		goto unlock;
 	}
 
-	if (type == NL80211_IFTYPE_MESH_POINT &&
-	    info->attrs[NL80211_ATTR_MESH_ID]) {
+	if (info->attrs[NL80211_ATTR_MESH_ID]) {
+		if (type != NL80211_IFTYPE_MESH_POINT) {
+			err = -EINVAL;
+			goto unlock;
+		}
 		params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
 		params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
 	}
 
+	if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
+		if (type != NL80211_IFTYPE_MONITOR) {
+			err = -EINVAL;
+			goto unlock;
+		}
+		err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS],
+					  &_flags);
+		if (!err)
+			flags = &_flags;
+	}
 	rtnl_lock();
-	err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
-				  info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
-				  &flags);
 	err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex,
-					    type, err ? NULL : &flags, &params);
+					    type, flags, &params);
 
 	dev = __dev_get_by_index(&init_net, ifindex);
 	WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type));
-- 
cgit v1.2.3-70-g09d2


From 79617deeebb9cf089e2bc2aad19743b1209043f6 Mon Sep 17 00:00:00 2001
From: YanBo <dreamfly281@gmail.com>
Date: Mon, 22 Sep 2008 13:30:32 +0800
Subject: mac80211: mesh portal functionality support

Currently the mesh code doesn't support bridging mesh point interfaces
with wired ethernet or AP to construct an MPP or MAP. This patch adds
code to support the "6 address frame format packet" functionality to
mesh point interfaces. Now the mesh network can be used as backhaul
for end to end communication.

Signed-off-by: Li YanBo <dreamfly281@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h   |   5 ++
 net/mac80211/mesh.h         |   4 ++
 net/mac80211/mesh_pathtbl.c | 127 +++++++++++++++++++++++++++++++++++++++++++-
 net/mac80211/rx.c           |  32 +++++++++--
 net/mac80211/tx.c           |  44 ++++++++++++---
 5 files changed, 201 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index abc1abc63bf..14126bc3664 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -471,6 +471,11 @@ struct ieee80211s_hdr {
 	u8 eaddr3[6];
 } __attribute__ ((packed));
 
+/* Mesh flags */
+#define MESH_FLAGS_AE_A4 	0x1
+#define MESH_FLAGS_AE_A5_A6	0x2
+#define MESH_FLAGS_PS_DEEP	0x4
+
 /**
  * struct ieee80211_quiet_ie
  *
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 8ee414a0447..e10471c6ba4 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -71,6 +71,7 @@ enum mesh_path_flags {
  */
 struct mesh_path {
 	u8 dst[ETH_ALEN];
+	u8 mpp[ETH_ALEN];	/* used for MPP or MAP */
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *next_hop;
 	struct timer_list timer;
@@ -226,6 +227,9 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
 void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata);
 struct mesh_path *mesh_path_lookup(u8 *dst,
 		struct ieee80211_sub_if_data *sdata);
+struct mesh_path *mpp_path_lookup(u8 *dst,
+				  struct ieee80211_sub_if_data *sdata);
+int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata);
 struct mesh_path *mesh_path_lookup_by_idx(int idx,
 		struct ieee80211_sub_if_data *sdata);
 void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index e4fa2905fad..3c72557df45 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -36,6 +36,7 @@ struct mpath_node {
 };
 
 static struct mesh_table *mesh_paths;
+static struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */
 
 /* This lock will have the grow table function as writer and add / delete nodes
  * as readers. When reading the table (i.e. doing lookups) we are well protected
@@ -94,6 +95,34 @@ struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata)
 	return NULL;
 }
 
+struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata)
+{
+	struct mesh_path *mpath;
+	struct hlist_node *n;
+	struct hlist_head *bucket;
+	struct mesh_table *tbl;
+	struct mpath_node *node;
+
+	tbl = rcu_dereference(mpp_paths);
+
+	bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)];
+	hlist_for_each_entry_rcu(node, n, bucket, list) {
+		mpath = node->mpath;
+		if (mpath->sdata == sdata &&
+		    memcmp(dst, mpath->dst, ETH_ALEN) == 0) {
+			if (MPATH_EXPIRED(mpath)) {
+				spin_lock_bh(&mpath->state_lock);
+				if (MPATH_EXPIRED(mpath))
+					mpath->flags &= ~MESH_PATH_ACTIVE;
+				spin_unlock_bh(&mpath->state_lock);
+			}
+			return mpath;
+		}
+	}
+	return NULL;
+}
+
+
 /**
  * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index
  * @idx: index
@@ -226,6 +255,91 @@ err_path_alloc:
 }
 
 
+int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata)
+{
+	struct mesh_path *mpath, *new_mpath;
+	struct mpath_node *node, *new_node;
+	struct hlist_head *bucket;
+	struct hlist_node *n;
+	int grow = 0;
+	int err = 0;
+	u32 hash_idx;
+
+
+	if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0)
+		/* never add ourselves as neighbours */
+		return -ENOTSUPP;
+
+	if (is_multicast_ether_addr(dst))
+		return -ENOTSUPP;
+
+	err = -ENOMEM;
+	new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL);
+	if (!new_mpath)
+		goto err_path_alloc;
+
+	new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL);
+	if (!new_node)
+		goto err_node_alloc;
+
+	read_lock(&pathtbl_resize_lock);
+	memcpy(new_mpath->dst, dst, ETH_ALEN);
+	memcpy(new_mpath->mpp, mpp, ETH_ALEN);
+	new_mpath->sdata = sdata;
+	new_mpath->flags = 0;
+	skb_queue_head_init(&new_mpath->frame_queue);
+	new_node->mpath = new_mpath;
+	new_mpath->exp_time = jiffies;
+	spin_lock_init(&new_mpath->state_lock);
+
+	hash_idx = mesh_table_hash(dst, sdata, mpp_paths);
+	bucket = &mpp_paths->hash_buckets[hash_idx];
+
+	spin_lock(&mpp_paths->hashwlock[hash_idx]);
+
+	err = -EEXIST;
+	hlist_for_each_entry(node, n, bucket, list) {
+		mpath = node->mpath;
+		if (mpath->sdata == sdata && memcmp(dst, mpath->dst, ETH_ALEN) == 0)
+			goto err_exists;
+	}
+
+	hlist_add_head_rcu(&new_node->list, bucket);
+	if (atomic_inc_return(&mpp_paths->entries) >=
+		mpp_paths->mean_chain_len * (mpp_paths->hash_mask + 1))
+		grow = 1;
+
+	spin_unlock(&mpp_paths->hashwlock[hash_idx]);
+	read_unlock(&pathtbl_resize_lock);
+	if (grow) {
+		struct mesh_table *oldtbl, *newtbl;
+
+		write_lock(&pathtbl_resize_lock);
+		oldtbl = mpp_paths;
+		newtbl = mesh_table_grow(mpp_paths);
+		if (!newtbl) {
+			write_unlock(&pathtbl_resize_lock);
+			return 0;
+		}
+		rcu_assign_pointer(mpp_paths, newtbl);
+		write_unlock(&pathtbl_resize_lock);
+
+		synchronize_rcu();
+		mesh_table_free(oldtbl, false);
+	}
+	return 0;
+
+err_exists:
+	spin_unlock(&mpp_paths->hashwlock[hash_idx]);
+	read_unlock(&pathtbl_resize_lock);
+	kfree(new_node);
+err_node_alloc:
+	kfree(new_mpath);
+err_path_alloc:
+	return err;
+}
+
+
 /**
  * mesh_plink_broken - deactivates paths and sends perr when a link breaks
  *
@@ -475,11 +589,21 @@ static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl)
 int mesh_pathtbl_init(void)
 {
 	mesh_paths = mesh_table_alloc(INIT_PATHS_SIZE_ORDER);
+	if (!mesh_paths)
+		return -ENOMEM;
 	mesh_paths->free_node = &mesh_path_node_free;
 	mesh_paths->copy_node = &mesh_path_node_copy;
 	mesh_paths->mean_chain_len = MEAN_CHAIN_LEN;
-	if (!mesh_paths)
+
+	mpp_paths = mesh_table_alloc(INIT_PATHS_SIZE_ORDER);
+	if (!mpp_paths) {
+		mesh_table_free(mesh_paths, true);
 		return -ENOMEM;
+	}
+	mpp_paths->free_node = &mesh_path_node_free;
+	mpp_paths->copy_node = &mesh_path_node_copy;
+	mpp_paths->mean_chain_len = MEAN_CHAIN_LEN;
+
 	return 0;
 }
 
@@ -511,4 +635,5 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
 void mesh_pathtbl_unregister(void)
 {
 	mesh_table_free(mesh_paths, true);
+	mesh_table_free(mpp_paths, true);
 }
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3ab9670f180..2efa4dd47b5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1107,10 +1107,6 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
-	if (ieee80211_vif_is_mesh(&sdata->vif))
-		hdrlen += ieee80211_get_mesh_hdrlen(
-				(struct ieee80211s_hdr *) (skb->data + hdrlen));
-
 	/* convert IEEE 802.11 header + possible LLC headers into Ethernet
 	 * header
 	 * IEEE 802.11 address fields:
@@ -1134,6 +1130,15 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 		if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS &&
 			     sdata->vif.type != NL80211_IFTYPE_MESH_POINT))
 			return -1;
+		if (ieee80211_vif_is_mesh(&sdata->vif)) {
+			struct ieee80211s_hdr *meshdr = (struct ieee80211s_hdr *)
+				(skb->data + hdrlen);
+			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
+			if (meshdr->flags & MESH_FLAGS_AE_A5_A6) {
+				memcpy(dst, meshdr->eaddr1, ETH_ALEN);
+				memcpy(src, meshdr->eaddr2, ETH_ALEN);
+			}
+		}
 		break;
 	case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS):
 		if (sdata->vif.type != NL80211_IFTYPE_STATION ||
@@ -1393,6 +1398,25 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 		/* illegal frame */
 		return RX_DROP_MONITOR;
 
+	if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6){
+		struct ieee80211_sub_if_data *sdata;
+		struct mesh_path *mppath;
+
+		sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
+		rcu_read_lock();
+		mppath = mpp_path_lookup(mesh_hdr->eaddr2, sdata);
+		if (!mppath) {
+			mpp_path_add(mesh_hdr->eaddr2, hdr->addr4, sdata);
+		} else {
+			spin_lock_bh(&mppath->state_lock);
+			mppath->exp_time = jiffies;
+			if (compare_ether_addr(mppath->mpp, hdr->addr4) != 0)
+				memcpy(mppath->mpp, hdr->addr4, ETH_ALEN);
+			spin_unlock_bh(&mppath->state_lock);
+		}
+		rcu_read_unlock();
+	}
+
 	if (compare_ether_addr(rx->dev->dev_addr, hdr->addr3) == 0)
 		return RX_CONTINUE;
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 00d798cc9e0..00d96e63dce 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1498,18 +1498,50 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 #ifdef CONFIG_MAC80211_MESH
 	case NL80211_IFTYPE_MESH_POINT:
 		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
-		/* RA TA DA SA */
-		memset(hdr.addr1, 0, ETH_ALEN);
-		memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN);
-		memcpy(hdr.addr3, skb->data, ETH_ALEN);
-		memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
 		if (!sdata->u.mesh.mshcfg.dot11MeshTTL) {
 			/* Do not send frames with mesh_ttl == 0 */
 			sdata->u.mesh.mshstats.dropped_frames_ttl++;
 			ret = 0;
 			goto fail;
 		}
-		meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata);
+		memset(&mesh_hdr, 0, sizeof(mesh_hdr));
+
+		if (compare_ether_addr(dev->dev_addr,
+					  skb->data + ETH_ALEN) == 0) {
+			/* RA TA DA SA */
+			memset(hdr.addr1, 0, ETH_ALEN);
+			memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN);
+			memcpy(hdr.addr3, skb->data, ETH_ALEN);
+			memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
+			meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata);
+		} else {
+			/* packet from other interface */
+			struct mesh_path *mppath;
+
+			memset(hdr.addr1, 0, ETH_ALEN);
+			memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN);
+			memcpy(hdr.addr4, dev->dev_addr, ETH_ALEN);
+
+			if (is_multicast_ether_addr(skb->data))
+				memcpy(hdr.addr3, skb->data, ETH_ALEN);
+			else {
+				rcu_read_lock();
+				mppath = mpp_path_lookup(skb->data, sdata);
+				if (mppath)
+					memcpy(hdr.addr3, mppath->mpp, ETH_ALEN);
+				else
+					memset(hdr.addr3, 0xff, ETH_ALEN);
+				rcu_read_unlock();
+			}
+
+			mesh_hdr.flags |= MESH_FLAGS_AE_A5_A6;
+			mesh_hdr.ttl = sdata->u.mesh.mshcfg.dot11MeshTTL;
+			put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &mesh_hdr.seqnum);
+			memcpy(mesh_hdr.eaddr1, skb->data, ETH_ALEN);
+			memcpy(mesh_hdr.eaddr2, skb->data + ETH_ALEN, ETH_ALEN);
+			sdata->u.mesh.mesh_seqnum++;
+			meshhdrlen = 18;
+		}
 		hdrlen = 30;
 		break;
 #endif
-- 
cgit v1.2.3-70-g09d2


From 2ff6a6d4e92270283432690adf53a7e5ab186d19 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 18 Sep 2008 12:24:20 +0200
Subject: mac80211: fix mesh action frame handling

When I split off the action frame handling I made the code drop
all action frames we don't want to handle. This is wrong since
some action frames are actually handled via rx_h_mgmt through
being queued to the sta/mesh implementations.

Thanks to Li YanBo for noticing the problem.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Li YanBo <dreamfly281@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2efa4dd47b5..c489865761b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1557,7 +1557,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 	 */
 	if (sdata->vif.type != NL80211_IFTYPE_STATION &&
 	    sdata->vif.type != NL80211_IFTYPE_ADHOC)
-		return RX_DROP_MONITOR;
+		return RX_CONTINUE;
 
 	switch (mgmt->u.action.category) {
 	case WLAN_CATEGORY_BACK:
-- 
cgit v1.2.3-70-g09d2


From 4b7679a561e552eeda1e3567119bef2bca99b66e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 18 Sep 2008 18:14:18 +0200
Subject: mac80211: clean up rate control API

Long awaited, hard work. This patch totally cleans up the rate control
API to remove the requirement to include internal headers outside of
net/mac80211/.

There's one internal use in the PID algorithm left for mesh networking,
we'll have to figure out a way to clean that one up and decide how to
do the peer link evaluation, possibly independent of the rate control
algorithm or via new API.

Additionally, ath9k is left using the cross-inclusion hack for now, we
will add new API where necessary to make this work properly, but right
now I'm not expert enough to do it. It's still off better than before.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/rc.c             |  98 +++++++--------
 drivers/net/wireless/iwlwifi/iwl-3945-rs.c  | 182 ++++++----------------------
 drivers/net/wireless/iwlwifi/iwl-3945-rs.h  |   9 --
 drivers/net/wireless/iwlwifi/iwl-3945.h     |   4 +
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c   | 146 +++++++++-------------
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  11 +-
 include/net/mac80211.h                      |  68 +++++++++++
 net/mac80211/cfg.c                          |   2 +-
 net/mac80211/ieee80211_i.h                  |   2 +
 net/mac80211/main.c                         |   4 +-
 net/mac80211/mlme.c                         |   4 +-
 net/mac80211/rate.c                         |  71 ++++++++---
 net/mac80211/rate.h                         | 102 ++++------------
 net/mac80211/rc80211_pid.h                  |   2 -
 net/mac80211/rc80211_pid_algo.c             | 158 +++++++++---------------
 net/mac80211/sta_info.c                     |  17 +--
 net/mac80211/sta_info.h                     |   2 +-
 net/mac80211/tx.c                           |   5 +-
 18 files changed, 373 insertions(+), 514 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c
index 1cc9daf4455..cca2fc5b076 100644
--- a/drivers/net/wireless/ath9k/rc.c
+++ b/drivers/net/wireless/ath9k/rc.c
@@ -20,6 +20,7 @@
  */
 
 #include "core.h"
+/* FIXME: remove this include! */
 #include "../net/mac80211/rate.h"
 
 static u32 tx_triglevel_max;
@@ -1812,20 +1813,18 @@ static void ath_rc_sib_init(struct ath_rate_node *ath_rc_priv)
 }
 
 
-static void ath_setup_rates(struct ieee80211_local *local, struct sta_info *sta)
+static void ath_setup_rates(struct ath_softc *sc,
+			    struct ieee80211_supported_band *sband,
+			    struct ieee80211_sta *sta,
+			    struct ath_rate_node *rc_priv)
 
 {
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_hw *hw = local_to_hw(local);
-	struct ath_softc *sc = hw->priv;
-	struct ath_rate_node *rc_priv = sta->rate_ctrl_priv;
 	int i, j = 0;
 
 	DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__);
 
-	sband =  local->hw.wiphy->bands[local->hw.conf.channel->band];
 	for (i = 0; i < sband->n_bitrates; i++) {
-		if (sta->sta.supp_rates[local->hw.conf.channel->band] & BIT(i)) {
+		if (sta->supp_rates[sband->band] & BIT(i)) {
 			rc_priv->neg_rates.rs_rates[j]
 				= (sband->bitrates[i].bitrate * 2) / 10;
 			j++;
@@ -1852,19 +1851,17 @@ void ath_rc_node_update(struct ieee80211_hw *hw, struct ath_rate_node *rc_priv)
 }
 
 /* Rate Control callbacks */
-static void ath_tx_status(void *priv, struct net_device *dev,
+static void ath_tx_status(void *priv, struct ieee80211_supported_band *sband,
+			  struct ieee80211_sta *sta, void *priv_sta,
 			  struct sk_buff *skb)
 {
 	struct ath_softc *sc = priv;
 	struct ath_tx_info_priv *tx_info_priv;
 	struct ath_node *an;
-	struct sta_info *sta;
-	struct ieee80211_local *local;
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr;
 	__le16 fc;
 
-	local = hw_to_local(sc->hw);
 	hdr = (struct ieee80211_hdr *)skb->data;
 	fc = hdr->frame_control;
 	tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0];
@@ -1873,8 +1870,7 @@ static void ath_tx_status(void *priv, struct net_device *dev,
 	an = ath_node_find(sc, hdr->addr1);
 	spin_unlock_bh(&sc->node_lock);
 
-	sta = sta_info_get(local, hdr->addr1);
-	if (!an || !sta || !ieee80211_is_data(fc)) {
+	if (!an || !priv_sta || !ieee80211_is_data(fc)) {
 		if (tx_info->driver_data[0] != NULL) {
 			kfree(tx_info->driver_data[0]);
 			tx_info->driver_data[0] = NULL;
@@ -1882,24 +1878,22 @@ static void ath_tx_status(void *priv, struct net_device *dev,
 		return;
 	}
 	if (tx_info->driver_data[0] != NULL) {
-		ath_rate_tx_complete(sc, an, sta->rate_ctrl_priv, tx_info_priv);
+		ath_rate_tx_complete(sc, an, priv_sta, tx_info_priv);
 		kfree(tx_info->driver_data[0]);
 		tx_info->driver_data[0] = NULL;
 	}
 }
 
 static void ath_tx_aggr_resp(struct ath_softc *sc,
-			     struct sta_info *sta,
+			     struct ieee80211_supported_band *sband,
+			     struct ieee80211_sta *sta,
 			     struct ath_node *an,
 			     u8 tidno)
 {
-	struct ieee80211_hw *hw = sc->hw;
-	struct ieee80211_local *local;
 	struct ath_atx_tid *txtid;
-	struct ieee80211_supported_band *sband;
 	u16 buffersize = 0;
 	int state;
-	DECLARE_MAC_BUF(mac);
+	struct sta_info *si;
 
 	if (!(sc->sc_flags & SC_OP_TXAGGR))
 		return;
@@ -1908,11 +1902,16 @@ static void ath_tx_aggr_resp(struct ath_softc *sc,
 	if (!txtid->paused)
 		return;
 
-	local = hw_to_local(sc->hw);
-	sband = hw->wiphy->bands[hw->conf.channel->band];
+	/*
+	 * XXX: This is entirely busted, we aren't supposed to
+	 *	access the sta from here because it's internal
+	 *	to mac80211, and looking at the state without
+	 *	locking is wrong too.
+	 */
+	si = container_of(sta, struct sta_info, sta);
 	buffersize = IEEE80211_MIN_AMPDU_BUF <<
 		sband->ht_info.ampdu_factor; /* FIXME */
-	state = sta->ampdu_mlme.tid_state_tx[tidno];
+	state = si->ampdu_mlme.tid_state_tx[tidno];
 
 	if (state & HT_ADDBA_RECEIVED_MSK) {
 		txtid->addba_exchangecomplete = 1;
@@ -1928,18 +1927,15 @@ static void ath_tx_aggr_resp(struct ath_softc *sc,
 	}
 }
 
-static void ath_get_rate(void *priv, struct net_device *dev,
-			 struct ieee80211_supported_band *sband,
-			 struct sk_buff *skb,
-			 struct rate_selection *sel)
+static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband,
+			 struct ieee80211_sta *sta, void *priv_sta,
+			 struct sk_buff *skb, struct rate_selection *sel)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct sta_info *sta;
-	struct ath_softc *sc = (struct ath_softc *)priv;
+	struct ath_softc *sc = priv;
 	struct ieee80211_hw *hw = sc->hw;
 	struct ath_tx_info_priv *tx_info_priv;
-	struct ath_rate_node *ath_rc_priv;
+	struct ath_rate_node *ath_rc_priv = priv_sta;
 	struct ath_node *an;
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	int is_probe = FALSE, chk, ret;
@@ -1955,8 +1951,7 @@ static void ath_get_rate(void *priv, struct net_device *dev,
 	ASSERT(tx_info->driver_data[0] != NULL);
 	tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0];
 
-	sta = sta_info_get(local, hdr->addr1);
-	lowest_idx = rate_lowest_index(local, sband, sta);
+	lowest_idx = rate_lowest_index(sband, sta);
 	tx_info_priv->min_rate = (sband->bitrates[lowest_idx].bitrate * 2) / 10;
 	/* lowest rate for management and multicast/broadcast frames */
 	if (!ieee80211_is_data(fc) ||
@@ -1965,8 +1960,6 @@ static void ath_get_rate(void *priv, struct net_device *dev,
 		return;
 	}
 
-	ath_rc_priv = sta->rate_ctrl_priv;
-
 	/* Find tx rate for unicast frames */
 	ath_rate_findrate(sc, ath_rc_priv,
 			  ATH_11N_TXMAXTRY, 4,
@@ -1975,8 +1968,7 @@ static void ath_get_rate(void *priv, struct net_device *dev,
 			  &is_probe,
 			  false);
 	if (is_probe)
-		sel->probe_idx = ((struct ath_tx_ratectrl *)
-				  sta->rate_ctrl_priv)->probe_rate;
+		sel->probe_idx = ath_rc_priv->tx_ratectrl.probe_rate;
 
 	/* Ratecontrol sometimes returns invalid rate index */
 	if (tx_info_priv->rcs[0].rix != 0xff)
@@ -2020,37 +2012,31 @@ static void ath_get_rate(void *priv, struct net_device *dev,
 						__func__,
 						print_mac(mac, hdr->addr1));
 			} else if (chk == AGGR_EXCHANGE_PROGRESS)
-				ath_tx_aggr_resp(sc, sta, an, tid);
+				ath_tx_aggr_resp(sc, sband, sta, an, tid);
 		}
 	}
 }
 
-static void ath_rate_init(void *priv, void *priv_sta,
-			  struct ieee80211_local *local,
-			  struct sta_info *sta)
+static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband,
+                          struct ieee80211_sta *sta, void *priv_sta)
 {
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_hw *hw = local_to_hw(local);
-	struct ieee80211_conf *conf = &local->hw.conf;
-	struct ath_softc *sc = hw->priv;
+	struct ath_softc *sc = priv;
 	struct ath_rate_node *ath_rc_priv = priv_sta;
 	int i, j = 0;
 
 	DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__);
 
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-
-	ath_setup_rates(local, sta);
-	if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
+	ath_setup_rates(sc, sband, sta, ath_rc_priv);
+	if (sc->hw->conf.flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
 		for (i = 0; i < MCS_SET_SIZE; i++) {
-			if (conf->ht_conf.supp_mcs_set[i/8] & (1<<(i%8)))
+			if (sc->hw->conf.ht_conf.supp_mcs_set[i/8] & (1<<(i%8)))
 				ath_rc_priv->neg_ht_rates.rs_rates[j++] = i;
 			if (j == ATH_RATE_MAX)
 				break;
 		}
 		ath_rc_priv->neg_ht_rates.rs_nrates = j;
 	}
-	ath_rc_node_update(hw, priv_sta);
+	ath_rc_node_update(sc->hw, priv_sta);
 }
 
 static void ath_rate_clear(void *priv)
@@ -2058,13 +2044,12 @@ static void ath_rate_clear(void *priv)
 	return;
 }
 
-static void *ath_rate_alloc(struct ieee80211_local *local)
+static void *ath_rate_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 {
-	struct ieee80211_hw *hw = local_to_hw(local);
 	struct ath_softc *sc = hw->priv;
 
 	DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__);
-	return local->hw.priv;
+	return hw->priv;
 }
 
 static void ath_rate_free(void *priv)
@@ -2072,7 +2057,7 @@ static void ath_rate_free(void *priv)
 	return;
 }
 
-static void *ath_rate_alloc_sta(void *priv, gfp_t gfp)
+static void *ath_rate_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
 {
 	struct ath_softc *sc = priv;
 	struct ath_vap *avp = sc->sc_vaps[0];
@@ -2092,7 +2077,8 @@ static void *ath_rate_alloc_sta(void *priv, gfp_t gfp)
 	return rate_priv;
 }
 
-static void ath_rate_free_sta(void *priv, void *priv_sta)
+static void ath_rate_free_sta(void *priv, struct ieee80211_sta *sta,
+			      void *priv_sta)
 {
 	struct ath_rate_node *rate_priv = priv_sta;
 	struct ath_softc *sc = priv;
@@ -2111,7 +2097,7 @@ static struct rate_control_ops ath_rate_ops = {
 	.alloc = ath_rate_alloc,
 	.free = ath_rate_free,
 	.alloc_sta = ath_rate_alloc_sta,
-	.free_sta = ath_rate_free_sta
+	.free_sta = ath_rate_free_sta,
 };
 
 int ath_rate_control_register(void)
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
index a279bf1dc9b..6fc5e7361f2 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
@@ -36,8 +36,6 @@
 
 #include <linux/workqueue.h>
 
-#include "../net/mac80211/rate.h"
-
 #include "iwl-3945.h"
 
 #define RS_NAME "iwl-3945-rs"
@@ -319,10 +317,10 @@ static void iwl3945_collect_tx_data(struct iwl3945_rs_sta *rs_sta,
 	}
 }
 
-static void rs_rate_init(void *priv_rate, void *priv_sta,
-			 struct ieee80211_local *local, struct sta_info *sta)
+static void rs_rate_init(void *priv, struct ieee80211_supported_band *sband,
+			 struct ieee80211_sta *sta, void *priv_sta)
 {
-	struct iwl3945_rs_sta *rs_sta = (void *)sta->rate_ctrl_priv;
+	struct iwl3945_rs_sta *rs_sta = priv_sta;
 	int i;
 
 	IWL_DEBUG_RATE("enter\n");
@@ -333,22 +331,22 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 	 * after assoc.. */
 
 	for (i = IWL_RATE_COUNT - 1; i >= 0; i--) {
-		if (sta->sta.supp_rates[local->hw.conf.channel->band] & (1 << i)) {
+		if (sta->supp_rates[sband->band] & (1 << i)) {
 			rs_sta->last_txrate_idx = i;
 			break;
 		}
 	}
 
 	/* For 5 GHz band it start at IWL_FIRST_OFDM_RATE */
-	if (local->hw.conf.channel->band == IEEE80211_BAND_5GHZ)
+	if (sband->band == IEEE80211_BAND_5GHZ)
 		rs_sta->last_txrate_idx += IWL_FIRST_OFDM_RATE;
 
 	IWL_DEBUG_RATE("leave\n");
 }
 
-static void *rs_alloc(struct ieee80211_local *local)
+static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 {
-	return local->hw.priv;
+	return hw->priv;
 }
 
 /* rate scale requires free function to be implemented */
@@ -356,17 +354,24 @@ static void rs_free(void *priv)
 {
 	return;
 }
+
 static void rs_clear(void *priv)
 {
 	return;
 }
 
 
-static void *rs_alloc_sta(void *priv, gfp_t gfp)
+static void *rs_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
 {
 	struct iwl3945_rs_sta *rs_sta;
+	struct iwl3945_sta_priv *psta = (void *) sta->drv_priv;
 	int i;
 
+	/*
+	 * XXX: If it's using sta->drv_priv anyway, it might
+	 *	as well just put all the information there.
+	 */
+
 	IWL_DEBUG_RATE("enter\n");
 
 	rs_sta = kzalloc(sizeof(struct iwl3945_rs_sta), gfp);
@@ -375,6 +380,8 @@ static void *rs_alloc_sta(void *priv, gfp_t gfp)
 		return NULL;
 	}
 
+	psta->rs_sta = rs_sta;
+
 	spin_lock_init(&rs_sta->lock);
 
 	rs_sta->start_rate = IWL_RATE_INVALID;
@@ -400,10 +407,14 @@ static void *rs_alloc_sta(void *priv, gfp_t gfp)
 	return rs_sta;
 }
 
-static void rs_free_sta(void *priv, void *priv_sta)
+static void rs_free_sta(void *priv, struct ieee80211_sta *sta,
+			void *priv_sta)
 {
+	struct iwl3945_sta_priv *psta = (void *) sta->drv_priv;
 	struct iwl3945_rs_sta *rs_sta = priv_sta;
 
+	psta->rs_sta = NULL;
+
 	IWL_DEBUG_RATE("enter\n");
 	del_timer_sync(&rs_sta->rate_scale_flush);
 	kfree(rs_sta);
@@ -445,26 +456,19 @@ static int rs_adjust_next_rate(struct iwl3945_priv *priv, int rate)
  * NOTE: Uses iwl3945_priv->retry_rate for the # of retries attempted by
  * the hardware for each rate.
  */
-static void rs_tx_status(void *priv_rate,
-			 struct net_device *dev,
+static void rs_tx_status(void *priv_rate, struct ieee80211_supported_band *sband,
+			 struct ieee80211_sta *sta, void *priv_sta,
 			 struct sk_buff *skb)
 {
 	u8 retries, current_count;
 	int scale_rate_index, first_index, last_index;
 	unsigned long flags;
-	struct sta_info *sta;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_rate;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct iwl3945_rs_sta *rs_sta;
-	struct ieee80211_supported_band *sband;
+	struct iwl3945_rs_sta *rs_sta = priv_sta;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	IWL_DEBUG_RATE("enter\n");
 
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-
-
 	retries = info->status.retry_count;
 	first_index = sband->bitrates[info->tx_rate_idx].hw_value;
 	if ((first_index < 0) || (first_index >= IWL_RATE_COUNT)) {
@@ -472,17 +476,11 @@ static void rs_tx_status(void *priv_rate,
 		return;
 	}
 
-	rcu_read_lock();
-
-	sta = sta_info_get(local, hdr->addr1);
-	if (!sta || !sta->rate_ctrl_priv) {
-		rcu_read_unlock();
+	if (!priv_sta) {
 		IWL_DEBUG_RATE("leave: No STA priv data to update!\n");
 		return;
 	}
 
-	rs_sta = (void *)sta->rate_ctrl_priv;
-
 	rs_sta->tx_packets++;
 
 	scale_rate_index = first_index;
@@ -549,8 +547,6 @@ static void rs_tx_status(void *priv_rate,
 
 	spin_unlock_irqrestore(&rs_sta->lock, flags);
 
-	rcu_read_unlock();
-
 	IWL_DEBUG_RATE("leave\n");
 
 	return;
@@ -634,16 +630,15 @@ static u16 iwl3945_get_adjacent_rate(struct iwl3945_rs_sta *rs_sta,
  * rate table and must reference the driver allocated rate table
  *
  */
-static void rs_get_rate(void *priv_rate, struct net_device *dev,
-			struct ieee80211_supported_band *sband,
-			struct sk_buff *skb,
-			struct rate_selection *sel)
+static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband,
+			struct ieee80211_sta *sta, void *priv_sta,
+			struct sk_buff *skb, struct rate_selection *sel)
 {
 	u8 low = IWL_RATE_INVALID;
 	u8 high = IWL_RATE_INVALID;
 	u16 high_low;
 	int index;
-	struct iwl3945_rs_sta *rs_sta;
+	struct iwl3945_rs_sta *rs_sta = priv_sta;
 	struct iwl3945_rate_scale_data *window = NULL;
 	int current_tpt = IWL_INV_TPT;
 	int low_tpt = IWL_INV_TPT;
@@ -651,34 +646,25 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 	u32 fail_count;
 	s8 scale_action = 0;
 	unsigned long flags;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct sta_info *sta;
 	u16 fc, rate_mask;
-	struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_rate;
+	struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_r;
 	DECLARE_MAC_BUF(mac);
 
 	IWL_DEBUG_RATE("enter\n");
 
-	rcu_read_lock();
-
-	sta = sta_info_get(local, hdr->addr1);
-
 	/* Send management frames and broadcast/multicast data using lowest
 	 * rate. */
 	fc = le16_to_cpu(hdr->frame_control);
 	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
 	    is_multicast_ether_addr(hdr->addr1) ||
-	    !sta || !sta->rate_ctrl_priv) {
+	    !sta || !priv_sta) {
 		IWL_DEBUG_RATE("leave: No STA priv data to update!\n");
-		sel->rate_idx = rate_lowest_index(local, sband, sta);
-		rcu_read_unlock();
+		sel->rate_idx = rate_lowest_index(sband, sta);
 		return;
 	}
 
-	rs_sta = (void *)sta->rate_ctrl_priv;
-
-	rate_mask = sta->sta.supp_rates[sband->band];
+	rate_mask = sta->supp_rates[sband->band];
 	index = min(rs_sta->last_txrate_idx & 0xffff, IWL_RATE_COUNT - 1);
 
 	if (sband->band == IEEE80211_BAND_5GHZ)
@@ -811,8 +797,6 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 	else
 		sel->rate_idx = rs_sta->last_txrate_idx;
 
-	rcu_read_unlock();
-
 	IWL_DEBUG_RATE("leave: %d\n", index);
 }
 
@@ -829,114 +813,28 @@ static struct rate_control_ops rs_ops = {
 	.free_sta = rs_free_sta,
 };
 
-int iwl3945_fill_rs_info(struct ieee80211_hw *hw, char *buf, u8 sta_id)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct iwl3945_priv *priv = hw->priv;
-	struct iwl3945_rs_sta *rs_sta;
-	struct sta_info *sta;
-	unsigned long flags;
-	int count = 0, i;
-	u32 samples = 0, success = 0, good = 0;
-	unsigned long now = jiffies;
-	u32 max_time = 0;
-
-	rcu_read_lock();
-
-	sta = sta_info_get(local, priv->stations[sta_id].sta.sta.addr);
-	if (!sta || !sta->rate_ctrl_priv) {
-		if (sta)
-			IWL_DEBUG_RATE("leave - no private rate data!\n");
-		else
-			IWL_DEBUG_RATE("leave - no station!\n");
-		rcu_read_unlock();
-		return sprintf(buf, "station %d not found\n", sta_id);
-	}
-
-	rs_sta = (void *)sta->rate_ctrl_priv;
-	spin_lock_irqsave(&rs_sta->lock, flags);
-	i = IWL_RATE_54M_INDEX;
-	while (1) {
-		u64 mask;
-		int j;
-
-		count +=
-		    sprintf(&buf[count], " %2dMbs: ", iwl3945_rates[i].ieee / 2);
-
-		mask = (1ULL << (IWL_RATE_MAX_WINDOW - 1));
-		for (j = 0; j < IWL_RATE_MAX_WINDOW; j++, mask >>= 1)
-			buf[count++] =
-			    (rs_sta->win[i].data & mask) ? '1' : '0';
-
-		samples += rs_sta->win[i].counter;
-		good += rs_sta->win[i].success_counter;
-		success += rs_sta->win[i].success_counter *
-						iwl3945_rates[i].ieee;
-
-		if (rs_sta->win[i].stamp) {
-			int delta =
-			    jiffies_to_msecs(now - rs_sta->win[i].stamp);
-
-			if (delta > max_time)
-				max_time = delta;
-
-			count += sprintf(&buf[count], "%5dms\n", delta);
-		} else
-			buf[count++] = '\n';
-
-		j = iwl3945_get_prev_ieee_rate(i);
-		if (j == i)
-			break;
-		i = j;
-	}
-	spin_unlock_irqrestore(&rs_sta->lock, flags);
-	rcu_read_unlock();
-
-	/* Display the average rate of all samples taken.
-	 *
-	 * NOTE:  We multiple # of samples by 2 since the IEEE measurement
-	 * added from iwl3945_rates is actually 2X the rate */
-	if (samples)
-		count += sprintf(
-			&buf[count],
-			"\nAverage rate is %3d.%02dMbs over last %4dms\n"
-			"%3d%% success (%d good packets over %d tries)\n",
-			success / (2 * samples), (success * 5 / samples) % 10,
-			max_time, good * 100 / samples, good, samples);
-	else
-		count += sprintf(&buf[count], "\nAverage rate: 0Mbs\n");
-
-	return count;
-}
-
 void iwl3945_rate_scale_init(struct ieee80211_hw *hw, s32 sta_id)
 {
 	struct iwl3945_priv *priv = hw->priv;
 	s32 rssi = 0;
 	unsigned long flags;
-	struct ieee80211_local *local = hw_to_local(hw);
 	struct iwl3945_rs_sta *rs_sta;
-	struct sta_info *sta;
+	struct ieee80211_sta *sta;
+	struct iwl3945_sta_priv *psta;
 
 	IWL_DEBUG_RATE("enter\n");
 
-	if (!local->rate_ctrl->ops->name ||
-	    strcmp(local->rate_ctrl->ops->name, RS_NAME)) {
-		IWL_WARNING("iwl-3945-rs not selected as rate control algo!\n");
-		IWL_DEBUG_RATE("leave - mac80211 picked the wrong RC algo.\n");
-		return;
-	}
-
 	rcu_read_lock();
 
-	sta = sta_info_get(local, priv->stations[sta_id].sta.sta.addr);
-	if (!sta || !sta->rate_ctrl_priv) {
+	sta = ieee80211_find_sta(hw, priv->stations[sta_id].sta.sta.addr);
+	psta = (void *) sta->drv_priv;
+	if (!sta || !psta) {
 		IWL_DEBUG_RATE("leave - no private rate data!\n");
 		rcu_read_unlock();
 		return;
 	}
 
-	rs_sta = (void *)sta->rate_ctrl_priv;
+	rs_sta = psta->rs_sta;
 
 	spin_lock_irqsave(&rs_sta->lock, flags);
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.h b/drivers/net/wireless/iwlwifi/iwl-3945-rs.h
index f085d330bdc..98b17ae6ef2 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.h
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.h
@@ -175,15 +175,6 @@ static inline u8 iwl3945_get_prev_ieee_rate(u8 rate_index)
 	return rate;
 }
 
-/**
- * iwl3945_fill_rs_info - Fill an output text buffer with the rate representation
- *
- * NOTE:  This is provided as a quick mechanism for a user to visualize
- * the performance of the rate control algorithm and is not meant to be
- * parsed software.
- */
-extern int iwl3945_fill_rs_info(struct ieee80211_hw *, char *buf, u8 sta_id);
-
 /**
  * iwl3945_rate_scale_init - Initialize the rate scale table based on assoc info
  *
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h
index 2a4933b5fb6..bdd32475b99 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.h
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.h
@@ -73,6 +73,10 @@ extern struct pci_device_id iwl3945_hw_card_ids[];
 extern int iwl3945_param_hwcrypto;
 extern int iwl3945_param_queues_num;
 
+struct iwl3945_sta_priv {
+	struct iwl3945_rs_sta *rs_sta;
+};
+
 enum iwl3945_antenna {
 	IWL_ANTENNA_DIVERSITY,
 	IWL_ANTENNA_MAIN,
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index 8b57b390c8b..93944de923c 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -35,8 +35,6 @@
 
 #include <linux/workqueue.h>
 
-#include "../net/mac80211/rate.h"
-
 #include "iwl-dev.h"
 #include "iwl-sta.h"
 #include "iwl-core.h"
@@ -169,9 +167,9 @@ struct iwl_lq_sta {
 };
 
 static void rs_rate_scale_perform(struct iwl_priv *priv,
-				   struct net_device *dev,
 				   struct ieee80211_hdr *hdr,
-				   struct sta_info *sta);
+				   struct ieee80211_sta *sta,
+				   struct iwl_lq_sta *lq_sta);
 static void rs_fill_link_cmd(const struct iwl_priv *priv,
 			     struct iwl_lq_sta *lq_sta, u32 rate_n_flags);
 
@@ -357,20 +355,20 @@ static u32 rs_tl_get_load(struct iwl_lq_sta *lq_data, u8 tid)
 
 static void rs_tl_turn_on_agg_for_tid(struct iwl_priv *priv,
 				      struct iwl_lq_sta *lq_data, u8 tid,
-				      struct sta_info *sta)
+				      struct ieee80211_sta *sta)
 {
 	DECLARE_MAC_BUF(mac);
 
 	if (rs_tl_get_load(lq_data, tid) > IWL_AGG_LOAD_THRESHOLD) {
 		IWL_DEBUG_HT("Starting Tx agg: STA: %s tid: %d\n",
-				print_mac(mac, sta->sta.addr), tid);
-		ieee80211_start_tx_ba_session(priv->hw, sta->sta.addr, tid);
+				print_mac(mac, sta->addr), tid);
+		ieee80211_start_tx_ba_session(priv->hw, sta->addr, tid);
 	}
 }
 
 static void rs_tl_turn_on_agg(struct iwl_priv *priv, u8 tid,
 			      struct iwl_lq_sta *lq_data,
-			      struct sta_info *sta)
+			      struct ieee80211_sta *sta)
 {
 	if ((tid < TID_MAX_LOAD_COUNT))
 		rs_tl_turn_on_agg_for_tid(priv, lq_data, tid, sta);
@@ -770,7 +768,8 @@ out:
 /*
  * mac80211 sends us Tx status
  */
-static void rs_tx_status(void *priv_rate, struct net_device *dev,
+static void rs_tx_status(void *priv_r, struct ieee80211_supported_band *sband,
+			 struct ieee80211_sta *sta, void *priv_sta,
 			 struct sk_buff *skb)
 {
 	int status;
@@ -778,11 +777,9 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 	int rs_index, index = 0;
 	struct iwl_lq_sta *lq_sta;
 	struct iwl_link_quality_cmd *table;
-	struct sta_info *sta;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct iwl_priv *priv = (struct iwl_priv *)priv_rate;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_hw *hw = local_to_hw(local);
+	struct iwl_priv *priv = (struct iwl_priv *)priv_r;
+	struct ieee80211_hw *hw = priv->hw;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct iwl_rate_scale_data *window = NULL;
 	struct iwl_rate_scale_data *search_win = NULL;
@@ -808,15 +805,7 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 	if (retries > 15)
 		retries = 15;
 
-	rcu_read_lock();
-
-	sta = sta_info_get(local, hdr->addr1);
-
-	if (!sta || !sta->rate_ctrl_priv)
-		goto out;
-
-
-	lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv;
+	lq_sta = (struct iwl_lq_sta *)priv_sta;
 
 	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
 	    !lq_sta->ibss_sta_added)
@@ -962,9 +951,8 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev,
 	}
 
 	/* See if there's a better rate or modulation mode to try. */
-	rs_rate_scale_perform(priv, dev, hdr, sta);
+	rs_rate_scale_perform(priv, hdr, sta, lq_sta);
 out:
-	rcu_read_unlock();
 	return;
 }
 
@@ -1140,7 +1128,7 @@ static s32 rs_get_best_rate(struct iwl_priv *priv,
 static int rs_switch_to_mimo2(struct iwl_priv *priv,
 			     struct iwl_lq_sta *lq_sta,
 			     struct ieee80211_conf *conf,
-			     struct sta_info *sta,
+			     struct ieee80211_sta *sta,
 			     struct iwl_scale_tbl_info *tbl, int index)
 {
 	u16 rate_mask;
@@ -1148,10 +1136,10 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv,
 	s8 is_green = lq_sta->is_green;
 
 	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) ||
-	    !sta->sta.ht_info.ht_supported)
+	    !sta->ht_info.ht_supported)
 		return -1;
 
-	if (((sta->sta.ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2)
+	if (((sta->ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2)
 						== WLAN_HT_CAP_SM_PS_STATIC)
 		return -1;
 
@@ -1208,7 +1196,7 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv,
 static int rs_switch_to_siso(struct iwl_priv *priv,
 			     struct iwl_lq_sta *lq_sta,
 			     struct ieee80211_conf *conf,
-			     struct sta_info *sta,
+			     struct ieee80211_sta *sta,
 			     struct iwl_scale_tbl_info *tbl, int index)
 {
 	u16 rate_mask;
@@ -1216,7 +1204,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv,
 	s32 rate;
 
 	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) ||
-	    !sta->sta.ht_info.ht_supported)
+	    !sta->ht_info.ht_supported)
 		return -1;
 
 	IWL_DEBUG_RATE("LQ: try to switch to SISO\n");
@@ -1268,7 +1256,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv,
 static int rs_move_legacy_other(struct iwl_priv *priv,
 				struct iwl_lq_sta *lq_sta,
 				struct ieee80211_conf *conf,
-				struct sta_info *sta,
+				struct ieee80211_sta *sta,
 				int index)
 {
 	struct iwl_scale_tbl_info *tbl = &(lq_sta->lq_info[lq_sta->active_tbl]);
@@ -1376,7 +1364,7 @@ out:
 static int rs_move_siso_to_other(struct iwl_priv *priv,
 				 struct iwl_lq_sta *lq_sta,
 				 struct ieee80211_conf *conf,
-				 struct sta_info *sta, int index)
+				 struct ieee80211_sta *sta, int index)
 {
 	u8 is_green = lq_sta->is_green;
 	struct iwl_scale_tbl_info *tbl = &(lq_sta->lq_info[lq_sta->active_tbl]);
@@ -1487,7 +1475,7 @@ static int rs_move_siso_to_other(struct iwl_priv *priv,
 static int rs_move_mimo_to_other(struct iwl_priv *priv,
 				 struct iwl_lq_sta *lq_sta,
 				 struct ieee80211_conf *conf,
-				 struct sta_info *sta, int index)
+				 struct ieee80211_sta *sta, int index)
 {
 	s8 is_green = lq_sta->is_green;
 	struct iwl_scale_tbl_info *tbl = &(lq_sta->lq_info[lq_sta->active_tbl]);
@@ -1680,12 +1668,11 @@ static void rs_stay_in_table(struct iwl_lq_sta *lq_sta)
  * Do rate scaling and search for new modulation mode.
  */
 static void rs_rate_scale_perform(struct iwl_priv *priv,
-				  struct net_device *dev,
 				  struct ieee80211_hdr *hdr,
-				  struct sta_info *sta)
+				  struct ieee80211_sta *sta,
+				  struct iwl_lq_sta *lq_sta)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_hw *hw = local_to_hw(local);
+	struct ieee80211_hw *hw = priv->hw;
 	struct ieee80211_conf *conf = &hw->conf;
 	int low = IWL_RATE_INVALID;
 	int high = IWL_RATE_INVALID;
@@ -1700,7 +1687,6 @@ static void rs_rate_scale_perform(struct iwl_priv *priv,
 	__le16 fc;
 	u16 rate_mask;
 	u8 update_lq = 0;
-	struct iwl_lq_sta *lq_sta;
 	struct iwl_scale_tbl_info *tbl, *tbl1;
 	u16 rate_scale_index_msk = 0;
 	u32 rate;
@@ -1721,11 +1707,10 @@ static void rs_rate_scale_perform(struct iwl_priv *priv,
 		return;
 	}
 
-	if (!sta || !sta->rate_ctrl_priv)
+	if (!sta || !lq_sta)
 		return;
 
-	lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv;
-	lq_sta->supp_rates = sta->sta.supp_rates[lq_sta->band];
+	lq_sta->supp_rates = sta->supp_rates[lq_sta->band];
 
 	tid = rs_tl_add_packet(lq_sta, hdr);
 
@@ -2064,9 +2049,9 @@ out:
 
 static void rs_initialize_lq(struct iwl_priv *priv,
 			     struct ieee80211_conf *conf,
-			     struct sta_info *sta)
+			     struct ieee80211_sta *sta,
+			     struct iwl_lq_sta *lq_sta)
 {
-	struct iwl_lq_sta *lq_sta;
 	struct iwl_scale_tbl_info *tbl;
 	int rate_idx;
 	int i;
@@ -2075,10 +2060,9 @@ static void rs_initialize_lq(struct iwl_priv *priv,
 	u8 active_tbl = 0;
 	u8 valid_tx_ant;
 
-	if (!sta || !sta->rate_ctrl_priv)
+	if (!sta || !lq_sta)
 		goto out;
 
-	lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv;
 	i = lq_sta->last_txrate_idx;
 
 	if ((lq_sta->lq.sta_id == 0xff) &&
@@ -2119,37 +2103,30 @@ static void rs_initialize_lq(struct iwl_priv *priv,
 	return;
 }
 
-static void rs_get_rate(void *priv_rate, struct net_device *dev,
-			struct ieee80211_supported_band *sband,
-			struct sk_buff *skb,
-			struct rate_selection *sel)
+static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband,
+			struct ieee80211_sta *sta, void *priv_sta,
+			struct sk_buff *skb, struct rate_selection *sel)
 {
 
 	int i;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_conf *conf = &local->hw.conf;
+	struct iwl_priv *priv = (struct iwl_priv *)priv_r;
+	struct ieee80211_conf *conf = &priv->hw->conf;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct sta_info *sta;
 	__le16 fc;
-	struct iwl_priv *priv = (struct iwl_priv *)priv_rate;
 	struct iwl_lq_sta *lq_sta;
 
 	IWL_DEBUG_RATE_LIMIT("rate scale calculate new rate for skb\n");
 
-	rcu_read_lock();
-
-	sta = sta_info_get(local, hdr->addr1);
-
 	/* Send management frames and broadcast/multicast data using lowest
 	 * rate. */
 	fc = hdr->frame_control;
 	if (!ieee80211_is_data(fc) || is_multicast_ether_addr(hdr->addr1) ||
-	    !sta || !sta->rate_ctrl_priv) {
-		sel->rate_idx = rate_lowest_index(local, sband, sta);
-		goto out;
+	    !sta || !priv_sta) {
+		sel->rate_idx = rate_lowest_index(sband, sta);
+		return;
 	}
 
-	lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv;
+	lq_sta = (struct iwl_lq_sta *)priv_sta;
 	i = lq_sta->last_txrate_idx;
 
 	if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) &&
@@ -2167,23 +2144,22 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev,
 			lq_sta->lq.sta_id = sta_id;
 			lq_sta->lq.rs_table[0].rate_n_flags = 0;
 			lq_sta->ibss_sta_added = 1;
-			rs_initialize_lq(priv, conf, sta);
+			rs_initialize_lq(priv, conf, sta, lq_sta);
 		}
 	}
 
 	if ((i < 0) || (i > IWL_RATE_COUNT)) {
-		sel->rate_idx = rate_lowest_index(local, sband, sta);
-		goto out;
+		sel->rate_idx = rate_lowest_index(sband, sta);
+		return;
 	}
 
 	if (sband->band == IEEE80211_BAND_5GHZ)
 		i -= IWL_FIRST_OFDM_RATE;
 	sel->rate_idx = i;
-out:
-	rcu_read_unlock();
 }
 
-static void *rs_alloc_sta(void *priv_rate, gfp_t gfp)
+static void *rs_alloc_sta(void *priv_rate, struct ieee80211_sta *sta,
+			  gfp_t gfp)
 {
 	struct iwl_lq_sta *lq_sta;
 	struct iwl_priv *priv;
@@ -2206,20 +2182,16 @@ static void *rs_alloc_sta(void *priv_rate, gfp_t gfp)
 	return lq_sta;
 }
 
-static void rs_rate_init(void *priv_rate, void *priv_sta,
-			 struct ieee80211_local *local,
-			 struct sta_info *sta)
+static void rs_rate_init(void *priv_r, struct ieee80211_supported_band *sband,
+			 struct ieee80211_sta *sta, void *priv_sta)
 {
 	int i, j;
-	struct ieee80211_conf *conf = &local->hw.conf;
-	struct ieee80211_supported_band *sband;
-	struct iwl_priv *priv = (struct iwl_priv *)priv_rate;
+	struct iwl_priv *priv = (struct iwl_priv *)priv_r;
+	struct ieee80211_conf *conf = &priv->hw->conf;
 	struct iwl_lq_sta *lq_sta = priv_sta;
 
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-
 	lq_sta->flush_timer = 0;
-	lq_sta->supp_rates = sta->sta.supp_rates[sband->band];
+	lq_sta->supp_rates = sta->supp_rates[sband->band];
 	for (j = 0; j < LQ_SIZE; j++)
 		for (i = 0; i < IWL_RATE_COUNT; i++)
 			rs_rate_scale_clear_window(&lq_sta->lq_info[j].win[i]);
@@ -2232,17 +2204,17 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 
 	lq_sta->ibss_sta_added = 0;
 	if (priv->iw_mode == NL80211_IFTYPE_AP) {
-		u8 sta_id = iwl_find_station(priv, sta->sta.addr);
+		u8 sta_id = iwl_find_station(priv, sta->addr);
 		DECLARE_MAC_BUF(mac);
 
 		/* for IBSS the call are from tasklet */
 		IWL_DEBUG_RATE("LQ: ADD station %s\n",
-			     print_mac(mac, sta->sta.addr));
+			     print_mac(mac, sta->addr));
 
 		if (sta_id == IWL_INVALID_STATION) {
 			IWL_DEBUG_RATE("LQ: ADD station %s\n",
-				       print_mac(mac, sta->sta.addr));
-			sta_id = iwl_add_station_flags(priv, sta->sta.addr,
+				       print_mac(mac, sta->addr));
+			sta_id = iwl_add_station_flags(priv, sta->addr,
 							0, CMD_ASYNC, NULL);
 		}
 		if ((sta_id != IWL_INVALID_STATION)) {
@@ -2256,11 +2228,11 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 	/* Find highest tx rate supported by hardware and destination station */
 	lq_sta->last_txrate_idx = 3;
 	for (i = 0; i < sband->n_bitrates; i++)
-		if (sta->sta.supp_rates[sband->band] & BIT(i))
+		if (sta->supp_rates[sband->band] & BIT(i))
 			lq_sta->last_txrate_idx = i;
 
 	/* For MODE_IEEE80211A, skip over cck rates in global rate table */
-	if (local->hw.conf.channel->band == IEEE80211_BAND_5GHZ)
+	if (sband->band == IEEE80211_BAND_5GHZ)
 		lq_sta->last_txrate_idx += IWL_FIRST_OFDM_RATE;
 
 	lq_sta->is_dup = 0;
@@ -2301,7 +2273,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta,
 	lq_sta->tx_agg_tid_en = IWL_AGG_ALL_TID;
 	lq_sta->drv = priv;
 
-	rs_initialize_lq(priv, conf, sta);
+	rs_initialize_lq(priv, conf, sta, lq_sta);
 }
 
 static void rs_fill_link_cmd(const struct iwl_priv *priv,
@@ -2423,9 +2395,9 @@ static void rs_fill_link_cmd(const struct iwl_priv *priv,
 	lq_cmd->agg_params.agg_time_limit = cpu_to_le16(4000);
 }
 
-static void *rs_alloc(struct ieee80211_local *local)
+static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 {
-	return local->hw.priv;
+	return hw->priv;
 }
 /* rate scale requires free function to be implemented */
 static void rs_free(void *priv_rate)
@@ -2446,12 +2418,12 @@ static void rs_clear(void *priv_rate)
 #endif /* CONFIG_IWLWIFI_DEBUG */
 }
 
-static void rs_free_sta(void *priv_rate, void *priv_sta)
+static void rs_free_sta(void *priv_r, struct ieee80211_sta *sta,
+			void *priv_sta)
 {
 	struct iwl_lq_sta *lq_sta = priv_sta;
-	struct iwl_priv *priv;
+	struct iwl_priv *priv = priv_r;
 
-	priv = (struct iwl_priv *)priv_rate;
 	IWL_DEBUG_RATE("enter\n");
 	kfree(lq_sta);
 	IWL_DEBUG_RATE("leave\n");
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 62b26befddc..d15a2c99795 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -7370,15 +7370,6 @@ static ssize_t show_temperature(struct device *d,
 
 static DEVICE_ATTR(temperature, S_IRUGO, show_temperature, NULL);
 
-static ssize_t show_rs_window(struct device *d,
-			      struct device_attribute *attr,
-			      char *buf)
-{
-	struct iwl3945_priv *priv = d->driver_data;
-	return iwl3945_fill_rs_info(priv->hw, buf, IWL_AP_ID);
-}
-static DEVICE_ATTR(rs_window, S_IRUGO, show_rs_window, NULL);
-
 static ssize_t show_tx_power(struct device *d,
 			     struct device_attribute *attr, char *buf)
 {
@@ -7840,7 +7831,6 @@ static struct attribute *iwl3945_sysfs_entries[] = {
 #endif
 	&dev_attr_power_level.attr,
 	&dev_attr_retry_rate.attr,
-	&dev_attr_rs_window.attr,
 	&dev_attr_statistics.attr,
 	&dev_attr_status.attr,
 	&dev_attr_temperature.attr,
@@ -7908,6 +7898,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e
 	SET_IEEE80211_DEV(hw, &pdev->dev);
 
 	hw->rate_control_algorithm = "iwl-3945-rs";
+	hw->sta_data_size = sizeof(struct iwl3945_sta_priv);
 
 	IWL_DEBUG_INFO("*** LOAD DRIVER ***\n");
 	priv = hw->priv;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 003e4a03874..f5f5b1ff158 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1800,4 +1800,72 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw,
 struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw,
 					 const u8 *addr);
 
+
+/* Rate control API */
+/**
+ * struct rate_selection - rate information for/from rate control algorithms
+ *
+ * @rate_idx: selected transmission rate index
+ * @nonerp_idx: Non-ERP rate to use instead if ERP cannot be used
+ * @probe_idx: rate for probing (or -1)
+ * @max_rate_idx: maximum rate index that can be used, this is
+ *	input to the algorithm and will be enforced
+ */
+struct rate_selection {
+	s8 rate_idx, nonerp_idx, probe_idx, max_rate_idx;
+};
+
+struct rate_control_ops {
+	struct module *module;
+	const char *name;
+	void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir);
+	void (*clear)(void *priv);
+	void (*free)(void *priv);
+
+	void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp);
+	void (*rate_init)(void *priv, struct ieee80211_supported_band *sband,
+			  struct ieee80211_sta *sta, void *priv_sta);
+	void (*free_sta)(void *priv, struct ieee80211_sta *sta,
+			 void *priv_sta);
+
+	void (*tx_status)(void *priv, struct ieee80211_supported_band *sband,
+			  struct ieee80211_sta *sta, void *priv_sta,
+			  struct sk_buff *skb);
+	void (*get_rate)(void *priv, struct ieee80211_supported_band *sband,
+			 struct ieee80211_sta *sta, void *priv_sta,
+			 struct sk_buff *skb,
+			 struct rate_selection *sel);
+
+	void (*add_sta_debugfs)(void *priv, void *priv_sta,
+				struct dentry *dir);
+	void (*remove_sta_debugfs)(void *priv, void *priv_sta);
+};
+
+static inline int rate_supported(struct ieee80211_sta *sta,
+				 enum ieee80211_band band,
+				 int index)
+{
+	return (sta == NULL || sta->supp_rates[band] & BIT(index));
+}
+
+static inline s8
+rate_lowest_index(struct ieee80211_supported_band *sband,
+		  struct ieee80211_sta *sta)
+{
+	int i;
+
+	for (i = 0; i < sband->n_bitrates; i++)
+		if (rate_supported(sta, sband->band, i))
+			return i;
+
+	/* warn when we cannot find a rate. */
+	WARN_ON(1);
+
+	return 0;
+}
+
+
+int ieee80211_rate_control_register(struct rate_control_ops *ops);
+void ieee80211_rate_control_unregister(struct rate_control_ops *ops);
+
 #endif /* MAC80211_H */
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 89a183c2327..855126a3039 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -693,7 +693,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 
 	sta_apply_parameters(local, sta, params);
 
-	rate_control_rate_init(sta, local);
+	rate_control_rate_init(sta);
 
 	rcu_read_lock();
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 0b25b0f46b1..8025b294588 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -724,6 +724,8 @@ struct ieee80211_local {
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct local_debugfsdentries {
+		struct dentry *rcdir;
+		struct dentry *rcname;
 		struct dentry *frequency;
 		struct dentry *antenna_sel_tx;
 		struct dentry *antenna_sel_rx;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 7d2d5a041e2..d608c44047c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -542,6 +542,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	u16 frag, type;
 	__le16 fc;
+	struct ieee80211_supported_band *sband;
 	struct ieee80211_tx_status_rtap_hdr *rthdr;
 	struct ieee80211_sub_if_data *sdata;
 	struct net_device *prev_dev = NULL;
@@ -588,7 +589,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			sta->tx_retry_count += info->status.retry_count;
 		}
 
-		rate_control_tx_status(local->mdev, skb);
+		sband = local->hw.wiphy->bands[info->band];
+		rate_control_tx_status(local, sband, sta, skb);
 	}
 
 	rcu_read_unlock();
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 8611a8318c9..109c3a7e63a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1323,7 +1323,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info);
 	}
 
-	rate_control_rate_init(sta, local);
+	rate_control_rate_init(sta);
 
 	if (elems.wmm_param) {
 		set_sta_flags(sta, WLAN_STA_WME);
@@ -2342,7 +2342,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 	sta->sta.supp_rates[band] = supp_rates |
 			ieee80211_mandatory_rates(local, band);
 
-	rate_control_rate_init(sta, local);
+	rate_control_rate_init(sta);
 
 	if (sta_info_insert(sta))
 		return NULL;
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 0388c090dfe..5d786720d93 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -12,6 +12,7 @@
 #include <linux/rtnetlink.h>
 #include "rate.h"
 #include "ieee80211_i.h"
+#include "debugfs.h"
 
 struct rate_control_alg {
 	struct list_head list;
@@ -127,19 +128,46 @@ static void ieee80211_rate_control_ops_put(struct rate_control_ops *ops)
 	module_put(ops->module);
 }
 
+#ifdef CONFIG_MAC80211_DEBUGFS
+static ssize_t rcname_read(struct file *file, char __user *userbuf,
+			   size_t count, loff_t *ppos)
+{
+	struct rate_control_ref *ref = file->private_data;
+	int len = strlen(ref->ops->name);
+
+	return simple_read_from_buffer(userbuf, count, ppos,
+				       ref->ops->name, len);
+}
+
+static const struct file_operations rcname_ops = {
+	.read = rcname_read,
+	.open = mac80211_open_file_generic,
+};
+#endif
+
 struct rate_control_ref *rate_control_alloc(const char *name,
 					    struct ieee80211_local *local)
 {
+	struct dentry *debugfsdir = NULL;
 	struct rate_control_ref *ref;
 
 	ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL);
 	if (!ref)
 		goto fail_ref;
 	kref_init(&ref->kref);
+	ref->local = local;
 	ref->ops = ieee80211_rate_control_ops_get(name);
 	if (!ref->ops)
 		goto fail_ops;
-	ref->priv = ref->ops->alloc(local);
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir);
+	local->debugfs.rcdir = debugfsdir;
+	local->debugfs.rcname = debugfs_create_file("name", 0400, debugfsdir,
+						    ref, &rcname_ops);
+#endif
+
+	ref->priv = ref->ops->alloc(&local->hw, debugfsdir);
 	if (!ref->priv)
 		goto fail_priv;
 	return ref;
@@ -158,29 +186,46 @@ static void rate_control_release(struct kref *kref)
 
 	ctrl_ref = container_of(kref, struct rate_control_ref, kref);
 	ctrl_ref->ops->free(ctrl_ref->priv);
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	debugfs_remove(ctrl_ref->local->debugfs.rcname);
+	ctrl_ref->local->debugfs.rcname = NULL;
+	debugfs_remove(ctrl_ref->local->debugfs.rcdir);
+	ctrl_ref->local->debugfs.rcdir = NULL;
+#endif
+
 	ieee80211_rate_control_ops_put(ctrl_ref->ops);
 	kfree(ctrl_ref);
 }
 
-void rate_control_get_rate(struct net_device *dev,
+void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
 			   struct ieee80211_supported_band *sband,
-			   struct sk_buff *skb,
+			   struct sta_info *sta, struct sk_buff *skb,
 			   struct rate_selection *sel)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct rate_control_ref *ref = local->rate_ctrl;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct sta_info *sta;
+	struct rate_control_ref *ref = sdata->local->rate_ctrl;
+	void *priv_sta = NULL;
+	struct ieee80211_sta *ista = NULL;
 	int i;
 
-	rcu_read_lock();
-	sta = sta_info_get(local, hdr->addr1);
-
 	sel->rate_idx = -1;
 	sel->nonerp_idx = -1;
 	sel->probe_idx = -1;
+	sel->max_rate_idx = sdata->max_ratectrl_rateidx;
+
+	if (sta) {
+		ista = &sta->sta;
+		priv_sta = sta->rate_ctrl_priv;
+	}
+
+	if (sta && sdata->force_unicast_rateidx > -1)
+		sel->rate_idx = sdata->force_unicast_rateidx;
+	else
+		ref->ops->get_rate(ref->priv, sband, ista, priv_sta, skb, sel);
 
-	ref->ops->get_rate(ref->priv, dev, sband, skb, sel);
+	if (sdata->max_ratectrl_rateidx > -1 &&
+	    sel->rate_idx > sdata->max_ratectrl_rateidx)
+		sel->rate_idx = sdata->max_ratectrl_rateidx;
 
 	BUG_ON(sel->rate_idx < 0);
 
@@ -191,13 +236,11 @@ void rate_control_get_rate(struct net_device *dev,
 			if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate)
 				break;
 
-			if (rate_supported(sta, sband->band, i) &&
+			if (rate_supported(ista, sband->band, i) &&
 			    !(rate->flags & IEEE80211_RATE_ERP_G))
 				sel->nonerp_idx = i;
 		}
 	}
-
-	rcu_read_unlock();
 }
 
 struct rate_control_ref *rate_control_get(struct rate_control_ref *ref)
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 5f18c27eb90..eb94e584d24 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -19,77 +19,48 @@
 #include "ieee80211_i.h"
 #include "sta_info.h"
 
-/**
- * struct rate_selection - rate selection for rate control algos
- * @rate: selected transmission rate index
- * @nonerp: Non-ERP rate to use instead if ERP cannot be used
- * @probe: rate for probing (or -1)
- *
- */
-struct rate_selection {
-	s8 rate_idx, nonerp_idx, probe_idx;
-};
-
-struct rate_control_ops {
-	struct module *module;
-	const char *name;
-	void (*tx_status)(void *priv, struct net_device *dev,
-			  struct sk_buff *skb);
-	void (*get_rate)(void *priv, struct net_device *dev,
-			 struct ieee80211_supported_band *band,
-			 struct sk_buff *skb,
-			 struct rate_selection *sel);
-	void (*rate_init)(void *priv, void *priv_sta,
-			  struct ieee80211_local *local, struct sta_info *sta);
-	void (*clear)(void *priv);
-
-	void *(*alloc)(struct ieee80211_local *local);
-	void (*free)(void *priv);
-	void *(*alloc_sta)(void *priv, gfp_t gfp);
-	void (*free_sta)(void *priv, void *priv_sta);
-
-	int (*add_attrs)(void *priv, struct kobject *kobj);
-	void (*remove_attrs)(void *priv, struct kobject *kobj);
-	void (*add_sta_debugfs)(void *priv, void *priv_sta,
-				struct dentry *dir);
-	void (*remove_sta_debugfs)(void *priv, void *priv_sta);
-};
-
 struct rate_control_ref {
+	struct ieee80211_local *local;
 	struct rate_control_ops *ops;
 	void *priv;
 	struct kref kref;
 };
 
-int ieee80211_rate_control_register(struct rate_control_ops *ops);
-void ieee80211_rate_control_unregister(struct rate_control_ops *ops);
-
 /* Get a reference to the rate control algorithm. If `name' is NULL, get the
  * first available algorithm. */
 struct rate_control_ref *rate_control_alloc(const char *name,
 					    struct ieee80211_local *local);
-void rate_control_get_rate(struct net_device *dev,
+void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
 			   struct ieee80211_supported_band *sband,
-			   struct sk_buff *skb,
+			   struct sta_info *sta, struct sk_buff *skb,
 			   struct rate_selection *sel);
 struct rate_control_ref *rate_control_get(struct rate_control_ref *ref);
 void rate_control_put(struct rate_control_ref *ref);
 
-static inline void rate_control_tx_status(struct net_device *dev,
+static inline void rate_control_tx_status(struct ieee80211_local *local,
+					  struct ieee80211_supported_band *sband,
+					  struct sta_info *sta,
 					  struct sk_buff *skb)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct rate_control_ref *ref = local->rate_ctrl;
+	struct ieee80211_sta *ista = &sta->sta;
+	void *priv_sta = sta->rate_ctrl_priv;
 
-	ref->ops->tx_status(ref->priv, dev, skb);
+	ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb);
 }
 
 
-static inline void rate_control_rate_init(struct sta_info *sta,
-					  struct ieee80211_local *local)
+static inline void rate_control_rate_init(struct sta_info *sta)
 {
+	struct ieee80211_local *local = sta->sdata->local;
 	struct rate_control_ref *ref = sta->rate_ctrl;
-	ref->ops->rate_init(ref->priv, sta->rate_ctrl_priv, local, sta);
+	struct ieee80211_sta *ista = &sta->sta;
+	void *priv_sta = sta->rate_ctrl_priv;
+	struct ieee80211_supported_band *sband;
+
+	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+
+	ref->ops->rate_init(ref->priv, sband, ista, priv_sta);
 }
 
 
@@ -100,15 +71,19 @@ static inline void rate_control_clear(struct ieee80211_local *local)
 }
 
 static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
+					   struct ieee80211_sta *sta,
 					   gfp_t gfp)
 {
-	return ref->ops->alloc_sta(ref->priv, gfp);
+	return ref->ops->alloc_sta(ref->priv, sta, gfp);
 }
 
-static inline void rate_control_free_sta(struct rate_control_ref *ref,
-					 void *priv)
+static inline void rate_control_free_sta(struct sta_info *sta)
 {
-	ref->ops->free_sta(ref->priv, priv);
+	struct rate_control_ref *ref = sta->rate_ctrl;
+	struct ieee80211_sta *ista = &sta->sta;
+	void *priv_sta = sta->rate_ctrl_priv;
+
+	ref->ops->free_sta(ref->priv, ista, priv_sta);
 }
 
 static inline void rate_control_add_sta_debugfs(struct sta_info *sta)
@@ -130,31 +105,6 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
 #endif
 }
 
-static inline int rate_supported(struct sta_info *sta,
-				 enum ieee80211_band band,
-				 int index)
-{
-	return (sta == NULL || sta->sta.supp_rates[band] & BIT(index));
-}
-
-static inline s8
-rate_lowest_index(struct ieee80211_local *local,
-		  struct ieee80211_supported_band *sband,
-		  struct sta_info *sta)
-{
-	int i;
-
-	for (i = 0; i < sband->n_bitrates; i++)
-		if (rate_supported(sta, sband->band, i))
-			return i;
-
-	/* warn when we cannot find a rate. */
-	WARN_ON(1);
-
-	return 0;
-}
-
-
 /* functions for rate control related to a device */
 int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
 				 const char *name);
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index ffafc5da572..01d64d53f3b 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -124,7 +124,6 @@ struct rc_pid_events_file_info {
  * struct rc_pid_debugfs_entries - tunable parameters
  *
  * Algorithm parameters, tunable via debugfs.
- * @dir: the debugfs directory for a specific phy
  * @target: target percentage for failed frames
  * @sampling_period: error sampling interval in milliseconds
  * @coeff_p: absolute value of the proportional coefficient
@@ -143,7 +142,6 @@ struct rc_pid_events_file_info {
  *	ordering of rates)
  */
 struct rc_pid_debugfs_entries {
-	struct dentry *dir;
 	struct dentry *target;
 	struct dentry *sampling_period;
 	struct dentry *coeff_p;
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index bc1c4569caa..86eb374e3b8 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -68,18 +68,14 @@
  * exhibited a worse failed frames behaviour and we'll choose the highest rate
  * whose failed frames behaviour is not worse than the one of the original rate
  * target. While at it, check that the new rate is valid. */
-static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
-					 struct sta_info *sta, int adj,
+static void rate_control_pid_adjust_rate(struct ieee80211_supported_band *sband,
+					 struct ieee80211_sta *sta,
+					 struct rc_pid_sta_info *spinfo, int adj,
 					 struct rc_pid_rateinfo *rinfo)
 {
-	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_supported_band *sband;
 	int cur_sorted, new_sorted, probe, tmp, n_bitrates, band;
-	struct rc_pid_sta_info *spinfo = (void *)sta->rate_ctrl_priv;
 	int cur = spinfo->txrate_idx;
 
-	sdata = sta->sdata;
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 	band = sband->band;
 	n_bitrates = sband->n_bitrates;
 
@@ -146,13 +142,11 @@ static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l)
 }
 
 static void rate_control_pid_sample(struct rc_pid_info *pinfo,
-				    struct ieee80211_local *local,
-				    struct sta_info *sta)
+				    struct ieee80211_supported_band *sband,
+				    struct ieee80211_sta *sta,
+				    struct rc_pid_sta_info *spinfo)
 {
-	struct ieee80211_sub_if_data *sdata = sta->sdata;
-	struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv;
 	struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
-	struct ieee80211_supported_band *sband;
 	u32 pf;
 	s32 err_avg;
 	u32 err_prop;
@@ -161,9 +155,6 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 	int adj, i, j, tmp;
 	unsigned long period;
 
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-	spinfo = sta->rate_ctrl_priv;
-
 	/* In case nothing happened during the previous control interval, turn
 	 * the sharpening factor on. */
 	period = (HZ * pinfo->sampling_period + 500) / 1000;
@@ -179,11 +170,15 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 	if (unlikely(spinfo->tx_num_xmit == 0))
 		pf = spinfo->last_pf;
 	else {
+		/* XXX: BAD HACK!!! */
+		struct sta_info *si = container_of(sta, struct sta_info, sta);
+
 		pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit;
-		if (ieee80211_vif_is_mesh(&sdata->vif) && pf == 100)
-			mesh_plink_broken(sta);
+
+		if (ieee80211_vif_is_mesh(&si->sdata->vif) && pf == 100)
+			mesh_plink_broken(si);
 		pf <<= RC_PID_ARITH_SHIFT;
-		sta->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9)
+		si->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9)
 					>> RC_PID_ARITH_SHIFT;
 	}
 
@@ -229,43 +224,25 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo,
 
 	/* Change rate. */
 	if (adj)
-		rate_control_pid_adjust_rate(local, sta, adj, rinfo);
+		rate_control_pid_adjust_rate(sband, sta, spinfo, adj, rinfo);
 }
 
-static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
+static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_band *sband,
+				       struct ieee80211_sta *sta, void *priv_sta,
 				       struct sk_buff *skb)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct ieee80211_sub_if_data *sdata;
 	struct rc_pid_info *pinfo = priv;
-	struct sta_info *sta;
-	struct rc_pid_sta_info *spinfo;
+	struct rc_pid_sta_info *spinfo = priv_sta;
 	unsigned long period;
-	struct ieee80211_supported_band *sband;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
-	rcu_read_lock();
-
-	sta = sta_info_get(local, hdr->addr1);
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-
-	if (!sta)
-		goto unlock;
-
-	spinfo = sta->rate_ctrl_priv;
-
-	/* Don't update the state if we're not controlling the rate. */
-	sdata = sta->sdata;
-	if (sdata->force_unicast_rateidx > -1) {
-		spinfo->txrate_idx = sdata->max_ratectrl_rateidx;
-		goto unlock;
-	}
+	if (!spinfo)
+		return;
 
 	/* Ignore all frames that were sent with a different rate than the rate
 	 * we currently advise mac80211 to use. */
 	if (info->tx_rate_idx != spinfo->txrate_idx)
-		goto unlock;
+		return;
 
 	spinfo->tx_num_xmit++;
 
@@ -289,78 +266,63 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
 	if (!period)
 		period = 1;
 	if (time_after(jiffies, spinfo->last_sample + period))
-		rate_control_pid_sample(pinfo, local, sta);
-
- unlock:
-	rcu_read_unlock();
+		rate_control_pid_sample(pinfo, sband, sta, spinfo);
 }
 
-static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
-				      struct ieee80211_supported_band *sband,
-				      struct sk_buff *skb,
-				      struct rate_selection *sel)
+static void
+rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband,
+			  struct ieee80211_sta *sta, void *priv_sta,
+			  struct sk_buff *skb,
+			  struct rate_selection *sel)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct ieee80211_sub_if_data *sdata;
-	struct rc_pid_sta_info *spinfo;
-	struct sta_info *sta;
+	struct rc_pid_sta_info *spinfo = priv_sta;
 	int rateidx;
 	u16 fc;
 
-	rcu_read_lock();
-
-	sta = sta_info_get(local, hdr->addr1);
-
 	/* Send management frames and broadcast/multicast data using lowest
 	 * rate. */
 	fc = le16_to_cpu(hdr->frame_control);
-	if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
-	    is_multicast_ether_addr(hdr->addr1) || !sta) {
-		sel->rate_idx = rate_lowest_index(local, sband, sta);
-		rcu_read_unlock();
+	if (!sta || !spinfo ||
+	    (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
+	    is_multicast_ether_addr(hdr->addr1)) {
+		sel->rate_idx = rate_lowest_index(sband, sta);
 		return;
 	}
 
-	/* If a forced rate is in effect, select it. */
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	spinfo = (struct rc_pid_sta_info *)sta->rate_ctrl_priv;
-	if (sdata->force_unicast_rateidx > -1)
-		spinfo->txrate_idx = sdata->force_unicast_rateidx;
-
 	rateidx = spinfo->txrate_idx;
 
 	if (rateidx >= sband->n_bitrates)
 		rateidx = sband->n_bitrates - 1;
 
-	rcu_read_unlock();
-
 	sel->rate_idx = rateidx;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
-	rate_control_pid_event_tx_rate(
-		&((struct rc_pid_sta_info *) sta->rate_ctrl_priv)->events,
+	rate_control_pid_event_tx_rate(&spinfo->events,
 		rateidx, sband->bitrates[rateidx].bitrate);
 #endif
 }
 
-static void rate_control_pid_rate_init(void *priv, void *priv_sta,
-					  struct ieee80211_local *local,
-					  struct sta_info *sta)
+static void
+rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband,
+			   struct ieee80211_sta *sta, void *priv_sta)
 {
+	struct rc_pid_sta_info *spinfo = priv_sta;
+	struct sta_info *si;
+
 	/* TODO: This routine should consider using RSSI from previous packets
 	 * as we need to have IEEE 802.1X auth succeed immediately after assoc..
 	 * Until that method is implemented, we will use the lowest supported
 	 * rate as a workaround. */
-	struct ieee80211_supported_band *sband;
-	struct rc_pid_sta_info *spinfo = (void *)sta->rate_ctrl_priv;
 
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-	spinfo->txrate_idx = rate_lowest_index(local, sband, sta);
-	sta->fail_avg = 0;
+	spinfo->txrate_idx = rate_lowest_index(sband, sta);
+	/* HACK */
+	si = container_of(sta, struct sta_info, sta);
+	si->fail_avg = 0;
 }
 
-static void *rate_control_pid_alloc(struct ieee80211_local *local)
+static void *rate_control_pid_alloc(struct ieee80211_hw *hw,
+				    struct dentry *debugfsdir)
 {
 	struct rc_pid_info *pinfo;
 	struct rc_pid_rateinfo *rinfo;
@@ -371,7 +333,7 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
 	struct rc_pid_debugfs_entries *de;
 #endif
 
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+	sband = hw->wiphy->bands[hw->conf.channel->band];
 
 	pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
 	if (!pinfo)
@@ -426,30 +388,28 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local)
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	de = &pinfo->dentries;
-	de->dir = debugfs_create_dir("rc80211_pid",
-				     local->hw.wiphy->debugfsdir);
 	de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR,
-					de->dir, &pinfo->target);
+					debugfsdir, &pinfo->target);
 	de->sampling_period = debugfs_create_u32("sampling_period",
-						 S_IRUSR | S_IWUSR, de->dir,
+						 S_IRUSR | S_IWUSR, debugfsdir,
 						 &pinfo->sampling_period);
 	de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR,
-					 de->dir, &pinfo->coeff_p);
+					 debugfsdir, &pinfo->coeff_p);
 	de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR,
-					 de->dir, &pinfo->coeff_i);
+					 debugfsdir, &pinfo->coeff_i);
 	de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR,
-					 de->dir, &pinfo->coeff_d);
+					 debugfsdir, &pinfo->coeff_d);
 	de->smoothing_shift = debugfs_create_u32("smoothing_shift",
-						 S_IRUSR | S_IWUSR, de->dir,
+						 S_IRUSR | S_IWUSR, debugfsdir,
 						 &pinfo->smoothing_shift);
 	de->sharpen_factor = debugfs_create_u32("sharpen_factor",
-					       S_IRUSR | S_IWUSR, de->dir,
+					       S_IRUSR | S_IWUSR, debugfsdir,
 					       &pinfo->sharpen_factor);
 	de->sharpen_duration = debugfs_create_u32("sharpen_duration",
-						  S_IRUSR | S_IWUSR, de->dir,
+						  S_IRUSR | S_IWUSR, debugfsdir,
 						  &pinfo->sharpen_duration);
 	de->norm_offset = debugfs_create_u32("norm_offset",
-					     S_IRUSR | S_IWUSR, de->dir,
+					     S_IRUSR | S_IWUSR, debugfsdir,
 					     &pinfo->norm_offset);
 #endif
 
@@ -471,7 +431,6 @@ static void rate_control_pid_free(void *priv)
 	debugfs_remove(de->coeff_p);
 	debugfs_remove(de->sampling_period);
 	debugfs_remove(de->target);
-	debugfs_remove(de->dir);
 #endif
 
 	kfree(pinfo->rinfo);
@@ -482,7 +441,8 @@ static void rate_control_pid_clear(void *priv)
 {
 }
 
-static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp)
+static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta,
+					gfp_t gfp)
 {
 	struct rc_pid_sta_info *spinfo;
 
@@ -500,10 +460,10 @@ static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp)
 	return spinfo;
 }
 
-static void rate_control_pid_free_sta(void *priv, void *priv_sta)
+static void rate_control_pid_free_sta(void *priv, struct ieee80211_sta *sta,
+				      void *priv_sta)
 {
-	struct rc_pid_sta_info *spinfo = priv_sta;
-	kfree(spinfo);
+	kfree(priv_sta);
 }
 
 static struct rate_control_ops mac80211_rcpid = {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index d9774ac2e0f..9b72d15bc8d 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -93,8 +93,7 @@ static int sta_info_hash_del(struct ieee80211_local *local,
 }
 
 /* protected by RCU */
-static struct sta_info *__sta_info_find(struct ieee80211_local *local,
-					const u8 *addr)
+struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr)
 {
 	struct sta_info *sta;
 
@@ -107,12 +106,6 @@ static struct sta_info *__sta_info_find(struct ieee80211_local *local,
 	return sta;
 }
 
-struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr)
-{
-	return __sta_info_find(local, addr);
-}
-EXPORT_SYMBOL(sta_info_get);
-
 struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx,
 				     struct net_device *dev)
 {
@@ -146,7 +139,7 @@ static void __sta_info_free(struct ieee80211_local *local,
 {
 	DECLARE_MAC_BUF(mbuf);
 
-	rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv);
+	rate_control_free_sta(sta);
 	rate_control_put(sta->rate_ctrl);
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -244,7 +237,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 
 	sta->rate_ctrl = rate_control_get(local->rate_ctrl);
 	sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl,
-						     gfp);
+						     &sta->sta, gfp);
 	if (!sta->rate_ctrl_priv) {
 		rate_control_put(sta->rate_ctrl);
 		kfree(sta);
@@ -308,7 +301,7 @@ int sta_info_insert(struct sta_info *sta)
 
 	spin_lock_irqsave(&local->sta_lock, flags);
 	/* check if STA exists already */
-	if (__sta_info_find(local, sta->sta.addr)) {
+	if (sta_info_get(local, sta->sta.addr)) {
 		spin_unlock_irqrestore(&local->sta_lock, flags);
 		err = -EEXIST;
 		goto out_free;
@@ -834,7 +827,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
 struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw,
                                          const u8 *addr)
 {
-	struct sta_info *sta = __sta_info_find(hw_to_local(hw), addr);
+	struct sta_info *sta = sta_info_get(hw_to_local(hw), addr);
 
 	if (!sta)
 		return NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index daedfa9e1c6..c3f43696462 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -416,7 +416,7 @@ static inline u32 get_sta_flags(struct sta_info *sta)
 /*
  * Get a STA info, must have be under RCU read lock.
  */
-struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr);
+struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr);
 /*
  * Get STA info by index, BROKEN!
  */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 00d96e63dce..0cc2e23f082 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -446,7 +446,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
 	if (likely(tx->rate_idx < 0)) {
-		rate_control_get_rate(tx->dev, sband, tx->skb, &rsel);
+		rate_control_get_rate(tx->sdata, sband, tx->sta,
+				      tx->skb, &rsel);
 		if (tx->sta)
 			tx->sta->last_txrate_idx = rsel.rate_idx;
 		tx->rate_idx = rsel.rate_idx;
@@ -1955,7 +1956,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	skb->do_not_encrypt = 1;
 
 	info->band = band;
-	rate_control_get_rate(local->mdev, sband, skb, &rsel);
+	rate_control_get_rate(sdata, sband, NULL, skb, &rsel);
 
 	if (unlikely(rsel.rate_idx < 0)) {
 		if (net_ratelimit()) {
-- 
cgit v1.2.3-70-g09d2


From 4dfe51e10047a60e82734860cec0d9f660b102fc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 19 Sep 2008 05:10:34 +0200
Subject: mac80211: probe with correct SSID

While associated, we should probe with the SSID we're associated to,
not the scan SSID.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 109c3a7e63a..52a64813301 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -942,8 +942,8 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata,
 				disassoc = 1;
 			} else
 				ieee80211_send_probe_req(sdata, ifsta->bssid,
-							 local->scan_ssid,
-							 local->scan_ssid_len);
+							 ifsta->ssid,
+							 ifsta->ssid_len);
 			ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL;
 		} else {
 			ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL;
-- 
cgit v1.2.3-70-g09d2


From 4492bea656b70dad6a9ae7b59b1430fa38ba3345 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 22 Sep 2008 17:10:10 +0300
Subject: mac80211: fix led behavior in IBSS

This patch fixes the led behavior in IBSS. After we joined an IBSS cell we
need to inform the led that we got associated. Although there is no 802.11
association in IBSS mode, the semantic of "There is a link" is relevant.
This allows the led to blink in IBSS mode (at least this solves a bug for
iwlwifi).

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Reviewed-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 52a64813301..e859a0ab616 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1452,6 +1452,8 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED;
 	mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
 
+	ieee80211_led_assoc(local, true);
+
 	memset(&wrqu, 0, sizeof(wrqu));
 	memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN);
 	wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
-- 
cgit v1.2.3-70-g09d2


From 638af07386972861272ed9d0cff01cad528fdceb Mon Sep 17 00:00:00 2001
From: Denis ChengRq <crquan@gmail.com>
Date: Tue, 23 Sep 2008 02:35:37 +0800
Subject: wireless: a global static to local static improvement

There are two improvements in this simple patch:
1. wiphy_counter is a static var only used in one function, so
   can use local static instead of global static;
2. wiphy_counter wrap handling killed one comparision;

Signed-off-by: Denis ChengRq <crquan@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/core.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index d6940085d59..5cadbeb76a1 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -34,7 +34,6 @@ MODULE_DESCRIPTION("wireless configuration support");
  * often because we need to do it for each command */
 LIST_HEAD(cfg80211_drv_list);
 DEFINE_MUTEX(cfg80211_drv_mutex);
-static int wiphy_counter;
 
 /* for debugfs */
 static struct dentry *ieee80211_debugfs_dir;
@@ -206,6 +205,8 @@ out_unlock:
 
 struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
 {
+	static int wiphy_counter;
+
 	struct cfg80211_registered_device *drv;
 	int alloc_size;
 
@@ -222,21 +223,18 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
 
 	mutex_lock(&cfg80211_drv_mutex);
 
-	drv->idx = wiphy_counter;
-
-	/* now increase counter for the next device unless
-	 * it has wrapped previously */
-	if (wiphy_counter >= 0)
-		wiphy_counter++;
-
-	mutex_unlock(&cfg80211_drv_mutex);
+	drv->idx = wiphy_counter++;
 
 	if (unlikely(drv->idx < 0)) {
+		wiphy_counter--;
+		mutex_unlock(&cfg80211_drv_mutex);
 		/* ugh, wrapped! */
 		kfree(drv);
 		return NULL;
 	}
 
+	mutex_unlock(&cfg80211_drv_mutex);
+
 	/* give it a proper name */
 	snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE,
 		 PHY_NAME "%d", drv->idx);
-- 
cgit v1.2.3-70-g09d2


From 72029fe85d8d060b3f966f2dbc36b3c75b5a6532 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p: implement proper trans module refcounting and unregistration

9p trans modules aren't refcounted nor were they unregistered
properly.  Fix it.

* Add 9p_trans_module->owner and reference the module on each trans
  instance creation and put it on destruction.

* Protect v9fs_trans_list with a spinlock.  This isn't strictly
  necessary as the list is manipulated only during module loading /
  unloading but it's a good idea to make the API safe.

* Unregister trans modules when the corresponding module is being
  unloaded.

* While at it, kill unnecessary EXPORT_SYMBOL on p9_trans_fd_init().

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/9p.h        |  1 +
 include/net/9p/transport.h |  9 ++++-
 net/9p/client.c            | 10 ++++-
 net/9p/mod.c               | 92 +++++++++++++++++++++++++++++++++-------------
 net/9p/trans_fd.c          | 11 +++++-
 net/9p/trans_virtio.c      |  2 +
 6 files changed, 95 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index b3d3e27c629..c3626c0ba9d 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -596,4 +596,5 @@ int p9_idpool_check(int id, struct p9_idpool *p);
 int p9_error_init(void);
 int p9_errstr2errno(char *, int);
 int p9_trans_fd_init(void);
+void p9_trans_fd_exit(void);
 #endif /* NET_9P_H */
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 0db3a4038dc..3ca737120a9 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -26,6 +26,8 @@
 #ifndef NET_9P_TRANSPORT_H
 #define NET_9P_TRANSPORT_H
 
+#include <linux/module.h>
+
 /**
  * enum p9_trans_status - different states of underlying transports
  * @Connected: transport is connected and healthy
@@ -91,9 +93,12 @@ struct p9_trans_module {
 	int maxsize;		/* max message size of transport */
 	int def;		/* this transport should be default */
 	struct p9_trans * (*create)(const char *, char *, int, unsigned char);
+	struct module *owner;
 };
 
 void v9fs_register_trans(struct p9_trans_module *m);
-struct p9_trans_module *v9fs_match_trans(const substring_t *name);
-struct p9_trans_module *v9fs_default_trans(void);
+void v9fs_unregister_trans(struct p9_trans_module *m);
+struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name);
+struct p9_trans_module *v9fs_get_default_trans(void);
+void v9fs_put_trans(struct p9_trans_module *m);
 #endif /* NET_9P_TRANSPORT_H */
diff --git a/net/9p/client.c b/net/9p/client.c
index 2ffe40cf2f0..10e320307ec 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -75,7 +75,6 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	int option;
 	int ret = 0;
 
-	clnt->trans_mod = v9fs_default_trans();
 	clnt->dotu = 1;
 	clnt->msize = 8192;
 
@@ -108,7 +107,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 			clnt->msize = option;
 			break;
 		case Opt_trans:
-			clnt->trans_mod = v9fs_match_trans(&args[0]);
+			clnt->trans_mod = v9fs_get_trans_by_name(&args[0]);
 			break;
 		case Opt_legacy:
 			clnt->dotu = 0;
@@ -117,6 +116,10 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 			continue;
 		}
 	}
+
+	if (!clnt->trans_mod)
+		clnt->trans_mod = v9fs_get_default_trans();
+
 	kfree(options);
 	return ret;
 }
@@ -150,6 +153,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 	if (!clnt)
 		return ERR_PTR(-ENOMEM);
 
+	clnt->trans_mod = NULL;
 	clnt->trans = NULL;
 	spin_lock_init(&clnt->lock);
 	INIT_LIST_HEAD(&clnt->fidlist);
@@ -235,6 +239,8 @@ void p9_client_destroy(struct p9_client *clnt)
 		clnt->trans = NULL;
 	}
 
+	v9fs_put_trans(clnt->trans_mod);
+
 	list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist)
 		p9_fid_destroy(fid);
 
diff --git a/net/9p/mod.c b/net/9p/mod.c
index bdee1fb7cc6..1084feb24cb 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -31,6 +31,7 @@
 #include <linux/parser.h>
 #include <net/9p/transport.h>
 #include <linux/list.h>
+#include <linux/spinlock.h>
 
 #ifdef CONFIG_NET_9P_DEBUG
 unsigned int p9_debug_level = 0;	/* feature-rific global debug level  */
@@ -44,8 +45,8 @@ MODULE_PARM_DESC(debug, "9P debugging level");
  *
  */
 
+static DEFINE_SPINLOCK(v9fs_trans_lock);
 static LIST_HEAD(v9fs_trans_list);
-static struct p9_trans_module *v9fs_default_transport;
 
 /**
  * v9fs_register_trans - register a new transport with 9p
@@ -54,48 +55,87 @@ static struct p9_trans_module *v9fs_default_transport;
  */
 void v9fs_register_trans(struct p9_trans_module *m)
 {
+	spin_lock(&v9fs_trans_lock);
 	list_add_tail(&m->list, &v9fs_trans_list);
-	if (m->def)
-		v9fs_default_transport = m;
+	spin_unlock(&v9fs_trans_lock);
 }
 EXPORT_SYMBOL(v9fs_register_trans);
 
 /**
- * v9fs_match_trans - match transport versus registered transports
+ * v9fs_unregister_trans - unregister a 9p transport
+ * @m: the transport to remove
+ *
+ */
+void v9fs_unregister_trans(struct p9_trans_module *m)
+{
+	spin_lock(&v9fs_trans_lock);
+	list_del_init(&m->list);
+	spin_unlock(&v9fs_trans_lock);
+}
+EXPORT_SYMBOL(v9fs_unregister_trans);
+
+/**
+ * v9fs_get_trans_by_name - get transport with the matching name
  * @name: string identifying transport
  *
  */
-struct p9_trans_module *v9fs_match_trans(const substring_t *name)
+struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name)
 {
-	struct list_head *p;
-	struct p9_trans_module *t = NULL;
-
-	list_for_each(p, &v9fs_trans_list) {
-		t = list_entry(p, struct p9_trans_module, list);
-		if (strncmp(t->name, name->from, name->to-name->from) == 0)
-			return t;
-	}
-	return NULL;
+	struct p9_trans_module *t, *found = NULL;
+
+	spin_lock(&v9fs_trans_lock);
+
+	list_for_each_entry(t, &v9fs_trans_list, list)
+		if (strncmp(t->name, name->from, name->to-name->from) == 0 &&
+		    try_module_get(t->owner)) {
+			found = t;
+			break;
+		}
+
+	spin_unlock(&v9fs_trans_lock);
+	return found;
 }
-EXPORT_SYMBOL(v9fs_match_trans);
+EXPORT_SYMBOL(v9fs_get_trans_by_name);
 
 /**
- * v9fs_default_trans - returns pointer to default transport
+ * v9fs_get_default_trans - get the default transport
  *
  */
 
-struct p9_trans_module *v9fs_default_trans(void)
+struct p9_trans_module *v9fs_get_default_trans(void)
 {
-	if (v9fs_default_transport)
-		return v9fs_default_transport;
-	else if (!list_empty(&v9fs_trans_list))
-		return list_first_entry(&v9fs_trans_list,
-					struct p9_trans_module, list);
-	else
-		return NULL;
+	struct p9_trans_module *t, *found = NULL;
+
+	spin_lock(&v9fs_trans_lock);
+
+	list_for_each_entry(t, &v9fs_trans_list, list)
+		if (t->def && try_module_get(t->owner)) {
+			found = t;
+			break;
+		}
+
+	if (!found)
+		list_for_each_entry(t, &v9fs_trans_list, list)
+			if (try_module_get(t->owner)) {
+				found = t;
+				break;
+			}
+
+	spin_unlock(&v9fs_trans_lock);
+	return found;
 }
-EXPORT_SYMBOL(v9fs_default_trans);
+EXPORT_SYMBOL(v9fs_get_default_trans);
 
+/**
+ * v9fs_put_trans - put trans
+ * @m: transport to put
+ *
+ */
+void v9fs_put_trans(struct p9_trans_module *m)
+{
+	if (m)
+		module_put(m->owner);
+}
 
 /**
  * v9fs_init - Initialize module
@@ -120,6 +160,8 @@ static int __init init_p9(void)
 static void __exit exit_p9(void)
 {
 	printk(KERN_INFO "Unloading 9P2000 support\n");
+
+	p9_trans_fd_exit();
 }
 
 module_init(init_p9)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index cdf137af7ad..6a32ffdb942 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1629,6 +1629,7 @@ static struct p9_trans_module p9_tcp_trans = {
 	.maxsize = MAX_SOCK_BUF,
 	.def = 1,
 	.create = p9_trans_create_tcp,
+	.owner = THIS_MODULE,
 };
 
 static struct p9_trans_module p9_unix_trans = {
@@ -1636,6 +1637,7 @@ static struct p9_trans_module p9_unix_trans = {
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
 	.create = p9_trans_create_unix,
+	.owner = THIS_MODULE,
 };
 
 static struct p9_trans_module p9_fd_trans = {
@@ -1643,6 +1645,7 @@ static struct p9_trans_module p9_fd_trans = {
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
 	.create = p9_trans_create_fd,
+	.owner = THIS_MODULE,
 };
 
 int p9_trans_fd_init(void)
@@ -1659,4 +1662,10 @@ int p9_trans_fd_init(void)
 
 	return 0;
 }
-EXPORT_SYMBOL(p9_trans_fd_init);
+
+void p9_trans_fd_exit(void)
+{
+	v9fs_unregister_trans(&p9_tcp_trans);
+	v9fs_unregister_trans(&p9_unix_trans);
+	v9fs_unregister_trans(&p9_fd_trans);
+}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 42adc052b14..94912e077a5 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -528,6 +528,7 @@ static struct p9_trans_module p9_virtio_trans = {
 	.create = p9_virtio_create,
 	.maxsize = PAGE_SIZE*16,
 	.def = 0,
+	.owner = THIS_MODULE,
 };
 
 /* The standard init function */
@@ -545,6 +546,7 @@ static int __init p9_virtio_init(void)
 static void __exit p9_virtio_cleanup(void)
 {
 	unregister_virtio_driver(&p9_virtio_drv);
+	v9fs_unregister_trans(&p9_virtio_trans);
 }
 
 module_init(p9_virtio_init);
-- 
cgit v1.2.3-70-g09d2


From 7dc5d24be06a5ed874af035d52a083a7b61ef1bd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p-trans_fd: fix trans_fd::p9_conn_destroy()

p9_conn_destroy() first kills all current requests by calling
p9_conn_cancel(), then waits for the request list to be cleared by
waiting on p9_conn->equeue.  After that, polling is stopped and the
trans is destroyed.  This sequence has a few problems.

* Read and write works were never cancelled and the p9_conn can be
  destroyed while the works are running as r/w works remove requests
  from the list and dereference the p9_conn from them.

* The list emptiness wait using p9_conn->equeue wouldn't trigger
  because p9_conn_cancel() always clears all the lists and the only
  way the wait can be triggered is to have another task to issue a
  request between the slim window between p9_conn_cancel() and the
  wait, which isn't safe under the current implementation with or
  without the wait.

This patch fixes the problem by first stopping poll, which can
schedule r/w works, first and cancle r/w works which guarantees that
r/w works are not and will not run from that point and then calling
p9_conn_cancel() and do the rest of destruction.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6a32ffdb942..ee0d151da31 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -151,7 +151,6 @@ struct p9_mux_poll_task {
  * @trans: reference to transport instance for this connection
  * @tagpool: id accounting for transactions
  * @err: error state
- * @equeue: event wait_q (?)
  * @req_list: accounting for requests which have been sent
  * @unsent_req_list: accounting for requests that haven't been sent
  * @rcall: current response &p9_fcall structure
@@ -178,7 +177,6 @@ struct p9_conn {
 	struct p9_trans *trans;
 	struct p9_idpool *tagpool;
 	int err;
-	wait_queue_head_t equeue;
 	struct list_head req_list;
 	struct list_head unsent_req_list;
 	struct p9_fcall *rcall;
@@ -430,7 +428,6 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 	}
 
 	m->err = 0;
-	init_waitqueue_head(&m->equeue);
 	INIT_LIST_HEAD(&m->req_list);
 	INIT_LIST_HEAD(&m->unsent_req_list);
 	m->rcall = NULL;
@@ -483,18 +480,13 @@ static void p9_conn_destroy(struct p9_conn *m)
 {
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
 		m->mux_list.prev, m->mux_list.next);
-	p9_conn_cancel(m, -ECONNRESET);
-
-	if (!list_empty(&m->req_list)) {
-		/* wait until all processes waiting on this session exit */
-		P9_DPRINTK(P9_DEBUG_MUX,
-			"mux %p waiting for empty request queue\n", m);
-		wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
-		P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m,
-			list_empty(&m->req_list));
-	}
 
 	p9_mux_poll_stop(m);
+	cancel_work_sync(&m->rq);
+	cancel_work_sync(&m->wq);
+
+	p9_conn_cancel(m, -ECONNRESET);
+
 	m->trans = NULL;
 	p9_idpool_destroy(m->tagpool);
 	kfree(m);
@@ -840,8 +832,6 @@ static void p9_read_work(struct work_struct *work)
 					(*req->cb) (req, req->cba);
 				else
 					kfree(req->rcall);
-
-				wake_up(&m->equeue);
 			}
 		} else {
 			if (err >= 0 && rcall->id != P9_RFLUSH)
@@ -984,8 +974,6 @@ static void p9_mux_flush_cb(struct p9_req *freq, void *a)
 			(*req->cb) (req, req->cba);
 		else
 			kfree(req->rcall);
-
-		wake_up(&m->equeue);
 	}
 
 	kfree(freq->tcall);
@@ -1191,8 +1179,6 @@ void p9_conn_cancel(struct p9_conn *m, int err)
 		else
 			kfree(req->rcall);
 	}
-
-	wake_up(&m->equeue);
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 571ffeafffbfdd0b8f2f9d3b991028797ec87e42 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p-trans_fd: clean up p9_conn_create()

* Use kzalloc() to allocate p9_conn and remove 0/NULL initializations.

* Clean up error return paths.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index ee0d151da31..6c88e898375 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -407,11 +407,11 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 {
 	int i, n;
-	struct p9_conn *m, *mtmp;
+	struct p9_conn *m;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans,
 								trans->msize);
-	m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL);
+	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
 	if (!m)
 		return ERR_PTR(-ENOMEM);
 
@@ -422,24 +422,14 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 	m->trans = trans;
 	m->tagpool = p9_idpool_create();
 	if (IS_ERR(m->tagpool)) {
-		mtmp = ERR_PTR(-ENOMEM);
 		kfree(m);
-		return mtmp;
+		return ERR_PTR(-ENOMEM);
 	}
 
-	m->err = 0;
 	INIT_LIST_HEAD(&m->req_list);
 	INIT_LIST_HEAD(&m->unsent_req_list);
-	m->rcall = NULL;
-	m->rpos = 0;
-	m->rbuf = NULL;
-	m->wpos = m->wsize = 0;
-	m->wbuf = NULL;
 	INIT_WORK(&m->rq, p9_read_work);
 	INIT_WORK(&m->wq, p9_write_work);
-	m->wsched = 0;
-	memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
-	m->poll_task = NULL;
 	n = p9_mux_poll_start(m);
 	if (n) {
 		kfree(m);
@@ -460,10 +450,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 	for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
 		if (IS_ERR(m->poll_waddr[i])) {
 			p9_mux_poll_stop(m);
-			mtmp = (void *)m->poll_waddr;	/* the error code */
 			kfree(m);
-			m = mtmp;
-			break;
+			return (void *)m->poll_waddr;	/* the error code */
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From ec3c68f232f6d98b4596c05c1c7551b44c617c5f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p-trans_fd: don't do fs segment mangling in p9_fd_poll()

p9_fd_poll() is never called with user pointers and f_op->poll()
doesn't expect its arguments to be from userland.  There's no need to
set kernel ds before calling f_op->poll() from p9_fd_poll().  Remove
it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6c88e898375..f6d4af16cb1 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1344,7 +1344,6 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
 {
 	int ret, n;
 	struct p9_trans_fd *ts = NULL;
-	mm_segment_t oldfs;
 
 	if (trans && trans->status == Connected)
 		ts = trans->priv;
@@ -1358,24 +1357,17 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt)
 	if (!ts->wr->f_op || !ts->wr->f_op->poll)
 		return -EIO;
 
-	oldfs = get_fs();
-	set_fs(get_ds());
-
 	ret = ts->rd->f_op->poll(ts->rd, pt);
 	if (ret < 0)
-		goto end;
+		return ret;
 
 	if (ts->rd != ts->wr) {
 		n = ts->wr->f_op->poll(ts->wr, pt);
-		if (n < 0) {
-			ret = n;
-			goto end;
-		}
+		if (n < 0)
+			return n;
 		ret = (ret & ~POLLOUT) | (n & ~POLLIN);
 	}
 
-end:
-	set_fs(oldfs);
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 206ca50de77033c6cc17d0e14fbb12d119a67b01 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Sep 2008 16:22:23 -0500
Subject: 9p-trans_fd: fix and clean up module init/exit paths

trans_fd leaked p9_mux_wq on module unload.  Fix it.  While at it,
collapse p9_mux_global_init() into p9_trans_fd_init().  It's easier to
follow this way and the global poll_tasks array is about to removed
anyway.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_fd.c | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index f6d4af16cb1..0b4eb5f7835 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -238,22 +238,6 @@ static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
 
 static void p9_conn_cancel(struct p9_conn *m, int err);
 
-static int p9_mux_global_init(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
-		p9_mux_poll_tasks[i].task = NULL;
-
-	p9_mux_wq = create_workqueue("v9fs");
-	if (!p9_mux_wq) {
-		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
 static u16 p9_mux_get_tag(struct p9_conn *m)
 {
 	int tag;
@@ -1616,10 +1600,15 @@ static struct p9_trans_module p9_fd_trans = {
 
 int p9_trans_fd_init(void)
 {
-	int ret = p9_mux_global_init();
-	if (ret) {
-		printk(KERN_WARNING "9p: starting mux failed\n");
-		return ret;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++)
+		p9_mux_poll_tasks[i].task = NULL;
+
+	p9_mux_wq = create_workqueue("v9fs");
+	if (!p9_mux_wq) {
+		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
+		return -ENOMEM;
 	}
 
 	v9fs_register_trans(&p9_tcp_trans);
@@ -1634,4 +1623,6 @@ void p9_trans_fd_exit(void)
 	v9fs_unregister_trans(&p9_tcp_trans);
 	v9fs_unregister_trans(&p9_unix_trans);
 	v9fs_unregister_trans(&p9_fd_trans);
+
+	destroy_workqueue(p9_mux_wq);
 }
-- 
cgit v1.2.3-70-g09d2


From 620678244bc7b83287e2e283ed4fe6b959e94b7d Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 24 Sep 2008 16:22:22 -0500
Subject: 9p: introduce missing kfree

Error handling code following a kmalloc should free the allocated data.

The semantic match that finds the problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@r exists@
local idexpression x;
statement S;
expression E;
identifier f,l;
position p1,p2;
expression *ptr != NULL;
@@

(
if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S
|
x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...);
...
if (x == NULL) S
)
<... when != x
     when != if (...) { <+...x...+> }
x->f = E
...>
(
 return \(0\|<+...x...+>\|ptr\);
|
 return@p2 ...;
)

@script:python@
p1 << r.p1;
p2 << r.p2;
@@

print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 net/9p/trans_fd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 0b4eb5f7835..d652baf5ff9 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -870,8 +870,10 @@ static struct p9_req *p9_send_request(struct p9_conn *m,
 	else
 		n = p9_mux_get_tag(m);
 
-	if (n < 0)
+	if (n < 0) {
+		kfree(req);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	p9_set_tag(tc, n);
 
-- 
cgit v1.2.3-70-g09d2


From 16ec4700127d479143c77fd9128dfa17ab572963 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@ericvh-desktop.austin.ibm.com>
Date: Wed, 24 Sep 2008 16:22:22 -0500
Subject: 9p: fix put_data error handling

Abhishek Kulkarni pointed out an inconsistency in the way
errors are returned from p9_put_data.  On deeper exploration it
seems the error handling for this path was completely wrong.
This patch adds checks for allocation problems and propagates
errors correctly.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/conv.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/conv.c b/net/9p/conv.c
index 44547201f5b..5ad3a3bd73b 100644
--- a/net/9p/conv.c
+++ b/net/9p/conv.c
@@ -451,8 +451,10 @@ p9_put_data(struct cbuf *bufp, const char *data, int count,
 		   unsigned char **pdata)
 {
 	*pdata = buf_alloc(bufp, count);
+	if (*pdata == NULL)
+		return -ENOMEM;
 	memmove(*pdata, data, count);
-	return count;
+	return 0;
 }
 
 static int
@@ -460,6 +462,8 @@ p9_put_user_data(struct cbuf *bufp, const char __user *data, int count,
 		   unsigned char **pdata)
 {
 	*pdata = buf_alloc(bufp, count);
+	if (*pdata == NULL)
+		return -ENOMEM;
 	return copy_from_user(*pdata, data, count);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8ca31ce52a5cfd03b960fd81a49197ae85d25347 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Wed, 24 Sep 2008 15:53:39 -0700
Subject: netfilter: ip6t_{hbh,dst}: Rejects not-strict mode on rule insertion

The current code ignores rules for internal options in HBH/DST options
header in packet processing if 'Not strict' mode is specified (which is not
implemented). Clearly it is not expected by user.

Kernel should reject HBH/DST rule insertion with 'Not strict' mode
in the first place.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6t_hbh.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 62e39ace058..26654b26d7f 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -97,8 +97,6 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in,
 	hdrlen -= 2;
 	if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
 		return ret;
-	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
-		pr_debug("Not strict - not implemented");
 	} else {
 		pr_debug("Strict ");
 		pr_debug("#%d ", optinfo->optsnr);
@@ -177,6 +175,12 @@ hbh_mt6_check(const char *tablename, const void *entry,
 		pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
 		return false;
 	}
+
+	if (optsinfo->flags & IP6T_OPTS_NSTRICT) {
+		pr_debug("ip6t_opts: Not strict - not implemented");
+		return false;
+	}
+
 	return true;
 }
 
-- 
cgit v1.2.3-70-g09d2


From e851db5b05408b89b9a9429a66814b79fabee2a1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Jun 2008 18:45:30 -0400
Subject: SUNRPC: Add address family field to svc_serv data structure

Introduce and initialize an address family field in the svc_serv structure.

This field will determine what family to use for the service's listener
sockets and what families are advertised via the local rpcbind daemon.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/svc.c             |  2 +-
 fs/nfs/callback.c          |  3 ++-
 fs/nfsd/nfssvc.c           |  1 +
 include/linux/sunrpc/svc.h |  9 +++++----
 net/sunrpc/svc.c           | 11 ++++++-----
 5 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5bd9bf0fa9d..1553fecc567 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -266,7 +266,7 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
 			"lockd_up: no pid, %d users??\n", nlmsvc_users);
 
 	error = -ENOMEM;
-	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
+	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, AF_INET, NULL);
 	if (!serv) {
 		printk(KERN_WARNING "lockd_up: create service failed\n");
 		goto out;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index f447f4b4476..6a09760c596 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -105,7 +105,8 @@ int nfs_callback_up(void)
 	mutex_lock(&nfs_callback_mutex);
 	if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
 		goto out;
-	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
+	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
+				AF_INET, NULL);
 	ret = -ENOMEM;
 	if (!serv)
 		goto out_err;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 80292ff5e92..7f3d76a7839 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -229,6 +229,7 @@ int nfsd_create_serv(void)
 
 	atomic_set(&nfsd_busy, 0);
 	nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+				      AF_INET,
 				      nfsd_last_thread, nfsd, THIS_MODULE);
 	if (nfsd_serv == NULL)
 		err = -ENOMEM;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index dc69068d94c..23143f38b12 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -66,6 +66,7 @@ struct svc_serv {
 	struct list_head	sv_tempsocks;	/* all temporary sockets */
 	int			sv_tmpcnt;	/* count of temporary sockets */
 	struct timer_list	sv_temptimer;	/* timer for aging temporary sockets */
+	sa_family_t		sv_family;	/* listener's address family */
 
 	char *			sv_name;	/* service name */
 
@@ -381,14 +382,14 @@ struct svc_procedure {
 /*
  * Function prototypes.
  */
-struct svc_serv *  svc_create(struct svc_program *, unsigned int,
-			      void (*shutdown)(struct svc_serv*));
+struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t,
+			    void (*shutdown)(struct svc_serv *));
 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
 					struct svc_pool *pool);
 void		   svc_exit_thread(struct svc_rqst *);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
-			void (*shutdown)(struct svc_serv*), svc_thread_fn,
-			struct module *);
+			sa_family_t, void (*shutdown)(struct svc_serv *),
+			svc_thread_fn, struct module *);
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 void		   svc_destroy(struct svc_serv *);
 int		   svc_process(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 5a32cb7c4bb..9ba17044109 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -357,7 +357,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
  */
 static struct svc_serv *
 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-	   void (*shutdown)(struct svc_serv *serv))
+	   sa_family_t family, void (*shutdown)(struct svc_serv *serv))
 {
 	struct svc_serv	*serv;
 	unsigned int vers;
@@ -366,6 +366,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 	if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
 		return NULL;
+	serv->sv_family    = family;
 	serv->sv_name      = prog->pg_name;
 	serv->sv_program   = prog;
 	serv->sv_nrthreads = 1;
@@ -425,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 struct svc_serv *
 svc_create(struct svc_program *prog, unsigned int bufsize,
-		void (*shutdown)(struct svc_serv *serv))
+		sa_family_t family, void (*shutdown)(struct svc_serv *serv))
 {
-	return __svc_create(prog, bufsize, /*npools*/1, shutdown);
+	return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
 }
 EXPORT_SYMBOL(svc_create);
 
 struct svc_serv *
 svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
-		void (*shutdown)(struct svc_serv *serv),
+		  sa_family_t family, void (*shutdown)(struct svc_serv *serv),
 		  svc_thread_fn func, struct module *mod)
 {
 	struct svc_serv *serv;
 	unsigned int npools = svc_pool_map_get();
 
-	serv = __svc_create(prog, bufsize, npools, shutdown);
+	serv = __svc_create(prog, bufsize, npools, family, shutdown);
 
 	if (serv != NULL) {
 		serv->sv_function = func;
-- 
cgit v1.2.3-70-g09d2


From 5dd248f6f1ffe1f691fd66749e2a3dc8f8eb7b5e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Jun 2008 18:45:37 -0400
Subject: SUNRPC: Use proper INADDR_ANY when setting up RPC services on IPv6

Teach svc_create_xprt() to use the correct ANY address for AF_INET6 based
RPC services.

No caller uses AF_INET6 yet.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/svc_xprt.c | 39 +++++++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e46c825f495..bf5b5cdafeb 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
-int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
-		    int flags)
+static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
+					 struct svc_serv *serv,
+					 unsigned short port, int flags)
 {
-	struct svc_xprt_class *xcl;
 	struct sockaddr_in sin = {
 		.sin_family		= AF_INET,
 		.sin_addr.s_addr	= htonl(INADDR_ANY),
 		.sin_port		= htons(port),
 	};
+	struct sockaddr_in6 sin6 = {
+		.sin6_family		= AF_INET6,
+		.sin6_addr		= IN6ADDR_ANY_INIT,
+		.sin6_port		= htons(port),
+	};
+	struct sockaddr *sap;
+	size_t len;
+
+	switch (serv->sv_family) {
+	case AF_INET:
+		sap = (struct sockaddr *)&sin;
+		len = sizeof(sin);
+		break;
+	case AF_INET6:
+		sap = (struct sockaddr *)&sin6;
+		len = sizeof(sin6);
+		break;
+	default:
+		return ERR_PTR(-EAFNOSUPPORT);
+	}
+
+	return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
+}
+
+int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
+		    int flags)
+{
+	struct svc_xprt_class *xcl;
+
 	dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
 	spin_lock(&svc_xprt_class_lock);
 	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
@@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
 			goto err;
 
 		spin_unlock(&svc_xprt_class_lock);
-		newxprt = xcl->xcl_ops->
-			xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin),
-				   flags);
+		newxprt = __svc_xpo_create(xcl, serv, port, flags);
 		if (IS_ERR(newxprt)) {
 			module_put(xcl->xcl_owner);
 			return PTR_ERR(newxprt);
-- 
cgit v1.2.3-70-g09d2


From b6632339e3afbcbb438a3c8935190ea22464fc99 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 18 Aug 2008 19:33:44 -0400
Subject: SUNRPC: Set V6ONLY socket option for RPC listener sockets

My plan is to use an AF_INET listener on systems that support only IPv4,
and an AF_INET6 listener on systems that can support IPv6. Incoming
IPv4 packets will be posted to an AF_INET6 listener with a mapped IPv4
address.

Max Matveev <makc@sgi.com> says:
  Creating a single listener can be dangerous - if net.ipv6.bindv6only
  is enabled then it's possible to create another listener in v4
  namespace on the same port and steal the traffic from the "unifed"
  listener. You need to disable V6ONLY explicitly via a sockopt to stop
  that.

Set appropriate socket option on RPC server listener sockets to prevent
this.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/svcsock.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3e65719f1ef..f91377c1495 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	struct svc_sock	*svsk;
 	struct sock	*inet;
 	int		pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
+	int		val;
 
 	dprintk("svc: svc_setup_socket %p\n", sock);
 	if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	else
 		svc_tcp_init(svsk, serv);
 
+	/*
+	 * We start one listener per sv_serv.  We want AF_INET
+	 * requests to be automatically shunted to our AF_INET6
+	 * listener using a mapped IPv4 address.  Make sure
+	 * no-one starts an equivalent IPv4 listener, which
+	 * would steal our incoming connections.
+	 */
+	val = 0;
+	if (serv->sv_family == AF_INET6)
+		kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
+					(char *)&val, sizeof(val));
+
 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
 				svsk, svsk->sk_sk);
 
-- 
cgit v1.2.3-70-g09d2


From 14aeb2118d6e9fd9ee988324c740a00c80979093 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 18 Aug 2008 19:34:00 -0400
Subject: SUNRPC: Simplify rpcb_register() API

Bruce suggested there's no need to expose the difference between an error
sending the PMAP_SET request and an error reply from the portmapper to
rpcb_register's callers.  The user space equivalent of rpcb_register() is
pmap_set(3), which returns a bool_t : either the PMAP set worked, or it
didn't.  Simple.

So let's remove the "*okay" argument from rpcb_register() and
rpcb_v4_register(), and simply return an error if any part of the call
didn't work.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/clnt.h |  4 +--
 net/sunrpc/rpcb_clnt.c      | 65 +++++++++++++++++++--------------------------
 net/sunrpc/svc.c            |  8 ++----
 3 files changed, 32 insertions(+), 45 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index e5bfe01ee30..8ac8e75243a 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -124,10 +124,10 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
 
-int		rpcb_register(u32, u32, int, unsigned short, int *);
+int		rpcb_register(u32, u32, int, unsigned short);
 int		rpcb_v4_register(const u32 program, const u32 version,
 				 const struct sockaddr *address,
-				 const char *netid, int *result);
+				 const char *netid);
 int		rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
 void		rpcb_getport_async(struct rpc_task *);
 
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 24db2b4d12d..cc7250d4659 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -176,13 +176,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 }
 
 static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
-			      u32 version, struct rpc_message *msg,
-			      int *result)
+			      u32 version, struct rpc_message *msg)
 {
 	struct rpc_clnt *rpcb_clnt;
-	int error = 0;
+	int result, error = 0;
 
-	*result = 0;
+	msg->rpc_resp = &result;
 
 	rpcb_clnt = rpcb_create_local(addr, addrlen, version);
 	if (!IS_ERR(rpcb_clnt)) {
@@ -191,12 +190,19 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
 	} else
 		error = PTR_ERR(rpcb_clnt);
 
-	if (error < 0)
+	if (error < 0) {
 		printk(KERN_WARNING "RPC: failed to contact local rpcbind "
 				"server (errno %d).\n", -error);
-	dprintk("RPC:       registration status %d/%d\n", error, *result);
+		return error;
+	}
+
+	if (!result) {
+		dprintk("RPC:       registration failed\n");
+		return -EACCES;
+	}
 
-	return error;
+	dprintk("RPC:       registration succeeded\n");
+	return 0;
 }
 
 /**
@@ -205,7 +211,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
  * @vers: RPC version number to bind
  * @prot: transport protocol to register
  * @port: port value to register
- * @okay: OUT: result code
+ *
+ * Returns zero if the registration request was dispatched successfully
+ * and the rpcbind daemon returned success.  Otherwise, returns an errno
+ * value that reflects the nature of the error (request could not be
+ * dispatched, timed out, or rpcbind returned an error).
  *
  * RPC services invoke this function to advertise their contact
  * information via the system's rpcbind daemon.  RPC services
@@ -217,15 +227,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
  * all registered transports for [program, version] from the local
  * rpcbind database.
  *
- * Returns zero if the registration request was dispatched
- * successfully and a reply was received.  The rpcbind daemon's
- * boolean result code is stored in *okay.
- *
- * Returns an errno value and sets *result to zero if there was
- * some problem that prevented the rpcbind request from being
- * dispatched, or if the rpcbind daemon did not respond within
- * the timeout.
- *
  * This function uses rpcbind protocol version 2 to contact the
  * local rpcbind daemon.
  *
@@ -236,7 +237,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
  * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6
  * addresses).
  */
-int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
 {
 	struct rpcbind_args map = {
 		.r_prog		= prog,
@@ -246,7 +247,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 	};
 	struct rpc_message msg = {
 		.rpc_argp	= &map,
-		.rpc_resp	= okay,
 	};
 
 	dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "
@@ -259,7 +259,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 
 	return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
 					sizeof(rpcb_inaddr_loopback),
-					RPCBVERS_2, &msg, okay);
+					RPCBVERS_2, &msg);
 }
 
 /*
@@ -290,7 +290,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
 
 	return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
 					sizeof(rpcb_inaddr_loopback),
-					RPCBVERS_4, msg, msg->rpc_resp);
+					RPCBVERS_4, msg);
 }
 
 /*
@@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
 
 	return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
 					sizeof(rpcb_in6addr_loopback),
-					RPCBVERS_4, msg, msg->rpc_resp);
+					RPCBVERS_4, msg);
 }
 
 /**
@@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
  * @version: RPC version number of service to (un)register
  * @address: address family, IP address, and port to (un)register
  * @netid: netid of transport protocol to (un)register
- * @result: result code from rpcbind RPC call
+ *
+ * Returns zero if the registration request was dispatched successfully
+ * and the rpcbind daemon returned success.  Otherwise, returns an errno
+ * value that reflects the nature of the error (request could not be
+ * dispatched, timed out, or rpcbind returned an error).
  *
  * RPC services invoke this function to advertise their contact
  * information via the system's rpcbind daemon.  RPC services
@@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
  * to zero.  Callers pass a netid of "" to unregister all
  * transport netids associated with [program, version, address].
  *
- * Returns zero if the registration request was dispatched
- * successfully and a reply was received.  The rpcbind daemon's
- * result code is stored in *result.
- *
- * Returns an errno value and sets *result to zero if there was
- * some problem that prevented the rpcbind request from being
- * dispatched, or if the rpcbind daemon did not respond within
- * the timeout.
- *
  * This function uses rpcbind protocol version 4 to contact the
  * local rpcbind daemon.  The local rpcbind daemon must support
  * version 4 of the rpcbind protocol in order for these functions
@@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
  * advertises the service on all IPv4 and IPv6 addresses.
  */
 int rpcb_v4_register(const u32 program, const u32 version,
-		     const struct sockaddr *address, const char *netid,
-		     int *result)
+		     const struct sockaddr *address, const char *netid)
 {
 	struct rpcbind_args map = {
 		.r_prog		= program,
@@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version,
 	};
 	struct rpc_message msg = {
 		.rpc_argp	= &map,
-		.rpc_resp	= result,
 	};
 
-	*result = 0;
-
 	switch (address->sa_family) {
 	case AF_INET:
 		return rpcb_register_netid4((struct sockaddr_in *)address,
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 9ba17044109..9805143d066 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -730,7 +730,7 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
 	struct svc_program	*progp;
 	unsigned long		flags;
 	unsigned int		i;
-	int			error = 0, dummy;
+	int			error = 0;
 
 	if (!port)
 		clear_thread_flag(TIF_SIGPENDING);
@@ -751,13 +751,9 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
 			if (progp->pg_vers[i]->vs_hidden)
 				continue;
 
-			error = rpcb_register(progp->pg_prog, i, proto, port, &dummy);
+			error = rpcb_register(progp->pg_prog, i, proto, port);
 			if (error < 0)
 				break;
-			if (port && !dummy) {
-				error = -EACCES;
-				break;
-			}
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 7252d575ab0e8771269a3d245c36a05ace5152bd Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 18 Aug 2008 19:34:08 -0400
Subject: SUNRPC: Split portmap unregister API into separate function

Create a separate server-level interface for unregistering RPC services.

The mechanics of, and the API for, registering and unregistering RPC
services will diverge further as support for IPv6 is added.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/svc.c | 62 +++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 9805143d066..9eb78a771da 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -28,6 +28,8 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCDSP
 
+static void svc_unregister(const struct svc_serv *serv);
+
 #define svc_serv_is_pooled(serv)    ((serv)->sv_function)
 
 /*
@@ -417,9 +419,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 		spin_lock_init(&pool->sp_lock);
 	}
 
-
 	/* Remove any stale portmap registrations */
-	svc_register(serv, 0, 0);
+	svc_unregister(serv);
 
 	return serv;
 }
@@ -487,8 +488,7 @@ svc_destroy(struct svc_serv *serv)
 	if (svc_serv_is_pooled(serv))
 		svc_pool_map_put();
 
-	/* Unregister service with the portmapper */
-	svc_register(serv, 0, 0);
+	svc_unregister(serv);
 	kfree(serv->sv_pools);
 	kfree(serv);
 }
@@ -728,12 +728,10 @@ int
 svc_register(struct svc_serv *serv, int proto, unsigned short port)
 {
 	struct svc_program	*progp;
-	unsigned long		flags;
 	unsigned int		i;
 	int			error = 0;
 
-	if (!port)
-		clear_thread_flag(TIF_SIGPENDING);
+	BUG_ON(proto == 0 && port == 0);
 
 	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
 		for (i = 0; i < progp->pg_nvers; i++) {
@@ -757,13 +755,53 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
 		}
 	}
 
-	if (!port) {
-		spin_lock_irqsave(&current->sighand->siglock, flags);
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	return error;
+}
+
+/*
+ * All transport protocols and ports for this service are removed
+ * from the local rpcbind database if the service is not hidden.
+ *
+ * The result of unregistration is reported via dprintk for those
+ * who want verification of the result, but is otherwise not
+ * important.
+ *
+ * The local rpcbind daemon listens on either only IPv6 or only
+ * IPv4.  The kernel can't tell how it's configured.  However,
+ * AF_INET addresses are mapped to AF_INET6 in IPv6-only config-
+ * urations, so even an unregistration request on AF_INET will
+ * get to a local rpcbind daemon listening only on AF_INET6.  So
+ * we always unregister via AF_INET.
+ *
+ * At this point we don't need rpcbind version 4 for unregis-
+ * tration:  A v2 UNSET request will clear all transports (netids),
+ * addresses, and address families for [program, version].
+ */
+static void svc_unregister(const struct svc_serv *serv)
+{
+	struct svc_program *progp;
+	unsigned long flags;
+	unsigned int i;
+	int error;
+
+	clear_thread_flag(TIF_SIGPENDING);
+
+	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
+		for (i = 0; i < progp->pg_nvers; i++) {
+			if (progp->pg_vers[i] == NULL)
+				continue;
+			if (progp->pg_vers[i]->vs_hidden)
+				continue;
+
+			error = rpcb_register(progp->pg_prog, i, 0, 0);
+			dprintk("svc: svc_unregister(%sv%u), error %d\n",
+					progp->pg_name, i, error);
+		}
 	}
 
-	return error;
+	spin_lock_irqsave(&current->sighand->siglock, flags);
+	recalc_sigpending();
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From a26cfad6e0a308a2c68df1f1ef50aabd48b17e6d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 18 Aug 2008 19:34:16 -0400
Subject: SUNRPC: Support IPv6 when registering kernel RPC services

In order to advertise NFS-related services on IPv6 interfaces via
rpcbind, the kernel RPC server implementation must use
rpcb_v4_register() instead of rpcb_register().

A new kernel build option allows distributions to use the legacy
v2 call until they integrate an appropriate user-space rpcbind
daemon that can support IPv6 RPC services.

I tried adding some automatic logic to fall back if registering
with a v4 protocol request failed, but there are too many corner
cases.  So I just made it a compile-time switch that distributions
can throw when they've replaced portmapper with rpcbind.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/Kconfig                 | 22 +++++++++++
 include/linux/sunrpc/svc.h |  4 +-
 net/sunrpc/svc.c           | 95 ++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 113 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/fs/Kconfig b/fs/Kconfig
index c6ae4d4842e..ed57a5a3725 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1773,6 +1773,28 @@ config SUNRPC_XPRT_RDMA
 
 	  If unsure, say N.
 
+config SUNRPC_REGISTER_V4
+	bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
+	depends on SUNRPC && EXPERIMENTAL
+	default n
+	help
+	  Sun added support for registering RPC services at an IPv6
+	  address by creating two new versions of the rpcbind protocol
+	  (RFC 1833).
+
+	  This option enables support in the kernel RPC server for
+	  registering kernel RPC services via version 4 of the rpcbind
+	  protocol.  If you enable this option, you must run a portmapper
+	  daemon that supports rpcbind protocol version 4.
+
+	  Serving NFS over IPv6 from knfsd (the kernel's NFS server)
+	  requires that you enable this option and use a portmapper that
+	  supports rpcbind version 4.
+
+	  If unsure, say N to get traditional behavior (register kernel
+	  RPC services using only rpcbind version 2).  Distributions
+	  using the legacy Linux portmapper daemon must say N here.
+
 config RPCSEC_GSS_KRB5
 	tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
 	depends on SUNRPC && EXPERIMENTAL
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 23143f38b12..54a79e1ad63 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -393,7 +393,9 @@ struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 void		   svc_destroy(struct svc_serv *);
 int		   svc_process(struct svc_rqst *);
-int		   svc_register(struct svc_serv *, int, unsigned short);
+int		   svc_register(const struct svc_serv *, const unsigned short,
+				const unsigned short);
+
 void		   svc_wake_up(struct svc_serv *);
 void		   svc_reserve(struct svc_rqst *rqstp, int space);
 struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 9eb78a771da..c43ccb62805 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -719,13 +719,92 @@ svc_exit_thread(struct svc_rqst *rqstp)
 }
 EXPORT_SYMBOL(svc_exit_thread);
 
+#ifdef CONFIG_SUNRPC_REGISTER_V4
 /*
- * Register an RPC service with the local portmapper.
- * To unregister a service, call this routine with
- * proto and port == 0.
+ * Registering kernel RPC services with rpcbind version 2 will work
+ * over either IPv4 or IPv6, since the Linux kernel always registers
+ * services for the "any" address.
+ *
+ * However, the local rpcbind daemon listens on either only AF_INET
+ * or AF_INET6 (never both).  When it listens on AF_INET6, an rpcbind
+ * version 2 registration will result in registering the service at
+ * IN6ADDR_ANY, even if the RPC service being registered is not
+ * IPv6-enabled.
+ *
+ * Rpcbind version 4 allows us to be a little more specific.  Kernel
+ * RPC services that don't yet support AF_INET6 can register
+ * themselves as IPv4-only with the local rpcbind daemon, even if the
+ * daemon is listening only on AF_INET6.
+ *
+ * And, registering IPv6-enabled kernel RPC services via AF_INET6
+ * verifies that the local user space rpcbind daemon is properly
+ * configured to support remote AF_INET6 rpcbind requests.
+ *
+ * An AF_INET6 registration request will fail if the local rpcbind
+ * daemon is not set up to listen on AF_INET6.  Likewise, we fail
+ * AF_INET6 registration requests if svc_register() is configured to
+ * support only rpcbind version 2.
  */
-int
-svc_register(struct svc_serv *serv, int proto, unsigned short port)
+static int __svc_register(const u32 program, const u32 version,
+			  const sa_family_t family,
+			  const unsigned short protocol,
+			  const unsigned short port)
+{
+	struct sockaddr_in sin = {
+		.sin_family		= AF_INET,
+		.sin_addr.s_addr	= htonl(INADDR_ANY),
+		.sin_port		= htons(port),
+	};
+	struct sockaddr_in6 sin6 = {
+		.sin6_family		= AF_INET6,
+		.sin6_addr		= IN6ADDR_ANY_INIT,
+		.sin6_port		= htons(port),
+	};
+	struct sockaddr *sap;
+	char *netid;
+
+	switch (family) {
+	case AF_INET:
+		sap = (struct sockaddr *)&sin;
+		netid = RPCBIND_NETID_TCP;
+		if (protocol == IPPROTO_UDP)
+			netid = RPCBIND_NETID_UDP;
+		break;
+	case AF_INET6:
+		sap = (struct sockaddr *)&sin6;
+		netid = RPCBIND_NETID_TCP6;
+		if (protocol == IPPROTO_UDP)
+			netid = RPCBIND_NETID_UDP6;
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+
+	return rpcb_v4_register(program, version, sap, netid);
+}
+#else
+static int __svc_register(const u32 program, const u32 version,
+			  sa_family_t family,
+			  const unsigned short protocol,
+			  const unsigned short port)
+{
+	if (family != AF_INET)
+		return -EAFNOSUPPORT;
+
+	return rpcb_register(program, version, protocol, port);
+}
+#endif
+
+/**
+ * svc_register - register an RPC service with the local portmapper
+ * @serv: svc_serv struct for the service to register
+ * @proto: transport protocol number to advertise
+ * @port: port to advertise
+ *
+ * Service is registered for any address in serv's address family
+ */
+int svc_register(const struct svc_serv *serv, const unsigned short proto,
+		 const unsigned short port)
 {
 	struct svc_program	*progp;
 	unsigned int		i;
@@ -738,8 +817,9 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
 			if (progp->pg_vers[i] == NULL)
 				continue;
 
-			dprintk("svc: svc_register(%s, %s, %d, %d)%s\n",
+			dprintk("svc: svc_register(%s, %u, %s, %u, %d)%s\n",
 					progp->pg_name,
+					serv->sv_family,
 					proto == IPPROTO_UDP?  "udp" : "tcp",
 					port,
 					i,
@@ -749,7 +829,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
 			if (progp->pg_vers[i]->vs_hidden)
 				continue;
 
-			error = rpcb_register(progp->pg_prog, i, proto, port);
+			error = __svc_register(progp->pg_prog, i,
+						serv->sv_family, proto, port);
 			if (error < 0)
 				break;
 		}
-- 
cgit v1.2.3-70-g09d2


From 2c7eb0b206b8408d92c518033a359f4374c75314 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 15 Sep 2008 16:27:23 -0500
Subject: SUNRPC: Register both netids for AF_INET6 servers

TI-RPC is a user-space library of RPC functions that replaces ONC RPC
and allows RPC to operate in the new world of IPv6.

TI-RPC combines the concept of a transport protocol (UDP and TCP)
and a protocol family (PF_INET and PF_INET6) into a single identifier
called a "netid."  For example, "udp" means UDP over IPv4, and "udp6"
means UDP over IPv6.

For rpcbind, then, the RPC service tuple that is registered and
advertised is:

  [RPC program, RPC version, service address and port, netid]

instead of

  [RPC program, RPC version, port, protocol]

Service address is typically ANYADDR, but can be a specific address
of one of the interfaces on a multi-homed host.  The third item in
the new tuple is expressed as a universal address.

The current Linux rpcbind implementation registers a netid for both
protocol families when RPCB_SET is done for just the PF_INET6 version
of the netid (ie udp6 or tcp6).  So registering "udp6" causes a
registration for "udp" to appear automatically as well.

We've recently determined that this is incorrect behavior.  In the
TI-RPC world, "udp6" is not meant to imply that the registered RPC
service handles requests from AF_INET as well, even if the listener
socket does address mapping.  "udp" and "udp6" are entirely separate
capabilities, and must be registered separately.

The Linux kernel, unlike TI-RPC, leverages address mapping to allow a
single listener socket to handle requests for both AF_INET and AF_INET6.
This is still OK, but the kernel currently assumes registering "udp6"
will cover "udp" as well.  It registers only "udp6" for it's AF_INET6
services, even though they handle both AF_INET and AF_INET6 on the same
port.

So svc_register() actually needs to register both "udp" and "udp6"
explicitly (and likewise for TCP).  Until rpcbind is fixed, the
kernel can ignore the return code for the second RPCB_SET call.

Please merge this with commit 15231312:

    SUNRPC: Support IPv6 when registering kernel RPC services

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Olaf Kirch <okir@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/svc.c | 143 ++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 100 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index c43ccb62805..b8d2fcd0f71 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -720,69 +720,125 @@ svc_exit_thread(struct svc_rqst *rqstp)
 EXPORT_SYMBOL(svc_exit_thread);
 
 #ifdef CONFIG_SUNRPC_REGISTER_V4
+
 /*
- * Registering kernel RPC services with rpcbind version 2 will work
- * over either IPv4 or IPv6, since the Linux kernel always registers
- * services for the "any" address.
- *
- * However, the local rpcbind daemon listens on either only AF_INET
- * or AF_INET6 (never both).  When it listens on AF_INET6, an rpcbind
- * version 2 registration will result in registering the service at
- * IN6ADDR_ANY, even if the RPC service being registered is not
- * IPv6-enabled.
+ * Register an "inet" protocol family netid with the local
+ * rpcbind daemon via an rpcbind v4 SET request.
  *
- * Rpcbind version 4 allows us to be a little more specific.  Kernel
- * RPC services that don't yet support AF_INET6 can register
- * themselves as IPv4-only with the local rpcbind daemon, even if the
- * daemon is listening only on AF_INET6.
+ * No netconfig infrastructure is available in the kernel, so
+ * we map IP_ protocol numbers to netids by hand.
  *
- * And, registering IPv6-enabled kernel RPC services via AF_INET6
- * verifies that the local user space rpcbind daemon is properly
- * configured to support remote AF_INET6 rpcbind requests.
- *
- * An AF_INET6 registration request will fail if the local rpcbind
- * daemon is not set up to listen on AF_INET6.  Likewise, we fail
- * AF_INET6 registration requests if svc_register() is configured to
- * support only rpcbind version 2.
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
  */
-static int __svc_register(const u32 program, const u32 version,
-			  const sa_family_t family,
-			  const unsigned short protocol,
-			  const unsigned short port)
+static int __svc_rpcb_register4(const u32 program, const u32 version,
+				const unsigned short protocol,
+				const unsigned short port)
 {
 	struct sockaddr_in sin = {
 		.sin_family		= AF_INET,
 		.sin_addr.s_addr	= htonl(INADDR_ANY),
 		.sin_port		= htons(port),
 	};
+	char *netid;
+
+	switch (protocol) {
+	case IPPROTO_UDP:
+		netid = RPCBIND_NETID_UDP;
+		break;
+	case IPPROTO_TCP:
+		netid = RPCBIND_NETID_TCP;
+		break;
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	return rpcb_v4_register(program, version,
+				(struct sockaddr *)&sin, netid);
+}
+
+/*
+ * Register an "inet6" protocol family netid with the local
+ * rpcbind daemon via an rpcbind v4 SET request.
+ *
+ * No netconfig infrastructure is available in the kernel, so
+ * we map IP_ protocol numbers to netids by hand.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
+ */
+static int __svc_rpcb_register6(const u32 program, const u32 version,
+				const unsigned short protocol,
+				const unsigned short port)
+{
 	struct sockaddr_in6 sin6 = {
 		.sin6_family		= AF_INET6,
 		.sin6_addr		= IN6ADDR_ANY_INIT,
 		.sin6_port		= htons(port),
 	};
-	struct sockaddr *sap;
 	char *netid;
 
-	switch (family) {
-	case AF_INET:
-		sap = (struct sockaddr *)&sin;
-		netid = RPCBIND_NETID_TCP;
-		if (protocol == IPPROTO_UDP)
-			netid = RPCBIND_NETID_UDP;
+	switch (protocol) {
+	case IPPROTO_UDP:
+		netid = RPCBIND_NETID_UDP6;
 		break;
-	case AF_INET6:
-		sap = (struct sockaddr *)&sin6;
+	case IPPROTO_TCP:
 		netid = RPCBIND_NETID_TCP6;
-		if (protocol == IPPROTO_UDP)
-			netid = RPCBIND_NETID_UDP6;
 		break;
 	default:
-		return -EAFNOSUPPORT;
+		return -EPROTONOSUPPORT;
+	}
+
+	return rpcb_v4_register(program, version,
+				(struct sockaddr *)&sin6, netid);
+}
+
+/*
+ * Register a kernel RPC service via rpcbind version 4.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
+ */
+static int __svc_register(const u32 program, const u32 version,
+			  const sa_family_t family,
+			  const unsigned short protocol,
+			  const unsigned short port)
+{
+	int error;
+
+	switch (family) {
+	case AF_INET:
+		return __svc_rpcb_register4(program, version,
+						protocol, port);
+	case AF_INET6:
+		error = __svc_rpcb_register6(program, version,
+						protocol, port);
+		if (error < 0)
+			return error;
+
+		/*
+		 * Work around bug in some versions of Linux rpcbind
+		 * which don't allow registration of both inet and
+		 * inet6 netids.
+		 *
+		 * Error return ignored for now.
+		 */
+		__svc_rpcb_register4(program, version,
+						protocol, port);
+		return 0;
 	}
 
-	return rpcb_v4_register(program, version, sap, netid);
+	return -EAFNOSUPPORT;
 }
-#else
+
+#else	/* CONFIG_SUNRPC_REGISTER_V4 */
+
+/*
+ * Register a kernel RPC service via rpcbind version 2.
+ *
+ * Returns zero on success; a negative errno value is returned
+ * if any error occurs.
+ */
 static int __svc_register(const u32 program, const u32 version,
 			  sa_family_t family,
 			  const unsigned short protocol,
@@ -793,7 +849,8 @@ static int __svc_register(const u32 program, const u32 version,
 
 	return rpcb_register(program, version, protocol, port);
 }
-#endif
+
+#endif /* CONFIG_SUNRPC_REGISTER_V4 */
 
 /**
  * svc_register - register an RPC service with the local portmapper
@@ -817,12 +874,12 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
 			if (progp->pg_vers[i] == NULL)
 				continue;
 
-			dprintk("svc: svc_register(%s, %u, %s, %u, %d)%s\n",
+			dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
 					progp->pg_name,
-					serv->sv_family,
+					i,
 					proto == IPPROTO_UDP?  "udp" : "tcp",
 					port,
-					i,
+					serv->sv_family,
 					progp->pg_vers[i]->vs_hidden?
 						" (but not telling portmap)" : "");
 
-- 
cgit v1.2.3-70-g09d2


From 9d548b9c955c0709d1229d21d0bc14afa6b356de Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 15 Sep 2008 16:27:30 -0500
Subject: SUNRPC: Use short-hand IPv6 ANYADDR for RPCB_SET

Clean up: When doing an RPCB_SET, make the kernel's rpcb client use the
shorthand "::" for the universal form of the IPv6 ANY address.

Without this patch, rpcbind will advertise:

  0000:0000:0000:0000:0000:0000:0000:0000.x.y

This is cosmetic only.  It cleans up the display of information from
/sbin/rpcinfo.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/rpcb_clnt.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index cc7250d4659..0fa1086cf99 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -20,6 +20,7 @@
 #include <linux/in6.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <net/ipv6.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
@@ -304,10 +305,13 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
 	char buf[64];
 
 	/* Construct AF_INET6 universal address */
-	snprintf(buf, sizeof(buf),
-			NIP6_FMT".%u.%u",
-			NIP6(address_to_register->sin6_addr),
-			port >> 8, port & 0xff);
+	if (ipv6_addr_any(&address_to_register->sin6_addr))
+		snprintf(buf, sizeof(buf), "::.%u.%u",
+				port >> 8, port & 0xff);
+	else
+		snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u",
+				NIP6(address_to_register->sin6_addr),
+				port >> 8, port & 0xff);
 	map->r_addr = buf;
 
 	dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with "
-- 
cgit v1.2.3-70-g09d2


From f6fb3f6f591b50fa4f51962ad06ee0d8782e1bc8 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 25 Sep 2008 11:56:57 -0400
Subject: SUNRPC: Fix up svc_unregister()

With the new rpcbind code, a PMAP_UNSET will not have any effect on
services registered via rpcbind v3 or v4.

Implement a version of svc_unregister() that uses an RPCB_UNSET with
an empty netid string to make sure we have cleared *all* entries for
a kernel RPC service when shutting down, or before starting a fresh
instance of the service.

Use the new version only when CONFIG_SUNRPC_REGISTER_V4 is enabled;
otherwise, the legacy PMAP version is used to ensure complete
backwards-compatibility with the Linux portmapper daemon.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/svc.c | 58 +++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b8d2fcd0f71..54c98d87684 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -896,31 +896,51 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
 	return error;
 }
 
+#ifdef CONFIG_SUNRPC_REGISTER_V4
+
+static void __svc_unregister(const u32 program, const u32 version,
+			     const char *progname)
+{
+	struct sockaddr_in6 sin6 = {
+		.sin6_family		= AF_INET6,
+		.sin6_addr		= IN6ADDR_ANY_INIT,
+		.sin6_port		= 0,
+	};
+	int error;
+
+	error = rpcb_v4_register(program, version,
+				(struct sockaddr *)&sin6, "");
+	dprintk("svc: %s(%sv%u), error %d\n",
+			__func__, progname, version, error);
+}
+
+#else	/* CONFIG_SUNRPC_REGISTER_V4 */
+
+static void __svc_unregister(const u32 program, const u32 version,
+			     const char *progname)
+{
+	int error;
+
+	error = rpcb_register(program, version, 0, 0);
+	dprintk("svc: %s(%sv%u), error %d\n",
+			__func__, progname, version, error);
+}
+
+#endif	/* CONFIG_SUNRPC_REGISTER_V4 */
+
 /*
- * All transport protocols and ports for this service are removed
- * from the local rpcbind database if the service is not hidden.
- *
- * The result of unregistration is reported via dprintk for those
- * who want verification of the result, but is otherwise not
- * important.
+ * All netids, bind addresses and ports registered for [program, version]
+ * are removed from the local rpcbind database (if the service is not
+ * hidden) to make way for a new instance of the service.
  *
- * The local rpcbind daemon listens on either only IPv6 or only
- * IPv4.  The kernel can't tell how it's configured.  However,
- * AF_INET addresses are mapped to AF_INET6 in IPv6-only config-
- * urations, so even an unregistration request on AF_INET will
- * get to a local rpcbind daemon listening only on AF_INET6.  So
- * we always unregister via AF_INET.
- *
- * At this point we don't need rpcbind version 4 for unregis-
- * tration:  A v2 UNSET request will clear all transports (netids),
- * addresses, and address families for [program, version].
+ * The result of unregistration is reported via dprintk for those who want
+ * verification of the result, but is otherwise not important.
  */
 static void svc_unregister(const struct svc_serv *serv)
 {
 	struct svc_program *progp;
 	unsigned long flags;
 	unsigned int i;
-	int error;
 
 	clear_thread_flag(TIF_SIGPENDING);
 
@@ -931,9 +951,7 @@ static void svc_unregister(const struct svc_serv *serv)
 			if (progp->pg_vers[i]->vs_hidden)
 				continue;
 
-			error = rpcb_register(progp->pg_prog, i, 0, 0);
-			dprintk("svc: svc_unregister(%sv%u), error %d\n",
-					progp->pg_name, i, error);
+			__svc_unregister(progp->pg_prog, i, progp->pg_name);
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From db820d6376aa81accf5b648651e160fd76e363e2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 25 Sep 2008 11:57:05 -0400
Subject: SUNRPC: Clean up debug messages in rpcb_clnt.c

The RPCB XDR functions are used for multiple procedures.  For instance,
rpcb_encode_getaddr() is used for RPCB_GETADDR, RPCB_SET, and
RPCB_UNSET.  Make the XDR debug messages more generic so they are less
confusing.

And, unlike in other RPC consumers in the kernel, a single debug flag
enables all levels of debug messages in the RPC bind client, including
XDR debug messages.  Since the XDR decoders already report success or
failure in this case, remove redundant debug messages in the mid-level
rpcb_register_call() function.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/rpcb_clnt.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 0fa1086cf99..34abc91058d 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -197,12 +197,8 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
 		return error;
 	}
 
-	if (!result) {
-		dprintk("RPC:       registration failed\n");
+	if (!result)
 		return -EACCES;
-	}
-
-	dprintk("RPC:       registration succeeded\n");
 	return 0;
 }
 
@@ -628,7 +624,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
 static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
 			       struct rpcbind_args *rpcb)
 {
-	dprintk("RPC:       rpcb_encode_mapping(%u, %u, %d, %u)\n",
+	dprintk("RPC:       encoding rpcb request (%u, %u, %d, %u)\n",
 			rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
 	*p++ = htonl(rpcb->r_prog);
 	*p++ = htonl(rpcb->r_vers);
@@ -643,7 +639,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
 			       unsigned short *portp)
 {
 	*portp = (unsigned short) ntohl(*p++);
-	dprintk("RPC:       rpcb_decode_getport result %u\n",
+	dprintk("RPC:       rpcb getport result: %u\n",
 			*portp);
 	return 0;
 }
@@ -652,7 +648,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
 			   unsigned int *boolp)
 {
 	*boolp = (unsigned int) ntohl(*p++);
-	dprintk("RPC:       rpcb_decode_set: call %s\n",
+	dprintk("RPC:       rpcb set/unset call %s\n",
 			(*boolp ? "succeeded" : "failed"));
 	return 0;
 }
@@ -660,7 +656,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
 static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p,
 			       struct rpcbind_args *rpcb)
 {
-	dprintk("RPC:       rpcb_encode_getaddr(%u, %u, %s)\n",
+	dprintk("RPC:       encoding rpcb request (%u, %u, %s)\n",
 			rpcb->r_prog, rpcb->r_vers, rpcb->r_addr);
 	*p++ = htonl(rpcb->r_prog);
 	*p++ = htonl(rpcb->r_vers);
-- 
cgit v1.2.3-70-g09d2


From d5b337b4877f7c4e1d761434ee04d045b0201e03 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Sun, 28 Sep 2008 09:21:26 +0300
Subject: nfsd: use nfs client rpc callback program

since commit ff7d9756b501744540be65e172d27ee321d86103
"nfsd: use static memory for callback program and stats"
do_probe_callback uses a static callback program
(NFS4_CALLBACK) rather than the one set in clp->cl_callback.cb_prog
as passed in by the client in setclientid (4.0)
or create_session (4.1).

This patches introduces rpc_create_args.prognumber that allows
overriding program->number when creating rpc_clnt.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c      | 1 +
 include/linux/sunrpc/clnt.h | 1 +
 net/sunrpc/clnt.c           | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f7c793a5b80..094747a1227 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -380,6 +380,7 @@ static int do_probe_callback(void *data)
 		.addrsize	= sizeof(addr),
 		.timeout	= &timeparms,
 		.program	= &cb_program,
+		.prognumber	= cb->cb_prog,
 		.version	= nfs_cb_version[1]->number,
 		.authflavor	= RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 8ac8e75243a..6f0ee1b84a4 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -104,6 +104,7 @@ struct rpc_create_args {
 	const struct rpc_timeout *timeout;
 	char			*servername;
 	struct rpc_program	*program;
+	u32			prognumber;	/* overrides program->number */
 	u32			version;
 	rpc_authflavor_t	authflavor;
 	unsigned long		flags;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 76739e928d0..da0789fa1b8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
 	clnt->cl_procinfo = version->procs;
 	clnt->cl_maxproc  = version->nrprocs;
 	clnt->cl_protname = program->name;
-	clnt->cl_prog     = program->number;
+	clnt->cl_prog     = args->prognumber ? : program->number;
 	clnt->cl_vers     = version->number;
 	clnt->cl_stats    = program->stats;
 	clnt->cl_metrics  = rpc_alloc_iostats(clnt);
-- 
cgit v1.2.3-70-g09d2


From d01dbeb6af7a0848063033f73c3d146fec7451f3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 30 Sep 2008 02:03:19 -0700
Subject: ipsec: Fix pskb_expand_head corruption in xfrm_state_check_space

We're never supposed to shrink the headroom or tailroom.  In fact,
shrinking the headroom is a fatal action.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ac25b4c0e98..dc50f1e71f7 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -27,10 +27,14 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 		- skb_headroom(skb);
 	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);
 
-	if (nhead > 0 || ntail > 0)
-		return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
-
-	return 0;
+	if (nhead <= 0) {
+		if (ntail <= 0)
+			return 0;
+		nhead = 0;
+	} else if (ntail < 0)
+		ntail = 0;
+
+	return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
 }
 
 static int xfrm_output_one(struct sk_buff *skb, int err)
-- 
cgit v1.2.3-70-g09d2


From cf04a4c764cd3e651a64b3e667bb6a673ead99e1 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 30 Sep 2008 02:22:14 -0700
Subject: netdev: use const for some name functions

dev_change_name and netdev_drivername should use const char on
parameters that are read-only input values. The strcpy to newname is
not needed since newname is not used later in function.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++--
 net/core/dev.c            | 9 ++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d675df08b94..9cfd20be8b7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1225,7 +1225,7 @@ extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
 extern int		dev_ethtool(struct net *net, struct ifreq *);
 extern unsigned		dev_get_flags(const struct net_device *);
 extern int		dev_change_flags(struct net_device *, unsigned);
-extern int		dev_change_name(struct net_device *, char *);
+extern int		dev_change_name(struct net_device *, const char *);
 extern int		dev_set_alias(struct net_device *, const char *, size_t);
 extern int		dev_change_net_namespace(struct net_device *,
 						 struct net *, const char *);
@@ -1670,7 +1670,7 @@ extern void dev_seq_stop(struct seq_file *seq, void *v);
 extern int netdev_class_create_file(struct class_attribute *class_attr);
 extern void netdev_class_remove_file(struct class_attribute *class_attr);
 
-extern char *netdev_drivername(struct net_device *dev, char *buffer, int len);
+extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len);
 
 extern void linkwatch_run_queue(void);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index a90737fe247..64f0d5b7cdf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -890,7 +890,7 @@ int dev_alloc_name(struct net_device *dev, const char *name)
  *	Change name of a device, can pass format strings "eth%d".
  *	for wildcarding.
  */
-int dev_change_name(struct net_device *dev, char *newname)
+int dev_change_name(struct net_device *dev, const char *newname)
 {
 	char oldname[IFNAMSIZ];
 	int err = 0;
@@ -916,7 +916,6 @@ int dev_change_name(struct net_device *dev, char *newname)
 		err = dev_alloc_name(dev, newname);
 		if (err < 0)
 			return err;
-		strcpy(newname, dev->name);
 	}
 	else if (__dev_get_by_name(net, newname))
 		return -EEXIST;
@@ -4754,10 +4753,10 @@ err_name:
 	return -ENOMEM;
 }
 
-char *netdev_drivername(struct net_device *dev, char *buffer, int len)
+char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
 {
-	struct device_driver *driver;
-	struct device *parent;
+	const struct device_driver *driver;
+	const struct device *parent;
 
 	if (len <= 0 || !buffer)
 		return buffer;
-- 
cgit v1.2.3-70-g09d2


From f0db275a81ef184293ca7ef3646fe065b336efb7 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 30 Sep 2008 02:23:58 -0700
Subject: netdev: docbook comment update (revised)

Add more docbook comments to network device functions and cleanup
the comments.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 64f0d5b7cdf..2cc258b7acc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -956,6 +956,7 @@ rollback:
  *	dev_set_alias - change ifalias of a device
  *	@dev: device
  *	@alias: name up to IFALIASZ
+ *	@len: limit of bytes to copy from info
  *
  *	Set ifalias for a device,
  */
@@ -3330,6 +3331,12 @@ static void dev_addr_discard(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
+/**
+ *	dev_get_flags - get flags reported to userspace
+ *	@dev: device
+ *
+ *	Get the combination of flag bits exported through APIs to userspace.
+ */
 unsigned dev_get_flags(const struct net_device *dev)
 {
 	unsigned flags;
@@ -3354,6 +3361,14 @@ unsigned dev_get_flags(const struct net_device *dev)
 	return flags;
 }
 
+/**
+ *	dev_change_flags - change device settings
+ *	@dev: device
+ *	@flags: device state flags
+ *
+ *	Change settings on device based state flags. The flags are
+ *	in the userspace exported format.
+ */
 int dev_change_flags(struct net_device *dev, unsigned flags)
 {
 	int ret, changes;
@@ -3423,6 +3438,13 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 	return ret;
 }
 
+/**
+ *	dev_set_mtu - Change maximum transfer unit
+ *	@dev: device
+ *	@new_mtu: new transfer unit
+ *
+ *	Change the maximum transfer size of the network device.
+ */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
 	int err;
@@ -3447,6 +3469,13 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	return err;
 }
 
+/**
+ *	dev_set_mac_address - Change Media Access Control Address
+ *	@dev: device
+ *	@sa: new address
+ *
+ *	Change the hardware (MAC) address of the device
+ */
 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 {
 	int err;
@@ -4350,7 +4379,12 @@ void free_netdev(struct net_device *dev)
 	put_device(&dev->dev);
 }
 
-/* Synchronize with packet receive processing. */
+/**
+ *	synchronize_net -  Synchronize with packet receive processing
+ *
+ *	Wait for packets currently being received to be done.
+ *	Does not block later packets from starting.
+ */
 void synchronize_net(void)
 {
 	might_sleep();
@@ -4652,7 +4686,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
 }
 
 /**
- * netdev_dma_regiser - register the networking subsystem as a DMA client
+ * netdev_dma_register - register the networking subsystem as a DMA client
  */
 static int __init netdev_dma_register(void)
 {
@@ -4753,6 +4787,14 @@ err_name:
 	return -ENOMEM;
 }
 
+/**
+ *	netdev_drivername - network driver for the device
+ *	@dev: network device
+ *	@buffer: buffer for resulting name
+ *	@len: size of buffer
+ *
+ *	Determine network driver for device.
+ */
 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
 {
 	const struct device_driver *driver;
-- 
cgit v1.2.3-70-g09d2


From 8980713b97393b21a50d11408a22d2caa87d016a Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Tue, 30 Sep 2008 02:51:18 -0700
Subject: Phonet: Netlink factorization and cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pn_netlink.c | 91 +++++++++++++++++++------------------------------
 1 file changed, 35 insertions(+), 56 deletions(-)

(limited to 'net')

diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index b1ea19a230d..b1770d66bc8 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -54,11 +54,16 @@ errout:
 		rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err);
 }
 
-static int newaddr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr)
+static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {
+	[IFA_LOCAL] = { .type = NLA_U8 },
+};
+
+static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr)
 {
-	struct rtattr **rta = attr;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlm);
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tb[IFA_MAX+1];
 	struct net_device *dev;
+	struct ifaddrmsg *ifm;
 	int err;
 	u8 pnaddr;
 
@@ -67,52 +72,28 @@ static int newaddr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr)
 
 	ASSERT_RTNL();
 
-	if (rta[IFA_LOCAL - 1] == NULL)
-		return -EINVAL;
-
-	dev = __dev_get_by_index(&init_net, ifm->ifa_index);
-	if (dev == NULL)
-		return -ENODEV;
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy);
+	if (err < 0)
+		return err;
 
-	if (ifm->ifa_prefixlen > 0)
+	ifm = nlmsg_data(nlh);
+	if (tb[IFA_LOCAL] == NULL)
 		return -EINVAL;
-
-	memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1);
-
-	err = phonet_address_add(dev, pnaddr);
-	if (!err)
-		rtmsg_notify(RTM_NEWADDR, dev, pnaddr);
-	return err;
-}
-
-static int deladdr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr)
-{
-	struct rtattr **rta = attr;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlm);
-	struct net_device *dev;
-	int err;
-	u8 pnaddr;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	ASSERT_RTNL();
-
-	if (rta[IFA_LOCAL - 1] == NULL)
+	pnaddr = nla_get_u8(tb[IFA_LOCAL]);
+	if (pnaddr & 3)
+		/* Phonet addresses only have 6 high-order bits */
 		return -EINVAL;
 
-	dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+	dev = __dev_get_by_index(net, ifm->ifa_index);
 	if (dev == NULL)
 		return -ENODEV;
 
-	if (ifm->ifa_prefixlen > 0)
-		return -EADDRNOTAVAIL;
-
-	memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1);
-
-	err = phonet_address_del(dev, pnaddr);
+	if (nlh->nlmsg_type == RTM_NEWADDR)
+		err = phonet_address_add(dev, pnaddr);
+	else
+		err = phonet_address_del(dev, pnaddr);
 	if (!err)
-		rtmsg_notify(RTM_DELADDR, dev, pnaddr);
+		rtmsg_notify(nlh->nlmsg_type, dev, pnaddr);
 	return err;
 }
 
@@ -121,25 +102,23 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
-	unsigned int orig_len = skb->len;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct ifaddrmsg));
-	ifm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ifm = nlmsg_data(nlh);
 	ifm->ifa_family = AF_PHONET;
 	ifm->ifa_prefixlen = 0;
 	ifm->ifa_flags = IFA_F_PERMANENT;
-	ifm->ifa_scope = RT_SCOPE_HOST;
+	ifm->ifa_scope = RT_SCOPE_LINK;
 	ifm->ifa_index = dev->ifindex;
-	RTA_PUT(skb, IFA_LOCAL, 1, &addr);
-	nlh->nlmsg_len = skb->len - orig_len;
-
-	return 0;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, orig_len);
+	NLA_PUT_U8(skb, IFA_LOCAL, addr);
+	return nlmsg_end(skb, nlh);
 
-	return -1;
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
 }
 
 static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
@@ -180,7 +159,7 @@ out:
 
 void __init phonet_netlink_register(void)
 {
-	rtnl_register(PF_PHONET, RTM_NEWADDR, newaddr_doit, NULL);
-	rtnl_register(PF_PHONET, RTM_DELADDR, deladdr_doit, NULL);
+	rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL);
+	rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL);
 	rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit);
 }
-- 
cgit v1.2.3-70-g09d2


From 8b122efd13a227d35d5ca242561770db1b5e3658 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 30 Sep 2008 03:03:35 -0700
Subject: iucv: Fix mismerge again.

fb65a7c091529bfffb1262515252c0d0f6241c5c ("iucv: Fix bad merging.") fixed
a merge error, but in a wrong way. We now end up with the bug below.
This patch corrects the mismerge like it was intended.

BUG: scheduling while atomic: swapper/1/0x00000000
Modules linked in:
CPU: 1 Not tainted 2.6.27-rc7-00094-gc0f4d6d #9
Process swapper (pid: 1, task: 000000003fe7d988, ksp: 000000003fe838c0)
0000000000000000 000000003fe839b8 0000000000000002 0000000000000000
       000000003fe83a58 000000003fe839d0 000000003fe839d0 0000000000390de6
       000000000058acd8 00000000000000d0 000000003fe7dcd8 0000000000000000
       000000000000000c 000000000000000d 0000000000000000 000000003fe83a28
       000000000039c5b8 0000000000015e5e 000000003fe839b8 000000003fe83a00
Call Trace:
([<0000000000015d6a>] show_trace+0xe6/0x134)
 [<0000000000039656>] __schedule_bug+0xa2/0xa8
 [<0000000000391744>] schedule+0x49c/0x910
 [<0000000000391f64>] schedule_timeout+0xc4/0x114
 [<00000000003910d4>] wait_for_common+0xe8/0x1b4
 [<00000000000549ae>] call_usermodehelper_exec+0xa6/0xec
 [<00000000001af7b8>] kobject_uevent_env+0x418/0x438
 [<00000000001d08fc>] bus_add_driver+0x1e4/0x298
 [<00000000001d1ee4>] driver_register+0x90/0x18c
 [<0000000000566848>] netiucv_init+0x168/0x2c8
 [<00000000000120be>] do_one_initcall+0x3e/0x17c
 [<000000000054a31a>] kernel_init+0x1ce/0x248
 [<000000000001a97a>] kernel_thread_starter+0x6/0xc
 [<000000000001a974>] kernel_thread_starter+0x0/0xc
 iucv: NETIUCV driver initialized
initcall netiucv_init+0x0/0x2c8 returned with preemption imbalance

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 705959b31e2..d7b54b5bfa6 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -524,7 +524,6 @@ static int iucv_enable(void)
 	get_online_cpus();
 	for_each_online_cpu(cpu)
 		smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
-	preempt_enable();
 	if (cpus_empty(iucv_buffer_cpumask))
 		/* No cpu could declare an iucv buffer. */
 		goto out_path;
@@ -547,7 +546,9 @@ out:
  */
 static void iucv_disable(void)
 {
+	get_online_cpus();
 	on_each_cpu(iucv_retrieve_cpu, NULL, 1);
+	put_online_cpus();
 	kfree(iucv_path_table);
 }
 
-- 
cgit v1.2.3-70-g09d2


From ba0166708ef4da7eeb61dd92bbba4d5a749d6561 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 30 Sep 2008 05:32:24 -0700
Subject: sctp: Fix kernel panic while process protocol violation parameter

Since call to function sctp_sf_abort_violation() need paramter 'arg' with
'struct sctp_chunk' type, it will read the chunk type and chunk length from
the chunk_hdr member of chunk. But call to sctp_sf_violation_paramlen()
always with 'struct sctp_paramhdr' type's parameter, it will be passed to
sctp_sf_abort_violation(). This may cause kernel panic.

   sctp_sf_violation_paramlen()
     |-- sctp_sf_abort_violation()
        |-- sctp_make_abort_violation()

This patch fixed this problem. This patch also fix two place which called
sctp_sf_violation_paramlen() with wrong paramter type.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h    |  3 +++
 net/sctp/sm_make_chunk.c | 37 ++++++++++++++++++++++++-------------
 net/sctp/sm_statefuns.c  | 48 +++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 64 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 24811732bdb..029a54a0239 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -227,6 +227,9 @@ struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *,
 				   const struct sctp_chunk *,
 				   const __u8 *,
 				   const size_t );
+struct sctp_chunk *sctp_make_violation_paramlen(const struct sctp_association *,
+				   const struct sctp_chunk *,
+				   struct sctp_paramhdr *);
 struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *,
 				  const struct sctp_transport *,
 				  const void *payload,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index b599cbba4fb..d68869f966c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1012,6 +1012,29 @@ end:
 	return retval;
 }
 
+struct sctp_chunk *sctp_make_violation_paramlen(
+	const struct sctp_association *asoc,
+	const struct sctp_chunk *chunk,
+	struct sctp_paramhdr *param)
+{
+	struct sctp_chunk *retval;
+	static const char error[] = "The following parameter had invalid length:";
+	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
+				sizeof(sctp_paramhdr_t);
+
+	retval = sctp_make_abort(asoc, chunk, payload_len);
+	if (!retval)
+		goto nodata;
+
+	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION,
+			sizeof(error) + sizeof(sctp_paramhdr_t));
+	sctp_addto_chunk(retval, sizeof(error), error);
+	sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param);
+
+nodata:
+	return retval;
+}
+
 /* Make a HEARTBEAT chunk.  */
 struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
 				  const struct sctp_transport *transport,
@@ -1782,11 +1805,6 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc,
 					const struct sctp_chunk *chunk,
 					struct sctp_chunk **errp)
 {
-	static const char error[] = "The following parameter had invalid length:";
-	size_t		payload_len = WORD_ROUND(sizeof(error)) +
-						sizeof(sctp_paramhdr_t);
-
-
 	/* This is a fatal error.  Any accumulated non-fatal errors are
 	 * not reported.
 	 */
@@ -1794,14 +1812,7 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc,
 		sctp_chunk_free(*errp);
 
 	/* Create an error chunk and fill it in with our payload. */
-	*errp = sctp_make_op_error_space(asoc, chunk, payload_len);
-
-	if (*errp) {
-		sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION,
-				sizeof(error) + sizeof(sctp_paramhdr_t));
-		sctp_addto_chunk(*errp, sizeof(error), error);
-		sctp_addto_param(*errp, sizeof(sctp_paramhdr_t), param);
-	}
+	*errp = sctp_make_violation_paramlen(asoc, chunk, param);
 
 	return 0;
 }
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8848d329aa2..7c622af2ce5 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -119,7 +119,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
-				     void *arg,
+				     void *arg, void *ext,
 				     sctp_cmd_seq_t *commands);
 
 static sctp_disposition_t sctp_sf_violation_ctsn(
@@ -3425,7 +3425,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 	addr_param = (union sctp_addr_param *)hdr->params;
 	length = ntohs(addr_param->p.length);
 	if (length < sizeof(sctp_paramhdr_t))
-		return sctp_sf_violation_paramlen(ep, asoc, type,
+		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
 			   (void *)addr_param, commands);
 
 	/* Verify the ASCONF chunk before processing it. */
@@ -3433,8 +3433,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 			    (sctp_paramhdr_t *)((void *)addr_param + length),
 			    (void *)chunk->chunk_end,
 			    &err_param))
-		return sctp_sf_violation_paramlen(ep, asoc, type,
-						  (void *)&err_param, commands);
+		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+						  (void *)err_param, commands);
 
 	/* ADDIP 5.2 E1) Compare the value of the serial number to the value
 	 * the endpoint stored in a new association variable
@@ -3542,8 +3542,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 	    (sctp_paramhdr_t *)addip_hdr->params,
 	    (void *)asconf_ack->chunk_end,
 	    &err_param))
-		return sctp_sf_violation_paramlen(ep, asoc, type,
-			   (void *)&err_param, commands);
+		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+			   (void *)err_param, commands);
 
 	if (last_asconf) {
 		addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr;
@@ -4240,12 +4240,38 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands) {
-	static const char err_str[] = "The following parameter had invalid length:";
+				     void *arg, void *ext,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk =  arg;
+	struct sctp_paramhdr *param = ext;
+	struct sctp_chunk *abort = NULL;
 
-	return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
-					sizeof(err_str));
+	if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
+		goto discard;
+
+	/* Make the abort chunk. */
+	abort = sctp_make_violation_paramlen(asoc, chunk, param);
+	if (!abort)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+	SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
+			SCTP_ERROR(ECONNABORTED));
+	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+			SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
+	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+
+discard:
+	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
+
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+
+	return SCTP_DISPOSITION_ABORT;
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
 }
 
 /* Handle a protocol violation when the peer trying to advance the
-- 
cgit v1.2.3-70-g09d2


From b4a4bf5d77c7d32098a7080f34a8857dd7fa466d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 26 Sep 2008 13:34:54 +0200
Subject: mac80211: fixups for "make master iface not wireless"

In "mac80211: make master iface not wireless" I accidentally
forgot to include these changes ... leading to the expected
BUG_ON errors.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c | 9 ++++-----
 net/mac80211/wme.c   | 8 ++++----
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b5cd91e8971..8336fee68d3 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -58,8 +58,9 @@ static inline int identical_mac_addr_allowed(int type1, int type2)
 
 static int ieee80211_open(struct net_device *dev)
 {
-	struct ieee80211_sub_if_data *sdata, *nsdata;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_sub_if_data *nsdata;
+	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 	struct ieee80211_if_init_conf conf;
 	u32 changed = 0;
@@ -67,8 +68,6 @@ static int ieee80211_open(struct net_device *dev)
 	bool need_hw_reconfig = 0;
 	u8 null_addr[ETH_ALEN] = {0};
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
 	/* fail early if user set an invalid address */
 	if (compare_ether_addr(dev->dev_addr, null_addr) &&
 	    !is_valid_ether_addr(dev->dev_addr))
@@ -512,8 +511,8 @@ static int ieee80211_stop(struct net_device *dev)
 
 static void ieee80211_set_multicast_list(struct net_device *dev)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
 	int allmulti, promisc, sdata_allmulti, sdata_promisc;
 
 	allmulti = !!(dev->flags & IFF_ALLMULTI);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index c703f8b44e9..139b5f267b3 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -73,9 +73,8 @@ static int wme_downgrade_ac(struct sk_buff *skb)
 
 
 /* Indicate which queue to use.  */
-static u16 classify80211(struct sk_buff *skb, struct net_device *dev)
+static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb)
 {
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 
 	if (!ieee80211_is_data(hdr->frame_control)) {
@@ -113,14 +112,15 @@ static u16 classify80211(struct sk_buff *skb, struct net_device *dev)
 
 u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
 {
+	struct ieee80211_master_priv *mpriv = netdev_priv(dev);
+	struct ieee80211_local *local = mpriv->local;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct sta_info *sta;
 	u16 queue;
 	u8 tid;
 
-	queue = classify80211(skb, dev);
+	queue = classify80211(local, skb);
 	if (unlikely(queue >= local->hw.queues))
 		queue = local->hw.queues - 1;
 
-- 
cgit v1.2.3-70-g09d2


From d88410a0b657c5ccebd1c120af1f14c5ca6a3d95 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Thu, 25 Sep 2008 20:45:01 +0300
Subject: mac80211: remove wme_tx_queue and wme_rx_queue from
 net/mac80211/sta_info.h

This patch removes wme_tx_queue and wme_rx_queue from struct sta_info
and from the debugfs sub-structure of struct sta_info
in net/mac80211/sta_info.h, as they are useless and not used.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index c3f43696462..a6b51862a89 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -189,7 +189,6 @@ struct sta_ampdu_mlme {
  * @last_qual: qual of last received frame from this STA
  * @last_noise: noise of last received frame from this STA
  * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
- * @wme_rx_queue: TBD
  * @tx_filtered_count: TBD
  * @tx_retry_failed: TBD
  * @tx_retry_count: TBD
@@ -199,7 +198,6 @@ struct sta_ampdu_mlme {
  * @tx_fragments: number of transmitted MPDUs
  * @last_txrate_idx: Index of the last used transmit rate
  * @tid_seq: TBD
- * @wme_tx_queue: TBD
  * @ampdu_mlme: TBD
  * @timer_to_tid: identity mapping to ID timers
  * @tid_to_tx_q: map tid to tx queue
@@ -258,9 +256,6 @@ struct sta_info {
 	int last_qual;
 	int last_noise;
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
-	unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES];
-#endif
 
 	/* Updated from TX status path only, no locking requirements */
 	unsigned long tx_filtered_count;
@@ -274,9 +269,6 @@ struct sta_info {
 	unsigned long tx_fragments;
 	unsigned int last_txrate_idx;
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
-	unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES];
-#endif
 
 	/*
 	 * Aggregation information, locked with lock.
@@ -307,10 +299,6 @@ struct sta_info {
 		struct dentry *num_ps_buf_frames;
 		struct dentry *inactive_ms;
 		struct dentry *last_seq_ctrl;
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
-		struct dentry *wme_rx_queue;
-		struct dentry *wme_tx_queue;
-#endif
 		struct dentry *agg_status;
 	} debugfs;
 #endif
-- 
cgit v1.2.3-70-g09d2


From b0dee5784dff3e2984510a7fe7a7e48109001f94 Mon Sep 17 00:00:00 2001
From: Davide Pesavento <davidepesa@gmail.com>
Date: Sat, 27 Sep 2008 17:29:12 +0200
Subject: Fix modpost failure when rx handlers are not inlined.

When CONFIG_MAC80211_MESH=n and CONFIG_MAC80211_NOINLINE=y,
gcc doesn't optimize out a call to ieee80211_rx_h_mesh_fwding,
even if the previous comparison is always false in this case.
This leads to the following errors during modpost:

ERROR: "mpp_path_lookup" [net/mac80211/mac80211.ko] undefined!
ERROR: "mpp_path_add" [net/mac80211/mac80211.ko] undefined!

Fix by removing the possibility of uninlining
ieee80211_rx_h_mesh_fwding rx handler.

Signed-off-by: Davide Pesavento <davidepesa@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c489865761b..77e7b014872 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1379,7 +1379,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 }
 
-static ieee80211_rx_result debug_noinline
+static ieee80211_rx_result
 ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_hdr *hdr;
-- 
cgit v1.2.3-70-g09d2


From 4dd7972d1204c3851a4092cecd2207e05eb29b09 Mon Sep 17 00:00:00 2001
From: Vitaliy Gusev <vgusev@openvz.org>
Date: Wed, 1 Oct 2008 01:51:39 -0700
Subject: tcp: Fix NULL dereference in tcp_4_send_ack()

Fix NULL dereference in tcp_4_send_ack().

As skb->dev is reset to NULL in tcp_v4_rcv() thus OOPS occurs:

BUG: unable to handle kernel NULL pointer dereference at 00000000000004d0
IP: [<ffffffff80498503>] tcp_v4_send_ack+0x203/0x250

Stack:  ffff810005dbb000 ffff810015c8acc0 e77b2c6e5f861600 a01610802e90cb6d
 0a08010100000000 88afffff88afffff 0000000080762be8 0000000115c872e8
 0004122000000000 0000000000000001 ffffffff80762b88 0000000000000020
Call Trace:
 <IRQ>  [<ffffffff80499c33>] tcp_v4_reqsk_send_ack+0x20/0x22
 [<ffffffff8049bce5>] tcp_check_req+0x108/0x14c
 [<ffffffff8047aaf7>] ? rt_intern_hash+0x322/0x33c
 [<ffffffff80499846>] tcp_v4_do_rcv+0x399/0x4ec
 [<ffffffff8045ce4b>] ? skb_checksum+0x4f/0x272
 [<ffffffff80485b74>] ? __inet_lookup_listener+0x14a/0x15c
 [<ffffffff8049babc>] tcp_v4_rcv+0x6a1/0x701
 [<ffffffff8047e739>] ip_local_deliver_finish+0x157/0x24a
 [<ffffffff8047ec9a>] ip_local_deliver+0x72/0x7c
 [<ffffffff8047e5bd>] ip_rcv_finish+0x38d/0x3b2
 [<ffffffff803d3548>] ? scsi_io_completion+0x19d/0x39e
 [<ffffffff8047ebe5>] ip_rcv+0x2a2/0x2e5
 [<ffffffff80462faa>] netif_receive_skb+0x293/0x303
 [<ffffffff80465a9b>] process_backlog+0x80/0xd0
 [<ffffffff802630b4>] ? __rcu_process_callbacks+0x125/0x1b4
 [<ffffffff8046560e>] net_rx_action+0xb9/0x17f
 [<ffffffff80234cc5>] __do_softirq+0xa3/0x164
 [<ffffffff8020c52c>] call_softirq+0x1c/0x28
 <EOI>  [<ffffffff8020de1c>] do_softirq+0x34/0x72
 [<ffffffff80234b8e>] local_bh_enable_ip+0x3f/0x50
 [<ffffffff804d43ca>] _spin_unlock_bh+0x12/0x14
 [<ffffffff804599cd>] release_sock+0xb8/0xc1
 [<ffffffff804a6f9a>] inet_stream_connect+0x146/0x25c
 [<ffffffff80243078>] ? autoremove_wake_function+0x0/0x38
 [<ffffffff8045751f>] sys_connect+0x68/0x8e
 [<ffffffff80291818>] ? fd_install+0x5f/0x68
 [<ffffffff80457784>] ? sock_map_fd+0x55/0x62
 [<ffffffff8020b39b>] system_call_after_swapgs+0x7b/0x80

Code: 41 10 11 d0 83 d0 00 4d 85 ed 89 45 c0 c7 45 c4 08 00 00 00 74 07 41 8b 45 04 89 45 c8 48 8b 43 20 8b 4d b8 48 8d 55 b0 48 89 de <48> 8b 80 d0 04 00 00 48 8b b8 60 01 00 00 e8 20 ae fe ff 65 48
RIP  [<ffffffff80498503>] tcp_v4_send_ack+0x203/0x250
 RSP <ffffffff80762b78>
CR2: 00000000000004d0

Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1b4fee20fc9..011478e46c4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -618,7 +618,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			];
 	} rep;
 	struct ip_reply_arg arg;
-	struct net *net = dev_net(skb->dev);
+	struct net *net = dev_net(skb->dst->dev);
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
-- 
cgit v1.2.3-70-g09d2


From 2a5b82751f73a0bf6f604ce56d34adba6da1b246 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 1 Oct 2008 02:13:16 -0700
Subject: ipv6: NULL pointer dereferrence in tcp_v6_send_ack

The following actions are possible:
tcp_v6_rcv
  skb->dev = NULL;
  tcp_v6_do_rcv
    tcp_v6_hnd_req
      tcp_check_req
        req->rsk_ops->send_ack == tcp_v6_send_ack

So, skb->dev can be NULL in tcp_v6_send_ack. We must obtain namespace
from dst entry.

Thanks to Vitaliy Gusev <vgusev@openvz.org> for initial problem finding
in IPv4 code.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b585c850a89..10e22fd4822 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1050,7 +1050,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
-	struct net *net = dev_net(skb->dev);
+	struct net *net = dev_net(skb->dst->dev);
 	struct sock *ctl_sk = net->ipv6.tcp_sk;
 	unsigned int tot_len = sizeof(struct tcphdr);
 	__be32 *topt;
-- 
cgit v1.2.3-70-g09d2


From 04a4bb55bcf35b63d40fd2725e58599ff8310dd7 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@marvell.com>
Date: Wed, 1 Oct 2008 02:33:12 -0700
Subject: net: add skb_recycle_check() to enable netdriver skb recycling

This patch adds skb_recycle_check(), which can be used by a network
driver after transmitting an skb to check whether this skb can be
recycled as a receive buffer.

skb_recycle_check() checks that the skb is not shared or cloned, and
that it is linear and its head portion large enough (as determined by
the driver) to be recycled as a receive buffer.  If these conditions
are met, it does any necessary reference count dropping and cleans
up the skbuff as if it just came from __alloc_skb().

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/skbuff.c      | 41 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a19ea43fea0..720b688c22b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -383,6 +383,8 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 	return __alloc_skb(size, priority, 1, -1);
 }
 
+extern int skb_recycle_check(struct sk_buff *skb, int skb_size);
+
 extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
 				 gfp_t priority);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ca1ccdf1ef7..2c218a0808b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -363,8 +363,7 @@ static void kfree_skbmem(struct sk_buff *skb)
 	}
 }
 
-/* Free everything but the sk_buff shell. */
-static void skb_release_all(struct sk_buff *skb)
+static void skb_release_head_state(struct sk_buff *skb)
 {
 	dst_release(skb->dst);
 #ifdef CONFIG_XFRM
@@ -388,6 +387,12 @@ static void skb_release_all(struct sk_buff *skb)
 	skb->tc_verd = 0;
 #endif
 #endif
+}
+
+/* Free everything but the sk_buff shell. */
+static void skb_release_all(struct sk_buff *skb)
+{
+	skb_release_head_state(skb);
 	skb_release_data(skb);
 }
 
@@ -424,6 +429,38 @@ void kfree_skb(struct sk_buff *skb)
 	__kfree_skb(skb);
 }
 
+int skb_recycle_check(struct sk_buff *skb, int skb_size)
+{
+	struct skb_shared_info *shinfo;
+
+	if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
+		return 0;
+
+	skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
+	if (skb_end_pointer(skb) - skb->head < skb_size)
+		return 0;
+
+	if (skb_shared(skb) || skb_cloned(skb))
+		return 0;
+
+	skb_release_head_state(skb);
+	shinfo = skb_shinfo(skb);
+	atomic_set(&shinfo->dataref, 1);
+	shinfo->nr_frags = 0;
+	shinfo->gso_size = 0;
+	shinfo->gso_segs = 0;
+	shinfo->gso_type = 0;
+	shinfo->ip6_frag_id = 0;
+	shinfo->frag_list = NULL;
+
+	memset(skb, 0, offsetof(struct sk_buff, tail));
+	skb_reset_tail_pointer(skb);
+	skb->data = skb->head + NET_SKB_PAD;
+
+	return 1;
+}
+EXPORT_SYMBOL(skb_recycle_check);
+
 static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 {
 	new->tstamp		= old->tstamp;
-- 
cgit v1.2.3-70-g09d2


From 5dc121e9a7a8a3721cefeb07f3559f50fbedc67e Mon Sep 17 00:00:00 2001
From: Arnaud Ebalard <arno@natisbad.org>
Date: Wed, 1 Oct 2008 02:37:56 -0700
Subject: XFRM,IPv6: initialize ip6_dst_blackhole_ops.kmem_cachep

ip6_dst_blackhole_ops.kmem_cachep is not expected to be NULL (i.e. to
be initialized) when dst_alloc() is called from ip6_dst_blackhole().
Otherwise, it results in the following (xfrm_larval_drop is now set to
1 by default):

[   78.697642] Unable to handle kernel paging request for data at address 0x0000004c
[   78.703449] Faulting instruction address: 0xc0097f54
[   78.786896] Oops: Kernel access of bad area, sig: 11 [#1]
[   78.792791] PowerMac
[   78.798383] Modules linked in: btusb usbhid bluetooth b43 mac80211 cfg80211 ehci_hcd ohci_hcd sungem sungem_phy usbcore ssb
[   78.804263] NIP: c0097f54 LR: c0334a28 CTR: c002d430
[   78.809997] REGS: eef19ad0 TRAP: 0300   Not tainted  (2.6.27-rc5)
[   78.815743] MSR: 00001032 <ME,IR,DR>  CR: 22242482  XER: 20000000
[   78.821550] DAR: 0000004c, DSISR: 40000000
[   78.827278] TASK = eef0df40[3035] 'mip6d' THREAD: eef18000
[   78.827408] GPR00: 00001032 eef19b80 eef0df40 00000000 00008020 eef19c30 00000001 00000000
[   78.833249] GPR08: eee5101c c05a5c10 ef9ad500 00000000 24242422 1005787c 00000000 1004f960
[   78.839151] GPR16: 00000000 10024e90 10050040 48030018 0fe44150 00000000 00000000 eef19c30
[   78.845046] GPR24: eef19e44 00000000 eef19bf8 efb37c14 eef19bf8 00008020 00009032 c0596064
[   78.856671] NIP [c0097f54] kmem_cache_alloc+0x20/0x94
[   78.862581] LR [c0334a28] dst_alloc+0x40/0xc4
[   78.868451] Call Trace:
[   78.874252] [eef19b80] [c03c1810] ip6_dst_lookup_tail+0x1c8/0x1dc (unreliable)
[   78.880222] [eef19ba0] [c0334a28] dst_alloc+0x40/0xc4
[   78.886164] [eef19bb0] [c03cd698] ip6_dst_blackhole+0x28/0x1cc
[   78.892090] [eef19be0] [c03d9be8] rawv6_sendmsg+0x75c/0xc88
[   78.897999] [eef19cb0] [c038bca4] inet_sendmsg+0x4c/0x78
[   78.903907] [eef19cd0] [c03207c8] sock_sendmsg+0xac/0xe4
[   78.909734] [eef19db0] [c03209e4] sys_sendmsg+0x1e4/0x2a0
[   78.915540] [eef19f00] [c03220a8] sys_socketcall+0xfc/0x210
[   78.921406] [eef19f40] [c0014b3c] ret_from_syscall+0x0/0x38
[   78.927295] --- Exception: c01 at 0xfe2d730
[   78.927297]     LR = 0xfe2d71c
[   78.939019] Instruction dump:
[   78.944835] 91640018 9144001c 900a0000 4bffff44 9421ffe0 7c0802a6 bf810010 7c9d2378
[   78.950694] 90010024 7fc000a6 57c0045e 7c000124 <83e3004c> 8383005c 2f9f0000 419e0050
[   78.956464] ---[ end trace 05fa1ed7972487a1 ]---

As commented by Benjamin Thery, the bug was introduced by
f2fc6a54585a1be6669613a31fbaba2ecbadcd36, while adding network
namespaces support to ipv6 routes.

Signed-off-by: Arnaud Ebalard <arno@natisbad.org>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9af6115f0f5..63442a1e741 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2688,6 +2688,8 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto out_kmem_cache;
 
+	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
+
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
-- 
cgit v1.2.3-70-g09d2


From 93c8b90f01f0dc73891da4e84b26524b61d29d66 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 1 Oct 2008 02:48:31 -0700
Subject: ipv6: almost identical frag hashing funcs combined
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

$ diff-funcs ip6qhashfn reassembly.c netfilter/nf_conntrack_reasm.c
 --- reassembly.c:ip6qhashfn()
 +++ netfilter/nf_conntrack_reasm.c:ip6qhashfn()
@@ -1,5 +1,5 @@
-static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
-			       struct in6_addr *daddr)
+static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr,
+			       const struct in6_addr *daddr)
 {
 	u32 a, b, c;

@@ -9,7 +9,7 @@

 	a += JHASH_GOLDEN_RATIO;
 	b += JHASH_GOLDEN_RATIO;
-	c += ip6_frags.rnd;
+	c += nf_frags.rnd;
 	__jhash_mix(a, b, c);

 	a += (__force u32)saddr->s6_addr32[3];

And codiff xx.o.old xx.o.new:

net/ipv6/netfilter/nf_conntrack_reasm.c:
  ip6qhashfn         | -512
  nf_hashfn          |   +6
  nf_ct_frag6_gather |  +36
 3 functions changed, 42 bytes added, 512 bytes removed, diff: -470
net/ipv6/reassembly.c:
  ip6qhashfn    | -512
  ip6_hashfn    |   +7
  ipv6_frag_rcv |  +89
 3 functions changed, 96 bytes added, 512 bytes removed, diff: -416

net/ipv6/reassembly.c:
  inet6_hash_frag | +510
 1 function changed, 510 bytes added, diff: +510

Total: -376

Compile tested.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h                      |  2 ++
 net/ipv6/netfilter/nf_conntrack_reasm.c | 32 ++------------------------------
 net/ipv6/reassembly.c                   | 11 ++++++-----
 3 files changed, 10 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 113028fb8f6..dfa7ae3c560 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -576,6 +576,8 @@ extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
 extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 			 struct group_filter __user *optval,
 			 int __user *optlen);
+extern unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+				    const struct in6_addr *daddr, u32 rnd);
 
 #ifdef CONFIG_PROC_FS
 extern int  ac6_proc_init(struct net *net);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 52d06dd4b81..9967ac7a01a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -27,7 +27,6 @@
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 #include <linux/random.h>
-#include <linux/jhash.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -103,39 +102,12 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = {
 };
 #endif
 
-static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr,
-			       const struct in6_addr *daddr)
-{
-	u32 a, b, c;
-
-	a = (__force u32)saddr->s6_addr32[0];
-	b = (__force u32)saddr->s6_addr32[1];
-	c = (__force u32)saddr->s6_addr32[2];
-
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
-	c += nf_frags.rnd;
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)saddr->s6_addr32[3];
-	b += (__force u32)daddr->s6_addr32[0];
-	c += (__force u32)daddr->s6_addr32[1];
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)daddr->s6_addr32[2];
-	b += (__force u32)daddr->s6_addr32[3];
-	c += (__force u32)id;
-	__jhash_mix(a, b, c);
-
-	return c & (INETFRAGS_HASHSZ - 1);
-}
-
 static unsigned int nf_hashfn(struct inet_frag_queue *q)
 {
 	const struct nf_ct_frag6_queue *nq;
 
 	nq = container_of(q, struct nf_ct_frag6_queue, q);
-	return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr);
+	return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
 }
 
 static void nf_skb_free(struct sk_buff *skb)
@@ -209,7 +181,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
 	arg.dst = dst;
 
 	read_lock_bh(&nf_frags.lock);
-	hash = ip6qhashfn(id, src, dst);
+	hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
 
 	q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
 	local_bh_enable();
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 89184b576e2..2eeadfa039c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -99,8 +99,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
  * callers should be careful not to use the hash value outside the ipfrag_lock
  * as doing so could race with ipfrag_hash_rnd being recalculated.
  */
-static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
-			       struct in6_addr *daddr)
+unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+			     const struct in6_addr *daddr, u32 rnd)
 {
 	u32 a, b, c;
 
@@ -110,7 +110,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 
 	a += JHASH_GOLDEN_RATIO;
 	b += JHASH_GOLDEN_RATIO;
-	c += ip6_frags.rnd;
+	c += rnd;
 	__jhash_mix(a, b, c);
 
 	a += (__force u32)saddr->s6_addr32[3];
@@ -125,13 +125,14 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 
 	return c & (INETFRAGS_HASHSZ - 1);
 }
+EXPORT_SYMBOL_GPL(inet6_hash_frag);
 
 static unsigned int ip6_hashfn(struct inet_frag_queue *q)
 {
 	struct frag_queue *fq;
 
 	fq = container_of(q, struct frag_queue, q);
-	return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
+	return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd);
 }
 
 int ip6_frag_match(struct inet_frag_queue *q, void *a)
@@ -247,7 +248,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst,
 	arg.dst = dst;
 
 	read_lock(&ip6_frags.lock);
-	hash = ip6qhashfn(id, src, dst);
+	hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
 
 	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
 	if (q == NULL)
-- 
cgit v1.2.3-70-g09d2


From 0523820482dcb42784572ffd2296c2f08c275a2b Mon Sep 17 00:00:00 2001
From: Timo Teras <timo.teras@iki.fi>
Date: Wed, 1 Oct 2008 05:17:54 -0700
Subject: af_key: Free dumping state on socket close

Fix a xfrm_{state,policy}_walk leak if pfkey socket is closed while
dumping is on-going.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index d628df97e02..b7f5a1c353e 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -73,22 +73,18 @@ static int pfkey_can_dump(struct sock *sk)
 	return 0;
 }
 
-static int pfkey_do_dump(struct pfkey_sock *pfk)
+static void pfkey_terminate_dump(struct pfkey_sock *pfk)
 {
-	int rc;
-
-	rc = pfk->dump.dump(pfk);
-	if (rc == -ENOBUFS)
-		return 0;
-
-	pfk->dump.done(pfk);
-	pfk->dump.dump = NULL;
-	pfk->dump.done = NULL;
-	return rc;
+	if (pfk->dump.dump) {
+		pfk->dump.done(pfk);
+		pfk->dump.dump = NULL;
+		pfk->dump.done = NULL;
+	}
 }
 
 static void pfkey_sock_destruct(struct sock *sk)
 {
+	pfkey_terminate_dump(pfkey_sk(sk));
 	skb_queue_purge(&sk->sk_receive_queue);
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
@@ -310,6 +306,18 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
 	return err;
 }
 
+static int pfkey_do_dump(struct pfkey_sock *pfk)
+{
+	int rc;
+
+	rc = pfk->dump.dump(pfk);
+	if (rc == -ENOBUFS)
+		return 0;
+
+	pfkey_terminate_dump(pfk);
+	return rc;
+}
+
 static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig)
 {
 	*new = *orig;
-- 
cgit v1.2.3-70-g09d2


From 12a169e7d8f4b1c95252d8b04ed0f1033ed7cfe2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 1 Oct 2008 07:03:24 -0700
Subject: ipsec: Put dumpers on the dump list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Herbert Xu came up with the idea and the original patch to make
xfrm_state dump list contain also dumpers:

As it is we go to extraordinary lengths to ensure that states
don't go away while dumpers go to sleep.  It's much easier if
we just put the dumpers themselves on the list since they can't
go away while they're going.

I've also changed the order of addition on new states to prevent
a never-ending dump.

Timo Teräs improved the patch to apply cleanly to latest tree,
modified iteration code to be more readable by using a common
struct for entries in the list, implemented the same idea for
xfrm_policy dumping and moved the af_key specific "last" entry
caching to af_key.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h |   2 +-
 include/net/xfrm.h      |  70 ++++++++++++------------------
 net/key/af_key.c        |  38 ++++++++++++++---
 net/xfrm/xfrm_policy.c  | 111 +++++++++++++++++++++++++-----------------------
 net/xfrm/xfrm_state.c   | 109 ++++++++++++++++-------------------------------
 net/xfrm/xfrm_user.c    |   4 +-
 6 files changed, 159 insertions(+), 175 deletions(-)

(limited to 'net')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index cbba7760545..9ff1b54908f 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -220,7 +220,7 @@ struct netlink_callback
 	int		(*dump)(struct sk_buff * skb, struct netlink_callback *cb);
 	int		(*done)(struct netlink_callback *cb);
 	int		family;
-	long		args[7];
+	long		args[6];
 };
 
 struct netlink_notify
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 48630b26659..b98d2056f27 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -117,12 +117,21 @@ extern struct mutex xfrm_cfg_mutex;
       metrics. Plus, it will be made via sk->sk_dst_cache. Solved.
  */
 
+struct xfrm_state_walk {
+	struct list_head	all;
+	u8			state;
+	union {
+		u8		dying;
+		u8		proto;
+	};
+	u32			seq;
+};
+
 /* Full description of state of transformer. */
 struct xfrm_state
 {
-	struct list_head	all;
 	union {
-		struct list_head	gclist;
+		struct hlist_node	gclist;
 		struct hlist_node	bydst;
 	};
 	struct hlist_node	bysrc;
@@ -136,12 +145,8 @@ struct xfrm_state
 
 	u32			genid;
 
-	/* Key manger bits */
-	struct {
-		u8		state;
-		u8		dying;
-		u32		seq;
-	} km;
+	/* Key manager bits */
+	struct xfrm_state_walk	km;
 
 	/* Parameters of this state. */
 	struct {
@@ -449,10 +454,20 @@ struct xfrm_tmpl
 
 #define XFRM_MAX_DEPTH		6
 
+struct xfrm_policy_walk_entry {
+	struct list_head	all;
+	u8			dead;
+};
+
+struct xfrm_policy_walk {
+	struct xfrm_policy_walk_entry walk;
+	u8 type;
+	u32 seq;
+};
+
 struct xfrm_policy
 {
 	struct xfrm_policy	*next;
-	struct list_head	bytype;
 	struct hlist_node	bydst;
 	struct hlist_node	byidx;
 
@@ -467,13 +482,12 @@ struct xfrm_policy
 	struct xfrm_lifetime_cfg lft;
 	struct xfrm_lifetime_cur curlft;
 	struct dst_entry       *bundles;
-	u16			family;
+	struct xfrm_policy_walk_entry walk;
 	u8			type;
 	u8			action;
 	u8			flags;
-	u8			dead;
 	u8			xfrm_nr;
-	/* XXX 1 byte hole, try to pack */
+	u16			family;
 	struct xfrm_sec_ctx	*security;
 	struct xfrm_tmpl       	xfrm_vec[XFRM_MAX_DEPTH];
 };
@@ -1245,20 +1259,6 @@ struct xfrm6_tunnel {
 	int priority;
 };
 
-struct xfrm_state_walk {
-	struct list_head list;
-	unsigned long genid;
-	struct xfrm_state *state;
-	int count;
-	u8 proto;
-};
-
-struct xfrm_policy_walk {
-	struct xfrm_policy *policy;
-	int count;
-	u8 type, cur_type;
-};
-
 extern void xfrm_init(void);
 extern void xfrm4_init(void);
 extern void xfrm_state_init(void);
@@ -1410,24 +1410,10 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 
 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp);
 
-static inline void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
-{
-	walk->cur_type = XFRM_POLICY_TYPE_MAIN;
-	walk->type = type;
-	walk->policy = NULL;
-	walk->count = 0;
-}
-
-static inline void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
-{
-	if (walk->policy != NULL) {
-		xfrm_pol_put(walk->policy);
-		walk->policy = NULL;
-	}
-}
-
+extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type);
 extern int xfrm_policy_walk(struct xfrm_policy_walk *walk,
 	int (*func)(struct xfrm_policy *, int, int, void*), void *);
+extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
 struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 					  struct xfrm_selector *sel,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index b7f5a1c353e..7ae641df70b 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -58,6 +58,7 @@ struct pfkey_sock {
 			struct xfrm_policy_walk	policy;
 			struct xfrm_state_walk	state;
 		} u;
+		struct sk_buff	*skb;
 	} dump;
 };
 
@@ -76,6 +77,10 @@ static int pfkey_can_dump(struct sock *sk)
 static void pfkey_terminate_dump(struct pfkey_sock *pfk)
 {
 	if (pfk->dump.dump) {
+		if (pfk->dump.skb) {
+			kfree_skb(pfk->dump.skb);
+			pfk->dump.skb = NULL;
+		}
 		pfk->dump.done(pfk);
 		pfk->dump.dump = NULL;
 		pfk->dump.done = NULL;
@@ -308,12 +313,25 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
 
 static int pfkey_do_dump(struct pfkey_sock *pfk)
 {
+	struct sadb_msg *hdr;
 	int rc;
 
 	rc = pfk->dump.dump(pfk);
 	if (rc == -ENOBUFS)
 		return 0;
 
+	if (pfk->dump.skb) {
+		if (!pfkey_can_dump(&pfk->sk))
+			return 0;
+
+		hdr = (struct sadb_msg *) pfk->dump.skb->data;
+		hdr->sadb_msg_seq = 0;
+		hdr->sadb_msg_errno = rc;
+		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+				&pfk->sk);
+		pfk->dump.skb = NULL;
+	}
+
 	pfkey_terminate_dump(pfk);
 	return rc;
 }
@@ -1744,9 +1762,14 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr)
 	out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
 	out_hdr->sadb_msg_errno = 0;
 	out_hdr->sadb_msg_reserved = 0;
-	out_hdr->sadb_msg_seq = count;
+	out_hdr->sadb_msg_seq = count + 1;
 	out_hdr->sadb_msg_pid = pfk->dump.msg_pid;
-	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk);
+
+	if (pfk->dump.skb)
+		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+				&pfk->sk);
+	pfk->dump.skb = out_skb;
+
 	return 0;
 }
 
@@ -2245,7 +2268,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	return 0;
 
 out:
-	xp->dead = 1;
+	xp->walk.dead = 1;
 	xfrm_policy_destroy(xp);
 	return err;
 }
@@ -2583,9 +2606,14 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
 	out_hdr->sadb_msg_type = SADB_X_SPDDUMP;
 	out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
 	out_hdr->sadb_msg_errno = 0;
-	out_hdr->sadb_msg_seq = count;
+	out_hdr->sadb_msg_seq = count + 1;
 	out_hdr->sadb_msg_pid = pfk->dump.msg_pid;
-	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk);
+
+	if (pfk->dump.skb)
+		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+				&pfk->sk);
+	pfk->dump.skb = out_skb;
+
 	return 0;
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ef9ccbc3875..b7ec08025ff 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL(xfrm_cfg_mutex);
 
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
-static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX];
+static struct list_head xfrm_policy_all;
 unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
 EXPORT_SYMBOL(xfrm_policy_count);
 
@@ -164,7 +164,7 @@ static void xfrm_policy_timer(unsigned long data)
 
 	read_lock(&xp->lock);
 
-	if (xp->dead)
+	if (xp->walk.dead)
 		goto out;
 
 	dir = xfrm_policy_id2dir(xp->index);
@@ -236,7 +236,7 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
 	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
 
 	if (policy) {
-		INIT_LIST_HEAD(&policy->bytype);
+		INIT_LIST_HEAD(&policy->walk.all);
 		INIT_HLIST_NODE(&policy->bydst);
 		INIT_HLIST_NODE(&policy->byidx);
 		rwlock_init(&policy->lock);
@@ -252,17 +252,13 @@ EXPORT_SYMBOL(xfrm_policy_alloc);
 
 void xfrm_policy_destroy(struct xfrm_policy *policy)
 {
-	BUG_ON(!policy->dead);
+	BUG_ON(!policy->walk.dead);
 
 	BUG_ON(policy->bundles);
 
 	if (del_timer(&policy->timer))
 		BUG();
 
-	write_lock_bh(&xfrm_policy_lock);
-	list_del(&policy->bytype);
-	write_unlock_bh(&xfrm_policy_lock);
-
 	security_xfrm_policy_free(policy->security);
 	kfree(policy);
 }
@@ -310,8 +306,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 	int dead;
 
 	write_lock_bh(&policy->lock);
-	dead = policy->dead;
-	policy->dead = 1;
+	dead = policy->walk.dead;
+	policy->walk.dead = 1;
 	write_unlock_bh(&policy->lock);
 
 	if (unlikely(dead)) {
@@ -609,6 +605,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	if (delpol) {
 		hlist_del(&delpol->bydst);
 		hlist_del(&delpol->byidx);
+		list_del(&delpol->walk.all);
 		xfrm_policy_count[dir]--;
 	}
 	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
@@ -617,7 +614,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
 		xfrm_pol_hold(policy);
-	list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]);
+	list_add(&policy->walk.all, &xfrm_policy_all);
 	write_unlock_bh(&xfrm_policy_lock);
 
 	if (delpol)
@@ -684,6 +681,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 				}
 				hlist_del(&pol->bydst);
 				hlist_del(&pol->byidx);
+				list_del(&pol->walk.all);
 				xfrm_policy_count[dir]--;
 			}
 			ret = pol;
@@ -727,6 +725,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
 				}
 				hlist_del(&pol->bydst);
 				hlist_del(&pol->byidx);
+				list_del(&pol->walk.all);
 				xfrm_policy_count[dir]--;
 			}
 			ret = pol;
@@ -840,6 +839,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
 					continue;
 				hlist_del(&pol->bydst);
 				hlist_del(&pol->byidx);
+				list_del(&pol->walk.all);
 				write_unlock_bh(&xfrm_policy_lock);
 
 				xfrm_audit_policy_delete(pol, 1,
@@ -867,60 +867,68 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk,
 		     int (*func)(struct xfrm_policy *, int, int, void*),
 		     void *data)
 {
-	struct xfrm_policy *old, *pol, *last = NULL;
+	struct xfrm_policy *pol;
+	struct xfrm_policy_walk_entry *x;
 	int error = 0;
 
 	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
 	    walk->type != XFRM_POLICY_TYPE_ANY)
 		return -EINVAL;
 
-	if (walk->policy == NULL && walk->count != 0)
+	if (list_empty(&walk->walk.all) && walk->seq != 0)
 		return 0;
 
-	old = pol = walk->policy;
-	walk->policy = NULL;
-	read_lock_bh(&xfrm_policy_lock);
-
-	for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) {
-		if (walk->type != walk->cur_type &&
-		    walk->type != XFRM_POLICY_TYPE_ANY)
+	write_lock_bh(&xfrm_policy_lock);
+	if (list_empty(&walk->walk.all))
+		x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all);
+	else
+		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
+	list_for_each_entry_from(x, &xfrm_policy_all, all) {
+		if (x->dead)
 			continue;
-
-		if (pol == NULL) {
-			pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type],
-					       struct xfrm_policy, bytype);
-		}
-		list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) {
-			if (pol->dead)
-				continue;
-			if (last) {
-				error = func(last, xfrm_policy_id2dir(last->index),
-					     walk->count, data);
-				if (error) {
-					xfrm_pol_hold(last);
-					walk->policy = last;
-					goto out;
-				}
-			}
-			last = pol;
-			walk->count++;
+		pol = container_of(x, struct xfrm_policy, walk);
+		if (walk->type != XFRM_POLICY_TYPE_ANY &&
+		    walk->type != pol->type)
+			continue;
+		error = func(pol, xfrm_policy_id2dir(pol->index),
+			     walk->seq, data);
+		if (error) {
+			list_move_tail(&walk->walk.all, &x->all);
+			goto out;
 		}
-		pol = NULL;
+		walk->seq++;
 	}
-	if (walk->count == 0) {
+	if (walk->seq == 0) {
 		error = -ENOENT;
 		goto out;
 	}
-	if (last)
-		error = func(last, xfrm_policy_id2dir(last->index), 0, data);
+	list_del_init(&walk->walk.all);
 out:
-	read_unlock_bh(&xfrm_policy_lock);
-	if (old != NULL)
-		xfrm_pol_put(old);
+	write_unlock_bh(&xfrm_policy_lock);
 	return error;
 }
 EXPORT_SYMBOL(xfrm_policy_walk);
 
+void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
+{
+	INIT_LIST_HEAD(&walk->walk.all);
+	walk->walk.dead = 1;
+	walk->type = type;
+	walk->seq = 0;
+}
+EXPORT_SYMBOL(xfrm_policy_walk_init);
+
+void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
+{
+	if (list_empty(&walk->walk.all))
+		return;
+
+	write_lock_bh(&xfrm_policy_lock);
+	list_del(&walk->walk.all);
+	write_unlock_bh(&xfrm_policy_lock);
+}
+EXPORT_SYMBOL(xfrm_policy_walk_done);
+
 /*
  * Find policy to apply to this flow.
  *
@@ -1077,7 +1085,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
 						     pol->family, dir);
 
-	list_add_tail(&pol->bytype, &xfrm_policy_bytype[pol->type]);
+	list_add(&pol->walk.all, &xfrm_policy_all);
 	hlist_add_head(&pol->bydst, chain);
 	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
 	xfrm_policy_count[dir]++;
@@ -1095,6 +1103,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 
 	hlist_del(&pol->bydst);
 	hlist_del(&pol->byidx);
+	list_del(&pol->walk.all);
 	xfrm_policy_count[dir]--;
 
 	return pol;
@@ -1720,7 +1729,7 @@ restart:
 
 		for (pi = 0; pi < npols; pi++) {
 			read_lock_bh(&pols[pi]->lock);
-			pol_dead |= pols[pi]->dead;
+			pol_dead |= pols[pi]->walk.dead;
 			read_unlock_bh(&pols[pi]->lock);
 		}
 
@@ -2415,9 +2424,7 @@ static void __init xfrm_policy_init(void)
 			panic("XFRM: failed to allocate bydst hash\n");
 	}
 
-	for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++)
-		INIT_LIST_HEAD(&xfrm_policy_bytype[dir]);
-
+	INIT_LIST_HEAD(&xfrm_policy_all);
 	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
 	register_netdevice_notifier(&xfrm_dev_notifier);
 }
@@ -2601,7 +2608,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 	int i, j, n = 0;
 
 	write_lock_bh(&pol->lock);
-	if (unlikely(pol->dead)) {
+	if (unlikely(pol->walk.dead)) {
 		/* target policy has been deleted */
 		write_unlock_bh(&pol->lock);
 		return -ENOENT;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 053970e8765..747fd8c291a 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -59,14 +59,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
 static unsigned int xfrm_state_genid;
 
-/* Counter indicating ongoing walk, protected by xfrm_state_lock. */
-static unsigned long xfrm_state_walk_ongoing;
-/* Counter indicating walk completion, protected by xfrm_cfg_mutex. */
-static unsigned long xfrm_state_walk_completed;
-
-/* List of outstanding state walks used to set the completed counter.  */
-static LIST_HEAD(xfrm_state_walks);
-
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
@@ -199,8 +191,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
 static struct work_struct xfrm_state_gc_work;
-static LIST_HEAD(xfrm_state_gc_leftovers);
-static LIST_HEAD(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
 int __xfrm_state_delete(struct xfrm_state *x);
@@ -412,23 +403,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 
 static void xfrm_state_gc_task(struct work_struct *data)
 {
-	struct xfrm_state *x, *tmp;
-	unsigned long completed;
+	struct xfrm_state *x;
+	struct hlist_node *entry, *tmp;
+	struct hlist_head gc_list;
 
-	mutex_lock(&xfrm_cfg_mutex);
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers);
+	hlist_move_list(&xfrm_state_gc_list, &gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
-	completed = xfrm_state_walk_completed;
-	mutex_unlock(&xfrm_cfg_mutex);
-
-	list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) {
-		if ((long)(x->lastused - completed) > 0)
-			break;
-		list_del(&x->gclist);
+	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
 		xfrm_state_gc_destroy(x);
-	}
 
 	wake_up(&km_waitq);
 }
@@ -529,7 +513,7 @@ struct xfrm_state *xfrm_state_alloc(void)
 	if (x) {
 		atomic_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
-		INIT_LIST_HEAD(&x->all);
+		INIT_LIST_HEAD(&x->km.all);
 		INIT_HLIST_NODE(&x->bydst);
 		INIT_HLIST_NODE(&x->bysrc);
 		INIT_HLIST_NODE(&x->byspi);
@@ -556,7 +540,7 @@ void __xfrm_state_destroy(struct xfrm_state *x)
 	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_add_tail(&x->gclist, &xfrm_state_gc_list);
+	hlist_add_head(&x->gclist, &xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 	schedule_work(&xfrm_state_gc_work);
 }
@@ -569,8 +553,7 @@ int __xfrm_state_delete(struct xfrm_state *x)
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&xfrm_state_lock);
-		x->lastused = xfrm_state_walk_ongoing;
-		list_del_rcu(&x->all);
+		list_del(&x->km.all);
 		hlist_del(&x->bydst);
 		hlist_del(&x->bysrc);
 		if (x->id.spi)
@@ -871,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
-			list_add_tail(&x->all, &xfrm_state_all);
+			list_add(&x->km.all, &xfrm_state_all);
 			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 			h = xfrm_src_hash(daddr, saddr, family);
 			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -940,7 +923,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
 	x->genid = ++xfrm_state_genid;
 
-	list_add_tail(&x->all, &xfrm_state_all);
+	list_add(&x->km.all, &xfrm_state_all);
 
 	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
 			  x->props.reqid, x->props.family);
@@ -1069,7 +1052,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		xfrm_state_hold(x);
 		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
 		add_timer(&x->timer);
-		list_add_tail(&x->all, &xfrm_state_all);
+		list_add(&x->km.all, &xfrm_state_all);
 		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 		h = xfrm_src_hash(daddr, saddr, family);
 		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -1566,79 +1549,59 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
 		    int (*func)(struct xfrm_state *, int, void*),
 		    void *data)
 {
-	struct xfrm_state *old, *x, *last = NULL;
+	struct xfrm_state *state;
+	struct xfrm_state_walk *x;
 	int err = 0;
 
-	if (walk->state == NULL && walk->count != 0)
+	if (walk->seq != 0 && list_empty(&walk->all))
 		return 0;
 
-	old = x = walk->state;
-	walk->state = NULL;
 	spin_lock_bh(&xfrm_state_lock);
-	if (x == NULL)
-		x = list_first_entry(&xfrm_state_all, struct xfrm_state, all);
+	if (list_empty(&walk->all))
+		x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all);
+	else
+		x = list_entry(&walk->all, struct xfrm_state_walk, all);
 	list_for_each_entry_from(x, &xfrm_state_all, all) {
-		if (x->km.state == XFRM_STATE_DEAD)
+		if (x->state == XFRM_STATE_DEAD)
 			continue;
-		if (!xfrm_id_proto_match(x->id.proto, walk->proto))
+		state = container_of(x, struct xfrm_state, km);
+		if (!xfrm_id_proto_match(state->id.proto, walk->proto))
 			continue;
-		if (last) {
-			err = func(last, walk->count, data);
-			if (err) {
-				xfrm_state_hold(last);
-				walk->state = last;
-				goto out;
-			}
+		err = func(state, walk->seq, data);
+		if (err) {
+			list_move_tail(&walk->all, &x->all);
+			goto out;
 		}
-		last = x;
-		walk->count++;
+		walk->seq++;
 	}
-	if (walk->count == 0) {
+	if (walk->seq == 0) {
 		err = -ENOENT;
 		goto out;
 	}
-	if (last)
-		err = func(last, 0, data);
+	list_del_init(&walk->all);
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	if (old != NULL)
-		xfrm_state_put(old);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_walk);
 
 void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
 {
+	INIT_LIST_HEAD(&walk->all);
 	walk->proto = proto;
-	walk->state = NULL;
-	walk->count = 0;
-	list_add_tail(&walk->list, &xfrm_state_walks);
-	walk->genid = ++xfrm_state_walk_ongoing;
+	walk->state = XFRM_STATE_DEAD;
+	walk->seq = 0;
 }
 EXPORT_SYMBOL(xfrm_state_walk_init);
 
 void xfrm_state_walk_done(struct xfrm_state_walk *walk)
 {
-	struct list_head *prev;
-
-	if (walk->state != NULL) {
-		xfrm_state_put(walk->state);
-		walk->state = NULL;
-	}
-
-	prev = walk->list.prev;
-	list_del(&walk->list);
-
-	if (prev != &xfrm_state_walks) {
-		list_entry(prev, struct xfrm_state_walk, list)->genid =
-			walk->genid;
+	if (list_empty(&walk->all))
 		return;
-	}
-
-	xfrm_state_walk_completed = walk->genid;
 
-	if (!list_empty(&xfrm_state_gc_leftovers))
-		schedule_work(&xfrm_state_gc_work);
+	spin_lock_bh(&xfrm_state_lock);
+	list_del(&walk->all);
+	spin_lock_bh(&xfrm_state_lock);
 }
 EXPORT_SYMBOL(xfrm_state_walk_done);
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 04c41504f84..76f75df21e1 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1102,7 +1102,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
 	return xp;
  error:
 	*errp = err;
-	xp->dead = 1;
+	xp->walk.dead = 1;
 	xfrm_policy_destroy(xp);
 	return NULL;
 }
@@ -1595,7 +1595,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -ENOENT;
 
 	read_lock(&xp->lock);
-	if (xp->dead) {
+	if (xp->walk.dead) {
 		read_unlock(&xp->lock);
 		goto out;
 	}
-- 
cgit v1.2.3-70-g09d2


From 4edd87ad5cad8e159e0db3ce3131b3d97219c9cd Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 1 Oct 2008 07:09:38 -0700
Subject: net: BUG instead of corrupting memory in pskb_expand_head

If the caller of pskb_expand_head specifies a negative nhead
we'll silently overwrite other people's memory.  This patch
makes it BUG instead.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2c218a0808b..8bd248a6487 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -738,6 +738,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 #endif
 	long off;
 
+	BUG_ON(nhead < 0);
+
 	if (skb_shared(skb))
 		BUG();
 
-- 
cgit v1.2.3-70-g09d2


From a210d01ae3ee006b59e54e772a7f212486e0f021 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Wed, 1 Oct 2008 07:28:28 -0700
Subject: ipv4: Loosen source address check on IPv4 output

ip_route_output() contains a check to make sure that no flows with
non-local source IP addresses are routed. This obviously makes using
such addresses impossible.

This patch introduces a flowi flag which makes omitting this check
possible. The new flag provides a way of handling transparent and
non-transparent connections differently.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h |  2 ++
 net/ipv4/route.c   | 20 +++++++++++++-------
 2 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/net/flow.h b/include/net/flow.h
index 228b2477cee..b45a5e4fcad 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -47,6 +47,8 @@ struct flowi {
 #define fl4_scope	nl_u.ip4_u.scope
 
 	__u8	proto;
+	__u8	flags;
+#define FLOWI_FLAG_ANYSRC 0x01
 	union {
 		struct {
 			__be16	sport;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f62187bb6d0..a6d7c584f53 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2361,11 +2361,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 		    ipv4_is_zeronet(oldflp->fl4_src))
 			goto out;
 
-		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		dev_out = ip_dev_find(net, oldflp->fl4_src);
-		if (dev_out == NULL)
-			goto out;
-
 		/* I removed check for oif == dev_out->oif here.
 		   It was wrong for two reasons:
 		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
@@ -2377,6 +2372,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 		if (oldflp->oif == 0
 		    && (ipv4_is_multicast(oldflp->fl4_dst) ||
 			oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
+			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+			dev_out = ip_dev_find(net, oldflp->fl4_src);
+			if (dev_out == NULL)
+				goto out;
+
 			/* Special hack: user can direct multicasts
 			   and limited broadcast via necessary interface
 			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
@@ -2395,9 +2395,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 			fl.oif = dev_out->ifindex;
 			goto make_route;
 		}
-		if (dev_out)
+
+		if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
+			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+			dev_out = ip_dev_find(net, oldflp->fl4_src);
+			if (dev_out == NULL)
+				goto out;
 			dev_put(dev_out);
-		dev_out = NULL;
+			dev_out = NULL;
+		}
 	}
 
 
-- 
cgit v1.2.3-70-g09d2


From f5715aea4564f233767ea1d944b2637a5fd7cd2e Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 1 Oct 2008 07:30:02 -0700
Subject: ipv4: Implement IP_TRANSPARENT socket option

This patch introduces the IP_TRANSPARENT socket option: enabling that
will make the IPv4 routing omit the non-local source address check on
output. Setting IP_TRANSPARENT requires NET_ADMIN capability.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h               |  1 +
 include/net/inet_sock.h          |  3 ++-
 include/net/inet_timewait_sock.h |  3 ++-
 net/ipv4/inet_timewait_sock.c    |  1 +
 net/ipv4/ip_sockglue.c           | 15 ++++++++++++++-
 5 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/in.h b/include/linux/in.h
index 4065313cd7e..db458beef19 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -75,6 +75,7 @@ struct in_addr {
 #define IP_IPSEC_POLICY	16
 #define IP_XFRM_POLICY	17
 #define IP_PASSSEC	18
+#define IP_TRANSPARENT	19
 
 /* BSD compatibility */
 #define IP_RECVRETOPTS	IP_RETOPTS
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 643e26be058..e97b66e2a9d 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -129,7 +129,8 @@ struct inet_sock {
 				is_icsk:1,
 				freebind:1,
 				hdrincl:1,
-				mc_loop:1;
+				mc_loop:1,
+				transparent:1;
 	int			mc_index;
 	__be32			mc_addr;
 	struct ip_mc_socklist	*mc_list;
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 91324908fcc..80e4977631b 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -128,7 +128,8 @@ struct inet_timewait_sock {
 	__be16			tw_dport;
 	__u16			tw_num;
 	/* And these are ours. */
-	__u8			tw_ipv6only:1;
+	__u8			tw_ipv6only:1,
+				tw_transparent:1;
 	/* 15 bits hole, try to pack */
 	__u16			tw_ipv6_offset;
 	unsigned long		tw_ttd;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 743f011b9a8..1c5fd38f882 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -126,6 +126,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 		tw->tw_reuse	    = sk->sk_reuse;
 		tw->tw_hash	    = sk->sk_hash;
 		tw->tw_ipv6only	    = 0;
+		tw->tw_transparent  = inet->transparent;
 		tw->tw_prot	    = sk->sk_prot_creator;
 		twsk_net_set(tw, hold_net(sock_net(sk)));
 		atomic_set(&tw->tw_refcnt, 1);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 105d92a039b..465abf0a986 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -419,7 +419,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_TTL) | (1<<IP_HDRINCL) |
 			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
-			     (1<<IP_PASSSEC))) ||
+			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) ||
 	    optname == IP_MULTICAST_TTL ||
 	    optname == IP_MULTICAST_LOOP) {
 		if (optlen >= sizeof(int)) {
@@ -878,6 +878,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		err = xfrm_user_policy(sk, optname, optval, optlen);
 		break;
 
+	case IP_TRANSPARENT:
+		if (!capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
+			break;
+		}
+		if (optlen < 1)
+			goto e_inval;
+		inet->transparent = !!val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -1130,6 +1140,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_FREEBIND:
 		val = inet->freebind;
 		break;
+	case IP_TRANSPARENT:
+		val = inet->transparent;
+		break;
 	default:
 		release_sock(sk);
 		return -ENOPROTOOPT;
-- 
cgit v1.2.3-70-g09d2


From b9fb15067ce93497bef852c05e406d7a96212a9a Mon Sep 17 00:00:00 2001
From: Tóth László Attila <panther@balabit.hu>
Date: Wed, 1 Oct 2008 07:31:24 -0700
Subject: ipv4: Allow binding to non-local addresses if IP_TRANSPARENT is set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Setting IP_TRANSPARENT is not really useful without allowing non-local
binds for the socket. To make user-space code simpler we allow these
binds even if IP_TRANSPARENT is set but IP_FREEBIND is not.

Signed-off-by: Tóth László Attila <panther@balabit.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8a3ac1fa71a..1fbff5fa424 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -469,7 +469,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	 */
 	err = -EADDRNOTAVAIL;
 	if (!sysctl_ip_nonlocal_bind &&
-	    !inet->freebind &&
+	    !(inet->freebind || inet->transparent) &&
 	    addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
 	    chk_addr_ret != RTN_LOCAL &&
 	    chk_addr_ret != RTN_MULTICAST &&
-- 
cgit v1.2.3-70-g09d2


From 1668e010cbe1a7567c81d4c02d31dde9859e9da1 Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 1 Oct 2008 07:33:10 -0700
Subject: ipv4: Make inet_sock.h independent of route.h

inet_iif() in inet_sock.h requires route.h. Since users of inet_iif()
usually require other route.h functionality anyway this patch moves
inet_iif() to route.h.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h            | 7 -------
 include/net/ip_vs.h                | 1 +
 include/net/route.h                | 5 +++++
 net/ipv4/netfilter/nf_nat_helper.c | 1 +
 net/ipv6/af_inet6.c                | 1 +
 5 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index e97b66e2a9d..139b78b4dfe 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -24,7 +24,6 @@
 #include <net/flow.h>
 #include <net/sock.h>
 #include <net/request_sock.h>
-#include <net/route.h>
 #include <net/netns/hash.h>
 
 /** struct ip_options - IP Options
@@ -195,12 +194,6 @@ static inline int inet_sk_ehashfn(const struct sock *sk)
 	return inet_ehashfn(net, laddr, lport, faddr, fport);
 }
 
-
-static inline int inet_iif(const struct sk_buff *skb)
-{
-	return skb->rtable->rt_iif;
-}
-
 static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
 {
 	struct request_sock *req = reqsk_alloc(ops);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 33e2ac6ceb3..0b2071d9326 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -22,6 +22,7 @@
 
 #include <net/checksum.h>
 #include <linux/netfilter.h>		/* for union nf_inet_addr */
+#include <linux/ip.h>
 #include <linux/ipv6.h>			/* for struct ipv6hdr */
 #include <net/ipv6.h>			/* for ipv6_addr_copy */
 
diff --git a/include/net/route.h b/include/net/route.h
index 4f0d8c14736..31d1485b624 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -204,4 +204,9 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt)
 	return rt->peer;
 }
 
+static inline int inet_iif(const struct sk_buff *skb)
+{
+	return skb->rtable->rt_iif;
+}
+
 #endif	/* _ROUTE_H */
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 11976ea2988..112dcfa1290 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -16,6 +16,7 @@
 #include <linux/udp.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
+#include <net/route.h>
 
 #include <linux/netfilter_ipv4.h>
 #include <net/netfilter/nf_conntrack.h>
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 95055f8c3f3..f018704ecb8 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -50,6 +50,7 @@
 #include <net/ipip.h>
 #include <net/protocol.h>
 #include <net/inet_common.h>
+#include <net/route.h>
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
-- 
cgit v1.2.3-70-g09d2


From 88ef4a5a78e63420dd1dd770f1bd1dc198926b04 Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 1 Oct 2008 07:41:00 -0700
Subject: tcp: Handle TCP SYN+ACK/ACK/RST transparency

The TCP stack sends out SYN+ACK/ACK/RST reply packets in response to
incoming packets. The non-local source address check on output bites
us again, as replies for transparently redirected traffic won't have a
chance to leave the node.

This patch selectively sets the FLOWI_FLAG_ANYSRC flag when doing the
route lookup for those replies. Transparent replies are enabled if the
listening socket has the transparent socket flag set.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h |  8 +++++++-
 include/net/ip.h        |  3 +++
 net/ipv4/tcp_ipv4.c     | 12 +++++++++---
 3 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 139b78b4dfe..dced3f64f97 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -72,7 +72,8 @@ struct inet_request_sock {
 				sack_ok	   : 1,
 				wscale_ok  : 1,
 				ecn_ok	   : 1,
-				acked	   : 1;
+				acked	   : 1,
+				no_srccheck: 1;
 	struct ip_options	*opt;
 };
 
@@ -204,4 +205,9 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops
 	return req;
 }
 
+static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
+{
+	return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
+}
+
 #endif	/* _INET_SOCK_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 250e6ef025a..90b27f634b7 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -140,12 +140,15 @@ static inline void ip_tr_mc_map(__be32 addr, char *buf)
 
 struct ip_reply_arg {
 	struct kvec iov[1];   
+	int	    flags;
 	__wsum 	    csum;
 	int	    csumoffset; /* u16 offset of csum in iov[0].iov_base */
 				/* -1 if not needed */ 
 	int	    bound_dev_if;
 }; 
 
+#define IP_REPLY_ARG_NOSRCCHECK 1
+
 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
 		   unsigned int len); 
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d13688e3558..8b24bd833cb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -591,6 +591,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 				      ip_hdr(skb)->saddr, /* XXX */
 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 
 	net = dev_net(skb->dst->dev);
 	ip_send_reply(net->ipv4.tcp_sock, skb,
@@ -606,7 +607,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			    u32 win, u32 ts, int oif,
-			    struct tcp_md5sig_key *key)
+			    struct tcp_md5sig_key *key,
+			    int reply_flags)
 {
 	struct tcphdr *th = tcp_hdr(skb);
 	struct {
@@ -659,6 +661,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 				    ip_hdr(skb)->daddr, &rep.th);
 	}
 #endif
+	arg.flags = reply_flags;
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 				      ip_hdr(skb)->saddr, /* XXX */
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
@@ -681,7 +684,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 			tcptw->tw_ts_recent,
 			tw->tw_bound_dev_if,
-			tcp_twsk_md5_key(tcptw)
+			tcp_twsk_md5_key(tcptw),
+			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
 			);
 
 	inet_twsk_put(tw);
@@ -694,7 +698,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 			req->ts_recent,
 			0,
-			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr));
+			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
+			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
 }
 
 /*
@@ -1244,6 +1249,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
+	ireq->no_srccheck = inet_sk(sk)->transparent;
 	ireq->opt = tcp_v4_save_options(sk, skb);
 	if (!want_cookie)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
-- 
cgit v1.2.3-70-g09d2


From 86b08d867d7de001ab224180ed7865fab93fd56e Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 1 Oct 2008 07:44:42 -0700
Subject: ipv4: Make Netfilter's ip_route_me_harder() non-local address
 compatible

Netfilter's ip_route_me_harder() tries to re-route packets either
generated or re-routed by Netfilter. This patch changes
ip_route_me_harder() to handle packets from non-locally-bound sockets
with IP_TRANSPARENT set as local and to set the appropriate flowi
flags when re-doing the routing lookup.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h                | 6 ++++++
 net/ipv4/inet_connection_sock.c | 1 +
 net/ipv4/ip_output.c            | 4 +++-
 net/ipv4/netfilter.c            | 3 +++
 net/ipv4/syncookies.c           | 2 ++
 5 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 90b27f634b7..d678ea3d474 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -29,6 +29,7 @@
 
 #include <net/inet_sock.h>
 #include <net/snmp.h>
+#include <net/flow.h>
 
 struct sock;
 
@@ -149,6 +150,11 @@ struct ip_reply_arg {
 
 #define IP_REPLY_ARG_NOSRCCHECK 1
 
+static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
+{
+	return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
+}
+
 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
 		   unsigned int len); 
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0c1ae68ee84..432c570c9f5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -335,6 +335,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 					.saddr = ireq->loc_addr,
 					.tos = RT_CONN_FLAGS(sk) } },
 			    .proto = sk->sk_protocol,
+			    .flags = inet_sk_flowi_flags(sk),
 			    .uli_u = { .ports =
 				       { .sport = inet_sk(sk)->sport,
 					 .dport = ireq->rmt_port } } };
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d533a89e08d..d2a8f8bb78a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -340,6 +340,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 							.saddr = inet->saddr,
 							.tos = RT_CONN_FLAGS(sk) } },
 					    .proto = sk->sk_protocol,
+					    .flags = inet_sk_flowi_flags(sk),
 					    .uli_u = { .ports =
 						       { .sport = inet->sport,
 							 .dport = inet->dport } } };
@@ -1371,7 +1372,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 				    .uli_u = { .ports =
 					       { .sport = tcp_hdr(skb)->dest,
 						 .dport = tcp_hdr(skb)->source } },
-				    .proto = sk->sk_protocol };
+				    .proto = sk->sk_protocol,
+				    .flags = ip_reply_arg_flowi_flags(arg) };
 		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(sock_net(sk), &rt, &fl))
 			return;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index f8edacdf991..01671ad51ed 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -20,6 +20,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	unsigned int type;
 
 	type = inet_addr_type(&init_net, iph->saddr);
+	if (skb->sk && inet_sk(skb->sk)->transparent)
+		type = RTN_LOCAL;
 	if (addr_type == RTN_UNSPEC)
 		addr_type = type;
 
@@ -33,6 +35,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
 		fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
 		fl.mark = skb->mark;
+		fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
 		if (ip_route_output_key(&init_net, &rt, &fl) != 0)
 			return -1;
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 9d38005abba..929302b2ba9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -16,6 +16,7 @@
 #include <linux/cryptohash.h>
 #include <linux/kernel.h>
 #include <net/tcp.h>
+#include <net/route.h>
 
 /* Timestamps: lowest 9 bits store TCP options */
 #define TSBITS 9
@@ -337,6 +338,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 						.saddr = ireq->loc_addr,
 						.tos = RT_CONN_FLAGS(sk) } },
 				    .proto = IPPROTO_TCP,
+				    .flags = inet_sk_flowi_flags(sk),
 				    .uli_u = { .ports =
 					       { .sport = th->dest,
 						 .dport = th->source } } };
-- 
cgit v1.2.3-70-g09d2


From a3116ac5c216fc3c145906a46df9ce542ff7dcf2 Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 1 Oct 2008 07:46:49 -0700
Subject: tcp: Port redirection support for TCP

Current TCP code relies on the local port of the listening socket
being the same as the destination address of the incoming
connection. Port redirection used by many transparent proxying
techniques obviously breaks this, so we have to store the original
destination port address.

This patch extends struct inet_request_sock and stores the incoming
destination port value there. It also modifies the handshake code to
use that value as the source port when sending reply packets.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h         | 2 +-
 include/net/tcp.h               | 1 +
 net/ipv4/inet_connection_sock.c | 2 ++
 net/ipv4/syncookies.c           | 1 +
 net/ipv4/tcp_output.c           | 2 +-
 5 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index dced3f64f97..de0ecc71cf0 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -61,8 +61,8 @@ struct inet_request_sock {
 	struct request_sock	req;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	u16			inet6_rsk_offset;
-	/* 2 bytes hole, try to pack */
 #endif
+	__be16			loc_port;
 	__be32			loc_addr;
 	__be32			rmt_addr;
 	__be16			rmt_port;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 12c9b4fec04..f6cc3414315 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -976,6 +976,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->acked = 0;
 	ireq->ecn_ok = 0;
 	ireq->rmt_port = tcp_hdr(skb)->source;
+	ireq->loc_port = tcp_hdr(skb)->dest;
 }
 
 extern void tcp_enter_memory_pressure(struct sock *sk);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 432c570c9f5..21fcc5a9045 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -516,6 +516,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 		newicsk->icsk_bind_hash = NULL;
 
 		inet_sk(newsk)->dport = inet_rsk(req)->rmt_port;
+		inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port);
+		inet_sk(newsk)->sport = inet_rsk(req)->loc_port;
 		newsk->sk_write_space = sk_stream_write_space;
 
 		newicsk->icsk_retransmits = 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 929302b2ba9..d346c22aa6a 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -297,6 +297,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
 	req->mss		= mss;
+	ireq->loc_port		= th->dest;
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a8499ef3234..493553c71d3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2275,7 +2275,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->syn = 1;
 	th->ack = 1;
 	TCP_ECN_make_synack(req, th);
-	th->source = inet_sk(sk)->sport;
+	th->source = ireq->loc_port;
 	th->dest = ireq->rmt_port;
 	/* Setting of flags are superfluous here for callers (and ECE is
 	 * not even correctly set)
-- 
cgit v1.2.3-70-g09d2


From bcd41303f422015ab662c9276d108414aa75b796 Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 1 Oct 2008 07:48:10 -0700
Subject: udp: Export UDP socket lookup function

The iptables tproxy code has to be able to do UDP socket hash lookups,
so we have to provide an exported lookup function for this purpose.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 4 ++++
 net/ipv4/udp.c    | 7 +++++++
 2 files changed, 11 insertions(+)

(limited to 'net')

diff --git a/include/net/udp.h b/include/net/udp.h
index addcdc67234..d38f6f2419f 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -148,6 +148,10 @@ extern int 	udp_lib_setsockopt(struct sock *sk, int level, int optname,
 				   char __user *optval, int optlen,
 				   int (*push_pending_frames)(struct sock *));
 
+extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
+				    __be32 daddr, __be16 dport,
+				    int dif);
+
 DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
 
 /* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 57e26fa6618..c83d0ef469c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -302,6 +302,13 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	return result;
 }
 
+struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
+			     __be32 daddr, __be16 dport, int dif)
+{
+	return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash);
+}
+EXPORT_SYMBOL_GPL(udp4_lib_lookup);
+
 static inline struct sock *udp_v4_mcast_next(struct sock *sk,
 					     __be16 loc_port, __be32 loc_addr,
 					     __be16 rmt_port, __be32 rmt_addr,
-- 
cgit v1.2.3-70-g09d2


From 2cd9b822bfa79fc1335d3e71a0449f3cd0b5078e Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 19 Jun 2008 17:59:13 -0400
Subject: sctp: Only mark chunks as missing when there are gaps

Frist small step in optimizing SACK processing.   Do not call
sctp_mark_missing() when there are no gaps reported and thus
not missing chunks.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4328ad5439c..ef5ea7423cc 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1129,12 +1129,13 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	unsigned outstanding;
 	struct sctp_transport *primary = asoc->peer.primary_path;
 	int count_of_newacks = 0;
+	int gap_ack_blocks;
 
 	/* Grab the association's destination address list. */
 	transport_list = &asoc->peer.transport_addr_list;
 
 	sack_ctsn = ntohl(sack->cum_tsn_ack);
-
+	gap_ack_blocks = ntohs(sack->num_gap_ack_blocks);
 	/*
 	 * SFR-CACC algorithm:
 	 * On receipt of a SACK the sender SHOULD execute the
@@ -1161,7 +1162,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	 * A) Initialize the cacc_saw_newack to 0 for all destination
 	 * addresses.
 	 */
-	if (sack->num_gap_ack_blocks &&
+	if (gap_ack_blocks &&
 	    primary->cacc.changeover_active) {
 		list_for_each_entry(transport, transport_list, transports) {
 			transport->cacc.cacc_saw_newack = 0;
@@ -1170,9 +1171,8 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 
 	/* Get the highest TSN in the sack. */
 	highest_tsn = sack_ctsn;
-	if (sack->num_gap_ack_blocks)
-		highest_tsn +=
-		    ntohs(frags[ntohs(sack->num_gap_ack_blocks) - 1].gab.end);
+	if (gap_ack_blocks)
+		highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end);
 
 	if (TSN_lt(asoc->highest_sacked, highest_tsn)) {
 		highest_new_tsn = highest_tsn;
@@ -1181,11 +1181,11 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 		highest_new_tsn = sctp_highest_new_tsn(sack, asoc);
 	}
 
+
 	/* Run through the retransmit queue.  Credit bytes received
 	 * and free those chunks that we can.
 	 */
 	sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn);
-	sctp_mark_missing(q, &q->retransmit, NULL, highest_new_tsn, 0);
 
 	/* Run through the transmitted queue.
 	 * Credit bytes received and free those chunks which we can.
@@ -1204,9 +1204,12 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 			count_of_newacks ++;
 	}
 
-	list_for_each_entry(transport, transport_list, transports) {
-		sctp_mark_missing(q, &transport->transmitted, transport,
-				  highest_new_tsn, count_of_newacks);
+	if (gap_ack_blocks) {
+		sctp_mark_missing(q, &q->retransmit, NULL, highest_new_tsn, 0);
+
+		list_for_each_entry(transport, transport_list, transports)
+			sctp_mark_missing(q, &transport->transmitted, transport,
+					  highest_new_tsn, count_of_newacks);
 	}
 
 	/* Move the Cumulative TSN Ack Point if appropriate.  */
-- 
cgit v1.2.3-70-g09d2


From ab5216a5bd453752f04bb79c29e8f01b11d69006 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 19 Jun 2008 18:17:24 -0400
Subject: sctp: Optimize SFR-CACC transport list walking during SACK processing

There is a possibility of walking the transport list twice during
SACK processing when doing SFR-CACC algorithm.  We can restructure
the code to only do this once.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index ef5ea7423cc..c8de4da57f3 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1145,27 +1145,31 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	 * on the current primary, the CHANGEOVER_ACTIVE flag SHOULD be
 	 * cleared. The CYCLING_CHANGEOVER flag SHOULD also be cleared for
 	 * all destinations.
-	 */
-	if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) {
-		primary->cacc.changeover_active = 0;
-		list_for_each_entry(transport, transport_list,
-				transports) {
-			transport->cacc.cycling_changeover = 0;
-		}
-	}
-
-	/*
-	 * SFR-CACC algorithm:
 	 * 2) If the SACK contains gap acks and the flag CHANGEOVER_ACTIVE
 	 * is set the receiver of the SACK MUST take the following actions:
 	 *
 	 * A) Initialize the cacc_saw_newack to 0 for all destination
 	 * addresses.
+	 *
+	 * Only bother if changeover_active is set. Otherwise, this is
+	 * totally suboptimal to do on every SACK.
 	 */
-	if (gap_ack_blocks &&
-	    primary->cacc.changeover_active) {
-		list_for_each_entry(transport, transport_list, transports) {
-			transport->cacc.cacc_saw_newack = 0;
+	if (primary->cacc.changeover_active) {
+		u8 clear_cycling = 0;
+
+		if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) {
+			primary->cacc.changeover_active = 0;
+			clear_cycling = 1;
+		}
+
+		if (clear_cycling || gap_ack_blocks) {
+			list_for_each_entry(transport, transport_list,
+					transports) {
+				if (clear_cycling)
+					transport->cacc.cycling_changeover = 0;
+				if (gap_ack_blocks)
+					transport->cacc.cacc_saw_newack = 0;
+			}
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 845b8eda4d783a7ce2670d482a716840a650389e Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Mon, 23 Jun 2008 15:26:20 -0400
Subject: sctp: Retransmit list is ineligable for missing indications

Chunks placed on the retransmit list are marked as inelegible
for fast retrasnmission.   Since missing indications determine
when fast reransmission is done, there is not point in calling
sctp_mark_missing() on the retransmit list since those chunks
will not be marked.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c8de4da57f3..da8d846301c 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1209,8 +1209,6 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	}
 
 	if (gap_ack_blocks) {
-		sctp_mark_missing(q, &q->retransmit, NULL, highest_new_tsn, 0);
-
 		list_for_each_entry(transport, transport_list, transports)
 			sctp_mark_missing(q, &transport->transmitted, transport,
 					  highest_new_tsn, count_of_newacks);
-- 
cgit v1.2.3-70-g09d2


From c226ef9b83694311327f3ab0036c6de9c22e9daf Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 25 Jul 2008 12:44:09 -0400
Subject: sctp: reduce memory footprint of sctp_chunk structure

sctp_chunks should be put on a diet.  This is some of the low hanging
fruit that we can strip out.  Changes all the __s8/__u8 flags to
bitfields.  Saves 12 bytes per chunk.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/structs.h | 31 +++++++++++++++++--------------
 net/sctp/output.c          |  2 +-
 net/sctp/outqueue.c        | 14 +++++++-------
 net/sctp/sm_make_chunk.c   |  2 +-
 4 files changed, 26 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ab1c472ea75..0dc1b7267c3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -731,20 +731,23 @@ struct sctp_chunk {
 	 */
 	struct sk_buff *auth_chunk;
 
-	__u8 rtt_in_progress;	/* Is this chunk used for RTT calculation? */
-	__u8 resent;		/* Has this chunk ever been retransmitted. */
-	__u8 has_tsn;		/* Does this chunk have a TSN yet? */
-	__u8 has_ssn;		/* Does this chunk have a SSN yet? */
-	__u8 singleton;		/* Was this the only chunk in the packet? */
-	__u8 end_of_packet;	/* Was this the last chunk in the packet? */
-	__u8 ecn_ce_done;	/* Have we processed the ECN CE bit? */
-	__u8 pdiscard;		/* Discard the whole packet now? */
-	__u8 tsn_gap_acked;	/* Is this chunk acked by a GAP ACK? */
-	__s8 fast_retransmit;	 /* Is this chunk fast retransmitted? */
-	__u8 tsn_missing_report; /* Data chunk missing counter. */
-	__u8 data_accepted; 	/* At least 1 chunk in this packet accepted */
-	__u8 auth;		/* IN: was auth'ed | OUT: needs auth */
-	__u8 has_asconf;	/* IN: have seen an asconf before */
+#define SCTP_CAN_FRTX 0x0
+#define SCTP_NEED_FRTX 0x1
+#define SCTP_DONT_FRTX 0x2
+	__u16	rtt_in_progress:1,	/* This chunk used for RTT calc? */
+		resent:1,		/* Has this chunk ever been resent. */
+		has_tsn:1,		/* Does this chunk have a TSN yet? */
+		has_ssn:1,		/* Does this chunk have a SSN yet? */
+		singleton:1,		/* Only chunk in the packet? */
+		end_of_packet:1,	/* Last chunk in the packet? */
+		ecn_ce_done:1,		/* Have we processed the ECN CE bit? */
+		pdiscard:1,		/* Discard the whole packet now? */
+		tsn_gap_acked:1,	/* Is this chunk acked by a GAP ACK? */
+		data_accepted:1,	/* At least 1 chunk accepted */
+		auth:1,			/* IN: was auth'ed | OUT: needs auth */
+		has_asconf:1,		/* IN: have seen an asconf before */
+		tsn_missing_report:2,	/* Data chunk missing counter. */
+		fast_retransmit:2;	/* Is this chunk fast retransmitted? */
 };
 
 void sctp_chunk_hold(struct sctp_chunk *);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 225c7123c41..c3f417f7ec6 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -699,7 +699,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
 	 *    When a Fast Retransmit is being performed the sender SHOULD
 	 *    ignore the value of cwnd and SHOULD NOT delay retransmission.
 	 */
-	if (chunk->fast_retransmit <= 0)
+	if (chunk->fast_retransmit != SCTP_NEED_FRTX)
 		if (transport->flight_size >= transport->cwnd) {
 			retval = SCTP_XMIT_RWND_FULL;
 			goto finish;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index da8d846301c..247ebc95c1e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -420,7 +420,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
 		 * be added to the retransmit queue.
 		 */
 		if ((reason == SCTP_RTXR_FAST_RTX  &&
-			    (chunk->fast_retransmit > 0)) ||
+			    (chunk->fast_retransmit == SCTP_NEED_FRTX)) ||
 		    (reason != SCTP_RTXR_FAST_RTX  && !chunk->tsn_gap_acked)) {
 			/* If this chunk was sent less then 1 rto ago, do not
 			 * retransmit this chunk, but give the peer time
@@ -650,8 +650,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			/* Mark the chunk as ineligible for fast retransmit
 			 * after it is retransmitted.
 			 */
-			if (chunk->fast_retransmit > 0)
-				chunk->fast_retransmit = -1;
+			if (chunk->fast_retransmit == SCTP_NEED_FRTX)
+				chunk->fast_retransmit = SCTP_DONT_FRTX;
 
 			/* Force start T3-rtx timer when fast retransmitting
 			 * the earliest outstanding TSN
@@ -680,8 +680,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 	 */
 	if (rtx_timeout || fast_rtx) {
 		list_for_each_entry(chunk1, lqueue, transmitted_list) {
-			if (chunk1->fast_retransmit > 0)
-				chunk1->fast_retransmit = -1;
+			if (chunk1->fast_retransmit == SCTP_NEED_FRTX)
+				chunk1->fast_retransmit = SCTP_DONT_FRTX;
 		}
 	}
 
@@ -1656,7 +1656,7 @@ static void sctp_mark_missing(struct sctp_outq *q,
 		 * chunk if it has NOT been fast retransmitted or marked for
 		 * fast retransmit already.
 		 */
-		if (!chunk->fast_retransmit &&
+		if (chunk->fast_retransmit == SCTP_CAN_FRTX &&
 		    !chunk->tsn_gap_acked &&
 		    TSN_lt(tsn, highest_new_tsn_in_sack)) {
 
@@ -1681,7 +1681,7 @@ static void sctp_mark_missing(struct sctp_outq *q,
 		 */
 
 		if (chunk->tsn_missing_report >= 3) {
-			chunk->fast_retransmit = 1;
+			chunk->fast_retransmit = SCTP_NEED_FRTX;
 			do_fast_retransmit = 1;
 		}
 	}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d68869f966c..99fe0747cc9 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1211,7 +1211,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
 	 */
 	retval->tsn_missing_report = 0;
 	retval->tsn_gap_acked = 0;
-	retval->fast_retransmit = 0;
+	retval->fast_retransmit = SCTP_CAN_FRTX;
 
 	/* If this is a fragmented message, track all fragments
 	 * of the message (for SEND_FAILED).
-- 
cgit v1.2.3-70-g09d2


From 52cae8f06babf9eed327479c1aa024ce3732f912 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Mon, 18 Aug 2008 10:34:34 -0400
Subject: sctp: try harder to figure out address family when checking wildcards

sctp_is_any() function that is used to check for wildcard addresses
only looks at the address itself to determine the address family.
This function is used in the API to check the address passed in from
the user.  If the user simply zerroes out the sockaddr_storage and
pass that in, we'll end up failing.  So, let's try harder to determine
the address family by also checking the socket if it's possible.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/structs.h |  2 +-
 net/sctp/bind_addr.c       | 16 +++++++++++++---
 net/sctp/ipv6.c            |  5 +++--
 net/sctp/socket.c          | 10 +++++-----
 4 files changed, 22 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0dc1b7267c3..94c62e4ddea 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1228,7 +1228,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len,
 
 sctp_scope_t sctp_scope(const union sctp_addr *);
 int sctp_in_scope(const union sctp_addr *addr, const sctp_scope_t scope);
-int sctp_is_any(const union sctp_addr *addr);
+int sctp_is_any(struct sock *sk, const union sctp_addr *addr);
 int sctp_addr_is_valid(const union sctp_addr *addr);
 
 
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index f62bc246893..6d5944a745d 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -457,7 +457,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
 {
 	int error = 0;
 
-	if (sctp_is_any(addr)) {
+	if (sctp_is_any(NULL, addr)) {
 		error = sctp_copy_local_addr_list(dest, scope, gfp, flags);
 	} else if (sctp_in_scope(addr, scope)) {
 		/* Now that the address is in scope, check to see if
@@ -477,11 +477,21 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
 }
 
 /* Is this a wildcard address?  */
-int sctp_is_any(const union sctp_addr *addr)
+int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
 {
-	struct sctp_af *af = sctp_get_af_specific(addr->sa.sa_family);
+	unsigned short fam = 0;
+	struct sctp_af *af;
+
+	/* Try to get the right address family */
+	if (addr->sa.sa_family != AF_UNSPEC)
+		fam = addr->sa.sa_family;
+	else if (sk)
+		fam = sk->sk_family;
+
+	af = sctp_get_af_specific(fam);
 	if (!af)
 		return 0;
+
 	return af->is_any(addr);
 }
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 47f91afa021..c78da3c9dd3 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -837,6 +837,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
 			       struct sctp_sock *opt)
 {
 	struct sctp_af *af1, *af2;
+	struct sock *sk = sctp_opt2sk(opt);
 
 	af1 = sctp_get_af_specific(addr1->sa.sa_family);
 	af2 = sctp_get_af_specific(addr2->sa.sa_family);
@@ -845,11 +846,11 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
 		return 0;
 
 	/* If the socket is IPv6 only, v4 addrs will not match */
-	if (__ipv6_only_sock(sctp_opt2sk(opt)) && af1 != af2)
+	if (__ipv6_only_sock(sk) && af1 != af2)
 		return 0;
 
 	/* Today, wildcard AF_INET/AF_INET6. */
-	if (sctp_is_any(addr1) || sctp_is_any(addr2))
+	if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2))
 		return 1;
 
 	if (addr1->sa.sa_family != addr2->sa.sa_family)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 5ffb9dec1c3..a1b904529d5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2309,7 +2309,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
 	/* If an address other than INADDR_ANY is specified, and
 	 * no transport is found, then the request is invalid.
 	 */
-	if (!sctp_is_any(( union sctp_addr *)&params.spp_address)) {
+	if (!sctp_is_any(sk, ( union sctp_addr *)&params.spp_address)) {
 		trans = sctp_addr_id2transport(sk, &params.spp_address,
 					       params.spp_assoc_id);
 		if (!trans)
@@ -4062,7 +4062,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 	/* If an address other than INADDR_ANY is specified, and
 	 * no transport is found, then the request is invalid.
 	 */
-	if (!sctp_is_any(( union sctp_addr *)&params.spp_address)) {
+	if (!sctp_is_any(sk, ( union sctp_addr *)&params.spp_address)) {
 		trans = sctp_addr_id2transport(sk, &params.spp_address,
 					       params.spp_assoc_id);
 		if (!trans) {
@@ -4414,7 +4414,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
 	if (sctp_list_single_entry(&bp->address_list)) {
 		addr = list_entry(bp->address_list.next,
 				  struct sctp_sockaddr_entry, list);
-		if (sctp_is_any(&addr->a)) {
+		if (sctp_is_any(sk, &addr->a)) {
 			rcu_read_lock();
 			list_for_each_entry_rcu(addr,
 						&sctp_local_addr_list, list) {
@@ -4602,7 +4602,7 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	if (sctp_list_single_entry(&bp->address_list)) {
 		addr = list_entry(bp->address_list.next,
 				  struct sctp_sockaddr_entry, list);
-		if (sctp_is_any(&addr->a)) {
+		if (sctp_is_any(sk, &addr->a)) {
 			cnt = sctp_copy_laddrs_old(sk, bp->port,
 						   getaddrs.addr_num,
 						   addrs, &bytes_copied);
@@ -4695,7 +4695,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	if (sctp_list_single_entry(&bp->address_list)) {
 		addr = list_entry(bp->address_list.next,
 				  struct sctp_sockaddr_entry, list);
-		if (sctp_is_any(&addr->a)) {
+		if (sctp_is_any(sk, &addr->a)) {
 			cnt = sctp_copy_laddrs(sk, bp->port, addrs,
 						space_left, &bytes_copied);
 			if (cnt < 0) {
-- 
cgit v1.2.3-70-g09d2


From 536428a9b9a98495f7ea54ad95cad83e22f1d47d Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 5 Sep 2008 08:55:26 +0800
Subject: sctp: Fix to start T5-shutdown-guard timer while enter SHUTDOWN-SENT
 state

RFC 4960: Section 9.2
The sender of the SHUTDOWN MAY also start an overall guard timer
'T5-shutdown-guard' to bound the overall time for the shutdown
sequence.  At the expiration of this timer, the sender SHOULD abort
the association by sending an ABORT chunk.  If the 'T5-shutdown-
guard' timer is used, it SHOULD be set to the recommended value of 5
times 'RTO.Max'.

The timer 'T5-shutdown-guard' is used to counter the overall time
for shutdown sequence, and it's start by the sender of the SHUTDOWN.
So timer 'T5-shutdown-guard' should be start when we send the first
SHUTDOWN chunk and enter the SHUTDOWN-SENT state, not start when we
receipt of the SHUTDOWN primitive and enter SHUTDOWN-PENDING state.

If 'T5-shutdown-guard' timer is start at SHUTDOWN-PENDING state, the
association may be ABORT while data is still transmitting.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/sm_statefuns.c  | 24 +++++++-----------------
 net/sctp/sm_statetable.c |  2 +-
 2 files changed, 8 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7c622af2ce5..e57d1d3ad0a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2076,10 +2076,6 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 		    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
 		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
 
-	/* Stop the T5-shutdown guard timer.  */
-	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
-			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
-
 	return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
 }
 
@@ -4543,13 +4539,6 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING));
 
-	/* sctpimpguide-05 Section 2.12.2
-	 * The sender of the SHUTDOWN MAY also start an overall guard timer
-	 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
-	 */
-	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
-			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
-
 	disposition = SCTP_DISPOSITION_CONSUME;
 	if (sctp_outq_is_empty(&asoc->outqueue)) {
 		disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
@@ -4994,6 +4983,13 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
 
+	/* RFC 4960 Section 9.2
+	 * The sender of the SHUTDOWN MAY also start an overall guard timer
+	 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+
 	if (asoc->autoclose)
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
@@ -5520,12 +5516,6 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING));
 
-	/* sctpimpguide-05 Section 2.12.2
-	 * The sender of the SHUTDOWN MAY also start an overall guard timer
-	 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
-	 */
-	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
-			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 	disposition = SCTP_DISPOSITION_CONSUME;
 	if (sctp_outq_is_empty(&asoc->outqueue)) {
 		disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index d991237fb40..dd4ddc40c0a 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -897,7 +897,7 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
 	/* SCTP_STATE_ESTABLISHED */ \
 	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
 	/* SCTP_STATE_SHUTDOWN_PENDING */ \
-	TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
+	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
 	/* SCTP_STATE_SHUTDOWN_SENT */ \
 	TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
-- 
cgit v1.2.3-70-g09d2


From 8190f89dfd09dae0c117fb0745f5a820bd19a5a4 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Mon, 8 Sep 2008 12:13:55 +0800
Subject: sctp: Fix the SNMP counter of SCTP_MIB_OUTOFBLUES

RFC3873 defined SCTP_MIB_OUTOFBLUES:

 sctpOutOfBlues OBJECT-TYPE
   SYNTAX         Counter32
   MAX-ACCESS     read-only
   STATUS         current
   DESCRIPTION
        "The number of out of the blue packets received by the host.
        An out of the blue packet is an SCTP packet correctly formed,
        including the proper checksum, but for which the receiver was
        unable to identify an appropriate association."
   REFERENCE
        "Section 8.4 in RFC2960 deals with the Out-Of-The-Blue
         (OOTB) packet definition and procedures."

But OOTB packet INIT, INIT-ACK and SHUTDOWN-ACK(COOKIE-WAIT or
COOKIE-ECHOED state) are not counted by SCTP_MIB_OUTOFBLUES.

Case 1(INIT):

Endpoint A               Endpoint B
(CLOSED)                 (CLOSED)

 INIT     ---------->
          <----------    ABORT

Case 2(INIT-ACK):

Endpoint A               Endpoint B
(CLOSED)                 (CLOSED)

 INIT-ACK  ---------->
           <----------   ABORT

Case 3(SHUTDOWN-ACK):

Endpoint A               Endpoint B
(CLOSED)                 (CLOSED)

          <----------    INIT
 SHUTDOWN-ACK  ---------->
           <----------   SHUTDOWN-COMPLETE

Case 4(SHUTDOWN-ACK):

Endpoint A               Endpoint B
(CLOSED)                 (COOKIE-ECHOED)

 SHUTDOWN-ACK  ---------->
           <----------   SHUTDOWN-COMPLETE

This patch fixed the problem.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/sm_statefuns.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e57d1d3ad0a..81dfaee49b7 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -315,8 +315,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 	/* If the packet is an OOTB packet which is temporarily on the
 	 * control endpoint, respond with an ABORT.
 	 */
-	if (ep == sctp_sk((sctp_get_ctl_sock()))->ep)
+	if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) {
+		SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
 		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+	}
 
 	/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
 	 * Tag.
@@ -635,8 +637,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	/* If the packet is an OOTB packet which is temporarily on the
 	 * control endpoint, respond with an ABORT.
 	 */
-	if (ep == sctp_sk((sctp_get_ctl_sock()))->ep)
+	if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) {
+		SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
 		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+	}
 
 	/* Make sure that the COOKIE_ECHO chunk has a valid length.
 	 * In this case, we check that we have enough for at least a
@@ -3378,6 +3382,8 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
 	 * packet and the state function that handles OOTB SHUTDOWN_ACK is
 	 * called with a NULL association.
 	 */
+	SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+
 	return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 96cd0d3d710e64c55e034b77052d7ac46f094759 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Mon, 8 Sep 2008 14:00:26 -0400
Subject: sctp: enable cookie-echo retransmission transport switch

This patch enables cookie-echo retransmission transport switch
feature. If COOKIE-ECHO retransmission happens, it will be sent
to the address other than the one last sent to.

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/sm_sideeffect.c | 78 +++++++++++++++++++++++++++---------------------
 net/sctp/sm_statefuns.c  |  2 ++
 2 files changed, 46 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 9732c797e8e..13d9eea5cf1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -889,6 +889,35 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
 		sctp_ulpq_tail_event(&asoc->ulpq, ev);
 }
 
+
+static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
+				    sctp_event_timeout_t timer,
+				    char *name)
+{
+	struct sctp_transport *t;
+
+	t = asoc->init_last_sent_to;
+	asoc->init_err_counter++;
+
+	if (t->init_sent_count > (asoc->init_cycle + 1)) {
+		asoc->timeouts[timer] *= 2;
+		if (asoc->timeouts[timer] > asoc->max_init_timeo) {
+			asoc->timeouts[timer] = asoc->max_init_timeo;
+		}
+		asoc->init_cycle++;
+		SCTP_DEBUG_PRINTK(
+			"T1 %s Timeout adjustment"
+			" init_err_counter: %d"
+			" cycle: %d"
+			" timeout: %ld\n",
+			name,
+			asoc->init_err_counter,
+			asoc->init_cycle,
+			asoc->timeouts[timer]);
+	}
+
+}
+
 /* These three macros allow us to pull the debugging code out of the
  * main flow of sctp_do_sm() to keep attention focused on the real
  * functionality there.
@@ -1196,6 +1225,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 				sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
 						SCTP_CHUNK(cmd->obj.ptr));
 
+			if (new_obj->transport) {
+				new_obj->transport->init_sent_count++;
+				asoc->init_last_sent_to = new_obj->transport;
+			}
+
 			/* FIXME - Eventually come up with a cleaner way to
 			 * enabling COOKIE-ECHO + DATA bundling during
 			 * multihoming stale cookie scenarios, the following
@@ -1345,26 +1379,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			 * all transports have been tried at the current
 			 * timeout.
 			 */
-			t = asoc->init_last_sent_to;
-			asoc->init_err_counter++;
-
-			if (t->init_sent_count > (asoc->init_cycle + 1)) {
-				asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] *= 2;
-				if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] >
-				    asoc->max_init_timeo) {
-					asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] =
-						asoc->max_init_timeo;
-				}
-				asoc->init_cycle++;
-				SCTP_DEBUG_PRINTK(
-					"T1 INIT Timeout adjustment"
-					" init_err_counter: %d"
-					" cycle: %d"
-					" timeout: %ld\n",
-					asoc->init_err_counter,
-					asoc->init_cycle,
-					asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT]);
-			}
+			sctp_cmd_t1_timer_update(asoc,
+						SCTP_EVENT_TIMEOUT_T1_INIT,
+						"INIT");
 
 			sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
 					SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
@@ -1377,20 +1394,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			 * all transports have been tried at the current
 			 * timeout.
 			 */
-			asoc->init_err_counter++;
-
-			asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] *= 2;
-			if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] >
-			    asoc->max_init_timeo) {
-				asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] =
-					asoc->max_init_timeo;
-			}
-			SCTP_DEBUG_PRINTK(
-				"T1 COOKIE Timeout adjustment"
-				" init_err_counter: %d"
-				" timeout: %ld\n",
-				asoc->init_err_counter,
-				asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE]);
+			sctp_cmd_t1_timer_update(asoc,
+						SCTP_EVENT_TIMEOUT_T1_COOKIE,
+						"COOKIE");
 
 			/* If we've sent any data bundled with
 			 * COOKIE-ECHO we need to resend.
@@ -1422,6 +1428,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 		case SCTP_CMD_INIT_COUNTER_RESET:
 			asoc->init_err_counter = 0;
 			asoc->init_cycle = 0;
+			list_for_each_entry(t, &asoc->peer.transport_addr_list,
+					    transports) {
+				t->init_sent_count = 0;
+			}
 			break;
 
 		case SCTP_CMD_REPORT_DUP:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 81dfaee49b7..ea3a34cbe47 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5307,6 +5307,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
 		if (!repl)
 			return SCTP_DISPOSITION_NOMEM;
 
+		sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT,
+				SCTP_CHUNK(repl));
 		/* Issue a sideeffect to do the needed accounting. */
 		sctp_add_cmd_sf(commands, SCTP_CMD_COOKIEECHO_RESTART,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
-- 
cgit v1.2.3-70-g09d2


From e69c4e0f1210450841e40716894ba6a877b31d52 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Mon, 15 Sep 2008 16:29:49 -0400
Subject: sctp: correctly save sctp_adaptation from parameter.

The INIT perameter carries the adapatation value in network-byte
order.  We need to store it in host byte order as expected
by data types and the user API.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/sm_make_chunk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 99fe0747cc9..76726bcff3e 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2467,7 +2467,7 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_ADAPTATION_LAYER_IND:
-		asoc->peer.adaptation_ind = param.aind->adaptation_ind;
+		asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind);
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
-- 
cgit v1.2.3-70-g09d2


From 2937391385807b3da9cd7a39345259caf550b032 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 3 Oct 2008 17:15:38 -0400
Subject: NLM: Remove unused argument from svc_addsock() function

Clean up: The svc_addsock() function no longer uses its "proto"
argument, so remove it.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsctl.c               | 2 +-
 include/linux/sunrpc/svcsock.h | 5 +----
 net/sunrpc/svcsock.c           | 4 +---
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 862dff5247f..97543df5824 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -614,7 +614,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
 			return -EINVAL;
 		err = nfsd_create_serv();
 		if (!err) {
-			err = svc_addsock(nfsd_serv, fd, buf, NULL);
+			err = svc_addsock(nfsd_serv, fd, buf);
 			if (err >= 0) {
 				err = lockd_up();
 				if (err < 0)
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 8cff696dedf..483e10380aa 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -39,10 +39,7 @@ int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);
 void		svc_sock_update_bufs(struct svc_serv *serv);
 int		svc_sock_names(char *buf, struct svc_serv *serv, char *toclose);
-int		svc_addsock(struct svc_serv *serv,
-			    int fd,
-			    char *name_return,
-			    int *proto);
+int		svc_addsock(struct svc_serv *serv, int fd, char *name_return);
 void		svc_init_xprt_sock(void);
 void		svc_cleanup_xprt_sock(void);
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f91377c1495..95293f549e9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1167,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 
 int svc_addsock(struct svc_serv *serv,
 		int fd,
-		char *name_return,
-		int *proto)
+		char *name_return)
 {
 	int err = 0;
 	struct socket *so = sockfd_lookup(fd, &err);
@@ -1203,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv,
 		sockfd_put(so);
 		return err;
 	}
-	if (proto) *proto = so->sk->sk_protocol;
 	return one_sock_name(name_return, svsk);
 }
 EXPORT_SYMBOL_GPL(svc_addsock);
-- 
cgit v1.2.3-70-g09d2


From 25532824fb727744a302edb25c6a6ac10b82cb63 Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Sun, 5 Oct 2008 11:14:27 -0700
Subject: Phonet: modules auto-loading support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/af_phonet.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 1d8df6b7e3d..0a74aeaf5ad 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -64,6 +64,11 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol)
 	}
 
 	pnp = phonet_proto_get(protocol);
+#ifdef CONFIG_KMOD
+	if (pnp == NULL &&
+	    request_module("net-pf-%d-proto-%d", PF_PHONET, protocol) == 0)
+		pnp = phonet_proto_get(protocol);
+#endif
 	if (pnp == NULL)
 		return -EPROTONOSUPPORT;
 	if (sock->type != pnp->sock_type) {
@@ -94,7 +99,7 @@ out:
 }
 
 static struct net_proto_family phonet_proto_family = {
-	.family = AF_PHONET,
+	.family = PF_PHONET,
 	.create = pn_socket_create,
 	.owner = THIS_MODULE,
 };
@@ -447,7 +452,7 @@ static int __init phonet_init(void)
 
 err:
 	phonet_sysctl_exit();
-	sock_unregister(AF_PHONET);
+	sock_unregister(PF_PHONET);
 	dev_remove_pack(&phonet_packet_type);
 	phonet_device_exit();
 	return err;
@@ -457,7 +462,7 @@ static void __exit phonet_exit(void)
 {
 	isi_unregister();
 	phonet_sysctl_exit();
-	sock_unregister(AF_PHONET);
+	sock_unregister(PF_PHONET);
 	dev_remove_pack(&phonet_packet_type);
 	phonet_device_exit();
 }
@@ -466,3 +471,4 @@ module_init(phonet_init);
 module_exit(phonet_exit);
 MODULE_DESCRIPTION("Phonet protocol stack for Linux");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_PHONET);
-- 
cgit v1.2.3-70-g09d2


From 9995a32b4d14dcda2f8df58030526bee91114c16 Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Sun, 5 Oct 2008 11:14:48 -0700
Subject: Phonet: connected sockets glue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/pep.h | 43 +++++++++++++++++++++
 net/phonet/socket.c      | 97 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)
 create mode 100644 include/net/phonet/pep.h

(limited to 'net')

diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h
new file mode 100644
index 00000000000..b2f8c54c533
--- /dev/null
+++ b/include/net/phonet/pep.h
@@ -0,0 +1,43 @@
+/*
+ * File: pep.h
+ *
+ * Phonet Pipe End Point sockets definitions
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef NET_PHONET_PEP_H
+#define NET_PHONET_PEP_H
+
+struct pep_sock {
+	struct pn_sock		pn_sk;
+
+	/* Listening socket stuff: */
+	struct hlist_head	ackq;
+
+	/* Connected socket stuff: */
+	u8			tx_credits;
+};
+
+static inline struct pep_sock *pep_sk(struct sock *sk)
+{
+	return (struct pep_sock *)sk;
+}
+
+extern const struct proto_ops phonet_stream_ops;
+
+#endif
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index dfd4061646d..cea1136cbe4 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -25,11 +25,13 @@
 
 #include <linux/kernel.h>
 #include <linux/net.h>
+#include <linux/poll.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
 
 #include <linux/phonet.h>
 #include <net/phonet/phonet.h>
+#include <net/phonet/pep.h>
 #include <net/phonet/pn_dev.h>
 
 static int pn_socket_release(struct socket *sock)
@@ -166,6 +168,24 @@ static int pn_socket_autobind(struct socket *sock)
 	return 0; /* socket was already bound */
 }
 
+static int pn_socket_accept(struct socket *sock, struct socket *newsock,
+				int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sock *newsk;
+	int err;
+
+	newsk = sk->sk_prot->accept(sk, flags, &err);
+	if (!newsk)
+		return err;
+
+	lock_sock(newsk);
+	sock_graft(newsk, newsock);
+	newsock->state = SS_CONNECTED;
+	release_sock(newsk);
+	return 0;
+}
+
 static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
 				int *sockaddr_len, int peer)
 {
@@ -182,6 +202,33 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
 	return 0;
 }
 
+static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
+					poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct pep_sock *pn = pep_sk(sk);
+	unsigned int mask = 0;
+
+	poll_wait(file, &sock->wait, wait);
+
+	switch (sk->sk_state) {
+	case TCP_LISTEN:
+		return hlist_empty(&pn->ackq) ? 0 : POLLIN;
+	case TCP_CLOSE:
+		return POLLERR;
+	}
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		mask |= POLLIN | POLLRDNORM;
+	else if (sk->sk_state == TCP_CLOSE_WAIT)
+		return POLLHUP;
+
+	if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits)
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return mask;
+}
+
 static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
 				unsigned long arg)
 {
@@ -220,6 +267,30 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
 	return sk->sk_prot->ioctl(sk, cmd, arg);
 }
 
+static int pn_socket_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	if (sock->state != SS_UNCONNECTED)
+		return -EINVAL;
+	if (pn_socket_autobind(sock))
+		return -ENOBUFS;
+
+	lock_sock(sk);
+	if (sk->sk_state != TCP_CLOSE) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	sk->sk_state = TCP_LISTEN;
+	sk->sk_ack_backlog = 0;
+	sk->sk_max_ack_backlog = backlog;
+out:
+	release_sock(sk);
+	return err;
+}
+
 static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock,
 				struct msghdr *m, size_t total_len)
 {
@@ -256,6 +327,32 @@ const struct proto_ops phonet_dgram_ops = {
 	.sendpage	= sock_no_sendpage,
 };
 
+const struct proto_ops phonet_stream_ops = {
+	.family		= AF_PHONET,
+	.owner		= THIS_MODULE,
+	.release	= pn_socket_release,
+	.bind		= pn_socket_bind,
+	.connect	= sock_no_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= pn_socket_accept,
+	.getname	= pn_socket_getname,
+	.poll		= pn_socket_poll,
+	.ioctl		= pn_socket_ioctl,
+	.listen		= pn_socket_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= sock_no_setsockopt,
+	.getsockopt	= sock_no_getsockopt,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = sock_no_setsockopt,
+	.compat_getsockopt = compat_sock_no_getsockopt,
+#endif
+	.sendmsg	= pn_socket_sendmsg,
+	.recvmsg	= sock_common_recvmsg,
+	.mmap		= sock_no_mmap,
+	.sendpage	= sock_no_sendpage,
+};
+EXPORT_SYMBOL(phonet_stream_ops);
+
 static DEFINE_MUTEX(port_mutex);
 
 /* allocate port for a socket */
-- 
cgit v1.2.3-70-g09d2


From 9641458d3ec42def729fde64669abf07f3220cd5 Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Sun, 5 Oct 2008 11:15:13 -0700
Subject: Phonet: Pipe End Point for Phonet Pipes protocol
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This protocol provides some connection handling and negotiated
congestion control. Nokia cellular modems use it for bulk transfers.
It provides packet boundaries (hence SOCK_SEQPACKET). Congestion
control is per packet rather per byte, so we do not re-use the
generic socket memory accounting.

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phonet.h   |   4 +-
 include/net/phonet/pep.h | 114 ++++++
 net/phonet/Makefile      |   4 +-
 net/phonet/af_phonet.c   |   3 +
 net/phonet/pep.c         | 908 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 1031 insertions(+), 2 deletions(-)
 create mode 100644 net/phonet/pep.c

(limited to 'net')

diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index 3a027f588a4..f9218524207 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -27,7 +27,9 @@
 #define PN_PROTO_TRANSPORT	0
 /* Phonet datagram socket */
 #define PN_PROTO_PHONET		1
-#define PHONET_NPROTO		2
+/* Phonet pipe */
+#define PN_PROTO_PIPE		2
+#define PHONET_NPROTO		3
 
 #define PNADDR_ANY		0
 #define PNPORT_RESOURCE_ROUTING	0
diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h
index b2f8c54c533..fb024e18686 100644
--- a/include/net/phonet/pep.h
+++ b/include/net/phonet/pep.h
@@ -26,11 +26,21 @@
 struct pep_sock {
 	struct pn_sock		pn_sk;
 
+	/* XXX: union-ify listening vs connected stuff ? */
 	/* Listening socket stuff: */
 	struct hlist_head	ackq;
+	struct hlist_head	hlist;
 
 	/* Connected socket stuff: */
+	struct sock		*listener;
+	u16			peer_type;	/* peer type/subtype */
+	u8			pipe_handle;
+
+	u8			rx_credits;
 	u8			tx_credits;
+	u8			rx_fc;	/* RX flow control */
+	u8			tx_fc;	/* TX flow control */
+	u8			init_enable;	/* auto-enable at creation */
 };
 
 static inline struct pep_sock *pep_sk(struct sock *sk)
@@ -40,4 +50,108 @@ static inline struct pep_sock *pep_sk(struct sock *sk)
 
 extern const struct proto_ops phonet_stream_ops;
 
+/* Pipe protocol definitions */
+struct pnpipehdr {
+	u8			utid; /* transaction ID */
+	u8			message_id;
+	u8			pipe_handle;
+	union {
+		u8		state_after_connect;	/* connect request */
+		u8		state_after_reset;	/* reset request */
+		u8		error_code;		/* any response */
+		u8		pep_type;		/* status indication */
+		u8		data[1];
+	};
+};
+#define other_pep_type		data[1]
+
+static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb)
+{
+	return (struct pnpipehdr *)skb_transport_header(skb);
+}
+
+#define MAX_PNPIPE_HEADER (MAX_PHONET_HEADER + 4)
+
+enum {
+	PNS_PIPE_DATA = 0x20,
+
+	PNS_PEP_CONNECT_REQ = 0x40,
+	PNS_PEP_CONNECT_RESP,
+	PNS_PEP_DISCONNECT_REQ,
+	PNS_PEP_DISCONNECT_RESP,
+	PNS_PEP_RESET_REQ,
+	PNS_PEP_RESET_RESP,
+	PNS_PEP_ENABLE_REQ,
+	PNS_PEP_ENABLE_RESP,
+	PNS_PEP_CTRL_REQ,
+	PNS_PEP_CTRL_RESP,
+	PNS_PEP_DISABLE_REQ = 0x4C,
+	PNS_PEP_DISABLE_RESP,
+
+	PNS_PEP_STATUS_IND = 0x60,
+	PNS_PIPE_CREATED_IND,
+	PNS_PIPE_RESET_IND = 0x63,
+	PNS_PIPE_ENABLED_IND,
+	PNS_PIPE_REDIRECTED_IND,
+	PNS_PIPE_DISABLED_IND = 0x66,
+};
+
+#define PN_PIPE_INVALID_HANDLE	0xff
+#define PN_PEP_TYPE_COMMON	0x00
+
+/* Phonet pipe status indication */
+enum {
+	PN_PEP_IND_FLOW_CONTROL,
+	PN_PEP_IND_ID_MCFC_GRANT_CREDITS,
+};
+
+/* Phonet pipe error codes */
+enum {
+	PN_PIPE_NO_ERROR,
+	PN_PIPE_ERR_INVALID_PARAM,
+	PN_PIPE_ERR_INVALID_HANDLE,
+	PN_PIPE_ERR_INVALID_CTRL_ID,
+	PN_PIPE_ERR_NOT_ALLOWED,
+	PN_PIPE_ERR_PEP_IN_USE,
+	PN_PIPE_ERR_OVERLOAD,
+	PN_PIPE_ERR_DEV_DISCONNECTED,
+	PN_PIPE_ERR_TIMEOUT,
+	PN_PIPE_ERR_ALL_PIPES_IN_USE,
+	PN_PIPE_ERR_GENERAL,
+	PN_PIPE_ERR_NOT_SUPPORTED,
+};
+
+/* Phonet pipe states */
+enum {
+	PN_PIPE_DISABLE,
+	PN_PIPE_ENABLE,
+};
+
+/* Phonet pipe sub-block types */
+enum {
+	PN_PIPE_SB_CREATE_REQ_PEP_SUB_TYPE,
+	PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE,
+	PN_PIPE_SB_REDIRECT_REQ_PEP_SUB_TYPE,
+	PN_PIPE_SB_NEGOTIATED_FC,
+	PN_PIPE_SB_REQUIRED_FC_TX,
+	PN_PIPE_SB_PREFERRED_FC_RX,
+};
+
+/* Phonet pipe flow control models */
+enum {
+	PN_NO_FLOW_CONTROL,
+	PN_LEGACY_FLOW_CONTROL,
+	PN_ONE_CREDIT_FLOW_CONTROL,
+	PN_MULTI_CREDIT_FLOW_CONTROL,
+};
+
+#define pn_flow_safe(fc) ((fc) >> 1)
+
+/* Phonet pipe flow control states */
+enum {
+	PEP_IND_EMPTY,
+	PEP_IND_BUSY,
+	PEP_IND_READY,
+};
+
 #endif
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index ae9c3ed5be8..505df2a04a8 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_PHONET) += phonet.o
+obj-$(CONFIG_PHONET) += phonet.o pn_pep.o
 
 phonet-objs := \
 	pn_dev.o \
@@ -7,3 +7,5 @@ phonet-objs := \
 	datagram.o \
 	sysctl.o \
 	af_phonet.o
+
+pn_pep-objs := pep.o
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 0a74aeaf5ad..9e9c6fce11a 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -58,6 +58,9 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol)
 		case SOCK_DGRAM:
 			protocol = PN_PROTO_PHONET;
 			break;
+		case SOCK_SEQPACKET:
+			protocol = PN_PROTO_PIPE;
+			break;
 		default:
 			return -EPROTONOSUPPORT;
 		}
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
new file mode 100644
index 00000000000..c5dfecb207d
--- /dev/null
+++ b/net/phonet/pep.c
@@ -0,0 +1,908 @@
+/*
+ * File: pep.c
+ *
+ * Phonet pipe protocol end point socket
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * Author: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <asm/ioctls.h>
+
+#include <linux/phonet.h>
+#include <net/phonet/phonet.h>
+#include <net/phonet/pep.h>
+
+/* sk_state values:
+ * TCP_CLOSE		sock not in use yet
+ * TCP_CLOSE_WAIT	disconnected pipe
+ * TCP_LISTEN		listening pipe endpoint
+ * TCP_SYN_RECV		connected pipe in disabled state
+ * TCP_ESTABLISHED	connected pipe in enabled state
+ *
+ * pep_sock locking:
+ *  - sk_state, ackq, hlist: sock lock needed
+ *  - listener: read only
+ *  - pipe_handle: read only
+ */
+
+#define CREDITS_MAX	10
+#define CREDITS_THR	7
+
+static const struct sockaddr_pn pipe_srv = {
+	.spn_family = AF_PHONET,
+	.spn_resource = 0xD9, /* pipe service */
+};
+
+#define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */
+
+/* Get the next TLV sub-block. */
+static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen,
+					void *buf)
+{
+	void *data = NULL;
+	struct {
+		u8 sb_type;
+		u8 sb_len;
+	} *ph, h;
+	int buflen = *plen;
+
+	ph = skb_header_pointer(skb, 0, 2, &h);
+	if (ph == NULL || ph->sb_len < 2 || !pskb_may_pull(skb, ph->sb_len))
+		return NULL;
+	ph->sb_len -= 2;
+	*ptype = ph->sb_type;
+	*plen = ph->sb_len;
+
+	if (buflen > ph->sb_len)
+		buflen = ph->sb_len;
+	data = skb_header_pointer(skb, 2, buflen, buf);
+	__skb_pull(skb, 2 + ph->sb_len);
+	return data;
+}
+
+static int pep_reply(struct sock *sk, struct sk_buff *oskb,
+			u8 code, const void *data, int len, gfp_t priority)
+{
+	const struct pnpipehdr *oph = pnp_hdr(oskb);
+	struct pnpipehdr *ph;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
+	if (!skb)
+		return -ENOMEM;
+	skb_set_owner_w(skb, sk);
+
+	skb_reserve(skb, MAX_PNPIPE_HEADER);
+	__skb_put(skb, len);
+	skb_copy_to_linear_data(skb, data, len);
+	__skb_push(skb, sizeof(*ph));
+	skb_reset_transport_header(skb);
+	ph = pnp_hdr(skb);
+	ph->utid = oph->utid;
+	ph->message_id = oph->message_id + 1; /* REQ -> RESP */
+	ph->pipe_handle = oph->pipe_handle;
+	ph->error_code = code;
+
+	return pn_skb_send(sk, skb, &pipe_srv);
+}
+
+#define PAD 0x00
+static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
+{
+	static const u8 data[20] = {
+		PAD, PAD, PAD, 2 /* sub-blocks */,
+		PN_PIPE_SB_REQUIRED_FC_TX, pep_sb_size(5), 3, PAD,
+			PN_MULTI_CREDIT_FLOW_CONTROL,
+			PN_ONE_CREDIT_FLOW_CONTROL,
+			PN_LEGACY_FLOW_CONTROL,
+			PAD,
+		PN_PIPE_SB_PREFERRED_FC_RX, pep_sb_size(5), 3, PAD,
+			PN_MULTI_CREDIT_FLOW_CONTROL,
+			PN_ONE_CREDIT_FLOW_CONTROL,
+			PN_LEGACY_FLOW_CONTROL,
+			PAD,
+	};
+
+	might_sleep();
+	return pep_reply(sk, skb, PN_PIPE_NO_ERROR, data, sizeof(data),
+				GFP_KERNEL);
+}
+
+static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code)
+{
+	static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ };
+	WARN_ON(code == PN_PIPE_NO_ERROR);
+	return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC);
+}
+
+/* Control requests are not sent by the pipe service and have a specific
+ * message format. */
+static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code)
+{
+	const struct pnpipehdr *oph = pnp_hdr(oskb);
+	struct sk_buff *skb;
+	struct pnpipehdr *ph;
+	struct sockaddr_pn dst;
+
+	skb = alloc_skb(MAX_PNPIPE_HEADER + 4, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+	skb_set_owner_w(skb, sk);
+
+	skb_reserve(skb, MAX_PHONET_HEADER);
+	ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4);
+
+	ph->utid = oph->utid;
+	ph->message_id = PNS_PEP_CTRL_RESP;
+	ph->pipe_handle = oph->pipe_handle;
+	ph->data[0] = oph->data[1]; /* CTRL id */
+	ph->data[1] = oph->data[0]; /* PEP type */
+	ph->data[2] = code; /* error code, at an usual offset */
+	ph->data[3] = PAD;
+	ph->data[4] = PAD;
+
+	pn_skb_get_src_sockaddr(oskb, &dst);
+	return pn_skb_send(sk, skb, &dst);
+}
+
+static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct pnpipehdr *ph;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority);
+	if (!skb)
+		return -ENOMEM;
+	skb_set_owner_w(skb, sk);
+
+	skb_reserve(skb, MAX_PNPIPE_HEADER + 4);
+	__skb_push(skb, sizeof(*ph) + 4);
+	skb_reset_transport_header(skb);
+	ph = pnp_hdr(skb);
+	ph->utid = 0;
+	ph->message_id = PNS_PEP_STATUS_IND;
+	ph->pipe_handle = pn->pipe_handle;
+	ph->pep_type = PN_PEP_TYPE_COMMON;
+	ph->data[1] = type;
+	ph->data[2] = PAD;
+	ph->data[3] = PAD;
+	ph->data[4] = status;
+
+	return pn_skb_send(sk, skb, &pipe_srv);
+}
+
+/* Send our RX flow control information to the sender.
+ * Socket must be locked. */
+static void pipe_grant_credits(struct sock *sk)
+{
+	struct pep_sock *pn = pep_sk(sk);
+
+	BUG_ON(sk->sk_state != TCP_ESTABLISHED);
+
+	switch (pn->rx_fc) {
+	case PN_LEGACY_FLOW_CONTROL: /* TODO */
+		break;
+	case PN_ONE_CREDIT_FLOW_CONTROL:
+		pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL,
+				PEP_IND_READY, GFP_ATOMIC);
+		pn->rx_credits = 1;
+		break;
+	case PN_MULTI_CREDIT_FLOW_CONTROL:
+		if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX)
+			break;
+		if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS,
+					CREDITS_MAX - pn->rx_credits,
+					GFP_ATOMIC) == 0)
+			pn->rx_credits = CREDITS_MAX;
+		break;
+	}
+}
+
+static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct pnpipehdr *hdr = pnp_hdr(skb);
+
+	if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
+		return -EINVAL;
+
+	if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
+		LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n",
+				(unsigned)hdr->data[0]);
+		return -EOPNOTSUPP;
+	}
+
+	switch (hdr->data[1]) {
+	case PN_PEP_IND_FLOW_CONTROL:
+		switch (pn->tx_fc) {
+		case PN_LEGACY_FLOW_CONTROL:
+			switch (hdr->data[4]) {
+			case PEP_IND_BUSY:
+				pn->tx_credits = 0;
+				break;
+			case PEP_IND_READY:
+				pn->tx_credits = 1;
+				break;
+			}
+			break;
+		case PN_ONE_CREDIT_FLOW_CONTROL:
+			if (hdr->data[4] == PEP_IND_READY)
+				pn->tx_credits = 1;
+			break;
+		}
+		break;
+
+	case PN_PEP_IND_ID_MCFC_GRANT_CREDITS:
+		if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL)
+			break;
+		if (pn->tx_credits + hdr->data[4] > 0xff)
+			pn->tx_credits = 0xff;
+		else
+			pn->tx_credits += hdr->data[4];
+		break;
+
+	default:
+		LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP indication: %u\n",
+				(unsigned)hdr->data[1]);
+		return -EOPNOTSUPP;
+	}
+	if (pn->tx_credits)
+		sk->sk_write_space(sk);
+	return 0;
+}
+
+static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct pnpipehdr *hdr = pnp_hdr(skb);
+	u8 n_sb = hdr->data[0];
+
+	pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
+	__skb_pull(skb, sizeof(*hdr));
+	while (n_sb > 0) {
+		u8 type, buf[2], len = sizeof(buf);
+		u8 *data = pep_get_sb(skb, &type, &len, buf);
+
+		if (data == NULL)
+			return -EINVAL;
+		switch (type) {
+		case PN_PIPE_SB_NEGOTIATED_FC:
+			if (len < 2 || (data[0] | data[1]) > 3)
+				break;
+			pn->tx_fc = data[0] & 3;
+			pn->rx_fc = data[1] & 3;
+			break;
+		}
+		n_sb--;
+	}
+	return 0;
+}
+
+/* Queue an skb to a connected sock.
+ * Socket lock must be held. */
+static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct pnpipehdr *hdr = pnp_hdr(skb);
+	int err = 0;
+
+	BUG_ON(sk->sk_state == TCP_CLOSE_WAIT);
+
+	switch (hdr->message_id) {
+	case PNS_PEP_CONNECT_REQ:
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE);
+		break;
+
+	case PNS_PEP_DISCONNECT_REQ:
+		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
+		sk->sk_state = TCP_CLOSE_WAIT;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_state_change(sk);
+		break;
+
+	case PNS_PEP_ENABLE_REQ:
+		/* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */
+		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
+		break;
+
+	case PNS_PEP_RESET_REQ:
+		switch (hdr->state_after_reset) {
+		case PN_PIPE_DISABLE:
+			pn->init_enable = 0;
+			break;
+		case PN_PIPE_ENABLE:
+			pn->init_enable = 1;
+			break;
+		default: /* not allowed to send an error here!? */
+			err = -EINVAL;
+			goto out;
+		}
+		/* fall through */
+	case PNS_PEP_DISABLE_REQ:
+		pn->tx_credits = 0;
+		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
+		break;
+
+	case PNS_PEP_CTRL_REQ:
+		/* TODO */
+		pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR);
+		break;
+
+	case PNS_PIPE_DATA:
+		__skb_pull(skb, 3); /* Pipe data header */
+		if (!pn_flow_safe(pn->rx_fc)) {
+			err = sock_queue_rcv_skb(sk, skb);
+			if (!err)
+				return 0;
+			break;
+		}
+
+		if (pn->rx_credits == 0) {
+			err = -ENOBUFS;
+			break;
+		}
+		pn->rx_credits--;
+		skb->dev = NULL;
+		skb_set_owner_r(skb, sk);
+		err = skb->len;
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_data_ready(sk, err);
+		return 0;
+
+	case PNS_PEP_STATUS_IND:
+		pipe_rcv_status(sk, skb);
+		break;
+
+	case PNS_PIPE_REDIRECTED_IND:
+		err = pipe_rcv_created(sk, skb);
+		break;
+
+	case PNS_PIPE_CREATED_IND:
+		err = pipe_rcv_created(sk, skb);
+		if (err)
+			break;
+		/* fall through */
+	case PNS_PIPE_RESET_IND:
+		if (!pn->init_enable)
+			break;
+		/* fall through */
+	case PNS_PIPE_ENABLED_IND:
+		if (!pn_flow_safe(pn->tx_fc)) {
+			pn->tx_credits = 1;
+			sk->sk_write_space(sk);
+		}
+		if (sk->sk_state == TCP_ESTABLISHED)
+			break; /* Nothing to do */
+		sk->sk_state = TCP_ESTABLISHED;
+		pipe_grant_credits(sk);
+		break;
+
+	case PNS_PIPE_DISABLED_IND:
+		sk->sk_state = TCP_SYN_RECV;
+		pn->rx_credits = 0;
+		break;
+
+	default:
+		LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP message: %u\n",
+				hdr->message_id);
+		err = -EINVAL;
+	}
+out:
+	kfree_skb(skb);
+	return err;
+}
+
+/* Destroy connected sock. */
+static void pipe_destruct(struct sock *sk)
+{
+	skb_queue_purge(&sk->sk_receive_queue);
+}
+
+static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct sock *newsk;
+	struct pep_sock *newpn, *pn = pep_sk(sk);
+	struct pnpipehdr *hdr;
+	struct sockaddr_pn dst;
+	u16 peer_type;
+	u8 pipe_handle, enabled, n_sb;
+
+	if (!pskb_pull(skb, sizeof(*hdr) + 4))
+		return -EINVAL;
+
+	hdr = pnp_hdr(skb);
+	pipe_handle = hdr->pipe_handle;
+	switch (hdr->state_after_connect) {
+	case PN_PIPE_DISABLE:
+		enabled = 0;
+		break;
+	case PN_PIPE_ENABLE:
+		enabled = 1;
+		break;
+	default:
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM);
+		return -EINVAL;
+	}
+	peer_type = hdr->other_pep_type << 8;
+
+	if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) {
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE);
+		return -ENOBUFS;
+	}
+
+	/* Parse sub-blocks (options) */
+	n_sb = hdr->data[4];
+	while (n_sb > 0) {
+		u8 type, buf[1], len = sizeof(buf);
+		const u8 *data = pep_get_sb(skb, &type, &len, buf);
+
+		if (data == NULL)
+			return -EINVAL;
+		switch (type) {
+		case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE:
+			if (len < 1)
+				return -EINVAL;
+			peer_type = (peer_type & 0xff00) | data[0];
+			break;
+		}
+		n_sb--;
+	}
+
+	skb = skb_clone(skb, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	/* Create a new to-be-accepted sock */
+	newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot);
+	if (!newsk) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+	sock_init_data(NULL, newsk);
+	newsk->sk_state = TCP_SYN_RECV;
+	newsk->sk_backlog_rcv = pipe_do_rcv;
+	newsk->sk_protocol = sk->sk_protocol;
+	newsk->sk_destruct = pipe_destruct;
+
+	newpn = pep_sk(newsk);
+	pn_skb_get_dst_sockaddr(skb, &dst);
+	newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
+	newpn->pn_sk.resource = pn->pn_sk.resource;
+	newpn->pipe_handle = pipe_handle;
+	newpn->peer_type = peer_type;
+	newpn->rx_credits = newpn->tx_credits = 0;
+	newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL;
+	newpn->init_enable = enabled;
+
+	BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue));
+	skb_queue_head(&newsk->sk_receive_queue, skb);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, 0);
+
+	sk_acceptq_added(sk);
+	sk_add_node(newsk, &pn->ackq);
+	return 0;
+}
+
+/* Listening sock must be locked */
+static struct sock *pep_find_pipe(const struct hlist_head *hlist,
+					const struct sockaddr_pn *dst,
+					u8 pipe_handle)
+{
+	struct hlist_node *node;
+	struct sock *sknode;
+	u16 dobj = pn_sockaddr_get_object(dst);
+
+	sk_for_each(sknode, node, hlist) {
+		struct pep_sock *pnnode = pep_sk(sknode);
+
+		/* Ports match, but addresses might not: */
+		if (pnnode->pn_sk.sobject != dobj)
+			continue;
+		if (pnnode->pipe_handle != pipe_handle)
+			continue;
+		if (sknode->sk_state == TCP_CLOSE_WAIT)
+			continue;
+
+		sock_hold(sknode);
+		return sknode;
+	}
+	return NULL;
+}
+
+/*
+ * Deliver an skb to a listening sock.
+ * Socket lock must be held.
+ * We then queue the skb to the right connected sock (if any).
+ */
+static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct sock *sknode;
+	struct pnpipehdr *hdr = pnp_hdr(skb);
+	struct sockaddr_pn dst;
+	int err = NET_RX_SUCCESS;
+	u8 pipe_handle;
+
+	if (!pskb_may_pull(skb, sizeof(*hdr)))
+		goto drop;
+
+	hdr = pnp_hdr(skb);
+	pipe_handle = hdr->pipe_handle;
+	if (pipe_handle == PN_PIPE_INVALID_HANDLE)
+		goto drop;
+
+	pn_skb_get_dst_sockaddr(skb, &dst);
+
+	/* Look for an existing pipe handle */
+	sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
+	if (sknode)
+		return sk_receive_skb(sknode, skb, 1);
+
+	/* Look for a pipe handle pending accept */
+	sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle);
+	if (sknode) {
+		sock_put(sknode);
+		if (net_ratelimit())
+			printk(KERN_WARNING"Phonet unconnected PEP ignored");
+		err = NET_RX_DROP;
+		goto drop;
+	}
+
+	switch (hdr->message_id) {
+	case PNS_PEP_CONNECT_REQ:
+		err = pep_connreq_rcv(sk, skb);
+		break;
+
+	case PNS_PEP_DISCONNECT_REQ:
+		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
+		break;
+
+	case PNS_PEP_CTRL_REQ:
+		pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE);
+		break;
+
+	case PNS_PEP_RESET_REQ:
+	case PNS_PEP_ENABLE_REQ:
+	case PNS_PEP_DISABLE_REQ:
+		/* invalid handle is not even allowed here! */
+	default:
+		err = NET_RX_DROP;
+	}
+drop:
+	kfree_skb(skb);
+	return err;
+}
+
+/* associated socket ceases to exist */
+static void pep_sock_close(struct sock *sk, long timeout)
+{
+	struct pep_sock *pn = pep_sk(sk);
+
+	sk_common_release(sk);
+
+	lock_sock(sk);
+	if (sk->sk_state == TCP_LISTEN) {
+		/* Destroy the listen queue */
+		struct sock *sknode;
+		struct hlist_node *p, *n;
+
+		sk_for_each_safe(sknode, p, n, &pn->ackq)
+			sk_del_node_init(sknode);
+		sk->sk_state = TCP_CLOSE;
+	}
+	release_sock(sk);
+}
+
+static int pep_wait_connreq(struct sock *sk, int noblock)
+{
+	struct task_struct *tsk = current;
+	struct pep_sock *pn = pep_sk(sk);
+	long timeo = sock_rcvtimeo(sk, noblock);
+
+	for (;;) {
+		DEFINE_WAIT(wait);
+
+		if (sk->sk_state != TCP_LISTEN)
+			return -EINVAL;
+		if (!hlist_empty(&pn->ackq))
+			break;
+		if (!timeo)
+			return -EWOULDBLOCK;
+		if (signal_pending(tsk))
+			return sock_intr_errno(timeo);
+
+		prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait,
+						TASK_INTERRUPTIBLE);
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		finish_wait(&sk->sk_socket->wait, &wait);
+	}
+
+	return 0;
+}
+
+static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct sock *newsk = NULL;
+	struct sk_buff *oskb;
+	int err;
+
+	lock_sock(sk);
+	err = pep_wait_connreq(sk, flags & O_NONBLOCK);
+	if (err)
+		goto out;
+
+	newsk = __sk_head(&pn->ackq);
+
+	oskb = skb_dequeue(&newsk->sk_receive_queue);
+	err = pep_accept_conn(newsk, oskb);
+	if (err) {
+		skb_queue_head(&newsk->sk_receive_queue, oskb);
+		newsk = NULL;
+		goto out;
+	}
+
+	sock_hold(sk);
+	pep_sk(newsk)->listener = sk;
+
+	sock_hold(newsk);
+	sk_del_node_init(newsk);
+	sk_acceptq_removed(sk);
+	sk_add_node(newsk, &pn->hlist);
+	__sock_put(newsk);
+
+out:
+	release_sock(sk);
+	*errp = err;
+	return newsk;
+}
+
+static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	int answ;
+
+	switch (cmd) {
+	case SIOCINQ:
+		if (sk->sk_state == TCP_LISTEN)
+			return -EINVAL;
+
+		lock_sock(sk);
+		if (!skb_queue_empty(&sk->sk_receive_queue))
+			answ = skb_peek(&sk->sk_receive_queue)->len;
+		else
+			answ = 0;
+		release_sock(sk);
+		return put_user(answ, (int __user *)arg);
+	}
+
+	return -ENOIOCTLCMD;
+}
+
+static int pep_init(struct sock *sk)
+{
+	struct pep_sock *pn = pep_sk(sk);
+
+	INIT_HLIST_HEAD(&pn->ackq);
+	INIT_HLIST_HEAD(&pn->hlist);
+	pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
+	return 0;
+}
+
+static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
+			struct msghdr *msg, size_t len)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct sk_buff *skb = NULL;
+	struct pnpipehdr *ph;
+	long timeo;
+	int flags = msg->msg_flags;
+	int err, done;
+
+	if (msg->msg_flags & MSG_OOB || !(msg->msg_flags & MSG_EOR))
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+	if ((1 << sk->sk_state) & (TCPF_LISTEN|TCPF_CLOSE)) {
+		err = -ENOTCONN;
+		goto out;
+	}
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		/* Wait until the pipe gets to enabled state */
+disabled:
+		err = sk_stream_wait_connect(sk, &timeo);
+		if (err)
+			goto out;
+
+		if (sk->sk_state == TCP_CLOSE_WAIT) {
+			err = -ECONNRESET;
+			goto out;
+		}
+	}
+	BUG_ON(sk->sk_state != TCP_ESTABLISHED);
+
+	/* Wait until flow control allows TX */
+	done = pn->tx_credits > 0;
+	while (!done) {
+		DEFINE_WAIT(wait);
+
+		if (!timeo) {
+			err = -EAGAIN;
+			goto out;
+		}
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			goto out;
+		}
+
+		prepare_to_wait(&sk->sk_socket->wait, &wait,
+				TASK_INTERRUPTIBLE);
+		done = sk_wait_event(sk, &timeo, pn->tx_credits > 0);
+		finish_wait(&sk->sk_socket->wait, &wait);
+
+		if (sk->sk_state != TCP_ESTABLISHED)
+			goto disabled;
+	}
+
+	if (!skb) {
+		skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len,
+						flags & MSG_DONTWAIT, &err);
+		if (skb == NULL)
+			goto out;
+		skb_reserve(skb, MAX_PHONET_HEADER + 3);
+
+		if (sk->sk_state != TCP_ESTABLISHED || !pn->tx_credits)
+			goto disabled; /* sock_alloc_send_skb might sleep */
+	}
+
+	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+	if (err < 0)
+		goto out;
+
+	__skb_push(skb, 3);
+	skb_reset_transport_header(skb);
+	ph = pnp_hdr(skb);
+	ph->utid = 0;
+	ph->message_id = PNS_PIPE_DATA;
+	ph->pipe_handle = pn->pipe_handle;
+	if (pn_flow_safe(pn->tx_fc)) /* credit-based flow control */
+		pn->tx_credits--;
+
+	err = pn_skb_send(sk, skb, &pipe_srv);
+	if (err >= 0)
+		err = len; /* success! */
+	skb = NULL;
+out:
+	release_sock(sk);
+	kfree_skb(skb);
+	return err;
+}
+
+static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
+			struct msghdr *msg, size_t len, int noblock,
+			int flags, int *addr_len)
+{
+	struct sk_buff *skb;
+	int err;
+
+	if (unlikely(flags & MSG_OOB))
+		return -EOPNOTSUPP;
+	if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE)))
+		return -ENOTCONN;
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	lock_sock(sk);
+	if (skb == NULL) {
+		if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT)
+			err = -ECONNRESET;
+		release_sock(sk);
+		return err;
+	}
+
+	if (sk->sk_state == TCP_ESTABLISHED)
+		pipe_grant_credits(sk);
+	release_sock(sk);
+
+	msg->msg_flags |= MSG_EOR;
+
+	if (skb->len > len)
+		msg->msg_flags |= MSG_TRUNC;
+	else
+		len = skb->len;
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
+	if (!err)
+		err = (flags & MSG_TRUNC) ? skb->len : len;
+
+	skb_free_datagram(sk, skb);
+	return err;
+}
+
+static void pep_sock_unhash(struct sock *sk)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct sock *skparent = NULL;
+
+	lock_sock(sk);
+	if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) {
+		skparent = pn->listener;
+		sk_del_node_init(sk);
+		release_sock(sk);
+
+		sk = skparent;
+		pn = pep_sk(skparent);
+		lock_sock(sk);
+	}
+	/* Unhash a listening sock only when it is closed
+	 * and all of its active connected pipes are closed. */
+	if (hlist_empty(&pn->hlist))
+		pn_sock_unhash(&pn->pn_sk.sk);
+	release_sock(sk);
+
+	if (skparent)
+		sock_put(skparent);
+}
+
+static struct proto pep_proto = {
+	.close		= pep_sock_close,
+	.accept		= pep_sock_accept,
+	.ioctl		= pep_ioctl,
+	.init		= pep_init,
+	.sendmsg	= pep_sendmsg,
+	.recvmsg	= pep_recvmsg,
+	.backlog_rcv	= pep_do_rcv,
+	.hash		= pn_sock_hash,
+	.unhash		= pep_sock_unhash,
+	.get_port	= pn_sock_get_port,
+	.obj_size	= sizeof(struct pep_sock),
+	.owner		= THIS_MODULE,
+	.name		= "PNPIPE",
+};
+
+static struct phonet_protocol pep_pn_proto = {
+	.ops		= &phonet_stream_ops,
+	.prot		= &pep_proto,
+	.sock_type	= SOCK_SEQPACKET,
+};
+
+static int __init pep_register(void)
+{
+	return phonet_proto_register(PN_PROTO_PIPE, &pep_pn_proto);
+}
+
+static void __exit pep_unregister(void)
+{
+	phonet_proto_unregister(PN_PROTO_PIPE, &pep_pn_proto);
+}
+
+module_init(pep_register);
+module_exit(pep_unregister);
+MODULE_AUTHOR("Remi Denis-Courmont, Nokia");
+MODULE_DESCRIPTION("Phonet pipe protocol");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_PHONET, PN_PROTO_PIPE);
-- 
cgit v1.2.3-70-g09d2


From c41bd97f815720f9404f97da0c4f4400b52c243d Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Sun, 5 Oct 2008 11:15:43 -0700
Subject: Phonet: receive pipe control requests as out-of-band data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/pep.h |  2 ++
 net/phonet/pep.c         | 65 ++++++++++++++++++++++++++++++++++--------------
 net/phonet/socket.c      |  4 ++-
 3 files changed, 52 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h
index fb024e18686..4d79564850a 100644
--- a/include/net/phonet/pep.h
+++ b/include/net/phonet/pep.h
@@ -33,6 +33,8 @@ struct pep_sock {
 
 	/* Connected socket stuff: */
 	struct sock		*listener;
+	struct sk_buff_head	ctrlreq_queue;
+#define PNPIPE_CTRLREQ_MAX	10
 	u16			peer_type;	/* peer type/subtype */
 	u8			pipe_handle;
 
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index c5dfecb207d..d564d07cb79 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -137,14 +137,15 @@ static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code)
 
 /* Control requests are not sent by the pipe service and have a specific
  * message format. */
-static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code)
+static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
+				gfp_t priority)
 {
 	const struct pnpipehdr *oph = pnp_hdr(oskb);
 	struct sk_buff *skb;
 	struct pnpipehdr *ph;
 	struct sockaddr_pn dst;
 
-	skb = alloc_skb(MAX_PNPIPE_HEADER + 4, GFP_ATOMIC);
+	skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority);
 	if (!skb)
 		return -ENOMEM;
 	skb_set_owner_w(skb, sk);
@@ -305,6 +306,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *hdr = pnp_hdr(skb);
+	struct sk_buff_head *queue;
 	int err = 0;
 
 	BUG_ON(sk->sk_state == TCP_CLOSE_WAIT);
@@ -345,9 +347,11 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		break;
 
 	case PNS_PEP_CTRL_REQ:
-		/* TODO */
-		pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR);
-		break;
+		if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX)
+			break;
+		__skb_pull(skb, 4);
+		queue = &pn->ctrlreq_queue;
+		goto queue;
 
 	case PNS_PIPE_DATA:
 		__skb_pull(skb, 3); /* Pipe data header */
@@ -363,13 +367,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 			break;
 		}
 		pn->rx_credits--;
-		skb->dev = NULL;
-		skb_set_owner_r(skb, sk);
-		err = skb->len;
-		skb_queue_tail(&sk->sk_receive_queue, skb);
-		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_data_ready(sk, err);
-		return 0;
+		queue = &sk->sk_receive_queue;
+		goto queue;
 
 	case PNS_PEP_STATUS_IND:
 		pipe_rcv_status(sk, skb);
@@ -412,12 +411,24 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 out:
 	kfree_skb(skb);
 	return err;
+
+queue:
+	skb->dev = NULL;
+	skb_set_owner_r(skb, sk);
+	err = skb->len;
+	skb_queue_tail(queue, skb);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, err);
+	return 0;
 }
 
 /* Destroy connected sock. */
 static void pipe_destruct(struct sock *sk)
 {
+	struct pep_sock *pn = pep_sk(sk);
+
 	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&pn->ctrlreq_queue);
 }
 
 static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb)
@@ -490,6 +501,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb)
 	pn_skb_get_dst_sockaddr(skb, &dst);
 	newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
 	newpn->pn_sk.resource = pn->pn_sk.resource;
+	skb_queue_head_init(&newpn->ctrlreq_queue);
 	newpn->pipe_handle = pipe_handle;
 	newpn->peer_type = peer_type;
 	newpn->rx_credits = newpn->tx_credits = 0;
@@ -581,7 +593,7 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
 		break;
 
 	case PNS_PEP_CTRL_REQ:
-		pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE);
+		pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE, GFP_ATOMIC);
 		break;
 
 	case PNS_PEP_RESET_REQ:
@@ -684,6 +696,7 @@ out:
 
 static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
+	struct pep_sock *pn = pep_sk(sk);
 	int answ;
 
 	switch (cmd) {
@@ -692,7 +705,10 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			return -EINVAL;
 
 		lock_sock(sk);
-		if (!skb_queue_empty(&sk->sk_receive_queue))
+		if (sock_flag(sk, SOCK_URGINLINE)
+		 && !skb_queue_empty(&pn->ctrlreq_queue))
+			answ = skb_peek(&pn->ctrlreq_queue)->len;
+		else if (!skb_queue_empty(&sk->sk_receive_queue))
 			answ = skb_peek(&sk->sk_receive_queue)->len;
 		else
 			answ = 0;
@@ -709,6 +725,7 @@ static int pep_init(struct sock *sk)
 
 	INIT_HLIST_HEAD(&pn->ackq);
 	INIT_HLIST_HEAD(&pn->hlist);
+	skb_queue_head_init(&pn->ctrlreq_queue);
 	pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
 	return 0;
 }
@@ -810,11 +827,24 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
 	struct sk_buff *skb;
 	int err;
 
-	if (unlikely(flags & MSG_OOB))
-		return -EOPNOTSUPP;
 	if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE)))
 		return -ENOTCONN;
 
+	if ((flags & MSG_OOB) || sock_flag(sk, SOCK_URGINLINE)) {
+		/* Dequeue and acknowledge control request */
+		struct pep_sock *pn = pep_sk(sk);
+
+		skb = skb_dequeue(&pn->ctrlreq_queue);
+		if (skb) {
+			pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR,
+						GFP_KERNEL);
+			msg->msg_flags |= MSG_OOB;
+			goto copy;
+		}
+		if (flags & MSG_OOB)
+			return -EINVAL;
+	}
+
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	lock_sock(sk);
 	if (skb == NULL) {
@@ -827,9 +857,8 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (sk->sk_state == TCP_ESTABLISHED)
 		pipe_grant_credits(sk);
 	release_sock(sk);
-
+copy:
 	msg->msg_flags |= MSG_EOR;
-
 	if (skb->len > len)
 		msg->msg_flags |= MSG_TRUNC;
 	else
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index cea1136cbe4..a9c3d1f2e9d 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -220,7 +220,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
 
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
-	else if (sk->sk_state == TCP_CLOSE_WAIT)
+	if (!skb_queue_empty(&pn->ctrlreq_queue))
+		mask |= POLLPRI;
+	if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
 		return POLLHUP;
 
 	if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits)
-- 
cgit v1.2.3-70-g09d2


From 02a47617cdce440f60c71a51f3a93f9f5fcc5a7a Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Sun, 5 Oct 2008 11:16:16 -0700
Subject: Phonet: implement GPRS virtual interface over PEP socket
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phonet.h    |   8 ++
 include/linux/socket.h    |   1 +
 include/net/phonet/gprs.h |  38 +++++
 include/net/phonet/pep.h  |   1 +
 net/phonet/Makefile       |   2 +-
 net/phonet/pep-gprs.c     | 347 ++++++++++++++++++++++++++++++++++++++++++++++
 net/phonet/pep.c          | 161 +++++++++++++++++++--
 net/phonet/socket.c       |   8 +-
 8 files changed, 550 insertions(+), 16 deletions(-)
 create mode 100644 include/net/phonet/gprs.h
 create mode 100644 net/phonet/pep-gprs.c

(limited to 'net')

diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index f9218524207..c9609f9aeda 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -31,9 +31,17 @@
 #define PN_PROTO_PIPE		2
 #define PHONET_NPROTO		3
 
+/* Socket options for SOL_PNPIPE level */
+#define PNPIPE_ENCAP		1
+#define PNPIPE_IFINDEX		2
+
 #define PNADDR_ANY		0
 #define PNPORT_RESOURCE_ROUTING	0
 
+/* Values for PNPIPE_ENCAP option */
+#define PNPIPE_ENCAP_NONE	0
+#define PNPIPE_ENCAP_IP		1
+
 /* ioctls */
 #define SIOCPNGETOBJECT		(SIOCPROTOPRIVATE + 0)
 
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 818ca33bf79..20fc4bbfca4 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -297,6 +297,7 @@ struct ucred {
 #define SOL_RXRPC	272
 #define SOL_PPPOL2TP	273
 #define SOL_BLUETOOTH	274
+#define SOL_PNPIPE	275
 
 /* IPX options */
 #define IPX_TYPE	1
diff --git a/include/net/phonet/gprs.h b/include/net/phonet/gprs.h
new file mode 100644
index 00000000000..928daf595be
--- /dev/null
+++ b/include/net/phonet/gprs.h
@@ -0,0 +1,38 @@
+/*
+ * File: pep_gprs.h
+ *
+ * GPRS over Phonet pipe end point socket
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * Author: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef NET_PHONET_GPRS_H
+#define NET_PHONET_GPRS_H
+
+struct sock;
+struct sk_buff;
+
+int pep_writeable(struct sock *sk);
+int pep_write(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *pep_read(struct sock *sk);
+
+int gprs_attach(struct sock *sk);
+void gprs_detach(struct sock *sk);
+
+#endif
diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h
index 4d79564850a..fcd793030e4 100644
--- a/include/net/phonet/pep.h
+++ b/include/net/phonet/pep.h
@@ -35,6 +35,7 @@ struct pep_sock {
 	struct sock		*listener;
 	struct sk_buff_head	ctrlreq_queue;
 #define PNPIPE_CTRLREQ_MAX	10
+	int			ifindex;
 	u16			peer_type;	/* peer type/subtype */
 	u8			pipe_handle;
 
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index 505df2a04a8..d62bbba649b 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -8,4 +8,4 @@ phonet-objs := \
 	sysctl.o \
 	af_phonet.o
 
-pn_pep-objs := pep.o
+pn_pep-objs := pep.o pep-gprs.o
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
new file mode 100644
index 00000000000..9978afbd9f2
--- /dev/null
+++ b/net/phonet/pep-gprs.c
@@ -0,0 +1,347 @@
+/*
+ * File: pep-gprs.c
+ *
+ * GPRS over Phonet pipe end point socket
+ *
+ * Copyright (C) 2008 Nokia Corporation.
+ *
+ * Author: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <net/sock.h>
+
+#include <linux/if_phonet.h>
+#include <net/tcp_states.h>
+#include <net/phonet/gprs.h>
+
+#define GPRS_DEFAULT_MTU 1400
+
+struct gprs_dev {
+	struct sock		*sk;
+	void			(*old_state_change)(struct sock *);
+	void			(*old_data_ready)(struct sock *, int);
+	void			(*old_write_space)(struct sock *);
+
+	struct net_device	*net;
+	struct net_device_stats	stats;
+
+	struct sk_buff_head	tx_queue;
+	struct work_struct	tx_work;
+	spinlock_t		tx_lock;
+	unsigned		tx_max;
+};
+
+static int gprs_type_trans(struct sk_buff *skb)
+{
+	const u8 *pvfc;
+	u8 buf;
+
+	pvfc = skb_header_pointer(skb, 0, 1, &buf);
+	if (!pvfc)
+		return 0;
+	/* Look at IP version field */
+	switch (*pvfc >> 4) {
+	case 4:
+		return htons(ETH_P_IP);
+	case 6:
+		return htons(ETH_P_IPV6);
+	}
+	return 0;
+}
+
+/*
+ * Socket callbacks
+ */
+
+static void gprs_state_change(struct sock *sk)
+{
+	struct gprs_dev *dev = sk->sk_user_data;
+
+	if (sk->sk_state == TCP_CLOSE_WAIT) {
+		netif_stop_queue(dev->net);
+		netif_carrier_off(dev->net);
+	}
+}
+
+static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb)
+{
+	int err = 0;
+	u16 protocol = gprs_type_trans(skb);
+
+	if (!protocol) {
+		err = -EINVAL;
+		goto drop;
+	}
+
+	if (likely(skb_headroom(skb) & 3)) {
+		struct sk_buff *rskb, *fs;
+		int flen = 0;
+
+		/* Phonet Pipe data header is misaligned (3 bytes),
+		 * so wrap the IP packet as a single fragment of an head-less
+		 * socket buffer. The network stack will pull what it needs,
+		 * but at least, the whole IP payload is not memcpy'd. */
+		rskb = netdev_alloc_skb(dev->net, 0);
+		if (!rskb) {
+			err = -ENOBUFS;
+			goto drop;
+		}
+		skb_shinfo(rskb)->frag_list = skb;
+		rskb->len += skb->len;
+		rskb->data_len += rskb->len;
+		rskb->truesize += rskb->len;
+
+		/* Avoid nested fragments */
+		for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next)
+			flen += fs->len;
+		skb->next = skb_shinfo(skb)->frag_list;
+		skb_shinfo(skb)->frag_list = NULL;
+		skb->len -= flen;
+		skb->data_len -= flen;
+		skb->truesize -= flen;
+
+		skb = rskb;
+	}
+
+	skb->protocol = protocol;
+	skb_reset_mac_header(skb);
+	skb->dev = dev->net;
+
+	if (likely(dev->net->flags & IFF_UP)) {
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += skb->len;
+		netif_rx(skb);
+		skb = NULL;
+	} else
+		err = -ENODEV;
+
+drop:
+	if (skb) {
+		dev_kfree_skb(skb);
+		dev->stats.rx_dropped++;
+	}
+	return err;
+}
+
+static void gprs_data_ready(struct sock *sk, int len)
+{
+	struct gprs_dev *dev = sk->sk_user_data;
+	struct sk_buff *skb;
+
+	while ((skb = pep_read(sk)) != NULL) {
+		skb_orphan(skb);
+		gprs_recv(dev, skb);
+	}
+}
+
+static void gprs_write_space(struct sock *sk)
+{
+	struct gprs_dev *dev = sk->sk_user_data;
+	unsigned credits = pep_writeable(sk);
+
+	spin_lock_bh(&dev->tx_lock);
+	dev->tx_max = credits;
+	if (credits > skb_queue_len(&dev->tx_queue))
+		netif_wake_queue(dev->net);
+	spin_unlock_bh(&dev->tx_lock);
+}
+
+/*
+ * Network device callbacks
+ */
+
+static int gprs_xmit(struct sk_buff *skb, struct net_device *net)
+{
+	struct gprs_dev *dev = netdev_priv(net);
+
+	switch (skb->protocol) {
+	case  htons(ETH_P_IP):
+	case  htons(ETH_P_IPV6):
+		break;
+	default:
+		dev_kfree_skb(skb);
+		return 0;
+	}
+
+	spin_lock(&dev->tx_lock);
+	if (likely(skb_queue_len(&dev->tx_queue) < dev->tx_max)) {
+		skb_queue_tail(&dev->tx_queue, skb);
+		skb = NULL;
+	}
+	if (skb_queue_len(&dev->tx_queue) >= dev->tx_max)
+		netif_stop_queue(net);
+	spin_unlock(&dev->tx_lock);
+
+	schedule_work(&dev->tx_work);
+	if (unlikely(skb))
+		dev_kfree_skb(skb);
+	return 0;
+}
+
+static void gprs_tx(struct work_struct *work)
+{
+	struct gprs_dev *dev = container_of(work, struct gprs_dev, tx_work);
+	struct sock *sk = dev->sk;
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&dev->tx_queue)) != NULL) {
+		int err;
+
+		dev->stats.tx_bytes += skb->len;
+		dev->stats.tx_packets++;
+
+		skb_orphan(skb);
+		skb_set_owner_w(skb, sk);
+
+		lock_sock(sk);
+		err = pep_write(sk, skb);
+		if (err) {
+			LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n",
+					dev->net->name, err);
+			dev->stats.tx_aborted_errors++;
+			dev->stats.tx_errors++;
+		}
+		release_sock(sk);
+	}
+
+	lock_sock(sk);
+	gprs_write_space(sk);
+	release_sock(sk);
+}
+
+static int gprs_set_mtu(struct net_device *net, int new_mtu)
+{
+	if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11)))
+		return -EINVAL;
+
+	net->mtu = new_mtu;
+	return 0;
+}
+
+static struct net_device_stats *gprs_get_stats(struct net_device *net)
+{
+	struct gprs_dev *dev = netdev_priv(net);
+
+	return &dev->stats;
+}
+
+static void gprs_setup(struct net_device *net)
+{
+	net->features		= NETIF_F_FRAGLIST;
+	net->type		= ARPHRD_NONE;
+	net->flags		= IFF_POINTOPOINT | IFF_NOARP;
+	net->mtu		= GPRS_DEFAULT_MTU;
+	net->hard_header_len	= 0;
+	net->addr_len		= 0;
+	net->tx_queue_len	= 10;
+
+	net->destructor		= free_netdev;
+	net->hard_start_xmit	= gprs_xmit; /* mandatory */
+	net->change_mtu		= gprs_set_mtu;
+	net->get_stats		= gprs_get_stats;
+}
+
+/*
+ * External interface
+ */
+
+/*
+ * Attach a GPRS interface to a datagram socket.
+ * Returns the interface index on success, negative error code on error.
+ */
+int gprs_attach(struct sock *sk)
+{
+	static const char ifname[] = "gprs%d";
+	struct gprs_dev *dev;
+	struct net_device *net;
+	int err;
+
+	if (unlikely(sk->sk_type == SOCK_STREAM))
+		return -EINVAL; /* need packet boundaries */
+
+	/* Create net device */
+	net = alloc_netdev(sizeof(*dev), ifname, gprs_setup);
+	if (!net)
+		return -ENOMEM;
+	dev = netdev_priv(net);
+	dev->net = net;
+	dev->tx_max = 0;
+	spin_lock_init(&dev->tx_lock);
+	skb_queue_head_init(&dev->tx_queue);
+	INIT_WORK(&dev->tx_work, gprs_tx);
+
+	netif_stop_queue(net);
+	err = register_netdev(net);
+	if (err) {
+		free_netdev(net);
+		return err;
+	}
+
+	lock_sock(sk);
+	if (unlikely(sk->sk_user_data)) {
+		err = -EBUSY;
+		goto out_rel;
+	}
+	if (unlikely((1 << sk->sk_state & (TCPF_CLOSE|TCPF_LISTEN)) ||
+			sock_flag(sk, SOCK_DEAD))) {
+		err = -EINVAL;
+		goto out_rel;
+	}
+	sk->sk_user_data	= dev;
+	dev->old_state_change	= sk->sk_state_change;
+	dev->old_data_ready	= sk->sk_data_ready;
+	dev->old_write_space	= sk->sk_write_space;
+	sk->sk_state_change	= gprs_state_change;
+	sk->sk_data_ready	= gprs_data_ready;
+	sk->sk_write_space	= gprs_write_space;
+	release_sock(sk);
+
+	sock_hold(sk);
+	dev->sk = sk;
+
+	printk(KERN_DEBUG"%s: attached\n", net->name);
+	gprs_write_space(sk); /* kick off TX */
+	return net->ifindex;
+
+out_rel:
+	release_sock(sk);
+	unregister_netdev(net);
+	return err;
+}
+
+void gprs_detach(struct sock *sk)
+{
+	struct gprs_dev *dev = sk->sk_user_data;
+	struct net_device *net = dev->net;
+
+	lock_sock(sk);
+	sk->sk_user_data	= NULL;
+	sk->sk_state_change	= dev->old_state_change;
+	sk->sk_data_ready	= dev->old_data_ready;
+	sk->sk_write_space	= dev->old_write_space;
+	release_sock(sk);
+
+	printk(KERN_DEBUG"%s: detached\n", net->name);
+	unregister_netdev(net);
+	flush_scheduled_work();
+	sock_put(sk);
+	skb_queue_purge(&dev->tx_queue);
+}
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index d564d07cb79..bc6d50f8324 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -31,6 +31,7 @@
 #include <linux/phonet.h>
 #include <net/phonet/phonet.h>
 #include <net/phonet/pep.h>
+#include <net/phonet/gprs.h>
 
 /* sk_state values:
  * TCP_CLOSE		sock not in use yet
@@ -612,6 +613,7 @@ drop:
 static void pep_sock_close(struct sock *sk, long timeout)
 {
 	struct pep_sock *pn = pep_sk(sk);
+	int ifindex = 0;
 
 	sk_common_release(sk);
 
@@ -625,7 +627,12 @@ static void pep_sock_close(struct sock *sk, long timeout)
 			sk_del_node_init(sknode);
 		sk->sk_state = TCP_CLOSE;
 	}
+	ifindex = pn->ifindex;
+	pn->ifindex = 0;
 	release_sock(sk);
+
+	if (ifindex)
+		gprs_detach(sk);
 }
 
 static int pep_wait_connreq(struct sock *sk, int noblock)
@@ -730,12 +737,107 @@ static int pep_init(struct sock *sk)
 	return 0;
 }
 
+static int pep_setsockopt(struct sock *sk, int level, int optname,
+				char __user *optval, int optlen)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	int val = 0, err = 0;
+
+	if (level != SOL_PNPIPE)
+		return -ENOPROTOOPT;
+	if (optlen >= sizeof(int)) {
+		if (get_user(val, (int __user *) optval))
+			return -EFAULT;
+	}
+
+	lock_sock(sk);
+	switch (optname) {
+	case PNPIPE_ENCAP:
+		if (val && val != PNPIPE_ENCAP_IP) {
+			err = -EINVAL;
+			break;
+		}
+		if (!pn->ifindex == !val)
+			break; /* Nothing to do! */
+		if (!capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
+			break;
+		}
+		if (val) {
+			release_sock(sk);
+			err = gprs_attach(sk);
+			if (err > 0) {
+				pn->ifindex = err;
+				err = 0;
+			}
+		} else {
+			pn->ifindex = 0;
+			release_sock(sk);
+			gprs_detach(sk);
+			err = 0;
+		}
+		goto out_norel;
+	default:
+		err = -ENOPROTOOPT;
+	}
+	release_sock(sk);
+
+out_norel:
+	return err;
+}
+
+static int pep_getsockopt(struct sock *sk, int level, int optname,
+				char __user *optval, int __user *optlen)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	int len, val;
+
+	if (level != SOL_PNPIPE)
+		return -ENOPROTOOPT;
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	switch (optname) {
+	case PNPIPE_ENCAP:
+		val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE;
+		break;
+	case PNPIPE_IFINDEX:
+		val = pn->ifindex;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	len = min_t(unsigned int, sizeof(int), len);
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (put_user(val, (int __user *) optval))
+		return -EFAULT;
+	return 0;
+}
+
+static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct pnpipehdr *ph;
+
+	skb_push(skb, 3);
+	skb_reset_transport_header(skb);
+	ph = pnp_hdr(skb);
+	ph->utid = 0;
+	ph->message_id = PNS_PIPE_DATA;
+	ph->pipe_handle = pn->pipe_handle;
+	if (pn_flow_safe(pn->tx_fc) && pn->tx_credits)
+		pn->tx_credits--;
+
+	return pn_skb_send(sk, skb, &pipe_srv);
+}
+
 static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
 			struct msghdr *msg, size_t len)
 {
 	struct pep_sock *pn = pep_sk(sk);
 	struct sk_buff *skb = NULL;
-	struct pnpipehdr *ph;
 	long timeo;
 	int flags = msg->msg_flags;
 	int err, done;
@@ -801,16 +903,7 @@ disabled:
 	if (err < 0)
 		goto out;
 
-	__skb_push(skb, 3);
-	skb_reset_transport_header(skb);
-	ph = pnp_hdr(skb);
-	ph->utid = 0;
-	ph->message_id = PNS_PIPE_DATA;
-	ph->pipe_handle = pn->pipe_handle;
-	if (pn_flow_safe(pn->tx_fc)) /* credit-based flow control */
-		pn->tx_credits--;
-
-	err = pn_skb_send(sk, skb, &pipe_srv);
+	err = pipe_skb_send(sk, skb);
 	if (err >= 0)
 		err = len; /* success! */
 	skb = NULL;
@@ -820,6 +913,50 @@ out:
 	return err;
 }
 
+int pep_writeable(struct sock *sk)
+{
+	struct pep_sock *pn = pep_sk(sk);
+
+	return (sk->sk_state == TCP_ESTABLISHED) ? pn->tx_credits : 0;
+}
+
+int pep_write(struct sock *sk, struct sk_buff *skb)
+{
+	struct sk_buff *rskb, *fs;
+	int flen = 0;
+
+	rskb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC);
+	if (!rskb) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+	skb_shinfo(rskb)->frag_list = skb;
+	rskb->len += skb->len;
+	rskb->data_len += rskb->len;
+	rskb->truesize += rskb->len;
+
+	/* Avoid nested fragments */
+	for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next)
+		flen += fs->len;
+	skb->next = skb_shinfo(skb)->frag_list;
+	skb_shinfo(skb)->frag_list = NULL;
+	skb->len -= flen;
+	skb->data_len -= flen;
+	skb->truesize -= flen;
+
+	skb_reserve(rskb, MAX_PHONET_HEADER + 3);
+	return pipe_skb_send(sk, rskb);
+}
+
+struct sk_buff *pep_read(struct sock *sk)
+{
+	struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
+
+	if (sk->sk_state == TCP_ESTABLISHED)
+		pipe_grant_credits(sk);
+	return skb;
+}
+
 static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
 			struct msghdr *msg, size_t len, int noblock,
 			int flags, int *addr_len)
@@ -902,6 +1039,8 @@ static struct proto pep_proto = {
 	.accept		= pep_sock_accept,
 	.ioctl		= pep_ioctl,
 	.init		= pep_init,
+	.setsockopt	= pep_setsockopt,
+	.getsockopt	= pep_getsockopt,
 	.sendmsg	= pep_sendmsg,
 	.recvmsg	= pep_recvmsg,
 	.backlog_rcv	= pep_do_rcv,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index a9c3d1f2e9d..d81740187fb 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -342,11 +342,11 @@ const struct proto_ops phonet_stream_ops = {
 	.ioctl		= pn_socket_ioctl,
 	.listen		= pn_socket_listen,
 	.shutdown	= sock_no_shutdown,
-	.setsockopt	= sock_no_setsockopt,
-	.getsockopt	= sock_no_getsockopt,
+	.setsockopt	= sock_common_setsockopt,
+	.getsockopt	= sock_common_getsockopt,
 #ifdef CONFIG_COMPAT
-	.compat_setsockopt = sock_no_setsockopt,
-	.compat_getsockopt = compat_sock_no_getsockopt,
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
 	.sendmsg	= pn_socket_sendmsg,
 	.recvmsg	= sock_common_recvmsg,
-- 
cgit v1.2.3-70-g09d2


From 13c1d18931ebb5cf407cb348ef2cd6284d68902d Mon Sep 17 00:00:00 2001
From: Arnaud Ebalard <arno@natisbad.org>
Date: Sun, 5 Oct 2008 13:33:42 -0700
Subject: xfrm: MIGRATE enhancements
 (draft-ebalard-mext-pfkey-enhanced-migrate)

Provides implementation of the enhancements of XFRM/PF_KEY MIGRATE mechanism
specified in draft-ebalard-mext-pfkey-enhanced-migrate-00. Defines associated
PF_KEY SADB_X_EXT_KMADDRESS extension and XFRM/netlink XFRMA_KMADDRESS
attribute.

Signed-off-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pfkeyv2.h | 13 +++++++-
 include/linux/xfrm.h    | 10 ++++++
 include/net/xfrm.h      | 15 +++++++--
 net/key/af_key.c        | 86 +++++++++++++++++++++++++++++++++++++++----------
 net/xfrm/xfrm_policy.c  |  5 +--
 net/xfrm/xfrm_state.c   |  5 +--
 net/xfrm/xfrm_user.c    | 57 +++++++++++++++++++++++++-------
 7 files changed, 154 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
index 700725ddcaa..01b262959f2 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/linux/pfkeyv2.h
@@ -226,6 +226,15 @@ struct sadb_x_sec_ctx {
 } __attribute__((packed));
 /* sizeof(struct sadb_sec_ctx) = 8 */
 
+/* Used by MIGRATE to pass addresses IKE will use to perform
+ * negotiation with the peer */
+struct sadb_x_kmaddress {
+	uint16_t	sadb_x_kmaddress_len;
+	uint16_t	sadb_x_kmaddress_exttype;
+	uint32_t	sadb_x_kmaddress_reserved;
+} __attribute__((packed));
+/* sizeof(struct sadb_x_kmaddress) == 8 */
+
 /* Message types */
 #define SADB_RESERVED		0
 #define SADB_GETSPI		1
@@ -346,7 +355,9 @@ struct sadb_x_sec_ctx {
 #define SADB_X_EXT_NAT_T_DPORT		22
 #define SADB_X_EXT_NAT_T_OA		23
 #define SADB_X_EXT_SEC_CTX		24
-#define SADB_EXT_MAX			24
+/* Used with MIGRATE to pass @ to IKE for negotiation */
+#define SADB_X_EXT_KMADDRESS		25
+#define SADB_EXT_MAX			25
 
 /* Identity Extension values */
 #define SADB_IDENTTYPE_RESERVED	0
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index fb0c215a305..4bc1e6b86cb 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -279,6 +279,7 @@ enum xfrm_attr_type_t {
 	XFRMA_POLICY_TYPE,	/* struct xfrm_userpolicy_type */
 	XFRMA_MIGRATE,
 	XFRMA_ALG_AEAD,		/* struct xfrm_algo_aead */
+	XFRMA_KMADDRESS,        /* struct xfrm_user_kmaddress */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -415,6 +416,15 @@ struct xfrm_user_report {
 	struct xfrm_selector		sel;
 };
 
+/* Used by MIGRATE to pass addresses IKE should use to perform
+ * SA negotiation with the peer */
+struct xfrm_user_kmaddress {
+	xfrm_address_t                  local;
+	xfrm_address_t                  remote;
+	__u32				reserved;
+	__u16				family;
+};
+
 struct xfrm_user_migrate {
 	xfrm_address_t			old_daddr;
 	xfrm_address_t			old_saddr;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index b98d2056f27..11c890ad8eb 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -492,6 +492,13 @@ struct xfrm_policy
 	struct xfrm_tmpl       	xfrm_vec[XFRM_MAX_DEPTH];
 };
 
+struct xfrm_kmaddress {
+	xfrm_address_t          local;
+	xfrm_address_t          remote;
+	u32			reserved;
+	u16			family;
+};
+
 struct xfrm_migrate {
 	xfrm_address_t		old_daddr;
 	xfrm_address_t		old_saddr;
@@ -531,7 +538,7 @@ struct xfrm_mgr
 	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
 	int			(*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c);
 	int			(*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
-	int			(*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles);
+	int			(*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k);
 };
 
 extern int xfrm_register_km(struct xfrm_mgr *km);
@@ -1432,12 +1439,14 @@ extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst,
 
 #ifdef CONFIG_XFRM_MIGRATE
 extern int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-		      struct xfrm_migrate *m, int num_bundles);
+		      struct xfrm_migrate *m, int num_bundles,
+		      struct xfrm_kmaddress *k);
 extern struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m);
 extern struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
 					      struct xfrm_migrate *m);
 extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			struct xfrm_migrate *m, int num_bundles);
+			struct xfrm_migrate *m, int num_bundles,
+			struct xfrm_kmaddress *k);
 #endif
 
 extern wait_queue_head_t km_waitq;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7ae641df70b..362fe317e1f 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -398,6 +398,7 @@ static u8 sadb_ext_min_len[] = {
 	[SADB_X_EXT_NAT_T_DPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
 	[SADB_X_EXT_NAT_T_OA]		= (u8) sizeof(struct sadb_address),
 	[SADB_X_EXT_SEC_CTX]		= (u8) sizeof(struct sadb_x_sec_ctx),
+	[SADB_X_EXT_KMADDRESS]		= (u8) sizeof(struct sadb_x_kmaddress),
 };
 
 /* Verify sadb_address_{len,prefixlen} against sa_family.  */
@@ -2384,24 +2385,21 @@ static int pfkey_sockaddr_pair_size(sa_family_t family)
 	return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2);
 }
 
-static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq,
+static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
 			       xfrm_address_t *saddr, xfrm_address_t *daddr,
 			       u16 *family)
 {
-	u8 *sa = (u8 *) (rq + 1);
 	int af, socklen;
 
-	if (rq->sadb_x_ipsecrequest_len <
-	    pfkey_sockaddr_pair_size(((struct sockaddr *)sa)->sa_family))
+	if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family))
 		return -EINVAL;
 
-	af = pfkey_sockaddr_extract((struct sockaddr *) sa,
-				    saddr);
+	af = pfkey_sockaddr_extract(sa, saddr);
 	if (!af)
 		return -EINVAL;
 
 	socklen = pfkey_sockaddr_len(af);
-	if (pfkey_sockaddr_extract((struct sockaddr *) (sa + socklen),
+	if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen),
 				   daddr) != af)
 		return -EINVAL;
 
@@ -2421,7 +2419,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
 		return -EINVAL;
 
 	/* old endoints */
-	err = parse_sockaddr_pair(rq1, &m->old_saddr, &m->old_daddr,
+	err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1),
+				  rq1->sadb_x_ipsecrequest_len,
+				  &m->old_saddr, &m->old_daddr,
 				  &m->old_family);
 	if (err)
 		return err;
@@ -2434,7 +2434,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
 		return -EINVAL;
 
 	/* new endpoints */
-	err = parse_sockaddr_pair(rq2, &m->new_saddr, &m->new_daddr,
+	err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1),
+				  rq2->sadb_x_ipsecrequest_len,
+				  &m->new_saddr, &m->new_daddr,
 				  &m->new_family);
 	if (err)
 		return err;
@@ -2460,29 +2462,40 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
 	int i, len, ret, err = -EINVAL;
 	u8 dir;
 	struct sadb_address *sa;
+	struct sadb_x_kmaddress *kma;
 	struct sadb_x_policy *pol;
 	struct sadb_x_ipsecrequest *rq;
 	struct xfrm_selector sel;
 	struct xfrm_migrate m[XFRM_MAX_DEPTH];
+	struct xfrm_kmaddress k;
 
 	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1],
-	    ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) ||
+				     ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) ||
 	    !ext_hdrs[SADB_X_EXT_POLICY - 1]) {
 		err = -EINVAL;
 		goto out;
 	}
 
+	kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1];
 	pol = ext_hdrs[SADB_X_EXT_POLICY - 1];
-	if (!pol) {
-		err = -EINVAL;
-		goto out;
-	}
 
 	if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) {
 		err = -EINVAL;
 		goto out;
 	}
 
+	if (kma) {
+		/* convert sadb_x_kmaddress to xfrm_kmaddress */
+		k.reserved = kma->sadb_x_kmaddress_reserved;
+		ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1),
+					  8*(kma->sadb_x_kmaddress_len) - sizeof(*kma),
+					  &k.local, &k.remote, &k.family);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+	}
+
 	dir = pol->sadb_x_policy_dir - 1;
 	memset(&sel, 0, sizeof(sel));
 
@@ -2527,7 +2540,8 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
 		goto out;
 	}
 
-	return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i);
+	return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i,
+			    kma ? &k : NULL);
 
  out:
 	return err;
@@ -3319,6 +3333,32 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
 	return 0;
 }
 
+
+static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k)
+{
+	struct sadb_x_kmaddress *kma;
+	u8 *sa;
+	int family = k->family;
+	int socklen = pfkey_sockaddr_len(family);
+	int size_req;
+
+	size_req = (sizeof(struct sadb_x_kmaddress) +
+		    pfkey_sockaddr_pair_size(family));
+
+	kma = (struct sadb_x_kmaddress *)skb_put(skb, size_req);
+	memset(kma, 0, size_req);
+	kma->sadb_x_kmaddress_len = size_req / 8;
+	kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS;
+	kma->sadb_x_kmaddress_reserved = k->reserved;
+
+	sa = (u8 *)(kma + 1);
+	if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) ||
+	    !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int set_ipsecrequest(struct sk_buff *skb,
 			    uint8_t proto, uint8_t mode, int level,
 			    uint32_t reqid, uint8_t family,
@@ -3351,7 +3391,8 @@ static int set_ipsecrequest(struct sk_buff *skb,
 
 #ifdef CONFIG_NET_KEY_MIGRATE
 static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			      struct xfrm_migrate *m, int num_bundles)
+			      struct xfrm_migrate *m, int num_bundles,
+			      struct xfrm_kmaddress *k)
 {
 	int i;
 	int sasize_sel;
@@ -3368,6 +3409,12 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 	if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH)
 		return -EINVAL;
 
+	if (k != NULL) {
+		/* addresses for KM */
+		size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) +
+				     pfkey_sockaddr_pair_size(k->family));
+	}
+
 	/* selector */
 	sasize_sel = pfkey_sockaddr_size(sel->family);
 	if (!sasize_sel)
@@ -3404,6 +3451,10 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 	hdr->sadb_msg_seq = 0;
 	hdr->sadb_msg_pid = 0;
 
+	/* Addresses to be used by KM for negotiation, if ext is available */
+	if (k != NULL && (set_sadb_kmaddress(skb, k) < 0))
+		return -EINVAL;
+
 	/* selector src */
 	set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel);
 
@@ -3449,7 +3500,8 @@ err:
 }
 #else
 static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			      struct xfrm_migrate *m, int num_bundles)
+			      struct xfrm_migrate *m, int num_bundles,
+			      struct xfrm_kmaddress *k)
 {
 	return -ENOPROTOOPT;
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b7ec08025ff..832b47c1de8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2679,7 +2679,8 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
 }
 
 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-		 struct xfrm_migrate *m, int num_migrate)
+		 struct xfrm_migrate *m, int num_migrate,
+		 struct xfrm_kmaddress *k)
 {
 	int i, err, nx_cur = 0, nx_new = 0;
 	struct xfrm_policy *pol = NULL;
@@ -2723,7 +2724,7 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 	}
 
 	/* Stage 5 - announce */
-	km_migrate(sel, dir, type, m, num_migrate);
+	km_migrate(sel, dir, type, m, num_migrate, k);
 
 	xfrm_pol_put(pol);
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 747fd8c291a..508337f9724 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1814,7 +1814,8 @@ EXPORT_SYMBOL(km_policy_expired);
 
 #ifdef CONFIG_XFRM_MIGRATE
 int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-	       struct xfrm_migrate *m, int num_migrate)
+	       struct xfrm_migrate *m, int num_migrate,
+	       struct xfrm_kmaddress *k)
 {
 	int err = -EINVAL;
 	int ret;
@@ -1823,7 +1824,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 	read_lock(&xfrm_km_lock);
 	list_for_each_entry(km, &xfrm_km_list, list) {
 		if (km->migrate) {
-			ret = km->migrate(sel, dir, type, m, num_migrate);
+			ret = km->migrate(sel, dir, type, m, num_migrate, k);
 			if (!ret)
 				err = ret;
 		}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 76f75df21e1..4a8a1abb59e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1710,12 +1710,23 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 #ifdef CONFIG_XFRM_MIGRATE
 static int copy_from_user_migrate(struct xfrm_migrate *ma,
+				  struct xfrm_kmaddress *k,
 				  struct nlattr **attrs, int *num)
 {
 	struct nlattr *rt = attrs[XFRMA_MIGRATE];
 	struct xfrm_user_migrate *um;
 	int i, num_migrate;
 
+	if (k != NULL) {
+		struct xfrm_user_kmaddress *uk;
+
+		uk = nla_data(attrs[XFRMA_KMADDRESS]);
+		memcpy(&k->local, &uk->local, sizeof(k->local));
+		memcpy(&k->remote, &uk->remote, sizeof(k->remote));
+		k->family = uk->family;
+		k->reserved = uk->reserved;
+	}
+
 	um = nla_data(rt);
 	num_migrate = nla_len(rt) / sizeof(*um);
 
@@ -1745,6 +1756,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct xfrm_userpolicy_id *pi = nlmsg_data(nlh);
 	struct xfrm_migrate m[XFRM_MAX_DEPTH];
+	struct xfrm_kmaddress km, *kmp;
 	u8 type;
 	int err;
 	int n = 0;
@@ -1752,19 +1764,20 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (attrs[XFRMA_MIGRATE] == NULL)
 		return -EINVAL;
 
+	kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL;
+
 	err = copy_from_user_policy_type(&type, attrs);
 	if (err)
 		return err;
 
-	err = copy_from_user_migrate((struct xfrm_migrate *)m,
-				     attrs, &n);
+	err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n);
 	if (err)
 		return err;
 
 	if (!n)
 		return 0;
 
-	xfrm_migrate(&pi->sel, pi->dir, type, m, n);
+	xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp);
 
 	return 0;
 }
@@ -1795,16 +1808,30 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)
 	return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um);
 }
 
-static inline size_t xfrm_migrate_msgsize(int num_migrate)
+static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb)
+{
+	struct xfrm_user_kmaddress uk;
+
+	memset(&uk, 0, sizeof(uk));
+	uk.family = k->family;
+	uk.reserved = k->reserved;
+	memcpy(&uk.local, &k->local, sizeof(uk.local));
+	memcpy(&uk.remote, &k->local, sizeof(uk.remote));
+
+	return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk);
+}
+
+static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id))
-	       + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate)
-	       + userpolicy_type_attrsize();
+	      + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0)
+	      + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate)
+	      + userpolicy_type_attrsize();
 }
 
 static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
-			 int num_migrate, struct xfrm_selector *sel,
-			 u8 dir, u8 type)
+			 int num_migrate, struct xfrm_kmaddress *k,
+			 struct xfrm_selector *sel, u8 dir, u8 type)
 {
 	struct xfrm_migrate *mp;
 	struct xfrm_userpolicy_id *pol_id;
@@ -1821,6 +1848,9 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
 	memcpy(&pol_id->sel, sel, sizeof(pol_id->sel));
 	pol_id->dir = dir;
 
+	if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0))
+			goto nlmsg_failure;
+
 	if (copy_to_user_policy_type(type, skb) < 0)
 		goto nlmsg_failure;
 
@@ -1836,23 +1866,25 @@ nlmsg_failure:
 }
 
 static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate)
+			     struct xfrm_migrate *m, int num_migrate,
+			     struct xfrm_kmaddress *k)
 {
 	struct sk_buff *skb;
 
-	skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC);
+	skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
 
 	/* build migrate */
-	if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0)
+	if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0)
 		BUG();
 
 	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
 }
 #else
 static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate)
+			     struct xfrm_migrate *m, int num_migrate,
+			     struct xfrm_kmaddress *k)
 {
 	return -ENOPROTOOPT;
 }
@@ -1901,6 +1933,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_COADDR]		= { .len = sizeof(xfrm_address_t) },
 	[XFRMA_POLICY_TYPE]	= { .len = sizeof(struct xfrm_userpolicy_type)},
 	[XFRMA_MIGRATE]		= { .len = sizeof(struct xfrm_user_migrate) },
+	[XFRMA_KMADDRESS]	= { .len = sizeof(struct xfrm_user_kmaddress) },
 };
 
 static struct xfrm_link {
-- 
cgit v1.2.3-70-g09d2


From 554794de7949d1a6279336404c066f974d4c2bde Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 6 Oct 2008 09:54:39 -0700
Subject: pkt_sched: Fix handling of gso skbs on requeuing

Jay Cliburn noticed and diagnosed a bug triggered in
dev_gso_skb_destructor() after last change from qdisc->gso_skb
to qdisc->requeue list. Since gso_segmented skbs can't be queued
to another list this patch brings back qdisc->gso_skb for them.

Reported-by: Jay Cliburn <jcliburn@gmail.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  1 +
 net/sched/sch_generic.c   | 22 +++++++++++++++++-----
 2 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 3b983e8a055..3fe49d80895 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -52,6 +52,7 @@ struct Qdisc
 	u32			parent;
 	atomic_t		refcnt;
 	unsigned long		state;
+	struct sk_buff		*gso_skb;
 	struct sk_buff_head	requeue;
 	struct sk_buff_head	q;
 	struct netdev_queue	*dev_queue;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5e7e0bd38fe..3db4cf1bd26 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,7 +44,10 @@ static inline int qdisc_qlen(struct Qdisc *q)
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	__skb_queue_head(&q->requeue, skb);
+	if (unlikely(skb->next))
+		q->gso_skb = skb;
+	else
+		__skb_queue_head(&q->requeue, skb);
 
 	__netif_schedule(q);
 	return 0;
@@ -52,7 +55,10 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 {
-	struct sk_buff *skb = skb_peek(&q->requeue);
+	struct sk_buff *skb = q->gso_skb;
+
+	if (!skb)
+		skb = skb_peek(&q->requeue);
 
 	if (unlikely(skb)) {
 		struct net_device *dev = qdisc_dev(q);
@@ -60,10 +66,15 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
-			__skb_unlink(skb, &q->requeue);
-		else
+		if (!netif_tx_queue_stopped(txq) &&
+		    !netif_tx_queue_frozen(txq)) {
+			if (q->gso_skb)
+				q->gso_skb = NULL;
+			else
+				__skb_unlink(skb, &q->requeue);
+		} else {
 			skb = NULL;
+		}
 	} else {
 		skb = q->dequeue(q);
 	}
@@ -548,6 +559,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	module_put(ops->owner);
 	dev_put(qdisc_dev(qdisc));
 
+	kfree_skb(qdisc->gso_skb);
 	__skb_queue_purge(&qdisc->requeue);
 
 	kfree((char *) qdisc - qdisc->padded);
-- 
cgit v1.2.3-70-g09d2


From 6252352d16f7b45a0fd42224f7e70e0288dc4480 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 6 Oct 2008 10:41:50 -0700
Subject: pkt_sched: Simplify dev_requeue_skb and dequeue_skb

qdisc->requeue was planned to universally replace all requeuing code,
but at the top level we never requeue more than one skb, so qdisc->
gso_skb is enough for this. qdisc->requeue would be used on the lower
levels only for one level deep requeuing (like in sch_hfsc) after
finishing all the changes.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3db4cf1bd26..31f6b614b59 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,12 +44,9 @@ static inline int qdisc_qlen(struct Qdisc *q)
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	if (unlikely(skb->next))
-		q->gso_skb = skb;
-	else
-		__skb_queue_head(&q->requeue, skb);
-
+	q->gso_skb = skb;
 	__netif_schedule(q);
+
 	return 0;
 }
 
@@ -57,24 +54,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 {
 	struct sk_buff *skb = q->gso_skb;
 
-	if (!skb)
-		skb = skb_peek(&q->requeue);
-
 	if (unlikely(skb)) {
 		struct net_device *dev = qdisc_dev(q);
 		struct netdev_queue *txq;
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) &&
-		    !netif_tx_queue_frozen(txq)) {
-			if (q->gso_skb)
-				q->gso_skb = NULL;
-			else
-				__skb_unlink(skb, &q->requeue);
-		} else {
+		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+			q->gso_skb = NULL;
+		else
 			skb = NULL;
-		}
 	} else {
 		skb = q->dequeue(q);
 	}
-- 
cgit v1.2.3-70-g09d2


From c7004482e8dcb7c3c72666395cfa98a216a4fb70 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 6 Oct 2008 10:43:54 -0700
Subject: tcp: Respect SO_RCVLOWAT in tcp_poll().

Based upon a report by Vito Caputo.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1ab341e5d3e..7d81a1ee550 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -384,13 +384,17 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 	/* Connected? */
 	if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+		int target = sock_rcvlowat(sk, 0, INT_MAX);
+
+		if (tp->urg_seq == tp->copied_seq &&
+		    !sock_flag(sk, SOCK_URGINLINE) &&
+		    tp->urg_data)
+			target--;
+
 		/* Potential race condition. If read of tp below will
 		 * escape above sk->sk_state, we can be illegally awaken
 		 * in SYN_* states. */
-		if ((tp->rcv_nxt != tp->copied_seq) &&
-		    (tp->urg_seq != tp->copied_seq ||
-		     tp->rcv_nxt != tp->copied_seq + 1 ||
-		     sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
+		if (tp->rcv_nxt - tp->copied_seq >= target)
 			mask |= POLLIN | POLLRDNORM;
 
 		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
-- 
cgit v1.2.3-70-g09d2


From 64be8608c163bd480cf5ec4b34366f11e0f3c87f Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Mon, 6 Oct 2008 14:45:18 -0500
Subject: svcrdma: Add FRMR get/put services

Add services for the allocating, freeing, and unmapping Fast Reg MR. These
services will be used by the transport connection setup, send and receive
routines.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |   3 +
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 116 +++++++++++++++++++++++++++++--
 2 files changed, 114 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 49e458d9894..34252683671 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -214,6 +214,9 @@ extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
 extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
+extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
+			      struct svc_rdma_fastreg_mr *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
 extern struct svc_xprt_class svc_rdma_class;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 900cb69728c..f0b5c5f2f62 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -100,6 +100,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 	ctxt->xprt = xprt;
 	INIT_LIST_HEAD(&ctxt->dto_q);
 	ctxt->count = 0;
+	ctxt->frmr = NULL;
 	atomic_inc(&xprt->sc_ctxt_used);
 	return ctxt;
 }
@@ -109,11 +110,19 @@ static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 	struct svcxprt_rdma *xprt = ctxt->xprt;
 	int i;
 	for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
-		atomic_dec(&xprt->sc_dma_used);
-		ib_dma_unmap_single(xprt->sc_cm_id->device,
-				    ctxt->sge[i].addr,
-				    ctxt->sge[i].length,
-				    ctxt->direction);
+		/*
+		 * Unmap the DMA addr in the SGE if the lkey matches
+		 * the sc_dma_lkey, otherwise, ignore it since it is
+		 * an FRMR lkey and will be unmapped later when the
+		 * last WR that uses it completes.
+		 */
+		if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+			atomic_dec(&xprt->sc_dma_used);
+			ib_dma_unmap_single(xprt->sc_cm_id->device,
+					    ctxt->sge[i].addr,
+					    ctxt->sge[i].length,
+					    ctxt->direction);
+		}
 	}
 }
 
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
 		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 	}
 	map->count = 0;
+	map->frmr = NULL;
 	return map;
 }
 
@@ -425,10 +435,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
+	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
+	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
 
 	cma_xprt->sc_ord = svcrdma_ord;
 
@@ -686,6 +698,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	return ERR_PTR(ret);
 }
 
+static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
+{
+	struct ib_mr *mr;
+	struct ib_fast_reg_page_list *pl;
+	struct svc_rdma_fastreg_mr *frmr;
+
+	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
+	if (!frmr)
+		goto err;
+
+	mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
+	if (!mr)
+		goto err_free_frmr;
+
+	pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
+					 RPCSVC_MAXPAGES);
+	if (!pl)
+		goto err_free_mr;
+
+	frmr->mr = mr;
+	frmr->page_list = pl;
+	INIT_LIST_HEAD(&frmr->frmr_list);
+	return frmr;
+
+ err_free_mr:
+	ib_dereg_mr(mr);
+ err_free_frmr:
+	kfree(frmr);
+ err:
+	return ERR_PTR(-ENOMEM);
+}
+
+static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
+{
+	struct svc_rdma_fastreg_mr *frmr;
+
+	while (!list_empty(&xprt->sc_frmr_q)) {
+		frmr = list_entry(xprt->sc_frmr_q.next,
+				  struct svc_rdma_fastreg_mr, frmr_list);
+		list_del_init(&frmr->frmr_list);
+		ib_dereg_mr(frmr->mr);
+		ib_free_fast_reg_page_list(frmr->page_list);
+		kfree(frmr);
+	}
+}
+
+struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_fastreg_mr *frmr = NULL;
+
+	spin_lock_bh(&rdma->sc_frmr_q_lock);
+	if (!list_empty(&rdma->sc_frmr_q)) {
+		frmr = list_entry(rdma->sc_frmr_q.next,
+				  struct svc_rdma_fastreg_mr, frmr_list);
+		list_del_init(&frmr->frmr_list);
+		frmr->map_len = 0;
+		frmr->page_list_len = 0;
+	}
+	spin_unlock_bh(&rdma->sc_frmr_q_lock);
+	if (frmr)
+		return frmr;
+
+	return rdma_alloc_frmr(rdma);
+}
+
+static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
+			   struct svc_rdma_fastreg_mr *frmr)
+{
+	int page_no;
+	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+		dma_addr_t addr = frmr->page_list->page_list[page_no];
+		if (ib_dma_mapping_error(frmr->mr->device, addr))
+			continue;
+		atomic_dec(&xprt->sc_dma_used);
+		ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
+				    frmr->direction);
+	}
+}
+
+void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
+		       struct svc_rdma_fastreg_mr *frmr)
+{
+	if (frmr) {
+		frmr_unmap_dma(rdma, frmr);
+		spin_lock_bh(&rdma->sc_frmr_q_lock);
+		BUG_ON(!list_empty(&frmr->frmr_list));
+		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
+		spin_unlock_bh(&rdma->sc_frmr_q_lock);
+	}
+}
+
 /*
  * This is the xpo_recvfrom function for listening endpoints. Its
  * purpose is to accept incoming connections. The CMA callback handler
@@ -961,6 +1064,9 @@ static void __svc_rdma_free(struct work_struct *work)
 	WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
 	WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
 
+	/* De-allocate fastreg mr */
+	rdma_dealloc_frmr_q(rdma);
+
 	/* Destroy the QP if present (not a listener) */
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
 		ib_destroy_qp(rdma->sc_qp);
-- 
cgit v1.2.3-70-g09d2


From 3a5c63803d0552a3ad93b85c262f12cd86471443 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Tue, 30 Sep 2008 13:46:13 -0500
Subject: svcrdma: Query device for Fast Reg support during connection setup

Query the device capabilities in the svc_rdma_accept function to determine
what advanced memory management capabilities are supported by the device.
Based on the query, select the most secure model available given the
requirements of the transport and capabilities of the adapter.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 76 +++++++++++++++++++++++++++++---
 1 file changed, 70 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f0b5c5f2f62..a8ec4b1eec5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -807,6 +807,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	struct rdma_conn_param conn_param;
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device_attr devattr;
+	int dma_mr_acc;
+	int need_dma_mr;
 	int ret;
 	int i;
 
@@ -922,15 +924,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	}
 	newxprt->sc_qp = newxprt->sc_cm_id->qp;
 
-	/* Register all of physical memory */
-	newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd,
-					    IB_ACCESS_LOCAL_WRITE |
-					    IB_ACCESS_REMOTE_WRITE);
-	if (IS_ERR(newxprt->sc_phys_mr)) {
-		dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret);
+	/*
+	 * Use the most secure set of MR resources based on the
+	 * transport type and available memory management features in
+	 * the device. Here's the table implemented below:
+	 *
+	 *		Fast	Global	DMA	Remote WR
+	 *		Reg	LKEY	MR	Access
+	 *		Sup'd	Sup'd	Needed	Needed
+	 *
+	 * IWARP	N	N	Y	Y
+	 *		N	Y	Y	Y
+	 *		Y	N	Y	N
+	 *		Y	Y	N	-
+	 *
+	 * IB		N	N	Y	N
+	 *		N	Y	N	-
+	 *		Y	N	Y	N
+	 *		Y	Y	N	-
+	 *
+	 * NB:	iWARP requires remote write access for the data sink
+	 *	of an RDMA_READ. IB does not.
+	 */
+	if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+		newxprt->sc_frmr_pg_list_len =
+			devattr.max_fast_reg_page_list_len;
+		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
+	}
+
+	/*
+	 * Determine if a DMA MR is required and if so, what privs are required
+	 */
+	switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
+	case RDMA_TRANSPORT_IWARP:
+		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
+		if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+			need_dma_mr = 1;
+			dma_mr_acc =
+				(IB_ACCESS_LOCAL_WRITE |
+				 IB_ACCESS_REMOTE_WRITE);
+		} else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+			need_dma_mr = 1;
+			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+		} else
+			need_dma_mr = 0;
+		break;
+	case RDMA_TRANSPORT_IB:
+		if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+			need_dma_mr = 1;
+			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+		} else
+			need_dma_mr = 0;
+		break;
+	default:
 		goto errout;
 	}
 
+	/* Create the DMA MR if needed, otherwise, use the DMA LKEY */
+	if (need_dma_mr) {
+		/* Register all of physical memory */
+		newxprt->sc_phys_mr =
+			ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
+		if (IS_ERR(newxprt->sc_phys_mr)) {
+			dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
+				ret);
+			goto errout;
+		}
+		newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
+	} else
+		newxprt->sc_dma_lkey =
+			newxprt->sc_cm_id->device->local_dma_lkey;
+
 	/* Post receive buffers */
 	for (i = 0; i < newxprt->sc_max_requests; i++) {
 		ret = svc_rdma_post_recv(newxprt);
-- 
cgit v1.2.3-70-g09d2


From e1183210625cc8e02ce13eec78fb7a246567fc59 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Fri, 3 Oct 2008 15:22:18 -0500
Subject: svcrdma: Add a service to register a Fast Reg MR with the device

Fast Reg MR introduces a new WR type. Add a service to register the
region with the adapter and update the completion handling to support
completions with a NULL WR context.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |   1 +
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 111 +++++++++++++++++++++----------
 2 files changed, 77 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 34252683671..1402d193b39 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -214,6 +214,7 @@ extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
 extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
 extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
 extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
 			      struct svc_rdma_fastreg_mr *);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index a8ec4b1eec5..c3e8db0eeb3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -325,6 +325,45 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
 		svc_xprt_enqueue(&xprt->sc_xprt);
 }
 
+/*
+ * Processs a completion context
+ */
+static void process_context(struct svcxprt_rdma *xprt,
+			    struct svc_rdma_op_ctxt *ctxt)
+{
+	svc_rdma_unmap_dma(ctxt);
+
+	switch (ctxt->wr_op) {
+	case IB_WR_SEND:
+		svc_rdma_put_context(ctxt, 1);
+		break;
+
+	case IB_WR_RDMA_WRITE:
+		svc_rdma_put_context(ctxt, 0);
+		break;
+
+	case IB_WR_RDMA_READ:
+		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+			BUG_ON(!read_hdr);
+			spin_lock_bh(&xprt->sc_rq_dto_lock);
+			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+			list_add_tail(&read_hdr->dto_q,
+				      &xprt->sc_read_complete_q);
+			spin_unlock_bh(&xprt->sc_rq_dto_lock);
+			svc_xprt_enqueue(&xprt->sc_xprt);
+		}
+		svc_rdma_put_context(ctxt, 0);
+		break;
+
+	default:
+		printk(KERN_ERR "svcrdma: unexpected completion type, "
+		       "opcode=%d\n",
+		       ctxt->wr_op);
+		break;
+	}
+}
+
 /*
  * Send Queue Completion Handler - potentially called on interrupt context.
  *
@@ -337,17 +376,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 	struct ib_cq *cq = xprt->sc_sq_cq;
 	int ret;
 
-
 	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
 		return;
 
 	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
 	atomic_inc(&rdma_stat_sq_poll);
 	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
-		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
-		xprt = ctxt->xprt;
-
-		svc_rdma_unmap_dma(ctxt);
 		if (wc.status != IB_WC_SUCCESS)
 			/* Close the transport */
 			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -356,35 +390,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 		atomic_dec(&xprt->sc_sq_count);
 		wake_up(&xprt->sc_send_wait);
 
-		switch (ctxt->wr_op) {
-		case IB_WR_SEND:
-			svc_rdma_put_context(ctxt, 1);
-			break;
-
-		case IB_WR_RDMA_WRITE:
-			svc_rdma_put_context(ctxt, 0);
-			break;
-
-		case IB_WR_RDMA_READ:
-			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-				BUG_ON(!read_hdr);
-				spin_lock_bh(&xprt->sc_rq_dto_lock);
-				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-				list_add_tail(&read_hdr->dto_q,
-					      &xprt->sc_read_complete_q);
-				spin_unlock_bh(&xprt->sc_rq_dto_lock);
-				svc_xprt_enqueue(&xprt->sc_xprt);
-			}
-			svc_rdma_put_context(ctxt, 0);
-			break;
+		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
+		if (ctxt)
+			process_context(xprt, ctxt);
 
-		default:
-			printk(KERN_ERR "svcrdma: unexpected completion type, "
-			       "opcode=%d, status=%d\n",
-			       wc.opcode, wc.status);
-			break;
-		}
 		svc_xprt_put(&xprt->sc_xprt);
 	}
 
@@ -1184,6 +1193,40 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 	return 1;
 }
 
+/*
+ * Attempt to register the kvec representing the RPC memory with the
+ * device.
+ *
+ * Returns:
+ *  NULL : The device does not support fastreg or there were no more
+ *         fastreg mr.
+ *  frmr : The kvec register request was successfully posted.
+ *    <0 : An error was encountered attempting to register the kvec.
+ */
+int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
+		     struct svc_rdma_fastreg_mr *frmr)
+{
+	struct ib_send_wr fastreg_wr;
+	u8 key;
+
+	/* Bump the key */
+	key = (u8)(frmr->mr->lkey & 0x000000FF);
+	ib_update_fast_reg_key(frmr->mr, ++key);
+
+	/* Prepare FASTREG WR */
+	memset(&fastreg_wr, 0, sizeof fastreg_wr);
+	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+	fastreg_wr.send_flags = IB_SEND_SIGNALED;
+	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fastreg_wr.wr.fast_reg.length = frmr->map_len;
+	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+	return svc_rdma_send(xprt, &fastreg_wr);
+}
+
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
 	struct ib_send_wr *bad_wr;
@@ -1193,8 +1236,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 		return -ENOTCONN;
 
 	BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
-	BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
-		wr->opcode);
 	/* If the SQ is full, wait until an SQ entry is available */
 	while (1) {
 		spin_lock_bh(&xprt->sc_lock);
-- 
cgit v1.2.3-70-g09d2


From a5abf4e81545d9c7280c49cae853cc45fd769ddf Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Tue, 30 Sep 2008 14:05:41 -0500
Subject: svcrdma: Modify post recv path to use local dma key

Update the svc_rdma_post_recv routine to use the adapter's global LKEY
instead of sc_phys_mr which is only valid when using a DMA MR.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index c3e8db0eeb3..d9183cbcd96 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -483,7 +483,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 	struct ib_recv_wr recv_wr, *bad_recv_wr;
 	struct svc_rdma_op_ctxt *ctxt;
 	struct page *page;
-	unsigned long pa;
+	dma_addr_t pa;
 	int sge_no;
 	int buflen;
 	int ret;
@@ -495,13 +495,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 		BUG_ON(sge_no >= xprt->sc_max_sge);
 		page = svc_rdma_get_page();
 		ctxt->pages[sge_no] = page;
-		atomic_inc(&xprt->sc_dma_used);
 		pa = ib_dma_map_page(xprt->sc_cm_id->device,
 				     page, 0, PAGE_SIZE,
 				     DMA_FROM_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
+			goto err_put_ctxt;
+		atomic_inc(&xprt->sc_dma_used);
 		ctxt->sge[sge_no].addr = pa;
 		ctxt->sge[sge_no].length = PAGE_SIZE;
-		ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+		ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
 		buflen += PAGE_SIZE;
 	}
 	ctxt->count = sge_no;
@@ -517,6 +519,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 		svc_rdma_put_context(ctxt, 1);
 	}
 	return ret;
+
+ err_put_ctxt:
+	svc_rdma_put_context(ctxt, 1);
+	return -ENOMEM;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 5b180a9a64ca2217a658bd515ef910eafefc5e5a Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Mon, 11 Aug 2008 14:10:19 -0500
Subject: svcrdma: Add support to svc_rdma_send to handle chained WR

WR can be submitted as linked lists of WR. Update the svc_rdma_send
routine to handle WR chains. This will be used to submit a WR that
uses an FRMR with another WR that invalidates the FRMR.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index d9183cbcd96..f22f5876766 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1235,17 +1235,23 @@ int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
 
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
-	struct ib_send_wr *bad_wr;
+	struct ib_send_wr *bad_wr, *n_wr;
+	int wr_count;
+	int i;
 	int ret;
 
 	if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
 		return -ENOTCONN;
 
 	BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
+	wr_count = 1;
+	for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
+		wr_count++;
+
 	/* If the SQ is full, wait until an SQ entry is available */
 	while (1) {
 		spin_lock_bh(&xprt->sc_lock);
-		if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
+		if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
 			spin_unlock_bh(&xprt->sc_lock);
 			atomic_inc(&rdma_stat_sq_starve);
 
@@ -1260,19 +1266,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 				return 0;
 			continue;
 		}
-		/* Bumped used SQ WR count and post */
-		svc_xprt_get(&xprt->sc_xprt);
+		/* Take a transport ref for each WR posted */
+		for (i = 0; i < wr_count; i++)
+			svc_xprt_get(&xprt->sc_xprt);
+
+		/* Bump used SQ WR count and post */
+		atomic_add(wr_count, &xprt->sc_sq_count);
 		ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
-		if (!ret)
-			atomic_inc(&xprt->sc_sq_count);
-		else {
-			svc_xprt_put(&xprt->sc_xprt);
+		if (ret) {
+			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			atomic_sub(wr_count, &xprt->sc_sq_count);
+			for (i = 0; i < wr_count; i ++)
+				svc_xprt_put(&xprt->sc_xprt);
 			dprintk("svcrdma: failed to post SQ WR rc=%d, "
 			       "sc_sq_count=%d, sc_sq_depth=%d\n",
 			       ret, atomic_read(&xprt->sc_sq_count),
 			       xprt->sc_sq_depth);
 		}
 		spin_unlock_bh(&xprt->sc_lock);
+		if (ret)
+			wake_up(&xprt->sc_send_wait);
 		break;
 	}
 	return ret;
-- 
cgit v1.2.3-70-g09d2


From 146b6df6a537939570c5772ebd7db826fdbd5d82 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Tue, 12 Aug 2008 15:12:10 -0500
Subject: svcrdma: Modify the RPC recv path to use FRMR when available

RPCRDMA requests that specify a read-list are fetched with RDMA_READ. Using
an FRMR to map the data sink improves NFSRDMA security on transports that
place the RDMA_READ data sink LKEY on the wire because the valid lifetime
of the MR is only the duration of the RDMA_READ. The LKEY is invalidated
when the last RDMA_READ WR completes.

Mapping the data sink also allows for very large amounts to data to be
fetched with a single WR, so if the client is also using FRMR, the entire
RPC read-list can be fetched with a single WR.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |   1 +
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 187 +++++++++++++++++++++++++++----
 net/sunrpc/xprtrdma/svc_rdma_transport.c |   5 +-
 3 files changed, 171 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 1402d193b39..c14fe86dac5 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -212,6 +212,7 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
+extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
 extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
 extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
 extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 74de31a0661..a4756576d68 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  *
  * Assumptions:
  * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contigous and consists of
+ * - pages[] is not physically or virtually contiguous and consists of
  *   PAGE_SIZE elements.
  *
  * Output:
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  *   chunk in the read list
  *
  */
-static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
+static int map_read_chunks(struct svcxprt_rdma *xprt,
 			   struct svc_rqst *rqstp,
 			   struct svc_rdma_op_ctxt *head,
 			   struct rpcrdma_msg *rmsgp,
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 	return sge_no;
 }
 
-static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
-			      struct svc_rdma_op_ctxt *ctxt,
-			      struct kvec *vec,
-			      u64 *sgl_offset,
-			      int count)
+/* Map a read-chunk-list to an XDR and fast register the page-list.
+ *
+ * Assumptions:
+ * - chunk[0]	position points to pages[0] at an offset of 0
+ * - pages[]	will be made physically contiguous by creating a one-off memory
+ *		region using the fastreg verb.
+ * - byte_count is # of bytes in read-chunk-list
+ * - ch_count	is # of chunks in read-chunk-list
+ *
+ * Output:
+ * - sge array pointing into pages[] array.
+ * - chunk_sge array specifying sge index and count for each
+ *   chunk in the read list
+ */
+static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+				struct svc_rqst *rqstp,
+				struct svc_rdma_op_ctxt *head,
+				struct rpcrdma_msg *rmsgp,
+				struct svc_rdma_req_map *rpl_map,
+				struct svc_rdma_req_map *chl_map,
+				int ch_count,
+				int byte_count)
+{
+	int page_no;
+	int ch_no;
+	u32 offset;
+	struct rpcrdma_read_chunk *ch;
+	struct svc_rdma_fastreg_mr *frmr;
+	int ret = 0;
+
+	frmr = svc_rdma_get_frmr(xprt);
+	if (IS_ERR(frmr))
+		return -ENOMEM;
+
+	head->frmr = frmr;
+	head->arg.head[0] = rqstp->rq_arg.head[0];
+	head->arg.tail[0] = rqstp->rq_arg.tail[0];
+	head->arg.pages = &head->pages[head->count];
+	head->hdr_count = head->count; /* save count of hdr pages */
+	head->arg.page_base = 0;
+	head->arg.page_len = byte_count;
+	head->arg.len = rqstp->rq_arg.len + byte_count;
+	head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+
+	/* Fast register the page list */
+	frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+	frmr->direction = DMA_FROM_DEVICE;
+	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
+	frmr->map_len = byte_count;
+	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
+	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+		frmr->page_list->page_list[page_no] =
+			ib_dma_map_single(xprt->sc_cm_id->device,
+					  page_address(rqstp->rq_arg.pages[page_no]),
+					  PAGE_SIZE, DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 frmr->page_list->page_list[page_no]))
+			goto fatal_err;
+		atomic_inc(&xprt->sc_dma_used);
+		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
+	}
+	head->count += page_no;
+
+	/* rq_respages points one past arg pages */
+	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
+
+	/* Create the reply and chunk maps */
+	offset = 0;
+	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+	for (ch_no = 0; ch_no < ch_count; ch_no++) {
+		rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
+		rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
+		chl_map->ch[ch_no].count = 1;
+		chl_map->ch[ch_no].start = ch_no;
+		offset += ch->rc_target.rs_length;
+		ch++;
+	}
+
+	ret = svc_rdma_fastreg(xprt, frmr);
+	if (ret)
+		goto fatal_err;
+
+	return ch_no;
+
+ fatal_err:
+	printk("svcrdma: error fast registering xdr for xprt %p", xprt);
+	svc_rdma_put_frmr(xprt, frmr);
+	return -EIO;
+}
+
+static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
+			     struct svc_rdma_op_ctxt *ctxt,
+			     struct svc_rdma_fastreg_mr *frmr,
+			     struct kvec *vec,
+			     u64 *sgl_offset,
+			     int count)
 {
 	int i;
 
 	ctxt->count = count;
 	ctxt->direction = DMA_FROM_DEVICE;
 	for (i = 0; i < count; i++) {
-		atomic_inc(&xprt->sc_dma_used);
-		ctxt->sge[i].addr =
-			ib_dma_map_single(xprt->sc_cm_id->device,
-					  vec[i].iov_base, vec[i].iov_len,
-					  DMA_FROM_DEVICE);
+		ctxt->sge[i].length = 0; /* in case map fails */
+		if (!frmr) {
+			ctxt->sge[i].addr =
+				ib_dma_map_single(xprt->sc_cm_id->device,
+						  vec[i].iov_base,
+						  vec[i].iov_len,
+						  DMA_FROM_DEVICE);
+			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+						 ctxt->sge[i].addr))
+				return -EINVAL;
+			ctxt->sge[i].lkey = xprt->sc_dma_lkey;
+			atomic_inc(&xprt->sc_dma_used);
+		} else {
+			ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
+			ctxt->sge[i].lkey = frmr->mr->lkey;
+		}
 		ctxt->sge[i].length = vec[i].iov_len;
-		ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
 		*sgl_offset = *sgl_offset + vec[i].iov_len;
 	}
+	return 0;
 }
 
 static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 			 struct svc_rdma_op_ctxt *hdr_ctxt)
 {
 	struct ib_send_wr read_wr;
+	struct ib_send_wr inv_wr;
 	int err = 0;
 	int ch_no;
 	int ch_count;
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
 	if (ch_count > RPCSVC_MAXPAGES)
 		return -EINVAL;
-	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
-				    rpl_map, chl_map,
-				    ch_count, byte_count);
+
+	if (!xprt->sc_frmr_pg_list_len)
+		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+					    rpl_map, chl_map, ch_count,
+					    byte_count);
+	else
+		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+						 rpl_map, chl_map, ch_count,
+						 byte_count);
+	if (sge_count < 0) {
+		err = -EIO;
+		goto out;
+	}
+
 	sgl_offset = 0;
 	ch_no = 0;
 
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 next_sge:
 		ctxt = svc_rdma_get_context(xprt);
 		ctxt->direction = DMA_FROM_DEVICE;
+		ctxt->frmr = hdr_ctxt->frmr;
+		ctxt->read_hdr = NULL;
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
 
 		/* Prepare READ WR */
 		memset(&read_wr, 0, sizeof read_wr);
-		ctxt->wr_op = IB_WR_RDMA_READ;
 		read_wr.wr_id = (unsigned long)ctxt;
 		read_wr.opcode = IB_WR_RDMA_READ;
+		ctxt->wr_op = read_wr.opcode;
 		read_wr.send_flags = IB_SEND_SIGNALED;
 		read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
 		read_wr.wr.rdma.remote_addr =
@@ -327,10 +444,15 @@ next_sge:
 		read_wr.sg_list = ctxt->sge;
 		read_wr.num_sge =
 			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
-		rdma_set_ctxt_sge(xprt, ctxt,
-				  &rpl_map->sge[chl_map->ch[ch_no].start],
-				  &sgl_offset,
-				  read_wr.num_sge);
+		err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
+					&rpl_map->sge[chl_map->ch[ch_no].start],
+					&sgl_offset,
+					read_wr.num_sge);
+		if (err) {
+			svc_rdma_unmap_dma(ctxt);
+			svc_rdma_put_context(ctxt, 0);
+			goto out;
+		}
 		if (((ch+1)->rc_discrim == 0) &&
 		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
 			/*
@@ -339,6 +461,29 @@ next_sge:
 			 * the client and the RPC needs to be enqueued.
 			 */
 			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+			if (hdr_ctxt->frmr) {
+				set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+				/*
+				 * Invalidate the local MR used to map the data
+				 * sink.
+				 */
+				if (xprt->sc_dev_caps &
+				    SVCRDMA_DEVCAP_READ_W_INV) {
+					read_wr.opcode =
+						IB_WR_RDMA_READ_WITH_INV;
+					ctxt->wr_op = read_wr.opcode;
+					read_wr.ex.invalidate_rkey =
+						ctxt->frmr->mr->lkey;
+				} else {
+					/* Prepare INVALIDATE WR */
+					memset(&inv_wr, 0, sizeof inv_wr);
+					inv_wr.opcode = IB_WR_LOCAL_INV;
+					inv_wr.send_flags = IB_SEND_SIGNALED;
+					inv_wr.ex.invalidate_rkey =
+						hdr_ctxt->frmr->mr->lkey;
+					read_wr.next = &inv_wr;
+				}
+			}
 			ctxt->read_hdr = hdr_ctxt;
 		}
 		/* Post the read */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f22f5876766..fb0dff5e53e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -105,7 +105,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 	return ctxt;
 }
 
-static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
+void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 {
 	struct svcxprt_rdma *xprt = ctxt->xprt;
 	int i;
@@ -343,9 +343,12 @@ static void process_context(struct svcxprt_rdma *xprt,
 		break;
 
 	case IB_WR_RDMA_READ:
+	case IB_WR_RDMA_READ_WITH_INV:
 		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
 			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
 			BUG_ON(!read_hdr);
+			if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+				svc_rdma_put_frmr(xprt, ctxt->frmr);
 			spin_lock_bh(&xprt->sc_rq_dto_lock);
 			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 			list_add_tail(&read_hdr->dto_q,
-- 
cgit v1.2.3-70-g09d2


From afd566ea080572499cc01d42d2f578bf4b54f20f Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Fri, 3 Oct 2008 15:45:03 -0500
Subject: svcrdma: Modify the RPC reply path to use FRMR when available

Use FRMR to map local RPC reply data. This allows RDMA_WRITE to send reply
data using a single WR. The FRMR is invalidated by linking the LOCAL_INV WR
to the RDMA_SEND message used to complete the reply.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 255 ++++++++++++++++++++++++++-----
 net/sunrpc/xprtrdma/svc_rdma_transport.c |   2 +
 2 files changed, 217 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 84d328329d9..9a7a8e7ae03 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -69,9 +69,127 @@
  * array is only concerned with the reply we are assured that we have
  * on extra page for the RPCRMDA header.
  */
-static void xdr_to_sge(struct svcxprt_rdma *xprt,
-		       struct xdr_buf *xdr,
-		       struct svc_rdma_req_map *vec)
+int fast_reg_xdr(struct svcxprt_rdma *xprt,
+		 struct xdr_buf *xdr,
+		 struct svc_rdma_req_map *vec)
+{
+	int sge_no;
+	u32 sge_bytes;
+	u32 page_bytes;
+	u32 page_off;
+	int page_no = 0;
+	u8 *frva;
+	struct svc_rdma_fastreg_mr *frmr;
+
+	frmr = svc_rdma_get_frmr(xprt);
+	if (IS_ERR(frmr))
+		return -ENOMEM;
+	vec->frmr = frmr;
+
+	/* Skip the RPCRDMA header */
+	sge_no = 1;
+
+	/* Map the head. */
+	frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
+	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
+	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
+	vec->count = 2;
+	sge_no++;
+
+	/* Build the FRMR */
+	frmr->kva = frva;
+	frmr->direction = DMA_TO_DEVICE;
+	frmr->access_flags = 0;
+	frmr->map_len = PAGE_SIZE;
+	frmr->page_list_len = 1;
+	frmr->page_list->page_list[page_no] =
+		ib_dma_map_single(xprt->sc_cm_id->device,
+				  (void *)xdr->head[0].iov_base,
+				  PAGE_SIZE, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+				 frmr->page_list->page_list[page_no]))
+		goto fatal_err;
+	atomic_inc(&xprt->sc_dma_used);
+
+	page_off = xdr->page_base;
+	page_bytes = xdr->page_len + page_off;
+	if (!page_bytes)
+		goto encode_tail;
+
+	/* Map the pages */
+	vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+	vec->sge[sge_no].iov_len = page_bytes;
+	sge_no++;
+	while (page_bytes) {
+		struct page *page;
+
+		page = xdr->pages[page_no++];
+		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
+		page_bytes -= sge_bytes;
+
+		frmr->page_list->page_list[page_no] =
+			ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
+					  PAGE_SIZE, DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 frmr->page_list->page_list[page_no]))
+			goto fatal_err;
+
+		atomic_inc(&xprt->sc_dma_used);
+		page_off = 0; /* reset for next time through loop */
+		frmr->map_len += PAGE_SIZE;
+		frmr->page_list_len++;
+	}
+	vec->count++;
+
+ encode_tail:
+	/* Map tail */
+	if (0 == xdr->tail[0].iov_len)
+		goto done;
+
+	vec->count++;
+	vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
+
+	if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
+	    ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
+		/*
+		 * If head and tail use the same page, we don't need
+		 * to map it again.
+		 */
+		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
+	} else {
+		void *va;
+
+		/* Map another page for the tail */
+		page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+		va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
+		vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+
+		frmr->page_list->page_list[page_no] =
+			ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
+					  DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 frmr->page_list->page_list[page_no]))
+			goto fatal_err;
+		atomic_inc(&xprt->sc_dma_used);
+		frmr->map_len += PAGE_SIZE;
+		frmr->page_list_len++;
+	}
+
+ done:
+	if (svc_rdma_fastreg(xprt, frmr))
+		goto fatal_err;
+
+	return 0;
+
+ fatal_err:
+	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
+	svc_rdma_put_frmr(xprt, frmr);
+	return -EIO;
+}
+
+static int map_xdr(struct svcxprt_rdma *xprt,
+		   struct xdr_buf *xdr,
+		   struct svc_rdma_req_map *vec)
 {
 	int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
 	int sge_no;
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
 	BUG_ON(xdr->len !=
 	       (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
 
+	if (xprt->sc_frmr_pg_list_len)
+		return fast_reg_xdr(xprt, xdr, vec);
+
 	/* Skip the first sge, this is for the RPCRDMA header */
 	sge_no = 1;
 
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
 
 	BUG_ON(sge_no > sge_max);
 	vec->count = sge_no;
+	return 0;
 }
 
 /* Assumptions:
+ * - We are using FRMR
+ *     - or -
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
 static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 	sge_no = 0;
 
 	/* Copy the remaining SGE */
-	while (bc != 0 && xdr_sge_no < vec->count) {
-		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
-		sge_bytes = min((size_t)bc,
-				(size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
+	while (bc != 0) {
+		sge_bytes = min_t(size_t,
+			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
 		sge[sge_no].length = sge_bytes;
-		atomic_inc(&xprt->sc_dma_used);
-		sge[sge_no].addr =
-			ib_dma_map_single(xprt->sc_cm_id->device,
-					  (void *)
-					  vec->sge[xdr_sge_no].iov_base + sge_off,
-					  sge_bytes, DMA_TO_DEVICE);
-		if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
-					sge[sge_no].addr))
-			goto err;
+		if (!vec->frmr) {
+			sge[sge_no].addr =
+				ib_dma_map_single(xprt->sc_cm_id->device,
+						  (void *)
+						  vec->sge[xdr_sge_no].iov_base + sge_off,
+						  sge_bytes, DMA_TO_DEVICE);
+			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+						 sge[sge_no].addr))
+				goto err;
+			atomic_inc(&xprt->sc_dma_used);
+			sge[sge_no].lkey = xprt->sc_dma_lkey;
+		} else {
+			sge[sge_no].addr = (unsigned long)
+				vec->sge[xdr_sge_no].iov_base + sge_off;
+			sge[sge_no].lkey = vec->frmr->mr->lkey;
+		}
+		ctxt->count++;
+		ctxt->frmr = vec->frmr;
 		sge_off = 0;
 		sge_no++;
-		ctxt->count++;
 		xdr_sge_no++;
+		BUG_ON(xdr_sge_no > vec->count);
 		bc -= sge_bytes;
 	}
 
-	BUG_ON(bc != 0);
-	BUG_ON(xdr_sge_no > vec->count);
-
 	/* Prepare WRITE WR */
 	memset(&write_wr, 0, sizeof write_wr);
 	ctxt->wr_op = IB_WR_RDMA_WRITE;
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[1];
 
-	max_write = xprt->sc_max_sge * PAGE_SIZE;
+	if (vec->frmr)
+		max_write = vec->frmr->map_len;
+	else
+		max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* Write chunks start at the pagelist */
 	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[2];
 
-	max_write = xprt->sc_max_sge * PAGE_SIZE;
+	if (vec->frmr)
+		max_write = vec->frmr->map_len;
+	else
+		max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* xdr offset starts at RPC message */
 	for (xdr_off = 0, chunk_no = 0;
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 		ch = &arg_ary->wc_array[chunk_no].wc_target;
 		write_len = min(xfer_len, ch->rs_length);
 
-
 		/* Prepare the reply chunk given the length actually
 		 * written */
 		rs_offset = get_unaligned(&(ch->rs_offset));
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		      int byte_count)
 {
 	struct ib_send_wr send_wr;
+	struct ib_send_wr inv_wr;
 	int sge_no;
 	int sge_bytes;
 	int page_no;
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	/* Prepare the context */
 	ctxt->pages[0] = page;
 	ctxt->count = 1;
+	ctxt->frmr = vec->frmr;
+	if (vec->frmr)
+		set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+	else
+		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
 
 	/* Prepare the SGE for the RPCRDMA Header */
-	atomic_inc(&rdma->sc_dma_used);
 	ctxt->sge[0].addr =
 		ib_dma_map_page(rdma->sc_cm_id->device,
 				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+		goto err;
+	atomic_inc(&rdma->sc_dma_used);
+
 	ctxt->direction = DMA_TO_DEVICE;
+
 	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
-	ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
+	ctxt->sge[0].lkey = rdma->sc_dma_lkey;
 
 	/* Determine how many of our SGE are to be transmitted */
 	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
-		atomic_inc(&rdma->sc_dma_used);
-		ctxt->sge[sge_no].addr =
-			ib_dma_map_single(rdma->sc_cm_id->device,
-					  vec->sge[sge_no].iov_base,
-					  sge_bytes, DMA_TO_DEVICE);
+		if (!vec->frmr) {
+			ctxt->sge[sge_no].addr =
+				ib_dma_map_single(rdma->sc_cm_id->device,
+						  vec->sge[sge_no].iov_base,
+						  sge_bytes, DMA_TO_DEVICE);
+			if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+						 ctxt->sge[sge_no].addr))
+				goto err;
+			atomic_inc(&rdma->sc_dma_used);
+			ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+		} else {
+			ctxt->sge[sge_no].addr = (unsigned long)
+				vec->sge[sge_no].iov_base;
+			ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
+		}
 		ctxt->sge[sge_no].length = sge_bytes;
-		ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
 	}
 	BUG_ON(byte_count != 0);
 
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
 		ctxt->count++;
 		rqstp->rq_respages[page_no] = NULL;
-		/* If there are more pages than SGE, terminate SGE list */
+		/*
+		 * If there are more pages than SGE, terminate SGE
+		 * list so that svc_rdma_unmap_dma doesn't attempt to
+		 * unmap garbage.
+		 */
 		if (page_no+1 >= sge_no)
 			ctxt->sge[page_no+1].length = 0;
 	}
 	BUG_ON(sge_no > rdma->sc_max_sge);
+	BUG_ON(sge_no > ctxt->count);
 	memset(&send_wr, 0, sizeof send_wr);
 	ctxt->wr_op = IB_WR_SEND;
 	send_wr.wr_id = (unsigned long)ctxt;
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	send_wr.num_sge = sge_no;
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags =  IB_SEND_SIGNALED;
+	if (vec->frmr) {
+		/* Prepare INVALIDATE WR */
+		memset(&inv_wr, 0, sizeof inv_wr);
+		inv_wr.opcode = IB_WR_LOCAL_INV;
+		inv_wr.send_flags = IB_SEND_SIGNALED;
+		inv_wr.ex.invalidate_rkey =
+			vec->frmr->mr->lkey;
+		send_wr.next = &inv_wr;
+	}
 
 	ret = svc_rdma_send(rdma, &send_wr);
 	if (ret)
-		svc_rdma_put_context(ctxt, 1);
+		goto err;
 
-	return ret;
+	return 0;
+
+ err:
+	svc_rdma_put_frmr(rdma, vec->frmr);
+	svc_rdma_put_context(ctxt, 1);
+	return -EIO;
 }
 
 void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	ctxt = svc_rdma_get_context(rdma);
 	ctxt->direction = DMA_TO_DEVICE;
 	vec = svc_rdma_get_req_map();
-	xdr_to_sge(rdma, &rqstp->rq_res, vec);
-
+	ret = map_xdr(rdma, &rqstp->rq_res, vec);
+	if (ret)
+		goto err0;
 	inline_bytes = rqstp->rq_res.len;
 
 	/* Create the RDMA response header */
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	if (ret < 0) {
 		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
 		       ret);
-		goto error;
+		goto err1;
 	}
 	inline_bytes -= ret;
 
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	if (ret < 0) {
 		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
 		       ret);
-		goto error;
+		goto err1;
 	}
 	inline_bytes -= ret;
 
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	svc_rdma_put_req_map(vec);
 	dprintk("svcrdma: send_reply returns %d\n", ret);
 	return ret;
- error:
+
+ err1:
+	put_page(res_page);
+ err0:
 	svc_rdma_put_req_map(vec);
 	svc_rdma_put_context(ctxt, 0);
-	put_page(res_page);
 	return ret;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fb0dff5e53e..98f945c5a00 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -335,6 +335,8 @@ static void process_context(struct svcxprt_rdma *xprt,
 
 	switch (ctxt->wr_op) {
 	case IB_WR_SEND:
+		if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+			svc_rdma_put_frmr(xprt, ctxt->frmr);
 		svc_rdma_put_context(ctxt, 1);
 		break;
 
-- 
cgit v1.2.3-70-g09d2


From 04911b539c9817aa88a6da8f563e65e3e0bc974b Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Mon, 11 Aug 2008 15:14:53 -0500
Subject: svcrdma: Update svc_rdma_send_error to use DMA LKEY

Update the svc_rdma_send_error code to use the DMA LKEY which is valid
regardless of the memory registration strategy in use.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 98f945c5a00..c0cd334fe1c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1314,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
 
 	/* Prepare SGE for local address */
-	atomic_inc(&xprt->sc_dma_used);
 	sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
 				   p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
-	sge.lkey = xprt->sc_phys_mr->lkey;
+	if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
+		put_page(p);
+		return;
+	}
+	atomic_inc(&xprt->sc_dma_used);
+	sge.lkey = xprt->sc_dma_lkey;
 	sge.length = length;
 
 	ctxt = svc_rdma_get_context(xprt);
@@ -1338,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	if (ret) {
 		dprintk("svcrdma: Error %d posting send for protocol error\n",
 			ret);
+		ib_dma_unmap_page(xprt->sc_cm_id->device,
+				  sge.addr, PAGE_SIZE,
+				  DMA_FROM_DEVICE);
 		svc_rdma_put_context(ctxt, 1);
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From 67080c82361b7510b602c87b83399421aa2d2895 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Fri, 3 Oct 2008 12:41:14 -0500
Subject: svcrdma: Fix IRD/ORD polarity

The inititator/responder resources in the event have been swapped. They
no represent what the local peer would set their values to in order to
match the peer. Note that iWARP does not exchange these on the wire and
the provider is simply putting in the local device max.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index c0cd334fe1c..6fb493cbd29 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -598,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
 		dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
 			"event=%d\n", cma_id, cma_id->context, event->event);
 		handle_connect_req(cma_id,
-				   event->param.conn.responder_resources);
+				   event->param.conn.initiator_depth);
 		break;
 
 	case RDMA_CM_EVENT_ESTABLISHED:
-- 
cgit v1.2.3-70-g09d2


From 88a944eef8a4f9a2ca647eb16202a2b63f8ba7cd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 6 Oct 2008 12:48:29 -0700
Subject: Revert "ax25: Fix std timer socket destroy handling."

This reverts commit 30902dc3cb0ea1cfc7ac2b17bcf478ff98420d74.

It causes all kinds of problems, based upon a report by
Bernard (f6bvp) and analysis by Jarek Poplawski.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/ax25_std_timer.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index cdc7e751ef3..96e4b927325 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -39,9 +39,11 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
 
 	switch (ax25->state) {
 	case AX25_STATE_0:
-		if (!sk ||
-		    sock_flag(sk, SOCK_DESTROY) ||
-		    sock_flag(sk, SOCK_DEAD)) {
+		/* Magic here: If we listen() and a new link dies before it
+		   is accepted() it isn't 'dead' so doesn't get removed. */
+		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
+		    (sk->sk_state == TCP_LISTEN &&
+		     sock_flag(sk, SOCK_DEAD))) {
 			if (sk) {
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);
-- 
cgit v1.2.3-70-g09d2


From 33d1d2c52c3befa6c4df33b4ba58137d1c48894b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 6 Oct 2008 12:53:50 -0700
Subject: ax25: Quick fix for making sure unaccepted sockets get destroyed.

Since we reverted 30902dc3cb0ea1cfc7ac2b17bcf478ff98420d74 ("ax25: Fix
std timer socket destroy handling.") we have to put some kind of fix
in to cure the issue whereby unaccepted connections do not get destroyed.

The approach used here is from Tihomir Heidelberg - 9a4gl

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 01c83e2a4c1..28c71574a78 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -317,6 +317,9 @@ void ax25_destroy_socket(ax25_cb *ax25)
 				/* Queue the unaccepted socket for death */
 				sock_orphan(skb->sk);
 
+				/* 9A4GL: hack to release unaccepted sockets */
+				skb->sk->sk_state = TCP_LISTEN;
+
 				ax25_start_heartbeat(sax25);
 				sax25->state = AX25_STATE_0;
 			}
-- 
cgit v1.2.3-70-g09d2


From 859f4c74d8de4dc344b3a115367d5e22a79bddaf Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 6 Oct 2008 12:54:57 -0700
Subject: netrom: Fix sock_orphan() use in nr_release

While debugging another bug it was found that NetRom socks
are sometimes seen unorphaned in sk_free(). This patch moves
sock_orphan() in nr_release() to the beginning (like in ax25,
or rose).

Reported-and-tested-by: Bernard Pidoux f6bvp <f6bvp@free.fr>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netrom/af_netrom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 532e4faa29f..9f1ea4a27b3 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -525,6 +525,7 @@ static int nr_release(struct socket *sock)
 	if (sk == NULL) return 0;
 
 	sock_hold(sk);
+	sock_orphan(sk);
 	lock_sock(sk);
 	nr = nr_sk(sk);
 
@@ -548,7 +549,6 @@ static int nr_release(struct socket *sock)
 		sk->sk_state    = TCP_CLOSE;
 		sk->sk_shutdown |= SEND_SHUTDOWN;
 		sk->sk_state_change(sk);
-		sock_orphan(sk);
 		sock_set_flag(sk, SOCK_DESTROY);
 		break;
 
-- 
cgit v1.2.3-70-g09d2


From cb7f6a7b716e801097b564dec3ccb58d330aef56 Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Fri, 19 Sep 2008 12:32:57 +0200
Subject: IPVS: Move IPVS to net/netfilter/ipvs

Since IPVS now has partial IPv6 support, this patch moves IPVS from
net/ipv4/ipvs to net/netfilter/ipvs. It's a result of:

$ git mv net/ipv4/ipvs net/netfilter

and adapting the relevant Kconfigs/Makefiles to the new path.

Signed-off-by: Julius Volz <juliusv@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/ipv4/Kconfig                        |    2 -
 net/ipv4/Makefile                       |    1 -
 net/ipv4/ipvs/Kconfig                   |  239 ---
 net/ipv4/ipvs/Makefile                  |   33 -
 net/ipv4/ipvs/ip_vs_app.c               |  622 ------
 net/ipv4/ipvs/ip_vs_conn.c              | 1110 ----------
 net/ipv4/ipvs/ip_vs_core.c              | 1542 --------------
 net/ipv4/ipvs/ip_vs_ctl.c               | 3443 -------------------------------
 net/ipv4/ipvs/ip_vs_est.c               |  166 --
 net/ipv4/ipvs/ip_vs_ftp.c               |  410 ----
 net/ipv4/ipvs/ip_vs_lblc.c              |  555 -----
 net/ipv4/ipvs/ip_vs_lblcr.c             |  755 -------
 net/ipv4/ipvs/ip_vs_lc.c                |  103 -
 net/ipv4/ipvs/ip_vs_nq.c                |  138 --
 net/ipv4/ipvs/ip_vs_proto.c             |  288 ---
 net/ipv4/ipvs/ip_vs_proto_ah_esp.c      |  235 ---
 net/ipv4/ipvs/ip_vs_proto_tcp.c         |  732 -------
 net/ipv4/ipvs/ip_vs_proto_udp.c         |  533 -----
 net/ipv4/ipvs/ip_vs_rr.c                |  112 -
 net/ipv4/ipvs/ip_vs_sched.c             |  251 ---
 net/ipv4/ipvs/ip_vs_sed.c               |  140 --
 net/ipv4/ipvs/ip_vs_sh.c                |  258 ---
 net/ipv4/ipvs/ip_vs_sync.c              |  942 ---------
 net/ipv4/ipvs/ip_vs_wlc.c               |  128 --
 net/ipv4/ipvs/ip_vs_wrr.c               |  237 ---
 net/ipv4/ipvs/ip_vs_xmit.c              | 1004 ---------
 net/netfilter/Kconfig                   |    2 +
 net/netfilter/Makefile                  |    3 +
 net/netfilter/ipvs/Kconfig              |  239 +++
 net/netfilter/ipvs/Makefile             |   33 +
 net/netfilter/ipvs/ip_vs_app.c          |  622 ++++++
 net/netfilter/ipvs/ip_vs_conn.c         | 1110 ++++++++++
 net/netfilter/ipvs/ip_vs_core.c         | 1542 ++++++++++++++
 net/netfilter/ipvs/ip_vs_ctl.c          | 3443 +++++++++++++++++++++++++++++++
 net/netfilter/ipvs/ip_vs_dh.c           |  261 +++
 net/netfilter/ipvs/ip_vs_est.c          |  166 ++
 net/netfilter/ipvs/ip_vs_ftp.c          |  410 ++++
 net/netfilter/ipvs/ip_vs_lblc.c         |  555 +++++
 net/netfilter/ipvs/ip_vs_lblcr.c        |  755 +++++++
 net/netfilter/ipvs/ip_vs_lc.c           |  103 +
 net/netfilter/ipvs/ip_vs_nq.c           |  138 ++
 net/netfilter/ipvs/ip_vs_proto.c        |  288 +++
 net/netfilter/ipvs/ip_vs_proto_ah_esp.c |  235 +++
 net/netfilter/ipvs/ip_vs_proto_tcp.c    |  732 +++++++
 net/netfilter/ipvs/ip_vs_proto_udp.c    |  533 +++++
 net/netfilter/ipvs/ip_vs_rr.c           |  112 +
 net/netfilter/ipvs/ip_vs_sched.c        |  251 +++
 net/netfilter/ipvs/ip_vs_sed.c          |  140 ++
 net/netfilter/ipvs/ip_vs_sh.c           |  258 +++
 net/netfilter/ipvs/ip_vs_sync.c         |  942 +++++++++
 net/netfilter/ipvs/ip_vs_wlc.c          |  128 ++
 net/netfilter/ipvs/ip_vs_wrr.c          |  237 +++
 net/netfilter/ipvs/ip_vs_xmit.c         | 1004 +++++++++
 53 files changed, 14242 insertions(+), 13979 deletions(-)
 delete mode 100644 net/ipv4/ipvs/Kconfig
 delete mode 100644 net/ipv4/ipvs/Makefile
 delete mode 100644 net/ipv4/ipvs/ip_vs_app.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_conn.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_core.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_ctl.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_est.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_ftp.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_lblc.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_lblcr.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_lc.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_nq.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_proto.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_proto_ah_esp.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_proto_tcp.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_proto_udp.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_rr.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_sched.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_sed.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_sh.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_sync.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_wlc.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_wrr.c
 delete mode 100644 net/ipv4/ipvs/ip_vs_xmit.c
 create mode 100644 net/netfilter/ipvs/Kconfig
 create mode 100644 net/netfilter/ipvs/Makefile
 create mode 100644 net/netfilter/ipvs/ip_vs_app.c
 create mode 100644 net/netfilter/ipvs/ip_vs_conn.c
 create mode 100644 net/netfilter/ipvs/ip_vs_core.c
 create mode 100644 net/netfilter/ipvs/ip_vs_ctl.c
 create mode 100644 net/netfilter/ipvs/ip_vs_dh.c
 create mode 100644 net/netfilter/ipvs/ip_vs_est.c
 create mode 100644 net/netfilter/ipvs/ip_vs_ftp.c
 create mode 100644 net/netfilter/ipvs/ip_vs_lblc.c
 create mode 100644 net/netfilter/ipvs/ip_vs_lblcr.c
 create mode 100644 net/netfilter/ipvs/ip_vs_lc.c
 create mode 100644 net/netfilter/ipvs/ip_vs_nq.c
 create mode 100644 net/netfilter/ipvs/ip_vs_proto.c
 create mode 100644 net/netfilter/ipvs/ip_vs_proto_ah_esp.c
 create mode 100644 net/netfilter/ipvs/ip_vs_proto_tcp.c
 create mode 100644 net/netfilter/ipvs/ip_vs_proto_udp.c
 create mode 100644 net/netfilter/ipvs/ip_vs_rr.c
 create mode 100644 net/netfilter/ipvs/ip_vs_sched.c
 create mode 100644 net/netfilter/ipvs/ip_vs_sed.c
 create mode 100644 net/netfilter/ipvs/ip_vs_sh.c
 create mode 100644 net/netfilter/ipvs/ip_vs_sync.c
 create mode 100644 net/netfilter/ipvs/ip_vs_wlc.c
 create mode 100644 net/netfilter/ipvs/ip_vs_wrr.c
 create mode 100644 net/netfilter/ipvs/ip_vs_xmit.c

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 591ea23639c..691268f3a35 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -630,5 +630,3 @@ config TCP_MD5SIG
 
 	  If unsure, say N.
 
-source "net/ipv4/ipvs/Kconfig"
-
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ad40ef3f9eb..80ff87ce43a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -33,7 +33,6 @@ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
 obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
 obj-$(CONFIG_IP_PNP) += ipconfig.o
 obj-$(CONFIG_NETFILTER)	+= netfilter.o netfilter/
-obj-$(CONFIG_IP_VS) += ipvs/
 obj-$(CONFIG_INET_DIAG) += inet_diag.o 
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
deleted file mode 100644
index de6004de80b..00000000000
--- a/net/ipv4/ipvs/Kconfig
+++ /dev/null
@@ -1,239 +0,0 @@
-#
-# IP Virtual Server configuration
-#
-menuconfig IP_VS
-	tristate "IP virtual server support (EXPERIMENTAL)"
-	depends on NETFILTER
-	---help---
-	  IP Virtual Server support will let you build a high-performance
-	  virtual server based on cluster of two or more real servers. This
-	  option must be enabled for at least one of the clustered computers
-	  that will take care of intercepting incoming connections to a
-	  single IP address and scheduling them to real servers.
-
-	  Three request dispatching techniques are implemented, they are
-	  virtual server via NAT, virtual server via tunneling and virtual
-	  server via direct routing. The several scheduling algorithms can
-	  be used to choose which server the connection is directed to,
-	  thus load balancing can be achieved among the servers.  For more
-	  information and its administration program, please visit the
-	  following URL: <http://www.linuxvirtualserver.org/>.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-if IP_VS
-
-config	IP_VS_IPV6
-	bool "IPv6 support for IPVS (DANGEROUS)"
-	depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
-	---help---
-	  Add IPv6 support to IPVS. This is incomplete and might be dangerous.
-
-	  Say N if unsure.
-
-config	IP_VS_DEBUG
-	bool "IP virtual server debugging"
-	---help---
-	  Say Y here if you want to get additional messages useful in
-	  debugging the IP virtual server code. You can change the debug
-	  level in /proc/sys/net/ipv4/vs/debug_level
-
-config	IP_VS_TAB_BITS
-	int "IPVS connection table size (the Nth power of 2)"
-	range 8 20
-	default 12
-	---help---
-	  The IPVS connection hash table uses the chaining scheme to handle
-	  hash collisions. Using a big IPVS connection hash table will greatly
-	  reduce conflicts when there are hundreds of thousands of connections
-	  in the hash table.
-
-	  Note the table size must be power of 2. The table size will be the
-	  value of 2 to the your input number power. The number to choose is
-	  from 8 to 20, the default number is 12, which means the table size
-	  is 4096. Don't input the number too small, otherwise you will lose
-	  performance on it. You can adapt the table size yourself, according
-	  to your virtual server application. It is good to set the table size
-	  not far less than the number of connections per second multiplying
-	  average lasting time of connection in the table.  For example, your
-	  virtual server gets 200 connections per second, the connection lasts
-	  for 200 seconds in average in the connection table, the table size
-	  should be not far less than 200x200, it is good to set the table
-	  size 32768 (2**15).
-
-	  Another note that each connection occupies 128 bytes effectively and
-	  each hash entry uses 8 bytes, so you can estimate how much memory is
-	  needed for your box.
-
-comment "IPVS transport protocol load balancing support"
-
-config	IP_VS_PROTO_TCP
-	bool "TCP load balancing support"
-	---help---
-	  This option enables support for load balancing TCP transport
-	  protocol. Say Y if unsure.
-
-config	IP_VS_PROTO_UDP
-	bool "UDP load balancing support"
-	---help---
-	  This option enables support for load balancing UDP transport
-	  protocol. Say Y if unsure.
-
-config	IP_VS_PROTO_AH_ESP
-	bool
-	depends on UNDEFINED
-
-config	IP_VS_PROTO_ESP
-	bool "ESP load balancing support"
-	select IP_VS_PROTO_AH_ESP
-	---help---
-	  This option enables support for load balancing ESP (Encapsulation
-	  Security Payload) transport protocol. Say Y if unsure.
-
-config	IP_VS_PROTO_AH
-	bool "AH load balancing support"
-	select IP_VS_PROTO_AH_ESP
-	---help---
-	  This option enables support for load balancing AH (Authentication
-	  Header) transport protocol. Say Y if unsure.
-
-comment "IPVS scheduler"
-
-config	IP_VS_RR
-	tristate "round-robin scheduling"
-	---help---
-	  The robin-robin scheduling algorithm simply directs network
-	  connections to different real servers in a round-robin manner.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
- 
-config	IP_VS_WRR
-        tristate "weighted round-robin scheduling" 
-	---help---
-	  The weighted robin-robin scheduling algorithm directs network
-	  connections to different real servers based on server weights
-	  in a round-robin manner. Servers with higher weights receive
-	  new connections first than those with less weights, and servers
-	  with higher weights get more connections than those with less
-	  weights and servers with equal weights get equal connections.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-config	IP_VS_LC
-        tristate "least-connection scheduling"
-	---help---
-	  The least-connection scheduling algorithm directs network
-	  connections to the server with the least number of active 
-	  connections.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-config	IP_VS_WLC
-        tristate "weighted least-connection scheduling"
-	---help---
-	  The weighted least-connection scheduling algorithm directs network
-	  connections to the server with the least active connections
-	  normalized by the server weight.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-config	IP_VS_LBLC
-	tristate "locality-based least-connection scheduling"
-	---help---
-	  The locality-based least-connection scheduling algorithm is for
-	  destination IP load balancing. It is usually used in cache cluster.
-	  This algorithm usually directs packet destined for an IP address to
-	  its server if the server is alive and under load. If the server is
-	  overloaded (its active connection numbers is larger than its weight)
-	  and there is a server in its half load, then allocate the weighted
-	  least-connection server to this IP address.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-config  IP_VS_LBLCR
-	tristate "locality-based least-connection with replication scheduling"
-	---help---
-	  The locality-based least-connection with replication scheduling
-	  algorithm is also for destination IP load balancing. It is 
-	  usually used in cache cluster. It differs from the LBLC scheduling
-	  as follows: the load balancer maintains mappings from a target
-	  to a set of server nodes that can serve the target. Requests for
-	  a target are assigned to the least-connection node in the target's
-	  server set. If all the node in the server set are over loaded,
-	  it picks up a least-connection node in the cluster and adds it
-	  in the sever set for the target. If the server set has not been
-	  modified for the specified time, the most loaded node is removed
-	  from the server set, in order to avoid high degree of replication.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-config	IP_VS_DH
-	tristate "destination hashing scheduling"
-	---help---
-	  The destination hashing scheduling algorithm assigns network
-	  connections to the servers through looking up a statically assigned
-	  hash table by their destination IP addresses.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-config	IP_VS_SH
-	tristate "source hashing scheduling"
-	---help---
-	  The source hashing scheduling algorithm assigns network
-	  connections to the servers through looking up a statically assigned
-	  hash table by their source IP addresses.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-config	IP_VS_SED
-	tristate "shortest expected delay scheduling"
-	---help---
-	  The shortest expected delay scheduling algorithm assigns network
-	  connections to the server with the shortest expected delay. The 
-	  expected delay that the job will experience is (Ci + 1) / Ui if 
-	  sent to the ith server, in which Ci is the number of connections
-	  on the ith server and Ui is the fixed service rate (weight)
-	  of the ith server.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-config	IP_VS_NQ
-	tristate "never queue scheduling"
-	---help---
-	  The never queue scheduling algorithm adopts a two-speed model.
-	  When there is an idle server available, the job will be sent to
-	  the idle server, instead of waiting for a fast one. When there
-	  is no idle server available, the job will be sent to the server
-	  that minimize its expected delay (The Shortest Expected Delay
-	  scheduling algorithm).
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-comment 'IPVS application helper'
-
-config	IP_VS_FTP
-  	tristate "FTP protocol helper"
-        depends on IP_VS_PROTO_TCP
-	---help---
-	  FTP is a protocol that transfers IP address and/or port number in
-	  the payload. In the virtual server via Network Address Translation,
-	  the IP address and port number of real servers cannot be sent to
-	  clients in ftp connections directly, so FTP protocol helper is
-	  required for tracking the connection and mangling it back to that of
-	  virtual service.
-
-	  If you want to compile it in kernel, say Y. To compile it as a
-	  module, choose M here. If unsure, say N.
-
-endif # IP_VS
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile
deleted file mode 100644
index 73a46fe1fe4..00000000000
--- a/net/ipv4/ipvs/Makefile
+++ /dev/null
@@ -1,33 +0,0 @@
-#
-# Makefile for the IPVS modules on top of IPv4.
-#
-
-# IPVS transport protocol load balancing support
-ip_vs_proto-objs-y :=
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
-
-ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o	   \
-		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o	   		   \
-		ip_vs_est.o ip_vs_proto.o 				   \
-		$(ip_vs_proto-objs-y)
-
-
-# IPVS core
-obj-$(CONFIG_IP_VS) += ip_vs.o
-
-# IPVS schedulers
-obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
-obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
-obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
-obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
-obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
-obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
-obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
-obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
-obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
-obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
-
-# IPVS application helpers
-obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
deleted file mode 100644
index 201b8ea3020..00000000000
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * ip_vs_app.c: Application module support for IPVS
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
- * is that ip_vs_app module handles the reverse direction (incoming requests
- * and outgoing responses).
- *
- *		IP_MASQ_APP application masquerading module
- *
- * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <net/net_namespace.h>
-#include <net/protocol.h>
-#include <net/tcp.h>
-#include <asm/system.h>
-#include <linux/stat.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-
-#include <net/ip_vs.h>
-
-EXPORT_SYMBOL(register_ip_vs_app);
-EXPORT_SYMBOL(unregister_ip_vs_app);
-EXPORT_SYMBOL(register_ip_vs_app_inc);
-
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
-/*
- *	Get an ip_vs_app object
- */
-static inline int ip_vs_app_get(struct ip_vs_app *app)
-{
-	return try_module_get(app->module);
-}
-
-
-static inline void ip_vs_app_put(struct ip_vs_app *app)
-{
-	module_put(app->module);
-}
-
-
-/*
- *	Allocate/initialize app incarnation and register it in proto apps.
- */
-static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
-{
-	struct ip_vs_protocol *pp;
-	struct ip_vs_app *inc;
-	int ret;
-
-	if (!(pp = ip_vs_proto_get(proto)))
-		return -EPROTONOSUPPORT;
-
-	if (!pp->unregister_app)
-		return -EOPNOTSUPP;
-
-	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
-	if (!inc)
-		return -ENOMEM;
-	INIT_LIST_HEAD(&inc->p_list);
-	INIT_LIST_HEAD(&inc->incs_list);
-	inc->app = app;
-	inc->port = htons(port);
-	atomic_set(&inc->usecnt, 0);
-
-	if (app->timeouts) {
-		inc->timeout_table =
-			ip_vs_create_timeout_table(app->timeouts,
-						   app->timeouts_size);
-		if (!inc->timeout_table) {
-			ret = -ENOMEM;
-			goto out;
-		}
-	}
-
-	ret = pp->register_app(inc);
-	if (ret)
-		goto out;
-
-	list_add(&inc->a_list, &app->incs_list);
-	IP_VS_DBG(9, "%s application %s:%u registered\n",
-		  pp->name, inc->name, inc->port);
-
-	return 0;
-
-  out:
-	kfree(inc->timeout_table);
-	kfree(inc);
-	return ret;
-}
-
-
-/*
- *	Release app incarnation
- */
-static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
-{
-	struct ip_vs_protocol *pp;
-
-	if (!(pp = ip_vs_proto_get(inc->protocol)))
-		return;
-
-	if (pp->unregister_app)
-		pp->unregister_app(inc);
-
-	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
-		  pp->name, inc->name, inc->port);
-
-	list_del(&inc->a_list);
-
-	kfree(inc->timeout_table);
-	kfree(inc);
-}
-
-
-/*
- *	Get reference to app inc (only called from softirq)
- *
- */
-int ip_vs_app_inc_get(struct ip_vs_app *inc)
-{
-	int result;
-
-	atomic_inc(&inc->usecnt);
-	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
-		atomic_dec(&inc->usecnt);
-	return result;
-}
-
-
-/*
- *	Put the app inc (only called from timer or net softirq)
- */
-void ip_vs_app_inc_put(struct ip_vs_app *inc)
-{
-	ip_vs_app_put(inc->app);
-	atomic_dec(&inc->usecnt);
-}
-
-
-/*
- *	Register an application incarnation in protocol applications
- */
-int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
-{
-	int result;
-
-	mutex_lock(&__ip_vs_app_mutex);
-
-	result = ip_vs_app_inc_new(app, proto, port);
-
-	mutex_unlock(&__ip_vs_app_mutex);
-
-	return result;
-}
-
-
-/*
- *	ip_vs_app registration routine
- */
-int register_ip_vs_app(struct ip_vs_app *app)
-{
-	/* increase the module use count */
-	ip_vs_use_count_inc();
-
-	mutex_lock(&__ip_vs_app_mutex);
-
-	list_add(&app->a_list, &ip_vs_app_list);
-
-	mutex_unlock(&__ip_vs_app_mutex);
-
-	return 0;
-}
-
-
-/*
- *	ip_vs_app unregistration routine
- *	We are sure there are no app incarnations attached to services
- */
-void unregister_ip_vs_app(struct ip_vs_app *app)
-{
-	struct ip_vs_app *inc, *nxt;
-
-	mutex_lock(&__ip_vs_app_mutex);
-
-	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
-		ip_vs_app_inc_release(inc);
-	}
-
-	list_del(&app->a_list);
-
-	mutex_unlock(&__ip_vs_app_mutex);
-
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
-}
-
-
-/*
- *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
- */
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
-{
-	return pp->app_conn_bind(cp);
-}
-
-
-/*
- *	Unbind cp from application incarnation (called by cp destructor)
- */
-void ip_vs_unbind_app(struct ip_vs_conn *cp)
-{
-	struct ip_vs_app *inc = cp->app;
-
-	if (!inc)
-		return;
-
-	if (inc->unbind_conn)
-		inc->unbind_conn(inc, cp);
-	if (inc->done_conn)
-		inc->done_conn(inc, cp);
-	ip_vs_app_inc_put(inc);
-	cp->app = NULL;
-}
-
-
-/*
- *	Fixes th->seq based on ip_vs_seq info.
- */
-static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
-{
-	__u32 seq = ntohl(th->seq);
-
-	/*
-	 *	Adjust seq with delta-offset for all packets after
-	 *	the most recent resized pkt seq and with previous_delta offset
-	 *	for all packets	before most recent resized pkt seq.
-	 */
-	if (vseq->delta || vseq->previous_delta) {
-		if(after(seq, vseq->init_seq)) {
-			th->seq = htonl(seq + vseq->delta);
-			IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
-				  vseq->delta);
-		} else {
-			th->seq = htonl(seq + vseq->previous_delta);
-			IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
-				  "(%d) to seq\n", vseq->previous_delta);
-		}
-	}
-}
-
-
-/*
- *	Fixes th->ack_seq based on ip_vs_seq info.
- */
-static inline void
-vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
-{
-	__u32 ack_seq = ntohl(th->ack_seq);
-
-	/*
-	 * Adjust ack_seq with delta-offset for
-	 * the packets AFTER most recent resized pkt has caused a shift
-	 * for packets before most recent resized pkt, use previous_delta
-	 */
-	if (vseq->delta || vseq->previous_delta) {
-		/* since ack_seq is the number of octet that is expected
-		   to receive next, so compare it with init_seq+delta */
-		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
-			th->ack_seq = htonl(ack_seq - vseq->delta);
-			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
-				  "(%d) from ack_seq\n", vseq->delta);
-
-		} else {
-			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
-			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
-				  "previous_delta (%d) from ack_seq\n",
-				  vseq->previous_delta);
-		}
-	}
-}
-
-
-/*
- *	Updates ip_vs_seq if pkt has been resized
- *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
- */
-static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
-				 unsigned flag, __u32 seq, int diff)
-{
-	/* spinlock is to keep updating cp->flags atomic */
-	spin_lock(&cp->lock);
-	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
-		vseq->previous_delta = vseq->delta;
-		vseq->delta += diff;
-		vseq->init_seq = seq;
-		cp->flags |= flag;
-	}
-	spin_unlock(&cp->lock);
-}
-
-static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
-				  struct ip_vs_app *app)
-{
-	int diff;
-	const unsigned int tcp_offset = ip_hdrlen(skb);
-	struct tcphdr *th;
-	__u32 seq;
-
-	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
-		return 0;
-
-	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
-
-	/*
-	 *	Remember seq number in case this pkt gets resized
-	 */
-	seq = ntohl(th->seq);
-
-	/*
-	 *	Fix seq stuff if flagged as so.
-	 */
-	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
-		vs_fix_seq(&cp->out_seq, th);
-	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
-		vs_fix_ack_seq(&cp->in_seq, th);
-
-	/*
-	 *	Call private output hook function
-	 */
-	if (app->pkt_out == NULL)
-		return 1;
-
-	if (!app->pkt_out(app, cp, skb, &diff))
-		return 0;
-
-	/*
-	 *	Update ip_vs seq stuff if len has changed.
-	 */
-	if (diff != 0)
-		vs_seq_update(cp, &cp->out_seq,
-			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
-
-	return 1;
-}
-
-/*
- *	Output pkt hook. Will call bound ip_vs_app specific function
- *	called by ipvs packet handler, assumes previously checked cp!=NULL
- *	returns false if it can't handle packet (oom)
- */
-int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
-{
-	struct ip_vs_app *app;
-
-	/*
-	 *	check if application module is bound to
-	 *	this ip_vs_conn.
-	 */
-	if ((app = cp->app) == NULL)
-		return 1;
-
-	/* TCP is complicated */
-	if (cp->protocol == IPPROTO_TCP)
-		return app_tcp_pkt_out(cp, skb, app);
-
-	/*
-	 *	Call private output hook function
-	 */
-	if (app->pkt_out == NULL)
-		return 1;
-
-	return app->pkt_out(app, cp, skb, NULL);
-}
-
-
-static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
-				 struct ip_vs_app *app)
-{
-	int diff;
-	const unsigned int tcp_offset = ip_hdrlen(skb);
-	struct tcphdr *th;
-	__u32 seq;
-
-	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
-		return 0;
-
-	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
-
-	/*
-	 *	Remember seq number in case this pkt gets resized
-	 */
-	seq = ntohl(th->seq);
-
-	/*
-	 *	Fix seq stuff if flagged as so.
-	 */
-	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
-		vs_fix_seq(&cp->in_seq, th);
-	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
-		vs_fix_ack_seq(&cp->out_seq, th);
-
-	/*
-	 *	Call private input hook function
-	 */
-	if (app->pkt_in == NULL)
-		return 1;
-
-	if (!app->pkt_in(app, cp, skb, &diff))
-		return 0;
-
-	/*
-	 *	Update ip_vs seq stuff if len has changed.
-	 */
-	if (diff != 0)
-		vs_seq_update(cp, &cp->in_seq,
-			      IP_VS_CONN_F_IN_SEQ, seq, diff);
-
-	return 1;
-}
-
-/*
- *	Input pkt hook. Will call bound ip_vs_app specific function
- *	called by ipvs packet handler, assumes previously checked cp!=NULL.
- *	returns false if can't handle packet (oom).
- */
-int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
-{
-	struct ip_vs_app *app;
-
-	/*
-	 *	check if application module is bound to
-	 *	this ip_vs_conn.
-	 */
-	if ((app = cp->app) == NULL)
-		return 1;
-
-	/* TCP is complicated */
-	if (cp->protocol == IPPROTO_TCP)
-		return app_tcp_pkt_in(cp, skb, app);
-
-	/*
-	 *	Call private input hook function
-	 */
-	if (app->pkt_in == NULL)
-		return 1;
-
-	return app->pkt_in(app, cp, skb, NULL);
-}
-
-
-#ifdef CONFIG_PROC_FS
-/*
- *	/proc/net/ip_vs_app entry function
- */
-
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
-{
-	struct ip_vs_app *app, *inc;
-
-	list_for_each_entry(app, &ip_vs_app_list, a_list) {
-		list_for_each_entry(inc, &app->incs_list, a_list) {
-			if (pos-- == 0)
-				return inc;
-		}
-	}
-	return NULL;
-
-}
-
-static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	mutex_lock(&__ip_vs_app_mutex);
-
-	return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct ip_vs_app *inc, *app;
-	struct list_head *e;
-
-	++*pos;
-	if (v == SEQ_START_TOKEN)
-		return ip_vs_app_idx(0);
-
-	inc = v;
-	app = inc->app;
-
-	if ((e = inc->a_list.next) != &app->incs_list)
-		return list_entry(e, struct ip_vs_app, a_list);
-
-	/* go on to next application */
-	for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
-		app = list_entry(e, struct ip_vs_app, a_list);
-		list_for_each_entry(inc, &app->incs_list, a_list) {
-			return inc;
-		}
-	}
-	return NULL;
-}
-
-static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
-{
-	mutex_unlock(&__ip_vs_app_mutex);
-}
-
-static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "prot port    usecnt name\n");
-	else {
-		const struct ip_vs_app *inc = v;
-
-		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
-			   ip_vs_proto_name(inc->protocol),
-			   ntohs(inc->port),
-			   atomic_read(&inc->usecnt),
-			   inc->name);
-	}
-	return 0;
-}
-
-static const struct seq_operations ip_vs_app_seq_ops = {
-	.start = ip_vs_app_seq_start,
-	.next  = ip_vs_app_seq_next,
-	.stop  = ip_vs_app_seq_stop,
-	.show  = ip_vs_app_seq_show,
-};
-
-static int ip_vs_app_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ip_vs_app_seq_ops);
-}
-
-static const struct file_operations ip_vs_app_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = ip_vs_app_open,
-	.read	 = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-#endif
-
-
-/*
- *	Replace a segment of data with a new segment
- */
-int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
-		      char *o_buf, int o_len, char *n_buf, int n_len)
-{
-	int diff;
-	int o_offset;
-	int o_left;
-
-	EnterFunction(9);
-
-	diff = n_len - o_len;
-	o_offset = o_buf - (char *)skb->data;
-	/* The length of left data after o_buf+o_len in the skb data */
-	o_left = skb->len - (o_offset + o_len);
-
-	if (diff <= 0) {
-		memmove(o_buf + n_len, o_buf + o_len, o_left);
-		memcpy(o_buf, n_buf, n_len);
-		skb_trim(skb, skb->len + diff);
-	} else if (diff <= skb_tailroom(skb)) {
-		skb_put(skb, diff);
-		memmove(o_buf + n_len, o_buf + o_len, o_left);
-		memcpy(o_buf, n_buf, n_len);
-	} else {
-		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
-			return -ENOMEM;
-		skb_put(skb, diff);
-		memmove(skb->data + o_offset + n_len,
-			skb->data + o_offset + o_len, o_left);
-		skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
-	}
-
-	/* must update the iph total length here */
-	ip_hdr(skb)->tot_len = htons(skb->len);
-
-	LeaveFunction(9);
-	return 0;
-}
-
-
-int __init ip_vs_app_init(void)
-{
-	/* we will replace it with proc_net_ipvs_create() soon */
-	proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
-	return 0;
-}
-
-
-void ip_vs_app_cleanup(void)
-{
-	proc_net_remove(&init_net, "ip_vs_app");
-}
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
deleted file mode 100644
index 9a24332fbed..00000000000
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ /dev/null
@@ -1,1110 +0,0 @@
-/*
- * IPVS         An implementation of the IP virtual server support for the
- *              LINUX operating system.  IPVS is now implemented as a module
- *              over the Netfilter framework. IPVS can be used to build a
- *              high-performance and highly available server based on a
- *              cluster of servers.
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Peter Kese <peter.kese@ijs.si>
- *              Julian Anastasov <ja@ssi.bg>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
- * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
- * and others. Many code here is taken from IP MASQ code of kernel 2.2.
- *
- * Changes:
- *
- */
-
-#include <linux/interrupt.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/proc_fs.h>		/* for proc_net_* */
-#include <linux/seq_file.h>
-#include <linux/jhash.h>
-#include <linux/random.h>
-
-#include <net/net_namespace.h>
-#include <net/ip_vs.h>
-
-
-/*
- *  Connection hash table: for input and output packets lookups of IPVS
- */
-static struct list_head *ip_vs_conn_tab;
-
-/*  SLAB cache for IPVS connections */
-static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
-
-/*  counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
-/*  counter for no client port connections */
-static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
-
-/* random value for IPVS connection hash */
-static unsigned int ip_vs_conn_rnd;
-
-/*
- *  Fine locking granularity for big connection hash table
- */
-#define CT_LOCKARRAY_BITS  4
-#define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
-#define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
-
-struct ip_vs_aligned_lock
-{
-	rwlock_t	l;
-} __attribute__((__aligned__(SMP_CACHE_BYTES)));
-
-/* lock array for conn table */
-static struct ip_vs_aligned_lock
-__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
-
-static inline void ct_read_lock(unsigned key)
-{
-	read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_unlock(unsigned key)
-{
-	read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_lock(unsigned key)
-{
-	write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_unlock(unsigned key)
-{
-	write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_lock_bh(unsigned key)
-{
-	read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_unlock_bh(unsigned key)
-{
-	read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_lock_bh(unsigned key)
-{
-	write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_unlock_bh(unsigned key)
-{
-	write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-
-/*
- *	Returns hash value for IPVS connection entry
- */
-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
-				       const union nf_inet_addr *addr,
-				       __be16 port)
-{
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
-				    (__force u32)port, proto, ip_vs_conn_rnd)
-			& IP_VS_CONN_TAB_MASK;
-#endif
-	return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
-			    ip_vs_conn_rnd)
-		& IP_VS_CONN_TAB_MASK;
-}
-
-
-/*
- *	Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
- *	returns bool success.
- */
-static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
-{
-	unsigned hash;
-	int ret;
-
-	/* Hash by protocol, client address and port */
-	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
-
-	ct_write_lock(hash);
-
-	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
-		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
-		cp->flags |= IP_VS_CONN_F_HASHED;
-		atomic_inc(&cp->refcnt);
-		ret = 1;
-	} else {
-		IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, "
-			  "called from %p\n", __builtin_return_address(0));
-		ret = 0;
-	}
-
-	ct_write_unlock(hash);
-
-	return ret;
-}
-
-
-/*
- *	UNhashes ip_vs_conn from ip_vs_conn_tab.
- *	returns bool success.
- */
-static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
-{
-	unsigned hash;
-	int ret;
-
-	/* unhash it and decrease its reference counter */
-	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
-
-	ct_write_lock(hash);
-
-	if (cp->flags & IP_VS_CONN_F_HASHED) {
-		list_del(&cp->c_list);
-		cp->flags &= ~IP_VS_CONN_F_HASHED;
-		atomic_dec(&cp->refcnt);
-		ret = 1;
-	} else
-		ret = 0;
-
-	ct_write_unlock(hash);
-
-	return ret;
-}
-
-
-/*
- *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
- *  Called for pkts coming from OUTside-to-INside.
- *	s_addr, s_port: pkt source address (foreign host)
- *	d_addr, d_port: pkt dest address (load balancer)
- */
-static inline struct ip_vs_conn *__ip_vs_conn_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port)
-{
-	unsigned hash;
-	struct ip_vs_conn *cp;
-
-	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
-
-	ct_read_lock(hash);
-
-	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (cp->af == af &&
-		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
-		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
-		    s_port == cp->cport && d_port == cp->vport &&
-		    ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
-		    protocol == cp->protocol) {
-			/* HIT */
-			atomic_inc(&cp->refcnt);
-			ct_read_unlock(hash);
-			return cp;
-		}
-	}
-
-	ct_read_unlock(hash);
-
-	return NULL;
-}
-
-struct ip_vs_conn *ip_vs_conn_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port)
-{
-	struct ip_vs_conn *cp;
-
-	cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
-	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
-		cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
-					 d_port);
-
-	IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
-		      ip_vs_proto_name(protocol),
-		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
-		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
-		      cp ? "hit" : "not hit");
-
-	return cp;
-}
-
-/* Get reference to connection template */
-struct ip_vs_conn *ip_vs_ct_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port)
-{
-	unsigned hash;
-	struct ip_vs_conn *cp;
-
-	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
-
-	ct_read_lock(hash);
-
-	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (cp->af == af &&
-		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
-		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
-		    s_port == cp->cport && d_port == cp->vport &&
-		    cp->flags & IP_VS_CONN_F_TEMPLATE &&
-		    protocol == cp->protocol) {
-			/* HIT */
-			atomic_inc(&cp->refcnt);
-			goto out;
-		}
-	}
-	cp = NULL;
-
-  out:
-	ct_read_unlock(hash);
-
-	IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
-		      ip_vs_proto_name(protocol),
-		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
-		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
-		      cp ? "hit" : "not hit");
-
-	return cp;
-}
-
-/*
- *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
- *  Called for pkts coming from inside-to-OUTside.
- *	s_addr, s_port: pkt source address (inside host)
- *	d_addr, d_port: pkt dest address (foreign host)
- */
-struct ip_vs_conn *ip_vs_conn_out_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port)
-{
-	unsigned hash;
-	struct ip_vs_conn *cp, *ret=NULL;
-
-	/*
-	 *	Check for "full" addressed entries
-	 */
-	hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
-
-	ct_read_lock(hash);
-
-	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (cp->af == af &&
-		    ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
-		    ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
-		    d_port == cp->cport && s_port == cp->dport &&
-		    protocol == cp->protocol) {
-			/* HIT */
-			atomic_inc(&cp->refcnt);
-			ret = cp;
-			break;
-		}
-	}
-
-	ct_read_unlock(hash);
-
-	IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
-		      ip_vs_proto_name(protocol),
-		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
-		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
-		      ret ? "hit" : "not hit");
-
-	return ret;
-}
-
-
-/*
- *      Put back the conn and restart its timer with its timeout
- */
-void ip_vs_conn_put(struct ip_vs_conn *cp)
-{
-	/* reset it expire in its timeout */
-	mod_timer(&cp->timer, jiffies+cp->timeout);
-
-	__ip_vs_conn_put(cp);
-}
-
-
-/*
- *	Fill a no_client_port connection with a client port number
- */
-void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
-{
-	if (ip_vs_conn_unhash(cp)) {
-		spin_lock(&cp->lock);
-		if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
-			atomic_dec(&ip_vs_conn_no_cport_cnt);
-			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
-			cp->cport = cport;
-		}
-		spin_unlock(&cp->lock);
-
-		/* hash on new dport */
-		ip_vs_conn_hash(cp);
-	}
-}
-
-
-/*
- *	Bind a connection entry with the corresponding packet_xmit.
- *	Called by ip_vs_conn_new.
- */
-static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
-{
-	switch (IP_VS_FWD_METHOD(cp)) {
-	case IP_VS_CONN_F_MASQ:
-		cp->packet_xmit = ip_vs_nat_xmit;
-		break;
-
-	case IP_VS_CONN_F_TUNNEL:
-		cp->packet_xmit = ip_vs_tunnel_xmit;
-		break;
-
-	case IP_VS_CONN_F_DROUTE:
-		cp->packet_xmit = ip_vs_dr_xmit;
-		break;
-
-	case IP_VS_CONN_F_LOCALNODE:
-		cp->packet_xmit = ip_vs_null_xmit;
-		break;
-
-	case IP_VS_CONN_F_BYPASS:
-		cp->packet_xmit = ip_vs_bypass_xmit;
-		break;
-	}
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
-{
-	switch (IP_VS_FWD_METHOD(cp)) {
-	case IP_VS_CONN_F_MASQ:
-		cp->packet_xmit = ip_vs_nat_xmit_v6;
-		break;
-
-	case IP_VS_CONN_F_TUNNEL:
-		cp->packet_xmit = ip_vs_tunnel_xmit_v6;
-		break;
-
-	case IP_VS_CONN_F_DROUTE:
-		cp->packet_xmit = ip_vs_dr_xmit_v6;
-		break;
-
-	case IP_VS_CONN_F_LOCALNODE:
-		cp->packet_xmit = ip_vs_null_xmit;
-		break;
-
-	case IP_VS_CONN_F_BYPASS:
-		cp->packet_xmit = ip_vs_bypass_xmit_v6;
-		break;
-	}
-}
-#endif
-
-
-static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
-{
-	return atomic_read(&dest->activeconns)
-		+ atomic_read(&dest->inactconns);
-}
-
-/*
- *	Bind a connection entry with a virtual service destination
- *	Called just after a new connection entry is created.
- */
-static inline void
-ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
-{
-	/* if dest is NULL, then return directly */
-	if (!dest)
-		return;
-
-	/* Increase the refcnt counter of the dest */
-	atomic_inc(&dest->refcnt);
-
-	/* Bind with the destination and its corresponding transmitter */
-	if ((cp->flags & IP_VS_CONN_F_SYNC) &&
-	    (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
-		/* if the connection is not template and is created
-		 * by sync, preserve the activity flag.
-		 */
-		cp->flags |= atomic_read(&dest->conn_flags) &
-			     (~IP_VS_CONN_F_INACTIVE);
-	else
-		cp->flags |= atomic_read(&dest->conn_flags);
-	cp->dest = dest;
-
-	IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
-		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
-		      "dest->refcnt:%d\n",
-		      ip_vs_proto_name(cp->protocol),
-		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
-		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
-		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
-		      ip_vs_fwd_tag(cp), cp->state,
-		      cp->flags, atomic_read(&cp->refcnt),
-		      atomic_read(&dest->refcnt));
-
-	/* Update the connection counters */
-	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
-		/* It is a normal connection, so increase the inactive
-		   connection counter because it is in TCP SYNRECV
-		   state (inactive) or other protocol inacive state */
-		if ((cp->flags & IP_VS_CONN_F_SYNC) &&
-		    (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
-			atomic_inc(&dest->activeconns);
-		else
-			atomic_inc(&dest->inactconns);
-	} else {
-		/* It is a persistent connection/template, so increase
-		   the peristent connection counter */
-		atomic_inc(&dest->persistconns);
-	}
-
-	if (dest->u_threshold != 0 &&
-	    ip_vs_dest_totalconns(dest) >= dest->u_threshold)
-		dest->flags |= IP_VS_DEST_F_OVERLOAD;
-}
-
-
-/*
- * Check if there is a destination for the connection, if so
- * bind the connection to the destination.
- */
-struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
-{
-	struct ip_vs_dest *dest;
-
-	if ((cp) && (!cp->dest)) {
-		dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
-				       &cp->vaddr, cp->vport,
-				       cp->protocol);
-		ip_vs_bind_dest(cp, dest);
-		return dest;
-	} else
-		return NULL;
-}
-
-
-/*
- *	Unbind a connection entry with its VS destination
- *	Called by the ip_vs_conn_expire function.
- */
-static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
-{
-	struct ip_vs_dest *dest = cp->dest;
-
-	if (!dest)
-		return;
-
-	IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
-		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
-		      "dest->refcnt:%d\n",
-		      ip_vs_proto_name(cp->protocol),
-		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
-		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
-		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
-		      ip_vs_fwd_tag(cp), cp->state,
-		      cp->flags, atomic_read(&cp->refcnt),
-		      atomic_read(&dest->refcnt));
-
-	/* Update the connection counters */
-	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
-		/* It is a normal connection, so decrease the inactconns
-		   or activeconns counter */
-		if (cp->flags & IP_VS_CONN_F_INACTIVE) {
-			atomic_dec(&dest->inactconns);
-		} else {
-			atomic_dec(&dest->activeconns);
-		}
-	} else {
-		/* It is a persistent connection/template, so decrease
-		   the peristent connection counter */
-		atomic_dec(&dest->persistconns);
-	}
-
-	if (dest->l_threshold != 0) {
-		if (ip_vs_dest_totalconns(dest) < dest->l_threshold)
-			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
-	} else if (dest->u_threshold != 0) {
-		if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3)
-			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
-	} else {
-		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
-			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
-	}
-
-	/*
-	 * Simply decrease the refcnt of the dest, because the
-	 * dest will be either in service's destination list
-	 * or in the trash.
-	 */
-	atomic_dec(&dest->refcnt);
-}
-
-
-/*
- *	Checking if the destination of a connection template is available.
- *	If available, return 1, otherwise invalidate this connection
- *	template and return 0.
- */
-int ip_vs_check_template(struct ip_vs_conn *ct)
-{
-	struct ip_vs_dest *dest = ct->dest;
-
-	/*
-	 * Checking the dest server status.
-	 */
-	if ((dest == NULL) ||
-	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
-	    (sysctl_ip_vs_expire_quiescent_template &&
-	     (atomic_read(&dest->weight) == 0))) {
-		IP_VS_DBG_BUF(9, "check_template: dest not available for "
-			      "protocol %s s:%s:%d v:%s:%d "
-			      "-> d:%s:%d\n",
-			      ip_vs_proto_name(ct->protocol),
-			      IP_VS_DBG_ADDR(ct->af, &ct->caddr),
-			      ntohs(ct->cport),
-			      IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
-			      ntohs(ct->vport),
-			      IP_VS_DBG_ADDR(ct->af, &ct->daddr),
-			      ntohs(ct->dport));
-
-		/*
-		 * Invalidate the connection template
-		 */
-		if (ct->vport != htons(0xffff)) {
-			if (ip_vs_conn_unhash(ct)) {
-				ct->dport = htons(0xffff);
-				ct->vport = htons(0xffff);
-				ct->cport = 0;
-				ip_vs_conn_hash(ct);
-			}
-		}
-
-		/*
-		 * Simply decrease the refcnt of the template,
-		 * don't restart its timer.
-		 */
-		atomic_dec(&ct->refcnt);
-		return 0;
-	}
-	return 1;
-}
-
-static void ip_vs_conn_expire(unsigned long data)
-{
-	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
-
-	cp->timeout = 60*HZ;
-
-	/*
-	 *	hey, I'm using it
-	 */
-	atomic_inc(&cp->refcnt);
-
-	/*
-	 *	do I control anybody?
-	 */
-	if (atomic_read(&cp->n_control))
-		goto expire_later;
-
-	/*
-	 *	unhash it if it is hashed in the conn table
-	 */
-	if (!ip_vs_conn_unhash(cp))
-		goto expire_later;
-
-	/*
-	 *	refcnt==1 implies I'm the only one referrer
-	 */
-	if (likely(atomic_read(&cp->refcnt) == 1)) {
-		/* delete the timer if it is activated by other users */
-		if (timer_pending(&cp->timer))
-			del_timer(&cp->timer);
-
-		/* does anybody control me? */
-		if (cp->control)
-			ip_vs_control_del(cp);
-
-		if (unlikely(cp->app != NULL))
-			ip_vs_unbind_app(cp);
-		ip_vs_unbind_dest(cp);
-		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
-			atomic_dec(&ip_vs_conn_no_cport_cnt);
-		atomic_dec(&ip_vs_conn_count);
-
-		kmem_cache_free(ip_vs_conn_cachep, cp);
-		return;
-	}
-
-	/* hash it back to the table */
-	ip_vs_conn_hash(cp);
-
-  expire_later:
-	IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
-		  atomic_read(&cp->refcnt)-1,
-		  atomic_read(&cp->n_control));
-
-	ip_vs_conn_put(cp);
-}
-
-
-void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
-{
-	if (del_timer(&cp->timer))
-		mod_timer(&cp->timer, jiffies);
-}
-
-
-/*
- *	Create a new connection entry and hash it into the ip_vs_conn_tab
- */
-struct ip_vs_conn *
-ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
-	       const union nf_inet_addr *vaddr, __be16 vport,
-	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-	       struct ip_vs_dest *dest)
-{
-	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
-
-	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
-	if (cp == NULL) {
-		IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
-		return NULL;
-	}
-
-	INIT_LIST_HEAD(&cp->c_list);
-	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
-	cp->af		   = af;
-	cp->protocol	   = proto;
-	ip_vs_addr_copy(af, &cp->caddr, caddr);
-	cp->cport	   = cport;
-	ip_vs_addr_copy(af, &cp->vaddr, vaddr);
-	cp->vport	   = vport;
-	ip_vs_addr_copy(af, &cp->daddr, daddr);
-	cp->dport          = dport;
-	cp->flags	   = flags;
-	spin_lock_init(&cp->lock);
-
-	/*
-	 * Set the entry is referenced by the current thread before hashing
-	 * it in the table, so that other thread run ip_vs_random_dropentry
-	 * but cannot drop this entry.
-	 */
-	atomic_set(&cp->refcnt, 1);
-
-	atomic_set(&cp->n_control, 0);
-	atomic_set(&cp->in_pkts, 0);
-
-	atomic_inc(&ip_vs_conn_count);
-	if (flags & IP_VS_CONN_F_NO_CPORT)
-		atomic_inc(&ip_vs_conn_no_cport_cnt);
-
-	/* Bind the connection with a destination server */
-	ip_vs_bind_dest(cp, dest);
-
-	/* Set its state and timeout */
-	cp->state = 0;
-	cp->timeout = 3*HZ;
-
-	/* Bind its packet transmitter */
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		ip_vs_bind_xmit_v6(cp);
-	else
-#endif
-		ip_vs_bind_xmit(cp);
-
-	if (unlikely(pp && atomic_read(&pp->appcnt)))
-		ip_vs_bind_app(cp, pp);
-
-	/* Hash it in the ip_vs_conn_tab finally */
-	ip_vs_conn_hash(cp);
-
-	return cp;
-}
-
-
-/*
- *	/proc/net/ip_vs_conn entries
- */
-#ifdef CONFIG_PROC_FS
-
-static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
-{
-	int idx;
-	struct ip_vs_conn *cp;
-
-	for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
-		ct_read_lock_bh(idx);
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-			if (pos-- == 0) {
-				seq->private = &ip_vs_conn_tab[idx];
-				return cp;
-			}
-		}
-		ct_read_unlock_bh(idx);
-	}
-
-	return NULL;
-}
-
-static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	seq->private = NULL;
-	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
-}
-
-static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct ip_vs_conn *cp = v;
-	struct list_head *e, *l = seq->private;
-	int idx;
-
-	++*pos;
-	if (v == SEQ_START_TOKEN)
-		return ip_vs_conn_array(seq, 0);
-
-	/* more on same hash chain? */
-	if ((e = cp->c_list.next) != l)
-		return list_entry(e, struct ip_vs_conn, c_list);
-
-	idx = l - ip_vs_conn_tab;
-	ct_read_unlock_bh(idx);
-
-	while (++idx < IP_VS_CONN_TAB_SIZE) {
-		ct_read_lock_bh(idx);
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-			seq->private = &ip_vs_conn_tab[idx];
-			return cp;
-		}
-		ct_read_unlock_bh(idx);
-	}
-	seq->private = NULL;
-	return NULL;
-}
-
-static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
-{
-	struct list_head *l = seq->private;
-
-	if (l)
-		ct_read_unlock_bh(l - ip_vs_conn_tab);
-}
-
-static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
-{
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq,
-   "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires\n");
-	else {
-		const struct ip_vs_conn *cp = v;
-
-#ifdef CONFIG_IP_VS_IPV6
-		if (cp->af == AF_INET6)
-			seq_printf(seq,
-				"%-3s " NIP6_FMT " %04X " NIP6_FMT
-				" %04X " NIP6_FMT " %04X %-11s %7lu\n",
-				ip_vs_proto_name(cp->protocol),
-				NIP6(cp->caddr.in6), ntohs(cp->cport),
-				NIP6(cp->vaddr.in6), ntohs(cp->vport),
-				NIP6(cp->daddr.in6), ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
-				(cp->timer.expires-jiffies)/HZ);
-		else
-#endif
-			seq_printf(seq,
-				"%-3s %08X %04X %08X %04X"
-				" %08X %04X %-11s %7lu\n",
-				ip_vs_proto_name(cp->protocol),
-				ntohl(cp->caddr.ip), ntohs(cp->cport),
-				ntohl(cp->vaddr.ip), ntohs(cp->vport),
-				ntohl(cp->daddr.ip), ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
-				(cp->timer.expires-jiffies)/HZ);
-	}
-	return 0;
-}
-
-static const struct seq_operations ip_vs_conn_seq_ops = {
-	.start = ip_vs_conn_seq_start,
-	.next  = ip_vs_conn_seq_next,
-	.stop  = ip_vs_conn_seq_stop,
-	.show  = ip_vs_conn_seq_show,
-};
-
-static int ip_vs_conn_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ip_vs_conn_seq_ops);
-}
-
-static const struct file_operations ip_vs_conn_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = ip_vs_conn_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
-static const char *ip_vs_origin_name(unsigned flags)
-{
-	if (flags & IP_VS_CONN_F_SYNC)
-		return "SYNC";
-	else
-		return "LOCAL";
-}
-
-static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
-{
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq,
-   "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
-	else {
-		const struct ip_vs_conn *cp = v;
-
-#ifdef CONFIG_IP_VS_IPV6
-		if (cp->af == AF_INET6)
-			seq_printf(seq,
-				"%-3s " NIP6_FMT " %04X " NIP6_FMT
-				" %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n",
-				ip_vs_proto_name(cp->protocol),
-				NIP6(cp->caddr.in6), ntohs(cp->cport),
-				NIP6(cp->vaddr.in6), ntohs(cp->vport),
-				NIP6(cp->daddr.in6), ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
-				ip_vs_origin_name(cp->flags),
-				(cp->timer.expires-jiffies)/HZ);
-		else
-#endif
-			seq_printf(seq,
-				"%-3s %08X %04X %08X %04X "
-				"%08X %04X %-11s %-6s %7lu\n",
-				ip_vs_proto_name(cp->protocol),
-				ntohl(cp->caddr.ip), ntohs(cp->cport),
-				ntohl(cp->vaddr.ip), ntohs(cp->vport),
-				ntohl(cp->daddr.ip), ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
-				ip_vs_origin_name(cp->flags),
-				(cp->timer.expires-jiffies)/HZ);
-	}
-	return 0;
-}
-
-static const struct seq_operations ip_vs_conn_sync_seq_ops = {
-	.start = ip_vs_conn_seq_start,
-	.next  = ip_vs_conn_seq_next,
-	.stop  = ip_vs_conn_seq_stop,
-	.show  = ip_vs_conn_sync_seq_show,
-};
-
-static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ip_vs_conn_sync_seq_ops);
-}
-
-static const struct file_operations ip_vs_conn_sync_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = ip_vs_conn_sync_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
-#endif
-
-
-/*
- *      Randomly drop connection entries before running out of memory
- */
-static inline int todrop_entry(struct ip_vs_conn *cp)
-{
-	/*
-	 * The drop rate array needs tuning for real environments.
-	 * Called from timer bh only => no locking
-	 */
-	static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
-	static char todrop_counter[9] = {0};
-	int i;
-
-	/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
-	   This will leave enough time for normal connection to get
-	   through. */
-	if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
-		return 0;
-
-	/* Don't drop the entry if its number of incoming packets is not
-	   located in [0, 8] */
-	i = atomic_read(&cp->in_pkts);
-	if (i > 8 || i < 0) return 0;
-
-	if (!todrop_rate[i]) return 0;
-	if (--todrop_counter[i] > 0) return 0;
-
-	todrop_counter[i] = todrop_rate[i];
-	return 1;
-}
-
-/* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
-{
-	int idx;
-	struct ip_vs_conn *cp;
-
-	/*
-	 * Randomly scan 1/32 of the whole table every second
-	 */
-	for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) {
-		unsigned hash = net_random() & IP_VS_CONN_TAB_MASK;
-
-		/*
-		 *  Lock is actually needed in this loop.
-		 */
-		ct_write_lock_bh(hash);
-
-		list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
-				/* connection template */
-				continue;
-
-			if (cp->protocol == IPPROTO_TCP) {
-				switch(cp->state) {
-				case IP_VS_TCP_S_SYN_RECV:
-				case IP_VS_TCP_S_SYNACK:
-					break;
-
-				case IP_VS_TCP_S_ESTABLISHED:
-					if (todrop_entry(cp))
-						break;
-					continue;
-
-				default:
-					continue;
-				}
-			} else {
-				if (!todrop_entry(cp))
-					continue;
-			}
-
-			IP_VS_DBG(4, "del connection\n");
-			ip_vs_conn_expire_now(cp);
-			if (cp->control) {
-				IP_VS_DBG(4, "del conn template\n");
-				ip_vs_conn_expire_now(cp->control);
-			}
-		}
-		ct_write_unlock_bh(hash);
-	}
-}
-
-
-/*
- *      Flush all the connection entries in the ip_vs_conn_tab
- */
-static void ip_vs_conn_flush(void)
-{
-	int idx;
-	struct ip_vs_conn *cp;
-
-  flush_again:
-	for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) {
-		/*
-		 *  Lock is actually needed in this loop.
-		 */
-		ct_write_lock_bh(idx);
-
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
-			IP_VS_DBG(4, "del connection\n");
-			ip_vs_conn_expire_now(cp);
-			if (cp->control) {
-				IP_VS_DBG(4, "del conn template\n");
-				ip_vs_conn_expire_now(cp->control);
-			}
-		}
-		ct_write_unlock_bh(idx);
-	}
-
-	/* the counter may be not NULL, because maybe some conn entries
-	   are run by slow timer handler or unhashed but still referred */
-	if (atomic_read(&ip_vs_conn_count) != 0) {
-		schedule();
-		goto flush_again;
-	}
-}
-
-
-int __init ip_vs_conn_init(void)
-{
-	int idx;
-
-	/*
-	 * Allocate the connection hash table and initialize its list heads
-	 */
-	ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head));
-	if (!ip_vs_conn_tab)
-		return -ENOMEM;
-
-	/* Allocate ip_vs_conn slab cache */
-	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
-					      sizeof(struct ip_vs_conn), 0,
-					      SLAB_HWCACHE_ALIGN, NULL);
-	if (!ip_vs_conn_cachep) {
-		vfree(ip_vs_conn_tab);
-		return -ENOMEM;
-	}
-
-	IP_VS_INFO("Connection hash table configured "
-		   "(size=%d, memory=%ldKbytes)\n",
-		   IP_VS_CONN_TAB_SIZE,
-		   (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024);
-	IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
-		  sizeof(struct ip_vs_conn));
-
-	for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
-		INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
-	}
-
-	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++)  {
-		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
-	}
-
-	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
-	proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
-
-	/* calculate the random value for connection hash */
-	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
-
-	return 0;
-}
-
-
-void ip_vs_conn_cleanup(void)
-{
-	/* flush all the connection entries first */
-	ip_vs_conn_flush();
-
-	/* Release the empty cache */
-	kmem_cache_destroy(ip_vs_conn_cachep);
-	proc_net_remove(&init_net, "ip_vs_conn");
-	proc_net_remove(&init_net, "ip_vs_conn_sync");
-	vfree(ip_vs_conn_tab);
-}
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
deleted file mode 100644
index 958abf3e5f8..00000000000
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ /dev/null
@@ -1,1542 +0,0 @@
-/*
- * IPVS         An implementation of the IP virtual server support for the
- *              LINUX operating system.  IPVS is now implemented as a module
- *              over the Netfilter framework. IPVS can be used to build a
- *              high-performance and highly available server based on a
- *              cluster of servers.
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Peter Kese <peter.kese@ijs.si>
- *              Julian Anastasov <ja@ssi.bg>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
- * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
- * and others.
- *
- * Changes:
- *	Paul `Rusty' Russell		properly handle non-linear skbs
- *	Harald Welte			don't use nfcache
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/icmp.h>
-
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-#include <net/icmp.h>                   /* for icmp_send */
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-
-#ifdef CONFIG_IP_VS_IPV6
-#include <net/ipv6.h>
-#include <linux/netfilter_ipv6.h>
-#endif
-
-#include <net/ip_vs.h>
-
-
-EXPORT_SYMBOL(register_ip_vs_scheduler);
-EXPORT_SYMBOL(unregister_ip_vs_scheduler);
-EXPORT_SYMBOL(ip_vs_skb_replace);
-EXPORT_SYMBOL(ip_vs_proto_name);
-EXPORT_SYMBOL(ip_vs_conn_new);
-EXPORT_SYMBOL(ip_vs_conn_in_get);
-EXPORT_SYMBOL(ip_vs_conn_out_get);
-#ifdef CONFIG_IP_VS_PROTO_TCP
-EXPORT_SYMBOL(ip_vs_tcp_conn_listen);
-#endif
-EXPORT_SYMBOL(ip_vs_conn_put);
-#ifdef CONFIG_IP_VS_DEBUG
-EXPORT_SYMBOL(ip_vs_get_debug_level);
-#endif
-
-
-/* ID used in ICMP lookups */
-#define icmp_id(icmph)          (((icmph)->un).echo.id)
-#define icmpv6_id(icmph)        (icmph->icmp6_dataun.u_echo.identifier)
-
-const char *ip_vs_proto_name(unsigned proto)
-{
-	static char buf[20];
-
-	switch (proto) {
-	case IPPROTO_IP:
-		return "IP";
-	case IPPROTO_UDP:
-		return "UDP";
-	case IPPROTO_TCP:
-		return "TCP";
-	case IPPROTO_ICMP:
-		return "ICMP";
-#ifdef CONFIG_IP_VS_IPV6
-	case IPPROTO_ICMPV6:
-		return "ICMPv6";
-#endif
-	default:
-		sprintf(buf, "IP_%d", proto);
-		return buf;
-	}
-}
-
-void ip_vs_init_hash_table(struct list_head *table, int rows)
-{
-	while (--rows >= 0)
-		INIT_LIST_HEAD(&table[rows]);
-}
-
-static inline void
-ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
-{
-	struct ip_vs_dest *dest = cp->dest;
-	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-		spin_lock(&dest->stats.lock);
-		dest->stats.ustats.inpkts++;
-		dest->stats.ustats.inbytes += skb->len;
-		spin_unlock(&dest->stats.lock);
-
-		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.ustats.inpkts++;
-		dest->svc->stats.ustats.inbytes += skb->len;
-		spin_unlock(&dest->svc->stats.lock);
-
-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.inpkts++;
-		ip_vs_stats.ustats.inbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
-	}
-}
-
-
-static inline void
-ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
-{
-	struct ip_vs_dest *dest = cp->dest;
-	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-		spin_lock(&dest->stats.lock);
-		dest->stats.ustats.outpkts++;
-		dest->stats.ustats.outbytes += skb->len;
-		spin_unlock(&dest->stats.lock);
-
-		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.ustats.outpkts++;
-		dest->svc->stats.ustats.outbytes += skb->len;
-		spin_unlock(&dest->svc->stats.lock);
-
-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.outpkts++;
-		ip_vs_stats.ustats.outbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
-	}
-}
-
-
-static inline void
-ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
-{
-	spin_lock(&cp->dest->stats.lock);
-	cp->dest->stats.ustats.conns++;
-	spin_unlock(&cp->dest->stats.lock);
-
-	spin_lock(&svc->stats.lock);
-	svc->stats.ustats.conns++;
-	spin_unlock(&svc->stats.lock);
-
-	spin_lock(&ip_vs_stats.lock);
-	ip_vs_stats.ustats.conns++;
-	spin_unlock(&ip_vs_stats.lock);
-}
-
-
-static inline int
-ip_vs_set_state(struct ip_vs_conn *cp, int direction,
-		const struct sk_buff *skb,
-		struct ip_vs_protocol *pp)
-{
-	if (unlikely(!pp->state_transition))
-		return 0;
-	return pp->state_transition(cp, direction, skb, pp);
-}
-
-
-/*
- *  IPVS persistent scheduling function
- *  It creates a connection entry according to its template if exists,
- *  or selects a server and creates a connection entry plus a template.
- *  Locking: we are svc user (svc->refcnt), so we hold all dests too
- *  Protocols supported: TCP, UDP
- */
-static struct ip_vs_conn *
-ip_vs_sched_persist(struct ip_vs_service *svc,
-		    const struct sk_buff *skb,
-		    __be16 ports[2])
-{
-	struct ip_vs_conn *cp = NULL;
-	struct ip_vs_iphdr iph;
-	struct ip_vs_dest *dest;
-	struct ip_vs_conn *ct;
-	__be16  dport;			/* destination port to forward */
-	union nf_inet_addr snet;	/* source network of the client,
-					   after masking */
-
-	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
-
-	/* Mask saddr with the netmask to adjust template granularity */
-#ifdef CONFIG_IP_VS_IPV6
-	if (svc->af == AF_INET6)
-		ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
-	else
-#endif
-		snet.ip = iph.saddr.ip & svc->netmask;
-
-	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
-		      "mnet %s\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
-		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
-		      IP_VS_DBG_ADDR(svc->af, &snet));
-
-	/*
-	 * As far as we know, FTP is a very complicated network protocol, and
-	 * it uses control connection and data connections. For active FTP,
-	 * FTP server initialize data connection to the client, its source port
-	 * is often 20. For passive FTP, FTP server tells the clients the port
-	 * that it passively listens to,  and the client issues the data
-	 * connection. In the tunneling or direct routing mode, the load
-	 * balancer is on the client-to-server half of connection, the port
-	 * number is unknown to the load balancer. So, a conn template like
-	 * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP
-	 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
-	 * is created for other persistent services.
-	 */
-	if (ports[1] == svc->port) {
-		/* Check if a template already exists */
-		if (svc->port != FTPPORT)
-			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
-					     &iph.daddr, ports[1]);
-		else
-			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
-					     &iph.daddr, 0);
-
-		if (!ct || !ip_vs_check_template(ct)) {
-			/*
-			 * No template found or the dest of the connection
-			 * template is not available.
-			 */
-			dest = svc->scheduler->schedule(svc, skb);
-			if (dest == NULL) {
-				IP_VS_DBG(1, "p-schedule: no dest found.\n");
-				return NULL;
-			}
-
-			/*
-			 * Create a template like <protocol,caddr,0,
-			 * vaddr,vport,daddr,dport> for non-ftp service,
-			 * and <protocol,caddr,0,vaddr,0,daddr,0>
-			 * for ftp service.
-			 */
-			if (svc->port != FTPPORT)
-				ct = ip_vs_conn_new(svc->af, iph.protocol,
-						    &snet, 0,
-						    &iph.daddr,
-						    ports[1],
-						    &dest->addr, dest->port,
-						    IP_VS_CONN_F_TEMPLATE,
-						    dest);
-			else
-				ct = ip_vs_conn_new(svc->af, iph.protocol,
-						    &snet, 0,
-						    &iph.daddr, 0,
-						    &dest->addr, 0,
-						    IP_VS_CONN_F_TEMPLATE,
-						    dest);
-			if (ct == NULL)
-				return NULL;
-
-			ct->timeout = svc->timeout;
-		} else {
-			/* set destination with the found template */
-			dest = ct->dest;
-		}
-		dport = dest->port;
-	} else {
-		/*
-		 * Note: persistent fwmark-based services and persistent
-		 * port zero service are handled here.
-		 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
-		 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
-		 */
-		if (svc->fwmark) {
-			union nf_inet_addr fwmark = {
-				.all = { 0, 0, 0, htonl(svc->fwmark) }
-			};
-
-			ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
-					     &fwmark, 0);
-		} else
-			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
-					     &iph.daddr, 0);
-
-		if (!ct || !ip_vs_check_template(ct)) {
-			/*
-			 * If it is not persistent port zero, return NULL,
-			 * otherwise create a connection template.
-			 */
-			if (svc->port)
-				return NULL;
-
-			dest = svc->scheduler->schedule(svc, skb);
-			if (dest == NULL) {
-				IP_VS_DBG(1, "p-schedule: no dest found.\n");
-				return NULL;
-			}
-
-			/*
-			 * Create a template according to the service
-			 */
-			if (svc->fwmark) {
-				union nf_inet_addr fwmark = {
-					.all = { 0, 0, 0, htonl(svc->fwmark) }
-				};
-
-				ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
-						    &snet, 0,
-						    &fwmark, 0,
-						    &dest->addr, 0,
-						    IP_VS_CONN_F_TEMPLATE,
-						    dest);
-			} else
-				ct = ip_vs_conn_new(svc->af, iph.protocol,
-						    &snet, 0,
-						    &iph.daddr, 0,
-						    &dest->addr, 0,
-						    IP_VS_CONN_F_TEMPLATE,
-						    dest);
-			if (ct == NULL)
-				return NULL;
-
-			ct->timeout = svc->timeout;
-		} else {
-			/* set destination with the found template */
-			dest = ct->dest;
-		}
-		dport = ports[1];
-	}
-
-	/*
-	 *    Create a new connection according to the template
-	 */
-	cp = ip_vs_conn_new(svc->af, iph.protocol,
-			    &iph.saddr, ports[0],
-			    &iph.daddr, ports[1],
-			    &dest->addr, dport,
-			    0,
-			    dest);
-	if (cp == NULL) {
-		ip_vs_conn_put(ct);
-		return NULL;
-	}
-
-	/*
-	 *    Add its control
-	 */
-	ip_vs_control_add(cp, ct);
-	ip_vs_conn_put(ct);
-
-	ip_vs_conn_stats(cp, svc);
-	return cp;
-}
-
-
-/*
- *  IPVS main scheduling function
- *  It selects a server according to the virtual service, and
- *  creates a connection entry.
- *  Protocols supported: TCP, UDP
- */
-struct ip_vs_conn *
-ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_conn *cp = NULL;
-	struct ip_vs_iphdr iph;
-	struct ip_vs_dest *dest;
-	__be16 _ports[2], *pptr;
-
-	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
-	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	/*
-	 *    Persistent service
-	 */
-	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
-		return ip_vs_sched_persist(svc, skb, pptr);
-
-	/*
-	 *    Non-persistent service
-	 */
-	if (!svc->fwmark && pptr[1] != svc->port) {
-		if (!svc->port)
-			IP_VS_ERR("Schedule: port zero only supported "
-				  "in persistent services, "
-				  "check your ipvs configuration\n");
-		return NULL;
-	}
-
-	dest = svc->scheduler->schedule(svc, skb);
-	if (dest == NULL) {
-		IP_VS_DBG(1, "Schedule: no dest found.\n");
-		return NULL;
-	}
-
-	/*
-	 *    Create a connection entry.
-	 */
-	cp = ip_vs_conn_new(svc->af, iph.protocol,
-			    &iph.saddr, pptr[0],
-			    &iph.daddr, pptr[1],
-			    &dest->addr, dest->port ? dest->port : pptr[1],
-			    0,
-			    dest);
-	if (cp == NULL)
-		return NULL;
-
-	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
-		      "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
-		      ip_vs_fwd_tag(cp),
-		      IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
-		      IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
-		      IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
-		      cp->flags, atomic_read(&cp->refcnt));
-
-	ip_vs_conn_stats(cp, svc);
-	return cp;
-}
-
-
-/*
- *  Pass or drop the packet.
- *  Called by ip_vs_in, when the virtual service is available but
- *  no destination is available for a new connection.
- */
-int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-		struct ip_vs_protocol *pp)
-{
-	__be16 _ports[2], *pptr;
-	struct ip_vs_iphdr iph;
-	int unicast;
-	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
-
-	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
-	if (pptr == NULL) {
-		ip_vs_service_put(svc);
-		return NF_DROP;
-	}
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (svc->af == AF_INET6)
-		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
-	else
-#endif
-		unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
-
-	/* if it is fwmark-based service, the cache_bypass sysctl is up
-	   and the destination is a non-local unicast, then create
-	   a cache_bypass connection entry */
-	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
-		int ret, cs;
-		struct ip_vs_conn *cp;
-		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
-
-		ip_vs_service_put(svc);
-
-		/* create a new connection entry */
-		IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
-		cp = ip_vs_conn_new(svc->af, iph.protocol,
-				    &iph.saddr, pptr[0],
-				    &iph.daddr, pptr[1],
-				    &daddr, 0,
-				    IP_VS_CONN_F_BYPASS,
-				    NULL);
-		if (cp == NULL)
-			return NF_DROP;
-
-		/* statistics */
-		ip_vs_in_stats(cp, skb);
-
-		/* set state */
-		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
-
-		/* transmit the first SYN packet */
-		ret = cp->packet_xmit(skb, cp, pp);
-		/* do not touch skb anymore */
-
-		atomic_inc(&cp->in_pkts);
-		ip_vs_conn_put(cp);
-		return ret;
-	}
-
-	/*
-	 * When the virtual ftp service is presented, packets destined
-	 * for other services on the VIP may get here (except services
-	 * listed in the ipvs table), pass the packets, because it is
-	 * not ipvs job to decide to drop the packets.
-	 */
-	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
-		ip_vs_service_put(svc);
-		return NF_ACCEPT;
-	}
-
-	ip_vs_service_put(svc);
-
-	/*
-	 * Notify the client that the destination is unreachable, and
-	 * release the socket buffer.
-	 * Since it is in IP layer, the TCP socket is not actually
-	 * created, the TCP RST packet cannot be sent, instead that
-	 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
-	 */
-#ifdef CONFIG_IP_VS_IPV6
-	if (svc->af == AF_INET6)
-		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
-			    skb->dev);
-	else
-#endif
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-
-	return NF_DROP;
-}
-
-
-/*
- *      It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
- *      chain, and is used for VS/NAT.
- *      It detects packets for VS/NAT connections and sends the packets
- *      immediately. This can avoid that iptable_nat mangles the packets
- *      for VS/NAT.
- */
-static unsigned int ip_vs_post_routing(unsigned int hooknum,
-				       struct sk_buff *skb,
-				       const struct net_device *in,
-				       const struct net_device *out,
-				       int (*okfn)(struct sk_buff *))
-{
-	if (!skb->ipvs_property)
-		return NF_ACCEPT;
-	/* The packet was sent from IPVS, exit this chain */
-	return NF_STOP;
-}
-
-__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
-{
-	return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
-}
-
-static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
-{
-	int err = ip_defrag(skb, user);
-
-	if (!err)
-		ip_send_check(ip_hdr(skb));
-
-	return err;
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
-{
-	/* TODO IPv6: Find out what to do here for IPv6 */
-	return 0;
-}
-#endif
-
-/*
- * Packet has been made sufficiently writable in caller
- * - inout: 1=in->out, 0=out->in
- */
-void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
-		    struct ip_vs_conn *cp, int inout)
-{
-	struct iphdr *iph	 = ip_hdr(skb);
-	unsigned int icmp_offset = iph->ihl*4;
-	struct icmphdr *icmph	 = (struct icmphdr *)(skb_network_header(skb) +
-						      icmp_offset);
-	struct iphdr *ciph	 = (struct iphdr *)(icmph + 1);
-
-	if (inout) {
-		iph->saddr = cp->vaddr.ip;
-		ip_send_check(iph);
-		ciph->daddr = cp->vaddr.ip;
-		ip_send_check(ciph);
-	} else {
-		iph->daddr = cp->daddr.ip;
-		ip_send_check(iph);
-		ciph->saddr = cp->daddr.ip;
-		ip_send_check(ciph);
-	}
-
-	/* the TCP/UDP port */
-	if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
-		__be16 *ports = (void *)ciph + ciph->ihl*4;
-
-		if (inout)
-			ports[1] = cp->vport;
-		else
-			ports[0] = cp->dport;
-	}
-
-	/* And finally the ICMP checksum */
-	icmph->checksum = 0;
-	icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset);
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-	if (inout)
-		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
-			"Forwarding altered outgoing ICMP");
-	else
-		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
-			"Forwarding altered incoming ICMP");
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
-		    struct ip_vs_conn *cp, int inout)
-{
-	struct ipv6hdr *iph	 = ipv6_hdr(skb);
-	unsigned int icmp_offset = sizeof(struct ipv6hdr);
-	struct icmp6hdr *icmph	 = (struct icmp6hdr *)(skb_network_header(skb) +
-						      icmp_offset);
-	struct ipv6hdr *ciph	 = (struct ipv6hdr *)(icmph + 1);
-
-	if (inout) {
-		iph->saddr = cp->vaddr.in6;
-		ciph->daddr = cp->vaddr.in6;
-	} else {
-		iph->daddr = cp->daddr.in6;
-		ciph->saddr = cp->daddr.in6;
-	}
-
-	/* the TCP/UDP port */
-	if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
-		__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
-
-		if (inout)
-			ports[1] = cp->vport;
-		else
-			ports[0] = cp->dport;
-	}
-
-	/* And finally the ICMP checksum */
-	icmph->icmp6_cksum = 0;
-	/* TODO IPv6: is this correct for ICMPv6? */
-	ip_vs_checksum_complete(skb, icmp_offset);
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-	if (inout)
-		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
-			"Forwarding altered outgoing ICMPv6");
-	else
-		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
-			"Forwarding altered incoming ICMPv6");
-}
-#endif
-
-/* Handle relevant response ICMP messages - forward to the right
- * destination host. Used for NAT and local client.
- */
-static int handle_response_icmp(int af, struct sk_buff *skb,
-				union nf_inet_addr *snet,
-				__u8 protocol, struct ip_vs_conn *cp,
-				struct ip_vs_protocol *pp,
-				unsigned int offset, unsigned int ihl)
-{
-	unsigned int verdict = NF_DROP;
-
-	if (IP_VS_FWD_METHOD(cp) != 0) {
-		IP_VS_ERR("shouldn't reach here, because the box is on the "
-			  "half connection in the tun/dr module.\n");
-	}
-
-	/* Ensure the checksum is correct */
-	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
-		/* Failed checksum! */
-		IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n",
-			      IP_VS_DBG_ADDR(af, snet));
-		goto out;
-	}
-
-	if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
-		offset += 2 * sizeof(__u16);
-	if (!skb_make_writable(skb, offset))
-		goto out;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		ip_vs_nat_icmp_v6(skb, pp, cp, 1);
-	else
-#endif
-		ip_vs_nat_icmp(skb, pp, cp, 1);
-
-	/* do the statistics and put it back */
-	ip_vs_out_stats(cp, skb);
-
-	skb->ipvs_property = 1;
-	verdict = NF_ACCEPT;
-
-out:
-	__ip_vs_conn_put(cp);
-
-	return verdict;
-}
-
-/*
- *	Handle ICMP messages in the inside-to-outside direction (outgoing).
- *	Find any that might be relevant, check against existing connections.
- *	Currently handles error types - unreachable, quench, ttl exceeded.
- */
-static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
-{
-	struct iphdr *iph;
-	struct icmphdr	_icmph, *ic;
-	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
-	struct ip_vs_iphdr ciph;
-	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp;
-	unsigned int offset, ihl;
-	union nf_inet_addr snet;
-
-	*related = 1;
-
-	/* reassemble IP fragments */
-	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
-			return NF_STOLEN;
-	}
-
-	iph = ip_hdr(skb);
-	offset = ihl = iph->ihl * 4;
-	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
-	if (ic == NULL)
-		return NF_DROP;
-
-	IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
-		  ic->type, ntohs(icmp_id(ic)),
-		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
-
-	/*
-	 * Work through seeing if this is for us.
-	 * These checks are supposed to be in an order that means easy
-	 * things are checked first to speed up processing.... however
-	 * this means that some packets will manage to get a long way
-	 * down this stack and then be rejected, but that's life.
-	 */
-	if ((ic->type != ICMP_DEST_UNREACH) &&
-	    (ic->type != ICMP_SOURCE_QUENCH) &&
-	    (ic->type != ICMP_TIME_EXCEEDED)) {
-		*related = 0;
-		return NF_ACCEPT;
-	}
-
-	/* Now find the contained IP header */
-	offset += sizeof(_icmph);
-	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
-	if (cih == NULL)
-		return NF_ACCEPT; /* The packet looks wrong, ignore */
-
-	pp = ip_vs_proto_get(cih->protocol);
-	if (!pp)
-		return NF_ACCEPT;
-
-	/* Is the embedded protocol header present? */
-	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
-		     pp->dont_defrag))
-		return NF_ACCEPT;
-
-	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");
-
-	offset += cih->ihl * 4;
-
-	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
-	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
-	if (!cp)
-		return NF_ACCEPT;
-
-	snet.ip = iph->saddr;
-	return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
-				    pp, offset, ihl);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
-{
-	struct ipv6hdr *iph;
-	struct icmp6hdr	_icmph, *ic;
-	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
-					   within the ICMP */
-	struct ip_vs_iphdr ciph;
-	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp;
-	unsigned int offset;
-	union nf_inet_addr snet;
-
-	*related = 1;
-
-	/* reassemble IP fragments */
-	if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
-		if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
-			return NF_STOLEN;
-	}
-
-	iph = ipv6_hdr(skb);
-	offset = sizeof(struct ipv6hdr);
-	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
-	if (ic == NULL)
-		return NF_DROP;
-
-	IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
-		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
-		  NIP6(iph->saddr), NIP6(iph->daddr));
-
-	/*
-	 * Work through seeing if this is for us.
-	 * These checks are supposed to be in an order that means easy
-	 * things are checked first to speed up processing.... however
-	 * this means that some packets will manage to get a long way
-	 * down this stack and then be rejected, but that's life.
-	 */
-	if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
-	    (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
-	    (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
-		*related = 0;
-		return NF_ACCEPT;
-	}
-
-	/* Now find the contained IP header */
-	offset += sizeof(_icmph);
-	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
-	if (cih == NULL)
-		return NF_ACCEPT; /* The packet looks wrong, ignore */
-
-	pp = ip_vs_proto_get(cih->nexthdr);
-	if (!pp)
-		return NF_ACCEPT;
-
-	/* Is the embedded protocol header present? */
-	/* TODO: we don't support fragmentation at the moment anyways */
-	if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
-		return NF_ACCEPT;
-
-	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
-
-	offset += sizeof(struct ipv6hdr);
-
-	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
-	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
-	if (!cp)
-		return NF_ACCEPT;
-
-	ipv6_addr_copy(&snet.in6, &iph->saddr);
-	return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
-				    pp, offset, sizeof(struct ipv6hdr));
-}
-#endif
-
-static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
-{
-	struct tcphdr _tcph, *th;
-
-	th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph);
-	if (th == NULL)
-		return 0;
-	return th->rst;
-}
-
-/* Handle response packets: rewrite addresses and send away...
- * Used for NAT and local client.
- */
-static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
-		struct ip_vs_conn *cp, int ihl)
-{
-	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
-
-	if (!skb_make_writable(skb, ihl))
-		goto drop;
-
-	/* mangle the packet */
-	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
-		goto drop;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		ipv6_hdr(skb)->saddr = cp->vaddr.in6;
-	else
-#endif
-	{
-		ip_hdr(skb)->saddr = cp->vaddr.ip;
-		ip_send_check(ip_hdr(skb));
-	}
-
-	/* For policy routing, packets originating from this
-	 * machine itself may be routed differently to packets
-	 * passing through.  We want this packet to be routed as
-	 * if it came from this machine itself.  So re-compute
-	 * the routing information.
-	 */
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6) {
-		if (ip6_route_me_harder(skb) != 0)
-			goto drop;
-	} else
-#endif
-		if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
-			goto drop;
-
-	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
-
-	ip_vs_out_stats(cp, skb);
-	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
-	ip_vs_conn_put(cp);
-
-	skb->ipvs_property = 1;
-
-	LeaveFunction(11);
-	return NF_ACCEPT;
-
-drop:
-	ip_vs_conn_put(cp);
-	kfree_skb(skb);
-	return NF_STOLEN;
-}
-
-/*
- *	It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
- *	Check if outgoing packet belongs to the established ip_vs_conn.
- */
-static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
-	  const struct net_device *in, const struct net_device *out,
-	  int (*okfn)(struct sk_buff *))
-{
-	struct ip_vs_iphdr iph;
-	struct ip_vs_protocol *pp;
-	struct ip_vs_conn *cp;
-	int af;
-
-	EnterFunction(11);
-
-	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
-
-	if (skb->ipvs_property)
-		return NF_ACCEPT;
-
-	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6) {
-		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
-			int related, verdict = ip_vs_out_icmp_v6(skb, &related);
-
-			if (related)
-				return verdict;
-			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-		}
-	} else
-#endif
-		if (unlikely(iph.protocol == IPPROTO_ICMP)) {
-			int related, verdict = ip_vs_out_icmp(skb, &related);
-
-			if (related)
-				return verdict;
-			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-		}
-
-	pp = ip_vs_proto_get(iph.protocol);
-	if (unlikely(!pp))
-		return NF_ACCEPT;
-
-	/* reassemble IP fragments */
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6) {
-		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
-			int related, verdict = ip_vs_out_icmp_v6(skb, &related);
-
-			if (related)
-				return verdict;
-
-			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-		}
-	} else
-#endif
-		if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
-			     !pp->dont_defrag)) {
-			if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
-				return NF_STOLEN;
-
-			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-		}
-
-	/*
-	 * Check if the packet belongs to an existing entry
-	 */
-	cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
-
-	if (unlikely(!cp)) {
-		if (sysctl_ip_vs_nat_icmp_send &&
-		    (pp->protocol == IPPROTO_TCP ||
-		     pp->protocol == IPPROTO_UDP)) {
-			__be16 _ports[2], *pptr;
-
-			pptr = skb_header_pointer(skb, iph.len,
-						  sizeof(_ports), _ports);
-			if (pptr == NULL)
-				return NF_ACCEPT;	/* Not for me */
-			if (ip_vs_lookup_real_service(af, iph.protocol,
-						      &iph.saddr,
-						      pptr[0])) {
-				/*
-				 * Notify the real server: there is no
-				 * existing entry if it is not RST
-				 * packet or not TCP packet.
-				 */
-				if (iph.protocol != IPPROTO_TCP
-				    || !is_tcp_reset(skb, iph.len)) {
-#ifdef CONFIG_IP_VS_IPV6
-					if (af == AF_INET6)
-						icmpv6_send(skb,
-							    ICMPV6_DEST_UNREACH,
-							    ICMPV6_PORT_UNREACH,
-							    0, skb->dev);
-					else
-#endif
-						icmp_send(skb,
-							  ICMP_DEST_UNREACH,
-							  ICMP_PORT_UNREACH, 0);
-					return NF_DROP;
-				}
-			}
-		}
-		IP_VS_DBG_PKT(12, pp, skb, 0,
-			      "packet continues traversal as normal");
-		return NF_ACCEPT;
-	}
-
-	return handle_response(af, skb, pp, cp, iph.len);
-}
-
-
-/*
- *	Handle ICMP messages in the outside-to-inside direction (incoming).
- *	Find any that might be relevant, check against existing connections,
- *	forward to the right destination host if relevant.
- *	Currently handles error types - unreachable, quench, ttl exceeded.
- */
-static int
-ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
-{
-	struct iphdr *iph;
-	struct icmphdr	_icmph, *ic;
-	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
-	struct ip_vs_iphdr ciph;
-	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp;
-	unsigned int offset, ihl, verdict;
-	union nf_inet_addr snet;
-
-	*related = 1;
-
-	/* reassemble IP fragments */
-	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
-					    IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
-			return NF_STOLEN;
-	}
-
-	iph = ip_hdr(skb);
-	offset = ihl = iph->ihl * 4;
-	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
-	if (ic == NULL)
-		return NF_DROP;
-
-	IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
-		  ic->type, ntohs(icmp_id(ic)),
-		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
-
-	/*
-	 * Work through seeing if this is for us.
-	 * These checks are supposed to be in an order that means easy
-	 * things are checked first to speed up processing.... however
-	 * this means that some packets will manage to get a long way
-	 * down this stack and then be rejected, but that's life.
-	 */
-	if ((ic->type != ICMP_DEST_UNREACH) &&
-	    (ic->type != ICMP_SOURCE_QUENCH) &&
-	    (ic->type != ICMP_TIME_EXCEEDED)) {
-		*related = 0;
-		return NF_ACCEPT;
-	}
-
-	/* Now find the contained IP header */
-	offset += sizeof(_icmph);
-	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
-	if (cih == NULL)
-		return NF_ACCEPT; /* The packet looks wrong, ignore */
-
-	pp = ip_vs_proto_get(cih->protocol);
-	if (!pp)
-		return NF_ACCEPT;
-
-	/* Is the embedded protocol header present? */
-	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
-		     pp->dont_defrag))
-		return NF_ACCEPT;
-
-	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");
-
-	offset += cih->ihl * 4;
-
-	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
-	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
-	if (!cp) {
-		/* The packet could also belong to a local client */
-		cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
-		if (cp) {
-			snet.ip = iph->saddr;
-			return handle_response_icmp(AF_INET, skb, &snet,
-						    cih->protocol, cp, pp,
-						    offset, ihl);
-		}
-		return NF_ACCEPT;
-	}
-
-	verdict = NF_DROP;
-
-	/* Ensure the checksum is correct */
-	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
-		/* Failed checksum! */
-		IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
-			  NIPQUAD(iph->saddr));
-		goto out;
-	}
-
-	/* do the statistics and put it back */
-	ip_vs_in_stats(cp, skb);
-	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
-		offset += 2 * sizeof(__u16);
-	verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
-	/* do not touch skb anymore */
-
-  out:
-	__ip_vs_conn_put(cp);
-
-	return verdict;
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-static int
-ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
-{
-	struct ipv6hdr *iph;
-	struct icmp6hdr	_icmph, *ic;
-	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
-					   within the ICMP */
-	struct ip_vs_iphdr ciph;
-	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp;
-	unsigned int offset, verdict;
-	union nf_inet_addr snet;
-
-	*related = 1;
-
-	/* reassemble IP fragments */
-	if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
-		if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
-					       IP_DEFRAG_VS_IN :
-					       IP_DEFRAG_VS_FWD))
-			return NF_STOLEN;
-	}
-
-	iph = ipv6_hdr(skb);
-	offset = sizeof(struct ipv6hdr);
-	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
-	if (ic == NULL)
-		return NF_DROP;
-
-	IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
-		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
-		  NIP6(iph->saddr), NIP6(iph->daddr));
-
-	/*
-	 * Work through seeing if this is for us.
-	 * These checks are supposed to be in an order that means easy
-	 * things are checked first to speed up processing.... however
-	 * this means that some packets will manage to get a long way
-	 * down this stack and then be rejected, but that's life.
-	 */
-	if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
-	    (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
-	    (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
-		*related = 0;
-		return NF_ACCEPT;
-	}
-
-	/* Now find the contained IP header */
-	offset += sizeof(_icmph);
-	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
-	if (cih == NULL)
-		return NF_ACCEPT; /* The packet looks wrong, ignore */
-
-	pp = ip_vs_proto_get(cih->nexthdr);
-	if (!pp)
-		return NF_ACCEPT;
-
-	/* Is the embedded protocol header present? */
-	/* TODO: we don't support fragmentation at the moment anyways */
-	if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
-		return NF_ACCEPT;
-
-	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
-
-	offset += sizeof(struct ipv6hdr);
-
-	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
-	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
-	if (!cp) {
-		/* The packet could also belong to a local client */
-		cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
-		if (cp) {
-			ipv6_addr_copy(&snet.in6, &iph->saddr);
-			return handle_response_icmp(AF_INET6, skb, &snet,
-						    cih->nexthdr,
-						    cp, pp, offset,
-						    sizeof(struct ipv6hdr));
-		}
-		return NF_ACCEPT;
-	}
-
-	verdict = NF_DROP;
-
-	/* do the statistics and put it back */
-	ip_vs_in_stats(cp, skb);
-	if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
-		offset += 2 * sizeof(__u16);
-	verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
-	/* do not touch skb anymore */
-
-	__ip_vs_conn_put(cp);
-
-	return verdict;
-}
-#endif
-
-
-/*
- *	Check if it's for virtual services, look it up,
- *	and send it on its way...
- */
-static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
-	 const struct net_device *in, const struct net_device *out,
-	 int (*okfn)(struct sk_buff *))
-{
-	struct ip_vs_iphdr iph;
-	struct ip_vs_protocol *pp;
-	struct ip_vs_conn *cp;
-	int ret, restart, af;
-
-	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
-
-	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-
-	/*
-	 *	Big tappo: only PACKET_HOST, including loopback for local client
-	 *	Don't handle local packets on IPv6 for now
-	 */
-	if (unlikely(skb->pkt_type != PACKET_HOST)) {
-		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
-			      skb->pkt_type,
-			      iph.protocol,
-			      IP_VS_DBG_ADDR(af, &iph.daddr));
-		return NF_ACCEPT;
-	}
-
-	if (unlikely(iph.protocol == IPPROTO_ICMP)) {
-		int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
-
-		if (related)
-			return verdict;
-		ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-	}
-
-	/* Protocol supported? */
-	pp = ip_vs_proto_get(iph.protocol);
-	if (unlikely(!pp))
-		return NF_ACCEPT;
-
-	/*
-	 * Check if the packet belongs to an existing connection entry
-	 */
-	cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
-
-	if (unlikely(!cp)) {
-		int v;
-
-		/* For local client packets, it could be a response */
-		cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
-		if (cp)
-			return handle_response(af, skb, pp, cp, iph.len);
-
-		if (!pp->conn_schedule(af, skb, pp, &v, &cp))
-			return v;
-	}
-
-	if (unlikely(!cp)) {
-		/* sorry, all this trouble for a no-hit :) */
-		IP_VS_DBG_PKT(12, pp, skb, 0,
-			      "packet continues traversal as normal");
-		return NF_ACCEPT;
-	}
-
-	IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");
-
-	/* Check the server status */
-	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-		/* the destination server is not available */
-
-		if (sysctl_ip_vs_expire_nodest_conn) {
-			/* try to expire the connection immediately */
-			ip_vs_conn_expire_now(cp);
-		}
-		/* don't restart its timer, and silently
-		   drop the packet. */
-		__ip_vs_conn_put(cp);
-		return NF_DROP;
-	}
-
-	ip_vs_in_stats(cp, skb);
-	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
-	if (cp->packet_xmit)
-		ret = cp->packet_xmit(skb, cp, pp);
-		/* do not touch skb anymore */
-	else {
-		IP_VS_DBG_RL("warning: packet_xmit is null");
-		ret = NF_ACCEPT;
-	}
-
-	/* Increase its packet counter and check if it is needed
-	 * to be synchronized
-	 *
-	 * Sync connection if it is about to close to
-	 * encorage the standby servers to update the connections timeout
-	 */
-	atomic_inc(&cp->in_pkts);
-	if (af == AF_INET &&
-	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
-	    (((cp->protocol != IPPROTO_TCP ||
-	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-	      (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
-	       == sysctl_ip_vs_sync_threshold[0])) ||
-	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
-	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
-	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
-	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
-		ip_vs_sync_conn(cp);
-	cp->old_state = cp->state;
-
-	ip_vs_conn_put(cp);
-	return ret;
-}
-
-
-/*
- *	It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
- *      related packets destined for 0.0.0.0/0.
- *      When fwmark-based virtual service is used, such as transparent
- *      cache cluster, TCP packets can be marked and routed to ip_vs_in,
- *      but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
- *      sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
- *      and send them to ip_vs_in_icmp.
- */
-static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
-		   const struct net_device *in, const struct net_device *out,
-		   int (*okfn)(struct sk_buff *))
-{
-	int r;
-
-	if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
-		return NF_ACCEPT;
-
-	return ip_vs_in_icmp(skb, &r, hooknum);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-static unsigned int
-ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
-		      const struct net_device *in, const struct net_device *out,
-		      int (*okfn)(struct sk_buff *))
-{
-	int r;
-
-	if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
-		return NF_ACCEPT;
-
-	return ip_vs_in_icmp_v6(skb, &r, hooknum);
-}
-#endif
-
-
-static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
-	/* After packet filtering, forward packet through VS/DR, VS/TUN,
-	 * or VS/NAT(change destination), so that filtering rules can be
-	 * applied to IPVS. */
-	{
-		.hook		= ip_vs_in,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum        = NF_INET_LOCAL_IN,
-		.priority       = 100,
-	},
-	/* After packet filtering, change source only for VS/NAT */
-	{
-		.hook		= ip_vs_out,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum        = NF_INET_FORWARD,
-		.priority       = 100,
-	},
-	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
-	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
-	{
-		.hook		= ip_vs_forward_icmp,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum        = NF_INET_FORWARD,
-		.priority       = 99,
-	},
-	/* Before the netfilter connection tracking, exit from POST_ROUTING */
-	{
-		.hook		= ip_vs_post_routing,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum        = NF_INET_POST_ROUTING,
-		.priority       = NF_IP_PRI_NAT_SRC-1,
-	},
-#ifdef CONFIG_IP_VS_IPV6
-	/* After packet filtering, forward packet through VS/DR, VS/TUN,
-	 * or VS/NAT(change destination), so that filtering rules can be
-	 * applied to IPVS. */
-	{
-		.hook		= ip_vs_in,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum        = NF_INET_LOCAL_IN,
-		.priority       = 100,
-	},
-	/* After packet filtering, change source only for VS/NAT */
-	{
-		.hook		= ip_vs_out,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum        = NF_INET_FORWARD,
-		.priority       = 100,
-	},
-	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
-	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
-	{
-		.hook		= ip_vs_forward_icmp_v6,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum        = NF_INET_FORWARD,
-		.priority       = 99,
-	},
-	/* Before the netfilter connection tracking, exit from POST_ROUTING */
-	{
-		.hook		= ip_vs_post_routing,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET6,
-		.hooknum        = NF_INET_POST_ROUTING,
-		.priority       = NF_IP6_PRI_NAT_SRC-1,
-	},
-#endif
-};
-
-
-/*
- *	Initialize IP Virtual Server
- */
-static int __init ip_vs_init(void)
-{
-	int ret;
-
-	ip_vs_estimator_init();
-
-	ret = ip_vs_control_init();
-	if (ret < 0) {
-		IP_VS_ERR("can't setup control.\n");
-		goto cleanup_estimator;
-	}
-
-	ip_vs_protocol_init();
-
-	ret = ip_vs_app_init();
-	if (ret < 0) {
-		IP_VS_ERR("can't setup application helper.\n");
-		goto cleanup_protocol;
-	}
-
-	ret = ip_vs_conn_init();
-	if (ret < 0) {
-		IP_VS_ERR("can't setup connection table.\n");
-		goto cleanup_app;
-	}
-
-	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
-	if (ret < 0) {
-		IP_VS_ERR("can't register hooks.\n");
-		goto cleanup_conn;
-	}
-
-	IP_VS_INFO("ipvs loaded.\n");
-	return ret;
-
-  cleanup_conn:
-	ip_vs_conn_cleanup();
-  cleanup_app:
-	ip_vs_app_cleanup();
-  cleanup_protocol:
-	ip_vs_protocol_cleanup();
-	ip_vs_control_cleanup();
-  cleanup_estimator:
-	ip_vs_estimator_cleanup();
-	return ret;
-}
-
-static void __exit ip_vs_cleanup(void)
-{
-	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
-	ip_vs_conn_cleanup();
-	ip_vs_app_cleanup();
-	ip_vs_protocol_cleanup();
-	ip_vs_control_cleanup();
-	ip_vs_estimator_cleanup();
-	IP_VS_INFO("ipvs unloaded.\n");
-}
-
-module_init(ip_vs_init);
-module_exit(ip_vs_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
deleted file mode 100644
index 0302cf3e503..00000000000
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ /dev/null
@@ -1,3443 +0,0 @@
-/*
- * IPVS         An implementation of the IP virtual server support for the
- *              LINUX operating system.  IPVS is now implemented as a module
- *              over the NetFilter framework. IPVS can be used to build a
- *              high-performance and highly available server based on a
- *              cluster of servers.
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Peter Kese <peter.kese@ijs.si>
- *              Julian Anastasov <ja@ssi.bg>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/capability.h>
-#include <linux/fs.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/swap.h>
-#include <linux/seq_file.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/mutex.h>
-
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#ifdef CONFIG_IP_VS_IPV6
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#endif
-#include <net/route.h>
-#include <net/sock.h>
-#include <net/genetlink.h>
-
-#include <asm/uaccess.h>
-
-#include <net/ip_vs.h>
-
-/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
-static DEFINE_MUTEX(__ip_vs_mutex);
-
-/* lock for service table */
-static DEFINE_RWLOCK(__ip_vs_svc_lock);
-
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
-/* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-
-
-#ifdef CONFIG_IP_VS_DEBUG
-static int sysctl_ip_vs_debug_level = 0;
-
-int ip_vs_get_debug_level(void)
-{
-	return sysctl_ip_vs_debug_level;
-}
-#endif
-
-#ifdef CONFIG_IP_VS_IPV6
-/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
-{
-	struct rt6_info *rt;
-	struct flowi fl = {
-		.oif = 0,
-		.nl_u = {
-			.ip6_u = {
-				.daddr = *addr,
-				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
-	};
-
-	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
-	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
-			return 1;
-
-	return 0;
-}
-#endif
-/*
- *	update_defense_level is called from keventd and from sysctl,
- *	so it needs to protect itself from softirqs
- */
-static void update_defense_level(void)
-{
-	struct sysinfo i;
-	static int old_secure_tcp = 0;
-	int availmem;
-	int nomem;
-	int to_change = -1;
-
-	/* we only count free and buffered memory (in pages) */
-	si_meminfo(&i);
-	availmem = i.freeram + i.bufferram;
-	/* however in linux 2.5 the i.bufferram is total page cache size,
-	   we need adjust it */
-	/* si_swapinfo(&i); */
-	/* availmem = availmem - (i.totalswap - i.freeswap); */
-
-	nomem = (availmem < sysctl_ip_vs_amemthresh);
-
-	local_bh_disable();
-
-	/* drop_entry */
-	spin_lock(&__ip_vs_dropentry_lock);
-	switch (sysctl_ip_vs_drop_entry) {
-	case 0:
-		atomic_set(&ip_vs_dropentry, 0);
-		break;
-	case 1:
-		if (nomem) {
-			atomic_set(&ip_vs_dropentry, 1);
-			sysctl_ip_vs_drop_entry = 2;
-		} else {
-			atomic_set(&ip_vs_dropentry, 0);
-		}
-		break;
-	case 2:
-		if (nomem) {
-			atomic_set(&ip_vs_dropentry, 1);
-		} else {
-			atomic_set(&ip_vs_dropentry, 0);
-			sysctl_ip_vs_drop_entry = 1;
-		};
-		break;
-	case 3:
-		atomic_set(&ip_vs_dropentry, 1);
-		break;
-	}
-	spin_unlock(&__ip_vs_dropentry_lock);
-
-	/* drop_packet */
-	spin_lock(&__ip_vs_droppacket_lock);
-	switch (sysctl_ip_vs_drop_packet) {
-	case 0:
-		ip_vs_drop_rate = 0;
-		break;
-	case 1:
-		if (nomem) {
-			ip_vs_drop_rate = ip_vs_drop_counter
-				= sysctl_ip_vs_amemthresh /
-				(sysctl_ip_vs_amemthresh-availmem);
-			sysctl_ip_vs_drop_packet = 2;
-		} else {
-			ip_vs_drop_rate = 0;
-		}
-		break;
-	case 2:
-		if (nomem) {
-			ip_vs_drop_rate = ip_vs_drop_counter
-				= sysctl_ip_vs_amemthresh /
-				(sysctl_ip_vs_amemthresh-availmem);
-		} else {
-			ip_vs_drop_rate = 0;
-			sysctl_ip_vs_drop_packet = 1;
-		}
-		break;
-	case 3:
-		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
-		break;
-	}
-	spin_unlock(&__ip_vs_droppacket_lock);
-
-	/* secure_tcp */
-	write_lock(&__ip_vs_securetcp_lock);
-	switch (sysctl_ip_vs_secure_tcp) {
-	case 0:
-		if (old_secure_tcp >= 2)
-			to_change = 0;
-		break;
-	case 1:
-		if (nomem) {
-			if (old_secure_tcp < 2)
-				to_change = 1;
-			sysctl_ip_vs_secure_tcp = 2;
-		} else {
-			if (old_secure_tcp >= 2)
-				to_change = 0;
-		}
-		break;
-	case 2:
-		if (nomem) {
-			if (old_secure_tcp < 2)
-				to_change = 1;
-		} else {
-			if (old_secure_tcp >= 2)
-				to_change = 0;
-			sysctl_ip_vs_secure_tcp = 1;
-		}
-		break;
-	case 3:
-		if (old_secure_tcp < 2)
-			to_change = 1;
-		break;
-	}
-	old_secure_tcp = sysctl_ip_vs_secure_tcp;
-	if (to_change >= 0)
-		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
-	write_unlock(&__ip_vs_securetcp_lock);
-
-	local_bh_enable();
-}
-
-
-/*
- *	Timer for checking the defense
- */
-#define DEFENSE_TIMER_PERIOD	1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
-
-static void defense_work_handler(struct work_struct *work)
-{
-	update_defense_level();
-	if (atomic_read(&ip_vs_dropentry))
-		ip_vs_random_dropentry();
-
-	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-}
-
-int
-ip_vs_use_count_inc(void)
-{
-	return try_module_get(THIS_MODULE);
-}
-
-void
-ip_vs_use_count_dec(void)
-{
-	module_put(THIS_MODULE);
-}
-
-
-/*
- *	Hash table: for virtual service lookups
- */
-#define IP_VS_SVC_TAB_BITS 8
-#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
-#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
-
-/* the service table hashed by <protocol, addr, port> */
-static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
-/* the service table hashed by fwmark */
-static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
-
-/*
- *	Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- *	Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- *	FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
-
-/*
- *	Returns hash value for virtual service
- */
-static __inline__ unsigned
-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
-		  __be16 port)
-{
-	register unsigned porth = ntohs(port);
-	__be32 addr_fold = addr->ip;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		addr_fold = addr->ip6[0]^addr->ip6[1]^
-			    addr->ip6[2]^addr->ip6[3];
-#endif
-
-	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
-		& IP_VS_SVC_TAB_MASK;
-}
-
-/*
- *	Returns hash value of fwmark for virtual service lookup
- */
-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
-{
-	return fwmark & IP_VS_SVC_TAB_MASK;
-}
-
-/*
- *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
- *	or in the ip_vs_svc_fwm_table by fwmark.
- *	Should be called with locked tables.
- */
-static int ip_vs_svc_hash(struct ip_vs_service *svc)
-{
-	unsigned hash;
-
-	if (svc->flags & IP_VS_SVC_F_HASHED) {
-		IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
-			  "called from %p\n", __builtin_return_address(0));
-		return 0;
-	}
-
-	if (svc->fwmark == 0) {
-		/*
-		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
-		 */
-		hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
-					 svc->port);
-		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
-	} else {
-		/*
-		 *  Hash it by fwmark in ip_vs_svc_fwm_table
-		 */
-		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
-		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
-	}
-
-	svc->flags |= IP_VS_SVC_F_HASHED;
-	/* increase its refcnt because it is referenced by the svc table */
-	atomic_inc(&svc->refcnt);
-	return 1;
-}
-
-
-/*
- *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
- *	Should be called with locked tables.
- */
-static int ip_vs_svc_unhash(struct ip_vs_service *svc)
-{
-	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
-		IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
-			  "called from %p\n", __builtin_return_address(0));
-		return 0;
-	}
-
-	if (svc->fwmark == 0) {
-		/* Remove it from the ip_vs_svc_table table */
-		list_del(&svc->s_list);
-	} else {
-		/* Remove it from the ip_vs_svc_fwm_table table */
-		list_del(&svc->f_list);
-	}
-
-	svc->flags &= ~IP_VS_SVC_F_HASHED;
-	atomic_dec(&svc->refcnt);
-	return 1;
-}
-
-
-/*
- *	Get service by {proto,addr,port} in the service table.
- */
-static inline struct ip_vs_service *
-__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
-		    __be16 vport)
-{
-	unsigned hash;
-	struct ip_vs_service *svc;
-
-	/* Check for "full" addressed entries */
-	hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
-
-	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
-		if ((svc->af == af)
-		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
-		    && (svc->port == vport)
-		    && (svc->protocol == protocol)) {
-			/* HIT */
-			atomic_inc(&svc->usecnt);
-			return svc;
-		}
-	}
-
-	return NULL;
-}
-
-
-/*
- *	Get service by {fwmark} in the service table.
- */
-static inline struct ip_vs_service *
-__ip_vs_svc_fwm_get(int af, __u32 fwmark)
-{
-	unsigned hash;
-	struct ip_vs_service *svc;
-
-	/* Check for fwmark addressed entries */
-	hash = ip_vs_svc_fwm_hashkey(fwmark);
-
-	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
-		if (svc->fwmark == fwmark && svc->af == af) {
-			/* HIT */
-			atomic_inc(&svc->usecnt);
-			return svc;
-		}
-	}
-
-	return NULL;
-}
-
-struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
-		  const union nf_inet_addr *vaddr, __be16 vport)
-{
-	struct ip_vs_service *svc;
-
-	read_lock(&__ip_vs_svc_lock);
-
-	/*
-	 *	Check the table hashed by fwmark first
-	 */
-	if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
-		goto out;
-
-	/*
-	 *	Check the table hashed by <protocol,addr,port>
-	 *	for "full" addressed entries
-	 */
-	svc = __ip_vs_service_get(af, protocol, vaddr, vport);
-
-	if (svc == NULL
-	    && protocol == IPPROTO_TCP
-	    && atomic_read(&ip_vs_ftpsvc_counter)
-	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
-		/*
-		 * Check if ftp service entry exists, the packet
-		 * might belong to FTP data connections.
-		 */
-		svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
-	}
-
-	if (svc == NULL
-	    && atomic_read(&ip_vs_nullsvc_counter)) {
-		/*
-		 * Check if the catch-all port (port zero) exists
-		 */
-		svc = __ip_vs_service_get(af, protocol, vaddr, 0);
-	}
-
-  out:
-	read_unlock(&__ip_vs_svc_lock);
-
-	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
-		      fwmark, ip_vs_proto_name(protocol),
-		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
-		      svc ? "hit" : "not hit");
-
-	return svc;
-}
-
-
-static inline void
-__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
-{
-	atomic_inc(&svc->refcnt);
-	dest->svc = svc;
-}
-
-static inline void
-__ip_vs_unbind_svc(struct ip_vs_dest *dest)
-{
-	struct ip_vs_service *svc = dest->svc;
-
-	dest->svc = NULL;
-	if (atomic_dec_and_test(&svc->refcnt))
-		kfree(svc);
-}
-
-
-/*
- *	Returns hash value for real service
- */
-static inline unsigned ip_vs_rs_hashkey(int af,
-					    const union nf_inet_addr *addr,
-					    __be16 port)
-{
-	register unsigned porth = ntohs(port);
-	__be32 addr_fold = addr->ip;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		addr_fold = addr->ip6[0]^addr->ip6[1]^
-			    addr->ip6[2]^addr->ip6[3];
-#endif
-
-	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
-		& IP_VS_RTAB_MASK;
-}
-
-/*
- *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
- *	should be called with locked tables.
- */
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
-{
-	unsigned hash;
-
-	if (!list_empty(&dest->d_list)) {
-		return 0;
-	}
-
-	/*
-	 *	Hash by proto,addr,port,
-	 *	which are the parameters of the real service.
-	 */
-	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
-
-	list_add(&dest->d_list, &ip_vs_rtable[hash]);
-
-	return 1;
-}
-
-/*
- *	UNhashes ip_vs_dest from ip_vs_rtable.
- *	should be called with locked tables.
- */
-static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
-{
-	/*
-	 * Remove it from the ip_vs_rtable table.
-	 */
-	if (!list_empty(&dest->d_list)) {
-		list_del(&dest->d_list);
-		INIT_LIST_HEAD(&dest->d_list);
-	}
-
-	return 1;
-}
-
-/*
- *	Lookup real service by <proto,addr,port> in the real service table.
- */
-struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
-			  const union nf_inet_addr *daddr,
-			  __be16 dport)
-{
-	unsigned hash;
-	struct ip_vs_dest *dest;
-
-	/*
-	 *	Check for "full" addressed entries
-	 *	Return the first found entry
-	 */
-	hash = ip_vs_rs_hashkey(af, daddr, dport);
-
-	read_lock(&__ip_vs_rs_lock);
-	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
-		if ((dest->af == af)
-		    && ip_vs_addr_equal(af, &dest->addr, daddr)
-		    && (dest->port == dport)
-		    && ((dest->protocol == protocol) ||
-			dest->vfwmark)) {
-			/* HIT */
-			read_unlock(&__ip_vs_rs_lock);
-			return dest;
-		}
-	}
-	read_unlock(&__ip_vs_rs_lock);
-
-	return NULL;
-}
-
-/*
- *	Lookup destination by {addr,port} in the given service
- */
-static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
-		  __be16 dport)
-{
-	struct ip_vs_dest *dest;
-
-	/*
-	 * Find the destination for the given service
-	 */
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if ((dest->af == svc->af)
-		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
-		    && (dest->port == dport)) {
-			/* HIT */
-			return dest;
-		}
-	}
-
-	return NULL;
-}
-
-/*
- * Find destination by {daddr,dport,vaddr,protocol}
- * Cretaed to be used in ip_vs_process_message() in
- * the backup synchronization daemon. It finds the
- * destination to be bound to the received connection
- * on the backup.
- *
- * ip_vs_lookup_real_service() looked promissing, but
- * seems not working as expected.
- */
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
-				   __be16 dport,
-				   const union nf_inet_addr *vaddr,
-				   __be16 vport, __u16 protocol)
-{
-	struct ip_vs_dest *dest;
-	struct ip_vs_service *svc;
-
-	svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
-	if (!svc)
-		return NULL;
-	dest = ip_vs_lookup_dest(svc, daddr, dport);
-	if (dest)
-		atomic_inc(&dest->refcnt);
-	ip_vs_service_put(svc);
-	return dest;
-}
-
-/*
- *  Lookup dest by {svc,addr,port} in the destination trash.
- *  The destination trash is used to hold the destinations that are removed
- *  from the service table but are still referenced by some conn entries.
- *  The reason to add the destination trash is when the dest is temporary
- *  down (either by administrator or by monitor program), the dest can be
- *  picked back from the trash, the remaining connections to the dest can
- *  continue, and the counting information of the dest is also useful for
- *  scheduling.
- */
-static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
-		     __be16 dport)
-{
-	struct ip_vs_dest *dest, *nxt;
-
-	/*
-	 * Find the destination in trash
-	 */
-	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
-		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
-			      "dest->refcnt=%d\n",
-			      dest->vfwmark,
-			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
-			      ntohs(dest->port),
-			      atomic_read(&dest->refcnt));
-		if (dest->af == svc->af &&
-		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
-		    dest->port == dport &&
-		    dest->vfwmark == svc->fwmark &&
-		    dest->protocol == svc->protocol &&
-		    (svc->fwmark ||
-		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
-		      dest->vport == svc->port))) {
-			/* HIT */
-			return dest;
-		}
-
-		/*
-		 * Try to purge the destination from trash if not referenced
-		 */
-		if (atomic_read(&dest->refcnt) == 1) {
-			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
-				      "from trash\n",
-				      dest->vfwmark,
-				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
-				      ntohs(dest->port));
-			list_del(&dest->n_list);
-			ip_vs_dst_reset(dest);
-			__ip_vs_unbind_svc(dest);
-			kfree(dest);
-		}
-	}
-
-	return NULL;
-}
-
-
-/*
- *  Clean up all the destinations in the trash
- *  Called by the ip_vs_control_cleanup()
- *
- *  When the ip_vs_control_clearup is activated by ipvs module exit,
- *  the service tables must have been flushed and all the connections
- *  are expired, and the refcnt of each destination in the trash must
- *  be 1, so we simply release them here.
- */
-static void ip_vs_trash_cleanup(void)
-{
-	struct ip_vs_dest *dest, *nxt;
-
-	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
-		list_del(&dest->n_list);
-		ip_vs_dst_reset(dest);
-		__ip_vs_unbind_svc(dest);
-		kfree(dest);
-	}
-}
-
-
-static void
-ip_vs_zero_stats(struct ip_vs_stats *stats)
-{
-	spin_lock_bh(&stats->lock);
-
-	memset(&stats->ustats, 0, sizeof(stats->ustats));
-	ip_vs_zero_estimator(stats);
-
-	spin_unlock_bh(&stats->lock);
-}
-
-/*
- *	Update a destination in the given service
- */
-static void
-__ip_vs_update_dest(struct ip_vs_service *svc,
-		    struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
-{
-	int conn_flags;
-
-	/* set the weight and the flags */
-	atomic_set(&dest->weight, udest->weight);
-	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
-
-	/* check if local node and update the flags */
-#ifdef CONFIG_IP_VS_IPV6
-	if (svc->af == AF_INET6) {
-		if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
-			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
-				| IP_VS_CONN_F_LOCALNODE;
-		}
-	} else
-#endif
-		if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
-			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
-				| IP_VS_CONN_F_LOCALNODE;
-		}
-
-	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
-	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
-		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
-	} else {
-		/*
-		 *    Put the real service in ip_vs_rtable if not present.
-		 *    For now only for NAT!
-		 */
-		write_lock_bh(&__ip_vs_rs_lock);
-		ip_vs_rs_hash(dest);
-		write_unlock_bh(&__ip_vs_rs_lock);
-	}
-	atomic_set(&dest->conn_flags, conn_flags);
-
-	/* bind the service */
-	if (!dest->svc) {
-		__ip_vs_bind_svc(dest, svc);
-	} else {
-		if (dest->svc != svc) {
-			__ip_vs_unbind_svc(dest);
-			ip_vs_zero_stats(&dest->stats);
-			__ip_vs_bind_svc(dest, svc);
-		}
-	}
-
-	/* set the dest status flags */
-	dest->flags |= IP_VS_DEST_F_AVAILABLE;
-
-	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
-		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
-	dest->u_threshold = udest->u_threshold;
-	dest->l_threshold = udest->l_threshold;
-}
-
-
-/*
- *	Create a destination for the given service
- */
-static int
-ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
-	       struct ip_vs_dest **dest_p)
-{
-	struct ip_vs_dest *dest;
-	unsigned atype;
-
-	EnterFunction(2);
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (svc->af == AF_INET6) {
-		atype = ipv6_addr_type(&udest->addr.in6);
-		if ((!(atype & IPV6_ADDR_UNICAST) ||
-			atype & IPV6_ADDR_LINKLOCAL) &&
-			!__ip_vs_addr_is_local_v6(&udest->addr.in6))
-			return -EINVAL;
-	} else
-#endif
-	{
-		atype = inet_addr_type(&init_net, udest->addr.ip);
-		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
-			return -EINVAL;
-	}
-
-	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
-	if (dest == NULL) {
-		IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
-		return -ENOMEM;
-	}
-
-	dest->af = svc->af;
-	dest->protocol = svc->protocol;
-	dest->vaddr = svc->addr;
-	dest->vport = svc->port;
-	dest->vfwmark = svc->fwmark;
-	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
-	dest->port = udest->port;
-
-	atomic_set(&dest->activeconns, 0);
-	atomic_set(&dest->inactconns, 0);
-	atomic_set(&dest->persistconns, 0);
-	atomic_set(&dest->refcnt, 0);
-
-	INIT_LIST_HEAD(&dest->d_list);
-	spin_lock_init(&dest->dst_lock);
-	spin_lock_init(&dest->stats.lock);
-	__ip_vs_update_dest(svc, dest, udest);
-	ip_vs_new_estimator(&dest->stats);
-
-	*dest_p = dest;
-
-	LeaveFunction(2);
-	return 0;
-}
-
-
-/*
- *	Add a destination into an existing service
- */
-static int
-ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
-{
-	struct ip_vs_dest *dest;
-	union nf_inet_addr daddr;
-	__be16 dport = udest->port;
-	int ret;
-
-	EnterFunction(2);
-
-	if (udest->weight < 0) {
-		IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
-		return -ERANGE;
-	}
-
-	if (udest->l_threshold > udest->u_threshold) {
-		IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
-			  "upper threshold\n");
-		return -ERANGE;
-	}
-
-	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
-
-	/*
-	 * Check if the dest already exists in the list
-	 */
-	dest = ip_vs_lookup_dest(svc, &daddr, dport);
-
-	if (dest != NULL) {
-		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
-		return -EEXIST;
-	}
-
-	/*
-	 * Check if the dest already exists in the trash and
-	 * is from the same service
-	 */
-	dest = ip_vs_trash_get_dest(svc, &daddr, dport);
-
-	if (dest != NULL) {
-		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
-			      "dest->refcnt=%d, service %u/%s:%u\n",
-			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
-			      atomic_read(&dest->refcnt),
-			      dest->vfwmark,
-			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
-			      ntohs(dest->vport));
-
-		__ip_vs_update_dest(svc, dest, udest);
-
-		/*
-		 * Get the destination from the trash
-		 */
-		list_del(&dest->n_list);
-
-		ip_vs_new_estimator(&dest->stats);
-
-		write_lock_bh(&__ip_vs_svc_lock);
-
-		/*
-		 * Wait until all other svc users go away.
-		 */
-		IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
-
-		list_add(&dest->n_list, &svc->destinations);
-		svc->num_dests++;
-
-		/* call the update_service function of its scheduler */
-		if (svc->scheduler->update_service)
-			svc->scheduler->update_service(svc);
-
-		write_unlock_bh(&__ip_vs_svc_lock);
-		return 0;
-	}
-
-	/*
-	 * Allocate and initialize the dest structure
-	 */
-	ret = ip_vs_new_dest(svc, udest, &dest);
-	if (ret) {
-		return ret;
-	}
-
-	/*
-	 * Add the dest entry into the list
-	 */
-	atomic_inc(&dest->refcnt);
-
-	write_lock_bh(&__ip_vs_svc_lock);
-
-	/*
-	 * Wait until all other svc users go away.
-	 */
-	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
-
-	list_add(&dest->n_list, &svc->destinations);
-	svc->num_dests++;
-
-	/* call the update_service function of its scheduler */
-	if (svc->scheduler->update_service)
-		svc->scheduler->update_service(svc);
-
-	write_unlock_bh(&__ip_vs_svc_lock);
-
-	LeaveFunction(2);
-
-	return 0;
-}
-
-
-/*
- *	Edit a destination in the given service
- */
-static int
-ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
-{
-	struct ip_vs_dest *dest;
-	union nf_inet_addr daddr;
-	__be16 dport = udest->port;
-
-	EnterFunction(2);
-
-	if (udest->weight < 0) {
-		IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
-		return -ERANGE;
-	}
-
-	if (udest->l_threshold > udest->u_threshold) {
-		IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
-			  "upper threshold\n");
-		return -ERANGE;
-	}
-
-	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
-
-	/*
-	 *  Lookup the destination list
-	 */
-	dest = ip_vs_lookup_dest(svc, &daddr, dport);
-
-	if (dest == NULL) {
-		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
-		return -ENOENT;
-	}
-
-	__ip_vs_update_dest(svc, dest, udest);
-
-	write_lock_bh(&__ip_vs_svc_lock);
-
-	/* Wait until all other svc users go away */
-	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
-
-	/* call the update_service, because server weight may be changed */
-	if (svc->scheduler->update_service)
-		svc->scheduler->update_service(svc);
-
-	write_unlock_bh(&__ip_vs_svc_lock);
-
-	LeaveFunction(2);
-
-	return 0;
-}
-
-
-/*
- *	Delete a destination (must be already unlinked from the service)
- */
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
-{
-	ip_vs_kill_estimator(&dest->stats);
-
-	/*
-	 *  Remove it from the d-linked list with the real services.
-	 */
-	write_lock_bh(&__ip_vs_rs_lock);
-	ip_vs_rs_unhash(dest);
-	write_unlock_bh(&__ip_vs_rs_lock);
-
-	/*
-	 *  Decrease the refcnt of the dest, and free the dest
-	 *  if nobody refers to it (refcnt=0). Otherwise, throw
-	 *  the destination into the trash.
-	 */
-	if (atomic_dec_and_test(&dest->refcnt)) {
-		ip_vs_dst_reset(dest);
-		/* simply decrease svc->refcnt here, let the caller check
-		   and release the service if nobody refers to it.
-		   Only user context can release destination and service,
-		   and only one user context can update virtual service at a
-		   time, so the operation here is OK */
-		atomic_dec(&dest->svc->refcnt);
-		kfree(dest);
-	} else {
-		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
-			      "dest->refcnt=%d\n",
-			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
-			      ntohs(dest->port),
-			      atomic_read(&dest->refcnt));
-		list_add(&dest->n_list, &ip_vs_dest_trash);
-		atomic_inc(&dest->refcnt);
-	}
-}
-
-
-/*
- *	Unlink a destination from the given service
- */
-static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
-				struct ip_vs_dest *dest,
-				int svcupd)
-{
-	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
-
-	/*
-	 *  Remove it from the d-linked destination list.
-	 */
-	list_del(&dest->n_list);
-	svc->num_dests--;
-
-	/*
-	 *  Call the update_service function of its scheduler
-	 */
-	if (svcupd && svc->scheduler->update_service)
-			svc->scheduler->update_service(svc);
-}
-
-
-/*
- *	Delete a destination server in the given service
- */
-static int
-ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
-{
-	struct ip_vs_dest *dest;
-	__be16 dport = udest->port;
-
-	EnterFunction(2);
-
-	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
-
-	if (dest == NULL) {
-		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
-		return -ENOENT;
-	}
-
-	write_lock_bh(&__ip_vs_svc_lock);
-
-	/*
-	 *	Wait until all other svc users go away.
-	 */
-	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
-
-	/*
-	 *	Unlink dest from the service
-	 */
-	__ip_vs_unlink_dest(svc, dest, 1);
-
-	write_unlock_bh(&__ip_vs_svc_lock);
-
-	/*
-	 *	Delete the destination
-	 */
-	__ip_vs_del_dest(dest);
-
-	LeaveFunction(2);
-
-	return 0;
-}
-
-
-/*
- *	Add a service into the service hash table
- */
-static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
-		  struct ip_vs_service **svc_p)
-{
-	int ret = 0;
-	struct ip_vs_scheduler *sched = NULL;
-	struct ip_vs_service *svc = NULL;
-
-	/* increase the module use count */
-	ip_vs_use_count_inc();
-
-	/* Lookup the scheduler by 'u->sched_name' */
-	sched = ip_vs_scheduler_get(u->sched_name);
-	if (sched == NULL) {
-		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
-			   u->sched_name);
-		ret = -ENOENT;
-		goto out_mod_dec;
-	}
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (u->af == AF_INET6) {
-		if (!sched->supports_ipv6) {
-			ret = -EAFNOSUPPORT;
-			goto out_err;
-		}
-		if ((u->netmask < 1) || (u->netmask > 128)) {
-			ret = -EINVAL;
-			goto out_err;
-		}
-	}
-#endif
-
-	svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
-	if (svc == NULL) {
-		IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
-		ret = -ENOMEM;
-		goto out_err;
-	}
-
-	/* I'm the first user of the service */
-	atomic_set(&svc->usecnt, 1);
-	atomic_set(&svc->refcnt, 0);
-
-	svc->af = u->af;
-	svc->protocol = u->protocol;
-	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
-	svc->port = u->port;
-	svc->fwmark = u->fwmark;
-	svc->flags = u->flags;
-	svc->timeout = u->timeout * HZ;
-	svc->netmask = u->netmask;
-
-	INIT_LIST_HEAD(&svc->destinations);
-	rwlock_init(&svc->sched_lock);
-	spin_lock_init(&svc->stats.lock);
-
-	/* Bind the scheduler */
-	ret = ip_vs_bind_scheduler(svc, sched);
-	if (ret)
-		goto out_err;
-	sched = NULL;
-
-	/* Update the virtual service counters */
-	if (svc->port == FTPPORT)
-		atomic_inc(&ip_vs_ftpsvc_counter);
-	else if (svc->port == 0)
-		atomic_inc(&ip_vs_nullsvc_counter);
-
-	ip_vs_new_estimator(&svc->stats);
-
-	/* Count only IPv4 services for old get/setsockopt interface */
-	if (svc->af == AF_INET)
-		ip_vs_num_services++;
-
-	/* Hash the service into the service table */
-	write_lock_bh(&__ip_vs_svc_lock);
-	ip_vs_svc_hash(svc);
-	write_unlock_bh(&__ip_vs_svc_lock);
-
-	*svc_p = svc;
-	return 0;
-
-  out_err:
-	if (svc != NULL) {
-		if (svc->scheduler)
-			ip_vs_unbind_scheduler(svc);
-		if (svc->inc) {
-			local_bh_disable();
-			ip_vs_app_inc_put(svc->inc);
-			local_bh_enable();
-		}
-		kfree(svc);
-	}
-	ip_vs_scheduler_put(sched);
-
-  out_mod_dec:
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
-
-	return ret;
-}
-
-
-/*
- *	Edit a service and bind it with a new scheduler
- */
-static int
-ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
-{
-	struct ip_vs_scheduler *sched, *old_sched;
-	int ret = 0;
-
-	/*
-	 * Lookup the scheduler, by 'u->sched_name'
-	 */
-	sched = ip_vs_scheduler_get(u->sched_name);
-	if (sched == NULL) {
-		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
-			   u->sched_name);
-		return -ENOENT;
-	}
-	old_sched = sched;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (u->af == AF_INET6) {
-		if (!sched->supports_ipv6) {
-			ret = -EAFNOSUPPORT;
-			goto out;
-		}
-		if ((u->netmask < 1) || (u->netmask > 128)) {
-			ret = -EINVAL;
-			goto out;
-		}
-	}
-#endif
-
-	write_lock_bh(&__ip_vs_svc_lock);
-
-	/*
-	 * Wait until all other svc users go away.
-	 */
-	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
-
-	/*
-	 * Set the flags and timeout value
-	 */
-	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
-	svc->timeout = u->timeout * HZ;
-	svc->netmask = u->netmask;
-
-	old_sched = svc->scheduler;
-	if (sched != old_sched) {
-		/*
-		 * Unbind the old scheduler
-		 */
-		if ((ret = ip_vs_unbind_scheduler(svc))) {
-			old_sched = sched;
-			goto out_unlock;
-		}
-
-		/*
-		 * Bind the new scheduler
-		 */
-		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
-			/*
-			 * If ip_vs_bind_scheduler fails, restore the old
-			 * scheduler.
-			 * The main reason of failure is out of memory.
-			 *
-			 * The question is if the old scheduler can be
-			 * restored all the time. TODO: if it cannot be
-			 * restored some time, we must delete the service,
-			 * otherwise the system may crash.
-			 */
-			ip_vs_bind_scheduler(svc, old_sched);
-			old_sched = sched;
-			goto out_unlock;
-		}
-	}
-
-  out_unlock:
-	write_unlock_bh(&__ip_vs_svc_lock);
-#ifdef CONFIG_IP_VS_IPV6
-  out:
-#endif
-
-	if (old_sched)
-		ip_vs_scheduler_put(old_sched);
-
-	return ret;
-}
-
-
-/*
- *	Delete a service from the service list
- *	- The service must be unlinked, unlocked and not referenced!
- *	- We are called under _bh lock
- */
-static void __ip_vs_del_service(struct ip_vs_service *svc)
-{
-	struct ip_vs_dest *dest, *nxt;
-	struct ip_vs_scheduler *old_sched;
-
-	/* Count only IPv4 services for old get/setsockopt interface */
-	if (svc->af == AF_INET)
-		ip_vs_num_services--;
-
-	ip_vs_kill_estimator(&svc->stats);
-
-	/* Unbind scheduler */
-	old_sched = svc->scheduler;
-	ip_vs_unbind_scheduler(svc);
-	if (old_sched)
-		ip_vs_scheduler_put(old_sched);
-
-	/* Unbind app inc */
-	if (svc->inc) {
-		ip_vs_app_inc_put(svc->inc);
-		svc->inc = NULL;
-	}
-
-	/*
-	 *    Unlink the whole destination list
-	 */
-	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
-		__ip_vs_unlink_dest(svc, dest, 0);
-		__ip_vs_del_dest(dest);
-	}
-
-	/*
-	 *    Update the virtual service counters
-	 */
-	if (svc->port == FTPPORT)
-		atomic_dec(&ip_vs_ftpsvc_counter);
-	else if (svc->port == 0)
-		atomic_dec(&ip_vs_nullsvc_counter);
-
-	/*
-	 *    Free the service if nobody refers to it
-	 */
-	if (atomic_read(&svc->refcnt) == 0)
-		kfree(svc);
-
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
-}
-
-/*
- *	Delete a service from the service list
- */
-static int ip_vs_del_service(struct ip_vs_service *svc)
-{
-	if (svc == NULL)
-		return -EEXIST;
-
-	/*
-	 * Unhash it from the service table
-	 */
-	write_lock_bh(&__ip_vs_svc_lock);
-
-	ip_vs_svc_unhash(svc);
-
-	/*
-	 * Wait until all the svc users go away.
-	 */
-	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
-
-	__ip_vs_del_service(svc);
-
-	write_unlock_bh(&__ip_vs_svc_lock);
-
-	return 0;
-}
-
-
-/*
- *	Flush all the virtual services
- */
-static int ip_vs_flush(void)
-{
-	int idx;
-	struct ip_vs_service *svc, *nxt;
-
-	/*
-	 * Flush the service table hashed by <protocol,addr,port>
-	 */
-	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
-			write_lock_bh(&__ip_vs_svc_lock);
-			ip_vs_svc_unhash(svc);
-			/*
-			 * Wait until all the svc users go away.
-			 */
-			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-			__ip_vs_del_service(svc);
-			write_unlock_bh(&__ip_vs_svc_lock);
-		}
-	}
-
-	/*
-	 * Flush the service table hashed by fwmark
-	 */
-	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry_safe(svc, nxt,
-					 &ip_vs_svc_fwm_table[idx], f_list) {
-			write_lock_bh(&__ip_vs_svc_lock);
-			ip_vs_svc_unhash(svc);
-			/*
-			 * Wait until all the svc users go away.
-			 */
-			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-			__ip_vs_del_service(svc);
-			write_unlock_bh(&__ip_vs_svc_lock);
-		}
-	}
-
-	return 0;
-}
-
-
-/*
- *	Zero counters in a service or all services
- */
-static int ip_vs_zero_service(struct ip_vs_service *svc)
-{
-	struct ip_vs_dest *dest;
-
-	write_lock_bh(&__ip_vs_svc_lock);
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		ip_vs_zero_stats(&dest->stats);
-	}
-	ip_vs_zero_stats(&svc->stats);
-	write_unlock_bh(&__ip_vs_svc_lock);
-	return 0;
-}
-
-static int ip_vs_zero_all(void)
-{
-	int idx;
-	struct ip_vs_service *svc;
-
-	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-			ip_vs_zero_service(svc);
-		}
-	}
-
-	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-			ip_vs_zero_service(svc);
-		}
-	}
-
-	ip_vs_zero_stats(&ip_vs_stats);
-	return 0;
-}
-
-
-static int
-proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
-		     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int *valp = table->data;
-	int val = *valp;
-	int rc;
-
-	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
-	if (write && (*valp != val)) {
-		if ((*valp < 0) || (*valp > 3)) {
-			/* Restore the correct value */
-			*valp = val;
-		} else {
-			update_defense_level();
-		}
-	}
-	return rc;
-}
-
-
-static int
-proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
-		       void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int *valp = table->data;
-	int val[2];
-	int rc;
-
-	/* backup the value first */
-	memcpy(val, valp, sizeof(val));
-
-	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
-	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
-		/* Restore the correct value */
-		memcpy(valp, val, sizeof(val));
-	}
-	return rc;
-}
-
-
-/*
- *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
- */
-
-static struct ctl_table vs_vars[] = {
-	{
-		.procname	= "amemthresh",
-		.data		= &sysctl_ip_vs_amemthresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-#ifdef CONFIG_IP_VS_DEBUG
-	{
-		.procname	= "debug_level",
-		.data		= &sysctl_ip_vs_debug_level,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "am_droprate",
-		.data		= &sysctl_ip_vs_am_droprate,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.procname	= "drop_entry",
-		.data		= &sysctl_ip_vs_drop_entry,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_do_defense_mode,
-	},
-	{
-		.procname	= "drop_packet",
-		.data		= &sysctl_ip_vs_drop_packet,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_do_defense_mode,
-	},
-	{
-		.procname	= "secure_tcp",
-		.data		= &sysctl_ip_vs_secure_tcp,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_do_defense_mode,
-	},
-#if 0
-	{
-		.procname	= "timeout_established",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_synsent",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_synrecv",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_finwait",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_timewait",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_close",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_closewait",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_lastack",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_listen",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_synack",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_udp",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "timeout_icmp",
-		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-#endif
-	{
-		.procname	= "cache_bypass",
-		.data		= &sysctl_ip_vs_cache_bypass,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.procname	= "expire_nodest_conn",
-		.data		= &sysctl_ip_vs_expire_nodest_conn,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.procname	= "expire_quiescent_template",
-		.data		= &sysctl_ip_vs_expire_quiescent_template,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.procname	= "sync_threshold",
-		.data		= &sysctl_ip_vs_sync_threshold,
-		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
-		.mode		= 0644,
-		.proc_handler	= &proc_do_sync_threshold,
-	},
-	{
-		.procname	= "nat_icmp_send",
-		.data		= &sysctl_ip_vs_nat_icmp_send,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{ .ctl_name = 0 }
-};
-
-const struct ctl_path net_vs_ctl_path[] = {
-	{ .procname = "net", .ctl_name = CTL_NET, },
-	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
-	{ .procname = "vs", },
-	{ }
-};
-EXPORT_SYMBOL_GPL(net_vs_ctl_path);
-
-static struct ctl_table_header * sysctl_header;
-
-#ifdef CONFIG_PROC_FS
-
-struct ip_vs_iter {
-	struct list_head *table;
-	int bucket;
-};
-
-/*
- *	Write the contents of the VS rule table to a PROCfs file.
- *	(It is kept just for backward compatibility)
- */
-static inline const char *ip_vs_fwd_name(unsigned flags)
-{
-	switch (flags & IP_VS_CONN_F_FWD_MASK) {
-	case IP_VS_CONN_F_LOCALNODE:
-		return "Local";
-	case IP_VS_CONN_F_TUNNEL:
-		return "Tunnel";
-	case IP_VS_CONN_F_DROUTE:
-		return "Route";
-	default:
-		return "Masq";
-	}
-}
-
-
-/* Get the Nth entry in the two lists */
-static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
-{
-	struct ip_vs_iter *iter = seq->private;
-	int idx;
-	struct ip_vs_service *svc;
-
-	/* look in hash by protocol */
-	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-			if (pos-- == 0){
-				iter->table = ip_vs_svc_table;
-				iter->bucket = idx;
-				return svc;
-			}
-		}
-	}
-
-	/* keep looking in fwmark */
-	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-			if (pos-- == 0) {
-				iter->table = ip_vs_svc_fwm_table;
-				iter->bucket = idx;
-				return svc;
-			}
-		}
-	}
-
-	return NULL;
-}
-
-static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
-__acquires(__ip_vs_svc_lock)
-{
-
-	read_lock_bh(&__ip_vs_svc_lock);
-	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
-}
-
-
-static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct list_head *e;
-	struct ip_vs_iter *iter;
-	struct ip_vs_service *svc;
-
-	++*pos;
-	if (v == SEQ_START_TOKEN)
-		return ip_vs_info_array(seq,0);
-
-	svc = v;
-	iter = seq->private;
-
-	if (iter->table == ip_vs_svc_table) {
-		/* next service in table hashed by protocol */
-		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
-			return list_entry(e, struct ip_vs_service, s_list);
-
-
-		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
-			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
-					    s_list) {
-				return svc;
-			}
-		}
-
-		iter->table = ip_vs_svc_fwm_table;
-		iter->bucket = -1;
-		goto scan_fwmark;
-	}
-
-	/* next service in hashed by fwmark */
-	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
-		return list_entry(e, struct ip_vs_service, f_list);
-
- scan_fwmark:
-	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
-				    f_list)
-			return svc;
-	}
-
-	return NULL;
-}
-
-static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
-__releases(__ip_vs_svc_lock)
-{
-	read_unlock_bh(&__ip_vs_svc_lock);
-}
-
-
-static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq,
-			"IP Virtual Server version %d.%d.%d (size=%d)\n",
-			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
-		seq_puts(seq,
-			 "Prot LocalAddress:Port Scheduler Flags\n");
-		seq_puts(seq,
-			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
-	} else {
-		const struct ip_vs_service *svc = v;
-		const struct ip_vs_iter *iter = seq->private;
-		const struct ip_vs_dest *dest;
-
-		if (iter->table == ip_vs_svc_table) {
-#ifdef CONFIG_IP_VS_IPV6
-			if (svc->af == AF_INET6)
-				seq_printf(seq, "%s  [" NIP6_FMT "]:%04X %s ",
-					   ip_vs_proto_name(svc->protocol),
-					   NIP6(svc->addr.in6),
-					   ntohs(svc->port),
-					   svc->scheduler->name);
-			else
-#endif
-				seq_printf(seq, "%s  %08X:%04X %s ",
-					   ip_vs_proto_name(svc->protocol),
-					   ntohl(svc->addr.ip),
-					   ntohs(svc->port),
-					   svc->scheduler->name);
-		} else {
-			seq_printf(seq, "FWM  %08X %s ",
-				   svc->fwmark, svc->scheduler->name);
-		}
-
-		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
-			seq_printf(seq, "persistent %d %08X\n",
-				svc->timeout,
-				ntohl(svc->netmask));
-		else
-			seq_putc(seq, '\n');
-
-		list_for_each_entry(dest, &svc->destinations, n_list) {
-#ifdef CONFIG_IP_VS_IPV6
-			if (dest->af == AF_INET6)
-				seq_printf(seq,
-					   "  -> [" NIP6_FMT "]:%04X"
-					   "      %-7s %-6d %-10d %-10d\n",
-					   NIP6(dest->addr.in6),
-					   ntohs(dest->port),
-					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
-					   atomic_read(&dest->weight),
-					   atomic_read(&dest->activeconns),
-					   atomic_read(&dest->inactconns));
-			else
-#endif
-				seq_printf(seq,
-					   "  -> %08X:%04X      "
-					   "%-7s %-6d %-10d %-10d\n",
-					   ntohl(dest->addr.ip),
-					   ntohs(dest->port),
-					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
-					   atomic_read(&dest->weight),
-					   atomic_read(&dest->activeconns),
-					   atomic_read(&dest->inactconns));
-
-		}
-	}
-	return 0;
-}
-
-static const struct seq_operations ip_vs_info_seq_ops = {
-	.start = ip_vs_info_seq_start,
-	.next  = ip_vs_info_seq_next,
-	.stop  = ip_vs_info_seq_stop,
-	.show  = ip_vs_info_seq_show,
-};
-
-static int ip_vs_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open_private(file, &ip_vs_info_seq_ops,
-			sizeof(struct ip_vs_iter));
-}
-
-static const struct file_operations ip_vs_info_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = ip_vs_info_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_private,
-};
-
-#endif
-
-struct ip_vs_stats ip_vs_stats = {
-	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
-#ifdef CONFIG_PROC_FS
-static int ip_vs_stats_show(struct seq_file *seq, void *v)
-{
-
-/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
-	seq_puts(seq,
-		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
-	seq_printf(seq,
-		   "   Conns  Packets  Packets            Bytes            Bytes\n");
-
-	spin_lock_bh(&ip_vs_stats.lock);
-	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
-		   ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
-		   (unsigned long long) ip_vs_stats.ustats.inbytes,
-		   (unsigned long long) ip_vs_stats.ustats.outbytes);
-
-/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
-	seq_puts(seq,
-		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
-	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-			ip_vs_stats.ustats.cps,
-			ip_vs_stats.ustats.inpps,
-			ip_vs_stats.ustats.outpps,
-			ip_vs_stats.ustats.inbps,
-			ip_vs_stats.ustats.outbps);
-	spin_unlock_bh(&ip_vs_stats.lock);
-
-	return 0;
-}
-
-static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ip_vs_stats_show, NULL);
-}
-
-static const struct file_operations ip_vs_stats_fops = {
-	.owner = THIS_MODULE,
-	.open = ip_vs_stats_seq_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-#endif
-
-/*
- *	Set timeout values for tcp tcpfin udp in the timeout_table.
- */
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
-{
-	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
-		  u->tcp_timeout,
-		  u->tcp_fin_timeout,
-		  u->udp_timeout);
-
-#ifdef CONFIG_IP_VS_PROTO_TCP
-	if (u->tcp_timeout) {
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
-			= u->tcp_timeout * HZ;
-	}
-
-	if (u->tcp_fin_timeout) {
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
-			= u->tcp_fin_timeout * HZ;
-	}
-#endif
-
-#ifdef CONFIG_IP_VS_PROTO_UDP
-	if (u->udp_timeout) {
-		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
-			= u->udp_timeout * HZ;
-	}
-#endif
-	return 0;
-}
-
-
-#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
-#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
-#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
-				 sizeof(struct ip_vs_dest_user))
-#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
-#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
-#define MAX_ARG_LEN		SVCDEST_ARG_LEN
-
-static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
-	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
-	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
-	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
-	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
-	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
-	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
-	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
-	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
-	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
-	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
-	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
-};
-
-static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
-				  struct ip_vs_service_user *usvc_compat)
-{
-	usvc->af		= AF_INET;
-	usvc->protocol		= usvc_compat->protocol;
-	usvc->addr.ip		= usvc_compat->addr;
-	usvc->port		= usvc_compat->port;
-	usvc->fwmark		= usvc_compat->fwmark;
-
-	/* Deep copy of sched_name is not needed here */
-	usvc->sched_name	= usvc_compat->sched_name;
-
-	usvc->flags		= usvc_compat->flags;
-	usvc->timeout		= usvc_compat->timeout;
-	usvc->netmask		= usvc_compat->netmask;
-}
-
-static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
-				   struct ip_vs_dest_user *udest_compat)
-{
-	udest->addr.ip		= udest_compat->addr;
-	udest->port		= udest_compat->port;
-	udest->conn_flags	= udest_compat->conn_flags;
-	udest->weight		= udest_compat->weight;
-	udest->u_threshold	= udest_compat->u_threshold;
-	udest->l_threshold	= udest_compat->l_threshold;
-}
-
-static int
-do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
-{
-	int ret;
-	unsigned char arg[MAX_ARG_LEN];
-	struct ip_vs_service_user *usvc_compat;
-	struct ip_vs_service_user_kern usvc;
-	struct ip_vs_service *svc;
-	struct ip_vs_dest_user *udest_compat;
-	struct ip_vs_dest_user_kern udest;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (len != set_arglen[SET_CMDID(cmd)]) {
-		IP_VS_ERR("set_ctl: len %u != %u\n",
-			  len, set_arglen[SET_CMDID(cmd)]);
-		return -EINVAL;
-	}
-
-	if (copy_from_user(arg, user, len) != 0)
-		return -EFAULT;
-
-	/* increase the module use count */
-	ip_vs_use_count_inc();
-
-	if (mutex_lock_interruptible(&__ip_vs_mutex)) {
-		ret = -ERESTARTSYS;
-		goto out_dec;
-	}
-
-	if (cmd == IP_VS_SO_SET_FLUSH) {
-		/* Flush the virtual service */
-		ret = ip_vs_flush();
-		goto out_unlock;
-	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
-		/* Set timeout values for (tcp tcpfin udp) */
-		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
-		goto out_unlock;
-	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
-		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
-		goto out_unlock;
-	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
-		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-		ret = stop_sync_thread(dm->state);
-		goto out_unlock;
-	}
-
-	usvc_compat = (struct ip_vs_service_user *)arg;
-	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
-
-	/* We only use the new structs internally, so copy userspace compat
-	 * structs to extended internal versions */
-	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
-	ip_vs_copy_udest_compat(&udest, udest_compat);
-
-	if (cmd == IP_VS_SO_SET_ZERO) {
-		/* if no service address is set, zero counters in all */
-		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-			ret = ip_vs_zero_all();
-			goto out_unlock;
-		}
-	}
-
-	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
-	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
-		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
-			  usvc.protocol, NIPQUAD(usvc.addr.ip),
-			  ntohs(usvc.port), usvc.sched_name);
-		ret = -EFAULT;
-		goto out_unlock;
-	}
-
-	/* Lookup the exact service by <protocol, addr, port> or fwmark */
-	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
-					  &usvc.addr, usvc.port);
-	else
-		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
-
-	if (cmd != IP_VS_SO_SET_ADD
-	    && (svc == NULL || svc->protocol != usvc.protocol)) {
-		ret = -ESRCH;
-		goto out_unlock;
-	}
-
-	switch (cmd) {
-	case IP_VS_SO_SET_ADD:
-		if (svc != NULL)
-			ret = -EEXIST;
-		else
-			ret = ip_vs_add_service(&usvc, &svc);
-		break;
-	case IP_VS_SO_SET_EDIT:
-		ret = ip_vs_edit_service(svc, &usvc);
-		break;
-	case IP_VS_SO_SET_DEL:
-		ret = ip_vs_del_service(svc);
-		if (!ret)
-			goto out_unlock;
-		break;
-	case IP_VS_SO_SET_ZERO:
-		ret = ip_vs_zero_service(svc);
-		break;
-	case IP_VS_SO_SET_ADDDEST:
-		ret = ip_vs_add_dest(svc, &udest);
-		break;
-	case IP_VS_SO_SET_EDITDEST:
-		ret = ip_vs_edit_dest(svc, &udest);
-		break;
-	case IP_VS_SO_SET_DELDEST:
-		ret = ip_vs_del_dest(svc, &udest);
-		break;
-	default:
-		ret = -EINVAL;
-	}
-
-	if (svc)
-		ip_vs_service_put(svc);
-
-  out_unlock:
-	mutex_unlock(&__ip_vs_mutex);
-  out_dec:
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
-
-	return ret;
-}
-
-
-static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
-{
-	spin_lock_bh(&src->lock);
-	memcpy(dst, &src->ustats, sizeof(*dst));
-	spin_unlock_bh(&src->lock);
-}
-
-static void
-ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
-{
-	dst->protocol = src->protocol;
-	dst->addr = src->addr.ip;
-	dst->port = src->port;
-	dst->fwmark = src->fwmark;
-	strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
-	dst->flags = src->flags;
-	dst->timeout = src->timeout / HZ;
-	dst->netmask = src->netmask;
-	dst->num_dests = src->num_dests;
-	ip_vs_copy_stats(&dst->stats, &src->stats);
-}
-
-static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
-			    struct ip_vs_get_services __user *uptr)
-{
-	int idx, count=0;
-	struct ip_vs_service *svc;
-	struct ip_vs_service_entry entry;
-	int ret = 0;
-
-	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-			/* Only expose IPv4 entries to old interface */
-			if (svc->af != AF_INET)
-				continue;
-
-			if (count >= get->num_services)
-				goto out;
-			memset(&entry, 0, sizeof(entry));
-			ip_vs_copy_service(&entry, svc);
-			if (copy_to_user(&uptr->entrytable[count],
-					 &entry, sizeof(entry))) {
-				ret = -EFAULT;
-				goto out;
-			}
-			count++;
-		}
-	}
-
-	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-			/* Only expose IPv4 entries to old interface */
-			if (svc->af != AF_INET)
-				continue;
-
-			if (count >= get->num_services)
-				goto out;
-			memset(&entry, 0, sizeof(entry));
-			ip_vs_copy_service(&entry, svc);
-			if (copy_to_user(&uptr->entrytable[count],
-					 &entry, sizeof(entry))) {
-				ret = -EFAULT;
-				goto out;
-			}
-			count++;
-		}
-	}
-  out:
-	return ret;
-}
-
-static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
-			 struct ip_vs_get_dests __user *uptr)
-{
-	struct ip_vs_service *svc;
-	union nf_inet_addr addr = { .ip = get->addr };
-	int ret = 0;
-
-	if (get->fwmark)
-		svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
-	else
-		svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
-					  get->port);
-
-	if (svc) {
-		int count = 0;
-		struct ip_vs_dest *dest;
-		struct ip_vs_dest_entry entry;
-
-		list_for_each_entry(dest, &svc->destinations, n_list) {
-			if (count >= get->num_dests)
-				break;
-
-			entry.addr = dest->addr.ip;
-			entry.port = dest->port;
-			entry.conn_flags = atomic_read(&dest->conn_flags);
-			entry.weight = atomic_read(&dest->weight);
-			entry.u_threshold = dest->u_threshold;
-			entry.l_threshold = dest->l_threshold;
-			entry.activeconns = atomic_read(&dest->activeconns);
-			entry.inactconns = atomic_read(&dest->inactconns);
-			entry.persistconns = atomic_read(&dest->persistconns);
-			ip_vs_copy_stats(&entry.stats, &dest->stats);
-			if (copy_to_user(&uptr->entrytable[count],
-					 &entry, sizeof(entry))) {
-				ret = -EFAULT;
-				break;
-			}
-			count++;
-		}
-		ip_vs_service_put(svc);
-	} else
-		ret = -ESRCH;
-	return ret;
-}
-
-static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
-{
-#ifdef CONFIG_IP_VS_PROTO_TCP
-	u->tcp_timeout =
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
-	u->tcp_fin_timeout =
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
-#endif
-#ifdef CONFIG_IP_VS_PROTO_UDP
-	u->udp_timeout =
-		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
-#endif
-}
-
-
-#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
-#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
-#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
-#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
-#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
-#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
-#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
-
-static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
-	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
-	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
-	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
-	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
-	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
-	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
-	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
-};
-
-static int
-do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
-{
-	unsigned char arg[128];
-	int ret = 0;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (*len < get_arglen[GET_CMDID(cmd)]) {
-		IP_VS_ERR("get_ctl: len %u < %u\n",
-			  *len, get_arglen[GET_CMDID(cmd)]);
-		return -EINVAL;
-	}
-
-	if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
-		return -EFAULT;
-
-	if (mutex_lock_interruptible(&__ip_vs_mutex))
-		return -ERESTARTSYS;
-
-	switch (cmd) {
-	case IP_VS_SO_GET_VERSION:
-	{
-		char buf[64];
-
-		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
-			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
-		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
-			ret = -EFAULT;
-			goto out;
-		}
-		*len = strlen(buf)+1;
-	}
-	break;
-
-	case IP_VS_SO_GET_INFO:
-	{
-		struct ip_vs_getinfo info;
-		info.version = IP_VS_VERSION_CODE;
-		info.size = IP_VS_CONN_TAB_SIZE;
-		info.num_services = ip_vs_num_services;
-		if (copy_to_user(user, &info, sizeof(info)) != 0)
-			ret = -EFAULT;
-	}
-	break;
-
-	case IP_VS_SO_GET_SERVICES:
-	{
-		struct ip_vs_get_services *get;
-		int size;
-
-		get = (struct ip_vs_get_services *)arg;
-		size = sizeof(*get) +
-			sizeof(struct ip_vs_service_entry) * get->num_services;
-		if (*len != size) {
-			IP_VS_ERR("length: %u != %u\n", *len, size);
-			ret = -EINVAL;
-			goto out;
-		}
-		ret = __ip_vs_get_service_entries(get, user);
-	}
-	break;
-
-	case IP_VS_SO_GET_SERVICE:
-	{
-		struct ip_vs_service_entry *entry;
-		struct ip_vs_service *svc;
-		union nf_inet_addr addr;
-
-		entry = (struct ip_vs_service_entry *)arg;
-		addr.ip = entry->addr;
-		if (entry->fwmark)
-			svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
-		else
-			svc = __ip_vs_service_get(AF_INET, entry->protocol,
-						  &addr, entry->port);
-		if (svc) {
-			ip_vs_copy_service(entry, svc);
-			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
-				ret = -EFAULT;
-			ip_vs_service_put(svc);
-		} else
-			ret = -ESRCH;
-	}
-	break;
-
-	case IP_VS_SO_GET_DESTS:
-	{
-		struct ip_vs_get_dests *get;
-		int size;
-
-		get = (struct ip_vs_get_dests *)arg;
-		size = sizeof(*get) +
-			sizeof(struct ip_vs_dest_entry) * get->num_dests;
-		if (*len != size) {
-			IP_VS_ERR("length: %u != %u\n", *len, size);
-			ret = -EINVAL;
-			goto out;
-		}
-		ret = __ip_vs_get_dest_entries(get, user);
-	}
-	break;
-
-	case IP_VS_SO_GET_TIMEOUT:
-	{
-		struct ip_vs_timeout_user t;
-
-		__ip_vs_get_timeouts(&t);
-		if (copy_to_user(user, &t, sizeof(t)) != 0)
-			ret = -EFAULT;
-	}
-	break;
-
-	case IP_VS_SO_GET_DAEMON:
-	{
-		struct ip_vs_daemon_user d[2];
-
-		memset(&d, 0, sizeof(d));
-		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
-			d[0].state = IP_VS_STATE_MASTER;
-			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
-			d[0].syncid = ip_vs_master_syncid;
-		}
-		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
-			d[1].state = IP_VS_STATE_BACKUP;
-			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
-			d[1].syncid = ip_vs_backup_syncid;
-		}
-		if (copy_to_user(user, &d, sizeof(d)) != 0)
-			ret = -EFAULT;
-	}
-	break;
-
-	default:
-		ret = -EINVAL;
-	}
-
-  out:
-	mutex_unlock(&__ip_vs_mutex);
-	return ret;
-}
-
-
-static struct nf_sockopt_ops ip_vs_sockopts = {
-	.pf		= PF_INET,
-	.set_optmin	= IP_VS_BASE_CTL,
-	.set_optmax	= IP_VS_SO_SET_MAX+1,
-	.set		= do_ip_vs_set_ctl,
-	.get_optmin	= IP_VS_BASE_CTL,
-	.get_optmax	= IP_VS_SO_GET_MAX+1,
-	.get		= do_ip_vs_get_ctl,
-	.owner		= THIS_MODULE,
-};
-
-/*
- * Generic Netlink interface
- */
-
-/* IPVS genetlink family */
-static struct genl_family ip_vs_genl_family = {
-	.id		= GENL_ID_GENERATE,
-	.hdrsize	= 0,
-	.name		= IPVS_GENL_NAME,
-	.version	= IPVS_GENL_VERSION,
-	.maxattr	= IPVS_CMD_MAX,
-};
-
-/* Policy used for first-level command attributes */
-static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
-	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
-	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
-	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
-	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
-	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
-	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
-};
-
-/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
-static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
-	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
-	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
-					    .len = IP_VS_IFNAME_MAXLEN },
-	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
-};
-
-/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
-static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
-	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
-	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
-	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
-					    .len = sizeof(union nf_inet_addr) },
-	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
-	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
-	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
-					    .len = IP_VS_SCHEDNAME_MAXLEN },
-	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
-					    .len = sizeof(struct ip_vs_flags) },
-	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
-	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
-	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
-};
-
-/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
-static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
-	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
-					    .len = sizeof(union nf_inet_addr) },
-	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
-	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
-	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
-	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
-	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
-	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
-	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
-	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
-	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
-};
-
-static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
-				 struct ip_vs_stats *stats)
-{
-	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
-	if (!nl_stats)
-		return -EMSGSIZE;
-
-	spin_lock_bh(&stats->lock);
-
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
-	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
-	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
-
-	spin_unlock_bh(&stats->lock);
-
-	nla_nest_end(skb, nl_stats);
-
-	return 0;
-
-nla_put_failure:
-	spin_unlock_bh(&stats->lock);
-	nla_nest_cancel(skb, nl_stats);
-	return -EMSGSIZE;
-}
-
-static int ip_vs_genl_fill_service(struct sk_buff *skb,
-				   struct ip_vs_service *svc)
-{
-	struct nlattr *nl_service;
-	struct ip_vs_flags flags = { .flags = svc->flags,
-				     .mask = ~0 };
-
-	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
-	if (!nl_service)
-		return -EMSGSIZE;
-
-	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
-
-	if (svc->fwmark) {
-		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
-	} else {
-		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
-		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
-		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
-	}
-
-	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
-	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
-	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
-	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
-
-	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
-		goto nla_put_failure;
-
-	nla_nest_end(skb, nl_service);
-
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(skb, nl_service);
-	return -EMSGSIZE;
-}
-
-static int ip_vs_genl_dump_service(struct sk_buff *skb,
-				   struct ip_vs_service *svc,
-				   struct netlink_callback *cb)
-{
-	void *hdr;
-
-	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
-			  &ip_vs_genl_family, NLM_F_MULTI,
-			  IPVS_CMD_NEW_SERVICE);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (ip_vs_genl_fill_service(skb, svc) < 0)
-		goto nla_put_failure;
-
-	return genlmsg_end(skb, hdr);
-
-nla_put_failure:
-	genlmsg_cancel(skb, hdr);
-	return -EMSGSIZE;
-}
-
-static int ip_vs_genl_dump_services(struct sk_buff *skb,
-				    struct netlink_callback *cb)
-{
-	int idx = 0, i;
-	int start = cb->args[0];
-	struct ip_vs_service *svc;
-
-	mutex_lock(&__ip_vs_mutex);
-	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
-		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
-			if (++idx <= start)
-				continue;
-			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
-				idx--;
-				goto nla_put_failure;
-			}
-		}
-	}
-
-	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
-		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
-			if (++idx <= start)
-				continue;
-			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
-				idx--;
-				goto nla_put_failure;
-			}
-		}
-	}
-
-nla_put_failure:
-	mutex_unlock(&__ip_vs_mutex);
-	cb->args[0] = idx;
-
-	return skb->len;
-}
-
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
-				    struct nlattr *nla, int full_entry)
-{
-	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
-	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
-
-	/* Parse mandatory identifying service fields first */
-	if (nla == NULL ||
-	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
-		return -EINVAL;
-
-	nla_af		= attrs[IPVS_SVC_ATTR_AF];
-	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
-	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
-	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
-	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
-
-	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
-		return -EINVAL;
-
-	usvc->af = nla_get_u16(nla_af);
-#ifdef CONFIG_IP_VS_IPV6
-	if (usvc->af != AF_INET && usvc->af != AF_INET6)
-#else
-	if (usvc->af != AF_INET)
-#endif
-		return -EAFNOSUPPORT;
-
-	if (nla_fwmark) {
-		usvc->protocol = IPPROTO_TCP;
-		usvc->fwmark = nla_get_u32(nla_fwmark);
-	} else {
-		usvc->protocol = nla_get_u16(nla_protocol);
-		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
-		usvc->port = nla_get_u16(nla_port);
-		usvc->fwmark = 0;
-	}
-
-	/* If a full entry was requested, check for the additional fields */
-	if (full_entry) {
-		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
-			      *nla_netmask;
-		struct ip_vs_flags flags;
-		struct ip_vs_service *svc;
-
-		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
-		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
-		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
-		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
-
-		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
-			return -EINVAL;
-
-		nla_memcpy(&flags, nla_flags, sizeof(flags));
-
-		/* prefill flags from service if it already exists */
-		if (usvc->fwmark)
-			svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
-		else
-			svc = __ip_vs_service_get(usvc->af, usvc->protocol,
-						  &usvc->addr, usvc->port);
-		if (svc) {
-			usvc->flags = svc->flags;
-			ip_vs_service_put(svc);
-		} else
-			usvc->flags = 0;
-
-		/* set new flags from userland */
-		usvc->flags = (usvc->flags & ~flags.mask) |
-			      (flags.flags & flags.mask);
-		usvc->sched_name = nla_data(nla_sched);
-		usvc->timeout = nla_get_u32(nla_timeout);
-		usvc->netmask = nla_get_u32(nla_netmask);
-	}
-
-	return 0;
-}
-
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
-{
-	struct ip_vs_service_user_kern usvc;
-	int ret;
-
-	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
-	if (ret)
-		return ERR_PTR(ret);
-
-	if (usvc.fwmark)
-		return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
-	else
-		return __ip_vs_service_get(usvc.af, usvc.protocol,
-					   &usvc.addr, usvc.port);
-}
-
-static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
-{
-	struct nlattr *nl_dest;
-
-	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
-	if (!nl_dest)
-		return -EMSGSIZE;
-
-	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
-	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
-
-	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
-		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
-	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
-	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
-	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
-	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
-		    atomic_read(&dest->activeconns));
-	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
-		    atomic_read(&dest->inactconns));
-	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
-		    atomic_read(&dest->persistconns));
-
-	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
-		goto nla_put_failure;
-
-	nla_nest_end(skb, nl_dest);
-
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(skb, nl_dest);
-	return -EMSGSIZE;
-}
-
-static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
-				struct netlink_callback *cb)
-{
-	void *hdr;
-
-	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
-			  &ip_vs_genl_family, NLM_F_MULTI,
-			  IPVS_CMD_NEW_DEST);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (ip_vs_genl_fill_dest(skb, dest) < 0)
-		goto nla_put_failure;
-
-	return genlmsg_end(skb, hdr);
-
-nla_put_failure:
-	genlmsg_cancel(skb, hdr);
-	return -EMSGSIZE;
-}
-
-static int ip_vs_genl_dump_dests(struct sk_buff *skb,
-				 struct netlink_callback *cb)
-{
-	int idx = 0;
-	int start = cb->args[0];
-	struct ip_vs_service *svc;
-	struct ip_vs_dest *dest;
-	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
-
-	mutex_lock(&__ip_vs_mutex);
-
-	/* Try to find the service for which to dump destinations */
-	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
-			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
-		goto out_err;
-
-	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
-	if (IS_ERR(svc) || svc == NULL)
-		goto out_err;
-
-	/* Dump the destinations */
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if (++idx <= start)
-			continue;
-		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
-			idx--;
-			goto nla_put_failure;
-		}
-	}
-
-nla_put_failure:
-	cb->args[0] = idx;
-	ip_vs_service_put(svc);
-
-out_err:
-	mutex_unlock(&__ip_vs_mutex);
-
-	return skb->len;
-}
-
-static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
-				 struct nlattr *nla, int full_entry)
-{
-	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
-	struct nlattr *nla_addr, *nla_port;
-
-	/* Parse mandatory identifying destination fields first */
-	if (nla == NULL ||
-	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
-		return -EINVAL;
-
-	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
-	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
-
-	if (!(nla_addr && nla_port))
-		return -EINVAL;
-
-	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
-	udest->port = nla_get_u16(nla_port);
-
-	/* If a full entry was requested, check for the additional fields */
-	if (full_entry) {
-		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
-			      *nla_l_thresh;
-
-		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
-		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
-		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
-		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
-
-		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
-			return -EINVAL;
-
-		udest->conn_flags = nla_get_u32(nla_fwd)
-				    & IP_VS_CONN_F_FWD_MASK;
-		udest->weight = nla_get_u32(nla_weight);
-		udest->u_threshold = nla_get_u32(nla_u_thresh);
-		udest->l_threshold = nla_get_u32(nla_l_thresh);
-	}
-
-	return 0;
-}
-
-static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
-				  const char *mcast_ifn, __be32 syncid)
-{
-	struct nlattr *nl_daemon;
-
-	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
-	if (!nl_daemon)
-		return -EMSGSIZE;
-
-	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
-	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
-	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
-
-	nla_nest_end(skb, nl_daemon);
-
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(skb, nl_daemon);
-	return -EMSGSIZE;
-}
-
-static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
-				  const char *mcast_ifn, __be32 syncid,
-				  struct netlink_callback *cb)
-{
-	void *hdr;
-	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
-			  &ip_vs_genl_family, NLM_F_MULTI,
-			  IPVS_CMD_NEW_DAEMON);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
-		goto nla_put_failure;
-
-	return genlmsg_end(skb, hdr);
-
-nla_put_failure:
-	genlmsg_cancel(skb, hdr);
-	return -EMSGSIZE;
-}
-
-static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
-				   struct netlink_callback *cb)
-{
-	mutex_lock(&__ip_vs_mutex);
-	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
-		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
-					   ip_vs_master_mcast_ifn,
-					   ip_vs_master_syncid, cb) < 0)
-			goto nla_put_failure;
-
-		cb->args[0] = 1;
-	}
-
-	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
-		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
-					   ip_vs_backup_mcast_ifn,
-					   ip_vs_backup_syncid, cb) < 0)
-			goto nla_put_failure;
-
-		cb->args[1] = 1;
-	}
-
-nla_put_failure:
-	mutex_unlock(&__ip_vs_mutex);
-
-	return skb->len;
-}
-
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
-{
-	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
-	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
-	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
-		return -EINVAL;
-
-	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
-				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
-				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
-}
-
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
-{
-	if (!attrs[IPVS_DAEMON_ATTR_STATE])
-		return -EINVAL;
-
-	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
-}
-
-static int ip_vs_genl_set_config(struct nlattr **attrs)
-{
-	struct ip_vs_timeout_user t;
-
-	__ip_vs_get_timeouts(&t);
-
-	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
-		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
-
-	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
-		t.tcp_fin_timeout =
-			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
-
-	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
-		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
-
-	return ip_vs_set_timeout(&t);
-}
-
-static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
-{
-	struct ip_vs_service *svc = NULL;
-	struct ip_vs_service_user_kern usvc;
-	struct ip_vs_dest_user_kern udest;
-	int ret = 0, cmd;
-	int need_full_svc = 0, need_full_dest = 0;
-
-	cmd = info->genlhdr->cmd;
-
-	mutex_lock(&__ip_vs_mutex);
-
-	if (cmd == IPVS_CMD_FLUSH) {
-		ret = ip_vs_flush();
-		goto out;
-	} else if (cmd == IPVS_CMD_SET_CONFIG) {
-		ret = ip_vs_genl_set_config(info->attrs);
-		goto out;
-	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
-		   cmd == IPVS_CMD_DEL_DAEMON) {
-
-		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
-
-		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
-		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
-				     info->attrs[IPVS_CMD_ATTR_DAEMON],
-				     ip_vs_daemon_policy)) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		if (cmd == IPVS_CMD_NEW_DAEMON)
-			ret = ip_vs_genl_new_daemon(daemon_attrs);
-		else
-			ret = ip_vs_genl_del_daemon(daemon_attrs);
-		goto out;
-	} else if (cmd == IPVS_CMD_ZERO &&
-		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
-		ret = ip_vs_zero_all();
-		goto out;
-	}
-
-	/* All following commands require a service argument, so check if we
-	 * received a valid one. We need a full service specification when
-	 * adding / editing a service. Only identifying members otherwise. */
-	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
-		need_full_svc = 1;
-
-	ret = ip_vs_genl_parse_service(&usvc,
-				       info->attrs[IPVS_CMD_ATTR_SERVICE],
-				       need_full_svc);
-	if (ret)
-		goto out;
-
-	/* Lookup the exact service by <protocol, addr, port> or fwmark */
-	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
-					  &usvc.addr, usvc.port);
-	else
-		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
-
-	/* Unless we're adding a new service, the service must already exist */
-	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
-		ret = -ESRCH;
-		goto out;
-	}
-
-	/* Destination commands require a valid destination argument. For
-	 * adding / editing a destination, we need a full destination
-	 * specification. */
-	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
-	    cmd == IPVS_CMD_DEL_DEST) {
-		if (cmd != IPVS_CMD_DEL_DEST)
-			need_full_dest = 1;
-
-		ret = ip_vs_genl_parse_dest(&udest,
-					    info->attrs[IPVS_CMD_ATTR_DEST],
-					    need_full_dest);
-		if (ret)
-			goto out;
-	}
-
-	switch (cmd) {
-	case IPVS_CMD_NEW_SERVICE:
-		if (svc == NULL)
-			ret = ip_vs_add_service(&usvc, &svc);
-		else
-			ret = -EEXIST;
-		break;
-	case IPVS_CMD_SET_SERVICE:
-		ret = ip_vs_edit_service(svc, &usvc);
-		break;
-	case IPVS_CMD_DEL_SERVICE:
-		ret = ip_vs_del_service(svc);
-		break;
-	case IPVS_CMD_NEW_DEST:
-		ret = ip_vs_add_dest(svc, &udest);
-		break;
-	case IPVS_CMD_SET_DEST:
-		ret = ip_vs_edit_dest(svc, &udest);
-		break;
-	case IPVS_CMD_DEL_DEST:
-		ret = ip_vs_del_dest(svc, &udest);
-		break;
-	case IPVS_CMD_ZERO:
-		ret = ip_vs_zero_service(svc);
-		break;
-	default:
-		ret = -EINVAL;
-	}
-
-out:
-	if (svc)
-		ip_vs_service_put(svc);
-	mutex_unlock(&__ip_vs_mutex);
-
-	return ret;
-}
-
-static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
-{
-	struct sk_buff *msg;
-	void *reply;
-	int ret, cmd, reply_cmd;
-
-	cmd = info->genlhdr->cmd;
-
-	if (cmd == IPVS_CMD_GET_SERVICE)
-		reply_cmd = IPVS_CMD_NEW_SERVICE;
-	else if (cmd == IPVS_CMD_GET_INFO)
-		reply_cmd = IPVS_CMD_SET_INFO;
-	else if (cmd == IPVS_CMD_GET_CONFIG)
-		reply_cmd = IPVS_CMD_SET_CONFIG;
-	else {
-		IP_VS_ERR("unknown Generic Netlink command\n");
-		return -EINVAL;
-	}
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	mutex_lock(&__ip_vs_mutex);
-
-	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
-	if (reply == NULL)
-		goto nla_put_failure;
-
-	switch (cmd) {
-	case IPVS_CMD_GET_SERVICE:
-	{
-		struct ip_vs_service *svc;
-
-		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
-		if (IS_ERR(svc)) {
-			ret = PTR_ERR(svc);
-			goto out_err;
-		} else if (svc) {
-			ret = ip_vs_genl_fill_service(msg, svc);
-			ip_vs_service_put(svc);
-			if (ret)
-				goto nla_put_failure;
-		} else {
-			ret = -ESRCH;
-			goto out_err;
-		}
-
-		break;
-	}
-
-	case IPVS_CMD_GET_CONFIG:
-	{
-		struct ip_vs_timeout_user t;
-
-		__ip_vs_get_timeouts(&t);
-#ifdef CONFIG_IP_VS_PROTO_TCP
-		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
-		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
-			    t.tcp_fin_timeout);
-#endif
-#ifdef CONFIG_IP_VS_PROTO_UDP
-		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
-#endif
-
-		break;
-	}
-
-	case IPVS_CMD_GET_INFO:
-		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
-		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
-			    IP_VS_CONN_TAB_SIZE);
-		break;
-	}
-
-	genlmsg_end(msg, reply);
-	ret = genlmsg_unicast(msg, info->snd_pid);
-	goto out;
-
-nla_put_failure:
-	IP_VS_ERR("not enough space in Netlink message\n");
-	ret = -EMSGSIZE;
-
-out_err:
-	nlmsg_free(msg);
-out:
-	mutex_unlock(&__ip_vs_mutex);
-
-	return ret;
-}
-
-
-static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
-	{
-		.cmd	= IPVS_CMD_NEW_SERVICE,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_SET_SERVICE,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_DEL_SERVICE,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_GET_SERVICE,
-		.flags	= GENL_ADMIN_PERM,
-		.doit	= ip_vs_genl_get_cmd,
-		.dumpit	= ip_vs_genl_dump_services,
-		.policy	= ip_vs_cmd_policy,
-	},
-	{
-		.cmd	= IPVS_CMD_NEW_DEST,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_SET_DEST,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_DEL_DEST,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_GET_DEST,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.dumpit	= ip_vs_genl_dump_dests,
-	},
-	{
-		.cmd	= IPVS_CMD_NEW_DAEMON,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_DEL_DAEMON,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_GET_DAEMON,
-		.flags	= GENL_ADMIN_PERM,
-		.dumpit	= ip_vs_genl_dump_daemons,
-	},
-	{
-		.cmd	= IPVS_CMD_SET_CONFIG,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_GET_CONFIG,
-		.flags	= GENL_ADMIN_PERM,
-		.doit	= ip_vs_genl_get_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_GET_INFO,
-		.flags	= GENL_ADMIN_PERM,
-		.doit	= ip_vs_genl_get_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_ZERO,
-		.flags	= GENL_ADMIN_PERM,
-		.policy	= ip_vs_cmd_policy,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-	{
-		.cmd	= IPVS_CMD_FLUSH,
-		.flags	= GENL_ADMIN_PERM,
-		.doit	= ip_vs_genl_set_cmd,
-	},
-};
-
-static int __init ip_vs_genl_register(void)
-{
-	int ret, i;
-
-	ret = genl_register_family(&ip_vs_genl_family);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
-		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
-		if (ret)
-			goto err_out;
-	}
-	return 0;
-
-err_out:
-	genl_unregister_family(&ip_vs_genl_family);
-	return ret;
-}
-
-static void ip_vs_genl_unregister(void)
-{
-	genl_unregister_family(&ip_vs_genl_family);
-}
-
-/* End of Generic Netlink interface definitions */
-
-
-int __init ip_vs_control_init(void)
-{
-	int ret;
-	int idx;
-
-	EnterFunction(2);
-
-	ret = nf_register_sockopt(&ip_vs_sockopts);
-	if (ret) {
-		IP_VS_ERR("cannot register sockopt.\n");
-		return ret;
-	}
-
-	ret = ip_vs_genl_register();
-	if (ret) {
-		IP_VS_ERR("cannot register Generic Netlink interface.\n");
-		nf_unregister_sockopt(&ip_vs_sockopts);
-		return ret;
-	}
-
-	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
-	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
-
-	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
-	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
-		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
-		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
-	}
-	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
-		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
-	}
-
-	ip_vs_new_estimator(&ip_vs_stats);
-
-	/* Hook the defense timer */
-	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-
-	LeaveFunction(2);
-	return 0;
-}
-
-
-void ip_vs_control_cleanup(void)
-{
-	EnterFunction(2);
-	ip_vs_trash_cleanup();
-	cancel_rearming_delayed_work(&defense_work);
-	cancel_work_sync(&defense_work.work);
-	ip_vs_kill_estimator(&ip_vs_stats);
-	unregister_sysctl_table(sysctl_header);
-	proc_net_remove(&init_net, "ip_vs_stats");
-	proc_net_remove(&init_net, "ip_vs");
-	ip_vs_genl_unregister();
-	nf_unregister_sockopt(&ip_vs_sockopts);
-	LeaveFunction(2);
-}
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
deleted file mode 100644
index 2eb2860dabb..00000000000
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * ip_vs_est.c: simple rate estimator for IPVS
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/sysctl.h>
-#include <linux/list.h>
-
-#include <net/ip_vs.h>
-
-/*
-  This code is to estimate rate in a shorter interval (such as 8
-  seconds) for virtual services and real servers. For measure rate in a
-  long interval, it is easy to implement a user level daemon which
-  periodically reads those statistical counters and measure rate.
-
-  Currently, the measurement is activated by slow timer handler. Hope
-  this measurement will not introduce too much load.
-
-  We measure rate during the last 8 seconds every 2 seconds:
-
-    avgrate = avgrate*(1-W) + rate*W
-
-    where W = 2^(-2)
-
-  NOTES.
-
-  * The stored value for average bps is scaled by 2^5, so that maximal
-    rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
-
-  * A lot code is taken from net/sched/estimator.c
- */
-
-
-static void estimation_timer(unsigned long arg);
-
-static LIST_HEAD(est_list);
-static DEFINE_SPINLOCK(est_lock);
-static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
-
-static void estimation_timer(unsigned long arg)
-{
-	struct ip_vs_estimator *e;
-	struct ip_vs_stats *s;
-	u32 n_conns;
-	u32 n_inpkts, n_outpkts;
-	u64 n_inbytes, n_outbytes;
-	u32 rate;
-
-	spin_lock(&est_lock);
-	list_for_each_entry(e, &est_list, list) {
-		s = container_of(e, struct ip_vs_stats, est);
-
-		spin_lock(&s->lock);
-		n_conns = s->ustats.conns;
-		n_inpkts = s->ustats.inpkts;
-		n_outpkts = s->ustats.outpkts;
-		n_inbytes = s->ustats.inbytes;
-		n_outbytes = s->ustats.outbytes;
-
-		/* scaled by 2^10, but divided 2 seconds */
-		rate = (n_conns - e->last_conns)<<9;
-		e->last_conns = n_conns;
-		e->cps += ((long)rate - (long)e->cps)>>2;
-		s->ustats.cps = (e->cps+0x1FF)>>10;
-
-		rate = (n_inpkts - e->last_inpkts)<<9;
-		e->last_inpkts = n_inpkts;
-		e->inpps += ((long)rate - (long)e->inpps)>>2;
-		s->ustats.inpps = (e->inpps+0x1FF)>>10;
-
-		rate = (n_outpkts - e->last_outpkts)<<9;
-		e->last_outpkts = n_outpkts;
-		e->outpps += ((long)rate - (long)e->outpps)>>2;
-		s->ustats.outpps = (e->outpps+0x1FF)>>10;
-
-		rate = (n_inbytes - e->last_inbytes)<<4;
-		e->last_inbytes = n_inbytes;
-		e->inbps += ((long)rate - (long)e->inbps)>>2;
-		s->ustats.inbps = (e->inbps+0xF)>>5;
-
-		rate = (n_outbytes - e->last_outbytes)<<4;
-		e->last_outbytes = n_outbytes;
-		e->outbps += ((long)rate - (long)e->outbps)>>2;
-		s->ustats.outbps = (e->outbps+0xF)>>5;
-		spin_unlock(&s->lock);
-	}
-	spin_unlock(&est_lock);
-	mod_timer(&est_timer, jiffies + 2*HZ);
-}
-
-void ip_vs_new_estimator(struct ip_vs_stats *stats)
-{
-	struct ip_vs_estimator *est = &stats->est;
-
-	INIT_LIST_HEAD(&est->list);
-
-	est->last_conns = stats->ustats.conns;
-	est->cps = stats->ustats.cps<<10;
-
-	est->last_inpkts = stats->ustats.inpkts;
-	est->inpps = stats->ustats.inpps<<10;
-
-	est->last_outpkts = stats->ustats.outpkts;
-	est->outpps = stats->ustats.outpps<<10;
-
-	est->last_inbytes = stats->ustats.inbytes;
-	est->inbps = stats->ustats.inbps<<5;
-
-	est->last_outbytes = stats->ustats.outbytes;
-	est->outbps = stats->ustats.outbps<<5;
-
-	spin_lock_bh(&est_lock);
-	list_add(&est->list, &est_list);
-	spin_unlock_bh(&est_lock);
-}
-
-void ip_vs_kill_estimator(struct ip_vs_stats *stats)
-{
-	struct ip_vs_estimator *est = &stats->est;
-
-	spin_lock_bh(&est_lock);
-	list_del(&est->list);
-	spin_unlock_bh(&est_lock);
-}
-
-void ip_vs_zero_estimator(struct ip_vs_stats *stats)
-{
-	struct ip_vs_estimator *est = &stats->est;
-
-	/* set counters zero, caller must hold the stats->lock lock */
-	est->last_inbytes = 0;
-	est->last_outbytes = 0;
-	est->last_conns = 0;
-	est->last_inpkts = 0;
-	est->last_outpkts = 0;
-	est->cps = 0;
-	est->inpps = 0;
-	est->outpps = 0;
-	est->inbps = 0;
-	est->outbps = 0;
-}
-
-int __init ip_vs_estimator_init(void)
-{
-	mod_timer(&est_timer, jiffies + 2 * HZ);
-	return 0;
-}
-
-void ip_vs_estimator_cleanup(void)
-{
-	del_timer_sync(&est_timer);
-}
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
deleted file mode 100644
index 2e7dbd8b73a..00000000000
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- * ip_vs_ftp.c: IPVS ftp application module
- *
- * Authors:	Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * Changes:
- *
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference
- * is that ip_vs_ftp module handles the reverse direction to ip_masq_ftp.
- *
- *		IP_MASQ_FTP ftp masquerading module
- *
- * Version:	@(#)ip_masq_ftp.c 0.04   02/05/96
- *
- * Author:	Wouter Gadeyne
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <net/tcp.h>
-#include <asm/unaligned.h>
-
-#include <net/ip_vs.h>
-
-
-#define SERVER_STRING "227 Entering Passive Mode ("
-#define CLIENT_STRING "PORT "
-
-
-/*
- * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
- * First port is set to the default port.
- */
-static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0};
-module_param_array(ports, ushort, NULL, 0);
-MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands");
-
-
-/*	Dummy variable */
-static int ip_vs_ftp_pasv;
-
-
-static int
-ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
-{
-	return 0;
-}
-
-
-static int
-ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
-{
-	return 0;
-}
-
-
-/*
- * Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started
- * with the "pattern" and terminated with the "term" character.
- * <addr,port> is in network order.
- */
-static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
-				  const char *pattern, size_t plen, char term,
-				  __be32 *addr, __be16 *port,
-				  char **start, char **end)
-{
-	unsigned char p[6];
-	int i = 0;
-
-	if (data_limit - data < plen) {
-		/* check if there is partial match */
-		if (strnicmp(data, pattern, data_limit - data) == 0)
-			return -1;
-		else
-			return 0;
-	}
-
-	if (strnicmp(data, pattern, plen) != 0) {
-		return 0;
-	}
-	*start = data + plen;
-
-	for (data = *start; *data != term; data++) {
-		if (data == data_limit)
-			return -1;
-	}
-	*end = data;
-
-	memset(p, 0, sizeof(p));
-	for (data = *start; data != *end; data++) {
-		if (*data >= '0' && *data <= '9') {
-			p[i] = p[i]*10 + *data - '0';
-		} else if (*data == ',' && i < 5) {
-			i++;
-		} else {
-			/* unexpected character */
-			return -1;
-		}
-	}
-
-	if (i != 5)
-		return -1;
-
-	*addr = get_unaligned((__be32 *)p);
-	*port = get_unaligned((__be16 *)(p + 4));
-	return 1;
-}
-
-
-/*
- * Look at outgoing ftp packets to catch the response to a PASV command
- * from the server (inside-to-outside).
- * When we see one, we build a connection entry with the client address,
- * client port 0 (unknown at the moment), the server address and the
- * server port.  Mark the current connection entry as a control channel
- * of the new entry. All this work is just to make the data connection
- * can be scheduled to the right server later.
- *
- * The outgoing packet should be something like
- *   "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
- * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
- */
-static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
-			 struct sk_buff *skb, int *diff)
-{
-	struct iphdr *iph;
-	struct tcphdr *th;
-	char *data, *data_limit;
-	char *start, *end;
-	union nf_inet_addr from;
-	__be16 port;
-	struct ip_vs_conn *n_cp;
-	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
-	unsigned buf_len;
-	int ret;
-
-#ifdef CONFIG_IP_VS_IPV6
-	/* This application helper doesn't work with IPv6 yet,
-	 * so turn this into a no-op for IPv6 packets
-	 */
-	if (cp->af == AF_INET6)
-		return 1;
-#endif
-
-	*diff = 0;
-
-	/* Only useful for established sessions */
-	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
-		return 1;
-
-	/* Linear packets are much easier to deal with. */
-	if (!skb_make_writable(skb, skb->len))
-		return 0;
-
-	if (cp->app_data == &ip_vs_ftp_pasv) {
-		iph = ip_hdr(skb);
-		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
-		data = (char *)th + (th->doff << 2);
-		data_limit = skb_tail_pointer(skb);
-
-		if (ip_vs_ftp_get_addrport(data, data_limit,
-					   SERVER_STRING,
-					   sizeof(SERVER_STRING)-1, ')',
-					   &from.ip, &port,
-					   &start, &end) != 1)
-			return 1;
-
-		IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
-			  "%u.%u.%u.%u:%d detected\n",
-			  NIPQUAD(from.ip), ntohs(port),
-			  NIPQUAD(cp->caddr.ip), 0);
-
-		/*
-		 * Now update or create an connection entry for it
-		 */
-		n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port,
-					  &cp->caddr, 0);
-		if (!n_cp) {
-			n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
-					      &cp->caddr, 0,
-					      &cp->vaddr, port,
-					      &from, port,
-					      IP_VS_CONN_F_NO_CPORT,
-					      cp->dest);
-			if (!n_cp)
-				return 0;
-
-			/* add its controller */
-			ip_vs_control_add(n_cp, cp);
-		}
-
-		/*
-		 * Replace the old passive address with the new one
-		 */
-		from.ip = n_cp->vaddr.ip;
-		port = n_cp->vport;
-		sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
-			(ntohs(port)>>8)&255, ntohs(port)&255);
-		buf_len = strlen(buf);
-
-		/*
-		 * Calculate required delta-offset to keep TCP happy
-		 */
-		*diff = buf_len - (end-start);
-
-		if (*diff == 0) {
-			/* simply replace it with new passive address */
-			memcpy(start, buf, buf_len);
-			ret = 1;
-		} else {
-			ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
-					  end-start, buf, buf_len);
-		}
-
-		cp->app_data = NULL;
-		ip_vs_tcp_conn_listen(n_cp);
-		ip_vs_conn_put(n_cp);
-		return ret;
-	}
-	return 1;
-}
-
-
-/*
- * Look at incoming ftp packets to catch the PASV/PORT command
- * (outside-to-inside).
- *
- * The incoming packet having the PORT command should be something like
- *      "PORT xxx,xxx,xxx,xxx,ppp,ppp\n".
- * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number.
- * In this case, we create a connection entry using the client address and
- * port, so that the active ftp data connection from the server can reach
- * the client.
- */
-static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
-			struct sk_buff *skb, int *diff)
-{
-	struct iphdr *iph;
-	struct tcphdr *th;
-	char *data, *data_start, *data_limit;
-	char *start, *end;
-	union nf_inet_addr to;
-	__be16 port;
-	struct ip_vs_conn *n_cp;
-
-#ifdef CONFIG_IP_VS_IPV6
-	/* This application helper doesn't work with IPv6 yet,
-	 * so turn this into a no-op for IPv6 packets
-	 */
-	if (cp->af == AF_INET6)
-		return 1;
-#endif
-
-	/* no diff required for incoming packets */
-	*diff = 0;
-
-	/* Only useful for established sessions */
-	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
-		return 1;
-
-	/* Linear packets are much easier to deal with. */
-	if (!skb_make_writable(skb, skb->len))
-		return 0;
-
-	/*
-	 * Detecting whether it is passive
-	 */
-	iph = ip_hdr(skb);
-	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
-
-	/* Since there may be OPTIONS in the TCP packet and the HLEN is
-	   the length of the header in 32-bit multiples, it is accurate
-	   to calculate data address by th+HLEN*4 */
-	data = data_start = (char *)th + (th->doff << 2);
-	data_limit = skb_tail_pointer(skb);
-
-	while (data <= data_limit - 6) {
-		if (strnicmp(data, "PASV\r\n", 6) == 0) {
-			/* Passive mode on */
-			IP_VS_DBG(7, "got PASV at %td of %td\n",
-				  data - data_start,
-				  data_limit - data_start);
-			cp->app_data = &ip_vs_ftp_pasv;
-			return 1;
-		}
-		data++;
-	}
-
-	/*
-	 * To support virtual FTP server, the scenerio is as follows:
-	 *       FTP client ----> Load Balancer ----> FTP server
-	 * First detect the port number in the application data,
-	 * then create a new connection entry for the coming data
-	 * connection.
-	 */
-	if (ip_vs_ftp_get_addrport(data_start, data_limit,
-				   CLIENT_STRING, sizeof(CLIENT_STRING)-1,
-				   '\r', &to.ip, &port,
-				   &start, &end) != 1)
-		return 1;
-
-	IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
-		  NIPQUAD(to.ip), ntohs(port));
-
-	/* Passive mode off */
-	cp->app_data = NULL;
-
-	/*
-	 * Now update or create a connection entry for it
-	 */
-	IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
-		  ip_vs_proto_name(iph->protocol),
-		  NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
-
-	n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol,
-				 &to, port,
-				 &cp->vaddr, htons(ntohs(cp->vport)-1));
-	if (!n_cp) {
-		n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
-				      &to, port,
-				      &cp->vaddr, htons(ntohs(cp->vport)-1),
-				      &cp->daddr, htons(ntohs(cp->dport)-1),
-				      0,
-				      cp->dest);
-		if (!n_cp)
-			return 0;
-
-		/* add its controller */
-		ip_vs_control_add(n_cp, cp);
-	}
-
-	/*
-	 *	Move tunnel to listen state
-	 */
-	ip_vs_tcp_conn_listen(n_cp);
-	ip_vs_conn_put(n_cp);
-
-	return 1;
-}
-
-
-static struct ip_vs_app ip_vs_ftp = {
-	.name =		"ftp",
-	.type =		IP_VS_APP_TYPE_FTP,
-	.protocol =	IPPROTO_TCP,
-	.module =	THIS_MODULE,
-	.incs_list =	LIST_HEAD_INIT(ip_vs_ftp.incs_list),
-	.init_conn =	ip_vs_ftp_init_conn,
-	.done_conn =	ip_vs_ftp_done_conn,
-	.bind_conn =	NULL,
-	.unbind_conn =	NULL,
-	.pkt_out =	ip_vs_ftp_out,
-	.pkt_in =	ip_vs_ftp_in,
-};
-
-
-/*
- *	ip_vs_ftp initialization
- */
-static int __init ip_vs_ftp_init(void)
-{
-	int i, ret;
-	struct ip_vs_app *app = &ip_vs_ftp;
-
-	ret = register_ip_vs_app(app);
-	if (ret)
-		return ret;
-
-	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
-		if (!ports[i])
-			continue;
-		ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
-		if (ret)
-			break;
-		IP_VS_INFO("%s: loaded support on port[%d] = %d\n",
-			   app->name, i, ports[i]);
-	}
-
-	if (ret)
-		unregister_ip_vs_app(app);
-
-	return ret;
-}
-
-
-/*
- *	ip_vs_ftp finish.
- */
-static void __exit ip_vs_ftp_exit(void)
-{
-	unregister_ip_vs_app(&ip_vs_ftp);
-}
-
-
-module_init(ip_vs_ftp_init);
-module_exit(ip_vs_ftp_exit);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
deleted file mode 100644
index 6ecef3518ca..00000000000
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ /dev/null
@@ -1,555 +0,0 @@
-/*
- * IPVS:        Locality-Based Least-Connection scheduling module
- *
- * Authors:     Wensong Zhang <wensong@gnuchina.org>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *     Martin Hamilton         :    fixed the terrible locking bugs
- *                                   *lock(tbl->lock) ==> *lock(&tbl->lock)
- *     Wensong Zhang           :    fixed the uninitilized tbl->lock bug
- *     Wensong Zhang           :    added doing full expiration check to
- *                                   collect stale entries of 24+ hours when
- *                                   no partial expire check in a half hour
- *     Julian Anastasov        :    replaced del_timer call with del_timer_sync
- *                                   to avoid the possible race between timer
- *                                   handler and del_timer thread in SMP
- *
- */
-
-/*
- * The lblc algorithm is as follows (pseudo code):
- *
- *       if cachenode[dest_ip] is null then
- *               n, cachenode[dest_ip] <- {weighted least-conn node};
- *       else
- *               n <- cachenode[dest_ip];
- *               if (n is dead) OR
- *                  (n.conns>n.weight AND
- *                   there is a node m with m.conns<m.weight/2) then
- *                 n, cachenode[dest_ip] <- {weighted least-conn node};
- *
- *       return n;
- *
- * Thanks must go to Wenzhuo Zhang for talking WCCP to me and pushing
- * me to write this module.
- */
-
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/jiffies.h>
-
-/* for sysctl */
-#include <linux/fs.h>
-#include <linux/sysctl.h>
-
-#include <net/ip_vs.h>
-
-
-/*
- *    It is for garbage collection of stale IPVS lblc entries,
- *    when the table is full.
- */
-#define CHECK_EXPIRE_INTERVAL   (60*HZ)
-#define ENTRY_TIMEOUT           (6*60*HZ)
-
-/*
- *    It is for full expiration check.
- *    When there is no partial expiration check (garbage collection)
- *    in a half hour, do a full expiration check to collect stale
- *    entries that haven't been touched for a day.
- */
-#define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
-
-
-/*
- *     for IPVS lblc entry hash table
- */
-#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
-#define CONFIG_IP_VS_LBLC_TAB_BITS      10
-#endif
-#define IP_VS_LBLC_TAB_BITS     CONFIG_IP_VS_LBLC_TAB_BITS
-#define IP_VS_LBLC_TAB_SIZE     (1 << IP_VS_LBLC_TAB_BITS)
-#define IP_VS_LBLC_TAB_MASK     (IP_VS_LBLC_TAB_SIZE - 1)
-
-
-/*
- *      IPVS lblc entry represents an association between destination
- *      IP address and its destination server
- */
-struct ip_vs_lblc_entry {
-	struct list_head        list;
-	__be32                  addr;           /* destination IP address */
-	struct ip_vs_dest       *dest;          /* real server (cache) */
-	unsigned long           lastuse;        /* last used time */
-};
-
-
-/*
- *      IPVS lblc hash table
- */
-struct ip_vs_lblc_table {
-	struct list_head        bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
-	atomic_t                entries;        /* number of entries */
-	int                     max_size;       /* maximum size of entries */
-	struct timer_list       periodic_timer; /* collect stale entries */
-	int                     rover;          /* rover for expire check */
-	int                     counter;        /* counter for no expire */
-};
-
-
-/*
- *      IPVS LBLC sysctl table
- */
-
-static ctl_table vs_vars_table[] = {
-	{
-		.procname	= "lblc_expiration",
-		.data		= &sysctl_ip_vs_lblc_expiration,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{ .ctl_name = 0 }
-};
-
-static struct ctl_table_header * sysctl_header;
-
-static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
-{
-	list_del(&en->list);
-	/*
-	 * We don't kfree dest because it is refered either by its service
-	 * or the trash dest list.
-	 */
-	atomic_dec(&en->dest->refcnt);
-	kfree(en);
-}
-
-
-/*
- *	Returns hash value for IPVS LBLC entry
- */
-static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
-{
-	return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
-}
-
-
-/*
- *	Hash an entry in the ip_vs_lblc_table.
- *	returns bool success.
- */
-static void
-ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
-{
-	unsigned hash = ip_vs_lblc_hashkey(en->addr);
-
-	list_add(&en->list, &tbl->bucket[hash]);
-	atomic_inc(&tbl->entries);
-}
-
-
-/*
- *  Get ip_vs_lblc_entry associated with supplied parameters. Called under read
- *  lock
- */
-static inline struct ip_vs_lblc_entry *
-ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
-{
-	unsigned hash = ip_vs_lblc_hashkey(addr);
-	struct ip_vs_lblc_entry *en;
-
-	list_for_each_entry(en, &tbl->bucket[hash], list)
-		if (en->addr == addr)
-			return en;
-
-	return NULL;
-}
-
-
-/*
- * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
- * address to a server. Called under write lock.
- */
-static inline struct ip_vs_lblc_entry *
-ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
-	       struct ip_vs_dest *dest)
-{
-	struct ip_vs_lblc_entry *en;
-
-	en = ip_vs_lblc_get(tbl, daddr);
-	if (!en) {
-		en = kmalloc(sizeof(*en), GFP_ATOMIC);
-		if (!en) {
-			IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
-			return NULL;
-		}
-
-		en->addr = daddr;
-		en->lastuse = jiffies;
-
-		atomic_inc(&dest->refcnt);
-		en->dest = dest;
-
-		ip_vs_lblc_hash(tbl, en);
-	} else if (en->dest != dest) {
-		atomic_dec(&en->dest->refcnt);
-		atomic_inc(&dest->refcnt);
-		en->dest = dest;
-	}
-
-	return en;
-}
-
-
-/*
- *      Flush all the entries of the specified table.
- */
-static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
-{
-	struct ip_vs_lblc_entry *en, *nxt;
-	int i;
-
-	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
-		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
-			ip_vs_lblc_free(en);
-			atomic_dec(&tbl->entries);
-		}
-	}
-}
-
-
-static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
-{
-	struct ip_vs_lblc_table *tbl = svc->sched_data;
-	struct ip_vs_lblc_entry *en, *nxt;
-	unsigned long now = jiffies;
-	int i, j;
-
-	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
-		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
-
-		write_lock(&svc->sched_lock);
-		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-			if (time_before(now,
-					en->lastuse + sysctl_ip_vs_lblc_expiration))
-				continue;
-
-			ip_vs_lblc_free(en);
-			atomic_dec(&tbl->entries);
-		}
-		write_unlock(&svc->sched_lock);
-	}
-	tbl->rover = j;
-}
-
-
-/*
- *      Periodical timer handler for IPVS lblc table
- *      It is used to collect stale entries when the number of entries
- *      exceeds the maximum size of the table.
- *
- *      Fixme: we probably need more complicated algorithm to collect
- *             entries that have not been used for a long time even
- *             if the number of entries doesn't exceed the maximum size
- *             of the table.
- *      The full expiration check is for this purpose now.
- */
-static void ip_vs_lblc_check_expire(unsigned long data)
-{
-	struct ip_vs_service *svc = (struct ip_vs_service *) data;
-	struct ip_vs_lblc_table *tbl = svc->sched_data;
-	unsigned long now = jiffies;
-	int goal;
-	int i, j;
-	struct ip_vs_lblc_entry *en, *nxt;
-
-	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
-		/* do full expiration check */
-		ip_vs_lblc_full_check(svc);
-		tbl->counter = 1;
-		goto out;
-	}
-
-	if (atomic_read(&tbl->entries) <= tbl->max_size) {
-		tbl->counter++;
-		goto out;
-	}
-
-	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
-	if (goal > tbl->max_size/2)
-		goal = tbl->max_size/2;
-
-	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
-		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
-
-		write_lock(&svc->sched_lock);
-		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-			if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
-				continue;
-
-			ip_vs_lblc_free(en);
-			atomic_dec(&tbl->entries);
-			goal--;
-		}
-		write_unlock(&svc->sched_lock);
-		if (goal <= 0)
-			break;
-	}
-	tbl->rover = j;
-
-  out:
-	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
-}
-
-
-static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
-{
-	int i;
-	struct ip_vs_lblc_table *tbl;
-
-	/*
-	 *    Allocate the ip_vs_lblc_table for this service
-	 */
-	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
-	if (tbl == NULL) {
-		IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
-		return -ENOMEM;
-	}
-	svc->sched_data = tbl;
-	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
-		  "current service\n", sizeof(*tbl));
-
-	/*
-	 *    Initialize the hash buckets
-	 */
-	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
-		INIT_LIST_HEAD(&tbl->bucket[i]);
-	}
-	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
-	tbl->rover = 0;
-	tbl->counter = 1;
-
-	/*
-	 *    Hook periodic timer for garbage collection
-	 */
-	setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
-			(unsigned long)svc);
-	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
-
-	return 0;
-}
-
-
-static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_lblc_table *tbl = svc->sched_data;
-
-	/* remove periodic timer */
-	del_timer_sync(&tbl->periodic_timer);
-
-	/* got to clean up table entries here */
-	ip_vs_lblc_flush(tbl);
-
-	/* release the table itself */
-	kfree(tbl);
-	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
-		  sizeof(*tbl));
-
-	return 0;
-}
-
-
-static inline struct ip_vs_dest *
-__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
-{
-	struct ip_vs_dest *dest, *least;
-	int loh, doh;
-
-	/*
-	 * We think the overhead of processing active connections is fifty
-	 * times higher than that of inactive connections in average. (This
-	 * fifty times might not be accurate, we will change it later.) We
-	 * use the following formula to estimate the overhead:
-	 *                dest->activeconns*50 + dest->inactconns
-	 * and the load:
-	 *                (dest overhead) / dest->weight
-	 *
-	 * Remember -- no floats in kernel mode!!!
-	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
-	 *                h1/w1 > h2/w2
-	 * if every weight is larger than zero.
-	 *
-	 * The server with weight=0 is quiesced and will not receive any
-	 * new connection.
-	 */
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
-			continue;
-		if (atomic_read(&dest->weight) > 0) {
-			least = dest;
-			loh = atomic_read(&least->activeconns) * 50
-				+ atomic_read(&least->inactconns);
-			goto nextstage;
-		}
-	}
-	return NULL;
-
-	/*
-	 *    Find the destination with the least load.
-	 */
-  nextstage:
-	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
-		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
-			continue;
-
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
-		if (loh * atomic_read(&dest->weight) >
-		    doh * atomic_read(&least->weight)) {
-			least = dest;
-			loh = doh;
-		}
-	}
-
-	IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
-		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr.ip), ntohs(least->port),
-		  atomic_read(&least->activeconns),
-		  atomic_read(&least->refcnt),
-		  atomic_read(&least->weight), loh);
-
-	return least;
-}
-
-
-/*
- *   If this destination server is overloaded and there is a less loaded
- *   server, then return true.
- */
-static inline int
-is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
-{
-	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
-		struct ip_vs_dest *d;
-
-		list_for_each_entry(d, &svc->destinations, n_list) {
-			if (atomic_read(&d->activeconns)*2
-			    < atomic_read(&d->weight)) {
-				return 1;
-			}
-		}
-	}
-	return 0;
-}
-
-
-/*
- *    Locality-Based (weighted) Least-Connection scheduling
- */
-static struct ip_vs_dest *
-ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_lblc_table *tbl = svc->sched_data;
-	struct iphdr *iph = ip_hdr(skb);
-	struct ip_vs_dest *dest = NULL;
-	struct ip_vs_lblc_entry *en;
-
-	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
-
-	/* First look in our cache */
-	read_lock(&svc->sched_lock);
-	en = ip_vs_lblc_get(tbl, iph->daddr);
-	if (en) {
-		/* We only hold a read lock, but this is atomic */
-		en->lastuse = jiffies;
-
-		/*
-		 * If the destination is not available, i.e. it's in the trash,
-		 * we must ignore it, as it may be removed from under our feet,
-		 * if someone drops our reference count. Our caller only makes
-		 * sure that destinations, that are not in the trash, are not
-		 * moved to the trash, while we are scheduling. But anyone can
-		 * free up entries from the trash at any time.
-		 */
-
-		if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
-			dest = en->dest;
-	}
-	read_unlock(&svc->sched_lock);
-
-	/* If the destination has a weight and is not overloaded, use it */
-	if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
-		goto out;
-
-	/* No cache entry or it is invalid, time to schedule */
-	dest = __ip_vs_lblc_schedule(svc, iph);
-	if (!dest) {
-		IP_VS_DBG(1, "no destination available\n");
-		return NULL;
-	}
-
-	/* If we fail to create a cache entry, we'll just use the valid dest */
-	write_lock(&svc->sched_lock);
-	ip_vs_lblc_new(tbl, iph->daddr, dest);
-	write_unlock(&svc->sched_lock);
-
-out:
-	IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
-		  "--> server %u.%u.%u.%u:%d\n",
-		  NIPQUAD(iph->daddr),
-		  NIPQUAD(dest->addr.ip),
-		  ntohs(dest->port));
-
-	return dest;
-}
-
-
-/*
- *      IPVS LBLC Scheduler structure
- */
-static struct ip_vs_scheduler ip_vs_lblc_scheduler =
-{
-	.name =			"lblc",
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list =		LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	0,
-#endif
-	.init_service =		ip_vs_lblc_init_svc,
-	.done_service =		ip_vs_lblc_done_svc,
-	.schedule =		ip_vs_lblc_schedule,
-};
-
-
-static int __init ip_vs_lblc_init(void)
-{
-	int ret;
-
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
-	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
-	if (ret)
-		unregister_sysctl_table(sysctl_header);
-	return ret;
-}
-
-
-static void __exit ip_vs_lblc_cleanup(void)
-{
-	unregister_sysctl_table(sysctl_header);
-	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
-}
-
-
-module_init(ip_vs_lblc_init);
-module_exit(ip_vs_lblc_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
deleted file mode 100644
index 1f75ea83bcf..00000000000
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ /dev/null
@@ -1,755 +0,0 @@
-/*
- * IPVS:        Locality-Based Least-Connection with Replication scheduler
- *
- * Authors:     Wensong Zhang <wensong@gnuchina.org>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *     Julian Anastasov        :    Added the missing (dest->weight>0)
- *                                  condition in the ip_vs_dest_set_max.
- *
- */
-
-/*
- * The lblc/r algorithm is as follows (pseudo code):
- *
- *       if serverSet[dest_ip] is null then
- *               n, serverSet[dest_ip] <- {weighted least-conn node};
- *       else
- *               n <- {least-conn (alive) node in serverSet[dest_ip]};
- *               if (n is null) OR
- *                  (n.conns>n.weight AND
- *                   there is a node m with m.conns<m.weight/2) then
- *                   n <- {weighted least-conn node};
- *                   add n to serverSet[dest_ip];
- *               if |serverSet[dest_ip]| > 1 AND
- *                   now - serverSet[dest_ip].lastMod > T then
- *                   m <- {most conn node in serverSet[dest_ip]};
- *                   remove m from serverSet[dest_ip];
- *       if serverSet[dest_ip] changed then
- *               serverSet[dest_ip].lastMod <- now;
- *
- *       return n;
- *
- */
-
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/jiffies.h>
-
-/* for sysctl */
-#include <linux/fs.h>
-#include <linux/sysctl.h>
-#include <net/net_namespace.h>
-
-#include <net/ip_vs.h>
-
-
-/*
- *    It is for garbage collection of stale IPVS lblcr entries,
- *    when the table is full.
- */
-#define CHECK_EXPIRE_INTERVAL   (60*HZ)
-#define ENTRY_TIMEOUT           (6*60*HZ)
-
-/*
- *    It is for full expiration check.
- *    When there is no partial expiration check (garbage collection)
- *    in a half hour, do a full expiration check to collect stale
- *    entries that haven't been touched for a day.
- */
-#define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
-
-
-/*
- *     for IPVS lblcr entry hash table
- */
-#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
-#define CONFIG_IP_VS_LBLCR_TAB_BITS      10
-#endif
-#define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS
-#define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)
-#define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)
-
-
-/*
- *      IPVS destination set structure and operations
- */
-struct ip_vs_dest_list {
-	struct ip_vs_dest_list  *next;          /* list link */
-	struct ip_vs_dest       *dest;          /* destination server */
-};
-
-struct ip_vs_dest_set {
-	atomic_t                size;           /* set size */
-	unsigned long           lastmod;        /* last modified time */
-	struct ip_vs_dest_list  *list;          /* destination list */
-	rwlock_t	        lock;           /* lock for this list */
-};
-
-
-static struct ip_vs_dest_list *
-ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
-{
-	struct ip_vs_dest_list *e;
-
-	for (e=set->list; e!=NULL; e=e->next) {
-		if (e->dest == dest)
-			/* already existed */
-			return NULL;
-	}
-
-	e = kmalloc(sizeof(*e), GFP_ATOMIC);
-	if (e == NULL) {
-		IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
-		return NULL;
-	}
-
-	atomic_inc(&dest->refcnt);
-	e->dest = dest;
-
-	/* link it to the list */
-	e->next = set->list;
-	set->list = e;
-	atomic_inc(&set->size);
-
-	set->lastmod = jiffies;
-	return e;
-}
-
-static void
-ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
-{
-	struct ip_vs_dest_list *e, **ep;
-
-	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
-		if (e->dest == dest) {
-			/* HIT */
-			*ep = e->next;
-			atomic_dec(&set->size);
-			set->lastmod = jiffies;
-			atomic_dec(&e->dest->refcnt);
-			kfree(e);
-			break;
-		}
-		ep = &e->next;
-	}
-}
-
-static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
-{
-	struct ip_vs_dest_list *e, **ep;
-
-	write_lock(&set->lock);
-	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
-		*ep = e->next;
-		/*
-		 * We don't kfree dest because it is refered either
-		 * by its service or by the trash dest list.
-		 */
-		atomic_dec(&e->dest->refcnt);
-		kfree(e);
-	}
-	write_unlock(&set->lock);
-}
-
-/* get weighted least-connection node in the destination set */
-static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
-{
-	register struct ip_vs_dest_list *e;
-	struct ip_vs_dest *dest, *least;
-	int loh, doh;
-
-	if (set == NULL)
-		return NULL;
-
-	/* select the first destination server, whose weight > 0 */
-	for (e=set->list; e!=NULL; e=e->next) {
-		least = e->dest;
-		if (least->flags & IP_VS_DEST_F_OVERLOAD)
-			continue;
-
-		if ((atomic_read(&least->weight) > 0)
-		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
-			loh = atomic_read(&least->activeconns) * 50
-				+ atomic_read(&least->inactconns);
-			goto nextstage;
-		}
-	}
-	return NULL;
-
-	/* find the destination with the weighted least load */
-  nextstage:
-	for (e=e->next; e!=NULL; e=e->next) {
-		dest = e->dest;
-		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
-			continue;
-
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
-		if ((loh * atomic_read(&dest->weight) >
-		     doh * atomic_read(&least->weight))
-		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-			least = dest;
-			loh = doh;
-		}
-	}
-
-	IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
-		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr.ip), ntohs(least->port),
-		  atomic_read(&least->activeconns),
-		  atomic_read(&least->refcnt),
-		  atomic_read(&least->weight), loh);
-	return least;
-}
-
-
-/* get weighted most-connection node in the destination set */
-static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
-{
-	register struct ip_vs_dest_list *e;
-	struct ip_vs_dest *dest, *most;
-	int moh, doh;
-
-	if (set == NULL)
-		return NULL;
-
-	/* select the first destination server, whose weight > 0 */
-	for (e=set->list; e!=NULL; e=e->next) {
-		most = e->dest;
-		if (atomic_read(&most->weight) > 0) {
-			moh = atomic_read(&most->activeconns) * 50
-				+ atomic_read(&most->inactconns);
-			goto nextstage;
-		}
-	}
-	return NULL;
-
-	/* find the destination with the weighted most load */
-  nextstage:
-	for (e=e->next; e!=NULL; e=e->next) {
-		dest = e->dest;
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
-		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
-		if ((moh * atomic_read(&dest->weight) <
-		     doh * atomic_read(&most->weight))
-		    && (atomic_read(&dest->weight) > 0)) {
-			most = dest;
-			moh = doh;
-		}
-	}
-
-	IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
-		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(most->addr.ip), ntohs(most->port),
-		  atomic_read(&most->activeconns),
-		  atomic_read(&most->refcnt),
-		  atomic_read(&most->weight), moh);
-	return most;
-}
-
-
-/*
- *      IPVS lblcr entry represents an association between destination
- *      IP address and its destination server set
- */
-struct ip_vs_lblcr_entry {
-	struct list_head        list;
-	__be32                   addr;           /* destination IP address */
-	struct ip_vs_dest_set   set;            /* destination server set */
-	unsigned long           lastuse;        /* last used time */
-};
-
-
-/*
- *      IPVS lblcr hash table
- */
-struct ip_vs_lblcr_table {
-	struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
-	atomic_t                entries;        /* number of entries */
-	int                     max_size;       /* maximum size of entries */
-	struct timer_list       periodic_timer; /* collect stale entries */
-	int                     rover;          /* rover for expire check */
-	int                     counter;        /* counter for no expire */
-};
-
-
-/*
- *      IPVS LBLCR sysctl table
- */
-
-static ctl_table vs_vars_table[] = {
-	{
-		.procname	= "lblcr_expiration",
-		.data		= &sysctl_ip_vs_lblcr_expiration,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{ .ctl_name = 0 }
-};
-
-static struct ctl_table_header * sysctl_header;
-
-static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
-{
-	list_del(&en->list);
-	ip_vs_dest_set_eraseall(&en->set);
-	kfree(en);
-}
-
-
-/*
- *	Returns hash value for IPVS LBLCR entry
- */
-static inline unsigned ip_vs_lblcr_hashkey(__be32 addr)
-{
-	return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
-}
-
-
-/*
- *	Hash an entry in the ip_vs_lblcr_table.
- *	returns bool success.
- */
-static void
-ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
-{
-	unsigned hash = ip_vs_lblcr_hashkey(en->addr);
-
-	list_add(&en->list, &tbl->bucket[hash]);
-	atomic_inc(&tbl->entries);
-}
-
-
-/*
- *  Get ip_vs_lblcr_entry associated with supplied parameters. Called under
- *  read lock.
- */
-static inline struct ip_vs_lblcr_entry *
-ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
-{
-	unsigned hash = ip_vs_lblcr_hashkey(addr);
-	struct ip_vs_lblcr_entry *en;
-
-	list_for_each_entry(en, &tbl->bucket[hash], list)
-		if (en->addr == addr)
-			return en;
-
-	return NULL;
-}
-
-
-/*
- * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
- * IP address to a server. Called under write lock.
- */
-static inline struct ip_vs_lblcr_entry *
-ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl,  __be32 daddr,
-		struct ip_vs_dest *dest)
-{
-	struct ip_vs_lblcr_entry *en;
-
-	en = ip_vs_lblcr_get(tbl, daddr);
-	if (!en) {
-		en = kmalloc(sizeof(*en), GFP_ATOMIC);
-		if (!en) {
-			IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
-			return NULL;
-		}
-
-		en->addr = daddr;
-		en->lastuse = jiffies;
-
-		/* initilize its dest set */
-		atomic_set(&(en->set.size), 0);
-		en->set.list = NULL;
-		rwlock_init(&en->set.lock);
-
-		ip_vs_lblcr_hash(tbl, en);
-	}
-
-	write_lock(&en->set.lock);
-	ip_vs_dest_set_insert(&en->set, dest);
-	write_unlock(&en->set.lock);
-
-	return en;
-}
-
-
-/*
- *      Flush all the entries of the specified table.
- */
-static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
-{
-	int i;
-	struct ip_vs_lblcr_entry *en, *nxt;
-
-	/* No locking required, only called during cleanup. */
-	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
-		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
-			ip_vs_lblcr_free(en);
-		}
-	}
-}
-
-
-static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
-{
-	struct ip_vs_lblcr_table *tbl = svc->sched_data;
-	unsigned long now = jiffies;
-	int i, j;
-	struct ip_vs_lblcr_entry *en, *nxt;
-
-	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
-		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
-
-		write_lock(&svc->sched_lock);
-		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-			if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
-				       now))
-				continue;
-
-			ip_vs_lblcr_free(en);
-			atomic_dec(&tbl->entries);
-		}
-		write_unlock(&svc->sched_lock);
-	}
-	tbl->rover = j;
-}
-
-
-/*
- *      Periodical timer handler for IPVS lblcr table
- *      It is used to collect stale entries when the number of entries
- *      exceeds the maximum size of the table.
- *
- *      Fixme: we probably need more complicated algorithm to collect
- *             entries that have not been used for a long time even
- *             if the number of entries doesn't exceed the maximum size
- *             of the table.
- *      The full expiration check is for this purpose now.
- */
-static void ip_vs_lblcr_check_expire(unsigned long data)
-{
-	struct ip_vs_service *svc = (struct ip_vs_service *) data;
-	struct ip_vs_lblcr_table *tbl = svc->sched_data;
-	unsigned long now = jiffies;
-	int goal;
-	int i, j;
-	struct ip_vs_lblcr_entry *en, *nxt;
-
-	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
-		/* do full expiration check */
-		ip_vs_lblcr_full_check(svc);
-		tbl->counter = 1;
-		goto out;
-	}
-
-	if (atomic_read(&tbl->entries) <= tbl->max_size) {
-		tbl->counter++;
-		goto out;
-	}
-
-	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
-	if (goal > tbl->max_size/2)
-		goal = tbl->max_size/2;
-
-	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
-		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
-
-		write_lock(&svc->sched_lock);
-		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-			if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
-				continue;
-
-			ip_vs_lblcr_free(en);
-			atomic_dec(&tbl->entries);
-			goal--;
-		}
-		write_unlock(&svc->sched_lock);
-		if (goal <= 0)
-			break;
-	}
-	tbl->rover = j;
-
-  out:
-	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
-}
-
-static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
-{
-	int i;
-	struct ip_vs_lblcr_table *tbl;
-
-	/*
-	 *    Allocate the ip_vs_lblcr_table for this service
-	 */
-	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
-	if (tbl == NULL) {
-		IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
-		return -ENOMEM;
-	}
-	svc->sched_data = tbl;
-	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
-		  "current service\n", sizeof(*tbl));
-
-	/*
-	 *    Initialize the hash buckets
-	 */
-	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
-		INIT_LIST_HEAD(&tbl->bucket[i]);
-	}
-	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
-	tbl->rover = 0;
-	tbl->counter = 1;
-
-	/*
-	 *    Hook periodic timer for garbage collection
-	 */
-	setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
-			(unsigned long)svc);
-	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
-
-	return 0;
-}
-
-
-static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_lblcr_table *tbl = svc->sched_data;
-
-	/* remove periodic timer */
-	del_timer_sync(&tbl->periodic_timer);
-
-	/* got to clean up table entries here */
-	ip_vs_lblcr_flush(tbl);
-
-	/* release the table itself */
-	kfree(tbl);
-	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
-		  sizeof(*tbl));
-
-	return 0;
-}
-
-
-static inline struct ip_vs_dest *
-__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
-{
-	struct ip_vs_dest *dest, *least;
-	int loh, doh;
-
-	/*
-	 * We think the overhead of processing active connections is fifty
-	 * times higher than that of inactive connections in average. (This
-	 * fifty times might not be accurate, we will change it later.) We
-	 * use the following formula to estimate the overhead:
-	 *                dest->activeconns*50 + dest->inactconns
-	 * and the load:
-	 *                (dest overhead) / dest->weight
-	 *
-	 * Remember -- no floats in kernel mode!!!
-	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
-	 *                h1/w1 > h2/w2
-	 * if every weight is larger than zero.
-	 *
-	 * The server with weight=0 is quiesced and will not receive any
-	 * new connection.
-	 */
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
-			continue;
-
-		if (atomic_read(&dest->weight) > 0) {
-			least = dest;
-			loh = atomic_read(&least->activeconns) * 50
-				+ atomic_read(&least->inactconns);
-			goto nextstage;
-		}
-	}
-	return NULL;
-
-	/*
-	 *    Find the destination with the least load.
-	 */
-  nextstage:
-	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
-		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
-			continue;
-
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
-		if (loh * atomic_read(&dest->weight) >
-		    doh * atomic_read(&least->weight)) {
-			least = dest;
-			loh = doh;
-		}
-	}
-
-	IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
-		  "activeconns %d refcnt %d weight %d overhead %d\n",
-		  NIPQUAD(least->addr.ip), ntohs(least->port),
-		  atomic_read(&least->activeconns),
-		  atomic_read(&least->refcnt),
-		  atomic_read(&least->weight), loh);
-
-	return least;
-}
-
-
-/*
- *   If this destination server is overloaded and there is a less loaded
- *   server, then return true.
- */
-static inline int
-is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
-{
-	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
-		struct ip_vs_dest *d;
-
-		list_for_each_entry(d, &svc->destinations, n_list) {
-			if (atomic_read(&d->activeconns)*2
-			    < atomic_read(&d->weight)) {
-				return 1;
-			}
-		}
-	}
-	return 0;
-}
-
-
-/*
- *    Locality-Based (weighted) Least-Connection scheduling
- */
-static struct ip_vs_dest *
-ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_lblcr_table *tbl = svc->sched_data;
-	struct iphdr *iph = ip_hdr(skb);
-	struct ip_vs_dest *dest = NULL;
-	struct ip_vs_lblcr_entry *en;
-
-	IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
-
-	/* First look in our cache */
-	read_lock(&svc->sched_lock);
-	en = ip_vs_lblcr_get(tbl, iph->daddr);
-	if (en) {
-		/* We only hold a read lock, but this is atomic */
-		en->lastuse = jiffies;
-
-		/* Get the least loaded destination */
-		read_lock(&en->set.lock);
-		dest = ip_vs_dest_set_min(&en->set);
-		read_unlock(&en->set.lock);
-
-		/* More than one destination + enough time passed by, cleanup */
-		if (atomic_read(&en->set.size) > 1 &&
-				time_after(jiffies, en->set.lastmod +
-				sysctl_ip_vs_lblcr_expiration)) {
-			struct ip_vs_dest *m;
-
-			write_lock(&en->set.lock);
-			m = ip_vs_dest_set_max(&en->set);
-			if (m)
-				ip_vs_dest_set_erase(&en->set, m);
-			write_unlock(&en->set.lock);
-		}
-
-		/* If the destination is not overloaded, use it */
-		if (dest && !is_overloaded(dest, svc)) {
-			read_unlock(&svc->sched_lock);
-			goto out;
-		}
-
-		/* The cache entry is invalid, time to schedule */
-		dest = __ip_vs_lblcr_schedule(svc, iph);
-		if (!dest) {
-			IP_VS_DBG(1, "no destination available\n");
-			read_unlock(&svc->sched_lock);
-			return NULL;
-		}
-
-		/* Update our cache entry */
-		write_lock(&en->set.lock);
-		ip_vs_dest_set_insert(&en->set, dest);
-		write_unlock(&en->set.lock);
-	}
-	read_unlock(&svc->sched_lock);
-
-	if (dest)
-		goto out;
-
-	/* No cache entry, time to schedule */
-	dest = __ip_vs_lblcr_schedule(svc, iph);
-	if (!dest) {
-		IP_VS_DBG(1, "no destination available\n");
-		return NULL;
-	}
-
-	/* If we fail to create a cache entry, we'll just use the valid dest */
-	write_lock(&svc->sched_lock);
-	ip_vs_lblcr_new(tbl, iph->daddr, dest);
-	write_unlock(&svc->sched_lock);
-
-out:
-	IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
-		  "--> server %u.%u.%u.%u:%d\n",
-		  NIPQUAD(iph->daddr),
-		  NIPQUAD(dest->addr.ip),
-		  ntohs(dest->port));
-
-	return dest;
-}
-
-
-/*
- *      IPVS LBLCR Scheduler structure
- */
-static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
-{
-	.name =			"lblcr",
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list =		LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	0,
-#endif
-	.init_service =		ip_vs_lblcr_init_svc,
-	.done_service =		ip_vs_lblcr_done_svc,
-	.schedule =		ip_vs_lblcr_schedule,
-};
-
-
-static int __init ip_vs_lblcr_init(void)
-{
-	int ret;
-
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
-	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
-	if (ret)
-		unregister_sysctl_table(sysctl_header);
-	return ret;
-}
-
-
-static void __exit ip_vs_lblcr_cleanup(void)
-{
-	unregister_sysctl_table(sysctl_header);
-	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
-}
-
-
-module_init(ip_vs_lblcr_init);
-module_exit(ip_vs_lblcr_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
deleted file mode 100644
index b69f808ac46..00000000000
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * IPVS:        Least-Connection Scheduling module
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *     Wensong Zhang            :     added the ip_vs_lc_update_svc
- *     Wensong Zhang            :     added any dest with weight=0 is quiesced
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include <net/ip_vs.h>
-
-
-static inline unsigned int
-ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
-{
-	/*
-	 * We think the overhead of processing active connections is 256
-	 * times higher than that of inactive connections in average. (This
-	 * 256 times might not be accurate, we will change it later) We
-	 * use the following formula to estimate the overhead now:
-	 *		  dest->activeconns*256 + dest->inactconns
-	 */
-	return (atomic_read(&dest->activeconns) << 8) +
-		atomic_read(&dest->inactconns);
-}
-
-
-/*
- *	Least Connection scheduling
- */
-static struct ip_vs_dest *
-ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_dest *dest, *least = NULL;
-	unsigned int loh = 0, doh;
-
-	IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n");
-
-	/*
-	 * Simply select the server with the least number of
-	 *        (activeconns<<5) + inactconns
-	 * Except whose weight is equal to zero.
-	 * If the weight is equal to zero, it means that the server is
-	 * quiesced, the existing connections to the server still get
-	 * served, but no new connection is assigned to the server.
-	 */
-
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
-		    atomic_read(&dest->weight) == 0)
-			continue;
-		doh = ip_vs_lc_dest_overhead(dest);
-		if (!least || doh < loh) {
-			least = dest;
-			loh = doh;
-		}
-	}
-
-	if (least)
-	IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n",
-		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
-		      atomic_read(&least->activeconns),
-		      atomic_read(&least->inactconns));
-
-	return least;
-}
-
-
-static struct ip_vs_scheduler ip_vs_lc_scheduler = {
-	.name =			"lc",
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list =		LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
-	.schedule =		ip_vs_lc_schedule,
-};
-
-
-static int __init ip_vs_lc_init(void)
-{
-	return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
-}
-
-static void __exit ip_vs_lc_cleanup(void)
-{
-	unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
-}
-
-module_init(ip_vs_lc_init);
-module_exit(ip_vs_lc_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
deleted file mode 100644
index 9a2d8033f08..00000000000
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * IPVS:        Never Queue scheduling module
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-/*
- * The NQ algorithm adopts a two-speed model. When there is an idle server
- * available, the job will be sent to the idle server, instead of waiting
- * for a fast one. When there is no idle server available, the job will be
- * sent to the server that minimize its expected delay (The Shortest
- * Expected Delay scheduling algorithm).
- *
- * See the following paper for more information:
- * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
- * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
- * pages 986-994, 1988.
- *
- * Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ to me.
- *
- * The difference between NQ and SED is that NQ can improve overall
- * system utilization.
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include <net/ip_vs.h>
-
-
-static inline unsigned int
-ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
-{
-	/*
-	 * We only use the active connection number in the cost
-	 * calculation here.
-	 */
-	return atomic_read(&dest->activeconns) + 1;
-}
-
-
-/*
- *	Weighted Least Connection scheduling
- */
-static struct ip_vs_dest *
-ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_dest *dest, *least = NULL;
-	unsigned int loh = 0, doh;
-
-	IP_VS_DBG(6, "ip_vs_nq_schedule(): Scheduling...\n");
-
-	/*
-	 * We calculate the load of each dest server as follows:
-	 *	(server expected overhead) / dest->weight
-	 *
-	 * Remember -- no floats in kernel mode!!!
-	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
-	 *		  h1/w1 > h2/w2
-	 * if every weight is larger than zero.
-	 *
-	 * The server with weight=0 is quiesced and will not receive any
-	 * new connections.
-	 */
-
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-
-		if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
-		    !atomic_read(&dest->weight))
-			continue;
-
-		doh = ip_vs_nq_dest_overhead(dest);
-
-		/* return the server directly if it is idle */
-		if (atomic_read(&dest->activeconns) == 0) {
-			least = dest;
-			loh = doh;
-			goto out;
-		}
-
-		if (!least ||
-		    (loh * atomic_read(&dest->weight) >
-		     doh * atomic_read(&least->weight))) {
-			least = dest;
-			loh = doh;
-		}
-	}
-
-	if (!least)
-		return NULL;
-
-  out:
-	IP_VS_DBG_BUF(6, "NQ: server %s:%u "
-		      "activeconns %d refcnt %d weight %d overhead %d\n",
-		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
-		      atomic_read(&least->activeconns),
-		      atomic_read(&least->refcnt),
-		      atomic_read(&least->weight), loh);
-
-	return least;
-}
-
-
-static struct ip_vs_scheduler ip_vs_nq_scheduler =
-{
-	.name =			"nq",
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list =		LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
-	.schedule =		ip_vs_nq_schedule,
-};
-
-
-static int __init ip_vs_nq_init(void)
-{
-	return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
-}
-
-static void __exit ip_vs_nq_cleanup(void)
-{
-	unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
-}
-
-module_init(ip_vs_nq_init);
-module_exit(ip_vs_nq_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
deleted file mode 100644
index 0791f9e08fe..00000000000
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * ip_vs_proto.c: transport protocol load balancing support for IPVS
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Julian Anastasov <ja@ssi.bg>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/protocol.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-#include <asm/system.h>
-#include <linux/stat.h>
-#include <linux/proc_fs.h>
-
-#include <net/ip_vs.h>
-
-
-/*
- * IPVS protocols can only be registered/unregistered when the ipvs
- * module is loaded/unloaded, so no lock is needed in accessing the
- * ipvs protocol table.
- */
-
-#define IP_VS_PROTO_TAB_SIZE		32	/* must be power of 2 */
-#define IP_VS_PROTO_HASH(proto)		((proto) & (IP_VS_PROTO_TAB_SIZE-1))
-
-static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
-
-
-/*
- *	register an ipvs protocol
- */
-static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
-{
-	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
-
-	pp->next = ip_vs_proto_table[hash];
-	ip_vs_proto_table[hash] = pp;
-
-	if (pp->init != NULL)
-		pp->init(pp);
-
-	return 0;
-}
-
-
-/*
- *	unregister an ipvs protocol
- */
-static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
-{
-	struct ip_vs_protocol **pp_p;
-	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
-
-	pp_p = &ip_vs_proto_table[hash];
-	for (; *pp_p; pp_p = &(*pp_p)->next) {
-		if (*pp_p == pp) {
-			*pp_p = pp->next;
-			if (pp->exit != NULL)
-				pp->exit(pp);
-			return 0;
-		}
-	}
-
-	return -ESRCH;
-}
-
-
-/*
- *	get ip_vs_protocol object by its proto.
- */
-struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
-{
-	struct ip_vs_protocol *pp;
-	unsigned hash = IP_VS_PROTO_HASH(proto);
-
-	for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) {
-		if (pp->protocol == proto)
-			return pp;
-	}
-
-	return NULL;
-}
-
-
-/*
- *	Propagate event for state change to all protocols
- */
-void ip_vs_protocol_timeout_change(int flags)
-{
-	struct ip_vs_protocol *pp;
-	int i;
-
-	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
-		for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
-			if (pp->timeout_change)
-				pp->timeout_change(pp, flags);
-		}
-	}
-}
-
-
-int *
-ip_vs_create_timeout_table(int *table, int size)
-{
-	return kmemdup(table, size, GFP_ATOMIC);
-}
-
-
-/*
- *	Set timeout value for state specified by name
- */
-int
-ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to)
-{
-	int i;
-
-	if (!table || !name || !to)
-		return -EINVAL;
-
-	for (i = 0; i < num; i++) {
-		if (strcmp(names[i], name))
-			continue;
-		table[i] = to * HZ;
-		return 0;
-	}
-	return -ENOENT;
-}
-
-
-const char * ip_vs_state_name(__u16 proto, int state)
-{
-	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
-
-	if (pp == NULL || pp->state_name == NULL)
-		return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
-	return pp->state_name(state);
-}
-
-
-static void
-ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
-			     const struct sk_buff *skb,
-			     int offset,
-			     const char *msg)
-{
-	char buf[128];
-	struct iphdr _iph, *ih;
-
-	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
-	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
-	else if (ih->frag_off & htons(IP_OFFSET))
-		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
-			pp->name, NIPQUAD(ih->saddr),
-			NIPQUAD(ih->daddr));
-	else {
-		__be16 _ports[2], *pptr
-;
-		pptr = skb_header_pointer(skb, offset + ih->ihl*4,
-					  sizeof(_ports), _ports);
-		if (pptr == NULL)
-			sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u",
-				pp->name,
-				NIPQUAD(ih->saddr),
-				NIPQUAD(ih->daddr));
-		else
-			sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u",
-				pp->name,
-				NIPQUAD(ih->saddr),
-				ntohs(pptr[0]),
-				NIPQUAD(ih->daddr),
-				ntohs(pptr[1]));
-	}
-
-	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-static void
-ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
-			     const struct sk_buff *skb,
-			     int offset,
-			     const char *msg)
-{
-	char buf[192];
-	struct ipv6hdr _iph, *ih;
-
-	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
-	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
-	else if (ih->nexthdr == IPPROTO_FRAGMENT)
-		sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag",
-			pp->name, NIP6(ih->saddr),
-			NIP6(ih->daddr));
-	else {
-		__be16 _ports[2], *pptr;
-
-		pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
-					  sizeof(_ports), _ports);
-		if (pptr == NULL)
-			sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT,
-				pp->name,
-				NIP6(ih->saddr),
-				NIP6(ih->daddr));
-		else
-			sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u",
-				pp->name,
-				NIP6(ih->saddr),
-				ntohs(pptr[0]),
-				NIP6(ih->daddr),
-				ntohs(pptr[1]));
-	}
-
-	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
-}
-#endif
-
-
-void
-ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
-			  const struct sk_buff *skb,
-			  int offset,
-			  const char *msg)
-{
-#ifdef CONFIG_IP_VS_IPV6
-	if (skb->protocol == htons(ETH_P_IPV6))
-		ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
-	else
-#endif
-		ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
-}
-
-
-int __init ip_vs_protocol_init(void)
-{
-	char protocols[64];
-#define REGISTER_PROTOCOL(p)			\
-	do {					\
-		register_ip_vs_protocol(p);	\
-		strcat(protocols, ", ");	\
-		strcat(protocols, (p)->name);	\
-	} while (0)
-
-	protocols[0] = '\0';
-	protocols[2] = '\0';
-#ifdef CONFIG_IP_VS_PROTO_TCP
-	REGISTER_PROTOCOL(&ip_vs_protocol_tcp);
-#endif
-#ifdef CONFIG_IP_VS_PROTO_UDP
-	REGISTER_PROTOCOL(&ip_vs_protocol_udp);
-#endif
-#ifdef CONFIG_IP_VS_PROTO_AH
-	REGISTER_PROTOCOL(&ip_vs_protocol_ah);
-#endif
-#ifdef CONFIG_IP_VS_PROTO_ESP
-	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
-#endif
-	IP_VS_INFO("Registered protocols (%s)\n", &protocols[2]);
-
-	return 0;
-}
-
-
-void ip_vs_protocol_cleanup(void)
-{
-	struct ip_vs_protocol *pp;
-	int i;
-
-	/* unregister all the ipvs protocols */
-	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
-		while ((pp = ip_vs_proto_table[i]) != NULL)
-			unregister_ip_vs_protocol(pp);
-	}
-}
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
deleted file mode 100644
index 80ab0c8e5b4..00000000000
--- a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * ip_vs_proto_ah_esp.c:	AH/ESP IPSec load balancing support for IPVS
- *
- * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
- *		Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		version 2 as published by the Free Software Foundation;
- *
- */
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-
-#include <net/ip_vs.h>
-
-
-/* TODO:
-
-struct isakmp_hdr {
-	__u8		icookie[8];
-	__u8		rcookie[8];
-	__u8		np;
-	__u8		version;
-	__u8		xchgtype;
-	__u8		flags;
-	__u32		msgid;
-	__u32		length;
-};
-
-*/
-
-#define PORT_ISAKMP	500
-
-
-static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		   const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		   int inverse)
-{
-	struct ip_vs_conn *cp;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
-				       &iph->saddr,
-				       htons(PORT_ISAKMP),
-				       &iph->daddr,
-				       htons(PORT_ISAKMP));
-	} else {
-		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
-				       &iph->daddr,
-				       htons(PORT_ISAKMP),
-				       &iph->saddr,
-				       htons(PORT_ISAKMP));
-	}
-
-	if (!cp) {
-		/*
-		 * We are not sure if the packet is from our
-		 * service, so our conn_schedule hook should return NF_ACCEPT
-		 */
-		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
-			      "%s%s %s->%s\n",
-			      inverse ? "ICMP+" : "",
-			      pp->name,
-			      IP_VS_DBG_ADDR(af, &iph->saddr),
-			      IP_VS_DBG_ADDR(af, &iph->daddr));
-	}
-
-	return cp;
-}
-
-
-static struct ip_vs_conn *
-ah_esp_conn_out_get(int af, const struct sk_buff *skb,
-		    struct ip_vs_protocol *pp,
-		    const struct ip_vs_iphdr *iph,
-		    unsigned int proto_off,
-		    int inverse)
-{
-	struct ip_vs_conn *cp;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
-					&iph->saddr,
-					htons(PORT_ISAKMP),
-					&iph->daddr,
-					htons(PORT_ISAKMP));
-	} else {
-		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
-					&iph->daddr,
-					htons(PORT_ISAKMP),
-					&iph->saddr,
-					htons(PORT_ISAKMP));
-	}
-
-	if (!cp) {
-		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
-			      "%s%s %s->%s\n",
-			      inverse ? "ICMP+" : "",
-			      pp->name,
-			      IP_VS_DBG_ADDR(af, &iph->saddr),
-			      IP_VS_DBG_ADDR(af, &iph->daddr));
-	}
-
-	return cp;
-}
-
-
-static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
-		     int *verdict, struct ip_vs_conn **cpp)
-{
-	/*
-	 * AH/ESP is only related traffic. Pass the packet to IP stack.
-	 */
-	*verdict = NF_ACCEPT;
-	return 0;
-}
-
-
-static void
-ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
-		       int offset, const char *msg)
-{
-	char buf[256];
-	struct iphdr _iph, *ih;
-
-	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
-	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
-	else
-		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
-			pp->name, NIPQUAD(ih->saddr),
-			NIPQUAD(ih->daddr));
-
-	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-static void
-ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
-		       int offset, const char *msg)
-{
-	char buf[256];
-	struct ipv6hdr _iph, *ih;
-
-	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
-	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
-	else
-		sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT,
-			pp->name, NIP6(ih->saddr),
-			NIP6(ih->daddr));
-
-	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
-}
-#endif
-
-static void
-ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
-		    int offset, const char *msg)
-{
-#ifdef CONFIG_IP_VS_IPV6
-	if (skb->protocol == htons(ETH_P_IPV6))
-		ah_esp_debug_packet_v6(pp, skb, offset, msg);
-	else
-#endif
-		ah_esp_debug_packet_v4(pp, skb, offset, msg);
-}
-
-
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-#ifdef CONFIG_IP_VS_PROTO_AH
-struct ip_vs_protocol ip_vs_protocol_ah = {
-	.name =			"AH",
-	.protocol =		IPPROTO_AH,
-	.num_states =		1,
-	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
-	.conn_schedule =	ah_esp_conn_schedule,
-	.conn_in_get =		ah_esp_conn_in_get,
-	.conn_out_get =		ah_esp_conn_out_get,
-	.snat_handler =		NULL,
-	.dnat_handler =		NULL,
-	.csum_check =		NULL,
-	.state_transition =	NULL,
-	.register_app =		NULL,
-	.unregister_app =	NULL,
-	.app_conn_bind =	NULL,
-	.debug_packet =		ah_esp_debug_packet,
-	.timeout_change =	NULL,		/* ISAKMP */
-	.set_state_timeout =	NULL,
-};
-#endif
-
-#ifdef CONFIG_IP_VS_PROTO_ESP
-struct ip_vs_protocol ip_vs_protocol_esp = {
-	.name =			"ESP",
-	.protocol =		IPPROTO_ESP,
-	.num_states =		1,
-	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
-	.conn_schedule =	ah_esp_conn_schedule,
-	.conn_in_get =		ah_esp_conn_in_get,
-	.conn_out_get =		ah_esp_conn_out_get,
-	.snat_handler =		NULL,
-	.dnat_handler =		NULL,
-	.csum_check =		NULL,
-	.state_transition =	NULL,
-	.register_app =		NULL,
-	.unregister_app =	NULL,
-	.app_conn_bind =	NULL,
-	.debug_packet =		ah_esp_debug_packet,
-	.timeout_change =	NULL,		/* ISAKMP */
-};
-#endif
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
deleted file mode 100644
index dd4566ea2bf..00000000000
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ /dev/null
@@ -1,732 +0,0 @@
-/*
- * ip_vs_proto_tcp.c:	TCP load balancing support for IPVS
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Julian Anastasov <ja@ssi.bg>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>                  /* for tcphdr */
-#include <net/ip.h>
-#include <net/tcp.h>                    /* for csum_tcpudp_magic */
-#include <net/ip6_checksum.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-
-#include <net/ip_vs.h>
-
-
-static struct ip_vs_conn *
-tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->saddr, pptr[0],
-					 &iph->daddr, pptr[1]);
-	} else {
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->daddr, pptr[1],
-					 &iph->saddr, pptr[0]);
-	}
-}
-
-static struct ip_vs_conn *
-tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		 int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->saddr, pptr[0],
-					  &iph->daddr, pptr[1]);
-	} else {
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->daddr, pptr[1],
-					  &iph->saddr, pptr[0]);
-	}
-}
-
-
-static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
-		  int *verdict, struct ip_vs_conn **cpp)
-{
-	struct ip_vs_service *svc;
-	struct tcphdr _tcph, *th;
-	struct ip_vs_iphdr iph;
-
-	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-
-	th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		*verdict = NF_DROP;
-		return 0;
-	}
-
-	if (th->syn &&
-	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
-				     th->dest))) {
-		if (ip_vs_todrop()) {
-			/*
-			 * It seems that we are very loaded.
-			 * We have to drop this packet :(
-			 */
-			ip_vs_service_put(svc);
-			*verdict = NF_DROP;
-			return 0;
-		}
-
-		/*
-		 * Let the virtual server select a real server for the
-		 * incoming connection, and create a connection entry.
-		 */
-		*cpp = ip_vs_schedule(svc, skb);
-		if (!*cpp) {
-			*verdict = ip_vs_leave(svc, skb, pp);
-			return 0;
-		}
-		ip_vs_service_put(svc);
-	}
-	return 1;
-}
-
-
-static inline void
-tcp_fast_csum_update(int af, struct tcphdr *tcph,
-		     const union nf_inet_addr *oldip,
-		     const union nf_inet_addr *newip,
-		     __be16 oldport, __be16 newport)
-{
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		tcph->check =
-			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
-					 ip_vs_check_diff2(oldport, newport,
-						~csum_unfold(tcph->check))));
-	else
-#endif
-	tcph->check =
-		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
-				 ip_vs_check_diff2(oldport, newport,
-						~csum_unfold(tcph->check))));
-}
-
-
-static inline void
-tcp_partial_csum_update(int af, struct tcphdr *tcph,
-		     const union nf_inet_addr *oldip,
-		     const union nf_inet_addr *newip,
-		     __be16 oldlen, __be16 newlen)
-{
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		tcph->check =
-			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
-					 ip_vs_check_diff2(oldlen, newlen,
-						~csum_unfold(tcph->check))));
-	else
-#endif
-	tcph->check =
-		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
-				ip_vs_check_diff2(oldlen, newlen,
-						~csum_unfold(tcph->check))));
-}
-
-
-static int
-tcp_snat_handler(struct sk_buff *skb,
-		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
-{
-	struct tcphdr *tcph;
-	unsigned int tcphoff;
-	int oldlen;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (cp->af == AF_INET6)
-		tcphoff = sizeof(struct ipv6hdr);
-	else
-#endif
-		tcphoff = ip_hdrlen(skb);
-	oldlen = skb->len - tcphoff;
-
-	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
-		return 0;
-
-	if (unlikely(cp->app != NULL)) {
-		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
-			return 0;
-
-		/* Call application helper if needed */
-		if (!ip_vs_app_pkt_out(cp, skb))
-			return 0;
-	}
-
-	tcph = (void *)skb_network_header(skb) + tcphoff;
-	tcph->source = cp->vport;
-
-	/* Adjust TCP checksums */
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
-					htonl(oldlen),
-					htonl(skb->len - tcphoff));
-	} else if (!cp->app) {
-		/* Only port and addr are changed, do fast csum update */
-		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
-				     cp->dport, cp->vport);
-		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = CHECKSUM_NONE;
-	} else {
-		/* full checksum calculation */
-		tcph->check = 0;
-		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-#ifdef CONFIG_IP_VS_IPV6
-		if (cp->af == AF_INET6)
-			tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
-						      &cp->caddr.in6,
-						      skb->len - tcphoff,
-						      cp->protocol, skb->csum);
-		else
-#endif
-			tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
-							cp->caddr.ip,
-							skb->len - tcphoff,
-							cp->protocol,
-							skb->csum);
-
-		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
-			  pp->name, tcph->check,
-			  (char*)&(tcph->check) - (char*)tcph);
-	}
-	return 1;
-}
-
-
-static int
-tcp_dnat_handler(struct sk_buff *skb,
-		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
-{
-	struct tcphdr *tcph;
-	unsigned int tcphoff;
-	int oldlen;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (cp->af == AF_INET6)
-		tcphoff = sizeof(struct ipv6hdr);
-	else
-#endif
-		tcphoff = ip_hdrlen(skb);
-	oldlen = skb->len - tcphoff;
-
-	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
-		return 0;
-
-	if (unlikely(cp->app != NULL)) {
-		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
-			return 0;
-
-		/*
-		 *	Attempt ip_vs_app call.
-		 *	It will fix ip_vs_conn and iph ack_seq stuff
-		 */
-		if (!ip_vs_app_pkt_in(cp, skb))
-			return 0;
-	}
-
-	tcph = (void *)skb_network_header(skb) + tcphoff;
-	tcph->dest = cp->dport;
-
-	/*
-	 *	Adjust TCP checksums
-	 */
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
-					htonl(oldlen),
-					htonl(skb->len - tcphoff));
-	} else if (!cp->app) {
-		/* Only port and addr are changed, do fast csum update */
-		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
-				     cp->vport, cp->dport);
-		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = CHECKSUM_NONE;
-	} else {
-		/* full checksum calculation */
-		tcph->check = 0;
-		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-#ifdef CONFIG_IP_VS_IPV6
-		if (cp->af == AF_INET6)
-			tcph->check = csum_ipv6_magic(&cp->caddr.in6,
-						      &cp->daddr.in6,
-						      skb->len - tcphoff,
-						      cp->protocol, skb->csum);
-		else
-#endif
-			tcph->check = csum_tcpudp_magic(cp->caddr.ip,
-							cp->daddr.ip,
-							skb->len - tcphoff,
-							cp->protocol,
-							skb->csum);
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
-	return 1;
-}
-
-
-static int
-tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
-{
-	unsigned int tcphoff;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		tcphoff = sizeof(struct ipv6hdr);
-	else
-#endif
-		tcphoff = ip_hdrlen(skb);
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_NONE:
-		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-	case CHECKSUM_COMPLETE:
-#ifdef CONFIG_IP_VS_IPV6
-		if (af == AF_INET6) {
-			if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-					    &ipv6_hdr(skb)->daddr,
-					    skb->len - tcphoff,
-					    ipv6_hdr(skb)->nexthdr,
-					    skb->csum)) {
-				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
-						 "Failed checksum for");
-				return 0;
-			}
-		} else
-#endif
-			if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
-					      ip_hdr(skb)->daddr,
-					      skb->len - tcphoff,
-					      ip_hdr(skb)->protocol,
-					      skb->csum)) {
-				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
-						 "Failed checksum for");
-				return 0;
-			}
-		break;
-	default:
-		/* No need to checksum. */
-		break;
-	}
-
-	return 1;
-}
-
-
-#define TCP_DIR_INPUT		0
-#define TCP_DIR_OUTPUT		4
-#define TCP_DIR_INPUT_ONLY	8
-
-static const int tcp_state_off[IP_VS_DIR_LAST] = {
-	[IP_VS_DIR_INPUT]		=	TCP_DIR_INPUT,
-	[IP_VS_DIR_OUTPUT]		=	TCP_DIR_OUTPUT,
-	[IP_VS_DIR_INPUT_ONLY]		=	TCP_DIR_INPUT_ONLY,
-};
-
-/*
- *	Timeout table[state]
- */
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
-	[IP_VS_TCP_S_NONE]		=	2*HZ,
-	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
-	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
-	[IP_VS_TCP_S_SYN_RECV]		=	1*60*HZ,
-	[IP_VS_TCP_S_FIN_WAIT]		=	2*60*HZ,
-	[IP_VS_TCP_S_TIME_WAIT]		=	2*60*HZ,
-	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
-	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
-	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
-	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
-	[IP_VS_TCP_S_SYNACK]		=	120*HZ,
-	[IP_VS_TCP_S_LAST]		=	2*HZ,
-};
-
-static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
-	[IP_VS_TCP_S_NONE]		=	"NONE",
-	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
-	[IP_VS_TCP_S_SYN_SENT]		=	"SYN_SENT",
-	[IP_VS_TCP_S_SYN_RECV]		=	"SYN_RECV",
-	[IP_VS_TCP_S_FIN_WAIT]		=	"FIN_WAIT",
-	[IP_VS_TCP_S_TIME_WAIT]		=	"TIME_WAIT",
-	[IP_VS_TCP_S_CLOSE]		=	"CLOSE",
-	[IP_VS_TCP_S_CLOSE_WAIT]	=	"CLOSE_WAIT",
-	[IP_VS_TCP_S_LAST_ACK]		=	"LAST_ACK",
-	[IP_VS_TCP_S_LISTEN]		=	"LISTEN",
-	[IP_VS_TCP_S_SYNACK]		=	"SYNACK",
-	[IP_VS_TCP_S_LAST]		=	"BUG!",
-};
-
-#define sNO IP_VS_TCP_S_NONE
-#define sES IP_VS_TCP_S_ESTABLISHED
-#define sSS IP_VS_TCP_S_SYN_SENT
-#define sSR IP_VS_TCP_S_SYN_RECV
-#define sFW IP_VS_TCP_S_FIN_WAIT
-#define sTW IP_VS_TCP_S_TIME_WAIT
-#define sCL IP_VS_TCP_S_CLOSE
-#define sCW IP_VS_TCP_S_CLOSE_WAIT
-#define sLA IP_VS_TCP_S_LAST_ACK
-#define sLI IP_VS_TCP_S_LISTEN
-#define sSA IP_VS_TCP_S_SYNACK
-
-struct tcp_states_t {
-	int next_state[IP_VS_TCP_S_LAST];
-};
-
-static const char * tcp_state_name(int state)
-{
-	if (state >= IP_VS_TCP_S_LAST)
-		return "ERR!";
-	return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
-}
-
-static struct tcp_states_t tcp_states [] = {
-/*	INPUT */
-/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
-/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
-/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
-/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
-/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
-
-/*	OUTPUT */
-/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
-/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
-/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
-/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
-/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
-
-/*	INPUT-ONLY */
-/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
-/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
-/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
-/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
-/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
-};
-
-static struct tcp_states_t tcp_states_dos [] = {
-/*	INPUT */
-/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
-/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
-/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
-/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
-/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
-
-/*	OUTPUT */
-/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
-/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
-/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
-/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
-/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
-
-/*	INPUT-ONLY */
-/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
-/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
-/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
-/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
-/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
-};
-
-static struct tcp_states_t *tcp_state_table = tcp_states;
-
-
-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
-{
-	int on = (flags & 1);		/* secure_tcp */
-
-	/*
-	** FIXME: change secure_tcp to independent sysctl var
-	** or make it per-service or per-app because it is valid
-	** for most if not for all of the applications. Something
-	** like "capabilities" (flags) for each object.
-	*/
-	tcp_state_table = (on? tcp_states_dos : tcp_states);
-}
-
-static int
-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
-				       tcp_state_name_table, sname, to);
-}
-
-static inline int tcp_state_idx(struct tcphdr *th)
-{
-	if (th->rst)
-		return 3;
-	if (th->syn)
-		return 0;
-	if (th->fin)
-		return 1;
-	if (th->ack)
-		return 2;
-	return -1;
-}
-
-static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
-	      int direction, struct tcphdr *th)
-{
-	int state_idx;
-	int new_state = IP_VS_TCP_S_CLOSE;
-	int state_off = tcp_state_off[direction];
-
-	/*
-	 *    Update state offset to INPUT_ONLY if necessary
-	 *    or delete NO_OUTPUT flag if output packet detected
-	 */
-	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
-		if (state_off == TCP_DIR_OUTPUT)
-			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
-		else
-			state_off = TCP_DIR_INPUT_ONLY;
-	}
-
-	if ((state_idx = tcp_state_idx(th)) < 0) {
-		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
-		goto tcp_state_out;
-	}
-
-	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
-
-  tcp_state_out:
-	if (new_state != cp->state) {
-		struct ip_vs_dest *dest = cp->dest;
-
-		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
-			      "%s:%d state: %s->%s conn->refcnt:%d\n",
-			      pp->name,
-			      ((state_off == TCP_DIR_OUTPUT) ?
-			       "output " : "input "),
-			      th->syn ? 'S' : '.',
-			      th->fin ? 'F' : '.',
-			      th->ack ? 'A' : '.',
-			      th->rst ? 'R' : '.',
-			      IP_VS_DBG_ADDR(cp->af, &cp->daddr),
-			      ntohs(cp->dport),
-			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
-			      ntohs(cp->cport),
-			      tcp_state_name(cp->state),
-			      tcp_state_name(new_state),
-			      atomic_read(&cp->refcnt));
-
-		if (dest) {
-			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-			    (new_state != IP_VS_TCP_S_ESTABLISHED)) {
-				atomic_dec(&dest->activeconns);
-				atomic_inc(&dest->inactconns);
-				cp->flags |= IP_VS_CONN_F_INACTIVE;
-			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
-				   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
-				atomic_inc(&dest->activeconns);
-				atomic_dec(&dest->inactconns);
-				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
-			}
-		}
-	}
-
-	cp->timeout = pp->timeout_table[cp->state = new_state];
-}
-
-
-/*
- *	Handle state transitions
- */
-static int
-tcp_state_transition(struct ip_vs_conn *cp, int direction,
-		     const struct sk_buff *skb,
-		     struct ip_vs_protocol *pp)
-{
-	struct tcphdr _tcph, *th;
-
-#ifdef CONFIG_IP_VS_IPV6
-	int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
-#else
-	int ihl = ip_hdrlen(skb);
-#endif
-
-	th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
-	if (th == NULL)
-		return 0;
-
-	spin_lock(&cp->lock);
-	set_tcp_state(pp, cp, direction, th);
-	spin_unlock(&cp->lock);
-
-	return 1;
-}
-
-
-/*
- *	Hash table for TCP application incarnations
- */
-#define	TCP_APP_TAB_BITS	4
-#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
-#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
-
-static inline __u16 tcp_app_hashkey(__be16 port)
-{
-	return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
-		& TCP_APP_TAB_MASK;
-}
-
-
-static int tcp_register_app(struct ip_vs_app *inc)
-{
-	struct ip_vs_app *i;
-	__u16 hash;
-	__be16 port = inc->port;
-	int ret = 0;
-
-	hash = tcp_app_hashkey(port);
-
-	spin_lock_bh(&tcp_app_lock);
-	list_for_each_entry(i, &tcp_apps[hash], p_list) {
-		if (i->port == port) {
-			ret = -EEXIST;
-			goto out;
-		}
-	}
-	list_add(&inc->p_list, &tcp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_tcp.appcnt);
-
-  out:
-	spin_unlock_bh(&tcp_app_lock);
-	return ret;
-}
-
-
-static void
-tcp_unregister_app(struct ip_vs_app *inc)
-{
-	spin_lock_bh(&tcp_app_lock);
-	atomic_dec(&ip_vs_protocol_tcp.appcnt);
-	list_del(&inc->p_list);
-	spin_unlock_bh(&tcp_app_lock);
-}
-
-
-static int
-tcp_app_conn_bind(struct ip_vs_conn *cp)
-{
-	int hash;
-	struct ip_vs_app *inc;
-	int result = 0;
-
-	/* Default binding: bind app only for NAT */
-	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
-		return 0;
-
-	/* Lookup application incarnations and bind the right one */
-	hash = tcp_app_hashkey(cp->vport);
-
-	spin_lock(&tcp_app_lock);
-	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
-		if (inc->port == cp->vport) {
-			if (unlikely(!ip_vs_app_inc_get(inc)))
-				break;
-			spin_unlock(&tcp_app_lock);
-
-			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
-				      "%s:%u to app %s on port %u\n",
-				      __func__,
-				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
-				      ntohs(cp->cport),
-				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
-				      ntohs(cp->vport),
-				      inc->name, ntohs(inc->port));
-
-			cp->app = inc;
-			if (inc->init_conn)
-				result = inc->init_conn(inc, cp);
-			goto out;
-		}
-	}
-	spin_unlock(&tcp_app_lock);
-
-  out:
-	return result;
-}
-
-
-/*
- *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
- */
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
-{
-	spin_lock(&cp->lock);
-	cp->state = IP_VS_TCP_S_LISTEN;
-	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
-	spin_unlock(&cp->lock);
-}
-
-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
-{
-	IP_VS_INIT_HASH_TABLE(tcp_apps);
-	pp->timeout_table = tcp_timeouts;
-}
-
-
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
-{
-}
-
-
-struct ip_vs_protocol ip_vs_protocol_tcp = {
-	.name =			"TCP",
-	.protocol =		IPPROTO_TCP,
-	.num_states =		IP_VS_TCP_S_LAST,
-	.dont_defrag =		0,
-	.appcnt =		ATOMIC_INIT(0),
-	.init =			ip_vs_tcp_init,
-	.exit =			ip_vs_tcp_exit,
-	.register_app =		tcp_register_app,
-	.unregister_app =	tcp_unregister_app,
-	.conn_schedule =	tcp_conn_schedule,
-	.conn_in_get =		tcp_conn_in_get,
-	.conn_out_get =		tcp_conn_out_get,
-	.snat_handler =		tcp_snat_handler,
-	.dnat_handler =		tcp_dnat_handler,
-	.csum_check =		tcp_csum_check,
-	.state_name =		tcp_state_name,
-	.state_transition =	tcp_state_transition,
-	.app_conn_bind =	tcp_app_conn_bind,
-	.debug_packet =		ip_vs_tcpudp_debug_packet,
-	.timeout_change =	tcp_timeout_change,
-	.set_state_timeout =	tcp_set_state_timeout,
-};
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
deleted file mode 100644
index 6eb6039d634..00000000000
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Julian Anastasov <ja@ssi.bg>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/kernel.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/udp.h>
-
-#include <net/ip_vs.h>
-#include <net/ip.h>
-#include <net/ip6_checksum.h>
-
-static struct ip_vs_conn *
-udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		int inverse)
-{
-	struct ip_vs_conn *cp;
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_in_get(af, iph->protocol,
-				       &iph->saddr, pptr[0],
-				       &iph->daddr, pptr[1]);
-	} else {
-		cp = ip_vs_conn_in_get(af, iph->protocol,
-				       &iph->daddr, pptr[1],
-				       &iph->saddr, pptr[0]);
-	}
-
-	return cp;
-}
-
-
-static struct ip_vs_conn *
-udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		 int inverse)
-{
-	struct ip_vs_conn *cp;
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_out_get(af, iph->protocol,
-					&iph->saddr, pptr[0],
-					&iph->daddr, pptr[1]);
-	} else {
-		cp = ip_vs_conn_out_get(af, iph->protocol,
-					&iph->daddr, pptr[1],
-					&iph->saddr, pptr[0]);
-	}
-
-	return cp;
-}
-
-
-static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
-		  int *verdict, struct ip_vs_conn **cpp)
-{
-	struct ip_vs_service *svc;
-	struct udphdr _udph, *uh;
-	struct ip_vs_iphdr iph;
-
-	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
-
-	uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
-	if (uh == NULL) {
-		*verdict = NF_DROP;
-		return 0;
-	}
-
-	svc = ip_vs_service_get(af, skb->mark, iph.protocol,
-				&iph.daddr, uh->dest);
-	if (svc) {
-		if (ip_vs_todrop()) {
-			/*
-			 * It seems that we are very loaded.
-			 * We have to drop this packet :(
-			 */
-			ip_vs_service_put(svc);
-			*verdict = NF_DROP;
-			return 0;
-		}
-
-		/*
-		 * Let the virtual server select a real server for the
-		 * incoming connection, and create a connection entry.
-		 */
-		*cpp = ip_vs_schedule(svc, skb);
-		if (!*cpp) {
-			*verdict = ip_vs_leave(svc, skb, pp);
-			return 0;
-		}
-		ip_vs_service_put(svc);
-	}
-	return 1;
-}
-
-
-static inline void
-udp_fast_csum_update(int af, struct udphdr *uhdr,
-		     const union nf_inet_addr *oldip,
-		     const union nf_inet_addr *newip,
-		     __be16 oldport, __be16 newport)
-{
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		uhdr->check =
-			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
-					 ip_vs_check_diff2(oldport, newport,
-						~csum_unfold(uhdr->check))));
-	else
-#endif
-		uhdr->check =
-			csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
-					 ip_vs_check_diff2(oldport, newport,
-						~csum_unfold(uhdr->check))));
-	if (!uhdr->check)
-		uhdr->check = CSUM_MANGLED_0;
-}
-
-static inline void
-udp_partial_csum_update(int af, struct udphdr *uhdr,
-		     const union nf_inet_addr *oldip,
-		     const union nf_inet_addr *newip,
-		     __be16 oldlen, __be16 newlen)
-{
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		uhdr->check =
-			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
-					 ip_vs_check_diff2(oldlen, newlen,
-						~csum_unfold(uhdr->check))));
-	else
-#endif
-	uhdr->check =
-		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
-				ip_vs_check_diff2(oldlen, newlen,
-						~csum_unfold(uhdr->check))));
-}
-
-
-static int
-udp_snat_handler(struct sk_buff *skb,
-		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
-{
-	struct udphdr *udph;
-	unsigned int udphoff;
-	int oldlen;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (cp->af == AF_INET6)
-		udphoff = sizeof(struct ipv6hdr);
-	else
-#endif
-		udphoff = ip_hdrlen(skb);
-	oldlen = skb->len - udphoff;
-
-	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
-		return 0;
-
-	if (unlikely(cp->app != NULL)) {
-		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
-			return 0;
-
-		/*
-		 *	Call application helper if needed
-		 */
-		if (!ip_vs_app_pkt_out(cp, skb))
-			return 0;
-	}
-
-	udph = (void *)skb_network_header(skb) + udphoff;
-	udph->source = cp->vport;
-
-	/*
-	 *	Adjust UDP checksums
-	 */
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
-					htonl(oldlen),
-					htonl(skb->len - udphoff));
-	} else if (!cp->app && (udph->check != 0)) {
-		/* Only port and addr are changed, do fast csum update */
-		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
-				     cp->dport, cp->vport);
-		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = CHECKSUM_NONE;
-	} else {
-		/* full checksum calculation */
-		udph->check = 0;
-		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
-#ifdef CONFIG_IP_VS_IPV6
-		if (cp->af == AF_INET6)
-			udph->check = csum_ipv6_magic(&cp->vaddr.in6,
-						      &cp->caddr.in6,
-						      skb->len - udphoff,
-						      cp->protocol, skb->csum);
-		else
-#endif
-			udph->check = csum_tcpudp_magic(cp->vaddr.ip,
-							cp->caddr.ip,
-							skb->len - udphoff,
-							cp->protocol,
-							skb->csum);
-		if (udph->check == 0)
-			udph->check = CSUM_MANGLED_0;
-		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
-			  pp->name, udph->check,
-			  (char*)&(udph->check) - (char*)udph);
-	}
-	return 1;
-}
-
-
-static int
-udp_dnat_handler(struct sk_buff *skb,
-		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
-{
-	struct udphdr *udph;
-	unsigned int udphoff;
-	int oldlen;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (cp->af == AF_INET6)
-		udphoff = sizeof(struct ipv6hdr);
-	else
-#endif
-		udphoff = ip_hdrlen(skb);
-	oldlen = skb->len - udphoff;
-
-	/* csum_check requires unshared skb */
-	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
-		return 0;
-
-	if (unlikely(cp->app != NULL)) {
-		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
-			return 0;
-
-		/*
-		 *	Attempt ip_vs_app call.
-		 *	It will fix ip_vs_conn
-		 */
-		if (!ip_vs_app_pkt_in(cp, skb))
-			return 0;
-	}
-
-	udph = (void *)skb_network_header(skb) + udphoff;
-	udph->dest = cp->dport;
-
-	/*
-	 *	Adjust UDP checksums
-	 */
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
-					htonl(oldlen),
-					htonl(skb->len - udphoff));
-	} else if (!cp->app && (udph->check != 0)) {
-		/* Only port and addr are changed, do fast csum update */
-		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
-				     cp->vport, cp->dport);
-		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = CHECKSUM_NONE;
-	} else {
-		/* full checksum calculation */
-		udph->check = 0;
-		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
-#ifdef CONFIG_IP_VS_IPV6
-		if (cp->af == AF_INET6)
-			udph->check = csum_ipv6_magic(&cp->caddr.in6,
-						      &cp->daddr.in6,
-						      skb->len - udphoff,
-						      cp->protocol, skb->csum);
-		else
-#endif
-			udph->check = csum_tcpudp_magic(cp->caddr.ip,
-							cp->daddr.ip,
-							skb->len - udphoff,
-							cp->protocol,
-							skb->csum);
-		if (udph->check == 0)
-			udph->check = CSUM_MANGLED_0;
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
-	return 1;
-}
-
-
-static int
-udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
-{
-	struct udphdr _udph, *uh;
-	unsigned int udphoff;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		udphoff = sizeof(struct ipv6hdr);
-	else
-#endif
-		udphoff = ip_hdrlen(skb);
-
-	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
-	if (uh == NULL)
-		return 0;
-
-	if (uh->check != 0) {
-		switch (skb->ip_summed) {
-		case CHECKSUM_NONE:
-			skb->csum = skb_checksum(skb, udphoff,
-						 skb->len - udphoff, 0);
-		case CHECKSUM_COMPLETE:
-#ifdef CONFIG_IP_VS_IPV6
-			if (af == AF_INET6) {
-				if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-						    &ipv6_hdr(skb)->daddr,
-						    skb->len - udphoff,
-						    ipv6_hdr(skb)->nexthdr,
-						    skb->csum)) {
-					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
-							 "Failed checksum for");
-					return 0;
-				}
-			} else
-#endif
-				if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
-						      ip_hdr(skb)->daddr,
-						      skb->len - udphoff,
-						      ip_hdr(skb)->protocol,
-						      skb->csum)) {
-					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
-							 "Failed checksum for");
-					return 0;
-				}
-			break;
-		default:
-			/* No need to checksum. */
-			break;
-		}
-	}
-	return 1;
-}
-
-
-/*
- *	Note: the caller guarantees that only one of register_app,
- *	unregister_app or app_conn_bind is called each time.
- */
-
-#define	UDP_APP_TAB_BITS	4
-#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
-#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
-static inline __u16 udp_app_hashkey(__be16 port)
-{
-	return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
-		& UDP_APP_TAB_MASK;
-}
-
-
-static int udp_register_app(struct ip_vs_app *inc)
-{
-	struct ip_vs_app *i;
-	__u16 hash;
-	__be16 port = inc->port;
-	int ret = 0;
-
-	hash = udp_app_hashkey(port);
-
-
-	spin_lock_bh(&udp_app_lock);
-	list_for_each_entry(i, &udp_apps[hash], p_list) {
-		if (i->port == port) {
-			ret = -EEXIST;
-			goto out;
-		}
-	}
-	list_add(&inc->p_list, &udp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_udp.appcnt);
-
-  out:
-	spin_unlock_bh(&udp_app_lock);
-	return ret;
-}
-
-
-static void
-udp_unregister_app(struct ip_vs_app *inc)
-{
-	spin_lock_bh(&udp_app_lock);
-	atomic_dec(&ip_vs_protocol_udp.appcnt);
-	list_del(&inc->p_list);
-	spin_unlock_bh(&udp_app_lock);
-}
-
-
-static int udp_app_conn_bind(struct ip_vs_conn *cp)
-{
-	int hash;
-	struct ip_vs_app *inc;
-	int result = 0;
-
-	/* Default binding: bind app only for NAT */
-	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
-		return 0;
-
-	/* Lookup application incarnations and bind the right one */
-	hash = udp_app_hashkey(cp->vport);
-
-	spin_lock(&udp_app_lock);
-	list_for_each_entry(inc, &udp_apps[hash], p_list) {
-		if (inc->port == cp->vport) {
-			if (unlikely(!ip_vs_app_inc_get(inc)))
-				break;
-			spin_unlock(&udp_app_lock);
-
-			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
-				      "%s:%u to app %s on port %u\n",
-				      __func__,
-				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
-				      ntohs(cp->cport),
-				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
-				      ntohs(cp->vport),
-				      inc->name, ntohs(inc->port));
-
-			cp->app = inc;
-			if (inc->init_conn)
-				result = inc->init_conn(inc, cp);
-			goto out;
-		}
-	}
-	spin_unlock(&udp_app_lock);
-
-  out:
-	return result;
-}
-
-
-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
-	[IP_VS_UDP_S_NORMAL]		=	5*60*HZ,
-	[IP_VS_UDP_S_LAST]		=	2*HZ,
-};
-
-static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
-	[IP_VS_UDP_S_NORMAL]		=	"UDP",
-	[IP_VS_UDP_S_LAST]		=	"BUG!",
-};
-
-
-static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
-				       udp_state_name_table, sname, to);
-}
-
-static const char * udp_state_name(int state)
-{
-	if (state >= IP_VS_UDP_S_LAST)
-		return "ERR!";
-	return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
-}
-
-static int
-udp_state_transition(struct ip_vs_conn *cp, int direction,
-		     const struct sk_buff *skb,
-		     struct ip_vs_protocol *pp)
-{
-	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
-	return 1;
-}
-
-static void udp_init(struct ip_vs_protocol *pp)
-{
-	IP_VS_INIT_HASH_TABLE(udp_apps);
-	pp->timeout_table = udp_timeouts;
-}
-
-static void udp_exit(struct ip_vs_protocol *pp)
-{
-}
-
-
-struct ip_vs_protocol ip_vs_protocol_udp = {
-	.name =			"UDP",
-	.protocol =		IPPROTO_UDP,
-	.num_states =		IP_VS_UDP_S_LAST,
-	.dont_defrag =		0,
-	.init =			udp_init,
-	.exit =			udp_exit,
-	.conn_schedule =	udp_conn_schedule,
-	.conn_in_get =		udp_conn_in_get,
-	.conn_out_get =		udp_conn_out_get,
-	.snat_handler =		udp_snat_handler,
-	.dnat_handler =		udp_dnat_handler,
-	.csum_check =		udp_csum_check,
-	.state_transition =	udp_state_transition,
-	.state_name =		udp_state_name,
-	.register_app =		udp_register_app,
-	.unregister_app =	udp_unregister_app,
-	.app_conn_bind =	udp_app_conn_bind,
-	.debug_packet =		ip_vs_tcpudp_debug_packet,
-	.timeout_change =	NULL,
-	.set_state_timeout =	udp_set_state_timeout,
-};
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
deleted file mode 100644
index a22195f68ac..00000000000
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * IPVS:        Round-Robin Scheduling module
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Peter Kese <peter.kese@ijs.si>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Fixes/Changes:
- *     Wensong Zhang            :     changed the ip_vs_rr_schedule to return dest
- *     Julian Anastasov         :     fixed the NULL pointer access bug in debugging
- *     Wensong Zhang            :     changed some comestics things for debugging
- *     Wensong Zhang            :     changed for the d-linked destination list
- *     Wensong Zhang            :     added the ip_vs_rr_update_svc
- *     Wensong Zhang            :     added any dest with weight=0 is quiesced
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include <net/ip_vs.h>
-
-
-static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
-{
-	svc->sched_data = &svc->destinations;
-	return 0;
-}
-
-
-static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
-{
-	svc->sched_data = &svc->destinations;
-	return 0;
-}
-
-
-/*
- * Round-Robin Scheduling
- */
-static struct ip_vs_dest *
-ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct list_head *p, *q;
-	struct ip_vs_dest *dest;
-
-	IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n");
-
-	write_lock(&svc->sched_lock);
-	p = (struct list_head *)svc->sched_data;
-	p = p->next;
-	q = p;
-	do {
-		/* skip list head */
-		if (q == &svc->destinations) {
-			q = q->next;
-			continue;
-		}
-
-		dest = list_entry(q, struct ip_vs_dest, n_list);
-		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
-		    atomic_read(&dest->weight) > 0)
-			/* HIT */
-			goto out;
-		q = q->next;
-	} while (q != p);
-	write_unlock(&svc->sched_lock);
-	return NULL;
-
-  out:
-	svc->sched_data = q;
-	write_unlock(&svc->sched_lock);
-	IP_VS_DBG_BUF(6, "RR: server %s:%u "
-		      "activeconns %d refcnt %d weight %d\n",
-		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
-		      atomic_read(&dest->activeconns),
-		      atomic_read(&dest->refcnt), atomic_read(&dest->weight));
-
-	return dest;
-}
-
-
-static struct ip_vs_scheduler ip_vs_rr_scheduler = {
-	.name =			"rr",			/* name */
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list =		LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
-	.init_service =		ip_vs_rr_init_svc,
-	.update_service =	ip_vs_rr_update_svc,
-	.schedule =		ip_vs_rr_schedule,
-};
-
-static int __init ip_vs_rr_init(void)
-{
-	return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
-}
-
-static void __exit ip_vs_rr_cleanup(void)
-{
-	unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
-}
-
-module_init(ip_vs_rr_init);
-module_exit(ip_vs_rr_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
deleted file mode 100644
index a46ad9e3501..00000000000
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * IPVS         An implementation of the IP virtual server support for the
- *              LINUX operating system.  IPVS is now implemented as a module
- *              over the Netfilter framework. IPVS can be used to build a
- *              high-performance and highly available server based on a
- *              cluster of servers.
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Peter Kese <peter.kese@ijs.si>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <asm/string.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-
-#include <net/ip_vs.h>
-
-/*
- *  IPVS scheduler list
- */
-static LIST_HEAD(ip_vs_schedulers);
-
-/* lock for service table */
-static DEFINE_RWLOCK(__ip_vs_sched_lock);
-
-
-/*
- *  Bind a service with a scheduler
- */
-int ip_vs_bind_scheduler(struct ip_vs_service *svc,
-			 struct ip_vs_scheduler *scheduler)
-{
-	int ret;
-
-	if (svc == NULL) {
-		IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n");
-		return -EINVAL;
-	}
-	if (scheduler == NULL) {
-		IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n");
-		return -EINVAL;
-	}
-
-	svc->scheduler = scheduler;
-
-	if (scheduler->init_service) {
-		ret = scheduler->init_service(svc);
-		if (ret) {
-			IP_VS_ERR("ip_vs_bind_scheduler(): init error\n");
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
-
-/*
- *  Unbind a service with its scheduler
- */
-int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
-{
-	struct ip_vs_scheduler *sched;
-
-	if (svc == NULL) {
-		IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n");
-		return -EINVAL;
-	}
-
-	sched = svc->scheduler;
-	if (sched == NULL) {
-		IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n");
-		return -EINVAL;
-	}
-
-	if (sched->done_service) {
-		if (sched->done_service(svc) != 0) {
-			IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n");
-			return -EINVAL;
-		}
-	}
-
-	svc->scheduler = NULL;
-	return 0;
-}
-
-
-/*
- *  Get scheduler in the scheduler list by name
- */
-static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
-{
-	struct ip_vs_scheduler *sched;
-
-	IP_VS_DBG(2, "ip_vs_sched_getbyname(): sched_name \"%s\"\n",
-		  sched_name);
-
-	read_lock_bh(&__ip_vs_sched_lock);
-
-	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
-		/*
-		 * Test and get the modules atomically
-		 */
-		if (sched->module && !try_module_get(sched->module)) {
-			/*
-			 * This scheduler is just deleted
-			 */
-			continue;
-		}
-		if (strcmp(sched_name, sched->name)==0) {
-			/* HIT */
-			read_unlock_bh(&__ip_vs_sched_lock);
-			return sched;
-		}
-		if (sched->module)
-			module_put(sched->module);
-	}
-
-	read_unlock_bh(&__ip_vs_sched_lock);
-	return NULL;
-}
-
-
-/*
- *  Lookup scheduler and try to load it if it doesn't exist
- */
-struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
-{
-	struct ip_vs_scheduler *sched;
-
-	/*
-	 *  Search for the scheduler by sched_name
-	 */
-	sched = ip_vs_sched_getbyname(sched_name);
-
-	/*
-	 *  If scheduler not found, load the module and search again
-	 */
-	if (sched == NULL) {
-		request_module("ip_vs_%s", sched_name);
-		sched = ip_vs_sched_getbyname(sched_name);
-	}
-
-	return sched;
-}
-
-void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
-{
-	if (scheduler->module)
-		module_put(scheduler->module);
-}
-
-
-/*
- *  Register a scheduler in the scheduler list
- */
-int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
-{
-	struct ip_vs_scheduler *sched;
-
-	if (!scheduler) {
-		IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n");
-		return -EINVAL;
-	}
-
-	if (!scheduler->name) {
-		IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n");
-		return -EINVAL;
-	}
-
-	/* increase the module use count */
-	ip_vs_use_count_inc();
-
-	write_lock_bh(&__ip_vs_sched_lock);
-
-	if (!list_empty(&scheduler->n_list)) {
-		write_unlock_bh(&__ip_vs_sched_lock);
-		ip_vs_use_count_dec();
-		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
-			  "already linked\n", scheduler->name);
-		return -EINVAL;
-	}
-
-	/*
-	 *  Make sure that the scheduler with this name doesn't exist
-	 *  in the scheduler list.
-	 */
-	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
-		if (strcmp(scheduler->name, sched->name) == 0) {
-			write_unlock_bh(&__ip_vs_sched_lock);
-			ip_vs_use_count_dec();
-			IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
-					"already existed in the system\n",
-					scheduler->name);
-			return -EINVAL;
-		}
-	}
-	/*
-	 *	Add it into the d-linked scheduler list
-	 */
-	list_add(&scheduler->n_list, &ip_vs_schedulers);
-	write_unlock_bh(&__ip_vs_sched_lock);
-
-	IP_VS_INFO("[%s] scheduler registered.\n", scheduler->name);
-
-	return 0;
-}
-
-
-/*
- *  Unregister a scheduler from the scheduler list
- */
-int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
-{
-	if (!scheduler) {
-		IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n");
-		return -EINVAL;
-	}
-
-	write_lock_bh(&__ip_vs_sched_lock);
-	if (list_empty(&scheduler->n_list)) {
-		write_unlock_bh(&__ip_vs_sched_lock);
-		IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
-			  "is not in the list. failed\n", scheduler->name);
-		return -EINVAL;
-	}
-
-	/*
-	 *	Remove it from the d-linked scheduler list
-	 */
-	list_del(&scheduler->n_list);
-	write_unlock_bh(&__ip_vs_sched_lock);
-
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
-
-	IP_VS_INFO("[%s] scheduler unregistered.\n", scheduler->name);
-
-	return 0;
-}
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
deleted file mode 100644
index 7d2f22f04b8..00000000000
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * IPVS:        Shortest Expected Delay scheduling module
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-/*
- * The SED algorithm attempts to minimize each job's expected delay until
- * completion. The expected delay that the job will experience is
- * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
- * jobs on the ith server and Ui is the fixed service rate (weight) of
- * the ith server. The SED algorithm adopts a greedy policy that each does
- * what is in its own best interest, i.e. to join the queue which would
- * minimize its expected delay of completion.
- *
- * See the following paper for more information:
- * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
- * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
- * pages 986-994, 1988.
- *
- * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
- *
- * The difference between SED and WLC is that SED includes the incoming
- * job in the cost function (the increment of 1). SED may outperform
- * WLC, while scheduling big jobs under larger heterogeneous systems
- * (the server weight varies a lot).
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include <net/ip_vs.h>
-
-
-static inline unsigned int
-ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
-{
-	/*
-	 * We only use the active connection number in the cost
-	 * calculation here.
-	 */
-	return atomic_read(&dest->activeconns) + 1;
-}
-
-
-/*
- *	Weighted Least Connection scheduling
- */
-static struct ip_vs_dest *
-ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_dest *dest, *least;
-	unsigned int loh, doh;
-
-	IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n");
-
-	/*
-	 * We calculate the load of each dest server as follows:
-	 *	(server expected overhead) / dest->weight
-	 *
-	 * Remember -- no floats in kernel mode!!!
-	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
-	 *		  h1/w1 > h2/w2
-	 * if every weight is larger than zero.
-	 *
-	 * The server with weight=0 is quiesced and will not receive any
-	 * new connections.
-	 */
-
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
-		    atomic_read(&dest->weight) > 0) {
-			least = dest;
-			loh = ip_vs_sed_dest_overhead(least);
-			goto nextstage;
-		}
-	}
-	return NULL;
-
-	/*
-	 *    Find the destination with the least load.
-	 */
-  nextstage:
-	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
-		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
-			continue;
-		doh = ip_vs_sed_dest_overhead(dest);
-		if (loh * atomic_read(&dest->weight) >
-		    doh * atomic_read(&least->weight)) {
-			least = dest;
-			loh = doh;
-		}
-	}
-
-	IP_VS_DBG_BUF(6, "SED: server %s:%u "
-		      "activeconns %d refcnt %d weight %d overhead %d\n",
-		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
-		      atomic_read(&least->activeconns),
-		      atomic_read(&least->refcnt),
-		      atomic_read(&least->weight), loh);
-
-	return least;
-}
-
-
-static struct ip_vs_scheduler ip_vs_sed_scheduler =
-{
-	.name =			"sed",
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list =		LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
-	.schedule =		ip_vs_sed_schedule,
-};
-
-
-static int __init ip_vs_sed_init(void)
-{
-	return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
-}
-
-static void __exit ip_vs_sed_cleanup(void)
-{
-	unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
-}
-
-module_init(ip_vs_sed_init);
-module_exit(ip_vs_sed_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
deleted file mode 100644
index 1d96de27fef..00000000000
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * IPVS:        Source Hashing scheduling module
- *
- * Authors:     Wensong Zhang <wensong@gnuchina.org>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-/*
- * The sh algorithm is to select server by the hash key of source IP
- * address. The pseudo code is as follows:
- *
- *       n <- servernode[src_ip];
- *       if (n is dead) OR
- *          (n is overloaded) or (n.weight <= 0) then
- *                 return NULL;
- *
- *       return n;
- *
- * Notes that servernode is a 256-bucket hash table that maps the hash
- * index derived from packet source IP address to the current server
- * array. If the sh scheduler is used in cache cluster, it is good to
- * combine it with cache_bypass feature. When the statically assigned
- * server is dead or overloaded, the load balancer can bypass the cache
- * server and send requests to the original server directly.
- *
- */
-
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-
-#include <net/ip_vs.h>
-
-
-/*
- *      IPVS SH bucket
- */
-struct ip_vs_sh_bucket {
-	struct ip_vs_dest       *dest;          /* real server (cache) */
-};
-
-/*
- *     for IPVS SH entry hash table
- */
-#ifndef CONFIG_IP_VS_SH_TAB_BITS
-#define CONFIG_IP_VS_SH_TAB_BITS        8
-#endif
-#define IP_VS_SH_TAB_BITS               CONFIG_IP_VS_SH_TAB_BITS
-#define IP_VS_SH_TAB_SIZE               (1 << IP_VS_SH_TAB_BITS)
-#define IP_VS_SH_TAB_MASK               (IP_VS_SH_TAB_SIZE - 1)
-
-
-/*
- *	Returns hash value for IPVS SH entry
- */
-static inline unsigned ip_vs_sh_hashkey(__be32 addr)
-{
-	return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK;
-}
-
-
-/*
- *      Get ip_vs_dest associated with supplied parameters.
- */
-static inline struct ip_vs_dest *
-ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr)
-{
-	return (tbl[ip_vs_sh_hashkey(addr)]).dest;
-}
-
-
-/*
- *      Assign all the hash buckets of the specified table with the service.
- */
-static int
-ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
-{
-	int i;
-	struct ip_vs_sh_bucket *b;
-	struct list_head *p;
-	struct ip_vs_dest *dest;
-
-	b = tbl;
-	p = &svc->destinations;
-	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
-		if (list_empty(p)) {
-			b->dest = NULL;
-		} else {
-			if (p == &svc->destinations)
-				p = p->next;
-
-			dest = list_entry(p, struct ip_vs_dest, n_list);
-			atomic_inc(&dest->refcnt);
-			b->dest = dest;
-
-			p = p->next;
-		}
-		b++;
-	}
-	return 0;
-}
-
-
-/*
- *      Flush all the hash buckets of the specified table.
- */
-static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
-{
-	int i;
-	struct ip_vs_sh_bucket *b;
-
-	b = tbl;
-	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
-		if (b->dest) {
-			atomic_dec(&b->dest->refcnt);
-			b->dest = NULL;
-		}
-		b++;
-	}
-}
-
-
-static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_sh_bucket *tbl;
-
-	/* allocate the SH table for this service */
-	tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
-		      GFP_ATOMIC);
-	if (tbl == NULL) {
-		IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
-		return -ENOMEM;
-	}
-	svc->sched_data = tbl;
-	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
-		  "current service\n",
-		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
-
-	/* assign the hash buckets with the updated service */
-	ip_vs_sh_assign(tbl, svc);
-
-	return 0;
-}
-
-
-static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_sh_bucket *tbl = svc->sched_data;
-
-	/* got to clean up hash buckets here */
-	ip_vs_sh_flush(tbl);
-
-	/* release the table itself */
-	kfree(svc->sched_data);
-	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
-		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
-
-	return 0;
-}
-
-
-static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_sh_bucket *tbl = svc->sched_data;
-
-	/* got to clean up hash buckets here */
-	ip_vs_sh_flush(tbl);
-
-	/* assign the hash buckets with the updated service */
-	ip_vs_sh_assign(tbl, svc);
-
-	return 0;
-}
-
-
-/*
- *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
- *      consider that the server is overloaded here.
- */
-static inline int is_overloaded(struct ip_vs_dest *dest)
-{
-	return dest->flags & IP_VS_DEST_F_OVERLOAD;
-}
-
-
-/*
- *      Source Hashing scheduling
- */
-static struct ip_vs_dest *
-ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_dest *dest;
-	struct ip_vs_sh_bucket *tbl;
-	struct iphdr *iph = ip_hdr(skb);
-
-	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
-
-	tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
-	dest = ip_vs_sh_get(tbl, iph->saddr);
-	if (!dest
-	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
-	    || atomic_read(&dest->weight) <= 0
-	    || is_overloaded(dest)) {
-		return NULL;
-	}
-
-	IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
-		  "--> server %u.%u.%u.%u:%d\n",
-		  NIPQUAD(iph->saddr),
-		  NIPQUAD(dest->addr.ip),
-		  ntohs(dest->port));
-
-	return dest;
-}
-
-
-/*
- *      IPVS SH Scheduler structure
- */
-static struct ip_vs_scheduler ip_vs_sh_scheduler =
-{
-	.name =			"sh",
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list	 =		LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	0,
-#endif
-	.init_service =		ip_vs_sh_init_svc,
-	.done_service =		ip_vs_sh_done_svc,
-	.update_service =	ip_vs_sh_update_svc,
-	.schedule =		ip_vs_sh_schedule,
-};
-
-
-static int __init ip_vs_sh_init(void)
-{
-	return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
-}
-
-
-static void __exit ip_vs_sh_cleanup(void)
-{
-	unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
-}
-
-
-module_init(ip_vs_sh_init);
-module_exit(ip_vs_sh_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
deleted file mode 100644
index de5e7e118ee..00000000000
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ /dev/null
@@ -1,942 +0,0 @@
-/*
- * IPVS         An implementation of the IP virtual server support for the
- *              LINUX operating system.  IPVS is now implemented as a module
- *              over the NetFilter framework. IPVS can be used to build a
- *              high-performance and highly available server based on a
- *              cluster of servers.
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * ip_vs_sync:  sync connection info from master load balancer to backups
- *              through multicast
- *
- * Changes:
- *	Alexandre Cassen	:	Added master & backup support at a time.
- *	Alexandre Cassen	:	Added SyncID support for incoming sync
- *					messages filtering.
- *	Justin Ossevoort	:	Fix endian problem on sync message size.
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/inetdevice.h>
-#include <linux/net.h>
-#include <linux/completion.h>
-#include <linux/delay.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/igmp.h>                 /* for ip_mc_join_group */
-#include <linux/udp.h>
-#include <linux/err.h>
-#include <linux/kthread.h>
-#include <linux/wait.h>
-#include <linux/kernel.h>
-
-#include <net/ip.h>
-#include <net/sock.h>
-
-#include <net/ip_vs.h>
-
-#define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
-#define IP_VS_SYNC_PORT  8848          /* multicast port */
-
-
-/*
- *	IPVS sync connection entry
- */
-struct ip_vs_sync_conn {
-	__u8			reserved;
-
-	/* Protocol, addresses and port numbers */
-	__u8			protocol;       /* Which protocol (TCP/UDP) */
-	__be16			cport;
-	__be16                  vport;
-	__be16                  dport;
-	__be32                  caddr;          /* client address */
-	__be32                  vaddr;          /* virtual address */
-	__be32                  daddr;          /* destination address */
-
-	/* Flags and state transition */
-	__be16                  flags;          /* status flags */
-	__be16                  state;          /* state info */
-
-	/* The sequence options start here */
-};
-
-struct ip_vs_sync_conn_options {
-	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
-	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
-};
-
-struct ip_vs_sync_thread_data {
-	struct socket *sock;
-	char *buf;
-};
-
-#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
-#define FULL_CONN_SIZE  \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
-
-
-/*
-  The master mulitcasts messages to the backup load balancers in the
-  following format.
-
-       0                   1                   2                   3
-       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-      |  Count Conns  |    SyncID     |            Size               |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-      |                                                               |
-      |                    IPVS Sync Connection (1)                   |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-      |                            .                                  |
-      |                            .                                  |
-      |                            .                                  |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-      |                                                               |
-      |                    IPVS Sync Connection (n)                   |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-*/
-
-#define SYNC_MESG_HEADER_LEN	4
-#define MAX_CONNS_PER_SYNCBUFF	255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
-
-struct ip_vs_sync_mesg {
-	__u8                    nr_conns;
-	__u8                    syncid;
-	__u16                   size;
-
-	/* ip_vs_sync_conn entries start here */
-};
-
-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;
-
-struct ip_vs_sync_buff {
-	struct list_head        list;
-	unsigned long           firstuse;
-
-	/* pointers for the message data */
-	struct ip_vs_sync_mesg  *mesg;
-	unsigned char           *head;
-	unsigned char           *end;
-};
-
-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff   *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
-/* multicast addr */
-static struct sockaddr_in mcast_addr = {
-	.sin_family		= AF_INET,
-	.sin_port		= __constant_htons(IP_VS_SYNC_PORT),
-	.sin_addr.s_addr	= __constant_htonl(IP_VS_SYNC_GROUP),
-};
-
-
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
-{
-	struct ip_vs_sync_buff *sb;
-
-	spin_lock_bh(&ip_vs_sync_lock);
-	if (list_empty(&ip_vs_sync_queue)) {
-		sb = NULL;
-	} else {
-		sb = list_entry(ip_vs_sync_queue.next,
-				struct ip_vs_sync_buff,
-				list);
-		list_del(&sb->list);
-	}
-	spin_unlock_bh(&ip_vs_sync_lock);
-
-	return sb;
-}
-
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
-{
-	struct ip_vs_sync_buff *sb;
-
-	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
-		return NULL;
-
-	if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
-		kfree(sb);
-		return NULL;
-	}
-	sb->mesg->nr_conns = 0;
-	sb->mesg->syncid = ip_vs_master_syncid;
-	sb->mesg->size = 4;
-	sb->head = (unsigned char *)sb->mesg + 4;
-	sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
-	sb->firstuse = jiffies;
-	return sb;
-}
-
-static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
-{
-	kfree(sb->mesg);
-	kfree(sb);
-}
-
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
-{
-	spin_lock(&ip_vs_sync_lock);
-	if (ip_vs_sync_state & IP_VS_STATE_MASTER)
-		list_add_tail(&sb->list, &ip_vs_sync_queue);
-	else
-		ip_vs_sync_buff_release(sb);
-	spin_unlock(&ip_vs_sync_lock);
-}
-
-/*
- *	Get the current sync buffer if it has been created for more
- *	than the specified time or the specified time is zero.
- */
-static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
-{
-	struct ip_vs_sync_buff *sb;
-
-	spin_lock_bh(&curr_sb_lock);
-	if (curr_sb && (time == 0 ||
-			time_before(jiffies - curr_sb->firstuse, time))) {
-		sb = curr_sb;
-		curr_sb = NULL;
-	} else
-		sb = NULL;
-	spin_unlock_bh(&curr_sb_lock);
-	return sb;
-}
-
-
-/*
- *      Add an ip_vs_conn information into the current sync_buff.
- *      Called by ip_vs_in.
- */
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
-{
-	struct ip_vs_sync_mesg *m;
-	struct ip_vs_sync_conn *s;
-	int len;
-
-	spin_lock(&curr_sb_lock);
-	if (!curr_sb) {
-		if (!(curr_sb=ip_vs_sync_buff_create())) {
-			spin_unlock(&curr_sb_lock);
-			IP_VS_ERR("ip_vs_sync_buff_create failed.\n");
-			return;
-		}
-	}
-
-	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
-		SIMPLE_CONN_SIZE;
-	m = curr_sb->mesg;
-	s = (struct ip_vs_sync_conn *)curr_sb->head;
-
-	/* copy members */
-	s->protocol = cp->protocol;
-	s->cport = cp->cport;
-	s->vport = cp->vport;
-	s->dport = cp->dport;
-	s->caddr = cp->caddr.ip;
-	s->vaddr = cp->vaddr.ip;
-	s->daddr = cp->daddr.ip;
-	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
-	s->state = htons(cp->state);
-	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
-		struct ip_vs_sync_conn_options *opt =
-			(struct ip_vs_sync_conn_options *)&s[1];
-		memcpy(opt, &cp->in_seq, sizeof(*opt));
-	}
-
-	m->nr_conns++;
-	m->size += len;
-	curr_sb->head += len;
-
-	/* check if there is a space for next one */
-	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
-		sb_queue_tail(curr_sb);
-		curr_sb = NULL;
-	}
-	spin_unlock(&curr_sb_lock);
-
-	/* synchronize its controller if it has */
-	if (cp->control)
-		ip_vs_sync_conn(cp->control);
-}
-
-
-/*
- *      Process received multicast message and create the corresponding
- *      ip_vs_conn entries.
- */
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
-{
-	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
-	struct ip_vs_sync_conn *s;
-	struct ip_vs_sync_conn_options *opt;
-	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp;
-	struct ip_vs_dest *dest;
-	char *p;
-	int i;
-
-	if (buflen < sizeof(struct ip_vs_sync_mesg)) {
-		IP_VS_ERR_RL("sync message header too short\n");
-		return;
-	}
-
-	/* Convert size back to host byte order */
-	m->size = ntohs(m->size);
-
-	if (buflen != m->size) {
-		IP_VS_ERR_RL("bogus sync message size\n");
-		return;
-	}
-
-	/* SyncID sanity check */
-	if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
-		IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
-			  m->syncid);
-		return;
-	}
-
-	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
-	for (i=0; i<m->nr_conns; i++) {
-		unsigned flags, state;
-
-		if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
-			IP_VS_ERR_RL("bogus conn in sync message\n");
-			return;
-		}
-		s = (struct ip_vs_sync_conn *) p;
-		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
-		flags &= ~IP_VS_CONN_F_HASHED;
-		if (flags & IP_VS_CONN_F_SEQ_MASK) {
-			opt = (struct ip_vs_sync_conn_options *)&s[1];
-			p += FULL_CONN_SIZE;
-			if (p > buffer+buflen) {
-				IP_VS_ERR_RL("bogus conn options in sync message\n");
-				return;
-			}
-		} else {
-			opt = NULL;
-			p += SIMPLE_CONN_SIZE;
-		}
-
-		state = ntohs(s->state);
-		if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
-			pp = ip_vs_proto_get(s->protocol);
-			if (!pp) {
-				IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
-					s->protocol);
-				continue;
-			}
-			if (state >= pp->num_states) {
-				IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
-					pp->name, state);
-				continue;
-			}
-		} else {
-			/* protocol in templates is not used for state/timeout */
-			pp = NULL;
-			if (state > 0) {
-				IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
-					state);
-				state = 0;
-			}
-		}
-
-		if (!(flags & IP_VS_CONN_F_TEMPLATE))
-			cp = ip_vs_conn_in_get(AF_INET, s->protocol,
-					       (union nf_inet_addr *)&s->caddr,
-					       s->cport,
-					       (union nf_inet_addr *)&s->vaddr,
-					       s->vport);
-		else
-			cp = ip_vs_ct_in_get(AF_INET, s->protocol,
-					     (union nf_inet_addr *)&s->caddr,
-					     s->cport,
-					     (union nf_inet_addr *)&s->vaddr,
-					     s->vport);
-		if (!cp) {
-			/*
-			 * Find the appropriate destination for the connection.
-			 * If it is not found the connection will remain unbound
-			 * but still handled.
-			 */
-			dest = ip_vs_find_dest(AF_INET,
-					       (union nf_inet_addr *)&s->daddr,
-					       s->dport,
-					       (union nf_inet_addr *)&s->vaddr,
-					       s->vport,
-					       s->protocol);
-			/*  Set the approprite ativity flag */
-			if (s->protocol == IPPROTO_TCP) {
-				if (state != IP_VS_TCP_S_ESTABLISHED)
-					flags |= IP_VS_CONN_F_INACTIVE;
-				else
-					flags &= ~IP_VS_CONN_F_INACTIVE;
-			}
-			cp = ip_vs_conn_new(AF_INET, s->protocol,
-					    (union nf_inet_addr *)&s->caddr,
-					    s->cport,
-					    (union nf_inet_addr *)&s->vaddr,
-					    s->vport,
-					    (union nf_inet_addr *)&s->daddr,
-					    s->dport,
-					    flags, dest);
-			if (dest)
-				atomic_dec(&dest->refcnt);
-			if (!cp) {
-				IP_VS_ERR("ip_vs_conn_new failed\n");
-				return;
-			}
-		} else if (!cp->dest) {
-			dest = ip_vs_try_bind_dest(cp);
-			if (dest)
-				atomic_dec(&dest->refcnt);
-		} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
-			   (cp->state != state)) {
-			/* update active/inactive flag for the connection */
-			dest = cp->dest;
-			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-				(state != IP_VS_TCP_S_ESTABLISHED)) {
-				atomic_dec(&dest->activeconns);
-				atomic_inc(&dest->inactconns);
-				cp->flags |= IP_VS_CONN_F_INACTIVE;
-			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
-				(state == IP_VS_TCP_S_ESTABLISHED)) {
-				atomic_inc(&dest->activeconns);
-				atomic_dec(&dest->inactconns);
-				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
-			}
-		}
-
-		if (opt)
-			memcpy(&cp->in_seq, opt, sizeof(*opt));
-		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
-		cp->state = state;
-		cp->old_state = cp->state;
-		/*
-		 * We can not recover the right timeout for templates
-		 * in all cases, we can not find the right fwmark
-		 * virtual service. If needed, we can do it for
-		 * non-fwmark persistent services.
-		 */
-		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
-			cp->timeout = pp->timeout_table[state];
-		else
-			cp->timeout = (3*60*HZ);
-		ip_vs_conn_put(cp);
-	}
-}
-
-
-/*
- *      Setup loopback of outgoing multicasts on a sending socket
- */
-static void set_mcast_loop(struct sock *sk, u_char loop)
-{
-	struct inet_sock *inet = inet_sk(sk);
-
-	/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
-	lock_sock(sk);
-	inet->mc_loop = loop ? 1 : 0;
-	release_sock(sk);
-}
-
-/*
- *      Specify TTL for outgoing multicasts on a sending socket
- */
-static void set_mcast_ttl(struct sock *sk, u_char ttl)
-{
-	struct inet_sock *inet = inet_sk(sk);
-
-	/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
-	lock_sock(sk);
-	inet->mc_ttl = ttl;
-	release_sock(sk);
-}
-
-/*
- *      Specifiy default interface for outgoing multicasts
- */
-static int set_mcast_if(struct sock *sk, char *ifname)
-{
-	struct net_device *dev;
-	struct inet_sock *inet = inet_sk(sk);
-
-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
-		return -ENODEV;
-
-	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
-		return -EINVAL;
-
-	lock_sock(sk);
-	inet->mc_index = dev->ifindex;
-	/*  inet->mc_addr  = 0; */
-	release_sock(sk);
-
-	return 0;
-}
-
-
-/*
- *	Set the maximum length of sync message according to the
- *	specified interface's MTU.
- */
-static int set_sync_mesg_maxlen(int sync_state)
-{
-	struct net_device *dev;
-	int num;
-
-	if (sync_state == IP_VS_STATE_MASTER) {
-		if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
-			return -ENODEV;
-
-		num = (dev->mtu - sizeof(struct iphdr) -
-		       sizeof(struct udphdr) -
-		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
-		sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
-			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
-		IP_VS_DBG(7, "setting the maximum length of sync sending "
-			  "message %d.\n", sync_send_mesg_maxlen);
-	} else if (sync_state == IP_VS_STATE_BACKUP) {
-		if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
-			return -ENODEV;
-
-		sync_recv_mesg_maxlen = dev->mtu -
-			sizeof(struct iphdr) - sizeof(struct udphdr);
-		IP_VS_DBG(7, "setting the maximum length of sync receiving "
-			  "message %d.\n", sync_recv_mesg_maxlen);
-	}
-
-	return 0;
-}
-
-
-/*
- *      Join a multicast group.
- *      the group is specified by a class D multicast address 224.0.0.0/8
- *      in the in_addr structure passed in as a parameter.
- */
-static int
-join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
-{
-	struct ip_mreqn mreq;
-	struct net_device *dev;
-	int ret;
-
-	memset(&mreq, 0, sizeof(mreq));
-	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
-
-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
-		return -ENODEV;
-	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
-		return -EINVAL;
-
-	mreq.imr_ifindex = dev->ifindex;
-
-	lock_sock(sk);
-	ret = ip_mc_join_group(sk, &mreq);
-	release_sock(sk);
-
-	return ret;
-}
-
-
-static int bind_mcastif_addr(struct socket *sock, char *ifname)
-{
-	struct net_device *dev;
-	__be32 addr;
-	struct sockaddr_in sin;
-
-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
-		return -ENODEV;
-
-	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
-	if (!addr)
-		IP_VS_ERR("You probably need to specify IP address on "
-			  "multicast interface.\n");
-
-	IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n",
-		  ifname, NIPQUAD(addr));
-
-	/* Now bind the socket with the address of multicast interface */
-	sin.sin_family	     = AF_INET;
-	sin.sin_addr.s_addr  = addr;
-	sin.sin_port         = 0;
-
-	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
-}
-
-/*
- *      Set up sending multicast socket over UDP
- */
-static struct socket * make_send_sock(void)
-{
-	struct socket *sock;
-	int result;
-
-	/* First create a socket */
-	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
-	if (result < 0) {
-		IP_VS_ERR("Error during creation of socket; terminating\n");
-		return ERR_PTR(result);
-	}
-
-	result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
-	if (result < 0) {
-		IP_VS_ERR("Error setting outbound mcast interface\n");
-		goto error;
-	}
-
-	set_mcast_loop(sock->sk, 0);
-	set_mcast_ttl(sock->sk, 1);
-
-	result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
-	if (result < 0) {
-		IP_VS_ERR("Error binding address of the mcast interface\n");
-		goto error;
-	}
-
-	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
-			sizeof(struct sockaddr), 0);
-	if (result < 0) {
-		IP_VS_ERR("Error connecting to the multicast addr\n");
-		goto error;
-	}
-
-	return sock;
-
-  error:
-	sock_release(sock);
-	return ERR_PTR(result);
-}
-
-
-/*
- *      Set up receiving multicast socket over UDP
- */
-static struct socket * make_receive_sock(void)
-{
-	struct socket *sock;
-	int result;
-
-	/* First create a socket */
-	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
-	if (result < 0) {
-		IP_VS_ERR("Error during creation of socket; terminating\n");
-		return ERR_PTR(result);
-	}
-
-	/* it is equivalent to the REUSEADDR option in user-space */
-	sock->sk->sk_reuse = 1;
-
-	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
-			sizeof(struct sockaddr));
-	if (result < 0) {
-		IP_VS_ERR("Error binding to the multicast addr\n");
-		goto error;
-	}
-
-	/* join the multicast group */
-	result = join_mcast_group(sock->sk,
-			(struct in_addr *) &mcast_addr.sin_addr,
-			ip_vs_backup_mcast_ifn);
-	if (result < 0) {
-		IP_VS_ERR("Error joining to the multicast group\n");
-		goto error;
-	}
-
-	return sock;
-
-  error:
-	sock_release(sock);
-	return ERR_PTR(result);
-}
-
-
-static int
-ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
-{
-	struct msghdr	msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
-	struct kvec	iov;
-	int		len;
-
-	EnterFunction(7);
-	iov.iov_base     = (void *)buffer;
-	iov.iov_len      = length;
-
-	len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
-
-	LeaveFunction(7);
-	return len;
-}
-
-static void
-ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
-{
-	int msize;
-
-	msize = msg->size;
-
-	/* Put size in network byte order */
-	msg->size = htons(msg->size);
-
-	if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
-		IP_VS_ERR("ip_vs_send_async error\n");
-}
-
-static int
-ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
-{
-	struct msghdr		msg = {NULL,};
-	struct kvec		iov;
-	int			len;
-
-	EnterFunction(7);
-
-	/* Receive a packet */
-	iov.iov_base     = buffer;
-	iov.iov_len      = (size_t)buflen;
-
-	len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
-
-	if (len < 0)
-		return -1;
-
-	LeaveFunction(7);
-	return len;
-}
-
-
-static int sync_thread_master(void *data)
-{
-	struct ip_vs_sync_thread_data *tinfo = data;
-	struct ip_vs_sync_buff *sb;
-
-	IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
-		   "syncid = %d\n",
-		   ip_vs_master_mcast_ifn, ip_vs_master_syncid);
-
-	while (!kthread_should_stop()) {
-		while ((sb = sb_dequeue())) {
-			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
-			ip_vs_sync_buff_release(sb);
-		}
-
-		/* check if entries stay in curr_sb for 2 seconds */
-		sb = get_curr_sync_buff(2 * HZ);
-		if (sb) {
-			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
-			ip_vs_sync_buff_release(sb);
-		}
-
-		schedule_timeout_interruptible(HZ);
-	}
-
-	/* clean up the sync_buff queue */
-	while ((sb=sb_dequeue())) {
-		ip_vs_sync_buff_release(sb);
-	}
-
-	/* clean up the current sync_buff */
-	if ((sb = get_curr_sync_buff(0))) {
-		ip_vs_sync_buff_release(sb);
-	}
-
-	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
-	kfree(tinfo);
-
-	return 0;
-}
-
-
-static int sync_thread_backup(void *data)
-{
-	struct ip_vs_sync_thread_data *tinfo = data;
-	int len;
-
-	IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
-		   "syncid = %d\n",
-		   ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
-
-	while (!kthread_should_stop()) {
-		wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
-			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
-			 || kthread_should_stop());
-
-		/* do we have data now? */
-		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
-			len = ip_vs_receive(tinfo->sock, tinfo->buf,
-					sync_recv_mesg_maxlen);
-			if (len <= 0) {
-				IP_VS_ERR("receiving message error\n");
-				break;
-			}
-
-			/* disable bottom half, because it accesses the data
-			   shared by softirq while getting/creating conns */
-			local_bh_disable();
-			ip_vs_process_message(tinfo->buf, len);
-			local_bh_enable();
-		}
-	}
-
-	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
-	kfree(tinfo->buf);
-	kfree(tinfo);
-
-	return 0;
-}
-
-
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
-{
-	struct ip_vs_sync_thread_data *tinfo;
-	struct task_struct **realtask, *task;
-	struct socket *sock;
-	char *name, *buf = NULL;
-	int (*threadfn)(void *data);
-	int result = -ENOMEM;
-
-	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
-	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
-		  sizeof(struct ip_vs_sync_conn));
-
-	if (state == IP_VS_STATE_MASTER) {
-		if (sync_master_thread)
-			return -EEXIST;
-
-		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_master_mcast_ifn));
-		ip_vs_master_syncid = syncid;
-		realtask = &sync_master_thread;
-		name = "ipvs_syncmaster";
-		threadfn = sync_thread_master;
-		sock = make_send_sock();
-	} else if (state == IP_VS_STATE_BACKUP) {
-		if (sync_backup_thread)
-			return -EEXIST;
-
-		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_backup_mcast_ifn));
-		ip_vs_backup_syncid = syncid;
-		realtask = &sync_backup_thread;
-		name = "ipvs_syncbackup";
-		threadfn = sync_thread_backup;
-		sock = make_receive_sock();
-	} else {
-		return -EINVAL;
-	}
-
-	if (IS_ERR(sock)) {
-		result = PTR_ERR(sock);
-		goto out;
-	}
-
-	set_sync_mesg_maxlen(state);
-	if (state == IP_VS_STATE_BACKUP) {
-		buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
-		if (!buf)
-			goto outsocket;
-	}
-
-	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
-	if (!tinfo)
-		goto outbuf;
-
-	tinfo->sock = sock;
-	tinfo->buf = buf;
-
-	task = kthread_run(threadfn, tinfo, name);
-	if (IS_ERR(task)) {
-		result = PTR_ERR(task);
-		goto outtinfo;
-	}
-
-	/* mark as active */
-	*realtask = task;
-	ip_vs_sync_state |= state;
-
-	/* increase the module use count */
-	ip_vs_use_count_inc();
-
-	return 0;
-
-outtinfo:
-	kfree(tinfo);
-outbuf:
-	kfree(buf);
-outsocket:
-	sock_release(sock);
-out:
-	return result;
-}
-
-
-int stop_sync_thread(int state)
-{
-	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
-
-	if (state == IP_VS_STATE_MASTER) {
-		if (!sync_master_thread)
-			return -ESRCH;
-
-		IP_VS_INFO("stopping master sync thread %d ...\n",
-			   task_pid_nr(sync_master_thread));
-
-		/*
-		 * The lock synchronizes with sb_queue_tail(), so that we don't
-		 * add sync buffers to the queue, when we are already in
-		 * progress of stopping the master sync daemon.
-		 */
-
-		spin_lock_bh(&ip_vs_sync_lock);
-		ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
-		spin_unlock_bh(&ip_vs_sync_lock);
-		kthread_stop(sync_master_thread);
-		sync_master_thread = NULL;
-	} else if (state == IP_VS_STATE_BACKUP) {
-		if (!sync_backup_thread)
-			return -ESRCH;
-
-		IP_VS_INFO("stopping backup sync thread %d ...\n",
-			   task_pid_nr(sync_backup_thread));
-
-		ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
-		kthread_stop(sync_backup_thread);
-		sync_backup_thread = NULL;
-	} else {
-		return -EINVAL;
-	}
-
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
-
-	return 0;
-}
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
deleted file mode 100644
index 8c596e71259..00000000000
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * IPVS:        Weighted Least-Connection Scheduling module
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Peter Kese <peter.kese@ijs.si>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *     Wensong Zhang            :     changed the ip_vs_wlc_schedule to return dest
- *     Wensong Zhang            :     changed to use the inactconns in scheduling
- *     Wensong Zhang            :     changed some comestics things for debugging
- *     Wensong Zhang            :     changed for the d-linked destination list
- *     Wensong Zhang            :     added the ip_vs_wlc_update_svc
- *     Wensong Zhang            :     added any dest with weight=0 is quiesced
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include <net/ip_vs.h>
-
-
-static inline unsigned int
-ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
-{
-	/*
-	 * We think the overhead of processing active connections is 256
-	 * times higher than that of inactive connections in average. (This
-	 * 256 times might not be accurate, we will change it later) We
-	 * use the following formula to estimate the overhead now:
-	 *		  dest->activeconns*256 + dest->inactconns
-	 */
-	return (atomic_read(&dest->activeconns) << 8) +
-		atomic_read(&dest->inactconns);
-}
-
-
-/*
- *	Weighted Least Connection scheduling
- */
-static struct ip_vs_dest *
-ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_dest *dest, *least;
-	unsigned int loh, doh;
-
-	IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
-
-	/*
-	 * We calculate the load of each dest server as follows:
-	 *		  (dest overhead) / dest->weight
-	 *
-	 * Remember -- no floats in kernel mode!!!
-	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
-	 *		  h1/w1 > h2/w2
-	 * if every weight is larger than zero.
-	 *
-	 * The server with weight=0 is quiesced and will not receive any
-	 * new connections.
-	 */
-
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
-		    atomic_read(&dest->weight) > 0) {
-			least = dest;
-			loh = ip_vs_wlc_dest_overhead(least);
-			goto nextstage;
-		}
-	}
-	return NULL;
-
-	/*
-	 *    Find the destination with the least load.
-	 */
-  nextstage:
-	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
-		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
-			continue;
-		doh = ip_vs_wlc_dest_overhead(dest);
-		if (loh * atomic_read(&dest->weight) >
-		    doh * atomic_read(&least->weight)) {
-			least = dest;
-			loh = doh;
-		}
-	}
-
-	IP_VS_DBG_BUF(6, "WLC: server %s:%u "
-		      "activeconns %d refcnt %d weight %d overhead %d\n",
-		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
-		      atomic_read(&least->activeconns),
-		      atomic_read(&least->refcnt),
-		      atomic_read(&least->weight), loh);
-
-	return least;
-}
-
-
-static struct ip_vs_scheduler ip_vs_wlc_scheduler =
-{
-	.name =			"wlc",
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list =		LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
-	.schedule =		ip_vs_wlc_schedule,
-};
-
-
-static int __init ip_vs_wlc_init(void)
-{
-	return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
-}
-
-static void __exit ip_vs_wlc_cleanup(void)
-{
-	unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
-}
-
-module_init(ip_vs_wlc_init);
-module_exit(ip_vs_wlc_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
deleted file mode 100644
index 7ea92fed50b..00000000000
--- a/net/ipv4/ipvs/ip_vs_wrr.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * IPVS:        Weighted Round-Robin Scheduling module
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *     Wensong Zhang            :     changed the ip_vs_wrr_schedule to return dest
- *     Wensong Zhang            :     changed some comestics things for debugging
- *     Wensong Zhang            :     changed for the d-linked destination list
- *     Wensong Zhang            :     added the ip_vs_wrr_update_svc
- *     Julian Anastasov         :     fixed the bug of returning destination
- *                                    with weight 0 when all weights are zero
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/net.h>
-
-#include <net/ip_vs.h>
-
-/*
- * current destination pointer for weighted round-robin scheduling
- */
-struct ip_vs_wrr_mark {
-	struct list_head *cl;	/* current list head */
-	int cw;			/* current weight */
-	int mw;			/* maximum weight */
-	int di;			/* decreasing interval */
-};
-
-
-/*
- *    Get the gcd of server weights
- */
-static int gcd(int a, int b)
-{
-	int c;
-
-	while ((c = a % b)) {
-		a = b;
-		b = c;
-	}
-	return b;
-}
-
-static int ip_vs_wrr_gcd_weight(struct ip_vs_service *svc)
-{
-	struct ip_vs_dest *dest;
-	int weight;
-	int g = 0;
-
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		weight = atomic_read(&dest->weight);
-		if (weight > 0) {
-			if (g > 0)
-				g = gcd(weight, g);
-			else
-				g = weight;
-		}
-	}
-	return g ? g : 1;
-}
-
-
-/*
- *    Get the maximum weight of the service destinations.
- */
-static int ip_vs_wrr_max_weight(struct ip_vs_service *svc)
-{
-	struct ip_vs_dest *dest;
-	int weight = 0;
-
-	list_for_each_entry(dest, &svc->destinations, n_list) {
-		if (atomic_read(&dest->weight) > weight)
-			weight = atomic_read(&dest->weight);
-	}
-
-	return weight;
-}
-
-
-static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_wrr_mark *mark;
-
-	/*
-	 *    Allocate the mark variable for WRR scheduling
-	 */
-	mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
-	if (mark == NULL) {
-		IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n");
-		return -ENOMEM;
-	}
-	mark->cl = &svc->destinations;
-	mark->cw = 0;
-	mark->mw = ip_vs_wrr_max_weight(svc);
-	mark->di = ip_vs_wrr_gcd_weight(svc);
-	svc->sched_data = mark;
-
-	return 0;
-}
-
-
-static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
-{
-	/*
-	 *    Release the mark variable
-	 */
-	kfree(svc->sched_data);
-
-	return 0;
-}
-
-
-static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_wrr_mark *mark = svc->sched_data;
-
-	mark->cl = &svc->destinations;
-	mark->mw = ip_vs_wrr_max_weight(svc);
-	mark->di = ip_vs_wrr_gcd_weight(svc);
-	if (mark->cw > mark->mw)
-		mark->cw = 0;
-	return 0;
-}
-
-
-/*
- *    Weighted Round-Robin Scheduling
- */
-static struct ip_vs_dest *
-ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_dest *dest;
-	struct ip_vs_wrr_mark *mark = svc->sched_data;
-	struct list_head *p;
-
-	IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");
-
-	/*
-	 * This loop will always terminate, because mark->cw in (0, max_weight]
-	 * and at least one server has its weight equal to max_weight.
-	 */
-	write_lock(&svc->sched_lock);
-	p = mark->cl;
-	while (1) {
-		if (mark->cl == &svc->destinations) {
-			/* it is at the head of the destination list */
-
-			if (mark->cl == mark->cl->next) {
-				/* no dest entry */
-				dest = NULL;
-				goto out;
-			}
-
-			mark->cl = svc->destinations.next;
-			mark->cw -= mark->di;
-			if (mark->cw <= 0) {
-				mark->cw = mark->mw;
-				/*
-				 * Still zero, which means no available servers.
-				 */
-				if (mark->cw == 0) {
-					mark->cl = &svc->destinations;
-					IP_VS_ERR_RL("ip_vs_wrr_schedule(): "
-						   "no available servers\n");
-					dest = NULL;
-					goto out;
-				}
-			}
-		} else
-			mark->cl = mark->cl->next;
-
-		if (mark->cl != &svc->destinations) {
-			/* not at the head of the list */
-			dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
-			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
-			    atomic_read(&dest->weight) >= mark->cw) {
-				/* got it */
-				break;
-			}
-		}
-
-		if (mark->cl == p && mark->cw == mark->di) {
-			/* back to the start, and no dest is found.
-			   It is only possible when all dests are OVERLOADED */
-			dest = NULL;
-			goto out;
-		}
-	}
-
-	IP_VS_DBG_BUF(6, "WRR: server %s:%u "
-		      "activeconns %d refcnt %d weight %d\n",
-		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
-		      atomic_read(&dest->activeconns),
-		      atomic_read(&dest->refcnt),
-		      atomic_read(&dest->weight));
-
-  out:
-	write_unlock(&svc->sched_lock);
-	return dest;
-}
-
-
-static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
-	.name =			"wrr",
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list =		LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
-	.init_service =		ip_vs_wrr_init_svc,
-	.done_service =		ip_vs_wrr_done_svc,
-	.update_service =	ip_vs_wrr_update_svc,
-	.schedule =		ip_vs_wrr_schedule,
-};
-
-static int __init ip_vs_wrr_init(void)
-{
-	return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
-}
-
-static void __exit ip_vs_wrr_cleanup(void)
-{
-	unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
-}
-
-module_init(ip_vs_wrr_init);
-module_exit(ip_vs_wrr_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
deleted file mode 100644
index 02ddc2b3ce2..00000000000
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ /dev/null
@@ -1,1004 +0,0 @@
-/*
- * ip_vs_xmit.c: various packet transmitters for IPVS
- *
- * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
- *              Julian Anastasov <ja@ssi.bg>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/tcp.h>                  /* for tcphdr */
-#include <net/ip.h>
-#include <net/tcp.h>                    /* for csum_tcpudp_magic */
-#include <net/udp.h>
-#include <net/icmp.h>                   /* for icmp_send */
-#include <net/route.h>                  /* for ip_route_output */
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <linux/icmpv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-
-#include <net/ip_vs.h>
-
-
-/*
- *      Destination cache to speed up outgoing route lookup
- */
-static inline void
-__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
-{
-	struct dst_entry *old_dst;
-
-	old_dst = dest->dst_cache;
-	dest->dst_cache = dst;
-	dest->dst_rtos = rtos;
-	dst_release(old_dst);
-}
-
-static inline struct dst_entry *
-__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
-{
-	struct dst_entry *dst = dest->dst_cache;
-
-	if (!dst)
-		return NULL;
-	if ((dst->obsolete
-	     || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
-	    dst->ops->check(dst, cookie) == NULL) {
-		dest->dst_cache = NULL;
-		dst_release(dst);
-		return NULL;
-	}
-	dst_hold(dst);
-	return dst;
-}
-
-static struct rtable *
-__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
-{
-	struct rtable *rt;			/* Route to the other host */
-	struct ip_vs_dest *dest = cp->dest;
-
-	if (dest) {
-		spin_lock(&dest->dst_lock);
-		if (!(rt = (struct rtable *)
-		      __ip_vs_dst_check(dest, rtos, 0))) {
-			struct flowi fl = {
-				.oif = 0,
-				.nl_u = {
-					.ip4_u = {
-						.daddr = dest->addr.ip,
-						.saddr = 0,
-						.tos = rtos, } },
-			};
-
-			if (ip_route_output_key(&init_net, &rt, &fl)) {
-				spin_unlock(&dest->dst_lock);
-				IP_VS_DBG_RL("ip_route_output error, "
-					     "dest: %u.%u.%u.%u\n",
-					     NIPQUAD(dest->addr.ip));
-				return NULL;
-			}
-			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
-			IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
-				  NIPQUAD(dest->addr.ip),
-				  atomic_read(&rt->u.dst.__refcnt), rtos);
-		}
-		spin_unlock(&dest->dst_lock);
-	} else {
-		struct flowi fl = {
-			.oif = 0,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = cp->daddr.ip,
-					.saddr = 0,
-					.tos = rtos, } },
-		};
-
-		if (ip_route_output_key(&init_net, &rt, &fl)) {
-			IP_VS_DBG_RL("ip_route_output error, dest: "
-				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
-			return NULL;
-		}
-	}
-
-	return rt;
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-static struct rt6_info *
-__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
-{
-	struct rt6_info *rt;			/* Route to the other host */
-	struct ip_vs_dest *dest = cp->dest;
-
-	if (dest) {
-		spin_lock(&dest->dst_lock);
-		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
-		if (!rt) {
-			struct flowi fl = {
-				.oif = 0,
-				.nl_u = {
-					.ip6_u = {
-						.daddr = dest->addr.in6,
-						.saddr = {
-							.s6_addr32 =
-								{ 0, 0, 0, 0 },
-						},
-					},
-				},
-			};
-
-			rt = (struct rt6_info *)ip6_route_output(&init_net,
-								 NULL, &fl);
-			if (!rt) {
-				spin_unlock(&dest->dst_lock);
-				IP_VS_DBG_RL("ip6_route_output error, "
-					     "dest: " NIP6_FMT "\n",
-					     NIP6(dest->addr.in6));
-				return NULL;
-			}
-			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
-			IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n",
-				  NIP6(dest->addr.in6),
-				  atomic_read(&rt->u.dst.__refcnt));
-		}
-		spin_unlock(&dest->dst_lock);
-	} else {
-		struct flowi fl = {
-			.oif = 0,
-			.nl_u = {
-				.ip6_u = {
-					.daddr = cp->daddr.in6,
-					.saddr = {
-						.s6_addr32 = { 0, 0, 0, 0 },
-					},
-				},
-			},
-		};
-
-		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
-		if (!rt) {
-			IP_VS_DBG_RL("ip6_route_output error, dest: "
-				     NIP6_FMT "\n", NIP6(cp->daddr.in6));
-			return NULL;
-		}
-	}
-
-	return rt;
-}
-#endif
-
-
-/*
- *	Release dest->dst_cache before a dest is removed
- */
-void
-ip_vs_dst_reset(struct ip_vs_dest *dest)
-{
-	struct dst_entry *old_dst;
-
-	old_dst = dest->dst_cache;
-	dest->dst_cache = NULL;
-	dst_release(old_dst);
-}
-
-#define IP_VS_XMIT(pf, skb, rt)				\
-do {							\
-	(skb)->ipvs_property = 1;			\
-	skb_forward_csum(skb);				\
-	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
-		(rt)->u.dst.dev, dst_output);		\
-} while (0)
-
-
-/*
- *      NULL transmitter (do nothing except return NF_ACCEPT)
- */
-int
-ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
-		struct ip_vs_protocol *pp)
-{
-	/* we do not touch skb and do not need pskb ptr */
-	return NF_ACCEPT;
-}
-
-
-/*
- *      Bypass transmitter
- *      Let packets bypass the destination when the destination is not
- *      available, it may be only used in transparent cache cluster.
- */
-int
-ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
-		  struct ip_vs_protocol *pp)
-{
-	struct rtable *rt;			/* Route to the other host */
-	struct iphdr  *iph = ip_hdr(skb);
-	u8     tos = iph->tos;
-	int    mtu;
-	struct flowi fl = {
-		.oif = 0,
-		.nl_u = {
-			.ip4_u = {
-				.daddr = iph->daddr,
-				.saddr = 0,
-				.tos = RT_TOS(tos), } },
-	};
-
-	EnterFunction(10);
-
-	if (ip_route_output_key(&init_net, &rt, &fl)) {
-		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
-			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
-		goto tx_error_icmp;
-	}
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
-		ip_rt_put(rt);
-		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
-		goto tx_error;
-	}
-
-	/*
-	 * Call ip_send_check because we are not sure it is called
-	 * after ip_defrag. Is copy-on-write needed?
-	 */
-	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
-		ip_rt_put(rt);
-		return NF_STOLEN;
-	}
-	ip_send_check(ip_hdr(skb));
-
-	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	IP_VS_XMIT(PF_INET, skb, rt);
-
-	LeaveFunction(10);
-	return NF_STOLEN;
-
- tx_error_icmp:
-	dst_link_failure(skb);
- tx_error:
-	kfree_skb(skb);
-	LeaveFunction(10);
-	return NF_STOLEN;
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-int
-ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
-		     struct ip_vs_protocol *pp)
-{
-	struct rt6_info *rt;			/* Route to the other host */
-	struct ipv6hdr  *iph = ipv6_hdr(skb);
-	int    mtu;
-	struct flowi fl = {
-		.oif = 0,
-		.nl_u = {
-			.ip6_u = {
-				.daddr = iph->daddr,
-				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
-	};
-
-	EnterFunction(10);
-
-	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
-	if (!rt) {
-		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, "
-			     "dest: " NIP6_FMT "\n", NIP6(iph->daddr));
-		goto tx_error_icmp;
-	}
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
-	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
-		goto tx_error;
-	}
-
-	/*
-	 * Call ip_send_check because we are not sure it is called
-	 * after ip_defrag. Is copy-on-write needed?
-	 */
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
-		return NF_STOLEN;
-	}
-
-	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	IP_VS_XMIT(PF_INET6, skb, rt);
-
-	LeaveFunction(10);
-	return NF_STOLEN;
-
- tx_error_icmp:
-	dst_link_failure(skb);
- tx_error:
-	kfree_skb(skb);
-	LeaveFunction(10);
-	return NF_STOLEN;
-}
-#endif
-
-/*
- *      NAT transmitter (only for outside-to-inside nat forwarding)
- *      Not used for related ICMP
- */
-int
-ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
-	       struct ip_vs_protocol *pp)
-{
-	struct rtable *rt;		/* Route to the other host */
-	int mtu;
-	struct iphdr *iph = ip_hdr(skb);
-
-	EnterFunction(10);
-
-	/* check if it is a connection of no-client-port */
-	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
-		__be16 _pt, *p;
-		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
-		if (p == NULL)
-			goto tx_error;
-		ip_vs_conn_fill_cport(cp, *p);
-		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
-	}
-
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
-		goto tx_error_icmp;
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
-		ip_rt_put(rt);
-		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
-		goto tx_error;
-	}
-
-	/* copy-on-write the packet before mangling it */
-	if (!skb_make_writable(skb, sizeof(struct iphdr)))
-		goto tx_error_put;
-
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
-		goto tx_error_put;
-
-	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	/* mangle the packet */
-	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
-		goto tx_error;
-	ip_hdr(skb)->daddr = cp->daddr.ip;
-	ip_send_check(ip_hdr(skb));
-
-	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
-
-	/* FIXME: when application helper enlarges the packet and the length
-	   is larger than the MTU of outgoing device, there will be still
-	   MTU problem. */
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	IP_VS_XMIT(PF_INET, skb, rt);
-
-	LeaveFunction(10);
-	return NF_STOLEN;
-
-  tx_error_icmp:
-	dst_link_failure(skb);
-  tx_error:
-	LeaveFunction(10);
-	kfree_skb(skb);
-	return NF_STOLEN;
-  tx_error_put:
-	ip_rt_put(rt);
-	goto tx_error;
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-int
-ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
-		  struct ip_vs_protocol *pp)
-{
-	struct rt6_info *rt;		/* Route to the other host */
-	int mtu;
-
-	EnterFunction(10);
-
-	/* check if it is a connection of no-client-port */
-	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
-		__be16 _pt, *p;
-		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
-				       sizeof(_pt), &_pt);
-		if (p == NULL)
-			goto tx_error;
-		ip_vs_conn_fill_cport(cp, *p);
-		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
-	}
-
-	rt = __ip_vs_get_out_rt_v6(cp);
-	if (!rt)
-		goto tx_error_icmp;
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
-	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
-				 "ip_vs_nat_xmit_v6(): frag needed for");
-		goto tx_error;
-	}
-
-	/* copy-on-write the packet before mangling it */
-	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
-		goto tx_error_put;
-
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
-		goto tx_error_put;
-
-	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	/* mangle the packet */
-	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
-		goto tx_error;
-	ipv6_hdr(skb)->daddr = cp->daddr.in6;
-
-	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
-
-	/* FIXME: when application helper enlarges the packet and the length
-	   is larger than the MTU of outgoing device, there will be still
-	   MTU problem. */
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	IP_VS_XMIT(PF_INET6, skb, rt);
-
-	LeaveFunction(10);
-	return NF_STOLEN;
-
-tx_error_icmp:
-	dst_link_failure(skb);
-tx_error:
-	LeaveFunction(10);
-	kfree_skb(skb);
-	return NF_STOLEN;
-tx_error_put:
-	dst_release(&rt->u.dst);
-	goto tx_error;
-}
-#endif
-
-
-/*
- *   IP Tunneling transmitter
- *
- *   This function encapsulates the packet in a new IP packet, its
- *   destination will be set to cp->daddr. Most code of this function
- *   is taken from ipip.c.
- *
- *   It is used in VS/TUN cluster. The load balancer selects a real
- *   server from a cluster based on a scheduling algorithm,
- *   encapsulates the request packet and forwards it to the selected
- *   server. For example, all real servers are configured with
- *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
- *   the encapsulated packet, it will decapsulate the packet, processe
- *   the request and return the response packets directly to the client
- *   without passing the load balancer. This can greatly increase the
- *   scalability of virtual server.
- *
- *   Used for ANY protocol
- */
-int
-ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
-		  struct ip_vs_protocol *pp)
-{
-	struct rtable *rt;			/* Route to the other host */
-	struct net_device *tdev;		/* Device to other host */
-	struct iphdr  *old_iph = ip_hdr(skb);
-	u8     tos = old_iph->tos;
-	__be16 df = old_iph->frag_off;
-	sk_buff_data_t old_transport_header = skb->transport_header;
-	struct iphdr  *iph;			/* Our new IP header */
-	unsigned int max_headroom;		/* The extra header space needed */
-	int    mtu;
-
-	EnterFunction(10);
-
-	if (skb->protocol != htons(ETH_P_IP)) {
-		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
-			     "ETH_P_IP: %d, skb protocol: %d\n",
-			     htons(ETH_P_IP), skb->protocol);
-		goto tx_error;
-	}
-
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
-		goto tx_error_icmp;
-
-	tdev = rt->u.dst.dev;
-
-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
-	if (mtu < 68) {
-		ip_rt_put(rt);
-		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
-		goto tx_error;
-	}
-	if (skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
-
-	df |= (old_iph->frag_off & htons(IP_DF));
-
-	if ((old_iph->frag_off & htons(IP_DF))
-	    && mtu < ntohs(old_iph->tot_len)) {
-		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		ip_rt_put(rt);
-		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
-		goto tx_error;
-	}
-
-	/*
-	 * Okay, now see if we can stuff it in the buffer as-is.
-	 */
-	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
-
-	if (skb_headroom(skb) < max_headroom
-	    || skb_cloned(skb) || skb_shared(skb)) {
-		struct sk_buff *new_skb =
-			skb_realloc_headroom(skb, max_headroom);
-		if (!new_skb) {
-			ip_rt_put(rt);
-			kfree_skb(skb);
-			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
-			return NF_STOLEN;
-		}
-		kfree_skb(skb);
-		skb = new_skb;
-		old_iph = ip_hdr(skb);
-	}
-
-	skb->transport_header = old_transport_header;
-
-	/* fix old IP header checksum */
-	ip_send_check(old_iph);
-
-	skb_push(skb, sizeof(struct iphdr));
-	skb_reset_network_header(skb);
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-
-	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	/*
-	 *	Push down and install the IPIP header.
-	 */
-	iph			=	ip_hdr(skb);
-	iph->version		=	4;
-	iph->ihl		=	sizeof(struct iphdr)>>2;
-	iph->frag_off		=	df;
-	iph->protocol		=	IPPROTO_IPIP;
-	iph->tos		=	tos;
-	iph->daddr		=	rt->rt_dst;
-	iph->saddr		=	rt->rt_src;
-	iph->ttl		=	old_iph->ttl;
-	ip_select_ident(iph, &rt->u.dst, NULL);
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	ip_local_out(skb);
-
-	LeaveFunction(10);
-
-	return NF_STOLEN;
-
-  tx_error_icmp:
-	dst_link_failure(skb);
-  tx_error:
-	kfree_skb(skb);
-	LeaveFunction(10);
-	return NF_STOLEN;
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-int
-ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
-		     struct ip_vs_protocol *pp)
-{
-	struct rt6_info *rt;		/* Route to the other host */
-	struct net_device *tdev;	/* Device to other host */
-	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
-	sk_buff_data_t old_transport_header = skb->transport_header;
-	struct ipv6hdr  *iph;		/* Our new IP header */
-	unsigned int max_headroom;	/* The extra header space needed */
-	int    mtu;
-
-	EnterFunction(10);
-
-	if (skb->protocol != htons(ETH_P_IPV6)) {
-		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
-			     "ETH_P_IPV6: %d, skb protocol: %d\n",
-			     htons(ETH_P_IPV6), skb->protocol);
-		goto tx_error;
-	}
-
-	rt = __ip_vs_get_out_rt_v6(cp);
-	if (!rt)
-		goto tx_error_icmp;
-
-	tdev = rt->u.dst.dev;
-
-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
-	/* TODO IPv6: do we need this check in IPv6? */
-	if (mtu < 1280) {
-		dst_release(&rt->u.dst);
-		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
-		goto tx_error;
-	}
-	if (skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
-
-	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-		dst_release(&rt->u.dst);
-		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
-		goto tx_error;
-	}
-
-	/*
-	 * Okay, now see if we can stuff it in the buffer as-is.
-	 */
-	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
-
-	if (skb_headroom(skb) < max_headroom
-	    || skb_cloned(skb) || skb_shared(skb)) {
-		struct sk_buff *new_skb =
-			skb_realloc_headroom(skb, max_headroom);
-		if (!new_skb) {
-			dst_release(&rt->u.dst);
-			kfree_skb(skb);
-			IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
-			return NF_STOLEN;
-		}
-		kfree_skb(skb);
-		skb = new_skb;
-		old_iph = ipv6_hdr(skb);
-	}
-
-	skb->transport_header = old_transport_header;
-
-	skb_push(skb, sizeof(struct ipv6hdr));
-	skb_reset_network_header(skb);
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-
-	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	/*
-	 *	Push down and install the IPIP header.
-	 */
-	iph			=	ipv6_hdr(skb);
-	iph->version		=	6;
-	iph->nexthdr		=	IPPROTO_IPV6;
-	iph->payload_len	=	old_iph->payload_len + sizeof(old_iph);
-	iph->priority		=	old_iph->priority;
-	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
-	iph->daddr		=	rt->rt6i_dst.addr;
-	iph->saddr		=	cp->vaddr.in6; /* rt->rt6i_src.addr; */
-	iph->hop_limit		=	old_iph->hop_limit;
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	ip6_local_out(skb);
-
-	LeaveFunction(10);
-
-	return NF_STOLEN;
-
-tx_error_icmp:
-	dst_link_failure(skb);
-tx_error:
-	kfree_skb(skb);
-	LeaveFunction(10);
-	return NF_STOLEN;
-}
-#endif
-
-
-/*
- *      Direct Routing transmitter
- *      Used for ANY protocol
- */
-int
-ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
-	      struct ip_vs_protocol *pp)
-{
-	struct rtable *rt;			/* Route to the other host */
-	struct iphdr  *iph = ip_hdr(skb);
-	int    mtu;
-
-	EnterFunction(10);
-
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
-		goto tx_error_icmp;
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
-	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
-		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		ip_rt_put(rt);
-		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
-		goto tx_error;
-	}
-
-	/*
-	 * Call ip_send_check because we are not sure it is called
-	 * after ip_defrag. Is copy-on-write needed?
-	 */
-	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
-		ip_rt_put(rt);
-		return NF_STOLEN;
-	}
-	ip_send_check(ip_hdr(skb));
-
-	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	IP_VS_XMIT(PF_INET, skb, rt);
-
-	LeaveFunction(10);
-	return NF_STOLEN;
-
-  tx_error_icmp:
-	dst_link_failure(skb);
-  tx_error:
-	kfree_skb(skb);
-	LeaveFunction(10);
-	return NF_STOLEN;
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-int
-ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
-		 struct ip_vs_protocol *pp)
-{
-	struct rt6_info *rt;			/* Route to the other host */
-	int    mtu;
-
-	EnterFunction(10);
-
-	rt = __ip_vs_get_out_rt_v6(cp);
-	if (!rt)
-		goto tx_error_icmp;
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
-	if (skb->len > mtu) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-		dst_release(&rt->u.dst);
-		IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
-		goto tx_error;
-	}
-
-	/*
-	 * Call ip_send_check because we are not sure it is called
-	 * after ip_defrag. Is copy-on-write needed?
-	 */
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
-		return NF_STOLEN;
-	}
-
-	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	IP_VS_XMIT(PF_INET6, skb, rt);
-
-	LeaveFunction(10);
-	return NF_STOLEN;
-
-tx_error_icmp:
-	dst_link_failure(skb);
-tx_error:
-	kfree_skb(skb);
-	LeaveFunction(10);
-	return NF_STOLEN;
-}
-#endif
-
-
-/*
- *	ICMP packet transmitter
- *	called by the ip_vs_in_icmp
- */
-int
-ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
-		struct ip_vs_protocol *pp, int offset)
-{
-	struct rtable	*rt;	/* Route to the other host */
-	int mtu;
-	int rc;
-
-	EnterFunction(10);
-
-	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
-	   forwarded directly here, because there is no need to
-	   translate address/port back */
-	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
-		if (cp->packet_xmit)
-			rc = cp->packet_xmit(skb, cp, pp);
-		else
-			rc = NF_ACCEPT;
-		/* do not touch skb anymore */
-		atomic_inc(&cp->in_pkts);
-		goto out;
-	}
-
-	/*
-	 * mangle and send the packet here (only for VS/NAT)
-	 */
-
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
-		goto tx_error_icmp;
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
-		ip_rt_put(rt);
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
-		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
-		goto tx_error;
-	}
-
-	/* copy-on-write the packet before mangling it */
-	if (!skb_make_writable(skb, offset))
-		goto tx_error_put;
-
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
-		goto tx_error_put;
-
-	/* drop the old route when skb is not shared */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	ip_vs_nat_icmp(skb, pp, cp, 0);
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	IP_VS_XMIT(PF_INET, skb, rt);
-
-	rc = NF_STOLEN;
-	goto out;
-
-  tx_error_icmp:
-	dst_link_failure(skb);
-  tx_error:
-	dev_kfree_skb(skb);
-	rc = NF_STOLEN;
-  out:
-	LeaveFunction(10);
-	return rc;
-  tx_error_put:
-	ip_rt_put(rt);
-	goto tx_error;
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-int
-ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
-		struct ip_vs_protocol *pp, int offset)
-{
-	struct rt6_info	*rt;	/* Route to the other host */
-	int mtu;
-	int rc;
-
-	EnterFunction(10);
-
-	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
-	   forwarded directly here, because there is no need to
-	   translate address/port back */
-	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
-		if (cp->packet_xmit)
-			rc = cp->packet_xmit(skb, cp, pp);
-		else
-			rc = NF_ACCEPT;
-		/* do not touch skb anymore */
-		atomic_inc(&cp->in_pkts);
-		goto out;
-	}
-
-	/*
-	 * mangle and send the packet here (only for VS/NAT)
-	 */
-
-	rt = __ip_vs_get_out_rt_v6(cp);
-	if (!rt)
-		goto tx_error_icmp;
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
-	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
-		goto tx_error;
-	}
-
-	/* copy-on-write the packet before mangling it */
-	if (!skb_make_writable(skb, offset))
-		goto tx_error_put;
-
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
-		goto tx_error_put;
-
-	/* drop the old route when skb is not shared */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
-
-	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
-
-	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
-
-	IP_VS_XMIT(PF_INET6, skb, rt);
-
-	rc = NF_STOLEN;
-	goto out;
-
-tx_error_icmp:
-	dst_link_failure(skb);
-tx_error:
-	dev_kfree_skb(skb);
-	rc = NF_STOLEN;
-out:
-	LeaveFunction(10);
-	return rc;
-tx_error_put:
-	dst_release(&rt->u.dst);
-	goto tx_error;
-}
-#endif
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ee898e74808..73f9378e1bf 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -838,3 +838,5 @@ config NETFILTER_XT_MATCH_HASHLIMIT
 
 endmenu
 
+source "net/netfilter/ipvs/Kconfig"
+
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3bd2cc556ae..cf75055be83 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -83,3 +83,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
+
+# IPVS
+obj-$(CONFIG_IP_VS) += ipvs/
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
new file mode 100644
index 00000000000..de6004de80b
--- /dev/null
+++ b/net/netfilter/ipvs/Kconfig
@@ -0,0 +1,239 @@
+#
+# IP Virtual Server configuration
+#
+menuconfig IP_VS
+	tristate "IP virtual server support (EXPERIMENTAL)"
+	depends on NETFILTER
+	---help---
+	  IP Virtual Server support will let you build a high-performance
+	  virtual server based on cluster of two or more real servers. This
+	  option must be enabled for at least one of the clustered computers
+	  that will take care of intercepting incoming connections to a
+	  single IP address and scheduling them to real servers.
+
+	  Three request dispatching techniques are implemented, they are
+	  virtual server via NAT, virtual server via tunneling and virtual
+	  server via direct routing. The several scheduling algorithms can
+	  be used to choose which server the connection is directed to,
+	  thus load balancing can be achieved among the servers.  For more
+	  information and its administration program, please visit the
+	  following URL: <http://www.linuxvirtualserver.org/>.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+if IP_VS
+
+config	IP_VS_IPV6
+	bool "IPv6 support for IPVS (DANGEROUS)"
+	depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
+	---help---
+	  Add IPv6 support to IPVS. This is incomplete and might be dangerous.
+
+	  Say N if unsure.
+
+config	IP_VS_DEBUG
+	bool "IP virtual server debugging"
+	---help---
+	  Say Y here if you want to get additional messages useful in
+	  debugging the IP virtual server code. You can change the debug
+	  level in /proc/sys/net/ipv4/vs/debug_level
+
+config	IP_VS_TAB_BITS
+	int "IPVS connection table size (the Nth power of 2)"
+	range 8 20
+	default 12
+	---help---
+	  The IPVS connection hash table uses the chaining scheme to handle
+	  hash collisions. Using a big IPVS connection hash table will greatly
+	  reduce conflicts when there are hundreds of thousands of connections
+	  in the hash table.
+
+	  Note the table size must be power of 2. The table size will be the
+	  value of 2 to the your input number power. The number to choose is
+	  from 8 to 20, the default number is 12, which means the table size
+	  is 4096. Don't input the number too small, otherwise you will lose
+	  performance on it. You can adapt the table size yourself, according
+	  to your virtual server application. It is good to set the table size
+	  not far less than the number of connections per second multiplying
+	  average lasting time of connection in the table.  For example, your
+	  virtual server gets 200 connections per second, the connection lasts
+	  for 200 seconds in average in the connection table, the table size
+	  should be not far less than 200x200, it is good to set the table
+	  size 32768 (2**15).
+
+	  Another note that each connection occupies 128 bytes effectively and
+	  each hash entry uses 8 bytes, so you can estimate how much memory is
+	  needed for your box.
+
+comment "IPVS transport protocol load balancing support"
+
+config	IP_VS_PROTO_TCP
+	bool "TCP load balancing support"
+	---help---
+	  This option enables support for load balancing TCP transport
+	  protocol. Say Y if unsure.
+
+config	IP_VS_PROTO_UDP
+	bool "UDP load balancing support"
+	---help---
+	  This option enables support for load balancing UDP transport
+	  protocol. Say Y if unsure.
+
+config	IP_VS_PROTO_AH_ESP
+	bool
+	depends on UNDEFINED
+
+config	IP_VS_PROTO_ESP
+	bool "ESP load balancing support"
+	select IP_VS_PROTO_AH_ESP
+	---help---
+	  This option enables support for load balancing ESP (Encapsulation
+	  Security Payload) transport protocol. Say Y if unsure.
+
+config	IP_VS_PROTO_AH
+	bool "AH load balancing support"
+	select IP_VS_PROTO_AH_ESP
+	---help---
+	  This option enables support for load balancing AH (Authentication
+	  Header) transport protocol. Say Y if unsure.
+
+comment "IPVS scheduler"
+
+config	IP_VS_RR
+	tristate "round-robin scheduling"
+	---help---
+	  The robin-robin scheduling algorithm simply directs network
+	  connections to different real servers in a round-robin manner.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+ 
+config	IP_VS_WRR
+        tristate "weighted round-robin scheduling" 
+	---help---
+	  The weighted robin-robin scheduling algorithm directs network
+	  connections to different real servers based on server weights
+	  in a round-robin manner. Servers with higher weights receive
+	  new connections first than those with less weights, and servers
+	  with higher weights get more connections than those with less
+	  weights and servers with equal weights get equal connections.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_LC
+        tristate "least-connection scheduling"
+	---help---
+	  The least-connection scheduling algorithm directs network
+	  connections to the server with the least number of active 
+	  connections.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_WLC
+        tristate "weighted least-connection scheduling"
+	---help---
+	  The weighted least-connection scheduling algorithm directs network
+	  connections to the server with the least active connections
+	  normalized by the server weight.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_LBLC
+	tristate "locality-based least-connection scheduling"
+	---help---
+	  The locality-based least-connection scheduling algorithm is for
+	  destination IP load balancing. It is usually used in cache cluster.
+	  This algorithm usually directs packet destined for an IP address to
+	  its server if the server is alive and under load. If the server is
+	  overloaded (its active connection numbers is larger than its weight)
+	  and there is a server in its half load, then allocate the weighted
+	  least-connection server to this IP address.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config  IP_VS_LBLCR
+	tristate "locality-based least-connection with replication scheduling"
+	---help---
+	  The locality-based least-connection with replication scheduling
+	  algorithm is also for destination IP load balancing. It is 
+	  usually used in cache cluster. It differs from the LBLC scheduling
+	  as follows: the load balancer maintains mappings from a target
+	  to a set of server nodes that can serve the target. Requests for
+	  a target are assigned to the least-connection node in the target's
+	  server set. If all the node in the server set are over loaded,
+	  it picks up a least-connection node in the cluster and adds it
+	  in the sever set for the target. If the server set has not been
+	  modified for the specified time, the most loaded node is removed
+	  from the server set, in order to avoid high degree of replication.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_DH
+	tristate "destination hashing scheduling"
+	---help---
+	  The destination hashing scheduling algorithm assigns network
+	  connections to the servers through looking up a statically assigned
+	  hash table by their destination IP addresses.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_SH
+	tristate "source hashing scheduling"
+	---help---
+	  The source hashing scheduling algorithm assigns network
+	  connections to the servers through looking up a statically assigned
+	  hash table by their source IP addresses.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_SED
+	tristate "shortest expected delay scheduling"
+	---help---
+	  The shortest expected delay scheduling algorithm assigns network
+	  connections to the server with the shortest expected delay. The 
+	  expected delay that the job will experience is (Ci + 1) / Ui if 
+	  sent to the ith server, in which Ci is the number of connections
+	  on the ith server and Ui is the fixed service rate (weight)
+	  of the ith server.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_NQ
+	tristate "never queue scheduling"
+	---help---
+	  The never queue scheduling algorithm adopts a two-speed model.
+	  When there is an idle server available, the job will be sent to
+	  the idle server, instead of waiting for a fast one. When there
+	  is no idle server available, the job will be sent to the server
+	  that minimize its expected delay (The Shortest Expected Delay
+	  scheduling algorithm).
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+comment 'IPVS application helper'
+
+config	IP_VS_FTP
+  	tristate "FTP protocol helper"
+        depends on IP_VS_PROTO_TCP
+	---help---
+	  FTP is a protocol that transfers IP address and/or port number in
+	  the payload. In the virtual server via Network Address Translation,
+	  the IP address and port number of real servers cannot be sent to
+	  clients in ftp connections directly, so FTP protocol helper is
+	  required for tracking the connection and mangling it back to that of
+	  virtual service.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+endif # IP_VS
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
new file mode 100644
index 00000000000..73a46fe1fe4
--- /dev/null
+++ b/net/netfilter/ipvs/Makefile
@@ -0,0 +1,33 @@
+#
+# Makefile for the IPVS modules on top of IPv4.
+#
+
+# IPVS transport protocol load balancing support
+ip_vs_proto-objs-y :=
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
+
+ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o	   \
+		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o	   		   \
+		ip_vs_est.o ip_vs_proto.o 				   \
+		$(ip_vs_proto-objs-y)
+
+
+# IPVS core
+obj-$(CONFIG_IP_VS) += ip_vs.o
+
+# IPVS schedulers
+obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
+obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
+obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
+obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
+obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
+obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
+obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
+obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
+obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+
+# IPVS application helpers
+obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
new file mode 100644
index 00000000000..201b8ea3020
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -0,0 +1,622 @@
+/*
+ * ip_vs_app.c: Application module support for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
+ * is that ip_vs_app module handles the reverse direction (incoming requests
+ * and outgoing responses).
+ *
+ *		IP_MASQ_APP application masquerading module
+ *
+ * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/net_namespace.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mutex.h>
+
+#include <net/ip_vs.h>
+
+EXPORT_SYMBOL(register_ip_vs_app);
+EXPORT_SYMBOL(unregister_ip_vs_app);
+EXPORT_SYMBOL(register_ip_vs_app_inc);
+
+/* ipvs application list head */
+static LIST_HEAD(ip_vs_app_list);
+static DEFINE_MUTEX(__ip_vs_app_mutex);
+
+
+/*
+ *	Get an ip_vs_app object
+ */
+static inline int ip_vs_app_get(struct ip_vs_app *app)
+{
+	return try_module_get(app->module);
+}
+
+
+static inline void ip_vs_app_put(struct ip_vs_app *app)
+{
+	module_put(app->module);
+}
+
+
+/*
+ *	Allocate/initialize app incarnation and register it in proto apps.
+ */
+static int
+ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+{
+	struct ip_vs_protocol *pp;
+	struct ip_vs_app *inc;
+	int ret;
+
+	if (!(pp = ip_vs_proto_get(proto)))
+		return -EPROTONOSUPPORT;
+
+	if (!pp->unregister_app)
+		return -EOPNOTSUPP;
+
+	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
+	if (!inc)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&inc->p_list);
+	INIT_LIST_HEAD(&inc->incs_list);
+	inc->app = app;
+	inc->port = htons(port);
+	atomic_set(&inc->usecnt, 0);
+
+	if (app->timeouts) {
+		inc->timeout_table =
+			ip_vs_create_timeout_table(app->timeouts,
+						   app->timeouts_size);
+		if (!inc->timeout_table) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	ret = pp->register_app(inc);
+	if (ret)
+		goto out;
+
+	list_add(&inc->a_list, &app->incs_list);
+	IP_VS_DBG(9, "%s application %s:%u registered\n",
+		  pp->name, inc->name, inc->port);
+
+	return 0;
+
+  out:
+	kfree(inc->timeout_table);
+	kfree(inc);
+	return ret;
+}
+
+
+/*
+ *	Release app incarnation
+ */
+static void
+ip_vs_app_inc_release(struct ip_vs_app *inc)
+{
+	struct ip_vs_protocol *pp;
+
+	if (!(pp = ip_vs_proto_get(inc->protocol)))
+		return;
+
+	if (pp->unregister_app)
+		pp->unregister_app(inc);
+
+	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
+		  pp->name, inc->name, inc->port);
+
+	list_del(&inc->a_list);
+
+	kfree(inc->timeout_table);
+	kfree(inc);
+}
+
+
+/*
+ *	Get reference to app inc (only called from softirq)
+ *
+ */
+int ip_vs_app_inc_get(struct ip_vs_app *inc)
+{
+	int result;
+
+	atomic_inc(&inc->usecnt);
+	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
+		atomic_dec(&inc->usecnt);
+	return result;
+}
+
+
+/*
+ *	Put the app inc (only called from timer or net softirq)
+ */
+void ip_vs_app_inc_put(struct ip_vs_app *inc)
+{
+	ip_vs_app_put(inc->app);
+	atomic_dec(&inc->usecnt);
+}
+
+
+/*
+ *	Register an application incarnation in protocol applications
+ */
+int
+register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+{
+	int result;
+
+	mutex_lock(&__ip_vs_app_mutex);
+
+	result = ip_vs_app_inc_new(app, proto, port);
+
+	mutex_unlock(&__ip_vs_app_mutex);
+
+	return result;
+}
+
+
+/*
+ *	ip_vs_app registration routine
+ */
+int register_ip_vs_app(struct ip_vs_app *app)
+{
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	mutex_lock(&__ip_vs_app_mutex);
+
+	list_add(&app->a_list, &ip_vs_app_list);
+
+	mutex_unlock(&__ip_vs_app_mutex);
+
+	return 0;
+}
+
+
+/*
+ *	ip_vs_app unregistration routine
+ *	We are sure there are no app incarnations attached to services
+ */
+void unregister_ip_vs_app(struct ip_vs_app *app)
+{
+	struct ip_vs_app *inc, *nxt;
+
+	mutex_lock(&__ip_vs_app_mutex);
+
+	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
+		ip_vs_app_inc_release(inc);
+	}
+
+	list_del(&app->a_list);
+
+	mutex_unlock(&__ip_vs_app_mutex);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+}
+
+
+/*
+ *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
+ */
+int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+{
+	return pp->app_conn_bind(cp);
+}
+
+
+/*
+ *	Unbind cp from application incarnation (called by cp destructor)
+ */
+void ip_vs_unbind_app(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *inc = cp->app;
+
+	if (!inc)
+		return;
+
+	if (inc->unbind_conn)
+		inc->unbind_conn(inc, cp);
+	if (inc->done_conn)
+		inc->done_conn(inc, cp);
+	ip_vs_app_inc_put(inc);
+	cp->app = NULL;
+}
+
+
+/*
+ *	Fixes th->seq based on ip_vs_seq info.
+ */
+static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 seq = ntohl(th->seq);
+
+	/*
+	 *	Adjust seq with delta-offset for all packets after
+	 *	the most recent resized pkt seq and with previous_delta offset
+	 *	for all packets	before most recent resized pkt seq.
+	 */
+	if (vseq->delta || vseq->previous_delta) {
+		if(after(seq, vseq->init_seq)) {
+			th->seq = htonl(seq + vseq->delta);
+			IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
+				  vseq->delta);
+		} else {
+			th->seq = htonl(seq + vseq->previous_delta);
+			IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
+				  "(%d) to seq\n", vseq->previous_delta);
+		}
+	}
+}
+
+
+/*
+ *	Fixes th->ack_seq based on ip_vs_seq info.
+ */
+static inline void
+vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 ack_seq = ntohl(th->ack_seq);
+
+	/*
+	 * Adjust ack_seq with delta-offset for
+	 * the packets AFTER most recent resized pkt has caused a shift
+	 * for packets before most recent resized pkt, use previous_delta
+	 */
+	if (vseq->delta || vseq->previous_delta) {
+		/* since ack_seq is the number of octet that is expected
+		   to receive next, so compare it with init_seq+delta */
+		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
+			th->ack_seq = htonl(ack_seq - vseq->delta);
+			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
+				  "(%d) from ack_seq\n", vseq->delta);
+
+		} else {
+			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
+			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
+				  "previous_delta (%d) from ack_seq\n",
+				  vseq->previous_delta);
+		}
+	}
+}
+
+
+/*
+ *	Updates ip_vs_seq if pkt has been resized
+ *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
+ */
+static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
+				 unsigned flag, __u32 seq, int diff)
+{
+	/* spinlock is to keep updating cp->flags atomic */
+	spin_lock(&cp->lock);
+	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
+		vseq->previous_delta = vseq->delta;
+		vseq->delta += diff;
+		vseq->init_seq = seq;
+		cp->flags |= flag;
+	}
+	spin_unlock(&cp->lock);
+}
+
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
+				  struct ip_vs_app *app)
+{
+	int diff;
+	const unsigned int tcp_offset = ip_hdrlen(skb);
+	struct tcphdr *th;
+	__u32 seq;
+
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+		return 0;
+
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
+
+	/*
+	 *	Remember seq number in case this pkt gets resized
+	 */
+	seq = ntohl(th->seq);
+
+	/*
+	 *	Fix seq stuff if flagged as so.
+	 */
+	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+		vs_fix_seq(&cp->out_seq, th);
+	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+		vs_fix_ack_seq(&cp->in_seq, th);
+
+	/*
+	 *	Call private output hook function
+	 */
+	if (app->pkt_out == NULL)
+		return 1;
+
+	if (!app->pkt_out(app, cp, skb, &diff))
+		return 0;
+
+	/*
+	 *	Update ip_vs seq stuff if len has changed.
+	 */
+	if (diff != 0)
+		vs_seq_update(cp, &cp->out_seq,
+			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
+
+	return 1;
+}
+
+/*
+ *	Output pkt hook. Will call bound ip_vs_app specific function
+ *	called by ipvs packet handler, assumes previously checked cp!=NULL
+ *	returns false if it can't handle packet (oom)
+ */
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *app;
+
+	/*
+	 *	check if application module is bound to
+	 *	this ip_vs_conn.
+	 */
+	if ((app = cp->app) == NULL)
+		return 1;
+
+	/* TCP is complicated */
+	if (cp->protocol == IPPROTO_TCP)
+		return app_tcp_pkt_out(cp, skb, app);
+
+	/*
+	 *	Call private output hook function
+	 */
+	if (app->pkt_out == NULL)
+		return 1;
+
+	return app->pkt_out(app, cp, skb, NULL);
+}
+
+
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
+				 struct ip_vs_app *app)
+{
+	int diff;
+	const unsigned int tcp_offset = ip_hdrlen(skb);
+	struct tcphdr *th;
+	__u32 seq;
+
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+		return 0;
+
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
+
+	/*
+	 *	Remember seq number in case this pkt gets resized
+	 */
+	seq = ntohl(th->seq);
+
+	/*
+	 *	Fix seq stuff if flagged as so.
+	 */
+	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+		vs_fix_seq(&cp->in_seq, th);
+	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+		vs_fix_ack_seq(&cp->out_seq, th);
+
+	/*
+	 *	Call private input hook function
+	 */
+	if (app->pkt_in == NULL)
+		return 1;
+
+	if (!app->pkt_in(app, cp, skb, &diff))
+		return 0;
+
+	/*
+	 *	Update ip_vs seq stuff if len has changed.
+	 */
+	if (diff != 0)
+		vs_seq_update(cp, &cp->in_seq,
+			      IP_VS_CONN_F_IN_SEQ, seq, diff);
+
+	return 1;
+}
+
+/*
+ *	Input pkt hook. Will call bound ip_vs_app specific function
+ *	called by ipvs packet handler, assumes previously checked cp!=NULL.
+ *	returns false if can't handle packet (oom).
+ */
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *app;
+
+	/*
+	 *	check if application module is bound to
+	 *	this ip_vs_conn.
+	 */
+	if ((app = cp->app) == NULL)
+		return 1;
+
+	/* TCP is complicated */
+	if (cp->protocol == IPPROTO_TCP)
+		return app_tcp_pkt_in(cp, skb, app);
+
+	/*
+	 *	Call private input hook function
+	 */
+	if (app->pkt_in == NULL)
+		return 1;
+
+	return app->pkt_in(app, cp, skb, NULL);
+}
+
+
+#ifdef CONFIG_PROC_FS
+/*
+ *	/proc/net/ip_vs_app entry function
+ */
+
+static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+{
+	struct ip_vs_app *app, *inc;
+
+	list_for_each_entry(app, &ip_vs_app_list, a_list) {
+		list_for_each_entry(inc, &app->incs_list, a_list) {
+			if (pos-- == 0)
+				return inc;
+		}
+	}
+	return NULL;
+
+}
+
+static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	mutex_lock(&__ip_vs_app_mutex);
+
+	return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_vs_app *inc, *app;
+	struct list_head *e;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_app_idx(0);
+
+	inc = v;
+	app = inc->app;
+
+	if ((e = inc->a_list.next) != &app->incs_list)
+		return list_entry(e, struct ip_vs_app, a_list);
+
+	/* go on to next application */
+	for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+		app = list_entry(e, struct ip_vs_app, a_list);
+		list_for_each_entry(inc, &app->incs_list, a_list) {
+			return inc;
+		}
+	}
+	return NULL;
+}
+
+static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
+{
+	mutex_unlock(&__ip_vs_app_mutex);
+}
+
+static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "prot port    usecnt name\n");
+	else {
+		const struct ip_vs_app *inc = v;
+
+		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
+			   ip_vs_proto_name(inc->protocol),
+			   ntohs(inc->port),
+			   atomic_read(&inc->usecnt),
+			   inc->name);
+	}
+	return 0;
+}
+
+static const struct seq_operations ip_vs_app_seq_ops = {
+	.start = ip_vs_app_seq_start,
+	.next  = ip_vs_app_seq_next,
+	.stop  = ip_vs_app_seq_stop,
+	.show  = ip_vs_app_seq_show,
+};
+
+static int ip_vs_app_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ip_vs_app_seq_ops);
+}
+
+static const struct file_operations ip_vs_app_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = ip_vs_app_open,
+	.read	 = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+#endif
+
+
+/*
+ *	Replace a segment of data with a new segment
+ */
+int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
+		      char *o_buf, int o_len, char *n_buf, int n_len)
+{
+	int diff;
+	int o_offset;
+	int o_left;
+
+	EnterFunction(9);
+
+	diff = n_len - o_len;
+	o_offset = o_buf - (char *)skb->data;
+	/* The length of left data after o_buf+o_len in the skb data */
+	o_left = skb->len - (o_offset + o_len);
+
+	if (diff <= 0) {
+		memmove(o_buf + n_len, o_buf + o_len, o_left);
+		memcpy(o_buf, n_buf, n_len);
+		skb_trim(skb, skb->len + diff);
+	} else if (diff <= skb_tailroom(skb)) {
+		skb_put(skb, diff);
+		memmove(o_buf + n_len, o_buf + o_len, o_left);
+		memcpy(o_buf, n_buf, n_len);
+	} else {
+		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
+			return -ENOMEM;
+		skb_put(skb, diff);
+		memmove(skb->data + o_offset + n_len,
+			skb->data + o_offset + o_len, o_left);
+		skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
+	}
+
+	/* must update the iph total length here */
+	ip_hdr(skb)->tot_len = htons(skb->len);
+
+	LeaveFunction(9);
+	return 0;
+}
+
+
+int __init ip_vs_app_init(void)
+{
+	/* we will replace it with proc_net_ipvs_create() soon */
+	proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+	return 0;
+}
+
+
+void ip_vs_app_cleanup(void)
+{
+	proc_net_remove(&init_net, "ip_vs_app");
+}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
new file mode 100644
index 00000000000..9a24332fbed
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -0,0 +1,1110 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others. Many code here is taken from IP MASQ code of kernel 2.2.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/net.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>		/* for proc_net_* */
+#include <linux/seq_file.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+
+#include <net/net_namespace.h>
+#include <net/ip_vs.h>
+
+
+/*
+ *  Connection hash table: for input and output packets lookups of IPVS
+ */
+static struct list_head *ip_vs_conn_tab;
+
+/*  SLAB cache for IPVS connections */
+static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
+
+/*  counter for current IPVS connections */
+static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
+
+/*  counter for no client port connections */
+static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
+
+/* random value for IPVS connection hash */
+static unsigned int ip_vs_conn_rnd;
+
+/*
+ *  Fine locking granularity for big connection hash table
+ */
+#define CT_LOCKARRAY_BITS  4
+#define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
+#define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
+
+struct ip_vs_aligned_lock
+{
+	rwlock_t	l;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+/* lock array for conn table */
+static struct ip_vs_aligned_lock
+__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
+
+static inline void ct_read_lock(unsigned key)
+{
+	read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_unlock(unsigned key)
+{
+	read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_lock(unsigned key)
+{
+	write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_unlock(unsigned key)
+{
+	write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_lock_bh(unsigned key)
+{
+	read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_unlock_bh(unsigned key)
+{
+	read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_lock_bh(unsigned key)
+{
+	write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_unlock_bh(unsigned key)
+{
+	write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+
+/*
+ *	Returns hash value for IPVS connection entry
+ */
+static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+				       const union nf_inet_addr *addr,
+				       __be16 port)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+				    (__force u32)port, proto, ip_vs_conn_rnd)
+			& IP_VS_CONN_TAB_MASK;
+#endif
+	return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+			    ip_vs_conn_rnd)
+		& IP_VS_CONN_TAB_MASK;
+}
+
+
+/*
+ *	Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ *	returns bool success.
+ */
+static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+	int ret;
+
+	/* Hash by protocol, client address and port */
+	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
+
+	ct_write_lock(hash);
+
+	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
+		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
+		cp->flags |= IP_VS_CONN_F_HASHED;
+		atomic_inc(&cp->refcnt);
+		ret = 1;
+	} else {
+		IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		ret = 0;
+	}
+
+	ct_write_unlock(hash);
+
+	return ret;
+}
+
+
+/*
+ *	UNhashes ip_vs_conn from ip_vs_conn_tab.
+ *	returns bool success.
+ */
+static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+	int ret;
+
+	/* unhash it and decrease its reference counter */
+	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
+
+	ct_write_lock(hash);
+
+	if (cp->flags & IP_VS_CONN_F_HASHED) {
+		list_del(&cp->c_list);
+		cp->flags &= ~IP_VS_CONN_F_HASHED;
+		atomic_dec(&cp->refcnt);
+		ret = 1;
+	} else
+		ret = 0;
+
+	ct_write_unlock(hash);
+
+	return ret;
+}
+
+
+/*
+ *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ *  Called for pkts coming from OUTside-to-INside.
+ *	s_addr, s_port: pkt source address (foreign host)
+ *	d_addr, d_port: pkt dest address (load balancer)
+ */
+static inline struct ip_vs_conn *__ip_vs_conn_in_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp;
+
+	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (cp->af == af &&
+		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+		    s_port == cp->cport && d_port == cp->vport &&
+		    ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
+		    protocol == cp->protocol) {
+			/* HIT */
+			atomic_inc(&cp->refcnt);
+			ct_read_unlock(hash);
+			return cp;
+		}
+	}
+
+	ct_read_unlock(hash);
+
+	return NULL;
+}
+
+struct ip_vs_conn *ip_vs_conn_in_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	struct ip_vs_conn *cp;
+
+	cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
+	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
+		cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
+					 d_port);
+
+	IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      cp ? "hit" : "not hit");
+
+	return cp;
+}
+
+/* Get reference to connection template */
+struct ip_vs_conn *ip_vs_ct_in_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp;
+
+	hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (cp->af == af &&
+		    ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+		    ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+		    s_port == cp->cport && d_port == cp->vport &&
+		    cp->flags & IP_VS_CONN_F_TEMPLATE &&
+		    protocol == cp->protocol) {
+			/* HIT */
+			atomic_inc(&cp->refcnt);
+			goto out;
+		}
+	}
+	cp = NULL;
+
+  out:
+	ct_read_unlock(hash);
+
+	IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      cp ? "hit" : "not hit");
+
+	return cp;
+}
+
+/*
+ *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ *  Called for pkts coming from inside-to-OUTside.
+ *	s_addr, s_port: pkt source address (inside host)
+ *	d_addr, d_port: pkt dest address (foreign host)
+ */
+struct ip_vs_conn *ip_vs_conn_out_get
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp, *ret=NULL;
+
+	/*
+	 *	Check for "full" addressed entries
+	 */
+	hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (cp->af == af &&
+		    ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
+		    ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
+		    d_port == cp->cport && s_port == cp->dport &&
+		    protocol == cp->protocol) {
+			/* HIT */
+			atomic_inc(&cp->refcnt);
+			ret = cp;
+			break;
+		}
+	}
+
+	ct_read_unlock(hash);
+
+	IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
+		      ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+		      IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+		      ret ? "hit" : "not hit");
+
+	return ret;
+}
+
+
+/*
+ *      Put back the conn and restart its timer with its timeout
+ */
+void ip_vs_conn_put(struct ip_vs_conn *cp)
+{
+	/* reset it expire in its timeout */
+	mod_timer(&cp->timer, jiffies+cp->timeout);
+
+	__ip_vs_conn_put(cp);
+}
+
+
+/*
+ *	Fill a no_client_port connection with a client port number
+ */
+void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
+{
+	if (ip_vs_conn_unhash(cp)) {
+		spin_lock(&cp->lock);
+		if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
+			atomic_dec(&ip_vs_conn_no_cport_cnt);
+			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
+			cp->cport = cport;
+		}
+		spin_unlock(&cp->lock);
+
+		/* hash on new dport */
+		ip_vs_conn_hash(cp);
+	}
+}
+
+
+/*
+ *	Bind a connection entry with the corresponding packet_xmit.
+ *	Called by ip_vs_conn_new.
+ */
+static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
+{
+	switch (IP_VS_FWD_METHOD(cp)) {
+	case IP_VS_CONN_F_MASQ:
+		cp->packet_xmit = ip_vs_nat_xmit;
+		break;
+
+	case IP_VS_CONN_F_TUNNEL:
+		cp->packet_xmit = ip_vs_tunnel_xmit;
+		break;
+
+	case IP_VS_CONN_F_DROUTE:
+		cp->packet_xmit = ip_vs_dr_xmit;
+		break;
+
+	case IP_VS_CONN_F_LOCALNODE:
+		cp->packet_xmit = ip_vs_null_xmit;
+		break;
+
+	case IP_VS_CONN_F_BYPASS:
+		cp->packet_xmit = ip_vs_bypass_xmit;
+		break;
+	}
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
+{
+	switch (IP_VS_FWD_METHOD(cp)) {
+	case IP_VS_CONN_F_MASQ:
+		cp->packet_xmit = ip_vs_nat_xmit_v6;
+		break;
+
+	case IP_VS_CONN_F_TUNNEL:
+		cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+		break;
+
+	case IP_VS_CONN_F_DROUTE:
+		cp->packet_xmit = ip_vs_dr_xmit_v6;
+		break;
+
+	case IP_VS_CONN_F_LOCALNODE:
+		cp->packet_xmit = ip_vs_null_xmit;
+		break;
+
+	case IP_VS_CONN_F_BYPASS:
+		cp->packet_xmit = ip_vs_bypass_xmit_v6;
+		break;
+	}
+}
+#endif
+
+
+static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
+{
+	return atomic_read(&dest->activeconns)
+		+ atomic_read(&dest->inactconns);
+}
+
+/*
+ *	Bind a connection entry with a virtual service destination
+ *	Called just after a new connection entry is created.
+ */
+static inline void
+ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
+{
+	/* if dest is NULL, then return directly */
+	if (!dest)
+		return;
+
+	/* Increase the refcnt counter of the dest */
+	atomic_inc(&dest->refcnt);
+
+	/* Bind with the destination and its corresponding transmitter */
+	if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+	    (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
+		/* if the connection is not template and is created
+		 * by sync, preserve the activity flag.
+		 */
+		cp->flags |= atomic_read(&dest->conn_flags) &
+			     (~IP_VS_CONN_F_INACTIVE);
+	else
+		cp->flags |= atomic_read(&dest->conn_flags);
+	cp->dest = dest;
+
+	IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
+		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		      "dest->refcnt:%d\n",
+		      ip_vs_proto_name(cp->protocol),
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+		      ip_vs_fwd_tag(cp), cp->state,
+		      cp->flags, atomic_read(&cp->refcnt),
+		      atomic_read(&dest->refcnt));
+
+	/* Update the connection counters */
+	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
+		/* It is a normal connection, so increase the inactive
+		   connection counter because it is in TCP SYNRECV
+		   state (inactive) or other protocol inacive state */
+		if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+		    (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
+			atomic_inc(&dest->activeconns);
+		else
+			atomic_inc(&dest->inactconns);
+	} else {
+		/* It is a persistent connection/template, so increase
+		   the peristent connection counter */
+		atomic_inc(&dest->persistconns);
+	}
+
+	if (dest->u_threshold != 0 &&
+	    ip_vs_dest_totalconns(dest) >= dest->u_threshold)
+		dest->flags |= IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ * Check if there is a destination for the connection, if so
+ * bind the connection to the destination.
+ */
+struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+{
+	struct ip_vs_dest *dest;
+
+	if ((cp) && (!cp->dest)) {
+		dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+				       &cp->vaddr, cp->vport,
+				       cp->protocol);
+		ip_vs_bind_dest(cp, dest);
+		return dest;
+	} else
+		return NULL;
+}
+
+
+/*
+ *	Unbind a connection entry with its VS destination
+ *	Called by the ip_vs_conn_expire function.
+ */
+static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
+{
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (!dest)
+		return;
+
+	IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
+		      "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+		      "dest->refcnt:%d\n",
+		      ip_vs_proto_name(cp->protocol),
+		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+		      ip_vs_fwd_tag(cp), cp->state,
+		      cp->flags, atomic_read(&cp->refcnt),
+		      atomic_read(&dest->refcnt));
+
+	/* Update the connection counters */
+	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
+		/* It is a normal connection, so decrease the inactconns
+		   or activeconns counter */
+		if (cp->flags & IP_VS_CONN_F_INACTIVE) {
+			atomic_dec(&dest->inactconns);
+		} else {
+			atomic_dec(&dest->activeconns);
+		}
+	} else {
+		/* It is a persistent connection/template, so decrease
+		   the peristent connection counter */
+		atomic_dec(&dest->persistconns);
+	}
+
+	if (dest->l_threshold != 0) {
+		if (ip_vs_dest_totalconns(dest) < dest->l_threshold)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	} else if (dest->u_threshold != 0) {
+		if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	} else {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	}
+
+	/*
+	 * Simply decrease the refcnt of the dest, because the
+	 * dest will be either in service's destination list
+	 * or in the trash.
+	 */
+	atomic_dec(&dest->refcnt);
+}
+
+
+/*
+ *	Checking if the destination of a connection template is available.
+ *	If available, return 1, otherwise invalidate this connection
+ *	template and return 0.
+ */
+int ip_vs_check_template(struct ip_vs_conn *ct)
+{
+	struct ip_vs_dest *dest = ct->dest;
+
+	/*
+	 * Checking the dest server status.
+	 */
+	if ((dest == NULL) ||
+	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
+	    (sysctl_ip_vs_expire_quiescent_template &&
+	     (atomic_read(&dest->weight) == 0))) {
+		IP_VS_DBG_BUF(9, "check_template: dest not available for "
+			      "protocol %s s:%s:%d v:%s:%d "
+			      "-> d:%s:%d\n",
+			      ip_vs_proto_name(ct->protocol),
+			      IP_VS_DBG_ADDR(ct->af, &ct->caddr),
+			      ntohs(ct->cport),
+			      IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
+			      ntohs(ct->vport),
+			      IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+			      ntohs(ct->dport));
+
+		/*
+		 * Invalidate the connection template
+		 */
+		if (ct->vport != htons(0xffff)) {
+			if (ip_vs_conn_unhash(ct)) {
+				ct->dport = htons(0xffff);
+				ct->vport = htons(0xffff);
+				ct->cport = 0;
+				ip_vs_conn_hash(ct);
+			}
+		}
+
+		/*
+		 * Simply decrease the refcnt of the template,
+		 * don't restart its timer.
+		 */
+		atomic_dec(&ct->refcnt);
+		return 0;
+	}
+	return 1;
+}
+
+static void ip_vs_conn_expire(unsigned long data)
+{
+	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+
+	cp->timeout = 60*HZ;
+
+	/*
+	 *	hey, I'm using it
+	 */
+	atomic_inc(&cp->refcnt);
+
+	/*
+	 *	do I control anybody?
+	 */
+	if (atomic_read(&cp->n_control))
+		goto expire_later;
+
+	/*
+	 *	unhash it if it is hashed in the conn table
+	 */
+	if (!ip_vs_conn_unhash(cp))
+		goto expire_later;
+
+	/*
+	 *	refcnt==1 implies I'm the only one referrer
+	 */
+	if (likely(atomic_read(&cp->refcnt) == 1)) {
+		/* delete the timer if it is activated by other users */
+		if (timer_pending(&cp->timer))
+			del_timer(&cp->timer);
+
+		/* does anybody control me? */
+		if (cp->control)
+			ip_vs_control_del(cp);
+
+		if (unlikely(cp->app != NULL))
+			ip_vs_unbind_app(cp);
+		ip_vs_unbind_dest(cp);
+		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
+			atomic_dec(&ip_vs_conn_no_cport_cnt);
+		atomic_dec(&ip_vs_conn_count);
+
+		kmem_cache_free(ip_vs_conn_cachep, cp);
+		return;
+	}
+
+	/* hash it back to the table */
+	ip_vs_conn_hash(cp);
+
+  expire_later:
+	IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
+		  atomic_read(&cp->refcnt)-1,
+		  atomic_read(&cp->n_control));
+
+	ip_vs_conn_put(cp);
+}
+
+
+void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
+{
+	if (del_timer(&cp->timer))
+		mod_timer(&cp->timer, jiffies);
+}
+
+
+/*
+ *	Create a new connection entry and hash it into the ip_vs_conn_tab
+ */
+struct ip_vs_conn *
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+	       const union nf_inet_addr *vaddr, __be16 vport,
+	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+
+	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
+	if (cp == NULL) {
+		IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&cp->c_list);
+	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+	cp->af		   = af;
+	cp->protocol	   = proto;
+	ip_vs_addr_copy(af, &cp->caddr, caddr);
+	cp->cport	   = cport;
+	ip_vs_addr_copy(af, &cp->vaddr, vaddr);
+	cp->vport	   = vport;
+	ip_vs_addr_copy(af, &cp->daddr, daddr);
+	cp->dport          = dport;
+	cp->flags	   = flags;
+	spin_lock_init(&cp->lock);
+
+	/*
+	 * Set the entry is referenced by the current thread before hashing
+	 * it in the table, so that other thread run ip_vs_random_dropentry
+	 * but cannot drop this entry.
+	 */
+	atomic_set(&cp->refcnt, 1);
+
+	atomic_set(&cp->n_control, 0);
+	atomic_set(&cp->in_pkts, 0);
+
+	atomic_inc(&ip_vs_conn_count);
+	if (flags & IP_VS_CONN_F_NO_CPORT)
+		atomic_inc(&ip_vs_conn_no_cport_cnt);
+
+	/* Bind the connection with a destination server */
+	ip_vs_bind_dest(cp, dest);
+
+	/* Set its state and timeout */
+	cp->state = 0;
+	cp->timeout = 3*HZ;
+
+	/* Bind its packet transmitter */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_bind_xmit_v6(cp);
+	else
+#endif
+		ip_vs_bind_xmit(cp);
+
+	if (unlikely(pp && atomic_read(&pp->appcnt)))
+		ip_vs_bind_app(cp, pp);
+
+	/* Hash it in the ip_vs_conn_tab finally */
+	ip_vs_conn_hash(cp);
+
+	return cp;
+}
+
+
+/*
+ *	/proc/net/ip_vs_conn entries
+ */
+#ifdef CONFIG_PROC_FS
+
+static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+
+	for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		ct_read_lock_bh(idx);
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+			if (pos-- == 0) {
+				seq->private = &ip_vs_conn_tab[idx];
+				return cp;
+			}
+		}
+		ct_read_unlock_bh(idx);
+	}
+
+	return NULL;
+}
+
+static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	seq->private = NULL;
+	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
+}
+
+static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_vs_conn *cp = v;
+	struct list_head *e, *l = seq->private;
+	int idx;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_conn_array(seq, 0);
+
+	/* more on same hash chain? */
+	if ((e = cp->c_list.next) != l)
+		return list_entry(e, struct ip_vs_conn, c_list);
+
+	idx = l - ip_vs_conn_tab;
+	ct_read_unlock_bh(idx);
+
+	while (++idx < IP_VS_CONN_TAB_SIZE) {
+		ct_read_lock_bh(idx);
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+			seq->private = &ip_vs_conn_tab[idx];
+			return cp;
+		}
+		ct_read_unlock_bh(idx);
+	}
+	seq->private = NULL;
+	return NULL;
+}
+
+static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
+{
+	struct list_head *l = seq->private;
+
+	if (l)
+		ct_read_unlock_bh(l - ip_vs_conn_tab);
+}
+
+static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
+{
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+   "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires\n");
+	else {
+		const struct ip_vs_conn *cp = v;
+
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			seq_printf(seq,
+				"%-3s " NIP6_FMT " %04X " NIP6_FMT
+				" %04X " NIP6_FMT " %04X %-11s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				NIP6(cp->caddr.in6), ntohs(cp->cport),
+				NIP6(cp->vaddr.in6), ntohs(cp->vport),
+				NIP6(cp->daddr.in6), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				(cp->timer.expires-jiffies)/HZ);
+		else
+#endif
+			seq_printf(seq,
+				"%-3s %08X %04X %08X %04X"
+				" %08X %04X %-11s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				ntohl(cp->caddr.ip), ntohs(cp->cport),
+				ntohl(cp->vaddr.ip), ntohs(cp->vport),
+				ntohl(cp->daddr.ip), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				(cp->timer.expires-jiffies)/HZ);
+	}
+	return 0;
+}
+
+static const struct seq_operations ip_vs_conn_seq_ops = {
+	.start = ip_vs_conn_seq_start,
+	.next  = ip_vs_conn_seq_next,
+	.stop  = ip_vs_conn_seq_stop,
+	.show  = ip_vs_conn_seq_show,
+};
+
+static int ip_vs_conn_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ip_vs_conn_seq_ops);
+}
+
+static const struct file_operations ip_vs_conn_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip_vs_conn_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static const char *ip_vs_origin_name(unsigned flags)
+{
+	if (flags & IP_VS_CONN_F_SYNC)
+		return "SYNC";
+	else
+		return "LOCAL";
+}
+
+static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
+{
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+   "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
+	else {
+		const struct ip_vs_conn *cp = v;
+
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			seq_printf(seq,
+				"%-3s " NIP6_FMT " %04X " NIP6_FMT
+				" %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				NIP6(cp->caddr.in6), ntohs(cp->cport),
+				NIP6(cp->vaddr.in6), ntohs(cp->vport),
+				NIP6(cp->daddr.in6), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_origin_name(cp->flags),
+				(cp->timer.expires-jiffies)/HZ);
+		else
+#endif
+			seq_printf(seq,
+				"%-3s %08X %04X %08X %04X "
+				"%08X %04X %-11s %-6s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				ntohl(cp->caddr.ip), ntohs(cp->cport),
+				ntohl(cp->vaddr.ip), ntohs(cp->vport),
+				ntohl(cp->daddr.ip), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_origin_name(cp->flags),
+				(cp->timer.expires-jiffies)/HZ);
+	}
+	return 0;
+}
+
+static const struct seq_operations ip_vs_conn_sync_seq_ops = {
+	.start = ip_vs_conn_seq_start,
+	.next  = ip_vs_conn_seq_next,
+	.stop  = ip_vs_conn_seq_stop,
+	.show  = ip_vs_conn_sync_seq_show,
+};
+
+static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ip_vs_conn_sync_seq_ops);
+}
+
+static const struct file_operations ip_vs_conn_sync_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip_vs_conn_sync_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+
+/*
+ *      Randomly drop connection entries before running out of memory
+ */
+static inline int todrop_entry(struct ip_vs_conn *cp)
+{
+	/*
+	 * The drop rate array needs tuning for real environments.
+	 * Called from timer bh only => no locking
+	 */
+	static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+	static char todrop_counter[9] = {0};
+	int i;
+
+	/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
+	   This will leave enough time for normal connection to get
+	   through. */
+	if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
+		return 0;
+
+	/* Don't drop the entry if its number of incoming packets is not
+	   located in [0, 8] */
+	i = atomic_read(&cp->in_pkts);
+	if (i > 8 || i < 0) return 0;
+
+	if (!todrop_rate[i]) return 0;
+	if (--todrop_counter[i] > 0) return 0;
+
+	todrop_counter[i] = todrop_rate[i];
+	return 1;
+}
+
+/* Called from keventd and must protect itself from softirqs */
+void ip_vs_random_dropentry(void)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+
+	/*
+	 * Randomly scan 1/32 of the whole table every second
+	 */
+	for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) {
+		unsigned hash = net_random() & IP_VS_CONN_TAB_MASK;
+
+		/*
+		 *  Lock is actually needed in this loop.
+		 */
+		ct_write_lock_bh(hash);
+
+		list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
+				/* connection template */
+				continue;
+
+			if (cp->protocol == IPPROTO_TCP) {
+				switch(cp->state) {
+				case IP_VS_TCP_S_SYN_RECV:
+				case IP_VS_TCP_S_SYNACK:
+					break;
+
+				case IP_VS_TCP_S_ESTABLISHED:
+					if (todrop_entry(cp))
+						break;
+					continue;
+
+				default:
+					continue;
+				}
+			} else {
+				if (!todrop_entry(cp))
+					continue;
+			}
+
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(cp);
+			if (cp->control) {
+				IP_VS_DBG(4, "del conn template\n");
+				ip_vs_conn_expire_now(cp->control);
+			}
+		}
+		ct_write_unlock_bh(hash);
+	}
+}
+
+
+/*
+ *      Flush all the connection entries in the ip_vs_conn_tab
+ */
+static void ip_vs_conn_flush(void)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+
+  flush_again:
+	for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) {
+		/*
+		 *  Lock is actually needed in this loop.
+		 */
+		ct_write_lock_bh(idx);
+
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(cp);
+			if (cp->control) {
+				IP_VS_DBG(4, "del conn template\n");
+				ip_vs_conn_expire_now(cp->control);
+			}
+		}
+		ct_write_unlock_bh(idx);
+	}
+
+	/* the counter may be not NULL, because maybe some conn entries
+	   are run by slow timer handler or unhashed but still referred */
+	if (atomic_read(&ip_vs_conn_count) != 0) {
+		schedule();
+		goto flush_again;
+	}
+}
+
+
+int __init ip_vs_conn_init(void)
+{
+	int idx;
+
+	/*
+	 * Allocate the connection hash table and initialize its list heads
+	 */
+	ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head));
+	if (!ip_vs_conn_tab)
+		return -ENOMEM;
+
+	/* Allocate ip_vs_conn slab cache */
+	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+					      sizeof(struct ip_vs_conn), 0,
+					      SLAB_HWCACHE_ALIGN, NULL);
+	if (!ip_vs_conn_cachep) {
+		vfree(ip_vs_conn_tab);
+		return -ENOMEM;
+	}
+
+	IP_VS_INFO("Connection hash table configured "
+		   "(size=%d, memory=%ldKbytes)\n",
+		   IP_VS_CONN_TAB_SIZE,
+		   (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024);
+	IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
+		  sizeof(struct ip_vs_conn));
+
+	for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
+	}
+
+	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++)  {
+		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
+	}
+
+	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+	proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+
+	/* calculate the random value for connection hash */
+	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+
+	return 0;
+}
+
+
+void ip_vs_conn_cleanup(void)
+{
+	/* flush all the connection entries first */
+	ip_vs_conn_flush();
+
+	/* Release the empty cache */
+	kmem_cache_destroy(ip_vs_conn_cachep);
+	proc_net_remove(&init_net, "ip_vs_conn");
+	proc_net_remove(&init_net, "ip_vs_conn_sync");
+	vfree(ip_vs_conn_tab);
+}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
new file mode 100644
index 00000000000..958abf3e5f8
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -0,0 +1,1542 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others.
+ *
+ * Changes:
+ *	Paul `Rusty' Russell		properly handle non-linear skbs
+ *	Harald Welte			don't use nfcache
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/icmp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>                   /* for icmp_send */
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#endif
+
+#include <net/ip_vs.h>
+
+
+EXPORT_SYMBOL(register_ip_vs_scheduler);
+EXPORT_SYMBOL(unregister_ip_vs_scheduler);
+EXPORT_SYMBOL(ip_vs_skb_replace);
+EXPORT_SYMBOL(ip_vs_proto_name);
+EXPORT_SYMBOL(ip_vs_conn_new);
+EXPORT_SYMBOL(ip_vs_conn_in_get);
+EXPORT_SYMBOL(ip_vs_conn_out_get);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+EXPORT_SYMBOL(ip_vs_tcp_conn_listen);
+#endif
+EXPORT_SYMBOL(ip_vs_conn_put);
+#ifdef CONFIG_IP_VS_DEBUG
+EXPORT_SYMBOL(ip_vs_get_debug_level);
+#endif
+
+
+/* ID used in ICMP lookups */
+#define icmp_id(icmph)          (((icmph)->un).echo.id)
+#define icmpv6_id(icmph)        (icmph->icmp6_dataun.u_echo.identifier)
+
+const char *ip_vs_proto_name(unsigned proto)
+{
+	static char buf[20];
+
+	switch (proto) {
+	case IPPROTO_IP:
+		return "IP";
+	case IPPROTO_UDP:
+		return "UDP";
+	case IPPROTO_TCP:
+		return "TCP";
+	case IPPROTO_ICMP:
+		return "ICMP";
+#ifdef CONFIG_IP_VS_IPV6
+	case IPPROTO_ICMPV6:
+		return "ICMPv6";
+#endif
+	default:
+		sprintf(buf, "IP_%d", proto);
+		return buf;
+	}
+}
+
+void ip_vs_init_hash_table(struct list_head *table, int rows)
+{
+	while (--rows >= 0)
+		INIT_LIST_HEAD(&table[rows]);
+}
+
+static inline void
+ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest = cp->dest;
+	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		spin_lock(&dest->stats.lock);
+		dest->stats.ustats.inpkts++;
+		dest->stats.ustats.inbytes += skb->len;
+		spin_unlock(&dest->stats.lock);
+
+		spin_lock(&dest->svc->stats.lock);
+		dest->svc->stats.ustats.inpkts++;
+		dest->svc->stats.ustats.inbytes += skb->len;
+		spin_unlock(&dest->svc->stats.lock);
+
+		spin_lock(&ip_vs_stats.lock);
+		ip_vs_stats.ustats.inpkts++;
+		ip_vs_stats.ustats.inbytes += skb->len;
+		spin_unlock(&ip_vs_stats.lock);
+	}
+}
+
+
+static inline void
+ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest = cp->dest;
+	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		spin_lock(&dest->stats.lock);
+		dest->stats.ustats.outpkts++;
+		dest->stats.ustats.outbytes += skb->len;
+		spin_unlock(&dest->stats.lock);
+
+		spin_lock(&dest->svc->stats.lock);
+		dest->svc->stats.ustats.outpkts++;
+		dest->svc->stats.ustats.outbytes += skb->len;
+		spin_unlock(&dest->svc->stats.lock);
+
+		spin_lock(&ip_vs_stats.lock);
+		ip_vs_stats.ustats.outpkts++;
+		ip_vs_stats.ustats.outbytes += skb->len;
+		spin_unlock(&ip_vs_stats.lock);
+	}
+}
+
+
+static inline void
+ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
+{
+	spin_lock(&cp->dest->stats.lock);
+	cp->dest->stats.ustats.conns++;
+	spin_unlock(&cp->dest->stats.lock);
+
+	spin_lock(&svc->stats.lock);
+	svc->stats.ustats.conns++;
+	spin_unlock(&svc->stats.lock);
+
+	spin_lock(&ip_vs_stats.lock);
+	ip_vs_stats.ustats.conns++;
+	spin_unlock(&ip_vs_stats.lock);
+}
+
+
+static inline int
+ip_vs_set_state(struct ip_vs_conn *cp, int direction,
+		const struct sk_buff *skb,
+		struct ip_vs_protocol *pp)
+{
+	if (unlikely(!pp->state_transition))
+		return 0;
+	return pp->state_transition(cp, direction, skb, pp);
+}
+
+
+/*
+ *  IPVS persistent scheduling function
+ *  It creates a connection entry according to its template if exists,
+ *  or selects a server and creates a connection entry plus a template.
+ *  Locking: we are svc user (svc->refcnt), so we hold all dests too
+ *  Protocols supported: TCP, UDP
+ */
+static struct ip_vs_conn *
+ip_vs_sched_persist(struct ip_vs_service *svc,
+		    const struct sk_buff *skb,
+		    __be16 ports[2])
+{
+	struct ip_vs_conn *cp = NULL;
+	struct ip_vs_iphdr iph;
+	struct ip_vs_dest *dest;
+	struct ip_vs_conn *ct;
+	__be16  dport;			/* destination port to forward */
+	union nf_inet_addr snet;	/* source network of the client,
+					   after masking */
+
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+
+	/* Mask saddr with the netmask to adjust template granularity */
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6)
+		ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
+	else
+#endif
+		snet.ip = iph.saddr.ip & svc->netmask;
+
+	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
+		      "mnet %s\n",
+		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
+		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+		      IP_VS_DBG_ADDR(svc->af, &snet));
+
+	/*
+	 * As far as we know, FTP is a very complicated network protocol, and
+	 * it uses control connection and data connections. For active FTP,
+	 * FTP server initialize data connection to the client, its source port
+	 * is often 20. For passive FTP, FTP server tells the clients the port
+	 * that it passively listens to,  and the client issues the data
+	 * connection. In the tunneling or direct routing mode, the load
+	 * balancer is on the client-to-server half of connection, the port
+	 * number is unknown to the load balancer. So, a conn template like
+	 * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP
+	 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
+	 * is created for other persistent services.
+	 */
+	if (ports[1] == svc->port) {
+		/* Check if a template already exists */
+		if (svc->port != FTPPORT)
+			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+					     &iph.daddr, ports[1]);
+		else
+			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+					     &iph.daddr, 0);
+
+		if (!ct || !ip_vs_check_template(ct)) {
+			/*
+			 * No template found or the dest of the connection
+			 * template is not available.
+			 */
+			dest = svc->scheduler->schedule(svc, skb);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "p-schedule: no dest found.\n");
+				return NULL;
+			}
+
+			/*
+			 * Create a template like <protocol,caddr,0,
+			 * vaddr,vport,daddr,dport> for non-ftp service,
+			 * and <protocol,caddr,0,vaddr,0,daddr,0>
+			 * for ftp service.
+			 */
+			if (svc->port != FTPPORT)
+				ct = ip_vs_conn_new(svc->af, iph.protocol,
+						    &snet, 0,
+						    &iph.daddr,
+						    ports[1],
+						    &dest->addr, dest->port,
+						    IP_VS_CONN_F_TEMPLATE,
+						    dest);
+			else
+				ct = ip_vs_conn_new(svc->af, iph.protocol,
+						    &snet, 0,
+						    &iph.daddr, 0,
+						    &dest->addr, 0,
+						    IP_VS_CONN_F_TEMPLATE,
+						    dest);
+			if (ct == NULL)
+				return NULL;
+
+			ct->timeout = svc->timeout;
+		} else {
+			/* set destination with the found template */
+			dest = ct->dest;
+		}
+		dport = dest->port;
+	} else {
+		/*
+		 * Note: persistent fwmark-based services and persistent
+		 * port zero service are handled here.
+		 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
+		 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
+		 */
+		if (svc->fwmark) {
+			union nf_inet_addr fwmark = {
+				.all = { 0, 0, 0, htonl(svc->fwmark) }
+			};
+
+			ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
+					     &fwmark, 0);
+		} else
+			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+					     &iph.daddr, 0);
+
+		if (!ct || !ip_vs_check_template(ct)) {
+			/*
+			 * If it is not persistent port zero, return NULL,
+			 * otherwise create a connection template.
+			 */
+			if (svc->port)
+				return NULL;
+
+			dest = svc->scheduler->schedule(svc, skb);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "p-schedule: no dest found.\n");
+				return NULL;
+			}
+
+			/*
+			 * Create a template according to the service
+			 */
+			if (svc->fwmark) {
+				union nf_inet_addr fwmark = {
+					.all = { 0, 0, 0, htonl(svc->fwmark) }
+				};
+
+				ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
+						    &snet, 0,
+						    &fwmark, 0,
+						    &dest->addr, 0,
+						    IP_VS_CONN_F_TEMPLATE,
+						    dest);
+			} else
+				ct = ip_vs_conn_new(svc->af, iph.protocol,
+						    &snet, 0,
+						    &iph.daddr, 0,
+						    &dest->addr, 0,
+						    IP_VS_CONN_F_TEMPLATE,
+						    dest);
+			if (ct == NULL)
+				return NULL;
+
+			ct->timeout = svc->timeout;
+		} else {
+			/* set destination with the found template */
+			dest = ct->dest;
+		}
+		dport = ports[1];
+	}
+
+	/*
+	 *    Create a new connection according to the template
+	 */
+	cp = ip_vs_conn_new(svc->af, iph.protocol,
+			    &iph.saddr, ports[0],
+			    &iph.daddr, ports[1],
+			    &dest->addr, dport,
+			    0,
+			    dest);
+	if (cp == NULL) {
+		ip_vs_conn_put(ct);
+		return NULL;
+	}
+
+	/*
+	 *    Add its control
+	 */
+	ip_vs_control_add(cp, ct);
+	ip_vs_conn_put(ct);
+
+	ip_vs_conn_stats(cp, svc);
+	return cp;
+}
+
+
+/*
+ *  IPVS main scheduling function
+ *  It selects a server according to the virtual service, and
+ *  creates a connection entry.
+ *  Protocols supported: TCP, UDP
+ */
+struct ip_vs_conn *
+ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_conn *cp = NULL;
+	struct ip_vs_iphdr iph;
+	struct ip_vs_dest *dest;
+	__be16 _ports[2], *pptr;
+
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	/*
+	 *    Persistent service
+	 */
+	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+		return ip_vs_sched_persist(svc, skb, pptr);
+
+	/*
+	 *    Non-persistent service
+	 */
+	if (!svc->fwmark && pptr[1] != svc->port) {
+		if (!svc->port)
+			IP_VS_ERR("Schedule: port zero only supported "
+				  "in persistent services, "
+				  "check your ipvs configuration\n");
+		return NULL;
+	}
+
+	dest = svc->scheduler->schedule(svc, skb);
+	if (dest == NULL) {
+		IP_VS_DBG(1, "Schedule: no dest found.\n");
+		return NULL;
+	}
+
+	/*
+	 *    Create a connection entry.
+	 */
+	cp = ip_vs_conn_new(svc->af, iph.protocol,
+			    &iph.saddr, pptr[0],
+			    &iph.daddr, pptr[1],
+			    &dest->addr, dest->port ? dest->port : pptr[1],
+			    0,
+			    dest);
+	if (cp == NULL)
+		return NULL;
+
+	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
+		      "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+		      ip_vs_fwd_tag(cp),
+		      IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
+		      IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
+		      IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+		      cp->flags, atomic_read(&cp->refcnt));
+
+	ip_vs_conn_stats(cp, svc);
+	return cp;
+}
+
+
+/*
+ *  Pass or drop the packet.
+ *  Called by ip_vs_in, when the virtual service is available but
+ *  no destination is available for a new connection.
+ */
+int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
+		struct ip_vs_protocol *pp)
+{
+	__be16 _ports[2], *pptr;
+	struct ip_vs_iphdr iph;
+	int unicast;
+	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+
+	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
+	if (pptr == NULL) {
+		ip_vs_service_put(svc);
+		return NF_DROP;
+	}
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6)
+		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
+	else
+#endif
+		unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+
+	/* if it is fwmark-based service, the cache_bypass sysctl is up
+	   and the destination is a non-local unicast, then create
+	   a cache_bypass connection entry */
+	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+		int ret, cs;
+		struct ip_vs_conn *cp;
+		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
+
+		ip_vs_service_put(svc);
+
+		/* create a new connection entry */
+		IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
+		cp = ip_vs_conn_new(svc->af, iph.protocol,
+				    &iph.saddr, pptr[0],
+				    &iph.daddr, pptr[1],
+				    &daddr, 0,
+				    IP_VS_CONN_F_BYPASS,
+				    NULL);
+		if (cp == NULL)
+			return NF_DROP;
+
+		/* statistics */
+		ip_vs_in_stats(cp, skb);
+
+		/* set state */
+		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+
+		/* transmit the first SYN packet */
+		ret = cp->packet_xmit(skb, cp, pp);
+		/* do not touch skb anymore */
+
+		atomic_inc(&cp->in_pkts);
+		ip_vs_conn_put(cp);
+		return ret;
+	}
+
+	/*
+	 * When the virtual ftp service is presented, packets destined
+	 * for other services on the VIP may get here (except services
+	 * listed in the ipvs table), pass the packets, because it is
+	 * not ipvs job to decide to drop the packets.
+	 */
+	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
+		ip_vs_service_put(svc);
+		return NF_ACCEPT;
+	}
+
+	ip_vs_service_put(svc);
+
+	/*
+	 * Notify the client that the destination is unreachable, and
+	 * release the socket buffer.
+	 * Since it is in IP layer, the TCP socket is not actually
+	 * created, the TCP RST packet cannot be sent, instead that
+	 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
+	 */
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6)
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
+			    skb->dev);
+	else
+#endif
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+	return NF_DROP;
+}
+
+
+/*
+ *      It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
+ *      chain, and is used for VS/NAT.
+ *      It detects packets for VS/NAT connections and sends the packets
+ *      immediately. This can avoid that iptable_nat mangles the packets
+ *      for VS/NAT.
+ */
+static unsigned int ip_vs_post_routing(unsigned int hooknum,
+				       struct sk_buff *skb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	if (!skb->ipvs_property)
+		return NF_ACCEPT;
+	/* The packet was sent from IPVS, exit this chain */
+	return NF_STOP;
+}
+
+__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+{
+	return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
+}
+
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+	int err = ip_defrag(skb, user);
+
+	if (!err)
+		ip_send_check(ip_hdr(skb));
+
+	return err;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
+{
+	/* TODO IPv6: Find out what to do here for IPv6 */
+	return 0;
+}
+#endif
+
+/*
+ * Packet has been made sufficiently writable in caller
+ * - inout: 1=in->out, 0=out->in
+ */
+void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    struct ip_vs_conn *cp, int inout)
+{
+	struct iphdr *iph	 = ip_hdr(skb);
+	unsigned int icmp_offset = iph->ihl*4;
+	struct icmphdr *icmph	 = (struct icmphdr *)(skb_network_header(skb) +
+						      icmp_offset);
+	struct iphdr *ciph	 = (struct iphdr *)(icmph + 1);
+
+	if (inout) {
+		iph->saddr = cp->vaddr.ip;
+		ip_send_check(iph);
+		ciph->daddr = cp->vaddr.ip;
+		ip_send_check(ciph);
+	} else {
+		iph->daddr = cp->daddr.ip;
+		ip_send_check(iph);
+		ciph->saddr = cp->daddr.ip;
+		ip_send_check(ciph);
+	}
+
+	/* the TCP/UDP port */
+	if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
+		__be16 *ports = (void *)ciph + ciph->ihl*4;
+
+		if (inout)
+			ports[1] = cp->vport;
+		else
+			ports[0] = cp->dport;
+	}
+
+	/* And finally the ICMP checksum */
+	icmph->checksum = 0;
+	icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (inout)
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered outgoing ICMP");
+	else
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered incoming ICMP");
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    struct ip_vs_conn *cp, int inout)
+{
+	struct ipv6hdr *iph	 = ipv6_hdr(skb);
+	unsigned int icmp_offset = sizeof(struct ipv6hdr);
+	struct icmp6hdr *icmph	 = (struct icmp6hdr *)(skb_network_header(skb) +
+						      icmp_offset);
+	struct ipv6hdr *ciph	 = (struct ipv6hdr *)(icmph + 1);
+
+	if (inout) {
+		iph->saddr = cp->vaddr.in6;
+		ciph->daddr = cp->vaddr.in6;
+	} else {
+		iph->daddr = cp->daddr.in6;
+		ciph->saddr = cp->daddr.in6;
+	}
+
+	/* the TCP/UDP port */
+	if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
+		__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
+
+		if (inout)
+			ports[1] = cp->vport;
+		else
+			ports[0] = cp->dport;
+	}
+
+	/* And finally the ICMP checksum */
+	icmph->icmp6_cksum = 0;
+	/* TODO IPv6: is this correct for ICMPv6? */
+	ip_vs_checksum_complete(skb, icmp_offset);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (inout)
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered outgoing ICMPv6");
+	else
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered incoming ICMPv6");
+}
+#endif
+
+/* Handle relevant response ICMP messages - forward to the right
+ * destination host. Used for NAT and local client.
+ */
+static int handle_response_icmp(int af, struct sk_buff *skb,
+				union nf_inet_addr *snet,
+				__u8 protocol, struct ip_vs_conn *cp,
+				struct ip_vs_protocol *pp,
+				unsigned int offset, unsigned int ihl)
+{
+	unsigned int verdict = NF_DROP;
+
+	if (IP_VS_FWD_METHOD(cp) != 0) {
+		IP_VS_ERR("shouldn't reach here, because the box is on the "
+			  "half connection in the tun/dr module.\n");
+	}
+
+	/* Ensure the checksum is correct */
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+		/* Failed checksum! */
+		IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n",
+			      IP_VS_DBG_ADDR(af, snet));
+		goto out;
+	}
+
+	if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
+		offset += 2 * sizeof(__u16);
+	if (!skb_make_writable(skb, offset))
+		goto out;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_nat_icmp_v6(skb, pp, cp, 1);
+	else
+#endif
+		ip_vs_nat_icmp(skb, pp, cp, 1);
+
+	/* do the statistics and put it back */
+	ip_vs_out_stats(cp, skb);
+
+	skb->ipvs_property = 1;
+	verdict = NF_ACCEPT;
+
+out:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+
+/*
+ *	Handle ICMP messages in the inside-to-outside direction (outgoing).
+ *	Find any that might be relevant, check against existing connections.
+ *	Currently handles error types - unreachable, quench, ttl exceeded.
+ */
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
+{
+	struct iphdr *iph;
+	struct icmphdr	_icmph, *ic;
+	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, ihl;
+	union nf_inet_addr snet;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+			return NF_STOLEN;
+	}
+
+	iph = ip_hdr(skb);
+	offset = ihl = iph->ihl * 4;
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
+		  ic->type, ntohs(icmp_id(ic)),
+		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->type != ICMP_DEST_UNREACH) &&
+	    (ic->type != ICMP_SOURCE_QUENCH) &&
+	    (ic->type != ICMP_TIME_EXCEEDED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->protocol);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
+		     pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");
+
+	offset += cih->ihl * 4;
+
+	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+	if (!cp)
+		return NF_ACCEPT;
+
+	snet.ip = iph->saddr;
+	return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
+				    pp, offset, ihl);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
+{
+	struct ipv6hdr *iph;
+	struct icmp6hdr	_icmph, *ic;
+	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
+					   within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset;
+	union nf_inet_addr snet;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+		if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
+			return NF_STOLEN;
+	}
+
+	iph = ipv6_hdr(skb);
+	offset = sizeof(struct ipv6hdr);
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
+		  NIP6(iph->saddr), NIP6(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+	    (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+	    (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->nexthdr);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	/* TODO: we don't support fragmentation at the moment anyways */
+	if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
+
+	offset += sizeof(struct ipv6hdr);
+
+	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	if (!cp)
+		return NF_ACCEPT;
+
+	ipv6_addr_copy(&snet.in6, &iph->saddr);
+	return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
+				    pp, offset, sizeof(struct ipv6hdr));
+}
+#endif
+
+static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
+{
+	struct tcphdr _tcph, *th;
+
+	th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return 0;
+	return th->rst;
+}
+
+/* Handle response packets: rewrite addresses and send away...
+ * Used for NAT and local client.
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		struct ip_vs_conn *cp, int ihl)
+{
+	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+	if (!skb_make_writable(skb, ihl))
+		goto drop;
+
+	/* mangle the packet */
+	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+		goto drop;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+	else
+#endif
+	{
+		ip_hdr(skb)->saddr = cp->vaddr.ip;
+		ip_send_check(ip_hdr(skb));
+	}
+
+	/* For policy routing, packets originating from this
+	 * machine itself may be routed differently to packets
+	 * passing through.  We want this packet to be routed as
+	 * if it came from this machine itself.  So re-compute
+	 * the routing information.
+	 */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (ip6_route_me_harder(skb) != 0)
+			goto drop;
+	} else
+#endif
+		if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+			goto drop;
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+	ip_vs_out_stats(cp, skb);
+	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+	ip_vs_conn_put(cp);
+
+	skb->ipvs_property = 1;
+
+	LeaveFunction(11);
+	return NF_ACCEPT;
+
+drop:
+	ip_vs_conn_put(cp);
+	kfree_skb(skb);
+	return NF_STOLEN;
+}
+
+/*
+ *	It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
+ *	Check if outgoing packet belongs to the established ip_vs_conn.
+ */
+static unsigned int
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
+	  const struct net_device *in, const struct net_device *out,
+	  int (*okfn)(struct sk_buff *))
+{
+	struct ip_vs_iphdr iph;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn *cp;
+	int af;
+
+	EnterFunction(11);
+
+	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
+	if (skb->ipvs_property)
+		return NF_ACCEPT;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+			int related, verdict = ip_vs_out_icmp_v6(skb, &related);
+
+			if (related)
+				return verdict;
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+	} else
+#endif
+		if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+			int related, verdict = ip_vs_out_icmp(skb, &related);
+
+			if (related)
+				return verdict;
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+
+	pp = ip_vs_proto_get(iph.protocol);
+	if (unlikely(!pp))
+		return NF_ACCEPT;
+
+	/* reassemble IP fragments */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+			int related, verdict = ip_vs_out_icmp_v6(skb, &related);
+
+			if (related)
+				return verdict;
+
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+	} else
+#endif
+		if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
+			     !pp->dont_defrag)) {
+			if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+				return NF_STOLEN;
+
+			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+		}
+
+	/*
+	 * Check if the packet belongs to an existing entry
+	 */
+	cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+
+	if (unlikely(!cp)) {
+		if (sysctl_ip_vs_nat_icmp_send &&
+		    (pp->protocol == IPPROTO_TCP ||
+		     pp->protocol == IPPROTO_UDP)) {
+			__be16 _ports[2], *pptr;
+
+			pptr = skb_header_pointer(skb, iph.len,
+						  sizeof(_ports), _ports);
+			if (pptr == NULL)
+				return NF_ACCEPT;	/* Not for me */
+			if (ip_vs_lookup_real_service(af, iph.protocol,
+						      &iph.saddr,
+						      pptr[0])) {
+				/*
+				 * Notify the real server: there is no
+				 * existing entry if it is not RST
+				 * packet or not TCP packet.
+				 */
+				if (iph.protocol != IPPROTO_TCP
+				    || !is_tcp_reset(skb, iph.len)) {
+#ifdef CONFIG_IP_VS_IPV6
+					if (af == AF_INET6)
+						icmpv6_send(skb,
+							    ICMPV6_DEST_UNREACH,
+							    ICMPV6_PORT_UNREACH,
+							    0, skb->dev);
+					else
+#endif
+						icmp_send(skb,
+							  ICMP_DEST_UNREACH,
+							  ICMP_PORT_UNREACH, 0);
+					return NF_DROP;
+				}
+			}
+		}
+		IP_VS_DBG_PKT(12, pp, skb, 0,
+			      "packet continues traversal as normal");
+		return NF_ACCEPT;
+	}
+
+	return handle_response(af, skb, pp, cp, iph.len);
+}
+
+
+/*
+ *	Handle ICMP messages in the outside-to-inside direction (incoming).
+ *	Find any that might be relevant, check against existing connections,
+ *	forward to the right destination host if relevant.
+ *	Currently handles error types - unreachable, quench, ttl exceeded.
+ */
+static int
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
+{
+	struct iphdr *iph;
+	struct icmphdr	_icmph, *ic;
+	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, ihl, verdict;
+	union nf_inet_addr snet;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+		if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
+					    IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
+			return NF_STOLEN;
+	}
+
+	iph = ip_hdr(skb);
+	offset = ihl = iph->ihl * 4;
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
+		  ic->type, ntohs(icmp_id(ic)),
+		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->type != ICMP_DEST_UNREACH) &&
+	    (ic->type != ICMP_SOURCE_QUENCH) &&
+	    (ic->type != ICMP_TIME_EXCEEDED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->protocol);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
+		     pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");
+
+	offset += cih->ihl * 4;
+
+	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+	if (!cp) {
+		/* The packet could also belong to a local client */
+		cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+		if (cp) {
+			snet.ip = iph->saddr;
+			return handle_response_icmp(AF_INET, skb, &snet,
+						    cih->protocol, cp, pp,
+						    offset, ihl);
+		}
+		return NF_ACCEPT;
+	}
+
+	verdict = NF_DROP;
+
+	/* Ensure the checksum is correct */
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+		/* Failed checksum! */
+		IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
+			  NIPQUAD(iph->saddr));
+		goto out;
+	}
+
+	/* do the statistics and put it back */
+	ip_vs_in_stats(cp, skb);
+	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+		offset += 2 * sizeof(__u16);
+	verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
+	/* do not touch skb anymore */
+
+  out:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static int
+ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
+{
+	struct ipv6hdr *iph;
+	struct icmp6hdr	_icmph, *ic;
+	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
+					   within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, verdict;
+	union nf_inet_addr snet;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+		if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
+					       IP_DEFRAG_VS_IN :
+					       IP_DEFRAG_VS_FWD))
+			return NF_STOLEN;
+	}
+
+	iph = ipv6_hdr(skb);
+	offset = sizeof(struct ipv6hdr);
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
+		  NIP6(iph->saddr), NIP6(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+	    (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+	    (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->nexthdr);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	/* TODO: we don't support fragmentation at the moment anyways */
+	if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
+
+	offset += sizeof(struct ipv6hdr);
+
+	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	if (!cp) {
+		/* The packet could also belong to a local client */
+		cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+		if (cp) {
+			ipv6_addr_copy(&snet.in6, &iph->saddr);
+			return handle_response_icmp(AF_INET6, skb, &snet,
+						    cih->nexthdr,
+						    cp, pp, offset,
+						    sizeof(struct ipv6hdr));
+		}
+		return NF_ACCEPT;
+	}
+
+	verdict = NF_DROP;
+
+	/* do the statistics and put it back */
+	ip_vs_in_stats(cp, skb);
+	if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+		offset += 2 * sizeof(__u16);
+	verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
+	/* do not touch skb anymore */
+
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+#endif
+
+
+/*
+ *	Check if it's for virtual services, look it up,
+ *	and send it on its way...
+ */
+static unsigned int
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
+	 const struct net_device *in, const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	struct ip_vs_iphdr iph;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn *cp;
+	int ret, restart, af;
+
+	af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	/*
+	 *	Big tappo: only PACKET_HOST, including loopback for local client
+	 *	Don't handle local packets on IPv6 for now
+	 */
+	if (unlikely(skb->pkt_type != PACKET_HOST)) {
+		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
+			      skb->pkt_type,
+			      iph.protocol,
+			      IP_VS_DBG_ADDR(af, &iph.daddr));
+		return NF_ACCEPT;
+	}
+
+	if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+		int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
+
+		if (related)
+			return verdict;
+		ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+	}
+
+	/* Protocol supported? */
+	pp = ip_vs_proto_get(iph.protocol);
+	if (unlikely(!pp))
+		return NF_ACCEPT;
+
+	/*
+	 * Check if the packet belongs to an existing connection entry
+	 */
+	cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+
+	if (unlikely(!cp)) {
+		int v;
+
+		/* For local client packets, it could be a response */
+		cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+		if (cp)
+			return handle_response(af, skb, pp, cp, iph.len);
+
+		if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+			return v;
+	}
+
+	if (unlikely(!cp)) {
+		/* sorry, all this trouble for a no-hit :) */
+		IP_VS_DBG_PKT(12, pp, skb, 0,
+			      "packet continues traversal as normal");
+		return NF_ACCEPT;
+	}
+
+	IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");
+
+	/* Check the server status */
+	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		/* the destination server is not available */
+
+		if (sysctl_ip_vs_expire_nodest_conn) {
+			/* try to expire the connection immediately */
+			ip_vs_conn_expire_now(cp);
+		}
+		/* don't restart its timer, and silently
+		   drop the packet. */
+		__ip_vs_conn_put(cp);
+		return NF_DROP;
+	}
+
+	ip_vs_in_stats(cp, skb);
+	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+	if (cp->packet_xmit)
+		ret = cp->packet_xmit(skb, cp, pp);
+		/* do not touch skb anymore */
+	else {
+		IP_VS_DBG_RL("warning: packet_xmit is null");
+		ret = NF_ACCEPT;
+	}
+
+	/* Increase its packet counter and check if it is needed
+	 * to be synchronized
+	 *
+	 * Sync connection if it is about to close to
+	 * encorage the standby servers to update the connections timeout
+	 */
+	atomic_inc(&cp->in_pkts);
+	if (af == AF_INET &&
+	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	    (((cp->protocol != IPPROTO_TCP ||
+	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
+	      (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
+	       == sysctl_ip_vs_sync_threshold[0])) ||
+	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
+	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
+	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
+	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
+		ip_vs_sync_conn(cp);
+	cp->old_state = cp->state;
+
+	ip_vs_conn_put(cp);
+	return ret;
+}
+
+
+/*
+ *	It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
+ *      related packets destined for 0.0.0.0/0.
+ *      When fwmark-based virtual service is used, such as transparent
+ *      cache cluster, TCP packets can be marked and routed to ip_vs_in,
+ *      but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
+ *      sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
+ *      and send them to ip_vs_in_icmp.
+ */
+static unsigned int
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
+		   const struct net_device *in, const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	int r;
+
+	if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
+		return NF_ACCEPT;
+
+	return ip_vs_in_icmp(skb, &r, hooknum);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static unsigned int
+ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+		      const struct net_device *in, const struct net_device *out,
+		      int (*okfn)(struct sk_buff *))
+{
+	int r;
+
+	if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
+		return NF_ACCEPT;
+
+	return ip_vs_in_icmp_v6(skb, &r, hooknum);
+}
+#endif
+
+
+static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+	/* After packet filtering, forward packet through VS/DR, VS/TUN,
+	 * or VS/NAT(change destination), so that filtering rules can be
+	 * applied to IPVS. */
+	{
+		.hook		= ip_vs_in,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_LOCAL_IN,
+		.priority       = 100,
+	},
+	/* After packet filtering, change source only for VS/NAT */
+	{
+		.hook		= ip_vs_out,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 100,
+	},
+	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
+	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+	{
+		.hook		= ip_vs_forward_icmp,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 99,
+	},
+	/* Before the netfilter connection tracking, exit from POST_ROUTING */
+	{
+		.hook		= ip_vs_post_routing,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum        = NF_INET_POST_ROUTING,
+		.priority       = NF_IP_PRI_NAT_SRC-1,
+	},
+#ifdef CONFIG_IP_VS_IPV6
+	/* After packet filtering, forward packet through VS/DR, VS/TUN,
+	 * or VS/NAT(change destination), so that filtering rules can be
+	 * applied to IPVS. */
+	{
+		.hook		= ip_vs_in,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_LOCAL_IN,
+		.priority       = 100,
+	},
+	/* After packet filtering, change source only for VS/NAT */
+	{
+		.hook		= ip_vs_out,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 100,
+	},
+	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
+	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+	{
+		.hook		= ip_vs_forward_icmp_v6,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_FORWARD,
+		.priority       = 99,
+	},
+	/* Before the netfilter connection tracking, exit from POST_ROUTING */
+	{
+		.hook		= ip_vs_post_routing,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum        = NF_INET_POST_ROUTING,
+		.priority       = NF_IP6_PRI_NAT_SRC-1,
+	},
+#endif
+};
+
+
+/*
+ *	Initialize IP Virtual Server
+ */
+static int __init ip_vs_init(void)
+{
+	int ret;
+
+	ip_vs_estimator_init();
+
+	ret = ip_vs_control_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup control.\n");
+		goto cleanup_estimator;
+	}
+
+	ip_vs_protocol_init();
+
+	ret = ip_vs_app_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup application helper.\n");
+		goto cleanup_protocol;
+	}
+
+	ret = ip_vs_conn_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup connection table.\n");
+		goto cleanup_app;
+	}
+
+	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	if (ret < 0) {
+		IP_VS_ERR("can't register hooks.\n");
+		goto cleanup_conn;
+	}
+
+	IP_VS_INFO("ipvs loaded.\n");
+	return ret;
+
+  cleanup_conn:
+	ip_vs_conn_cleanup();
+  cleanup_app:
+	ip_vs_app_cleanup();
+  cleanup_protocol:
+	ip_vs_protocol_cleanup();
+	ip_vs_control_cleanup();
+  cleanup_estimator:
+	ip_vs_estimator_cleanup();
+	return ret;
+}
+
+static void __exit ip_vs_cleanup(void)
+{
+	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	ip_vs_conn_cleanup();
+	ip_vs_app_cleanup();
+	ip_vs_protocol_cleanup();
+	ip_vs_control_cleanup();
+	ip_vs_estimator_cleanup();
+	IP_VS_INFO("ipvs unloaded.\n");
+}
+
+module_init(ip_vs_init);
+module_exit(ip_vs_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
new file mode 100644
index 00000000000..0302cf3e503
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -0,0 +1,3443 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the NetFilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/swap.h>
+#include <linux/seq_file.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/mutex.h>
+
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#endif
+#include <net/route.h>
+#include <net/sock.h>
+#include <net/genetlink.h>
+
+#include <asm/uaccess.h>
+
+#include <net/ip_vs.h>
+
+/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+static DEFINE_MUTEX(__ip_vs_mutex);
+
+/* lock for service table */
+static DEFINE_RWLOCK(__ip_vs_svc_lock);
+
+/* lock for table with the real services */
+static DEFINE_RWLOCK(__ip_vs_rs_lock);
+
+/* lock for state and timeout tables */
+static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
+
+/* lock for drop entry handling */
+static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
+
+/* lock for drop packet handling */
+static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
+
+/* 1/rate drop and drop-entry variables */
+int ip_vs_drop_rate = 0;
+int ip_vs_drop_counter = 0;
+static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
+
+/* number of virtual services */
+static int ip_vs_num_services = 0;
+
+/* sysctl variables */
+static int sysctl_ip_vs_drop_entry = 0;
+static int sysctl_ip_vs_drop_packet = 0;
+static int sysctl_ip_vs_secure_tcp = 0;
+static int sysctl_ip_vs_amemthresh = 1024;
+static int sysctl_ip_vs_am_droprate = 10;
+int sysctl_ip_vs_cache_bypass = 0;
+int sysctl_ip_vs_expire_nodest_conn = 0;
+int sysctl_ip_vs_expire_quiescent_template = 0;
+int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
+int sysctl_ip_vs_nat_icmp_send = 0;
+
+
+#ifdef CONFIG_IP_VS_DEBUG
+static int sysctl_ip_vs_debug_level = 0;
+
+int ip_vs_get_debug_level(void)
+{
+	return sysctl_ip_vs_debug_level;
+}
+#endif
+
+#ifdef CONFIG_IP_VS_IPV6
+/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
+static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+{
+	struct rt6_info *rt;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *addr,
+				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+	};
+
+	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
+			return 1;
+
+	return 0;
+}
+#endif
+/*
+ *	update_defense_level is called from keventd and from sysctl,
+ *	so it needs to protect itself from softirqs
+ */
+static void update_defense_level(void)
+{
+	struct sysinfo i;
+	static int old_secure_tcp = 0;
+	int availmem;
+	int nomem;
+	int to_change = -1;
+
+	/* we only count free and buffered memory (in pages) */
+	si_meminfo(&i);
+	availmem = i.freeram + i.bufferram;
+	/* however in linux 2.5 the i.bufferram is total page cache size,
+	   we need adjust it */
+	/* si_swapinfo(&i); */
+	/* availmem = availmem - (i.totalswap - i.freeswap); */
+
+	nomem = (availmem < sysctl_ip_vs_amemthresh);
+
+	local_bh_disable();
+
+	/* drop_entry */
+	spin_lock(&__ip_vs_dropentry_lock);
+	switch (sysctl_ip_vs_drop_entry) {
+	case 0:
+		atomic_set(&ip_vs_dropentry, 0);
+		break;
+	case 1:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+			sysctl_ip_vs_drop_entry = 2;
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+		}
+		break;
+	case 2:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+			sysctl_ip_vs_drop_entry = 1;
+		};
+		break;
+	case 3:
+		atomic_set(&ip_vs_dropentry, 1);
+		break;
+	}
+	spin_unlock(&__ip_vs_dropentry_lock);
+
+	/* drop_packet */
+	spin_lock(&__ip_vs_droppacket_lock);
+	switch (sysctl_ip_vs_drop_packet) {
+	case 0:
+		ip_vs_drop_rate = 0;
+		break;
+	case 1:
+		if (nomem) {
+			ip_vs_drop_rate = ip_vs_drop_counter
+				= sysctl_ip_vs_amemthresh /
+				(sysctl_ip_vs_amemthresh-availmem);
+			sysctl_ip_vs_drop_packet = 2;
+		} else {
+			ip_vs_drop_rate = 0;
+		}
+		break;
+	case 2:
+		if (nomem) {
+			ip_vs_drop_rate = ip_vs_drop_counter
+				= sysctl_ip_vs_amemthresh /
+				(sysctl_ip_vs_amemthresh-availmem);
+		} else {
+			ip_vs_drop_rate = 0;
+			sysctl_ip_vs_drop_packet = 1;
+		}
+		break;
+	case 3:
+		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+		break;
+	}
+	spin_unlock(&__ip_vs_droppacket_lock);
+
+	/* secure_tcp */
+	write_lock(&__ip_vs_securetcp_lock);
+	switch (sysctl_ip_vs_secure_tcp) {
+	case 0:
+		if (old_secure_tcp >= 2)
+			to_change = 0;
+		break;
+	case 1:
+		if (nomem) {
+			if (old_secure_tcp < 2)
+				to_change = 1;
+			sysctl_ip_vs_secure_tcp = 2;
+		} else {
+			if (old_secure_tcp >= 2)
+				to_change = 0;
+		}
+		break;
+	case 2:
+		if (nomem) {
+			if (old_secure_tcp < 2)
+				to_change = 1;
+		} else {
+			if (old_secure_tcp >= 2)
+				to_change = 0;
+			sysctl_ip_vs_secure_tcp = 1;
+		}
+		break;
+	case 3:
+		if (old_secure_tcp < 2)
+			to_change = 1;
+		break;
+	}
+	old_secure_tcp = sysctl_ip_vs_secure_tcp;
+	if (to_change >= 0)
+		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
+	write_unlock(&__ip_vs_securetcp_lock);
+
+	local_bh_enable();
+}
+
+
+/*
+ *	Timer for checking the defense
+ */
+#define DEFENSE_TIMER_PERIOD	1*HZ
+static void defense_work_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
+
+static void defense_work_handler(struct work_struct *work)
+{
+	update_defense_level();
+	if (atomic_read(&ip_vs_dropentry))
+		ip_vs_random_dropentry();
+
+	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+}
+
+int
+ip_vs_use_count_inc(void)
+{
+	return try_module_get(THIS_MODULE);
+}
+
+void
+ip_vs_use_count_dec(void)
+{
+	module_put(THIS_MODULE);
+}
+
+
+/*
+ *	Hash table: for virtual service lookups
+ */
+#define IP_VS_SVC_TAB_BITS 8
+#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+
+/* the service table hashed by <protocol, addr, port> */
+static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
+/* the service table hashed by fwmark */
+static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
+
+/*
+ *	Hash table: for real service lookups
+ */
+#define IP_VS_RTAB_BITS 4
+#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
+
+/*
+ *	Trash for destinations
+ */
+static LIST_HEAD(ip_vs_dest_trash);
+
+/*
+ *	FTP & NULL virtual service counters
+ */
+static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
+static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
+
+
+/*
+ *	Returns hash value for virtual service
+ */
+static __inline__ unsigned
+ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
+		  __be16 port)
+{
+	register unsigned porth = ntohs(port);
+	__be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		addr_fold = addr->ip6[0]^addr->ip6[1]^
+			    addr->ip6[2]^addr->ip6[3];
+#endif
+
+	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
+		& IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ *	Returns hash value of fwmark for virtual service lookup
+ */
+static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+{
+	return fwmark & IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ *	or in the ip_vs_svc_fwm_table by fwmark.
+ *	Should be called with locked tables.
+ */
+static int ip_vs_svc_hash(struct ip_vs_service *svc)
+{
+	unsigned hash;
+
+	if (svc->flags & IP_VS_SVC_F_HASHED) {
+		IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	if (svc->fwmark == 0) {
+		/*
+		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
+		 */
+		hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
+					 svc->port);
+		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
+	} else {
+		/*
+		 *  Hash it by fwmark in ip_vs_svc_fwm_table
+		 */
+		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
+	}
+
+	svc->flags |= IP_VS_SVC_F_HASHED;
+	/* increase its refcnt because it is referenced by the svc table */
+	atomic_inc(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *	Should be called with locked tables.
+ */
+static int ip_vs_svc_unhash(struct ip_vs_service *svc)
+{
+	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
+		IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	if (svc->fwmark == 0) {
+		/* Remove it from the ip_vs_svc_table table */
+		list_del(&svc->s_list);
+	} else {
+		/* Remove it from the ip_vs_svc_fwm_table table */
+		list_del(&svc->f_list);
+	}
+
+	svc->flags &= ~IP_VS_SVC_F_HASHED;
+	atomic_dec(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ *	Get service by {proto,addr,port} in the service table.
+ */
+static inline struct ip_vs_service *
+__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
+		    __be16 vport)
+{
+	unsigned hash;
+	struct ip_vs_service *svc;
+
+	/* Check for "full" addressed entries */
+	hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+
+	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
+		if ((svc->af == af)
+		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
+		    && (svc->port == vport)
+		    && (svc->protocol == protocol)) {
+			/* HIT */
+			atomic_inc(&svc->usecnt);
+			return svc;
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ *	Get service by {fwmark} in the service table.
+ */
+static inline struct ip_vs_service *
+__ip_vs_svc_fwm_get(int af, __u32 fwmark)
+{
+	unsigned hash;
+	struct ip_vs_service *svc;
+
+	/* Check for fwmark addressed entries */
+	hash = ip_vs_svc_fwm_hashkey(fwmark);
+
+	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
+		if (svc->fwmark == fwmark && svc->af == af) {
+			/* HIT */
+			atomic_inc(&svc->usecnt);
+			return svc;
+		}
+	}
+
+	return NULL;
+}
+
+struct ip_vs_service *
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+		  const union nf_inet_addr *vaddr, __be16 vport)
+{
+	struct ip_vs_service *svc;
+
+	read_lock(&__ip_vs_svc_lock);
+
+	/*
+	 *	Check the table hashed by fwmark first
+	 */
+	if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
+		goto out;
+
+	/*
+	 *	Check the table hashed by <protocol,addr,port>
+	 *	for "full" addressed entries
+	 */
+	svc = __ip_vs_service_get(af, protocol, vaddr, vport);
+
+	if (svc == NULL
+	    && protocol == IPPROTO_TCP
+	    && atomic_read(&ip_vs_ftpsvc_counter)
+	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
+		/*
+		 * Check if ftp service entry exists, the packet
+		 * might belong to FTP data connections.
+		 */
+		svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
+	}
+
+	if (svc == NULL
+	    && atomic_read(&ip_vs_nullsvc_counter)) {
+		/*
+		 * Check if the catch-all port (port zero) exists
+		 */
+		svc = __ip_vs_service_get(af, protocol, vaddr, 0);
+	}
+
+  out:
+	read_unlock(&__ip_vs_svc_lock);
+
+	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
+		      fwmark, ip_vs_proto_name(protocol),
+		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
+		      svc ? "hit" : "not hit");
+
+	return svc;
+}
+
+
+static inline void
+__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	atomic_inc(&svc->refcnt);
+	dest->svc = svc;
+}
+
+static inline void
+__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+{
+	struct ip_vs_service *svc = dest->svc;
+
+	dest->svc = NULL;
+	if (atomic_dec_and_test(&svc->refcnt))
+		kfree(svc);
+}
+
+
+/*
+ *	Returns hash value for real service
+ */
+static inline unsigned ip_vs_rs_hashkey(int af,
+					    const union nf_inet_addr *addr,
+					    __be16 port)
+{
+	register unsigned porth = ntohs(port);
+	__be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		addr_fold = addr->ip6[0]^addr->ip6[1]^
+			    addr->ip6[2]^addr->ip6[3];
+#endif
+
+	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
+		& IP_VS_RTAB_MASK;
+}
+
+/*
+ *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *	should be called with locked tables.
+ */
+static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+{
+	unsigned hash;
+
+	if (!list_empty(&dest->d_list)) {
+		return 0;
+	}
+
+	/*
+	 *	Hash by proto,addr,port,
+	 *	which are the parameters of the real service.
+	 */
+	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+
+	list_add(&dest->d_list, &ip_vs_rtable[hash]);
+
+	return 1;
+}
+
+/*
+ *	UNhashes ip_vs_dest from ip_vs_rtable.
+ *	should be called with locked tables.
+ */
+static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
+{
+	/*
+	 * Remove it from the ip_vs_rtable table.
+	 */
+	if (!list_empty(&dest->d_list)) {
+		list_del(&dest->d_list);
+		INIT_LIST_HEAD(&dest->d_list);
+	}
+
+	return 1;
+}
+
+/*
+ *	Lookup real service by <proto,addr,port> in the real service table.
+ */
+struct ip_vs_dest *
+ip_vs_lookup_real_service(int af, __u16 protocol,
+			  const union nf_inet_addr *daddr,
+			  __be16 dport)
+{
+	unsigned hash;
+	struct ip_vs_dest *dest;
+
+	/*
+	 *	Check for "full" addressed entries
+	 *	Return the first found entry
+	 */
+	hash = ip_vs_rs_hashkey(af, daddr, dport);
+
+	read_lock(&__ip_vs_rs_lock);
+	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+		if ((dest->af == af)
+		    && ip_vs_addr_equal(af, &dest->addr, daddr)
+		    && (dest->port == dport)
+		    && ((dest->protocol == protocol) ||
+			dest->vfwmark)) {
+			/* HIT */
+			read_unlock(&__ip_vs_rs_lock);
+			return dest;
+		}
+	}
+	read_unlock(&__ip_vs_rs_lock);
+
+	return NULL;
+}
+
+/*
+ *	Lookup destination by {addr,port} in the given service
+ */
+static struct ip_vs_dest *
+ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+		  __be16 dport)
+{
+	struct ip_vs_dest *dest;
+
+	/*
+	 * Find the destination for the given service
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if ((dest->af == svc->af)
+		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
+		    && (dest->port == dport)) {
+			/* HIT */
+			return dest;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Find destination by {daddr,dport,vaddr,protocol}
+ * Cretaed to be used in ip_vs_process_message() in
+ * the backup synchronization daemon. It finds the
+ * destination to be bound to the received connection
+ * on the backup.
+ *
+ * ip_vs_lookup_real_service() looked promissing, but
+ * seems not working as expected.
+ */
+struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+				   __be16 dport,
+				   const union nf_inet_addr *vaddr,
+				   __be16 vport, __u16 protocol)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_service *svc;
+
+	svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+	if (!svc)
+		return NULL;
+	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	if (dest)
+		atomic_inc(&dest->refcnt);
+	ip_vs_service_put(svc);
+	return dest;
+}
+
+/*
+ *  Lookup dest by {svc,addr,port} in the destination trash.
+ *  The destination trash is used to hold the destinations that are removed
+ *  from the service table but are still referenced by some conn entries.
+ *  The reason to add the destination trash is when the dest is temporary
+ *  down (either by administrator or by monitor program), the dest can be
+ *  picked back from the trash, the remaining connections to the dest can
+ *  continue, and the counting information of the dest is also useful for
+ *  scheduling.
+ */
+static struct ip_vs_dest *
+ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+		     __be16 dport)
+{
+	struct ip_vs_dest *dest, *nxt;
+
+	/*
+	 * Find the destination in trash
+	 */
+	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
+			      "dest->refcnt=%d\n",
+			      dest->vfwmark,
+			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+			      ntohs(dest->port),
+			      atomic_read(&dest->refcnt));
+		if (dest->af == svc->af &&
+		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
+		    dest->port == dport &&
+		    dest->vfwmark == svc->fwmark &&
+		    dest->protocol == svc->protocol &&
+		    (svc->fwmark ||
+		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
+		      dest->vport == svc->port))) {
+			/* HIT */
+			return dest;
+		}
+
+		/*
+		 * Try to purge the destination from trash if not referenced
+		 */
+		if (atomic_read(&dest->refcnt) == 1) {
+			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
+				      "from trash\n",
+				      dest->vfwmark,
+				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+				      ntohs(dest->port));
+			list_del(&dest->n_list);
+			ip_vs_dst_reset(dest);
+			__ip_vs_unbind_svc(dest);
+			kfree(dest);
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ *  Clean up all the destinations in the trash
+ *  Called by the ip_vs_control_cleanup()
+ *
+ *  When the ip_vs_control_clearup is activated by ipvs module exit,
+ *  the service tables must have been flushed and all the connections
+ *  are expired, and the refcnt of each destination in the trash must
+ *  be 1, so we simply release them here.
+ */
+static void ip_vs_trash_cleanup(void)
+{
+	struct ip_vs_dest *dest, *nxt;
+
+	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+		list_del(&dest->n_list);
+		ip_vs_dst_reset(dest);
+		__ip_vs_unbind_svc(dest);
+		kfree(dest);
+	}
+}
+
+
+static void
+ip_vs_zero_stats(struct ip_vs_stats *stats)
+{
+	spin_lock_bh(&stats->lock);
+
+	memset(&stats->ustats, 0, sizeof(stats->ustats));
+	ip_vs_zero_estimator(stats);
+
+	spin_unlock_bh(&stats->lock);
+}
+
+/*
+ *	Update a destination in the given service
+ */
+static void
+__ip_vs_update_dest(struct ip_vs_service *svc,
+		    struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
+{
+	int conn_flags;
+
+	/* set the weight and the flags */
+	atomic_set(&dest->weight, udest->weight);
+	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
+
+	/* check if local node and update the flags */
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6) {
+		if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
+			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+				| IP_VS_CONN_F_LOCALNODE;
+		}
+	} else
+#endif
+		if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
+			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+				| IP_VS_CONN_F_LOCALNODE;
+		}
+
+	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
+	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
+		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
+	} else {
+		/*
+		 *    Put the real service in ip_vs_rtable if not present.
+		 *    For now only for NAT!
+		 */
+		write_lock_bh(&__ip_vs_rs_lock);
+		ip_vs_rs_hash(dest);
+		write_unlock_bh(&__ip_vs_rs_lock);
+	}
+	atomic_set(&dest->conn_flags, conn_flags);
+
+	/* bind the service */
+	if (!dest->svc) {
+		__ip_vs_bind_svc(dest, svc);
+	} else {
+		if (dest->svc != svc) {
+			__ip_vs_unbind_svc(dest);
+			ip_vs_zero_stats(&dest->stats);
+			__ip_vs_bind_svc(dest, svc);
+		}
+	}
+
+	/* set the dest status flags */
+	dest->flags |= IP_VS_DEST_F_AVAILABLE;
+
+	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
+		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	dest->u_threshold = udest->u_threshold;
+	dest->l_threshold = udest->l_threshold;
+}
+
+
+/*
+ *	Create a destination for the given service
+ */
+static int
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
+	       struct ip_vs_dest **dest_p)
+{
+	struct ip_vs_dest *dest;
+	unsigned atype;
+
+	EnterFunction(2);
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (svc->af == AF_INET6) {
+		atype = ipv6_addr_type(&udest->addr.in6);
+		if ((!(atype & IPV6_ADDR_UNICAST) ||
+			atype & IPV6_ADDR_LINKLOCAL) &&
+			!__ip_vs_addr_is_local_v6(&udest->addr.in6))
+			return -EINVAL;
+	} else
+#endif
+	{
+		atype = inet_addr_type(&init_net, udest->addr.ip);
+		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
+			return -EINVAL;
+	}
+
+	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
+	if (dest == NULL) {
+		IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
+		return -ENOMEM;
+	}
+
+	dest->af = svc->af;
+	dest->protocol = svc->protocol;
+	dest->vaddr = svc->addr;
+	dest->vport = svc->port;
+	dest->vfwmark = svc->fwmark;
+	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
+	dest->port = udest->port;
+
+	atomic_set(&dest->activeconns, 0);
+	atomic_set(&dest->inactconns, 0);
+	atomic_set(&dest->persistconns, 0);
+	atomic_set(&dest->refcnt, 0);
+
+	INIT_LIST_HEAD(&dest->d_list);
+	spin_lock_init(&dest->dst_lock);
+	spin_lock_init(&dest->stats.lock);
+	__ip_vs_update_dest(svc, dest, udest);
+	ip_vs_new_estimator(&dest->stats);
+
+	*dest_p = dest;
+
+	LeaveFunction(2);
+	return 0;
+}
+
+
+/*
+ *	Add a destination into an existing service
+ */
+static int
+ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+	struct ip_vs_dest *dest;
+	union nf_inet_addr daddr;
+	__be16 dport = udest->port;
+	int ret;
+
+	EnterFunction(2);
+
+	if (udest->weight < 0) {
+		IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	if (udest->l_threshold > udest->u_threshold) {
+		IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
+			  "upper threshold\n");
+		return -ERANGE;
+	}
+
+	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
+	/*
+	 * Check if the dest already exists in the list
+	 */
+	dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
+	if (dest != NULL) {
+		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
+		return -EEXIST;
+	}
+
+	/*
+	 * Check if the dest already exists in the trash and
+	 * is from the same service
+	 */
+	dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+
+	if (dest != NULL) {
+		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
+			      "dest->refcnt=%d, service %u/%s:%u\n",
+			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+			      atomic_read(&dest->refcnt),
+			      dest->vfwmark,
+			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
+			      ntohs(dest->vport));
+
+		__ip_vs_update_dest(svc, dest, udest);
+
+		/*
+		 * Get the destination from the trash
+		 */
+		list_del(&dest->n_list);
+
+		ip_vs_new_estimator(&dest->stats);
+
+		write_lock_bh(&__ip_vs_svc_lock);
+
+		/*
+		 * Wait until all other svc users go away.
+		 */
+		IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+		list_add(&dest->n_list, &svc->destinations);
+		svc->num_dests++;
+
+		/* call the update_service function of its scheduler */
+		if (svc->scheduler->update_service)
+			svc->scheduler->update_service(svc);
+
+		write_unlock_bh(&__ip_vs_svc_lock);
+		return 0;
+	}
+
+	/*
+	 * Allocate and initialize the dest structure
+	 */
+	ret = ip_vs_new_dest(svc, udest, &dest);
+	if (ret) {
+		return ret;
+	}
+
+	/*
+	 * Add the dest entry into the list
+	 */
+	atomic_inc(&dest->refcnt);
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 * Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	list_add(&dest->n_list, &svc->destinations);
+	svc->num_dests++;
+
+	/* call the update_service function of its scheduler */
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *	Edit a destination in the given service
+ */
+static int
+ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+	struct ip_vs_dest *dest;
+	union nf_inet_addr daddr;
+	__be16 dport = udest->port;
+
+	EnterFunction(2);
+
+	if (udest->weight < 0) {
+		IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	if (udest->l_threshold > udest->u_threshold) {
+		IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
+			  "upper threshold\n");
+		return -ERANGE;
+	}
+
+	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
+	/*
+	 *  Lookup the destination list
+	 */
+	dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
+	if (dest == NULL) {
+		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
+		return -ENOENT;
+	}
+
+	__ip_vs_update_dest(svc, dest, udest);
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/* Wait until all other svc users go away */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/* call the update_service, because server weight may be changed */
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *	Delete a destination (must be already unlinked from the service)
+ */
+static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+{
+	ip_vs_kill_estimator(&dest->stats);
+
+	/*
+	 *  Remove it from the d-linked list with the real services.
+	 */
+	write_lock_bh(&__ip_vs_rs_lock);
+	ip_vs_rs_unhash(dest);
+	write_unlock_bh(&__ip_vs_rs_lock);
+
+	/*
+	 *  Decrease the refcnt of the dest, and free the dest
+	 *  if nobody refers to it (refcnt=0). Otherwise, throw
+	 *  the destination into the trash.
+	 */
+	if (atomic_dec_and_test(&dest->refcnt)) {
+		ip_vs_dst_reset(dest);
+		/* simply decrease svc->refcnt here, let the caller check
+		   and release the service if nobody refers to it.
+		   Only user context can release destination and service,
+		   and only one user context can update virtual service at a
+		   time, so the operation here is OK */
+		atomic_dec(&dest->svc->refcnt);
+		kfree(dest);
+	} else {
+		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
+			      "dest->refcnt=%d\n",
+			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
+			      ntohs(dest->port),
+			      atomic_read(&dest->refcnt));
+		list_add(&dest->n_list, &ip_vs_dest_trash);
+		atomic_inc(&dest->refcnt);
+	}
+}
+
+
+/*
+ *	Unlink a destination from the given service
+ */
+static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
+				struct ip_vs_dest *dest,
+				int svcupd)
+{
+	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
+
+	/*
+	 *  Remove it from the d-linked destination list.
+	 */
+	list_del(&dest->n_list);
+	svc->num_dests--;
+
+	/*
+	 *  Call the update_service function of its scheduler
+	 */
+	if (svcupd && svc->scheduler->update_service)
+			svc->scheduler->update_service(svc);
+}
+
+
+/*
+ *	Delete a destination server in the given service
+ */
+static int
+ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+{
+	struct ip_vs_dest *dest;
+	__be16 dport = udest->port;
+
+	EnterFunction(2);
+
+	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+
+	if (dest == NULL) {
+		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
+		return -ENOENT;
+	}
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 *	Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/*
+	 *	Unlink dest from the service
+	 */
+	__ip_vs_unlink_dest(svc, dest, 1);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 *	Delete the destination
+	 */
+	__ip_vs_del_dest(dest);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *	Add a service into the service hash table
+ */
+static int
+ip_vs_add_service(struct ip_vs_service_user_kern *u,
+		  struct ip_vs_service **svc_p)
+{
+	int ret = 0;
+	struct ip_vs_scheduler *sched = NULL;
+	struct ip_vs_service *svc = NULL;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	/* Lookup the scheduler by 'u->sched_name' */
+	sched = ip_vs_scheduler_get(u->sched_name);
+	if (sched == NULL) {
+		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
+			   u->sched_name);
+		ret = -ENOENT;
+		goto out_mod_dec;
+	}
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (u->af == AF_INET6) {
+		if (!sched->supports_ipv6) {
+			ret = -EAFNOSUPPORT;
+			goto out_err;
+		}
+		if ((u->netmask < 1) || (u->netmask > 128)) {
+			ret = -EINVAL;
+			goto out_err;
+		}
+	}
+#endif
+
+	svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
+	if (svc == NULL) {
+		IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	/* I'm the first user of the service */
+	atomic_set(&svc->usecnt, 1);
+	atomic_set(&svc->refcnt, 0);
+
+	svc->af = u->af;
+	svc->protocol = u->protocol;
+	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
+	svc->port = u->port;
+	svc->fwmark = u->fwmark;
+	svc->flags = u->flags;
+	svc->timeout = u->timeout * HZ;
+	svc->netmask = u->netmask;
+
+	INIT_LIST_HEAD(&svc->destinations);
+	rwlock_init(&svc->sched_lock);
+	spin_lock_init(&svc->stats.lock);
+
+	/* Bind the scheduler */
+	ret = ip_vs_bind_scheduler(svc, sched);
+	if (ret)
+		goto out_err;
+	sched = NULL;
+
+	/* Update the virtual service counters */
+	if (svc->port == FTPPORT)
+		atomic_inc(&ip_vs_ftpsvc_counter);
+	else if (svc->port == 0)
+		atomic_inc(&ip_vs_nullsvc_counter);
+
+	ip_vs_new_estimator(&svc->stats);
+
+	/* Count only IPv4 services for old get/setsockopt interface */
+	if (svc->af == AF_INET)
+		ip_vs_num_services++;
+
+	/* Hash the service into the service table */
+	write_lock_bh(&__ip_vs_svc_lock);
+	ip_vs_svc_hash(svc);
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	*svc_p = svc;
+	return 0;
+
+  out_err:
+	if (svc != NULL) {
+		if (svc->scheduler)
+			ip_vs_unbind_scheduler(svc);
+		if (svc->inc) {
+			local_bh_disable();
+			ip_vs_app_inc_put(svc->inc);
+			local_bh_enable();
+		}
+		kfree(svc);
+	}
+	ip_vs_scheduler_put(sched);
+
+  out_mod_dec:
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	return ret;
+}
+
+
+/*
+ *	Edit a service and bind it with a new scheduler
+ */
+static int
+ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
+{
+	struct ip_vs_scheduler *sched, *old_sched;
+	int ret = 0;
+
+	/*
+	 * Lookup the scheduler, by 'u->sched_name'
+	 */
+	sched = ip_vs_scheduler_get(u->sched_name);
+	if (sched == NULL) {
+		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
+			   u->sched_name);
+		return -ENOENT;
+	}
+	old_sched = sched;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (u->af == AF_INET6) {
+		if (!sched->supports_ipv6) {
+			ret = -EAFNOSUPPORT;
+			goto out;
+		}
+		if ((u->netmask < 1) || (u->netmask > 128)) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+#endif
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 * Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/*
+	 * Set the flags and timeout value
+	 */
+	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
+	svc->timeout = u->timeout * HZ;
+	svc->netmask = u->netmask;
+
+	old_sched = svc->scheduler;
+	if (sched != old_sched) {
+		/*
+		 * Unbind the old scheduler
+		 */
+		if ((ret = ip_vs_unbind_scheduler(svc))) {
+			old_sched = sched;
+			goto out_unlock;
+		}
+
+		/*
+		 * Bind the new scheduler
+		 */
+		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
+			/*
+			 * If ip_vs_bind_scheduler fails, restore the old
+			 * scheduler.
+			 * The main reason of failure is out of memory.
+			 *
+			 * The question is if the old scheduler can be
+			 * restored all the time. TODO: if it cannot be
+			 * restored some time, we must delete the service,
+			 * otherwise the system may crash.
+			 */
+			ip_vs_bind_scheduler(svc, old_sched);
+			old_sched = sched;
+			goto out_unlock;
+		}
+	}
+
+  out_unlock:
+	write_unlock_bh(&__ip_vs_svc_lock);
+#ifdef CONFIG_IP_VS_IPV6
+  out:
+#endif
+
+	if (old_sched)
+		ip_vs_scheduler_put(old_sched);
+
+	return ret;
+}
+
+
+/*
+ *	Delete a service from the service list
+ *	- The service must be unlinked, unlocked and not referenced!
+ *	- We are called under _bh lock
+ */
+static void __ip_vs_del_service(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest, *nxt;
+	struct ip_vs_scheduler *old_sched;
+
+	/* Count only IPv4 services for old get/setsockopt interface */
+	if (svc->af == AF_INET)
+		ip_vs_num_services--;
+
+	ip_vs_kill_estimator(&svc->stats);
+
+	/* Unbind scheduler */
+	old_sched = svc->scheduler;
+	ip_vs_unbind_scheduler(svc);
+	if (old_sched)
+		ip_vs_scheduler_put(old_sched);
+
+	/* Unbind app inc */
+	if (svc->inc) {
+		ip_vs_app_inc_put(svc->inc);
+		svc->inc = NULL;
+	}
+
+	/*
+	 *    Unlink the whole destination list
+	 */
+	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
+		__ip_vs_unlink_dest(svc, dest, 0);
+		__ip_vs_del_dest(dest);
+	}
+
+	/*
+	 *    Update the virtual service counters
+	 */
+	if (svc->port == FTPPORT)
+		atomic_dec(&ip_vs_ftpsvc_counter);
+	else if (svc->port == 0)
+		atomic_dec(&ip_vs_nullsvc_counter);
+
+	/*
+	 *    Free the service if nobody refers to it
+	 */
+	if (atomic_read(&svc->refcnt) == 0)
+		kfree(svc);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+}
+
+/*
+ *	Delete a service from the service list
+ */
+static int ip_vs_del_service(struct ip_vs_service *svc)
+{
+	if (svc == NULL)
+		return -EEXIST;
+
+	/*
+	 * Unhash it from the service table
+	 */
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	ip_vs_svc_unhash(svc);
+
+	/*
+	 * Wait until all the svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	__ip_vs_del_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	return 0;
+}
+
+
+/*
+ *	Flush all the virtual services
+ */
+static int ip_vs_flush(void)
+{
+	int idx;
+	struct ip_vs_service *svc, *nxt;
+
+	/*
+	 * Flush the service table hashed by <protocol,addr,port>
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
+			write_lock_bh(&__ip_vs_svc_lock);
+			ip_vs_svc_unhash(svc);
+			/*
+			 * Wait until all the svc users go away.
+			 */
+			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+			__ip_vs_del_service(svc);
+			write_unlock_bh(&__ip_vs_svc_lock);
+		}
+	}
+
+	/*
+	 * Flush the service table hashed by fwmark
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry_safe(svc, nxt,
+					 &ip_vs_svc_fwm_table[idx], f_list) {
+			write_lock_bh(&__ip_vs_svc_lock);
+			ip_vs_svc_unhash(svc);
+			/*
+			 * Wait until all the svc users go away.
+			 */
+			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+			__ip_vs_del_service(svc);
+			write_unlock_bh(&__ip_vs_svc_lock);
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ *	Zero counters in a service or all services
+ */
+static int ip_vs_zero_service(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+
+	write_lock_bh(&__ip_vs_svc_lock);
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		ip_vs_zero_stats(&dest->stats);
+	}
+	ip_vs_zero_stats(&svc->stats);
+	write_unlock_bh(&__ip_vs_svc_lock);
+	return 0;
+}
+
+static int ip_vs_zero_all(void)
+{
+	int idx;
+	struct ip_vs_service *svc;
+
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			ip_vs_zero_service(svc);
+		}
+	}
+
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			ip_vs_zero_service(svc);
+		}
+	}
+
+	ip_vs_zero_stats(&ip_vs_stats);
+	return 0;
+}
+
+
+static int
+proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;
+	int val = *valp;
+	int rc;
+
+	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	if (write && (*valp != val)) {
+		if ((*valp < 0) || (*valp > 3)) {
+			/* Restore the correct value */
+			*valp = val;
+		} else {
+			update_defense_level();
+		}
+	}
+	return rc;
+}
+
+
+static int
+proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
+		       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;
+	int val[2];
+	int rc;
+
+	/* backup the value first */
+	memcpy(val, valp, sizeof(val));
+
+	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
+		/* Restore the correct value */
+		memcpy(valp, val, sizeof(val));
+	}
+	return rc;
+}
+
+
+/*
+ *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ */
+
+static struct ctl_table vs_vars[] = {
+	{
+		.procname	= "amemthresh",
+		.data		= &sysctl_ip_vs_amemthresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#ifdef CONFIG_IP_VS_DEBUG
+	{
+		.procname	= "debug_level",
+		.data		= &sysctl_ip_vs_debug_level,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "am_droprate",
+		.data		= &sysctl_ip_vs_am_droprate,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "drop_entry",
+		.data		= &sysctl_ip_vs_drop_entry,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+	{
+		.procname	= "drop_packet",
+		.data		= &sysctl_ip_vs_drop_packet,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+	{
+		.procname	= "secure_tcp",
+		.data		= &sysctl_ip_vs_secure_tcp,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+#if 0
+	{
+		.procname	= "timeout_established",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_synsent",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_synrecv",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_finwait",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_timewait",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_close",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_closewait",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_lastack",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_listen",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_synack",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_udp",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "timeout_icmp",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+#endif
+	{
+		.procname	= "cache_bypass",
+		.data		= &sysctl_ip_vs_cache_bypass,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "expire_nodest_conn",
+		.data		= &sysctl_ip_vs_expire_nodest_conn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "expire_quiescent_template",
+		.data		= &sysctl_ip_vs_expire_quiescent_template,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.procname	= "sync_threshold",
+		.data		= &sysctl_ip_vs_sync_threshold,
+		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_sync_threshold,
+	},
+	{
+		.procname	= "nat_icmp_send",
+		.data		= &sysctl_ip_vs_nat_icmp_send,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+
+const struct ctl_path net_vs_ctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ .procname = "vs", },
+	{ }
+};
+EXPORT_SYMBOL_GPL(net_vs_ctl_path);
+
+static struct ctl_table_header * sysctl_header;
+
+#ifdef CONFIG_PROC_FS
+
+struct ip_vs_iter {
+	struct list_head *table;
+	int bucket;
+};
+
+/*
+ *	Write the contents of the VS rule table to a PROCfs file.
+ *	(It is kept just for backward compatibility)
+ */
+static inline const char *ip_vs_fwd_name(unsigned flags)
+{
+	switch (flags & IP_VS_CONN_F_FWD_MASK) {
+	case IP_VS_CONN_F_LOCALNODE:
+		return "Local";
+	case IP_VS_CONN_F_TUNNEL:
+		return "Tunnel";
+	case IP_VS_CONN_F_DROUTE:
+		return "Route";
+	default:
+		return "Masq";
+	}
+}
+
+
+/* Get the Nth entry in the two lists */
+static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
+{
+	struct ip_vs_iter *iter = seq->private;
+	int idx;
+	struct ip_vs_service *svc;
+
+	/* look in hash by protocol */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			if (pos-- == 0){
+				iter->table = ip_vs_svc_table;
+				iter->bucket = idx;
+				return svc;
+			}
+		}
+	}
+
+	/* keep looking in fwmark */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			if (pos-- == 0) {
+				iter->table = ip_vs_svc_fwm_table;
+				iter->bucket = idx;
+				return svc;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
+__acquires(__ip_vs_svc_lock)
+{
+
+	read_lock_bh(&__ip_vs_svc_lock);
+	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+
+static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct list_head *e;
+	struct ip_vs_iter *iter;
+	struct ip_vs_service *svc;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_info_array(seq,0);
+
+	svc = v;
+	iter = seq->private;
+
+	if (iter->table == ip_vs_svc_table) {
+		/* next service in table hashed by protocol */
+		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
+			return list_entry(e, struct ip_vs_service, s_list);
+
+
+		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
+					    s_list) {
+				return svc;
+			}
+		}
+
+		iter->table = ip_vs_svc_fwm_table;
+		iter->bucket = -1;
+		goto scan_fwmark;
+	}
+
+	/* next service in hashed by fwmark */
+	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
+		return list_entry(e, struct ip_vs_service, f_list);
+
+ scan_fwmark:
+	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
+				    f_list)
+			return svc;
+	}
+
+	return NULL;
+}
+
+static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
+__releases(__ip_vs_svc_lock)
+{
+	read_unlock_bh(&__ip_vs_svc_lock);
+}
+
+
+static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq,
+			"IP Virtual Server version %d.%d.%d (size=%d)\n",
+			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+		seq_puts(seq,
+			 "Prot LocalAddress:Port Scheduler Flags\n");
+		seq_puts(seq,
+			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
+	} else {
+		const struct ip_vs_service *svc = v;
+		const struct ip_vs_iter *iter = seq->private;
+		const struct ip_vs_dest *dest;
+
+		if (iter->table == ip_vs_svc_table) {
+#ifdef CONFIG_IP_VS_IPV6
+			if (svc->af == AF_INET6)
+				seq_printf(seq, "%s  [" NIP6_FMT "]:%04X %s ",
+					   ip_vs_proto_name(svc->protocol),
+					   NIP6(svc->addr.in6),
+					   ntohs(svc->port),
+					   svc->scheduler->name);
+			else
+#endif
+				seq_printf(seq, "%s  %08X:%04X %s ",
+					   ip_vs_proto_name(svc->protocol),
+					   ntohl(svc->addr.ip),
+					   ntohs(svc->port),
+					   svc->scheduler->name);
+		} else {
+			seq_printf(seq, "FWM  %08X %s ",
+				   svc->fwmark, svc->scheduler->name);
+		}
+
+		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+			seq_printf(seq, "persistent %d %08X\n",
+				svc->timeout,
+				ntohl(svc->netmask));
+		else
+			seq_putc(seq, '\n');
+
+		list_for_each_entry(dest, &svc->destinations, n_list) {
+#ifdef CONFIG_IP_VS_IPV6
+			if (dest->af == AF_INET6)
+				seq_printf(seq,
+					   "  -> [" NIP6_FMT "]:%04X"
+					   "      %-7s %-6d %-10d %-10d\n",
+					   NIP6(dest->addr.in6),
+					   ntohs(dest->port),
+					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+					   atomic_read(&dest->weight),
+					   atomic_read(&dest->activeconns),
+					   atomic_read(&dest->inactconns));
+			else
+#endif
+				seq_printf(seq,
+					   "  -> %08X:%04X      "
+					   "%-7s %-6d %-10d %-10d\n",
+					   ntohl(dest->addr.ip),
+					   ntohs(dest->port),
+					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+					   atomic_read(&dest->weight),
+					   atomic_read(&dest->activeconns),
+					   atomic_read(&dest->inactconns));
+
+		}
+	}
+	return 0;
+}
+
+static const struct seq_operations ip_vs_info_seq_ops = {
+	.start = ip_vs_info_seq_start,
+	.next  = ip_vs_info_seq_next,
+	.stop  = ip_vs_info_seq_stop,
+	.show  = ip_vs_info_seq_show,
+};
+
+static int ip_vs_info_open(struct inode *inode, struct file *file)
+{
+	return seq_open_private(file, &ip_vs_info_seq_ops,
+			sizeof(struct ip_vs_iter));
+}
+
+static const struct file_operations ip_vs_info_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip_vs_info_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_private,
+};
+
+#endif
+
+struct ip_vs_stats ip_vs_stats = {
+	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
+};
+
+#ifdef CONFIG_PROC_FS
+static int ip_vs_stats_show(struct seq_file *seq, void *v)
+{
+
+/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
+	seq_printf(seq,
+		   "   Conns  Packets  Packets            Bytes            Bytes\n");
+
+	spin_lock_bh(&ip_vs_stats.lock);
+	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
+		   ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
+		   (unsigned long long) ip_vs_stats.ustats.inbytes,
+		   (unsigned long long) ip_vs_stats.ustats.outbytes);
+
+/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
+			ip_vs_stats.ustats.cps,
+			ip_vs_stats.ustats.inpps,
+			ip_vs_stats.ustats.outpps,
+			ip_vs_stats.ustats.inbps,
+			ip_vs_stats.ustats.outbps);
+	spin_unlock_bh(&ip_vs_stats.lock);
+
+	return 0;
+}
+
+static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ip_vs_stats_show, NULL);
+}
+
+static const struct file_operations ip_vs_stats_fops = {
+	.owner = THIS_MODULE,
+	.open = ip_vs_stats_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif
+
+/*
+ *	Set timeout values for tcp tcpfin udp in the timeout_table.
+ */
+static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+{
+	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
+		  u->tcp_timeout,
+		  u->tcp_fin_timeout,
+		  u->udp_timeout);
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	if (u->tcp_timeout) {
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+			= u->tcp_timeout * HZ;
+	}
+
+	if (u->tcp_fin_timeout) {
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+			= u->tcp_fin_timeout * HZ;
+	}
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	if (u->udp_timeout) {
+		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+			= u->udp_timeout * HZ;
+	}
+#endif
+	return 0;
+}
+
+
+#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
+#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
+#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
+				 sizeof(struct ip_vs_dest_user))
+#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
+#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
+#define MAX_ARG_LEN		SVCDEST_ARG_LEN
+
+static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
+	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
+	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
+};
+
+static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
+				  struct ip_vs_service_user *usvc_compat)
+{
+	usvc->af		= AF_INET;
+	usvc->protocol		= usvc_compat->protocol;
+	usvc->addr.ip		= usvc_compat->addr;
+	usvc->port		= usvc_compat->port;
+	usvc->fwmark		= usvc_compat->fwmark;
+
+	/* Deep copy of sched_name is not needed here */
+	usvc->sched_name	= usvc_compat->sched_name;
+
+	usvc->flags		= usvc_compat->flags;
+	usvc->timeout		= usvc_compat->timeout;
+	usvc->netmask		= usvc_compat->netmask;
+}
+
+static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
+				   struct ip_vs_dest_user *udest_compat)
+{
+	udest->addr.ip		= udest_compat->addr;
+	udest->port		= udest_compat->port;
+	udest->conn_flags	= udest_compat->conn_flags;
+	udest->weight		= udest_compat->weight;
+	udest->u_threshold	= udest_compat->u_threshold;
+	udest->l_threshold	= udest_compat->l_threshold;
+}
+
+static int
+do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+	int ret;
+	unsigned char arg[MAX_ARG_LEN];
+	struct ip_vs_service_user *usvc_compat;
+	struct ip_vs_service_user_kern usvc;
+	struct ip_vs_service *svc;
+	struct ip_vs_dest_user *udest_compat;
+	struct ip_vs_dest_user_kern udest;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (len != set_arglen[SET_CMDID(cmd)]) {
+		IP_VS_ERR("set_ctl: len %u != %u\n",
+			  len, set_arglen[SET_CMDID(cmd)]);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(arg, user, len) != 0)
+		return -EFAULT;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	if (mutex_lock_interruptible(&__ip_vs_mutex)) {
+		ret = -ERESTARTSYS;
+		goto out_dec;
+	}
+
+	if (cmd == IP_VS_SO_SET_FLUSH) {
+		/* Flush the virtual service */
+		ret = ip_vs_flush();
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
+		/* Set timeout values for (tcp tcpfin udp) */
+		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
+		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
+		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+		ret = stop_sync_thread(dm->state);
+		goto out_unlock;
+	}
+
+	usvc_compat = (struct ip_vs_service_user *)arg;
+	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
+
+	/* We only use the new structs internally, so copy userspace compat
+	 * structs to extended internal versions */
+	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
+	ip_vs_copy_udest_compat(&udest, udest_compat);
+
+	if (cmd == IP_VS_SO_SET_ZERO) {
+		/* if no service address is set, zero counters in all */
+		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
+			ret = ip_vs_zero_all();
+			goto out_unlock;
+		}
+	}
+
+	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
+	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
+		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
+			  usvc.protocol, NIPQUAD(usvc.addr.ip),
+			  ntohs(usvc.port), usvc.sched_name);
+		ret = -EFAULT;
+		goto out_unlock;
+	}
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+					  &usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+
+	if (cmd != IP_VS_SO_SET_ADD
+	    && (svc == NULL || svc->protocol != usvc.protocol)) {
+		ret = -ESRCH;
+		goto out_unlock;
+	}
+
+	switch (cmd) {
+	case IP_VS_SO_SET_ADD:
+		if (svc != NULL)
+			ret = -EEXIST;
+		else
+			ret = ip_vs_add_service(&usvc, &svc);
+		break;
+	case IP_VS_SO_SET_EDIT:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IP_VS_SO_SET_DEL:
+		ret = ip_vs_del_service(svc);
+		if (!ret)
+			goto out_unlock;
+		break;
+	case IP_VS_SO_SET_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	case IP_VS_SO_SET_ADDDEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IP_VS_SO_SET_EDITDEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IP_VS_SO_SET_DELDEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (svc)
+		ip_vs_service_put(svc);
+
+  out_unlock:
+	mutex_unlock(&__ip_vs_mutex);
+  out_dec:
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	return ret;
+}
+
+
+static void
+ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+{
+	spin_lock_bh(&src->lock);
+	memcpy(dst, &src->ustats, sizeof(*dst));
+	spin_unlock_bh(&src->lock);
+}
+
+static void
+ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
+{
+	dst->protocol = src->protocol;
+	dst->addr = src->addr.ip;
+	dst->port = src->port;
+	dst->fwmark = src->fwmark;
+	strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
+	dst->flags = src->flags;
+	dst->timeout = src->timeout / HZ;
+	dst->netmask = src->netmask;
+	dst->num_dests = src->num_dests;
+	ip_vs_copy_stats(&dst->stats, &src->stats);
+}
+
+static inline int
+__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+			    struct ip_vs_get_services __user *uptr)
+{
+	int idx, count=0;
+	struct ip_vs_service *svc;
+	struct ip_vs_service_entry entry;
+	int ret = 0;
+
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			/* Only expose IPv4 entries to old interface */
+			if (svc->af != AF_INET)
+				continue;
+
+			if (count >= get->num_services)
+				goto out;
+			memset(&entry, 0, sizeof(entry));
+			ip_vs_copy_service(&entry, svc);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			count++;
+		}
+	}
+
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			/* Only expose IPv4 entries to old interface */
+			if (svc->af != AF_INET)
+				continue;
+
+			if (count >= get->num_services)
+				goto out;
+			memset(&entry, 0, sizeof(entry));
+			ip_vs_copy_service(&entry, svc);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			count++;
+		}
+	}
+  out:
+	return ret;
+}
+
+static inline int
+__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+			 struct ip_vs_get_dests __user *uptr)
+{
+	struct ip_vs_service *svc;
+	union nf_inet_addr addr = { .ip = get->addr };
+	int ret = 0;
+
+	if (get->fwmark)
+		svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
+	else
+		svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
+					  get->port);
+
+	if (svc) {
+		int count = 0;
+		struct ip_vs_dest *dest;
+		struct ip_vs_dest_entry entry;
+
+		list_for_each_entry(dest, &svc->destinations, n_list) {
+			if (count >= get->num_dests)
+				break;
+
+			entry.addr = dest->addr.ip;
+			entry.port = dest->port;
+			entry.conn_flags = atomic_read(&dest->conn_flags);
+			entry.weight = atomic_read(&dest->weight);
+			entry.u_threshold = dest->u_threshold;
+			entry.l_threshold = dest->l_threshold;
+			entry.activeconns = atomic_read(&dest->activeconns);
+			entry.inactconns = atomic_read(&dest->inactconns);
+			entry.persistconns = atomic_read(&dest->persistconns);
+			ip_vs_copy_stats(&entry.stats, &dest->stats);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				break;
+			}
+			count++;
+		}
+		ip_vs_service_put(svc);
+	} else
+		ret = -ESRCH;
+	return ret;
+}
+
+static inline void
+__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	u->tcp_timeout =
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+	u->tcp_fin_timeout =
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	u->udp_timeout =
+		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+#endif
+}
+
+
+#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
+#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
+#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
+#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
+#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
+#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
+#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
+
+static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
+	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
+	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
+};
+
+static int
+do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	unsigned char arg[128];
+	int ret = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (*len < get_arglen[GET_CMDID(cmd)]) {
+		IP_VS_ERR("get_ctl: len %u < %u\n",
+			  *len, get_arglen[GET_CMDID(cmd)]);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
+		return -EFAULT;
+
+	if (mutex_lock_interruptible(&__ip_vs_mutex))
+		return -ERESTARTSYS;
+
+	switch (cmd) {
+	case IP_VS_SO_GET_VERSION:
+	{
+		char buf[64];
+
+		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
+			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+		*len = strlen(buf)+1;
+	}
+	break;
+
+	case IP_VS_SO_GET_INFO:
+	{
+		struct ip_vs_getinfo info;
+		info.version = IP_VS_VERSION_CODE;
+		info.size = IP_VS_CONN_TAB_SIZE;
+		info.num_services = ip_vs_num_services;
+		if (copy_to_user(user, &info, sizeof(info)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICES:
+	{
+		struct ip_vs_get_services *get;
+		int size;
+
+		get = (struct ip_vs_get_services *)arg;
+		size = sizeof(*get) +
+			sizeof(struct ip_vs_service_entry) * get->num_services;
+		if (*len != size) {
+			IP_VS_ERR("length: %u != %u\n", *len, size);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_service_entries(get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICE:
+	{
+		struct ip_vs_service_entry *entry;
+		struct ip_vs_service *svc;
+		union nf_inet_addr addr;
+
+		entry = (struct ip_vs_service_entry *)arg;
+		addr.ip = entry->addr;
+		if (entry->fwmark)
+			svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
+		else
+			svc = __ip_vs_service_get(AF_INET, entry->protocol,
+						  &addr, entry->port);
+		if (svc) {
+			ip_vs_copy_service(entry, svc);
+			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
+				ret = -EFAULT;
+			ip_vs_service_put(svc);
+		} else
+			ret = -ESRCH;
+	}
+	break;
+
+	case IP_VS_SO_GET_DESTS:
+	{
+		struct ip_vs_get_dests *get;
+		int size;
+
+		get = (struct ip_vs_get_dests *)arg;
+		size = sizeof(*get) +
+			sizeof(struct ip_vs_dest_entry) * get->num_dests;
+		if (*len != size) {
+			IP_VS_ERR("length: %u != %u\n", *len, size);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_dest_entries(get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_TIMEOUT:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+		if (copy_to_user(user, &t, sizeof(t)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_DAEMON:
+	{
+		struct ip_vs_daemon_user d[2];
+
+		memset(&d, 0, sizeof(d));
+		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+			d[0].state = IP_VS_STATE_MASTER;
+			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
+			d[0].syncid = ip_vs_master_syncid;
+		}
+		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+			d[1].state = IP_VS_STATE_BACKUP;
+			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
+			d[1].syncid = ip_vs_backup_syncid;
+		}
+		if (copy_to_user(user, &d, sizeof(d)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+  out:
+	mutex_unlock(&__ip_vs_mutex);
+	return ret;
+}
+
+
+static struct nf_sockopt_ops ip_vs_sockopts = {
+	.pf		= PF_INET,
+	.set_optmin	= IP_VS_BASE_CTL,
+	.set_optmax	= IP_VS_SO_SET_MAX+1,
+	.set		= do_ip_vs_set_ctl,
+	.get_optmin	= IP_VS_BASE_CTL,
+	.get_optmax	= IP_VS_SO_GET_MAX+1,
+	.get		= do_ip_vs_get_ctl,
+	.owner		= THIS_MODULE,
+};
+
+/*
+ * Generic Netlink interface
+ */
+
+/* IPVS genetlink family */
+static struct genl_family ip_vs_genl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_MAX,
+};
+
+/* Policy used for first-level command attributes */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_IFNAME_MAXLEN },
+	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_SCHEDNAME_MAXLEN },
+	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
+					    .len = sizeof(struct ip_vs_flags) },
+	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+				 struct ip_vs_stats *stats)
+{
+	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+	if (!nl_stats)
+		return -EMSGSIZE;
+
+	spin_lock_bh(&stats->lock);
+
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
+
+	spin_unlock_bh(&stats->lock);
+
+	nla_nest_end(skb, nl_stats);
+
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(&stats->lock);
+	nla_nest_cancel(skb, nl_stats);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc)
+{
+	struct nlattr *nl_service;
+	struct ip_vs_flags flags = { .flags = svc->flags,
+				     .mask = ~0 };
+
+	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	if (!nl_service)
+		return -EMSGSIZE;
+
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
+
+	if (svc->fwmark) {
+		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+	} else {
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+	}
+
+	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_service);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_service);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc,
+				   struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_SERVICE);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_service(skb, svc) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	int idx = 0, i;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+
+	mutex_lock(&__ip_vs_mutex);
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+				    struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+	/* Parse mandatory identifying service fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+		return -EINVAL;
+
+	nla_af		= attrs[IPVS_SVC_ATTR_AF];
+	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
+	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
+	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
+	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
+
+	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+		return -EINVAL;
+
+	usvc->af = nla_get_u16(nla_af);
+#ifdef CONFIG_IP_VS_IPV6
+	if (usvc->af != AF_INET && usvc->af != AF_INET6)
+#else
+	if (usvc->af != AF_INET)
+#endif
+		return -EAFNOSUPPORT;
+
+	if (nla_fwmark) {
+		usvc->protocol = IPPROTO_TCP;
+		usvc->fwmark = nla_get_u32(nla_fwmark);
+	} else {
+		usvc->protocol = nla_get_u16(nla_protocol);
+		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+		usvc->port = nla_get_u16(nla_port);
+		usvc->fwmark = 0;
+	}
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+			      *nla_netmask;
+		struct ip_vs_flags flags;
+		struct ip_vs_service *svc;
+
+		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+			return -EINVAL;
+
+		nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+		/* prefill flags from service if it already exists */
+		if (usvc->fwmark)
+			svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
+		else
+			svc = __ip_vs_service_get(usvc->af, usvc->protocol,
+						  &usvc->addr, usvc->port);
+		if (svc) {
+			usvc->flags = svc->flags;
+			ip_vs_service_put(svc);
+		} else
+			usvc->flags = 0;
+
+		/* set new flags from userland */
+		usvc->flags = (usvc->flags & ~flags.mask) |
+			      (flags.flags & flags.mask);
+		usvc->sched_name = nla_data(nla_sched);
+		usvc->timeout = nla_get_u32(nla_timeout);
+		usvc->netmask = nla_get_u32(nla_netmask);
+	}
+
+	return 0;
+}
+
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+	struct ip_vs_service_user_kern usvc;
+	int ret;
+
+	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (usvc.fwmark)
+		return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+	else
+		return __ip_vs_service_get(usvc.af, usvc.protocol,
+					   &usvc.addr, usvc.port);
+}
+
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+	struct nlattr *nl_dest;
+
+	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+	if (!nl_dest)
+		return -EMSGSIZE;
+
+	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+		    atomic_read(&dest->activeconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+		    atomic_read(&dest->inactconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+		    atomic_read(&dest->persistconns));
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_dest);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_dest);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DEST);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_dest(skb, dest) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	int idx = 0;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Try to find the service for which to dump destinations */
+	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+		goto out_err;
+
+	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	if (IS_ERR(svc) || svc == NULL)
+		goto out_err;
+
+	/* Dump the destinations */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (++idx <= start)
+			continue;
+		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+			idx--;
+			goto nla_put_failure;
+		}
+	}
+
+nla_put_failure:
+	cb->args[0] = idx;
+	ip_vs_service_put(svc);
+
+out_err:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
+				 struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+	struct nlattr *nla_addr, *nla_port;
+
+	/* Parse mandatory identifying destination fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+		return -EINVAL;
+
+	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
+	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
+
+	if (!(nla_addr && nla_port))
+		return -EINVAL;
+
+	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+	udest->port = nla_get_u16(nla_port);
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+			      *nla_l_thresh;
+
+		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
+		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
+		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
+		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
+
+		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+			return -EINVAL;
+
+		udest->conn_flags = nla_get_u32(nla_fwd)
+				    & IP_VS_CONN_F_FWD_MASK;
+		udest->weight = nla_get_u32(nla_weight);
+		udest->u_threshold = nla_get_u32(nla_u_thresh);
+		udest->l_threshold = nla_get_u32(nla_l_thresh);
+	}
+
+	return 0;
+}
+
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid)
+{
+	struct nlattr *nl_daemon;
+
+	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+	if (!nl_daemon)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+	nla_nest_end(skb, nl_daemon);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_daemon);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid,
+				  struct netlink_callback *cb)
+{
+	void *hdr;
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DAEMON);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	mutex_lock(&__ip_vs_mutex);
+	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+					   ip_vs_master_mcast_ifn,
+					   ip_vs_master_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[0] = 1;
+	}
+
+	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+					   ip_vs_backup_mcast_ifn,
+					   ip_vs_backup_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[1] = 1;
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
+	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
+	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
+		return -EINVAL;
+
+	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+}
+
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+	if (!attrs[IPVS_DAEMON_ATTR_STATE])
+		return -EINVAL;
+
+	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+}
+
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+	struct ip_vs_timeout_user t;
+
+	__ip_vs_get_timeouts(&t);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
+		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
+		t.tcp_fin_timeout =
+			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
+		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
+
+	return ip_vs_set_timeout(&t);
+}
+
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ip_vs_service *svc = NULL;
+	struct ip_vs_service_user_kern usvc;
+	struct ip_vs_dest_user_kern udest;
+	int ret = 0, cmd;
+	int need_full_svc = 0, need_full_dest = 0;
+
+	cmd = info->genlhdr->cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	if (cmd == IPVS_CMD_FLUSH) {
+		ret = ip_vs_flush();
+		goto out;
+	} else if (cmd == IPVS_CMD_SET_CONFIG) {
+		ret = ip_vs_genl_set_config(info->attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
+		   cmd == IPVS_CMD_DEL_DAEMON) {
+
+		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+				     info->attrs[IPVS_CMD_ATTR_DAEMON],
+				     ip_vs_daemon_policy)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (cmd == IPVS_CMD_NEW_DAEMON)
+			ret = ip_vs_genl_new_daemon(daemon_attrs);
+		else
+			ret = ip_vs_genl_del_daemon(daemon_attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_ZERO &&
+		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+		ret = ip_vs_zero_all();
+		goto out;
+	}
+
+	/* All following commands require a service argument, so check if we
+	 * received a valid one. We need a full service specification when
+	 * adding / editing a service. Only identifying members otherwise. */
+	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+		need_full_svc = 1;
+
+	ret = ip_vs_genl_parse_service(&usvc,
+				       info->attrs[IPVS_CMD_ATTR_SERVICE],
+				       need_full_svc);
+	if (ret)
+		goto out;
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+					  &usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+
+	/* Unless we're adding a new service, the service must already exist */
+	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Destination commands require a valid destination argument. For
+	 * adding / editing a destination, we need a full destination
+	 * specification. */
+	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+	    cmd == IPVS_CMD_DEL_DEST) {
+		if (cmd != IPVS_CMD_DEL_DEST)
+			need_full_dest = 1;
+
+		ret = ip_vs_genl_parse_dest(&udest,
+					    info->attrs[IPVS_CMD_ATTR_DEST],
+					    need_full_dest);
+		if (ret)
+			goto out;
+	}
+
+	switch (cmd) {
+	case IPVS_CMD_NEW_SERVICE:
+		if (svc == NULL)
+			ret = ip_vs_add_service(&usvc, &svc);
+		else
+			ret = -EEXIST;
+		break;
+	case IPVS_CMD_SET_SERVICE:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IPVS_CMD_DEL_SERVICE:
+		ret = ip_vs_del_service(svc);
+		break;
+	case IPVS_CMD_NEW_DEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IPVS_CMD_SET_DEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IPVS_CMD_DEL_DEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	case IPVS_CMD_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	if (svc)
+		ip_vs_service_put(svc);
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *reply;
+	int ret, cmd, reply_cmd;
+
+	cmd = info->genlhdr->cmd;
+
+	if (cmd == IPVS_CMD_GET_SERVICE)
+		reply_cmd = IPVS_CMD_NEW_SERVICE;
+	else if (cmd == IPVS_CMD_GET_INFO)
+		reply_cmd = IPVS_CMD_SET_INFO;
+	else if (cmd == IPVS_CMD_GET_CONFIG)
+		reply_cmd = IPVS_CMD_SET_CONFIG;
+	else {
+		IP_VS_ERR("unknown Generic Netlink command\n");
+		return -EINVAL;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+	if (reply == NULL)
+		goto nla_put_failure;
+
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+	{
+		struct ip_vs_service *svc;
+
+		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		if (IS_ERR(svc)) {
+			ret = PTR_ERR(svc);
+			goto out_err;
+		} else if (svc) {
+			ret = ip_vs_genl_fill_service(msg, svc);
+			ip_vs_service_put(svc);
+			if (ret)
+				goto nla_put_failure;
+		} else {
+			ret = -ESRCH;
+			goto out_err;
+		}
+
+		break;
+	}
+
+	case IPVS_CMD_GET_CONFIG:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+			    t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+		break;
+	}
+
+	case IPVS_CMD_GET_INFO:
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+			    IP_VS_CONN_TAB_SIZE);
+		break;
+	}
+
+	genlmsg_end(msg, reply);
+	ret = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	IP_VS_ERR("not enough space in Netlink message\n");
+	ret = -EMSGSIZE;
+
+out_err:
+	nlmsg_free(msg);
+out:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+	{
+		.cmd	= IPVS_CMD_NEW_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+		.dumpit	= ip_vs_genl_dump_services,
+		.policy	= ip_vs_cmd_policy,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.dumpit	= ip_vs_genl_dump_dests,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.dumpit	= ip_vs_genl_dump_daemons,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_INFO,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_ZERO,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_FLUSH,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+};
+
+static int __init ip_vs_genl_register(void)
+{
+	int ret, i;
+
+	ret = genl_register_family(&ip_vs_genl_family);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
+		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
+		if (ret)
+			goto err_out;
+	}
+	return 0;
+
+err_out:
+	genl_unregister_family(&ip_vs_genl_family);
+	return ret;
+}
+
+static void ip_vs_genl_unregister(void)
+{
+	genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
+
+int __init ip_vs_control_init(void)
+{
+	int ret;
+	int idx;
+
+	EnterFunction(2);
+
+	ret = nf_register_sockopt(&ip_vs_sockopts);
+	if (ret) {
+		IP_VS_ERR("cannot register sockopt.\n");
+		return ret;
+	}
+
+	ret = ip_vs_genl_register();
+	if (ret) {
+		IP_VS_ERR("cannot register Generic Netlink interface.\n");
+		nf_unregister_sockopt(&ip_vs_sockopts);
+		return ret;
+	}
+
+	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
+	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
+
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
+
+	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
+		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+	}
+	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+	}
+
+	ip_vs_new_estimator(&ip_vs_stats);
+
+	/* Hook the defense timer */
+	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+
+	LeaveFunction(2);
+	return 0;
+}
+
+
+void ip_vs_control_cleanup(void)
+{
+	EnterFunction(2);
+	ip_vs_trash_cleanup();
+	cancel_rearming_delayed_work(&defense_work);
+	cancel_work_sync(&defense_work.work);
+	ip_vs_kill_estimator(&ip_vs_stats);
+	unregister_sysctl_table(sysctl_header);
+	proc_net_remove(&init_net, "ip_vs_stats");
+	proc_net_remove(&init_net, "ip_vs");
+	ip_vs_genl_unregister();
+	nf_unregister_sockopt(&ip_vs_sockopts);
+	LeaveFunction(2);
+}
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
new file mode 100644
index 00000000000..a16943fd72f
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -0,0 +1,261 @@
+/*
+ * IPVS:        Destination Hashing scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              Inspired by the consistent hashing scheduler patch from
+ *              Thomas Proell <proellt@gmx.de>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The dh algorithm is to select server by the hash key of destination IP
+ * address. The pseudo code is as follows:
+ *
+ *       n <- servernode[dest_ip];
+ *       if (n is dead) OR
+ *          (n is overloaded) OR (n.weight <= 0) then
+ *                 return NULL;
+ *
+ *       return n;
+ *
+ * Notes that servernode is a 256-bucket hash table that maps the hash
+ * index derived from packet destination IP address to the current server
+ * array. If the dh scheduler is used in cache cluster, it is good to
+ * combine it with cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      IPVS DH bucket
+ */
+struct ip_vs_dh_bucket {
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+};
+
+/*
+ *     for IPVS DH entry hash table
+ */
+#ifndef CONFIG_IP_VS_DH_TAB_BITS
+#define CONFIG_IP_VS_DH_TAB_BITS        8
+#endif
+#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
+#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
+#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
+
+
+/*
+ *	Returns hash value for IPVS DH entry
+ */
+static inline unsigned ip_vs_dh_hashkey(__be32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK;
+}
+
+
+/*
+ *      Get ip_vs_dest associated with supplied parameters.
+ */
+static inline struct ip_vs_dest *
+ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr)
+{
+	return (tbl[ip_vs_dh_hashkey(addr)]).dest;
+}
+
+
+/*
+ *      Assign all the hash buckets of the specified table with the service.
+ */
+static int
+ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_dh_bucket *b;
+	struct list_head *p;
+	struct ip_vs_dest *dest;
+
+	b = tbl;
+	p = &svc->destinations;
+	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+		if (list_empty(p)) {
+			b->dest = NULL;
+		} else {
+			if (p == &svc->destinations)
+				p = p->next;
+
+			dest = list_entry(p, struct ip_vs_dest, n_list);
+			atomic_inc(&dest->refcnt);
+			b->dest = dest;
+
+			p = p->next;
+		}
+		b++;
+	}
+	return 0;
+}
+
+
+/*
+ *      Flush all the hash buckets of the specified table.
+ */
+static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
+{
+	int i;
+	struct ip_vs_dh_bucket *b;
+
+	b = tbl;
+	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+		if (b->dest) {
+			atomic_dec(&b->dest->refcnt);
+			b->dest = NULL;
+		}
+		b++;
+	}
+}
+
+
+static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl;
+
+	/* allocate the DH table for this service */
+	tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
+		      GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_dh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_dh_flush(tbl);
+
+	/* release the table itself */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+	return 0;
+}
+
+
+static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_dh_flush(tbl);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_dh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+/*
+ *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
+ *      consider that the server is overloaded here.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+	return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ *      Destination hashing scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_dh_bucket *tbl;
+	struct iphdr *iph = ip_hdr(skb);
+
+	IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
+	dest = ip_vs_dh_get(tbl, iph->daddr);
+	if (!dest
+	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
+	    || atomic_read(&dest->weight) <= 0
+	    || is_overloaded(dest)) {
+		return NULL;
+	}
+
+	IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS DH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_dh_scheduler =
+{
+	.name =			"dh",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
+	.init_service =		ip_vs_dh_init_svc,
+	.done_service =		ip_vs_dh_done_svc,
+	.update_service =	ip_vs_dh_update_svc,
+	.schedule =		ip_vs_dh_schedule,
+};
+
+
+static int __init ip_vs_dh_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+static void __exit ip_vs_dh_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+module_init(ip_vs_dh_init);
+module_exit(ip_vs_dh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
new file mode 100644
index 00000000000..2eb2860dabb
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -0,0 +1,166 @@
+/*
+ * ip_vs_est.c: simple rate estimator for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/list.h>
+
+#include <net/ip_vs.h>
+
+/*
+  This code is to estimate rate in a shorter interval (such as 8
+  seconds) for virtual services and real servers. For measure rate in a
+  long interval, it is easy to implement a user level daemon which
+  periodically reads those statistical counters and measure rate.
+
+  Currently, the measurement is activated by slow timer handler. Hope
+  this measurement will not introduce too much load.
+
+  We measure rate during the last 8 seconds every 2 seconds:
+
+    avgrate = avgrate*(1-W) + rate*W
+
+    where W = 2^(-2)
+
+  NOTES.
+
+  * The stored value for average bps is scaled by 2^5, so that maximal
+    rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+
+  * A lot code is taken from net/sched/estimator.c
+ */
+
+
+static void estimation_timer(unsigned long arg);
+
+static LIST_HEAD(est_list);
+static DEFINE_SPINLOCK(est_lock);
+static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
+
+static void estimation_timer(unsigned long arg)
+{
+	struct ip_vs_estimator *e;
+	struct ip_vs_stats *s;
+	u32 n_conns;
+	u32 n_inpkts, n_outpkts;
+	u64 n_inbytes, n_outbytes;
+	u32 rate;
+
+	spin_lock(&est_lock);
+	list_for_each_entry(e, &est_list, list) {
+		s = container_of(e, struct ip_vs_stats, est);
+
+		spin_lock(&s->lock);
+		n_conns = s->ustats.conns;
+		n_inpkts = s->ustats.inpkts;
+		n_outpkts = s->ustats.outpkts;
+		n_inbytes = s->ustats.inbytes;
+		n_outbytes = s->ustats.outbytes;
+
+		/* scaled by 2^10, but divided 2 seconds */
+		rate = (n_conns - e->last_conns)<<9;
+		e->last_conns = n_conns;
+		e->cps += ((long)rate - (long)e->cps)>>2;
+		s->ustats.cps = (e->cps+0x1FF)>>10;
+
+		rate = (n_inpkts - e->last_inpkts)<<9;
+		e->last_inpkts = n_inpkts;
+		e->inpps += ((long)rate - (long)e->inpps)>>2;
+		s->ustats.inpps = (e->inpps+0x1FF)>>10;
+
+		rate = (n_outpkts - e->last_outpkts)<<9;
+		e->last_outpkts = n_outpkts;
+		e->outpps += ((long)rate - (long)e->outpps)>>2;
+		s->ustats.outpps = (e->outpps+0x1FF)>>10;
+
+		rate = (n_inbytes - e->last_inbytes)<<4;
+		e->last_inbytes = n_inbytes;
+		e->inbps += ((long)rate - (long)e->inbps)>>2;
+		s->ustats.inbps = (e->inbps+0xF)>>5;
+
+		rate = (n_outbytes - e->last_outbytes)<<4;
+		e->last_outbytes = n_outbytes;
+		e->outbps += ((long)rate - (long)e->outbps)>>2;
+		s->ustats.outbps = (e->outbps+0xF)>>5;
+		spin_unlock(&s->lock);
+	}
+	spin_unlock(&est_lock);
+	mod_timer(&est_timer, jiffies + 2*HZ);
+}
+
+void ip_vs_new_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est = &stats->est;
+
+	INIT_LIST_HEAD(&est->list);
+
+	est->last_conns = stats->ustats.conns;
+	est->cps = stats->ustats.cps<<10;
+
+	est->last_inpkts = stats->ustats.inpkts;
+	est->inpps = stats->ustats.inpps<<10;
+
+	est->last_outpkts = stats->ustats.outpkts;
+	est->outpps = stats->ustats.outpps<<10;
+
+	est->last_inbytes = stats->ustats.inbytes;
+	est->inbps = stats->ustats.inbps<<5;
+
+	est->last_outbytes = stats->ustats.outbytes;
+	est->outbps = stats->ustats.outbps<<5;
+
+	spin_lock_bh(&est_lock);
+	list_add(&est->list, &est_list);
+	spin_unlock_bh(&est_lock);
+}
+
+void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est = &stats->est;
+
+	spin_lock_bh(&est_lock);
+	list_del(&est->list);
+	spin_unlock_bh(&est_lock);
+}
+
+void ip_vs_zero_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est = &stats->est;
+
+	/* set counters zero, caller must hold the stats->lock lock */
+	est->last_inbytes = 0;
+	est->last_outbytes = 0;
+	est->last_conns = 0;
+	est->last_inpkts = 0;
+	est->last_outpkts = 0;
+	est->cps = 0;
+	est->inpps = 0;
+	est->outpps = 0;
+	est->inbps = 0;
+	est->outbps = 0;
+}
+
+int __init ip_vs_estimator_init(void)
+{
+	mod_timer(&est_timer, jiffies + 2 * HZ);
+	return 0;
+}
+
+void ip_vs_estimator_cleanup(void)
+{
+	del_timer_sync(&est_timer);
+}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
new file mode 100644
index 00000000000..2e7dbd8b73a
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -0,0 +1,410 @@
+/*
+ * ip_vs_ftp.c: IPVS ftp application module
+ *
+ * Authors:	Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ *
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference
+ * is that ip_vs_ftp module handles the reverse direction to ip_masq_ftp.
+ *
+ *		IP_MASQ_FTP ftp masquerading module
+ *
+ * Version:	@(#)ip_masq_ftp.c 0.04   02/05/96
+ *
+ * Author:	Wouter Gadeyne
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <asm/unaligned.h>
+
+#include <net/ip_vs.h>
+
+
+#define SERVER_STRING "227 Entering Passive Mode ("
+#define CLIENT_STRING "PORT "
+
+
+/*
+ * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
+ * First port is set to the default port.
+ */
+static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0};
+module_param_array(ports, ushort, NULL, 0);
+MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands");
+
+
+/*	Dummy variable */
+static int ip_vs_ftp_pasv;
+
+
+static int
+ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+	return 0;
+}
+
+
+static int
+ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+	return 0;
+}
+
+
+/*
+ * Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started
+ * with the "pattern" and terminated with the "term" character.
+ * <addr,port> is in network order.
+ */
+static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
+				  const char *pattern, size_t plen, char term,
+				  __be32 *addr, __be16 *port,
+				  char **start, char **end)
+{
+	unsigned char p[6];
+	int i = 0;
+
+	if (data_limit - data < plen) {
+		/* check if there is partial match */
+		if (strnicmp(data, pattern, data_limit - data) == 0)
+			return -1;
+		else
+			return 0;
+	}
+
+	if (strnicmp(data, pattern, plen) != 0) {
+		return 0;
+	}
+	*start = data + plen;
+
+	for (data = *start; *data != term; data++) {
+		if (data == data_limit)
+			return -1;
+	}
+	*end = data;
+
+	memset(p, 0, sizeof(p));
+	for (data = *start; data != *end; data++) {
+		if (*data >= '0' && *data <= '9') {
+			p[i] = p[i]*10 + *data - '0';
+		} else if (*data == ',' && i < 5) {
+			i++;
+		} else {
+			/* unexpected character */
+			return -1;
+		}
+	}
+
+	if (i != 5)
+		return -1;
+
+	*addr = get_unaligned((__be32 *)p);
+	*port = get_unaligned((__be16 *)(p + 4));
+	return 1;
+}
+
+
+/*
+ * Look at outgoing ftp packets to catch the response to a PASV command
+ * from the server (inside-to-outside).
+ * When we see one, we build a connection entry with the client address,
+ * client port 0 (unknown at the moment), the server address and the
+ * server port.  Mark the current connection entry as a control channel
+ * of the new entry. All this work is just to make the data connection
+ * can be scheduled to the right server later.
+ *
+ * The outgoing packet should be something like
+ *   "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
+ * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
+ */
+static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
+			 struct sk_buff *skb, int *diff)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_limit;
+	char *start, *end;
+	union nf_inet_addr from;
+	__be16 port;
+	struct ip_vs_conn *n_cp;
+	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
+	unsigned buf_len;
+	int ret;
+
+#ifdef CONFIG_IP_VS_IPV6
+	/* This application helper doesn't work with IPv6 yet,
+	 * so turn this into a no-op for IPv6 packets
+	 */
+	if (cp->af == AF_INET6)
+		return 1;
+#endif
+
+	*diff = 0;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+		return 1;
+
+	/* Linear packets are much easier to deal with. */
+	if (!skb_make_writable(skb, skb->len))
+		return 0;
+
+	if (cp->app_data == &ip_vs_ftp_pasv) {
+		iph = ip_hdr(skb);
+		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+		data = (char *)th + (th->doff << 2);
+		data_limit = skb_tail_pointer(skb);
+
+		if (ip_vs_ftp_get_addrport(data, data_limit,
+					   SERVER_STRING,
+					   sizeof(SERVER_STRING)-1, ')',
+					   &from.ip, &port,
+					   &start, &end) != 1)
+			return 1;
+
+		IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
+			  "%u.%u.%u.%u:%d detected\n",
+			  NIPQUAD(from.ip), ntohs(port),
+			  NIPQUAD(cp->caddr.ip), 0);
+
+		/*
+		 * Now update or create an connection entry for it
+		 */
+		n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port,
+					  &cp->caddr, 0);
+		if (!n_cp) {
+			n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+					      &cp->caddr, 0,
+					      &cp->vaddr, port,
+					      &from, port,
+					      IP_VS_CONN_F_NO_CPORT,
+					      cp->dest);
+			if (!n_cp)
+				return 0;
+
+			/* add its controller */
+			ip_vs_control_add(n_cp, cp);
+		}
+
+		/*
+		 * Replace the old passive address with the new one
+		 */
+		from.ip = n_cp->vaddr.ip;
+		port = n_cp->vport;
+		sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
+			(ntohs(port)>>8)&255, ntohs(port)&255);
+		buf_len = strlen(buf);
+
+		/*
+		 * Calculate required delta-offset to keep TCP happy
+		 */
+		*diff = buf_len - (end-start);
+
+		if (*diff == 0) {
+			/* simply replace it with new passive address */
+			memcpy(start, buf, buf_len);
+			ret = 1;
+		} else {
+			ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
+					  end-start, buf, buf_len);
+		}
+
+		cp->app_data = NULL;
+		ip_vs_tcp_conn_listen(n_cp);
+		ip_vs_conn_put(n_cp);
+		return ret;
+	}
+	return 1;
+}
+
+
+/*
+ * Look at incoming ftp packets to catch the PASV/PORT command
+ * (outside-to-inside).
+ *
+ * The incoming packet having the PORT command should be something like
+ *      "PORT xxx,xxx,xxx,xxx,ppp,ppp\n".
+ * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number.
+ * In this case, we create a connection entry using the client address and
+ * port, so that the active ftp data connection from the server can reach
+ * the client.
+ */
+static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
+			struct sk_buff *skb, int *diff)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_start, *data_limit;
+	char *start, *end;
+	union nf_inet_addr to;
+	__be16 port;
+	struct ip_vs_conn *n_cp;
+
+#ifdef CONFIG_IP_VS_IPV6
+	/* This application helper doesn't work with IPv6 yet,
+	 * so turn this into a no-op for IPv6 packets
+	 */
+	if (cp->af == AF_INET6)
+		return 1;
+#endif
+
+	/* no diff required for incoming packets */
+	*diff = 0;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+		return 1;
+
+	/* Linear packets are much easier to deal with. */
+	if (!skb_make_writable(skb, skb->len))
+		return 0;
+
+	/*
+	 * Detecting whether it is passive
+	 */
+	iph = ip_hdr(skb);
+	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+
+	/* Since there may be OPTIONS in the TCP packet and the HLEN is
+	   the length of the header in 32-bit multiples, it is accurate
+	   to calculate data address by th+HLEN*4 */
+	data = data_start = (char *)th + (th->doff << 2);
+	data_limit = skb_tail_pointer(skb);
+
+	while (data <= data_limit - 6) {
+		if (strnicmp(data, "PASV\r\n", 6) == 0) {
+			/* Passive mode on */
+			IP_VS_DBG(7, "got PASV at %td of %td\n",
+				  data - data_start,
+				  data_limit - data_start);
+			cp->app_data = &ip_vs_ftp_pasv;
+			return 1;
+		}
+		data++;
+	}
+
+	/*
+	 * To support virtual FTP server, the scenerio is as follows:
+	 *       FTP client ----> Load Balancer ----> FTP server
+	 * First detect the port number in the application data,
+	 * then create a new connection entry for the coming data
+	 * connection.
+	 */
+	if (ip_vs_ftp_get_addrport(data_start, data_limit,
+				   CLIENT_STRING, sizeof(CLIENT_STRING)-1,
+				   '\r', &to.ip, &port,
+				   &start, &end) != 1)
+		return 1;
+
+	IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
+		  NIPQUAD(to.ip), ntohs(port));
+
+	/* Passive mode off */
+	cp->app_data = NULL;
+
+	/*
+	 * Now update or create a connection entry for it
+	 */
+	IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
+		  ip_vs_proto_name(iph->protocol),
+		  NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
+
+	n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol,
+				 &to, port,
+				 &cp->vaddr, htons(ntohs(cp->vport)-1));
+	if (!n_cp) {
+		n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+				      &to, port,
+				      &cp->vaddr, htons(ntohs(cp->vport)-1),
+				      &cp->daddr, htons(ntohs(cp->dport)-1),
+				      0,
+				      cp->dest);
+		if (!n_cp)
+			return 0;
+
+		/* add its controller */
+		ip_vs_control_add(n_cp, cp);
+	}
+
+	/*
+	 *	Move tunnel to listen state
+	 */
+	ip_vs_tcp_conn_listen(n_cp);
+	ip_vs_conn_put(n_cp);
+
+	return 1;
+}
+
+
+static struct ip_vs_app ip_vs_ftp = {
+	.name =		"ftp",
+	.type =		IP_VS_APP_TYPE_FTP,
+	.protocol =	IPPROTO_TCP,
+	.module =	THIS_MODULE,
+	.incs_list =	LIST_HEAD_INIT(ip_vs_ftp.incs_list),
+	.init_conn =	ip_vs_ftp_init_conn,
+	.done_conn =	ip_vs_ftp_done_conn,
+	.bind_conn =	NULL,
+	.unbind_conn =	NULL,
+	.pkt_out =	ip_vs_ftp_out,
+	.pkt_in =	ip_vs_ftp_in,
+};
+
+
+/*
+ *	ip_vs_ftp initialization
+ */
+static int __init ip_vs_ftp_init(void)
+{
+	int i, ret;
+	struct ip_vs_app *app = &ip_vs_ftp;
+
+	ret = register_ip_vs_app(app);
+	if (ret)
+		return ret;
+
+	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
+		if (!ports[i])
+			continue;
+		ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+		if (ret)
+			break;
+		IP_VS_INFO("%s: loaded support on port[%d] = %d\n",
+			   app->name, i, ports[i]);
+	}
+
+	if (ret)
+		unregister_ip_vs_app(app);
+
+	return ret;
+}
+
+
+/*
+ *	ip_vs_ftp finish.
+ */
+static void __exit ip_vs_ftp_exit(void)
+{
+	unregister_ip_vs_app(&ip_vs_ftp);
+}
+
+
+module_init(ip_vs_ftp_init);
+module_exit(ip_vs_ftp_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
new file mode 100644
index 00000000000..6ecef3518ca
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -0,0 +1,555 @@
+/*
+ * IPVS:        Locality-Based Least-Connection scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Martin Hamilton         :    fixed the terrible locking bugs
+ *                                   *lock(tbl->lock) ==> *lock(&tbl->lock)
+ *     Wensong Zhang           :    fixed the uninitilized tbl->lock bug
+ *     Wensong Zhang           :    added doing full expiration check to
+ *                                   collect stale entries of 24+ hours when
+ *                                   no partial expire check in a half hour
+ *     Julian Anastasov        :    replaced del_timer call with del_timer_sync
+ *                                   to avoid the possible race between timer
+ *                                   handler and del_timer thread in SMP
+ *
+ */
+
+/*
+ * The lblc algorithm is as follows (pseudo code):
+ *
+ *       if cachenode[dest_ip] is null then
+ *               n, cachenode[dest_ip] <- {weighted least-conn node};
+ *       else
+ *               n <- cachenode[dest_ip];
+ *               if (n is dead) OR
+ *                  (n.conns>n.weight AND
+ *                   there is a node m with m.conns<m.weight/2) then
+ *                 n, cachenode[dest_ip] <- {weighted least-conn node};
+ *
+ *       return n;
+ *
+ * Thanks must go to Wenzhuo Zhang for talking WCCP to me and pushing
+ * me to write this module.
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/jiffies.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *    It is for garbage collection of stale IPVS lblc entries,
+ *    when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL   (60*HZ)
+#define ENTRY_TIMEOUT           (6*60*HZ)
+
+/*
+ *    It is for full expiration check.
+ *    When there is no partial expiration check (garbage collection)
+ *    in a half hour, do a full expiration check to collect stale
+ *    entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION   30
+static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
+
+
+/*
+ *     for IPVS lblc entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
+#define CONFIG_IP_VS_LBLC_TAB_BITS      10
+#endif
+#define IP_VS_LBLC_TAB_BITS     CONFIG_IP_VS_LBLC_TAB_BITS
+#define IP_VS_LBLC_TAB_SIZE     (1 << IP_VS_LBLC_TAB_BITS)
+#define IP_VS_LBLC_TAB_MASK     (IP_VS_LBLC_TAB_SIZE - 1)
+
+
+/*
+ *      IPVS lblc entry represents an association between destination
+ *      IP address and its destination server
+ */
+struct ip_vs_lblc_entry {
+	struct list_head        list;
+	__be32                  addr;           /* destination IP address */
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+	unsigned long           lastuse;        /* last used time */
+};
+
+
+/*
+ *      IPVS lblc hash table
+ */
+struct ip_vs_lblc_table {
+	struct list_head        bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
+	atomic_t                entries;        /* number of entries */
+	int                     max_size;       /* maximum size of entries */
+	struct timer_list       periodic_timer; /* collect stale entries */
+	int                     rover;          /* rover for expire check */
+	int                     counter;        /* counter for no expire */
+};
+
+
+/*
+ *      IPVS LBLC sysctl table
+ */
+
+static ctl_table vs_vars_table[] = {
+	{
+		.procname	= "lblc_expiration",
+		.data		= &sysctl_ip_vs_lblc_expiration,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sysctl_header;
+
+static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+{
+	list_del(&en->list);
+	/*
+	 * We don't kfree dest because it is refered either by its service
+	 * or the trash dest list.
+	 */
+	atomic_dec(&en->dest->refcnt);
+	kfree(en);
+}
+
+
+/*
+ *	Returns hash value for IPVS LBLC entry
+ */
+static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
+}
+
+
+/*
+ *	Hash an entry in the ip_vs_lblc_table.
+ *	returns bool success.
+ */
+static void
+ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
+{
+	unsigned hash = ip_vs_lblc_hashkey(en->addr);
+
+	list_add(&en->list, &tbl->bucket[hash]);
+	atomic_inc(&tbl->entries);
+}
+
+
+/*
+ *  Get ip_vs_lblc_entry associated with supplied parameters. Called under read
+ *  lock
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
+{
+	unsigned hash = ip_vs_lblc_hashkey(addr);
+	struct ip_vs_lblc_entry *en;
+
+	list_for_each_entry(en, &tbl->bucket[hash], list)
+		if (en->addr == addr)
+			return en;
+
+	return NULL;
+}
+
+
+/*
+ * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
+ * address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblc_entry *en;
+
+	en = ip_vs_lblc_get(tbl, daddr);
+	if (!en) {
+		en = kmalloc(sizeof(*en), GFP_ATOMIC);
+		if (!en) {
+			IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
+			return NULL;
+		}
+
+		en->addr = daddr;
+		en->lastuse = jiffies;
+
+		atomic_inc(&dest->refcnt);
+		en->dest = dest;
+
+		ip_vs_lblc_hash(tbl, en);
+	} else if (en->dest != dest) {
+		atomic_dec(&en->dest->refcnt);
+		atomic_inc(&dest->refcnt);
+		en->dest = dest;
+	}
+
+	return en;
+}
+
+
+/*
+ *      Flush all the entries of the specified table.
+ */
+static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
+{
+	struct ip_vs_lblc_entry *en, *nxt;
+	int i;
+
+	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+		}
+	}
+}
+
+
+static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct ip_vs_lblc_entry *en, *nxt;
+	unsigned long now = jiffies;
+	int i, j;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
+
+		write_lock(&svc->sched_lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_before(now,
+					en->lastuse + sysctl_ip_vs_lblc_expiration))
+				continue;
+
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&svc->sched_lock);
+	}
+	tbl->rover = j;
+}
+
+
+/*
+ *      Periodical timer handler for IPVS lblc table
+ *      It is used to collect stale entries when the number of entries
+ *      exceeds the maximum size of the table.
+ *
+ *      Fixme: we probably need more complicated algorithm to collect
+ *             entries that have not been used for a long time even
+ *             if the number of entries doesn't exceed the maximum size
+ *             of the table.
+ *      The full expiration check is for this purpose now.
+ */
+static void ip_vs_lblc_check_expire(unsigned long data)
+{
+	struct ip_vs_service *svc = (struct ip_vs_service *) data;
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	unsigned long now = jiffies;
+	int goal;
+	int i, j;
+	struct ip_vs_lblc_entry *en, *nxt;
+
+	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+		/* do full expiration check */
+		ip_vs_lblc_full_check(svc);
+		tbl->counter = 1;
+		goto out;
+	}
+
+	if (atomic_read(&tbl->entries) <= tbl->max_size) {
+		tbl->counter++;
+		goto out;
+	}
+
+	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+	if (goal > tbl->max_size/2)
+		goal = tbl->max_size/2;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
+
+		write_lock(&svc->sched_lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
+				continue;
+
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+			goal--;
+		}
+		write_unlock(&svc->sched_lock);
+		if (goal <= 0)
+			break;
+	}
+	tbl->rover = j;
+
+  out:
+	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+
+static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblc_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblc_table for this service
+	 */
+	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
+		  "current service\n", sizeof(*tbl));
+
+	/*
+	 *    Initialize the hash buckets
+	 */
+	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	}
+	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
+			(unsigned long)svc);
+	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
+
+	return 0;
+}
+
+
+static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+
+	/* remove periodic timer */
+	del_timer_sync(&tbl->periodic_timer);
+
+	/* got to clean up table entries here */
+	ip_vs_lblc_flush(tbl);
+
+	/* release the table itself */
+	kfree(tbl);
+	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
+		  sizeof(*tbl));
+
+	return 0;
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;
+
+	/*
+	 * We think the overhead of processing active connections is fifty
+	 * times higher than that of inactive connections in average. (This
+	 * fifty times might not be accurate, we will change it later.) We
+	 * use the following formula to estimate the overhead:
+	 *                dest->activeconns*50 + dest->inactconns
+	 * and the load:
+	 *                (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *                h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connection.
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		if (atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+/*
+ *   If this destination server is overloaded and there is a less loaded
+ *   server, then return true.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
+		struct ip_vs_dest *d;
+
+		list_for_each_entry(d, &svc->destinations, n_list) {
+			if (atomic_read(&d->activeconns)*2
+			    < atomic_read(&d->weight)) {
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+
+/*
+ *    Locality-Based (weighted) Least-Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_dest *dest = NULL;
+	struct ip_vs_lblc_entry *en;
+
+	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
+
+	/* First look in our cache */
+	read_lock(&svc->sched_lock);
+	en = ip_vs_lblc_get(tbl, iph->daddr);
+	if (en) {
+		/* We only hold a read lock, but this is atomic */
+		en->lastuse = jiffies;
+
+		/*
+		 * If the destination is not available, i.e. it's in the trash,
+		 * we must ignore it, as it may be removed from under our feet,
+		 * if someone drops our reference count. Our caller only makes
+		 * sure that destinations, that are not in the trash, are not
+		 * moved to the trash, while we are scheduling. But anyone can
+		 * free up entries from the trash at any time.
+		 */
+
+		if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
+			dest = en->dest;
+	}
+	read_unlock(&svc->sched_lock);
+
+	/* If the destination has a weight and is not overloaded, use it */
+	if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
+		goto out;
+
+	/* No cache entry or it is invalid, time to schedule */
+	dest = __ip_vs_lblc_schedule(svc, iph);
+	if (!dest) {
+		IP_VS_DBG(1, "no destination available\n");
+		return NULL;
+	}
+
+	/* If we fail to create a cache entry, we'll just use the valid dest */
+	write_lock(&svc->sched_lock);
+	ip_vs_lblc_new(tbl, iph->daddr, dest);
+	write_unlock(&svc->sched_lock);
+
+out:
+	IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS LBLC Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblc_scheduler =
+{
+	.name =			"lblc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
+	.init_service =		ip_vs_lblc_init_svc,
+	.done_service =		ip_vs_lblc_done_svc,
+	.schedule =		ip_vs_lblc_schedule,
+};
+
+
+static int __init ip_vs_lblc_init(void)
+{
+	int ret;
+
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+	if (ret)
+		unregister_sysctl_table(sysctl_header);
+	return ret;
+}
+
+
+static void __exit ip_vs_lblc_cleanup(void)
+{
+	unregister_sysctl_table(sysctl_header);
+	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+}
+
+
+module_init(ip_vs_lblc_init);
+module_exit(ip_vs_lblc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
new file mode 100644
index 00000000000..1f75ea83bcf
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -0,0 +1,755 @@
+/*
+ * IPVS:        Locality-Based Least-Connection with Replication scheduler
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Julian Anastasov        :    Added the missing (dest->weight>0)
+ *                                  condition in the ip_vs_dest_set_max.
+ *
+ */
+
+/*
+ * The lblc/r algorithm is as follows (pseudo code):
+ *
+ *       if serverSet[dest_ip] is null then
+ *               n, serverSet[dest_ip] <- {weighted least-conn node};
+ *       else
+ *               n <- {least-conn (alive) node in serverSet[dest_ip]};
+ *               if (n is null) OR
+ *                  (n.conns>n.weight AND
+ *                   there is a node m with m.conns<m.weight/2) then
+ *                   n <- {weighted least-conn node};
+ *                   add n to serverSet[dest_ip];
+ *               if |serverSet[dest_ip]| > 1 AND
+ *                   now - serverSet[dest_ip].lastMod > T then
+ *                   m <- {most conn node in serverSet[dest_ip]};
+ *                   remove m from serverSet[dest_ip];
+ *       if serverSet[dest_ip] changed then
+ *               serverSet[dest_ip].lastMod <- now;
+ *
+ *       return n;
+ *
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/jiffies.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <net/net_namespace.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *    It is for garbage collection of stale IPVS lblcr entries,
+ *    when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL   (60*HZ)
+#define ENTRY_TIMEOUT           (6*60*HZ)
+
+/*
+ *    It is for full expiration check.
+ *    When there is no partial expiration check (garbage collection)
+ *    in a half hour, do a full expiration check to collect stale
+ *    entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION   30
+static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
+
+
+/*
+ *     for IPVS lblcr entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
+#define CONFIG_IP_VS_LBLCR_TAB_BITS      10
+#endif
+#define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS
+#define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)
+#define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)
+
+
+/*
+ *      IPVS destination set structure and operations
+ */
+struct ip_vs_dest_list {
+	struct ip_vs_dest_list  *next;          /* list link */
+	struct ip_vs_dest       *dest;          /* destination server */
+};
+
+struct ip_vs_dest_set {
+	atomic_t                size;           /* set size */
+	unsigned long           lastmod;        /* last modified time */
+	struct ip_vs_dest_list  *list;          /* destination list */
+	rwlock_t	        lock;           /* lock for this list */
+};
+
+
+static struct ip_vs_dest_list *
+ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list *e;
+
+	for (e=set->list; e!=NULL; e=e->next) {
+		if (e->dest == dest)
+			/* already existed */
+			return NULL;
+	}
+
+	e = kmalloc(sizeof(*e), GFP_ATOMIC);
+	if (e == NULL) {
+		IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
+		return NULL;
+	}
+
+	atomic_inc(&dest->refcnt);
+	e->dest = dest;
+
+	/* link it to the list */
+	e->next = set->list;
+	set->list = e;
+	atomic_inc(&set->size);
+
+	set->lastmod = jiffies;
+	return e;
+}
+
+static void
+ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list *e, **ep;
+
+	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+		if (e->dest == dest) {
+			/* HIT */
+			*ep = e->next;
+			atomic_dec(&set->size);
+			set->lastmod = jiffies;
+			atomic_dec(&e->dest->refcnt);
+			kfree(e);
+			break;
+		}
+		ep = &e->next;
+	}
+}
+
+static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
+{
+	struct ip_vs_dest_list *e, **ep;
+
+	write_lock(&set->lock);
+	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+		*ep = e->next;
+		/*
+		 * We don't kfree dest because it is refered either
+		 * by its service or by the trash dest list.
+		 */
+		atomic_dec(&e->dest->refcnt);
+		kfree(e);
+	}
+	write_unlock(&set->lock);
+}
+
+/* get weighted least-connection node in the destination set */
+static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
+{
+	register struct ip_vs_dest_list *e;
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;
+
+	if (set == NULL)
+		return NULL;
+
+	/* select the first destination server, whose weight > 0 */
+	for (e=set->list; e!=NULL; e=e->next) {
+		least = e->dest;
+		if (least->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		if ((atomic_read(&least->weight) > 0)
+		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/* find the destination with the weighted least load */
+  nextstage:
+	for (e=e->next; e!=NULL; e=e->next) {
+		dest = e->dest;
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if ((loh * atomic_read(&dest->weight) >
+		     doh * atomic_read(&least->weight))
+		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+	return least;
+}
+
+
+/* get weighted most-connection node in the destination set */
+static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
+{
+	register struct ip_vs_dest_list *e;
+	struct ip_vs_dest *dest, *most;
+	int moh, doh;
+
+	if (set == NULL)
+		return NULL;
+
+	/* select the first destination server, whose weight > 0 */
+	for (e=set->list; e!=NULL; e=e->next) {
+		most = e->dest;
+		if (atomic_read(&most->weight) > 0) {
+			moh = atomic_read(&most->activeconns) * 50
+				+ atomic_read(&most->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/* find the destination with the weighted most load */
+  nextstage:
+	for (e=e->next; e!=NULL; e=e->next) {
+		dest = e->dest;
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
+		if ((moh * atomic_read(&dest->weight) <
+		     doh * atomic_read(&most->weight))
+		    && (atomic_read(&dest->weight) > 0)) {
+			most = dest;
+			moh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(most->addr.ip), ntohs(most->port),
+		  atomic_read(&most->activeconns),
+		  atomic_read(&most->refcnt),
+		  atomic_read(&most->weight), moh);
+	return most;
+}
+
+
+/*
+ *      IPVS lblcr entry represents an association between destination
+ *      IP address and its destination server set
+ */
+struct ip_vs_lblcr_entry {
+	struct list_head        list;
+	__be32                   addr;           /* destination IP address */
+	struct ip_vs_dest_set   set;            /* destination server set */
+	unsigned long           lastuse;        /* last used time */
+};
+
+
+/*
+ *      IPVS lblcr hash table
+ */
+struct ip_vs_lblcr_table {
+	struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
+	atomic_t                entries;        /* number of entries */
+	int                     max_size;       /* maximum size of entries */
+	struct timer_list       periodic_timer; /* collect stale entries */
+	int                     rover;          /* rover for expire check */
+	int                     counter;        /* counter for no expire */
+};
+
+
+/*
+ *      IPVS LBLCR sysctl table
+ */
+
+static ctl_table vs_vars_table[] = {
+	{
+		.procname	= "lblcr_expiration",
+		.data		= &sysctl_ip_vs_lblcr_expiration,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sysctl_header;
+
+static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
+{
+	list_del(&en->list);
+	ip_vs_dest_set_eraseall(&en->set);
+	kfree(en);
+}
+
+
+/*
+ *	Returns hash value for IPVS LBLCR entry
+ */
+static inline unsigned ip_vs_lblcr_hashkey(__be32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
+}
+
+
+/*
+ *	Hash an entry in the ip_vs_lblcr_table.
+ *	returns bool success.
+ */
+static void
+ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
+{
+	unsigned hash = ip_vs_lblcr_hashkey(en->addr);
+
+	list_add(&en->list, &tbl->bucket[hash]);
+	atomic_inc(&tbl->entries);
+}
+
+
+/*
+ *  Get ip_vs_lblcr_entry associated with supplied parameters. Called under
+ *  read lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
+{
+	unsigned hash = ip_vs_lblcr_hashkey(addr);
+	struct ip_vs_lblcr_entry *en;
+
+	list_for_each_entry(en, &tbl->bucket[hash], list)
+		if (en->addr == addr)
+			return en;
+
+	return NULL;
+}
+
+
+/*
+ * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
+ * IP address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl,  __be32 daddr,
+		struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblcr_entry *en;
+
+	en = ip_vs_lblcr_get(tbl, daddr);
+	if (!en) {
+		en = kmalloc(sizeof(*en), GFP_ATOMIC);
+		if (!en) {
+			IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
+			return NULL;
+		}
+
+		en->addr = daddr;
+		en->lastuse = jiffies;
+
+		/* initilize its dest set */
+		atomic_set(&(en->set.size), 0);
+		en->set.list = NULL;
+		rwlock_init(&en->set.lock);
+
+		ip_vs_lblcr_hash(tbl, en);
+	}
+
+	write_lock(&en->set.lock);
+	ip_vs_dest_set_insert(&en->set, dest);
+	write_unlock(&en->set.lock);
+
+	return en;
+}
+
+
+/*
+ *      Flush all the entries of the specified table.
+ */
+static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
+{
+	int i;
+	struct ip_vs_lblcr_entry *en, *nxt;
+
+	/* No locking required, only called during cleanup. */
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+			ip_vs_lblcr_free(en);
+		}
+	}
+}
+
+
+static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+	unsigned long now = jiffies;
+	int i, j;
+	struct ip_vs_lblcr_entry *en, *nxt;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+		write_lock(&svc->sched_lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
+				       now))
+				continue;
+
+			ip_vs_lblcr_free(en);
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&svc->sched_lock);
+	}
+	tbl->rover = j;
+}
+
+
+/*
+ *      Periodical timer handler for IPVS lblcr table
+ *      It is used to collect stale entries when the number of entries
+ *      exceeds the maximum size of the table.
+ *
+ *      Fixme: we probably need more complicated algorithm to collect
+ *             entries that have not been used for a long time even
+ *             if the number of entries doesn't exceed the maximum size
+ *             of the table.
+ *      The full expiration check is for this purpose now.
+ */
+static void ip_vs_lblcr_check_expire(unsigned long data)
+{
+	struct ip_vs_service *svc = (struct ip_vs_service *) data;
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+	unsigned long now = jiffies;
+	int goal;
+	int i, j;
+	struct ip_vs_lblcr_entry *en, *nxt;
+
+	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+		/* do full expiration check */
+		ip_vs_lblcr_full_check(svc);
+		tbl->counter = 1;
+		goto out;
+	}
+
+	if (atomic_read(&tbl->entries) <= tbl->max_size) {
+		tbl->counter++;
+		goto out;
+	}
+
+	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+	if (goal > tbl->max_size/2)
+		goal = tbl->max_size/2;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+		write_lock(&svc->sched_lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
+				continue;
+
+			ip_vs_lblcr_free(en);
+			atomic_dec(&tbl->entries);
+			goal--;
+		}
+		write_unlock(&svc->sched_lock);
+		if (goal <= 0)
+			break;
+	}
+	tbl->rover = j;
+
+  out:
+	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblcr_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblcr_table for this service
+	 */
+	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
+		  "current service\n", sizeof(*tbl));
+
+	/*
+	 *    Initialize the hash buckets
+	 */
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	}
+	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
+			(unsigned long)svc);
+	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
+
+	return 0;
+}
+
+
+static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+
+	/* remove periodic timer */
+	del_timer_sync(&tbl->periodic_timer);
+
+	/* got to clean up table entries here */
+	ip_vs_lblcr_flush(tbl);
+
+	/* release the table itself */
+	kfree(tbl);
+	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
+		  sizeof(*tbl));
+
+	return 0;
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;
+
+	/*
+	 * We think the overhead of processing active connections is fifty
+	 * times higher than that of inactive connections in average. (This
+	 * fifty times might not be accurate, we will change it later.) We
+	 * use the following formula to estimate the overhead:
+	 *                dest->activeconns*50 + dest->inactconns
+	 * and the load:
+	 *                (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *                h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connection.
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		if (atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr.ip), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+/*
+ *   If this destination server is overloaded and there is a less loaded
+ *   server, then return true.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
+		struct ip_vs_dest *d;
+
+		list_for_each_entry(d, &svc->destinations, n_list) {
+			if (atomic_read(&d->activeconns)*2
+			    < atomic_read(&d->weight)) {
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+
+/*
+ *    Locality-Based (weighted) Least-Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+	struct iphdr *iph = ip_hdr(skb);
+	struct ip_vs_dest *dest = NULL;
+	struct ip_vs_lblcr_entry *en;
+
+	IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
+
+	/* First look in our cache */
+	read_lock(&svc->sched_lock);
+	en = ip_vs_lblcr_get(tbl, iph->daddr);
+	if (en) {
+		/* We only hold a read lock, but this is atomic */
+		en->lastuse = jiffies;
+
+		/* Get the least loaded destination */
+		read_lock(&en->set.lock);
+		dest = ip_vs_dest_set_min(&en->set);
+		read_unlock(&en->set.lock);
+
+		/* More than one destination + enough time passed by, cleanup */
+		if (atomic_read(&en->set.size) > 1 &&
+				time_after(jiffies, en->set.lastmod +
+				sysctl_ip_vs_lblcr_expiration)) {
+			struct ip_vs_dest *m;
+
+			write_lock(&en->set.lock);
+			m = ip_vs_dest_set_max(&en->set);
+			if (m)
+				ip_vs_dest_set_erase(&en->set, m);
+			write_unlock(&en->set.lock);
+		}
+
+		/* If the destination is not overloaded, use it */
+		if (dest && !is_overloaded(dest, svc)) {
+			read_unlock(&svc->sched_lock);
+			goto out;
+		}
+
+		/* The cache entry is invalid, time to schedule */
+		dest = __ip_vs_lblcr_schedule(svc, iph);
+		if (!dest) {
+			IP_VS_DBG(1, "no destination available\n");
+			read_unlock(&svc->sched_lock);
+			return NULL;
+		}
+
+		/* Update our cache entry */
+		write_lock(&en->set.lock);
+		ip_vs_dest_set_insert(&en->set, dest);
+		write_unlock(&en->set.lock);
+	}
+	read_unlock(&svc->sched_lock);
+
+	if (dest)
+		goto out;
+
+	/* No cache entry, time to schedule */
+	dest = __ip_vs_lblcr_schedule(svc, iph);
+	if (!dest) {
+		IP_VS_DBG(1, "no destination available\n");
+		return NULL;
+	}
+
+	/* If we fail to create a cache entry, we'll just use the valid dest */
+	write_lock(&svc->sched_lock);
+	ip_vs_lblcr_new(tbl, iph->daddr, dest);
+	write_unlock(&svc->sched_lock);
+
+out:
+	IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS LBLCR Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
+{
+	.name =			"lblcr",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
+	.init_service =		ip_vs_lblcr_init_svc,
+	.done_service =		ip_vs_lblcr_done_svc,
+	.schedule =		ip_vs_lblcr_schedule,
+};
+
+
+static int __init ip_vs_lblcr_init(void)
+{
+	int ret;
+
+	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+	if (ret)
+		unregister_sysctl_table(sysctl_header);
+	return ret;
+}
+
+
+static void __exit ip_vs_lblcr_cleanup(void)
+{
+	unregister_sysctl_table(sysctl_header);
+	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+}
+
+
+module_init(ip_vs_lblcr_init);
+module_exit(ip_vs_lblcr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
new file mode 100644
index 00000000000..b69f808ac46
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -0,0 +1,103 @@
+/*
+ * IPVS:        Least-Connection Scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     added the ip_vs_lc_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline unsigned int
+ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We think the overhead of processing active connections is 256
+	 * times higher than that of inactive connections in average. (This
+	 * 256 times might not be accurate, we will change it later) We
+	 * use the following formula to estimate the overhead now:
+	 *		  dest->activeconns*256 + dest->inactconns
+	 */
+	return (atomic_read(&dest->activeconns) << 8) +
+		atomic_read(&dest->inactconns);
+}
+
+
+/*
+ *	Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least = NULL;
+	unsigned int loh = 0, doh;
+
+	IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n");
+
+	/*
+	 * Simply select the server with the least number of
+	 *        (activeconns<<5) + inactconns
+	 * Except whose weight is equal to zero.
+	 * If the weight is equal to zero, it means that the server is
+	 * quiesced, the existing connections to the server still get
+	 * served, but no new connection is assigned to the server.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
+		    atomic_read(&dest->weight) == 0)
+			continue;
+		doh = ip_vs_lc_dest_overhead(dest);
+		if (!least || doh < loh) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	if (least)
+	IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->inactconns));
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_lc_scheduler = {
+	.name =			"lc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.schedule =		ip_vs_lc_schedule,
+};
+
+
+static int __init ip_vs_lc_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
+}
+
+static void __exit ip_vs_lc_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
+}
+
+module_init(ip_vs_lc_init);
+module_exit(ip_vs_lc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
new file mode 100644
index 00000000000..9a2d8033f08
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -0,0 +1,138 @@
+/*
+ * IPVS:        Never Queue scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The NQ algorithm adopts a two-speed model. When there is an idle server
+ * available, the job will be sent to the idle server, instead of waiting
+ * for a fast one. When there is no idle server available, the job will be
+ * sent to the server that minimize its expected delay (The Shortest
+ * Expected Delay scheduling algorithm).
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ to me.
+ *
+ * The difference between NQ and SED is that NQ can improve overall
+ * system utilization.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline unsigned int
+ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We only use the active connection number in the cost
+	 * calculation here.
+	 */
+	return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ *	Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least = NULL;
+	unsigned int loh = 0, doh;
+
+	IP_VS_DBG(6, "ip_vs_nq_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *	(server expected overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
+		    !atomic_read(&dest->weight))
+			continue;
+
+		doh = ip_vs_nq_dest_overhead(dest);
+
+		/* return the server directly if it is idle */
+		if (atomic_read(&dest->activeconns) == 0) {
+			least = dest;
+			loh = doh;
+			goto out;
+		}
+
+		if (!least ||
+		    (loh * atomic_read(&dest->weight) >
+		     doh * atomic_read(&least->weight))) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	if (!least)
+		return NULL;
+
+  out:
+	IP_VS_DBG_BUF(6, "NQ: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_nq_scheduler =
+{
+	.name =			"nq",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.schedule =		ip_vs_nq_schedule,
+};
+
+
+static int __init ip_vs_nq_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+static void __exit ip_vs_nq_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+module_init(ip_vs_nq_init);
+module_exit(ip_vs_nq_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
new file mode 100644
index 00000000000..0791f9e08fe
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -0,0 +1,288 @@
+/*
+ * ip_vs_proto.c: transport protocol load balancing support for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * IPVS protocols can only be registered/unregistered when the ipvs
+ * module is loaded/unloaded, so no lock is needed in accessing the
+ * ipvs protocol table.
+ */
+
+#define IP_VS_PROTO_TAB_SIZE		32	/* must be power of 2 */
+#define IP_VS_PROTO_HASH(proto)		((proto) & (IP_VS_PROTO_TAB_SIZE-1))
+
+static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
+
+
+/*
+ *	register an ipvs protocol
+ */
+static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+
+	pp->next = ip_vs_proto_table[hash];
+	ip_vs_proto_table[hash] = pp;
+
+	if (pp->init != NULL)
+		pp->init(pp);
+
+	return 0;
+}
+
+
+/*
+ *	unregister an ipvs protocol
+ */
+static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+	struct ip_vs_protocol **pp_p;
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+
+	pp_p = &ip_vs_proto_table[hash];
+	for (; *pp_p; pp_p = &(*pp_p)->next) {
+		if (*pp_p == pp) {
+			*pp_p = pp->next;
+			if (pp->exit != NULL)
+				pp->exit(pp);
+			return 0;
+		}
+	}
+
+	return -ESRCH;
+}
+
+
+/*
+ *	get ip_vs_protocol object by its proto.
+ */
+struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
+{
+	struct ip_vs_protocol *pp;
+	unsigned hash = IP_VS_PROTO_HASH(proto);
+
+	for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) {
+		if (pp->protocol == proto)
+			return pp;
+	}
+
+	return NULL;
+}
+
+
+/*
+ *	Propagate event for state change to all protocols
+ */
+void ip_vs_protocol_timeout_change(int flags)
+{
+	struct ip_vs_protocol *pp;
+	int i;
+
+	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+		for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
+			if (pp->timeout_change)
+				pp->timeout_change(pp, flags);
+		}
+	}
+}
+
+
+int *
+ip_vs_create_timeout_table(int *table, int size)
+{
+	return kmemdup(table, size, GFP_ATOMIC);
+}
+
+
+/*
+ *	Set timeout value for state specified by name
+ */
+int
+ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to)
+{
+	int i;
+
+	if (!table || !name || !to)
+		return -EINVAL;
+
+	for (i = 0; i < num; i++) {
+		if (strcmp(names[i], name))
+			continue;
+		table[i] = to * HZ;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+
+const char * ip_vs_state_name(__u16 proto, int state)
+{
+	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+
+	if (pp == NULL || pp->state_name == NULL)
+		return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
+	return pp->state_name(state);
+}
+
+
+static void
+ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
+			     const struct sk_buff *skb,
+			     int offset,
+			     const char *msg)
+{
+	char buf[128];
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else if (ih->frag_off & htons(IP_OFFSET))
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
+			pp->name, NIPQUAD(ih->saddr),
+			NIPQUAD(ih->daddr));
+	else {
+		__be16 _ports[2], *pptr
+;
+		pptr = skb_header_pointer(skb, offset + ih->ihl*4,
+					  sizeof(_ports), _ports);
+		if (pptr == NULL)
+			sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u",
+				pp->name,
+				NIPQUAD(ih->saddr),
+				NIPQUAD(ih->daddr));
+		else
+			sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u",
+				pp->name,
+				NIPQUAD(ih->saddr),
+				ntohs(pptr[0]),
+				NIPQUAD(ih->daddr),
+				ntohs(pptr[1]));
+	}
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static void
+ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
+			     const struct sk_buff *skb,
+			     int offset,
+			     const char *msg)
+{
+	char buf[192];
+	struct ipv6hdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else if (ih->nexthdr == IPPROTO_FRAGMENT)
+		sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag",
+			pp->name, NIP6(ih->saddr),
+			NIP6(ih->daddr));
+	else {
+		__be16 _ports[2], *pptr;
+
+		pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
+					  sizeof(_ports), _ports);
+		if (pptr == NULL)
+			sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT,
+				pp->name,
+				NIP6(ih->saddr),
+				NIP6(ih->daddr));
+		else
+			sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u",
+				pp->name,
+				NIP6(ih->saddr),
+				ntohs(pptr[0]),
+				NIP6(ih->daddr),
+				ntohs(pptr[1]));
+	}
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+
+void
+ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
+			  const struct sk_buff *skb,
+			  int offset,
+			  const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb->protocol == htons(ETH_P_IPV6))
+		ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
+	else
+#endif
+		ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
+}
+
+
+int __init ip_vs_protocol_init(void)
+{
+	char protocols[64];
+#define REGISTER_PROTOCOL(p)			\
+	do {					\
+		register_ip_vs_protocol(p);	\
+		strcat(protocols, ", ");	\
+		strcat(protocols, (p)->name);	\
+	} while (0)
+
+	protocols[0] = '\0';
+	protocols[2] = '\0';
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	REGISTER_PROTOCOL(&ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	REGISTER_PROTOCOL(&ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+	REGISTER_PROTOCOL(&ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
+#endif
+	IP_VS_INFO("Registered protocols (%s)\n", &protocols[2]);
+
+	return 0;
+}
+
+
+void ip_vs_protocol_cleanup(void)
+{
+	struct ip_vs_protocol *pp;
+	int i;
+
+	/* unregister all the ipvs protocols */
+	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+		while ((pp = ip_vs_proto_table[i]) != NULL)
+			unregister_ip_vs_protocol(pp);
+	}
+}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
new file mode 100644
index 00000000000..80ab0c8e5b4
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -0,0 +1,235 @@
+/*
+ * ip_vs_proto_ah_esp.c:	AH/ESP IPSec load balancing support for IPVS
+ *
+ * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
+ *		Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		version 2 as published by the Free Software Foundation;
+ *
+ */
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/* TODO:
+
+struct isakmp_hdr {
+	__u8		icookie[8];
+	__u8		rcookie[8];
+	__u8		np;
+	__u8		version;
+	__u8		xchgtype;
+	__u8		flags;
+	__u32		msgid;
+	__u32		length;
+};
+
+*/
+
+#define PORT_ISAKMP	500
+
+
+static struct ip_vs_conn *
+ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		   const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		   int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+				       &iph->saddr,
+				       htons(PORT_ISAKMP),
+				       &iph->daddr,
+				       htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+				       &iph->daddr,
+				       htons(PORT_ISAKMP),
+				       &iph->saddr,
+				       htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		/*
+		 * We are not sure if the packet is from our
+		 * service, so our conn_schedule hook should return NF_ACCEPT
+		 */
+		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
+			      "%s%s %s->%s\n",
+			      inverse ? "ICMP+" : "",
+			      pp->name,
+			      IP_VS_DBG_ADDR(af, &iph->saddr),
+			      IP_VS_DBG_ADDR(af, &iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static struct ip_vs_conn *
+ah_esp_conn_out_get(int af, const struct sk_buff *skb,
+		    struct ip_vs_protocol *pp,
+		    const struct ip_vs_iphdr *iph,
+		    unsigned int proto_off,
+		    int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+					&iph->saddr,
+					htons(PORT_ISAKMP),
+					&iph->daddr,
+					htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+					&iph->daddr,
+					htons(PORT_ISAKMP),
+					&iph->saddr,
+					htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
+			      "%s%s %s->%s\n",
+			      inverse ? "ICMP+" : "",
+			      pp->name,
+			      IP_VS_DBG_ADDR(af, &iph->saddr),
+			      IP_VS_DBG_ADDR(af, &iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static int
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		     int *verdict, struct ip_vs_conn **cpp)
+{
+	/*
+	 * AH/ESP is only related traffic. Pass the packet to IP stack.
+	 */
+	*verdict = NF_ACCEPT;
+	return 0;
+}
+
+
+static void
+ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		       int offset, const char *msg)
+{
+	char buf[256];
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
+			pp->name, NIPQUAD(ih->saddr),
+			NIPQUAD(ih->daddr));
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static void
+ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		       int offset, const char *msg)
+{
+	char buf[256];
+	struct ipv6hdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else
+		sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT,
+			pp->name, NIP6(ih->saddr),
+			NIP6(ih->daddr));
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+static void
+ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		    int offset, const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb->protocol == htons(ETH_P_IPV6))
+		ah_esp_debug_packet_v6(pp, skb, offset, msg);
+	else
+#endif
+		ah_esp_debug_packet_v4(pp, skb, offset, msg);
+}
+
+
+static void ah_esp_init(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+static void ah_esp_exit(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+#ifdef CONFIG_IP_VS_PROTO_AH
+struct ip_vs_protocol ip_vs_protocol_ah = {
+	.name =			"AH",
+	.protocol =		IPPROTO_AH,
+	.num_states =		1,
+	.dont_defrag =		1,
+	.init =			ah_esp_init,
+	.exit =			ah_esp_exit,
+	.conn_schedule =	ah_esp_conn_schedule,
+	.conn_in_get =		ah_esp_conn_in_get,
+	.conn_out_get =		ah_esp_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		NULL,
+	.state_transition =	NULL,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		ah_esp_debug_packet,
+	.timeout_change =	NULL,		/* ISAKMP */
+	.set_state_timeout =	NULL,
+};
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_ESP
+struct ip_vs_protocol ip_vs_protocol_esp = {
+	.name =			"ESP",
+	.protocol =		IPPROTO_ESP,
+	.num_states =		1,
+	.dont_defrag =		1,
+	.init =			ah_esp_init,
+	.exit =			ah_esp_exit,
+	.conn_schedule =	ah_esp_conn_schedule,
+	.conn_in_get =		ah_esp_conn_in_get,
+	.conn_out_get =		ah_esp_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		NULL,
+	.state_transition =	NULL,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		ah_esp_debug_packet,
+	.timeout_change =	NULL,		/* ISAKMP */
+};
+#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
new file mode 100644
index 00000000000..dd4566ea2bf
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -0,0 +1,732 @@
+/*
+ * ip_vs_proto_tcp.c:	TCP load balancing support for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>                  /* for tcphdr */
+#include <net/ip.h>
+#include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <net/ip6_checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+static struct ip_vs_conn *
+tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->saddr, pptr[0],
+					 &iph->daddr, pptr[1]);
+	} else {
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->daddr, pptr[1],
+					 &iph->saddr, pptr[0]);
+	}
+}
+
+static struct ip_vs_conn *
+tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		 int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->saddr, pptr[0],
+					  &iph->daddr, pptr[1]);
+	} else {
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->daddr, pptr[1],
+					  &iph->saddr, pptr[0]);
+	}
+}
+
+
+static int
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		  int *verdict, struct ip_vs_conn **cpp)
+{
+	struct ip_vs_service *svc;
+	struct tcphdr _tcph, *th;
+	struct ip_vs_iphdr iph;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	if (th->syn &&
+	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
+				     th->dest))) {
+		if (ip_vs_todrop()) {
+			/*
+			 * It seems that we are very loaded.
+			 * We have to drop this packet :(
+			 */
+			ip_vs_service_put(svc);
+			*verdict = NF_DROP;
+			return 0;
+		}
+
+		/*
+		 * Let the virtual server select a real server for the
+		 * incoming connection, and create a connection entry.
+		 */
+		*cpp = ip_vs_schedule(svc, skb);
+		if (!*cpp) {
+			*verdict = ip_vs_leave(svc, skb, pp);
+			return 0;
+		}
+		ip_vs_service_put(svc);
+	}
+	return 1;
+}
+
+
+static inline void
+tcp_fast_csum_update(int af, struct tcphdr *tcph,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldport, __be16 newport)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcph->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(tcph->check))));
+	else
+#endif
+	tcph->check =
+		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(tcph->check))));
+}
+
+
+static inline void
+tcp_partial_csum_update(int af, struct tcphdr *tcph,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcph->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(tcph->check))));
+	else
+#endif
+	tcph->check =
+		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(tcph->check))));
+}
+
+
+static int
+tcp_snat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct tcphdr *tcph;
+	unsigned int tcphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
+	oldlen = skb->len - tcphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/* Call application helper if needed */
+		if (!ip_vs_app_pkt_out(cp, skb))
+			return 0;
+	}
+
+	tcph = (void *)skb_network_header(skb) + tcphoff;
+	tcph->source = cp->vport;
+
+	/* Adjust TCP checksums */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - tcphoff));
+	} else if (!cp->app) {
+		/* Only port and addr are changed, do fast csum update */
+		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+				     cp->dport, cp->vport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		tcph->check = 0;
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
+						      &cp->caddr.in6,
+						      skb->len - tcphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
+							cp->caddr.ip,
+							skb->len - tcphoff,
+							cp->protocol,
+							skb->csum);
+
+		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+			  pp->name, tcph->check,
+			  (char*)&(tcph->check) - (char*)tcph);
+	}
+	return 1;
+}
+
+
+static int
+tcp_dnat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct tcphdr *tcph;
+	unsigned int tcphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
+	oldlen = skb->len - tcphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/*
+		 *	Attempt ip_vs_app call.
+		 *	It will fix ip_vs_conn and iph ack_seq stuff
+		 */
+		if (!ip_vs_app_pkt_in(cp, skb))
+			return 0;
+	}
+
+	tcph = (void *)skb_network_header(skb) + tcphoff;
+	tcph->dest = cp->dport;
+
+	/*
+	 *	Adjust TCP checksums
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - tcphoff));
+	} else if (!cp->app) {
+		/* Only port and addr are changed, do fast csum update */
+		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
+				     cp->vport, cp->dport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		tcph->check = 0;
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			tcph->check = csum_ipv6_magic(&cp->caddr.in6,
+						      &cp->daddr.in6,
+						      skb->len - tcphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			tcph->check = csum_tcpudp_magic(cp->caddr.ip,
+							cp->daddr.ip,
+							skb->len - tcphoff,
+							cp->protocol,
+							skb->csum);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return 1;
+}
+
+
+static int
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	unsigned int tcphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		tcphoff = ip_hdrlen(skb);
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_NONE:
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+	case CHECKSUM_COMPLETE:
+#ifdef CONFIG_IP_VS_IPV6
+		if (af == AF_INET6) {
+			if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					    &ipv6_hdr(skb)->daddr,
+					    skb->len - tcphoff,
+					    ipv6_hdr(skb)->nexthdr,
+					    skb->csum)) {
+				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+						 "Failed checksum for");
+				return 0;
+			}
+		} else
+#endif
+			if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+					      ip_hdr(skb)->daddr,
+					      skb->len - tcphoff,
+					      ip_hdr(skb)->protocol,
+					      skb->csum)) {
+				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+						 "Failed checksum for");
+				return 0;
+			}
+		break;
+	default:
+		/* No need to checksum. */
+		break;
+	}
+
+	return 1;
+}
+
+
+#define TCP_DIR_INPUT		0
+#define TCP_DIR_OUTPUT		4
+#define TCP_DIR_INPUT_ONLY	8
+
+static const int tcp_state_off[IP_VS_DIR_LAST] = {
+	[IP_VS_DIR_INPUT]		=	TCP_DIR_INPUT,
+	[IP_VS_DIR_OUTPUT]		=	TCP_DIR_OUTPUT,
+	[IP_VS_DIR_INPUT_ONLY]		=	TCP_DIR_INPUT_ONLY,
+};
+
+/*
+ *	Timeout table[state]
+ */
+static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+	[IP_VS_TCP_S_NONE]		=	2*HZ,
+	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
+	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
+	[IP_VS_TCP_S_SYN_RECV]		=	1*60*HZ,
+	[IP_VS_TCP_S_FIN_WAIT]		=	2*60*HZ,
+	[IP_VS_TCP_S_TIME_WAIT]		=	2*60*HZ,
+	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
+	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
+	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
+	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
+	[IP_VS_TCP_S_SYNACK]		=	120*HZ,
+	[IP_VS_TCP_S_LAST]		=	2*HZ,
+};
+
+static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
+	[IP_VS_TCP_S_NONE]		=	"NONE",
+	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
+	[IP_VS_TCP_S_SYN_SENT]		=	"SYN_SENT",
+	[IP_VS_TCP_S_SYN_RECV]		=	"SYN_RECV",
+	[IP_VS_TCP_S_FIN_WAIT]		=	"FIN_WAIT",
+	[IP_VS_TCP_S_TIME_WAIT]		=	"TIME_WAIT",
+	[IP_VS_TCP_S_CLOSE]		=	"CLOSE",
+	[IP_VS_TCP_S_CLOSE_WAIT]	=	"CLOSE_WAIT",
+	[IP_VS_TCP_S_LAST_ACK]		=	"LAST_ACK",
+	[IP_VS_TCP_S_LISTEN]		=	"LISTEN",
+	[IP_VS_TCP_S_SYNACK]		=	"SYNACK",
+	[IP_VS_TCP_S_LAST]		=	"BUG!",
+};
+
+#define sNO IP_VS_TCP_S_NONE
+#define sES IP_VS_TCP_S_ESTABLISHED
+#define sSS IP_VS_TCP_S_SYN_SENT
+#define sSR IP_VS_TCP_S_SYN_RECV
+#define sFW IP_VS_TCP_S_FIN_WAIT
+#define sTW IP_VS_TCP_S_TIME_WAIT
+#define sCL IP_VS_TCP_S_CLOSE
+#define sCW IP_VS_TCP_S_CLOSE_WAIT
+#define sLA IP_VS_TCP_S_LAST_ACK
+#define sLI IP_VS_TCP_S_LISTEN
+#define sSA IP_VS_TCP_S_SYNACK
+
+struct tcp_states_t {
+	int next_state[IP_VS_TCP_S_LAST];
+};
+
+static const char * tcp_state_name(int state)
+{
+	if (state >= IP_VS_TCP_S_LAST)
+		return "ERR!";
+	return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
+}
+
+static struct tcp_states_t tcp_states [] = {
+/*	INPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
+
+/*	OUTPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/*	INPUT-ONLY */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+static struct tcp_states_t tcp_states_dos [] = {
+/*	INPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
+/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+
+/*	OUTPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/*	INPUT-ONLY */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+static struct tcp_states_t *tcp_state_table = tcp_states;
+
+
+static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+{
+	int on = (flags & 1);		/* secure_tcp */
+
+	/*
+	** FIXME: change secure_tcp to independent sysctl var
+	** or make it per-service or per-app because it is valid
+	** for most if not for all of the applications. Something
+	** like "capabilities" (flags) for each object.
+	*/
+	tcp_state_table = (on? tcp_states_dos : tcp_states);
+}
+
+static int
+tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
+				       tcp_state_name_table, sname, to);
+}
+
+static inline int tcp_state_idx(struct tcphdr *th)
+{
+	if (th->rst)
+		return 3;
+	if (th->syn)
+		return 0;
+	if (th->fin)
+		return 1;
+	if (th->ack)
+		return 2;
+	return -1;
+}
+
+static inline void
+set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+	      int direction, struct tcphdr *th)
+{
+	int state_idx;
+	int new_state = IP_VS_TCP_S_CLOSE;
+	int state_off = tcp_state_off[direction];
+
+	/*
+	 *    Update state offset to INPUT_ONLY if necessary
+	 *    or delete NO_OUTPUT flag if output packet detected
+	 */
+	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
+		if (state_off == TCP_DIR_OUTPUT)
+			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
+		else
+			state_off = TCP_DIR_INPUT_ONLY;
+	}
+
+	if ((state_idx = tcp_state_idx(th)) < 0) {
+		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
+		goto tcp_state_out;
+	}
+
+	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+
+  tcp_state_out:
+	if (new_state != cp->state) {
+		struct ip_vs_dest *dest = cp->dest;
+
+		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
+			      "%s:%d state: %s->%s conn->refcnt:%d\n",
+			      pp->name,
+			      ((state_off == TCP_DIR_OUTPUT) ?
+			       "output " : "input "),
+			      th->syn ? 'S' : '.',
+			      th->fin ? 'F' : '.',
+			      th->ack ? 'A' : '.',
+			      th->rst ? 'R' : '.',
+			      IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+			      ntohs(cp->dport),
+			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+			      ntohs(cp->cport),
+			      tcp_state_name(cp->state),
+			      tcp_state_name(new_state),
+			      atomic_read(&cp->refcnt));
+
+		if (dest) {
+			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+			    (new_state != IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+				cp->flags |= IP_VS_CONN_F_INACTIVE;
+			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+		}
+	}
+
+	cp->timeout = pp->timeout_table[cp->state = new_state];
+}
+
+
+/*
+ *	Handle state transitions
+ */
+static int
+tcp_state_transition(struct ip_vs_conn *cp, int direction,
+		     const struct sk_buff *skb,
+		     struct ip_vs_protocol *pp)
+{
+	struct tcphdr _tcph, *th;
+
+#ifdef CONFIG_IP_VS_IPV6
+	int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
+#else
+	int ihl = ip_hdrlen(skb);
+#endif
+
+	th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return 0;
+
+	spin_lock(&cp->lock);
+	set_tcp_state(pp, cp, direction, th);
+	spin_unlock(&cp->lock);
+
+	return 1;
+}
+
+
+/*
+ *	Hash table for TCP application incarnations
+ */
+#define	TCP_APP_TAB_BITS	4
+#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
+#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
+
+static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
+static DEFINE_SPINLOCK(tcp_app_lock);
+
+static inline __u16 tcp_app_hashkey(__be16 port)
+{
+	return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
+		& TCP_APP_TAB_MASK;
+}
+
+
+static int tcp_register_app(struct ip_vs_app *inc)
+{
+	struct ip_vs_app *i;
+	__u16 hash;
+	__be16 port = inc->port;
+	int ret = 0;
+
+	hash = tcp_app_hashkey(port);
+
+	spin_lock_bh(&tcp_app_lock);
+	list_for_each_entry(i, &tcp_apps[hash], p_list) {
+		if (i->port == port) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	list_add(&inc->p_list, &tcp_apps[hash]);
+	atomic_inc(&ip_vs_protocol_tcp.appcnt);
+
+  out:
+	spin_unlock_bh(&tcp_app_lock);
+	return ret;
+}
+
+
+static void
+tcp_unregister_app(struct ip_vs_app *inc)
+{
+	spin_lock_bh(&tcp_app_lock);
+	atomic_dec(&ip_vs_protocol_tcp.appcnt);
+	list_del(&inc->p_list);
+	spin_unlock_bh(&tcp_app_lock);
+}
+
+
+static int
+tcp_app_conn_bind(struct ip_vs_conn *cp)
+{
+	int hash;
+	struct ip_vs_app *inc;
+	int result = 0;
+
+	/* Default binding: bind app only for NAT */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return 0;
+
+	/* Lookup application incarnations and bind the right one */
+	hash = tcp_app_hashkey(cp->vport);
+
+	spin_lock(&tcp_app_lock);
+	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+		if (inc->port == cp->vport) {
+			if (unlikely(!ip_vs_app_inc_get(inc)))
+				break;
+			spin_unlock(&tcp_app_lock);
+
+			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+				      "%s:%u to app %s on port %u\n",
+				      __func__,
+				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+				      ntohs(cp->cport),
+				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+				      ntohs(cp->vport),
+				      inc->name, ntohs(inc->port));
+
+			cp->app = inc;
+			if (inc->init_conn)
+				result = inc->init_conn(inc, cp);
+			goto out;
+		}
+	}
+	spin_unlock(&tcp_app_lock);
+
+  out:
+	return result;
+}
+
+
+/*
+ *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
+ */
+void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+{
+	spin_lock(&cp->lock);
+	cp->state = IP_VS_TCP_S_LISTEN;
+	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+	spin_unlock(&cp->lock);
+}
+
+
+static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+{
+	IP_VS_INIT_HASH_TABLE(tcp_apps);
+	pp->timeout_table = tcp_timeouts;
+}
+
+
+static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+{
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_tcp = {
+	.name =			"TCP",
+	.protocol =		IPPROTO_TCP,
+	.num_states =		IP_VS_TCP_S_LAST,
+	.dont_defrag =		0,
+	.appcnt =		ATOMIC_INIT(0),
+	.init =			ip_vs_tcp_init,
+	.exit =			ip_vs_tcp_exit,
+	.register_app =		tcp_register_app,
+	.unregister_app =	tcp_unregister_app,
+	.conn_schedule =	tcp_conn_schedule,
+	.conn_in_get =		tcp_conn_in_get,
+	.conn_out_get =		tcp_conn_out_get,
+	.snat_handler =		tcp_snat_handler,
+	.dnat_handler =		tcp_dnat_handler,
+	.csum_check =		tcp_csum_check,
+	.state_name =		tcp_state_name,
+	.state_transition =	tcp_state_transition,
+	.app_conn_bind =	tcp_app_conn_bind,
+	.debug_packet =		ip_vs_tcpudp_debug_packet,
+	.timeout_change =	tcp_timeout_change,
+	.set_state_timeout =	tcp_set_state_timeout,
+};
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
new file mode 100644
index 00000000000..6eb6039d634
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -0,0 +1,533 @@
+/*
+ * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/udp.h>
+
+#include <net/ip_vs.h>
+#include <net/ip.h>
+#include <net/ip6_checksum.h>
+
+static struct ip_vs_conn *
+udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		int inverse)
+{
+	struct ip_vs_conn *cp;
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_in_get(af, iph->protocol,
+				       &iph->saddr, pptr[0],
+				       &iph->daddr, pptr[1]);
+	} else {
+		cp = ip_vs_conn_in_get(af, iph->protocol,
+				       &iph->daddr, pptr[1],
+				       &iph->saddr, pptr[0]);
+	}
+
+	return cp;
+}
+
+
+static struct ip_vs_conn *
+udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
+		 int inverse)
+{
+	struct ip_vs_conn *cp;
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_out_get(af, iph->protocol,
+					&iph->saddr, pptr[0],
+					&iph->daddr, pptr[1]);
+	} else {
+		cp = ip_vs_conn_out_get(af, iph->protocol,
+					&iph->daddr, pptr[1],
+					&iph->saddr, pptr[0]);
+	}
+
+	return cp;
+}
+
+
+static int
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		  int *verdict, struct ip_vs_conn **cpp)
+{
+	struct ip_vs_service *svc;
+	struct udphdr _udph, *uh;
+	struct ip_vs_iphdr iph;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
+	if (uh == NULL) {
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+				&iph.daddr, uh->dest);
+	if (svc) {
+		if (ip_vs_todrop()) {
+			/*
+			 * It seems that we are very loaded.
+			 * We have to drop this packet :(
+			 */
+			ip_vs_service_put(svc);
+			*verdict = NF_DROP;
+			return 0;
+		}
+
+		/*
+		 * Let the virtual server select a real server for the
+		 * incoming connection, and create a connection entry.
+		 */
+		*cpp = ip_vs_schedule(svc, skb);
+		if (!*cpp) {
+			*verdict = ip_vs_leave(svc, skb, pp);
+			return 0;
+		}
+		ip_vs_service_put(svc);
+	}
+	return 1;
+}
+
+
+static inline void
+udp_fast_csum_update(int af, struct udphdr *uhdr,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldport, __be16 newport)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		uhdr->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(uhdr->check))));
+	else
+#endif
+		uhdr->check =
+			csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+					 ip_vs_check_diff2(oldport, newport,
+						~csum_unfold(uhdr->check))));
+	if (!uhdr->check)
+		uhdr->check = CSUM_MANGLED_0;
+}
+
+static inline void
+udp_partial_csum_update(int af, struct udphdr *uhdr,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		uhdr->check =
+			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(uhdr->check))));
+	else
+#endif
+	uhdr->check =
+		csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				ip_vs_check_diff2(oldlen, newlen,
+						~csum_unfold(uhdr->check))));
+}
+
+
+static int
+udp_snat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct udphdr *udph;
+	unsigned int udphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
+	oldlen = skb->len - udphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/*
+		 *	Call application helper if needed
+		 */
+		if (!ip_vs_app_pkt_out(cp, skb))
+			return 0;
+	}
+
+	udph = (void *)skb_network_header(skb) + udphoff;
+	udph->source = cp->vport;
+
+	/*
+	 *	Adjust UDP checksums
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - udphoff));
+	} else if (!cp->app && (udph->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+				     cp->dport, cp->vport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		udph->check = 0;
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			udph->check = csum_ipv6_magic(&cp->vaddr.in6,
+						      &cp->caddr.in6,
+						      skb->len - udphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			udph->check = csum_tcpudp_magic(cp->vaddr.ip,
+							cp->caddr.ip,
+							skb->len - udphoff,
+							cp->protocol,
+							skb->csum);
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+			  pp->name, udph->check,
+			  (char*)&(udph->check) - (char*)udph);
+	}
+	return 1;
+}
+
+
+static int
+udp_dnat_handler(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct udphdr *udph;
+	unsigned int udphoff;
+	int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
+	oldlen = skb->len - udphoff;
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/*
+		 *	Attempt ip_vs_app call.
+		 *	It will fix ip_vs_conn
+		 */
+		if (!ip_vs_app_pkt_in(cp, skb))
+			return 0;
+	}
+
+	udph = (void *)skb_network_header(skb) + udphoff;
+	udph->dest = cp->dport;
+
+	/*
+	 *	Adjust UDP checksums
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+					htonl(oldlen),
+					htonl(skb->len - udphoff));
+	} else if (!cp->app && (udph->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
+				     cp->vport, cp->dport);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		udph->check = 0;
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			udph->check = csum_ipv6_magic(&cp->caddr.in6,
+						      &cp->daddr.in6,
+						      skb->len - udphoff,
+						      cp->protocol, skb->csum);
+		else
+#endif
+			udph->check = csum_tcpudp_magic(cp->caddr.ip,
+							cp->daddr.ip,
+							skb->len - udphoff,
+							cp->protocol,
+							skb->csum);
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return 1;
+}
+
+
+static int
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	struct udphdr _udph, *uh;
+	unsigned int udphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		udphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		udphoff = ip_hdrlen(skb);
+
+	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
+	if (uh == NULL)
+		return 0;
+
+	if (uh->check != 0) {
+		switch (skb->ip_summed) {
+		case CHECKSUM_NONE:
+			skb->csum = skb_checksum(skb, udphoff,
+						 skb->len - udphoff, 0);
+		case CHECKSUM_COMPLETE:
+#ifdef CONFIG_IP_VS_IPV6
+			if (af == AF_INET6) {
+				if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						    &ipv6_hdr(skb)->daddr,
+						    skb->len - udphoff,
+						    ipv6_hdr(skb)->nexthdr,
+						    skb->csum)) {
+					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+							 "Failed checksum for");
+					return 0;
+				}
+			} else
+#endif
+				if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						      ip_hdr(skb)->daddr,
+						      skb->len - udphoff,
+						      ip_hdr(skb)->protocol,
+						      skb->csum)) {
+					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+							 "Failed checksum for");
+					return 0;
+				}
+			break;
+		default:
+			/* No need to checksum. */
+			break;
+		}
+	}
+	return 1;
+}
+
+
+/*
+ *	Note: the caller guarantees that only one of register_app,
+ *	unregister_app or app_conn_bind is called each time.
+ */
+
+#define	UDP_APP_TAB_BITS	4
+#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
+#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
+
+static struct list_head udp_apps[UDP_APP_TAB_SIZE];
+static DEFINE_SPINLOCK(udp_app_lock);
+
+static inline __u16 udp_app_hashkey(__be16 port)
+{
+	return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
+		& UDP_APP_TAB_MASK;
+}
+
+
+static int udp_register_app(struct ip_vs_app *inc)
+{
+	struct ip_vs_app *i;
+	__u16 hash;
+	__be16 port = inc->port;
+	int ret = 0;
+
+	hash = udp_app_hashkey(port);
+
+
+	spin_lock_bh(&udp_app_lock);
+	list_for_each_entry(i, &udp_apps[hash], p_list) {
+		if (i->port == port) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	list_add(&inc->p_list, &udp_apps[hash]);
+	atomic_inc(&ip_vs_protocol_udp.appcnt);
+
+  out:
+	spin_unlock_bh(&udp_app_lock);
+	return ret;
+}
+
+
+static void
+udp_unregister_app(struct ip_vs_app *inc)
+{
+	spin_lock_bh(&udp_app_lock);
+	atomic_dec(&ip_vs_protocol_udp.appcnt);
+	list_del(&inc->p_list);
+	spin_unlock_bh(&udp_app_lock);
+}
+
+
+static int udp_app_conn_bind(struct ip_vs_conn *cp)
+{
+	int hash;
+	struct ip_vs_app *inc;
+	int result = 0;
+
+	/* Default binding: bind app only for NAT */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return 0;
+
+	/* Lookup application incarnations and bind the right one */
+	hash = udp_app_hashkey(cp->vport);
+
+	spin_lock(&udp_app_lock);
+	list_for_each_entry(inc, &udp_apps[hash], p_list) {
+		if (inc->port == cp->vport) {
+			if (unlikely(!ip_vs_app_inc_get(inc)))
+				break;
+			spin_unlock(&udp_app_lock);
+
+			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+				      "%s:%u to app %s on port %u\n",
+				      __func__,
+				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+				      ntohs(cp->cport),
+				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+				      ntohs(cp->vport),
+				      inc->name, ntohs(inc->port));
+
+			cp->app = inc;
+			if (inc->init_conn)
+				result = inc->init_conn(inc, cp);
+			goto out;
+		}
+	}
+	spin_unlock(&udp_app_lock);
+
+  out:
+	return result;
+}
+
+
+static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+	[IP_VS_UDP_S_NORMAL]		=	5*60*HZ,
+	[IP_VS_UDP_S_LAST]		=	2*HZ,
+};
+
+static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
+	[IP_VS_UDP_S_NORMAL]		=	"UDP",
+	[IP_VS_UDP_S_LAST]		=	"BUG!",
+};
+
+
+static int
+udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
+				       udp_state_name_table, sname, to);
+}
+
+static const char * udp_state_name(int state)
+{
+	if (state >= IP_VS_UDP_S_LAST)
+		return "ERR!";
+	return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
+}
+
+static int
+udp_state_transition(struct ip_vs_conn *cp, int direction,
+		     const struct sk_buff *skb,
+		     struct ip_vs_protocol *pp)
+{
+	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+	return 1;
+}
+
+static void udp_init(struct ip_vs_protocol *pp)
+{
+	IP_VS_INIT_HASH_TABLE(udp_apps);
+	pp->timeout_table = udp_timeouts;
+}
+
+static void udp_exit(struct ip_vs_protocol *pp)
+{
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_udp = {
+	.name =			"UDP",
+	.protocol =		IPPROTO_UDP,
+	.num_states =		IP_VS_UDP_S_LAST,
+	.dont_defrag =		0,
+	.init =			udp_init,
+	.exit =			udp_exit,
+	.conn_schedule =	udp_conn_schedule,
+	.conn_in_get =		udp_conn_in_get,
+	.conn_out_get =		udp_conn_out_get,
+	.snat_handler =		udp_snat_handler,
+	.dnat_handler =		udp_dnat_handler,
+	.csum_check =		udp_csum_check,
+	.state_transition =	udp_state_transition,
+	.state_name =		udp_state_name,
+	.register_app =		udp_register_app,
+	.unregister_app =	udp_unregister_app,
+	.app_conn_bind =	udp_app_conn_bind,
+	.debug_packet =		ip_vs_tcpudp_debug_packet,
+	.timeout_change =	NULL,
+	.set_state_timeout =	udp_set_state_timeout,
+};
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
new file mode 100644
index 00000000000..a22195f68ac
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -0,0 +1,112 @@
+/*
+ * IPVS:        Round-Robin Scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Fixes/Changes:
+ *     Wensong Zhang            :     changed the ip_vs_rr_schedule to return dest
+ *     Julian Anastasov         :     fixed the NULL pointer access bug in debugging
+ *     Wensong Zhang            :     changed some comestics things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_rr_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;
+	return 0;
+}
+
+
+static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;
+	return 0;
+}
+
+
+/*
+ * Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct list_head *p, *q;
+	struct ip_vs_dest *dest;
+
+	IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n");
+
+	write_lock(&svc->sched_lock);
+	p = (struct list_head *)svc->sched_data;
+	p = p->next;
+	q = p;
+	do {
+		/* skip list head */
+		if (q == &svc->destinations) {
+			q = q->next;
+			continue;
+		}
+
+		dest = list_entry(q, struct ip_vs_dest, n_list);
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+		    atomic_read(&dest->weight) > 0)
+			/* HIT */
+			goto out;
+		q = q->next;
+	} while (q != p);
+	write_unlock(&svc->sched_lock);
+	return NULL;
+
+  out:
+	svc->sched_data = q;
+	write_unlock(&svc->sched_lock);
+	IP_VS_DBG_BUF(6, "RR: server %s:%u "
+		      "activeconns %d refcnt %d weight %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+		      atomic_read(&dest->activeconns),
+		      atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+
+	return dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_rr_scheduler = {
+	.name =			"rr",			/* name */
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.init_service =		ip_vs_rr_init_svc,
+	.update_service =	ip_vs_rr_update_svc,
+	.schedule =		ip_vs_rr_schedule,
+};
+
+static int __init ip_vs_rr_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
+}
+
+static void __exit ip_vs_rr_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
+}
+
+module_init(ip_vs_rr_init);
+module_exit(ip_vs_rr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
new file mode 100644
index 00000000000..a46ad9e3501
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -0,0 +1,251 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <asm/string.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+/*
+ *  IPVS scheduler list
+ */
+static LIST_HEAD(ip_vs_schedulers);
+
+/* lock for service table */
+static DEFINE_RWLOCK(__ip_vs_sched_lock);
+
+
+/*
+ *  Bind a service with a scheduler
+ */
+int ip_vs_bind_scheduler(struct ip_vs_service *svc,
+			 struct ip_vs_scheduler *scheduler)
+{
+	int ret;
+
+	if (svc == NULL) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+	if (scheduler == NULL) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n");
+		return -EINVAL;
+	}
+
+	svc->scheduler = scheduler;
+
+	if (scheduler->init_service) {
+		ret = scheduler->init_service(svc);
+		if (ret) {
+			IP_VS_ERR("ip_vs_bind_scheduler(): init error\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ *  Unbind a service with its scheduler
+ */
+int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
+{
+	struct ip_vs_scheduler *sched;
+
+	if (svc == NULL) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+
+	sched = svc->scheduler;
+	if (sched == NULL) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n");
+		return -EINVAL;
+	}
+
+	if (sched->done_service) {
+		if (sched->done_service(svc) != 0) {
+			IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n");
+			return -EINVAL;
+		}
+	}
+
+	svc->scheduler = NULL;
+	return 0;
+}
+
+
+/*
+ *  Get scheduler in the scheduler list by name
+ */
+static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
+{
+	struct ip_vs_scheduler *sched;
+
+	IP_VS_DBG(2, "ip_vs_sched_getbyname(): sched_name \"%s\"\n",
+		  sched_name);
+
+	read_lock_bh(&__ip_vs_sched_lock);
+
+	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+		/*
+		 * Test and get the modules atomically
+		 */
+		if (sched->module && !try_module_get(sched->module)) {
+			/*
+			 * This scheduler is just deleted
+			 */
+			continue;
+		}
+		if (strcmp(sched_name, sched->name)==0) {
+			/* HIT */
+			read_unlock_bh(&__ip_vs_sched_lock);
+			return sched;
+		}
+		if (sched->module)
+			module_put(sched->module);
+	}
+
+	read_unlock_bh(&__ip_vs_sched_lock);
+	return NULL;
+}
+
+
+/*
+ *  Lookup scheduler and try to load it if it doesn't exist
+ */
+struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
+{
+	struct ip_vs_scheduler *sched;
+
+	/*
+	 *  Search for the scheduler by sched_name
+	 */
+	sched = ip_vs_sched_getbyname(sched_name);
+
+	/*
+	 *  If scheduler not found, load the module and search again
+	 */
+	if (sched == NULL) {
+		request_module("ip_vs_%s", sched_name);
+		sched = ip_vs_sched_getbyname(sched_name);
+	}
+
+	return sched;
+}
+
+void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
+{
+	if (scheduler->module)
+		module_put(scheduler->module);
+}
+
+
+/*
+ *  Register a scheduler in the scheduler list
+ */
+int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+	struct ip_vs_scheduler *sched;
+
+	if (!scheduler) {
+		IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	if (!scheduler->name) {
+		IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n");
+		return -EINVAL;
+	}
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	write_lock_bh(&__ip_vs_sched_lock);
+
+	if (!list_empty(&scheduler->n_list)) {
+		write_unlock_bh(&__ip_vs_sched_lock);
+		ip_vs_use_count_dec();
+		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+			  "already linked\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	/*
+	 *  Make sure that the scheduler with this name doesn't exist
+	 *  in the scheduler list.
+	 */
+	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+		if (strcmp(scheduler->name, sched->name) == 0) {
+			write_unlock_bh(&__ip_vs_sched_lock);
+			ip_vs_use_count_dec();
+			IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+					"already existed in the system\n",
+					scheduler->name);
+			return -EINVAL;
+		}
+	}
+	/*
+	 *	Add it into the d-linked scheduler list
+	 */
+	list_add(&scheduler->n_list, &ip_vs_schedulers);
+	write_unlock_bh(&__ip_vs_sched_lock);
+
+	IP_VS_INFO("[%s] scheduler registered.\n", scheduler->name);
+
+	return 0;
+}
+
+
+/*
+ *  Unregister a scheduler from the scheduler list
+ */
+int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+	if (!scheduler) {
+		IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	write_lock_bh(&__ip_vs_sched_lock);
+	if (list_empty(&scheduler->n_list)) {
+		write_unlock_bh(&__ip_vs_sched_lock);
+		IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
+			  "is not in the list. failed\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	/*
+	 *	Remove it from the d-linked scheduler list
+	 */
+	list_del(&scheduler->n_list);
+	write_unlock_bh(&__ip_vs_sched_lock);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	IP_VS_INFO("[%s] scheduler unregistered.\n", scheduler->name);
+
+	return 0;
+}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
new file mode 100644
index 00000000000..7d2f22f04b8
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -0,0 +1,140 @@
+/*
+ * IPVS:        Shortest Expected Delay scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The SED algorithm attempts to minimize each job's expected delay until
+ * completion. The expected delay that the job will experience is
+ * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
+ * jobs on the ith server and Ui is the fixed service rate (weight) of
+ * the ith server. The SED algorithm adopts a greedy policy that each does
+ * what is in its own best interest, i.e. to join the queue which would
+ * minimize its expected delay of completion.
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
+ *
+ * The difference between SED and WLC is that SED includes the incoming
+ * job in the cost function (the increment of 1). SED may outperform
+ * WLC, while scheduling big jobs under larger heterogeneous systems
+ * (the server weight varies a lot).
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline unsigned int
+ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We only use the active connection number in the cost
+	 * calculation here.
+	 */
+	return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ *	Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least;
+	unsigned int loh, doh;
+
+	IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *	(server expected overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+		    atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = ip_vs_sed_dest_overhead(least);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		doh = ip_vs_sed_dest_overhead(dest);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG_BUF(6, "SED: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_sed_scheduler =
+{
+	.name =			"sed",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.schedule =		ip_vs_sed_schedule,
+};
+
+
+static int __init ip_vs_sed_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+
+static void __exit ip_vs_sed_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+
+module_init(ip_vs_sed_init);
+module_exit(ip_vs_sed_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
new file mode 100644
index 00000000000..1d96de27fef
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -0,0 +1,258 @@
+/*
+ * IPVS:        Source Hashing scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The sh algorithm is to select server by the hash key of source IP
+ * address. The pseudo code is as follows:
+ *
+ *       n <- servernode[src_ip];
+ *       if (n is dead) OR
+ *          (n is overloaded) or (n.weight <= 0) then
+ *                 return NULL;
+ *
+ *       return n;
+ *
+ * Notes that servernode is a 256-bucket hash table that maps the hash
+ * index derived from packet source IP address to the current server
+ * array. If the sh scheduler is used in cache cluster, it is good to
+ * combine it with cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      IPVS SH bucket
+ */
+struct ip_vs_sh_bucket {
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+};
+
+/*
+ *     for IPVS SH entry hash table
+ */
+#ifndef CONFIG_IP_VS_SH_TAB_BITS
+#define CONFIG_IP_VS_SH_TAB_BITS        8
+#endif
+#define IP_VS_SH_TAB_BITS               CONFIG_IP_VS_SH_TAB_BITS
+#define IP_VS_SH_TAB_SIZE               (1 << IP_VS_SH_TAB_BITS)
+#define IP_VS_SH_TAB_MASK               (IP_VS_SH_TAB_SIZE - 1)
+
+
+/*
+ *	Returns hash value for IPVS SH entry
+ */
+static inline unsigned ip_vs_sh_hashkey(__be32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK;
+}
+
+
+/*
+ *      Get ip_vs_dest associated with supplied parameters.
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr)
+{
+	return (tbl[ip_vs_sh_hashkey(addr)]).dest;
+}
+
+
+/*
+ *      Assign all the hash buckets of the specified table with the service.
+ */
+static int
+ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_sh_bucket *b;
+	struct list_head *p;
+	struct ip_vs_dest *dest;
+
+	b = tbl;
+	p = &svc->destinations;
+	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
+		if (list_empty(p)) {
+			b->dest = NULL;
+		} else {
+			if (p == &svc->destinations)
+				p = p->next;
+
+			dest = list_entry(p, struct ip_vs_dest, n_list);
+			atomic_inc(&dest->refcnt);
+			b->dest = dest;
+
+			p = p->next;
+		}
+		b++;
+	}
+	return 0;
+}
+
+
+/*
+ *      Flush all the hash buckets of the specified table.
+ */
+static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
+{
+	int i;
+	struct ip_vs_sh_bucket *b;
+
+	b = tbl;
+	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
+		if (b->dest) {
+			atomic_dec(&b->dest->refcnt);
+			b->dest = NULL;
+		}
+		b++;
+	}
+}
+
+
+static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl;
+
+	/* allocate the SH table for this service */
+	tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
+		      GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_sh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_sh_flush(tbl);
+
+	/* release the table itself */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+	return 0;
+}
+
+
+static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_sh_flush(tbl);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_sh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+/*
+ *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
+ *      consider that the server is overloaded here.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+	return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ *      Source Hashing scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_sh_bucket *tbl;
+	struct iphdr *iph = ip_hdr(skb);
+
+	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
+	dest = ip_vs_sh_get(tbl, iph->saddr);
+	if (!dest
+	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
+	    || atomic_read(&dest->weight) <= 0
+	    || is_overloaded(dest)) {
+		return NULL;
+	}
+
+	IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->saddr),
+		  NIPQUAD(dest->addr.ip),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS SH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_sh_scheduler =
+{
+	.name =			"sh",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list	 =		LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	0,
+#endif
+	.init_service =		ip_vs_sh_init_svc,
+	.done_service =		ip_vs_sh_done_svc,
+	.update_service =	ip_vs_sh_update_svc,
+	.schedule =		ip_vs_sh_schedule,
+};
+
+
+static int __init ip_vs_sh_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
+}
+
+
+static void __exit ip_vs_sh_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
+}
+
+
+module_init(ip_vs_sh_init);
+module_exit(ip_vs_sh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
new file mode 100644
index 00000000000..de5e7e118ee
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -0,0 +1,942 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the NetFilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * ip_vs_sync:  sync connection info from master load balancer to backups
+ *              through multicast
+ *
+ * Changes:
+ *	Alexandre Cassen	:	Added master & backup support at a time.
+ *	Alexandre Cassen	:	Added SyncID support for incoming sync
+ *					messages filtering.
+ *	Justin Ossevoort	:	Fix endian problem on sync message size.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/inetdevice.h>
+#include <linux/net.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/igmp.h>                 /* for ip_mc_join_group */
+#include <linux/udp.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/kernel.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <net/ip_vs.h>
+
+#define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
+#define IP_VS_SYNC_PORT  8848          /* multicast port */
+
+
+/*
+ *	IPVS sync connection entry
+ */
+struct ip_vs_sync_conn {
+	__u8			reserved;
+
+	/* Protocol, addresses and port numbers */
+	__u8			protocol;       /* Which protocol (TCP/UDP) */
+	__be16			cport;
+	__be16                  vport;
+	__be16                  dport;
+	__be32                  caddr;          /* client address */
+	__be32                  vaddr;          /* virtual address */
+	__be32                  daddr;          /* destination address */
+
+	/* Flags and state transition */
+	__be16                  flags;          /* status flags */
+	__be16                  state;          /* state info */
+
+	/* The sequence options start here */
+};
+
+struct ip_vs_sync_conn_options {
+	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
+	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
+};
+
+struct ip_vs_sync_thread_data {
+	struct socket *sock;
+	char *buf;
+};
+
+#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
+#define FULL_CONN_SIZE  \
+(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+
+
+/*
+  The master mulitcasts messages to the backup load balancers in the
+  following format.
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                                                               |
+      |                    IPVS Sync Connection (1)                   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                            .                                  |
+      |                            .                                  |
+      |                            .                                  |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                                                               |
+      |                    IPVS Sync Connection (n)                   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+#define SYNC_MESG_HEADER_LEN	4
+#define MAX_CONNS_PER_SYNCBUFF	255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
+
+struct ip_vs_sync_mesg {
+	__u8                    nr_conns;
+	__u8                    syncid;
+	__u16                   size;
+
+	/* ip_vs_sync_conn entries start here */
+};
+
+/* the maximum length of sync (sending/receiving) message */
+static int sync_send_mesg_maxlen;
+static int sync_recv_mesg_maxlen;
+
+struct ip_vs_sync_buff {
+	struct list_head        list;
+	unsigned long           firstuse;
+
+	/* pointers for the message data */
+	struct ip_vs_sync_mesg  *mesg;
+	unsigned char           *head;
+	unsigned char           *end;
+};
+
+
+/* the sync_buff list head and the lock */
+static LIST_HEAD(ip_vs_sync_queue);
+static DEFINE_SPINLOCK(ip_vs_sync_lock);
+
+/* current sync_buff for accepting new conn entries */
+static struct ip_vs_sync_buff   *curr_sb = NULL;
+static DEFINE_SPINLOCK(curr_sb_lock);
+
+/* ipvs sync daemon state */
+volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
+volatile int ip_vs_master_syncid = 0;
+volatile int ip_vs_backup_syncid = 0;
+
+/* multicast interface name */
+char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+/* sync daemon tasks */
+static struct task_struct *sync_master_thread;
+static struct task_struct *sync_backup_thread;
+
+/* multicast addr */
+static struct sockaddr_in mcast_addr = {
+	.sin_family		= AF_INET,
+	.sin_port		= __constant_htons(IP_VS_SYNC_PORT),
+	.sin_addr.s_addr	= __constant_htonl(IP_VS_SYNC_GROUP),
+};
+
+
+static inline struct ip_vs_sync_buff *sb_dequeue(void)
+{
+	struct ip_vs_sync_buff *sb;
+
+	spin_lock_bh(&ip_vs_sync_lock);
+	if (list_empty(&ip_vs_sync_queue)) {
+		sb = NULL;
+	} else {
+		sb = list_entry(ip_vs_sync_queue.next,
+				struct ip_vs_sync_buff,
+				list);
+		list_del(&sb->list);
+	}
+	spin_unlock_bh(&ip_vs_sync_lock);
+
+	return sb;
+}
+
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+{
+	struct ip_vs_sync_buff *sb;
+
+	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+		return NULL;
+
+	if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+		kfree(sb);
+		return NULL;
+	}
+	sb->mesg->nr_conns = 0;
+	sb->mesg->syncid = ip_vs_master_syncid;
+	sb->mesg->size = 4;
+	sb->head = (unsigned char *)sb->mesg + 4;
+	sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+	sb->firstuse = jiffies;
+	return sb;
+}
+
+static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
+{
+	kfree(sb->mesg);
+	kfree(sb);
+}
+
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+{
+	spin_lock(&ip_vs_sync_lock);
+	if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+		list_add_tail(&sb->list, &ip_vs_sync_queue);
+	else
+		ip_vs_sync_buff_release(sb);
+	spin_unlock(&ip_vs_sync_lock);
+}
+
+/*
+ *	Get the current sync buffer if it has been created for more
+ *	than the specified time or the specified time is zero.
+ */
+static inline struct ip_vs_sync_buff *
+get_curr_sync_buff(unsigned long time)
+{
+	struct ip_vs_sync_buff *sb;
+
+	spin_lock_bh(&curr_sb_lock);
+	if (curr_sb && (time == 0 ||
+			time_before(jiffies - curr_sb->firstuse, time))) {
+		sb = curr_sb;
+		curr_sb = NULL;
+	} else
+		sb = NULL;
+	spin_unlock_bh(&curr_sb_lock);
+	return sb;
+}
+
+
+/*
+ *      Add an ip_vs_conn information into the current sync_buff.
+ *      Called by ip_vs_in.
+ */
+void ip_vs_sync_conn(struct ip_vs_conn *cp)
+{
+	struct ip_vs_sync_mesg *m;
+	struct ip_vs_sync_conn *s;
+	int len;
+
+	spin_lock(&curr_sb_lock);
+	if (!curr_sb) {
+		if (!(curr_sb=ip_vs_sync_buff_create())) {
+			spin_unlock(&curr_sb_lock);
+			IP_VS_ERR("ip_vs_sync_buff_create failed.\n");
+			return;
+		}
+	}
+
+	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+		SIMPLE_CONN_SIZE;
+	m = curr_sb->mesg;
+	s = (struct ip_vs_sync_conn *)curr_sb->head;
+
+	/* copy members */
+	s->protocol = cp->protocol;
+	s->cport = cp->cport;
+	s->vport = cp->vport;
+	s->dport = cp->dport;
+	s->caddr = cp->caddr.ip;
+	s->vaddr = cp->vaddr.ip;
+	s->daddr = cp->daddr.ip;
+	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
+	s->state = htons(cp->state);
+	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+		struct ip_vs_sync_conn_options *opt =
+			(struct ip_vs_sync_conn_options *)&s[1];
+		memcpy(opt, &cp->in_seq, sizeof(*opt));
+	}
+
+	m->nr_conns++;
+	m->size += len;
+	curr_sb->head += len;
+
+	/* check if there is a space for next one */
+	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
+		sb_queue_tail(curr_sb);
+		curr_sb = NULL;
+	}
+	spin_unlock(&curr_sb_lock);
+
+	/* synchronize its controller if it has */
+	if (cp->control)
+		ip_vs_sync_conn(cp->control);
+}
+
+
+/*
+ *      Process received multicast message and create the corresponding
+ *      ip_vs_conn entries.
+ */
+static void ip_vs_process_message(const char *buffer, const size_t buflen)
+{
+	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
+	struct ip_vs_sync_conn *s;
+	struct ip_vs_sync_conn_options *opt;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_dest *dest;
+	char *p;
+	int i;
+
+	if (buflen < sizeof(struct ip_vs_sync_mesg)) {
+		IP_VS_ERR_RL("sync message header too short\n");
+		return;
+	}
+
+	/* Convert size back to host byte order */
+	m->size = ntohs(m->size);
+
+	if (buflen != m->size) {
+		IP_VS_ERR_RL("bogus sync message size\n");
+		return;
+	}
+
+	/* SyncID sanity check */
+	if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
+		IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
+			  m->syncid);
+		return;
+	}
+
+	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+	for (i=0; i<m->nr_conns; i++) {
+		unsigned flags, state;
+
+		if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
+			IP_VS_ERR_RL("bogus conn in sync message\n");
+			return;
+		}
+		s = (struct ip_vs_sync_conn *) p;
+		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
+		flags &= ~IP_VS_CONN_F_HASHED;
+		if (flags & IP_VS_CONN_F_SEQ_MASK) {
+			opt = (struct ip_vs_sync_conn_options *)&s[1];
+			p += FULL_CONN_SIZE;
+			if (p > buffer+buflen) {
+				IP_VS_ERR_RL("bogus conn options in sync message\n");
+				return;
+			}
+		} else {
+			opt = NULL;
+			p += SIMPLE_CONN_SIZE;
+		}
+
+		state = ntohs(s->state);
+		if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+			pp = ip_vs_proto_get(s->protocol);
+			if (!pp) {
+				IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+					s->protocol);
+				continue;
+			}
+			if (state >= pp->num_states) {
+				IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+					pp->name, state);
+				continue;
+			}
+		} else {
+			/* protocol in templates is not used for state/timeout */
+			pp = NULL;
+			if (state > 0) {
+				IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+					state);
+				state = 0;
+			}
+		}
+
+		if (!(flags & IP_VS_CONN_F_TEMPLATE))
+			cp = ip_vs_conn_in_get(AF_INET, s->protocol,
+					       (union nf_inet_addr *)&s->caddr,
+					       s->cport,
+					       (union nf_inet_addr *)&s->vaddr,
+					       s->vport);
+		else
+			cp = ip_vs_ct_in_get(AF_INET, s->protocol,
+					     (union nf_inet_addr *)&s->caddr,
+					     s->cport,
+					     (union nf_inet_addr *)&s->vaddr,
+					     s->vport);
+		if (!cp) {
+			/*
+			 * Find the appropriate destination for the connection.
+			 * If it is not found the connection will remain unbound
+			 * but still handled.
+			 */
+			dest = ip_vs_find_dest(AF_INET,
+					       (union nf_inet_addr *)&s->daddr,
+					       s->dport,
+					       (union nf_inet_addr *)&s->vaddr,
+					       s->vport,
+					       s->protocol);
+			/*  Set the approprite ativity flag */
+			if (s->protocol == IPPROTO_TCP) {
+				if (state != IP_VS_TCP_S_ESTABLISHED)
+					flags |= IP_VS_CONN_F_INACTIVE;
+				else
+					flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+			cp = ip_vs_conn_new(AF_INET, s->protocol,
+					    (union nf_inet_addr *)&s->caddr,
+					    s->cport,
+					    (union nf_inet_addr *)&s->vaddr,
+					    s->vport,
+					    (union nf_inet_addr *)&s->daddr,
+					    s->dport,
+					    flags, dest);
+			if (dest)
+				atomic_dec(&dest->refcnt);
+			if (!cp) {
+				IP_VS_ERR("ip_vs_conn_new failed\n");
+				return;
+			}
+		} else if (!cp->dest) {
+			dest = ip_vs_try_bind_dest(cp);
+			if (dest)
+				atomic_dec(&dest->refcnt);
+		} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+			   (cp->state != state)) {
+			/* update active/inactive flag for the connection */
+			dest = cp->dest;
+			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				(state != IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+				cp->flags |= IP_VS_CONN_F_INACTIVE;
+			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				(state == IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+		}
+
+		if (opt)
+			memcpy(&cp->in_seq, opt, sizeof(*opt));
+		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+		cp->state = state;
+		cp->old_state = cp->state;
+		/*
+		 * We can not recover the right timeout for templates
+		 * in all cases, we can not find the right fwmark
+		 * virtual service. If needed, we can do it for
+		 * non-fwmark persistent services.
+		 */
+		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
+			cp->timeout = pp->timeout_table[state];
+		else
+			cp->timeout = (3*60*HZ);
+		ip_vs_conn_put(cp);
+	}
+}
+
+
+/*
+ *      Setup loopback of outgoing multicasts on a sending socket
+ */
+static void set_mcast_loop(struct sock *sk, u_char loop)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
+	lock_sock(sk);
+	inet->mc_loop = loop ? 1 : 0;
+	release_sock(sk);
+}
+
+/*
+ *      Specify TTL for outgoing multicasts on a sending socket
+ */
+static void set_mcast_ttl(struct sock *sk, u_char ttl)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
+	lock_sock(sk);
+	inet->mc_ttl = ttl;
+	release_sock(sk);
+}
+
+/*
+ *      Specifiy default interface for outgoing multicasts
+ */
+static int set_mcast_if(struct sock *sk, char *ifname)
+{
+	struct net_device *dev;
+	struct inet_sock *inet = inet_sk(sk);
+
+	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+		return -ENODEV;
+
+	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+		return -EINVAL;
+
+	lock_sock(sk);
+	inet->mc_index = dev->ifindex;
+	/*  inet->mc_addr  = 0; */
+	release_sock(sk);
+
+	return 0;
+}
+
+
+/*
+ *	Set the maximum length of sync message according to the
+ *	specified interface's MTU.
+ */
+static int set_sync_mesg_maxlen(int sync_state)
+{
+	struct net_device *dev;
+	int num;
+
+	if (sync_state == IP_VS_STATE_MASTER) {
+		if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+			return -ENODEV;
+
+		num = (dev->mtu - sizeof(struct iphdr) -
+		       sizeof(struct udphdr) -
+		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
+		sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
+		IP_VS_DBG(7, "setting the maximum length of sync sending "
+			  "message %d.\n", sync_send_mesg_maxlen);
+	} else if (sync_state == IP_VS_STATE_BACKUP) {
+		if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+			return -ENODEV;
+
+		sync_recv_mesg_maxlen = dev->mtu -
+			sizeof(struct iphdr) - sizeof(struct udphdr);
+		IP_VS_DBG(7, "setting the maximum length of sync receiving "
+			  "message %d.\n", sync_recv_mesg_maxlen);
+	}
+
+	return 0;
+}
+
+
+/*
+ *      Join a multicast group.
+ *      the group is specified by a class D multicast address 224.0.0.0/8
+ *      in the in_addr structure passed in as a parameter.
+ */
+static int
+join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
+{
+	struct ip_mreqn mreq;
+	struct net_device *dev;
+	int ret;
+
+	memset(&mreq, 0, sizeof(mreq));
+	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
+
+	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+		return -ENODEV;
+	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+		return -EINVAL;
+
+	mreq.imr_ifindex = dev->ifindex;
+
+	lock_sock(sk);
+	ret = ip_mc_join_group(sk, &mreq);
+	release_sock(sk);
+
+	return ret;
+}
+
+
+static int bind_mcastif_addr(struct socket *sock, char *ifname)
+{
+	struct net_device *dev;
+	__be32 addr;
+	struct sockaddr_in sin;
+
+	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+		return -ENODEV;
+
+	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+	if (!addr)
+		IP_VS_ERR("You probably need to specify IP address on "
+			  "multicast interface.\n");
+
+	IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n",
+		  ifname, NIPQUAD(addr));
+
+	/* Now bind the socket with the address of multicast interface */
+	sin.sin_family	     = AF_INET;
+	sin.sin_addr.s_addr  = addr;
+	sin.sin_port         = 0;
+
+	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
+}
+
+/*
+ *      Set up sending multicast socket over UDP
+ */
+static struct socket * make_send_sock(void)
+{
+	struct socket *sock;
+	int result;
+
+	/* First create a socket */
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	if (result < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return ERR_PTR(result);
+	}
+
+	result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+	if (result < 0) {
+		IP_VS_ERR("Error setting outbound mcast interface\n");
+		goto error;
+	}
+
+	set_mcast_loop(sock->sk, 0);
+	set_mcast_ttl(sock->sk, 1);
+
+	result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+	if (result < 0) {
+		IP_VS_ERR("Error binding address of the mcast interface\n");
+		goto error;
+	}
+
+	result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+			sizeof(struct sockaddr), 0);
+	if (result < 0) {
+		IP_VS_ERR("Error connecting to the multicast addr\n");
+		goto error;
+	}
+
+	return sock;
+
+  error:
+	sock_release(sock);
+	return ERR_PTR(result);
+}
+
+
+/*
+ *      Set up receiving multicast socket over UDP
+ */
+static struct socket * make_receive_sock(void)
+{
+	struct socket *sock;
+	int result;
+
+	/* First create a socket */
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	if (result < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return ERR_PTR(result);
+	}
+
+	/* it is equivalent to the REUSEADDR option in user-space */
+	sock->sk->sk_reuse = 1;
+
+	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+			sizeof(struct sockaddr));
+	if (result < 0) {
+		IP_VS_ERR("Error binding to the multicast addr\n");
+		goto error;
+	}
+
+	/* join the multicast group */
+	result = join_mcast_group(sock->sk,
+			(struct in_addr *) &mcast_addr.sin_addr,
+			ip_vs_backup_mcast_ifn);
+	if (result < 0) {
+		IP_VS_ERR("Error joining to the multicast group\n");
+		goto error;
+	}
+
+	return sock;
+
+  error:
+	sock_release(sock);
+	return ERR_PTR(result);
+}
+
+
+static int
+ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
+{
+	struct msghdr	msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
+	struct kvec	iov;
+	int		len;
+
+	EnterFunction(7);
+	iov.iov_base     = (void *)buffer;
+	iov.iov_len      = length;
+
+	len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
+
+	LeaveFunction(7);
+	return len;
+}
+
+static void
+ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
+{
+	int msize;
+
+	msize = msg->size;
+
+	/* Put size in network byte order */
+	msg->size = htons(msg->size);
+
+	if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
+		IP_VS_ERR("ip_vs_send_async error\n");
+}
+
+static int
+ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
+{
+	struct msghdr		msg = {NULL,};
+	struct kvec		iov;
+	int			len;
+
+	EnterFunction(7);
+
+	/* Receive a packet */
+	iov.iov_base     = buffer;
+	iov.iov_len      = (size_t)buflen;
+
+	len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
+
+	if (len < 0)
+		return -1;
+
+	LeaveFunction(7);
+	return len;
+}
+
+
+static int sync_thread_master(void *data)
+{
+	struct ip_vs_sync_thread_data *tinfo = data;
+	struct ip_vs_sync_buff *sb;
+
+	IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
+		   "syncid = %d\n",
+		   ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+
+	while (!kthread_should_stop()) {
+		while ((sb = sb_dequeue())) {
+			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
+			ip_vs_sync_buff_release(sb);
+		}
+
+		/* check if entries stay in curr_sb for 2 seconds */
+		sb = get_curr_sync_buff(2 * HZ);
+		if (sb) {
+			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
+			ip_vs_sync_buff_release(sb);
+		}
+
+		schedule_timeout_interruptible(HZ);
+	}
+
+	/* clean up the sync_buff queue */
+	while ((sb=sb_dequeue())) {
+		ip_vs_sync_buff_release(sb);
+	}
+
+	/* clean up the current sync_buff */
+	if ((sb = get_curr_sync_buff(0))) {
+		ip_vs_sync_buff_release(sb);
+	}
+
+	/* release the sending multicast socket */
+	sock_release(tinfo->sock);
+	kfree(tinfo);
+
+	return 0;
+}
+
+
+static int sync_thread_backup(void *data)
+{
+	struct ip_vs_sync_thread_data *tinfo = data;
+	int len;
+
+	IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
+		   "syncid = %d\n",
+		   ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
+			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
+			 || kthread_should_stop());
+
+		/* do we have data now? */
+		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
+			len = ip_vs_receive(tinfo->sock, tinfo->buf,
+					sync_recv_mesg_maxlen);
+			if (len <= 0) {
+				IP_VS_ERR("receiving message error\n");
+				break;
+			}
+
+			/* disable bottom half, because it accesses the data
+			   shared by softirq while getting/creating conns */
+			local_bh_disable();
+			ip_vs_process_message(tinfo->buf, len);
+			local_bh_enable();
+		}
+	}
+
+	/* release the sending multicast socket */
+	sock_release(tinfo->sock);
+	kfree(tinfo->buf);
+	kfree(tinfo);
+
+	return 0;
+}
+
+
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+{
+	struct ip_vs_sync_thread_data *tinfo;
+	struct task_struct **realtask, *task;
+	struct socket *sock;
+	char *name, *buf = NULL;
+	int (*threadfn)(void *data);
+	int result = -ENOMEM;
+
+	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+		  sizeof(struct ip_vs_sync_conn));
+
+	if (state == IP_VS_STATE_MASTER) {
+		if (sync_master_thread)
+			return -EEXIST;
+
+		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_master_mcast_ifn));
+		ip_vs_master_syncid = syncid;
+		realtask = &sync_master_thread;
+		name = "ipvs_syncmaster";
+		threadfn = sync_thread_master;
+		sock = make_send_sock();
+	} else if (state == IP_VS_STATE_BACKUP) {
+		if (sync_backup_thread)
+			return -EEXIST;
+
+		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_backup_mcast_ifn));
+		ip_vs_backup_syncid = syncid;
+		realtask = &sync_backup_thread;
+		name = "ipvs_syncbackup";
+		threadfn = sync_thread_backup;
+		sock = make_receive_sock();
+	} else {
+		return -EINVAL;
+	}
+
+	if (IS_ERR(sock)) {
+		result = PTR_ERR(sock);
+		goto out;
+	}
+
+	set_sync_mesg_maxlen(state);
+	if (state == IP_VS_STATE_BACKUP) {
+		buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+		if (!buf)
+			goto outsocket;
+	}
+
+	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+	if (!tinfo)
+		goto outbuf;
+
+	tinfo->sock = sock;
+	tinfo->buf = buf;
+
+	task = kthread_run(threadfn, tinfo, name);
+	if (IS_ERR(task)) {
+		result = PTR_ERR(task);
+		goto outtinfo;
+	}
+
+	/* mark as active */
+	*realtask = task;
+	ip_vs_sync_state |= state;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	return 0;
+
+outtinfo:
+	kfree(tinfo);
+outbuf:
+	kfree(buf);
+outsocket:
+	sock_release(sock);
+out:
+	return result;
+}
+
+
+int stop_sync_thread(int state)
+{
+	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+
+	if (state == IP_VS_STATE_MASTER) {
+		if (!sync_master_thread)
+			return -ESRCH;
+
+		IP_VS_INFO("stopping master sync thread %d ...\n",
+			   task_pid_nr(sync_master_thread));
+
+		/*
+		 * The lock synchronizes with sb_queue_tail(), so that we don't
+		 * add sync buffers to the queue, when we are already in
+		 * progress of stopping the master sync daemon.
+		 */
+
+		spin_lock_bh(&ip_vs_sync_lock);
+		ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
+		spin_unlock_bh(&ip_vs_sync_lock);
+		kthread_stop(sync_master_thread);
+		sync_master_thread = NULL;
+	} else if (state == IP_VS_STATE_BACKUP) {
+		if (!sync_backup_thread)
+			return -ESRCH;
+
+		IP_VS_INFO("stopping backup sync thread %d ...\n",
+			   task_pid_nr(sync_backup_thread));
+
+		ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
+		kthread_stop(sync_backup_thread);
+		sync_backup_thread = NULL;
+	} else {
+		return -EINVAL;
+	}
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	return 0;
+}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
new file mode 100644
index 00000000000..8c596e71259
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -0,0 +1,128 @@
+/*
+ * IPVS:        Weighted Least-Connection Scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     changed the ip_vs_wlc_schedule to return dest
+ *     Wensong Zhang            :     changed to use the inactconns in scheduling
+ *     Wensong Zhang            :     changed some comestics things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_wlc_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static inline unsigned int
+ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We think the overhead of processing active connections is 256
+	 * times higher than that of inactive connections in average. (This
+	 * 256 times might not be accurate, we will change it later) We
+	 * use the following formula to estimate the overhead now:
+	 *		  dest->activeconns*256 + dest->inactconns
+	 */
+	return (atomic_read(&dest->activeconns) << 8) +
+		atomic_read(&dest->inactconns);
+}
+
+
+/*
+ *	Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least;
+	unsigned int loh, doh;
+
+	IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *		  (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+		    atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = ip_vs_wlc_dest_overhead(least);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		doh = ip_vs_wlc_dest_overhead(dest);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG_BUF(6, "WLC: server %s:%u "
+		      "activeconns %d refcnt %d weight %d overhead %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+		      atomic_read(&least->activeconns),
+		      atomic_read(&least->refcnt),
+		      atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wlc_scheduler =
+{
+	.name =			"wlc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.schedule =		ip_vs_wlc_schedule,
+};
+
+
+static int __init ip_vs_wlc_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+}
+
+static void __exit ip_vs_wlc_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+}
+
+module_init(ip_vs_wlc_init);
+module_exit(ip_vs_wlc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
new file mode 100644
index 00000000000..7ea92fed50b
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -0,0 +1,237 @@
+/*
+ * IPVS:        Weighted Round-Robin Scheduling module
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     changed the ip_vs_wrr_schedule to return dest
+ *     Wensong Zhang            :     changed some comestics things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_wrr_update_svc
+ *     Julian Anastasov         :     fixed the bug of returning destination
+ *                                    with weight 0 when all weights are zero
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+
+#include <net/ip_vs.h>
+
+/*
+ * current destination pointer for weighted round-robin scheduling
+ */
+struct ip_vs_wrr_mark {
+	struct list_head *cl;	/* current list head */
+	int cw;			/* current weight */
+	int mw;			/* maximum weight */
+	int di;			/* decreasing interval */
+};
+
+
+/*
+ *    Get the gcd of server weights
+ */
+static int gcd(int a, int b)
+{
+	int c;
+
+	while ((c = a % b)) {
+		a = b;
+		b = c;
+	}
+	return b;
+}
+
+static int ip_vs_wrr_gcd_weight(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+	int weight;
+	int g = 0;
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		weight = atomic_read(&dest->weight);
+		if (weight > 0) {
+			if (g > 0)
+				g = gcd(weight, g);
+			else
+				g = weight;
+		}
+	}
+	return g ? g : 1;
+}
+
+
+/*
+ *    Get the maximum weight of the service destinations.
+ */
+static int ip_vs_wrr_max_weight(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+	int weight = 0;
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (atomic_read(&dest->weight) > weight)
+			weight = atomic_read(&dest->weight);
+	}
+
+	return weight;
+}
+
+
+static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *mark;
+
+	/*
+	 *    Allocate the mark variable for WRR scheduling
+	 */
+	mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
+	if (mark == NULL) {
+		IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	mark->cl = &svc->destinations;
+	mark->cw = 0;
+	mark->mw = ip_vs_wrr_max_weight(svc);
+	mark->di = ip_vs_wrr_gcd_weight(svc);
+	svc->sched_data = mark;
+
+	return 0;
+}
+
+
+static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
+{
+	/*
+	 *    Release the mark variable
+	 */
+	kfree(svc->sched_data);
+
+	return 0;
+}
+
+
+static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *mark = svc->sched_data;
+
+	mark->cl = &svc->destinations;
+	mark->mw = ip_vs_wrr_max_weight(svc);
+	mark->di = ip_vs_wrr_gcd_weight(svc);
+	if (mark->cw > mark->mw)
+		mark->cw = 0;
+	return 0;
+}
+
+
+/*
+ *    Weighted Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_wrr_mark *mark = svc->sched_data;
+	struct list_head *p;
+
+	IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");
+
+	/*
+	 * This loop will always terminate, because mark->cw in (0, max_weight]
+	 * and at least one server has its weight equal to max_weight.
+	 */
+	write_lock(&svc->sched_lock);
+	p = mark->cl;
+	while (1) {
+		if (mark->cl == &svc->destinations) {
+			/* it is at the head of the destination list */
+
+			if (mark->cl == mark->cl->next) {
+				/* no dest entry */
+				dest = NULL;
+				goto out;
+			}
+
+			mark->cl = svc->destinations.next;
+			mark->cw -= mark->di;
+			if (mark->cw <= 0) {
+				mark->cw = mark->mw;
+				/*
+				 * Still zero, which means no available servers.
+				 */
+				if (mark->cw == 0) {
+					mark->cl = &svc->destinations;
+					IP_VS_ERR_RL("ip_vs_wrr_schedule(): "
+						   "no available servers\n");
+					dest = NULL;
+					goto out;
+				}
+			}
+		} else
+			mark->cl = mark->cl->next;
+
+		if (mark->cl != &svc->destinations) {
+			/* not at the head of the list */
+			dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
+			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+			    atomic_read(&dest->weight) >= mark->cw) {
+				/* got it */
+				break;
+			}
+		}
+
+		if (mark->cl == p && mark->cw == mark->di) {
+			/* back to the start, and no dest is found.
+			   It is only possible when all dests are OVERLOADED */
+			dest = NULL;
+			goto out;
+		}
+	}
+
+	IP_VS_DBG_BUF(6, "WRR: server %s:%u "
+		      "activeconns %d refcnt %d weight %d\n",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+		      atomic_read(&dest->activeconns),
+		      atomic_read(&dest->refcnt),
+		      atomic_read(&dest->weight));
+
+  out:
+	write_unlock(&svc->sched_lock);
+	return dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
+	.name =			"wrr",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.n_list =		LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+	.supports_ipv6 =	1,
+#endif
+	.init_service =		ip_vs_wrr_init_svc,
+	.done_service =		ip_vs_wrr_done_svc,
+	.update_service =	ip_vs_wrr_update_svc,
+	.schedule =		ip_vs_wrr_schedule,
+};
+
+static int __init ip_vs_wrr_init(void)
+{
+	return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
+}
+
+static void __exit ip_vs_wrr_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
+}
+
+module_init(ip_vs_wrr_init);
+module_exit(ip_vs_wrr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
new file mode 100644
index 00000000000..02ddc2b3ce2
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -0,0 +1,1004 @@
+/*
+ * ip_vs_xmit.c: various packet transmitters for IPVS
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/tcp.h>                  /* for tcphdr */
+#include <net/ip.h>
+#include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <net/udp.h>
+#include <net/icmp.h>                   /* for icmp_send */
+#include <net/route.h>                  /* for ip_route_output */
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/icmpv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      Destination cache to speed up outgoing route lookup
+ */
+static inline void
+__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
+{
+	struct dst_entry *old_dst;
+
+	old_dst = dest->dst_cache;
+	dest->dst_cache = dst;
+	dest->dst_rtos = rtos;
+	dst_release(old_dst);
+}
+
+static inline struct dst_entry *
+__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
+{
+	struct dst_entry *dst = dest->dst_cache;
+
+	if (!dst)
+		return NULL;
+	if ((dst->obsolete
+	     || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
+	    dst->ops->check(dst, cookie) == NULL) {
+		dest->dst_cache = NULL;
+		dst_release(dst);
+		return NULL;
+	}
+	dst_hold(dst);
+	return dst;
+}
+
+static struct rtable *
+__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (dest) {
+		spin_lock(&dest->dst_lock);
+		if (!(rt = (struct rtable *)
+		      __ip_vs_dst_check(dest, rtos, 0))) {
+			struct flowi fl = {
+				.oif = 0,
+				.nl_u = {
+					.ip4_u = {
+						.daddr = dest->addr.ip,
+						.saddr = 0,
+						.tos = rtos, } },
+			};
+
+			if (ip_route_output_key(&init_net, &rt, &fl)) {
+				spin_unlock(&dest->dst_lock);
+				IP_VS_DBG_RL("ip_route_output error, "
+					     "dest: %u.%u.%u.%u\n",
+					     NIPQUAD(dest->addr.ip));
+				return NULL;
+			}
+			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
+			IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
+				  NIPQUAD(dest->addr.ip),
+				  atomic_read(&rt->u.dst.__refcnt), rtos);
+		}
+		spin_unlock(&dest->dst_lock);
+	} else {
+		struct flowi fl = {
+			.oif = 0,
+			.nl_u = {
+				.ip4_u = {
+					.daddr = cp->daddr.ip,
+					.saddr = 0,
+					.tos = rtos, } },
+		};
+
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
+			IP_VS_DBG_RL("ip_route_output error, dest: "
+				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
+			return NULL;
+		}
+	}
+
+	return rt;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+static struct rt6_info *
+__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
+{
+	struct rt6_info *rt;			/* Route to the other host */
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (dest) {
+		spin_lock(&dest->dst_lock);
+		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
+		if (!rt) {
+			struct flowi fl = {
+				.oif = 0,
+				.nl_u = {
+					.ip6_u = {
+						.daddr = dest->addr.in6,
+						.saddr = {
+							.s6_addr32 =
+								{ 0, 0, 0, 0 },
+						},
+					},
+				},
+			};
+
+			rt = (struct rt6_info *)ip6_route_output(&init_net,
+								 NULL, &fl);
+			if (!rt) {
+				spin_unlock(&dest->dst_lock);
+				IP_VS_DBG_RL("ip6_route_output error, "
+					     "dest: " NIP6_FMT "\n",
+					     NIP6(dest->addr.in6));
+				return NULL;
+			}
+			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
+			IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n",
+				  NIP6(dest->addr.in6),
+				  atomic_read(&rt->u.dst.__refcnt));
+		}
+		spin_unlock(&dest->dst_lock);
+	} else {
+		struct flowi fl = {
+			.oif = 0,
+			.nl_u = {
+				.ip6_u = {
+					.daddr = cp->daddr.in6,
+					.saddr = {
+						.s6_addr32 = { 0, 0, 0, 0 },
+					},
+				},
+			},
+		};
+
+		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+		if (!rt) {
+			IP_VS_DBG_RL("ip6_route_output error, dest: "
+				     NIP6_FMT "\n", NIP6(cp->daddr.in6));
+			return NULL;
+		}
+	}
+
+	return rt;
+}
+#endif
+
+
+/*
+ *	Release dest->dst_cache before a dest is removed
+ */
+void
+ip_vs_dst_reset(struct ip_vs_dest *dest)
+{
+	struct dst_entry *old_dst;
+
+	old_dst = dest->dst_cache;
+	dest->dst_cache = NULL;
+	dst_release(old_dst);
+}
+
+#define IP_VS_XMIT(pf, skb, rt)				\
+do {							\
+	(skb)->ipvs_property = 1;			\
+	skb_forward_csum(skb);				\
+	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
+		(rt)->u.dst.dev, dst_output);		\
+} while (0)
+
+
+/*
+ *      NULL transmitter (do nothing except return NF_ACCEPT)
+ */
+int
+ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		struct ip_vs_protocol *pp)
+{
+	/* we do not touch skb and do not need pskb ptr */
+	return NF_ACCEPT;
+}
+
+
+/*
+ *      Bypass transmitter
+ *      Let packets bypass the destination when the destination is not
+ *      available, it may be only used in transparent cache cluster.
+ */
+int
+ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct iphdr  *iph = ip_hdr(skb);
+	u8     tos = iph->tos;
+	int    mtu;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip4_u = {
+				.daddr = iph->daddr,
+				.saddr = 0,
+				.tos = RT_TOS(tos), } },
+	};
+
+	EnterFunction(10);
+
+	if (ip_route_output_key(&init_net, &rt, &fl)) {
+		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
+			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
+		goto tx_error_icmp;
+	}
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
+		ip_rt_put(rt);
+		return NF_STOLEN;
+	}
+	ip_send_check(ip_hdr(skb));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+ tx_error_icmp:
+	dst_link_failure(skb);
+ tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		     struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;			/* Route to the other host */
+	struct ipv6hdr  *iph = ipv6_hdr(skb);
+	int    mtu;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = iph->daddr,
+				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+	};
+
+	EnterFunction(10);
+
+	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+	if (!rt) {
+		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, "
+			     "dest: " NIP6_FMT "\n", NIP6(iph->daddr));
+		goto tx_error_icmp;
+	}
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		dst_release(&rt->u.dst);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(skb == NULL)) {
+		dst_release(&rt->u.dst);
+		return NF_STOLEN;
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+ tx_error_icmp:
+	dst_link_failure(skb);
+ tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
+/*
+ *      NAT transmitter (only for outside-to-inside nat forwarding)
+ *      Not used for related ICMP
+ */
+int
+ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+	       struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;		/* Route to the other host */
+	int mtu;
+	struct iphdr *iph = ip_hdr(skb);
+
+	EnterFunction(10);
+
+	/* check if it is a connection of no-client-port */
+	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+		__be16 _pt, *p;
+		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
+		if (p == NULL)
+			goto tx_error;
+		ip_vs_conn_fill_cport(cp, *p);
+		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+	}
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, sizeof(struct iphdr)))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* mangle the packet */
+	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+		goto tx_error;
+	ip_hdr(skb)->daddr = cp->daddr.ip;
+	ip_send_check(ip_hdr(skb));
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+	/* FIXME: when application helper enlarges the packet and the length
+	   is larger than the MTU of outgoing device, there will be still
+	   MTU problem. */
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	LeaveFunction(10);
+	kfree_skb(skb);
+	return NF_STOLEN;
+  tx_error_put:
+	ip_rt_put(rt);
+	goto tx_error;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;		/* Route to the other host */
+	int mtu;
+
+	EnterFunction(10);
+
+	/* check if it is a connection of no-client-port */
+	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+		__be16 _pt, *p;
+		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
+				       sizeof(_pt), &_pt);
+		if (p == NULL)
+			goto tx_error;
+		ip_vs_conn_fill_cport(cp, *p);
+		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+	}
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		dst_release(&rt->u.dst);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+				 "ip_vs_nat_xmit_v6(): frag needed for");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* mangle the packet */
+	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+		goto tx_error;
+	ipv6_hdr(skb)->daddr = cp->daddr.in6;
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+	/* FIXME: when application helper enlarges the packet and the length
+	   is larger than the MTU of outgoing device, there will be still
+	   MTU problem. */
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	LeaveFunction(10);
+	kfree_skb(skb);
+	return NF_STOLEN;
+tx_error_put:
+	dst_release(&rt->u.dst);
+	goto tx_error;
+}
+#endif
+
+
+/*
+ *   IP Tunneling transmitter
+ *
+ *   This function encapsulates the packet in a new IP packet, its
+ *   destination will be set to cp->daddr. Most code of this function
+ *   is taken from ipip.c.
+ *
+ *   It is used in VS/TUN cluster. The load balancer selects a real
+ *   server from a cluster based on a scheduling algorithm,
+ *   encapsulates the request packet and forwards it to the selected
+ *   server. For example, all real servers are configured with
+ *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
+ *   the encapsulated packet, it will decapsulate the packet, processe
+ *   the request and return the response packets directly to the client
+ *   without passing the load balancer. This can greatly increase the
+ *   scalability of virtual server.
+ *
+ *   Used for ANY protocol
+ */
+int
+ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct net_device *tdev;		/* Device to other host */
+	struct iphdr  *old_iph = ip_hdr(skb);
+	u8     tos = old_iph->tos;
+	__be16 df = old_iph->frag_off;
+	sk_buff_data_t old_transport_header = skb->transport_header;
+	struct iphdr  *iph;			/* Our new IP header */
+	unsigned int max_headroom;		/* The extra header space needed */
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (skb->protocol != htons(ETH_P_IP)) {
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
+			     "ETH_P_IP: %d, skb protocol: %d\n",
+			     htons(ETH_P_IP), skb->protocol);
+		goto tx_error;
+	}
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
+		goto tx_error_icmp;
+
+	tdev = rt->u.dst.dev;
+
+	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	if (mtu < 68) {
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
+		goto tx_error;
+	}
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	df |= (old_iph->frag_off & htons(IP_DF));
+
+	if ((old_iph->frag_off & htons(IP_DF))
+	    && mtu < ntohs(old_iph->tot_len)) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
+
+	if (skb_headroom(skb) < max_headroom
+	    || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb =
+			skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			ip_rt_put(rt);
+			kfree_skb(skb);
+			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
+			return NF_STOLEN;
+		}
+		kfree_skb(skb);
+		skb = new_skb;
+		old_iph = ip_hdr(skb);
+	}
+
+	skb->transport_header = old_transport_header;
+
+	/* fix old IP header checksum */
+	ip_send_check(old_iph);
+
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+	iph			=	ip_hdr(skb);
+	iph->version		=	4;
+	iph->ihl		=	sizeof(struct iphdr)>>2;
+	iph->frag_off		=	df;
+	iph->protocol		=	IPPROTO_IPIP;
+	iph->tos		=	tos;
+	iph->daddr		=	rt->rt_dst;
+	iph->saddr		=	rt->rt_src;
+	iph->ttl		=	old_iph->ttl;
+	ip_select_ident(iph, &rt->u.dst, NULL);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	ip_local_out(skb);
+
+	LeaveFunction(10);
+
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		     struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;		/* Route to the other host */
+	struct net_device *tdev;	/* Device to other host */
+	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
+	sk_buff_data_t old_transport_header = skb->transport_header;
+	struct ipv6hdr  *iph;		/* Our new IP header */
+	unsigned int max_headroom;	/* The extra header space needed */
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (skb->protocol != htons(ETH_P_IPV6)) {
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
+			     "ETH_P_IPV6: %d, skb protocol: %d\n",
+			     htons(ETH_P_IPV6), skb->protocol);
+		goto tx_error;
+	}
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	tdev = rt->u.dst.dev;
+
+	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
+	/* TODO IPv6: do we need this check in IPv6? */
+	if (mtu < 1280) {
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
+		goto tx_error;
+	}
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
+
+	if (skb_headroom(skb) < max_headroom
+	    || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb =
+			skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			dst_release(&rt->u.dst);
+			kfree_skb(skb);
+			IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
+			return NF_STOLEN;
+		}
+		kfree_skb(skb);
+		skb = new_skb;
+		old_iph = ipv6_hdr(skb);
+	}
+
+	skb->transport_header = old_transport_header;
+
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+	iph			=	ipv6_hdr(skb);
+	iph->version		=	6;
+	iph->nexthdr		=	IPPROTO_IPV6;
+	iph->payload_len	=	old_iph->payload_len + sizeof(old_iph);
+	iph->priority		=	old_iph->priority;
+	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+	iph->daddr		=	rt->rt6i_dst.addr;
+	iph->saddr		=	cp->vaddr.in6; /* rt->rt6i_src.addr; */
+	iph->hop_limit		=	old_iph->hop_limit;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	ip6_local_out(skb);
+
+	LeaveFunction(10);
+
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
+
+/*
+ *      Direct Routing transmitter
+ *      Used for ANY protocol
+ */
+int
+ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+	      struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct iphdr  *iph = ip_hdr(skb);
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
+		ip_rt_put(rt);
+		return NF_STOLEN;
+	}
+	ip_send_check(ip_hdr(skb));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		 struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;			/* Route to the other host */
+	int    mtu;
+
+	EnterFunction(10);
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(skb == NULL)) {
+		dst_release(&rt->u.dst);
+		return NF_STOLEN;
+	}
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
+
+/*
+ *	ICMP packet transmitter
+ *	called by the ip_vs_in_icmp
+ */
+int
+ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		struct ip_vs_protocol *pp, int offset)
+{
+	struct rtable	*rt;	/* Route to the other host */
+	int mtu;
+	int rc;
+
+	EnterFunction(10);
+
+	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+	   forwarded directly here, because there is no need to
+	   translate address/port back */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+		if (cp->packet_xmit)
+			rc = cp->packet_xmit(skb, cp, pp);
+		else
+			rc = NF_ACCEPT;
+		/* do not touch skb anymore */
+		atomic_inc(&cp->in_pkts);
+		goto out;
+	}
+
+	/*
+	 * mangle and send the packet here (only for VS/NAT)
+	 */
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, offset))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop the old route when skb is not shared */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	ip_vs_nat_icmp(skb, pp, cp, 0);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET, skb, rt);
+
+	rc = NF_STOLEN;
+	goto out;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	dev_kfree_skb(skb);
+	rc = NF_STOLEN;
+  out:
+	LeaveFunction(10);
+	return rc;
+  tx_error_put:
+	ip_rt_put(rt);
+	goto tx_error;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+int
+ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		struct ip_vs_protocol *pp, int offset)
+{
+	struct rt6_info	*rt;	/* Route to the other host */
+	int mtu;
+	int rc;
+
+	EnterFunction(10);
+
+	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+	   forwarded directly here, because there is no need to
+	   translate address/port back */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+		if (cp->packet_xmit)
+			rc = cp->packet_xmit(skb, cp, pp);
+		else
+			rc = NF_ACCEPT;
+		/* do not touch skb anymore */
+		atomic_inc(&cp->in_pkts);
+		goto out;
+	}
+
+	/*
+	 * mangle and send the packet here (only for VS/NAT)
+	 */
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if (skb->len > mtu) {
+		dst_release(&rt->u.dst);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!skb_make_writable(skb, offset))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop the old route when skb is not shared */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(PF_INET6, skb, rt);
+
+	rc = NF_STOLEN;
+	goto out;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	dev_kfree_skb(skb);
+	rc = NF_STOLEN;
+out:
+	LeaveFunction(10);
+	return rc;
+tx_error_put:
+	dst_release(&rt->u.dst);
+	goto tx_error;
+}
+#endif
-- 
cgit v1.2.3-70-g09d2


From 74af025073461b9ebe82771e48a5b8596c3cf75c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 5 Sep 2008 12:38:09 -0700
Subject: wireless: restore revert lost to merge damage

Restore revert "mac80211: Use IWEVASSOCREQIE instead of IWEVCUSTOM",
originally reverted in commit bf7394ccc13fe291d9258f01113b4c61214ddeae.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 47 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e859a0ab616..edc339d649c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -650,20 +650,51 @@ static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata,
 static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata,
 					 struct ieee80211_if_sta *ifsta)
 {
+	char *buf;
+	size_t len;
+	int i;
 	union iwreq_data wrqu;
 
+	if (!ifsta->assocreq_ies && !ifsta->assocresp_ies)
+		return;
+
+	buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len +
+				ifsta->assocresp_ies_len), GFP_KERNEL);
+	if (!buf)
+		return;
+
+	len = sprintf(buf, "ASSOCINFO(");
 	if (ifsta->assocreq_ies) {
-		memset(&wrqu, 0, sizeof(wrqu));
-		wrqu.data.length = ifsta->assocreq_ies_len;
-		wireless_send_event(sdata->dev, IWEVASSOCREQIE, &wrqu,
-				    ifsta->assocreq_ies);
+		len += sprintf(buf + len, "ReqIEs=");
+		for (i = 0; i < ifsta->assocreq_ies_len; i++) {
+			len += sprintf(buf + len, "%02x",
+				       ifsta->assocreq_ies[i]);
+		}
 	}
 	if (ifsta->assocresp_ies) {
-		memset(&wrqu, 0, sizeof(wrqu));
-		wrqu.data.length = ifsta->assocresp_ies_len;
-		wireless_send_event(sdata->dev, IWEVASSOCRESPIE, &wrqu,
-				    ifsta->assocresp_ies);
+		if (ifsta->assocreq_ies)
+			len += sprintf(buf + len, " ");
+		len += sprintf(buf + len, "RespIEs=");
+		for (i = 0; i < ifsta->assocresp_ies_len; i++) {
+			len += sprintf(buf + len, "%02x",
+				       ifsta->assocresp_ies[i]);
+		}
 	}
+	len += sprintf(buf + len, ")");
+
+	if (len > IW_CUSTOM_MAX) {
+		len = sprintf(buf, "ASSOCRESPIE=");
+		for (i = 0; i < ifsta->assocresp_ies_len; i++) {
+			len += sprintf(buf + len, "%02x",
+				       ifsta->assocresp_ies[i]);
+		}
+	}
+
+	memset(&wrqu, 0, sizeof(wrqu));
+	wrqu.data.length = len;
+	wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
+
+	kfree(buf);
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 8ef9dad3f7c0bdae84cd57f2bc6d4f53421406a8 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Sat, 27 Sep 2008 22:58:18 +0300
Subject: mac80211: remove shadowed variables in ieee80211_master_start_xmit

This patch removes doubly defined variables in ieee80211_master_start_xmit

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0cc2e23f082..226ce77363d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1255,8 +1255,7 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
 	return 0;
 }
 
-int ieee80211_master_start_xmit(struct sk_buff *skb,
-				struct net_device *dev)
+int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ieee80211_master_priv *mpriv = netdev_priv(dev);
 	struct ieee80211_local *local = mpriv->local;
@@ -1308,8 +1307,6 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 		}
 	} else if (unlikely(osdata->vif.type == NL80211_IFTYPE_MONITOR)) {
 		struct ieee80211_sub_if_data *sdata;
-		struct ieee80211_local *local = osdata->local;
-		struct ieee80211_hdr *hdr;
 		int hdrlen;
 		u16 len_rthdr;
 
-- 
cgit v1.2.3-70-g09d2


From 5d6ffc533678c936e366809acaff8401af43a4af Mon Sep 17 00:00:00 2001
From: Davide Pesavento <davidepesa@gmail.com>
Date: Tue, 30 Sep 2008 19:56:34 +0200
Subject: wireless: fix typo in Kconfig.

Signed-off-by: Davide Pesavento <davidepesa@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index b97bd9fe6b7..7d82be07fa1 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -15,7 +15,7 @@ config NL80211
 	  If unsure, say Y.
 
 config WIRELESS_OLD_REGULATORY
-	bool "Old wireless static regulatory defintions"
+	bool "Old wireless static regulatory definitions"
 	default n
 	---help---
 	  This option enables the old static regulatory information
-- 
cgit v1.2.3-70-g09d2


From f74b6a5498049bab28419a03e4b31fcdbe7a900d Mon Sep 17 00:00:00 2001
From: Rami Rosen <roszenrami@gmail.com>
Date: Thu, 2 Oct 2008 16:48:22 +0300
Subject: mac80211: remove redundant check in ieee80211_master_start_xmit
 (net/mac80211/tx.c)

 - This patch (against the linux-wireless-next git tree) removes a
redundant check in ieee80211_master_start_xmit (net/mac80211/tx.c)
and adjust indentation in this method accordingly.

 In this method, there is no need to call again the
ieee80211_is_data() method; this is checked immediately before, in the
"if" command (we will not enter this block unless ieee80211_is_data()
is true, so that the "and" (&&) condition in that "if" command will be
fullfilled ).

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 226ce77363d..d7153bbcdb2 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1295,16 +1295,14 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (ieee80211_vif_is_mesh(&osdata->vif) &&
 	    ieee80211_is_data(hdr->frame_control)) {
-		if (ieee80211_is_data(hdr->frame_control)) {
-			if (is_multicast_ether_addr(hdr->addr3))
-				memcpy(hdr->addr1, hdr->addr3, ETH_ALEN);
-			else
-				if (mesh_nexthop_lookup(skb, osdata))
-					return  0;
-			if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0)
-				IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh,
-							     fwded_frames);
-		}
+		if (is_multicast_ether_addr(hdr->addr3))
+			memcpy(hdr->addr1, hdr->addr3, ETH_ALEN);
+		else
+			if (mesh_nexthop_lookup(skb, osdata))
+				return  0;
+		if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0)
+			IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh,
+							    fwded_frames);
 	} else if (unlikely(osdata->vif.type == NL80211_IFTYPE_MONITOR)) {
 		struct ieee80211_sub_if_data *sdata;
 		int hdrlen;
-- 
cgit v1.2.3-70-g09d2


From 417bd25ac4c6f76c8aafe8a584f3620f4a936b72 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Fri, 3 Oct 2008 16:58:05 -0300
Subject: rfkill: update LEDs for all state changes

The LED state was not being updated by rfkill_force_state(), which
will cause regressions in wireless drivers that had old-style rfkill
support and are updated to use rfkill_force_state().

The LED state was not being updated when a change was detected through
the rfkill->get_state() hook, either.

Move the LED trigger update calls into notify_rfkill_state_change(),
where it should have been in the first place.  This takes care of both
issues above.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/rfkill.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index ea0dc04b3c7..f949a482b00 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -125,6 +125,7 @@ static void rfkill_led_trigger_activate(struct led_classdev *led)
 
 static void notify_rfkill_state_change(struct rfkill *rfkill)
 {
+	rfkill_led_trigger(rfkill, rfkill->state);
 	blocking_notifier_call_chain(&rfkill_notifier_list,
 			RFKILL_STATE_CHANGED,
 			rfkill);
@@ -217,10 +218,8 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
 			rfkill->state = state;
 	}
 
-	if (force || rfkill->state != oldstate) {
-		rfkill_led_trigger(rfkill, rfkill->state);
+	if (force || rfkill->state != oldstate)
 		notify_rfkill_state_change(rfkill);
-	}
 
 	return retval;
 }
-- 
cgit v1.2.3-70-g09d2


From 76708dee382a69b2f9d0e50f413f99fefb2dc509 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 5 Oct 2008 18:02:48 +0200
Subject: mac80211: free up 2 bytes in skb->cb

Free up 2 bytes in skb->cb to be used for multi-rate retry later.
Move iv_len and icv_len initialization into key alloc.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/base.c          |  2 +-
 drivers/net/wireless/ath9k/xmit.c          |  2 +-
 drivers/net/wireless/b43/xmit.c            |  4 ++--
 drivers/net/wireless/b43legacy/xmit.c      |  4 ++--
 drivers/net/wireless/rt2x00/rt2x00crypto.c |  4 ++--
 drivers/net/wireless/rt2x00/rt2x00queue.c  |  7 +++++--
 include/net/mac80211.h                     |  4 ++--
 net/mac80211/key.c                         | 14 ++++++++++++++
 net/mac80211/wep.c                         |  3 ---
 net/mac80211/wpa.c                         |  6 ------
 10 files changed, 29 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index c151588aa48..47be49acf55 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -1188,7 +1188,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 
 	if (info->control.hw_key) {
 		keyidx = info->control.hw_key->hw_key_idx;
-		pktlen += info->control.icv_len;
+		pktlen += info->control.hw_key->icv_len;
 	}
 	ret = ah->ah_setup_tx_desc(ah, ds, pktlen,
 		ieee80211_get_hdrlen_from_skb(skb), AR5K_PKT_TYPE_NORMAL,
diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c
index bdcb9e1799c..3a4757942b3 100644
--- a/drivers/net/wireless/ath9k/xmit.c
+++ b/drivers/net/wireless/ath9k/xmit.c
@@ -237,7 +237,7 @@ static int ath_tx_prepare(struct ath_softc *sc,
 
 	if (tx_info->control.hw_key) {
 		txctl->keyix = tx_info->control.hw_key->hw_key_idx;
-		txctl->frmlen += tx_info->control.icv_len;
+		txctl->frmlen += tx_info->control.hw_key->icv_len;
 
 		if (tx_info->control.hw_key->alg == ALG_WEP)
 			txctl->keytype = ATH9K_KEY_TYPE_WEP;
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 5e0b71c3ad0..e0749c0074c 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -252,7 +252,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		}
 
 		/* Hardware appends ICV. */
-		plcp_fragment_len += info->control.icv_len;
+		plcp_fragment_len += info->control.hw_key->icv_len;
 
 		key_idx = b43_kidx_to_fw(dev, key_idx);
 		mac_ctl |= (key_idx << B43_TXH_MAC_KEYIDX_SHIFT) &
@@ -260,7 +260,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		mac_ctl |= (key->algorithm << B43_TXH_MAC_KEYALG_SHIFT) &
 			   B43_TXH_MAC_KEYALG;
 		wlhdr_len = ieee80211_hdrlen(fctl);
-		iv_len = min((size_t) info->control.iv_len,
+		iv_len = min((size_t) info->control.hw_key->iv_len,
 			     ARRAY_SIZE(txhdr->iv));
 		memcpy(txhdr->iv, ((u8 *) wlhdr) + wlhdr_len, iv_len);
 	}
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index 6835064758f..a894169411c 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -243,7 +243,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 
 		if (key->enabled) {
 			/* Hardware appends ICV. */
-			plcp_fragment_len += info->control.icv_len;
+			plcp_fragment_len += info->control.hw_key->icv_len;
 
 			key_idx = b43legacy_kidx_to_fw(dev, key_idx);
 			mac_ctl |= (key_idx << B43legacy_TX4_MAC_KEYIDX_SHIFT) &
@@ -252,7 +252,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 				   B43legacy_TX4_MAC_KEYALG_SHIFT) &
 				   B43legacy_TX4_MAC_KEYALG;
 			wlhdr_len = ieee80211_hdrlen(wlhdr->frame_control);
-			iv_len = min((size_t)info->control.iv_len,
+			iv_len = min((size_t)info->control.hw_key->iv_len,
 				     ARRAY_SIZE(txhdr->iv));
 			memcpy(txhdr->iv, ((u8 *)wlhdr) + wlhdr_len, iv_len);
 		} else {
diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c
index e1448cfa944..5a858e5106c 100644
--- a/drivers/net/wireless/rt2x00/rt2x00crypto.c
+++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c
@@ -56,10 +56,10 @@ unsigned int rt2x00crypto_tx_overhead(struct ieee80211_tx_info *tx_info)
 	 * note that these lengths should only be added when
 	 * mac80211 does not generate it.
 	 */
-	overhead += tx_info->control.icv_len;
+	overhead += key->icv_len;
 
 	if (!(key->flags & IEEE80211_KEY_FLAG_GENERATE_IV))
-		overhead += tx_info->control.iv_len;
+		overhead += key->iv_len;
 
 	if (!(key->flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) {
 		if (key->alg == ALG_TKIP)
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index b7f4fe8fba6..1676ac48479 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -374,7 +374,7 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb)
 	struct queue_entry *entry = rt2x00queue_get_entry(queue, Q_INDEX);
 	struct txentry_desc txdesc;
 	struct skb_frame_desc *skbdesc;
-	unsigned int iv_len = IEEE80211_SKB_CB(skb)->control.iv_len;
+	unsigned int iv_len;
 
 	if (unlikely(rt2x00queue_full(queue)))
 		return -EINVAL;
@@ -410,8 +410,11 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb)
 	 * the frame so we can provide it to the driver seperately.
 	 */
 	if (test_bit(ENTRY_TXD_ENCRYPT, &txdesc.flags) &&
-	    !test_bit(ENTRY_TXD_ENCRYPT_IV, &txdesc.flags))
+	    !test_bit(ENTRY_TXD_ENCRYPT_IV, &txdesc.flags) &&
+		(IEEE80211_SKB_CB(skb)->control.hw_key != NULL)) {
+		iv_len = IEEE80211_SKB_CB(skb)->control.hw_key->iv_len;
 		rt2x00crypto_tx_remove_iv(skb, iv_len);
+	}
 
 	/*
 	 * It could be possible that the queue was corrupted and this
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f5f5b1ff158..feb3be81dfa 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -337,8 +337,6 @@ struct ieee80211_tx_info {
 			unsigned long jiffies;
 			s8 rts_cts_rate_idx, alt_retry_rate_idx;
 			u8 retry_limit;
-			u8 icv_len;
-			u8 iv_len;
 		} control;
 		struct {
 			u64 ampdu_ack_map;
@@ -635,6 +633,8 @@ enum ieee80211_key_flags {
  */
 struct ieee80211_key_conf {
 	enum ieee80211_key_alg alg;
+	u8 icv_len;
+	u8 iv_len;
 	u8 hw_key_idx;
 	u8 flags;
 	s8 keyidx;
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 57afcd38cd9..a5b06fe7198 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -281,6 +281,20 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 	key->conf.alg = alg;
 	key->conf.keyidx = idx;
 	key->conf.keylen = key_len;
+	switch (alg) {
+	case ALG_WEP:
+		key->conf.iv_len = WEP_IV_LEN;
+		key->conf.icv_len = WEP_ICV_LEN;
+		break;
+	case ALG_TKIP:
+		key->conf.iv_len = TKIP_IV_LEN;
+		key->conf.icv_len = TKIP_ICV_LEN;
+		break;
+	case ALG_CCMP:
+		key->conf.iv_len = CCMP_HDR_LEN;
+		key->conf.icv_len = CCMP_MIC_LEN;
+		break;
+	}
 	memcpy(key->conf.key, key_data, key_len);
 	INIT_LIST_HEAD(&key->list);
 	INIT_LIST_HEAD(&key->todo);
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 376c84987e4..f0e2d3ecb5c 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -313,9 +313,6 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
-	info->control.iv_len = WEP_IV_LEN;
-	info->control.icv_len = WEP_ICV_LEN;
-
 	if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) {
 		if (ieee80211_wep_encrypt(tx->local, skb, tx->key))
 			return -1;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 37ae9a959f6..6db649480e8 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -152,9 +152,6 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	int len, tail;
 	u8 *pos;
 
-	info->control.icv_len = TKIP_ICV_LEN;
-	info->control.iv_len = TKIP_IV_LEN;
-
 	if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) &&
 	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
 		/* hwaccel - with no need for preallocated room for IV/ICV */
@@ -374,9 +371,6 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	u8 *pos, *pn;
 	int i;
 
-	info->control.icv_len = CCMP_MIC_LEN;
-	info->control.iv_len = CCMP_HDR_LEN;
-
 	if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) &&
 	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
 		/* hwaccel - with no need for preallocated room for CCMP "
-- 
cgit v1.2.3-70-g09d2


From 870abdf67170daa9f1022e55a35c469239fcc74c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 5 Oct 2008 18:04:24 +0200
Subject: mac80211: add multi-rate retry support

This patch adjusts the rate control API to allow multi-rate retry
if supported by the driver. The ieee80211_hw struct specifies how
many alternate rate selections the driver supports.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/b43/main.c       |  1 +
 drivers/net/wireless/b43/xmit.c       |  2 +-
 drivers/net/wireless/b43legacy/main.c |  1 +
 drivers/net/wireless/b43legacy/xmit.c |  2 +-
 drivers/net/wireless/rtl8180_dev.c    |  5 +++--
 include/net/mac80211.h                | 29 +++++++++++++++++++++++++----
 net/mac80211/tx.c                     | 13 +++++++------
 7 files changed, 39 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 3bf74e236ab..14c44df584d 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4588,6 +4588,7 @@ static int b43_wireless_init(struct ssb_device *dev)
 		BIT(NL80211_IFTYPE_ADHOC);
 
 	hw->queues = b43_modparam_qos ? 4 : 1;
+	hw->max_altrates = 1;
 	SET_IEEE80211_DEV(hw, dev->dev);
 	if (is_valid_ether_addr(sprom->et1mac))
 		SET_IEEE80211_PERM_ADDR(hw, sprom->et1mac);
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index e0749c0074c..2fabcf8f047 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -208,7 +208,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 	txrate = ieee80211_get_tx_rate(dev->wl->hw, info);
 	rate = txrate ? txrate->hw_value : B43_CCK_RATE_1MB;
 	rate_ofdm = b43_is_ofdm_rate(rate);
-	fbrate = ieee80211_get_alt_retry_rate(dev->wl->hw, info) ? : txrate;
+	fbrate = ieee80211_get_alt_retry_rate(dev->wl->hw, info, 0) ? : txrate;
 	rate_fb = fbrate->hw_value;
 	rate_fb_ofdm = b43_is_ofdm_rate(rate_fb);
 
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 9fb1421cbec..c66d57560e7 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -3710,6 +3710,7 @@ static int b43legacy_wireless_init(struct ssb_device *dev)
 		BIT(NL80211_IFTYPE_WDS) |
 		BIT(NL80211_IFTYPE_ADHOC);
 	hw->queues = 1; /* FIXME: hardware has more queues */
+	hw->max_altrates = 1;
 	SET_IEEE80211_DEV(hw, dev->dev);
 	if (is_valid_ether_addr(sprom->et1mac))
 		SET_IEEE80211_PERM_ADDR(hw, sprom->et1mac);
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index a894169411c..65e83378160 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -210,7 +210,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 
 	rate = tx_rate->hw_value;
 	rate_ofdm = b43legacy_is_ofdm_rate(rate);
-	rate_fb = ieee80211_get_alt_retry_rate(dev->wl->hw, info) ? : tx_rate;
+	rate_fb = ieee80211_get_alt_retry_rate(dev->wl->hw, info, 0) ? : tx_rate;
 	rate_fb_ofdm = b43legacy_is_ofdm_rate(rate_fb->hw_value);
 
 	txhdr->mac_frame_ctl = wlhdr->frame_control;
diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c
index abcd641c54b..df7e78ee8a8 100644
--- a/drivers/net/wireless/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl8180_dev.c
@@ -292,8 +292,8 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	entry->plcp_len = cpu_to_le16(plcp_len);
 	entry->tx_buf = cpu_to_le32(mapping);
 	entry->frame_len = cpu_to_le32(skb->len);
-	entry->flags2 = info->control.alt_retry_rate_idx >= 0 ?
-		ieee80211_get_alt_retry_rate(dev, info)->bitrate << 4 : 0;
+	entry->flags2 = info->control.retries[0].rate_idx >= 0 ?
+		ieee80211_get_alt_retry_rate(dev, info, 0)->bitrate << 4 : 0;
 	entry->retry_limit = info->control.retry_limit;
 	entry->flags = cpu_to_le32(tx_flags);
 	__skb_queue_tail(&ring->queue, skb);
@@ -855,6 +855,7 @@ static int __devinit rtl8180_probe(struct pci_dev *pdev,
 	priv = dev->priv;
 	priv->pdev = pdev;
 
+	dev->max_altrates = 1;
 	SET_IEEE80211_DEV(dev, &pdev->dev);
 	pci_set_drvdata(pdev, dev);
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index feb3be81dfa..5617a1613c9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -292,6 +292,20 @@ enum mac80211_tx_control_flags {
 #define IEEE80211_TX_INFO_DRIVER_DATA_PTRS \
 	(IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *))
 
+/* maximum number of alternate rate retry stages */
+#define IEEE80211_TX_MAX_ALTRATE	3
+
+/**
+ * struct ieee80211_tx_altrate - alternate rate selection/status
+ *
+ * @rate_idx: rate index to attempt to send with
+ * @limit: number of retries before fallback
+ */
+struct ieee80211_tx_altrate {
+	s8 rate_idx;
+	u8 limit;
+};
+
 /**
  * struct ieee80211_tx_info - skb transmit information
  *
@@ -335,12 +349,14 @@ struct ieee80211_tx_info {
 			struct ieee80211_key_conf *hw_key;
 			struct ieee80211_sta *sta;
 			unsigned long jiffies;
-			s8 rts_cts_rate_idx, alt_retry_rate_idx;
+			s8 rts_cts_rate_idx;
 			u8 retry_limit;
+			struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE];
 		} control;
 		struct {
 			u64 ampdu_ack_map;
 			int ack_signal;
+			struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE + 1];
 			u8 retry_count;
 			bool excessive_retries;
 			u8 ampdu_ack_len;
@@ -828,6 +844,9 @@ enum ieee80211_hw_flags {
  *	within &struct ieee80211_vif.
  * @sta_data_size: size (in bytes) of the drv_priv data area
  *	within &struct ieee80211_sta.
+ *
+ * @max_altrates: maximum number of alternate rate retry stages
+ * @max_altrate_tries: maximum number of tries for each stage
  */
 struct ieee80211_hw {
 	struct ieee80211_conf conf;
@@ -844,6 +863,8 @@ struct ieee80211_hw {
 	u16 ampdu_queues;
 	u16 max_listen_interval;
 	s8 max_signal;
+	u8 max_altrates;
+	u8 max_altrate_tries;
 };
 
 struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy);
@@ -900,11 +921,11 @@ ieee80211_get_rts_cts_rate(const struct ieee80211_hw *hw,
 
 static inline struct ieee80211_rate *
 ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
-			     const struct ieee80211_tx_info *c)
+			     const struct ieee80211_tx_info *c, int idx)
 {
-	if (c->control.alt_retry_rate_idx < 0)
+	if (c->control.retries[idx].rate_idx < 0)
 		return NULL;
-	return &hw->wiphy->bands[c->band]->bitrates[c->control.alt_retry_rate_idx];
+	return &hw->wiphy->bands[c->band]->bitrates[c->control.retries[idx].rate_idx];
 }
 
 /**
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d7153bbcdb2..1460537faf3 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -454,15 +454,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 		if (unlikely(rsel.probe_idx >= 0)) {
 			info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 			tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG;
-			info->control.alt_retry_rate_idx = tx->rate_idx;
+			info->control.retries[0].rate_idx = tx->rate_idx;
+			info->control.retries[0].limit = tx->local->hw.max_altrate_tries;
 			tx->rate_idx = rsel.probe_idx;
-		} else
-			info->control.alt_retry_rate_idx = -1;
+		} else if (info->control.retries[0].limit == 0)
+			info->control.retries[0].rate_idx = -1;
 
 		if (unlikely(tx->rate_idx < 0))
 			return TX_DROP;
 	} else
-		info->control.alt_retry_rate_idx = -1;
+		info->control.retries[0].rate_idx = -1;
 
 	if (tx->sdata->bss_conf.use_cts_prot &&
 	    (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) {
@@ -521,7 +522,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 		 * frames.
 		 * TODO: The last fragment could still use multiple retry
 		 * rates. */
-		info->control.alt_retry_rate_idx = -1;
+		info->control.retries[0].rate_idx = -1;
 	}
 
 	/* Use CTS protection for unicast frames sent using extended rates if
@@ -551,7 +552,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 		int idx;
 
 		/* Do not use multiple retry rates when using RTS/CTS */
-		info->control.alt_retry_rate_idx = -1;
+		info->control.retries[0].rate_idx = -1;
 
 		/* Use min(data rate, max base rate) as CTS/RTS rate */
 		rate = &sband->bitrates[tx->rate_idx];
-- 
cgit v1.2.3-70-g09d2


From cccf129f820e431d84690729254a32f1709328fb Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 5 Oct 2008 18:07:45 +0200
Subject: mac80211: add the 'minstrel' rate control algorithm

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Kconfig                    |  13 +
 net/mac80211/Makefile                   |   4 +
 net/mac80211/main.c                     |   5 +
 net/mac80211/rate.h                     |  14 +
 net/mac80211/rc80211_minstrel.c         | 583 ++++++++++++++++++++++++++++++++
 net/mac80211/rc80211_minstrel.h         |  85 +++++
 net/mac80211/rc80211_minstrel_debugfs.c | 164 +++++++++
 7 files changed, 868 insertions(+)
 create mode 100644 net/mac80211/rc80211_minstrel.c
 create mode 100644 net/mac80211/rc80211_minstrel.h
 create mode 100644 net/mac80211/rc80211_minstrel_debugfs.c

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 8427518e4f2..7f710a27e91 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -22,6 +22,11 @@ config MAC80211_RC_PID
 	  mac80211 that uses a PID controller to select the TX
 	  rate.
 
+config MAC80211_RC_MINSTREL
+	bool "Minstrel"
+	---help---
+	  This option enables the 'minstrel' TX rate control algorithm
+
 choice
 	prompt "Default rate control algorithm"
 	default MAC80211_RC_DEFAULT_PID
@@ -39,11 +44,19 @@ config MAC80211_RC_DEFAULT_PID
 	  default rate control algorithm. You should choose
 	  this unless you know what you are doing.
 
+config MAC80211_RC_DEFAULT_MINSTREL
+	bool "Minstrel"
+	depends on MAC80211_RC_MINSTREL
+	---help---
+	  Select Minstrel as the default rate control algorithm.
+
+
 endchoice
 
 config MAC80211_RC_DEFAULT
 	string
 	default "pid" if MAC80211_RC_DEFAULT_PID
+	default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL
 	default ""
 
 endmenu
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 2dc8f2bff27..31cfd1f89a7 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -41,4 +41,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \
 rc80211_pid-y := rc80211_pid_algo.o
 rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
 
+rc80211_minstrel-y := rc80211_minstrel.o
+rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o
+
 mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y)
+mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index d608c44047c..ae62ad40ad6 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1015,6 +1015,10 @@ static int __init ieee80211_init(void)
 	BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, driver_data) +
 	             IEEE80211_TX_INFO_DRIVER_DATA_SIZE > sizeof(skb->cb));
 
+	ret = rc80211_minstrel_init();
+	if (ret)
+		return ret;
+
 	ret = rc80211_pid_init();
 	if (ret)
 		return ret;
@@ -1027,6 +1031,7 @@ static int __init ieee80211_init(void)
 static void __exit ieee80211_exit(void)
 {
 	rc80211_pid_exit();
+	rc80211_minstrel_exit();
 
 	/*
 	 * For key todo, it'll be empty by now but the work
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index eb94e584d24..d0092f847f8 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -125,4 +125,18 @@ static inline void rc80211_pid_exit(void)
 }
 #endif
 
+#ifdef CONFIG_MAC80211_RC_MINSTREL
+extern int rc80211_minstrel_init(void);
+extern void rc80211_minstrel_exit(void);
+#else
+static inline int rc80211_minstrel_init(void)
+{
+	return 0;
+}
+static inline void rc80211_minstrel_exit(void)
+{
+}
+#endif
+
+
 #endif /* IEEE80211_RATE_H */
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
new file mode 100644
index 00000000000..f6d69dab07a
--- /dev/null
+++ b/net/mac80211/rc80211_minstrel.c
@@ -0,0 +1,583 @@
+/*
+ * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on minstrel.c:
+ *   Copyright (C) 2005-2007 Derek Smithies <derek@indranet.co.nz>
+ *   Sponsored by Indranet Technologies Ltd
+ *
+ * Based on sample.c:
+ *   Copyright (c) 2005 John Bicket
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer,
+ *      without modification.
+ *   2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *      similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
+ *      redistribution must be conditioned upon including a substantially
+ *      similar Disclaimer requirement for further binary redistribution.
+ *   3. Neither the names of the above-listed copyright holders nor the names
+ *      of any contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ *   Alternatively, this software may be distributed under the terms of the
+ *   GNU General Public License ("GPL") version 2 as published by the Free
+ *   Software Foundation.
+ *
+ *   NO WARRANTY
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
+ *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ *   THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
+ *   OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ *   IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *   THE POSSIBILITY OF SUCH DAMAGES.
+ */
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/debugfs.h>
+#include <linux/random.h>
+#include <linux/ieee80211.h>
+#include <net/mac80211.h>
+#include "rate.h"
+#include "rc80211_minstrel.h"
+
+#define SAMPLE_COLUMNS	10
+#define SAMPLE_TBL(_mi, _idx, _col) \
+		_mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col]
+
+/* convert mac80211 rate index to local array index */
+static inline int
+rix_to_ndx(struct minstrel_sta_info *mi, int rix)
+{
+	int i = rix;
+	for (i = rix; i >= 0; i--)
+		if (mi->r[i].rix == rix)
+			break;
+	WARN_ON(mi->r[i].rix != rix);
+	return i;
+}
+
+static inline bool
+use_low_rate(struct sk_buff *skb)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	u16 fc;
+
+	fc = le16_to_cpu(hdr->frame_control);
+
+	return ((info->flags & IEEE80211_TX_CTL_NO_ACK) ||
+		(fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
+		is_multicast_ether_addr(hdr->addr1));
+}
+
+
+static void
+minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
+{
+	u32 max_tp = 0, index_max_tp = 0, index_max_tp2 = 0;
+	u32 max_prob = 0, index_max_prob = 0;
+	u32 usecs;
+	u32 p;
+	int i;
+
+	mi->stats_update = jiffies;
+	for (i = 0; i < mi->n_rates; i++) {
+		struct minstrel_rate *mr = &mi->r[i];
+
+		usecs = mr->perfect_tx_time;
+		if (!usecs)
+			usecs = 1000000;
+
+		/* To avoid rounding issues, probabilities scale from 0 (0%)
+		 * to 18000 (100%) */
+		if (mr->attempts) {
+			p = (mr->success * 18000) / mr->attempts;
+			mr->succ_hist += mr->success;
+			mr->att_hist += mr->attempts;
+			mr->cur_prob = p;
+			p = ((p * (100 - mp->ewma_level)) + (mr->probability *
+				mp->ewma_level)) / 100;
+			mr->probability = p;
+			mr->cur_tp = p * (1000000 / usecs);
+		}
+
+		mr->last_success = mr->success;
+		mr->last_attempts = mr->attempts;
+		mr->success = 0;
+		mr->attempts = 0;
+
+		/* Sample less often below the 10% chance of success.
+		 * Sample less often above the 95% chance of success. */
+		if ((mr->probability > 17100) || (mr->probability < 1800)) {
+			mr->adjusted_retry_count = mr->retry_count >> 1;
+			if (mr->adjusted_retry_count > 2)
+				mr->adjusted_retry_count = 2;
+		} else {
+			mr->adjusted_retry_count = mr->retry_count;
+		}
+		if (!mr->adjusted_retry_count)
+			mr->adjusted_retry_count = 2;
+	}
+
+	for (i = 0; i < mi->n_rates; i++) {
+		struct minstrel_rate *mr = &mi->r[i];
+		if (max_tp < mr->cur_tp) {
+			index_max_tp = i;
+			max_tp = mr->cur_tp;
+		}
+		if (max_prob < mr->probability) {
+			index_max_prob = i;
+			max_prob = mr->probability;
+		}
+	}
+
+	max_tp = 0;
+	for (i = 0; i < mi->n_rates; i++) {
+		struct minstrel_rate *mr = &mi->r[i];
+
+		if (i == index_max_tp)
+			continue;
+
+		if (max_tp < mr->cur_tp) {
+			index_max_tp2 = i;
+			max_tp = mr->cur_tp;
+		}
+	}
+	mi->max_tp_rate = index_max_tp;
+	mi->max_tp_rate2 = index_max_tp2;
+	mi->max_prob_rate = index_max_prob;
+}
+
+static void
+minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
+                   struct ieee80211_sta *sta, void *priv_sta,
+		   struct sk_buff *skb)
+{
+	struct minstrel_sta_info *mi = priv_sta;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_tx_altrate *ar = info->status.retries;
+	struct minstrel_priv *mp = priv;
+	int i, ndx, tries;
+	int success = 0;
+
+	if (!info->status.excessive_retries)
+		success = 1;
+
+	if (!mp->has_mrr || (ar[0].rate_idx < 0)) {
+		ndx = rix_to_ndx(mi, info->tx_rate_idx);
+		tries = info->status.retry_count + 1;
+		mi->r[ndx].success += success;
+		mi->r[ndx].attempts += tries;
+		return;
+	}
+
+	for (i = 0; i < 4; i++) {
+		if (ar[i].rate_idx < 0)
+			break;
+
+		ndx = rix_to_ndx(mi, ar[i].rate_idx);
+		mi->r[ndx].attempts += ar[i].limit + 1;
+
+		if ((i != 3) && (ar[i + 1].rate_idx < 0))
+			mi->r[ndx].success += success;
+	}
+
+	if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0))
+		mi->sample_count++;
+
+	if (mi->sample_deferred > 0)
+		mi->sample_deferred--;
+}
+
+
+static inline unsigned int
+minstrel_get_retry_count(struct minstrel_rate *mr,
+                         struct ieee80211_tx_info *info)
+{
+	unsigned int retry = mr->adjusted_retry_count;
+
+	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
+		retry = max(2U, min(mr->retry_count_rtscts, retry));
+	else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
+		retry = max(2U, min(mr->retry_count_cts, retry));
+	return retry;
+}
+
+
+static int
+minstrel_get_next_sample(struct minstrel_sta_info *mi)
+{
+	unsigned int sample_ndx;
+	sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column);
+	mi->sample_idx++;
+	if (mi->sample_idx > (mi->n_rates - 2)) {
+		mi->sample_idx = 0;
+		mi->sample_column++;
+		if (mi->sample_column >= SAMPLE_COLUMNS)
+			mi->sample_column = 0;
+	}
+	return sample_ndx;
+}
+
+void
+minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband,
+                  struct ieee80211_sta *sta, void *priv_sta,
+                  struct sk_buff *skb, struct rate_selection *sel)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct minstrel_sta_info *mi = priv_sta;
+	struct minstrel_priv *mp = priv;
+	struct ieee80211_tx_altrate *ar = info->control.retries;
+	unsigned int ndx, sample_ndx = 0;
+	bool mrr;
+	bool sample_slower = false;
+	bool sample = false;
+	int i, delta;
+	int mrr_ndx[3];
+	int sample_rate;
+
+	if (!sta || !mi || use_low_rate(skb)) {
+		sel->rate_idx = rate_lowest_index(sband, sta);
+		return;
+	}
+
+	mrr = mp->has_mrr;
+
+	/* mac80211 does not allow mrr for RTS/CTS */
+	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
+	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT))
+		mrr = false;
+
+	if (time_after(jiffies, mi->stats_update + (mp->update_interval *
+			HZ) / 1000))
+		minstrel_update_stats(mp, mi);
+
+	ndx = mi->max_tp_rate;
+
+	if (mrr)
+		sample_rate = mp->lookaround_rate_mrr;
+	else
+		sample_rate = mp->lookaround_rate;
+
+	mi->packet_count++;
+	delta = (mi->packet_count * sample_rate / 100) -
+			(mi->sample_count + mi->sample_deferred / 2);
+
+	/* delta > 0: sampling required */
+	if (delta > 0) {
+		if (mi->packet_count >= 10000) {
+			mi->sample_deferred = 0;
+			mi->sample_count = 0;
+			mi->packet_count = 0;
+		} else if (delta > mi->n_rates * 2) {
+			/* With multi-rate retry, not every planned sample
+			 * attempt actually gets used, due to the way the retry
+			 * chain is set up - [max_tp,sample,prob,lowest] for
+			 * sample_rate < max_tp.
+			 *
+			 * If there's too much sampling backlog and the link
+			 * starts getting worse, minstrel would start bursting
+			 * out lots of sampling frames, which would result
+			 * in a large throughput loss. */
+			mi->sample_count += (delta - mi->n_rates * 2);
+		}
+
+		sample_ndx = minstrel_get_next_sample(mi);
+		sample = true;
+		sample_slower = mrr && (mi->r[sample_ndx].perfect_tx_time >
+			mi->r[ndx].perfect_tx_time);
+
+		if (!sample_slower) {
+			ndx = sample_ndx;
+			mi->sample_count++;
+		} else {
+			/* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
+			 * packets that have the sampling rate deferred to the
+			 * second MRR stage. Increase the sample counter only
+			 * if the deferred sample rate was actually used.
+			 * Use the sample_deferred counter to make sure that
+			 * the sampling is not done in large bursts */
+			info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
+			mi->sample_deferred++;
+		}
+	}
+	sel->rate_idx = mi->r[ndx].rix;
+	info->control.retry_limit = minstrel_get_retry_count(&mi->r[ndx], info);
+
+	if (!mrr) {
+		ar[0].rate_idx = mi->lowest_rix;
+		ar[0].limit = mp->max_retry;
+		ar[1].rate_idx = -1;
+		return;
+	}
+
+	/* MRR setup */
+	if (sample) {
+		if (sample_slower)
+			mrr_ndx[0] = sample_ndx;
+		else
+			mrr_ndx[0] = mi->max_tp_rate;
+	} else {
+		mrr_ndx[0] = mi->max_tp_rate2;
+	}
+	mrr_ndx[1] = mi->max_prob_rate;
+	mrr_ndx[2] = 0;
+	for (i = 0; i < 3; i++) {
+		ar[i].rate_idx = mi->r[mrr_ndx[i]].rix;
+		ar[i].limit = mi->r[mrr_ndx[i]].adjusted_retry_count;
+	}
+}
+
+
+static void
+calc_rate_durations(struct minstrel_sta_info *mi, struct ieee80211_local *local,
+                    struct minstrel_rate *d, struct ieee80211_rate *rate)
+{
+	int erp = !!(rate->flags & IEEE80211_RATE_ERP_G);
+
+	d->perfect_tx_time = ieee80211_frame_duration(local, 1200,
+			rate->bitrate, erp, 1);
+	d->ack_time = ieee80211_frame_duration(local, 10,
+			rate->bitrate, erp, 1);
+}
+
+static void
+init_sample_table(struct minstrel_sta_info *mi)
+{
+	unsigned int i, col, new_idx;
+	unsigned int n_srates = mi->n_rates - 1;
+	u8 rnd[8];
+
+	mi->sample_column = 0;
+	mi->sample_idx = 0;
+	memset(mi->sample_table, 0, SAMPLE_COLUMNS * mi->n_rates);
+
+	for (col = 0; col < SAMPLE_COLUMNS; col++) {
+		for (i = 0; i < n_srates; i++) {
+			get_random_bytes(rnd, sizeof(rnd));
+			new_idx = (i + rnd[i & 7]) % n_srates;
+
+			while (SAMPLE_TBL(mi, new_idx, col) != 0)
+				new_idx = (new_idx + 1) % n_srates;
+
+			/* Don't sample the slowest rate (i.e. slowest base
+			 * rate). We must presume that the slowest rate works
+			 * fine, or else other management frames will also be
+			 * failing and the link will break */
+			SAMPLE_TBL(mi, new_idx, col) = i + 1;
+		}
+	}
+}
+
+static void
+minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
+               struct ieee80211_sta *sta, void *priv_sta)
+{
+	struct minstrel_sta_info *mi = priv_sta;
+	struct minstrel_priv *mp = priv;
+	struct minstrel_rate *mr_ctl;
+	unsigned int i, n = 0;
+	unsigned int t_slot = 9; /* FIXME: get real slot time */
+
+	mi->lowest_rix = rate_lowest_index(sband, sta);
+	mr_ctl = &mi->r[rix_to_ndx(mi, mi->lowest_rix)];
+	mi->sp_ack_dur = mr_ctl->ack_time;
+
+	for (i = 0; i < sband->n_bitrates; i++) {
+		struct minstrel_rate *mr = &mi->r[n];
+		unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0;
+		unsigned int tx_time_single;
+		unsigned int cw = mp->cw_min;
+
+		if (!rate_supported(sta, sband->band, i))
+			continue;
+		n++;
+		memset(mr, 0, sizeof(*mr));
+
+		mr->rix = i;
+		mr->bitrate = sband->bitrates[i].bitrate / 5;
+		calc_rate_durations(mi, hw_to_local(mp->hw), mr,
+				&sband->bitrates[i]);
+
+		/* calculate maximum number of retransmissions before
+		 * fallback (based on maximum segment size) */
+		mr->retry_count = 1;
+		mr->retry_count_cts = 1;
+		mr->retry_count_rtscts = 1;
+		tx_time = mr->perfect_tx_time + mi->sp_ack_dur;
+		do {
+			/* add one retransmission */
+			tx_time_single = mr->ack_time + mr->perfect_tx_time;
+
+			/* contention window */
+			tx_time_single += t_slot + min(cw, mp->cw_max);
+			cw = (cw + 1) << 1;
+
+			tx_time += tx_time_single;
+			tx_time_cts += tx_time_single + mi->sp_ack_dur;
+			tx_time_rtscts += tx_time_single + 2 * mi->sp_ack_dur;
+			if ((tx_time_cts < mp->segment_size) &&
+				(mr->retry_count_cts < mp->max_retry))
+				mr->retry_count_cts++;
+			if ((tx_time_rtscts < mp->segment_size) &&
+				(mr->retry_count_rtscts < mp->max_retry))
+				mr->retry_count_rtscts++;
+		} while ((tx_time < mp->segment_size) &&
+				(++mr->retry_count < mp->max_retry));
+		mr->adjusted_retry_count = mr->retry_count;
+	}
+
+	for (i = n; i < sband->n_bitrates; i++) {
+		struct minstrel_rate *mr = &mi->r[i];
+		mr->rix = -1;
+	}
+
+	mi->n_rates = n;
+	mi->stats_update = jiffies;
+
+	init_sample_table(mi);
+}
+
+static void *
+minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
+{
+	struct ieee80211_supported_band *sband;
+	struct minstrel_sta_info *mi;
+	struct minstrel_priv *mp = priv;
+	struct ieee80211_hw *hw = mp->hw;
+	int max_rates = 0;
+	int i;
+
+	mi = kzalloc(sizeof(struct minstrel_sta_info), gfp);
+	if (!mi)
+		return NULL;
+
+	for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+		sband = hw->wiphy->bands[hw->conf.channel->band];
+		if (sband->n_bitrates > max_rates)
+			max_rates = sband->n_bitrates;
+	}
+
+	mi->r = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp);
+	if (!mi->r)
+		goto error;
+
+	mi->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp);
+	if (!mi->sample_table)
+		goto error1;
+
+	mi->stats_update = jiffies;
+	return mi;
+
+error1:
+	kfree(mi->r);
+error:
+	kfree(mi);
+	return NULL;
+}
+
+static void
+minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta)
+{
+	struct minstrel_sta_info *mi = priv_sta;
+
+	kfree(mi->sample_table);
+	kfree(mi->r);
+	kfree(mi);
+}
+
+static void
+minstrel_clear(void *priv)
+{
+}
+
+static void *
+minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
+{
+	struct minstrel_priv *mp;
+
+	mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC);
+	if (!mp)
+		return NULL;
+
+	/* contention window settings
+	 * Just an approximation. Using the per-queue values would complicate
+	 * the calculations and is probably unnecessary */
+	mp->cw_min = 15;
+	mp->cw_max = 1023;
+
+	/* number of packets (in %) to use for sampling other rates
+	 * sample less often for non-mrr packets, because the overhead
+	 * is much higher than with mrr */
+	mp->lookaround_rate = 5;
+	mp->lookaround_rate_mrr = 10;
+
+	/* moving average weight for EWMA */
+	mp->ewma_level = 75;
+
+	/* maximum time that the hw is allowed to stay in one MRR segment */
+	mp->segment_size = 6000;
+
+	if (hw->max_altrate_tries > 0)
+		mp->max_retry = hw->max_altrate_tries;
+	else
+		/* safe default, does not necessarily have to match hw properties */
+		mp->max_retry = 7;
+
+	if (hw->max_altrates >= 3)
+		mp->has_mrr = true;
+
+	mp->hw = hw;
+	mp->update_interval = 100;
+
+	return mp;
+}
+
+static void
+minstrel_free(void *priv)
+{
+	kfree(priv);
+}
+
+static struct rate_control_ops mac80211_minstrel = {
+	.name = "minstrel",
+	.tx_status = minstrel_tx_status,
+	.get_rate = minstrel_get_rate,
+	.rate_init = minstrel_rate_init,
+	.clear = minstrel_clear,
+	.alloc = minstrel_alloc,
+	.free = minstrel_free,
+	.alloc_sta = minstrel_alloc_sta,
+	.free_sta = minstrel_free_sta,
+#ifdef CONFIG_MAC80211_DEBUGFS
+	.add_sta_debugfs = minstrel_add_sta_debugfs,
+	.remove_sta_debugfs = minstrel_remove_sta_debugfs,
+#endif
+};
+
+int __init
+rc80211_minstrel_init(void)
+{
+	return ieee80211_rate_control_register(&mac80211_minstrel);
+}
+
+void
+rc80211_minstrel_exit(void)
+{
+	ieee80211_rate_control_unregister(&mac80211_minstrel);
+}
+
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
new file mode 100644
index 00000000000..9a90a6aee04
--- /dev/null
+++ b/net/mac80211/rc80211_minstrel.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __RC_MINSTREL_H
+#define __RC_MINSTREL_H
+
+struct minstrel_rate {
+	int bitrate;
+	int rix;
+
+	unsigned int perfect_tx_time;
+	unsigned int ack_time;
+
+	unsigned int retry_count;
+	unsigned int retry_count_cts;
+	unsigned int retry_count_rtscts;
+	unsigned int adjusted_retry_count;
+
+	u32 success;
+	u32 attempts;
+	u32 last_attempts;
+	u32 last_success;
+
+	/* parts per thousand */
+	u32 cur_prob;
+	u32 probability;
+
+	/* per-rate throughput */
+	u32 cur_tp;
+	u32 throughput;
+
+	u64 succ_hist;
+	u64 att_hist;
+};
+
+struct minstrel_sta_info {
+	unsigned long stats_update;
+	unsigned int sp_ack_dur;
+	unsigned int rate_avg;
+
+	unsigned int lowest_rix;
+
+	unsigned int max_tp_rate;
+	unsigned int max_tp_rate2;
+	unsigned int max_prob_rate;
+	unsigned int packet_count;
+	unsigned int sample_count;
+	int sample_deferred;
+
+	unsigned int sample_idx;
+	unsigned int sample_column;
+
+	int n_rates;
+	struct minstrel_rate *r;
+
+	/* sampling table */
+	u8 *sample_table;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	struct dentry *dbg_stats;
+#endif
+};
+
+struct minstrel_priv {
+	struct ieee80211_hw *hw;
+	bool has_mrr;
+	unsigned int cw_min;
+	unsigned int cw_max;
+	unsigned int max_retry;
+	unsigned int ewma_level;
+	unsigned int segment_size;
+	unsigned int update_interval;
+	unsigned int lookaround_rate;
+	unsigned int lookaround_rate_mrr;
+};
+
+void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
+void minstrel_remove_sta_debugfs(void *priv, void *priv_sta);
+
+#endif
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
new file mode 100644
index 00000000000..0b024cd6b80
--- /dev/null
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on minstrel.c:
+ *   Copyright (C) 2005-2007 Derek Smithies <derek@indranet.co.nz>
+ *   Sponsored by Indranet Technologies Ltd
+ *
+ * Based on sample.c:
+ *   Copyright (c) 2005 John Bicket
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer,
+ *      without modification.
+ *   2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *      similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
+ *      redistribution must be conditioned upon including a substantially
+ *      similar Disclaimer requirement for further binary redistribution.
+ *   3. Neither the names of the above-listed copyright holders nor the names
+ *      of any contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ *   Alternatively, this software may be distributed under the terms of the
+ *   GNU General Public License ("GPL") version 2 as published by the Free
+ *   Software Foundation.
+ *
+ *   NO WARRANTY
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
+ *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ *   THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
+ *   OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ *   IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *   THE POSSIBILITY OF SUCH DAMAGES.
+ */
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/debugfs.h>
+#include <linux/ieee80211.h>
+#include <net/mac80211.h>
+#include "rc80211_minstrel.h"
+
+struct minstrel_stats_info {
+	struct minstrel_sta_info *mi;
+	char buf[4096];
+	size_t len;
+};
+
+static int
+minstrel_stats_open(struct inode *inode, struct file *file)
+{
+	struct minstrel_sta_info *mi = inode->i_private;
+	struct minstrel_stats_info *ms;
+	unsigned int i, tp, prob, eprob;
+	char *p;
+
+	ms = kmalloc(sizeof(*ms), GFP_KERNEL);
+	if (!ms)
+		return -ENOMEM;
+
+	file->private_data = ms;
+	p = ms->buf;
+	p += sprintf(p, "rate     throughput  ewma prob   this prob  "
+			"this succ/attempt   success    attempts\n");
+	for (i = 0; i < mi->n_rates; i++) {
+		struct minstrel_rate *mr = &mi->r[i];
+
+		*(p++) = (i == mi->max_tp_rate) ? 'T' : ' ';
+		*(p++) = (i == mi->max_tp_rate2) ? 't' : ' ';
+		*(p++) = (i == mi->max_prob_rate) ? 'P' : ' ';
+		p += sprintf(p, "%3u%s", mr->bitrate / 2,
+				(mr->bitrate & 1 ? ".5" : "  "));
+
+		tp = ((mr->cur_tp * 96) / 18000) >> 10;
+		prob = mr->cur_prob / 18;
+		eprob = mr->probability / 18;
+
+		p += sprintf(p, "  %6u.%1u   %6u.%1u   %6u.%1u        "
+				"%3u(%3u)   %8llu    %8llu\n",
+				tp / 10, tp % 10,
+				eprob / 10, eprob % 10,
+				prob / 10, prob % 10,
+				mr->last_success,
+				mr->last_attempts,
+				mr->succ_hist,
+				mr->att_hist);
+	}
+	p += sprintf(p, "\nTotal packet count::    ideal %d      "
+			"lookaround %d\n\n",
+			mi->packet_count - mi->sample_count,
+			mi->sample_count);
+	ms->len = p - ms->buf;
+
+	return 0;
+}
+
+static int
+minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *o)
+{
+	struct minstrel_stats_info *ms;
+	char *src;
+
+	ms = file->private_data;
+	src = ms->buf;
+
+	len = min(len, ms->len);
+	if (len <= *o)
+		return 0;
+
+	src += *o;
+	len -= *o;
+	*o += len;
+
+	if (copy_to_user(buf, src, len))
+		return -EFAULT;
+
+	return len;
+}
+
+static int
+minstrel_stats_release(struct inode *inode, struct file *file)
+{
+	struct minstrel_stats_info *ms = file->private_data;
+
+	kfree(ms);
+
+	return 0;
+}
+
+static struct file_operations minstrel_stat_fops = {
+	.owner = THIS_MODULE,
+	.open = minstrel_stats_open,
+	.read = minstrel_stats_read,
+	.release = minstrel_stats_release,
+};
+
+void
+minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
+{
+	struct minstrel_sta_info *mi = priv_sta;
+
+	mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi,
+			&minstrel_stat_fops);
+}
+
+void
+minstrel_remove_sta_debugfs(void *priv, void *priv_sta)
+{
+	struct minstrel_sta_info *mi = priv_sta;
+
+	debugfs_remove(mi->dbg_stats);
+}
-- 
cgit v1.2.3-70-g09d2


From ad788b5e079484aa1d48aa90a3ebd7d954d2e7db Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 1 Oct 2008 15:45:02 -0400
Subject: mac80211: avoid "Wireless Event too big" message for assoc response

The association response IEs are sent to userland with an IWEVCUSTOM
event, which unfortunately is limited to a little more than 100 bytes
of IE information with the encoding used.  Many APs send so much
IE information that this message overflows.  When the IWEVCUSTOM
event is too large, the kernel doesn't send it to userland anyway --
better just not to send it.

An attempt was made by Jouni Malinen to correct this issue by
converting to use IWEVASSOCREQIE and IWEVASSOCRESPIE messages instead
("mac80211: Use IWEVASSOCREQIE instead of IWEVCUSTOM").  Unfortunately,
that caused a problem due to 32-/64-bit interactions on some systems and
was reverted after the 'userland ABI' rule was invoked.  That leaves
us with this option instead of a proper fix, at least until we move
to a cfg80211-based solution.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index edc339d649c..49f86fa56bf 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -690,9 +690,11 @@ static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	memset(&wrqu, 0, sizeof(wrqu));
-	wrqu.data.length = len;
-	wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
+	if (len <= IW_CUSTOM_MAX) {
+		memset(&wrqu, 0, sizeof(wrqu));
+		wrqu.data.length = len;
+		wireless_send_event(sdata->dev, IWEVCUSTOM, &wrqu, buf);
+	}
 
 	kfree(buf);
 }
-- 
cgit v1.2.3-70-g09d2


From 9a1f27c48065ce713eb47f2fd475b717e63ef239 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 7 Oct 2008 11:41:57 -0700
Subject: inet_hashtables: Add inet_lookup_skb helpers

To be able to use the cached socket reference in the skb during input
processing we add a new set of lookup functions that receive the skb on
their argument list.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_hashtables.h | 11 +++++++++++
 include/net/inet_hashtables.h  | 14 ++++++++++++++
 net/dccp/ipv4.c                |  5 ++---
 net/dccp/ipv6.c                |  6 ++----
 net/ipv4/tcp_ipv4.c            |  3 +--
 net/ipv6/tcp_ipv6.c            |  6 +-----
 6 files changed, 31 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index e48989f04c2..995efbb031a 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -91,6 +91,17 @@ static inline struct sock *__inet6_lookup(struct net *net,
 	return inet6_lookup_listener(net, hashinfo, daddr, hnum, dif);
 }
 
+static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
+					      struct sk_buff *skb,
+					      const __be16 sport,
+					      const __be16 dport)
+{
+	return __inet6_lookup(dev_net(skb->dst->dev), hashinfo,
+			      &ipv6_hdr(skb)->saddr, sport,
+			      &ipv6_hdr(skb)->daddr, ntohs(dport),
+			      inet6_iif(skb));
+}
+
 extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
 				 const struct in6_addr *saddr, const __be16 sport,
 				 const struct in6_addr *daddr, const __be16 dport,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index bb619d80f2e..3522bbcd546 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -16,6 +16,7 @@
 
 
 #include <linux/interrupt.h>
+#include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -28,6 +29,7 @@
 #include <net/inet_connection_sock.h>
 #include <net/inet_sock.h>
 #include <net/sock.h>
+#include <net/route.h>
 #include <net/tcp_states.h>
 #include <net/netns/hash.h>
 
@@ -371,6 +373,18 @@ static inline struct sock *inet_lookup(struct net *net,
 	return sk;
 }
 
+static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
+					     struct sk_buff *skb,
+					     const __be16 sport,
+					     const __be16 dport)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+
+	return __inet_lookup(dev_net(skb->dst->dev), hashinfo,
+			     iph->saddr, sport,
+			     iph->daddr, dport, inet_iif(skb));
+}
+
 extern int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		struct sock *sk, u32 port_offset,
 		int (*check_established)(struct inet_timewait_death_row *,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 882c5c4de69..e3dfddab21c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -811,9 +811,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 
 	/* Step 2:
 	 *	Look up flow ID in table and get corresponding socket */
-	sk = __inet_lookup(dev_net(skb->dst->dev), &dccp_hashinfo,
-			   iph->saddr, dh->dccph_sport,
-			   iph->daddr, dh->dccph_dport, inet_iif(skb));
+	sk = __inet_lookup_skb(&dccp_hashinfo, skb,
+			       dh->dccph_sport, dh->dccph_dport);
 	/*
 	 * Step 2:
 	 *	If no socket ...
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5e1ee0da2c4..caa7f346962 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -805,10 +805,8 @@ static int dccp_v6_rcv(struct sk_buff *skb)
 
 	/* Step 2:
 	 *	Look up flow ID in table and get corresponding socket */
-	sk = __inet6_lookup(dev_net(skb->dst->dev), &dccp_hashinfo,
-			    &ipv6_hdr(skb)->saddr, dh->dccph_sport,
-			    &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
-			    inet6_iif(skb));
+	sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
+			        dh->dccph_sport, dh->dccph_dport);
 	/*
 	 * Step 2:
 	 *	If no socket ...
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8b24bd833cb..24ffc5e1d3d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1577,8 +1577,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->flags	 = iph->tos;
 	TCP_SKB_CB(skb)->sacked	 = 0;
 
-	sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr,
-			th->source, iph->daddr, th->dest, inet_iif(skb));
+	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
 	if (!sk)
 		goto no_tcp_socket;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index df16b68644e..6268d266c03 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1681,11 +1681,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
 	TCP_SKB_CB(skb)->sacked = 0;
 
-	sk = __inet6_lookup(net, &tcp_hashinfo,
-			&ipv6_hdr(skb)->saddr, th->source,
-			&ipv6_hdr(skb)->daddr, ntohs(th->dest),
-			inet6_iif(skb));
-
+	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
 	if (!sk)
 		goto no_tcp_socket;
 
-- 
cgit v1.2.3-70-g09d2


From 607c4aaf03041c8bd81555a0218050c0f895088e Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Tue, 7 Oct 2008 12:38:32 -0700
Subject: inet: Add udplib_lookup_skb() helpers

To be able to use the cached socket reference in the skb during input
processing we add a new set of lookup functions that receive the skb on
their argument list.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 14 ++++++++++++--
 net/ipv6/udp.c | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c83d0ef469c..c7a90b546b2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -302,6 +302,17 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	return result;
 }
 
+static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
+						 __be16 sport, __be16 dport,
+						 struct hlist_head udptable[])
+{
+	const struct iphdr *iph = ip_hdr(skb);
+
+	return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport,
+				 iph->daddr, dport, inet_iif(skb),
+				 udptable);
+}
+
 struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 			     __be32 daddr, __be16 dport, int dif)
 {
@@ -1208,8 +1219,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 		return __udp4_lib_mcast_deliver(net, skb, uh,
 				saddr, daddr, udptable);
 
-	sk = __udp4_lib_lookup(net, saddr, uh->source, daddr,
-			uh->dest, inet_iif(skb), udptable);
+	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
 
 	if (sk != NULL) {
 		int ret = udp_queue_rcv_skb(sk, skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a6aecf76a71..ce26c41d157 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -107,6 +107,17 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 	return result;
 }
 
+static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
+					  __be16 sport, __be16 dport,
+					  struct hlist_head udptable[])
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	return __udp6_lib_lookup(dev_net(skb->dst->dev), &iph->saddr, sport,
+				 &iph->daddr, dport, inet6_iif(skb),
+				 udptable);
+}
+
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
@@ -488,8 +499,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	 * check socket cache ... must talk to Alan about his plans
 	 * for sock caches... i'll skip this for now.
 	 */
-	sk = __udp6_lib_lookup(net, saddr, uh->source,
-			       daddr, uh->dest, inet6_iif(skb), udptable);
+	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
 
 	if (sk == NULL) {
 		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
-- 
cgit v1.2.3-70-g09d2


From 23542618deb77cfed312842fe8c41ed19fb16470 Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Tue, 7 Oct 2008 12:41:01 -0700
Subject: inet: Don't lookup the socket if there's a socket attached to the skb

Use the socket cached in the skb if it's present.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_hashtables.h | 12 ++++++++----
 include/net/inet_hashtables.h  | 10 +++++++---
 include/net/sock.h             | 12 ++++++++++++
 net/ipv4/udp.c                 | 10 +++++++---
 net/ipv6/udp.c                 | 10 +++++++---
 5 files changed, 41 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 995efbb031a..f74665d7bea 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -96,10 +96,14 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
 					      const __be16 sport,
 					      const __be16 dport)
 {
-	return __inet6_lookup(dev_net(skb->dst->dev), hashinfo,
-			      &ipv6_hdr(skb)->saddr, sport,
-			      &ipv6_hdr(skb)->daddr, ntohs(dport),
-			      inet6_iif(skb));
+	struct sock *sk;
+
+	if (unlikely(sk = skb_steal_sock(skb)))
+		return sk;
+	else return __inet6_lookup(dev_net(skb->dst->dev), hashinfo,
+				   &ipv6_hdr(skb)->saddr, sport,
+				   &ipv6_hdr(skb)->daddr, ntohs(dport),
+				   inet6_iif(skb));
 }
 
 extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 3522bbcd546..5cc182f9eca 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -378,11 +378,15 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
 					     const __be16 sport,
 					     const __be16 dport)
 {
+	struct sock *sk;
 	const struct iphdr *iph = ip_hdr(skb);
 
-	return __inet_lookup(dev_net(skb->dst->dev), hashinfo,
-			     iph->saddr, sport,
-			     iph->daddr, dport, inet_iif(skb));
+	if (unlikely(sk = skb_steal_sock(skb)))
+		return sk;
+	else
+		return __inet_lookup(dev_net(skb->dst->dev), hashinfo,
+				     iph->saddr, sport,
+				     iph->daddr, dport, inet_iif(skb));
 }
 
 extern int __inet_hash_connect(struct inet_timewait_death_row *death_row,
diff --git a/include/net/sock.h b/include/net/sock.h
index 75a312d3888..18f96708f3a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1324,6 +1324,18 @@ static inline void sk_change_net(struct sock *sk, struct net *net)
 	sock_net_set(sk, hold_net(net));
 }
 
+static inline struct sock *skb_steal_sock(struct sk_buff *skb)
+{
+	if (unlikely(skb->sk)) {
+		struct sock *sk = skb->sk;
+
+		skb->destructor = NULL;
+		skb->sk = NULL;
+		return sk;
+	}
+	return NULL;
+}
+
 extern void sock_enable_timestamp(struct sock *sk);
 extern int sock_get_timestamp(struct sock *, struct timeval __user *);
 extern int sock_get_timestampns(struct sock *, struct timespec __user *);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c7a90b546b2..822c9deac83 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -306,11 +306,15 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
 						 __be16 sport, __be16 dport,
 						 struct hlist_head udptable[])
 {
+	struct sock *sk;
 	const struct iphdr *iph = ip_hdr(skb);
 
-	return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport,
-				 iph->daddr, dport, inet_iif(skb),
-				 udptable);
+	if (unlikely(sk = skb_steal_sock(skb)))
+		return sk;
+	else
+		return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport,
+					 iph->daddr, dport, inet_iif(skb),
+					 udptable);
 }
 
 struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ce26c41d157..e51da8c092f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -111,11 +111,15 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
 					  __be16 sport, __be16 dport,
 					  struct hlist_head udptable[])
 {
+	struct sock *sk;
 	struct ipv6hdr *iph = ipv6_hdr(skb);
 
-	return __udp6_lib_lookup(dev_net(skb->dst->dev), &iph->saddr, sport,
-				 &iph->daddr, dport, inet6_iif(skb),
-				 udptable);
+	if (unlikely(sk = skb_steal_sock(skb)))
+		return sk;
+	else
+		return __udp6_lib_lookup(dev_net(skb->dst->dev), &iph->saddr, sport,
+					 &iph->daddr, dport, inet6_iif(skb),
+					 udptable);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 68fffc679694d5f7c02fdeb684b481416cd8213b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 7 Oct 2008 14:12:10 -0700
Subject: ipv6: clean up ip6_route_net_init() error handling

ip6_route_net_init() error handling looked less than solid, fix 'er up.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f4385a6569c..635d97d54b0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2631,10 +2631,8 @@ static int ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
 					       sizeof(*net->ipv6.ip6_prohibit_entry),
 					       GFP_KERNEL);
-	if (!net->ipv6.ip6_prohibit_entry) {
-		kfree(net->ipv6.ip6_null_entry);
-		goto out;
-	}
+	if (!net->ipv6.ip6_prohibit_entry)
+		goto out_ip6_null_entry;
 	net->ipv6.ip6_prohibit_entry->u.dst.path =
 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
 	net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
@@ -2642,11 +2640,8 @@ static int ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
 					       GFP_KERNEL);
-	if (!net->ipv6.ip6_blk_hole_entry) {
-		kfree(net->ipv6.ip6_null_entry);
-		kfree(net->ipv6.ip6_prohibit_entry);
-		goto out;
-	}
+	if (!net->ipv6.ip6_blk_hole_entry)
+		goto out_ip6_prohibit_entry;
 	net->ipv6.ip6_blk_hole_entry->u.dst.path =
 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
 	net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
@@ -2662,6 +2657,12 @@ static int ip6_route_net_init(struct net *net)
 out:
 	return ret;
 
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+out_ip6_prohibit_entry:
+	kfree(net->ipv6.ip6_prohibit_entry);
+out_ip6_null_entry:
+	kfree(net->ipv6.ip6_null_entry);
+#endif
 out_ip6_dst_ops:
 	release_net(net->ipv6.ip6_dst_ops->dst_net);
 	kfree(net->ipv6.ip6_dst_ops);
-- 
cgit v1.2.3-70-g09d2


From b339a47c370ec669f789c5989f54eec1d78574bb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 7 Oct 2008 14:15:00 -0700
Subject: ipv6: initialize ip6_route sysctl vars in ip6_route_net_init()

This makes that ip6_route_net_init() does all of the route init code.
There used to be a race between ip6_route_net_init() and ip6_net_init()
and someone relying on the combined result was left out cold.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 8 --------
 net/ipv6/route.c    | 9 +++++++++
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index f018704ecb8..af90905fd0f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -840,14 +840,6 @@ static int inet6_net_init(struct net *net)
 	int err = 0;
 
 	net->ipv6.sysctl.bindv6only = 0;
-	net->ipv6.sysctl.flush_delay = 0;
-	net->ipv6.sysctl.ip6_rt_max_size = 4096;
-	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
-	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
-	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
-	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
-	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
-	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 635d97d54b0..e10a1701550 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2647,6 +2647,15 @@ static int ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
 #endif
 
+	net->ipv6.sysctl.flush_delay = 0;
+	net->ipv6.sysctl.ip6_rt_max_size = 4096;
+	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
+	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
+	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
+	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
+	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
+	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+
 #ifdef CONFIG_PROC_FS
 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
-- 
cgit v1.2.3-70-g09d2


From c57943a1c96214ee68f3890bb6772841ffbfd606 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 7 Oct 2008 14:18:42 -0700
Subject: net: wrap sk->sk_backlog_rcv()

Wrap calling sk->sk_backlog_rcv() in a function. This will allow extending the
generic sk_backlog_rcv behaviour.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h   | 5 +++++
 include/net/tcp.h    | 2 +-
 net/core/sock.c      | 4 ++--
 net/ipv4/tcp.c       | 2 +-
 net/ipv4/tcp_timer.c | 2 +-
 5 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 18f96708f3a..ada50c04d09 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -482,6 +482,11 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	return sk->sk_backlog_rcv(sk, skb);
+}
+
 #define sk_wait_event(__sk, __timeo, __condition)			\
 	({	int __rc;						\
 		release_sock(__sk);					\
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f6cc3414315..438014d5761 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -896,7 +896,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 			BUG_ON(sock_owned_by_user(sk));
 
 			while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
-				sk->sk_backlog_rcv(sk, skb1);
+				sk_backlog_rcv(sk, skb1);
 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED);
 			}
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 2d358dd8a03..5e2a3132a8c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -327,7 +327,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 		 */
 		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
 
-		rc = sk->sk_backlog_rcv(sk, skb);
+		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
 	} else
@@ -1374,7 +1374,7 @@ static void __release_sock(struct sock *sk)
 			struct sk_buff *next = skb->next;
 
 			skb->next = NULL;
-			sk->sk_backlog_rcv(sk, skb);
+			sk_backlog_rcv(sk, skb);
 
 			/*
 			 * We are in process context here with softirqs
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7d81a1ee550..7d3fe571d15 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1161,7 +1161,7 @@ static void tcp_prequeue_process(struct sock *sk)
 	 * necessary */
 	local_bh_disable();
 	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-		sk->sk_backlog_rcv(sk, skb);
+		sk_backlog_rcv(sk, skb);
 	local_bh_enable();
 
 	/* Clear memory counter. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 5ab6ba19c3c..6b6dff1164b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -201,7 +201,7 @@ static void tcp_delack_timer(unsigned long data)
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
 
 		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-			sk->sk_backlog_rcv(sk, skb);
+			sk_backlog_rcv(sk, skb);
 
 		tp->ucopy.memory = 0;
 	}
-- 
cgit v1.2.3-70-g09d2


From 654bed16cf86a9ef94495d9e6131b7ff7840a3dd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 7 Oct 2008 14:22:33 -0700
Subject: net: packet split receive api

Add some packet-split receive hooks.

For one this allows to do NUMA node affine page allocs. Later on these
hooks will be extended to do emergency reserve allocations for
fragments.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 23 +++++++++++++++++++++++
 net/core/skbuff.c      | 20 ++++++++++++++++++++
 2 files changed, 43 insertions(+)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 720b688c22b..2725f4e5a9b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -968,6 +968,9 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
 	skb_shinfo(skb)->nr_frags = i + 1;
 }
 
+extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
+			    int off, int size);
+
 #define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
 #define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->frag_list)
 #define SKB_LINEAR_ASSERT(skb)  BUG_ON(skb_is_nonlinear(skb))
@@ -1382,6 +1385,26 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
 	return __netdev_alloc_skb(dev, length, GFP_ATOMIC);
 }
 
+extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask);
+
+/**
+ *	netdev_alloc_page - allocate a page for ps-rx on a specific device
+ *	@dev: network device to receive on
+ *
+ * 	Allocate a new page node local to the specified device.
+ *
+ * 	%NULL is returned if there is no free memory.
+ */
+static inline struct page *netdev_alloc_page(struct net_device *dev)
+{
+	return __netdev_alloc_page(dev, GFP_ATOMIC);
+}
+
+static inline void netdev_free_page(struct net_device *dev, struct page *page)
+{
+	__free_page(page);
+}
+
 /**
  *	skb_clone_writable - is the header of a clone writable
  *	@skb: buffer to check
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8bd248a6487..7f7bb1a636d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -263,6 +263,26 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 	return skb;
 }
 
+struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
+{
+	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
+	struct page *page;
+
+	page = alloc_pages_node(node, gfp_mask, 0);
+	return page;
+}
+EXPORT_SYMBOL(__netdev_alloc_page);
+
+void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
+		int size)
+{
+	skb_fill_page_desc(skb, i, page, off, size);
+	skb->len += size;
+	skb->data_len += size;
+	skb->truesize += size;
+}
+EXPORT_SYMBOL(skb_add_rx_frag);
+
 /**
  *	dev_alloc_skb - allocate an skbuff for receiving
  *	@length: length to allocate
-- 
cgit v1.2.3-70-g09d2


From 33f5f57eeb0c6386fdd85f9c690dc8d700ba7928 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Tue, 7 Oct 2008 14:43:06 -0700
Subject: tcp: kill pointless urg_mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It all started from me noticing that this urgent check in
tcp_clean_rtx_queue is unnecessarily inside the loop. Then
I took a longer look to it and found out that the users of
urg_mode can trivially do without, well almost, there was
one gotcha.

Bonus: those funny people who use urg with >= 2^31 write_seq -
snd_una could now rejoice too (that's the only purpose for the
between being there, otherwise a simple compare would have done
the thing). Not that I assume that the rest of the tcp code
happily lives with such mind-boggling numbers :-). Alas, it
turned out to be impossible to set wmem to such numbers anyway,
yes I really tried a big sendfile after setting some wmem but
nothing happened :-). ...Tcp_wmem is int and so is sk_sndbuf...
So I hacked a bit variable to long and found out that it seems
to work... :-)

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |  9 ++++-----
 net/ipv4/tcp.c           |  4 +---
 net/ipv4/tcp_input.c     | 11 ++++++-----
 net/ipv4/tcp_minisocks.c |  1 +
 net/ipv4/tcp_output.c    | 18 ++++++++++++------
 5 files changed, 24 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 76729062829..fe77e1499ab 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -312,8 +312,11 @@ struct tcp_sock {
 	u32	retrans_out;	/* Retransmitted packets out		*/
 
 	u16	urg_data;	/* Saved octet of OOB data and control flags */
-	u8	urg_mode;	/* In urgent mode		*/
 	u8	ecn_flags;	/* ECN status bits.			*/
+	u8	reordering;	/* Packet reordering metric.		*/
+	u32	snd_up;		/* Urgent pointer		*/
+
+	u8	keepalive_probes; /* num of allowed keep alive probes	*/
 /*
  *      Options received (usually on last packet, some only on SYN packets).
  */
@@ -361,8 +364,6 @@ struct tcp_sock {
 
 	u32	lost_retrans_low;	/* Sent seq after any rxmit (lowest) */
 
-	u8	reordering;	/* Packet reordering metric.		*/
-	u8	keepalive_probes; /* num of allowed keep alive probes	*/
 	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/
 	u32	high_seq;	/* snd_nxt at onset of congestion	*/
 
@@ -374,8 +375,6 @@ struct tcp_sock {
 	u32	total_retrans;	/* Total retransmits for entire connection */
 
 	u32	urg_seq;	/* Seq of received urgent pointer */
-	u32	snd_up;		/* Urgent pointer		*/
-
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7d3fe571d15..eccb7165a80 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -497,10 +497,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 				struct sk_buff *skb)
 {
-	if (flags & MSG_OOB) {
-		tp->urg_mode = 1;
+	if (flags & MSG_OOB)
 		tp->snd_up = tp->write_seq;
-	}
 }
 
 static inline void tcp_push(struct sock *sk, int flags, int mss_now,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3b76bce769d..c19f429dc44 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2836,7 +2836,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
+static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
+			       u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2903,9 +2904,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= acked_pcount;
 
-		if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up)))
-			tp->urg_mode = 0;
-
 		tp->packets_out -= acked_pcount;
 		pkts_acked += acked_pcount;
 
@@ -2935,6 +2933,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 			tp->lost_skb_hint = NULL;
 	}
 
+	if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
+		tp->snd_up = tp->snd_una;
+
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
@@ -3311,7 +3312,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		goto no_queue;
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f976fc57892..779f2e9d068 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -395,6 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->pred_flags = 0;
 		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
 		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
+		newtp->snd_up = treq->snt_isn + 1;
 
 		tcp_prequeue_init(newtp);
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 493553c71d3..990a5849323 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -345,6 +345,11 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 	TCP_SKB_CB(skb)->end_seq = seq;
 }
 
+static inline int tcp_urg_mode(const struct tcp_sock *tp)
+{
+	return tp->snd_una != tp->snd_up;
+}
+
 #define OPTION_SACK_ADVERTISE	(1 << 0)
 #define OPTION_TS		(1 << 1)
 #define OPTION_MD5		(1 << 2)
@@ -646,7 +651,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->check		= 0;
 	th->urg_ptr		= 0;
 
-	if (unlikely(tp->urg_mode &&
+	/* The urg_mode check is necessary during a below snd_una win probe */
+	if (unlikely(tcp_urg_mode(tp) &&
 		     between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
 		th->urg_ptr		= htons(tp->snd_up - tcb->seq);
 		th->urg			= 1;
@@ -1012,7 +1018,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 /* Compute the current effective MSS, taking SACKs and IP options,
  * and even PMTU discovery events into account.
  *
- * LARGESEND note: !urg_mode is overkill, only frames up to snd_up
+ * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up
  * cannot be large. However, taking into account rare use of URG, this
  * is not a big flaw.
  */
@@ -1029,7 +1035,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 
 	mss_now = tp->mss_cache;
 
-	if (large_allowed && sk_can_gso(sk) && !tp->urg_mode)
+	if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp))
 		doing_tso = 1;
 
 	if (dst) {
@@ -1193,7 +1199,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 	/* Don't use the nagle rule for urgent data (or for the final FIN).
 	 * Nagle can be ignored during F-RTO too (see RFC4138).
 	 */
-	if (tp->urg_mode || (tp->frto_counter == 2) ||
+	if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
 	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
 		return 1;
 
@@ -2358,6 +2364,7 @@ static void tcp_connect_init(struct sock *sk)
 	tcp_init_wl(tp, tp->write_seq, 0);
 	tp->snd_una = tp->write_seq;
 	tp->snd_sml = tp->write_seq;
+	tp->snd_up = tp->write_seq;
 	tp->rcv_nxt = 0;
 	tp->rcv_wup = 0;
 	tp->copied_seq = 0;
@@ -2567,8 +2574,7 @@ int tcp_write_wakeup(struct sock *sk)
 			tcp_event_new_data_sent(sk, skb);
 		return err;
 	} else {
-		if (tp->urg_mode &&
-		    between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
+		if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
 			tcp_xmit_probe_skb(sk, 1);
 		return tcp_xmit_probe_skb(sk, 0);
 	}
-- 
cgit v1.2.3-70-g09d2


From 4a7e56098f06d505f23f8d7c8d6762221065922a Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Tue, 7 Oct 2008 14:43:31 -0700
Subject: tcp: cleanup messy initializer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I'm quite sure that if I give this function in its old format
for you to inspect, you start to wonder what is the type of
demanded or if it's a global variable.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c19f429dc44..63da39372d4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4461,8 +4461,8 @@ static void tcp_new_space(struct sock *sk)
 
 	if (tcp_should_expand_sndbuf(sk)) {
 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
-			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
-		    demanded = max_t(unsigned int, tp->snd_cwnd,
+			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+		int demanded = max_t(unsigned int, tp->snd_cwnd,
 				     tp->reordering + 1);
 		sndmem *= 2 * demanded;
 		if (sndmem > sk->sk_sndbuf)
-- 
cgit v1.2.3-70-g09d2


From 835bcc0497e18f54153ac9e32b598dd8ffb7aa66 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 7 Oct 2008 14:45:55 -0700
Subject: netns: move /proc/net/dev_snmp6 to struct net

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/mib.h |  4 ++++
 net/ipv6/proc.c         | 20 +++++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 44914760464..ffcef5de3d1 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -11,6 +11,10 @@ struct netns_mib {
 	DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
 	DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
 	DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics);
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct proc_dir_entry *proc_net_devsnmp6;
+#endif
 };
 
 #endif
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 0179b66864f..16ebf85d4ad 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -29,8 +29,6 @@
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
-static struct proc_dir_entry *proc_net_devsnmp6;
-
 static int sockstat6_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq->private;
@@ -210,18 +208,20 @@ static const struct file_operations snmp6_seq_fops = {
 int snmp6_register_dev(struct inet6_dev *idev)
 {
 	struct proc_dir_entry *p;
+	struct net *net;
 
 	if (!idev || !idev->dev)
 		return -EINVAL;
 
-	if (!net_eq(dev_net(idev->dev), &init_net))
+	net = dev_net(idev->dev);
+	if (!net_eq(net, &init_net))
 		return 0;
 
-	if (!proc_net_devsnmp6)
+	if (!net->mib.proc_net_devsnmp6)
 		return -ENOENT;
 
 	p = proc_create_data(idev->dev->name, S_IRUGO,
-			     proc_net_devsnmp6, &snmp6_seq_fops, idev);
+			     net->mib.proc_net_devsnmp6, &snmp6_seq_fops, idev);
 	if (!p)
 		return -ENOMEM;
 
@@ -231,12 +231,13 @@ int snmp6_register_dev(struct inet6_dev *idev)
 
 int snmp6_unregister_dev(struct inet6_dev *idev)
 {
-	if (!proc_net_devsnmp6)
+	struct net *net = dev_net(idev->dev);
+	if (!net->mib.proc_net_devsnmp6)
 		return -ENOENT;
 	if (!idev || !idev->stats.proc_dir_entry)
 		return -EINVAL;
 	remove_proc_entry(idev->stats.proc_dir_entry->name,
-			  proc_net_devsnmp6);
+			  net->mib.proc_net_devsnmp6);
 	idev->stats.proc_dir_entry = NULL;
 	return 0;
 }
@@ -269,8 +270,9 @@ int __init ipv6_misc_proc_init(void)
 	if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops))
 		goto proc_snmp6_fail;
 
-	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net);
-	if (!proc_net_devsnmp6)
+	init_net.mib.proc_net_devsnmp6 =
+		proc_mkdir("dev_snmp6", init_net.proc_net);
+	if (!init_net.mib.proc_net_devsnmp6)
 		goto proc_dev_snmp6_fail;
 out:
 	return rc;
-- 
cgit v1.2.3-70-g09d2


From 06f38527decedbea0588256ecbb5784d4bb35b81 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 7 Oct 2008 14:46:18 -0700
Subject: netns: register /proc/net/dev_snmp6/* in each ns

Do the same for /proc/net/snmp6.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 40 ++++++++++++++++------------------------
 1 file changed, 16 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 16ebf85d4ad..57640620c16 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -247,12 +247,27 @@ static int ipv6_proc_init_net(struct net *net)
 	if (!proc_net_fops_create(net, "sockstat6", S_IRUGO,
 			&sockstat6_seq_fops))
 		return -ENOMEM;
+
+	if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops))
+		goto proc_snmp6_fail;
+
+	net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
+	if (!net->mib.proc_net_devsnmp6)
+		goto proc_dev_snmp6_fail;
 	return 0;
+
+proc_snmp6_fail:
+	proc_net_remove(net, "sockstat6");
+proc_dev_snmp6_fail:
+	proc_net_remove(net, "dev_snmp6");
+	return -ENOMEM;
 }
 
 static void ipv6_proc_exit_net(struct net *net)
 {
 	proc_net_remove(net, "sockstat6");
+	proc_net_remove(net, "dev_snmp6");
+	proc_net_remove(net, "snmp6");
 }
 
 static struct pernet_operations ipv6_proc_ops = {
@@ -262,34 +277,11 @@ static struct pernet_operations ipv6_proc_ops = {
 
 int __init ipv6_misc_proc_init(void)
 {
-	int rc = 0;
-
-	if (register_pernet_subsys(&ipv6_proc_ops))
-		goto proc_net_fail;
-
-	if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops))
-		goto proc_snmp6_fail;
-
-	init_net.mib.proc_net_devsnmp6 =
-		proc_mkdir("dev_snmp6", init_net.proc_net);
-	if (!init_net.mib.proc_net_devsnmp6)
-		goto proc_dev_snmp6_fail;
-out:
-	return rc;
-
-proc_dev_snmp6_fail:
-	proc_net_remove(&init_net, "snmp6");
-proc_snmp6_fail:
-	unregister_pernet_subsys(&ipv6_proc_ops);
-proc_net_fail:
-	rc = -ENOMEM;
-	goto out;
+	return register_pernet_subsys(&ipv6_proc_ops);
 }
 
 void ipv6_misc_proc_exit(void)
 {
-	proc_net_remove(&init_net, "dev_snmp6");
-	proc_net_remove(&init_net, "snmp6");
 	unregister_pernet_subsys(&ipv6_proc_ops);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 35f0a5df6cbc315da031c40541e135a059bfde7d Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 7 Oct 2008 14:46:47 -0700
Subject: ipv6: consolidate ipv6 sock_stat code at the beginning of
 net/ipv6/proc.c

Simple, comsolidate sockstat6 staff in one place, at the beginning of
the file. Right now sockstat6_seq_open/sockstat6_seq_fops looks like an
intrusion in the middle of snmp6 code.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 57640620c16..25eda8bf218 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -46,6 +46,19 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+static int sockstat6_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open_net(inode, file, sockstat6_seq_show);
+}
+
+static const struct file_operations sockstat6_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = sockstat6_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release_net,
+};
+
 static struct snmp_mib snmp6_ipstats_list[] = {
 /* ipv6 mib according to RFC 2465 */
 	SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INRECEIVES),
@@ -179,19 +192,6 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static int sockstat6_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, sockstat6_seq_show);
-}
-
-static const struct file_operations sockstat6_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = sockstat6_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release_net,
-};
-
 static int snmp6_seq_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, snmp6_seq_show, PDE(inode)->data);
-- 
cgit v1.2.3-70-g09d2


From 7b43ccecc77480353a5657d993d671cae9e94efd Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 7 Oct 2008 14:47:12 -0700
Subject: ipv6: separate seq_ops for global & per/device ipv6 statistics

idev has been stored on seq->private. NULL has been stored for global
statistics.

The situation is changed with net namespace. We need to store pointer to
struct net and the only place is seq->private. So, we'll have for
/proc/net/dev_snmp6/* and for /proc/net/snmp6 pointers of two different
types stored in the same field.

This effectively requires to separate seq_ops of these files.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 48 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 25eda8bf218..7601f56ce91 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -175,26 +175,17 @@ snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
 
 static int snmp6_seq_show(struct seq_file *seq, void *v)
 {
-	struct inet6_dev *idev = (struct inet6_dev *)seq->private;
-
-	if (idev) {
-		seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
-		snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list);
-		snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list);
-		snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg);
-	} else {
-		snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
-		snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
-		snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics);
-		snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
-		snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list);
-	}
+	snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
+	snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
+	snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics);
+	snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
+	snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list);
 	return 0;
 }
 
 static int snmp6_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, snmp6_seq_show, PDE(inode)->data);
+	return single_open(file, snmp6_seq_show, NULL);
 }
 
 static const struct file_operations snmp6_seq_fops = {
@@ -205,6 +196,30 @@ static const struct file_operations snmp6_seq_fops = {
 	.release = single_release,
 };
 
+static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
+{
+	struct inet6_dev *idev = (struct inet6_dev *)seq->private;
+
+	seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
+	snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list);
+	snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list);
+	snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg);
+	return 0;
+}
+
+static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, snmp6_dev_seq_show, PDE(inode)->data);
+}
+
+static const struct file_operations snmp6_dev_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = snmp6_dev_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
 int snmp6_register_dev(struct inet6_dev *idev)
 {
 	struct proc_dir_entry *p;
@@ -221,7 +236,8 @@ int snmp6_register_dev(struct inet6_dev *idev)
 		return -ENOENT;
 
 	p = proc_create_data(idev->dev->name, S_IRUGO,
-			     net->mib.proc_net_devsnmp6, &snmp6_seq_fops, idev);
+			     net->mib.proc_net_devsnmp6,
+			     &snmp6_dev_seq_fops, idev);
 	if (!p)
 		return -ENOMEM;
 
-- 
cgit v1.2.3-70-g09d2


From 2b4209e4b7ba9c7d70910a665ae4b60d49b75fcd Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 7 Oct 2008 14:47:37 -0700
Subject: netns: register global ipv6 mibs statistics in each namespace

Unused net variable will become used very soon.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 7601f56ce91..c38c9e55406 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -175,6 +175,8 @@ snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
 
 static int snmp6_seq_show(struct seq_file *seq, void *v)
 {
+	struct net *net = (struct net *)seq->private;
+
 	snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
 	snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
 	snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics);
@@ -185,7 +187,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
 
 static int snmp6_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, snmp6_seq_show, NULL);
+	return single_open_net(inode, file, snmp6_seq_show);
 }
 
 static const struct file_operations snmp6_seq_fops = {
@@ -193,7 +195,7 @@ static const struct file_operations snmp6_seq_fops = {
 	.open	 = snmp6_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = single_release,
+	.release = single_release_net,
 };
 
 static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
-- 
cgit v1.2.3-70-g09d2


From ab38dc7a70e59a4888ab4acb51daf3c6012ce4b8 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 7 Oct 2008 14:47:55 -0700
Subject: netns: allow per device ipv6 snmp statistics in non-initial namespace

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index c38c9e55406..23e567fb1d3 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -231,9 +231,6 @@ int snmp6_register_dev(struct inet6_dev *idev)
 		return -EINVAL;
 
 	net = dev_net(idev->dev);
-	if (!net_eq(net, &init_net))
-		return 0;
-
 	if (!net->mib.proc_net_devsnmp6)
 		return -ENOENT;
 
-- 
cgit v1.2.3-70-g09d2


From e43291cb37406dae405d50332eaa1ba2264c8dce Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 7 Oct 2008 14:48:53 -0700
Subject: netns: add stub functions for per/namespace mibs allocation

The content of init_ipv6_mibs/cleanup_ipv6_mibs will be moved to new
calls one by one next.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index af90905fd0f..e8f82eca160 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -835,6 +835,15 @@ static void cleanup_ipv6_mibs(void)
 	snmp_mib_free((void **)udplite_stats_in6);
 }
 
+static int __net_init ipv6_init_mibs(struct net *net)
+{
+	return 0;
+}
+
+static void __net_exit ipv6_cleanup_mibs(struct net *net)
+{
+}
+
 static int inet6_net_init(struct net *net)
 {
 	int err = 0;
@@ -842,6 +851,9 @@ static int inet6_net_init(struct net *net)
 	net->ipv6.sysctl.bindv6only = 0;
 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
 
+	err = ipv6_init_mibs(net);
+	if (err)
+		return err;
 #ifdef CONFIG_PROC_FS
 	err = udp6_proc_init(net);
 	if (err)
@@ -852,7 +864,6 @@ static int inet6_net_init(struct net *net)
 	err = ac6_proc_init(net);
 	if (err)
 		goto proc_ac6_fail;
-out:
 #endif
 	return err;
 
@@ -861,7 +872,9 @@ proc_ac6_fail:
 	tcp6_proc_exit(net);
 proc_tcp6_fail:
 	udp6_proc_exit(net);
-	goto out;
+out:
+	ipv6_cleanup_mibs(net);
+	return err;
 #endif
 }
 
@@ -872,6 +885,7 @@ static void inet6_net_exit(struct net *net)
 	tcp6_proc_exit(net);
 	ac6_proc_exit(net);
 #endif
+	ipv6_cleanup_mibs(net);
 }
 
 static struct pernet_operations inet6_net_ops = {
-- 
cgit v1.2.3-70-g09d2


From 0c7ed677fb7013c8028045d409a48ac42151187a Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 7 Oct 2008 14:49:36 -0700
Subject: netns: make udpv6 mib per/namespace

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/mib.h |  1 +
 include/net/udp.h       | 12 ++++++------
 net/ipv4/udp.c          |  3 ---
 net/ipv6/af_inet6.c     |  9 ++++-----
 net/ipv6/proc.c         |  3 ++-
 5 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index ffcef5de3d1..ba622b24b84 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -14,6 +14,7 @@ struct netns_mib {
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct proc_dir_entry *proc_net_devsnmp6;
+	DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
 #endif
 };
 
diff --git a/include/net/udp.h b/include/net/udp.h
index d38f6f2419f..72f28c3ddaa 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -152,8 +152,6 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 				    __be32 daddr, __be16 dport,
 				    int dif);
 
-DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
-
 /* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */
 DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 
@@ -167,12 +165,14 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 	if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_statistics, field);         \
 	else		SNMP_INC_STATS_BH((net)->mib.udp_statistics, field);    }  while(0)
 
-#define UDP6_INC_STATS_BH(net, field, is_udplite) 	    do { (void)net;  \
+#define UDP6_INC_STATS_BH(net, field, is_udplite) 	    do { \
 	if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field);         \
-	else		SNMP_INC_STATS_BH(udp_stats_in6, field);    } while(0)
-#define UDP6_INC_STATS_USER(net, field, is_udplite)	    do { (void)net;    \
+	else		SNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field);  \
+} while(0)
+#define UDP6_INC_STATS_USER(net, field, is_udplite)	    do { \
 	if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field);         \
-	else		SNMP_INC_STATS_USER(udp_stats_in6, field);    } while(0)
+	else		SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field);  \
+} while(0)
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 #define UDPX_INC_STATS_BH(sk, field) \
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 822c9deac83..85f8e8e10b1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -108,9 +108,6 @@
  *	Snmp MIB for the UDP layer
  */
 
-DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
-EXPORT_SYMBOL(udp_stats_in6);
-
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e8f82eca160..e09139122ef 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -806,16 +806,12 @@ static int __init init_ipv6_mibs(void)
 	if (snmp_mib_init((void **)icmpv6msg_statistics,
 			  sizeof(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg_mib;
-	if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0)
-		goto err_udp_mib;
 	if (snmp_mib_init((void **)udplite_stats_in6,
 			  sizeof (struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	return 0;
 
 err_udplite_mib:
-	snmp_mib_free((void **)udp_stats_in6);
-err_udp_mib:
 	snmp_mib_free((void **)icmpv6msg_statistics);
 err_icmpmsg_mib:
 	snmp_mib_free((void **)icmpv6_statistics);
@@ -831,17 +827,20 @@ static void cleanup_ipv6_mibs(void)
 	snmp_mib_free((void **)ipv6_statistics);
 	snmp_mib_free((void **)icmpv6_statistics);
 	snmp_mib_free((void **)icmpv6msg_statistics);
-	snmp_mib_free((void **)udp_stats_in6);
 	snmp_mib_free((void **)udplite_stats_in6);
 }
 
 static int __net_init ipv6_init_mibs(struct net *net)
 {
+	if (snmp_mib_init((void **)net->mib.udp_stats_in6,
+			  sizeof (struct udp_mib)) < 0)
+		return -ENOMEM;
 	return 0;
 }
 
 static void __net_exit ipv6_cleanup_mibs(struct net *net)
 {
+	snmp_mib_free((void **)net->mib.udp_stats_in6);
 }
 
 static int inet6_net_init(struct net *net)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 23e567fb1d3..3eaf20bf998 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -180,7 +180,8 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
 	snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
 	snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
 	snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics);
-	snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
+	snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6,
+			    snmp6_udp6_list);
 	snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From be713a443ee019489890e93654557916fbf72612 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Tue, 7 Oct 2008 14:50:06 -0700
Subject: netns: make uplitev6 mib per/namespace

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/mib.h |  1 +
 include/net/udp.h       | 11 ++++-------
 net/ipv6/af_inet6.c     | 14 ++++++++------
 net/ipv6/proc.c         |  3 ++-
 net/ipv6/udplite.c      |  2 --
 5 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index ba622b24b84..4e58f0519ce 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -15,6 +15,7 @@ struct netns_mib {
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct proc_dir_entry *proc_net_devsnmp6;
 	DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
+	DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 #endif
 };
 
diff --git a/include/net/udp.h b/include/net/udp.h
index 72f28c3ddaa..1e205095ea6 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -152,9 +152,6 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 				    __be32 daddr, __be16 dport,
 				    int dif);
 
-/* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */
-DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
-
 /*
  * 	SNMP statistics for UDP and UDP-Lite
  */
@@ -166,12 +163,12 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 	else		SNMP_INC_STATS_BH((net)->mib.udp_statistics, field);    }  while(0)
 
 #define UDP6_INC_STATS_BH(net, field, is_udplite) 	    do { \
-	if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field);         \
+	if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_stats_in6, field);\
 	else		SNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field);  \
 } while(0)
-#define UDP6_INC_STATS_USER(net, field, is_udplite)	    do { \
-	if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field);         \
-	else		SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field);  \
+#define UDP6_INC_STATS_USER(net, field, __lite)		    do { \
+	if (__lite) SNMP_INC_STATS_USER((net)->mib.udplite_stats_in6, field);  \
+	else	    SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field);      \
 } while(0)
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e09139122ef..127b240d2d8 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -806,13 +806,8 @@ static int __init init_ipv6_mibs(void)
 	if (snmp_mib_init((void **)icmpv6msg_statistics,
 			  sizeof(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg_mib;
-	if (snmp_mib_init((void **)udplite_stats_in6,
-			  sizeof (struct udp_mib)) < 0)
-		goto err_udplite_mib;
 	return 0;
 
-err_udplite_mib:
-	snmp_mib_free((void **)icmpv6msg_statistics);
 err_icmpmsg_mib:
 	snmp_mib_free((void **)icmpv6_statistics);
 err_icmp_mib:
@@ -827,7 +822,6 @@ static void cleanup_ipv6_mibs(void)
 	snmp_mib_free((void **)ipv6_statistics);
 	snmp_mib_free((void **)icmpv6_statistics);
 	snmp_mib_free((void **)icmpv6msg_statistics);
-	snmp_mib_free((void **)udplite_stats_in6);
 }
 
 static int __net_init ipv6_init_mibs(struct net *net)
@@ -835,12 +829,20 @@ static int __net_init ipv6_init_mibs(struct net *net)
 	if (snmp_mib_init((void **)net->mib.udp_stats_in6,
 			  sizeof (struct udp_mib)) < 0)
 		return -ENOMEM;
+	if (snmp_mib_init((void **)net->mib.udplite_stats_in6,
+			  sizeof (struct udp_mib)) < 0)
+		goto err_udplite_mib;
 	return 0;
+
+err_udplite_mib:
+	snmp_mib_free((void **)net->mib.udp_stats_in6);
+	return -ENOMEM;
 }
 
 static void __net_exit ipv6_cleanup_mibs(struct net *net)
 {
 	snmp_mib_free((void **)net->mib.udp_stats_in6);
+	snmp_mib_free((void **)net->mib.udplite_stats_in6);
 }
 
 static int inet6_net_init(struct net *net)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 3eaf20bf998..c78cf754ef3 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -182,7 +182,8 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
 	snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics);
 	snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6,
 			    snmp6_udp6_list);
-	snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list);
+	snmp6_seq_show_item(seq, (void **)net->mib.udplite_stats_in6,
+			    snmp6_udplite6_list);
 	return 0;
 }
 
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index f6cdcb348e0..3cd1a1ac3d6 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -13,8 +13,6 @@
  */
 #include "udp_impl.h"
 
-DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
-
 static int udplitev6_rcv(struct sk_buff *skb)
 {
 	return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
-- 
cgit v1.2.3-70-g09d2


From b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 7 Oct 2008 15:26:48 -0700
Subject: net: only invoke dev->change_rx_flags when device is UP

Jesper Dangaard Brouer <hawk@comx.dk> reported a bug when setting a VLAN
device down that is in promiscous mode:

When the VLAN device is set down, the promiscous count on the real
device is decremented by one by vlan_dev_stop(). When removing the
promiscous flag from the VLAN device afterwards, the promiscous
count on the real device is decremented a second time by the
vlan_change_rx_flags() callback.

The root cause for this is that the ->change_rx_flags() callback is
invoked while the device is down. The synchronization is meant to mirror
the behaviour of the ->set_rx_mode callbacks, meaning the ->open function
is responsible for doing a full sync on open, the ->close() function is
responsible for doing full cleanup on ->stop() and ->change_rx_flags()
is meant to do incremental changes while the device is UP.

Only invoke ->change_rx_flags() while the device is UP to provide the
intended behaviour.

Tested-by: Jesper Dangaard Brouer <jdb@comx.dk>

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index e8eb2b47834..fd992c0f271 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2918,6 +2918,12 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 	return 0;
 }
 
+static void dev_change_rx_flags(struct net_device *dev, int flags)
+{
+	if (dev->flags & IFF_UP && dev->change_rx_flags)
+		dev->change_rx_flags(dev, flags);
+}
+
 static int __dev_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
@@ -2955,8 +2961,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
 				current->uid, current->gid,
 				audit_get_sessionid(current));
 
-		if (dev->change_rx_flags)
-			dev->change_rx_flags(dev, IFF_PROMISC);
+		dev_change_rx_flags(dev, IFF_PROMISC);
 	}
 	return 0;
 }
@@ -3022,8 +3027,7 @@ int dev_set_allmulti(struct net_device *dev, int inc)
 		}
 	}
 	if (dev->flags ^ old_flags) {
-		if (dev->change_rx_flags)
-			dev->change_rx_flags(dev, IFF_ALLMULTI);
+		dev_change_rx_flags(dev, IFF_ALLMULTI);
 		dev_set_rx_mode(dev);
 	}
 	return 0;
@@ -3347,8 +3351,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 	 *	Load in the correct multicast list now the flags have changed.
 	 */
 
-	if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
-		dev->change_rx_flags(dev, IFF_MULTICAST);
+	if ((old_flags ^ flags) & IFF_MULTICAST)
+		dev_change_rx_flags(dev, IFF_MULTICAST);
 
 	dev_set_rx_mode(dev);
 
-- 
cgit v1.2.3-70-g09d2


From 53240c208776d557dba9d7afedbcdbf512774c16 Mon Sep 17 00:00:00 2001
From: Ali Saidi <saidi@engin.umich.edu>
Date: Tue, 7 Oct 2008 15:31:19 -0700
Subject: tcp: Fix possible double-ack w/ user dma

From: Ali Saidi <saidi@engin.umich.edu>

When TCP receive copy offload is enabled it's possible that
tcp_rcv_established() will cause two acks to be sent for a single
packet. In the case that a tcp_dma_early_copy() is successful,
copied_early is set to true which causes tcp_cleanup_rbuf() to be
called early which can send an ack. Further along in
tcp_rcv_established(), __tcp_ack_snd_check() is called and will
schedule a delayed ACK. If no packets are processed before the delayed
ack timer expires the packet will be acked twice.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 67ccce2a96b..7abc6b80d47 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4879,7 +4879,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 					goto no_ack;
 			}
 
-			__tcp_ack_snd_check(sk, 0);
+			if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
+				__tcp_ack_snd_check(sk, 0);
 no_ack:
 #ifdef CONFIG_NET_DMA
 			if (copied_early)
-- 
cgit v1.2.3-70-g09d2


From b8bae41ed6a53cce56c50811a91cd963e3187d1c Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Tue, 7 Oct 2008 15:34:37 -0700
Subject: ipv4: add mc_count to in_device.

This patch add mc_count to struct in_device and updates
increment/decrement/initilaize of this field in IPv4 and in IPv6.

- Also printing the vfs /proc entry (/proc/net/igmp) is adjusted to
use the new mc_count.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 1 +
 net/ipv4/igmp.c            | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index c6f51ad52d5..06fcdb45106 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -25,6 +25,7 @@ struct in_device
 	struct in_ifaddr	*ifa_list;	/* IP ifaddr chain		*/
 	rwlock_t		mc_list_lock;
 	struct ip_mc_list	*mc_list;	/* IP multicast filter chain    */
+	int			mc_count;	          /* Number of installed mcasts	*/
 	spinlock_t		mc_tomb_lock;
 	struct ip_mc_list	*mc_tomb;
 	unsigned long		mr_v1_seen;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f70fac61259..7f9e337e390 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1234,6 +1234,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	write_lock_bh(&in_dev->mc_list_lock);
 	im->next=in_dev->mc_list;
 	in_dev->mc_list=im;
+	in_dev->mc_count++;
 	write_unlock_bh(&in_dev->mc_list_lock);
 #ifdef CONFIG_IP_MULTICAST
 	igmpv3_del_delrec(in_dev, im->multiaddr);
@@ -1282,6 +1283,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 			if (--i->users == 0) {
 				write_lock_bh(&in_dev->mc_list_lock);
 				*ip = i->next;
+				in_dev->mc_count--;
 				write_unlock_bh(&in_dev->mc_list_lock);
 				igmp_group_dropped(i);
 
@@ -1330,6 +1332,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
 	setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire,
 			(unsigned long)in_dev);
 	in_dev->mr_ifc_count = 0;
+	in_dev->mc_count     = 0;
 	setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
 			(unsigned long)in_dev);
 	in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
@@ -1369,8 +1372,8 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 	write_lock_bh(&in_dev->mc_list_lock);
 	while ((i = in_dev->mc_list) != NULL) {
 		in_dev->mc_list = i->next;
+		in_dev->mc_count--;
 		write_unlock_bh(&in_dev->mc_list_lock);
-
 		igmp_group_dropped(i);
 		ip_ma_put(i);
 
@@ -2383,7 +2386,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
 
 		if (state->in_dev->mc_list == im) {
 			seq_printf(seq, "%d\t%-10s: %5d %7s\n",
-				   state->dev->ifindex, state->dev->name, state->dev->mc_count, querier);
+				   state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
 		}
 
 		seq_printf(seq,
-- 
cgit v1.2.3-70-g09d2


From 58ec3b4db9eb5a28e3aec5f407a54e28f7039c19 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 7 Oct 2008 15:50:03 -0700
Subject: net: Fix netdev_run_todo dead-lock

Benjamin Thery tracked down a bug that explains many instances
of the error

unregister_netdevice: waiting for %s to become free. Usage count = %d

It turns out that netdev_run_todo can dead-lock with itself if
a second instance of it is run in a thread that will then free
a reference to the device waited on by the first instance.

The problem is really quite silly.  We were trying to create
parallelism where none was required.  As netdev_run_todo always
follows a RTNL section, and that todo tasks can only be added
with the RTNL held, by definition you should only need to wait
for the very ones that you've added and be done with it.

There is no need for a second mutex or spinlock.

This is exactly what the following patch does.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c       | 27 ++++++---------------------
 net/core/rtnetlink.c |  2 +-
 2 files changed, 7 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index fd992c0f271..0ae08d3f57e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3812,14 +3812,11 @@ static int dev_new_index(struct net *net)
 }
 
 /* Delayed registration/unregisteration */
-static DEFINE_SPINLOCK(net_todo_list_lock);
 static LIST_HEAD(net_todo_list);
 
 static void net_set_todo(struct net_device *dev)
 {
-	spin_lock(&net_todo_list_lock);
 	list_add_tail(&dev->todo_list, &net_todo_list);
-	spin_unlock(&net_todo_list_lock);
 }
 
 static void rollback_registered(struct net_device *dev)
@@ -4146,33 +4143,24 @@ static void netdev_wait_allrefs(struct net_device *dev)
  *	free_netdev(y1);
  *	free_netdev(y2);
  *
- * We are invoked by rtnl_unlock() after it drops the semaphore.
+ * We are invoked by rtnl_unlock().
  * This allows us to deal with problems:
  * 1) We can delete sysfs objects which invoke hotplug
  *    without deadlocking with linkwatch via keventd.
  * 2) Since we run with the RTNL semaphore not held, we can sleep
  *    safely in order to wait for the netdev refcnt to drop to zero.
+ *
+ * We must not return until all unregister events added during
+ * the interval the lock was held have been completed.
  */
-static DEFINE_MUTEX(net_todo_run_mutex);
 void netdev_run_todo(void)
 {
 	struct list_head list;
 
-	/* Need to guard against multiple cpu's getting out of order. */
-	mutex_lock(&net_todo_run_mutex);
-
-	/* Not safe to do outside the semaphore.  We must not return
-	 * until all unregister events invoked by the local processor
-	 * have been completed (either by this todo run, or one on
-	 * another cpu).
-	 */
-	if (list_empty(&net_todo_list))
-		goto out;
-
 	/* Snapshot list, allow later requests */
-	spin_lock(&net_todo_list_lock);
 	list_replace_init(&net_todo_list, &list);
-	spin_unlock(&net_todo_list_lock);
+
+	__rtnl_unlock();
 
 	while (!list_empty(&list)) {
 		struct net_device *dev
@@ -4204,9 +4192,6 @@ void netdev_run_todo(void)
 		/* Free network device */
 		kobject_put(&dev->dev.kobj);
 	}
-
-out:
-	mutex_unlock(&net_todo_run_mutex);
 }
 
 static struct net_device_stats *internal_stats(struct net_device *dev)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 71edb8b3634..d6381c2a469 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -73,7 +73,7 @@ void __rtnl_unlock(void)
 
 void rtnl_unlock(void)
 {
-	mutex_unlock(&rtnl_mutex);
+	/* This fellow will unlock it for us. */
 	netdev_run_todo();
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9d2c27e17b7574023b5adb5c6a50d7aaeb915543 Mon Sep 17 00:00:00 2001
From: Daniele Lacamera <root@danielinux.net>
Date: Tue, 7 Oct 2008 15:58:17 -0700
Subject: tcp: Fix tcp_hybla zero congestion window growth with small rho and
 large cwnd.

Because of rounding, in certain conditions, i.e. when in congestion
avoidance state rho is smaller than 1/128 of the current cwnd, TCP
Hybla congestion control starves and the cwnd is kept constant
forever.

This patch forces an increment by one segment after #send_cwnd calls
without increments(newreno behavior).

Signed-off-by: Daniele Lacamera <root@danielinux.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_hybla.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index bfcbd148a89..c209e054a63 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -150,7 +150,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 		ca->snd_cwnd_cents -= 128;
 		tp->snd_cwnd_cnt = 0;
 	}
-
+	/* check when cwnd has not been incremented for a while */
+	if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+		tp->snd_cwnd++;
+		tp->snd_cwnd_cnt = 0;
+	}
 	/* clamp down slowstart cwnd to ssthresh value. */
 	if (is_slowstart)
 		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-- 
cgit v1.2.3-70-g09d2


From 76108cea065cda58366d16a7eb6ca90d717a1396 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:00 +0200
Subject: netfilter: Use unsigned types for hooknum and pf vars

and (try to) consistently use u_int8_t for the L3 family.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter.h                      | 30 ++++++++--------
 include/linux/netfilter/x_tables.h             | 28 +++++++--------
 include/net/netfilter/nf_conntrack_core.h      |  2 +-
 include/net/netfilter/nf_conntrack_expect.h    |  2 +-
 include/net/netfilter/nf_conntrack_l4proto.h   |  4 +--
 include/net/netfilter/nf_log.h                 |  8 ++---
 include/net/netfilter/nf_queue.h               |  6 ++--
 net/bridge/br_netfilter.c                      |  4 +--
 net/bridge/netfilter/ebt_log.c                 |  2 +-
 net/bridge/netfilter/ebt_ulog.c                |  2 +-
 net/ipv4/netfilter/ipt_LOG.c                   |  2 +-
 net/ipv4/netfilter/ipt_ULOG.c                  |  2 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  4 +--
 net/ipv6/netfilter/ip6t_LOG.c                  |  2 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  4 +--
 net/netfilter/core.c                           |  4 +--
 net/netfilter/nf_conntrack_core.c              |  6 ++--
 net/netfilter/nf_conntrack_expect.c            |  2 +-
 net/netfilter/nf_conntrack_h323_main.c         |  3 +-
 net/netfilter/nf_conntrack_proto_dccp.c        |  4 +--
 net/netfilter/nf_conntrack_proto_generic.c     |  2 +-
 net/netfilter/nf_conntrack_proto_gre.c         |  2 +-
 net/netfilter/nf_conntrack_proto_sctp.c        |  2 +-
 net/netfilter/nf_conntrack_proto_tcp.c         |  6 ++--
 net/netfilter/nf_conntrack_proto_udp.c         |  4 +--
 net/netfilter/nf_conntrack_proto_udplite.c     |  4 +--
 net/netfilter/nf_internals.h                   |  4 +--
 net/netfilter/nf_log.c                         |  6 ++--
 net/netfilter/nf_queue.c                       | 10 +++---
 net/netfilter/nf_sockopt.c                     | 15 ++++----
 net/netfilter/nfnetlink_log.c                  |  4 +--
 net/netfilter/x_tables.c                       | 47 ++++++++++++++------------
 net/netfilter/xt_connlimit.c                   |  2 +-
 net/netfilter/xt_conntrack.c                   |  8 ++---
 net/netfilter/xt_hashlimit.c                   | 11 +++---
 35 files changed, 127 insertions(+), 121 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 0c5eb7ed8b3..8c83d2e23bd 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -92,8 +92,8 @@ struct nf_hook_ops
 	/* User fills in from here down. */
 	nf_hookfn *hook;
 	struct module *owner;
-	int pf;
-	int hooknum;
+	u_int8_t pf;
+	unsigned int hooknum;
 	/* Hooks are ordered in ascending priority. */
 	int priority;
 };
@@ -102,7 +102,7 @@ struct nf_sockopt_ops
 {
 	struct list_head list;
 
-	int pf;
+	u_int8_t pf;
 
 	/* Non-inclusive ranges: use 0/0/NULL to never get called. */
 	int set_optmin;
@@ -140,7 +140,7 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[];
 
 extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
 
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
+int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev, struct net_device *outdev,
 		 int (*okfn)(struct sk_buff *), int thresh);
 
@@ -151,7 +151,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
  *	okfn must be invoked by the caller in this case.  Any other return
  *	value indicates the packet has been consumed by the hook.
  */
-static inline int nf_hook_thresh(int pf, unsigned int hook,
+static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 				 struct sk_buff *skb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
@@ -167,7 +167,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook,
 	return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
 }
 
-static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
+static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
@@ -212,14 +212,14 @@ __ret;})
 	NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
 
 /* Call setsockopt() */
-int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt, 
+int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
 		  int len);
-int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt,
+int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
 		  int *len);
 
-int compat_nf_setsockopt(struct sock *sk, int pf, int optval,
+int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval,
 		char __user *opt, int len);
-int compat_nf_getsockopt(struct sock *sk, int pf, int optval,
+int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval,
 		char __user *opt, int *len);
 
 /* Call this before modifying an existing packet: ensures it is
@@ -292,7 +292,7 @@ extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
 extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
 
 static inline void
-nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family)
+nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 {
 #ifdef CONFIG_NF_NAT_NEEDED
 	void (*decodefn)(struct sk_buff *, struct flowi *);
@@ -315,7 +315,7 @@ extern struct proc_dir_entry *proc_net_netfilter;
 #else /* !CONFIG_NETFILTER */
 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
 #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
-static inline int nf_hook_thresh(int pf, unsigned int hook,
+static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 				 struct sk_buff *skb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
@@ -324,7 +324,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook,
 {
 	return okfn(skb);
 }
-static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
+static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
@@ -332,7 +332,9 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
 }
 struct flowi;
 static inline void
-nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {}
+nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
+{
+}
 #endif /*CONFIG_NETFILTER*/
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 2326296b6f2..6989b22716e 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -292,7 +292,7 @@ struct xt_table
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
-	int af;		/* address/protocol family */
+	u_int8_t af;		/* address/protocol family */
 };
 
 #include <linux/netfilter_ipv4.h>
@@ -346,19 +346,19 @@ extern struct xt_table_info *xt_replace_table(struct xt_table *table,
 					      struct xt_table_info *newinfo,
 					      int *error);
 
-extern struct xt_match *xt_find_match(int af, const char *name, u8 revision);
-extern struct xt_target *xt_find_target(int af, const char *name, u8 revision);
-extern struct xt_target *xt_request_find_target(int af, const char *name, 
+extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
+extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
+extern struct xt_target *xt_request_find_target(u8 af, const char *name,
 						u8 revision);
-extern int xt_find_revision(int af, const char *name, u8 revision, int target,
-			    int *err);
+extern int xt_find_revision(u8 af, const char *name, u8 revision,
+			    int target, int *err);
 
-extern struct xt_table *xt_find_table_lock(struct net *net, int af,
+extern struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 					   const char *name);
 extern void xt_table_unlock(struct xt_table *t);
 
-extern int xt_proto_init(struct net *net, int af);
-extern void xt_proto_fini(struct net *net, int af);
+extern int xt_proto_init(struct net *net, u_int8_t af);
+extern void xt_proto_fini(struct net *net, u_int8_t af);
 
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
@@ -423,12 +423,12 @@ struct compat_xt_counters_info
 #define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \
 		& ~(__alignof__(struct compat_xt_counters)-1))
 
-extern void xt_compat_lock(int af);
-extern void xt_compat_unlock(int af);
+extern void xt_compat_lock(u_int8_t af);
+extern void xt_compat_unlock(u_int8_t af);
 
-extern int xt_compat_add_offset(int af, unsigned int offset, short delta);
-extern void xt_compat_flush_offsets(int af);
-extern short xt_compat_calc_jump(int af, unsigned int offset);
+extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta);
+extern void xt_compat_flush_offsets(u_int8_t af);
+extern short xt_compat_calc_jump(u_int8_t af, unsigned int offset);
 
 extern int xt_compat_match_offset(const struct xt_match *match);
 extern int xt_compat_match_from_user(struct xt_entry_match *m,
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index a8177121093..05760d6a706 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -20,7 +20,7 @@
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
    of connection tracking. */
-extern unsigned int nf_conntrack_in(int pf,
+extern unsigned int nf_conntrack_in(u_int8_t pf,
 				    unsigned int hooknum,
 				    struct sk_buff *skb);
 
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index dfdf4b45947..4c4d894cb9b 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -86,7 +86,7 @@ void nf_ct_unexpect_related(struct nf_conntrack_expect *exp);
 /* Allocate space for an expectation: this is mandatory before calling
    nf_ct_expect_related.  You will have to call put afterwards. */
 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me);
-void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, int,
+void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t,
 		       const union nf_inet_addr *,
 		       const union nf_inet_addr *,
 		       u_int8_t, const __be16 *, const __be16 *);
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 723df9d1cc3..d4376e97bae 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -39,7 +39,7 @@ struct nf_conntrack_l4proto
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      int pf,
+		      u_int8_t pf,
 		      unsigned int hooknum);
 
 	/* Called when a new connection for this protocol found;
@@ -52,7 +52,7 @@ struct nf_conntrack_l4proto
 
 	int (*error)(struct sk_buff *skb, unsigned int dataoff,
 		     enum ip_conntrack_info *ctinfo,
-		     int pf, unsigned int hooknum);
+		     u_int8_t pf, unsigned int hooknum);
 
 	/* Print out the per-protocol part of the tuple. Return like seq_* */
 	int (*print_tuple)(struct seq_file *s,
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 8c6b5ae4553..7182c06974f 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -28,7 +28,7 @@ struct nf_loginfo {
 	} u;
 };
 
-typedef void nf_logfn(unsigned int pf,
+typedef void nf_logfn(u_int8_t pf,
 		      unsigned int hooknum,
 		      const struct sk_buff *skb,
 		      const struct net_device *in,
@@ -43,12 +43,12 @@ struct nf_logger {
 };
 
 /* Function to register/unregister log function. */
-int nf_log_register(int pf, const struct nf_logger *logger);
+int nf_log_register(u_int8_t pf, const struct nf_logger *logger);
 void nf_log_unregister(const struct nf_logger *logger);
-void nf_log_unregister_pf(int pf);
+void nf_log_unregister_pf(u_int8_t pf);
 
 /* Calls the registered backend logging function */
-void nf_log_packet(int pf,
+void nf_log_packet(u_int8_t pf,
 		   unsigned int hooknum,
 		   const struct sk_buff *skb,
 		   const struct net_device *in,
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index d030044e923..252fd1010b7 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -8,7 +8,7 @@ struct nf_queue_entry {
 	unsigned int		id;
 
 	struct nf_hook_ops	*elem;
-	int			pf;
+	u_int8_t		pf;
 	unsigned int		hook;
 	struct net_device	*indev;
 	struct net_device	*outdev;
@@ -24,9 +24,9 @@ struct nf_queue_handler {
 	char			*name;
 };
 
-extern int nf_register_queue_handler(int pf,
+extern int nf_register_queue_handler(u_int8_t pf,
 				     const struct nf_queue_handler *qh);
-extern int nf_unregister_queue_handler(int pf,
+extern int nf_unregister_queue_handler(u_int8_t pf,
 				       const struct nf_queue_handler *qh);
 extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
 extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 6a9a6cd74b1..a4abed5b4c4 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -657,7 +657,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 {
 	struct nf_bridge_info *nf_bridge;
 	struct net_device *parent;
-	int pf;
+	u_int8_t pf;
 
 	if (!skb->nf_bridge)
 		return NF_ACCEPT;
@@ -791,7 +791,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 {
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 	struct net_device *realoutdev = bridge_parent(skb->dev);
-	int pf;
+	u_int8_t pf;
 
 #ifdef CONFIG_NETFILTER_DEBUG
 	/* Be very paranoid. This probably won't happen anymore, but let's
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 2f430d4ae91..3770cd8a7b3 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -84,7 +84,7 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
 
 #define myNIPQUAD(a) a[0], a[1], a[2], a[3]
 static void
-ebt_log_packet(unsigned int pf, unsigned int hooknum,
+ebt_log_packet(u_int8_t pf, unsigned int hooknum,
    const struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out, const struct nf_loginfo *loginfo,
    const char *prefix)
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 2d4c9ef909f..c84bda61afb 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -223,7 +223,7 @@ alloc_failure:
 }
 
 /* this function is registered with the netfilter core */
-static void ebt_log_packet(unsigned int pf, unsigned int hooknum,
+static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
    const struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out, const struct nf_loginfo *li,
    const char *prefix)
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 0af14137137..9330ba3577e 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -375,7 +375,7 @@ static struct nf_loginfo default_loginfo = {
 };
 
 static void
-ipt_log_packet(unsigned int pf,
+ipt_log_packet(u_int8_t pf,
 	       unsigned int hooknum,
 	       const struct sk_buff *skb,
 	       const struct net_device *in,
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index b192756c6d0..d8241e6b077 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -292,7 +292,7 @@ ulog_tg(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static void ipt_logfn(unsigned int pf,
+static void ipt_logfn(u_int8_t pf,
 		      unsigned int hooknum,
 		      const struct sk_buff *skb,
 		      const struct net_device *in,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 97791048fa9..da8edcdaef3 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -79,7 +79,7 @@ static int icmp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
-		       int pf,
+		       u_int8_t pf,
 		       unsigned int hooknum)
 {
 	/* Try to delete connection immediately after all replies:
@@ -173,7 +173,7 @@ icmp_error_message(struct sk_buff *skb,
 /* Small and modified version of icmp_rcv */
 static int
 icmp_error(struct sk_buff *skb, unsigned int dataoff,
-	   enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
+	   enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
 {
 	const struct icmphdr *icmph;
 	struct icmphdr _ih;
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 3a2316974f8..0716f8afa0b 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -385,7 +385,7 @@ static struct nf_loginfo default_loginfo = {
 };
 
 static void
-ip6t_log_packet(unsigned int pf,
+ip6t_log_packet(u_int8_t pf,
 		unsigned int hooknum,
 		const struct sk_buff *skb,
 		const struct net_device *in,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 14d47d83354..5756f30ebc6 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -81,7 +81,7 @@ static int icmpv6_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
-		       int pf,
+		       u_int8_t pf,
 		       unsigned int hooknum)
 {
 	/* Try to delete connection immediately after all replies:
@@ -173,7 +173,7 @@ icmpv6_error_message(struct sk_buff *skb,
 
 static int
 icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
-	     enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
+	     enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
 {
 	const struct icmp6hdr *icmp6h;
 	struct icmp6hdr _ih;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 292fa28146f..26b8f489d7a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -113,7 +113,7 @@ EXPORT_SYMBOL(nf_unregister_hooks);
 
 unsigned int nf_iterate(struct list_head *head,
 			struct sk_buff *skb,
-			int hook,
+			unsigned int hook,
 			const struct net_device *indev,
 			const struct net_device *outdev,
 			struct list_head **i,
@@ -155,7 +155,7 @@ unsigned int nf_iterate(struct list_head *head,
 
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
+int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev,
 		 struct net_device *outdev,
 		 int (*okfn)(struct sk_buff *),
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9d1830da8e8..6aaf64b5ded 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -665,7 +665,7 @@ resolve_normal_ct(struct sk_buff *skb,
 }
 
 unsigned int
-nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
+nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -683,7 +683,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
 	}
 
 	/* rcu_read_lock()ed by nf_hook_slow */
-	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
+	l3proto = __nf_ct_l3proto_find(pf);
 	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
 				   &dataoff, &protonum);
 	if (ret <= 0) {
@@ -693,7 +693,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
 		return -ret;
 	}
 
-	l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum);
+	l4proto = __nf_ct_l4proto_find(pf, protonum);
 
 	/* It may be an special packet, error, unclean...
 	 * inverse of the return code tells to the netfilter
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index e8f0dead267..990fa12f2ee 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -241,7 +241,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
 
 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
-		       int family,
+		       u_int8_t family,
 		       const union nf_inet_addr *saddr,
 		       const union nf_inet_addr *daddr,
 		       u_int8_t proto, const __be16 *src, const __be16 *dst)
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 2f83c158934..5dc0478108a 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -709,7 +709,8 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 /* If the calling party is on the same side of the forward-to party,
  * we don't need to track the second call */
 static int callforward_do_filter(const union nf_inet_addr *src,
-                                 const union nf_inet_addr *dst, int family)
+				 const union nf_inet_addr *dst,
+				 u_int8_t family)
 {
 	const struct nf_afinfo *afinfo;
 	struct flowi fl1, fl2;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index e7866dd3cde..edc30358dc1 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -461,7 +461,7 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
 
 static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		       unsigned int dataoff, enum ip_conntrack_info ctinfo,
-		       int pf, unsigned int hooknum)
+		       u_int8_t pf, unsigned int hooknum)
 {
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	struct dccp_hdr _dh, *dh;
@@ -546,7 +546,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 }
 
 static int dccp_error(struct sk_buff *skb, unsigned int dataoff,
-		      enum ip_conntrack_info *ctinfo, int pf,
+		      enum ip_conntrack_info *ctinfo, u_int8_t pf,
 		      unsigned int hooknum)
 {
 	struct dccp_hdr _dh, *dh;
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index e31b0e7bd0b..dbe680af85d 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -45,7 +45,7 @@ static int packet(struct nf_conn *ct,
 		  const struct sk_buff *skb,
 		  unsigned int dataoff,
 		  enum ip_conntrack_info ctinfo,
-		  int pf,
+		  u_int8_t pf,
 		  unsigned int hooknum)
 {
 	nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9bd03967fea..c5a78220fa3 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -219,7 +219,7 @@ static int gre_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      int pf,
+		      u_int8_t pf,
 		      unsigned int hooknum)
 {
 	/* If we've seen traffic both ways, this is a GRE connection.
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 30aa5b94a77..b5a90596d3f 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -287,7 +287,7 @@ static int sctp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
-		       int pf,
+		       u_int8_t pf,
 		       unsigned int hooknum)
 {
 	enum sctp_conntrack new_state, old_state;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 6f61261888e..539a8202025 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -486,7 +486,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			  const struct sk_buff *skb,
 			  unsigned int dataoff,
 			  const struct tcphdr *tcph,
-			  int pf)
+			  u_int8_t pf)
 {
 	struct ip_ct_tcp_state *sender = &state->seen[dir];
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
@@ -749,7 +749,7 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
 static int tcp_error(struct sk_buff *skb,
 		     unsigned int dataoff,
 		     enum ip_conntrack_info *ctinfo,
-		     int pf,
+		     u_int8_t pf,
 		     unsigned int hooknum)
 {
 	const struct tcphdr *th;
@@ -804,7 +804,7 @@ static int tcp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      int pf,
+		      u_int8_t pf,
 		      unsigned int hooknum)
 {
 	struct nf_conntrack_tuple *tuple;
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8b21762e65d..2a965c4a0ea 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -66,7 +66,7 @@ static int udp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      int pf,
+		      u_int8_t pf,
 		      unsigned int hooknum)
 {
 	/* If we've seen traffic both ways, this is some kind of UDP
@@ -91,7 +91,7 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
 
 static int udp_error(struct sk_buff *skb, unsigned int dataoff,
 		     enum ip_conntrack_info *ctinfo,
-		     int pf,
+		     u_int8_t pf,
 		     unsigned int hooknum)
 {
 	unsigned int udplen = skb->len - dataoff;
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 1fa62f3c24f..4fb6c8d83a8 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -65,7 +65,7 @@ static int udplite_packet(struct nf_conn *ct,
 			  const struct sk_buff *skb,
 			  unsigned int dataoff,
 			  enum ip_conntrack_info ctinfo,
-			  int pf,
+			  u_int8_t pf,
 			  unsigned int hooknum)
 {
 	/* If we've seen traffic both ways, this is some kind of UDP
@@ -91,7 +91,7 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb,
 
 static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
 			 enum ip_conntrack_info *ctinfo,
-			 int pf,
+			 u_int8_t pf,
 			 unsigned int hooknum)
 {
 	unsigned int udplen = skb->len - dataoff;
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 196269c1e58..bf6609978af 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -15,7 +15,7 @@
 /* core.c */
 extern unsigned int nf_iterate(struct list_head *head,
 				struct sk_buff *skb,
-				int hook,
+				unsigned int hook,
 				const struct net_device *indev,
 				const struct net_device *outdev,
 				struct list_head **i,
@@ -25,7 +25,7 @@ extern unsigned int nf_iterate(struct list_head *head,
 /* nf_queue.c */
 extern int nf_queue(struct sk_buff *skb,
 		    struct list_head *elem,
-		    int pf, unsigned int hook,
+		    u_int8_t pf, unsigned int hook,
 		    struct net_device *indev,
 		    struct net_device *outdev,
 		    int (*okfn)(struct sk_buff *),
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 9fda6ee95a3..5c2f7332015 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -20,7 +20,7 @@ static DEFINE_MUTEX(nf_log_mutex);
 
 /* return EBUSY if somebody else is registered, EEXIST if the same logger
  * is registred, 0 on success. */
-int nf_log_register(int pf, const struct nf_logger *logger)
+int nf_log_register(u_int8_t pf, const struct nf_logger *logger)
 {
 	int ret;
 
@@ -45,7 +45,7 @@ int nf_log_register(int pf, const struct nf_logger *logger)
 }
 EXPORT_SYMBOL(nf_log_register);
 
-void nf_log_unregister_pf(int pf)
+void nf_log_unregister_pf(u_int8_t pf)
 {
 	if (pf >= NPROTO)
 		return;
@@ -73,7 +73,7 @@ void nf_log_unregister(const struct nf_logger *logger)
 }
 EXPORT_SYMBOL(nf_log_unregister);
 
-void nf_log_packet(int pf,
+void nf_log_packet(u_int8_t pf,
 		   unsigned int hooknum,
 		   const struct sk_buff *skb,
 		   const struct net_device *in,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 582ec3efc8a..f285086f629 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -22,7 +22,7 @@ static DEFINE_MUTEX(queue_handler_mutex);
 
 /* return EBUSY when somebody else is registered, return EEXIST if the
  * same handler is registered, return 0 in case of success. */
-int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
+int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
 	int ret;
 
@@ -45,7 +45,7 @@ int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
 EXPORT_SYMBOL(nf_register_queue_handler);
 
 /* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh)
+int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
 	if (pf >= NPROTO)
 		return -EINVAL;
@@ -67,7 +67,7 @@ EXPORT_SYMBOL(nf_unregister_queue_handler);
 
 void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 {
-	int pf;
+	u_int8_t pf;
 
 	mutex_lock(&queue_handler_mutex);
 	for (pf = 0; pf < NPROTO; pf++)  {
@@ -107,7 +107,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
  */
 static int __nf_queue(struct sk_buff *skb,
 		      struct list_head *elem,
-		      int pf, unsigned int hook,
+		      u_int8_t pf, unsigned int hook,
 		      struct net_device *indev,
 		      struct net_device *outdev,
 		      int (*okfn)(struct sk_buff *),
@@ -191,7 +191,7 @@ err:
 
 int nf_queue(struct sk_buff *skb,
 	     struct list_head *elem,
-	     int pf, unsigned int hook,
+	     u_int8_t pf, unsigned int hook,
 	     struct net_device *indev,
 	     struct net_device *outdev,
 	     int (*okfn)(struct sk_buff *),
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index 01489681fa9..f9b46de6a3d 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -60,7 +60,7 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
 }
 EXPORT_SYMBOL(nf_unregister_sockopt);
 
-static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf,
+static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
 		int val, int get)
 {
 	struct nf_sockopt_ops *ops;
@@ -96,7 +96,7 @@ out:
 }
 
 /* Call get/setsockopt() */
-static int nf_sockopt(struct sock *sk, int pf, int val,
+static int nf_sockopt(struct sock *sk, u_int8_t pf, int val,
 		      char __user *opt, int *len, int get)
 {
 	struct nf_sockopt_ops *ops;
@@ -115,21 +115,22 @@ static int nf_sockopt(struct sock *sk, int pf, int val,
 	return ret;
 }
 
-int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
+int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
 		  int len)
 {
 	return nf_sockopt(sk, pf, val, opt, &len, 0);
 }
 EXPORT_SYMBOL(nf_setsockopt);
 
-int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
+int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
+		  int *len)
 {
 	return nf_sockopt(sk, pf, val, opt, len, 1);
 }
 EXPORT_SYMBOL(nf_getsockopt);
 
 #ifdef CONFIG_COMPAT
-static int compat_nf_sockopt(struct sock *sk, int pf, int val,
+static int compat_nf_sockopt(struct sock *sk, u_int8_t pf, int val,
 			     char __user *opt, int *len, int get)
 {
 	struct nf_sockopt_ops *ops;
@@ -155,14 +156,14 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val,
 	return ret;
 }
 
-int compat_nf_setsockopt(struct sock *sk, int pf,
+int compat_nf_setsockopt(struct sock *sk, u_int8_t pf,
 		int val, char __user *opt, int len)
 {
 	return compat_nf_sockopt(sk, pf, val, opt, &len, 0);
 }
 EXPORT_SYMBOL(compat_nf_setsockopt);
 
-int compat_nf_getsockopt(struct sock *sk, int pf,
+int compat_nf_getsockopt(struct sock *sk, u_int8_t pf,
 		int val, char __user *opt, int *len)
 {
 	return compat_nf_sockopt(sk, pf, val, opt, len, 1);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 9a35b57ab76..41e0105d382 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -359,7 +359,7 @@ static inline int
 __build_packet_message(struct nfulnl_instance *inst,
 			const struct sk_buff *skb,
 			unsigned int data_len,
-			unsigned int pf,
+			u_int8_t pf,
 			unsigned int hooknum,
 			const struct net_device *indev,
 			const struct net_device *outdev,
@@ -534,7 +534,7 @@ static struct nf_loginfo default_loginfo = {
 
 /* log handler for internal netfilter logging api */
 static void
-nfulnl_log_packet(unsigned int pf,
+nfulnl_log_packet(u_int8_t pf,
 		  unsigned int hooknum,
 		  const struct sk_buff *skb,
 		  const struct net_device *in,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 5d75cd86ebb..cf2f3e90cef 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -68,7 +68,8 @@ static const char *const xt_prefix[NPROTO] = {
 int
 xt_register_target(struct xt_target *target)
 {
-	int ret, af = target->family;
+	u_int8_t af = target->family;
+	int ret;
 
 	ret = mutex_lock_interruptible(&xt[af].mutex);
 	if (ret != 0)
@@ -82,7 +83,7 @@ EXPORT_SYMBOL(xt_register_target);
 void
 xt_unregister_target(struct xt_target *target)
 {
-	int af = target->family;
+	u_int8_t af = target->family;
 
 	mutex_lock(&xt[af].mutex);
 	list_del(&target->list);
@@ -123,7 +124,8 @@ EXPORT_SYMBOL(xt_unregister_targets);
 int
 xt_register_match(struct xt_match *match)
 {
-	int ret, af = match->family;
+	u_int8_t af = match->family;
+	int ret;
 
 	ret = mutex_lock_interruptible(&xt[af].mutex);
 	if (ret != 0)
@@ -139,7 +141,7 @@ EXPORT_SYMBOL(xt_register_match);
 void
 xt_unregister_match(struct xt_match *match)
 {
-	int af =  match->family;
+	u_int8_t af = match->family;
 
 	mutex_lock(&xt[af].mutex);
 	list_del(&match->list);
@@ -185,7 +187,7 @@ EXPORT_SYMBOL(xt_unregister_matches);
  */
 
 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
-struct xt_match *xt_find_match(int af, const char *name, u8 revision)
+struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
 {
 	struct xt_match *m;
 	int err = 0;
@@ -210,7 +212,7 @@ struct xt_match *xt_find_match(int af, const char *name, u8 revision)
 EXPORT_SYMBOL(xt_find_match);
 
 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
-struct xt_target *xt_find_target(int af, const char *name, u8 revision)
+struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
 {
 	struct xt_target *t;
 	int err = 0;
@@ -234,7 +236,7 @@ struct xt_target *xt_find_target(int af, const char *name, u8 revision)
 }
 EXPORT_SYMBOL(xt_find_target);
 
-struct xt_target *xt_request_find_target(int af, const char *name, u8 revision)
+struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
 {
 	struct xt_target *target;
 
@@ -246,7 +248,7 @@ struct xt_target *xt_request_find_target(int af, const char *name, u8 revision)
 }
 EXPORT_SYMBOL_GPL(xt_request_find_target);
 
-static int match_revfn(int af, const char *name, u8 revision, int *bestp)
+static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
 {
 	const struct xt_match *m;
 	int have_rev = 0;
@@ -262,7 +264,7 @@ static int match_revfn(int af, const char *name, u8 revision, int *bestp)
 	return have_rev;
 }
 
-static int target_revfn(int af, const char *name, u8 revision, int *bestp)
+static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
 {
 	const struct xt_target *t;
 	int have_rev = 0;
@@ -279,7 +281,7 @@ static int target_revfn(int af, const char *name, u8 revision, int *bestp)
 }
 
 /* Returns true or false (if no such extension at all) */
-int xt_find_revision(int af, const char *name, u8 revision, int target,
+int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 		     int *err)
 {
 	int have_rev, best = -1;
@@ -337,7 +339,7 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_add_offset(int af, unsigned int offset, short delta)
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta)
 {
 	struct compat_delta *tmp;
 
@@ -359,7 +361,7 @@ int xt_compat_add_offset(int af, unsigned int offset, short delta)
 }
 EXPORT_SYMBOL_GPL(xt_compat_add_offset);
 
-void xt_compat_flush_offsets(int af)
+void xt_compat_flush_offsets(u_int8_t af)
 {
 	struct compat_delta *tmp, *next;
 
@@ -373,7 +375,7 @@ void xt_compat_flush_offsets(int af)
 }
 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
 
-short xt_compat_calc_jump(int af, unsigned int offset)
+short xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 {
 	struct compat_delta *tmp;
 	short delta;
@@ -590,7 +592,8 @@ void xt_free_table_info(struct xt_table_info *info)
 EXPORT_SYMBOL(xt_free_table_info);
 
 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
-struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name)
+struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
+				    const char *name)
 {
 	struct xt_table *t;
 
@@ -612,13 +615,13 @@ void xt_table_unlock(struct xt_table *table)
 EXPORT_SYMBOL_GPL(xt_table_unlock);
 
 #ifdef CONFIG_COMPAT
-void xt_compat_lock(int af)
+void xt_compat_lock(u_int8_t af)
 {
 	mutex_lock(&xt[af].compat_mutex);
 }
 EXPORT_SYMBOL_GPL(xt_compat_lock);
 
-void xt_compat_unlock(int af)
+void xt_compat_unlock(u_int8_t af)
 {
 	mutex_unlock(&xt[af].compat_mutex);
 }
@@ -722,13 +725,13 @@ EXPORT_SYMBOL_GPL(xt_unregister_table);
 #ifdef CONFIG_PROC_FS
 struct xt_names_priv {
 	struct seq_net_private p;
-	int af;
+	u_int8_t af;
 };
 static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct xt_names_priv *priv = seq->private;
 	struct net *net = seq_file_net(seq);
-	int af = priv->af;
+	u_int8_t af = priv->af;
 
 	mutex_lock(&xt[af].mutex);
 	return seq_list_start(&net->xt.tables[af], *pos);
@@ -738,7 +741,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct xt_names_priv *priv = seq->private;
 	struct net *net = seq_file_net(seq);
-	int af = priv->af;
+	u_int8_t af = priv->af;
 
 	return seq_list_next(v, &net->xt.tables[af], pos);
 }
@@ -746,7 +749,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static void xt_table_seq_stop(struct seq_file *seq, void *v)
 {
 	struct xt_names_priv *priv = seq->private;
-	int af = priv->af;
+	u_int8_t af = priv->af;
 
 	mutex_unlock(&xt[af].mutex);
 }
@@ -922,7 +925,7 @@ static const struct file_operations xt_target_ops = {
 
 #endif /* CONFIG_PROC_FS */
 
-int xt_proto_init(struct net *net, int af)
+int xt_proto_init(struct net *net, u_int8_t af)
 {
 #ifdef CONFIG_PROC_FS
 	char buf[XT_FUNCTION_MAXNAMELEN];
@@ -974,7 +977,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(xt_proto_init);
 
-void xt_proto_fini(struct net *net, int af)
+void xt_proto_fini(struct net *net, u_int8_t af)
 {
 #ifdef CONFIG_PROC_FS
 	char buf[XT_FUNCTION_MAXNAMELEN];
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 70907f6baac..1655e2cf25c 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -82,7 +82,7 @@ static inline bool already_closed(const struct nf_conn *conn)
 static inline unsigned int
 same_source_net(const union nf_inet_addr *addr,
 		const union nf_inet_addr *mask,
-		const union nf_inet_addr *u3, unsigned int family)
+		const union nf_inet_addr *u3, u_int8_t family)
 {
 	if (family == AF_INET) {
 		return (addr->ip & mask->ip) == (u3->ip & mask->ip);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index d61412f58ef..28a42a3fbff 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -133,7 +133,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr,
 static inline bool
 conntrack_mt_origsrc(const struct nf_conn *ct,
                      const struct xt_conntrack_mtinfo1 *info,
-                     unsigned int family)
+		     u_int8_t family)
 {
 	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
 	       &info->origsrc_addr, &info->origsrc_mask, family);
@@ -142,7 +142,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct,
 static inline bool
 conntrack_mt_origdst(const struct nf_conn *ct,
                      const struct xt_conntrack_mtinfo1 *info,
-                     unsigned int family)
+		     u_int8_t family)
 {
 	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3,
 	       &info->origdst_addr, &info->origdst_mask, family);
@@ -151,7 +151,7 @@ conntrack_mt_origdst(const struct nf_conn *ct,
 static inline bool
 conntrack_mt_replsrc(const struct nf_conn *ct,
                      const struct xt_conntrack_mtinfo1 *info,
-                     unsigned int family)
+		     u_int8_t family)
 {
 	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
 	       &info->replsrc_addr, &info->replsrc_mask, family);
@@ -160,7 +160,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct,
 static inline bool
 conntrack_mt_repldst(const struct nf_conn *ct,
                      const struct xt_conntrack_mtinfo1 *info,
-                     unsigned int family)
+		     u_int8_t family)
 {
 	return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3,
 	       &info->repldst_addr, &info->repldst_mask, family);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index d9418a26781..0c9268fd2e1 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -80,7 +80,7 @@ struct dsthash_ent {
 struct xt_hashlimit_htable {
 	struct hlist_node node;		/* global list of all htables */
 	atomic_t use;
-	int family;
+	u_int8_t family;
 
 	struct hashlimit_cfg1 cfg;	/* config */
 
@@ -185,7 +185,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 }
 static void htable_gc(unsigned long htlong);
 
-static int htable_create_v0(struct xt_hashlimit_info *minfo, int family)
+static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
 {
 	struct xt_hashlimit_htable *hinfo;
 	unsigned int size;
@@ -258,8 +258,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family)
 	return 0;
 }
 
-static int htable_create(struct xt_hashlimit_mtinfo1 *minfo,
-                         unsigned int family)
+static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
 {
 	struct xt_hashlimit_htable *hinfo;
 	unsigned int size;
@@ -378,7 +377,7 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo)
 }
 
 static struct xt_hashlimit_htable *htable_find_get(const char *name,
-						   int family)
+						   u_int8_t family)
 {
 	struct xt_hashlimit_htable *hinfo;
 	struct hlist_node *pos;
@@ -901,7 +900,7 @@ static void dl_seq_stop(struct seq_file *s, void *v)
 	spin_unlock_bh(&htable->lock);
 }
 
-static int dl_seq_real_show(struct dsthash_ent *ent, int family,
+static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 				   struct seq_file *s)
 {
 	/* recalculate to show accurate numbers */
-- 
cgit v1.2.3-70-g09d2


From e948b20a71a06a740c925d6ea22b59b4e17cfa0c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Wed, 8 Oct 2008 11:35:00 +0200
Subject: netfilter: rename ipt_recent to xt_recent

Like with other modules (such as ipt_state), ipt_recent.h is changed
to forward definitions to (IOW include) xt_recent.h, and xt_recent.c
is changed to use the new constant names.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild            |   1 +
 include/linux/netfilter/xt_recent.h       |  26 ++
 include/linux/netfilter_ipv4/ipt_recent.h |  28 +-
 net/ipv4/netfilter/Kconfig                |  13 -
 net/ipv4/netfilter/Makefile               |   1 -
 net/ipv4/netfilter/ipt_recent.c           | 501 -----------------------------
 net/netfilter/Kconfig                     |  11 +
 net/netfilter/Makefile                    |   1 +
 net/netfilter/xt_recent.c                 | 502 ++++++++++++++++++++++++++++++
 9 files changed, 552 insertions(+), 532 deletions(-)
 create mode 100644 include/linux/netfilter/xt_recent.h
 delete mode 100644 net/ipv4/netfilter/ipt_recent.c
 create mode 100644 net/netfilter/xt_recent.c

(limited to 'net')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 3aff513d12c..5a8af875bce 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -32,6 +32,7 @@ header-y += xt_owner.h
 header-y += xt_pkttype.h
 header-y += xt_rateest.h
 header-y += xt_realm.h
+header-y += xt_recent.h
 header-y += xt_sctp.h
 header-y += xt_state.h
 header-y += xt_statistic.h
diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h
new file mode 100644
index 00000000000..5cfeb81c679
--- /dev/null
+++ b/include/linux/netfilter/xt_recent.h
@@ -0,0 +1,26 @@
+#ifndef _LINUX_NETFILTER_XT_RECENT_H
+#define _LINUX_NETFILTER_XT_RECENT_H 1
+
+enum {
+	XT_RECENT_CHECK    = 1 << 0,
+	XT_RECENT_SET      = 1 << 1,
+	XT_RECENT_UPDATE   = 1 << 2,
+	XT_RECENT_REMOVE   = 1 << 3,
+	XT_RECENT_TTL      = 1 << 4,
+
+	XT_RECENT_SOURCE   = 0,
+	XT_RECENT_DEST     = 1,
+
+	XT_RECENT_NAME_LEN = 200,
+};
+
+struct xt_recent_mtinfo {
+	u_int32_t seconds;
+	u_int32_t hit_count;
+	u_int8_t check_set;
+	u_int8_t invert;
+	char name[XT_RECENT_NAME_LEN];
+	u_int8_t side;
+};
+
+#endif /* _LINUX_NETFILTER_XT_RECENT_H */
diff --git a/include/linux/netfilter_ipv4/ipt_recent.h b/include/linux/netfilter_ipv4/ipt_recent.h
index 6508a459265..d636cca133c 100644
--- a/include/linux/netfilter_ipv4/ipt_recent.h
+++ b/include/linux/netfilter_ipv4/ipt_recent.h
@@ -1,27 +1,21 @@
 #ifndef _IPT_RECENT_H
 #define _IPT_RECENT_H
 
-#define RECENT_NAME	"ipt_recent"
-#define RECENT_VER	"v0.3.1"
+#include <linux/netfilter/xt_recent.h>
 
-#define IPT_RECENT_CHECK  1
-#define IPT_RECENT_SET    2
-#define IPT_RECENT_UPDATE 4
-#define IPT_RECENT_REMOVE 8
-#define IPT_RECENT_TTL   16
+#define ipt_recent_info xt_recent_mtinfo
 
-#define IPT_RECENT_SOURCE 0
-#define IPT_RECENT_DEST   1
+enum {
+	IPT_RECENT_CHECK    = XT_RECENT_CHECK,
+	IPT_RECENT_SET      = XT_RECENT_SET,
+	IPT_RECENT_UPDATE   = XT_RECENT_UPDATE,
+	IPT_RECENT_REMOVE   = XT_RECENT_REMOVE,
+	IPT_RECENT_TTL      = XT_RECENT_TTL,
 
-#define IPT_RECENT_NAME_LEN 200
+	IPT_RECENT_SOURCE   = XT_RECENT_SOURCE,
+	IPT_RECENT_DEST     = XT_RECENT_DEST,
 
-struct ipt_recent_info {
-	u_int32_t   seconds;
-	u_int32_t   hit_count;
-	u_int8_t    check_set;
-	u_int8_t    invert;
-	char        name[IPT_RECENT_NAME_LEN];
-	u_int8_t    side;
+	IPT_RECENT_NAME_LEN = XT_RECENT_NAME_LEN,
 };
 
 #endif /*_IPT_RECENT_H*/
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 90eb7cb47e7..4e842d56642 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -57,19 +57,6 @@ config IP_NF_IPTABLES
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 # The matches.
-config IP_NF_MATCH_RECENT
-	tristate '"recent" match support'
-	depends on IP_NF_IPTABLES
-	depends on NETFILTER_ADVANCED
-	help
-	  This match is used for creating one or many lists of recently
-	  used addresses and then matching against that/those list(s).
-
-	  Short options are available by using 'iptables -m recent -h'
-	  Official Website: <http://snowman.net/projects/ipt_recent/>
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_ECN
 	tristate '"ecn" match support'
 	depends on IP_NF_IPTABLES
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3f31291f37c..1107edbe478 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
 
 # targets
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
deleted file mode 100644
index 3974d7cae5c..00000000000
--- a/net/ipv4/netfilter/ipt_recent.c
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
- * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This is a replacement of the old ipt_recent module, which carried the
- * following copyright notice:
- *
- * Author: Stephen Frost <sfrost@snowman.net>
- * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
- */
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/moduleparam.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/list.h>
-#include <linux/random.h>
-#include <linux/jhash.h>
-#include <linux/bitops.h>
-#include <linux/skbuff.h>
-#include <linux/inet.h>
-#include <net/net_namespace.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ipt_recent.h>
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
-MODULE_LICENSE("GPL");
-
-static unsigned int ip_list_tot = 100;
-static unsigned int ip_pkt_list_tot = 20;
-static unsigned int ip_list_hash_size = 0;
-static unsigned int ip_list_perms = 0644;
-static unsigned int ip_list_uid = 0;
-static unsigned int ip_list_gid = 0;
-module_param(ip_list_tot, uint, 0400);
-module_param(ip_pkt_list_tot, uint, 0400);
-module_param(ip_list_hash_size, uint, 0400);
-module_param(ip_list_perms, uint, 0400);
-module_param(ip_list_uid, uint, 0400);
-module_param(ip_list_gid, uint, 0400);
-MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
-MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
-MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
-MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
-MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files");
-MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files");
-
-struct recent_entry {
-	struct list_head	list;
-	struct list_head	lru_list;
-	__be32			addr;
-	u_int8_t		ttl;
-	u_int8_t		index;
-	u_int16_t		nstamps;
-	unsigned long		stamps[0];
-};
-
-struct recent_table {
-	struct list_head	list;
-	char			name[IPT_RECENT_NAME_LEN];
-#ifdef CONFIG_PROC_FS
-	struct proc_dir_entry	*proc;
-#endif
-	unsigned int		refcnt;
-	unsigned int		entries;
-	struct list_head	lru_list;
-	struct list_head	iphash[0];
-};
-
-static LIST_HEAD(tables);
-static DEFINE_SPINLOCK(recent_lock);
-static DEFINE_MUTEX(recent_mutex);
-
-#ifdef CONFIG_PROC_FS
-static struct proc_dir_entry	*proc_dir;
-static const struct file_operations	recent_fops;
-#endif
-
-static u_int32_t hash_rnd;
-static int hash_rnd_initted;
-
-static unsigned int recent_entry_hash(__be32 addr)
-{
-	if (!hash_rnd_initted) {
-		get_random_bytes(&hash_rnd, 4);
-		hash_rnd_initted = 1;
-	}
-	return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1);
-}
-
-static struct recent_entry *
-recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl)
-{
-	struct recent_entry *e;
-	unsigned int h;
-
-	h = recent_entry_hash(addr);
-	list_for_each_entry(e, &table->iphash[h], list)
-		if (e->addr == addr && (ttl == e->ttl || !ttl || !e->ttl))
-			return e;
-	return NULL;
-}
-
-static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
-{
-	list_del(&e->list);
-	list_del(&e->lru_list);
-	kfree(e);
-	t->entries--;
-}
-
-static struct recent_entry *
-recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl)
-{
-	struct recent_entry *e;
-
-	if (t->entries >= ip_list_tot) {
-		e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
-		recent_entry_remove(t, e);
-	}
-	e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * ip_pkt_list_tot,
-		    GFP_ATOMIC);
-	if (e == NULL)
-		return NULL;
-	e->addr      = addr;
-	e->ttl       = ttl;
-	e->stamps[0] = jiffies;
-	e->nstamps   = 1;
-	e->index     = 1;
-	list_add_tail(&e->list, &t->iphash[recent_entry_hash(addr)]);
-	list_add_tail(&e->lru_list, &t->lru_list);
-	t->entries++;
-	return e;
-}
-
-static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
-{
-	e->stamps[e->index++] = jiffies;
-	if (e->index > e->nstamps)
-		e->nstamps = e->index;
-	e->index %= ip_pkt_list_tot;
-	list_move_tail(&e->lru_list, &t->lru_list);
-}
-
-static struct recent_table *recent_table_lookup(const char *name)
-{
-	struct recent_table *t;
-
-	list_for_each_entry(t, &tables, list)
-		if (!strcmp(t->name, name))
-			return t;
-	return NULL;
-}
-
-static void recent_table_flush(struct recent_table *t)
-{
-	struct recent_entry *e, *next;
-	unsigned int i;
-
-	for (i = 0; i < ip_list_hash_size; i++)
-		list_for_each_entry_safe(e, next, &t->iphash[i], list)
-			recent_entry_remove(t, e);
-}
-
-static bool
-recent_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
-{
-	const struct ipt_recent_info *info = matchinfo;
-	struct recent_table *t;
-	struct recent_entry *e;
-	__be32 addr;
-	u_int8_t ttl;
-	bool ret = info->invert;
-
-	if (info->side == IPT_RECENT_DEST)
-		addr = ip_hdr(skb)->daddr;
-	else
-		addr = ip_hdr(skb)->saddr;
-
-	ttl = ip_hdr(skb)->ttl;
-	/* use TTL as seen before forwarding */
-	if (out && !skb->sk)
-		ttl++;
-
-	spin_lock_bh(&recent_lock);
-	t = recent_table_lookup(info->name);
-	e = recent_entry_lookup(t, addr,
-				info->check_set & IPT_RECENT_TTL ? ttl : 0);
-	if (e == NULL) {
-		if (!(info->check_set & IPT_RECENT_SET))
-			goto out;
-		e = recent_entry_init(t, addr, ttl);
-		if (e == NULL)
-			*hotdrop = true;
-		ret = !ret;
-		goto out;
-	}
-
-	if (info->check_set & IPT_RECENT_SET)
-		ret = !ret;
-	else if (info->check_set & IPT_RECENT_REMOVE) {
-		recent_entry_remove(t, e);
-		ret = !ret;
-	} else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) {
-		unsigned long time = jiffies - info->seconds * HZ;
-		unsigned int i, hits = 0;
-
-		for (i = 0; i < e->nstamps; i++) {
-			if (info->seconds && time_after(time, e->stamps[i]))
-				continue;
-			if (++hits >= info->hit_count) {
-				ret = !ret;
-				break;
-			}
-		}
-	}
-
-	if (info->check_set & IPT_RECENT_SET ||
-	    (info->check_set & IPT_RECENT_UPDATE && ret)) {
-		recent_entry_update(t, e);
-		e->ttl = ttl;
-	}
-out:
-	spin_unlock_bh(&recent_lock);
-	return ret;
-}
-
-static bool
-recent_mt_check(const char *tablename, const void *ip,
-                const struct xt_match *match, void *matchinfo,
-                unsigned int hook_mask)
-{
-	const struct ipt_recent_info *info = matchinfo;
-	struct recent_table *t;
-	unsigned i;
-	bool ret = false;
-
-	if (hweight8(info->check_set &
-		     (IPT_RECENT_SET | IPT_RECENT_REMOVE |
-		      IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) != 1)
-		return false;
-	if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) &&
-	    (info->seconds || info->hit_count))
-		return false;
-	if (info->hit_count > ip_pkt_list_tot)
-		return false;
-	if (info->name[0] == '\0' ||
-	    strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN)
-		return false;
-
-	mutex_lock(&recent_mutex);
-	t = recent_table_lookup(info->name);
-	if (t != NULL) {
-		t->refcnt++;
-		ret = true;
-		goto out;
-	}
-
-	t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size,
-		    GFP_KERNEL);
-	if (t == NULL)
-		goto out;
-	t->refcnt = 1;
-	strcpy(t->name, info->name);
-	INIT_LIST_HEAD(&t->lru_list);
-	for (i = 0; i < ip_list_hash_size; i++)
-		INIT_LIST_HEAD(&t->iphash[i]);
-#ifdef CONFIG_PROC_FS
-	t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops);
-	if (t->proc == NULL) {
-		kfree(t);
-		goto out;
-	}
-	t->proc->uid       = ip_list_uid;
-	t->proc->gid       = ip_list_gid;
-	t->proc->data      = t;
-#endif
-	spin_lock_bh(&recent_lock);
-	list_add_tail(&t->list, &tables);
-	spin_unlock_bh(&recent_lock);
-	ret = true;
-out:
-	mutex_unlock(&recent_mutex);
-	return ret;
-}
-
-static void recent_mt_destroy(const struct xt_match *match, void *matchinfo)
-{
-	const struct ipt_recent_info *info = matchinfo;
-	struct recent_table *t;
-
-	mutex_lock(&recent_mutex);
-	t = recent_table_lookup(info->name);
-	if (--t->refcnt == 0) {
-		spin_lock_bh(&recent_lock);
-		list_del(&t->list);
-		spin_unlock_bh(&recent_lock);
-#ifdef CONFIG_PROC_FS
-		remove_proc_entry(t->name, proc_dir);
-#endif
-		recent_table_flush(t);
-		kfree(t);
-	}
-	mutex_unlock(&recent_mutex);
-}
-
-#ifdef CONFIG_PROC_FS
-struct recent_iter_state {
-	struct recent_table	*table;
-	unsigned int		bucket;
-};
-
-static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(recent_lock)
-{
-	struct recent_iter_state *st = seq->private;
-	const struct recent_table *t = st->table;
-	struct recent_entry *e;
-	loff_t p = *pos;
-
-	spin_lock_bh(&recent_lock);
-
-	for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++)
-		list_for_each_entry(e, &t->iphash[st->bucket], list)
-			if (p-- == 0)
-				return e;
-	return NULL;
-}
-
-static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct recent_iter_state *st = seq->private;
-	const struct recent_table *t = st->table;
-	struct recent_entry *e = v;
-	struct list_head *head = e->list.next;
-
-	while (head == &t->iphash[st->bucket]) {
-		if (++st->bucket >= ip_list_hash_size)
-			return NULL;
-		head = t->iphash[st->bucket].next;
-	}
-	(*pos)++;
-	return list_entry(head, struct recent_entry, list);
-}
-
-static void recent_seq_stop(struct seq_file *s, void *v)
-	__releases(recent_lock)
-{
-	spin_unlock_bh(&recent_lock);
-}
-
-static int recent_seq_show(struct seq_file *seq, void *v)
-{
-	const struct recent_entry *e = v;
-	unsigned int i;
-
-	i = (e->index - 1) % ip_pkt_list_tot;
-	seq_printf(seq, "src=%u.%u.%u.%u ttl: %u last_seen: %lu oldest_pkt: %u",
-		   NIPQUAD(e->addr), e->ttl, e->stamps[i], e->index);
-	for (i = 0; i < e->nstamps; i++)
-		seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
-	seq_printf(seq, "\n");
-	return 0;
-}
-
-static const struct seq_operations recent_seq_ops = {
-	.start		= recent_seq_start,
-	.next		= recent_seq_next,
-	.stop		= recent_seq_stop,
-	.show		= recent_seq_show,
-};
-
-static int recent_seq_open(struct inode *inode, struct file *file)
-{
-	struct proc_dir_entry *pde = PDE(inode);
-	struct recent_iter_state *st;
-
-	st = __seq_open_private(file, &recent_seq_ops, sizeof(*st));
-	if (st == NULL)
-		return -ENOMEM;
-
-	st->table    = pde->data;
-	return 0;
-}
-
-static ssize_t recent_proc_write(struct file *file, const char __user *input,
-				 size_t size, loff_t *loff)
-{
-	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
-	struct recent_table *t = pde->data;
-	struct recent_entry *e;
-	char buf[sizeof("+255.255.255.255")], *c = buf;
-	__be32 addr;
-	int add;
-
-	if (size > sizeof(buf))
-		size = sizeof(buf);
-	if (copy_from_user(buf, input, size))
-		return -EFAULT;
-	while (isspace(*c))
-		c++;
-
-	if (size - (c - buf) < 5)
-		return c - buf;
-	if (!strncmp(c, "clear", 5)) {
-		c += 5;
-		spin_lock_bh(&recent_lock);
-		recent_table_flush(t);
-		spin_unlock_bh(&recent_lock);
-		return c - buf;
-	}
-
-	switch (*c) {
-	case '-':
-		add = 0;
-		c++;
-		break;
-	case '+':
-		c++;
-	default:
-		add = 1;
-		break;
-	}
-	addr = in_aton(c);
-
-	spin_lock_bh(&recent_lock);
-	e = recent_entry_lookup(t, addr, 0);
-	if (e == NULL) {
-		if (add)
-			recent_entry_init(t, addr, 0);
-	} else {
-		if (add)
-			recent_entry_update(t, e);
-		else
-			recent_entry_remove(t, e);
-	}
-	spin_unlock_bh(&recent_lock);
-	return size;
-}
-
-static const struct file_operations recent_fops = {
-	.open		= recent_seq_open,
-	.read		= seq_read,
-	.write		= recent_proc_write,
-	.release	= seq_release_private,
-	.owner		= THIS_MODULE,
-};
-#endif /* CONFIG_PROC_FS */
-
-static struct xt_match recent_mt_reg __read_mostly = {
-	.name		= "recent",
-	.family		= AF_INET,
-	.match		= recent_mt,
-	.matchsize	= sizeof(struct ipt_recent_info),
-	.checkentry	= recent_mt_check,
-	.destroy	= recent_mt_destroy,
-	.me		= THIS_MODULE,
-};
-
-static int __init recent_mt_init(void)
-{
-	int err;
-
-	if (!ip_list_tot || !ip_pkt_list_tot || ip_pkt_list_tot > 255)
-		return -EINVAL;
-	ip_list_hash_size = 1 << fls(ip_list_tot);
-
-	err = xt_register_match(&recent_mt_reg);
-#ifdef CONFIG_PROC_FS
-	if (err)
-		return err;
-	proc_dir = proc_mkdir("ipt_recent", init_net.proc_net);
-	if (proc_dir == NULL) {
-		xt_unregister_match(&recent_mt_reg);
-		err = -ENOMEM;
-	}
-#endif
-	return err;
-}
-
-static void __exit recent_mt_exit(void)
-{
-	BUG_ON(!list_empty(&tables));
-	xt_unregister_match(&recent_mt_reg);
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("ipt_recent", init_net.proc_net);
-#endif
-}
-
-module_init(recent_mt_init);
-module_exit(recent_mt_exit);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ee898e74808..ccc78b07a1a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -732,6 +732,17 @@ config NETFILTER_XT_MATCH_REALM
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_RECENT
+	tristate '"recent" match support'
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	---help---
+	This match is used for creating one or many lists of recently
+	used addresses and then matching against that/those list(s).
+
+	Short options are available by using 'iptables -m recent -h'
+	Official Website: <http://snowman.net/projects/ipt_recent/>
+
 config NETFILTER_XT_MATCH_SCTP
 	tristate  '"sctp" protocol match support (EXPERIMENTAL)'
 	depends on NETFILTER_XTABLES && EXPERIMENTAL
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3bd2cc556ae..f101cf61e6f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
new file mode 100644
index 00000000000..422c0e4d66b
--- /dev/null
+++ b/net/netfilter/xt_recent.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This is a replacement of the old ipt_recent module, which carried the
+ * following copyright notice:
+ *
+ * Author: Stephen Frost <sfrost@snowman.net>
+ * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
+ */
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/moduleparam.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/bitops.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <net/net_namespace.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_recent.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_recent");
+
+static unsigned int ip_list_tot = 100;
+static unsigned int ip_pkt_list_tot = 20;
+static unsigned int ip_list_hash_size = 0;
+static unsigned int ip_list_perms = 0644;
+static unsigned int ip_list_uid = 0;
+static unsigned int ip_list_gid = 0;
+module_param(ip_list_tot, uint, 0400);
+module_param(ip_pkt_list_tot, uint, 0400);
+module_param(ip_list_hash_size, uint, 0400);
+module_param(ip_list_perms, uint, 0400);
+module_param(ip_list_uid, uint, 0400);
+module_param(ip_list_gid, uint, 0400);
+MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
+MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
+MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
+MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files");
+
+struct recent_entry {
+	struct list_head	list;
+	struct list_head	lru_list;
+	__be32			addr;
+	u_int8_t		ttl;
+	u_int8_t		index;
+	u_int16_t		nstamps;
+	unsigned long		stamps[0];
+};
+
+struct recent_table {
+	struct list_head	list;
+	char			name[XT_RECENT_NAME_LEN];
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry	*proc;
+#endif
+	unsigned int		refcnt;
+	unsigned int		entries;
+	struct list_head	lru_list;
+	struct list_head	iphash[0];
+};
+
+static LIST_HEAD(tables);
+static DEFINE_SPINLOCK(recent_lock);
+static DEFINE_MUTEX(recent_mutex);
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry	*proc_dir;
+static const struct file_operations	recent_fops;
+#endif
+
+static u_int32_t hash_rnd;
+static int hash_rnd_initted;
+
+static unsigned int recent_entry_hash(__be32 addr)
+{
+	if (!hash_rnd_initted) {
+		get_random_bytes(&hash_rnd, 4);
+		hash_rnd_initted = 1;
+	}
+	return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1);
+}
+
+static struct recent_entry *
+recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl)
+{
+	struct recent_entry *e;
+	unsigned int h;
+
+	h = recent_entry_hash(addr);
+	list_for_each_entry(e, &table->iphash[h], list)
+		if (e->addr == addr && (ttl == e->ttl || !ttl || !e->ttl))
+			return e;
+	return NULL;
+}
+
+static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
+{
+	list_del(&e->list);
+	list_del(&e->lru_list);
+	kfree(e);
+	t->entries--;
+}
+
+static struct recent_entry *
+recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl)
+{
+	struct recent_entry *e;
+
+	if (t->entries >= ip_list_tot) {
+		e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
+		recent_entry_remove(t, e);
+	}
+	e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * ip_pkt_list_tot,
+		    GFP_ATOMIC);
+	if (e == NULL)
+		return NULL;
+	e->addr      = addr;
+	e->ttl       = ttl;
+	e->stamps[0] = jiffies;
+	e->nstamps   = 1;
+	e->index     = 1;
+	list_add_tail(&e->list, &t->iphash[recent_entry_hash(addr)]);
+	list_add_tail(&e->lru_list, &t->lru_list);
+	t->entries++;
+	return e;
+}
+
+static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
+{
+	e->stamps[e->index++] = jiffies;
+	if (e->index > e->nstamps)
+		e->nstamps = e->index;
+	e->index %= ip_pkt_list_tot;
+	list_move_tail(&e->lru_list, &t->lru_list);
+}
+
+static struct recent_table *recent_table_lookup(const char *name)
+{
+	struct recent_table *t;
+
+	list_for_each_entry(t, &tables, list)
+		if (!strcmp(t->name, name))
+			return t;
+	return NULL;
+}
+
+static void recent_table_flush(struct recent_table *t)
+{
+	struct recent_entry *e, *next;
+	unsigned int i;
+
+	for (i = 0; i < ip_list_hash_size; i++)
+		list_for_each_entry_safe(e, next, &t->iphash[i], list)
+			recent_entry_remove(t, e);
+}
+
+static bool
+recent_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
+{
+	const struct xt_recent_mtinfo *info = matchinfo;
+	struct recent_table *t;
+	struct recent_entry *e;
+	__be32 addr;
+	u_int8_t ttl;
+	bool ret = info->invert;
+
+	if (info->side == XT_RECENT_DEST)
+		addr = ip_hdr(skb)->daddr;
+	else
+		addr = ip_hdr(skb)->saddr;
+
+	ttl = ip_hdr(skb)->ttl;
+	/* use TTL as seen before forwarding */
+	if (out && !skb->sk)
+		ttl++;
+
+	spin_lock_bh(&recent_lock);
+	t = recent_table_lookup(info->name);
+	e = recent_entry_lookup(t, addr,
+				info->check_set & XT_RECENT_TTL ? ttl : 0);
+	if (e == NULL) {
+		if (!(info->check_set & XT_RECENT_SET))
+			goto out;
+		e = recent_entry_init(t, addr, ttl);
+		if (e == NULL)
+			*hotdrop = true;
+		ret = !ret;
+		goto out;
+	}
+
+	if (info->check_set & XT_RECENT_SET)
+		ret = !ret;
+	else if (info->check_set & XT_RECENT_REMOVE) {
+		recent_entry_remove(t, e);
+		ret = !ret;
+	} else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) {
+		unsigned long time = jiffies - info->seconds * HZ;
+		unsigned int i, hits = 0;
+
+		for (i = 0; i < e->nstamps; i++) {
+			if (info->seconds && time_after(time, e->stamps[i]))
+				continue;
+			if (++hits >= info->hit_count) {
+				ret = !ret;
+				break;
+			}
+		}
+	}
+
+	if (info->check_set & XT_RECENT_SET ||
+	    (info->check_set & XT_RECENT_UPDATE && ret)) {
+		recent_entry_update(t, e);
+		e->ttl = ttl;
+	}
+out:
+	spin_unlock_bh(&recent_lock);
+	return ret;
+}
+
+static bool
+recent_mt_check(const char *tablename, const void *ip,
+                const struct xt_match *match, void *matchinfo,
+                unsigned int hook_mask)
+{
+	const struct xt_recent_mtinfo *info = matchinfo;
+	struct recent_table *t;
+	unsigned i;
+	bool ret = false;
+
+	if (hweight8(info->check_set &
+		     (XT_RECENT_SET | XT_RECENT_REMOVE |
+		      XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
+		return false;
+	if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
+	    (info->seconds || info->hit_count))
+		return false;
+	if (info->hit_count > ip_pkt_list_tot)
+		return false;
+	if (info->name[0] == '\0' ||
+	    strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
+		return false;
+
+	mutex_lock(&recent_mutex);
+	t = recent_table_lookup(info->name);
+	if (t != NULL) {
+		t->refcnt++;
+		ret = true;
+		goto out;
+	}
+
+	t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size,
+		    GFP_KERNEL);
+	if (t == NULL)
+		goto out;
+	t->refcnt = 1;
+	strcpy(t->name, info->name);
+	INIT_LIST_HEAD(&t->lru_list);
+	for (i = 0; i < ip_list_hash_size; i++)
+		INIT_LIST_HEAD(&t->iphash[i]);
+#ifdef CONFIG_PROC_FS
+	t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops);
+	if (t->proc == NULL) {
+		kfree(t);
+		goto out;
+	}
+	t->proc->uid       = ip_list_uid;
+	t->proc->gid       = ip_list_gid;
+	t->proc->data      = t;
+#endif
+	spin_lock_bh(&recent_lock);
+	list_add_tail(&t->list, &tables);
+	spin_unlock_bh(&recent_lock);
+	ret = true;
+out:
+	mutex_unlock(&recent_mutex);
+	return ret;
+}
+
+static void recent_mt_destroy(const struct xt_match *match, void *matchinfo)
+{
+	const struct xt_recent_mtinfo *info = matchinfo;
+	struct recent_table *t;
+
+	mutex_lock(&recent_mutex);
+	t = recent_table_lookup(info->name);
+	if (--t->refcnt == 0) {
+		spin_lock_bh(&recent_lock);
+		list_del(&t->list);
+		spin_unlock_bh(&recent_lock);
+#ifdef CONFIG_PROC_FS
+		remove_proc_entry(t->name, proc_dir);
+#endif
+		recent_table_flush(t);
+		kfree(t);
+	}
+	mutex_unlock(&recent_mutex);
+}
+
+#ifdef CONFIG_PROC_FS
+struct recent_iter_state {
+	struct recent_table	*table;
+	unsigned int		bucket;
+};
+
+static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(recent_lock)
+{
+	struct recent_iter_state *st = seq->private;
+	const struct recent_table *t = st->table;
+	struct recent_entry *e;
+	loff_t p = *pos;
+
+	spin_lock_bh(&recent_lock);
+
+	for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++)
+		list_for_each_entry(e, &t->iphash[st->bucket], list)
+			if (p-- == 0)
+				return e;
+	return NULL;
+}
+
+static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct recent_iter_state *st = seq->private;
+	const struct recent_table *t = st->table;
+	struct recent_entry *e = v;
+	struct list_head *head = e->list.next;
+
+	while (head == &t->iphash[st->bucket]) {
+		if (++st->bucket >= ip_list_hash_size)
+			return NULL;
+		head = t->iphash[st->bucket].next;
+	}
+	(*pos)++;
+	return list_entry(head, struct recent_entry, list);
+}
+
+static void recent_seq_stop(struct seq_file *s, void *v)
+	__releases(recent_lock)
+{
+	spin_unlock_bh(&recent_lock);
+}
+
+static int recent_seq_show(struct seq_file *seq, void *v)
+{
+	const struct recent_entry *e = v;
+	unsigned int i;
+
+	i = (e->index - 1) % ip_pkt_list_tot;
+	seq_printf(seq, "src=%u.%u.%u.%u ttl: %u last_seen: %lu oldest_pkt: %u",
+		   NIPQUAD(e->addr), e->ttl, e->stamps[i], e->index);
+	for (i = 0; i < e->nstamps; i++)
+		seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
+	seq_printf(seq, "\n");
+	return 0;
+}
+
+static const struct seq_operations recent_seq_ops = {
+	.start		= recent_seq_start,
+	.next		= recent_seq_next,
+	.stop		= recent_seq_stop,
+	.show		= recent_seq_show,
+};
+
+static int recent_seq_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *pde = PDE(inode);
+	struct recent_iter_state *st;
+
+	st = __seq_open_private(file, &recent_seq_ops, sizeof(*st));
+	if (st == NULL)
+		return -ENOMEM;
+
+	st->table    = pde->data;
+	return 0;
+}
+
+static ssize_t recent_proc_write(struct file *file, const char __user *input,
+				 size_t size, loff_t *loff)
+{
+	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct recent_table *t = pde->data;
+	struct recent_entry *e;
+	char buf[sizeof("+255.255.255.255")], *c = buf;
+	__be32 addr;
+	int add;
+
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+	if (copy_from_user(buf, input, size))
+		return -EFAULT;
+	while (isspace(*c))
+		c++;
+
+	if (size - (c - buf) < 5)
+		return c - buf;
+	if (!strncmp(c, "clear", 5)) {
+		c += 5;
+		spin_lock_bh(&recent_lock);
+		recent_table_flush(t);
+		spin_unlock_bh(&recent_lock);
+		return c - buf;
+	}
+
+	switch (*c) {
+	case '-':
+		add = 0;
+		c++;
+		break;
+	case '+':
+		c++;
+	default:
+		add = 1;
+		break;
+	}
+	addr = in_aton(c);
+
+	spin_lock_bh(&recent_lock);
+	e = recent_entry_lookup(t, addr, 0);
+	if (e == NULL) {
+		if (add)
+			recent_entry_init(t, addr, 0);
+	} else {
+		if (add)
+			recent_entry_update(t, e);
+		else
+			recent_entry_remove(t, e);
+	}
+	spin_unlock_bh(&recent_lock);
+	return size;
+}
+
+static const struct file_operations recent_fops = {
+	.open		= recent_seq_open,
+	.read		= seq_read,
+	.write		= recent_proc_write,
+	.release	= seq_release_private,
+	.owner		= THIS_MODULE,
+};
+#endif /* CONFIG_PROC_FS */
+
+static struct xt_match recent_mt_reg __read_mostly = {
+	.name		= "recent",
+	.family		= AF_INET,
+	.match		= recent_mt,
+	.matchsize	= sizeof(struct xt_recent_mtinfo),
+	.checkentry	= recent_mt_check,
+	.destroy	= recent_mt_destroy,
+	.me		= THIS_MODULE,
+};
+
+static int __init recent_mt_init(void)
+{
+	int err;
+
+	if (!ip_list_tot || !ip_pkt_list_tot || ip_pkt_list_tot > 255)
+		return -EINVAL;
+	ip_list_hash_size = 1 << fls(ip_list_tot);
+
+	err = xt_register_match(&recent_mt_reg);
+#ifdef CONFIG_PROC_FS
+	if (err)
+		return err;
+	proc_dir = proc_mkdir("ipt_recent", init_net.proc_net);
+	if (proc_dir == NULL) {
+		xt_unregister_match(&recent_mt_reg);
+		err = -ENOMEM;
+	}
+#endif
+	return err;
+}
+
+static void __exit recent_mt_exit(void)
+{
+	BUG_ON(!list_empty(&tables));
+	xt_unregister_match(&recent_mt_reg);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("ipt_recent", init_net.proc_net);
+#endif
+}
+
+module_init(recent_mt_init);
+module_exit(recent_mt_exit);
-- 
cgit v1.2.3-70-g09d2


From 079aa88fe7172b7650c7cf2c0bc01662bafea236 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:00 +0200
Subject: netfilter: xt_recent: IPv6 support

This updates xt_recent to support the IPv6 address family.
The new /proc/net/xt_recent directory must be used for this.
The old proc interface can also be configured out.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 Documentation/feature-removal-schedule.txt |   3 +
 net/netfilter/Kconfig                      |   7 +
 net/netfilter/xt_recent.c                  | 300 +++++++++++++++++++++++------
 3 files changed, 256 insertions(+), 54 deletions(-)

(limited to 'net')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index d0f22fac55d..3d2d0c29f02 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -250,6 +250,9 @@ What (Why):
 	- xt_mark match revision 0
 	  (superseded by xt_mark match revision 1)
 
+	- xt_recent: the old ipt_recent proc dir
+	  (superseded by /proc/net/xt_recent)
+
 When:	January 2009 or Linux 2.7.0, whichever comes first
 Why:	Superseded by newer revisions or modules
 Who:	Jan Engelhardt <jengelh@computergmbh.de>
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ccc78b07a1a..4a464857f21 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -743,6 +743,13 @@ config NETFILTER_XT_MATCH_RECENT
 	Short options are available by using 'iptables -m recent -h'
 	Official Website: <http://snowman.net/projects/ipt_recent/>
 
+config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	bool 'Enable obsolete /proc/net/ipt_recent'
+	depends on NETFILTER_XT_MATCH_RECENT && PROC_FS
+	---help---
+	This option enables the old /proc/net/ipt_recent interface,
+	which has been obsoleted by /proc/net/xt_recent.
+
 config NETFILTER_XT_MATCH_SCTP
 	tristate  '"sctp" protocol match support (EXPERIMENTAL)'
 	depends on NETFILTER_XTABLES && EXPERIMENTAL
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 422c0e4d66b..adc2e2f1b09 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
+ * Copyright © CC Computer Consultants GmbH, 2007 - 2008
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -13,6 +14,8 @@
  */
 #include <linux/init.h>
 #include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -30,9 +33,11 @@
 #include <linux/netfilter/xt_recent.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
 MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_recent");
+MODULE_ALIAS("ip6t_recent");
 
 static unsigned int ip_list_tot = 100;
 static unsigned int ip_pkt_list_tot = 20;
@@ -49,14 +54,15 @@ module_param(ip_list_gid, uint, 0400);
 MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
 MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
 MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
-MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
-MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files");
-MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/xt_recent/* files");
 
 struct recent_entry {
 	struct list_head	list;
 	struct list_head	lru_list;
-	__be32			addr;
+	union nf_inet_addr	addr;
+	u_int16_t		family;
 	u_int8_t		ttl;
 	u_int8_t		index;
 	u_int16_t		nstamps;
@@ -67,7 +73,7 @@ struct recent_table {
 	struct list_head	list;
 	char			name[XT_RECENT_NAME_LEN];
 #ifdef CONFIG_PROC_FS
-	struct proc_dir_entry	*proc;
+	struct proc_dir_entry	*proc_old, *proc;
 #endif
 	unsigned int		refcnt;
 	unsigned int		entries;
@@ -80,31 +86,53 @@ static DEFINE_SPINLOCK(recent_lock);
 static DEFINE_MUTEX(recent_mutex);
 
 #ifdef CONFIG_PROC_FS
-static struct proc_dir_entry	*proc_dir;
-static const struct file_operations	recent_fops;
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+static struct proc_dir_entry *proc_old_dir;
+#endif
+static struct proc_dir_entry *recent_proc_dir;
+static const struct file_operations recent_old_fops, recent_mt_fops;
 #endif
 
 static u_int32_t hash_rnd;
-static int hash_rnd_initted;
+static bool hash_rnd_initted;
+
+static unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
+{
+	if (!hash_rnd_initted) {
+		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+		hash_rnd_initted = true;
+	}
+	return jhash_1word((__force u32)addr->ip, hash_rnd) &
+	       (ip_list_hash_size - 1);
+}
 
-static unsigned int recent_entry_hash(__be32 addr)
+static unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
 {
 	if (!hash_rnd_initted) {
-		get_random_bytes(&hash_rnd, 4);
-		hash_rnd_initted = 1;
+		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+		hash_rnd_initted = true;
 	}
-	return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1);
+	return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) &
+	       (ip_list_hash_size - 1);
 }
 
 static struct recent_entry *
-recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl)
+recent_entry_lookup(const struct recent_table *table,
+		    const union nf_inet_addr *addrp, u_int16_t family,
+		    u_int8_t ttl)
 {
 	struct recent_entry *e;
 	unsigned int h;
 
-	h = recent_entry_hash(addr);
+	if (family == AF_INET)
+		h = recent_entry_hash4(addrp);
+	else
+		h = recent_entry_hash6(addrp);
+
 	list_for_each_entry(e, &table->iphash[h], list)
-		if (e->addr == addr && (ttl == e->ttl || !ttl || !e->ttl))
+		if (e->family == family &&
+		    memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 &&
+		    (ttl == e->ttl || ttl == 0 || e->ttl == 0))
 			return e;
 	return NULL;
 }
@@ -118,7 +146,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
 }
 
 static struct recent_entry *
-recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl)
+recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
+		  u_int16_t family, u_int8_t ttl)
 {
 	struct recent_entry *e;
 
@@ -130,12 +159,16 @@ recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl)
 		    GFP_ATOMIC);
 	if (e == NULL)
 		return NULL;
-	e->addr      = addr;
+	memcpy(&e->addr, addr, sizeof(e->addr));
 	e->ttl       = ttl;
 	e->stamps[0] = jiffies;
 	e->nstamps   = 1;
 	e->index     = 1;
-	list_add_tail(&e->list, &t->iphash[recent_entry_hash(addr)]);
+	e->family    = family;
+	if (family == AF_INET)
+		list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]);
+	else
+		list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]);
 	list_add_tail(&e->lru_list, &t->lru_list);
 	t->entries++;
 	return e;
@@ -179,28 +212,42 @@ recent_mt(const struct sk_buff *skb, const struct net_device *in,
 	const struct xt_recent_mtinfo *info = matchinfo;
 	struct recent_table *t;
 	struct recent_entry *e;
-	__be32 addr;
+	union nf_inet_addr addr = {};
 	u_int8_t ttl;
 	bool ret = info->invert;
 
-	if (info->side == XT_RECENT_DEST)
-		addr = ip_hdr(skb)->daddr;
-	else
-		addr = ip_hdr(skb)->saddr;
+	if (match->family == AF_INET) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		if (info->side == XT_RECENT_DEST)
+			addr.ip = iph->daddr;
+		else
+			addr.ip = iph->saddr;
+
+		ttl = iph->ttl;
+	} else {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+		if (info->side == XT_RECENT_DEST)
+			memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6));
+		else
+			memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6));
+
+		ttl = iph->hop_limit;
+	}
 
-	ttl = ip_hdr(skb)->ttl;
 	/* use TTL as seen before forwarding */
 	if (out && !skb->sk)
 		ttl++;
 
 	spin_lock_bh(&recent_lock);
 	t = recent_table_lookup(info->name);
-	e = recent_entry_lookup(t, addr,
-				info->check_set & XT_RECENT_TTL ? ttl : 0);
+	e = recent_entry_lookup(t, &addr, match->family,
+				(info->check_set & XT_RECENT_TTL) ? ttl : 0);
 	if (e == NULL) {
 		if (!(info->check_set & XT_RECENT_SET))
 			goto out;
-		e = recent_entry_init(t, addr, ttl);
+		e = recent_entry_init(t, &addr, match->family, ttl);
 		if (e == NULL)
 			*hotdrop = true;
 		ret = !ret;
@@ -277,11 +324,24 @@ recent_mt_check(const char *tablename, const void *ip,
 	for (i = 0; i < ip_list_hash_size; i++)
 		INIT_LIST_HEAD(&t->iphash[i]);
 #ifdef CONFIG_PROC_FS
-	t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops);
+	t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir,
+		  &recent_mt_fops);
 	if (t->proc == NULL) {
 		kfree(t);
 		goto out;
 	}
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir,
+		      &recent_old_fops);
+	if (t->proc_old == NULL) {
+		remove_proc_entry(t->name, proc_old_dir);
+		kfree(t);
+		goto out;
+	}
+	t->proc_old->uid   = ip_list_uid;
+	t->proc_old->gid   = ip_list_gid;
+	t->proc_old->data  = t;
+#endif
 	t->proc->uid       = ip_list_uid;
 	t->proc->gid       = ip_list_gid;
 	t->proc->data      = t;
@@ -307,7 +367,10 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo)
 		list_del(&t->list);
 		spin_unlock_bh(&recent_lock);
 #ifdef CONFIG_PROC_FS
-		remove_proc_entry(t->name, proc_dir);
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+		remove_proc_entry(t->name, proc_old_dir);
+#endif
+		remove_proc_entry(t->name, recent_proc_dir);
 #endif
 		recent_table_flush(t);
 		kfree(t);
@@ -317,7 +380,7 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo)
 
 #ifdef CONFIG_PROC_FS
 struct recent_iter_state {
-	struct recent_table	*table;
+	const struct recent_table *table;
 	unsigned int		bucket;
 };
 
@@ -342,8 +405,8 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct recent_iter_state *st = seq->private;
 	const struct recent_table *t = st->table;
-	struct recent_entry *e = v;
-	struct list_head *head = e->list.next;
+	const struct recent_entry *e = v;
+	const struct list_head *head = e->list.next;
 
 	while (head == &t->iphash[st->bucket]) {
 		if (++st->bucket >= ip_list_hash_size)
@@ -366,8 +429,14 @@ static int recent_seq_show(struct seq_file *seq, void *v)
 	unsigned int i;
 
 	i = (e->index - 1) % ip_pkt_list_tot;
-	seq_printf(seq, "src=%u.%u.%u.%u ttl: %u last_seen: %lu oldest_pkt: %u",
-		   NIPQUAD(e->addr), e->ttl, e->stamps[i], e->index);
+	if (e->family == AF_INET)
+		seq_printf(seq, "src=" NIPQUAD_FMT " ttl: %u last_seen: %lu "
+			   "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl,
+			   e->stamps[i], e->index);
+	else
+		seq_printf(seq, "src=" NIP6_FMT " ttl: %u last_seen: %lu "
+			   "oldest_pkt: %u", NIP6(e->addr.in6), e->ttl,
+			   e->stamps[i], e->index);
 	for (i = 0; i < e->nstamps; i++)
 		seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
 	seq_printf(seq, "\n");
@@ -394,8 +463,22 @@ static int recent_seq_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static ssize_t recent_proc_write(struct file *file, const char __user *input,
-				 size_t size, loff_t *loff)
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+static int recent_old_seq_open(struct inode *inode, struct file *filp)
+{
+	static bool warned_of_old;
+
+	if (unlikely(!warned_of_old)) {
+		printk(KERN_INFO KBUILD_MODNAME ": Use of /proc/net/ipt_recent"
+		       " is deprecated; use /proc/net/xt_recent.\n");
+		warned_of_old = true;
+	}
+	return recent_seq_open(inode, filp);
+}
+
+static ssize_t recent_old_proc_write(struct file *file,
+				     const char __user *input,
+				     size_t size, loff_t *loff)
 {
 	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
 	struct recent_table *t = pde->data;
@@ -408,6 +491,7 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input,
 		size = sizeof(buf);
 	if (copy_from_user(buf, input, size))
 		return -EFAULT;
+
 	while (isspace(*c))
 		c++;
 
@@ -435,10 +519,10 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input,
 	addr = in_aton(c);
 
 	spin_lock_bh(&recent_lock);
-	e = recent_entry_lookup(t, addr, 0);
+	e = recent_entry_lookup(t, (const void *)&addr, PF_INET, 0);
 	if (e == NULL) {
 		if (add)
-			recent_entry_init(t, addr, 0);
+			recent_entry_init(t, (const void *)&addr, PF_INET, 0);
 	} else {
 		if (add)
 			recent_entry_update(t, e);
@@ -449,23 +533,118 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input,
 	return size;
 }
 
-static const struct file_operations recent_fops = {
-	.open		= recent_seq_open,
+static const struct file_operations recent_old_fops = {
+	.open		= recent_old_seq_open,
 	.read		= seq_read,
-	.write		= recent_proc_write,
+	.write		= recent_old_proc_write,
 	.release	= seq_release_private,
 	.owner		= THIS_MODULE,
 };
+#endif
+
+static ssize_t
+recent_mt_proc_write(struct file *file, const char __user *input,
+		     size_t size, loff_t *loff)
+{
+	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct recent_table *t = pde->data;
+	struct recent_entry *e;
+	char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
+	const char *c = buf;
+	union nf_inet_addr addr;
+	u_int16_t family;
+	bool add, succ;
+
+	if (size == 0)
+		return 0;
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+	if (copy_from_user(buf, input, size) != 0)
+		return -EFAULT;
+
+	/* Strict protocol! */
+	if (*loff != 0)
+		return -ESPIPE;
+	switch (*c) {
+	case '/': /* flush table */
+		spin_lock_bh(&recent_lock);
+		recent_table_flush(t);
+		spin_unlock_bh(&recent_lock);
+		return size;
+	case '-': /* remove address */
+		add = false;
+		break;
+	case '+': /* add address */
+		add = true;
+		break;
+	default:
+		printk(KERN_INFO KBUILD_MODNAME ": Need +ip, -ip or /\n");
+		return -EINVAL;
+	}
+
+	++c;
+	--size;
+	if (strnchr(c, size, ':') != NULL) {
+		family = AF_INET6;
+		succ   = in6_pton(c, size, (void *)&addr, '\n', NULL);
+	} else {
+		family = AF_INET;
+		succ   = in4_pton(c, size, (void *)&addr, '\n', NULL);
+	}
+
+	if (!succ) {
+		printk(KERN_INFO KBUILD_MODNAME ": illegal address written "
+		       "to procfs\n");
+		return -EINVAL;
+	}
+
+	spin_lock_bh(&recent_lock);
+	e = recent_entry_lookup(t, &addr, family, 0);
+	if (e == NULL) {
+		if (add)
+			recent_entry_init(t, &addr, family, 0);
+	} else {
+		if (add)
+			recent_entry_update(t, e);
+		else
+			recent_entry_remove(t, e);
+	}
+	spin_unlock_bh(&recent_lock);
+	/* Note we removed one above */
+	*loff += size + 1;
+	return size + 1;
+}
+
+static const struct file_operations recent_mt_fops = {
+	.open    = recent_seq_open,
+	.read    = seq_read,
+	.write   = recent_mt_proc_write,
+	.release = seq_release_private,
+	.owner   = THIS_MODULE,
+};
 #endif /* CONFIG_PROC_FS */
 
-static struct xt_match recent_mt_reg __read_mostly = {
-	.name		= "recent",
-	.family		= AF_INET,
-	.match		= recent_mt,
-	.matchsize	= sizeof(struct xt_recent_mtinfo),
-	.checkentry	= recent_mt_check,
-	.destroy	= recent_mt_destroy,
-	.me		= THIS_MODULE,
+static struct xt_match recent_mt_reg[] __read_mostly = {
+	{
+		.name       = "recent",
+		.revision   = 0,
+		.family     = AF_INET,
+		.match      = recent_mt,
+		.matchsize  = sizeof(struct xt_recent_mtinfo),
+		.checkentry = recent_mt_check,
+		.destroy    = recent_mt_destroy,
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "recent",
+		.revision   = 0,
+		.family     = AF_INET6,
+		.match      = recent_mt,
+		.matchsize  = sizeof(struct xt_recent_mtinfo),
+		.checkentry = recent_mt_check,
+		.destroy    = recent_mt_destroy,
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init recent_mt_init(void)
@@ -476,15 +655,25 @@ static int __init recent_mt_init(void)
 		return -EINVAL;
 	ip_list_hash_size = 1 << fls(ip_list_tot);
 
-	err = xt_register_match(&recent_mt_reg);
+	err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
 #ifdef CONFIG_PROC_FS
 	if (err)
 		return err;
-	proc_dir = proc_mkdir("ipt_recent", init_net.proc_net);
-	if (proc_dir == NULL) {
-		xt_unregister_match(&recent_mt_reg);
+	recent_proc_dir = proc_mkdir("xt_recent", init_net.proc_net);
+	if (recent_proc_dir == NULL) {
+		xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+		err = -ENOMEM;
+	}
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	if (err < 0)
+		return err;
+	proc_old_dir = proc_mkdir("ipt_recent", init_net.proc_net);
+	if (proc_old_dir == NULL) {
+		remove_proc_entry("xt_recent", init_net.proc_net);
+		xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
 		err = -ENOMEM;
 	}
+#endif
 #endif
 	return err;
 }
@@ -492,9 +681,12 @@ static int __init recent_mt_init(void)
 static void __exit recent_mt_exit(void)
 {
 	BUG_ON(!list_empty(&tables));
-	xt_unregister_match(&recent_mt_reg);
+	xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
 #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
 	remove_proc_entry("ipt_recent", init_net.proc_net);
+#endif
+	remove_proc_entry("xt_recent", init_net.proc_net);
 #endif
 }
 
-- 
cgit v1.2.3-70-g09d2


From 7e9c6eeb136a46dfd941852803b3a9dd78939b69 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:00 +0200
Subject: netfilter: Introduce NFPROTO_* constants

The netfilter subsystem only supports a handful of protocols (much
less than PF_*) and even non-PF protocols like ARP and
pseudo-protocols like PF_BRIDGE. By creating NFPROTO_*, we can earn a
few memory savings on arrays that previously were always PF_MAX-sized
and keep the pseudo-protocols to ourselves.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter.h | 14 ++++++++++++--
 net/netfilter/core.c      |  6 +++---
 net/netfilter/nf_log.c    | 12 ++++++------
 net/netfilter/nf_queue.c  | 12 ++++++------
 net/netfilter/x_tables.c  | 18 ++++++++++--------
 5 files changed, 37 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 8c83d2e23bd..bf3afb0844f 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -52,6 +52,16 @@ enum nf_inet_hooks {
 	NF_INET_NUMHOOKS
 };
 
+enum {
+	NFPROTO_UNSPEC =  0,
+	NFPROTO_IPV4   =  2,
+	NFPROTO_ARP    =  3,
+	NFPROTO_BRIDGE =  7,
+	NFPROTO_IPV6   = 10,
+	NFPROTO_DECNET = 12,
+	NFPROTO_NUMPROTO,
+};
+
 union nf_inet_addr {
 	__u32		all[4];
 	__be32		ip;
@@ -138,7 +148,7 @@ extern struct ctl_path nf_net_netfilter_sysctl_path[];
 extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[];
 #endif /* CONFIG_SYSCTL */
 
-extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 
 int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev, struct net_device *outdev,
@@ -247,7 +257,7 @@ struct nf_afinfo {
 	int		route_key_size;
 };
 
-extern const struct nf_afinfo *nf_afinfo[NPROTO];
+extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO];
 static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
 {
 	return rcu_dereference(nf_afinfo[family]);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 26b8f489d7a..b16cd79951c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -26,7 +26,7 @@
 
 static DEFINE_MUTEX(afinfo_mutex);
 
-const struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
+const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
 EXPORT_SYMBOL(nf_afinfo);
 
 int nf_register_afinfo(const struct nf_afinfo *afinfo)
@@ -51,7 +51,7 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
 }
 EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
 
-struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly;
+struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
 EXPORT_SYMBOL(nf_hooks);
 static DEFINE_MUTEX(nf_hook_mutex);
 
@@ -264,7 +264,7 @@ EXPORT_SYMBOL(proc_net_netfilter);
 void __init netfilter_init(void)
 {
 	int i, h;
-	for (i = 0; i < NPROTO; i++) {
+	for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
 			INIT_LIST_HEAD(&nf_hooks[i][h]);
 	}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 5c2f7332015..fa8ae5d2659 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -15,7 +15,7 @@
 
 #define NF_LOG_PREFIXLEN		128
 
-static const struct nf_logger *nf_loggers[NPROTO] __read_mostly;
+static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
 static DEFINE_MUTEX(nf_log_mutex);
 
 /* return EBUSY if somebody else is registered, EEXIST if the same logger
@@ -24,7 +24,7 @@ int nf_log_register(u_int8_t pf, const struct nf_logger *logger)
 {
 	int ret;
 
-	if (pf >= NPROTO)
+	if (pf >= ARRAY_SIZE(nf_loggers))
 		return -EINVAL;
 
 	/* Any setup of logging members must be done before
@@ -47,7 +47,7 @@ EXPORT_SYMBOL(nf_log_register);
 
 void nf_log_unregister_pf(u_int8_t pf)
 {
-	if (pf >= NPROTO)
+	if (pf >= ARRAY_SIZE(nf_loggers))
 		return;
 	mutex_lock(&nf_log_mutex);
 	rcu_assign_pointer(nf_loggers[pf], NULL);
@@ -63,7 +63,7 @@ void nf_log_unregister(const struct nf_logger *logger)
 	int i;
 
 	mutex_lock(&nf_log_mutex);
-	for (i = 0; i < NPROTO; i++) {
+	for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
 		if (nf_loggers[i] == logger)
 			rcu_assign_pointer(nf_loggers[i], NULL);
 	}
@@ -103,7 +103,7 @@ static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
 	rcu_read_lock();
 
-	if (*pos >= NPROTO)
+	if (*pos >= ARRAY_SIZE(nf_loggers))
 		return NULL;
 
 	return pos;
@@ -113,7 +113,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
 	(*pos)++;
 
-	if (*pos >= NPROTO)
+	if (*pos >= ARRAY_SIZE(nf_loggers))
 		return NULL;
 
 	return pos;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index f285086f629..4f2310c93e0 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -16,7 +16,7 @@
  * long term mutex.  The handler must provide an an outfn() to accept packets
  * for queueing and must reinject all packets it receives, no matter what.
  */
-static const struct nf_queue_handler *queue_handler[NPROTO];
+static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
 
 static DEFINE_MUTEX(queue_handler_mutex);
 
@@ -26,7 +26,7 @@ int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
 	int ret;
 
-	if (pf >= NPROTO)
+	if (pf >= ARRAY_SIZE(queue_handler))
 		return -EINVAL;
 
 	mutex_lock(&queue_handler_mutex);
@@ -47,7 +47,7 @@ EXPORT_SYMBOL(nf_register_queue_handler);
 /* The caller must flush their queue before this */
 int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
-	if (pf >= NPROTO)
+	if (pf >= ARRAY_SIZE(queue_handler))
 		return -EINVAL;
 
 	mutex_lock(&queue_handler_mutex);
@@ -70,7 +70,7 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 	u_int8_t pf;
 
 	mutex_lock(&queue_handler_mutex);
-	for (pf = 0; pf < NPROTO; pf++)  {
+	for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
 		if (queue_handler[pf] == qh)
 			rcu_assign_pointer(queue_handler[pf], NULL);
 	}
@@ -285,7 +285,7 @@ EXPORT_SYMBOL(nf_reinject);
 #ifdef CONFIG_PROC_FS
 static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
-	if (*pos >= NPROTO)
+	if (*pos >= ARRAY_SIZE(queue_handler))
 		return NULL;
 
 	return pos;
@@ -295,7 +295,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
 	(*pos)++;
 
-	if (*pos >= NPROTO)
+	if (*pos >= ARRAY_SIZE(queue_handler))
 		return NULL;
 
 	return pos;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index cf2f3e90cef..2a7eb1da5d0 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -58,10 +58,12 @@ static struct xt_af *xt;
 #define duprintf(format, args...)
 #endif
 
-static const char *const xt_prefix[NPROTO] = {
-	[AF_INET]	= "ip",
-	[AF_INET6]	= "ip6",
-	[NF_ARP]	= "arp",
+static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
+	[NFPROTO_UNSPEC] = "x",
+	[NFPROTO_IPV4]   = "ip",
+	[NFPROTO_ARP]    = "arp",
+	[NFPROTO_BRIDGE] = "eb",
+	[NFPROTO_IPV6]   = "ip6",
 };
 
 /* Registration hooks for targets. */
@@ -932,7 +934,7 @@ int xt_proto_init(struct net *net, u_int8_t af)
 	struct proc_dir_entry *proc;
 #endif
 
-	if (af >= NPROTO)
+	if (af >= ARRAY_SIZE(xt_prefix))
 		return -EINVAL;
 
 
@@ -1001,7 +1003,7 @@ static int __net_init xt_net_init(struct net *net)
 {
 	int i;
 
-	for (i = 0; i < NPROTO; i++)
+	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		INIT_LIST_HEAD(&net->xt.tables[i]);
 	return 0;
 }
@@ -1014,11 +1016,11 @@ static int __init xt_init(void)
 {
 	int i, rv;
 
-	xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL);
+	xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
 	if (!xt)
 		return -ENOMEM;
 
-	for (i = 0; i < NPROTO; i++) {
+	for (i = 0; i < NFPROTO_NUMPROTO; i++) {
 		mutex_init(&xt[i].mutex);
 #ifdef CONFIG_COMPAT
 		mutex_init(&xt[i].compat_mutex);
-- 
cgit v1.2.3-70-g09d2


From ee999d8b9573df1b547aacdc6d79f86eb79c25cd Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:01 +0200
Subject: netfilter: x_tables: use NFPROTO_* in extensions

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/ebt_log.c       |  6 ++--
 net/bridge/netfilter/ebt_ulog.c      |  2 +-
 net/ipv4/netfilter/arp_tables.c      | 58 +++++++++++++++++++-----------------
 net/ipv4/netfilter/arpt_mangle.c     |  2 +-
 net/ipv4/netfilter/arptable_filter.c |  8 ++---
 net/ipv4/netfilter/ipt_CLUSTERIP.c   |  4 +--
 net/ipv4/netfilter/ipt_ECN.c         |  2 +-
 net/ipv4/netfilter/ipt_LOG.c         |  6 ++--
 net/ipv4/netfilter/ipt_MASQUERADE.c  |  2 +-
 net/ipv4/netfilter/ipt_NETMAP.c      |  2 +-
 net/ipv4/netfilter/ipt_REDIRECT.c    |  2 +-
 net/ipv4/netfilter/ipt_REJECT.c      |  2 +-
 net/ipv4/netfilter/ipt_TTL.c         |  2 +-
 net/ipv4/netfilter/ipt_ULOG.c        |  4 +--
 net/ipv4/netfilter/ipt_addrtype.c    |  4 +--
 net/ipv4/netfilter/ipt_ah.c          |  2 +-
 net/ipv4/netfilter/ipt_ecn.c         |  2 +-
 net/ipv4/netfilter/ipt_ttl.c         |  2 +-
 net/ipv6/netfilter/ip6t_HL.c         |  2 +-
 net/ipv6/netfilter/ip6t_LOG.c        |  7 +++--
 net/ipv6/netfilter/ip6t_REJECT.c     |  2 +-
 net/ipv6/netfilter/ip6t_ah.c         |  2 +-
 net/ipv6/netfilter/ip6t_eui64.c      |  2 +-
 net/ipv6/netfilter/ip6t_frag.c       |  2 +-
 net/ipv6/netfilter/ip6t_hbh.c        |  4 +--
 net/ipv6/netfilter/ip6t_hl.c         |  2 +-
 net/ipv6/netfilter/ip6t_ipv6header.c |  2 +-
 net/ipv6/netfilter/ip6t_mh.c         |  2 +-
 net/ipv6/netfilter/ip6t_rt.c         |  2 +-
 net/netfilter/xt_CLASSIFY.c          |  4 +--
 net/netfilter/xt_CONNMARK.c          |  8 ++---
 net/netfilter/xt_CONNSECMARK.c       |  4 +--
 net/netfilter/xt_DSCP.c              | 10 +++----
 net/netfilter/xt_MARK.c              | 12 ++++----
 net/netfilter/xt_NFLOG.c             |  4 +--
 net/netfilter/xt_NFQUEUE.c           |  4 +--
 net/netfilter/xt_NOTRACK.c           |  4 +--
 net/netfilter/xt_RATEEST.c           |  4 +--
 net/netfilter/xt_SECMARK.c           |  4 +--
 net/netfilter/xt_TCPMSS.c            |  4 +--
 net/netfilter/xt_TCPOPTSTRIP.c       |  4 +--
 net/netfilter/xt_TRACE.c             |  4 +--
 net/netfilter/xt_comment.c           |  4 +--
 net/netfilter/xt_connbytes.c         |  4 +--
 net/netfilter/xt_connlimit.c         | 10 +++----
 net/netfilter/xt_connmark.c          |  8 ++---
 net/netfilter/xt_conntrack.c         | 10 +++----
 net/netfilter/xt_dccp.c              |  4 +--
 net/netfilter/xt_dscp.c              | 12 ++++----
 net/netfilter/xt_esp.c               |  4 +--
 net/netfilter/xt_hashlimit.c         | 40 ++++++++++++-------------
 net/netfilter/xt_helper.c            |  4 +--
 net/netfilter/xt_iprange.c           |  6 ++--
 net/netfilter/xt_length.c            |  4 +--
 net/netfilter/xt_limit.c             |  4 +--
 net/netfilter/xt_mac.c               |  4 +--
 net/netfilter/xt_mark.c              |  8 ++---
 net/netfilter/xt_multiport.c         |  8 ++---
 net/netfilter/xt_owner.c             |  8 ++---
 net/netfilter/xt_physdev.c           |  4 +--
 net/netfilter/xt_pkttype.c           |  8 ++---
 net/netfilter/xt_policy.c            |  8 ++---
 net/netfilter/xt_quota.c             |  4 +--
 net/netfilter/xt_rateest.c           |  4 +--
 net/netfilter/xt_realm.c             |  2 +-
 net/netfilter/xt_recent.c            | 21 ++++++-------
 net/netfilter/xt_sctp.c              |  4 +--
 net/netfilter/xt_state.c             |  4 +--
 net/netfilter/xt_statistic.c         |  4 +--
 net/netfilter/xt_string.c            |  8 ++---
 net/netfilter/xt_tcpmss.c            |  4 +--
 net/netfilter/xt_tcpudp.c            | 12 ++++----
 net/netfilter/xt_time.c              |  4 +--
 net/netfilter/xt_u32.c               |  4 +--
 74 files changed, 225 insertions(+), 223 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 3770cd8a7b3..8b17c64bcd7 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -206,10 +206,10 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
 	li.u.log.logflags = info->bitmask;
 
 	if (info->bitmask & EBT_LOG_NFLOG)
-		nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li,
+		nf_log_packet(NFPROTO_BRIDGE, hooknr, skb, in, out, &li,
 			      "%s", info->prefix);
 	else
-		ebt_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li,
+		ebt_log_packet(NFPROTO_BRIDGE, hooknr, skb, in, out, &li,
 			       info->prefix);
 }
 
@@ -234,7 +234,7 @@ static int __init ebt_log_init(void)
 	ret = ebt_register_watcher(&log);
 	if (ret < 0)
 		return ret;
-	nf_log_register(PF_BRIDGE, &ebt_log_logger);
+	nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
 	return 0;
 }
 
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index c84bda61afb..3b1678cd66f 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -310,7 +310,7 @@ static int __init ebt_ulog_init(void)
 		netlink_kernel_release(ebtulognl);
 
 	if (ret == 0)
-		nf_log_register(PF_BRIDGE, &ebt_ulog_logger);
+		nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
 
 	return ret;
 }
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 03e83a65aec..b4a9a1799c9 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -463,7 +463,8 @@ static inline int check_target(struct arpt_entry *e, const char *name)
 	t = arpt_get_target(e);
 	target = t->u.kernel.target;
 
-	ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t),
+	ret = xt_check_target(target, NFPROTO_ARP,
+			      t->u.target_size - sizeof(*t),
 			      name, e->comefrom, 0, 0);
 	if (!ret && t->u.kernel.target->checkentry
 	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
@@ -488,7 +489,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
 		return ret;
 
 	t = arpt_get_target(e);
-	target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
+	target = try_then_request_module(xt_find_target(NFPROTO_ARP,
+							t->u.user.name,
 							t->u.user.revision),
 					 "arpt_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
@@ -788,7 +790,7 @@ static void compat_standard_from_user(void *dst, void *src)
 	int v = *(compat_int_t *)src;
 
 	if (v > 0)
-		v += xt_compat_calc_jump(NF_ARP, v);
+		v += xt_compat_calc_jump(NFPROTO_ARP, v);
 	memcpy(dst, &v, sizeof(v));
 }
 
@@ -797,7 +799,7 @@ static int compat_standard_to_user(void __user *dst, void *src)
 	compat_int_t cv = *(int *)src;
 
 	if (cv > 0)
-		cv -= xt_compat_calc_jump(NF_ARP, cv);
+		cv -= xt_compat_calc_jump(NFPROTO_ARP, cv);
 	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
 }
 
@@ -815,7 +817,7 @@ static int compat_calc_entry(struct arpt_entry *e,
 	t = arpt_get_target(e);
 	off += xt_compat_target_offset(t->u.kernel.target);
 	newinfo->size -= off;
-	ret = xt_compat_add_offset(NF_ARP, entry_offset, off);
+	ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
 	if (ret)
 		return ret;
 
@@ -866,9 +868,9 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
 	name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
 #ifdef CONFIG_COMPAT
 	if (compat)
-		xt_compat_lock(NF_ARP);
+		xt_compat_lock(NFPROTO_ARP);
 #endif
-	t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name),
+	t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
 				    "arptable_%s", name);
 	if (t && !IS_ERR(t)) {
 		struct arpt_getinfo info;
@@ -878,7 +880,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
 		if (compat) {
 			struct xt_table_info tmp;
 			ret = compat_table_info(private, &tmp);
-			xt_compat_flush_offsets(NF_ARP);
+			xt_compat_flush_offsets(NFPROTO_ARP);
 			private = &tmp;
 		}
 #endif
@@ -901,7 +903,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
 		ret = t ? PTR_ERR(t) : -ENOENT;
 #ifdef CONFIG_COMPAT
 	if (compat)
-		xt_compat_unlock(NF_ARP);
+		xt_compat_unlock(NFPROTO_ARP);
 #endif
 	return ret;
 }
@@ -925,7 +927,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
 		return -EINVAL;
 	}
 
-	t = xt_find_table_lock(net, NF_ARP, get.name);
+	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
 	if (t && !IS_ERR(t)) {
 		const struct xt_table_info *private = t->private;
 
@@ -967,7 +969,7 @@ static int __do_replace(struct net *net, const char *name,
 		goto out;
 	}
 
-	t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name),
+	t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
 				    "arptable_%s", name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1134,7 +1136,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	t = xt_find_table_lock(net, NF_ARP, name);
+	t = xt_find_table_lock(net, NFPROTO_ARP, name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
@@ -1218,7 +1220,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
 	entry_offset = (void *)e - (void *)base;
 
 	t = compat_arpt_get_target(e);
-	target = try_then_request_module(xt_find_target(NF_ARP,
+	target = try_then_request_module(xt_find_target(NFPROTO_ARP,
 							t->u.user.name,
 							t->u.user.revision),
 					 "arpt_%s", t->u.user.name);
@@ -1232,7 +1234,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
 
 	off += xt_compat_target_offset(target);
 	*size += off;
-	ret = xt_compat_add_offset(NF_ARP, entry_offset, off);
+	ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
 	if (ret)
 		goto release_target;
 
@@ -1333,7 +1335,7 @@ static int translate_compat_table(const char *name,
 
 	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
-	xt_compat_lock(NF_ARP);
+	xt_compat_lock(NFPROTO_ARP);
 	/* Walk through entries, checking offsets. */
 	ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
 					check_compat_entry_size_and_hooks,
@@ -1383,8 +1385,8 @@ static int translate_compat_table(const char *name,
 	ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
 					compat_copy_entry_from_user,
 					&pos, &size, name, newinfo, entry1);
-	xt_compat_flush_offsets(NF_ARP);
-	xt_compat_unlock(NF_ARP);
+	xt_compat_flush_offsets(NFPROTO_ARP);
+	xt_compat_unlock(NFPROTO_ARP);
 	if (ret)
 		goto free_newinfo;
 
@@ -1420,8 +1422,8 @@ out:
 	COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
 	return ret;
 out_unlock:
-	xt_compat_flush_offsets(NF_ARP);
-	xt_compat_unlock(NF_ARP);
+	xt_compat_flush_offsets(NFPROTO_ARP);
+	xt_compat_unlock(NFPROTO_ARP);
 	goto out;
 }
 
@@ -1607,8 +1609,8 @@ static int compat_get_entries(struct net *net,
 		return -EINVAL;
 	}
 
-	xt_compat_lock(NF_ARP);
-	t = xt_find_table_lock(net, NF_ARP, get.name);
+	xt_compat_lock(NFPROTO_ARP);
+	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
 	if (t && !IS_ERR(t)) {
 		const struct xt_table_info *private = t->private;
 		struct xt_table_info info;
@@ -1623,13 +1625,13 @@ static int compat_get_entries(struct net *net,
 				 private->size, get.size);
 			ret = -EAGAIN;
 		}
-		xt_compat_flush_offsets(NF_ARP);
+		xt_compat_flush_offsets(NFPROTO_ARP);
 		module_put(t->me);
 		xt_table_unlock(t);
 	} else
 		ret = t ? PTR_ERR(t) : -ENOENT;
 
-	xt_compat_unlock(NF_ARP);
+	xt_compat_unlock(NFPROTO_ARP);
 	return ret;
 }
 
@@ -1709,7 +1711,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 			break;
 		}
 
-		try_then_request_module(xt_find_revision(NF_ARP, rev.name,
+		try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
 							 rev.revision, 1, &ret),
 					"arpt_%s", rev.name);
 		break;
@@ -1787,7 +1789,7 @@ void arpt_unregister_table(struct xt_table *table)
 static struct xt_target arpt_standard_target __read_mostly = {
 	.name		= ARPT_STANDARD_TARGET,
 	.targetsize	= sizeof(int),
-	.family		= NF_ARP,
+	.family		= NFPROTO_ARP,
 #ifdef CONFIG_COMPAT
 	.compatsize	= sizeof(compat_int_t),
 	.compat_from_user = compat_standard_from_user,
@@ -1799,7 +1801,7 @@ static struct xt_target arpt_error_target __read_mostly = {
 	.name		= ARPT_ERROR_TARGET,
 	.target		= arpt_error,
 	.targetsize	= ARPT_FUNCTION_MAXNAMELEN,
-	.family		= NF_ARP,
+	.family		= NFPROTO_ARP,
 };
 
 static struct nf_sockopt_ops arpt_sockopts = {
@@ -1821,12 +1823,12 @@ static struct nf_sockopt_ops arpt_sockopts = {
 
 static int __net_init arp_tables_net_init(struct net *net)
 {
-	return xt_proto_init(net, NF_ARP);
+	return xt_proto_init(net, NFPROTO_ARP);
 }
 
 static void __net_exit arp_tables_net_exit(struct net *net)
 {
-	xt_proto_fini(net, NF_ARP);
+	xt_proto_fini(net, NFPROTO_ARP);
 }
 
 static struct pernet_operations arp_tables_net_ops = {
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index a385959d265..3f9e4ccd616 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -75,7 +75,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target,
 
 static struct xt_target arpt_mangle_reg __read_mostly = {
 	.name		= "mangle",
-	.family		= NF_ARP,
+	.family		= NFPROTO_ARP,
 	.target		= target,
 	.targetsize	= sizeof(struct arpt_mangle),
 	.checkentry	= checkentry,
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 082f5dd3156..bee3d117661 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -51,7 +51,7 @@ static struct xt_table packet_filter = {
 	.lock		= __RW_LOCK_UNLOCKED(packet_filter.lock),
 	.private	= NULL,
 	.me		= THIS_MODULE,
-	.af		= NF_ARP,
+	.af		= NFPROTO_ARP,
 };
 
 /* The work comes in here from netfilter.c */
@@ -89,21 +89,21 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = {
 	{
 		.hook		= arpt_in_hook,
 		.owner		= THIS_MODULE,
-		.pf		= NF_ARP,
+		.pf		= NFPROTO_ARP,
 		.hooknum	= NF_ARP_IN,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 	{
 		.hook		= arpt_out_hook,
 		.owner		= THIS_MODULE,
-		.pf		= NF_ARP,
+		.pf		= NFPROTO_ARP,
 		.hooknum	= NF_ARP_OUT,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 	{
 		.hook		= arpt_forward_hook,
 		.owner		= THIS_MODULE,
-		.pf		= NF_ARP,
+		.pf		= NFPROTO_ARP,
 		.hooknum	= NF_ARP_FORWARD,
 		.priority	= NF_IP_PRI_FILTER,
 	},
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index fafe8ebb4c5..63faddc18a1 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -445,7 +445,7 @@ struct compat_ipt_clusterip_tgt_info
 
 static struct xt_target clusterip_tg_reg __read_mostly = {
 	.name		= "CLUSTERIP",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.target		= clusterip_tg,
 	.checkentry	= clusterip_tg_check,
 	.destroy	= clusterip_tg_destroy,
@@ -546,7 +546,7 @@ arp_mangle(unsigned int hook,
 
 static struct nf_hook_ops cip_arp_ops __read_mostly = {
 	.hook = arp_mangle,
-	.pf = NF_ARP,
+	.pf = NFPROTO_ARP,
 	.hooknum = NF_ARP_OUT,
 	.priority = -1
 };
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index d60139c134c..aee2364afff 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -124,7 +124,7 @@ ecn_tg_check(const char *tablename, const void *e_void,
 
 static struct xt_target ecn_tg_reg __read_mostly = {
 	.name		= "ECN",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.target		= ecn_tg,
 	.targetsize	= sizeof(struct ipt_ECN_info),
 	.table		= "mangle",
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 9330ba3577e..1c9785df4df 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -437,7 +437,7 @@ log_tg(struct sk_buff *skb, const struct net_device *in,
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	ipt_log_packet(PF_INET, hooknum, skb, in, out, &li,
+	ipt_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, &li,
 		       loginfo->prefix);
 	return XT_CONTINUE;
 }
@@ -463,7 +463,7 @@ log_tg_check(const char *tablename, const void *e,
 
 static struct xt_target log_tg_reg __read_mostly = {
 	.name		= "LOG",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.target		= log_tg,
 	.targetsize	= sizeof(struct ipt_log_info),
 	.checkentry	= log_tg_check,
@@ -483,7 +483,7 @@ static int __init log_tg_init(void)
 	ret = xt_register_target(&log_tg_reg);
 	if (ret < 0)
 		return ret;
-	nf_log_register(PF_INET, &ipt_log_logger);
+	nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
 	return 0;
 }
 
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 0841aefaa50..9a4822f8243 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -153,7 +153,7 @@ static struct notifier_block masq_inet_notifier = {
 
 static struct xt_target masquerade_tg_reg __read_mostly = {
 	.name		= "MASQUERADE",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.target		= masquerade_tg,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 6739abfd152..f281500bd7f 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -75,7 +75,7 @@ netmap_tg(struct sk_buff *skb, const struct net_device *in,
 
 static struct xt_target netmap_tg_reg __read_mostly = {
 	.name 		= "NETMAP",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.target 	= netmap_tg,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 5c6292449d1..ef496105eae 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -92,7 +92,7 @@ redirect_tg(struct sk_buff *skb, const struct net_device *in,
 
 static struct xt_target redirect_tg_reg __read_mostly = {
 	.name		= "REDIRECT",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.target		= redirect_tg,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 2639872849d..9f5da0c2cae 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -201,7 +201,7 @@ reject_tg_check(const char *tablename, const void *e_void,
 
 static struct xt_target reject_tg_reg __read_mostly = {
 	.name		= "REJECT",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.target		= reject_tg,
 	.targetsize	= sizeof(struct ipt_reject_info),
 	.table		= "filter",
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 30eed65e733..7d01d424a71 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -80,7 +80,7 @@ ttl_tg_check(const char *tablename, const void *e,
 
 static struct xt_target ttl_tg_reg __read_mostly = {
 	.name 		= "TTL",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.target 	= ttl_tg,
 	.targetsize	= sizeof(struct ipt_TTL_info),
 	.table		= "mangle",
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d8241e6b077..9065e4a34fb 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -374,7 +374,7 @@ static int ulog_tg_compat_to_user(void __user *dst, void *src)
 
 static struct xt_target ulog_tg_reg __read_mostly = {
 	.name		= "ULOG",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.target		= ulog_tg,
 	.targetsize	= sizeof(struct ipt_ulog_info),
 	.checkentry	= ulog_tg_check,
@@ -419,7 +419,7 @@ static int __init ulog_tg_init(void)
 		return ret;
 	}
 	if (nflog)
-		nf_log_register(PF_INET, &ipt_ulog_logger);
+		nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
 
 	return 0;
 }
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 462a22c9787..2c9d88a6c83 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -108,14 +108,14 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void,
 static struct xt_match addrtype_mt_reg[] __read_mostly = {
 	{
 		.name		= "addrtype",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= addrtype_mt_v0,
 		.matchsize	= sizeof(struct ipt_addrtype_info),
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "addrtype",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.revision	= 1,
 		.match		= addrtype_mt_v1,
 		.checkentry	= addrtype_mt_checkentry_v1,
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index e977989629c..e2e993edd66 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -83,7 +83,7 @@ ah_mt_check(const char *tablename, const void *ip_void,
 
 static struct xt_match ah_mt_reg __read_mostly = {
 	.name		= "ah",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.match		= ah_mt,
 	.matchsize	= sizeof(struct ipt_ah),
 	.proto		= IPPROTO_AH,
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 749de8284ce..2c45b4be7c3 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -114,7 +114,7 @@ ecn_mt_check(const char *tablename, const void *ip_void,
 
 static struct xt_match ecn_mt_reg __read_mostly = {
 	.name		= "ecn",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.match		= ecn_mt,
 	.matchsize	= sizeof(struct ipt_ecn_info),
 	.checkentry	= ecn_mt_check,
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index e0b8caeb710..d4c3fdc2a79 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -46,7 +46,7 @@ ttl_mt(const struct sk_buff *skb, const struct net_device *in,
 
 static struct xt_match ttl_mt_reg __read_mostly = {
 	.name		= "ttl",
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.match		= ttl_mt,
 	.matchsize	= sizeof(struct ipt_ttl_info),
 	.me		= THIS_MODULE,
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index d5f8fd5f29d..7eebd350916 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -78,7 +78,7 @@ hl_tg6_check(const char *tablename, const void *entry,
 
 static struct xt_target hl_tg6_reg __read_mostly = {
 	.name 		= "HL",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.target		= hl_tg6,
 	.targetsize	= sizeof(struct ip6t_HL_info),
 	.table		= "mangle",
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0716f8afa0b..fd148f3d842 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -449,7 +449,8 @@ log_tg6(struct sk_buff *skb, const struct net_device *in,
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix);
+	ip6t_log_packet(NFPROTO_IPV6, hooknum, skb, in, out,
+			&li, loginfo->prefix);
 	return XT_CONTINUE;
 }
 
@@ -475,7 +476,7 @@ log_tg6_check(const char *tablename, const void *entry,
 
 static struct xt_target log_tg6_reg __read_mostly = {
 	.name 		= "LOG",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.target 	= log_tg6,
 	.targetsize	= sizeof(struct ip6t_log_info),
 	.checkentry	= log_tg6_check,
@@ -495,7 +496,7 @@ static int __init log_tg6_init(void)
 	ret = xt_register_target(&log_tg6_reg);
 	if (ret < 0)
 		return ret;
-	nf_log_register(PF_INET6, &ip6t_logger);
+	nf_log_register(NFPROTO_IPV6, &ip6t_logger);
 	return 0;
 }
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 44c8d65a243..672ad9ff3e2 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -237,7 +237,7 @@ reject_tg6_check(const char *tablename, const void *entry,
 
 static struct xt_target reject_tg6_reg __read_mostly = {
 	.name		= "REJECT",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.target		= reject_tg6,
 	.targetsize	= sizeof(struct ip6t_reject_info),
 	.table		= "filter",
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 429629fd63b..061f89beeb6 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -110,7 +110,7 @@ ah_mt6_check(const char *tablename, const void *entry,
 
 static struct xt_match ah_mt6_reg __read_mostly = {
 	.name		= "ah",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.match		= ah_mt6,
 	.matchsize	= sizeof(struct ip6t_ah),
 	.checkentry	= ah_mt6_check,
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 8f331f12b2e..ba38df1116f 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -60,7 +60,7 @@ eui64_mt6(const struct sk_buff *skb, const struct net_device *in,
 
 static struct xt_match eui64_mt6_reg __read_mostly = {
 	.name		= "eui64",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.match		= eui64_mt6,
 	.matchsize	= sizeof(int),
 	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index e2bbc63dba5..972f699af22 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -127,7 +127,7 @@ frag_mt6_check(const char *tablename, const void *ip,
 
 static struct xt_match frag_mt6_reg __read_mostly = {
 	.name		= "frag",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.match		= frag_mt6,
 	.matchsize	= sizeof(struct ip6t_frag),
 	.checkentry	= frag_mt6_check,
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 26654b26d7f..d5edb51a595 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -187,7 +187,7 @@ hbh_mt6_check(const char *tablename, const void *entry,
 static struct xt_match hbh_mt6_reg[] __read_mostly = {
 	{
 		.name		= "hbh",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= hbh_mt6,
 		.matchsize	= sizeof(struct ip6t_opts),
 		.checkentry	= hbh_mt6_check,
@@ -196,7 +196,7 @@ static struct xt_match hbh_mt6_reg[] __read_mostly = {
 	},
 	{
 		.name		= "dst",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= hbh_mt6,
 		.matchsize	= sizeof(struct ip6t_opts),
 		.checkentry	= hbh_mt6_check,
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 34567167384..25c1eb92fac 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -51,7 +51,7 @@ hl_mt6(const struct sk_buff *skb, const struct net_device *in,
 
 static struct xt_match hl_mt6_reg __read_mostly = {
 	.name		= "hl",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.match		= hl_mt6,
 	.matchsize	= sizeof(struct ip6t_hl_info),
 	.me		= THIS_MODULE,
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 317a8960a75..ef0661aacea 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -138,7 +138,7 @@ ipv6header_mt6_check(const char *tablename, const void *ip,
 
 static struct xt_match ipv6header_mt6_reg __read_mostly = {
 	.name		= "ipv6header",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.match		= ipv6header_mt6,
 	.matchsize	= sizeof(struct ip6t_ipv6header_info),
 	.checkentry	= ipv6header_mt6_check,
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index e06678d07ec..dd876274ff6 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -84,7 +84,7 @@ mh_mt6_check(const char *tablename, const void *entry,
 
 static struct xt_match mh_mt6_reg __read_mostly = {
 	.name		= "mh",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.checkentry	= mh_mt6_check,
 	.match		= mh_mt6,
 	.matchsize	= sizeof(struct ip6t_mh),
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 81aaf7aaaab..7c544ae591d 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -214,7 +214,7 @@ rt_mt6_check(const char *tablename, const void *entry,
 
 static struct xt_match rt_mt6_reg __read_mostly = {
 	.name		= "rt",
-	.family		= AF_INET6,
+	.family		= NFPROTO_IPV6,
 	.match		= rt_mt6,
 	.matchsize	= sizeof(struct ip6t_rt),
 	.checkentry	= rt_mt6_check,
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 77a52bf8322..9d68da1748b 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -39,7 +39,7 @@ classify_tg(struct sk_buff *skb, const struct net_device *in,
 
 static struct xt_target classify_tg_reg[] __read_mostly = {
 	{
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.name 		= "CLASSIFY",
 		.target 	= classify_tg,
 		.targetsize	= sizeof(struct xt_classify_target_info),
@@ -51,7 +51,7 @@ static struct xt_target classify_tg_reg[] __read_mostly = {
 	},
 	{
 		.name 		= "CLASSIFY",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.target 	= classify_tg,
 		.targetsize	= sizeof(struct xt_classify_target_info),
 		.table		= "mangle",
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 5fecfb4794b..e72e5d01752 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -197,7 +197,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "CONNMARK",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= connmark_tg_check_v0,
 		.destroy	= connmark_tg_destroy,
 		.target		= connmark_tg_v0,
@@ -212,7 +212,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "CONNMARK",
 		.revision	= 0,
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= connmark_tg_check_v0,
 		.destroy	= connmark_tg_destroy,
 		.target		= connmark_tg_v0,
@@ -227,7 +227,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
 	{
 		.name           = "CONNMARK",
 		.revision       = 1,
-		.family         = AF_INET,
+		.family         = NFPROTO_IPV4,
 		.checkentry     = connmark_tg_check,
 		.target         = connmark_tg,
 		.targetsize     = sizeof(struct xt_connmark_tginfo1),
@@ -237,7 +237,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
 	{
 		.name           = "CONNMARK",
 		.revision       = 1,
-		.family         = AF_INET6,
+		.family         = NFPROTO_IPV6,
 		.checkentry     = connmark_tg_check,
 		.target         = connmark_tg,
 		.targetsize     = sizeof(struct xt_connmark_tginfo1),
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 76ca1f2421e..ae939e54dfa 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -127,7 +127,7 @@ connsecmark_tg_destroy(const struct xt_target *target, void *targinfo)
 static struct xt_target connsecmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "CONNSECMARK",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= connsecmark_tg_check,
 		.destroy	= connsecmark_tg_destroy,
 		.target		= connsecmark_tg,
@@ -136,7 +136,7 @@ static struct xt_target connsecmark_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "CONNSECMARK",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= connsecmark_tg_check,
 		.destroy	= connsecmark_tg_destroy,
 		.target		= connsecmark_tg,
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 97efd74c04f..f0b4958528e 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -165,7 +165,7 @@ tos_tg6(struct sk_buff *skb, const struct net_device *in,
 static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "DSCP",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= dscp_tg_check,
 		.target		= dscp_tg,
 		.targetsize	= sizeof(struct xt_DSCP_info),
@@ -174,7 +174,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "DSCP",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= dscp_tg_check,
 		.target		= dscp_tg6,
 		.targetsize	= sizeof(struct xt_DSCP_info),
@@ -184,7 +184,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "TOS",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.table		= "mangle",
 		.target		= tos_tg_v0,
 		.targetsize	= sizeof(struct ipt_tos_target_info),
@@ -194,7 +194,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "TOS",
 		.revision	= 1,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.table		= "mangle",
 		.target		= tos_tg,
 		.targetsize	= sizeof(struct xt_tos_target_info),
@@ -203,7 +203,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 	{
 		.name		= "TOS",
 		.revision	= 1,
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.table		= "mangle",
 		.target		= tos_tg6,
 		.targetsize	= sizeof(struct xt_tos_target_info),
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index f9ce20b5898..55ef0796c76 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -161,7 +161,7 @@ static int mark_tg_compat_to_user_v1(void __user *dst, void *src)
 static struct xt_target mark_tg_reg[] __read_mostly = {
 	{
 		.name		= "MARK",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.revision	= 0,
 		.checkentry	= mark_tg_check_v0,
 		.target		= mark_tg_v0,
@@ -176,7 +176,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "MARK",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.revision	= 1,
 		.checkentry	= mark_tg_check_v1,
 		.target		= mark_tg_v1,
@@ -191,7 +191,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "MARK",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.revision	= 0,
 		.checkentry	= mark_tg_check_v0,
 		.target		= mark_tg_v0,
@@ -206,7 +206,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "MARK",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.revision	= 1,
 		.checkentry	= mark_tg_check_v1,
 		.target		= mark_tg_v1,
@@ -222,7 +222,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 	{
 		.name           = "MARK",
 		.revision       = 2,
-		.family         = AF_INET,
+		.family         = NFPROTO_IPV4,
 		.target         = mark_tg,
 		.targetsize     = sizeof(struct xt_mark_tginfo2),
 		.me             = THIS_MODULE,
@@ -230,7 +230,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 	{
 		.name           = "MARK",
 		.revision       = 2,
-		.family         = AF_INET6,
+		.family         = NFPROTO_IPV6,
 		.target         = mark_tg,
 		.targetsize     = sizeof(struct xt_mark_tginfo2),
 		.me             = THIS_MODULE,
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 19ae8efae65..9b095520176 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -55,7 +55,7 @@ nflog_tg_check(const char *tablename, const void *entry,
 static struct xt_target nflog_tg_reg[] __read_mostly = {
 	{
 		.name		= "NFLOG",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= nflog_tg_check,
 		.target		= nflog_tg,
 		.targetsize	= sizeof(struct xt_nflog_info),
@@ -63,7 +63,7 @@ static struct xt_target nflog_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "NFLOG",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= nflog_tg_check,
 		.target		= nflog_tg,
 		.targetsize	= sizeof(struct xt_nflog_info),
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index beb24d19a56..c03c2e8d06f 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -36,14 +36,14 @@ nfqueue_tg(struct sk_buff *skb, const struct net_device *in,
 static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	{
 		.name		= "NFQUEUE",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "NFQUEUE",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.target		= nfqueue_tg,
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 6c9de611eb8..b9ee268b37c 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -35,14 +35,14 @@ notrack_tg(struct sk_buff *skb, const struct net_device *in,
 static struct xt_target notrack_tg_reg[] __read_mostly = {
 	{
 		.name		= "NOTRACK",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.target		= notrack_tg,
 		.table		= "raw",
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "NOTRACK",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.target		= notrack_tg,
 		.table		= "raw",
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 64d6ad38029..f7114fc5cfc 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -159,7 +159,7 @@ static void xt_rateest_tg_destroy(const struct xt_target *target,
 
 static struct xt_target xt_rateest_target[] __read_mostly = {
 	{
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.name		= "RATEEST",
 		.target		= xt_rateest_tg,
 		.checkentry	= xt_rateest_tg_checkentry,
@@ -168,7 +168,7 @@ static struct xt_target xt_rateest_target[] __read_mostly = {
 		.me		= THIS_MODULE,
 	},
 	{
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.name		= "RATEEST",
 		.target		= xt_rateest_tg,
 		.checkentry	= xt_rateest_tg_checkentry,
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 94f87ee7552..8f8f57b93a6 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -128,7 +128,7 @@ static void secmark_tg_destroy(const struct xt_target *target, void *targinfo)
 static struct xt_target secmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "SECMARK",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= secmark_tg_check,
 		.destroy	= secmark_tg_destroy,
 		.target		= secmark_tg,
@@ -137,7 +137,7 @@ static struct xt_target secmark_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "SECMARK",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= secmark_tg_check,
 		.destroy	= secmark_tg_destroy,
 		.target		= secmark_tg,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index beb5094703c..b868f995239 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -289,7 +289,7 @@ tcpmss_tg6_check(const char *tablename, const void *entry,
 
 static struct xt_target tcpmss_tg_reg[] __read_mostly = {
 	{
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.name		= "TCPMSS",
 		.checkentry	= tcpmss_tg4_check,
 		.target		= tcpmss_tg4,
@@ -299,7 +299,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = {
 	},
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	{
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.name		= "TCPMSS",
 		.checkentry	= tcpmss_tg6_check,
 		.target		= tcpmss_tg6,
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 9685b6fcbc8..2e0ae6cc5d9 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -106,7 +106,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in,
 static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
 	{
 		.name       = "TCPOPTSTRIP",
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.table      = "mangle",
 		.proto      = IPPROTO_TCP,
 		.target     = tcpoptstrip_tg4,
@@ -116,7 +116,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
 #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
 	{
 		.name       = "TCPOPTSTRIP",
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.table      = "mangle",
 		.proto      = IPPROTO_TCP,
 		.target     = tcpoptstrip_tg6,
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 30dab79a343..e1bcad57914 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -22,14 +22,14 @@ trace_tg(struct sk_buff *skb, const struct net_device *in,
 static struct xt_target trace_tg_reg[] __read_mostly = {
 	{
 		.name		= "TRACE",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.target		= trace_tg,
 		.table		= "raw",
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "TRACE",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.target		= trace_tg,
 		.table		= "raw",
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 89f47364e84..fa211b2ab87 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -28,14 +28,14 @@ comment_mt(const struct sk_buff *skb, const struct net_device *in,
 static struct xt_match comment_mt_reg[] __read_mostly = {
 	{
 		.name		= "comment",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= comment_mt,
 		.matchsize	= sizeof(struct xt_comment_info),
 		.me		= THIS_MODULE
 	},
 	{
 		.name		= "comment",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= comment_mt,
 		.matchsize	= sizeof(struct xt_comment_info),
 		.me		= THIS_MODULE
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 3e39c4fe193..d2cd22a49c9 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -130,7 +130,7 @@ connbytes_mt_destroy(const struct xt_match *match, void *matchinfo)
 static struct xt_match connbytes_mt_reg[] __read_mostly = {
 	{
 		.name		= "connbytes",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= connbytes_mt_check,
 		.match		= connbytes_mt,
 		.destroy	= connbytes_mt_destroy,
@@ -139,7 +139,7 @@ static struct xt_match connbytes_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "connbytes",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= connbytes_mt_check,
 		.match		= connbytes_mt,
 		.destroy	= connbytes_mt_destroy,
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 1655e2cf25c..d2453d182d6 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -84,7 +84,7 @@ same_source_net(const union nf_inet_addr *addr,
 		const union nf_inet_addr *mask,
 		const union nf_inet_addr *u3, u_int8_t family)
 {
-	if (family == AF_INET) {
+	if (family == NFPROTO_IPV4) {
 		return (addr->ip & mask->ip) == (u3->ip & mask->ip);
 	} else {
 		union nf_inet_addr lh, rh;
@@ -114,7 +114,7 @@ static int count_them(struct xt_connlimit_data *data,
 	int matches = 0;
 
 
-	if (match->family == AF_INET6)
+	if (match->family == NFPROTO_IPV6)
 		hash = &data->iphash[connlimit_iphash6(addr, mask)];
 	else
 		hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)];
@@ -198,7 +198,7 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
 				    match->family, &tuple))
 		goto hotdrop;
 
-	if (match->family == AF_INET6) {
+	if (match->family == NFPROTO_IPV6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
 		memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
 	} else {
@@ -276,7 +276,7 @@ connlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
 static struct xt_match connlimit_mt_reg[] __read_mostly = {
 	{
 		.name       = "connlimit",
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.checkentry = connlimit_mt_check,
 		.match      = connlimit_mt,
 		.matchsize  = sizeof(struct xt_connlimit_info),
@@ -285,7 +285,7 @@ static struct xt_match connlimit_mt_reg[] __read_mostly = {
 	},
 	{
 		.name       = "connlimit",
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.checkentry = connlimit_mt_check,
 		.match      = connlimit_mt,
 		.matchsize  = sizeof(struct xt_connlimit_info),
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index aaa1b96691f..0577b8ff4e1 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -140,7 +140,7 @@ static struct xt_match connmark_mt_reg[] __read_mostly = {
 	{
 		.name		= "connmark",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= connmark_mt_check_v0,
 		.match		= connmark_mt_v0,
 		.destroy	= connmark_mt_destroy,
@@ -155,7 +155,7 @@ static struct xt_match connmark_mt_reg[] __read_mostly = {
 	{
 		.name		= "connmark",
 		.revision	= 0,
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= connmark_mt_check_v0,
 		.match		= connmark_mt_v0,
 		.destroy	= connmark_mt_destroy,
@@ -170,7 +170,7 @@ static struct xt_match connmark_mt_reg[] __read_mostly = {
 	{
 		.name           = "connmark",
 		.revision       = 1,
-		.family         = AF_INET,
+		.family         = NFPROTO_IPV4,
 		.checkentry     = connmark_mt_check,
 		.match          = connmark_mt,
 		.matchsize      = sizeof(struct xt_connmark_mtinfo1),
@@ -180,7 +180,7 @@ static struct xt_match connmark_mt_reg[] __read_mostly = {
 	{
 		.name           = "connmark",
 		.revision       = 1,
-		.family         = AF_INET6,
+		.family         = NFPROTO_IPV6,
 		.checkentry     = connmark_mt_check,
 		.match          = connmark_mt,
 		.matchsize      = sizeof(struct xt_connmark_mtinfo1),
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 28a42a3fbff..392b457f9c2 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -121,9 +121,9 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr,
                   const union nf_inet_addr *uaddr,
                   const union nf_inet_addr *umask, unsigned int l3proto)
 {
-	if (l3proto == AF_INET)
+	if (l3proto == NFPROTO_IPV4)
 		return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
-	else if (l3proto == AF_INET6)
+	else if (l3proto == NFPROTO_IPV6)
 		return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
 		       &uaddr->in6) == 0;
 	else
@@ -356,7 +356,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
 	{
 		.name       = "conntrack",
 		.revision   = 0,
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.match      = conntrack_mt_v0,
 		.checkentry = conntrack_mt_check,
 		.destroy    = conntrack_mt_destroy,
@@ -371,7 +371,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
 	{
 		.name       = "conntrack",
 		.revision   = 1,
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.matchsize  = sizeof(struct xt_conntrack_mtinfo1),
 		.match      = conntrack_mt,
 		.checkentry = conntrack_mt_check,
@@ -381,7 +381,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
 	{
 		.name       = "conntrack",
 		.revision   = 1,
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.matchsize  = sizeof(struct xt_conntrack_mtinfo1),
 		.match      = conntrack_mt,
 		.checkentry = conntrack_mt_check,
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 8b6522186d9..87971f47132 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -138,7 +138,7 @@ dccp_mt_check(const char *tablename, const void *inf,
 static struct xt_match dccp_mt_reg[] __read_mostly = {
 	{
 		.name 		= "dccp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= dccp_mt_check,
 		.match		= dccp_mt,
 		.matchsize	= sizeof(struct xt_dccp_info),
@@ -147,7 +147,7 @@ static struct xt_match dccp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name 		= "dccp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= dccp_mt_check,
 		.match		= dccp_mt,
 		.matchsize	= sizeof(struct xt_dccp_info),
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 26f4aab9c42..7f03aa13a95 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -80,7 +80,7 @@ static bool tos_mt(const struct sk_buff *skb, const struct net_device *in,
 {
 	const struct xt_tos_match_info *info = matchinfo;
 
-	if (match->family == AF_INET)
+	if (match->family == NFPROTO_IPV4)
 		return ((ip_hdr(skb)->tos & info->tos_mask) ==
 		       info->tos_value) ^ !!info->invert;
 	else
@@ -91,7 +91,7 @@ static bool tos_mt(const struct sk_buff *skb, const struct net_device *in,
 static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "dscp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= dscp_mt_check,
 		.match		= dscp_mt,
 		.matchsize	= sizeof(struct xt_dscp_info),
@@ -99,7 +99,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "dscp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= dscp_mt_check,
 		.match		= dscp_mt6,
 		.matchsize	= sizeof(struct xt_dscp_info),
@@ -108,7 +108,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "tos",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= tos_mt_v0,
 		.matchsize	= sizeof(struct ipt_tos_info),
 		.me		= THIS_MODULE,
@@ -116,7 +116,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "tos",
 		.revision	= 1,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= tos_mt,
 		.matchsize	= sizeof(struct xt_tos_match_info),
 		.me		= THIS_MODULE,
@@ -124,7 +124,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 	{
 		.name		= "tos",
 		.revision	= 1,
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= tos_mt,
 		.matchsize	= sizeof(struct xt_tos_match_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index a133eb9b23e..045c4deecaf 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -88,7 +88,7 @@ esp_mt_check(const char *tablename, const void *ip_void,
 static struct xt_match esp_mt_reg[] __read_mostly = {
 	{
 		.name		= "esp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= esp_mt_check,
 		.match		= esp_mt,
 		.matchsize	= sizeof(struct xt_esp),
@@ -97,7 +97,7 @@ static struct xt_match esp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "esp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= esp_mt_check,
 		.match		= esp_mt,
 		.matchsize	= sizeof(struct xt_esp),
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 0c9268fd2e1..7bae369603d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -218,7 +218,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
 	hinfo->cfg.gc_interval = minfo->cfg.gc_interval;
 	hinfo->cfg.expire      = minfo->cfg.expire;
 
-	if (family == AF_INET)
+	if (family == NFPROTO_IPV4)
 		hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32;
 	else
 		hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128;
@@ -237,11 +237,10 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
 	hinfo->family = family;
 	hinfo->rnd_initialized = 0;
 	spin_lock_init(&hinfo->lock);
-	hinfo->pde =
-		proc_create_data(minfo->name, 0,
-				 family == AF_INET ? hashlimit_procdir4 :
-						     hashlimit_procdir6,
-				 &dl_file_ops, hinfo);
+	hinfo->pde = proc_create_data(minfo->name, 0,
+		(family == NFPROTO_IPV4) ?
+		hashlimit_procdir4 : hashlimit_procdir6,
+		&dl_file_ops, hinfo);
 	if (!hinfo->pde) {
 		vfree(hinfo);
 		return -1;
@@ -300,11 +299,10 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
 	hinfo->rnd_initialized = 0;
 	spin_lock_init(&hinfo->lock);
 
-	hinfo->pde =
-		proc_create_data(minfo->name, 0,
-				 family == AF_INET ? hashlimit_procdir4 :
-						     hashlimit_procdir6,
-				 &dl_file_ops, hinfo);
+	hinfo->pde = proc_create_data(minfo->name, 0,
+		(family == NFPROTO_IPV4) ?
+		hashlimit_procdir4 : hashlimit_procdir6,
+		&dl_file_ops, hinfo);
 	if (hinfo->pde == NULL) {
 		vfree(hinfo);
 		return -1;
@@ -370,7 +368,7 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo)
 
 	/* remove proc entry */
 	remove_proc_entry(hinfo->pde->name,
-			  hinfo->family == AF_INET ? hashlimit_procdir4 :
+			  hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 :
 						     hashlimit_procdir6);
 	htable_selective_cleanup(hinfo, select_all);
 	vfree(hinfo);
@@ -501,7 +499,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 	memset(dst, 0, sizeof(*dst));
 
 	switch (hinfo->family) {
-	case AF_INET:
+	case NFPROTO_IPV4:
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
 			dst->ip.dst = maskl(ip_hdr(skb)->daddr,
 			              hinfo->cfg.dstmask);
@@ -515,7 +513,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 		nexthdr = ip_hdr(skb)->protocol;
 		break;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-	case AF_INET6:
+	case NFPROTO_IPV6:
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) {
 			memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr,
 			       sizeof(dst->ip6.dst));
@@ -737,7 +735,7 @@ hashlimit_mt_check(const char *tablename, const void *inf,
 		return false;
 	if (info->name[sizeof(info->name)-1] != '\0')
 		return false;
-	if (match->family == AF_INET) {
+	if (match->family == NFPROTO_IPV4) {
 		if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
 			return false;
 	} else {
@@ -805,7 +803,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 	{
 		.name		= "hashlimit",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= hashlimit_mt_v0,
 		.matchsize	= sizeof(struct xt_hashlimit_info),
 #ifdef CONFIG_COMPAT
@@ -820,7 +818,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 	{
 		.name           = "hashlimit",
 		.revision       = 1,
-		.family         = AF_INET,
+		.family         = NFPROTO_IPV4,
 		.match          = hashlimit_mt,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
 		.checkentry     = hashlimit_mt_check,
@@ -830,7 +828,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	{
 		.name		= "hashlimit",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= hashlimit_mt_v0,
 		.matchsize	= sizeof(struct xt_hashlimit_info),
 #ifdef CONFIG_COMPAT
@@ -845,7 +843,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 	{
 		.name           = "hashlimit",
 		.revision       = 1,
-		.family         = AF_INET6,
+		.family         = NFPROTO_IPV6,
 		.match          = hashlimit_mt,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
 		.checkentry     = hashlimit_mt_check,
@@ -907,7 +905,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 	rateinfo_recalc(ent, jiffies);
 
 	switch (family) {
-	case AF_INET:
+	case NFPROTO_IPV4:
 		return seq_printf(s, "%ld %u.%u.%u.%u:%u->"
 				     "%u.%u.%u.%u:%u %u %u %u\n",
 				 (long)(ent->expires - jiffies)/HZ,
@@ -918,7 +916,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 				 ent->rateinfo.credit, ent->rateinfo.credit_cap,
 				 ent->rateinfo.cost);
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-	case AF_INET6:
+	case NFPROTO_IPV6:
 		return seq_printf(s, "%ld " NIP6_FMT ":%u->"
 				     NIP6_FMT ":%u %u %u %u\n",
 				 (long)(ent->expires - jiffies)/HZ,
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index dada2905d66..134d94324eb 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -81,7 +81,7 @@ static void helper_mt_destroy(const struct xt_match *match, void *matchinfo)
 static struct xt_match helper_mt_reg[] __read_mostly = {
 	{
 		.name		= "helper",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= helper_mt_check,
 		.match		= helper_mt,
 		.destroy	= helper_mt_destroy,
@@ -90,7 +90,7 @@ static struct xt_match helper_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "helper",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= helper_mt_check,
 		.match		= helper_mt,
 		.destroy	= helper_mt_destroy,
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index c63e9333c75..a7498cc48dc 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -141,7 +141,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = {
 	{
 		.name      = "iprange",
 		.revision  = 0,
-		.family    = AF_INET,
+		.family    = NFPROTO_IPV4,
 		.match     = iprange_mt_v0,
 		.matchsize = sizeof(struct ipt_iprange_info),
 		.me        = THIS_MODULE,
@@ -149,7 +149,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = {
 	{
 		.name      = "iprange",
 		.revision  = 1,
-		.family    = AF_INET,
+		.family    = NFPROTO_IPV4,
 		.match     = iprange_mt4,
 		.matchsize = sizeof(struct xt_iprange_mtinfo),
 		.me        = THIS_MODULE,
@@ -157,7 +157,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = {
 	{
 		.name      = "iprange",
 		.revision  = 1,
-		.family    = AF_INET6,
+		.family    = NFPROTO_IPV6,
 		.match     = iprange_mt6,
 		.matchsize = sizeof(struct xt_iprange_mtinfo),
 		.me        = THIS_MODULE,
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index b8640f97295..b8612d1914b 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -48,14 +48,14 @@ length_mt6(const struct sk_buff *skb, const struct net_device *in,
 static struct xt_match length_mt_reg[] __read_mostly = {
 	{
 		.name		= "length",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= length_mt,
 		.matchsize	= sizeof(struct xt_length_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "length",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= length_mt6,
 		.matchsize	= sizeof(struct xt_length_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index aad9ab8d204..584d66893c4 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -170,7 +170,7 @@ static int limit_mt_compat_to_user(void __user *dst, void *src)
 static struct xt_match limit_mt_reg[] __read_mostly = {
 	{
 		.name		= "limit",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= limit_mt_check,
 		.match		= limit_mt,
 		.matchsize	= sizeof(struct xt_rateinfo),
@@ -183,7 +183,7 @@ static struct xt_match limit_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "limit",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= limit_mt_check,
 		.match		= limit_mt,
 		.matchsize	= sizeof(struct xt_rateinfo),
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index b3e96a0ec17..60db240098a 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -42,7 +42,7 @@ mac_mt(const struct sk_buff *skb, const struct net_device *in,
 static struct xt_match mac_mt_reg[] __read_mostly = {
 	{
 		.name		= "mac",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= mac_mt,
 		.matchsize	= sizeof(struct xt_mac_info),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
@@ -52,7 +52,7 @@ static struct xt_match mac_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "mac",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= mac_mt,
 		.matchsize	= sizeof(struct xt_mac_info),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 9f78f6120fb..c66affd5722 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -92,7 +92,7 @@ static struct xt_match mark_mt_reg[] __read_mostly = {
 	{
 		.name		= "mark",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= mark_mt_check_v0,
 		.match		= mark_mt_v0,
 		.matchsize	= sizeof(struct xt_mark_info),
@@ -106,7 +106,7 @@ static struct xt_match mark_mt_reg[] __read_mostly = {
 	{
 		.name		= "mark",
 		.revision	= 0,
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= mark_mt_check_v0,
 		.match		= mark_mt_v0,
 		.matchsize	= sizeof(struct xt_mark_info),
@@ -120,7 +120,7 @@ static struct xt_match mark_mt_reg[] __read_mostly = {
 	{
 		.name           = "mark",
 		.revision       = 1,
-		.family         = AF_INET,
+		.family         = NFPROTO_IPV4,
 		.match          = mark_mt,
 		.matchsize      = sizeof(struct xt_mark_mtinfo1),
 		.me             = THIS_MODULE,
@@ -128,7 +128,7 @@ static struct xt_match mark_mt_reg[] __read_mostly = {
 	{
 		.name           = "mark",
 		.revision       = 1,
-		.family         = AF_INET6,
+		.family         = NFPROTO_IPV6,
 		.match          = mark_mt,
 		.matchsize      = sizeof(struct xt_mark_mtinfo1),
 		.me             = THIS_MODULE,
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index fd88c489b70..f6fe008ab8c 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -216,7 +216,7 @@ multiport_mt6_check(const char *tablename, const void *info,
 static struct xt_match multiport_mt_reg[] __read_mostly = {
 	{
 		.name		= "multiport",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.revision	= 0,
 		.checkentry	= multiport_mt_check_v0,
 		.match		= multiport_mt_v0,
@@ -225,7 +225,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "multiport",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.revision	= 1,
 		.checkentry	= multiport_mt_check,
 		.match		= multiport_mt,
@@ -234,7 +234,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "multiport",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.revision	= 0,
 		.checkentry	= multiport_mt6_check_v0,
 		.match		= multiport_mt_v0,
@@ -243,7 +243,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "multiport",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.revision	= 1,
 		.checkentry	= multiport_mt6_check,
 		.match		= multiport_mt,
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 9059c16144c..d1c3b7ae9b4 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -153,7 +153,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = {
 	{
 		.name       = "owner",
 		.revision   = 0,
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.match      = owner_mt_v0,
 		.matchsize  = sizeof(struct ipt_owner_info),
 		.checkentry = owner_mt_check_v0,
@@ -164,7 +164,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = {
 	{
 		.name       = "owner",
 		.revision   = 0,
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.match      = owner_mt6_v0,
 		.matchsize  = sizeof(struct ip6t_owner_info),
 		.checkentry = owner_mt6_check_v0,
@@ -175,7 +175,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = {
 	{
 		.name       = "owner",
 		.revision   = 1,
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.match      = owner_mt,
 		.matchsize  = sizeof(struct xt_owner_match_info),
 		.hooks      = (1 << NF_INET_LOCAL_OUT) |
@@ -185,7 +185,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = {
 	{
 		.name       = "owner",
 		.revision   = 1,
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.match      = owner_mt,
 		.matchsize  = sizeof(struct xt_owner_match_info),
 		.hooks      = (1 << NF_INET_LOCAL_OUT) |
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 4ec1094bda9..72a0bdd53fa 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -121,7 +121,7 @@ physdev_mt_check(const char *tablename, const void *ip,
 static struct xt_match physdev_mt_reg[] __read_mostly = {
 	{
 		.name		= "physdev",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= physdev_mt_check,
 		.match		= physdev_mt,
 		.matchsize	= sizeof(struct xt_physdev_info),
@@ -129,7 +129,7 @@ static struct xt_match physdev_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "physdev",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= physdev_mt_check,
 		.match		= physdev_mt,
 		.matchsize	= sizeof(struct xt_physdev_info),
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 7936f7e2325..81e86d319a8 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -33,10 +33,10 @@ pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
 
 	if (skb->pkt_type != PACKET_LOOPBACK)
 		type = skb->pkt_type;
-	else if (match->family == AF_INET &&
+	else if (match->family == NFPROTO_IPV4 &&
 	    ipv4_is_multicast(ip_hdr(skb)->daddr))
 		type = PACKET_MULTICAST;
-	else if (match->family == AF_INET6 &&
+	else if (match->family == NFPROTO_IPV6 &&
 	    ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
 		type = PACKET_MULTICAST;
 	else
@@ -48,14 +48,14 @@ pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
 static struct xt_match pkttype_mt_reg[] __read_mostly = {
 	{
 		.name		= "pkttype",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= pkttype_mt,
 		.matchsize	= sizeof(struct xt_pkttype_info),
 		.me		= THIS_MODULE,
 	},
 	{
 		.name		= "pkttype",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= pkttype_mt,
 		.matchsize	= sizeof(struct xt_pkttype_info),
 		.me		= THIS_MODULE,
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index d351582b2a3..f1d514e9d0a 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -26,9 +26,9 @@ xt_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *m,
 	    const union nf_inet_addr *a2, unsigned short family)
 {
 	switch (family) {
-	case AF_INET:
+	case NFPROTO_IPV4:
 		return ((a1->ip ^ a2->ip) & m->ip) == 0;
-	case AF_INET6:
+	case NFPROTO_IPV6:
 		return ipv6_masked_addr_cmp(&a1->in6, &m->in6, &a2->in6) == 0;
 	}
 	return false;
@@ -165,7 +165,7 @@ policy_mt_check(const char *tablename, const void *ip_void,
 static struct xt_match policy_mt_reg[] __read_mostly = {
 	{
 		.name		= "policy",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry 	= policy_mt_check,
 		.match		= policy_mt,
 		.matchsize	= sizeof(struct xt_policy_info),
@@ -173,7 +173,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "policy",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= policy_mt_check,
 		.match		= policy_mt,
 		.matchsize	= sizeof(struct xt_policy_info),
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 3b021d0c522..59f61e32b62 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -57,7 +57,7 @@ quota_mt_check(const char *tablename, const void *entry,
 static struct xt_match quota_mt_reg[] __read_mostly = {
 	{
 		.name		= "quota",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= quota_mt_check,
 		.match		= quota_mt,
 		.matchsize	= sizeof(struct xt_quota_info),
@@ -65,7 +65,7 @@ static struct xt_match quota_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "quota",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= quota_mt_check,
 		.match		= quota_mt,
 		.matchsize	= sizeof(struct xt_quota_info),
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index ebd84f1b4f6..ba1cb5760f4 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -139,7 +139,7 @@ static void xt_rateest_mt_destroy(const struct xt_match *match,
 
 static struct xt_match xt_rateest_match[] __read_mostly = {
 	{
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.name		= "rateest",
 		.match		= xt_rateest_mt,
 		.checkentry	= xt_rateest_mt_checkentry,
@@ -148,7 +148,7 @@ static struct xt_match xt_rateest_match[] __read_mostly = {
 		.me		= THIS_MODULE,
 	},
 	{
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.name		= "rateest",
 		.match		= xt_rateest_mt,
 		.checkentry	= xt_rateest_mt_checkentry,
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 7df1627c536..ef65756d489 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -39,7 +39,7 @@ static struct xt_match realm_mt_reg __read_mostly = {
 	.matchsize	= sizeof(struct xt_realm_info),
 	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) |
 			  (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN),
-	.family		= AF_INET,
+	.family		= NFPROTO_IPV4,
 	.me		= THIS_MODULE
 };
 
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index adc2e2f1b09..4a916e2624d 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -124,7 +124,7 @@ recent_entry_lookup(const struct recent_table *table,
 	struct recent_entry *e;
 	unsigned int h;
 
-	if (family == AF_INET)
+	if (family == NFPROTO_IPV4)
 		h = recent_entry_hash4(addrp);
 	else
 		h = recent_entry_hash6(addrp);
@@ -165,7 +165,7 @@ recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
 	e->nstamps   = 1;
 	e->index     = 1;
 	e->family    = family;
-	if (family == AF_INET)
+	if (family == NFPROTO_IPV4)
 		list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]);
 	else
 		list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]);
@@ -216,7 +216,7 @@ recent_mt(const struct sk_buff *skb, const struct net_device *in,
 	u_int8_t ttl;
 	bool ret = info->invert;
 
-	if (match->family == AF_INET) {
+	if (match->family == NFPROTO_IPV4) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		if (info->side == XT_RECENT_DEST)
@@ -429,7 +429,7 @@ static int recent_seq_show(struct seq_file *seq, void *v)
 	unsigned int i;
 
 	i = (e->index - 1) % ip_pkt_list_tot;
-	if (e->family == AF_INET)
+	if (e->family == NFPROTO_IPV4)
 		seq_printf(seq, "src=" NIPQUAD_FMT " ttl: %u last_seen: %lu "
 			   "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl,
 			   e->stamps[i], e->index);
@@ -519,10 +519,11 @@ static ssize_t recent_old_proc_write(struct file *file,
 	addr = in_aton(c);
 
 	spin_lock_bh(&recent_lock);
-	e = recent_entry_lookup(t, (const void *)&addr, PF_INET, 0);
+	e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0);
 	if (e == NULL) {
 		if (add)
-			recent_entry_init(t, (const void *)&addr, PF_INET, 0);
+			recent_entry_init(t, (const void *)&addr,
+					  NFPROTO_IPV4, 0);
 	} else {
 		if (add)
 			recent_entry_update(t, e);
@@ -585,10 +586,10 @@ recent_mt_proc_write(struct file *file, const char __user *input,
 	++c;
 	--size;
 	if (strnchr(c, size, ':') != NULL) {
-		family = AF_INET6;
+		family = NFPROTO_IPV6;
 		succ   = in6_pton(c, size, (void *)&addr, '\n', NULL);
 	} else {
-		family = AF_INET;
+		family = NFPROTO_IPV4;
 		succ   = in4_pton(c, size, (void *)&addr, '\n', NULL);
 	}
 
@@ -628,7 +629,7 @@ static struct xt_match recent_mt_reg[] __read_mostly = {
 	{
 		.name       = "recent",
 		.revision   = 0,
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.match      = recent_mt,
 		.matchsize  = sizeof(struct xt_recent_mtinfo),
 		.checkentry = recent_mt_check,
@@ -638,7 +639,7 @@ static struct xt_match recent_mt_reg[] __read_mostly = {
 	{
 		.name       = "recent",
 		.revision   = 0,
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.match      = recent_mt,
 		.matchsize  = sizeof(struct xt_recent_mtinfo),
 		.checkentry = recent_mt_check,
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index e6e4681fa04..ab67aca4d8f 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -169,7 +169,7 @@ sctp_mt_check(const char *tablename, const void *inf,
 static struct xt_match sctp_mt_reg[] __read_mostly = {
 	{
 		.name		= "sctp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= sctp_mt_check,
 		.match		= sctp_mt,
 		.matchsize	= sizeof(struct xt_sctp_info),
@@ -178,7 +178,7 @@ static struct xt_match sctp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "sctp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= sctp_mt_check,
 		.match		= sctp_mt,
 		.matchsize	= sizeof(struct xt_sctp_info),
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index a776dc36a19..f92f8bcc1e3 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -61,7 +61,7 @@ static void state_mt_destroy(const struct xt_match *match, void *matchinfo)
 static struct xt_match state_mt_reg[] __read_mostly = {
 	{
 		.name		= "state",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= state_mt_check,
 		.match		= state_mt,
 		.destroy	= state_mt_destroy,
@@ -70,7 +70,7 @@ static struct xt_match state_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "state",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= state_mt_check,
 		.match		= state_mt,
 		.destroy	= state_mt_destroy,
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 43133080da7..fd3bb1400df 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -69,7 +69,7 @@ statistic_mt_check(const char *tablename, const void *entry,
 static struct xt_match statistic_mt_reg[] __read_mostly = {
 	{
 		.name		= "statistic",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= statistic_mt_check,
 		.match		= statistic_mt,
 		.matchsize	= sizeof(struct xt_statistic_info),
@@ -77,7 +77,7 @@ static struct xt_match statistic_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "statistic",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= statistic_mt_check,
 		.match		= statistic_mt,
 		.matchsize	= sizeof(struct xt_statistic_info),
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 4903182a062..50169718377 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -85,7 +85,7 @@ static struct xt_match string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
 		.revision	= 0,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
 		.destroy 	= string_mt_destroy,
@@ -95,7 +95,7 @@ static struct xt_match string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
 		.revision	= 1,
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
 		.destroy 	= string_mt_destroy,
@@ -105,7 +105,7 @@ static struct xt_match string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
 		.revision	= 0,
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
 		.destroy 	= string_mt_destroy,
@@ -115,7 +115,7 @@ static struct xt_match string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
 		.revision	= 1,
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
 		.destroy 	= string_mt_destroy,
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 6771bf01275..4791c7cbe5a 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -83,7 +83,7 @@ dropit:
 static struct xt_match tcpmss_mt_reg[] __read_mostly = {
 	{
 		.name		= "tcpmss",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.match		= tcpmss_mt,
 		.matchsize	= sizeof(struct xt_tcpmss_match_info),
 		.proto		= IPPROTO_TCP,
@@ -91,7 +91,7 @@ static struct xt_match tcpmss_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "tcpmss",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.match		= tcpmss_mt,
 		.matchsize	= sizeof(struct xt_tcpmss_match_info),
 		.proto		= IPPROTO_TCP,
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 951b06b8d70..5a6268cbb9f 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -186,7 +186,7 @@ udp_mt_check(const char *tablename, const void *info,
 static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	{
 		.name		= "tcp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= tcp_mt_check,
 		.match		= tcp_mt,
 		.matchsize	= sizeof(struct xt_tcp),
@@ -195,7 +195,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "tcp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= tcp_mt_check,
 		.match		= tcp_mt,
 		.matchsize	= sizeof(struct xt_tcp),
@@ -204,7 +204,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "udp",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= udp_mt_check,
 		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
@@ -213,7 +213,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "udp",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= udp_mt_check,
 		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
@@ -222,7 +222,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "udplite",
-		.family		= AF_INET,
+		.family		= NFPROTO_IPV4,
 		.checkentry	= udp_mt_check,
 		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
@@ -231,7 +231,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "udplite",
-		.family		= AF_INET6,
+		.family		= NFPROTO_IPV6,
 		.checkentry	= udp_mt_check,
 		.match		= udp_mt,
 		.matchsize	= sizeof(struct xt_udp),
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 307a2c3c2df..fe9dae2b4f5 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -240,7 +240,7 @@ time_mt_check(const char *tablename, const void *ip,
 static struct xt_match time_mt_reg[] __read_mostly = {
 	{
 		.name       = "time",
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.match      = time_mt,
 		.matchsize  = sizeof(struct xt_time_info),
 		.checkentry = time_mt_check,
@@ -248,7 +248,7 @@ static struct xt_match time_mt_reg[] __read_mostly = {
 	},
 	{
 		.name       = "time",
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.match      = time_mt,
 		.matchsize  = sizeof(struct xt_time_info),
 		.checkentry = time_mt_check,
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 627e0f336d5..ed9f8340611 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -102,14 +102,14 @@ u32_mt(const struct sk_buff *skb, const struct net_device *in,
 static struct xt_match u32_mt_reg[] __read_mostly = {
 	{
 		.name       = "u32",
-		.family     = AF_INET,
+		.family     = NFPROTO_IPV4,
 		.match      = u32_mt,
 		.matchsize  = sizeof(struct xt_u32),
 		.me         = THIS_MODULE,
 	},
 	{
 		.name       = "u32",
-		.family     = AF_INET6,
+		.family     = NFPROTO_IPV6,
 		.match      = u32_mt,
 		.matchsize  = sizeof(struct xt_u32),
 		.me         = THIS_MODULE,
-- 
cgit v1.2.3-70-g09d2


From 55b69e91040c685a064198bd76e59885b7ad26c6 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@computergmbh.de>
Date: Wed, 8 Oct 2008 11:35:01 +0200
Subject: netfilter: implement NFPROTO_UNSPEC as a wildcard for extensions

When a match or target is looked up using xt_find_{match,target},
Xtables will also search the NFPROTO_UNSPEC module list. This allows
for protocol-independent extensions (like xt_time) to be reused from
other components (e.g. arptables, ebtables).

Extensions that take different codepaths depending on match->family
or target->family of course cannot use NFPROTO_UNSPEC within the
registration structure (e.g. xt_pkttype).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/x_tables.c     | 10 ++++++++++
 net/netfilter/xt_CLASSIFY.c  | 38 ++++++++++++--------------------------
 net/netfilter/xt_MARK.c      | 10 +---------
 net/netfilter/xt_RATEEST.c   | 33 +++++++++++----------------------
 net/netfilter/xt_SECMARK.c   | 32 +++++++++++---------------------
 net/netfilter/xt_TRACE.c     | 26 +++++++++-----------------
 net/netfilter/xt_limit.c     | 40 +++++++++++++---------------------------
 net/netfilter/xt_mark.c      | 26 ++------------------------
 net/netfilter/xt_quota.c     | 29 ++++++++++-------------------
 net/netfilter/xt_rateest.c   | 33 +++++++++++----------------------
 net/netfilter/xt_statistic.c | 31 ++++++++++---------------------
 net/netfilter/xt_string.c    | 31 ++++++-------------------------
 net/netfilter/xt_time.c      | 28 +++++++++-------------------
 net/netfilter/xt_u32.c       | 26 +++++++++-----------------
 14 files changed, 124 insertions(+), 269 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2a7eb1da5d0..aece6c2d134 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -209,6 +209,11 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
 		}
 	}
 	mutex_unlock(&xt[af].mutex);
+
+	if (af != NFPROTO_UNSPEC)
+		/* Try searching again in the family-independent list */
+		return xt_find_match(NFPROTO_UNSPEC, name, revision);
+
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(xt_find_match);
@@ -234,6 +239,11 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
 		}
 	}
 	mutex_unlock(&xt[af].mutex);
+
+	if (af != NFPROTO_UNSPEC)
+		/* Try searching again in the family-independent list */
+		return xt_find_target(NFPROTO_UNSPEC, name, revision);
+
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(xt_find_target);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 9d68da1748b..8cffa295dd3 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -37,40 +37,26 @@ classify_tg(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static struct xt_target classify_tg_reg[] __read_mostly = {
-	{
-		.family		= NFPROTO_IPV4,
-		.name 		= "CLASSIFY",
-		.target 	= classify_tg,
-		.targetsize	= sizeof(struct xt_classify_target_info),
-		.table		= "mangle",
-		.hooks		= (1 << NF_INET_LOCAL_OUT) |
-				  (1 << NF_INET_FORWARD) |
-				  (1 << NF_INET_POST_ROUTING),
-		.me 		= THIS_MODULE,
-	},
-	{
-		.name 		= "CLASSIFY",
-		.family		= NFPROTO_IPV6,
-		.target 	= classify_tg,
-		.targetsize	= sizeof(struct xt_classify_target_info),
-		.table		= "mangle",
-		.hooks		= (1 << NF_INET_LOCAL_OUT) |
-				  (1 << NF_INET_FORWARD) |
-				  (1 << NF_INET_POST_ROUTING),
-		.me 		= THIS_MODULE,
-	},
+static struct xt_target classify_tg_reg __read_mostly = {
+	.name       = "CLASSIFY",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.table      = "mangle",
+	.hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+		      (1 << NF_INET_POST_ROUTING),
+	.target     = classify_tg,
+	.targetsize = sizeof(struct xt_classify_target_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init classify_tg_init(void)
 {
-	return xt_register_targets(classify_tg_reg,
-	       ARRAY_SIZE(classify_tg_reg));
+	return xt_register_target(&classify_tg_reg);
 }
 
 static void __exit classify_tg_exit(void)
 {
-	xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
+	xt_unregister_target(&classify_tg_reg);
 }
 
 module_init(classify_tg_init);
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 55ef0796c76..c8ea7a80970 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -222,15 +222,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 	{
 		.name           = "MARK",
 		.revision       = 2,
-		.family         = NFPROTO_IPV4,
-		.target         = mark_tg,
-		.targetsize     = sizeof(struct xt_mark_tginfo2),
-		.me             = THIS_MODULE,
-	},
-	{
-		.name           = "MARK",
-		.revision       = 2,
-		.family         = NFPROTO_IPV6,
+		.family         = NFPROTO_UNSPEC,
 		.target         = mark_tg,
 		.targetsize     = sizeof(struct xt_mark_tginfo2),
 		.me             = THIS_MODULE,
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index f7114fc5cfc..da7946e6ecb 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -157,25 +157,15 @@ static void xt_rateest_tg_destroy(const struct xt_target *target,
 	xt_rateest_put(info->est);
 }
 
-static struct xt_target xt_rateest_target[] __read_mostly = {
-	{
-		.family		= NFPROTO_IPV4,
-		.name		= "RATEEST",
-		.target		= xt_rateest_tg,
-		.checkentry	= xt_rateest_tg_checkentry,
-		.destroy	= xt_rateest_tg_destroy,
-		.targetsize	= sizeof(struct xt_rateest_target_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.family		= NFPROTO_IPV6,
-		.name		= "RATEEST",
-		.target		= xt_rateest_tg,
-		.checkentry	= xt_rateest_tg_checkentry,
-		.destroy	= xt_rateest_tg_destroy,
-		.targetsize	= sizeof(struct xt_rateest_target_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_target xt_rateest_tg_reg __read_mostly = {
+	.name       = "RATEEST",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.target     = xt_rateest_tg,
+	.checkentry = xt_rateest_tg_checkentry,
+	.destroy    = xt_rateest_tg_destroy,
+	.targetsize = sizeof(struct xt_rateest_target_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init xt_rateest_tg_init(void)
@@ -186,13 +176,12 @@ static int __init xt_rateest_tg_init(void)
 		INIT_HLIST_HEAD(&rateest_hash[i]);
 
 	get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
-	return xt_register_targets(xt_rateest_target,
-				   ARRAY_SIZE(xt_rateest_target));
+	return xt_register_target(&xt_rateest_tg_reg);
 }
 
 static void __exit xt_rateest_tg_fini(void)
 {
-	xt_unregister_targets(xt_rateest_target, ARRAY_SIZE(xt_rateest_target));
+	xt_unregister_target(&xt_rateest_tg_reg);
 }
 
 
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 8f8f57b93a6..2a2ab833481 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -125,35 +125,25 @@ static void secmark_tg_destroy(const struct xt_target *target, void *targinfo)
 	}
 }
 
-static struct xt_target secmark_tg_reg[] __read_mostly = {
-	{
-		.name		= "SECMARK",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= secmark_tg_check,
-		.destroy	= secmark_tg_destroy,
-		.target		= secmark_tg,
-		.targetsize	= sizeof(struct xt_secmark_target_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "SECMARK",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= secmark_tg_check,
-		.destroy	= secmark_tg_destroy,
-		.target		= secmark_tg,
-		.targetsize	= sizeof(struct xt_secmark_target_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_target secmark_tg_reg __read_mostly = {
+	.name       = "SECMARK",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = secmark_tg_check,
+	.destroy    = secmark_tg_destroy,
+	.target     = secmark_tg,
+	.targetsize = sizeof(struct xt_secmark_target_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init secmark_tg_init(void)
 {
-	return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
+	return xt_register_target(&secmark_tg_reg);
 }
 
 static void __exit secmark_tg_exit(void)
 {
-	xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
+	xt_unregister_target(&secmark_tg_reg);
 }
 
 module_init(secmark_tg_init);
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index e1bcad57914..da35f9f1cd7 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -19,31 +19,23 @@ trace_tg(struct sk_buff *skb, const struct net_device *in,
 	return XT_CONTINUE;
 }
 
-static struct xt_target trace_tg_reg[] __read_mostly = {
-	{
-		.name		= "TRACE",
-		.family		= NFPROTO_IPV4,
-		.target		= trace_tg,
-		.table		= "raw",
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "TRACE",
-		.family		= NFPROTO_IPV6,
-		.target		= trace_tg,
-		.table		= "raw",
-		.me		= THIS_MODULE,
-	},
+static struct xt_target trace_tg_reg __read_mostly = {
+	.name       = "TRACE",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.table      = "raw",
+	.target     = trace_tg,
+	.me         = THIS_MODULE,
 };
 
 static int __init trace_tg_init(void)
 {
-	return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
+	return xt_register_target(&trace_tg_reg);
 }
 
 static void __exit trace_tg_exit(void)
 {
-	xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
+	xt_unregister_target(&trace_tg_reg);
 }
 
 module_init(trace_tg_init);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 584d66893c4..00247bd1095 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -167,43 +167,29 @@ static int limit_mt_compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_match limit_mt_reg[] __read_mostly = {
-	{
-		.name		= "limit",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= limit_mt_check,
-		.match		= limit_mt,
-		.matchsize	= sizeof(struct xt_rateinfo),
+static struct xt_match limit_mt_reg __read_mostly = {
+	.name             = "limit",
+	.revision         = 0,
+	.family           = NFPROTO_UNSPEC,
+	.match            = limit_mt,
+	.checkentry       = limit_mt_check,
+	.matchsize        = sizeof(struct xt_rateinfo),
 #ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_rateinfo),
-		.compat_from_user = limit_mt_compat_from_user,
-		.compat_to_user	= limit_mt_compat_to_user,
+	.compatsize       = sizeof(struct compat_xt_rateinfo),
+	.compat_from_user = limit_mt_compat_from_user,
+	.compat_to_user   = limit_mt_compat_to_user,
 #endif
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "limit",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= limit_mt_check,
-		.match		= limit_mt,
-		.matchsize	= sizeof(struct xt_rateinfo),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_rateinfo),
-		.compat_from_user = limit_mt_compat_from_user,
-		.compat_to_user	= limit_mt_compat_to_user,
-#endif
-		.me		= THIS_MODULE,
-	},
+	.me               = THIS_MODULE,
 };
 
 static int __init limit_mt_init(void)
 {
-	return xt_register_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
+	return xt_register_match(&limit_mt_reg);
 }
 
 static void __exit limit_mt_exit(void)
 {
-	xt_unregister_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
+	xt_unregister_match(&limit_mt_reg);
 }
 
 module_init(limit_mt_init);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index c66affd5722..96dd2b63b6b 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -92,7 +92,7 @@ static struct xt_match mark_mt_reg[] __read_mostly = {
 	{
 		.name		= "mark",
 		.revision	= 0,
-		.family		= NFPROTO_IPV4,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= mark_mt_check_v0,
 		.match		= mark_mt_v0,
 		.matchsize	= sizeof(struct xt_mark_info),
@@ -103,32 +103,10 @@ static struct xt_match mark_mt_reg[] __read_mostly = {
 #endif
 		.me		= THIS_MODULE,
 	},
-	{
-		.name		= "mark",
-		.revision	= 0,
-		.family		= NFPROTO_IPV6,
-		.checkentry	= mark_mt_check_v0,
-		.match		= mark_mt_v0,
-		.matchsize	= sizeof(struct xt_mark_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_mark_info),
-		.compat_from_user = mark_mt_compat_from_user_v0,
-		.compat_to_user	= mark_mt_compat_to_user_v0,
-#endif
-		.me		= THIS_MODULE,
-	},
-	{
-		.name           = "mark",
-		.revision       = 1,
-		.family         = NFPROTO_IPV4,
-		.match          = mark_mt,
-		.matchsize      = sizeof(struct xt_mark_mtinfo1),
-		.me             = THIS_MODULE,
-	},
 	{
 		.name           = "mark",
 		.revision       = 1,
-		.family         = NFPROTO_IPV6,
+		.family         = NFPROTO_UNSPEC,
 		.match          = mark_mt,
 		.matchsize      = sizeof(struct xt_mark_mtinfo1),
 		.me             = THIS_MODULE,
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 59f61e32b62..a3c8798f0cc 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -54,33 +54,24 @@ quota_mt_check(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match quota_mt_reg[] __read_mostly = {
-	{
-		.name		= "quota",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= quota_mt_check,
-		.match		= quota_mt,
-		.matchsize	= sizeof(struct xt_quota_info),
-		.me		= THIS_MODULE
-	},
-	{
-		.name		= "quota",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= quota_mt_check,
-		.match		= quota_mt,
-		.matchsize	= sizeof(struct xt_quota_info),
-		.me		= THIS_MODULE
-	},
+static struct xt_match quota_mt_reg __read_mostly = {
+	.name       = "quota",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.match      = quota_mt,
+	.checkentry = quota_mt_check,
+	.matchsize  = sizeof(struct xt_quota_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init quota_mt_init(void)
 {
-	return xt_register_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
+	return xt_register_match(&quota_mt_reg);
 }
 
 static void __exit quota_mt_exit(void)
 {
-	xt_unregister_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
+	xt_unregister_match(&quota_mt_reg);
 }
 
 module_init(quota_mt_init);
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index ba1cb5760f4..4dcfd7353db 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -137,36 +137,25 @@ static void xt_rateest_mt_destroy(const struct xt_match *match,
 		xt_rateest_put(info->est2);
 }
 
-static struct xt_match xt_rateest_match[] __read_mostly = {
-	{
-		.family		= NFPROTO_IPV4,
-		.name		= "rateest",
-		.match		= xt_rateest_mt,
-		.checkentry	= xt_rateest_mt_checkentry,
-		.destroy	= xt_rateest_mt_destroy,
-		.matchsize	= sizeof(struct xt_rateest_match_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.family		= NFPROTO_IPV6,
-		.name		= "rateest",
-		.match		= xt_rateest_mt,
-		.checkentry	= xt_rateest_mt_checkentry,
-		.destroy	= xt_rateest_mt_destroy,
-		.matchsize	= sizeof(struct xt_rateest_match_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match xt_rateest_mt_reg __read_mostly = {
+	.name       = "rateest",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.match      = xt_rateest_mt,
+	.checkentry = xt_rateest_mt_checkentry,
+	.destroy    = xt_rateest_mt_destroy,
+	.matchsize  = sizeof(struct xt_rateest_match_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init xt_rateest_mt_init(void)
 {
-	return xt_register_matches(xt_rateest_match,
-				   ARRAY_SIZE(xt_rateest_match));
+	return xt_register_match(&xt_rateest_mt_reg);
 }
 
 static void __exit xt_rateest_mt_fini(void)
 {
-	xt_unregister_matches(xt_rateest_match, ARRAY_SIZE(xt_rateest_match));
+	xt_unregister_match(&xt_rateest_mt_reg);
 }
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index fd3bb1400df..f41a92322e6 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -66,35 +66,24 @@ statistic_mt_check(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match statistic_mt_reg[] __read_mostly = {
-	{
-		.name		= "statistic",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= statistic_mt_check,
-		.match		= statistic_mt,
-		.matchsize	= sizeof(struct xt_statistic_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "statistic",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= statistic_mt_check,
-		.match		= statistic_mt,
-		.matchsize	= sizeof(struct xt_statistic_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match xt_statistic_mt_reg __read_mostly = {
+	.name       = "statistic",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.match      = statistic_mt,
+	.checkentry = statistic_mt_check,
+	.matchsize  = sizeof(struct xt_statistic_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init statistic_mt_init(void)
 {
-	return xt_register_matches(statistic_mt_reg,
-	       ARRAY_SIZE(statistic_mt_reg));
+	return xt_register_match(&xt_statistic_mt_reg);
 }
 
 static void __exit statistic_mt_exit(void)
 {
-	xt_unregister_matches(statistic_mt_reg,
-	                      ARRAY_SIZE(statistic_mt_reg));
+	xt_unregister_match(&xt_statistic_mt_reg);
 }
 
 module_init(statistic_mt_init);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 50169718377..18d8884e737 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -81,11 +81,11 @@ static void string_mt_destroy(const struct xt_match *match, void *matchinfo)
 	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
 }
 
-static struct xt_match string_mt_reg[] __read_mostly = {
+static struct xt_match xt_string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
 		.revision	= 0,
-		.family		= NFPROTO_IPV4,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
 		.destroy 	= string_mt_destroy,
@@ -95,27 +95,7 @@ static struct xt_match string_mt_reg[] __read_mostly = {
 	{
 		.name 		= "string",
 		.revision	= 1,
-		.family		= NFPROTO_IPV4,
-		.checkentry	= string_mt_check,
-		.match 		= string_mt,
-		.destroy 	= string_mt_destroy,
-		.matchsize	= sizeof(struct xt_string_info),
-		.me 		= THIS_MODULE
-	},
-	{
-		.name 		= "string",
-		.revision	= 0,
-		.family		= NFPROTO_IPV6,
-		.checkentry	= string_mt_check,
-		.match 		= string_mt,
-		.destroy 	= string_mt_destroy,
-		.matchsize	= sizeof(struct xt_string_info),
-		.me 		= THIS_MODULE
-	},
-	{
-		.name 		= "string",
-		.revision	= 1,
-		.family		= NFPROTO_IPV6,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= string_mt_check,
 		.match 		= string_mt,
 		.destroy 	= string_mt_destroy,
@@ -126,12 +106,13 @@ static struct xt_match string_mt_reg[] __read_mostly = {
 
 static int __init string_mt_init(void)
 {
-	return xt_register_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
+	return xt_register_matches(xt_string_mt_reg,
+				   ARRAY_SIZE(xt_string_mt_reg));
 }
 
 static void __exit string_mt_exit(void)
 {
-	xt_unregister_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
+	xt_unregister_matches(xt_string_mt_reg, ARRAY_SIZE(xt_string_mt_reg));
 }
 
 module_init(string_mt_init);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index fe9dae2b4f5..32d4c769caa 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -237,33 +237,23 @@ time_mt_check(const char *tablename, const void *ip,
 	return true;
 }
 
-static struct xt_match time_mt_reg[] __read_mostly = {
-	{
-		.name       = "time",
-		.family     = NFPROTO_IPV4,
-		.match      = time_mt,
-		.matchsize  = sizeof(struct xt_time_info),
-		.checkentry = time_mt_check,
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "time",
-		.family     = NFPROTO_IPV6,
-		.match      = time_mt,
-		.matchsize  = sizeof(struct xt_time_info),
-		.checkentry = time_mt_check,
-		.me         = THIS_MODULE,
-	},
+static struct xt_match xt_time_mt_reg __read_mostly = {
+	.name       = "time",
+	.family     = NFPROTO_UNSPEC,
+	.match      = time_mt,
+	.checkentry = time_mt_check,
+	.matchsize  = sizeof(struct xt_time_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init time_mt_init(void)
 {
-	return xt_register_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
+	return xt_register_match(&xt_time_mt_reg);
 }
 
 static void __exit time_mt_exit(void)
 {
-	xt_unregister_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
+	xt_unregister_match(&xt_time_mt_reg);
 }
 
 module_init(time_mt_init);
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index ed9f8340611..a6b971dc5d3 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -99,31 +99,23 @@ u32_mt(const struct sk_buff *skb, const struct net_device *in,
 	return ret ^ data->invert;
 }
 
-static struct xt_match u32_mt_reg[] __read_mostly = {
-	{
-		.name       = "u32",
-		.family     = NFPROTO_IPV4,
-		.match      = u32_mt,
-		.matchsize  = sizeof(struct xt_u32),
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "u32",
-		.family     = NFPROTO_IPV6,
-		.match      = u32_mt,
-		.matchsize  = sizeof(struct xt_u32),
-		.me         = THIS_MODULE,
-	},
+static struct xt_match xt_u32_mt_reg __read_mostly = {
+	.name       = "u32",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.match      = u32_mt,
+	.matchsize  = sizeof(struct xt_u32),
+	.me         = THIS_MODULE,
 };
 
 static int __init u32_mt_init(void)
 {
-	return xt_register_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
+	return xt_register_match(&xt_u32_mt_reg);
 }
 
 static void __exit u32_mt_exit(void)
 {
-	xt_unregister_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
+	xt_unregister_match(&xt_u32_mt_reg);
 }
 
 module_init(u32_mt_init);
-- 
cgit v1.2.3-70-g09d2


From 48dc7865aa3db9404aedc8677d9daf8f8f469ab0 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:01 +0200
Subject: netfilter: netns: remove nf_*_net() wrappers

Now that dev_net() exists, the usefullness of them is even less. Also they're
a big problem in resolving circular header dependencies necessary for
NOTRACK-in-netns patch. See below.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter.h              | 53 ----------------------------------
 net/ipv4/netfilter/iptable_filter.c    |  6 ++--
 net/ipv4/netfilter/iptable_mangle.c    | 10 +++----
 net/ipv4/netfilter/iptable_raw.c       |  4 +--
 net/ipv4/netfilter/iptable_security.c  |  6 ++--
 net/ipv6/netfilter/ip6table_filter.c   |  6 ++--
 net/ipv6/netfilter/ip6table_security.c |  6 ++--
 7 files changed, 19 insertions(+), 72 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index bf3afb0844f..48cfe51bfdd 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -5,13 +5,11 @@
 #include <linux/init.h>
 #include <linux/skbuff.h>
 #include <linux/net.h>
-#include <linux/netdevice.h>
 #include <linux/if.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/wait.h>
 #include <linux/list.h>
-#include <net/net_namespace.h>
 #endif
 #include <linux/types.h>
 #include <linux/compiler.h>
@@ -355,56 +353,5 @@ extern void (*nf_ct_destroy)(struct nf_conntrack *);
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
 #endif
 
-static inline struct net *nf_pre_routing_net(const struct net_device *in,
-					     const struct net_device *out)
-{
-#ifdef CONFIG_NET_NS
-	return in->nd_net;
-#else
-	return &init_net;
-#endif
-}
-
-static inline struct net *nf_local_in_net(const struct net_device *in,
-					  const struct net_device *out)
-{
-#ifdef CONFIG_NET_NS
-	return in->nd_net;
-#else
-	return &init_net;
-#endif
-}
-
-static inline struct net *nf_forward_net(const struct net_device *in,
-					 const struct net_device *out)
-{
-#ifdef CONFIG_NET_NS
-	BUG_ON(in->nd_net != out->nd_net);
-	return in->nd_net;
-#else
-	return &init_net;
-#endif
-}
-
-static inline struct net *nf_local_out_net(const struct net_device *in,
-					   const struct net_device *out)
-{
-#ifdef CONFIG_NET_NS
-	return out->nd_net;
-#else
-	return &init_net;
-#endif
-}
-
-static inline struct net *nf_post_routing_net(const struct net_device *in,
-					      const struct net_device *out)
-{
-#ifdef CONFIG_NET_NS
-	return out->nd_net;
-#else
-	return &init_net;
-#endif
-}
-
 #endif /*__KERNEL__*/
 #endif /*__LINUX_NETFILTER_H*/
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 1ea677dcf84..c9224310eba 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -70,7 +70,7 @@ ipt_local_in_hook(unsigned int hook,
 		  int (*okfn)(struct sk_buff *))
 {
 	return ipt_do_table(skb, hook, in, out,
-			    nf_local_in_net(in, out)->ipv4.iptable_filter);
+			    dev_net(in)->ipv4.iptable_filter);
 }
 
 static unsigned int
@@ -81,7 +81,7 @@ ipt_hook(unsigned int hook,
 	 int (*okfn)(struct sk_buff *))
 {
 	return ipt_do_table(skb, hook, in, out,
-			    nf_forward_net(in, out)->ipv4.iptable_filter);
+			    dev_net(in)->ipv4.iptable_filter);
 }
 
 static unsigned int
@@ -101,7 +101,7 @@ ipt_local_out_hook(unsigned int hook,
 	}
 
 	return ipt_do_table(skb, hook, in, out,
-			    nf_local_out_net(in, out)->ipv4.iptable_filter);
+			    dev_net(out)->ipv4.iptable_filter);
 }
 
 static struct nf_hook_ops ipt_ops[] __read_mostly = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index da59182f222..69f2c428714 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -81,7 +81,7 @@ ipt_pre_routing_hook(unsigned int hook,
 		     int (*okfn)(struct sk_buff *))
 {
 	return ipt_do_table(skb, hook, in, out,
-			    nf_pre_routing_net(in, out)->ipv4.iptable_mangle);
+			    dev_net(in)->ipv4.iptable_mangle);
 }
 
 static unsigned int
@@ -92,7 +92,7 @@ ipt_post_routing_hook(unsigned int hook,
 		      int (*okfn)(struct sk_buff *))
 {
 	return ipt_do_table(skb, hook, in, out,
-			    nf_post_routing_net(in, out)->ipv4.iptable_mangle);
+			    dev_net(out)->ipv4.iptable_mangle);
 }
 
 static unsigned int
@@ -103,7 +103,7 @@ ipt_local_in_hook(unsigned int hook,
 		  int (*okfn)(struct sk_buff *))
 {
 	return ipt_do_table(skb, hook, in, out,
-			    nf_local_in_net(in, out)->ipv4.iptable_mangle);
+			    dev_net(in)->ipv4.iptable_mangle);
 }
 
 static unsigned int
@@ -114,7 +114,7 @@ ipt_forward_hook(unsigned int hook,
 	 int (*okfn)(struct sk_buff *))
 {
 	return ipt_do_table(skb, hook, in, out,
-			    nf_forward_net(in, out)->ipv4.iptable_mangle);
+			    dev_net(in)->ipv4.iptable_mangle);
 }
 
 static unsigned int
@@ -147,7 +147,7 @@ ipt_local_hook(unsigned int hook,
 	tos = iph->tos;
 
 	ret = ipt_do_table(skb, hook, in, out,
-			   nf_local_out_net(in, out)->ipv4.iptable_mangle);
+			   dev_net(out)->ipv4.iptable_mangle);
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
 		iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index fddce7754b7..8faebfe638f 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -53,7 +53,7 @@ ipt_hook(unsigned int hook,
 	 int (*okfn)(struct sk_buff *))
 {
 	return ipt_do_table(skb, hook, in, out,
-			    nf_pre_routing_net(in, out)->ipv4.iptable_raw);
+			    dev_net(in)->ipv4.iptable_raw);
 }
 
 static unsigned int
@@ -72,7 +72,7 @@ ipt_local_hook(unsigned int hook,
 		return NF_ACCEPT;
 	}
 	return ipt_do_table(skb, hook, in, out,
-			    nf_local_out_net(in, out)->ipv4.iptable_raw);
+			    dev_net(out)->ipv4.iptable_raw);
 }
 
 /* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index db6d312128e..36f3be3cc42 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -73,7 +73,7 @@ ipt_local_in_hook(unsigned int hook,
 		  int (*okfn)(struct sk_buff *))
 {
 	return ipt_do_table(skb, hook, in, out,
-			    nf_local_in_net(in, out)->ipv4.iptable_security);
+			    dev_net(in)->ipv4.iptable_security);
 }
 
 static unsigned int
@@ -84,7 +84,7 @@ ipt_forward_hook(unsigned int hook,
 		 int (*okfn)(struct sk_buff *))
 {
 	return ipt_do_table(skb, hook, in, out,
-			    nf_forward_net(in, out)->ipv4.iptable_security);
+			    dev_net(in)->ipv4.iptable_security);
 }
 
 static unsigned int
@@ -103,7 +103,7 @@ ipt_local_out_hook(unsigned int hook,
 		return NF_ACCEPT;
 	}
 	return ipt_do_table(skb, hook, in, out,
-			    nf_local_out_net(in, out)->ipv4.iptable_security);
+			    dev_net(out)->ipv4.iptable_security);
 }
 
 static struct nf_hook_ops ipt_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 55a2c290bad..b110a8a85a1 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -68,7 +68,7 @@ ip6t_local_in_hook(unsigned int hook,
 		   int (*okfn)(struct sk_buff *))
 {
 	return ip6t_do_table(skb, hook, in, out,
-			     nf_local_in_net(in, out)->ipv6.ip6table_filter);
+			     dev_net(in)->ipv6.ip6table_filter);
 }
 
 static unsigned int
@@ -79,7 +79,7 @@ ip6t_forward_hook(unsigned int hook,
 		  int (*okfn)(struct sk_buff *))
 {
 	return ip6t_do_table(skb, hook, in, out,
-			     nf_forward_net(in, out)->ipv6.ip6table_filter);
+			     dev_net(in)->ipv6.ip6table_filter);
 }
 
 static unsigned int
@@ -100,7 +100,7 @@ ip6t_local_out_hook(unsigned int hook,
 #endif
 
 	return ip6t_do_table(skb, hook, in, out,
-			     nf_local_out_net(in, out)->ipv6.ip6table_filter);
+			     dev_net(out)->ipv6.ip6table_filter);
 }
 
 static struct nf_hook_ops ip6t_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 6e7131036bc..20bc52f13e4 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -72,7 +72,7 @@ ip6t_local_in_hook(unsigned int hook,
 		   int (*okfn)(struct sk_buff *))
 {
 	return ip6t_do_table(skb, hook, in, out,
-			     nf_local_in_net(in, out)->ipv6.ip6table_security);
+			     dev_net(in)->ipv6.ip6table_security);
 }
 
 static unsigned int
@@ -83,7 +83,7 @@ ip6t_forward_hook(unsigned int hook,
 		  int (*okfn)(struct sk_buff *))
 {
 	return ip6t_do_table(skb, hook, in, out,
-			     nf_forward_net(in, out)->ipv6.ip6table_security);
+			     dev_net(in)->ipv6.ip6table_security);
 }
 
 static unsigned int
@@ -95,7 +95,7 @@ ip6t_local_out_hook(unsigned int hook,
 {
 	/* TBD: handle short packets via raw socket */
 	return ip6t_do_table(skb, hook, in, out,
-			     nf_local_out_net(in, out)->ipv6.ip6table_security);
+			     dev_net(out)->ipv6.ip6table_security);
 }
 
 static struct nf_hook_ops ip6t_ops[] __read_mostly = {
-- 
cgit v1.2.3-70-g09d2


From 1339dd91719f3e841b113ddaccd30fd87b9d2332 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:01 +0200
Subject: netfilter: netns: ip6table_raw in netns for real

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6table_raw.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 92b91077ac2..109fab6f831 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -45,25 +45,37 @@ static struct xt_table packet_raw = {
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6t_hook(unsigned int hook,
+ip6t_pre_routing_hook(unsigned int hook,
 	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_raw);
+	return ip6t_do_table(skb, hook, in, out,
+			     dev_net(in)->ipv6.ip6table_raw);
+}
+
+static unsigned int
+ip6t_local_out_hook(unsigned int hook,
+	 struct sk_buff *skb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	return ip6t_do_table(skb, hook, in, out,
+			     dev_net(out)->ipv6.ip6table_raw);
 }
 
 static struct nf_hook_ops ip6t_ops[] __read_mostly = {
 	{
-	  .hook = ip6t_hook,
+	  .hook = ip6t_pre_routing_hook,
 	  .pf = PF_INET6,
 	  .hooknum = NF_INET_PRE_ROUTING,
 	  .priority = NF_IP6_PRI_FIRST,
 	  .owner = THIS_MODULE,
 	},
 	{
-	  .hook = ip6t_hook,
+	  .hook = ip6t_local_out_hook,
 	  .pf = PF_INET6,
 	  .hooknum = NF_INET_LOCAL_OUT,
 	  .priority = NF_IP6_PRI_FIRST,
-- 
cgit v1.2.3-70-g09d2


From 7dd1b8dad84c9561fe8949ed5db4de15aee877eb Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:02 +0200
Subject: netfilter: netns: ip6table_mangle in netns for real

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6table_mangle.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index f405cea21a8..d0b31b259d4 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -67,17 +67,29 @@ static struct xt_table packet_mangler = {
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6t_route_hook(unsigned int hook,
+ip6t_in_hook(unsigned int hook,
 	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle);
+	return ip6t_do_table(skb, hook, in, out,
+			     dev_net(in)->ipv6.ip6table_mangle);
 }
 
 static unsigned int
-ip6t_local_hook(unsigned int hook,
+ip6t_post_routing_hook(unsigned int hook,
+		struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		int (*okfn)(struct sk_buff *))
+{
+	return ip6t_do_table(skb, hook, in, out,
+			     dev_net(out)->ipv6.ip6table_mangle);
+}
+
+static unsigned int
+ip6t_local_out_hook(unsigned int hook,
 		   struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
@@ -108,7 +120,8 @@ ip6t_local_hook(unsigned int hook,
 	/* flowlabel and prio (includes version, which shouldn't change either */
 	flowlabel = *((u_int32_t *)ipv6_hdr(skb));
 
-	ret = ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle);
+	ret = ip6t_do_table(skb, hook, in, out,
+			    dev_net(out)->ipv6.ip6table_mangle);
 
 	if (ret != NF_DROP && ret != NF_STOLEN
 		&& (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr))
@@ -122,35 +135,35 @@ ip6t_local_hook(unsigned int hook,
 
 static struct nf_hook_ops ip6t_ops[] __read_mostly = {
 	{
-		.hook		= ip6t_route_hook,
+		.hook		= ip6t_in_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
 		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 	{
-		.hook		= ip6t_route_hook,
+		.hook		= ip6t_in_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
 		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 	{
-		.hook		= ip6t_route_hook,
+		.hook		= ip6t_in_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
 		.hooknum	= NF_INET_FORWARD,
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 	{
-		.hook		= ip6t_local_hook,
+		.hook		= ip6t_local_out_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
 		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_MANGLE,
 	},
 	{
-		.hook		= ip6t_route_hook,
+		.hook		= ip6t_post_routing_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET6,
 		.hooknum	= NF_INET_POST_ROUTING,
-- 
cgit v1.2.3-70-g09d2


From e10aad9998e463df8e25ec749538faf3324dd31b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:02 +0200
Subject: netfilter: netns: ip6t_REJECT in netns for real

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6t_REJECT.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 672ad9ff3e2..f1a9fce1ec9 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -35,7 +35,7 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
 MODULE_LICENSE("GPL");
 
 /* Send RST reply */
-static void send_reset(struct sk_buff *oldskb)
+static void send_reset(struct net *net, struct sk_buff *oldskb)
 {
 	struct sk_buff *nskb;
 	struct tcphdr otcph, *tcph;
@@ -94,7 +94,7 @@ static void send_reset(struct sk_buff *oldskb)
 	fl.fl_ip_sport = otcph.dest;
 	fl.fl_ip_dport = otcph.source;
 	security_skb_classify_flow(oldskb, &fl);
-	dst = ip6_route_output(&init_net, NULL, &fl);
+	dst = ip6_route_output(net, NULL, &fl);
 	if (dst == NULL)
 		return;
 	if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0))
@@ -163,10 +163,11 @@ static void send_reset(struct sk_buff *oldskb)
 }
 
 static inline void
-send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
+send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
+	     unsigned int hooknum)
 {
 	if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
-		skb_in->dev = init_net.loopback_dev;
+		skb_in->dev = net->loopback_dev;
 
 	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
 }
@@ -177,6 +178,7 @@ reject_tg6(struct sk_buff *skb, const struct net_device *in,
            const struct xt_target *target, const void *targinfo)
 {
 	const struct ip6t_reject_info *reject = targinfo;
+	struct net *net = dev_net(in ? in : out);
 
 	pr_debug("%s: medium point\n", __func__);
 	/* WARNING: This code causes reentry within ip6tables.
@@ -184,25 +186,25 @@ reject_tg6(struct sk_buff *skb, const struct net_device *in,
 	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IP6T_ICMP6_NO_ROUTE:
-		send_unreach(skb, ICMPV6_NOROUTE, hooknum);
+		send_unreach(net, skb, ICMPV6_NOROUTE, hooknum);
 		break;
 	case IP6T_ICMP6_ADM_PROHIBITED:
-		send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum);
+		send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, hooknum);
 		break;
 	case IP6T_ICMP6_NOT_NEIGHBOUR:
-		send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum);
+		send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, hooknum);
 		break;
 	case IP6T_ICMP6_ADDR_UNREACH:
-		send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum);
+		send_unreach(net, skb, ICMPV6_ADDR_UNREACH, hooknum);
 		break;
 	case IP6T_ICMP6_PORT_UNREACH:
-		send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum);
+		send_unreach(net, skb, ICMPV6_PORT_UNREACH, hooknum);
 		break;
 	case IP6T_ICMP6_ECHOREPLY:
 		/* Do nothing */
 		break;
 	case IP6T_TCP_RESET:
-		send_reset(skb);
+		send_reset(net, skb);
 		break;
 	default:
 		if (net_ratelimit())
-- 
cgit v1.2.3-70-g09d2


From dfdb8d791877052bbb527d9688d94a064721d8f7 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:02 +0200
Subject: netfilter: netns nf_conntrack: add netns boilerplate

One comment: #ifdefs around #include is necessary to overcome amazing compile
breakages in NOTRACK-in-netns patch (see below).

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/net_namespace.h               |  6 ++++++
 include/net/netfilter/nf_conntrack_core.h |  4 ++--
 include/net/netns/conntrack.h             |  6 ++++++
 net/netfilter/nf_conntrack_core.c         |  4 ++--
 net/netfilter/nf_conntrack_expect.c       |  4 ++--
 net/netfilter/nf_conntrack_standalone.c   | 21 ++++++++++++++++++---
 6 files changed, 36 insertions(+), 9 deletions(-)
 create mode 100644 include/net/netns/conntrack.h

(limited to 'net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index a8eb43cf0c7..708009be88b 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -16,6 +16,9 @@
 #include <net/netns/ipv6.h>
 #include <net/netns/dccp.h>
 #include <net/netns/x_tables.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#include <net/netns/conntrack.h>
+#endif
 
 struct proc_dir_entry;
 struct net_device;
@@ -67,6 +70,9 @@ struct net {
 #endif
 #ifdef CONFIG_NETFILTER
 	struct netns_xt		xt;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	struct netns_ct		ct;
+#endif
 #endif
 	struct net_generic	*gen;
 };
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 05760d6a706..532aa200cbc 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -24,8 +24,8 @@ extern unsigned int nf_conntrack_in(u_int8_t pf,
 				    unsigned int hooknum,
 				    struct sk_buff *skb);
 
-extern int nf_conntrack_init(void);
-extern void nf_conntrack_cleanup(void);
+extern int nf_conntrack_init(struct net *net);
+extern void nf_conntrack_cleanup(struct net *net);
 
 extern int nf_conntrack_proto_init(void);
 extern void nf_conntrack_proto_fini(void);
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
new file mode 100644
index 00000000000..82d80b83477
--- /dev/null
+++ b/include/net/netns/conntrack.h
@@ -0,0 +1,6 @@
+#ifndef __NETNS_CONNTRACK_H
+#define __NETNS_CONNTRACK_H
+
+struct netns_ct {
+};
+#endif
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6aaf64b5ded..ee79e932589 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1006,7 +1006,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush);
 
 /* Mishearing the voices in his head, our hero wonders how he's
    supposed to kill the mall. */
-void nf_conntrack_cleanup(void)
+void nf_conntrack_cleanup(struct net *net)
 {
 	rcu_assign_pointer(ip_ct_attach, NULL);
 
@@ -1120,7 +1120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 		  &nf_conntrack_htable_size, 0600);
 
-int __init nf_conntrack_init(void)
+int nf_conntrack_init(struct net *net)
 {
 	int max_factor = 8;
 	int ret;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 990fa12f2ee..e6a79f2a7c5 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -537,7 +537,7 @@ static const struct file_operations exp_file_ops = {
 };
 #endif /* CONFIG_PROC_FS */
 
-static int __init exp_proc_init(void)
+static int exp_proc_init(void)
 {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *proc;
@@ -558,7 +558,7 @@ static void exp_proc_remove(void)
 
 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
 
-int __init nf_conntrack_expect_init(void)
+int nf_conntrack_expect_init(void)
 {
 	int err = -ENOMEM;
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 8509db14670..81dec17196d 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -440,11 +440,26 @@ static void nf_conntrack_standalone_fini_sysctl(void)
 }
 #endif /* CONFIG_SYSCTL */
 
+static int nf_conntrack_net_init(struct net *net)
+{
+	return nf_conntrack_init(net);
+}
+
+static void nf_conntrack_net_exit(struct net *net)
+{
+	nf_conntrack_cleanup(net);
+}
+
+static struct pernet_operations nf_conntrack_net_ops = {
+	.init = nf_conntrack_net_init,
+	.exit = nf_conntrack_net_exit,
+};
+
 static int __init nf_conntrack_standalone_init(void)
 {
 	int ret;
 
-	ret = nf_conntrack_init();
+	ret = register_pernet_subsys(&nf_conntrack_net_ops);
 	if (ret < 0)
 		goto out;
 	ret = nf_conntrack_standalone_init_proc();
@@ -458,7 +473,7 @@ static int __init nf_conntrack_standalone_init(void)
 out_sysctl:
 	nf_conntrack_standalone_fini_proc();
 out_proc:
-	nf_conntrack_cleanup();
+	unregister_pernet_subsys(&nf_conntrack_net_ops);
 out:
 	return ret;
 }
@@ -467,7 +482,7 @@ static void __exit nf_conntrack_standalone_fini(void)
 {
 	nf_conntrack_standalone_fini_sysctl();
 	nf_conntrack_standalone_fini_proc();
-	nf_conntrack_cleanup();
+	unregister_pernet_subsys(&nf_conntrack_net_ops);
 }
 
 module_init(nf_conntrack_standalone_init);
-- 
cgit v1.2.3-70-g09d2


From 5a1fb391d881905e89623d78858d05b248cbc86a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:02 +0200
Subject: netfilter: netns nf_conntrack: add ->ct_net -- pointer from conntrack
 to netns

Conntrack (struct nf_conn) gets pointer to netns: ->ct_net -- netns in which
it was created. It comes from netdevice.

->ct_net is write-once field.

Every conntrack in system has ->ct_net initialized, no exceptions.

->ct_net doesn't pin netns: conntracks are recycled after timeouts and
pinning background traffic will prevent netns from even starting shutdown
sequence.

Right now every conntrack is created in init_net.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h | 18 ++++++++++++++++--
 net/netfilter/nf_conntrack_core.c    | 17 +++++++++++++----
 net/netfilter/nf_conntrack_netlink.c |  2 +-
 3 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 0741ad592da..2b8d6efecf3 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -123,7 +123,9 @@ struct nf_conn
 
 	/* Extensions */
 	struct nf_ct_ext *ext;
-
+#ifdef CONFIG_NET_NS
+	struct net *ct_net;
+#endif
 	struct rcu_head rcu;
 };
 
@@ -147,6 +149,17 @@ static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct)
 /* get master conntrack via master expectation */
 #define master_ct(conntr) (conntr->master)
 
+extern struct net init_net;
+
+static inline struct net *nf_ct_net(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NET_NS
+	return ct->ct_net;
+#else
+	return &init_net;
+#endif
+}
+
 /* Alter reply tuple (maybe alter helper). */
 extern void
 nf_conntrack_alter_reply(struct nf_conn *ct,
@@ -251,7 +264,8 @@ extern void
 nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data);
 extern void nf_conntrack_free(struct nf_conn *ct);
 extern struct nf_conn *
-nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
+nf_conntrack_alloc(struct net *net,
+		   const struct nf_conntrack_tuple *orig,
 		   const struct nf_conntrack_tuple *repl,
 		   gfp_t gfp);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ee79e932589..cefc338f6e5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -464,7 +464,8 @@ static noinline int early_drop(unsigned int hash)
 	return dropped;
 }
 
-struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
+struct nf_conn *nf_conntrack_alloc(struct net *net,
+				   const struct nf_conntrack_tuple *orig,
 				   const struct nf_conntrack_tuple *repl,
 				   gfp_t gfp)
 {
@@ -503,6 +504,9 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
+#ifdef CONFIG_NET_NS
+	ct->ct_net = net;
+#endif
 	INIT_RCU_HEAD(&ct->rcu);
 
 	return ct;
@@ -528,7 +532,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 /* Allocate a new conntrack: we return -ENOMEM if classification
    failed due to stress.  Otherwise it really is unclassifiable. */
 static struct nf_conntrack_tuple_hash *
-init_conntrack(const struct nf_conntrack_tuple *tuple,
+init_conntrack(struct net *net,
+	       const struct nf_conntrack_tuple *tuple,
 	       struct nf_conntrack_l3proto *l3proto,
 	       struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
@@ -544,7 +549,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		return NULL;
 	}
 
-	ct = nf_conntrack_alloc(tuple, &repl_tuple, GFP_ATOMIC);
+	ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC);
 	if (ct == NULL || IS_ERR(ct)) {
 		pr_debug("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)ct;
@@ -631,7 +636,8 @@ resolve_normal_ct(struct sk_buff *skb,
 	/* look for tuple match */
 	h = nf_conntrack_find_get(&tuple);
 	if (!h) {
-		h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff);
+		h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb,
+				   dataoff);
 		if (!h)
 			return NULL;
 		if (IS_ERR(h))
@@ -1185,6 +1191,9 @@ int nf_conntrack_init(struct net *net)
 
 	/* Set up fake conntrack:
 	    - to never be deleted, not in any hashes */
+#ifdef CONFIG_NET_NS
+	nf_conntrack_untracked.ct_net = &init_net;
+#endif
 	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
 	/*  - and look it like as a confirmed connection */
 	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a8752031adc..da3cdc8db70 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1125,7 +1125,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 	struct nf_conn_help *help;
 	struct nf_conntrack_helper *helper;
 
-	ct = nf_conntrack_alloc(otuple, rtuple, GFP_KERNEL);
+	ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL);
 	if (ct == NULL || IS_ERR(ct))
 		return -ENOMEM;
 
-- 
cgit v1.2.3-70-g09d2


From 49ac8713b6d064adf7474080fdccebd7cce76be0 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:03 +0200
Subject: netfilter: netns nf_conntrack: per-netns conntrack count

Sysctls and proc files are stubbed to init_net's one. This is temporary.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h                  |  1 -
 include/net/netns/conntrack.h                         |  3 +++
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c        |  2 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c |  2 +-
 net/netfilter/nf_conntrack_core.c                     | 18 ++++++++----------
 net/netfilter/nf_conntrack_standalone.c               |  4 ++--
 6 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 2b8d6efecf3..5999c5313d0 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -288,7 +288,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb)
 extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
 extern unsigned int nf_conntrack_htable_size;
 extern int nf_conntrack_checksum;
-extern atomic_t nf_conntrack_count;
 extern int nf_conntrack_max;
 
 DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 82d80b83477..edf84714d7c 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -1,6 +1,9 @@
 #ifndef __NETNS_CONNTRACK_H
 #define __NETNS_CONNTRACK_H
 
+#include <asm/atomic.h>
+
 struct netns_ct {
+	atomic_t		count;
 };
 #endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 5a955c44036..31abee3e29f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -254,7 +254,7 @@ static ctl_table ip_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_NF_CONNTRACK_COUNT,
 		.procname	= "ip_conntrack_count",
-		.data		= &nf_conntrack_count,
+		.data		= &init_net.ct.count,
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
 		.proc_handler	= &proc_dointvec,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 3a020720e40..4556805027f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -314,7 +314,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
 
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
-	unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
+	unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index cefc338f6e5..8299b3490e7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -44,10 +44,6 @@
 DEFINE_SPINLOCK(nf_conntrack_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
-/* nf_conntrack_standalone needs this */
-atomic_t nf_conntrack_count = ATOMIC_INIT(0);
-EXPORT_SYMBOL_GPL(nf_conntrack_count);
-
 unsigned int nf_conntrack_htable_size __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
@@ -477,13 +473,13 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 	}
 
 	/* We don't want any race condition at early drop stage */
-	atomic_inc(&nf_conntrack_count);
+	atomic_inc(&net->ct.count);
 
 	if (nf_conntrack_max &&
-	    unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) {
+	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
 		unsigned int hash = hash_conntrack(orig);
 		if (!early_drop(hash)) {
-			atomic_dec(&nf_conntrack_count);
+			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "nf_conntrack: table full, dropping"
@@ -495,7 +491,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 	ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp);
 	if (ct == NULL) {
 		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
-		atomic_dec(&nf_conntrack_count);
+		atomic_dec(&net->ct.count);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -516,10 +512,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 static void nf_conntrack_free_rcu(struct rcu_head *head)
 {
 	struct nf_conn *ct = container_of(head, struct nf_conn, rcu);
+	struct net *net = nf_ct_net(ct);
 
 	nf_ct_ext_free(ct);
 	kmem_cache_free(nf_conntrack_cachep, ct);
-	atomic_dec(&nf_conntrack_count);
+	atomic_dec(&net->ct.count);
 }
 
 void nf_conntrack_free(struct nf_conn *ct)
@@ -1024,7 +1021,7 @@ void nf_conntrack_cleanup(struct net *net)
 	nf_ct_event_cache_flush();
  i_see_dead_people:
 	nf_conntrack_flush();
-	if (atomic_read(&nf_conntrack_count) != 0) {
+	if (atomic_read(&net->ct.count) != 0) {
 		schedule();
 		goto i_see_dead_people;
 	}
@@ -1148,6 +1145,7 @@ int nf_conntrack_init(struct net *net)
 		 * entries. */
 		max_factor = 4;
 	}
+	atomic_set(&net->ct.count, 0);
 	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
 						  &nf_conntrack_vmalloc);
 	if (!nf_conntrack_hash) {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 81dec17196d..021b505907d 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -226,7 +226,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
 
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
-	unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
+	unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -338,7 +338,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_COUNT,
 		.procname	= "nf_conntrack_count",
-		.data		= &nf_conntrack_count,
+		.data		= &init_net.ct.count,
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
 		.proc_handler	= &proc_dointvec,
-- 
cgit v1.2.3-70-g09d2


From 400dad39d1c33fe797e47326d87a3f54d0ac5181 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:03 +0200
Subject: netfilter: netns nf_conntrack: per-netns conntrack hash

* make per-netns conntrack hash

  Other solution is to add ->ct_net pointer to tuplehashes and still has one
  hash, I tried that it's ugly and requires more code deep down in protocol
  modules et al.

* propagate netns pointer to where needed, e. g. to conntrack iterators.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h               |  6 +-
 include/net/netfilter/nf_conntrack_core.h          |  3 +-
 include/net/netns/conntrack.h                      |  2 +
 net/ipv4/netfilter/ipt_MASQUERADE.c                |  3 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c     |  2 +-
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c   |  4 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c       |  2 +-
 net/ipv4/netfilter/nf_nat_core.c                   |  2 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c     |  2 +-
 net/netfilter/nf_conntrack_core.c                  | 74 +++++++++++-----------
 net/netfilter/nf_conntrack_helper.c                |  2 +-
 net/netfilter/nf_conntrack_netlink.c               | 16 ++---
 net/netfilter/nf_conntrack_pptp.c                  |  2 +-
 net/netfilter/nf_conntrack_proto.c                 |  4 +-
 net/netfilter/nf_conntrack_standalone.c            |  4 +-
 net/netfilter/xt_connlimit.c                       |  2 +-
 16 files changed, 67 insertions(+), 63 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 5999c5313d0..f5447f14304 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -195,11 +195,11 @@ extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced,
 				 unsigned int size);
 
 extern struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(const struct nf_conntrack_tuple *tuple);
+__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 extern void nf_conntrack_hash_insert(struct nf_conn *ct);
 
-extern void nf_conntrack_flush(void);
+extern void nf_conntrack_flush(struct net *net);
 
 extern bool nf_ct_get_tuplepr(const struct sk_buff *skb,
 			      unsigned int nhoff, u_int16_t l3num,
@@ -261,7 +261,7 @@ extern struct nf_conn nf_conntrack_untracked;
 
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
 extern void
-nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data);
+nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data);
 extern void nf_conntrack_free(struct nf_conn *ct);
 extern struct nf_conn *
 nf_conntrack_alloc(struct net *net,
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 532aa200cbc..1c373564396 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -48,7 +48,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 
 /* Find a connection corresponding to a tuple. */
 extern struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple);
+nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 extern int __nf_conntrack_confirm(struct sk_buff *skb);
 
@@ -71,7 +71,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
             const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *proto);
 
-extern struct hlist_head *nf_conntrack_hash;
 extern spinlock_t nf_conntrack_lock ;
 extern struct hlist_head unconfirmed;
 
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index edf84714d7c..b767683f112 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -5,5 +5,7 @@
 
 struct netns_ct {
 	atomic_t		count;
+	struct hlist_head	*hash;
+	int			hash_vmalloc;
 };
 #endif
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9a4822f8243..5e1c81791e5 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -129,7 +129,8 @@ static int masq_device_event(struct notifier_block *this,
 		   and forget them. */
 		NF_CT_ASSERT(dev->ifindex != 0);
 
-		nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+		nf_ct_iterate_cleanup(&init_net, device_cmp,
+				      (void *)(long)dev->ifindex);
 	}
 
 	return NOTIFY_DONE;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 31abee3e29f..03dd108015c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -323,7 +323,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 		return -EINVAL;
 	}
 
-	h = nf_conntrack_find_get(&tuple);
+	h = nf_conntrack_find_get(sock_net(sk), &tuple);
 	if (h) {
 		struct sockaddr_in sin;
 		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 4556805027f..8e0afdc2b13 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -32,7 +32,7 @@ static struct hlist_node *ct_get_first(struct seq_file *seq)
 	for (st->bucket = 0;
 	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(nf_conntrack_hash[st->bucket].first);
+		n = rcu_dereference(init_net.ct.hash[st->bucket].first);
 		if (n)
 			return n;
 	}
@@ -48,7 +48,7 @@ static struct hlist_node *ct_get_next(struct seq_file *seq,
 	while (head == NULL) {
 		if (++st->bucket >= nf_conntrack_htable_size)
 			return NULL;
-		head = rcu_dereference(nf_conntrack_hash[st->bucket].first);
+		head = rcu_dereference(init_net.ct.hash[st->bucket].first);
 	}
 	return head;
 }
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index da8edcdaef3..daf346377b6 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb,
 
 	*ctinfo = IP_CT_RELATED;
 
-	h = nf_conntrack_find_get(&innertuple);
+	h = nf_conntrack_find_get(&init_net, &innertuple);
 	if (!h) {
 		pr_debug("icmp_error_message: no match\n");
 		return -NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 6c6a3cba8d5..5d4a5b70da2 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -643,7 +643,7 @@ static int clean_nat(struct nf_conn *i, void *data)
 
 static void __exit nf_nat_cleanup(void)
 {
-	nf_ct_iterate_cleanup(&clean_nat, NULL);
+	nf_ct_iterate_cleanup(&init_net, &clean_nat, NULL);
 	synchronize_rcu();
 	nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size);
 	nf_ct_l3proto_put(l3proto);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 5756f30ebc6..548cf4f15c0 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -156,7 +156,7 @@ icmpv6_error_message(struct sk_buff *skb,
 
 	*ctinfo = IP_CT_RELATED;
 
-	h = nf_conntrack_find_get(&intuple);
+	h = nf_conntrack_find_get(&init_net, &intuple);
 	if (!h) {
 		pr_debug("icmpv6_error: no match\n");
 		return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8299b3490e7..da56b260552 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -50,15 +50,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct hlist_head *nf_conntrack_hash __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_hash);
-
 struct nf_conn nf_conntrack_untracked __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
 unsigned int nf_ct_log_invalid __read_mostly;
 HLIST_HEAD(unconfirmed);
-static int nf_conntrack_vmalloc __read_mostly;
 static struct kmem_cache *nf_conntrack_cachep __read_mostly;
 
 DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
@@ -242,7 +238,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
 }
 
 struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(const struct nf_conntrack_tuple *tuple)
+__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_node *n;
@@ -252,7 +248,7 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple)
 	 * at least once for the stats anyway.
 	 */
 	local_bh_disable();
-	hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
+	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
 		if (nf_ct_tuple_equal(tuple, &h->tuple)) {
 			NF_CT_STAT_INC(found);
 			local_bh_enable();
@@ -268,13 +264,13 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find);
 
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple)
+nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
 	rcu_read_lock();
-	h = __nf_conntrack_find(tuple);
+	h = __nf_conntrack_find(net, tuple);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
@@ -290,10 +286,12 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 				       unsigned int hash,
 				       unsigned int repl_hash)
 {
+	struct net *net = nf_ct_net(ct);
+
 	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
-			   &nf_conntrack_hash[hash]);
+			   &net->ct.hash[hash]);
 	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode,
-			   &nf_conntrack_hash[repl_hash]);
+			   &net->ct.hash[repl_hash]);
 }
 
 void nf_conntrack_hash_insert(struct nf_conn *ct)
@@ -319,8 +317,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	struct nf_conn_help *help;
 	struct hlist_node *n;
 	enum ip_conntrack_info ctinfo;
+	struct net *net;
 
 	ct = nf_ct_get(skb, &ctinfo);
+	net = nf_ct_net(ct);
 
 	/* ipt_REJECT uses nf_conntrack_attach to attach related
 	   ICMP/TCP RST packets in other direction.  Actual packet
@@ -347,11 +347,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode)
+	hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 				      &h->tuple))
 			goto out;
-	hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode)
+	hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 				      &h->tuple))
 			goto out;
@@ -394,6 +394,7 @@ int
 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 			 const struct nf_conn *ignored_conntrack)
 {
+	struct net *net = nf_ct_net(ignored_conntrack);
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_node *n;
 	unsigned int hash = hash_conntrack(tuple);
@@ -402,7 +403,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	 * least once for the stats anyway.
 	 */
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
+	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
 		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
 		    nf_ct_tuple_equal(tuple, &h->tuple)) {
 			NF_CT_STAT_INC(found);
@@ -421,7 +422,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static noinline int early_drop(unsigned int hash)
+static noinline int early_drop(struct net *net, unsigned int hash)
 {
 	/* Use oldest entry, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
@@ -432,7 +433,7 @@ static noinline int early_drop(unsigned int hash)
 
 	rcu_read_lock();
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
+		hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash],
 					 hnode) {
 			tmp = nf_ct_tuplehash_to_ctrack(h);
 			if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
@@ -478,7 +479,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 	if (nf_conntrack_max &&
 	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
 		unsigned int hash = hash_conntrack(orig);
-		if (!early_drop(hash)) {
+		if (!early_drop(net, hash)) {
 			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
@@ -631,7 +632,7 @@ resolve_normal_ct(struct sk_buff *skb,
 	}
 
 	/* look for tuple match */
-	h = nf_conntrack_find_get(&tuple);
+	h = nf_conntrack_find_get(&init_net, &tuple);
 	if (!h) {
 		h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb,
 				   dataoff);
@@ -941,7 +942,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 
 /* Bring out ya dead! */
 static struct nf_conn *
-get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
+get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 		void *data, unsigned int *bucket)
 {
 	struct nf_conntrack_tuple_hash *h;
@@ -950,7 +951,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 
 	spin_lock_bh(&nf_conntrack_lock);
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
-		hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) {
+		hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) {
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			if (iter(ct, data))
 				goto found;
@@ -969,13 +970,14 @@ found:
 	return ct;
 }
 
-void
-nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
+void nf_ct_iterate_cleanup(struct net *net,
+			   int (*iter)(struct nf_conn *i, void *data),
+			   void *data)
 {
 	struct nf_conn *ct;
 	unsigned int bucket = 0;
 
-	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
 			death_by_timeout((unsigned long)ct);
@@ -1001,9 +1003,9 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s
 }
 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
 
-void nf_conntrack_flush(void)
+void nf_conntrack_flush(struct net *net)
 {
-	nf_ct_iterate_cleanup(kill_all, NULL);
+	nf_ct_iterate_cleanup(net, kill_all, NULL);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_flush);
 
@@ -1020,7 +1022,7 @@ void nf_conntrack_cleanup(struct net *net)
 
 	nf_ct_event_cache_flush();
  i_see_dead_people:
-	nf_conntrack_flush();
+	nf_conntrack_flush(net);
 	if (atomic_read(&net->ct.count) != 0) {
 		schedule();
 		goto i_see_dead_people;
@@ -1032,7 +1034,7 @@ void nf_conntrack_cleanup(struct net *net)
 	rcu_assign_pointer(nf_ct_destroy, NULL);
 
 	kmem_cache_destroy(nf_conntrack_cachep);
-	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
+	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 
 	nf_conntrack_acct_fini();
@@ -1097,8 +1099,8 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	 */
 	spin_lock_bh(&nf_conntrack_lock);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		while (!hlist_empty(&nf_conntrack_hash[i])) {
-			h = hlist_entry(nf_conntrack_hash[i].first,
+		while (!hlist_empty(&init_net.ct.hash[i])) {
+			h = hlist_entry(init_net.ct.hash[i].first,
 					struct nf_conntrack_tuple_hash, hnode);
 			hlist_del_rcu(&h->hnode);
 			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
@@ -1106,12 +1108,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 		}
 	}
 	old_size = nf_conntrack_htable_size;
-	old_vmalloced = nf_conntrack_vmalloc;
-	old_hash = nf_conntrack_hash;
+	old_vmalloced = init_net.ct.hash_vmalloc;
+	old_hash = init_net.ct.hash;
 
 	nf_conntrack_htable_size = hashsize;
-	nf_conntrack_vmalloc = vmalloced;
-	nf_conntrack_hash = hash;
+	init_net.ct.hash_vmalloc = vmalloced;
+	init_net.ct.hash = hash;
 	nf_conntrack_hash_rnd = rnd;
 	spin_unlock_bh(&nf_conntrack_lock);
 
@@ -1146,9 +1148,9 @@ int nf_conntrack_init(struct net *net)
 		max_factor = 4;
 	}
 	atomic_set(&net->ct.count, 0);
-	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
-						  &nf_conntrack_vmalloc);
-	if (!nf_conntrack_hash) {
+	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
+						  &net->ct.hash_vmalloc);
+	if (!net->ct.hash) {
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
 		goto err_out;
 	}
@@ -1207,7 +1209,7 @@ out_fini_proto:
 err_free_conntrack_slab:
 	kmem_cache_destroy(nf_conntrack_cachep);
 err_free_hash:
-	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
+	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 err_out:
 	return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 8e0b4c8f62a..d91278dfdaf 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -159,7 +159,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	hlist_for_each_entry(h, n, &unconfirmed, hnode)
 		unhelp(h, me);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode)
+		hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode)
 			unhelp(h, me);
 	}
 	spin_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index da3cdc8db70..918a3358a12 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -549,7 +549,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conn *)cb->args[1];
 	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
-		hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[cb->args[0]],
+		hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
 					 hnode) {
 			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
@@ -794,14 +794,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
 	else {
 		/* Flush the whole table */
-		nf_conntrack_flush();
+		nf_conntrack_flush(&init_net);
 		return 0;
 	}
 
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(&tuple);
+	h = nf_conntrack_find_get(&init_net, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -847,7 +847,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(&tuple);
+	h = nf_conntrack_find_get(&init_net, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1213,9 +1213,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	spin_lock_bh(&nf_conntrack_lock);
 	if (cda[CTA_TUPLE_ORIG])
-		h = __nf_conntrack_find(&otuple);
+		h = __nf_conntrack_find(&init_net, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = __nf_conntrack_find(&rtuple);
+		h = __nf_conntrack_find(&init_net, &rtuple);
 
 	if (h == NULL) {
 		struct nf_conntrack_tuple master;
@@ -1230,7 +1230,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			if (err < 0)
 				goto out_unlock;
 
-			master_h = __nf_conntrack_find(&master);
+			master_h = __nf_conntrack_find(&init_net, &master);
 			if (master_h == NULL) {
 				err = -ENOENT;
 				goto out_unlock;
@@ -1670,7 +1670,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3)
 		return err;
 
 	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(&master_tuple);
+	h = nf_conntrack_find_get(&init_net, &master_tuple);
 	if (!h)
 		return -ENOENT;
 	ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 97e54b0e43a..7caf45b59d2 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -143,7 +143,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 	pr_debug("trying to timeout ct or exp for tuple ");
 	nf_ct_dump_tuple(t);
 
-	h = nf_conntrack_find_get(t);
+	h = nf_conntrack_find_get(&init_net, t);
 	if (h)  {
 		sibling = nf_ct_tuplehash_to_ctrack(h);
 		pr_debug("setting timeout of conntrack %p to 0\n", sibling);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index a49fc932629..3a2f7ef997f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -219,7 +219,7 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(kill_l3proto, proto);
+	nf_ct_iterate_cleanup(&init_net, kill_l3proto, proto);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
 
@@ -328,7 +328,7 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(kill_l4proto, l4proto);
+	nf_ct_iterate_cleanup(&init_net, kill_l4proto, l4proto);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 021b505907d..5456e4b9424 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -51,7 +51,7 @@ static struct hlist_node *ct_get_first(struct seq_file *seq)
 	for (st->bucket = 0;
 	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(nf_conntrack_hash[st->bucket].first);
+		n = rcu_dereference(init_net.ct.hash[st->bucket].first);
 		if (n)
 			return n;
 	}
@@ -67,7 +67,7 @@ static struct hlist_node *ct_get_next(struct seq_file *seq,
 	while (head == NULL) {
 		if (++st->bucket >= nf_conntrack_htable_size)
 			return NULL;
-		head = rcu_dereference(nf_conntrack_hash[st->bucket].first);
+		head = rcu_dereference(init_net.ct.hash[st->bucket].first);
 	}
 	return head;
 }
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index d2453d182d6..bd00830ff69 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data,
 
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, tmp, hash, list) {
-		found    = __nf_conntrack_find(&conn->tuple);
+		found    = __nf_conntrack_find(&init_net, &conn->tuple);
 		found_ct = NULL;
 
 		if (found != NULL)
-- 
cgit v1.2.3-70-g09d2


From b21f89019399ff75d9c239010e38b840eb6e01e7 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:03 +0200
Subject: netfilter: netns: fix {ip,6}_route_me_harder() in netns

Take netns from skb->dst->dev. It should be safe because, they are called
from LOCAL_OUT hook where dst is valid (though, I'm not exactly sure about
IPVS and queueing packets to userspace).

[Patrick: its safe everywhere since they already expect skb->dst to be set]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter.c | 7 ++++---
 net/ipv6/netfilter.c | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 01671ad51ed..6efdb70b3eb 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -12,6 +12,7 @@
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 {
+	struct net *net = dev_net(skb->dst->dev);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
 	struct flowi fl = {};
@@ -19,7 +20,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	unsigned int hh_len;
 	unsigned int type;
 
-	type = inet_addr_type(&init_net, iph->saddr);
+	type = inet_addr_type(net, iph->saddr);
 	if (skb->sk && inet_sk(skb->sk)->transparent)
 		type = RTN_LOCAL;
 	if (addr_type == RTN_UNSPEC)
@@ -36,7 +37,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
 		fl.mark = skb->mark;
 		fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
-		if (ip_route_output_key(&init_net, &rt, &fl) != 0)
+		if (ip_route_output_key(net, &rt, &fl) != 0)
 			return -1;
 
 		/* Drop old route. */
@@ -46,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
 		fl.nl_u.ip4_u.daddr = iph->saddr;
-		if (ip_route_output_key(&init_net, &rt, &fl) != 0)
+		if (ip_route_output_key(net, &rt, &fl) != 0)
 			return -1;
 
 		odst = skb->dst;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 8c6c5e71f21..4cb4844a322 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -23,7 +23,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
 		    .saddr = iph->saddr, } },
 	};
 
-	dst = ip6_route_output(&init_net, skb->sk, &fl);
+	dst = ip6_route_output(dev_net(skb->dst->dev), skb->sk, &fl);
 
 #ifdef CONFIG_XFRM
 	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
-- 
cgit v1.2.3-70-g09d2


From 9b03f38d0487f3908696242286d934c9b38f9d2a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:03 +0200
Subject: netfilter: netns nf_conntrack: per-netns expectations

Make per-netns a) expectation hash and b) expectations count.

Expectations always belongs to netns to which it's master conntrack belong.
This is natural and doesn't bloat expectation.

Proc files and leaf users are stubbed to init_net, this is temporary.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_expect.h        | 20 +++++---
 include/net/netns/conntrack.h                      |  3 ++
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c   |  6 ++-
 net/ipv4/netfilter/nf_nat_pptp.c                   |  2 +-
 net/netfilter/nf_conntrack_core.c                  |  8 ++--
 net/netfilter/nf_conntrack_expect.c                | 55 +++++++++++-----------
 net/netfilter/nf_conntrack_h323_main.c             |  2 +-
 net/netfilter/nf_conntrack_helper.c                |  2 +-
 net/netfilter/nf_conntrack_netlink.c               | 13 ++---
 net/netfilter/nf_conntrack_pptp.c                  |  4 +-
 net/netfilter/nf_conntrack_sip.c                   |  2 +-
 11 files changed, 66 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 4c4d894cb9b..37a7fc1164b 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -6,7 +6,6 @@
 #define _NF_CONNTRACK_EXPECT_H
 #include <net/netfilter/nf_conntrack.h>
 
-extern struct hlist_head *nf_ct_expect_hash;
 extern unsigned int nf_ct_expect_hsize;
 extern unsigned int nf_ct_expect_max;
 
@@ -56,6 +55,15 @@ struct nf_conntrack_expect
 	struct rcu_head rcu;
 };
 
+static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
+{
+#ifdef CONFIG_NET_NS
+	return exp->master->ct_net;	/* by definition */
+#else
+	return &init_net;
+#endif
+}
+
 struct nf_conntrack_expect_policy
 {
 	unsigned int	max_expected;
@@ -67,17 +75,17 @@ struct nf_conntrack_expect_policy
 #define NF_CT_EXPECT_PERMANENT	0x1
 #define NF_CT_EXPECT_INACTIVE	0x2
 
-int nf_conntrack_expect_init(void);
-void nf_conntrack_expect_fini(void);
+int nf_conntrack_expect_init(struct net *net);
+void nf_conntrack_expect_fini(struct net *net);
 
 struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple);
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple);
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple);
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp);
 void nf_ct_remove_expectations(struct nf_conn *ct);
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index b767683f112..e453a33f3e9 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -5,7 +5,10 @@
 
 struct netns_ct {
 	atomic_t		count;
+	unsigned int		expect_count;
 	struct hlist_head	*hash;
+	struct hlist_head	*expect_hash;
 	int			hash_vmalloc;
+	int			expect_vmalloc;
 };
 #endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 8e0afdc2b13..f8636a57e8c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -177,11 +177,12 @@ struct ct_expect_iter_state {
 
 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 {
+	struct net *net = &init_net;
 	struct ct_expect_iter_state *st = seq->private;
 	struct hlist_node *n;
 
 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
 		if (n)
 			return n;
 	}
@@ -191,13 +192,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 					     struct hlist_node *head)
 {
+	struct net *net = &init_net;
 	struct ct_expect_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
 	}
 	return head;
 }
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index da3d91a5ef5..e4bdddc6034 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -73,7 +73,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 
 	pr_debug("trying to unexpect other dir: ");
 	nf_ct_dump_tuple_ip(&t);
-	other_exp = nf_ct_expect_find_get(&t);
+	other_exp = nf_ct_expect_find_get(&init_net, &t);
 	if (other_exp) {
 		nf_ct_unexpect_related(other_exp);
 		nf_ct_expect_put(other_exp);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index da56b260552..c188edea249 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -562,7 +562,7 @@ init_conntrack(struct net *net,
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = nf_ct_find_expectation(tuple);
+	exp = nf_ct_find_expectation(net, tuple);
 	if (exp) {
 		pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
 			 ct, exp);
@@ -1038,7 +1038,7 @@ void nf_conntrack_cleanup(struct net *net)
 			     nf_conntrack_htable_size);
 
 	nf_conntrack_acct_fini();
-	nf_conntrack_expect_fini();
+	nf_conntrack_expect_fini(net);
 	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
 }
@@ -1173,7 +1173,7 @@ int nf_conntrack_init(struct net *net)
 	if (ret < 0)
 		goto err_free_conntrack_slab;
 
-	ret = nf_conntrack_expect_init();
+	ret = nf_conntrack_expect_init(net);
 	if (ret < 0)
 		goto out_fini_proto;
 
@@ -1203,7 +1203,7 @@ int nf_conntrack_init(struct net *net)
 out_fini_helper:
 	nf_conntrack_helper_fini();
 out_fini_expect:
-	nf_conntrack_expect_fini();
+	nf_conntrack_expect_fini(net);
 out_fini_proto:
 	nf_conntrack_proto_fini();
 err_free_conntrack_slab:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index e6a79f2a7c5..5307316356e 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -28,17 +28,12 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
-struct hlist_head *nf_ct_expect_hash __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
-
 unsigned int nf_ct_expect_hsize __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
 
 static unsigned int nf_ct_expect_hash_rnd __read_mostly;
-static unsigned int nf_ct_expect_count;
 unsigned int nf_ct_expect_max __read_mostly;
 static int nf_ct_expect_hash_rnd_initted __read_mostly;
-static int nf_ct_expect_vmalloc;
 
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 
@@ -46,12 +41,13 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
+	struct net *net = nf_ct_exp_net(exp);
 
 	NF_CT_ASSERT(master_help);
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));
 
 	hlist_del_rcu(&exp->hnode);
-	nf_ct_expect_count--;
+	net->ct.expect_count--;
 
 	hlist_del(&exp->lnode);
 	master_help->expecting[exp->class]--;
@@ -87,17 +83,17 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
 }
 
 struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 	struct hlist_node *n;
 	unsigned int h;
 
-	if (!nf_ct_expect_count)
+	if (!net->ct.expect_count)
 		return NULL;
 
 	h = nf_ct_expect_dst_hash(tuple);
-	hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) {
+	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
 		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
 			return i;
 	}
@@ -107,12 +103,12 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
 
 /* Just find a expectation corresponding to a tuple. */
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 
 	rcu_read_lock();
-	i = __nf_ct_expect_find(tuple);
+	i = __nf_ct_expect_find(net, tuple);
 	if (i && !atomic_inc_not_zero(&i->use))
 		i = NULL;
 	rcu_read_unlock();
@@ -124,17 +120,17 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
 /* If an expectation for this connection is found, it gets delete from
  * global list then returned. */
 struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i, *exp = NULL;
 	struct hlist_node *n;
 	unsigned int h;
 
-	if (!nf_ct_expect_count)
+	if (!net->ct.expect_count)
 		return NULL;
 
 	h = nf_ct_expect_dst_hash(tuple);
-	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
 		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
 		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
 			exp = i;
@@ -311,6 +307,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put);
 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
+	struct net *net = nf_ct_exp_net(exp);
 	const struct nf_conntrack_expect_policy *p;
 	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
@@ -319,8 +316,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	hlist_add_head(&exp->lnode, &master_help->expectations);
 	master_help->expecting[exp->class]++;
 
-	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
-	nf_ct_expect_count++;
+	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+	net->ct.expect_count++;
 
 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 		    (unsigned long)exp);
@@ -371,6 +368,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 	struct nf_conntrack_expect *i;
 	struct nf_conn *master = expect->master;
 	struct nf_conn_help *master_help = nfct_help(master);
+	struct net *net = nf_ct_exp_net(expect);
 	struct hlist_node *n;
 	unsigned int h;
 	int ret;
@@ -383,7 +381,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 		goto out;
 	}
 	h = nf_ct_expect_dst_hash(&expect->tuple);
-	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
+	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
 		if (expect_matches(i, expect)) {
 			/* Refresh timer: if it's dying, ignore.. */
 			if (refresh_timer(i)) {
@@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 		}
 	}
 
-	if (nf_ct_expect_count >= nf_ct_expect_max) {
+	if (net->ct.expect_count >= nf_ct_expect_max) {
 		if (net_ratelimit())
 			printk(KERN_WARNING
 			       "nf_conntrack: expectation table full\n");
@@ -430,11 +428,12 @@ struct ct_expect_iter_state {
 
 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 {
+	struct net *net = &init_net;
 	struct ct_expect_iter_state *st = seq->private;
 	struct hlist_node *n;
 
 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
 		if (n)
 			return n;
 	}
@@ -444,13 +443,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 					     struct hlist_node *head)
 {
+	struct net *net = &init_net;
 	struct ct_expect_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
+		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
 	}
 	return head;
 }
@@ -558,7 +558,7 @@ static void exp_proc_remove(void)
 
 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
 
-int nf_conntrack_expect_init(void)
+int nf_conntrack_expect_init(struct net *net)
 {
 	int err = -ENOMEM;
 
@@ -569,9 +569,10 @@ int nf_conntrack_expect_init(void)
 	}
 	nf_ct_expect_max = nf_ct_expect_hsize * 4;
 
-	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
-						  &nf_ct_expect_vmalloc);
-	if (nf_ct_expect_hash == NULL)
+	net->ct.expect_count = 0;
+	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
+						  &net->ct.expect_vmalloc);
+	if (net->ct.expect_hash == NULL)
 		goto err1;
 
 	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
@@ -589,16 +590,16 @@ int nf_conntrack_expect_init(void)
 err3:
 	kmem_cache_destroy(nf_ct_expect_cachep);
 err2:
-	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
 			     nf_ct_expect_hsize);
 err1:
 	return err;
 }
 
-void nf_conntrack_expect_fini(void)
+void nf_conntrack_expect_fini(struct net *net)
 {
 	exp_proc_remove();
 	kmem_cache_destroy(nf_ct_expect_cachep);
-	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
 			     nf_ct_expect_hsize);
 }
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 5dc0478108a..dfb826c973d 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1219,7 +1219,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 	tuple.dst.u.tcp.port = port;
 	tuple.dst.protonum = IPPROTO_TCP;
 
-	exp = __nf_ct_expect_find(&tuple);
+	exp = __nf_ct_expect_find(&init_net, &tuple);
 	if (exp && exp->master == ct)
 		return exp;
 	return NULL;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index d91278dfdaf..c793db810cd 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -145,7 +145,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	/* Get rid of expectations */
 	for (i = 0; i < nf_ct_expect_hsize; i++) {
 		hlist_for_each_entry_safe(exp, n, next,
-					  &nf_ct_expect_hash[i], hnode) {
+					  &init_net.ct.expect_hash[i], hnode) {
 			struct nf_conn_help *help = nfct_help(exp->master);
 			if ((help->helper == me || exp->helper == me) &&
 			    del_timer(&exp->timeout)) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 918a3358a12..cadfd15b44f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1458,6 +1458,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb)
 static int
 ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = &init_net;
 	struct nf_conntrack_expect *exp, *last;
 	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
 	struct hlist_node *n;
@@ -1467,7 +1468,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conntrack_expect *)cb->args[1];
 	for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
 restart:
-		hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]],
+		hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]],
 				     hnode) {
 			if (l3proto && exp->tuple.src.l3num != l3proto)
 				continue;
@@ -1529,7 +1530,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = nf_ct_expect_find_get(&tuple);
+	exp = nf_ct_expect_find_get(&init_net, &tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -1583,7 +1584,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 
 		/* bump usage count to 2 */
-		exp = nf_ct_expect_find_get(&tuple);
+		exp = nf_ct_expect_find_get(&init_net, &tuple);
 		if (!exp)
 			return -ENOENT;
 
@@ -1613,7 +1614,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		}
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, n, next,
-						  &nf_ct_expect_hash[i],
+						  &init_net.ct.expect_hash[i],
 						  hnode) {
 				m_help = nfct_help(exp->master);
 				if (m_help->helper == h
@@ -1629,7 +1630,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		spin_lock_bh(&nf_conntrack_lock);
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, n, next,
-						  &nf_ct_expect_hash[i],
+						  &init_net.ct.expect_hash[i],
 						  hnode) {
 				if (del_timer(&exp->timeout)) {
 					nf_ct_unlink_expect(exp);
@@ -1724,7 +1725,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 		return err;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = __nf_ct_expect_find(&tuple);
+	exp = __nf_ct_expect_find(&init_net, &tuple);
 
 	if (!exp) {
 		spin_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 7caf45b59d2..5db7df5d19b 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -121,7 +121,7 @@ static void pptp_expectfn(struct nf_conn *ct,
 		pr_debug("trying to unexpect other dir: ");
 		nf_ct_dump_tuple(&inv_t);
 
-		exp_other = nf_ct_expect_find_get(&inv_t);
+		exp_other = nf_ct_expect_find_get(&init_net, &inv_t);
 		if (exp_other) {
 			/* delete other expectation.  */
 			pr_debug("found\n");
@@ -154,7 +154,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 		nf_ct_put(sibling);
 		return 1;
 	} else {
-		exp = nf_ct_expect_find_get(t);
+		exp = nf_ct_expect_find_get(&init_net, t);
 		if (exp) {
 			pr_debug("unexpect_related of expect %p\n", exp);
 			nf_ct_unexpect_related(exp);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1fa306be60f..a006080eb38 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -775,7 +775,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
 
 	rcu_read_lock();
 	do {
-		exp = __nf_ct_expect_find(&tuple);
+		exp = __nf_ct_expect_find(&init_net, &tuple);
 
 		if (!exp || exp->master == ct ||
 		    nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
-- 
cgit v1.2.3-70-g09d2


From 63c9a26264be108b52de087724673f8664570e34 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:04 +0200
Subject: netfilter: netns nf_conntrack: per-netns unconfirmed list

What is confirmed connection in one netns can very well be unconfirmed
in another one.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_core.h | 1 -
 include/net/netns/conntrack.h             | 2 ++
 net/netfilter/nf_conntrack_core.c         | 7 ++++---
 net/netfilter/nf_conntrack_helper.c       | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 1c373564396..b4b45c541da 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -72,6 +72,5 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
             const struct nf_conntrack_l4proto *proto);
 
 extern spinlock_t nf_conntrack_lock ;
-extern struct hlist_head unconfirmed;
 
 #endif /* _NF_CONNTRACK_CORE_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index e453a33f3e9..6ddf58e142a 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -1,6 +1,7 @@
 #ifndef __NETNS_CONNTRACK_H
 #define __NETNS_CONNTRACK_H
 
+#include <linux/list.h>
 #include <asm/atomic.h>
 
 struct netns_ct {
@@ -8,6 +9,7 @@ struct netns_ct {
 	unsigned int		expect_count;
 	struct hlist_head	*hash;
 	struct hlist_head	*expect_hash;
+	struct hlist_head	unconfirmed;
 	int			hash_vmalloc;
 	int			expect_vmalloc;
 };
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c188edea249..2a105db1330 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -54,7 +54,6 @@ struct nf_conn nf_conntrack_untracked __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
 unsigned int nf_ct_log_invalid __read_mostly;
-HLIST_HEAD(unconfirmed);
 static struct kmem_cache *nf_conntrack_cachep __read_mostly;
 
 DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
@@ -596,7 +595,8 @@ init_conntrack(struct net *net,
 	}
 
 	/* Overload tuple linked list to put us in unconfirmed list. */
-	hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed);
+	hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
+		       &net->ct.unconfirmed);
 
 	spin_unlock_bh(&nf_conntrack_lock);
 
@@ -957,7 +957,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 				goto found;
 		}
 	}
-	hlist_for_each_entry(h, n, &unconfirmed, hnode) {
+	hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (iter(ct, data))
 			set_bit(IPS_DYING_BIT, &ct->status);
@@ -1154,6 +1154,7 @@ int nf_conntrack_init(struct net *net)
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
 		goto err_out;
 	}
+	INIT_HLIST_HEAD(&net->ct.unconfirmed);
 
 	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index c793db810cd..920e778539a 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -156,7 +156,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	}
 
 	/* Get rid of expecteds, set helpers to NULL. */
-	hlist_for_each_entry(h, n, &unconfirmed, hnode)
+	hlist_for_each_entry(h, n, &init_net.ct.unconfirmed, hnode)
 		unhelp(h, me);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
 		hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode)
-- 
cgit v1.2.3-70-g09d2


From a702a65fc1376fc1f6757ec2a6960348af3f1876 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:04 +0200
Subject: netfilter: netns nf_conntrack: pass netns pointer to
 nf_conntrack_in()

It's deducible from skb->dev or skb->dst->dev, but we know netns at
the moment of call, so pass it down and use for finding and creating
conntracks.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_core.h      |  3 ++-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  4 ++--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 24 ++++++++++++++++--------
 net/netfilter/nf_conntrack_core.c              | 15 ++++++++-------
 4 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index b4b45c541da..e78afe7f28e 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -20,7 +20,8 @@
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
    of connection tracking. */
-extern unsigned int nf_conntrack_in(u_int8_t pf,
+extern unsigned int nf_conntrack_in(struct net *net,
+				    u_int8_t pf,
 				    unsigned int hooknum,
 				    struct sk_buff *skb);
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 03dd108015c..2e4dd3fb002 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -172,7 +172,7 @@ static unsigned int ipv4_conntrack_in(unsigned int hooknum,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return nf_conntrack_in(PF_INET, hooknum, skb);
+	return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
 }
 
 static unsigned int ipv4_conntrack_local(unsigned int hooknum,
@@ -188,7 +188,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
 	}
-	return nf_conntrack_in(PF_INET, hooknum, skb);
+	return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
 }
 
 /* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 85050c072ab..e91db16611d 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -211,11 +211,10 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 	return NF_STOLEN;
 }
 
-static unsigned int ipv6_conntrack_in(unsigned int hooknum,
-				      struct sk_buff *skb,
-				      const struct net_device *in,
-				      const struct net_device *out,
-				      int (*okfn)(struct sk_buff *))
+static unsigned int __ipv6_conntrack_in(struct net *net,
+					unsigned int hooknum,
+					struct sk_buff *skb,
+					int (*okfn)(struct sk_buff *))
 {
 	struct sk_buff *reasm = skb->nfct_reasm;
 
@@ -225,7 +224,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
 		if (!reasm->nfct) {
 			unsigned int ret;
 
-			ret = nf_conntrack_in(PF_INET6, hooknum, reasm);
+			ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
 			if (ret != NF_ACCEPT)
 				return ret;
 		}
@@ -235,7 +234,16 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
 		return NF_ACCEPT;
 	}
 
-	return nf_conntrack_in(PF_INET6, hooknum, skb);
+	return nf_conntrack_in(net, PF_INET6, hooknum, skb);
+}
+
+static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn);
 }
 
 static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -250,7 +258,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
 			printk("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
-	return ipv6_conntrack_in(hooknum, skb, in, out, okfn);
+	return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2a105db1330..5c96d9732c7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -611,7 +611,8 @@ init_conntrack(struct net *net,
 
 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
 static inline struct nf_conn *
-resolve_normal_ct(struct sk_buff *skb,
+resolve_normal_ct(struct net *net,
+		  struct sk_buff *skb,
 		  unsigned int dataoff,
 		  u_int16_t l3num,
 		  u_int8_t protonum,
@@ -632,10 +633,9 @@ resolve_normal_ct(struct sk_buff *skb,
 	}
 
 	/* look for tuple match */
-	h = nf_conntrack_find_get(&init_net, &tuple);
+	h = nf_conntrack_find_get(net, &tuple);
 	if (!h) {
-		h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb,
-				   dataoff);
+		h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff);
 		if (!h)
 			return NULL;
 		if (IS_ERR(h))
@@ -669,7 +669,8 @@ resolve_normal_ct(struct sk_buff *skb,
 }
 
 unsigned int
-nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb)
+nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
+		struct sk_buff *skb)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -709,8 +710,8 @@ nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb)
 		return -ret;
 	}
 
-	ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
-			       &set_reply, &ctinfo);
+	ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
+			       l3proto, l4proto, &set_reply, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
 		NF_CT_STAT_INC_ATOMIC(invalid);
-- 
cgit v1.2.3-70-g09d2


From 74c51a1497033e6ff7b8096797daca233a4a30df Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:05 +0200
Subject: netfilter: netns nf_conntrack: pass netns pointer to L4 protocol's
 ->error hook

Again, it's deducible from skb, but we're going to use it for
nf_conntrack_checksum and statistics, so just pass it from upper layer.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_l4proto.h   |  2 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  8 ++++----
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  9 +++++----
 net/netfilter/nf_conntrack_core.c              | 12 +++++++-----
 net/netfilter/nf_conntrack_proto_dccp.c        |  6 +++---
 net/netfilter/nf_conntrack_proto_tcp.c         |  3 ++-
 net/netfilter/nf_conntrack_proto_udp.c         |  2 +-
 net/netfilter/nf_conntrack_proto_udplite.c     |  4 +++-
 8 files changed, 26 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index d4376e97bae..97723d33c95 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -50,7 +50,7 @@ struct nf_conntrack_l4proto
 	/* Called when a conntrack entry is destroyed */
 	void (*destroy)(struct nf_conn *ct);
 
-	int (*error)(struct sk_buff *skb, unsigned int dataoff,
+	int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 		     enum ip_conntrack_info *ctinfo,
 		     u_int8_t pf, unsigned int hooknum);
 
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index daf346377b6..8c7ed5bc959 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -123,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
 
 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
 static int
-icmp_error_message(struct sk_buff *skb,
+icmp_error_message(struct net *net, struct sk_buff *skb,
 		 enum ip_conntrack_info *ctinfo,
 		 unsigned int hooknum)
 {
@@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb,
 
 	*ctinfo = IP_CT_RELATED;
 
-	h = nf_conntrack_find_get(&init_net, &innertuple);
+	h = nf_conntrack_find_get(net, &innertuple);
 	if (!h) {
 		pr_debug("icmp_error_message: no match\n");
 		return -NF_ACCEPT;
@@ -172,7 +172,7 @@ icmp_error_message(struct sk_buff *skb,
 
 /* Small and modified version of icmp_rcv */
 static int
-icmp_error(struct sk_buff *skb, unsigned int dataoff,
+icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 	   enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
 {
 	const struct icmphdr *icmph;
@@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
 	    && icmph->type != ICMP_REDIRECT)
 		return NF_ACCEPT;
 
-	return icmp_error_message(skb, ctinfo, hooknum);
+	return icmp_error_message(net, skb, ctinfo, hooknum);
 }
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 548cf4f15c0..aabddfe2127 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -122,7 +122,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
 }
 
 static int
-icmpv6_error_message(struct sk_buff *skb,
+icmpv6_error_message(struct net *net,
+		     struct sk_buff *skb,
 		     unsigned int icmp6off,
 		     enum ip_conntrack_info *ctinfo,
 		     unsigned int hooknum)
@@ -156,7 +157,7 @@ icmpv6_error_message(struct sk_buff *skb,
 
 	*ctinfo = IP_CT_RELATED;
 
-	h = nf_conntrack_find_get(&init_net, &intuple);
+	h = nf_conntrack_find_get(net, &intuple);
 	if (!h) {
 		pr_debug("icmpv6_error: no match\n");
 		return -NF_ACCEPT;
@@ -172,7 +173,7 @@ icmpv6_error_message(struct sk_buff *skb,
 }
 
 static int
-icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
+icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 	     enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
 {
 	const struct icmp6hdr *icmp6h;
@@ -197,7 +198,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
 	if (icmp6h->icmp6_type >= 128)
 		return NF_ACCEPT;
 
-	return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
+	return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum);
 }
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5c96d9732c7..251f020c7c1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -703,11 +703,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	/* It may be an special packet, error, unclean...
 	 * inverse of the return code tells to the netfilter
 	 * core what to do with the packet. */
-	if (l4proto->error != NULL &&
-	    (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
-		NF_CT_STAT_INC_ATOMIC(error);
-		NF_CT_STAT_INC_ATOMIC(invalid);
-		return -ret;
+	if (l4proto->error != NULL) {
+		ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
+		if (ret <= 0) {
+			NF_CT_STAT_INC_ATOMIC(error);
+			NF_CT_STAT_INC_ATOMIC(invalid);
+			return -ret;
+		}
 	}
 
 	ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index edc30358dc1..6ead8da3e9e 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -545,9 +545,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-static int dccp_error(struct sk_buff *skb, unsigned int dataoff,
-		      enum ip_conntrack_info *ctinfo, u_int8_t pf,
-		      unsigned int hooknum)
+static int dccp_error(struct net *net, struct sk_buff *skb,
+		      unsigned int dataoff, enum ip_conntrack_info *ctinfo,
+		      u_int8_t pf, unsigned int hooknum)
 {
 	struct dccp_hdr _dh, *dh;
 	unsigned int dccp_len = skb->len - dataoff;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 539a8202025..4e71de2405f 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -746,7 +746,8 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
-static int tcp_error(struct sk_buff *skb,
+static int tcp_error(struct net *net,
+		     struct sk_buff *skb,
 		     unsigned int dataoff,
 		     enum ip_conntrack_info *ctinfo,
 		     u_int8_t pf,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 2a965c4a0ea..8a245beb2c9 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -89,7 +89,7 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 }
 
-static int udp_error(struct sk_buff *skb, unsigned int dataoff,
+static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 		     enum ip_conntrack_info *ctinfo,
 		     u_int8_t pf,
 		     unsigned int hooknum)
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 4fb6c8d83a8..981701919a7 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -89,7 +89,9 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 }
 
-static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
+static int udplite_error(struct net *net,
+			 struct sk_buff *skb,
+			 unsigned int dataoff,
 			 enum ip_conntrack_info *ctinfo,
 			 u_int8_t pf,
 			 unsigned int hooknum)
-- 
cgit v1.2.3-70-g09d2


From b2ce2c7479d9b60dd268203e56bb738e78fd5fda Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:05 +0200
Subject: netfilter: netns nf_conntrack: per-netns /proc/net/nf_conntrack,
 /proc/net/stat/nf_conntrack

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_standalone.c | 51 ++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5456e4b9424..02eaf872277 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -40,18 +40,20 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 EXPORT_SYMBOL_GPL(print_tuple);
 
 struct ct_iter_state {
+	struct seq_net_private p;
 	unsigned int bucket;
 };
 
 static struct hlist_node *ct_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 	struct hlist_node *n;
 
 	for (st->bucket = 0;
 	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(init_net.ct.hash[st->bucket].first);
+		n = rcu_dereference(net->ct.hash[st->bucket].first);
 		if (n)
 			return n;
 	}
@@ -61,13 +63,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq)
 static struct hlist_node *ct_get_next(struct seq_file *seq,
 				      struct hlist_node *head)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
 	while (head == NULL) {
 		if (++st->bucket >= nf_conntrack_htable_size)
 			return NULL;
-		head = rcu_dereference(init_net.ct.hash[st->bucket].first);
+		head = rcu_dereference(net->ct.hash[st->bucket].first);
 	}
 	return head;
 }
@@ -177,7 +180,7 @@ static const struct seq_operations ct_seq_ops = {
 
 static int ct_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &ct_seq_ops,
+	return seq_open_net(inode, file, &ct_seq_ops,
 			sizeof(struct ct_iter_state));
 }
 
@@ -186,7 +189,7 @@ static const struct file_operations ct_file_ops = {
 	.open    = ct_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
@@ -277,38 +280,38 @@ static const struct file_operations ct_cpu_seq_fops = {
 	.release = seq_release,
 };
 
-static int nf_conntrack_standalone_init_proc(void)
+static int nf_conntrack_standalone_init_proc(struct net *net)
 {
 	struct proc_dir_entry *pde;
 
-	pde = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops);
+	pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops);
 	if (!pde)
 		goto out_nf_conntrack;
 
-	pde = proc_create("nf_conntrack", S_IRUGO, init_net.proc_net_stat,
+	pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
 			  &ct_cpu_seq_fops);
 	if (!pde)
 		goto out_stat_nf_conntrack;
 	return 0;
 
 out_stat_nf_conntrack:
-	proc_net_remove(&init_net, "nf_conntrack");
+	proc_net_remove(net, "nf_conntrack");
 out_nf_conntrack:
 	return -ENOMEM;
 }
 
-static void nf_conntrack_standalone_fini_proc(void)
+static void nf_conntrack_standalone_fini_proc(struct net *net)
 {
-	remove_proc_entry("nf_conntrack", init_net.proc_net_stat);
-	proc_net_remove(&init_net, "nf_conntrack");
+	remove_proc_entry("nf_conntrack", net->proc_net_stat);
+	proc_net_remove(net, "nf_conntrack");
 }
 #else
-static int nf_conntrack_standalone_init_proc(void)
+static int nf_conntrack_standalone_init_proc(struct net *net)
 {
 	return 0;
 }
 
-static void nf_conntrack_standalone_fini_proc(void)
+static void nf_conntrack_standalone_fini_proc(struct net *net)
 {
 }
 #endif /* CONFIG_PROC_FS */
@@ -442,11 +445,25 @@ static void nf_conntrack_standalone_fini_sysctl(void)
 
 static int nf_conntrack_net_init(struct net *net)
 {
-	return nf_conntrack_init(net);
+	int ret;
+
+	ret = nf_conntrack_init(net);
+	if (ret < 0)
+		goto out_init;
+	ret = nf_conntrack_standalone_init_proc(net);
+	if (ret < 0)
+		goto out_proc;
+	return 0;
+
+out_proc:
+	nf_conntrack_cleanup(net);
+out_init:
+	return ret;
 }
 
 static void nf_conntrack_net_exit(struct net *net)
 {
+	nf_conntrack_standalone_fini_proc(net);
 	nf_conntrack_cleanup(net);
 }
 
@@ -462,17 +479,12 @@ static int __init nf_conntrack_standalone_init(void)
 	ret = register_pernet_subsys(&nf_conntrack_net_ops);
 	if (ret < 0)
 		goto out;
-	ret = nf_conntrack_standalone_init_proc();
-	if (ret < 0)
-		goto out_proc;
 	ret = nf_conntrack_standalone_init_sysctl();
 	if (ret < 0)
 		goto out_sysctl;
 	return 0;
 
 out_sysctl:
-	nf_conntrack_standalone_fini_proc();
-out_proc:
 	unregister_pernet_subsys(&nf_conntrack_net_ops);
 out:
 	return ret;
@@ -481,7 +493,6 @@ out:
 static void __exit nf_conntrack_standalone_fini(void)
 {
 	nf_conntrack_standalone_fini_sysctl();
-	nf_conntrack_standalone_fini_proc();
 	unregister_pernet_subsys(&nf_conntrack_net_ops);
 }
 
-- 
cgit v1.2.3-70-g09d2


From dc5129f8df7cc3f2f04b322728d71c42795d34cc Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:06 +0200
Subject: netfilter: netns nf_conntrack: per-netns
 /proc/net/nf_conntrack_expect

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_expect.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 5307316356e..6a09200049e 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -423,12 +423,13 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_related);
 
 #ifdef CONFIG_PROC_FS
 struct ct_expect_iter_state {
+	struct seq_net_private p;
 	unsigned int bucket;
 };
 
 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 {
-	struct net *net = &init_net;
+	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 	struct hlist_node *n;
 
@@ -443,7 +444,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 					     struct hlist_node *head)
 {
-	struct net *net = &init_net;
+	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
@@ -524,7 +525,7 @@ static const struct seq_operations exp_seq_ops = {
 
 static int exp_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &exp_seq_ops,
+	return seq_open_net(inode, file, &exp_seq_ops,
 			sizeof(struct ct_expect_iter_state));
 }
 
@@ -533,26 +534,26 @@ static const struct file_operations exp_file_ops = {
 	.open    = exp_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 #endif /* CONFIG_PROC_FS */
 
-static int exp_proc_init(void)
+static int exp_proc_init(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *proc;
 
-	proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops);
+	proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
 	if (!proc)
 		return -ENOMEM;
 #endif /* CONFIG_PROC_FS */
 	return 0;
 }
 
-static void exp_proc_remove(void)
+static void exp_proc_remove(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
-	proc_net_remove(&init_net, "nf_conntrack_expect");
+	proc_net_remove(net, "nf_conntrack_expect");
 #endif /* CONFIG_PROC_FS */
 }
 
@@ -581,7 +582,7 @@ int nf_conntrack_expect_init(struct net *net)
 	if (!nf_ct_expect_cachep)
 		goto err2;
 
-	err = exp_proc_init();
+	err = exp_proc_init(net);
 	if (err < 0)
 		goto err3;
 
@@ -598,7 +599,7 @@ err1:
 
 void nf_conntrack_expect_fini(struct net *net)
 {
-	exp_proc_remove();
+	exp_proc_remove(net);
 	kmem_cache_destroy(nf_ct_expect_cachep);
 	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
 			     nf_ct_expect_hsize);
-- 
cgit v1.2.3-70-g09d2


From 5e6b29972b7e9c9c39882227e36fe0cd3463fe96 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:06 +0200
Subject: netfilter: netns nf_conntrack: per-netns /proc/net/ip_conntrack,
 /proc/net/stat/ip_conntrack, /proc/net/ip_conntrack_expect

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c   | 57 ++++++++++++++--------
 1 file changed, 38 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f8636a57e8c..b2940836d10 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -21,18 +21,20 @@
 #include <net/netfilter/nf_conntrack_acct.h>
 
 struct ct_iter_state {
+	struct seq_net_private p;
 	unsigned int bucket;
 };
 
 static struct hlist_node *ct_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 	struct hlist_node *n;
 
 	for (st->bucket = 0;
 	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(init_net.ct.hash[st->bucket].first);
+		n = rcu_dereference(net->ct.hash[st->bucket].first);
 		if (n)
 			return n;
 	}
@@ -42,13 +44,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq)
 static struct hlist_node *ct_get_next(struct seq_file *seq,
 				      struct hlist_node *head)
 {
+	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
 	while (head == NULL) {
 		if (++st->bucket >= nf_conntrack_htable_size)
 			return NULL;
-		head = rcu_dereference(init_net.ct.hash[st->bucket].first);
+		head = rcu_dereference(net->ct.hash[st->bucket].first);
 	}
 	return head;
 }
@@ -158,8 +161,8 @@ static const struct seq_operations ct_seq_ops = {
 
 static int ct_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &ct_seq_ops,
-			sizeof(struct ct_iter_state));
+	return seq_open_net(inode, file, &ct_seq_ops,
+			    sizeof(struct ct_iter_state));
 }
 
 static const struct file_operations ct_file_ops = {
@@ -167,17 +170,18 @@ static const struct file_operations ct_file_ops = {
 	.open    = ct_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 /* expects */
 struct ct_expect_iter_state {
+	struct seq_net_private p;
 	unsigned int bucket;
 };
 
 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 {
-	struct net *net = &init_net;
+	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 	struct hlist_node *n;
 
@@ -192,7 +196,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 					     struct hlist_node *head)
 {
-	struct net *net = &init_net;
+	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
@@ -267,8 +271,8 @@ static const struct seq_operations exp_seq_ops = {
 
 static int exp_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &exp_seq_ops,
-			sizeof(struct ct_expect_iter_state));
+	return seq_open_net(inode, file, &exp_seq_ops,
+			    sizeof(struct ct_expect_iter_state));
 }
 
 static const struct file_operations ip_exp_file_ops = {
@@ -276,7 +280,7 @@ static const struct file_operations ip_exp_file_ops = {
 	.open    = exp_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
@@ -367,36 +371,51 @@ static const struct file_operations ct_cpu_seq_fops = {
 	.release = seq_release,
 };
 
-int __init nf_conntrack_ipv4_compat_init(void)
+static int __net_init ip_conntrack_net_init(struct net *net)
 {
 	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
 
-	proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops);
+	proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops);
 	if (!proc)
 		goto err1;
 
-	proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440,
+	proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440,
 					&ip_exp_file_ops);
 	if (!proc_exp)
 		goto err2;
 
 	proc_stat = proc_create("ip_conntrack", S_IRUGO,
-				init_net.proc_net_stat, &ct_cpu_seq_fops);
+				net->proc_net_stat, &ct_cpu_seq_fops);
 	if (!proc_stat)
 		goto err3;
 	return 0;
 
 err3:
-	proc_net_remove(&init_net, "ip_conntrack_expect");
+	proc_net_remove(net, "ip_conntrack_expect");
 err2:
-	proc_net_remove(&init_net, "ip_conntrack");
+	proc_net_remove(net, "ip_conntrack");
 err1:
 	return -ENOMEM;
 }
 
+static void __net_exit ip_conntrack_net_exit(struct net *net)
+{
+	remove_proc_entry("ip_conntrack", net->proc_net_stat);
+	proc_net_remove(net, "ip_conntrack_expect");
+	proc_net_remove(net, "ip_conntrack");
+}
+
+static struct pernet_operations ip_conntrack_net_ops = {
+	.init = ip_conntrack_net_init,
+	.exit = ip_conntrack_net_exit,
+};
+
+int __init nf_conntrack_ipv4_compat_init(void)
+{
+	return register_pernet_subsys(&ip_conntrack_net_ops);
+}
+
 void __exit nf_conntrack_ipv4_compat_fini(void)
 {
-	remove_proc_entry("ip_conntrack", init_net.proc_net_stat);
-	proc_net_remove(&init_net, "ip_conntrack_expect");
-	proc_net_remove(&init_net, "ip_conntrack");
+	unregister_pernet_subsys(&ip_conntrack_net_ops);
 }
-- 
cgit v1.2.3-70-g09d2


From b76a461f11eb5f32d37a9c8eae7b2f3b3f261b43 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:06 +0200
Subject: netns: export netns list

Conntrack code will use it for
a) removing expectations and helpers when corresponding module is removed, and
b) removing conntracks when L3 protocol conntrack module is removed.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/core/net_namespace.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7c52fe277b6..b0dc818a91d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -18,6 +18,7 @@ static struct list_head *first_device = &pernet_list;
 static DEFINE_MUTEX(net_mutex);
 
 LIST_HEAD(net_namespace_list);
+EXPORT_SYMBOL_GPL(net_namespace_list);
 
 struct net init_net;
 EXPORT_SYMBOL(init_net);
-- 
cgit v1.2.3-70-g09d2


From 68047937677f2dffb5c47b57ce8baba5714b2142 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:06 +0200
Subject: netfilter: netns nf_conntrack: unregister helper in every netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_helper.c | 40 ++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 920e778539a..9c06b9f86ad 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -123,29 +123,18 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
 
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
+					     struct net *net)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
 	const struct hlist_node *n, *next;
 	unsigned int i;
 
-	mutex_lock(&nf_ct_helper_mutex);
-	hlist_del_rcu(&me->hnode);
-	nf_ct_helper_count--;
-	mutex_unlock(&nf_ct_helper_mutex);
-
-	/* Make sure every nothing is still using the helper unless its a
-	 * connection in the hash.
-	 */
-	synchronize_rcu();
-
-	spin_lock_bh(&nf_conntrack_lock);
-
 	/* Get rid of expectations */
 	for (i = 0; i < nf_ct_expect_hsize; i++) {
 		hlist_for_each_entry_safe(exp, n, next,
-					  &init_net.ct.expect_hash[i], hnode) {
+					  &net->ct.expect_hash[i], hnode) {
 			struct nf_conn_help *help = nfct_help(exp->master);
 			if ((help->helper == me || exp->helper == me) &&
 			    del_timer(&exp->timeout)) {
@@ -156,12 +145,31 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	}
 
 	/* Get rid of expecteds, set helpers to NULL. */
-	hlist_for_each_entry(h, n, &init_net.ct.unconfirmed, hnode)
+	hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode)
 		unhelp(h, me);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode)
+		hlist_for_each_entry(h, n, &net->ct.hash[i], hnode)
 			unhelp(h, me);
 	}
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+	struct net *net;
+
+	mutex_lock(&nf_ct_helper_mutex);
+	hlist_del_rcu(&me->hnode);
+	nf_ct_helper_count--;
+	mutex_unlock(&nf_ct_helper_mutex);
+
+	/* Make sure every nothing is still using the helper unless its a
+	 * connection in the hash.
+	 */
+	synchronize_rcu();
+
+	spin_lock_bh(&nf_conntrack_lock);
+	for_each_net(net)
+		__nf_conntrack_helper_unregister(me, net);
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
-- 
cgit v1.2.3-70-g09d2


From 678d66753091a4102910392fb6198a6c6ce7f510 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:07 +0200
Subject: netfilter: netns nf_conntrack: cleanup after L3 and L4 proto
 unregister in every netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_proto.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 3a2f7ef997f..a59a307e685 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -207,6 +207,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
 
 void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 {
+	struct net *net;
+
 	BUG_ON(proto->l3proto >= AF_MAX);
 
 	mutex_lock(&nf_ct_proto_mutex);
@@ -219,7 +221,8 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(&init_net, kill_l3proto, proto);
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, kill_l3proto, proto);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
 
@@ -316,6 +319,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
 
 void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 {
+	struct net *net;
+
 	BUG_ON(l4proto->l3proto >= PF_MAX);
 
 	mutex_lock(&nf_ct_proto_mutex);
@@ -328,7 +333,8 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(&init_net, kill_l4proto, l4proto);
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
 
-- 
cgit v1.2.3-70-g09d2


From a71996fccce4b2086a26036aa3c915365ca36926 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:07 +0200
Subject: netfilter: netns nf_conntrack: pass conntrack to
 nf_conntrack_event_cache() not skb

This is cleaner, we already know conntrack to which event is relevant.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_ecache.h    |  4 +---
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  2 +-
 net/ipv4/netfilter/nf_nat_helper.c             |  2 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  2 +-
 net/netfilter/nf_conntrack_core.c              | 10 +++++-----
 net/netfilter/nf_conntrack_ftp.c               |  9 +++++----
 net/netfilter/nf_conntrack_proto_gre.c         |  2 +-
 net/netfilter/nf_conntrack_proto_sctp.c        |  4 ++--
 net/netfilter/nf_conntrack_proto_tcp.c         |  6 +++---
 net/netfilter/nf_conntrack_proto_udp.c         |  2 +-
 net/netfilter/nf_conntrack_proto_udplite.c     |  2 +-
 net/netfilter/xt_CONNMARK.c                    |  8 ++++----
 net/netfilter/xt_CONNSECMARK.c                 |  2 +-
 13 files changed, 27 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index f0b9078235c..c1b406cecf9 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -28,10 +28,8 @@ extern void __nf_ct_event_cache_init(struct nf_conn *ct);
 extern void nf_ct_event_cache_flush(void);
 
 static inline void
-nf_conntrack_event_cache(enum ip_conntrack_events event,
-			 const struct sk_buff *skb)
+nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 {
-	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
 	struct nf_conntrack_ecache *ecache;
 
 	local_bh_disable();
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 8c7ed5bc959..205ba399d4a 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -91,7 +91,7 @@ static int icmp_packet(struct nf_conn *ct,
 			nf_ct_kill_acct(ct, ctinfo, skb);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
-		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
 	}
 
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 112dcfa1290..cf7a42bf982 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -193,7 +193,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 		nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
 					ct, CTINFO2DIR(ctinfo));
 
-		nf_conntrack_event_cache(IPCT_NATSEQADJ, skb);
+		nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
 	}
 	return 1;
 }
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index aabddfe2127..df04de91e6e 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -93,7 +93,7 @@ static int icmpv6_packet(struct nf_conn *ct,
 			nf_ct_kill_acct(ct, ctinfo, skb);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
-		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
 	}
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 251f020c7c1..01f59c57730 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -370,14 +370,14 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	spin_unlock_bh(&nf_conntrack_lock);
 	help = nfct_help(ct);
 	if (help && help->helper)
-		nf_conntrack_event_cache(IPCT_HELPER, skb);
+		nf_conntrack_event_cache(IPCT_HELPER, ct);
 #ifdef CONFIG_NF_NAT_NEEDED
 	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
 	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_NATINFO, skb);
+		nf_conntrack_event_cache(IPCT_NATINFO, ct);
 #endif
 	nf_conntrack_event_cache(master_ct(ct) ?
-				 IPCT_RELATED : IPCT_NEW, skb);
+				 IPCT_RELATED : IPCT_NEW, ct);
 	return NF_ACCEPT;
 
 out:
@@ -740,7 +740,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	}
 
 	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_STATUS, skb);
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
 
 	return ret;
 }
@@ -853,7 +853,7 @@ acct:
 
 	/* must be unlocked when calling event cache */
 	if (event)
-		nf_conntrack_event_cache(event, skb);
+		nf_conntrack_event_cache(event, ct);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index bb20672fe03..4f7107107e9 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -318,7 +318,8 @@ static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir)
 }
 
 /* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir,
+static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
+			  struct nf_ct_ftp_master *info, int dir,
 			  struct sk_buff *skb)
 {
 	unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
@@ -336,11 +337,11 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir,
 
 	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
 		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
-		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
 	} else if (oldest != NUM_SEQ_TO_REMEMBER &&
 		   after(nl_seq, info->seq_aft_nl[dir][oldest])) {
 		info->seq_aft_nl[dir][oldest] = nl_seq;
-		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
 	}
 }
 
@@ -509,7 +510,7 @@ out_update_nl:
 	/* Now if this ends in \n, update ftp info.  Seq may have been
 	 * adjusted by NAT code. */
 	if (ends_in_nl)
-		update_nl_seq(seq, ct_ftp_info, dir, skb);
+		update_nl_seq(ct, seq, ct_ftp_info, dir, skb);
  out:
 	spin_unlock_bh(&nf_ftp_lock);
 	return ret;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index c5a78220fa3..5b1273a01fe 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -229,7 +229,7 @@ static int gre_packet(struct nf_conn *ct,
 				   ct->proto.gre.stream_timeout);
 		/* Also, more likely to be important, and not a probe. */
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-		nf_conntrack_event_cache(IPCT_STATUS, skb);
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
 	} else
 		nf_ct_refresh_acct(ct, ctinfo, skb,
 				   ct->proto.gre.timeout);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b5a90596d3f..ae8c2609e23 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -369,7 +369,7 @@ static int sctp_packet(struct nf_conn *ct,
 
 		ct->proto.sctp.state = new_state;
 		if (old_state != new_state)
-			nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+			nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 	}
 	write_unlock_bh(&sctp_lock);
 
@@ -380,7 +380,7 @@ static int sctp_packet(struct nf_conn *ct,
 	    new_state == SCTP_CONNTRACK_ESTABLISHED) {
 		pr_debug("Setting assured bit\n");
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-		nf_conntrack_event_cache(IPCT_STATUS, skb);
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
 	}
 
 	return NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4e71de2405f..b5d62d66e02 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -969,9 +969,9 @@ static int tcp_packet(struct nf_conn *ct,
 		timeout = tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
-	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
 	if (new_state != old_state)
-		nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 
 	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
 		/* If only reply is a RST, we can consider ourselves not to
@@ -990,7 +990,7 @@ static int tcp_packet(struct nf_conn *ct,
 		   after SYN_RECV or a valid answer for a picked up
 		   connection. */
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-		nf_conntrack_event_cache(IPCT_STATUS, skb);
+		nf_conntrack_event_cache(IPCT_STATUS, ct);
 	}
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
 
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8a245beb2c9..e0ee89e179c 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -75,7 +75,7 @@ static int udp_packet(struct nf_conn *ct,
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
-			nf_conntrack_event_cache(IPCT_STATUS, skb);
+			nf_conntrack_event_cache(IPCT_STATUS, ct);
 	} else
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout);
 
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 981701919a7..c5b77c8f86c 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct,
 				   nf_ct_udplite_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
-			nf_conntrack_event_cache(IPCT_STATUS, skb);
+			nf_conntrack_event_cache(IPCT_STATUS, ct);
 	} else
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout);
 
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index e72e5d01752..e1415c3f5c9 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -54,7 +54,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
 			newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
 			if (newmark != ct->mark) {
 				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, skb);
+				nf_conntrack_event_cache(IPCT_MARK, ct);
 			}
 			break;
 		case XT_CONNMARK_SAVE:
@@ -62,7 +62,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
 				  (skb->mark & markinfo->mask);
 			if (ct->mark != newmark) {
 				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, skb);
+				nf_conntrack_event_cache(IPCT_MARK, ct);
 			}
 			break;
 		case XT_CONNMARK_RESTORE:
@@ -95,7 +95,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in,
 		newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
 		if (ct->mark != newmark) {
 			ct->mark = newmark;
-			nf_conntrack_event_cache(IPCT_MARK, skb);
+			nf_conntrack_event_cache(IPCT_MARK, ct);
 		}
 		break;
 	case XT_CONNMARK_SAVE:
@@ -103,7 +103,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in,
 		          (skb->mark & info->nfmask);
 		if (ct->mark != newmark) {
 			ct->mark = newmark;
-			nf_conntrack_event_cache(IPCT_MARK, skb);
+			nf_conntrack_event_cache(IPCT_MARK, ct);
 		}
 		break;
 	case XT_CONNMARK_RESTORE:
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index ae939e54dfa..5f221c3bd35 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -43,7 +43,7 @@ static void secmark_save(const struct sk_buff *skb)
 		ct = nf_ct_get(skb, &ctinfo);
 		if (ct && !ct->secmark) {
 			ct->secmark = skb->secmark;
-			nf_conntrack_event_cache(IPCT_SECMARK, skb);
+			nf_conntrack_event_cache(IPCT_SECMARK, ct);
 		}
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From 6058fa6bb96a5b6145cba10c5171f09c2783ca69 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:07 +0200
Subject: netfilter: netns nf_conntrack: per-netns event cache

Heh, last minute proof-reading of this patch made me think,
that this is actually unneeded, simply because "ct" pointers will be
different for different conntracks in different netns, just like they
are different in one netns.

Not so sure anymore.

[Patrick: pointers will be different, flushing can only be done while
 inactive though and thus it needs to be per netns]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_ecache.h | 22 ++++++++++++++++------
 include/net/netns/conntrack.h               |  5 +++++
 net/netfilter/nf_conntrack_core.c           | 12 +++++++++---
 net/netfilter/nf_conntrack_ecache.c         | 26 +++++++++++++++++++-------
 4 files changed, 49 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index c1b406cecf9..35f814c1e2c 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -8,6 +8,7 @@
 
 #include <linux/notifier.h>
 #include <linux/interrupt.h>
+#include <net/net_namespace.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -15,9 +16,6 @@ struct nf_conntrack_ecache {
 	struct nf_conn *ct;
 	unsigned int events;
 };
-DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
-
-#define CONNTRACK_ECACHE(x)	(__get_cpu_var(nf_conntrack_ecache).x)
 
 extern struct atomic_notifier_head nf_conntrack_chain;
 extern int nf_conntrack_register_notifier(struct notifier_block *nb);
@@ -25,15 +23,16 @@ extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
 
 extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
 extern void __nf_ct_event_cache_init(struct nf_conn *ct);
-extern void nf_ct_event_cache_flush(void);
+extern void nf_ct_event_cache_flush(struct net *net);
 
 static inline void
 nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *ecache;
 
 	local_bh_disable();
-	ecache = &__get_cpu_var(nf_conntrack_ecache);
+	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
 	if (ct != ecache->ct)
 		__nf_ct_event_cache_init(ct);
 	ecache->events |= event;
@@ -58,6 +57,9 @@ nf_ct_expect_event(enum ip_conntrack_expect_events event,
 	atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp);
 }
 
+extern int nf_conntrack_ecache_init(struct net *net);
+extern void nf_conntrack_ecache_fini(struct net *net);
+
 #else /* CONFIG_NF_CONNTRACK_EVENTS */
 
 static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
@@ -67,7 +69,15 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event,
 static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
 static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
 				      struct nf_conntrack_expect *exp) {}
-static inline void nf_ct_event_cache_flush(void) {}
+static inline void nf_ct_event_cache_flush(struct net *net) {}
+
+static inline int nf_conntrack_ecache_init(struct net *net)
+{
+	return 0;
+
+static inline void nf_conntrack_ecache_fini(struct net *net)
+{
+}
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
 
 #endif /*_NF_CONNTRACK_ECACHE_H*/
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 6ddf58e142a..9d5c1623c51 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -4,12 +4,17 @@
 #include <linux/list.h>
 #include <asm/atomic.h>
 
+struct nf_conntrack_ecache;
+
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
 	struct hlist_head	*hash;
 	struct hlist_head	*expect_hash;
 	struct hlist_head	unconfirmed;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	struct nf_conntrack_ecache *ecache;
+#endif
 	int			hash_vmalloc;
 	int			expect_vmalloc;
 };
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 01f59c57730..b55944e5e4e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1023,7 +1023,8 @@ void nf_conntrack_cleanup(struct net *net)
 	   delete... */
 	synchronize_net();
 
-	nf_ct_event_cache_flush();
+	nf_ct_event_cache_flush(net);
+	nf_conntrack_ecache_fini(net);
  i_see_dead_people:
 	nf_conntrack_flush(net);
 	if (atomic_read(&net->ct.count) != 0) {
@@ -1151,11 +1152,14 @@ int nf_conntrack_init(struct net *net)
 		max_factor = 4;
 	}
 	atomic_set(&net->ct.count, 0);
+	ret = nf_conntrack_ecache_init(net);
+	if (ret < 0)
+		goto err_ecache;
 	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
 						  &net->ct.hash_vmalloc);
 	if (!net->ct.hash) {
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
-		goto err_out;
+		goto err_hash;
 	}
 	INIT_HLIST_HEAD(&net->ct.unconfirmed);
 
@@ -1215,6 +1219,8 @@ err_free_conntrack_slab:
 err_free_hash:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
-err_out:
+err_hash:
+	nf_conntrack_ecache_fini(net);
+err_ecache:
 	return -ENOMEM;
 }
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 83c41ac3505..a5f5e2e65d1 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -29,9 +29,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_chain);
 ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
 EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
 
-DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
-EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
-
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
 static inline void
@@ -51,10 +48,11 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
  * by code prior to async packet handling for freeing the skb */
 void nf_ct_deliver_cached_events(const struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *ecache;
 
 	local_bh_disable();
-	ecache = &__get_cpu_var(nf_conntrack_ecache);
+	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
 	if (ecache->ct == ct)
 		__nf_ct_deliver_cached_events(ecache);
 	local_bh_enable();
@@ -64,10 +62,11 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 /* Deliver cached events for old pending events, if current conntrack != old */
 void __nf_ct_event_cache_init(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *ecache;
 
 	/* take care of delivering potentially old events */
-	ecache = &__get_cpu_var(nf_conntrack_ecache);
+	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
 	BUG_ON(ecache->ct == ct);
 	if (ecache->ct)
 		__nf_ct_deliver_cached_events(ecache);
@@ -79,18 +78,31 @@ EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
 
 /* flush the event cache - touches other CPU's data and must not be called
  * while packets are still passing through the code */
-void nf_ct_event_cache_flush(void)
+void nf_ct_event_cache_flush(struct net *net)
 {
 	struct nf_conntrack_ecache *ecache;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		ecache = &per_cpu(nf_conntrack_ecache, cpu);
+		ecache = per_cpu_ptr(net->ct.ecache, cpu);
 		if (ecache->ct)
 			nf_ct_put(ecache->ct);
 	}
 }
 
+int nf_conntrack_ecache_init(struct net *net)
+{
+	net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
+	if (!net->ct.ecache)
+		return -ENOMEM;
+	return 0;
+}
+
+void nf_conntrack_ecache_fini(struct net *net)
+{
+	free_percpu(net->ct.ecache);
+}
+
 int nf_conntrack_register_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
-- 
cgit v1.2.3-70-g09d2


From 0d55af8791bfb42e04cc456b348910582f230343 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:07 +0200
Subject: netfilter: netns nf_conntrack: per-netns statistics

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h               |  8 ++--
 include/net/netns/conntrack.h                      |  1 +
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c   |  4 +-
 net/netfilter/nf_conntrack_core.c                  | 49 ++++++++++++----------
 net/netfilter/nf_conntrack_expect.c                |  4 +-
 net/netfilter/nf_conntrack_standalone.c            |  4 +-
 6 files changed, 38 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f5447f14304..c95561050f7 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -290,12 +290,12 @@ extern unsigned int nf_conntrack_htable_size;
 extern int nf_conntrack_checksum;
 extern int nf_conntrack_max;
 
-DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
-#define NF_CT_STAT_INC_ATOMIC(count)			\
+#define NF_CT_STAT_INC(net, count)	\
+	(per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++)
+#define NF_CT_STAT_INC_ATOMIC(net, count)		\
 do {							\
 	local_bh_disable();				\
-	__get_cpu_var(nf_conntrack_stat).count++;	\
+	per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++;	\
 	local_bh_enable();				\
 } while (0)
 
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 9d5c1623c51..fc0a46d64cc 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -12,6 +12,7 @@ struct netns_ct {
 	struct hlist_head	*hash;
 	struct hlist_head	*expect_hash;
 	struct hlist_head	unconfirmed;
+	struct ip_conntrack_stat *stat;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	struct nf_conntrack_ecache *ecache;
 #endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index b2940836d10..fdc85b37078 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -294,7 +294,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return &per_cpu(nf_conntrack_stat, cpu);
+		return per_cpu_ptr(init_net.ct.stat, cpu);
 	}
 
 	return NULL;
@@ -308,7 +308,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return &per_cpu(nf_conntrack_stat, cpu);
+		return per_cpu_ptr(init_net.ct.stat, cpu);
 	}
 
 	return NULL;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b55944e5e4e..1e87fa0cd3a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,9 +56,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 unsigned int nf_ct_log_invalid __read_mostly;
 static struct kmem_cache *nf_conntrack_cachep __read_mostly;
 
-DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
-
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
@@ -171,6 +168,7 @@ static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_l4proto *l4proto;
 
 	pr_debug("destroy_conntrack(%p)\n", ct);
@@ -203,7 +201,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 		hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
 	}
 
-	NF_CT_STAT_INC(delete);
+	NF_CT_STAT_INC(net, delete);
 	spin_unlock_bh(&nf_conntrack_lock);
 
 	if (ct->master)
@@ -216,6 +214,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct nf_conn *ct = (void *)ul_conntrack;
+	struct net *net = nf_ct_net(ct);
 	struct nf_conn_help *help = nfct_help(ct);
 	struct nf_conntrack_helper *helper;
 
@@ -230,7 +229,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	spin_lock_bh(&nf_conntrack_lock);
 	/* Inside lock so preempt is disabled on module removal path.
 	 * Otherwise we can get spurious warnings. */
-	NF_CT_STAT_INC(delete_list);
+	NF_CT_STAT_INC(net, delete_list);
 	clean_from_lists(ct);
 	spin_unlock_bh(&nf_conntrack_lock);
 	nf_ct_put(ct);
@@ -249,11 +248,11 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 	local_bh_disable();
 	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
 		if (nf_ct_tuple_equal(tuple, &h->tuple)) {
-			NF_CT_STAT_INC(found);
+			NF_CT_STAT_INC(net, found);
 			local_bh_enable();
 			return h;
 		}
-		NF_CT_STAT_INC(searched);
+		NF_CT_STAT_INC(net, searched);
 	}
 	local_bh_enable();
 
@@ -366,7 +365,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	add_timer(&ct->timeout);
 	atomic_inc(&ct->ct_general.use);
 	set_bit(IPS_CONFIRMED_BIT, &ct->status);
-	NF_CT_STAT_INC(insert);
+	NF_CT_STAT_INC(net, insert);
 	spin_unlock_bh(&nf_conntrack_lock);
 	help = nfct_help(ct);
 	if (help && help->helper)
@@ -381,7 +380,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	return NF_ACCEPT;
 
 out:
-	NF_CT_STAT_INC(insert_failed);
+	NF_CT_STAT_INC(net, insert_failed);
 	spin_unlock_bh(&nf_conntrack_lock);
 	return NF_DROP;
 }
@@ -405,11 +404,11 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
 		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
 		    nf_ct_tuple_equal(tuple, &h->tuple)) {
-			NF_CT_STAT_INC(found);
+			NF_CT_STAT_INC(net, found);
 			rcu_read_unlock_bh();
 			return 1;
 		}
-		NF_CT_STAT_INC(searched);
+		NF_CT_STAT_INC(net, searched);
 	}
 	rcu_read_unlock_bh();
 
@@ -454,7 +453,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	if (del_timer(&ct->timeout)) {
 		death_by_timeout((unsigned long)ct);
 		dropped = 1;
-		NF_CT_STAT_INC_ATOMIC(early_drop);
+		NF_CT_STAT_INC_ATOMIC(net, early_drop);
 	}
 	nf_ct_put(ct);
 	return dropped;
@@ -581,7 +580,7 @@ init_conntrack(struct net *net,
 		ct->secmark = exp->master->secmark;
 #endif
 		nf_conntrack_get(&ct->master->ct_general);
-		NF_CT_STAT_INC(expect_new);
+		NF_CT_STAT_INC(net, expect_new);
 	} else {
 		struct nf_conntrack_helper *helper;
 
@@ -591,7 +590,7 @@ init_conntrack(struct net *net,
 			if (help)
 				rcu_assign_pointer(help->helper, helper);
 		}
-		NF_CT_STAT_INC(new);
+		NF_CT_STAT_INC(net, new);
 	}
 
 	/* Overload tuple linked list to put us in unconfirmed list. */
@@ -683,7 +682,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 
 	/* Previously seen (loopback or untracked)?  Ignore. */
 	if (skb->nfct) {
-		NF_CT_STAT_INC_ATOMIC(ignore);
+		NF_CT_STAT_INC_ATOMIC(net, ignore);
 		return NF_ACCEPT;
 	}
 
@@ -693,8 +692,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 				   &dataoff, &protonum);
 	if (ret <= 0) {
 		pr_debug("not prepared to track yet or error occured\n");
-		NF_CT_STAT_INC_ATOMIC(error);
-		NF_CT_STAT_INC_ATOMIC(invalid);
+		NF_CT_STAT_INC_ATOMIC(net, error);
+		NF_CT_STAT_INC_ATOMIC(net, invalid);
 		return -ret;
 	}
 
@@ -706,8 +705,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	if (l4proto->error != NULL) {
 		ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
 		if (ret <= 0) {
-			NF_CT_STAT_INC_ATOMIC(error);
-			NF_CT_STAT_INC_ATOMIC(invalid);
+			NF_CT_STAT_INC_ATOMIC(net, error);
+			NF_CT_STAT_INC_ATOMIC(net, invalid);
 			return -ret;
 		}
 	}
@@ -716,13 +715,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 			       l3proto, l4proto, &set_reply, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
-		NF_CT_STAT_INC_ATOMIC(invalid);
+		NF_CT_STAT_INC_ATOMIC(net, invalid);
 		return NF_ACCEPT;
 	}
 
 	if (IS_ERR(ct)) {
 		/* Too stressed to deal. */
-		NF_CT_STAT_INC_ATOMIC(drop);
+		NF_CT_STAT_INC_ATOMIC(net, drop);
 		return NF_DROP;
 	}
 
@@ -735,7 +734,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		pr_debug("nf_conntrack_in: Can't track with proto module\n");
 		nf_conntrack_put(skb->nfct);
 		skb->nfct = NULL;
-		NF_CT_STAT_INC_ATOMIC(invalid);
+		NF_CT_STAT_INC_ATOMIC(net, invalid);
 		return -ret;
 	}
 
@@ -1043,6 +1042,7 @@ void nf_conntrack_cleanup(struct net *net)
 
 	nf_conntrack_acct_fini();
 	nf_conntrack_expect_fini(net);
+	free_percpu(net->ct.stat);
 	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
 }
@@ -1152,6 +1152,9 @@ int nf_conntrack_init(struct net *net)
 		max_factor = 4;
 	}
 	atomic_set(&net->ct.count, 0);
+	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+	if (!net->ct.stat)
+		goto err_stat;
 	ret = nf_conntrack_ecache_init(net);
 	if (ret < 0)
 		goto err_ecache;
@@ -1222,5 +1225,7 @@ err_free_hash:
 err_hash:
 	nf_conntrack_ecache_fini(net);
 err_ecache:
+	free_percpu(net->ct.stat);
+err_stat:
 	return -ENOMEM;
 }
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 6a09200049e..b7f75117161 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -53,7 +53,7 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 	master_help->expecting[exp->class]--;
 	nf_ct_expect_put(exp);
 
-	NF_CT_STAT_INC(expect_delete);
+	NF_CT_STAT_INC(net, expect_delete);
 }
 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
 
@@ -326,7 +326,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	add_timer(&exp->timeout);
 
 	atomic_inc(&exp->use);
-	NF_CT_STAT_INC(expect_create);
+	NF_CT_STAT_INC(net, expect_create);
 }
 
 /* Race with expectations being used means we could have none to find; OK. */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 02eaf872277..a4fdbbf363a 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -203,7 +203,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu + 1;
-		return &per_cpu(nf_conntrack_stat, cpu);
+		return per_cpu_ptr(init_net.ct.stat, cpu);
 	}
 
 	return NULL;
@@ -217,7 +217,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu + 1;
-		return &per_cpu(nf_conntrack_stat, cpu);
+		return per_cpu_ptr(init_net.ct.stat, cpu);
 	}
 
 	return NULL;
-- 
cgit v1.2.3-70-g09d2


From 8e9df80180b73d4107bf8fbf28b1633c541d2770 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:08 +0200
Subject: netfilter: netns nf_conntrack: per-netns /proc/net/stat/nf_conntrack,
 /proc/net/stat/ip_conntrack

Show correct conntrack count, while I'm at it.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 14 +++++++++-----
 net/netfilter/nf_conntrack_standalone.c               | 14 +++++++++-----
 2 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index fdc85b37078..313ebf00ee3 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -285,6 +285,7 @@ static const struct file_operations ip_exp_file_ops = {
 
 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct net *net = seq_file_net(seq);
 	int cpu;
 
 	if (*pos == 0)
@@ -294,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return per_cpu_ptr(init_net.ct.stat, cpu);
+		return per_cpu_ptr(net->ct.stat, cpu);
 	}
 
 	return NULL;
@@ -302,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct net *net = seq_file_net(seq);
 	int cpu;
 
 	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return per_cpu_ptr(init_net.ct.stat, cpu);
+		return per_cpu_ptr(net->ct.stat, cpu);
 	}
 
 	return NULL;
@@ -320,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
 
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
-	unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
+	struct net *net = seq_file_net(seq);
+	unsigned int nr_conntracks = atomic_read(&net->ct.count);
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -360,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = {
 
 static int ct_cpu_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ct_cpu_seq_ops);
+	return seq_open_net(inode, file, &ct_cpu_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations ct_cpu_seq_fops = {
@@ -368,7 +372,7 @@ static const struct file_operations ct_cpu_seq_fops = {
 	.open    = ct_cpu_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_net,
 };
 
 static int __net_init ip_conntrack_net_init(struct net *net)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index a4fdbbf363a..169760ddc16 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -194,6 +194,7 @@ static const struct file_operations ct_file_ops = {
 
 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct net *net = seq_file_net(seq);
 	int cpu;
 
 	if (*pos == 0)
@@ -203,7 +204,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu + 1;
-		return per_cpu_ptr(init_net.ct.stat, cpu);
+		return per_cpu_ptr(net->ct.stat, cpu);
 	}
 
 	return NULL;
@@ -211,13 +212,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct net *net = seq_file_net(seq);
 	int cpu;
 
 	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu + 1;
-		return per_cpu_ptr(init_net.ct.stat, cpu);
+		return per_cpu_ptr(net->ct.stat, cpu);
 	}
 
 	return NULL;
@@ -229,7 +231,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
 
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
-	unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
+	struct net *net = seq_file_net(seq);
+	unsigned int nr_conntracks = atomic_read(&net->ct.count);
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -269,7 +272,8 @@ static const struct seq_operations ct_cpu_seq_ops = {
 
 static int ct_cpu_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ct_cpu_seq_ops);
+	return seq_open_net(inode, file, &ct_cpu_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations ct_cpu_seq_fops = {
@@ -277,7 +281,7 @@ static const struct file_operations ct_cpu_seq_fops = {
 	.open	 = ct_cpu_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_net,
 };
 
 static int nf_conntrack_standalone_init_proc(struct net *net)
-- 
cgit v1.2.3-70-g09d2


From 802507071b72ed5025747126099cbc6d1542f596 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:08 +0200
Subject: netfilter: netns nf_conntrack: per-netns
 net.netfilter.nf_conntrack_count sysctl

Note, sysctl table is always duplicated, this is simpler and less
special-cased.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/conntrack.h           |  4 ++
 net/netfilter/nf_conntrack_standalone.c | 73 ++++++++++++++++++---------------
 2 files changed, 45 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index fc0a46d64cc..2b50758df6a 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -4,6 +4,7 @@
 #include <linux/list.h>
 #include <asm/atomic.h>
 
+struct ctl_table_header;
 struct nf_conntrack_ecache;
 
 struct netns_ct {
@@ -15,6 +16,9 @@ struct netns_ct {
 	struct ip_conntrack_stat *stat;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	struct nf_conntrack_ecache *ecache;
+#endif
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_header	*sysctl_header;
 #endif
 	int			hash_vmalloc;
 	int			expect_vmalloc;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 169760ddc16..64b4f95b367 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -330,7 +330,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
 static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
-static struct ctl_table_header *nf_ct_sysctl_header;
 static struct ctl_table_header *nf_ct_netfilter_header;
 
 static ctl_table nf_ct_sysctl_table[] = {
@@ -409,40 +408,58 @@ static struct ctl_path nf_ct_path[] = {
 
 EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
 
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
-	nf_ct_netfilter_header =
-		register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
-	if (!nf_ct_netfilter_header)
-		goto out;
-
-	nf_ct_sysctl_header =
-		 register_sysctl_paths(nf_net_netfilter_sysctl_path,
-					nf_ct_sysctl_table);
-	if (!nf_ct_sysctl_header)
+	struct ctl_table *table;
+
+	if (net_eq(net, &init_net)) {
+		nf_ct_netfilter_header =
+		       register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
+		if (!nf_ct_netfilter_header)
+			goto out;
+	}
+
+	table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out_kmemdup;
+
+	table[1].data = &net->ct.count;
+
+	net->ct.sysctl_header = register_net_sysctl_table(net,
+					nf_net_netfilter_sysctl_path, table);
+	if (!net->ct.sysctl_header)
 		goto out_unregister_netfilter;
 
 	return 0;
 
 out_unregister_netfilter:
-	unregister_sysctl_table(nf_ct_netfilter_header);
+	kfree(table);
+out_kmemdup:
+	if (net_eq(net, &init_net))
+		unregister_sysctl_table(nf_ct_netfilter_header);
 out:
 	printk("nf_conntrack: can't register to sysctl.\n");
 	return -ENOMEM;
 }
 
-static void nf_conntrack_standalone_fini_sysctl(void)
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
 {
-	unregister_sysctl_table(nf_ct_netfilter_header);
-	unregister_sysctl_table(nf_ct_sysctl_header);
+	struct ctl_table *table;
+
+	if (net_eq(net, &init_net))
+		unregister_sysctl_table(nf_ct_netfilter_header);
+	table = net->ct.sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.sysctl_header);
+	kfree(table);
 }
 #else
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
 	return 0;
 }
 
-static void nf_conntrack_standalone_fini_sysctl(void)
+static void nf_conntrack_standalone_fini_sysctl(struct net *net)
 {
 }
 #endif /* CONFIG_SYSCTL */
@@ -457,8 +474,13 @@ static int nf_conntrack_net_init(struct net *net)
 	ret = nf_conntrack_standalone_init_proc(net);
 	if (ret < 0)
 		goto out_proc;
+	ret = nf_conntrack_standalone_init_sysctl(net);
+	if (ret < 0)
+		goto out_sysctl;
 	return 0;
 
+out_sysctl:
+	nf_conntrack_standalone_fini_proc(net);
 out_proc:
 	nf_conntrack_cleanup(net);
 out_init:
@@ -467,6 +489,7 @@ out_init:
 
 static void nf_conntrack_net_exit(struct net *net)
 {
+	nf_conntrack_standalone_fini_sysctl(net);
 	nf_conntrack_standalone_fini_proc(net);
 	nf_conntrack_cleanup(net);
 }
@@ -478,25 +501,11 @@ static struct pernet_operations nf_conntrack_net_ops = {
 
 static int __init nf_conntrack_standalone_init(void)
 {
-	int ret;
-
-	ret = register_pernet_subsys(&nf_conntrack_net_ops);
-	if (ret < 0)
-		goto out;
-	ret = nf_conntrack_standalone_init_sysctl();
-	if (ret < 0)
-		goto out_sysctl;
-	return 0;
-
-out_sysctl:
-	unregister_pernet_subsys(&nf_conntrack_net_ops);
-out:
-	return ret;
+	return register_pernet_subsys(&nf_conntrack_net_ops);
 }
 
 static void __exit nf_conntrack_standalone_fini(void)
 {
-	nf_conntrack_standalone_fini_sysctl();
 	unregister_pernet_subsys(&nf_conntrack_net_ops);
 }
 
-- 
cgit v1.2.3-70-g09d2


From c04d05529a6e0bf97183a2caf76a0c7f07f5b78c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:08 +0200
Subject: netfilter: netns nf_conntrack: per-netns
 net.netfilter.nf_conntrack_checksum sysctl

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h           | 1 -
 include/net/netns/conntrack.h                  | 1 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   | 2 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +-
 net/netfilter/nf_conntrack_proto_dccp.c        | 2 +-
 net/netfilter/nf_conntrack_proto_tcp.c         | 2 +-
 net/netfilter/nf_conntrack_proto_udp.c         | 2 +-
 net/netfilter/nf_conntrack_proto_udplite.c     | 2 +-
 net/netfilter/nf_conntrack_standalone.c        | 7 +++----
 10 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index c95561050f7..b76a8685b5b 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -287,7 +287,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb)
 
 extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
 extern unsigned int nf_conntrack_htable_size;
-extern int nf_conntrack_checksum;
 extern int nf_conntrack_max;
 
 #define NF_CT_STAT_INC(net, count)	\
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 2b50758df6a..38b6dae4d3d 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,6 +17,7 @@ struct netns_ct {
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	struct nf_conntrack_ecache *ecache;
 #endif
+	int			sysctl_checksum;
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*sysctl_header;
 #endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2e4dd3fb002..75871b1dd8a 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -270,7 +270,7 @@ static ctl_table ip_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_NF_CONNTRACK_CHECKSUM,
 		.procname	= "ip_conntrack_checksum",
-		.data		= &nf_conntrack_checksum,
+		.data		= &init_net.ct.sysctl_checksum,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 205ba399d4a..ace66cbf921 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -188,7 +188,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 	}
 
 	/* See ip_conntrack_proto_tcp.c */
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
 		if (LOG_INVALID(IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index df04de91e6e..fa12e57749a 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -187,7 +187,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 		return -NF_ACCEPT;
 	}
 
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
 		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
 			      "nf_ct_icmpv6: ICMPv6 checksum failed\n");
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 6ead8da3e9e..769680e68b5 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -575,7 +575,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb,
 		}
 	}
 
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
 				pf)) {
 		msg = "nf_ct_dccp: bad checksum ";
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b5d62d66e02..131c9be4470 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -780,7 +780,7 @@ static int tcp_error(struct net *net,
 	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
 		if (LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index e0ee89e179c..3d3fffe3f8b 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -123,7 +123,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 	 * We skip checking packets on the outgoing path
 	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
 		if (LOG_INVALID(IPPROTO_UDP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index c5b77c8f86c..3d1697c4f91 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -129,7 +129,7 @@ static int udplite_error(struct net *net,
 	}
 
 	/* Checksum invalid? Ignore. */
-	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
 	    			pf)) {
 		if (LOG_INVALID(IPPROTO_UDPLITE))
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 64b4f95b367..5cd06637977 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -322,9 +322,6 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
 
 /* Sysctl support */
 
-int nf_conntrack_checksum __read_mostly = 1;
-EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
-
 #ifdef CONFIG_SYSCTL
 /* Log invalid packets of a given protocol */
 static int log_invalid_proto_min = 0;
@@ -360,7 +357,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_CHECKSUM,
 		.procname	= "nf_conntrack_checksum",
-		.data		= &nf_conntrack_checksum,
+		.data		= &init_net.ct.sysctl_checksum,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -425,6 +422,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 		goto out_kmemdup;
 
 	table[1].data = &net->ct.count;
+	table[3].data = &net->ct.sysctl_checksum;
 
 	net->ct.sysctl_header = register_net_sysctl_table(net,
 					nf_net_netfilter_sysctl_path, table);
@@ -474,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net)
 	ret = nf_conntrack_standalone_init_proc(net);
 	if (ret < 0)
 		goto out_proc;
+	net->ct.sysctl_checksum = 1;
 	ret = nf_conntrack_standalone_init_sysctl(net);
 	if (ret < 0)
 		goto out_sysctl;
-- 
cgit v1.2.3-70-g09d2


From c2a2c7e0cc39e7f9336cd67e8307a110bdba82f3 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:08 +0200
Subject: netfilter: netns nf_conntrack: per-netns
 net.netfilter.nf_conntrack_log_invalid sysctl

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_l4proto.h   | 15 +++++++--------
 include/net/netns/conntrack.h                  |  1 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  2 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  6 +++---
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  2 +-
 net/netfilter/nf_conntrack_core.c              |  1 -
 net/netfilter/nf_conntrack_proto_dccp.c        | 10 ++++++----
 net/netfilter/nf_conntrack_proto_tcp.c         | 18 ++++++++++--------
 net/netfilter/nf_conntrack_proto_udp.c         |  6 +++---
 net/netfilter/nf_conntrack_proto_udplite.c     |  8 ++++----
 net/netfilter/nf_conntrack_standalone.c        |  6 +++---
 11 files changed, 39 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 97723d33c95..7f2f43c7728 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -117,20 +117,19 @@ extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
 				      struct nf_conntrack_tuple *t);
 extern const struct nla_policy nf_ct_port_nla_policy[];
 
-/* Log invalid packets */
-extern unsigned int nf_ct_log_invalid;
-
 #ifdef CONFIG_SYSCTL
 #ifdef DEBUG_INVALID_PACKETS
-#define LOG_INVALID(proto) \
-	(nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW)
+#define LOG_INVALID(net, proto)				\
+	((net)->ct.sysctl_log_invalid == (proto) ||	\
+	 (net)->ct.sysctl_log_invalid == IPPROTO_RAW)
 #else
-#define LOG_INVALID(proto) \
-	((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \
+#define LOG_INVALID(net, proto)				\
+	(((net)->ct.sysctl_log_invalid == (proto) ||	\
+	  (net)->ct.sysctl_log_invalid == IPPROTO_RAW)	\
 	 && net_ratelimit())
 #endif
 #else
-#define LOG_INVALID(proto) 0
+#define LOG_INVALID(net, proto) 0
 #endif /* CONFIG_SYSCTL */
 
 #endif /*_NF_CONNTRACK_PROTOCOL_H*/
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 38b6dae4d3d..503e37551b1 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -18,6 +18,7 @@ struct netns_ct {
 	struct nf_conntrack_ecache *ecache;
 #endif
 	int			sysctl_checksum;
+	unsigned int		sysctl_log_invalid; /* Log invalid packets */
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*sysctl_header;
 #endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 75871b1dd8a..af69acc1d0f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -278,7 +278,7 @@ static ctl_table ip_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_NF_CONNTRACK_LOG_INVALID,
 		.procname	= "ip_conntrack_log_invalid",
-		.data		= &nf_ct_log_invalid,
+		.data		= &init_net.ct.sysctl_log_invalid,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index ace66cbf921..4e887922022 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -181,7 +181,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 	/* Not enough header? */
 	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
 	if (icmph == NULL) {
-		if (LOG_INVALID(IPPROTO_ICMP))
+		if (LOG_INVALID(net, IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_icmp: short packet ");
 		return -NF_ACCEPT;
@@ -190,7 +190,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 	/* See ip_conntrack_proto_tcp.c */
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
-		if (LOG_INVALID(IPPROTO_ICMP))
+		if (LOG_INVALID(net, IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_icmp: bad HW ICMP checksum ");
 		return -NF_ACCEPT;
@@ -203,7 +203,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 	 *		  discarded.
 	 */
 	if (icmph->type > NR_ICMP_TYPES) {
-		if (LOG_INVALID(IPPROTO_ICMP))
+		if (LOG_INVALID(net, IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_icmp: invalid ICMP type ");
 		return -NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index fa12e57749a..05726177903 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -181,7 +181,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 
 	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
 	if (icmp6h == NULL) {
-		if (LOG_INVALID(IPPROTO_ICMPV6))
+		if (LOG_INVALID(net, IPPROTO_ICMPV6))
 		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
 			      "nf_ct_icmpv6: short packet ");
 		return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1e87fa0cd3a..ade0bb3ab2e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -53,7 +53,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
 struct nf_conn nf_conntrack_untracked __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
-unsigned int nf_ct_log_invalid __read_mostly;
 static struct kmem_cache *nf_conntrack_cachep __read_mostly;
 
 static int nf_conntrack_hash_rnd_initted;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 769680e68b5..8fcf1762fab 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -418,6 +418,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
 static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		     unsigned int dataoff)
 {
+	struct net *net = nf_ct_net(ct);
 	struct dccp_hdr _dh, *dh;
 	const char *msg;
 	u_int8_t state;
@@ -445,7 +446,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 
 out_invalid:
-	if (LOG_INVALID(IPPROTO_DCCP))
+	if (LOG_INVALID(net, IPPROTO_DCCP))
 		nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg);
 	return false;
 }
@@ -463,6 +464,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		       unsigned int dataoff, enum ip_conntrack_info ctinfo,
 		       u_int8_t pf, unsigned int hooknum)
 {
+	struct net *net = nf_ct_net(ct);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	struct dccp_hdr _dh, *dh;
 	u_int8_t type, old_state, new_state;
@@ -524,13 +526,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		ct->proto.dccp.last_pkt = type;
 
 		write_unlock_bh(&dccp_lock);
-		if (LOG_INVALID(IPPROTO_DCCP))
+		if (LOG_INVALID(net, IPPROTO_DCCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_dccp: invalid packet ignored ");
 		return NF_ACCEPT;
 	case CT_DCCP_INVALID:
 		write_unlock_bh(&dccp_lock);
-		if (LOG_INVALID(IPPROTO_DCCP))
+		if (LOG_INVALID(net, IPPROTO_DCCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_dccp: invalid state transition ");
 		return -NF_ACCEPT;
@@ -590,7 +592,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb,
 	return NF_ACCEPT;
 
 out_invalid:
-	if (LOG_INVALID(IPPROTO_DCCP))
+	if (LOG_INVALID(net, IPPROTO_DCCP))
 		nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg);
 	return -NF_ACCEPT;
 }
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 131c9be4470..f947ec41e39 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -488,6 +488,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			  const struct tcphdr *tcph,
 			  u_int8_t pf)
 {
+	struct net *net = nf_ct_net(ct);
 	struct ip_ct_tcp_state *sender = &state->seen[dir];
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
@@ -668,7 +669,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
 		    nf_ct_tcp_be_liberal)
 			res = true;
-		if (!res && LOG_INVALID(IPPROTO_TCP))
+		if (!res && LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 			"nf_ct_tcp: %s ",
 			before(seq, sender->td_maxend + 1) ?
@@ -761,7 +762,7 @@ static int tcp_error(struct net *net,
 	/* Smaller that minimal TCP header? */
 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_tcp: short packet ");
 		return -NF_ACCEPT;
@@ -769,7 +770,7 @@ static int tcp_error(struct net *net,
 
 	/* Not whole TCP header or malformed packet */
 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_tcp: truncated/malformed packet ");
 		return -NF_ACCEPT;
@@ -782,7 +783,7 @@ static int tcp_error(struct net *net,
 	/* FIXME: Source route IP option packets --RR */
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: bad TCP checksum ");
 		return -NF_ACCEPT;
@@ -791,7 +792,7 @@ static int tcp_error(struct net *net,
 	/* Check TCP flags. */
 	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
 	if (!tcp_valid_flags[tcpflags]) {
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid TCP flag combination ");
 		return -NF_ACCEPT;
@@ -808,6 +809,7 @@ static int tcp_packet(struct nf_conn *ct,
 		      u_int8_t pf,
 		      unsigned int hooknum)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_tuple *tuple;
 	enum tcp_conntrack new_state, old_state;
 	enum ip_conntrack_dir dir;
@@ -886,7 +888,7 @@ static int tcp_packet(struct nf_conn *ct,
 			 * thus initiate a clean new session.
 			 */
 			write_unlock_bh(&tcp_lock);
-			if (LOG_INVALID(IPPROTO_TCP))
+			if (LOG_INVALID(net, IPPROTO_TCP))
 				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 					  "nf_ct_tcp: killing out of sync session ");
 			nf_ct_kill(ct);
@@ -899,7 +901,7 @@ static int tcp_packet(struct nf_conn *ct,
 		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
 
 		write_unlock_bh(&tcp_lock);
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid packet ignored ");
 		return NF_ACCEPT;
@@ -908,7 +910,7 @@ static int tcp_packet(struct nf_conn *ct,
 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
 			 dir, get_conntrack_index(th), old_state);
 		write_unlock_bh(&tcp_lock);
-		if (LOG_INVALID(IPPROTO_TCP))
+		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid state ");
 		return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3d3fffe3f8b..7c2ca48698b 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -101,7 +101,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 	/* Header is too small? */
 	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
 	if (hdr == NULL) {
-		if (LOG_INVALID(IPPROTO_UDP))
+		if (LOG_INVALID(net, IPPROTO_UDP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udp: short packet ");
 		return -NF_ACCEPT;
@@ -109,7 +109,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 
 	/* Truncated/malformed packets */
 	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
-		if (LOG_INVALID(IPPROTO_UDP))
+		if (LOG_INVALID(net, IPPROTO_UDP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udp: truncated/malformed packet ");
 		return -NF_ACCEPT;
@@ -125,7 +125,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 	 * FIXME: Source route IP option packets --RR */
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
-		if (LOG_INVALID(IPPROTO_UDP))
+		if (LOG_INVALID(net, IPPROTO_UDP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udp: bad UDP checksum ");
 		return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 3d1697c4f91..d22d839e4f9 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -104,7 +104,7 @@ static int udplite_error(struct net *net,
 	/* Header is too small? */
 	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
 	if (hdr == NULL) {
-		if (LOG_INVALID(IPPROTO_UDPLITE))
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udplite: short packet ");
 		return -NF_ACCEPT;
@@ -114,7 +114,7 @@ static int udplite_error(struct net *net,
 	if (cscov == 0)
 		cscov = udplen;
 	else if (cscov < sizeof(*hdr) || cscov > udplen) {
-		if (LOG_INVALID(IPPROTO_UDPLITE))
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udplite: invalid checksum coverage ");
 		return -NF_ACCEPT;
@@ -122,7 +122,7 @@ static int udplite_error(struct net *net,
 
 	/* UDPLITE mandates checksums */
 	if (!hdr->check) {
-		if (LOG_INVALID(IPPROTO_UDPLITE))
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udplite: checksum missing ");
 		return -NF_ACCEPT;
@@ -132,7 +132,7 @@ static int udplite_error(struct net *net,
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
 	    			pf)) {
-		if (LOG_INVALID(IPPROTO_UDPLITE))
+		if (LOG_INVALID(net, IPPROTO_UDPLITE))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udplite: bad UDPLite checksum ");
 		return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5cd06637977..98106d4e89f 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -365,7 +365,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_LOG_INVALID,
 		.procname	= "nf_conntrack_log_invalid",
-		.data		= &nf_ct_log_invalid,
+		.data		= &init_net.ct.sysctl_log_invalid,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
@@ -403,8 +403,6 @@ static struct ctl_path nf_ct_path[] = {
 	{ }
 };
 
-EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
-
 static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
 	struct ctl_table *table;
@@ -423,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 
 	table[1].data = &net->ct.count;
 	table[3].data = &net->ct.sysctl_checksum;
+	table[4].data = &net->ct.sysctl_log_invalid;
 
 	net->ct.sysctl_header = register_net_sysctl_table(net,
 					nf_net_netfilter_sysctl_path, table);
@@ -473,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net)
 	if (ret < 0)
 		goto out_proc;
 	net->ct.sysctl_checksum = 1;
+	net->ct.sysctl_log_invalid = 0;
 	ret = nf_conntrack_standalone_init_sysctl(net);
 	if (ret < 0)
 		goto out_sysctl;
-- 
cgit v1.2.3-70-g09d2


From d716a4dfbbdf0d4731d596a96e5f4b0d892ac168 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:09 +0200
Subject: netfilter: netns nf_conntrack: per-netns conntrack accounting

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_acct.h |  10 +--
 include/net/netns/conntrack.h             |   2 +
 net/netfilter/nf_conntrack_acct.c         | 100 +++++++++++++++++++++---------
 net/netfilter/nf_conntrack_core.c         |   4 +-
 4 files changed, 81 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 5d5ae55d54c..03e218f0be4 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -8,6 +8,7 @@
 
 #ifndef _NF_CONNTRACK_ACCT_H
 #define _NF_CONNTRACK_ACCT_H
+#include <net/net_namespace.h>
 #include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/netfilter/nf_conntrack.h>
@@ -18,8 +19,6 @@ struct nf_conn_counter {
 	u_int64_t bytes;
 };
 
-extern int nf_ct_acct;
-
 static inline
 struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
 {
@@ -29,9 +28,10 @@ struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
 static inline
 struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conn_counter *acct;
 
-	if (!nf_ct_acct)
+	if (!net->ct.sysctl_acct)
 		return NULL;
 
 	acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp);
@@ -45,7 +45,7 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
 extern unsigned int
 seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir);
 
-extern int nf_conntrack_acct_init(void);
-extern void nf_conntrack_acct_fini(void);
+extern int nf_conntrack_acct_init(struct net *net);
+extern void nf_conntrack_acct_fini(struct net *net);
 
 #endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 503e37551b1..f4498a62881 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,10 +17,12 @@ struct netns_ct {
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	struct nf_conntrack_ecache *ecache;
 #endif
+	int			sysctl_acct;
 	int			sysctl_checksum;
 	unsigned int		sysctl_log_invalid; /* Log invalid packets */
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*sysctl_header;
+	struct ctl_table_header	*acct_sysctl_header;
 #endif
 	int			hash_vmalloc;
 	int			expect_vmalloc;
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 59bd8b903a1..03591d37b9c 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -22,19 +22,17 @@
 #define NF_CT_ACCT_DEFAULT 0
 #endif
 
-int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
-EXPORT_SYMBOL_GPL(nf_ct_acct);
+static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
 
 module_param_named(acct, nf_ct_acct, bool, 0644);
 MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
 
 #ifdef CONFIG_SYSCTL
-static struct ctl_table_header *acct_sysctl_header;
 static struct ctl_table acct_sysctl_table[] = {
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "nf_conntrack_acct",
-		.data		= &nf_ct_acct,
+		.data		= &init_net.ct.sysctl_acct,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -64,41 +62,87 @@ static struct nf_ct_ext_type acct_extend __read_mostly = {
 	.id	= NF_CT_EXT_ACCT,
 };
 
-int nf_conntrack_acct_init(void)
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_acct_init_sysctl(struct net *net)
 {
-	int ret;
+	struct ctl_table *table;
 
-#ifdef CONFIG_NF_CT_ACCT
-	printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
-	printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
-	printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
-#endif
+	table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	table[0].data = &net->ct.sysctl_acct;
 
-	ret = nf_ct_extend_register(&acct_extend);
-	if (ret < 0) {
-		printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
-		return ret;
+	net->ct.acct_sysctl_header = register_net_sysctl_table(net,
+			nf_net_netfilter_sysctl_path, table);
+	if (!net->ct.acct_sysctl_header) {
+		printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
+		goto out_register;
 	}
+	return 0;
 
-#ifdef CONFIG_SYSCTL
-	acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
-				acct_sysctl_table);
+out_register:
+	kfree(table);
+out:
+	return -ENOMEM;
+}
 
-	if (!acct_sysctl_header) {
-		nf_ct_extend_unregister(&acct_extend);
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
 
-		printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
-		return -ENOMEM;
-	}
+	table = net->ct.acct_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.acct_sysctl_header);
+	kfree(table);
+}
+#else
+static int nf_conntrack_acct_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void nf_conntrack_acct_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_acct_init(struct net *net)
+{
+	int ret;
+
+	net->ct.sysctl_acct = nf_ct_acct;
+
+	if (net_eq(net, &init_net)) {
+#ifdef CONFIG_NF_CT_ACCT
+		printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n");
+		printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
+		printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
 #endif
 
+		ret = nf_ct_extend_register(&acct_extend);
+		if (ret < 0) {
+			printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
+			goto out_extend_register;
+		}
+	}
+
+	ret = nf_conntrack_acct_init_sysctl(net);
+	if (ret < 0)
+		goto out_sysctl;
+
 	return 0;
+
+out_sysctl:
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&acct_extend);
+out_extend_register:
+	return ret;
 }
 
-void nf_conntrack_acct_fini(void)
+void nf_conntrack_acct_fini(struct net *net)
 {
-#ifdef CONFIG_SYSCTL
-	unregister_sysctl_table(acct_sysctl_header);
-#endif
-	nf_ct_extend_unregister(&acct_extend);
+	nf_conntrack_acct_fini_sysctl(net);
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&acct_extend);
 }
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ade0bb3ab2e..bb26460d897 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1039,7 +1039,7 @@ void nf_conntrack_cleanup(struct net *net)
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 
-	nf_conntrack_acct_fini();
+	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
 	free_percpu(net->ct.stat);
 	nf_conntrack_helper_fini();
@@ -1191,7 +1191,7 @@ int nf_conntrack_init(struct net *net)
 	if (ret < 0)
 		goto out_fini_expect;
 
-	ret = nf_conntrack_acct_init();
+	ret = nf_conntrack_acct_init(net);
 	if (ret < 0)
 		goto out_fini_helper;
 
-- 
cgit v1.2.3-70-g09d2


From 08f6547d266fdba087f7fa7963fc0610be5b7cd7 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:09 +0200
Subject: netfilter: netns nf_conntrack: final netns tweaks

Add init_net checks to not remove kmem_caches twice and so on.

Refactor functions to split code which should be executed only for
init_net into one place.

ip_ct_attach and ip_ct_destroy assignments remain separate, because
they're separate stages in setup and teardown.

NOTE: NOTRACK code is in for-every-net part. It will be made per-netns
after we decidce how to do it correctly.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_core.c   | 151 +++++++++++++++++++++++-------------
 net/netfilter/nf_conntrack_expect.c |  26 ++++---
 2 files changed, 114 insertions(+), 63 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index bb26460d897..27de3c7b006 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1010,17 +1010,15 @@ void nf_conntrack_flush(struct net *net)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_flush);
 
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void nf_conntrack_cleanup(struct net *net)
+static void nf_conntrack_cleanup_init_net(void)
 {
-	rcu_assign_pointer(ip_ct_attach, NULL);
-
-	/* This makes sure all current packets have passed through
-	   netfilter framework.  Roll on, two-stage module
-	   delete... */
-	synchronize_net();
+	nf_conntrack_helper_fini();
+	nf_conntrack_proto_fini();
+	kmem_cache_destroy(nf_conntrack_cachep);
+}
 
+static void nf_conntrack_cleanup_net(struct net *net)
+{
 	nf_ct_event_cache_flush(net);
 	nf_conntrack_ecache_fini(net);
  i_see_dead_people:
@@ -1033,17 +1031,31 @@ void nf_conntrack_cleanup(struct net *net)
 	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
 		schedule();
 
-	rcu_assign_pointer(nf_ct_destroy, NULL);
-
-	kmem_cache_destroy(nf_conntrack_cachep);
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
-
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
 	free_percpu(net->ct.stat);
-	nf_conntrack_helper_fini();
-	nf_conntrack_proto_fini();
+}
+
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void nf_conntrack_cleanup(struct net *net)
+{
+	if (net_eq(net, &init_net))
+		rcu_assign_pointer(ip_ct_attach, NULL);
+
+	/* This makes sure all current packets have passed through
+	   netfilter framework.  Roll on, two-stage module
+	   delete... */
+	synchronize_net();
+
+	nf_conntrack_cleanup_net(net);
+
+	if (net_eq(net, &init_net)) {
+		rcu_assign_pointer(nf_ct_destroy, NULL);
+		nf_conntrack_cleanup_init_net();
+	}
 }
 
 struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
@@ -1128,7 +1140,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 		  &nf_conntrack_htable_size, 0600);
 
-int nf_conntrack_init(struct net *net)
+static int nf_conntrack_init_init_net(void)
 {
 	int max_factor = 8;
 	int ret;
@@ -1150,21 +1162,6 @@ int nf_conntrack_init(struct net *net)
 		 * entries. */
 		max_factor = 4;
 	}
-	atomic_set(&net->ct.count, 0);
-	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
-	if (!net->ct.stat)
-		goto err_stat;
-	ret = nf_conntrack_ecache_init(net);
-	if (ret < 0)
-		goto err_ecache;
-	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
-						  &net->ct.hash_vmalloc);
-	if (!net->ct.hash) {
-		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
-		goto err_hash;
-	}
-	INIT_HLIST_HEAD(&net->ct.unconfirmed);
-
 	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
 	printk("nf_conntrack version %s (%u buckets, %d max)\n",
@@ -1176,28 +1173,55 @@ int nf_conntrack_init(struct net *net)
 						0, 0, NULL);
 	if (!nf_conntrack_cachep) {
 		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
-		goto err_free_hash;
+		ret = -ENOMEM;
+		goto err_cache;
 	}
 
 	ret = nf_conntrack_proto_init();
 	if (ret < 0)
-		goto err_free_conntrack_slab;
-
-	ret = nf_conntrack_expect_init(net);
-	if (ret < 0)
-		goto out_fini_proto;
+		goto err_proto;
 
 	ret = nf_conntrack_helper_init();
 	if (ret < 0)
-		goto out_fini_expect;
+		goto err_helper;
+
+	return 0;
+
+err_helper:
+	nf_conntrack_proto_fini();
+err_proto:
+	kmem_cache_destroy(nf_conntrack_cachep);
+err_cache:
+	return ret;
+}
+
+static int nf_conntrack_init_net(struct net *net)
+{
+	int ret;
 
+	atomic_set(&net->ct.count, 0);
+	INIT_HLIST_HEAD(&net->ct.unconfirmed);
+	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+	if (!net->ct.stat) {
+		ret = -ENOMEM;
+		goto err_stat;
+	}
+	ret = nf_conntrack_ecache_init(net);
+	if (ret < 0)
+		goto err_ecache;
+	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
+						  &net->ct.hash_vmalloc);
+	if (!net->ct.hash) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
+		goto err_hash;
+	}
+	ret = nf_conntrack_expect_init(net);
+	if (ret < 0)
+		goto err_expect;
 	ret = nf_conntrack_acct_init(net);
 	if (ret < 0)
-		goto out_fini_helper;
-
-	/* For use by REJECT target */
-	rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
-	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
+		goto err_acct;
 
 	/* Set up fake conntrack:
 	    - to never be deleted, not in any hashes */
@@ -1208,17 +1232,11 @@ int nf_conntrack_init(struct net *net)
 	/*  - and look it like as a confirmed connection */
 	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
 
-	return ret;
+	return 0;
 
-out_fini_helper:
-	nf_conntrack_helper_fini();
-out_fini_expect:
+err_acct:
 	nf_conntrack_expect_fini(net);
-out_fini_proto:
-	nf_conntrack_proto_fini();
-err_free_conntrack_slab:
-	kmem_cache_destroy(nf_conntrack_cachep);
-err_free_hash:
+err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 err_hash:
@@ -1226,5 +1244,32 @@ err_hash:
 err_ecache:
 	free_percpu(net->ct.stat);
 err_stat:
-	return -ENOMEM;
+	return ret;
+}
+
+int nf_conntrack_init(struct net *net)
+{
+	int ret;
+
+	if (net_eq(net, &init_net)) {
+		ret = nf_conntrack_init_init_net();
+		if (ret < 0)
+			goto out_init_net;
+	}
+	ret = nf_conntrack_init_net(net);
+	if (ret < 0)
+		goto out_net;
+
+	if (net_eq(net, &init_net)) {
+		/* For use by REJECT target */
+		rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
+		rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
+	}
+	return 0;
+
+out_net:
+	if (net_eq(net, &init_net))
+		nf_conntrack_cleanup_init_net();
+out_init_net:
+	return ret;
 }
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index b7f75117161..37a703bc3b8 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -563,12 +563,14 @@ int nf_conntrack_expect_init(struct net *net)
 {
 	int err = -ENOMEM;
 
-	if (!nf_ct_expect_hsize) {
-		nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
-		if (!nf_ct_expect_hsize)
-			nf_ct_expect_hsize = 1;
+	if (net_eq(net, &init_net)) {
+		if (!nf_ct_expect_hsize) {
+			nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+			if (!nf_ct_expect_hsize)
+				nf_ct_expect_hsize = 1;
+		}
+		nf_ct_expect_max = nf_ct_expect_hsize * 4;
 	}
-	nf_ct_expect_max = nf_ct_expect_hsize * 4;
 
 	net->ct.expect_count = 0;
 	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
@@ -576,11 +578,13 @@ int nf_conntrack_expect_init(struct net *net)
 	if (net->ct.expect_hash == NULL)
 		goto err1;
 
-	nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+	if (net_eq(net, &init_net)) {
+		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
 					sizeof(struct nf_conntrack_expect),
 					0, 0, NULL);
-	if (!nf_ct_expect_cachep)
-		goto err2;
+		if (!nf_ct_expect_cachep)
+			goto err2;
+	}
 
 	err = exp_proc_init(net);
 	if (err < 0)
@@ -589,7 +593,8 @@ int nf_conntrack_expect_init(struct net *net)
 	return 0;
 
 err3:
-	kmem_cache_destroy(nf_ct_expect_cachep);
+	if (net_eq(net, &init_net))
+		kmem_cache_destroy(nf_ct_expect_cachep);
 err2:
 	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
 			     nf_ct_expect_hsize);
@@ -600,7 +605,8 @@ err1:
 void nf_conntrack_expect_fini(struct net *net)
 {
 	exp_proc_remove(net);
-	kmem_cache_destroy(nf_ct_expect_cachep);
+	if (net_eq(net, &init_net))
+		kmem_cache_destroy(nf_ct_expect_cachep);
 	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
 			     nf_ct_expect_hsize);
 }
-- 
cgit v1.2.3-70-g09d2


From a5c3a8005cb7a36ebcc5b849f6045069ce4f7ca8 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:09 +0200
Subject: netfilter: netns nf_conntrack: SIP conntracking in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_sip.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index a006080eb38..6813f1c8863 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -736,6 +736,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
 	struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct net *net = nf_ct_net(ct);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	union nf_inet_addr *saddr;
 	struct nf_conntrack_tuple tuple;
@@ -775,7 +776,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
 
 	rcu_read_lock();
 	do {
-		exp = __nf_ct_expect_find(&init_net, &tuple);
+		exp = __nf_ct_expect_find(net, &tuple);
 
 		if (!exp || exp->master == ct ||
 		    nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
-- 
cgit v1.2.3-70-g09d2


From 84541cc13a3bb31a58c096dde3517461e3ad91c2 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:09 +0200
Subject: netfilter: netns nf_conntrack: H323 conntracking in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_h323_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index dfb826c973d..c1504f71cdf 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1210,6 +1210,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 					       union nf_inet_addr *addr,
 					       __be16 port)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
 
@@ -1219,7 +1220,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 	tuple.dst.u.tcp.port = port;
 	tuple.dst.protonum = IPPROTO_TCP;
 
-	exp = __nf_ct_expect_find(&init_net, &tuple);
+	exp = __nf_ct_expect_find(net, &tuple);
 	if (exp && exp->master == ct)
 		return exp;
 	return NULL;
-- 
cgit v1.2.3-70-g09d2


From 3bb0d1c00f86b13bb184193a8f0189ddd6f0459f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:10 +0200
Subject: netfilter: netns nf_conntrack: GRE conntracking in netns

* make keymap list per-netns
* per-netns keymal lock (not strictly necessary)
* flush keymap at netns stop and module unload.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_proto_gre.h |  2 +-
 net/netfilter/nf_conntrack_pptp.c                |  2 +-
 net/netfilter/nf_conntrack_proto_gre.c           | 97 ++++++++++++++++++------
 3 files changed, 76 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 535e4219d2b..2a10efda17f 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -87,7 +87,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 /* delete keymap entries */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
 
-extern void nf_ct_gre_keymap_flush(void);
+extern void nf_ct_gre_keymap_flush(struct net *net);
 extern void nf_nat_need_gre(void);
 
 #endif /* __KERNEL__ */
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5db7df5d19b..e47d5de41cc 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -602,7 +602,7 @@ static int __init nf_conntrack_pptp_init(void)
 static void __exit nf_conntrack_pptp_fini(void)
 {
 	nf_conntrack_helper_unregister(&pptp);
-	nf_ct_gre_keymap_flush();
+	nf_ct_gre_keymap_flush(&init_net);
 }
 
 module_init(nf_conntrack_pptp_init);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 5b1273a01fe..a2cdbcbf64c 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -29,8 +29,11 @@
 #include <linux/list.h>
 #include <linux/seq_file.h>
 #include <linux/in.h>
+#include <linux/netdevice.h>
 #include <linux/skbuff.h>
-
+#include <net/dst.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -40,19 +43,23 @@
 #define GRE_TIMEOUT		(30 * HZ)
 #define GRE_STREAM_TIMEOUT	(180 * HZ)
 
-static DEFINE_RWLOCK(nf_ct_gre_lock);
-static LIST_HEAD(gre_keymap_list);
+static int proto_gre_net_id;
+struct netns_proto_gre {
+	rwlock_t		keymap_lock;
+	struct list_head	keymap_list;
+};
 
-void nf_ct_gre_keymap_flush(void)
+void nf_ct_gre_keymap_flush(struct net *net)
 {
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
 	struct nf_ct_gre_keymap *km, *tmp;
 
-	write_lock_bh(&nf_ct_gre_lock);
-	list_for_each_entry_safe(km, tmp, &gre_keymap_list, list) {
+	write_lock_bh(&net_gre->keymap_lock);
+	list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
 		list_del(&km->list);
 		kfree(km);
 	}
-	write_unlock_bh(&nf_ct_gre_lock);
+	write_unlock_bh(&net_gre->keymap_lock);
 }
 EXPORT_SYMBOL(nf_ct_gre_keymap_flush);
 
@@ -67,19 +74,20 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
 }
 
 /* look up the source key for a given tuple */
-static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t)
+static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
 {
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
 	struct nf_ct_gre_keymap *km;
 	__be16 key = 0;
 
-	read_lock_bh(&nf_ct_gre_lock);
-	list_for_each_entry(km, &gre_keymap_list, list) {
+	read_lock_bh(&net_gre->keymap_lock);
+	list_for_each_entry(km, &net_gre->keymap_list, list) {
 		if (gre_key_cmpfn(km, t)) {
 			key = km->tuple.src.u.gre.key;
 			break;
 		}
 	}
-	read_unlock_bh(&nf_ct_gre_lock);
+	read_unlock_bh(&net_gre->keymap_lock);
 
 	pr_debug("lookup src key 0x%x for ", key);
 	nf_ct_dump_tuple(t);
@@ -91,20 +99,22 @@ static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t)
 int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 			 struct nf_conntrack_tuple *t)
 {
+	struct net *net = nf_ct_net(ct);
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
 	struct nf_conn_help *help = nfct_help(ct);
 	struct nf_ct_gre_keymap **kmp, *km;
 
 	kmp = &help->help.ct_pptp_info.keymap[dir];
 	if (*kmp) {
 		/* check whether it's a retransmission */
-		read_lock_bh(&nf_ct_gre_lock);
-		list_for_each_entry(km, &gre_keymap_list, list) {
+		read_lock_bh(&net_gre->keymap_lock);
+		list_for_each_entry(km, &net_gre->keymap_list, list) {
 			if (gre_key_cmpfn(km, t) && km == *kmp) {
-				read_unlock_bh(&nf_ct_gre_lock);
+				read_unlock_bh(&net_gre->keymap_lock);
 				return 0;
 			}
 		}
-		read_unlock_bh(&nf_ct_gre_lock);
+		read_unlock_bh(&net_gre->keymap_lock);
 		pr_debug("trying to override keymap_%s for ct %p\n",
 			 dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
 		return -EEXIST;
@@ -119,9 +129,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 	pr_debug("adding new entry %p: ", km);
 	nf_ct_dump_tuple(&km->tuple);
 
-	write_lock_bh(&nf_ct_gre_lock);
-	list_add_tail(&km->list, &gre_keymap_list);
-	write_unlock_bh(&nf_ct_gre_lock);
+	write_lock_bh(&net_gre->keymap_lock);
+	list_add_tail(&km->list, &net_gre->keymap_list);
+	write_unlock_bh(&net_gre->keymap_lock);
 
 	return 0;
 }
@@ -130,12 +140,14 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add);
 /* destroy the keymap entries associated with specified master ct */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
 	struct nf_conn_help *help = nfct_help(ct);
 	enum ip_conntrack_dir dir;
 
 	pr_debug("entering for ct %p\n", ct);
 
-	write_lock_bh(&nf_ct_gre_lock);
+	write_lock_bh(&net_gre->keymap_lock);
 	for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
 		if (help->help.ct_pptp_info.keymap[dir]) {
 			pr_debug("removing %p from list\n",
@@ -145,7 +157,7 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
 			help->help.ct_pptp_info.keymap[dir] = NULL;
 		}
 	}
-	write_unlock_bh(&nf_ct_gre_lock);
+	write_unlock_bh(&net_gre->keymap_lock);
 }
 EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
 
@@ -164,6 +176,7 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 			     struct nf_conntrack_tuple *tuple)
 {
+	struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev);
 	const struct gre_hdr_pptp *pgrehdr;
 	struct gre_hdr_pptp _pgrehdr;
 	__be16 srckey;
@@ -190,7 +203,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 	}
 
 	tuple->dst.u.gre.key = pgrehdr->call_id;
-	srckey = gre_keymap_lookup(tuple);
+	srckey = gre_keymap_lookup(net, tuple);
 	tuple->src.u.gre.key = srckey;
 
 	return true;
@@ -285,15 +298,53 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
 #endif
 };
 
+static int proto_gre_net_init(struct net *net)
+{
+	struct netns_proto_gre *net_gre;
+	int rv;
+
+	net_gre = kmalloc(sizeof(struct netns_proto_gre), GFP_KERNEL);
+	if (!net_gre)
+		return -ENOMEM;
+	rwlock_init(&net_gre->keymap_lock);
+	INIT_LIST_HEAD(&net_gre->keymap_list);
+
+	rv = net_assign_generic(net, proto_gre_net_id, net_gre);
+	if (rv < 0)
+		kfree(net_gre);
+	return rv;
+}
+
+static void proto_gre_net_exit(struct net *net)
+{
+	struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id);
+
+	nf_ct_gre_keymap_flush(net);
+	kfree(net_gre);
+}
+
+static struct pernet_operations proto_gre_net_ops = {
+	.init = proto_gre_net_init,
+	.exit = proto_gre_net_exit,
+};
+
 static int __init nf_ct_proto_gre_init(void)
 {
-	return nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
+	int rv;
+
+	rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
+	if (rv < 0)
+		return rv;
+	rv = register_pernet_gen_device(&proto_gre_net_id, &proto_gre_net_ops);
+	if (rv < 0)
+		nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+	return rv;
 }
 
 static void nf_ct_proto_gre_fini(void)
 {
 	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
-	nf_ct_gre_keymap_flush();
+	unregister_pernet_gen_device(proto_gre_net_id, &proto_gre_net_ops);
 }
 
 module_init(nf_ct_proto_gre_init);
-- 
cgit v1.2.3-70-g09d2


From 0e6e75af921d1f4799eeb9f83a31c86ab7cdeb8f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:10 +0200
Subject: netfilter: netns nf_conntrack: PPTP conntracking in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_pptp.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index e47d5de41cc..373e51e91ce 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -98,6 +98,7 @@ EXPORT_SYMBOL(pptp_msg_name);
 static void pptp_expectfn(struct nf_conn *ct,
 			 struct nf_conntrack_expect *exp)
 {
+	struct net *net = nf_ct_net(ct);
 	typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
 	pr_debug("increasing timeouts\n");
 
@@ -121,7 +122,7 @@ static void pptp_expectfn(struct nf_conn *ct,
 		pr_debug("trying to unexpect other dir: ");
 		nf_ct_dump_tuple(&inv_t);
 
-		exp_other = nf_ct_expect_find_get(&init_net, &inv_t);
+		exp_other = nf_ct_expect_find_get(net, &inv_t);
 		if (exp_other) {
 			/* delete other expectation.  */
 			pr_debug("found\n");
@@ -134,7 +135,8 @@ static void pptp_expectfn(struct nf_conn *ct,
 	rcu_read_unlock();
 }
 
-static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
+static int destroy_sibling_or_exp(struct net *net,
+				  const struct nf_conntrack_tuple *t)
 {
 	const struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
@@ -143,7 +145,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 	pr_debug("trying to timeout ct or exp for tuple ");
 	nf_ct_dump_tuple(t);
 
-	h = nf_conntrack_find_get(&init_net, t);
+	h = nf_conntrack_find_get(net, t);
 	if (h)  {
 		sibling = nf_ct_tuplehash_to_ctrack(h);
 		pr_debug("setting timeout of conntrack %p to 0\n", sibling);
@@ -154,7 +156,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 		nf_ct_put(sibling);
 		return 1;
 	} else {
-		exp = nf_ct_expect_find_get(&init_net, t);
+		exp = nf_ct_expect_find_get(net, t);
 		if (exp) {
 			pr_debug("unexpect_related of expect %p\n", exp);
 			nf_ct_unexpect_related(exp);
@@ -168,6 +170,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
 /* timeout GRE data connections */
 static void pptp_destroy_siblings(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	const struct nf_conn_help *help = nfct_help(ct);
 	struct nf_conntrack_tuple t;
 
@@ -178,7 +181,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.dst.protonum = IPPROTO_GRE;
 	t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id;
-	if (!destroy_sibling_or_exp(&t))
+	if (!destroy_sibling_or_exp(net, &t))
 		pr_debug("failed to timeout original pns->pac ct/exp\n");
 
 	/* try reply (pac->pns) tuple */
@@ -186,7 +189,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.dst.protonum = IPPROTO_GRE;
 	t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id;
-	if (!destroy_sibling_or_exp(&t))
+	if (!destroy_sibling_or_exp(net, &t))
 		pr_debug("failed to timeout reply pac->pns ct/exp\n");
 }
 
@@ -594,15 +597,32 @@ static struct nf_conntrack_helper pptp __read_mostly = {
 	.expect_policy		= &pptp_exp_policy,
 };
 
+static void nf_conntrack_pptp_net_exit(struct net *net)
+{
+	nf_ct_gre_keymap_flush(net);
+}
+
+static struct pernet_operations nf_conntrack_pptp_net_ops = {
+	.exit = nf_conntrack_pptp_net_exit,
+};
+
 static int __init nf_conntrack_pptp_init(void)
 {
-	return nf_conntrack_helper_register(&pptp);
+	int rv;
+
+	rv = nf_conntrack_helper_register(&pptp);
+	if (rv < 0)
+		return rv;
+	rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops);
+	if (rv < 0)
+		nf_conntrack_helper_unregister(&pptp);
+	return rv;
 }
 
 static void __exit nf_conntrack_pptp_fini(void)
 {
 	nf_conntrack_helper_unregister(&pptp);
-	nf_ct_gre_keymap_flush(&init_net);
+	unregister_pernet_subsys(&nf_conntrack_pptp_net_ops);
 }
 
 module_init(nf_conntrack_pptp_init);
-- 
cgit v1.2.3-70-g09d2


From b8b8063e0d0835fb44c88d9fded2be31c9a1757e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:10 +0200
Subject: netfilter: netns nat: fix ipt_MASQUERADE in netns

First, allow entry in notifier hook.
Second, start conntrack cleanup in netns to which netdevice belongs.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_MASQUERADE.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5e1c81791e5..65c811b27b7 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -119,9 +119,7 @@ static int masq_device_event(struct notifier_block *this,
 			     void *ptr)
 {
 	const struct net_device *dev = ptr;
-
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
+	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_DOWN) {
 		/* Device was downed.  Search entire table for
@@ -129,7 +127,7 @@ static int masq_device_event(struct notifier_block *this,
 		   and forget them. */
 		NF_CT_ASSERT(dev->ifindex != 0);
 
-		nf_ct_iterate_cleanup(&init_net, device_cmp,
+		nf_ct_iterate_cleanup(net, device_cmp,
 				      (void *)(long)dev->ifindex);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From e099a173573ce1ba171092aee7bb3c72ea686e59 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:10 +0200
Subject: netfilter: netns nat: per-netns NAT table

Same story as with iptable_filter, iptables_raw tables.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/ipv4.h         |  1 +
 net/ipv4/netfilter/nf_nat_rule.c | 40 +++++++++++++++++++++++++++++-----------
 2 files changed, 30 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index a6ed83853dc..b286b840493 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -38,6 +38,7 @@ struct netns_ipv4 {
 	struct xt_table		*iptable_raw;
 	struct xt_table		*arptable_filter;
 	struct xt_table		*iptable_security;
+	struct xt_table		*nat_table;
 #endif
 
 	int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index e8b4d0d4439..0a02a8caf3b 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -33,7 +33,7 @@ static struct
 	struct ipt_replace repl;
 	struct ipt_standard entries[3];
 	struct ipt_error term;
-} nat_initial_table __initdata = {
+} nat_initial_table __net_initdata = {
 	.repl = {
 		.name = "nat",
 		.valid_hooks = NAT_VALID_HOOKS,
@@ -58,14 +58,13 @@ static struct
 	.term = IPT_ERROR_INIT,			/* ERROR */
 };
 
-static struct xt_table __nat_table = {
+static struct xt_table nat_table = {
 	.name		= "nat",
 	.valid_hooks	= NAT_VALID_HOOKS,
 	.lock		= __RW_LOCK_UNLOCKED(__nat_table.lock),
 	.me		= THIS_MODULE,
 	.af		= AF_INET,
 };
-static struct xt_table *nat_table;
 
 /* Source NAT */
 static unsigned int ipt_snat_target(struct sk_buff *skb,
@@ -194,9 +193,10 @@ int nf_nat_rule_find(struct sk_buff *skb,
 		     const struct net_device *out,
 		     struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	int ret;
 
-	ret = ipt_do_table(skb, hooknum, in, out, nat_table);
+	ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
 
 	if (ret == NF_ACCEPT) {
 		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
@@ -226,14 +226,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = {
 	.family		= AF_INET,
 };
 
+static int __net_init nf_nat_rule_net_init(struct net *net)
+{
+	net->ipv4.nat_table = ipt_register_table(net, &nat_table,
+						 &nat_initial_table.repl);
+	if (IS_ERR(net->ipv4.nat_table))
+		return PTR_ERR(net->ipv4.nat_table);
+	return 0;
+}
+
+static void __net_exit nf_nat_rule_net_exit(struct net *net)
+{
+	ipt_unregister_table(net->ipv4.nat_table);
+}
+
+static struct pernet_operations nf_nat_rule_net_ops = {
+	.init = nf_nat_rule_net_init,
+	.exit = nf_nat_rule_net_exit,
+};
+
 int __init nf_nat_rule_init(void)
 {
 	int ret;
 
-	nat_table = ipt_register_table(&init_net, &__nat_table,
-				       &nat_initial_table.repl);
-	if (IS_ERR(nat_table))
-		return PTR_ERR(nat_table);
+	ret = register_pernet_subsys(&nf_nat_rule_net_ops);
+	if (ret != 0)
+		goto out;
 	ret = xt_register_target(&ipt_snat_reg);
 	if (ret != 0)
 		goto unregister_table;
@@ -247,8 +265,8 @@ int __init nf_nat_rule_init(void)
  unregister_snat:
 	xt_unregister_target(&ipt_snat_reg);
  unregister_table:
-	ipt_unregister_table(nat_table);
-
+	unregister_pernet_subsys(&nf_nat_rule_net_ops);
+ out:
 	return ret;
 }
 
@@ -256,5 +274,5 @@ void nf_nat_rule_cleanup(void)
 {
 	xt_unregister_target(&ipt_dnat_reg);
 	xt_unregister_target(&ipt_snat_reg);
-	ipt_unregister_table(nat_table);
+	unregister_pernet_subsys(&nf_nat_rule_net_ops);
 }
-- 
cgit v1.2.3-70-g09d2


From 0c4c9288ada0e6642d511ef872f10a4781a896ff Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:11 +0200
Subject: netfilter: netns nat: per-netns bysource hash

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/ipv4.h         |  2 ++
 net/ipv4/netfilter/nf_nat_core.c | 72 +++++++++++++++++++++++++---------------
 2 files changed, 47 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b286b840493..ece1c926b5d 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -39,6 +39,8 @@ struct netns_ipv4 {
 	struct xt_table		*arptable_filter;
 	struct xt_table		*iptable_security;
 	struct xt_table		*nat_table;
+	struct hlist_head	*nat_bysource;
+	int			nat_vmalloced;
 #endif
 
 	int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 5d4a5b70da2..2ac9eaf1a8c 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly;
 
 /* Calculated at init based on memory size */
 static unsigned int nf_nat_htable_size __read_mostly;
-static int nf_nat_vmalloced;
-
-static struct hlist_head *bysource __read_mostly;
 
 #define MAX_IP_NAT_PROTO 256
 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
@@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct,
 
 /* Only called for SRC manip */
 static int
-find_appropriate_src(const struct nf_conntrack_tuple *tuple,
+find_appropriate_src(struct net *net,
+		     const struct nf_conntrack_tuple *tuple,
 		     struct nf_conntrack_tuple *result,
 		     const struct nf_nat_range *range)
 {
@@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
 	const struct hlist_node *n;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) {
+	hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
 		ct = nat->ct;
 		if (same_src(ct, tuple)) {
 			/* Copy source part from reply tuple. */
@@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
+	struct net *net = nf_ct_net(ct);
 	const struct nf_nat_protocol *proto;
 
 	/* 1) If this srcip/proto/src-proto-part is currently mapped,
@@ -242,7 +241,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   manips not an issue.  */
 	if (maniptype == IP_NAT_MANIP_SRC &&
 	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-		if (find_appropriate_src(orig_tuple, tuple, range)) {
+		if (find_appropriate_src(net, orig_tuple, tuple, range)) {
 			pr_debug("get_unique_tuple: Found current src map\n");
 			if (!nf_nat_used_tuple(tuple, ct))
 				return;
@@ -283,6 +282,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 	struct nf_conn_nat *nat;
 	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
@@ -334,7 +334,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 		/* nf_conntrack_alter_reply might re-allocate exntension aera */
 		nat = nfct_nat(ct);
 		nat->ct = ct;
-		hlist_add_head_rcu(&nat->bysource, &bysource[srchash]);
+		hlist_add_head_rcu(&nat->bysource,
+				   &net->ipv4.nat_bysource[srchash]);
 		spin_unlock_bh(&nf_nat_lock);
 	}
 
@@ -583,6 +584,40 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
 	.flags		= NF_CT_EXT_F_PREALLOC,
 };
 
+static int __net_init nf_nat_net_init(struct net *net)
+{
+	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
+						      &net->ipv4.nat_vmalloced);
+	if (!net->ipv4.nat_bysource)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Clear NAT section of all conntracks, in case we're loaded again. */
+static int clean_nat(struct nf_conn *i, void *data)
+{
+	struct nf_conn_nat *nat = nfct_nat(i);
+
+	if (!nat)
+		return 0;
+	memset(nat, 0, sizeof(*nat));
+	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
+	return 0;
+}
+
+static void __net_exit nf_nat_net_exit(struct net *net)
+{
+	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
+	synchronize_rcu();
+	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
+			     nf_nat_htable_size);
+}
+
+static struct pernet_operations nf_nat_net_ops = {
+	.init = nf_nat_net_init,
+	.exit = nf_nat_net_exit,
+};
+
 static int __init nf_nat_init(void)
 {
 	size_t i;
@@ -599,12 +634,9 @@ static int __init nf_nat_init(void)
 	/* Leave them the same for the moment. */
 	nf_nat_htable_size = nf_conntrack_htable_size;
 
-	bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
-					 &nf_nat_vmalloced);
-	if (!bysource) {
-		ret = -ENOMEM;
+	ret = register_pernet_subsys(&nf_nat_net_ops);
+	if (ret < 0)
 		goto cleanup_extend;
-	}
 
 	/* Sew in builtin protocols. */
 	spin_lock_bh(&nf_nat_lock);
@@ -629,23 +661,9 @@ static int __init nf_nat_init(void)
 	return ret;
 }
 
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct nf_conn *i, void *data)
-{
-	struct nf_conn_nat *nat = nfct_nat(i);
-
-	if (!nat)
-		return 0;
-	memset(nat, 0, sizeof(*nat));
-	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
-	return 0;
-}
-
 static void __exit nf_nat_cleanup(void)
 {
-	nf_ct_iterate_cleanup(&init_net, &clean_nat, NULL);
-	synchronize_rcu();
-	nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size);
+	unregister_pernet_subsys(&nf_nat_net_ops);
 	nf_ct_l3proto_put(l3proto);
 	nf_ct_extend_unregister(&nat_extend);
 	rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
-- 
cgit v1.2.3-70-g09d2


From 9174c1538fffbb5dddab99563eac6b3d8b212277 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:11 +0200
Subject: netfilter: netns nf_conntrack: fixup DNAT in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_nat_rule.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 0a02a8caf3b..f929352ec0e 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -91,13 +91,13 @@ static unsigned int ipt_snat_target(struct sk_buff *skb,
 }
 
 /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
+static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip)
 {
 	static int warned = 0;
 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
 	struct rtable *rt;
 
-	if (ip_route_output_key(&init_net, &rt, &fl) != 0)
+	if (ip_route_output_key(net, &rt, &fl) != 0)
 		return;
 
 	if (rt->rt_src != srcip && !warned) {
@@ -130,7 +130,7 @@ static unsigned int ipt_dnat_target(struct sk_buff *skb,
 
 	if (hooknum == NF_INET_LOCAL_OUT &&
 	    mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle(ip_hdr(skb)->daddr,
+		warn_if_extra_mangle(dev_net(out), ip_hdr(skb)->daddr,
 				     mr->range[0].min_ip);
 
 	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
-- 
cgit v1.2.3-70-g09d2


From cfd6e3d74751b62b6d0844e24c911776e40a0135 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:11 +0200
Subject: netfilter: netns nat: PPTP NAT in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_nat_pptp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index e4bdddc6034..9eb171056c6 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -40,6 +40,7 @@ MODULE_ALIAS("ip_nat_pptp");
 static void pptp_nat_expected(struct nf_conn *ct,
 			      struct nf_conntrack_expect *exp)
 {
+	struct net *net = nf_ct_net(ct);
 	const struct nf_conn *master = ct->master;
 	struct nf_conntrack_expect *other_exp;
 	struct nf_conntrack_tuple t;
@@ -73,7 +74,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 
 	pr_debug("trying to unexpect other dir: ");
 	nf_ct_dump_tuple_ip(&t);
-	other_exp = nf_ct_expect_find_get(&init_net, &t);
+	other_exp = nf_ct_expect_find_get(net, &t);
 	if (other_exp) {
 		nf_ct_unexpect_related(other_exp);
 		nf_ct_expect_put(other_exp);
-- 
cgit v1.2.3-70-g09d2


From 4de6f16b9ec2422fa7ef9c22f7b1c8d5a55499b4 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 8 Oct 2008 11:35:11 +0200
Subject: netfilter: enable netfilter in netns

From kernel perspective, allow entrance in nf_hook_slow().

Stuff which uses nf_register_hook/nf_register_hooks, but otherwise not netns-ready:

	DECnet netfilter
	ipt_CLUSTERIP
	nf_nat_standalone.c together with XFRM (?)
	IPVS
	several individual match modules (like hashlimit)
	ctnetlink
	NOTRACK
	all sorts of queueing and reporting to userspace
	L3 and L4 protocol sysctls, bridge sysctls
	probably something else

Anyway critical mass has been achieved, there is no reason to hide netfilter any longer.

From userspace perspective, allow to manipulate all sorts of
iptables/ip6tables/arptables rules.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/core.c       | 8 --------
 net/netfilter/nf_sockopt.c | 3 ---
 2 files changed, 11 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b16cd79951c..a90ac83c591 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -165,14 +165,6 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 	unsigned int verdict;
 	int ret = 0;
 
-#ifdef CONFIG_NET_NS
-	struct net *net;
-
-	net = indev == NULL ? dev_net(outdev) : dev_net(indev);
-	if (net != &init_net)
-		return 1;
-#endif
-
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
 
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index f9b46de6a3d..8ab829f8657 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -65,9 +65,6 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
 {
 	struct nf_sockopt_ops *ops;
 
-	if (!net_eq(sock_net(sk), &init_net))
-		return ERR_PTR(-ENOPROTOOPT);
-
 	if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
 		return ERR_PTR(-EINTR);
 
-- 
cgit v1.2.3-70-g09d2


From 73e4022f78acdbe420e8c24a7afbd90f4c8f5077 Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 8 Oct 2008 11:35:12 +0200
Subject: netfilter: split netfilter IPv4 defragmentation into a separate
 module

Netfilter connection tracking requires all IPv4 packets to be defragmented.
Both the socket match and the TPROXY target depend on this functionality, so
this patch separates the Netfilter IPv4 defrag hooks into a separate module.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/ipv4/nf_defrag_ipv4.h    |  6 ++
 net/ipv4/netfilter/Kconfig                     |  5 ++
 net/ipv4/netfilter/Makefile                    |  3 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 56 +--------------
 net/ipv4/netfilter/nf_defrag_ipv4.c            | 96 ++++++++++++++++++++++++++
 5 files changed, 113 insertions(+), 53 deletions(-)
 create mode 100644 include/net/netfilter/ipv4/nf_defrag_ipv4.h
 create mode 100644 net/ipv4/netfilter/nf_defrag_ipv4.c

(limited to 'net')

diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
new file mode 100644
index 00000000000..6b00ea38546
--- /dev/null
+++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
@@ -0,0 +1,6 @@
+#ifndef _NF_DEFRAG_IPV4_H
+#define _NF_DEFRAG_IPV4_H
+
+extern void nf_defrag_ipv4_enable(void);
+
+#endif /* _NF_DEFRAG_IPV4_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 4e842d56642..07757ac8d5d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -5,10 +5,15 @@
 menu "IP: Netfilter Configuration"
 	depends on INET && NETFILTER
 
+config NF_DEFRAG_IPV4
+	tristate
+	default n
+
 config NF_CONNTRACK_IPV4
 	tristate "IPv4 connection tracking support (required for NAT)"
 	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
+	select NF_DEFRAG_IPV4
 	---help---
 	  Connection tracking keeps a record of what packets have passed
 	  through your machine, in order to figure out how they are related
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 1107edbe478..5f9b650d90f 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -18,6 +18,9 @@ obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
 
 obj-$(CONFIG_NF_NAT) += nf_nat.o
 
+# defrag
+obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
+
 # NAT helpers (nf_conntrack)
 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
 obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index af69acc1d0f..4a7c3527539 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -1,3 +1,4 @@
+
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  *
@@ -24,6 +25,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
 
 int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
 			      struct nf_conn *ct,
@@ -63,23 +65,6 @@ static int ipv4_print_tuple(struct seq_file *s,
 			  NIPQUAD(tuple->dst.u3.ip));
 }
 
-/* Returns new sk_buff, or NULL */
-static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
-{
-	int err;
-
-	skb_orphan(skb);
-
-	local_bh_disable();
-	err = ip_defrag(skb, user);
-	local_bh_enable();
-
-	if (!err)
-		ip_send_check(ip_hdr(skb));
-
-	return err;
-}
-
 static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			    unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -144,28 +129,6 @@ out:
 	return nf_conntrack_confirm(skb);
 }
 
-static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
-					  struct sk_buff *skb,
-					  const struct net_device *in,
-					  const struct net_device *out,
-					  int (*okfn)(struct sk_buff *))
-{
-	/* Previously seen (loopback)?  Ignore.  Do this before
-	   fragment check. */
-	if (skb->nfct)
-		return NF_ACCEPT;
-
-	/* Gather fragments. */
-	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		if (nf_ct_ipv4_gather_frags(skb,
-					    hooknum == NF_INET_PRE_ROUTING ?
-					    IP_DEFRAG_CONNTRACK_IN :
-					    IP_DEFRAG_CONNTRACK_OUT))
-			return NF_STOLEN;
-	}
-	return NF_ACCEPT;
-}
-
 static unsigned int ipv4_conntrack_in(unsigned int hooknum,
 				      struct sk_buff *skb,
 				      const struct net_device *in,
@@ -194,13 +157,6 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 /* Connection tracking may drop packets, but never alters them, so
    make it the first hook. */
 static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
-	{
-		.hook		= ipv4_conntrack_defrag,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
-	},
 	{
 		.hook		= ipv4_conntrack_in,
 		.owner		= THIS_MODULE,
@@ -208,13 +164,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
 		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK,
 	},
-	{
-		.hook           = ipv4_conntrack_defrag,
-		.owner          = THIS_MODULE,
-		.pf             = PF_INET,
-		.hooknum        = NF_INET_LOCAL_OUT,
-		.priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
-	},
 	{
 		.hook		= ipv4_conntrack_local,
 		.owner		= THIS_MODULE,
@@ -422,6 +371,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 	int ret = 0;
 
 	need_conntrack();
+	nf_defrag_ipv4_enable();
 
 	ret = nf_register_sockopt(&so_getorigdst);
 	if (ret < 0) {
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
new file mode 100644
index 00000000000..aa2c50a180f
--- /dev/null
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -0,0 +1,96 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+/* Returns new sk_buff, or NULL */
+static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+	int err;
+
+	skb_orphan(skb);
+
+	local_bh_disable();
+	err = ip_defrag(skb, user);
+	local_bh_enable();
+
+	if (!err)
+		ip_send_check(ip_hdr(skb));
+
+	return err;
+}
+
+static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+					  struct sk_buff *skb,
+					  const struct net_device *in,
+					  const struct net_device *out,
+					  int (*okfn)(struct sk_buff *))
+{
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	/* Previously seen (loopback)?  Ignore.  Do this before
+	   fragment check. */
+	if (skb->nfct)
+		return NF_ACCEPT;
+#endif
+
+	/* Gather fragments. */
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+		if (nf_ct_ipv4_gather_frags(skb,
+					    hooknum == NF_INET_PRE_ROUTING ?
+					    IP_DEFRAG_CONNTRACK_IN :
+					    IP_DEFRAG_CONNTRACK_OUT))
+			return NF_STOLEN;
+	}
+	return NF_ACCEPT;
+}
+
+static struct nf_hook_ops ipv4_defrag_ops[] = {
+	{
+		.hook		= ipv4_conntrack_defrag,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
+	},
+	{
+		.hook           = ipv4_conntrack_defrag,
+		.owner          = THIS_MODULE,
+		.pf             = PF_INET,
+		.hooknum        = NF_INET_LOCAL_OUT,
+		.priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
+	},
+};
+
+static int __init nf_defrag_init(void)
+{
+	return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops));
+}
+
+static void __exit nf_defrag_fini(void)
+{
+	nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops));
+}
+
+void nf_defrag_ipv4_enable(void)
+{
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
+
+module_init(nf_defrag_init);
+module_exit(nf_defrag_fini);
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-70-g09d2


From 9ad2d745a23853927a19789b034d9eb2e62d78ee Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 8 Oct 2008 11:35:12 +0200
Subject: netfilter: iptables tproxy core

The iptables tproxy core is a module that contains the common routines used by
various tproxy related modules (TPROXY target and socket match)

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_tproxy_core.h | 32 ++++++++++++
 net/netfilter/Kconfig                  | 15 ++++++
 net/netfilter/Makefile                 |  3 ++
 net/netfilter/nf_tproxy_core.c         | 96 ++++++++++++++++++++++++++++++++++
 4 files changed, 146 insertions(+)
 create mode 100644 include/net/netfilter/nf_tproxy_core.h
 create mode 100644 net/netfilter/nf_tproxy_core.c

(limited to 'net')

diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h
new file mode 100644
index 00000000000..208b46f4d6d
--- /dev/null
+++ b/include/net/netfilter/nf_tproxy_core.h
@@ -0,0 +1,32 @@
+#ifndef _NF_TPROXY_CORE_H
+#define _NF_TPROXY_CORE_H
+
+#include <linux/types.h>
+#include <linux/in.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/tcp.h>
+
+/* look up and get a reference to a matching socket */
+extern struct sock *
+nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+		      const __be32 saddr, const __be32 daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in, bool listening);
+
+static inline void
+nf_tproxy_put_sock(struct sock *sk)
+{
+	/* TIME_WAIT inet sockets have to be handled differently */
+	if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT))
+		inet_twsk_put(inet_twsk(sk));
+	else
+		sock_put(sk);
+}
+
+/* assign a socket to the skb -- consumes sk */
+int
+nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk);
+
+#endif
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 4a464857f21..ed1dcfb61e1 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -287,6 +287,21 @@ config NF_CT_NETLINK
 	help
 	  This option enables support for a netlink-based userspace interface
 
+# transparent proxy support
+config NETFILTER_TPROXY
+	tristate "Transparent proxying support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	depends on IP_NF_MANGLE
+	depends on NETFILTER_ADVANCED
+	help
+	  This option enables transparent proxying support, that is,
+	  support for handling non-locally bound IPv4 TCP and UDP sockets.
+	  For it to work you will have to configure certain iptables rules
+	  and use policy routing. For more information on how to set it up
+	  see Documentation/networking/tproxy.txt.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f101cf61e6f..fc8bbb48d38 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -34,6 +34,9 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
 obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 
+# transparent proxy support
+obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
+
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
new file mode 100644
index 00000000000..fe34f4bf74c
--- /dev/null
+++ b/net/netfilter/nf_tproxy_core.c
@@ -0,0 +1,96 @@
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (c) 2006-2007 BalaBit IT Ltd.
+ * Author: Balazs Scheidler, Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/net.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <net/udp.h>
+#include <net/netfilter/nf_tproxy_core.h>
+
+struct sock *
+nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+		      const __be32 saddr, const __be32 daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in, bool listening_only)
+{
+	struct sock *sk;
+
+	/* look up socket */
+	switch (protocol) {
+	case IPPROTO_TCP:
+		if (listening_only)
+			sk = __inet_lookup_listener(net, &tcp_hashinfo,
+						    daddr, ntohs(dport),
+						    in->ifindex);
+		else
+			sk = __inet_lookup(net, &tcp_hashinfo,
+					   saddr, sport, daddr, dport,
+					   in->ifindex);
+		break;
+	case IPPROTO_UDP:
+		sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+				     in->ifindex);
+		break;
+	default:
+		WARN_ON(1);
+		sk = NULL;
+	}
+
+	pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n",
+		 protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
+
+static void
+nf_tproxy_destructor(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	skb->sk = NULL;
+	skb->destructor = NULL;
+
+	if (sk)
+		nf_tproxy_put_sock(sk);
+}
+
+/* consumes sk */
+int
+nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+	if (inet_sk(sk)->transparent) {
+		skb->sk = sk;
+		skb->destructor = nf_tproxy_destructor;
+		return 1;
+	} else
+		nf_tproxy_put_sock(sk);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
+
+static int __init nf_tproxy_init(void)
+{
+	pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
+	pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
+	return 0;
+}
+
+module_init(nf_tproxy_init);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs");
+MODULE_DESCRIPTION("Transparent proxy support core routines");
-- 
cgit v1.2.3-70-g09d2


From 136cdc71fd54e77463e570643ac76e2b696e48a0 Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 8 Oct 2008 11:35:12 +0200
Subject: netfilter: iptables socket match

Add iptables 'socket' match, which matches packets for which a TCP/UDP
socket lookup succeeds.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/Kconfig     |  15 ++++
 net/netfilter/Makefile    |   1 +
 net/netfilter/xt_socket.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 208 insertions(+)
 create mode 100644 net/netfilter/xt_socket.c

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ed1dcfb61e1..f6c80729948 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -778,6 +778,21 @@ config NETFILTER_XT_MATCH_SCTP
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_SOCKET
+	tristate '"socket" match support (EXPERIMENTAL)'
+	depends on EXPERIMENTAL
+	depends on NETFILTER_TPROXY
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	select NF_DEFRAG_IPV4
+	help
+	  This option adds a `socket' match, which can be used to match
+	  packets for which a TCP or UDP socket lookup finds a valid socket.
+	  It can be used in combination with the MARK target and policy
+	  routing to implement full featured non-locally bound sockets.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_STATE
 	tristate '"state" match support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index fc8bbb48d38..1cdc3a13d01 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
new file mode 100644
index 00000000000..ac9db17c7b9
--- /dev/null
+++ b/net/netfilter/xt_socket.c
@@ -0,0 +1,192 @@
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/netfilter/nf_tproxy_core.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#define XT_SOCKET_HAVE_CONNTRACK 1
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static int
+extract_icmp_fields(const struct sk_buff *skb,
+		    u8 *protocol,
+		    __be32 *raddr,
+		    __be32 *laddr,
+		    __be16 *rport,
+		    __be16 *lport)
+{
+	unsigned int outside_hdrlen = ip_hdrlen(skb);
+	struct iphdr *inside_iph, _inside_iph;
+	struct icmphdr *icmph, _icmph;
+	__be16 *ports, _ports[2];
+
+	icmph = skb_header_pointer(skb, outside_hdrlen,
+				   sizeof(_icmph), &_icmph);
+	if (icmph == NULL)
+		return 1;
+
+	switch (icmph->type) {
+	case ICMP_DEST_UNREACH:
+	case ICMP_SOURCE_QUENCH:
+	case ICMP_REDIRECT:
+	case ICMP_TIME_EXCEEDED:
+	case ICMP_PARAMETERPROB:
+		break;
+	default:
+		return 1;
+	}
+
+	inside_iph = skb_header_pointer(skb, outside_hdrlen +
+					sizeof(struct icmphdr),
+					sizeof(_inside_iph), &_inside_iph);
+	if (inside_iph == NULL)
+		return 1;
+
+	if (inside_iph->protocol != IPPROTO_TCP &&
+	    inside_iph->protocol != IPPROTO_UDP)
+		return 1;
+
+	ports = skb_header_pointer(skb, outside_hdrlen +
+				   sizeof(struct icmphdr) +
+				   (inside_iph->ihl << 2),
+				   sizeof(_ports), &_ports);
+	if (ports == NULL)
+		return 1;
+
+	/* the inside IP packet is the one quoted from our side, thus
+	 * its saddr is the local address */
+	*protocol = inside_iph->protocol;
+	*laddr = inside_iph->saddr;
+	*lport = ports[0];
+	*raddr = inside_iph->daddr;
+	*rport = ports[1];
+
+	return 0;
+}
+
+
+static bool
+socket_mt(const struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  const struct xt_match *match,
+	  const void *matchinfo,
+	  int offset,
+	  unsigned int protoff,
+	  bool *hotdrop)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct udphdr _hdr, *hp = NULL;
+	struct sock *sk;
+	__be32 daddr, saddr;
+	__be16 dport, sport;
+	u8 protocol;
+#ifdef XT_SOCKET_HAVE_CONNTRACK
+	struct nf_conn const *ct;
+	enum ip_conntrack_info ctinfo;
+#endif
+
+	if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+		hp = skb_header_pointer(skb, ip_hdrlen(skb),
+					sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			return false;
+
+		protocol = iph->protocol;
+		saddr = iph->saddr;
+		sport = hp->source;
+		daddr = iph->daddr;
+		dport = hp->dest;
+
+	} else if (iph->protocol == IPPROTO_ICMP) {
+		if (extract_icmp_fields(skb, &protocol, &saddr, &daddr,
+					&sport, &dport))
+			return false;
+	} else {
+		return false;
+	}
+
+#ifdef XT_SOCKET_HAVE_CONNTRACK
+	/* Do the lookup with the original socket address in case this is a
+	 * reply packet of an established SNAT-ted connection. */
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct && (ct != &nf_conntrack_untracked) &&
+	    ((iph->protocol != IPPROTO_ICMP &&
+	      ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
+	     (iph->protocol == IPPROTO_ICMP &&
+	      ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) &&
+	    (ct->status & IPS_SRC_NAT_DONE)) {
+
+		daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+		dport = (iph->protocol == IPPROTO_TCP) ?
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
+			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+	}
+#endif
+
+	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
+				   saddr, daddr, sport, dport, in, false);
+	if (sk != NULL) {
+		bool wildcard = (inet_sk(sk)->rcv_saddr == 0);
+
+		nf_tproxy_put_sock(sk);
+		if (wildcard)
+			sk = NULL;
+	}
+
+	pr_debug("socket match: proto %u %08x:%u -> %08x:%u "
+		 "(orig %08x:%u) sock %p\n",
+		 protocol, ntohl(saddr), ntohs(sport),
+		 ntohl(daddr), ntohs(dport),
+		 ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk);
+
+	return (sk != NULL);
+}
+
+static struct xt_match socket_mt_reg __read_mostly = {
+	.name		= "socket",
+	.family		= AF_INET,
+	.match		= socket_mt,
+	.hooks		= 1 << NF_INET_PRE_ROUTING,
+	.me		= THIS_MODULE,
+};
+
+static int __init socket_mt_init(void)
+{
+	nf_defrag_ipv4_enable();
+	return xt_register_match(&socket_mt_reg);
+}
+
+static void __exit socket_mt_exit(void)
+{
+	xt_unregister_match(&socket_mt_reg);
+}
+
+module_init(socket_mt_init);
+module_exit(socket_mt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("x_tables socket match module");
+MODULE_ALIAS("ipt_socket");
-- 
cgit v1.2.3-70-g09d2


From e84392707e10301b93121e1b74e2823db50cdf9e Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 8 Oct 2008 11:35:12 +0200
Subject: netfilter: iptables TPROXY target

The TPROXY target implements redirection of non-local TCP/UDP traffic to local
sockets. Additionally, it's possible to manipulate the packet mark if and only
if a socket has been found. (We need this because we cannot use multiple
targets in the same iptables rule.)

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_TPROXY.h |  14 +++++
 net/netfilter/Kconfig               |  15 +++++
 net/netfilter/Makefile              |   1 +
 net/netfilter/xt_TPROXY.c           | 112 ++++++++++++++++++++++++++++++++++++
 4 files changed, 142 insertions(+)
 create mode 100644 include/linux/netfilter/xt_TPROXY.h
 create mode 100644 net/netfilter/xt_TPROXY.c

(limited to 'net')

diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h
new file mode 100644
index 00000000000..152e8f97132
--- /dev/null
+++ b/include/linux/netfilter/xt_TPROXY.h
@@ -0,0 +1,14 @@
+#ifndef _XT_TPROXY_H_target
+#define _XT_TPROXY_H_target
+
+/* TPROXY target is capable of marking the packet to perform
+ * redirection. We can get rid of that whenever we get support for
+ * mutliple targets in the same rule. */
+struct xt_tproxy_target_info {
+	u_int32_t mark_mask;
+	u_int32_t mark_value;
+	__be32 laddr;
+	__be16 lport;
+};
+
+#endif /* _XT_TPROXY_H_target */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f6c80729948..de18bba619f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -421,6 +421,21 @@ config NETFILTER_XT_TARGET_RATEEST
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_TPROXY
+	tristate '"TPROXY" target support (EXPERIMENTAL)'
+	depends on EXPERIMENTAL
+	depends on NETFILTER_TPROXY
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	select NF_DEFRAG_IPV4
+	help
+	  This option adds a `TPROXY' target, which is somewhat similar to
+	  REDIRECT.  It can only be used in the mangle table and is useful
+	  to redirect traffic to a transparent proxy.  It does _not_ depend
+	  on Netfilter connection tracking and NAT, unlike REDIRECT.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_TRACE
 	tristate  '"TRACE" target support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1cdc3a13d01..8ce67665882 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
new file mode 100644
index 00000000000..183f251d2f0
--- /dev/null
+++ b/net/netfilter/xt_TPROXY.c
@@ -0,0 +1,112 @@
+/*
+ * Transparent proxy support for Linux/iptables
+ *
+ * Copyright (c) 2006-2007 BalaBit IT Ltd.
+ * Author: Balazs Scheidler, Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+#include <net/inet_sock.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/xt_TPROXY.h>
+
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/nf_tproxy_core.h>
+
+static unsigned int
+tproxy_tg(struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  unsigned int hooknum,
+	  const struct xt_target *target,
+	  const void *targinfo)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct xt_tproxy_target_info *tgi = targinfo;
+	struct udphdr _hdr, *hp;
+	struct sock *sk;
+
+	hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return NF_DROP;
+
+	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+				   iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr,
+				   hp->source, tgi->lport ? tgi->lport : hp->dest,
+				   in, true);
+
+	/* NOTE: assign_sock consumes our sk reference */
+	if (sk && nf_tproxy_assign_sock(skb, sk)) {
+		/* This should be in a separate target, but we don't do multiple
+		   targets on the same rule yet */
+		skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
+
+		pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n",
+			 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest),
+			 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark);
+		return NF_ACCEPT;
+	}
+
+	pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n",
+		 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest),
+		 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark);
+	return NF_DROP;
+}
+
+static bool
+tproxy_tg_check(const char *tablename,
+		const void *entry,
+		const struct xt_target *target,
+		void *targetinfo,
+		unsigned int hook_mask)
+{
+	const struct ipt_ip *i = entry;
+
+	if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
+	    && !(i->invflags & IPT_INV_PROTO))
+		return true;
+
+	pr_info("xt_TPROXY: Can be used only in combination with "
+		"either -p tcp or -p udp\n");
+	return false;
+}
+
+static struct xt_target tproxy_tg_reg __read_mostly = {
+	.name		= "TPROXY",
+	.family		= AF_INET,
+	.table		= "mangle",
+	.target		= tproxy_tg,
+	.targetsize	= sizeof(struct xt_tproxy_target_info),
+	.checkentry	= tproxy_tg_check,
+	.hooks		= 1 << NF_INET_PRE_ROUTING,
+	.me		= THIS_MODULE,
+};
+
+static int __init tproxy_tg_init(void)
+{
+	nf_defrag_ipv4_enable();
+	return xt_register_target(&tproxy_tg_reg);
+}
+
+static void __exit tproxy_tg_exit(void)
+{
+	xt_unregister_target(&tproxy_tg_reg);
+}
+
+module_init(tproxy_tg_init);
+module_exit(tproxy_tg_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs");
+MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module.");
+MODULE_ALIAS("ipt_TPROXY");
-- 
cgit v1.2.3-70-g09d2


From 18219d3f7d6a5bc43825a41e0763158efbdb80d3 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:13 +0200
Subject: netfilter: ebtables: do centralized size checking

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h |  3 +++
 net/bridge/netfilter/ebt_802_3.c          |  7 +++--
 net/bridge/netfilter/ebt_among.c          |  1 +
 net/bridge/netfilter/ebt_arp.c            |  9 +++----
 net/bridge/netfilter/ebt_arpreply.c       |  9 +++----
 net/bridge/netfilter/ebt_dnat.c           |  9 +++----
 net/bridge/netfilter/ebt_ip.c             |  9 +++----
 net/bridge/netfilter/ebt_ip6.c            |  9 +++----
 net/bridge/netfilter/ebt_limit.c          | 11 +++-----
 net/bridge/netfilter/ebt_log.c            | 11 ++++----
 net/bridge/netfilter/ebt_mark.c           |  6 ++---
 net/bridge/netfilter/ebt_mark_m.c         |  7 +++--
 net/bridge/netfilter/ebt_nflog.c          |  4 +--
 net/bridge/netfilter/ebt_pkttype.c        |  7 +++--
 net/bridge/netfilter/ebt_redirect.c       | 11 ++++----
 net/bridge/netfilter/ebt_snat.c           | 11 ++++----
 net/bridge/netfilter/ebt_stp.c            | 10 +++----
 net/bridge/netfilter/ebt_ulog.c           |  5 ++--
 net/bridge/netfilter/ebt_vlan.c           | 10 ++-----
 net/bridge/netfilter/ebtables.c           | 43 +++++++++++++++++++++++++++----
 20 files changed, 104 insertions(+), 88 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 892f5b7771c..fd085af8962 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -215,6 +215,7 @@ struct ebt_match
 	int (*check)(const char *tablename, unsigned int hookmask,
 	   const struct ebt_entry *e, void *matchdata, unsigned int datalen);
 	void (*destroy)(void *matchdata, unsigned int datalen);
+	unsigned int matchsize;
 	struct module *me;
 };
 
@@ -229,6 +230,7 @@ struct ebt_watcher
 	int (*check)(const char *tablename, unsigned int hookmask,
 	   const struct ebt_entry *e, void *watcherdata, unsigned int datalen);
 	void (*destroy)(void *watcherdata, unsigned int datalen);
+	unsigned int targetsize;
 	struct module *me;
 };
 
@@ -244,6 +246,7 @@ struct ebt_target
 	int (*check)(const char *tablename, unsigned int hookmask,
 	   const struct ebt_entry *e, void *targetdata, unsigned int datalen);
 	void (*destroy)(void *targetdata, unsigned int datalen);
+	unsigned int targetsize;
 	struct module *me;
 };
 
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 98534025360..ccecfbd2a25 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -7,10 +7,10 @@
  * May 2003
  *
  */
-
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_802_3.h>
-#include <linux/module.h>
 
 static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out, const void *data, unsigned int datalen)
@@ -42,8 +42,6 @@ static int ebt_802_3_check(const char *tablename, unsigned int hookmask,
 {
 	const struct ebt_802_3_info *info = data;
 
-	if (datalen < sizeof(struct ebt_802_3_info))
-		return -EINVAL;
 	if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK)
 		return -EINVAL;
 
@@ -54,6 +52,7 @@ static struct ebt_match filter_802_3 __read_mostly = {
 	.name		= EBT_802_3_MATCH,
 	.match		= ebt_filter_802_3,
 	.check		= ebt_802_3_check,
+	.matchsize	= XT_ALIGN(sizeof(struct ebt_802_3_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 70b6dca5ea7..b0acb13a390 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -216,6 +216,7 @@ static struct ebt_match filter_among __read_mostly = {
 	.name		= EBT_AMONG_MATCH,
 	.match		= ebt_filter_among,
 	.check		= ebt_among_check,
+	.matchsize	= -1, /* special case */
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 7c535be7566..385f9cb85bc 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -8,12 +8,12 @@
  *  April, 2002
  *
  */
-
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_arp.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_arp.h>
 
 static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out, const void *data, unsigned int datalen)
@@ -105,8 +105,6 @@ static int ebt_arp_check(const char *tablename, unsigned int hookmask,
 {
 	const struct ebt_arp_info *info = data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info)))
-		return -EINVAL;
 	if ((e->ethproto != htons(ETH_P_ARP) &&
 	   e->ethproto != htons(ETH_P_RARP)) ||
 	   e->invflags & EBT_IPROTO)
@@ -120,6 +118,7 @@ static struct ebt_match filter_arp __read_mostly = {
 	.name		= EBT_ARP_MATCH,
 	.match		= ebt_filter_arp,
 	.check		= ebt_arp_check,
+	.matchsize	= XT_ALIGN(sizeof(struct ebt_arp_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 0c4279590fc..a860ea6da46 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -8,12 +8,12 @@
  *  August, 2003
  *
  */
-
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_arpreply.h>
 #include <linux/if_arp.h>
 #include <net/arp.h>
 #include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_arpreply.h>
 
 static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
@@ -63,8 +63,6 @@ static int ebt_target_reply_check(const char *tablename, unsigned int hookmask,
 {
 	const struct ebt_arpreply_info *info = data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info)))
-		return -EINVAL;
 	if (BASE_CHAIN && info->target == EBT_RETURN)
 		return -EINVAL;
 	if (e->ethproto != htons(ETH_P_ARP) ||
@@ -80,6 +78,7 @@ static struct ebt_target reply_target __read_mostly = {
 	.name		= EBT_ARPREPLY_TARGET,
 	.target		= ebt_target_reply,
 	.check		= ebt_target_reply_check,
+	.targetsize	= XT_ALIGN(sizeof(struct ebt_arpreply_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index ca64c1cc1b4..c2be41e8bb9 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -7,12 +7,12 @@
  *  June, 2002
  *
  */
-
+#include <linux/module.h>
+#include <net/sock.h>
 #include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nat.h>
-#include <linux/module.h>
-#include <net/sock.h>
 
 static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
@@ -39,8 +39,6 @@ static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask,
 	   (hookmask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) &&
 	   (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) )
 		return -EINVAL;
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info)))
-		return -EINVAL;
 	if (INVALID_TARGET)
 		return -EINVAL;
 	return 0;
@@ -50,6 +48,7 @@ static struct ebt_target dnat __read_mostly = {
 	.name		= EBT_DNAT_TARGET,
 	.target		= ebt_target_dnat,
 	.check		= ebt_target_dnat_check,
+	.targetsize	= XT_ALIGN(sizeof(struct ebt_nat_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 65caa00dcf2..c1ae2547e3d 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -11,13 +11,13 @@
  *    Innominate Security Technologies AG <mhopf@innominate.com>
  *    September, 2002
  */
-
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_ip.h>
 #include <linux/ip.h>
 #include <net/ip.h>
 #include <linux/in.h>
 #include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_ip.h>
 
 struct tcpudphdr {
 	__be16 src;
@@ -83,8 +83,6 @@ static int ebt_ip_check(const char *tablename, unsigned int hookmask,
 {
 	const struct ebt_ip_info *info = data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info)))
-		return -EINVAL;
 	if (e->ethproto != htons(ETH_P_IP) ||
 	   e->invflags & EBT_IPROTO)
 		return -EINVAL;
@@ -111,6 +109,7 @@ static struct ebt_match filter_ip __read_mostly = {
 	.name		= EBT_IP_MATCH,
 	.match		= ebt_filter_ip,
 	.check		= ebt_ip_check,
+	.matchsize	= XT_ALIGN(sizeof(struct ebt_ip_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 36efb3a7524..554dd68637c 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -13,14 +13,14 @@
  *
  *  Jan, 2008
  */
-
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_ip6.h>
 #include <linux/ipv6.h>
 #include <net/ipv6.h>
 #include <linux/in.h>
 #include <linux/module.h>
 #include <net/dsfield.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_ip6.h>
 
 struct tcpudphdr {
 	__be16 src;
@@ -97,8 +97,6 @@ static int ebt_ip6_check(const char *tablename, unsigned int hookmask,
 {
 	struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_ip6_info)))
-		return -EINVAL;
 	if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO)
 		return -EINVAL;
 	if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK)
@@ -125,6 +123,7 @@ static struct ebt_match filter_ip6 =
 	.name		= EBT_IP6_MATCH,
 	.match		= ebt_filter_ip6,
 	.check		= ebt_ip6_check,
+	.matchsize	= XT_ALIGN(sizeof(struct ebt_ip6_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 8cbdc01c253..3d71f3510ff 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -10,13 +10,12 @@
  *  September, 2003
  *
  */
-
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_limit.h>
 #include <linux/module.h>
-
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_limit.h>
 
 static DEFINE_SPINLOCK(limit_lock);
 
@@ -71,9 +70,6 @@ static int ebt_limit_check(const char *tablename, unsigned int hookmask,
 {
 	struct ebt_limit_info *info = data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info)))
-		return -EINVAL;
-
 	/* Check for overflow. */
 	if (info->burst == 0 ||
 	    user2credits(info->avg * info->burst) < user2credits(info->avg)) {
@@ -94,6 +90,7 @@ static struct ebt_match ebt_limit_reg __read_mostly = {
 	.name		= EBT_LIMIT_MATCH,
 	.match		= ebt_limit_match,
 	.check		= ebt_limit_check,
+	.matchsize	= XT_ALIGN(sizeof(struct ebt_limit_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 8b17c64bcd7..d9596f114a3 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -8,10 +8,6 @@
  *  April, 2002
  *
  */
-
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_log.h>
-#include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/ip.h>
 #include <linux/in.h>
@@ -21,6 +17,10 @@
 #include <linux/ipv6.h>
 #include <net/ipv6.h>
 #include <linux/in6.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_log.h>
+#include <linux/netfilter.h>
 
 static DEFINE_SPINLOCK(ebt_log_lock);
 
@@ -29,8 +29,6 @@ static int ebt_log_check(const char *tablename, unsigned int hookmask,
 {
 	struct ebt_log_info *info = data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info)))
-		return -EINVAL;
 	if (info->bitmask & ~EBT_LOG_MASK)
 		return -EINVAL;
 	if (info->loglevel >= 8)
@@ -218,6 +216,7 @@ static struct ebt_watcher log =
 	.name		= EBT_LOG_WATCHER,
 	.watcher	= ebt_log,
 	.check		= ebt_log_check,
+	.targetsize	= XT_ALIGN(sizeof(struct ebt_log_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 36723f47db0..bb02412786c 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -13,9 +13,10 @@
  * Marking a frame doesn't really change anything in the frame anyway.
  */
 
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_mark_t.h>
-#include <linux/module.h>
 
 static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
@@ -42,8 +43,6 @@ static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
 	const struct ebt_mark_t_info *info = data;
 	int tmp;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info)))
-		return -EINVAL;
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return -EINVAL;
@@ -61,6 +60,7 @@ static struct ebt_target mark_target __read_mostly = {
 	.name		= EBT_MARK_TARGET,
 	.target		= ebt_target_mark,
 	.check		= ebt_target_mark_check,
+	.targetsize	= XT_ALIGN(sizeof(struct ebt_mark_t_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 9b0a4543861..b8ce9eb7170 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -7,10 +7,10 @@
  *  July, 2002
  *
  */
-
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_mark_m.h>
-#include <linux/module.h>
 
 static int ebt_filter_mark(const struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out, const void *data,
@@ -28,8 +28,6 @@ static int ebt_mark_check(const char *tablename, unsigned int hookmask,
 {
 	const struct ebt_mark_m_info *info = data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info)))
-		return -EINVAL;
 	if (info->bitmask & ~EBT_MARK_MASK)
 		return -EINVAL;
 	if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND))
@@ -43,6 +41,7 @@ static struct ebt_match filter_mark __read_mostly = {
 	.name		= EBT_MARK_MATCH,
 	.match		= ebt_filter_mark,
 	.check		= ebt_mark_check,
+	.matchsize	= XT_ALIGN(sizeof(struct ebt_mark_m_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 8e799aa9e56..88ceb5eb849 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -14,6 +14,7 @@
 
 #include <linux/module.h>
 #include <linux/spinlock.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nflog.h>
 #include <net/netfilter/nf_log.h>
@@ -42,8 +43,6 @@ static int ebt_nflog_check(const char *tablename,
 {
 	struct ebt_nflog_info *info = (struct ebt_nflog_info *)data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_nflog_info)))
-		return -EINVAL;
 	if (info->flags & ~EBT_NFLOG_MASK)
 		return -EINVAL;
 	info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0';
@@ -54,6 +53,7 @@ static struct ebt_watcher nflog __read_mostly = {
 	.name = EBT_NFLOG_WATCHER,
 	.watcher = ebt_nflog,
 	.check = ebt_nflog_check,
+	.targetsize = XT_ALIGN(sizeof(struct ebt_nflog_info)),
 	.me = THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 676db32df3d..019026177f8 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -7,10 +7,10 @@
  *  April, 2003
  *
  */
-
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_pkttype.h>
-#include <linux/module.h>
 
 static int ebt_filter_pkttype(const struct sk_buff *skb,
    const struct net_device *in,
@@ -28,8 +28,6 @@ static int ebt_pkttype_check(const char *tablename, unsigned int hookmask,
 {
 	const struct ebt_pkttype_info *info = data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info)))
-		return -EINVAL;
 	if (info->invert != 0 && info->invert != 1)
 		return -EINVAL;
 	/* Allow any pkt_type value */
@@ -40,6 +38,7 @@ static struct ebt_match filter_pkttype __read_mostly = {
 	.name		= EBT_PKTTYPE_MATCH,
 	.match		= ebt_filter_pkttype,
 	.check		= ebt_pkttype_check,
+	.matchsize	= XT_ALIGN(sizeof(struct ebt_pkttype_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index b8afe850cf1..04053268386 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -7,13 +7,13 @@
  *  April, 2002
  *
  */
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_redirect.h>
 #include <linux/module.h>
 #include <net/sock.h>
 #include "../br_private.h"
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_redirect.h>
 
 static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
@@ -38,8 +38,6 @@ static int ebt_target_redirect_check(const char *tablename, unsigned int hookmas
 {
 	const struct ebt_redirect_info *info = data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info)))
-		return -EINVAL;
 	if (BASE_CHAIN && info->target == EBT_RETURN)
 		return -EINVAL;
 	CLEAR_BASE_CHAIN_BIT;
@@ -55,6 +53,7 @@ static struct ebt_target redirect_target __read_mostly = {
 	.name		= EBT_REDIRECT_TARGET,
 	.target		= ebt_target_redirect,
 	.check		= ebt_target_redirect_check,
+	.targetsize	= XT_ALIGN(sizeof(struct ebt_redirect_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 5425333dda0..abfbc6c9502 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -7,14 +7,14 @@
  *  June, 2002
  *
  */
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_nat.h>
 #include <linux/module.h>
 #include <net/sock.h>
 #include <linux/if_arp.h>
 #include <net/arp.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_nat.h>
 
 static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
@@ -49,8 +49,6 @@ static int ebt_target_snat_check(const char *tablename, unsigned int hookmask,
 	const struct ebt_nat_info *info = data;
 	int tmp;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info)))
-		return -EINVAL;
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return -EINVAL;
@@ -72,6 +70,7 @@ static struct ebt_target snat __read_mostly = {
 	.name		= EBT_SNAT_TARGET,
 	.target		= ebt_target_snat,
 	.check		= ebt_target_snat_check,
+	.targetsize	= XT_ALIGN(sizeof(struct ebt_nat_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 40f36d37607..c7a0a00dac7 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -7,11 +7,11 @@
  *
  *  July, 2003
  */
-
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_stp.h>
 #include <linux/etherdevice.h>
 #include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_stp.h>
 
 #define BPDU_TYPE_CONFIG 0
 #define BPDU_TYPE_TCN 0x80
@@ -157,15 +157,12 @@ static int ebt_stp_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_stp_info *info = data;
-	const unsigned int len = EBT_ALIGN(sizeof(struct ebt_stp_info));
 	const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
 	const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
 	if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
 	    !(info->bitmask & EBT_STP_MASK))
 		return -EINVAL;
-	if (datalen != len)
-		return -EINVAL;
 	/* Make sure the match only receives stp frames */
 	if (compare_ether_addr(e->destmac, bridge_ula) ||
 	    compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC))
@@ -178,6 +175,7 @@ static struct ebt_match filter_stp __read_mostly = {
 	.name		= EBT_STP_MATCH,
 	.match		= ebt_filter_stp,
 	.check		= ebt_stp_check,
+	.matchsize	= XT_ALIGN(sizeof(struct ebt_stp_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 3b1678cd66f..bdd8a27bba9 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -36,6 +36,7 @@
 #include <linux/timer.h>
 #include <linux/netlink.h>
 #include <linux/netdevice.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ulog.h>
 #include <net/netfilter/nf_log.h>
@@ -260,8 +261,7 @@ static int ebt_ulog_check(const char *tablename, unsigned int hookmask,
 {
 	struct ebt_ulog_info *uloginfo = data;
 
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) ||
-	    uloginfo->nlgroup > 31)
+	if (uloginfo->nlgroup > 31)
 		return -EINVAL;
 
 	uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0';
@@ -276,6 +276,7 @@ static struct ebt_watcher ulog __read_mostly = {
 	.name		= EBT_ULOG_WATCHER,
 	.watcher	= ebt_ulog,
 	.check		= ebt_ulog_check,
+	.targetsize	= XT_ALIGN(sizeof(struct ebt_ulog_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index ab60b0dade8..4dba47aefc8 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -22,6 +22,7 @@
 #include <linux/if_vlan.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_vlan.h>
 
@@ -93,14 +94,6 @@ ebt_check_vlan(const char *tablename,
 {
 	struct ebt_vlan_info *info = data;
 
-	/* Parameters buffer overflow check */
-	if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) {
-		DEBUG_MSG
-		    ("passed size %d is not eq to ebt_vlan_info (%Zd)\n",
-		     datalen, sizeof(struct ebt_vlan_info));
-		return -EINVAL;
-	}
-
 	/* Is it 802.1Q frame checked? */
 	if (e->ethproto != htons(ETH_P_8021Q)) {
 		DEBUG_MSG
@@ -173,6 +166,7 @@ static struct ebt_match filter_vlan __read_mostly = {
 	.name		= EBT_VLAN_MATCH,
 	.match		= ebt_filter_vlan,
 	.check		= ebt_check_vlan,
+	.matchsize	= XT_ALIGN(sizeof(struct ebt_vlan_info)),
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 32afff859e4..b04e288d20f 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -19,6 +19,7 @@
 #include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
@@ -59,8 +60,9 @@ static LIST_HEAD(ebt_targets);
 static LIST_HEAD(ebt_matches);
 static LIST_HEAD(ebt_watchers);
 
-static struct ebt_target ebt_standard_target =
-{ {NULL, NULL}, EBT_STANDARD_TARGET, NULL, NULL, NULL, NULL};
+static struct ebt_target ebt_standard_target = {
+	.name = "standard",
+};
 
 static inline int ebt_do_watcher (struct ebt_entry_watcher *w,
    const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in,
@@ -350,6 +352,18 @@ ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
 		return -ENOENT;
 	}
 	mutex_unlock(&ebt_mutex);
+	if (XT_ALIGN(match->matchsize) != m->match_size &&
+	    match->matchsize != -1) {
+		/*
+		 * ebt_among is exempt from centralized matchsize checking
+		 * because it uses a dynamic-size data set.
+		 */
+		printk(KERN_WARNING "ebtables: %s match: "
+		       "invalid size %Zu != %u\n",
+		       match->name, XT_ALIGN(match->matchsize), m->match_size);
+		module_put(match->me);
+		return -EINVAL;
+	}
 	if (match->check &&
 	   match->check(name, hookmask, e, m->data, m->match_size) != 0) {
 		BUGPRINT("match->check failed\n");
@@ -380,6 +394,14 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
 		return -ENOENT;
 	}
 	mutex_unlock(&ebt_mutex);
+	if (XT_ALIGN(watcher->targetsize) != w->watcher_size) {
+		printk(KERN_WARNING "ebtables: %s watcher: "
+		       "invalid size %Zu != %u\n",
+		       watcher->name, XT_ALIGN(watcher->targetsize),
+		       w->watcher_size);
+		module_put(watcher->me);
+		return -EINVAL;
+	}
 	if (watcher->check &&
 	   watcher->check(name, hookmask, e, w->data, w->watcher_size) != 0) {
 		BUGPRINT("watcher->check failed\n");
@@ -681,9 +703,20 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 			ret = -EFAULT;
 			goto cleanup_watchers;
 		}
-	} else if (t->target_size > gap - sizeof(struct ebt_entry_target) ||
-	   (t->u.target->check &&
-	   t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0)){
+	} else if (t->target_size > gap - sizeof(struct ebt_entry_target)) {
+		module_put(t->u.target->me);
+		ret = -EFAULT;
+		goto cleanup_watchers;
+	} else if (XT_ALIGN(target->targetsize) != t->target_size) {
+		printk(KERN_WARNING "ebtables: %s target: "
+		       "invalid size %Zu != %u\n",
+		       target->name, XT_ALIGN(target->targetsize),
+		       t->target_size);
+		module_put(t->u.target->me);
+		ret = -EINVAL;
+		goto cleanup_watchers;
+	} else if (t->u.target->check &&
+	    t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0) {
 		module_put(t->u.target->me);
 		ret = -EFAULT;
 		goto cleanup_watchers;
-- 
cgit v1.2.3-70-g09d2


From 19eda879a136889110c692dec4c2ab59e0e43cef Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:13 +0200
Subject: netfilter: change return types of check functions for Ebtables
 extensions

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h |  9 +++------
 net/bridge/netfilter/ebt_802_3.c          |  6 +++---
 net/bridge/netfilter/ebt_among.c          | 15 ++++++++-------
 net/bridge/netfilter/ebt_arp.c            |  8 ++++----
 net/bridge/netfilter/ebt_arpreply.c       | 10 +++++-----
 net/bridge/netfilter/ebt_dnat.c           | 10 +++++-----
 net/bridge/netfilter/ebt_ip.c             | 16 ++++++++--------
 net/bridge/netfilter/ebt_ip6.c            | 16 ++++++++--------
 net/bridge/netfilter/ebt_limit.c          |  6 +++---
 net/bridge/netfilter/ebt_log.c            |  8 ++++----
 net/bridge/netfilter/ebt_mark.c           | 10 +++++-----
 net/bridge/netfilter/ebt_mark_m.c         | 10 +++++-----
 net/bridge/netfilter/ebt_nflog.c          | 12 ++++++------
 net/bridge/netfilter/ebt_pkttype.c        |  6 +++---
 net/bridge/netfilter/ebt_redirect.c       | 10 +++++-----
 net/bridge/netfilter/ebt_snat.c           | 14 +++++++-------
 net/bridge/netfilter/ebt_stp.c            |  8 ++++----
 net/bridge/netfilter/ebt_ulog.c           | 21 ++++++++++++---------
 net/bridge/netfilter/ebt_vlan.c           | 16 ++++++++--------
 net/bridge/netfilter/ebtables.c           |  6 +++---
 20 files changed, 109 insertions(+), 108 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index fd085af8962..5f71719b7a2 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -211,8 +211,7 @@ struct ebt_match
 	int (*match)(const struct sk_buff *skb, const struct net_device *in,
 	   const struct net_device *out, const void *matchdata,
 	   unsigned int datalen);
-	/* 0 == let it in */
-	int (*check)(const char *tablename, unsigned int hookmask,
+	bool (*check)(const char *tablename, unsigned int hookmask,
 	   const struct ebt_entry *e, void *matchdata, unsigned int datalen);
 	void (*destroy)(void *matchdata, unsigned int datalen);
 	unsigned int matchsize;
@@ -226,8 +225,7 @@ struct ebt_watcher
 	void (*watcher)(const struct sk_buff *skb, unsigned int hooknr,
 	   const struct net_device *in, const struct net_device *out,
 	   const void *watcherdata, unsigned int datalen);
-	/* 0 == let it in */
-	int (*check)(const char *tablename, unsigned int hookmask,
+	bool (*check)(const char *tablename, unsigned int hookmask,
 	   const struct ebt_entry *e, void *watcherdata, unsigned int datalen);
 	void (*destroy)(void *watcherdata, unsigned int datalen);
 	unsigned int targetsize;
@@ -242,8 +240,7 @@ struct ebt_target
 	int (*target)(struct sk_buff *skb, unsigned int hooknr,
 	   const struct net_device *in, const struct net_device *out,
 	   const void *targetdata, unsigned int datalen);
-	/* 0 == let it in */
-	int (*check)(const char *tablename, unsigned int hookmask,
+	bool (*check)(const char *tablename, unsigned int hookmask,
 	   const struct ebt_entry *e, void *targetdata, unsigned int datalen);
 	void (*destroy)(void *targetdata, unsigned int datalen);
 	unsigned int targetsize;
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index ccecfbd2a25..868df9c1e42 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -37,15 +37,15 @@ static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *
 }
 
 static struct ebt_match filter_802_3;
-static int ebt_802_3_check(const char *tablename, unsigned int hookmask,
+static bool ebt_802_3_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_802_3_info *info = data;
 
 	if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK)
-		return -EINVAL;
+		return false;
 
-	return 0;
+	return true;
 }
 
 static struct ebt_match filter_802_3 __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index b0acb13a390..95e2e70ac90 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -177,9 +177,10 @@ static int ebt_filter_among(const struct sk_buff *skb,
 	return EBT_MATCH;
 }
 
-static int ebt_among_check(const char *tablename, unsigned int hookmask,
-			   const struct ebt_entry *e, void *data,
-			   unsigned int datalen)
+static bool
+ebt_among_check(const char *tablename, unsigned int hookmask,
+		const struct ebt_entry *e, void *data,
+		unsigned int datalen)
 {
 	const struct ebt_among_info *info = data;
 	int expected_length = sizeof(struct ebt_among_info);
@@ -197,19 +198,19 @@ static int ebt_among_check(const char *tablename, unsigned int hookmask,
 		       "against expected %d, rounded to %Zd\n",
 		       datalen, expected_length,
 		       EBT_ALIGN(expected_length));
-		return -EINVAL;
+		return false;
 	}
 	if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) {
 		printk(KERN_WARNING
 		       "ebtables: among: dst integrity fail: %x\n", -err);
-		return -EINVAL;
+		return false;
 	}
 	if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) {
 		printk(KERN_WARNING
 		       "ebtables: among: src integrity fail: %x\n", -err);
-		return -EINVAL;
+		return false;
 	}
-	return 0;
+	return true;
 }
 
 static struct ebt_match filter_among __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 385f9cb85bc..cb33672380d 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -100,7 +100,7 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
 	return EBT_MATCH;
 }
 
-static int ebt_arp_check(const char *tablename, unsigned int hookmask,
+static bool ebt_arp_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_arp_info *info = data;
@@ -108,10 +108,10 @@ static int ebt_arp_check(const char *tablename, unsigned int hookmask,
 	if ((e->ethproto != htons(ETH_P_ARP) &&
 	   e->ethproto != htons(ETH_P_RARP)) ||
 	   e->invflags & EBT_IPROTO)
-		return -EINVAL;
+		return false;
 	if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK)
-		return -EINVAL;
-	return 0;
+		return false;
+	return true;
 }
 
 static struct ebt_match filter_arp __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index a860ea6da46..c298d3deffa 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -58,20 +58,20 @@ static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
 	return info->target;
 }
 
-static int ebt_target_reply_check(const char *tablename, unsigned int hookmask,
+static bool ebt_target_reply_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_arpreply_info *info = data;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
-		return -EINVAL;
+		return false;
 	if (e->ethproto != htons(ETH_P_ARP) ||
 	    e->invflags & EBT_IPROTO)
-		return -EINVAL;
+		return false;
 	CLEAR_BASE_CHAIN_BIT;
 	if (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING))
-		return -EINVAL;
-	return 0;
+		return false;
+	return true;
 }
 
 static struct ebt_target reply_target __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index c2be41e8bb9..6ddea2184e9 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -27,21 +27,21 @@ static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
 	return info->target;
 }
 
-static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask,
+static bool ebt_target_dnat_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_nat_info *info = data;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
-		return -EINVAL;
+		return false;
 	CLEAR_BASE_CHAIN_BIT;
 	if ( (strcmp(tablename, "nat") ||
 	   (hookmask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) &&
 	   (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) )
-		return -EINVAL;
+		return false;
 	if (INVALID_TARGET)
-		return -EINVAL;
-	return 0;
+		return false;
+	return true;
 }
 
 static struct ebt_target dnat __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index c1ae2547e3d..cbf0918ec16 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -78,31 +78,31 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
 	return EBT_MATCH;
 }
 
-static int ebt_ip_check(const char *tablename, unsigned int hookmask,
+static bool ebt_ip_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_ip_info *info = data;
 
 	if (e->ethproto != htons(ETH_P_IP) ||
 	   e->invflags & EBT_IPROTO)
-		return -EINVAL;
+		return false;
 	if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK)
-		return -EINVAL;
+		return false;
 	if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) {
 		if (info->invflags & EBT_IP_PROTO)
-			return -EINVAL;
+			return false;
 		if (info->protocol != IPPROTO_TCP &&
 		    info->protocol != IPPROTO_UDP &&
 		    info->protocol != IPPROTO_UDPLITE &&
 		    info->protocol != IPPROTO_SCTP &&
 		    info->protocol != IPPROTO_DCCP)
-			 return -EINVAL;
+			 return false;
 	}
 	if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1])
-		return -EINVAL;
+		return false;
 	if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1])
-		return -EINVAL;
-	return 0;
+		return false;
+	return true;
 }
 
 static struct ebt_match filter_ip __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 554dd68637c..1230c9ee394 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -92,30 +92,30 @@ static int ebt_filter_ip6(const struct sk_buff *skb,
 	return EBT_MATCH;
 }
 
-static int ebt_ip6_check(const char *tablename, unsigned int hookmask,
+static bool ebt_ip6_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
 
 	if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO)
-		return -EINVAL;
+		return false;
 	if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK)
-		return -EINVAL;
+		return false;
 	if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) {
 		if (info->invflags & EBT_IP6_PROTO)
-			return -EINVAL;
+			return false;
 		if (info->protocol != IPPROTO_TCP &&
 		    info->protocol != IPPROTO_UDP &&
 		    info->protocol != IPPROTO_UDPLITE &&
 		    info->protocol != IPPROTO_SCTP &&
 		    info->protocol != IPPROTO_DCCP)
-			 return -EINVAL;
+			return false;
 	}
 	if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1])
-		return -EINVAL;
+		return false;
 	if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
-		return -EINVAL;
-	return 0;
+		return false;
+	return true;
 }
 
 static struct ebt_match filter_ip6 =
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 3d71f3510ff..9b04f2be94e 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -65,7 +65,7 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE;
 }
 
-static int ebt_limit_check(const char *tablename, unsigned int hookmask,
+static bool ebt_limit_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	struct ebt_limit_info *info = data;
@@ -75,7 +75,7 @@ static int ebt_limit_check(const char *tablename, unsigned int hookmask,
 	    user2credits(info->avg * info->burst) < user2credits(info->avg)) {
 		printk("Overflow in ebt_limit, try lower: %u/%u\n",
 			info->avg, info->burst);
-		return -EINVAL;
+		return false;
 	}
 
 	/* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. */
@@ -83,7 +83,7 @@ static int ebt_limit_check(const char *tablename, unsigned int hookmask,
 	info->credit = user2credits(info->avg * info->burst);
 	info->credit_cap = user2credits(info->avg * info->burst);
 	info->cost = user2credits(info->avg);
-	return 0;
+	return true;
 }
 
 static struct ebt_match ebt_limit_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index d9596f114a3..f3d6d5ec2dc 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -24,17 +24,17 @@
 
 static DEFINE_SPINLOCK(ebt_log_lock);
 
-static int ebt_log_check(const char *tablename, unsigned int hookmask,
+static bool ebt_log_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	struct ebt_log_info *info = data;
 
 	if (info->bitmask & ~EBT_LOG_MASK)
-		return -EINVAL;
+		return false;
 	if (info->loglevel >= 8)
-		return -EINVAL;
+		return false;
 	info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0';
-	return 0;
+	return true;
 }
 
 struct tcpudphdr
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index bb02412786c..b85c73895ae 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -37,7 +37,7 @@ static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
+static bool ebt_target_mark_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_mark_t_info *info = data;
@@ -45,15 +45,15 @@ static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
 
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
-		return -EINVAL;
+		return false;
 	CLEAR_BASE_CHAIN_BIT;
 	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
-		return -EINVAL;
+		return false;
 	tmp = info->target & ~EBT_VERDICT_BITS;
 	if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
 	    tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE)
-		return -EINVAL;
-	return 0;
+		return false;
+	return true;
 }
 
 static struct ebt_target mark_target __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index b8ce9eb7170..b2707d772c9 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -23,18 +23,18 @@ static int ebt_filter_mark(const struct sk_buff *skb,
 	return !(((skb->mark & info->mask) == info->mark) ^ info->invert);
 }
 
-static int ebt_mark_check(const char *tablename, unsigned int hookmask,
+static bool ebt_mark_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_mark_m_info *info = data;
 
 	if (info->bitmask & ~EBT_MARK_MASK)
-		return -EINVAL;
+		return false;
 	if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND))
-		return -EINVAL;
+		return false;
 	if (!info->bitmask)
-		return -EINVAL;
-	return 0;
+		return false;
+	return true;
 }
 
 static struct ebt_match filter_mark __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 88ceb5eb849..a6954eb3f58 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -36,17 +36,17 @@ static void ebt_nflog(const struct sk_buff *skb,
 	nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, "%s", info->prefix);
 }
 
-static int ebt_nflog_check(const char *tablename,
-			   unsigned int hookmask,
-			   const struct ebt_entry *e,
-			   void *data, unsigned int datalen)
+static bool ebt_nflog_check(const char *tablename,
+			    unsigned int hookmask,
+			    const struct ebt_entry *e,
+			    void *data, unsigned int datalen)
 {
 	struct ebt_nflog_info *info = (struct ebt_nflog_info *)data;
 
 	if (info->flags & ~EBT_NFLOG_MASK)
-		return -EINVAL;
+		return false;
 	info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0';
-	return 0;
+	return true;
 }
 
 static struct ebt_watcher nflog __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 019026177f8..4dcd3b86cff 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -23,15 +23,15 @@ static int ebt_filter_pkttype(const struct sk_buff *skb,
 	return (skb->pkt_type != info->pkt_type) ^ info->invert;
 }
 
-static int ebt_pkttype_check(const char *tablename, unsigned int hookmask,
+static bool ebt_pkttype_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_pkttype_info *info = data;
 
 	if (info->invert != 0 && info->invert != 1)
-		return -EINVAL;
+		return false;
 	/* Allow any pkt_type value */
-	return 0;
+	return true;
 }
 
 static struct ebt_match filter_pkttype __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 04053268386..d2076f4227c 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -33,20 +33,20 @@ static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr,
 	return info->target;
 }
 
-static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask,
+static bool ebt_target_redirect_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_redirect_info *info = data;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
-		return -EINVAL;
+		return false;
 	CLEAR_BASE_CHAIN_BIT;
 	if ( (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) &&
 	     (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) )
-		return -EINVAL;
+		return false;
 	if (INVALID_TARGET)
-		return -EINVAL;
-	return 0;
+		return false;
+	return true;
 }
 
 static struct ebt_target redirect_target __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index abfbc6c9502..5a5a16acca0 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -43,7 +43,7 @@ out:
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static int ebt_target_snat_check(const char *tablename, unsigned int hookmask,
+static bool ebt_target_snat_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_nat_info *info = data;
@@ -51,19 +51,19 @@ static int ebt_target_snat_check(const char *tablename, unsigned int hookmask,
 
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
-		return -EINVAL;
+		return false;
 	CLEAR_BASE_CHAIN_BIT;
 	if (strcmp(tablename, "nat"))
-		return -EINVAL;
+		return false;
 	if (hookmask & ~(1 << NF_BR_POST_ROUTING))
-		return -EINVAL;
+		return false;
 
 	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
-		return -EINVAL;
+		return false;
 	tmp = info->target | EBT_VERDICT_BITS;
 	if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT)
-		return -EINVAL;
-	return 0;
+		return false;
+	return true;
 }
 
 static struct ebt_target snat __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index c7a0a00dac7..37d9480a00c 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -153,7 +153,7 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in
 	return EBT_MATCH;
 }
 
-static int ebt_stp_check(const char *tablename, unsigned int hookmask,
+static bool ebt_stp_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	const struct ebt_stp_info *info = data;
@@ -162,13 +162,13 @@ static int ebt_stp_check(const char *tablename, unsigned int hookmask,
 
 	if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
 	    !(info->bitmask & EBT_STP_MASK))
-		return -EINVAL;
+		return false;
 	/* Make sure the match only receives stp frames */
 	if (compare_ether_addr(e->destmac, bridge_ula) ||
 	    compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC))
-		return -EINVAL;
+		return false;
 
-	return 0;
+	return true;
 }
 
 static struct ebt_match filter_stp __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index bdd8a27bba9..e13a005f58a 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -255,14 +255,13 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
 	ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL);
 }
 
-
-static int ebt_ulog_check(const char *tablename, unsigned int hookmask,
+static bool ebt_ulog_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
 	struct ebt_ulog_info *uloginfo = data;
 
 	if (uloginfo->nlgroup > 31)
-		return -EINVAL;
+		return false;
 
 	uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0';
 
@@ -288,12 +287,13 @@ static const struct nf_logger ebt_ulog_logger = {
 
 static int __init ebt_ulog_init(void)
 {
-	int i, ret = 0;
+	bool ret = true;
+	int i;
 
 	if (nlbufsiz >= 128*1024) {
 		printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB,"
 		       " please try a smaller nlbufsiz parameter.\n");
-		return -EINVAL;
+		return false;
 	}
 
 	/* initialize ulog_buffers */
@@ -305,12 +305,15 @@ static int __init ebt_ulog_init(void)
 	ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG,
 					  EBT_ULOG_MAXNLGROUPS, NULL, NULL,
 					  THIS_MODULE);
-	if (!ebtulognl)
-		ret = -ENOMEM;
-	else if ((ret = ebt_register_watcher(&ulog)))
+	if (!ebtulognl) {
+		printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to "
+		       "call netlink_kernel_create\n");
+		ret = false;
+	} else if (ebt_register_watcher(&ulog) != 0) {
 		netlink_kernel_release(ebtulognl);
+	}
 
-	if (ret == 0)
+	if (ret)
 		nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
 
 	return ret;
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 4dba47aefc8..fc88d5d59e0 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -87,7 +87,7 @@ ebt_filter_vlan(const struct sk_buff *skb,
 	return EBT_MATCH;
 }
 
-static int
+static bool
 ebt_check_vlan(const char *tablename,
 	       unsigned int hooknr,
 	       const struct ebt_entry *e, void *data, unsigned int datalen)
@@ -99,7 +99,7 @@ ebt_check_vlan(const char *tablename,
 		DEBUG_MSG
 		    ("passed entry proto %2.4X is not 802.1Q (8100)\n",
 		     (unsigned short) ntohs(e->ethproto));
-		return -EINVAL;
+		return false;
 	}
 
 	/* Check for bitmask range
@@ -107,14 +107,14 @@ ebt_check_vlan(const char *tablename,
 	if (info->bitmask & ~EBT_VLAN_MASK) {
 		DEBUG_MSG("bitmask %2X is out of mask (%2X)\n",
 			  info->bitmask, EBT_VLAN_MASK);
-		return -EINVAL;
+		return false;
 	}
 
 	/* Check for inversion flags range */
 	if (info->invflags & ~EBT_VLAN_MASK) {
 		DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n",
 			  info->invflags, EBT_VLAN_MASK);
-		return -EINVAL;
+		return false;
 	}
 
 	/* Reserved VLAN ID (VID) values
@@ -129,7 +129,7 @@ ebt_check_vlan(const char *tablename,
 				DEBUG_MSG
 				    ("id %d is out of range (1-4096)\n",
 				     info->id);
-				return -EINVAL;
+				return false;
 			}
 			/* Note: This is valid VLAN-tagged frame point.
 			 * Any value of user_priority are acceptable,
@@ -144,7 +144,7 @@ ebt_check_vlan(const char *tablename,
 		if ((unsigned char) info->prio > 7) {
 			DEBUG_MSG("prio %d is out of range (0-7)\n",
 			     info->prio);
-			return -EINVAL;
+			return false;
 		}
 	}
 	/* Check for encapsulated proto range - it is possible to be
@@ -155,11 +155,11 @@ ebt_check_vlan(const char *tablename,
 			DEBUG_MSG
 			    ("encap frame length %d is less than minimal\n",
 			     ntohs(info->encap));
-			return -EINVAL;
+			return false;
 		}
 	}
 
-	return 0;
+	return true;
 }
 
 static struct ebt_match filter_vlan __read_mostly = {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index b04e288d20f..fe499527729 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -365,7 +365,7 @@ ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
 		return -EINVAL;
 	}
 	if (match->check &&
-	   match->check(name, hookmask, e, m->data, m->match_size) != 0) {
+	    !match->check(name, hookmask, e, m->data, m->match_size)) {
 		BUGPRINT("match->check failed\n");
 		module_put(match->me);
 		return -EINVAL;
@@ -403,7 +403,7 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
 		return -EINVAL;
 	}
 	if (watcher->check &&
-	   watcher->check(name, hookmask, e, w->data, w->watcher_size) != 0) {
+	    !watcher->check(name, hookmask, e, w->data, w->watcher_size)) {
 		BUGPRINT("watcher->check failed\n");
 		module_put(watcher->me);
 		return -EINVAL;
@@ -716,7 +716,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 		ret = -EINVAL;
 		goto cleanup_watchers;
 	} else if (t->u.target->check &&
-	    t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0) {
+	    !t->u.target->check(name, hookmask, e, t->data, t->target_size)) {
 		module_put(t->u.target->me);
 		ret = -EFAULT;
 		goto cleanup_watchers;
-- 
cgit v1.2.3-70-g09d2


From 8cc784eec6676b58e7f60419c88179aaa97bf71c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:13 +0200
Subject: netfilter: change return types of match functions for ebtables
 extensions

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h |  3 +--
 net/bridge/netfilter/ebt_802_3.c          | 13 ++++-----
 net/bridge/netfilter/ebt_among.c          | 44 ++++++++++++++-----------------
 net/bridge/netfilter/ebt_arp.c            | 35 ++++++++++++------------
 net/bridge/netfilter/ebt_ip.c             | 25 +++++++++---------
 net/bridge/netfilter/ebt_ip6.c            | 26 +++++++++---------
 net/bridge/netfilter/ebt_limit.c          |  6 ++---
 net/bridge/netfilter/ebt_mark_m.c         |  6 ++---
 net/bridge/netfilter/ebt_pkttype.c        |  4 +--
 net/bridge/netfilter/ebt_stp.c            | 39 ++++++++++++++-------------
 net/bridge/netfilter/ebt_vlan.c           |  8 +++---
 11 files changed, 104 insertions(+), 105 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 5f71719b7a2..f9fda2c442a 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -207,8 +207,7 @@ struct ebt_match
 {
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
-	/* 0 == it matches */
-	int (*match)(const struct sk_buff *skb, const struct net_device *in,
+	bool (*match)(const struct sk_buff *skb, const struct net_device *in,
 	   const struct net_device *out, const void *matchdata,
 	   unsigned int datalen);
 	bool (*check)(const char *tablename, unsigned int hookmask,
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 868df9c1e42..8ebe62b9bcc 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -12,7 +12,8 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_802_3.h>
 
-static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in,
+static bool ebt_filter_802_3(const struct sk_buff *skb,
+   const struct net_device *in,
    const struct net_device *out, const void *data, unsigned int datalen)
 {
 	const struct ebt_802_3_info *info = data;
@@ -21,19 +22,19 @@ static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *
 
 	if (info->bitmask & EBT_802_3_SAP) {
 		if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP))
-				return EBT_NOMATCH;
+			return false;
 		if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP))
-				return EBT_NOMATCH;
+			return false;
 	}
 
 	if (info->bitmask & EBT_802_3_TYPE) {
 		if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE))
-			return EBT_NOMATCH;
+			return false;
 		if (FWINV(info->type != type, EBT_802_3_TYPE))
-			return EBT_NOMATCH;
+			return false;
 	}
 
-	return EBT_MATCH;
+	return true;
 }
 
 static struct ebt_match filter_802_3;
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 95e2e70ac90..bfdc67bcbfa 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -14,8 +14,8 @@
 #include <linux/if_arp.h>
 #include <linux/module.h>
 
-static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
-				     const char *mac, __be32 ip)
+static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
+				      const char *mac, __be32 ip)
 {
 	/* You may be puzzled as to how this code works.
 	 * Some tricks were used, refer to
@@ -33,23 +33,19 @@ static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
 	if (ip) {
 		for (i = start; i < limit; i++) {
 			p = &wh->pool[i];
-			if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) {
-				if (p->ip == 0 || p->ip == ip) {
-					return 1;
-				}
-			}
+			if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0])
+				if (p->ip == 0 || p->ip == ip)
+					return true;
 		}
 	} else {
 		for (i = start; i < limit; i++) {
 			p = &wh->pool[i];
-			if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) {
-				if (p->ip == 0) {
-					return 1;
-				}
-			}
+			if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0])
+				if (p->ip == 0)
+					return true;
 		}
 	}
-	return 0;
+	return false;
 }
 
 static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash
@@ -131,10 +127,10 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
 	return 0;
 }
 
-static int ebt_filter_among(const struct sk_buff *skb,
-			    const struct net_device *in,
-			    const struct net_device *out, const void *data,
-			    unsigned int datalen)
+static bool ebt_filter_among(const struct sk_buff *skb,
+			     const struct net_device *in,
+			     const struct net_device *out, const void *data,
+			     unsigned int datalen)
 {
 	const struct ebt_among_info *info = data;
 	const char *dmac, *smac;
@@ -147,34 +143,34 @@ static int ebt_filter_among(const struct sk_buff *skb,
 	if (wh_src) {
 		smac = eth_hdr(skb)->h_source;
 		if (get_ip_src(skb, &sip))
-			return EBT_NOMATCH;
+			return false;
 		if (!(info->bitmask & EBT_AMONG_SRC_NEG)) {
 			/* we match only if it contains */
 			if (!ebt_mac_wormhash_contains(wh_src, smac, sip))
-				return EBT_NOMATCH;
+				return false;
 		} else {
 			/* we match only if it DOES NOT contain */
 			if (ebt_mac_wormhash_contains(wh_src, smac, sip))
-				return EBT_NOMATCH;
+				return false;
 		}
 	}
 
 	if (wh_dst) {
 		dmac = eth_hdr(skb)->h_dest;
 		if (get_ip_dst(skb, &dip))
-			return EBT_NOMATCH;
+			return false;
 		if (!(info->bitmask & EBT_AMONG_DST_NEG)) {
 			/* we match only if it contains */
 			if (!ebt_mac_wormhash_contains(wh_dst, dmac, dip))
-				return EBT_NOMATCH;
+				return false;
 		} else {
 			/* we match only if it DOES NOT contain */
 			if (ebt_mac_wormhash_contains(wh_dst, dmac, dip))
-				return EBT_NOMATCH;
+				return false;
 		}
 	}
 
-	return EBT_MATCH;
+	return true;
 }
 
 static bool
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index cb33672380d..f1f0bcf5524 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -15,7 +15,8 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_arp.h>
 
-static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in,
+static bool ebt_filter_arp(const struct sk_buff *skb,
+   const struct net_device *in,
    const struct net_device *out, const void *data, unsigned int datalen)
 {
 	const struct ebt_arp_info *info = data;
@@ -24,42 +25,42 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
 
 	ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
 	if (ah == NULL)
-		return EBT_NOMATCH;
+		return false;
 	if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode !=
 	   ah->ar_op, EBT_ARP_OPCODE))
-		return EBT_NOMATCH;
+		return false;
 	if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype !=
 	   ah->ar_hrd, EBT_ARP_HTYPE))
-		return EBT_NOMATCH;
+		return false;
 	if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype !=
 	   ah->ar_pro, EBT_ARP_PTYPE))
-		return EBT_NOMATCH;
+		return false;
 
 	if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) {
 		const __be32 *sap, *dap;
 		__be32 saddr, daddr;
 
 		if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP))
-			return EBT_NOMATCH;
+			return false;
 		sap = skb_header_pointer(skb, sizeof(struct arphdr) +
 					ah->ar_hln, sizeof(saddr),
 					&saddr);
 		if (sap == NULL)
-			return EBT_NOMATCH;
+			return false;
 		dap = skb_header_pointer(skb, sizeof(struct arphdr) +
 					2*ah->ar_hln+sizeof(saddr),
 					sizeof(daddr), &daddr);
 		if (dap == NULL)
-			return EBT_NOMATCH;
+			return false;
 		if (info->bitmask & EBT_ARP_SRC_IP &&
 		    FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP))
-			return EBT_NOMATCH;
+			return false;
 		if (info->bitmask & EBT_ARP_DST_IP &&
 		    FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP))
-			return EBT_NOMATCH;
+			return false;
 		if (info->bitmask & EBT_ARP_GRAT &&
 		    FWINV(*dap != *sap, EBT_ARP_GRAT))
-			return EBT_NOMATCH;
+			return false;
 	}
 
 	if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
@@ -68,18 +69,18 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
 		uint8_t verdict, i;
 
 		if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
-			return EBT_NOMATCH;
+			return false;
 		if (info->bitmask & EBT_ARP_SRC_MAC) {
 			mp = skb_header_pointer(skb, sizeof(struct arphdr),
 						sizeof(_mac), &_mac);
 			if (mp == NULL)
-				return EBT_NOMATCH;
+				return false;
 			verdict = 0;
 			for (i = 0; i < 6; i++)
 				verdict |= (mp[i] ^ info->smaddr[i]) &
 				       info->smmsk[i];
 			if (FWINV(verdict != 0, EBT_ARP_SRC_MAC))
-				return EBT_NOMATCH;
+				return false;
 		}
 
 		if (info->bitmask & EBT_ARP_DST_MAC) {
@@ -87,17 +88,17 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
 						ah->ar_hln + ah->ar_pln,
 						sizeof(_mac), &_mac);
 			if (mp == NULL)
-				return EBT_NOMATCH;
+				return false;
 			verdict = 0;
 			for (i = 0; i < 6; i++)
 				verdict |= (mp[i] ^ info->dmaddr[i]) &
 					info->dmmsk[i];
 			if (FWINV(verdict != 0, EBT_ARP_DST_MAC))
-				return EBT_NOMATCH;
+				return false;
 		}
 	}
 
-	return EBT_MATCH;
+	return true;
 }
 
 static bool ebt_arp_check(const char *tablename, unsigned int hookmask,
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index cbf0918ec16..018782f044c 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -24,7 +24,8 @@ struct tcpudphdr {
 	__be16 dst;
 };
 
-static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
+static bool ebt_filter_ip(const struct sk_buff *skb,
+   const struct net_device *in,
    const struct net_device *out, const void *data,
    unsigned int datalen)
 {
@@ -36,46 +37,46 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
 
 	ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
 	if (ih == NULL)
-		return EBT_NOMATCH;
+		return false;
 	if (info->bitmask & EBT_IP_TOS &&
 	   FWINV(info->tos != ih->tos, EBT_IP_TOS))
-		return EBT_NOMATCH;
+		return false;
 	if (info->bitmask & EBT_IP_SOURCE &&
 	   FWINV((ih->saddr & info->smsk) !=
 	   info->saddr, EBT_IP_SOURCE))
-		return EBT_NOMATCH;
+		return false;
 	if ((info->bitmask & EBT_IP_DEST) &&
 	   FWINV((ih->daddr & info->dmsk) !=
 	   info->daddr, EBT_IP_DEST))
-		return EBT_NOMATCH;
+		return false;
 	if (info->bitmask & EBT_IP_PROTO) {
 		if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO))
-			return EBT_NOMATCH;
+			return false;
 		if (!(info->bitmask & EBT_IP_DPORT) &&
 		    !(info->bitmask & EBT_IP_SPORT))
-			return EBT_MATCH;
+			return true;
 		if (ntohs(ih->frag_off) & IP_OFFSET)
-			return EBT_NOMATCH;
+			return false;
 		pptr = skb_header_pointer(skb, ih->ihl*4,
 					  sizeof(_ports), &_ports);
 		if (pptr == NULL)
-			return EBT_NOMATCH;
+			return false;
 		if (info->bitmask & EBT_IP_DPORT) {
 			u32 dst = ntohs(pptr->dst);
 			if (FWINV(dst < info->dport[0] ||
 				  dst > info->dport[1],
 				  EBT_IP_DPORT))
-			return EBT_NOMATCH;
+			return false;
 		}
 		if (info->bitmask & EBT_IP_SPORT) {
 			u32 src = ntohs(pptr->src);
 			if (FWINV(src < info->sport[0] ||
 				  src > info->sport[1],
 				  EBT_IP_SPORT))
-			return EBT_NOMATCH;
+			return false;
 		}
 	}
-	return EBT_MATCH;
+	return true;
 }
 
 static bool ebt_ip_check(const char *tablename, unsigned int hookmask,
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 1230c9ee394..7fc3928e3fb 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -27,7 +27,7 @@ struct tcpudphdr {
 	__be16 dst;
 };
 
-static int ebt_filter_ip6(const struct sk_buff *skb,
+static bool ebt_filter_ip6(const struct sk_buff *skb,
    const struct net_device *in,
    const struct net_device *out, const void *data,
    unsigned int datalen)
@@ -42,54 +42,54 @@ static int ebt_filter_ip6(const struct sk_buff *skb,
 
 	ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
 	if (ih6 == NULL)
-		return EBT_NOMATCH;
+		return false;
 	if (info->bitmask & EBT_IP6_TCLASS &&
 	   FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS))
-		return EBT_NOMATCH;
+		return false;
 	for (i = 0; i < 4; i++)
 		tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] &
 			info->smsk.in6_u.u6_addr32[i];
 	if (info->bitmask & EBT_IP6_SOURCE &&
 		FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0),
 			EBT_IP6_SOURCE))
-		return EBT_NOMATCH;
+		return false;
 	for (i = 0; i < 4; i++)
 		tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] &
 			info->dmsk.in6_u.u6_addr32[i];
 	if (info->bitmask & EBT_IP6_DEST &&
 	   FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST))
-		return EBT_NOMATCH;
+		return false;
 	if (info->bitmask & EBT_IP6_PROTO) {
 		uint8_t nexthdr = ih6->nexthdr;
 		int offset_ph;
 
 		offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr);
 		if (offset_ph == -1)
-			return EBT_NOMATCH;
+			return false;
 		if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
-			return EBT_NOMATCH;
+			return false;
 		if (!(info->bitmask & EBT_IP6_DPORT) &&
 		    !(info->bitmask & EBT_IP6_SPORT))
-			return EBT_MATCH;
+			return true;
 		pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
 					  &_ports);
 		if (pptr == NULL)
-			return EBT_NOMATCH;
+			return false;
 		if (info->bitmask & EBT_IP6_DPORT) {
 			u32 dst = ntohs(pptr->dst);
 			if (FWINV(dst < info->dport[0] ||
 				  dst > info->dport[1], EBT_IP6_DPORT))
-				return EBT_NOMATCH;
+				return false;
 		}
 		if (info->bitmask & EBT_IP6_SPORT) {
 			u32 src = ntohs(pptr->src);
 			if (FWINV(src < info->sport[0] ||
 				  src > info->sport[1], EBT_IP6_SPORT))
-			return EBT_NOMATCH;
+			return false;
 		}
-		return EBT_MATCH;
+		return true;
 	}
-	return EBT_MATCH;
+	return true;
 }
 
 static bool ebt_ip6_check(const char *tablename, unsigned int hookmask,
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 9b04f2be94e..925065a22a6 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -30,7 +30,7 @@ static DEFINE_SPINLOCK(limit_lock);
 
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
-static int ebt_limit_match(const struct sk_buff *skb,
+static bool ebt_limit_match(const struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
@@ -46,11 +46,11 @@ static int ebt_limit_match(const struct sk_buff *skb,
 		/* We're not limited. */
 		info->credit -= info->cost;
 		spin_unlock_bh(&limit_lock);
-		return EBT_MATCH;
+		return true;
 	}
 
 	spin_unlock_bh(&limit_lock);
-	return EBT_NOMATCH;
+	return false;
 }
 
 /* Precision saver. */
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index b2707d772c9..ec16c0e2868 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -12,15 +12,15 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_mark_m.h>
 
-static int ebt_filter_mark(const struct sk_buff *skb,
+static bool ebt_filter_mark(const struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out, const void *data,
    unsigned int datalen)
 {
 	const struct ebt_mark_m_info *info = data;
 
 	if (info->bitmask & EBT_MARK_OR)
-		return !(!!(skb->mark & info->mask) ^ info->invert);
-	return !(((skb->mark & info->mask) == info->mark) ^ info->invert);
+		return !!(skb->mark & info->mask) ^ info->invert;
+	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
 static bool ebt_mark_check(const char *tablename, unsigned int hookmask,
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 4dcd3b86cff..74b44328436 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -12,7 +12,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_pkttype.h>
 
-static int ebt_filter_pkttype(const struct sk_buff *skb,
+static bool ebt_filter_pkttype(const struct sk_buff *skb,
    const struct net_device *in,
    const struct net_device *out,
    const void *data,
@@ -20,7 +20,7 @@ static int ebt_filter_pkttype(const struct sk_buff *skb,
 {
 	const struct ebt_pkttype_info *info = data;
 
-	return (skb->pkt_type != info->pkt_type) ^ info->invert;
+	return (skb->pkt_type == info->pkt_type) ^ info->invert;
 }
 
 static bool ebt_pkttype_check(const char *tablename, unsigned int hookmask,
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 37d9480a00c..7618206639e 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -40,7 +40,7 @@ struct stp_config_pdu {
 #define NR16(p) (p[0] << 8 | p[1])
 #define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3])
 
-static int ebt_filter_config(const struct ebt_stp_info *info,
+static bool ebt_filter_config(const struct ebt_stp_info *info,
    const struct stp_config_pdu *stpc)
 {
 	const struct ebt_stp_config_info *c;
@@ -51,12 +51,12 @@ static int ebt_filter_config(const struct ebt_stp_info *info,
 	c = &info->config;
 	if ((info->bitmask & EBT_STP_FLAGS) &&
 	    FWINV(c->flags != stpc->flags, EBT_STP_FLAGS))
-		return EBT_NOMATCH;
+		return false;
 	if (info->bitmask & EBT_STP_ROOTPRIO) {
 		v16 = NR16(stpc->root);
 		if (FWINV(v16 < c->root_priol ||
 		    v16 > c->root_priou, EBT_STP_ROOTPRIO))
-			return EBT_NOMATCH;
+			return false;
 	}
 	if (info->bitmask & EBT_STP_ROOTADDR) {
 		verdict = 0;
@@ -64,19 +64,19 @@ static int ebt_filter_config(const struct ebt_stp_info *info,
 			verdict |= (stpc->root[2+i] ^ c->root_addr[i]) &
 				   c->root_addrmsk[i];
 		if (FWINV(verdict != 0, EBT_STP_ROOTADDR))
-			return EBT_NOMATCH;
+			return false;
 	}
 	if (info->bitmask & EBT_STP_ROOTCOST) {
 		v32 = NR32(stpc->root_cost);
 		if (FWINV(v32 < c->root_costl ||
 		    v32 > c->root_costu, EBT_STP_ROOTCOST))
-			return EBT_NOMATCH;
+			return false;
 	}
 	if (info->bitmask & EBT_STP_SENDERPRIO) {
 		v16 = NR16(stpc->sender);
 		if (FWINV(v16 < c->sender_priol ||
 		    v16 > c->sender_priou, EBT_STP_SENDERPRIO))
-			return EBT_NOMATCH;
+			return false;
 	}
 	if (info->bitmask & EBT_STP_SENDERADDR) {
 		verdict = 0;
@@ -84,42 +84,43 @@ static int ebt_filter_config(const struct ebt_stp_info *info,
 			verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) &
 				   c->sender_addrmsk[i];
 		if (FWINV(verdict != 0, EBT_STP_SENDERADDR))
-			return EBT_NOMATCH;
+			return false;
 	}
 	if (info->bitmask & EBT_STP_PORT) {
 		v16 = NR16(stpc->port);
 		if (FWINV(v16 < c->portl ||
 		    v16 > c->portu, EBT_STP_PORT))
-			return EBT_NOMATCH;
+			return false;
 	}
 	if (info->bitmask & EBT_STP_MSGAGE) {
 		v16 = NR16(stpc->msg_age);
 		if (FWINV(v16 < c->msg_agel ||
 		    v16 > c->msg_ageu, EBT_STP_MSGAGE))
-			return EBT_NOMATCH;
+			return false;
 	}
 	if (info->bitmask & EBT_STP_MAXAGE) {
 		v16 = NR16(stpc->max_age);
 		if (FWINV(v16 < c->max_agel ||
 		    v16 > c->max_ageu, EBT_STP_MAXAGE))
-			return EBT_NOMATCH;
+			return false;
 	}
 	if (info->bitmask & EBT_STP_HELLOTIME) {
 		v16 = NR16(stpc->hello_time);
 		if (FWINV(v16 < c->hello_timel ||
 		    v16 > c->hello_timeu, EBT_STP_HELLOTIME))
-			return EBT_NOMATCH;
+			return false;
 	}
 	if (info->bitmask & EBT_STP_FWDD) {
 		v16 = NR16(stpc->forward_delay);
 		if (FWINV(v16 < c->forward_delayl ||
 		    v16 > c->forward_delayu, EBT_STP_FWDD))
-			return EBT_NOMATCH;
+			return false;
 	}
-	return EBT_MATCH;
+	return true;
 }
 
-static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in,
+static bool ebt_filter_stp(const struct sk_buff *skb,
+   const struct net_device *in,
    const struct net_device *out, const void *data, unsigned int datalen)
 {
 	const struct ebt_stp_info *info = data;
@@ -129,15 +130,15 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in
 
 	sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph);
 	if (sp == NULL)
-		return EBT_NOMATCH;
+		return false;
 
 	/* The stp code only considers these */
 	if (memcmp(sp, header, sizeof(header)))
-		return EBT_NOMATCH;
+		return false;
 
 	if (info->bitmask & EBT_STP_TYPE
 	    && FWINV(info->type != sp->type, EBT_STP_TYPE))
-		return EBT_NOMATCH;
+		return false;
 
 	if (sp->type == BPDU_TYPE_CONFIG &&
 	    info->bitmask & EBT_STP_CONFIG_MASK) {
@@ -147,10 +148,10 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in
 		st = skb_header_pointer(skb, sizeof(_stph),
 					sizeof(_stpc), &_stpc);
 		if (st == NULL)
-			return EBT_NOMATCH;
+			return false;
 		return ebt_filter_config(info, st);
 	}
-	return EBT_MATCH;
+	return true;
 }
 
 static bool ebt_stp_check(const char *tablename, unsigned int hookmask,
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index fc88d5d59e0..8cc4257a1ad 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -38,9 +38,9 @@ MODULE_LICENSE("GPL");
 
 #define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args)
 #define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_
-#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;}
+#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; }
 
-static int
+static bool
 ebt_filter_vlan(const struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
@@ -58,7 +58,7 @@ ebt_filter_vlan(const struct sk_buff *skb,
 
 	fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
 	if (fp == NULL)
-		return EBT_NOMATCH;
+		return false;
 
 	/* Tag Control Information (TCI) consists of the following elements:
 	 * - User_priority. The user_priority field is three bits in length,
@@ -84,7 +84,7 @@ ebt_filter_vlan(const struct sk_buff *skb,
 	if (GET_BITMASK(EBT_VLAN_ENCAP))
 		EXIT_ON_MISMATCH(encap, EBT_VLAN_ENCAP);
 
-	return EBT_MATCH;
+	return true;
 }
 
 static bool
-- 
cgit v1.2.3-70-g09d2


From 0ac6ab1f7915fc820ca0cf8f597290dbb249edcc Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:13 +0200
Subject: netfilter: Change return types of targets/watchers for Ebtables
 extensions

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h |  6 +++---
 net/bridge/netfilter/ebt_arpreply.c       |  2 +-
 net/bridge/netfilter/ebt_dnat.c           |  2 +-
 net/bridge/netfilter/ebt_log.c            |  3 ++-
 net/bridge/netfilter/ebt_mark.c           |  2 +-
 net/bridge/netfilter/ebt_nflog.c          | 11 ++++++-----
 net/bridge/netfilter/ebt_redirect.c       |  3 ++-
 net/bridge/netfilter/ebt_snat.c           |  2 +-
 net/bridge/netfilter/ebt_ulog.c           |  3 ++-
 9 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index f9fda2c442a..097432b94c5 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -221,7 +221,7 @@ struct ebt_watcher
 {
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
-	void (*watcher)(const struct sk_buff *skb, unsigned int hooknr,
+	unsigned int (*watcher)(const struct sk_buff *skb, unsigned int hooknr,
 	   const struct net_device *in, const struct net_device *out,
 	   const void *watcherdata, unsigned int datalen);
 	bool (*check)(const char *tablename, unsigned int hookmask,
@@ -235,8 +235,8 @@ struct ebt_target
 {
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
-	/* returns one of the standard verdicts */
-	int (*target)(struct sk_buff *skb, unsigned int hooknr,
+	/* returns one of the standard EBT_* verdicts */
+	unsigned int (*target)(struct sk_buff *skb, unsigned int hooknr,
 	   const struct net_device *in, const struct net_device *out,
 	   const void *targetdata, unsigned int datalen);
 	bool (*check)(const char *tablename, unsigned int hookmask,
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index c298d3deffa..b444cf835f1 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -15,7 +15,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_arpreply.h>
 
-static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
+static unsigned int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 6ddea2184e9..d58b9e32338 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -14,7 +14,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nat.h>
 
-static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
+static unsigned int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index f3d6d5ec2dc..2705d7a2a9b 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -192,7 +192,7 @@ out:
 
 }
 
-static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
+static unsigned int ebt_log(const struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
@@ -209,6 +209,7 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
 	else
 		ebt_log_packet(NFPROTO_BRIDGE, hooknr, skb, in, out, &li,
 			       info->prefix);
+	return EBT_CONTINUE;
 }
 
 static struct ebt_watcher log =
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index b85c73895ae..e4b91d8e2c6 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -18,7 +18,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_mark_t.h>
 
-static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
+static unsigned int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index a6954eb3f58..2c75023b326 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -19,11 +19,11 @@
 #include <linux/netfilter_bridge/ebt_nflog.h>
 #include <net/netfilter/nf_log.h>
 
-static void ebt_nflog(const struct sk_buff *skb,
-		      unsigned int hooknr,
-		      const struct net_device *in,
-		      const struct net_device *out,
-		      const void *data, unsigned int datalen)
+static unsigned int ebt_nflog(const struct sk_buff *skb,
+			      unsigned int hooknr,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      const void *data, unsigned int datalen)
 {
 	struct ebt_nflog_info *info = (struct ebt_nflog_info *)data;
 	struct nf_loginfo li;
@@ -34,6 +34,7 @@ static void ebt_nflog(const struct sk_buff *skb,
 	li.u.ulog.qthreshold = info->threshold;
 
 	nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, "%s", info->prefix);
+	return EBT_CONTINUE;
 }
 
 static bool ebt_nflog_check(const char *tablename,
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index d2076f4227c..7bf1390ad97 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -15,7 +15,8 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_redirect.h>
 
-static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr,
+static unsigned int ebt_target_redirect(struct sk_buff *skb,
+   unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 5a5a16acca0..d13f05d2620 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nat.h>
 
-static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
+static unsigned int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index e13a005f58a..5f86f555f6d 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -246,13 +246,14 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 	ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
 }
 
-static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
+static unsigned int ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
 	const struct ebt_ulog_info *uloginfo = data;
 
 	ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL);
+	return EBT_CONTINUE;
 }
 
 static bool ebt_ulog_check(const char *tablename, unsigned int hookmask,
-- 
cgit v1.2.3-70-g09d2


From 001a18d369f4813ed792629ff4a9a6ade2a4a031 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:14 +0200
Subject: netfilter: add dummy members to Ebtables code to ease transition to
 Xtables

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h |  6 ++++
 net/bridge/netfilter/ebt_802_3.c          |  2 ++
 net/bridge/netfilter/ebt_among.c          |  2 ++
 net/bridge/netfilter/ebt_arp.c            |  2 ++
 net/bridge/netfilter/ebt_arpreply.c       |  2 ++
 net/bridge/netfilter/ebt_dnat.c           |  2 ++
 net/bridge/netfilter/ebt_ip.c             |  2 ++
 net/bridge/netfilter/ebt_ip6.c            |  2 ++
 net/bridge/netfilter/ebt_limit.c          |  2 ++
 net/bridge/netfilter/ebt_log.c            |  2 ++
 net/bridge/netfilter/ebt_mark.c           |  2 ++
 net/bridge/netfilter/ebt_mark_m.c         |  2 ++
 net/bridge/netfilter/ebt_nflog.c          |  2 ++
 net/bridge/netfilter/ebt_pkttype.c        |  2 ++
 net/bridge/netfilter/ebt_redirect.c       |  2 ++
 net/bridge/netfilter/ebt_snat.c           |  2 ++
 net/bridge/netfilter/ebt_stp.c            |  2 ++
 net/bridge/netfilter/ebt_ulog.c           |  2 ++
 net/bridge/netfilter/ebt_vlan.c           |  2 ++
 net/bridge/netfilter/ebtables.c           | 58 ++++++++++++++++++++++++++-----
 20 files changed, 91 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 097432b94c5..82f854bf37e 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -214,6 +214,8 @@ struct ebt_match
 	   const struct ebt_entry *e, void *matchdata, unsigned int datalen);
 	void (*destroy)(void *matchdata, unsigned int datalen);
 	unsigned int matchsize;
+	u_int8_t revision;
+	u_int8_t family;
 	struct module *me;
 };
 
@@ -228,6 +230,8 @@ struct ebt_watcher
 	   const struct ebt_entry *e, void *watcherdata, unsigned int datalen);
 	void (*destroy)(void *watcherdata, unsigned int datalen);
 	unsigned int targetsize;
+	u_int8_t revision;
+	u_int8_t family;
 	struct module *me;
 };
 
@@ -243,6 +247,8 @@ struct ebt_target
 	   const struct ebt_entry *e, void *targetdata, unsigned int datalen);
 	void (*destroy)(void *targetdata, unsigned int datalen);
 	unsigned int targetsize;
+	u_int8_t revision;
+	u_int8_t family;
 	struct module *me;
 };
 
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 8ebe62b9bcc..f9876f22757 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -51,6 +51,8 @@ static bool ebt_802_3_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_match filter_802_3 __read_mostly = {
 	.name		= EBT_802_3_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_filter_802_3,
 	.check		= ebt_802_3_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_802_3_info)),
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index bfdc67bcbfa..568c890887b 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -211,6 +211,8 @@ ebt_among_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_match filter_among __read_mostly = {
 	.name		= EBT_AMONG_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_filter_among,
 	.check		= ebt_among_check,
 	.matchsize	= -1, /* special case */
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index f1f0bcf5524..4a5226cbab8 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -117,6 +117,8 @@ static bool ebt_arp_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_match filter_arp __read_mostly = {
 	.name		= EBT_ARP_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_filter_arp,
 	.check		= ebt_arp_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_arp_info)),
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index b444cf835f1..7ab16556800 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -76,6 +76,8 @@ static bool ebt_target_reply_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_target reply_target __read_mostly = {
 	.name		= EBT_ARPREPLY_TARGET,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_target_reply,
 	.check		= ebt_target_reply_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_arpreply_info)),
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index d58b9e32338..64838e2835a 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -46,6 +46,8 @@ static bool ebt_target_dnat_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_target dnat __read_mostly = {
 	.name		= EBT_DNAT_TARGET,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_target_dnat,
 	.check		= ebt_target_dnat_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_nat_info)),
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 018782f044c..0bef6f7bc83 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -108,6 +108,8 @@ static bool ebt_ip_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_match filter_ip __read_mostly = {
 	.name		= EBT_IP_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_filter_ip,
 	.check		= ebt_ip_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_ip_info)),
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 7fc3928e3fb..afcabe205b8 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -121,6 +121,8 @@ static bool ebt_ip6_check(const char *tablename, unsigned int hookmask,
 static struct ebt_match filter_ip6 =
 {
 	.name		= EBT_IP6_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_filter_ip6,
 	.check		= ebt_ip6_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_ip6_info)),
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 925065a22a6..9ca0a2564c8 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -88,6 +88,8 @@ static bool ebt_limit_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_match ebt_limit_reg __read_mostly = {
 	.name		= EBT_LIMIT_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_limit_match,
 	.check		= ebt_limit_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_limit_info)),
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 2705d7a2a9b..c2e1c357025 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -215,6 +215,8 @@ static unsigned int ebt_log(const struct sk_buff *skb, unsigned int hooknr,
 static struct ebt_watcher log =
 {
 	.name		= EBT_LOG_WATCHER,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.watcher	= ebt_log,
 	.check		= ebt_log_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_log_info)),
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index e4b91d8e2c6..910721a1267 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -58,6 +58,8 @@ static bool ebt_target_mark_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_target mark_target __read_mostly = {
 	.name		= EBT_MARK_TARGET,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_target_mark,
 	.check		= ebt_target_mark_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_mark_t_info)),
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index ec16c0e2868..6512ad9b409 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -39,6 +39,8 @@ static bool ebt_mark_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_match filter_mark __read_mostly = {
 	.name		= EBT_MARK_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_filter_mark,
 	.check		= ebt_mark_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_mark_m_info)),
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 2c75023b326..aa0410c69a6 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -52,6 +52,8 @@ static bool ebt_nflog_check(const char *tablename,
 
 static struct ebt_watcher nflog __read_mostly = {
 	.name = EBT_NFLOG_WATCHER,
+	.revision = 0,
+	.family = NFPROTO_BRIDGE,
 	.watcher = ebt_nflog,
 	.check = ebt_nflog_check,
 	.targetsize = XT_ALIGN(sizeof(struct ebt_nflog_info)),
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 74b44328436..a9acecc88e9 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -36,6 +36,8 @@ static bool ebt_pkttype_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_match filter_pkttype __read_mostly = {
 	.name		= EBT_PKTTYPE_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_filter_pkttype,
 	.check		= ebt_pkttype_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_pkttype_info)),
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 7bf1390ad97..4c628108bcd 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -52,6 +52,8 @@ static bool ebt_target_redirect_check(const char *tablename, unsigned int hookma
 
 static struct ebt_target redirect_target __read_mostly = {
 	.name		= EBT_REDIRECT_TARGET,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_target_redirect,
 	.check		= ebt_target_redirect_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_redirect_info)),
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index d13f05d2620..0e83de781c0 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -68,6 +68,8 @@ static bool ebt_target_snat_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_target snat __read_mostly = {
 	.name		= EBT_SNAT_TARGET,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_target_snat,
 	.check		= ebt_target_snat_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_nat_info)),
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 7618206639e..e6d8f0c140a 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -174,6 +174,8 @@ static bool ebt_stp_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_match filter_stp __read_mostly = {
 	.name		= EBT_STP_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_filter_stp,
 	.check		= ebt_stp_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_stp_info)),
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 5f86f555f6d..076b44590f1 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -274,6 +274,8 @@ static bool ebt_ulog_check(const char *tablename, unsigned int hookmask,
 
 static struct ebt_watcher ulog __read_mostly = {
 	.name		= EBT_ULOG_WATCHER,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.watcher	= ebt_ulog,
 	.check		= ebt_ulog_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_ulog_info)),
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 8cc4257a1ad..9e3a39ae466 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -164,6 +164,8 @@ ebt_check_vlan(const char *tablename,
 
 static struct ebt_match filter_vlan __read_mostly = {
 	.name		= EBT_VLAN_MATCH,
+	.revision	= 0,
+	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_filter_vlan,
 	.check		= ebt_check_vlan,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_vlan_info)),
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index fe499527729..bc4b3f4f37c 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -61,7 +61,9 @@ static LIST_HEAD(ebt_matches);
 static LIST_HEAD(ebt_watchers);
 
 static struct ebt_target ebt_standard_target = {
-	.name = "standard",
+	.name       = "standard",
+	.revision   = 0,
+	.family     = NFPROTO_BRIDGE,
 };
 
 static inline int ebt_do_watcher (struct ebt_entry_watcher *w,
@@ -352,6 +354,17 @@ ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
 		return -ENOENT;
 	}
 	mutex_unlock(&ebt_mutex);
+	if (match->family != NFPROTO_BRIDGE) {
+		printk(KERN_WARNING "ebtables: %s match: not for ebtables?\n",
+		       match->name);
+		goto out;
+	}
+	if (match->revision != 0) {
+		printk(KERN_WARNING "ebtables: %s match: ebtables is not "
+		       "supporting revisions at this time\n",
+		       match->name);
+		goto out;
+	}
 	if (XT_ALIGN(match->matchsize) != m->match_size &&
 	    match->matchsize != -1) {
 		/*
@@ -361,17 +374,18 @@ ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
 		printk(KERN_WARNING "ebtables: %s match: "
 		       "invalid size %Zu != %u\n",
 		       match->name, XT_ALIGN(match->matchsize), m->match_size);
-		module_put(match->me);
-		return -EINVAL;
+		goto out;
 	}
 	if (match->check &&
 	    !match->check(name, hookmask, e, m->data, m->match_size)) {
 		BUGPRINT("match->check failed\n");
-		module_put(match->me);
-		return -EINVAL;
+		goto out;
 	}
 	(*cnt)++;
 	return 0;
+ out:
+	module_put(match->me);
+	return -EINVAL;
 }
 
 static inline int
@@ -394,22 +408,34 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
 		return -ENOENT;
 	}
 	mutex_unlock(&ebt_mutex);
+	if (watcher->family != NFPROTO_BRIDGE) {
+		printk(KERN_WARNING "ebtables: %s watcher: not for ebtables?\n",
+		       watcher->name);
+		goto out;
+	}
+	if (watcher->revision != 0) {
+		printk(KERN_WARNING "ebtables: %s watcher: ebtables is not "
+		       "supporting revisions at this time\n",
+		       watcher->name);
+		goto out;
+	}
 	if (XT_ALIGN(watcher->targetsize) != w->watcher_size) {
 		printk(KERN_WARNING "ebtables: %s watcher: "
 		       "invalid size %Zu != %u\n",
 		       watcher->name, XT_ALIGN(watcher->targetsize),
 		       w->watcher_size);
-		module_put(watcher->me);
-		return -EINVAL;
+		goto out;
 	}
 	if (watcher->check &&
 	    !watcher->check(name, hookmask, e, w->data, w->watcher_size)) {
 		BUGPRINT("watcher->check failed\n");
-		module_put(watcher->me);
-		return -EINVAL;
+		goto out;
 	}
 	(*cnt)++;
 	return 0;
+ out:
+	module_put(watcher->me);
+	return -EINVAL;
 }
 
 static int ebt_verify_pointers(struct ebt_replace *repl,
@@ -690,6 +716,20 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	}
 	mutex_unlock(&ebt_mutex);
 
+	if (target->family != NFPROTO_BRIDGE) {
+		printk(KERN_WARNING "ebtables: %s target: not for ebtables?\n",
+		       target->name);
+		ret = -EINVAL;
+		goto cleanup_watchers;
+	}
+	if (target->revision != 0) {
+		printk(KERN_WARNING "ebtables: %s target: ebtables is not "
+		       "supporting revisions at this time\n",
+		       target->name);
+		ret = -EINVAL;
+		goto cleanup_watchers;
+	}
+
 	t->u.target = target;
 	if (t->u.target == &ebt_standard_target) {
 		if (gap < sizeof(struct ebt_standard_target)) {
-- 
cgit v1.2.3-70-g09d2


From 815377fe344c799228ca6278613ca3100b069ad5 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:14 +0200
Subject: netfilter: ebt_among: obtain match size through different means

The function signatures will be changed to match those of Xtables, and
the datalen argument will be gone. ebt_among unfortunately relies on
it, so we need to obtain it somehow.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/ebt_among.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 568c890887b..88b5c9118a7 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -178,6 +178,8 @@ ebt_among_check(const char *tablename, unsigned int hookmask,
 		const struct ebt_entry *e, void *data,
 		unsigned int datalen)
 {
+	const struct ebt_entry_match *em =
+		container_of(data, const struct ebt_entry_match, data);
 	const struct ebt_among_info *info = data;
 	int expected_length = sizeof(struct ebt_among_info);
 	const struct ebt_mac_wormhash *wh_dst, *wh_src;
@@ -188,11 +190,11 @@ ebt_among_check(const char *tablename, unsigned int hookmask,
 	expected_length += ebt_mac_wormhash_size(wh_dst);
 	expected_length += ebt_mac_wormhash_size(wh_src);
 
-	if (datalen != EBT_ALIGN(expected_length)) {
+	if (em->match_size != EBT_ALIGN(expected_length)) {
 		printk(KERN_WARNING
 		       "ebtables: among: wrong size: %d "
 		       "against expected %d, rounded to %Zd\n",
-		       datalen, expected_length,
+		       em->match_size, expected_length,
 		       EBT_ALIGN(expected_length));
 		return false;
 	}
-- 
cgit v1.2.3-70-g09d2


From 2d06d4a5cc107046508d860a0b47dbc43b829b79 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:15 +0200
Subject: netfilter: change Ebtables function signatures to match Xtables's

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h | 43 +++++++++++++++++++------------
 net/bridge/netfilter/ebt_802_3.c          | 18 +++++++------
 net/bridge/netfilter/ebt_among.c          | 18 ++++++-------
 net/bridge/netfilter/ebt_arp.c            | 18 ++++++++-----
 net/bridge/netfilter/ebt_arpreply.c       | 18 ++++++++-----
 net/bridge/netfilter/ebt_dnat.c           | 17 +++++++-----
 net/bridge/netfilter/ebt_ip.c             | 19 ++++++++------
 net/bridge/netfilter/ebt_ip6.c            | 19 ++++++++------
 net/bridge/netfilter/ebt_limit.c          | 17 +++++++-----
 net/bridge/netfilter/ebt_log.c            | 17 +++++++-----
 net/bridge/netfilter/ebt_mark.c           | 17 +++++++-----
 net/bridge/netfilter/ebt_mark_m.c         | 17 +++++++-----
 net/bridge/netfilter/ebt_nflog.c          | 21 +++++++--------
 net/bridge/netfilter/ebt_pkttype.c        | 20 +++++++-------
 net/bridge/netfilter/ebt_redirect.c       | 18 +++++++------
 net/bridge/netfilter/ebt_snat.c           | 17 +++++++-----
 net/bridge/netfilter/ebt_stp.c            | 18 ++++++++-----
 net/bridge/netfilter/ebt_ulog.c           | 17 +++++++-----
 net/bridge/netfilter/ebt_vlan.c           | 18 ++++++-------
 net/bridge/netfilter/ebtables.c           | 30 ++++++++++-----------
 20 files changed, 224 insertions(+), 173 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 82f854bf37e..f20a57da7a2 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -31,6 +31,9 @@
  * The 4 lsb are more than enough to store the verdict. */
 #define EBT_VERDICT_BITS 0x0000000F
 
+struct xt_match;
+struct xt_target;
+
 struct ebt_counter
 {
 	uint64_t pcnt;
@@ -208,11 +211,13 @@ struct ebt_match
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
 	bool (*match)(const struct sk_buff *skb, const struct net_device *in,
-	   const struct net_device *out, const void *matchdata,
-	   unsigned int datalen);
-	bool (*check)(const char *tablename, unsigned int hookmask,
-	   const struct ebt_entry *e, void *matchdata, unsigned int datalen);
-	void (*destroy)(void *matchdata, unsigned int datalen);
+		const struct net_device *out, const struct xt_match *match,
+		const void *matchinfo, int offset, unsigned int protoff,
+		bool *hotdrop);
+	bool (*checkentry)(const char *table, const void *entry,
+		const struct xt_match *match, void *matchinfo,
+		unsigned int hook_mask);
+	void (*destroy)(const struct xt_match *match, void *matchinfo);
 	unsigned int matchsize;
 	u_int8_t revision;
 	u_int8_t family;
@@ -223,12 +228,14 @@ struct ebt_watcher
 {
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
-	unsigned int (*watcher)(const struct sk_buff *skb, unsigned int hooknr,
-	   const struct net_device *in, const struct net_device *out,
-	   const void *watcherdata, unsigned int datalen);
-	bool (*check)(const char *tablename, unsigned int hookmask,
-	   const struct ebt_entry *e, void *watcherdata, unsigned int datalen);
-	void (*destroy)(void *watcherdata, unsigned int datalen);
+	unsigned int (*target)(struct sk_buff *skb,
+		const struct net_device *in, const struct net_device *out,
+		unsigned int hook_num, const struct xt_target *target,
+		const void *targinfo);
+	bool (*checkentry)(const char *table, const void *entry,
+		const struct xt_target *target, void *targinfo,
+		unsigned int hook_mask);
+	void (*destroy)(const struct xt_target *target, void *targinfo);
 	unsigned int targetsize;
 	u_int8_t revision;
 	u_int8_t family;
@@ -240,12 +247,14 @@ struct ebt_target
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
 	/* returns one of the standard EBT_* verdicts */
-	unsigned int (*target)(struct sk_buff *skb, unsigned int hooknr,
-	   const struct net_device *in, const struct net_device *out,
-	   const void *targetdata, unsigned int datalen);
-	bool (*check)(const char *tablename, unsigned int hookmask,
-	   const struct ebt_entry *e, void *targetdata, unsigned int datalen);
-	void (*destroy)(void *targetdata, unsigned int datalen);
+	unsigned int (*target)(struct sk_buff *skb,
+		const struct net_device *in, const struct net_device *out,
+		unsigned int hook_num, const struct xt_target *target,
+		const void *targinfo);
+	bool (*checkentry)(const char *table, const void *entry,
+		const struct xt_target *target, void *targinfo,
+		unsigned int hook_mask);
+	void (*destroy)(const struct xt_target *target, void *targinfo);
 	unsigned int targetsize;
 	u_int8_t revision;
 	u_int8_t family;
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index f9876f22757..6f1a69c28ed 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -12,9 +12,10 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_802_3.h>
 
-static bool ebt_filter_802_3(const struct sk_buff *skb,
-   const struct net_device *in,
-   const struct net_device *out, const void *data, unsigned int datalen)
+static bool
+ebt_802_3_mt(const struct sk_buff *skb, const struct net_device *in,
+	     const struct net_device *out, const struct xt_match *match,
+	     const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ebt_802_3_info *info = data;
 	const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
@@ -37,9 +38,10 @@ static bool ebt_filter_802_3(const struct sk_buff *skb,
 	return true;
 }
 
-static struct ebt_match filter_802_3;
-static bool ebt_802_3_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_802_3_mt_check(const char *table, const void *entry,
+		   const struct xt_match *match, void *data,
+		   unsigned int hook_mask)
 {
 	const struct ebt_802_3_info *info = data;
 
@@ -53,8 +55,8 @@ static struct ebt_match filter_802_3 __read_mostly = {
 	.name		= EBT_802_3_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_filter_802_3,
-	.check		= ebt_802_3_check,
+	.match		= ebt_802_3_mt,
+	.checkentry	= ebt_802_3_mt_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_802_3_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 88b5c9118a7..84a306f085b 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -127,10 +127,10 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
 	return 0;
 }
 
-static bool ebt_filter_among(const struct sk_buff *skb,
-			     const struct net_device *in,
-			     const struct net_device *out, const void *data,
-			     unsigned int datalen)
+static bool
+ebt_among_mt(const struct sk_buff *skb, const struct net_device *in,
+	     const struct net_device *out, const struct xt_match *match,
+	     const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ebt_among_info *info = data;
 	const char *dmac, *smac;
@@ -174,9 +174,9 @@ static bool ebt_filter_among(const struct sk_buff *skb,
 }
 
 static bool
-ebt_among_check(const char *tablename, unsigned int hookmask,
-		const struct ebt_entry *e, void *data,
-		unsigned int datalen)
+ebt_among_mt_check(const char *table, const void *entry,
+		   const struct xt_match *match, void *data,
+		   unsigned int hook_mask)
 {
 	const struct ebt_entry_match *em =
 		container_of(data, const struct ebt_entry_match, data);
@@ -215,8 +215,8 @@ static struct ebt_match filter_among __read_mostly = {
 	.name		= EBT_AMONG_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_filter_among,
-	.check		= ebt_among_check,
+	.match		= ebt_among_mt,
+	.checkentry	= ebt_among_mt_check,
 	.matchsize	= -1, /* special case */
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 4a5226cbab8..6e7cd2f5ad7 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -15,9 +15,10 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_arp.h>
 
-static bool ebt_filter_arp(const struct sk_buff *skb,
-   const struct net_device *in,
-   const struct net_device *out, const void *data, unsigned int datalen)
+static bool
+ebt_arp_mt(const struct sk_buff *skb, const struct net_device *in,
+	   const struct net_device *out, const struct xt_match *match,
+	   const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ebt_arp_info *info = data;
 	const struct arphdr *ah;
@@ -101,10 +102,13 @@ static bool ebt_filter_arp(const struct sk_buff *skb,
 	return true;
 }
 
-static bool ebt_arp_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_arp_mt_check(const char *table, const void *entry,
+		 const struct xt_match *match, void *data,
+		 unsigned int hook_mask)
 {
 	const struct ebt_arp_info *info = data;
+	const struct ebt_entry *e = entry;
 
 	if ((e->ethproto != htons(ETH_P_ARP) &&
 	   e->ethproto != htons(ETH_P_RARP)) ||
@@ -119,8 +123,8 @@ static struct ebt_match filter_arp __read_mostly = {
 	.name		= EBT_ARP_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_filter_arp,
-	.check		= ebt_arp_check,
+	.match		= ebt_arp_mt,
+	.checkentry	= ebt_arp_mt_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_arp_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 7ab16556800..6f2f6589777 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -15,9 +15,10 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_arpreply.h>
 
-static unsigned int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
-   const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+static unsigned int
+ebt_arpreply_tg(struct sk_buff *skb, const struct net_device *in,
+		const struct net_device *out, unsigned int hook_nr,
+		const struct xt_target *target, const void *data)
 {
 	struct ebt_arpreply_info *info = (void *)data;
 	const __be32 *siptr, *diptr;
@@ -58,10 +59,13 @@ static unsigned int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
 	return info->target;
 }
 
-static bool ebt_target_reply_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_arpreply_tg_check(const char *tablename, const void *entry,
+		      const struct xt_target *target, void *data,
+		      unsigned int hookmask)
 {
 	const struct ebt_arpreply_info *info = data;
+	const struct ebt_entry *e = entry;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
 		return false;
@@ -78,8 +82,8 @@ static struct ebt_target reply_target __read_mostly = {
 	.name		= EBT_ARPREPLY_TARGET,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.target		= ebt_target_reply,
-	.check		= ebt_target_reply_check,
+	.target		= ebt_arpreply_tg,
+	.checkentry	= ebt_arpreply_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_arpreply_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 64838e2835a..b7cc013bd37 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -14,9 +14,10 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nat.h>
 
-static unsigned int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
-   const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+static unsigned int
+ebt_dnat_tg(struct sk_buff *skb, const struct net_device *in,
+	    const struct net_device *out, unsigned int hook_nr,
+	    const struct xt_target *target, const void *data)
 {
 	const struct ebt_nat_info *info = data;
 
@@ -27,8 +28,10 @@ static unsigned int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
 	return info->target;
 }
 
-static bool ebt_target_dnat_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_dnat_tg_check(const char *tablename, const void *entry,
+		  const struct xt_target *target, void *data,
+		  unsigned int hookmask)
 {
 	const struct ebt_nat_info *info = data;
 
@@ -48,8 +51,8 @@ static struct ebt_target dnat __read_mostly = {
 	.name		= EBT_DNAT_TARGET,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.target		= ebt_target_dnat,
-	.check		= ebt_target_dnat_check,
+	.target		= ebt_dnat_tg,
+	.checkentry	= ebt_dnat_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_nat_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 0bef6f7bc83..e7f3b1776b0 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -24,10 +24,10 @@ struct tcpudphdr {
 	__be16 dst;
 };
 
-static bool ebt_filter_ip(const struct sk_buff *skb,
-   const struct net_device *in,
-   const struct net_device *out, const void *data,
-   unsigned int datalen)
+static bool
+ebt_ip_mt(const struct sk_buff *skb, const struct net_device *in,
+	  const struct net_device *out, const struct xt_match *match,
+	  const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ebt_ip_info *info = data;
 	const struct iphdr *ih;
@@ -79,10 +79,13 @@ static bool ebt_filter_ip(const struct sk_buff *skb,
 	return true;
 }
 
-static bool ebt_ip_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_ip_mt_check(const char *table, const void *entry,
+		const struct xt_match *match, void *data,
+		unsigned int hook_mask)
 {
 	const struct ebt_ip_info *info = data;
+	const struct ebt_entry *e = entry;
 
 	if (e->ethproto != htons(ETH_P_IP) ||
 	   e->invflags & EBT_IPROTO)
@@ -110,8 +113,8 @@ static struct ebt_match filter_ip __read_mostly = {
 	.name		= EBT_IP_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_filter_ip,
-	.check		= ebt_ip_check,
+	.match		= ebt_ip_mt,
+	.checkentry	= ebt_ip_mt_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_ip_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index afcabe205b8..807685da293 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -27,10 +27,10 @@ struct tcpudphdr {
 	__be16 dst;
 };
 
-static bool ebt_filter_ip6(const struct sk_buff *skb,
-   const struct net_device *in,
-   const struct net_device *out, const void *data,
-   unsigned int datalen)
+static bool
+ebt_ip6_mt(const struct sk_buff *skb, const struct net_device *in,
+	   const struct net_device *out, const struct xt_match *match,
+	   const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
 	const struct ipv6hdr *ih6;
@@ -92,9 +92,12 @@ static bool ebt_filter_ip6(const struct sk_buff *skb,
 	return true;
 }
 
-static bool ebt_ip6_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_ip6_mt_check(const char *table, const void *entry,
+		 const struct xt_match *match, void *data,
+		 unsigned int hook_mask)
 {
+	const struct ebt_entry *e = entry;
 	struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
 
 	if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO)
@@ -123,8 +126,8 @@ static struct ebt_match filter_ip6 =
 	.name		= EBT_IP6_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_filter_ip6,
-	.check		= ebt_ip6_check,
+	.match		= ebt_ip6_mt,
+	.checkentry	= ebt_ip6_mt_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_ip6_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 9ca0a2564c8..d3372739227 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -30,9 +30,10 @@ static DEFINE_SPINLOCK(limit_lock);
 
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
-static bool ebt_limit_match(const struct sk_buff *skb,
-   const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+static bool
+ebt_limit_mt(const struct sk_buff *skb, const struct net_device *in,
+	     const struct net_device *out, const struct xt_match *match,
+	     const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct ebt_limit_info *info = (struct ebt_limit_info *)data;
 	unsigned long now = jiffies;
@@ -65,8 +66,10 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE;
 }
 
-static bool ebt_limit_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_limit_mt_check(const char *table, const void *e,
+		   const struct xt_match *match, void *data,
+		   unsigned int hook_mask)
 {
 	struct ebt_limit_info *info = data;
 
@@ -90,8 +93,8 @@ static struct ebt_match ebt_limit_reg __read_mostly = {
 	.name		= EBT_LIMIT_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_limit_match,
-	.check		= ebt_limit_check,
+	.match		= ebt_limit_mt,
+	.checkentry	= ebt_limit_mt_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_limit_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index c2e1c357025..424dfdf7f27 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -24,8 +24,10 @@
 
 static DEFINE_SPINLOCK(ebt_log_lock);
 
-static bool ebt_log_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_log_tg_check(const char *table, const void *entry,
+		 const struct xt_target *target, void *data,
+		 unsigned int hook_mask)
 {
 	struct ebt_log_info *info = data;
 
@@ -192,9 +194,10 @@ out:
 
 }
 
-static unsigned int ebt_log(const struct sk_buff *skb, unsigned int hooknr,
-   const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+static unsigned int
+ebt_log_tg(struct sk_buff *skb, const struct net_device *in,
+	   const struct net_device *out, unsigned int hooknr,
+	   const struct xt_target *target, const void *data)
 {
 	const struct ebt_log_info *info = data;
 	struct nf_loginfo li;
@@ -217,8 +220,8 @@ static struct ebt_watcher log =
 	.name		= EBT_LOG_WATCHER,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.watcher	= ebt_log,
-	.check		= ebt_log_check,
+	.target		= ebt_log_tg,
+	.checkentry	= ebt_log_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_log_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 910721a1267..92c67271bd8 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -18,9 +18,10 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_mark_t.h>
 
-static unsigned int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
-   const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+static unsigned int
+ebt_mark_tg(struct sk_buff *skb, const struct net_device *in,
+	    const struct net_device *out, unsigned int hook_nr,
+	    const struct xt_target *target, const void *data)
 {
 	const struct ebt_mark_t_info *info = data;
 	int action = info->target & -16;
@@ -37,8 +38,10 @@ static unsigned int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static bool ebt_target_mark_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_mark_tg_check(const char *table, const void *e,
+		  const struct xt_target *target, void *data,
+		  unsigned int hookmask)
 {
 	const struct ebt_mark_t_info *info = data;
 	int tmp;
@@ -60,8 +63,8 @@ static struct ebt_target mark_target __read_mostly = {
 	.name		= EBT_MARK_TARGET,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.target		= ebt_target_mark,
-	.check		= ebt_target_mark_check,
+	.target		= ebt_mark_tg,
+	.checkentry	= ebt_mark_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_mark_t_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 6512ad9b409..db64a0de4f7 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -12,9 +12,10 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_mark_m.h>
 
-static bool ebt_filter_mark(const struct sk_buff *skb,
-   const struct net_device *in, const struct net_device *out, const void *data,
-   unsigned int datalen)
+static bool
+ebt_mark_mt(const struct sk_buff *skb, const struct net_device *in,
+	    const struct net_device *out, const struct xt_match *match,
+	    const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ebt_mark_m_info *info = data;
 
@@ -23,8 +24,10 @@ static bool ebt_filter_mark(const struct sk_buff *skb,
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool ebt_mark_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_mark_mt_check(const char *table, const void *e,
+		  const struct xt_match *match, void *data,
+		  unsigned int hook_mask)
 {
 	const struct ebt_mark_m_info *info = data;
 
@@ -41,8 +44,8 @@ static struct ebt_match filter_mark __read_mostly = {
 	.name		= EBT_MARK_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_filter_mark,
-	.check		= ebt_mark_check,
+	.match		= ebt_mark_mt,
+	.checkentry	= ebt_mark_mt_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_mark_m_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index aa0410c69a6..b415f887188 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -19,11 +19,10 @@
 #include <linux/netfilter_bridge/ebt_nflog.h>
 #include <net/netfilter/nf_log.h>
 
-static unsigned int ebt_nflog(const struct sk_buff *skb,
-			      unsigned int hooknr,
-			      const struct net_device *in,
-			      const struct net_device *out,
-			      const void *data, unsigned int datalen)
+static unsigned int
+ebt_nflog_tg(struct sk_buff *skb, const struct net_device *in,
+	     const struct net_device *out, unsigned int hooknr,
+	     const struct xt_target *target, const void *data)
 {
 	struct ebt_nflog_info *info = (struct ebt_nflog_info *)data;
 	struct nf_loginfo li;
@@ -37,10 +36,10 @@ static unsigned int ebt_nflog(const struct sk_buff *skb,
 	return EBT_CONTINUE;
 }
 
-static bool ebt_nflog_check(const char *tablename,
-			    unsigned int hookmask,
-			    const struct ebt_entry *e,
-			    void *data, unsigned int datalen)
+static bool
+ebt_nflog_tg_check(const char *table, const void *e,
+		   const struct xt_target *target, void *data,
+		   unsigned int hookmask)
 {
 	struct ebt_nflog_info *info = (struct ebt_nflog_info *)data;
 
@@ -54,8 +53,8 @@ static struct ebt_watcher nflog __read_mostly = {
 	.name = EBT_NFLOG_WATCHER,
 	.revision = 0,
 	.family = NFPROTO_BRIDGE,
-	.watcher = ebt_nflog,
-	.check = ebt_nflog_check,
+	.target = ebt_nflog_tg,
+	.checkentry = ebt_nflog_tg_check,
 	.targetsize = XT_ALIGN(sizeof(struct ebt_nflog_info)),
 	.me = THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index a9acecc88e9..06393452ef9 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -12,19 +12,21 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_pkttype.h>
 
-static bool ebt_filter_pkttype(const struct sk_buff *skb,
-   const struct net_device *in,
-   const struct net_device *out,
-   const void *data,
-   unsigned int datalen)
+static bool
+ebt_pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
+	       const struct net_device *out, const struct xt_match *match,
+	       const void *data, int offset, unsigned int protoff,
+	       bool *hotdrop)
 {
 	const struct ebt_pkttype_info *info = data;
 
 	return (skb->pkt_type == info->pkt_type) ^ info->invert;
 }
 
-static bool ebt_pkttype_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_pkttype_mt_check(const char *table, const void *e,
+		     const struct xt_match *match, void *data,
+		     unsigned int hook_mask)
 {
 	const struct ebt_pkttype_info *info = data;
 
@@ -38,8 +40,8 @@ static struct ebt_match filter_pkttype __read_mostly = {
 	.name		= EBT_PKTTYPE_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_filter_pkttype,
-	.check		= ebt_pkttype_check,
+	.match		= ebt_pkttype_mt,
+	.checkentry	= ebt_pkttype_mt_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_pkttype_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 4c628108bcd..e9540cf4f6d 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -15,10 +15,10 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_redirect.h>
 
-static unsigned int ebt_target_redirect(struct sk_buff *skb,
-   unsigned int hooknr,
-   const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+static unsigned int
+ebt_redirect_tg(struct sk_buff *skb, const struct net_device *in,
+		const struct net_device *out, unsigned int hooknr,
+		const struct xt_target *target, const void *data)
 {
 	const struct ebt_redirect_info *info = data;
 
@@ -34,8 +34,10 @@ static unsigned int ebt_target_redirect(struct sk_buff *skb,
 	return info->target;
 }
 
-static bool ebt_target_redirect_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_redirect_tg_check(const char *tablename, const void *e,
+		      const struct xt_target *target, void *data,
+		      unsigned int hookmask)
 {
 	const struct ebt_redirect_info *info = data;
 
@@ -54,8 +56,8 @@ static struct ebt_target redirect_target __read_mostly = {
 	.name		= EBT_REDIRECT_TARGET,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.target		= ebt_target_redirect,
-	.check		= ebt_target_redirect_check,
+	.target		= ebt_redirect_tg,
+	.checkentry	= ebt_redirect_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_redirect_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 0e83de781c0..363d0051e04 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -16,9 +16,10 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nat.h>
 
-static unsigned int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
-   const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+static unsigned int
+ebt_snat_tg(struct sk_buff *skb, const struct net_device *in,
+	    const struct net_device *out, unsigned int hook_nr,
+	    const struct xt_target *target, const void *data)
 {
 	const struct ebt_nat_info *info = data;
 
@@ -43,8 +44,10 @@ out:
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static bool ebt_target_snat_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_snat_tg_check(const char *tablename, const void *e,
+		  const struct xt_target *target, void *data,
+		  unsigned int hookmask)
 {
 	const struct ebt_nat_info *info = data;
 	int tmp;
@@ -70,8 +73,8 @@ static struct ebt_target snat __read_mostly = {
 	.name		= EBT_SNAT_TARGET,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.target		= ebt_target_snat,
-	.check		= ebt_target_snat_check,
+	.target		= ebt_snat_tg,
+	.checkentry	= ebt_snat_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_nat_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index e6d8f0c140a..7576d1d62a4 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -119,9 +119,10 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
 	return true;
 }
 
-static bool ebt_filter_stp(const struct sk_buff *skb,
-   const struct net_device *in,
-   const struct net_device *out, const void *data, unsigned int datalen)
+static bool
+ebt_stp_mt(const struct sk_buff *skb, const struct net_device *in,
+	   const struct net_device *out, const struct xt_match *match,
+	   const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ebt_stp_info *info = data;
 	const struct stp_header *sp;
@@ -154,12 +155,15 @@ static bool ebt_filter_stp(const struct sk_buff *skb,
 	return true;
 }
 
-static bool ebt_stp_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_stp_mt_check(const char *table, const void *entry,
+		 const struct xt_match *match, void *data,
+		 unsigned int hook_mask)
 {
 	const struct ebt_stp_info *info = data;
 	const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
 	const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+	const struct ebt_entry *e = entry;
 
 	if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
 	    !(info->bitmask & EBT_STP_MASK))
@@ -176,8 +180,8 @@ static struct ebt_match filter_stp __read_mostly = {
 	.name		= EBT_STP_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_filter_stp,
-	.check		= ebt_stp_check,
+	.match		= ebt_stp_mt,
+	.checkentry	= ebt_stp_mt_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_stp_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 076b44590f1..77ff9c46b26 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -246,9 +246,10 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 	ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
 }
 
-static unsigned int ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
-   const struct net_device *in, const struct net_device *out,
-   const void *data, unsigned int datalen)
+static unsigned int
+ebt_ulog_tg(struct sk_buff *skb, const struct net_device *in,
+	    const struct net_device *out, unsigned int hooknr,
+	    const struct xt_target *target, const void *data)
 {
 	const struct ebt_ulog_info *uloginfo = data;
 
@@ -256,8 +257,10 @@ static unsigned int ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
 	return EBT_CONTINUE;
 }
 
-static bool ebt_ulog_check(const char *tablename, unsigned int hookmask,
-   const struct ebt_entry *e, void *data, unsigned int datalen)
+static bool
+ebt_ulog_tg_check(const char *table, const void *entry,
+		  const struct xt_target *target, void *data,
+		  unsigned int hookmask)
 {
 	struct ebt_ulog_info *uloginfo = data;
 
@@ -276,8 +279,8 @@ static struct ebt_watcher ulog __read_mostly = {
 	.name		= EBT_ULOG_WATCHER,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.watcher	= ebt_ulog,
-	.check		= ebt_ulog_check,
+	.target		= ebt_ulog_tg,
+	.checkentry	= ebt_ulog_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_ulog_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 9e3a39ae466..3af688b0fc3 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -41,10 +41,9 @@ MODULE_LICENSE("GPL");
 #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; }
 
 static bool
-ebt_filter_vlan(const struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		const void *data, unsigned int datalen)
+ebt_vlan_mt(const struct sk_buff *skb, const struct net_device *in,
+	    const struct net_device *out, const struct xt_match *match,
+	    const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ebt_vlan_info *info = data;
 	const struct vlan_hdr *fp;
@@ -88,11 +87,12 @@ ebt_filter_vlan(const struct sk_buff *skb,
 }
 
 static bool
-ebt_check_vlan(const char *tablename,
-	       unsigned int hooknr,
-	       const struct ebt_entry *e, void *data, unsigned int datalen)
+ebt_vlan_mt_check(const char *table, const void *entry,
+		  const struct xt_match *match, void *data,
+		  unsigned int hook_mask)
 {
 	struct ebt_vlan_info *info = data;
+	const struct ebt_entry *e = entry;
 
 	/* Is it 802.1Q frame checked? */
 	if (e->ethproto != htons(ETH_P_8021Q)) {
@@ -166,8 +166,8 @@ static struct ebt_match filter_vlan __read_mostly = {
 	.name		= EBT_VLAN_MATCH,
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
-	.match		= ebt_filter_vlan,
-	.check		= ebt_check_vlan,
+	.match		= ebt_vlan_mt,
+	.checkentry	= ebt_vlan_mt_check,
 	.matchsize	= XT_ALIGN(sizeof(struct ebt_vlan_info)),
 	.me		= THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bc4b3f4f37c..340e1c6bdcb 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -67,11 +67,10 @@ static struct ebt_target ebt_standard_target = {
 };
 
 static inline int ebt_do_watcher (struct ebt_entry_watcher *w,
-   const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in,
+   struct sk_buff *skb, unsigned int hooknr, const struct net_device *in,
    const struct net_device *out)
 {
-	w->u.watcher->watcher(skb, hooknr, in, out, w->data,
-	   w->watcher_size);
+	w->u.watcher->target(skb, in, out, hooknr, NULL, w->data);
 	/* watchers don't give a verdict */
 	return 0;
 }
@@ -80,8 +79,7 @@ static inline int ebt_do_match (struct ebt_entry_match *m,
    const struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out)
 {
-	return m->u.match->match(skb, in, out, m->data,
-	   m->match_size);
+	return m->u.match->match(skb, in, out, NULL, m->data, 0, 0, NULL);
 }
 
 static inline int ebt_dev_check(char *entry, const struct net_device *device)
@@ -195,8 +193,8 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 		if (!t->u.target->target)
 			verdict = ((struct ebt_standard_target *)t)->verdict;
 		else
-			verdict = t->u.target->target(skb, hook,
-			   in, out, t->data, t->target_size);
+			verdict = t->u.target->target(skb, in, out, hook,
+				  NULL, t->data);
 		if (verdict == EBT_ACCEPT) {
 			read_unlock_bh(&table->lock);
 			return NF_ACCEPT;
@@ -376,8 +374,8 @@ ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
 		       match->name, XT_ALIGN(match->matchsize), m->match_size);
 		goto out;
 	}
-	if (match->check &&
-	    !match->check(name, hookmask, e, m->data, m->match_size)) {
+	if (match->checkentry &&
+	    !match->checkentry(name, e, NULL, m->data, hookmask)) {
 		BUGPRINT("match->check failed\n");
 		goto out;
 	}
@@ -426,8 +424,8 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
 		       w->watcher_size);
 		goto out;
 	}
-	if (watcher->check &&
-	    !watcher->check(name, hookmask, e, w->data, w->watcher_size)) {
+	if (watcher->checkentry &&
+	    !watcher->checkentry(name, e, NULL, w->data, hookmask)) {
 		BUGPRINT("watcher->check failed\n");
 		goto out;
 	}
@@ -609,7 +607,7 @@ ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i)
 	if (i && (*i)-- == 0)
 		return 1;
 	if (m->u.match->destroy)
-		m->u.match->destroy(m->data, m->match_size);
+		m->u.match->destroy(NULL, m->data);
 	module_put(m->u.match->me);
 
 	return 0;
@@ -621,7 +619,7 @@ ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i)
 	if (i && (*i)-- == 0)
 		return 1;
 	if (w->u.watcher->destroy)
-		w->u.watcher->destroy(w->data, w->watcher_size);
+		w->u.watcher->destroy(NULL, w->data);
 	module_put(w->u.watcher->me);
 
 	return 0;
@@ -641,7 +639,7 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
 	EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL);
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
 	if (t->u.target->destroy)
-		t->u.target->destroy(t->data, t->target_size);
+		t->u.target->destroy(NULL, t->data);
 	module_put(t->u.target->me);
 
 	return 0;
@@ -755,8 +753,8 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 		module_put(t->u.target->me);
 		ret = -EINVAL;
 		goto cleanup_watchers;
-	} else if (t->u.target->check &&
-	    !t->u.target->check(name, hookmask, e, t->data, t->target_size)) {
+	} else if (t->u.target->checkentry &&
+	    !t->u.target->checkentry(name, e, NULL, t->data, hookmask)) {
 		module_put(t->u.target->me);
 		ret = -EFAULT;
 		goto cleanup_watchers;
-- 
cgit v1.2.3-70-g09d2


From 043ef46c7690bfdbd5b012e15812a14a19ca5604 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:15 +0200
Subject: netfilter: move Ebtables to use Xtables

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h |   6 +-
 net/bridge/netfilter/Kconfig              |   1 +
 net/bridge/netfilter/ebt_802_3.c          |   8 +-
 net/bridge/netfilter/ebt_among.c          |  14 +--
 net/bridge/netfilter/ebt_arp.c            |   8 +-
 net/bridge/netfilter/ebt_arpreply.c       |   8 +-
 net/bridge/netfilter/ebt_dnat.c           |   8 +-
 net/bridge/netfilter/ebt_ip.c             |   8 +-
 net/bridge/netfilter/ebt_ip6.c            |   9 +-
 net/bridge/netfilter/ebt_limit.c          |   8 +-
 net/bridge/netfilter/ebt_log.c            |   9 +-
 net/bridge/netfilter/ebt_mark.c           |   8 +-
 net/bridge/netfilter/ebt_mark_m.c         |   8 +-
 net/bridge/netfilter/ebt_nflog.c          |  16 +--
 net/bridge/netfilter/ebt_pkttype.c        |   8 +-
 net/bridge/netfilter/ebt_redirect.c       |   8 +-
 net/bridge/netfilter/ebt_snat.c           |   8 +-
 net/bridge/netfilter/ebt_stp.c            |   8 +-
 net/bridge/netfilter/ebt_ulog.c           |  10 +-
 net/bridge/netfilter/ebt_vlan.c           |   8 +-
 net/bridge/netfilter/ebtables.c           | 173 ++++++++++++------------------
 net/netfilter/x_tables.c                  |   9 +-
 22 files changed, 158 insertions(+), 193 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index f20a57da7a2..d3f9243b9d9 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -124,7 +124,7 @@ struct ebt_entry_match
 {
 	union {
 		char name[EBT_FUNCTION_MAXNAMELEN];
-		struct ebt_match *match;
+		struct xt_match *match;
 	} u;
 	/* size of data */
 	unsigned int match_size;
@@ -135,7 +135,7 @@ struct ebt_entry_watcher
 {
 	union {
 		char name[EBT_FUNCTION_MAXNAMELEN];
-		struct ebt_watcher *watcher;
+		struct xt_target *watcher;
 	} u;
 	/* size of data */
 	unsigned int watcher_size;
@@ -146,7 +146,7 @@ struct ebt_entry_target
 {
 	union {
 		char name[EBT_FUNCTION_MAXNAMELEN];
-		struct ebt_target *target;
+		struct xt_target *target;
 	} u;
 	/* size of data */
 	unsigned int target_size;
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 90947979499..e7c197ffb2f 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -7,6 +7,7 @@ menu "Bridge: Netfilter Configuration"
 
 config BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
+	select NETFILTER_XTABLES
 	help
 	  ebtables is a general, extensible frame/packet identification
 	  framework. Say 'Y' or 'M' here if you want to do Ethernet
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 6f1a69c28ed..6fc2a59e09a 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -51,8 +51,8 @@ ebt_802_3_mt_check(const char *table, const void *entry,
 	return true;
 }
 
-static struct ebt_match filter_802_3 __read_mostly = {
-	.name		= EBT_802_3_MATCH,
+static struct xt_match ebt_802_3_mt_reg __read_mostly = {
+	.name		= "802_3",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_802_3_mt,
@@ -63,12 +63,12 @@ static struct ebt_match filter_802_3 __read_mostly = {
 
 static int __init ebt_802_3_init(void)
 {
-	return ebt_register_match(&filter_802_3);
+	return xt_register_match(&ebt_802_3_mt_reg);
 }
 
 static void __exit ebt_802_3_fini(void)
 {
-	ebt_unregister_match(&filter_802_3);
+	xt_unregister_match(&ebt_802_3_mt_reg);
 }
 
 module_init(ebt_802_3_init);
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 84a306f085b..084559e1840 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -7,12 +7,12 @@
  *  August, 2003
  *
  */
-
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_among.h>
 #include <linux/ip.h>
 #include <linux/if_arp.h>
 #include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_among.h>
 
 static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
 				      const char *mac, __be32 ip)
@@ -211,8 +211,8 @@ ebt_among_mt_check(const char *table, const void *entry,
 	return true;
 }
 
-static struct ebt_match filter_among __read_mostly = {
-	.name		= EBT_AMONG_MATCH,
+static struct xt_match ebt_among_mt_reg __read_mostly = {
+	.name		= "among",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_among_mt,
@@ -223,12 +223,12 @@ static struct ebt_match filter_among __read_mostly = {
 
 static int __init ebt_among_init(void)
 {
-	return ebt_register_match(&filter_among);
+	return xt_register_match(&ebt_among_mt_reg);
 }
 
 static void __exit ebt_among_fini(void)
 {
-	ebt_unregister_match(&filter_among);
+	xt_unregister_match(&ebt_among_mt_reg);
 }
 
 module_init(ebt_among_init);
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 6e7cd2f5ad7..a073dffe7a1 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -119,8 +119,8 @@ ebt_arp_mt_check(const char *table, const void *entry,
 	return true;
 }
 
-static struct ebt_match filter_arp __read_mostly = {
-	.name		= EBT_ARP_MATCH,
+static struct xt_match ebt_arp_mt_reg __read_mostly = {
+	.name		= "arp",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_arp_mt,
@@ -131,12 +131,12 @@ static struct ebt_match filter_arp __read_mostly = {
 
 static int __init ebt_arp_init(void)
 {
-	return ebt_register_match(&filter_arp);
+	return xt_register_match(&ebt_arp_mt_reg);
 }
 
 static void __exit ebt_arp_fini(void)
 {
-	ebt_unregister_match(&filter_arp);
+	xt_unregister_match(&ebt_arp_mt_reg);
 }
 
 module_init(ebt_arp_init);
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 6f2f6589777..8071b64af46 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -78,8 +78,8 @@ ebt_arpreply_tg_check(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct ebt_target reply_target __read_mostly = {
-	.name		= EBT_ARPREPLY_TARGET,
+static struct xt_target ebt_arpreply_tg_reg __read_mostly = {
+	.name		= "arpreply",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_arpreply_tg,
@@ -90,12 +90,12 @@ static struct ebt_target reply_target __read_mostly = {
 
 static int __init ebt_arpreply_init(void)
 {
-	return ebt_register_target(&reply_target);
+	return xt_register_target(&ebt_arpreply_tg_reg);
 }
 
 static void __exit ebt_arpreply_fini(void)
 {
-	ebt_unregister_target(&reply_target);
+	xt_unregister_target(&ebt_arpreply_tg_reg);
 }
 
 module_init(ebt_arpreply_init);
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index b7cc013bd37..d2211c4a477 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -47,8 +47,8 @@ ebt_dnat_tg_check(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct ebt_target dnat __read_mostly = {
-	.name		= EBT_DNAT_TARGET,
+static struct xt_target ebt_dnat_tg_reg __read_mostly = {
+	.name		= "dnat",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_dnat_tg,
@@ -59,12 +59,12 @@ static struct ebt_target dnat __read_mostly = {
 
 static int __init ebt_dnat_init(void)
 {
-	return ebt_register_target(&dnat);
+	return xt_register_target(&ebt_dnat_tg_reg);
 }
 
 static void __exit ebt_dnat_fini(void)
 {
-	ebt_unregister_target(&dnat);
+	xt_unregister_target(&ebt_dnat_tg_reg);
 }
 
 module_init(ebt_dnat_init);
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index e7f3b1776b0..b42c7ce799b 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -109,8 +109,8 @@ ebt_ip_mt_check(const char *table, const void *entry,
 	return true;
 }
 
-static struct ebt_match filter_ip __read_mostly = {
-	.name		= EBT_IP_MATCH,
+static struct xt_match ebt_ip_mt_reg __read_mostly = {
+	.name		= "ip",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_ip_mt,
@@ -121,12 +121,12 @@ static struct ebt_match filter_ip __read_mostly = {
 
 static int __init ebt_ip_init(void)
 {
-	return ebt_register_match(&filter_ip);
+	return xt_register_match(&ebt_ip_mt_reg);
 }
 
 static void __exit ebt_ip_fini(void)
 {
-	ebt_unregister_match(&filter_ip);
+	xt_unregister_match(&ebt_ip_mt_reg);
 }
 
 module_init(ebt_ip_init);
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 807685da293..317e624ae59 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -121,9 +121,8 @@ ebt_ip6_mt_check(const char *table, const void *entry,
 	return true;
 }
 
-static struct ebt_match filter_ip6 =
-{
-	.name		= EBT_IP6_MATCH,
+static struct xt_match ebt_ip6_mt_reg __read_mostly = {
+	.name		= "ip6",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_ip6_mt,
@@ -134,12 +133,12 @@ static struct ebt_match filter_ip6 =
 
 static int __init ebt_ip6_init(void)
 {
-	return ebt_register_match(&filter_ip6);
+	return xt_register_match(&ebt_ip6_mt_reg);
 }
 
 static void __exit ebt_ip6_fini(void)
 {
-	ebt_unregister_match(&filter_ip6);
+	xt_unregister_match(&ebt_ip6_mt_reg);
 }
 
 module_init(ebt_ip6_init);
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index d3372739227..43d9a500363 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -89,8 +89,8 @@ ebt_limit_mt_check(const char *table, const void *e,
 	return true;
 }
 
-static struct ebt_match ebt_limit_reg __read_mostly = {
-	.name		= EBT_LIMIT_MATCH,
+static struct xt_match ebt_limit_mt_reg __read_mostly = {
+	.name		= "limit",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_limit_mt,
@@ -101,12 +101,12 @@ static struct ebt_match ebt_limit_reg __read_mostly = {
 
 static int __init ebt_limit_init(void)
 {
-	return ebt_register_match(&ebt_limit_reg);
+	return xt_register_match(&ebt_limit_mt_reg);
 }
 
 static void __exit ebt_limit_fini(void)
 {
-	ebt_unregister_match(&ebt_limit_reg);
+	xt_unregister_match(&ebt_limit_mt_reg);
 }
 
 module_init(ebt_limit_init);
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 424dfdf7f27..b40f9ed4c34 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -215,9 +215,8 @@ ebt_log_tg(struct sk_buff *skb, const struct net_device *in,
 	return EBT_CONTINUE;
 }
 
-static struct ebt_watcher log =
-{
-	.name		= EBT_LOG_WATCHER,
+static struct xt_target ebt_log_tg_reg __read_mostly = {
+	.name		= "log",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_log_tg,
@@ -236,7 +235,7 @@ static int __init ebt_log_init(void)
 {
 	int ret;
 
-	ret = ebt_register_watcher(&log);
+	ret = xt_register_target(&ebt_log_tg_reg);
 	if (ret < 0)
 		return ret;
 	nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
@@ -246,7 +245,7 @@ static int __init ebt_log_init(void)
 static void __exit ebt_log_fini(void)
 {
 	nf_log_unregister(&ebt_log_logger);
-	ebt_unregister_watcher(&log);
+	xt_unregister_target(&ebt_log_tg_reg);
 }
 
 module_init(ebt_log_init);
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 92c67271bd8..dff19fc91cf 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -59,8 +59,8 @@ ebt_mark_tg_check(const char *table, const void *e,
 	return true;
 }
 
-static struct ebt_target mark_target __read_mostly = {
-	.name		= EBT_MARK_TARGET,
+static struct xt_target ebt_mark_tg_reg __read_mostly = {
+	.name		= "mark",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_mark_tg,
@@ -71,12 +71,12 @@ static struct ebt_target mark_target __read_mostly = {
 
 static int __init ebt_mark_init(void)
 {
-	return ebt_register_target(&mark_target);
+	return xt_register_target(&ebt_mark_tg_reg);
 }
 
 static void __exit ebt_mark_fini(void)
 {
-	ebt_unregister_target(&mark_target);
+	xt_unregister_target(&ebt_mark_tg_reg);
 }
 
 module_init(ebt_mark_init);
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index db64a0de4f7..aa6781c7f98 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -40,8 +40,8 @@ ebt_mark_mt_check(const char *table, const void *e,
 	return true;
 }
 
-static struct ebt_match filter_mark __read_mostly = {
-	.name		= EBT_MARK_MATCH,
+static struct xt_match ebt_mark_mt_reg __read_mostly = {
+	.name		= "mark_m",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_mark_mt,
@@ -52,12 +52,12 @@ static struct ebt_match filter_mark __read_mostly = {
 
 static int __init ebt_mark_m_init(void)
 {
-	return ebt_register_match(&filter_mark);
+	return xt_register_match(&ebt_mark_mt_reg);
 }
 
 static void __exit ebt_mark_m_fini(void)
 {
-	ebt_unregister_match(&filter_mark);
+	xt_unregister_match(&ebt_mark_mt_reg);
 }
 
 module_init(ebt_mark_m_init);
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index b415f887188..917ac360079 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -49,24 +49,24 @@ ebt_nflog_tg_check(const char *table, const void *e,
 	return true;
 }
 
-static struct ebt_watcher nflog __read_mostly = {
-	.name = EBT_NFLOG_WATCHER,
-	.revision = 0,
-	.family = NFPROTO_BRIDGE,
-	.target = ebt_nflog_tg,
+static struct xt_target ebt_nflog_tg_reg __read_mostly = {
+	.name       = "nflog",
+	.revision   = 0,
+	.family     = NFPROTO_BRIDGE,
+	.target     = ebt_nflog_tg,
 	.checkentry = ebt_nflog_tg_check,
 	.targetsize = XT_ALIGN(sizeof(struct ebt_nflog_info)),
-	.me = THIS_MODULE,
+	.me         = THIS_MODULE,
 };
 
 static int __init ebt_nflog_init(void)
 {
-	return ebt_register_watcher(&nflog);
+	return xt_register_target(&ebt_nflog_tg_reg);
 }
 
 static void __exit ebt_nflog_fini(void)
 {
-	ebt_unregister_watcher(&nflog);
+	xt_unregister_target(&ebt_nflog_tg_reg);
 }
 
 module_init(ebt_nflog_init);
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 06393452ef9..1c04ce5a52c 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -36,8 +36,8 @@ ebt_pkttype_mt_check(const char *table, const void *e,
 	return true;
 }
 
-static struct ebt_match filter_pkttype __read_mostly = {
-	.name		= EBT_PKTTYPE_MATCH,
+static struct xt_match ebt_pkttype_mt_reg __read_mostly = {
+	.name		= "pkttype",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_pkttype_mt,
@@ -48,12 +48,12 @@ static struct ebt_match filter_pkttype __read_mostly = {
 
 static int __init ebt_pkttype_init(void)
 {
-	return ebt_register_match(&filter_pkttype);
+	return xt_register_match(&ebt_pkttype_mt_reg);
 }
 
 static void __exit ebt_pkttype_fini(void)
 {
-	ebt_unregister_match(&filter_pkttype);
+	xt_unregister_match(&ebt_pkttype_mt_reg);
 }
 
 module_init(ebt_pkttype_init);
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index e9540cf4f6d..1b7684ffe40 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -52,8 +52,8 @@ ebt_redirect_tg_check(const char *tablename, const void *e,
 	return true;
 }
 
-static struct ebt_target redirect_target __read_mostly = {
-	.name		= EBT_REDIRECT_TARGET,
+static struct xt_target ebt_redirect_tg_reg __read_mostly = {
+	.name		= "redirect",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_redirect_tg,
@@ -64,12 +64,12 @@ static struct ebt_target redirect_target __read_mostly = {
 
 static int __init ebt_redirect_init(void)
 {
-	return ebt_register_target(&redirect_target);
+	return xt_register_target(&ebt_redirect_tg_reg);
 }
 
 static void __exit ebt_redirect_fini(void)
 {
-	ebt_unregister_target(&redirect_target);
+	xt_unregister_target(&ebt_redirect_tg_reg);
 }
 
 module_init(ebt_redirect_init);
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 363d0051e04..c90217a4f9e 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -69,8 +69,8 @@ ebt_snat_tg_check(const char *tablename, const void *e,
 	return true;
 }
 
-static struct ebt_target snat __read_mostly = {
-	.name		= EBT_SNAT_TARGET,
+static struct xt_target ebt_snat_tg_reg __read_mostly = {
+	.name		= "snat",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_snat_tg,
@@ -81,12 +81,12 @@ static struct ebt_target snat __read_mostly = {
 
 static int __init ebt_snat_init(void)
 {
-	return ebt_register_target(&snat);
+	return xt_register_target(&ebt_snat_tg_reg);
 }
 
 static void __exit ebt_snat_fini(void)
 {
-	ebt_unregister_target(&snat);
+	xt_unregister_target(&ebt_snat_tg_reg);
 }
 
 module_init(ebt_snat_init);
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 7576d1d62a4..28bb48b67a8 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -176,8 +176,8 @@ ebt_stp_mt_check(const char *table, const void *entry,
 	return true;
 }
 
-static struct ebt_match filter_stp __read_mostly = {
-	.name		= EBT_STP_MATCH,
+static struct xt_match ebt_stp_mt_reg __read_mostly = {
+	.name		= "stp",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_stp_mt,
@@ -188,12 +188,12 @@ static struct ebt_match filter_stp __read_mostly = {
 
 static int __init ebt_stp_init(void)
 {
-	return ebt_register_match(&filter_stp);
+	return xt_register_match(&ebt_stp_mt_reg);
 }
 
 static void __exit ebt_stp_fini(void)
 {
-	ebt_unregister_match(&filter_stp);
+	xt_unregister_match(&ebt_stp_mt_reg);
 }
 
 module_init(ebt_stp_init);
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 77ff9c46b26..25ca6467349 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -275,8 +275,8 @@ ebt_ulog_tg_check(const char *table, const void *entry,
 	return 0;
 }
 
-static struct ebt_watcher ulog __read_mostly = {
-	.name		= EBT_ULOG_WATCHER,
+static struct xt_target ebt_ulog_tg_reg __read_mostly = {
+	.name		= "ulog",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.target		= ebt_ulog_tg,
@@ -286,7 +286,7 @@ static struct ebt_watcher ulog __read_mostly = {
 };
 
 static const struct nf_logger ebt_ulog_logger = {
-	.name		= EBT_ULOG_WATCHER,
+	.name		= "ulog",
 	.logfn		= &ebt_log_packet,
 	.me		= THIS_MODULE,
 };
@@ -315,7 +315,7 @@ static int __init ebt_ulog_init(void)
 		printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to "
 		       "call netlink_kernel_create\n");
 		ret = false;
-	} else if (ebt_register_watcher(&ulog) != 0) {
+	} else if (xt_register_target(&ebt_ulog_tg_reg) != 0) {
 		netlink_kernel_release(ebtulognl);
 	}
 
@@ -331,7 +331,7 @@ static void __exit ebt_ulog_fini(void)
 	int i;
 
 	nf_log_unregister(&ebt_ulog_logger);
-	ebt_unregister_watcher(&ulog);
+	xt_unregister_target(&ebt_ulog_tg_reg);
 	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
 		ub = &ulog_buffers[i];
 		if (timer_pending(&ub->timer))
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 3af688b0fc3..5addef6d62f 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -162,8 +162,8 @@ ebt_vlan_mt_check(const char *table, const void *entry,
 	return true;
 }
 
-static struct ebt_match filter_vlan __read_mostly = {
-	.name		= EBT_VLAN_MATCH,
+static struct xt_match ebt_vlan_mt_reg __read_mostly = {
+	.name		= "vlan",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
 	.match		= ebt_vlan_mt,
@@ -177,12 +177,12 @@ static int __init ebt_vlan_init(void)
 	DEBUG_MSG("ebtables 802.1Q extension module v"
 		  MODULE_VERS "\n");
 	DEBUG_MSG("module debug=%d\n", !!debug);
-	return ebt_register_match(&filter_vlan);
+	return xt_register_match(&ebt_vlan_mt_reg);
 }
 
 static void __exit ebt_vlan_fini(void)
 {
-	ebt_unregister_match(&filter_vlan);
+	xt_unregister_match(&ebt_vlan_mt_reg);
 }
 
 module_init(ebt_vlan_init);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 340e1c6bdcb..c4f7a2e8ed3 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -60,17 +60,18 @@ static LIST_HEAD(ebt_targets);
 static LIST_HEAD(ebt_matches);
 static LIST_HEAD(ebt_watchers);
 
-static struct ebt_target ebt_standard_target = {
+static struct xt_target ebt_standard_target = {
 	.name       = "standard",
 	.revision   = 0,
 	.family     = NFPROTO_BRIDGE,
+	.targetsize = sizeof(int),
 };
 
 static inline int ebt_do_watcher (struct ebt_entry_watcher *w,
    struct sk_buff *skb, unsigned int hooknr, const struct net_device *in,
    const struct net_device *out)
 {
-	w->u.watcher->target(skb, in, out, hooknr, NULL, w->data);
+	w->u.watcher->target(skb, in, out, hooknr, w->u.watcher, w->data);
 	/* watchers don't give a verdict */
 	return 0;
 }
@@ -79,7 +80,7 @@ static inline int ebt_do_match (struct ebt_entry_match *m,
    const struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out)
 {
-	return m->u.match->match(skb, in, out, NULL, m->data, 0, 0, NULL);
+	return m->u.match->match(skb, in, out, m->u.match, m->data, 0, 0, NULL);
 }
 
 static inline int ebt_dev_check(char *entry, const struct net_device *device)
@@ -194,7 +195,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 			verdict = ((struct ebt_standard_target *)t)->verdict;
 		else
 			verdict = t->u.target->target(skb, in, out, hook,
-				  NULL, t->data);
+				  t->u.target, t->data);
 		if (verdict == EBT_ACCEPT) {
 			read_unlock_bh(&table->lock);
 			return NF_ACCEPT;
@@ -336,104 +337,73 @@ static inline int
 ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
    const char *name, unsigned int hookmask, unsigned int *cnt)
 {
-	struct ebt_match *match;
+	struct xt_match *match;
 	size_t left = ((char *)e + e->watchers_offset) - (char *)m;
 	int ret;
 
 	if (left < sizeof(struct ebt_entry_match) ||
 	    left - sizeof(struct ebt_entry_match) < m->match_size)
 		return -EINVAL;
-	match = find_match_lock(m->u.name, &ret, &ebt_mutex);
-	if (!match)
-		return ret;
-	m->u.match = match;
-	if (!try_module_get(match->me)) {
-		mutex_unlock(&ebt_mutex);
+
+	match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE,
+		m->u.name, 0), "ebt_%s", m->u.name);
+	if (IS_ERR(match))
+		return PTR_ERR(match);
+	if (match == NULL)
 		return -ENOENT;
-	}
-	mutex_unlock(&ebt_mutex);
-	if (match->family != NFPROTO_BRIDGE) {
-		printk(KERN_WARNING "ebtables: %s match: not for ebtables?\n",
-		       match->name);
-		goto out;
-	}
-	if (match->revision != 0) {
-		printk(KERN_WARNING "ebtables: %s match: ebtables is not "
-		       "supporting revisions at this time\n",
-		       match->name);
-		goto out;
-	}
-	if (XT_ALIGN(match->matchsize) != m->match_size &&
-	    match->matchsize != -1) {
-		/*
-		 * ebt_among is exempt from centralized matchsize checking
-		 * because it uses a dynamic-size data set.
-		 */
-		printk(KERN_WARNING "ebtables: %s match: "
-		       "invalid size %Zu != %u\n",
-		       match->name, XT_ALIGN(match->matchsize), m->match_size);
-		goto out;
-	}
-	if (match->checkentry &&
+	m->u.match = match;
+
+	ret = xt_check_match(match, NFPROTO_BRIDGE, m->match_size,
+	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO);
+	if (ret < 0) {
+		module_put(match->me);
+		return ret;
+	} else if (match->checkentry != NULL &&
 	    !match->checkentry(name, e, NULL, m->data, hookmask)) {
+		module_put(match->me);
 		BUGPRINT("match->check failed\n");
-		goto out;
+		return -EINVAL;
 	}
+
 	(*cnt)++;
 	return 0;
- out:
-	module_put(match->me);
-	return -EINVAL;
 }
 
 static inline int
 ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
    const char *name, unsigned int hookmask, unsigned int *cnt)
 {
-	struct ebt_watcher *watcher;
+	struct xt_target *watcher;
 	size_t left = ((char *)e + e->target_offset) - (char *)w;
 	int ret;
 
 	if (left < sizeof(struct ebt_entry_watcher) ||
 	   left - sizeof(struct ebt_entry_watcher) < w->watcher_size)
 		return -EINVAL;
-	watcher = find_watcher_lock(w->u.name, &ret, &ebt_mutex);
-	if (!watcher)
-		return ret;
-	w->u.watcher = watcher;
-	if (!try_module_get(watcher->me)) {
-		mutex_unlock(&ebt_mutex);
+
+	watcher = try_then_request_module(
+		  xt_find_target(NFPROTO_BRIDGE, w->u.name, 0),
+		  "ebt_%s", w->u.name);
+	if (IS_ERR(watcher))
+		return PTR_ERR(watcher);
+	if (watcher == NULL)
 		return -ENOENT;
-	}
-	mutex_unlock(&ebt_mutex);
-	if (watcher->family != NFPROTO_BRIDGE) {
-		printk(KERN_WARNING "ebtables: %s watcher: not for ebtables?\n",
-		       watcher->name);
-		goto out;
-	}
-	if (watcher->revision != 0) {
-		printk(KERN_WARNING "ebtables: %s watcher: ebtables is not "
-		       "supporting revisions at this time\n",
-		       watcher->name);
-		goto out;
-	}
-	if (XT_ALIGN(watcher->targetsize) != w->watcher_size) {
-		printk(KERN_WARNING "ebtables: %s watcher: "
-		       "invalid size %Zu != %u\n",
-		       watcher->name, XT_ALIGN(watcher->targetsize),
-		       w->watcher_size);
-		goto out;
-	}
-	if (watcher->checkentry &&
+	w->u.watcher = watcher;
+
+	ret = xt_check_target(watcher, NFPROTO_BRIDGE, w->watcher_size,
+	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO);
+	if (ret < 0) {
+		module_put(watcher->me);
+		return ret;
+	} else if (watcher->checkentry != NULL &&
 	    !watcher->checkentry(name, e, NULL, w->data, hookmask)) {
+		module_put(watcher->me);
 		BUGPRINT("watcher->check failed\n");
-		goto out;
+		return -EINVAL;
 	}
+
 	(*cnt)++;
 	return 0;
- out:
-	module_put(watcher->me);
-	return -EINVAL;
 }
 
 static int ebt_verify_pointers(struct ebt_replace *repl,
@@ -607,7 +577,7 @@ ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i)
 	if (i && (*i)-- == 0)
 		return 1;
 	if (m->u.match->destroy)
-		m->u.match->destroy(NULL, m->data);
+		m->u.match->destroy(m->u.match, m->data);
 	module_put(m->u.match->me);
 
 	return 0;
@@ -619,7 +589,7 @@ ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i)
 	if (i && (*i)-- == 0)
 		return 1;
 	if (w->u.watcher->destroy)
-		w->u.watcher->destroy(NULL, w->data);
+		w->u.watcher->destroy(w->u.watcher, w->data);
 	module_put(w->u.watcher->me);
 
 	return 0;
@@ -639,7 +609,7 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
 	EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL);
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
 	if (t->u.target->destroy)
-		t->u.target->destroy(NULL, t->data);
+		t->u.target->destroy(t->u.target, t->data);
 	module_put(t->u.target->me);
 
 	return 0;
@@ -651,7 +621,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
    struct ebt_cl_stack *cl_s, unsigned int udc_cnt)
 {
 	struct ebt_entry_target *t;
-	struct ebt_target *target;
+	struct xt_target *target;
 	unsigned int i, j, hook = 0, hookmask = 0;
 	size_t gap;
 	int ret;
@@ -704,27 +674,15 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 		goto cleanup_watchers;
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
 	gap = e->next_offset - e->target_offset;
-	target = find_target_lock(t->u.name, &ret, &ebt_mutex);
-	if (!target)
-		goto cleanup_watchers;
-	if (!try_module_get(target->me)) {
-		mutex_unlock(&ebt_mutex);
-		ret = -ENOENT;
-		goto cleanup_watchers;
-	}
-	mutex_unlock(&ebt_mutex);
 
-	if (target->family != NFPROTO_BRIDGE) {
-		printk(KERN_WARNING "ebtables: %s target: not for ebtables?\n",
-		       target->name);
-		ret = -EINVAL;
+	target = try_then_request_module(
+		 xt_find_target(NFPROTO_BRIDGE, t->u.name, 0),
+		 "ebt_%s", t->u.name);
+	if (IS_ERR(target)) {
+		ret = PTR_ERR(target);
 		goto cleanup_watchers;
-	}
-	if (target->revision != 0) {
-		printk(KERN_WARNING "ebtables: %s target: ebtables is not "
-		       "supporting revisions at this time\n",
-		       target->name);
-		ret = -EINVAL;
+	} else if (target == NULL) {
+		ret = -ENOENT;
 		goto cleanup_watchers;
 	}
 
@@ -745,13 +703,12 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 		module_put(t->u.target->me);
 		ret = -EFAULT;
 		goto cleanup_watchers;
-	} else if (XT_ALIGN(target->targetsize) != t->target_size) {
-		printk(KERN_WARNING "ebtables: %s target: "
-		       "invalid size %Zu != %u\n",
-		       target->name, XT_ALIGN(target->targetsize),
-		       t->target_size);
-		module_put(t->u.target->me);
-		ret = -EINVAL;
+	}
+
+	ret = xt_check_target(target, NFPROTO_BRIDGE, t->target_size,
+	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO);
+	if (ret < 0) {
+		module_put(target->me);
 		goto cleanup_watchers;
 	} else if (t->u.target->checkentry &&
 	    !t->u.target->checkentry(name, e, NULL, t->data, hookmask)) {
@@ -1589,11 +1546,14 @@ static int __init ebtables_init(void)
 {
 	int ret;
 
-	mutex_lock(&ebt_mutex);
-	list_add(&ebt_standard_target.list, &ebt_targets);
-	mutex_unlock(&ebt_mutex);
-	if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0)
+	ret = xt_register_target(&ebt_standard_target);
+	if (ret < 0)
 		return ret;
+	ret = nf_register_sockopt(&ebt_sockopts);
+	if (ret < 0) {
+		xt_unregister_target(&ebt_standard_target);
+		return ret;
+	}
 
 	printk(KERN_INFO "Ebtables v2.0 registered\n");
 	return 0;
@@ -1602,6 +1562,7 @@ static int __init ebtables_init(void)
 static void __exit ebtables_fini(void)
 {
 	nf_unregister_sockopt(&ebt_sockopts);
+	xt_unregister_target(&ebt_standard_target);
 	printk(KERN_INFO "Ebtables v2.0 unregistered\n");
 }
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index aece6c2d134..0e23f42e341 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -30,7 +30,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("[ip,ip6,arp]_tables backend module");
+MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 
 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
@@ -325,7 +325,12 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
 		   unsigned int size, const char *table, unsigned int hook_mask,
 		   unsigned short proto, int inv_proto)
 {
-	if (XT_ALIGN(match->matchsize) != size) {
+	if (XT_ALIGN(match->matchsize) != size &&
+	    match->matchsize != -1) {
+		/*
+		 * ebt_among is exempt from centralized matchsize checking
+		 * because it uses a dynamic-size data set.
+		 */
 		printk("%s_tables: %s match: invalid size %Zu != %u\n",
 		       xt_prefix[family], match->name,
 		       XT_ALIGN(match->matchsize), size);
-- 
cgit v1.2.3-70-g09d2


From 102befab75c438bfa356c6976026326728771ebc Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:15 +0200
Subject: netfilter: x_tables: output bad hook mask in hexadecimal

It is a mask, and masks are most useful in hex.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/x_tables.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 0e23f42e341..3b1fc40cc27 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -342,7 +342,7 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
 		return -EINVAL;
 	}
 	if (match->hooks && (hook_mask & ~match->hooks) != 0) {
-		printk("%s_tables: %s match: bad hook_mask %u/%u\n",
+		printk("%s_tables: %s match: bad hook_mask %#x/%#x\n",
 		       xt_prefix[family], match->name, hook_mask, match->hooks);
 		return -EINVAL;
 	}
@@ -483,7 +483,7 @@ int xt_check_target(const struct xt_target *target, unsigned short family,
 		return -EINVAL;
 	}
 	if (target->hooks && (hook_mask & ~target->hooks) != 0) {
-		printk("%s_tables: %s target: bad hook_mask %u/%u\n",
+		printk("%s_tables: %s target: bad hook_mask %#x/%#x\n",
 		       xt_prefix[family], target->name, hook_mask,
 		       target->hooks);
 		return -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From f2ff525c8dae57b3cda51d76443f60f764f34202 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:15 +0200
Subject: netfilter: ebtables: use generic table checking

Ebtables ORs (1 << NF_BR_NUMHOOKS) into the hook mask to indicate that
the extension was called from a base chain. So this also needs to be
present in the extensions' ->hooks.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/ebt_arpreply.c | 4 ++--
 net/bridge/netfilter/ebt_dnat.c     | 2 ++
 net/bridge/netfilter/ebt_redirect.c | 2 ++
 net/bridge/netfilter/ebt_snat.c     | 6 ++----
 4 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 8071b64af46..0e51c8d7e5f 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -73,8 +73,6 @@ ebt_arpreply_tg_check(const char *tablename, const void *entry,
 	    e->invflags & EBT_IPROTO)
 		return false;
 	CLEAR_BASE_CHAIN_BIT;
-	if (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING))
-		return false;
 	return true;
 }
 
@@ -82,6 +80,8 @@ static struct xt_target ebt_arpreply_tg_reg __read_mostly = {
 	.name		= "arpreply",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
+	.table		= "nat",
+	.hooks		= (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING),
 	.target		= ebt_arpreply_tg,
 	.checkentry	= ebt_arpreply_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_arpreply_info)),
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index d2211c4a477..cb80101e412 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -51,6 +51,8 @@ static struct xt_target ebt_dnat_tg_reg __read_mostly = {
 	.name		= "dnat",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
+	.hooks		= (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) |
+			  (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_BROUTING),
 	.target		= ebt_dnat_tg,
 	.checkentry	= ebt_dnat_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_nat_info)),
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 1b7684ffe40..a50ffbe0e4f 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -56,6 +56,8 @@ static struct xt_target ebt_redirect_tg_reg __read_mostly = {
 	.name		= "redirect",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
+	.hooks		= (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) |
+			  (1 << NF_BR_BROUTING),
 	.target		= ebt_redirect_tg,
 	.checkentry	= ebt_redirect_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_redirect_info)),
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index c90217a4f9e..8a55c7d49b5 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -56,10 +56,6 @@ ebt_snat_tg_check(const char *tablename, const void *e,
 	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return false;
 	CLEAR_BASE_CHAIN_BIT;
-	if (strcmp(tablename, "nat"))
-		return false;
-	if (hookmask & ~(1 << NF_BR_POST_ROUTING))
-		return false;
 
 	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
 		return false;
@@ -73,6 +69,8 @@ static struct xt_target ebt_snat_tg_reg __read_mostly = {
 	.name		= "snat",
 	.revision	= 0,
 	.family		= NFPROTO_BRIDGE,
+	.table		= "nat",
+	.hooks		= (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_POST_ROUTING),
 	.target		= ebt_snat_tg,
 	.checkentry	= ebt_snat_tg_check,
 	.targetsize	= XT_ALIGN(sizeof(struct ebt_nat_info)),
-- 
cgit v1.2.3-70-g09d2


From 5365f8022e04310f0276c95e82548da917d514db Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:16 +0200
Subject: netfilter: implement hotdrop for Ebtables

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/ebtables.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index c4f7a2e8ed3..7964d3f0388 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -78,9 +78,10 @@ static inline int ebt_do_watcher (struct ebt_entry_watcher *w,
 
 static inline int ebt_do_match (struct ebt_entry_match *m,
    const struct sk_buff *skb, const struct net_device *in,
-   const struct net_device *out)
+   const struct net_device *out, bool *hotdrop)
 {
-	return m->u.match->match(skb, in, out, m->u.match, m->data, 0, 0, NULL);
+	return m->u.match->match(skb, in, out, m->u.match,
+	       m->data, 0, 0, hotdrop);
 }
 
 static inline int ebt_dev_check(char *entry, const struct net_device *device)
@@ -156,6 +157,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	struct ebt_entries *chaininfo;
 	char *base;
 	struct ebt_table_info *private;
+	bool hotdrop = false;
 
 	read_lock_bh(&table->lock);
 	private = table->private;
@@ -176,8 +178,13 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 		if (ebt_basic_match(point, eth_hdr(skb), in, out))
 			goto letscontinue;
 
-		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0)
+		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb,
+		    in, out, &hotdrop) != 0)
 			goto letscontinue;
+		if (hotdrop) {
+			read_unlock_bh(&table->lock);
+			return NF_DROP;
+		}
 
 		/* increase counter */
 		(*(counter_base + i)).pcnt++;
-- 
cgit v1.2.3-70-g09d2


From 66bff35b722956cc2423f55fcf1b69cefa24ef8b Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:16 +0200
Subject: netfilter: remove unused Ebtables functions

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h |   6 --
 net/bridge/netfilter/ebtables.c           | 108 ------------------------------
 2 files changed, 114 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index d3f9243b9d9..568a690f6a6 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -302,12 +302,6 @@ struct ebt_table
 		     ~(__alignof__(struct ebt_replace)-1))
 extern int ebt_register_table(struct ebt_table *table);
 extern void ebt_unregister_table(struct ebt_table *table);
-extern int ebt_register_match(struct ebt_match *match);
-extern void ebt_unregister_match(struct ebt_match *match);
-extern int ebt_register_watcher(struct ebt_watcher *watcher);
-extern void ebt_unregister_watcher(struct ebt_watcher *watcher);
-extern int ebt_register_target(struct ebt_target *target);
-extern void ebt_unregister_target(struct ebt_target *target);
 extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
    struct ebt_table *table);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 7964d3f0388..b489ed262fa 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -56,9 +56,6 @@
 
 static DEFINE_MUTEX(ebt_mutex);
 static LIST_HEAD(ebt_tables);
-static LIST_HEAD(ebt_targets);
-static LIST_HEAD(ebt_matches);
-static LIST_HEAD(ebt_watchers);
 
 static struct xt_target ebt_standard_target = {
 	.name       = "standard",
@@ -322,24 +319,6 @@ find_table_lock(const char *name, int *error, struct mutex *mutex)
 	return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex);
 }
 
-static inline struct ebt_match *
-find_match_lock(const char *name, int *error, struct mutex *mutex)
-{
-	return find_inlist_lock(&ebt_matches, name, "ebt_", error, mutex);
-}
-
-static inline struct ebt_watcher *
-find_watcher_lock(const char *name, int *error, struct mutex *mutex)
-{
-	return find_inlist_lock(&ebt_watchers, name, "ebt_", error, mutex);
-}
-
-static inline struct ebt_target *
-find_target_lock(const char *name, int *error, struct mutex *mutex)
-{
-	return find_inlist_lock(&ebt_targets, name, "ebt_", error, mutex);
-}
-
 static inline int
 ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
    const char *name, unsigned int hookmask, unsigned int *cnt)
@@ -1103,87 +1082,6 @@ free_newinfo:
 	return ret;
 }
 
-int ebt_register_target(struct ebt_target *target)
-{
-	struct ebt_target *t;
-	int ret;
-
-	ret = mutex_lock_interruptible(&ebt_mutex);
-	if (ret != 0)
-		return ret;
-	list_for_each_entry(t, &ebt_targets, list) {
-		if (strcmp(t->name, target->name) == 0) {
-			mutex_unlock(&ebt_mutex);
-			return -EEXIST;
-		}
-	}
-	list_add(&target->list, &ebt_targets);
-	mutex_unlock(&ebt_mutex);
-
-	return 0;
-}
-
-void ebt_unregister_target(struct ebt_target *target)
-{
-	mutex_lock(&ebt_mutex);
-	list_del(&target->list);
-	mutex_unlock(&ebt_mutex);
-}
-
-int ebt_register_match(struct ebt_match *match)
-{
-	struct ebt_match *m;
-	int ret;
-
-	ret = mutex_lock_interruptible(&ebt_mutex);
-	if (ret != 0)
-		return ret;
-	list_for_each_entry(m, &ebt_matches, list) {
-		if (strcmp(m->name, match->name) == 0) {
-			mutex_unlock(&ebt_mutex);
-			return -EEXIST;
-		}
-	}
-	list_add(&match->list, &ebt_matches);
-	mutex_unlock(&ebt_mutex);
-
-	return 0;
-}
-
-void ebt_unregister_match(struct ebt_match *match)
-{
-	mutex_lock(&ebt_mutex);
-	list_del(&match->list);
-	mutex_unlock(&ebt_mutex);
-}
-
-int ebt_register_watcher(struct ebt_watcher *watcher)
-{
-	struct ebt_watcher *w;
-	int ret;
-
-	ret = mutex_lock_interruptible(&ebt_mutex);
-	if (ret != 0)
-		return ret;
-	list_for_each_entry(w, &ebt_watchers, list) {
-		if (strcmp(w->name, watcher->name) == 0) {
-			mutex_unlock(&ebt_mutex);
-			return -EEXIST;
-		}
-	}
-	list_add(&watcher->list, &ebt_watchers);
-	mutex_unlock(&ebt_mutex);
-
-	return 0;
-}
-
-void ebt_unregister_watcher(struct ebt_watcher *watcher)
-{
-	mutex_lock(&ebt_mutex);
-	list_del(&watcher->list);
-	mutex_unlock(&ebt_mutex);
-}
-
 int ebt_register_table(struct ebt_table *table)
 {
 	struct ebt_table_info *newinfo;
@@ -1575,12 +1473,6 @@ static void __exit ebtables_fini(void)
 
 EXPORT_SYMBOL(ebt_register_table);
 EXPORT_SYMBOL(ebt_unregister_table);
-EXPORT_SYMBOL(ebt_register_match);
-EXPORT_SYMBOL(ebt_unregister_match);
-EXPORT_SYMBOL(ebt_register_watcher);
-EXPORT_SYMBOL(ebt_unregister_watcher);
-EXPORT_SYMBOL(ebt_register_target);
-EXPORT_SYMBOL(ebt_unregister_target);
 EXPORT_SYMBOL(ebt_do_table);
 module_init(ebtables_init);
 module_exit(ebtables_fini);
-- 
cgit v1.2.3-70-g09d2


From f7277f8d3aa4d3f99a9bdb48b27a2344a637a4b2 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:16 +0200
Subject: netfilter: remove redundant casts from Ebtables

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/ebt_arpreply.c | 2 +-
 net/bridge/netfilter/ebt_ip6.c      | 4 ++--
 net/bridge/netfilter/ebt_limit.c    | 2 +-
 net/bridge/netfilter/ebt_nflog.c    | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 0e51c8d7e5f..baf5510d044 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -20,7 +20,7 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct net_device *in,
 		const struct net_device *out, unsigned int hook_nr,
 		const struct xt_target *target, const void *data)
 {
-	struct ebt_arpreply_info *info = (void *)data;
+	const struct ebt_arpreply_info *info = data;
 	const __be32 *siptr, *diptr;
 	__be32 _sip, _dip;
 	const struct arphdr *ap;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 317e624ae59..7bd98312967 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -32,7 +32,7 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct net_device *in,
 	   const struct net_device *out, const struct xt_match *match,
 	   const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
-	const struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
+	const struct ebt_ip6_info *info = data;
 	const struct ipv6hdr *ih6;
 	struct ipv6hdr _ip6h;
 	const struct tcpudphdr *pptr;
@@ -98,7 +98,7 @@ ebt_ip6_mt_check(const char *table, const void *entry,
 		 unsigned int hook_mask)
 {
 	const struct ebt_entry *e = entry;
-	struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
+	struct ebt_ip6_info *info = data;
 
 	if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO)
 		return false;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 43d9a500363..58aaaa14906 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -35,7 +35,7 @@ ebt_limit_mt(const struct sk_buff *skb, const struct net_device *in,
 	     const struct net_device *out, const struct xt_match *match,
 	     const void *data, int offset, unsigned int protoff, bool *hotdrop)
 {
-	struct ebt_limit_info *info = (struct ebt_limit_info *)data;
+	struct ebt_limit_info *info = (void *)data;
 	unsigned long now = jiffies;
 
 	spin_lock_bh(&limit_lock);
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 917ac360079..74b4fa0aabc 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -24,7 +24,7 @@ ebt_nflog_tg(struct sk_buff *skb, const struct net_device *in,
 	     const struct net_device *out, unsigned int hooknr,
 	     const struct xt_target *target, const void *data)
 {
-	struct ebt_nflog_info *info = (struct ebt_nflog_info *)data;
+	const struct ebt_nflog_info *info = data;
 	struct nf_loginfo li;
 
 	li.type = NF_LOG_TYPE_ULOG;
@@ -41,7 +41,7 @@ ebt_nflog_tg_check(const char *table, const void *e,
 		   const struct xt_target *target, void *data,
 		   unsigned int hookmask)
 {
-	struct ebt_nflog_info *info = (struct ebt_nflog_info *)data;
+	struct ebt_nflog_info *info = data;
 
 	if (info->flags & ~EBT_NFLOG_MASK)
 		return false;
-- 
cgit v1.2.3-70-g09d2


From 147c3844ad381b58715a6ee2ea697594e3c06284 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:16 +0200
Subject: netfilter: ebtables: fix one wrong return value

Usually -EINVAL is used when checkentry fails (see *_tables).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/ebtables.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index b489ed262fa..7d8ead52d25 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -699,7 +699,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	} else if (t->u.target->checkentry &&
 	    !t->u.target->checkentry(name, e, NULL, t->data, hookmask)) {
 		module_put(t->u.target->me);
-		ret = -EFAULT;
+		ret = -EINVAL;
 		goto cleanup_watchers;
 	}
 	(*cnt)++;
-- 
cgit v1.2.3-70-g09d2


From 367c679007fa4f990eb7ee381326ec59d8148b0e Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:17 +0200
Subject: netfilter: xtables: do centralized checkentry call (1/2)

It used to be that {ip,ip6,etc}_tables called extension->checkentry
themselves, but this can be moved into the xtables core.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h |  6 ++++--
 net/bridge/netfilter/ebtables.c    | 24 ++++++------------------
 net/ipv4/netfilter/arp_tables.c    | 10 ++++------
 net/ipv4/netfilter/ip_tables.c     | 23 +++++++++--------------
 net/ipv6/netfilter/ip6_tables.c    | 23 +++++++++--------------
 net/netfilter/x_tables.c           | 12 ++++++++++--
 net/sched/act_ipt.c                | 14 +++-----------
 7 files changed, 45 insertions(+), 67 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 6989b22716e..85aa42785a5 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -330,10 +330,12 @@ extern void xt_unregister_matches(struct xt_match *match, unsigned int n);
 
 extern int xt_check_match(const struct xt_match *match, unsigned short family,
 			  unsigned int size, const char *table, unsigned int hook,
-			  unsigned short proto, int inv_proto);
+			  unsigned short proto, int inv_proto,
+			  const void *entry, void *matchinfo);
 extern int xt_check_target(const struct xt_target *target, unsigned short family,
 			   unsigned int size, const char *table, unsigned int hook,
-			   unsigned short proto, int inv_proto);
+			   unsigned short proto, int inv_proto,
+			   const void *entry, void *targinfo);
 
 extern struct xt_table *xt_register_table(struct net *net,
 					  struct xt_table *table,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 7d8ead52d25..7ee72b71d3c 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -340,15 +340,11 @@ ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
 	m->u.match = match;
 
 	ret = xt_check_match(match, NFPROTO_BRIDGE, m->match_size,
-	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO);
+	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO,
+	      e, m->data);
 	if (ret < 0) {
 		module_put(match->me);
 		return ret;
-	} else if (match->checkentry != NULL &&
-	    !match->checkentry(name, e, NULL, m->data, hookmask)) {
-		module_put(match->me);
-		BUGPRINT("match->check failed\n");
-		return -EINVAL;
 	}
 
 	(*cnt)++;
@@ -377,15 +373,11 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
 	w->u.watcher = watcher;
 
 	ret = xt_check_target(watcher, NFPROTO_BRIDGE, w->watcher_size,
-	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO);
+	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO,
+	      e, w->data);
 	if (ret < 0) {
 		module_put(watcher->me);
 		return ret;
-	} else if (watcher->checkentry != NULL &&
-	    !watcher->checkentry(name, e, NULL, w->data, hookmask)) {
-		module_put(watcher->me);
-		BUGPRINT("watcher->check failed\n");
-		return -EINVAL;
 	}
 
 	(*cnt)++;
@@ -692,15 +684,11 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	}
 
 	ret = xt_check_target(target, NFPROTO_BRIDGE, t->target_size,
-	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO);
+	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO,
+	      e, t->data);
 	if (ret < 0) {
 		module_put(target->me);
 		goto cleanup_watchers;
-	} else if (t->u.target->checkentry &&
-	    !t->u.target->checkentry(name, e, NULL, t->data, hookmask)) {
-		module_put(t->u.target->me);
-		ret = -EINVAL;
-		goto cleanup_watchers;
 	}
 	(*cnt)++;
 	return 0;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b4a9a1799c9..ae525a9afbe 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -465,15 +465,13 @@ static inline int check_target(struct arpt_entry *e, const char *name)
 
 	ret = xt_check_target(target, NFPROTO_ARP,
 			      t->u.target_size - sizeof(*t),
-			      name, e->comefrom, 0, 0);
-	if (!ret && t->u.kernel.target->checkentry
-	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
-					       e->comefrom)) {
+			      name, e->comefrom, 0, 0, e, t->data);
+	if (ret < 0) {
 		duprintf("arp_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
-		ret = -EINVAL;
+		return ret;
 	}
-	return ret;
+	return 0;
 }
 
 static inline int
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4e7c719445c..b4c74a7a807 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -616,17 +616,14 @@ check_match(struct ipt_entry_match *m, const char *name,
 	match = m->u.kernel.match;
 	ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
 			     name, hookmask, ip->proto,
-			     ip->invflags & IPT_INV_PROTO);
-	if (!ret && m->u.kernel.match->checkentry
-	    && !m->u.kernel.match->checkentry(name, ip, match, m->data,
-					      hookmask)) {
+			     ip->invflags & IPT_INV_PROTO, ip, m->data);
+	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
-		ret = -EINVAL;
+		return ret;
 	}
-	if (!ret)
-		(*i)++;
-	return ret;
+	++*i;
+	return 0;
 }
 
 static int
@@ -668,15 +665,13 @@ static int check_target(struct ipt_entry *e, const char *name)
 	target = t->u.kernel.target;
 	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
 			      name, e->comefrom, e->ip.proto,
-			      e->ip.invflags & IPT_INV_PROTO);
-	if (!ret && t->u.kernel.target->checkentry
-	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
-					       e->comefrom)) {
+			      e->ip.invflags & IPT_INV_PROTO, e, t->data);
+	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
-		ret = -EINVAL;
+		return ret;
 	}
-	return ret;
+	return 0;
 }
 
 static int
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 0b4557e0343..12c41b8d365 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -642,17 +642,14 @@ static int check_match(struct ip6t_entry_match *m, const char *name,
 	match = m->u.kernel.match;
 	ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m),
 			     name, hookmask, ipv6->proto,
-			     ipv6->invflags & IP6T_INV_PROTO);
-	if (!ret && m->u.kernel.match->checkentry
-	    && !m->u.kernel.match->checkentry(name, ipv6, match, m->data,
-					      hookmask)) {
+			     ipv6->invflags & IP6T_INV_PROTO, ipv6, m->data);
+	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
-		ret = -EINVAL;
+		return ret;
 	}
-	if (!ret)
-		(*i)++;
-	return ret;
+	++*i;
+	return 0;
 }
 
 static int
@@ -694,15 +691,13 @@ static int check_target(struct ip6t_entry *e, const char *name)
 	target = t->u.kernel.target;
 	ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t),
 			      name, e->comefrom, e->ipv6.proto,
-			      e->ipv6.invflags & IP6T_INV_PROTO);
-	if (!ret && t->u.kernel.target->checkentry
-	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
-					       e->comefrom)) {
+			      e->ipv6.invflags & IP6T_INV_PROTO, e, t->data);
+	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
-		ret = -EINVAL;
+		return ret;
 	}
-	return ret;
+	return 0;
 }
 
 static int
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 3b1fc40cc27..d1f2fb3e8f2 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -323,7 +323,8 @@ EXPORT_SYMBOL_GPL(xt_find_revision);
 
 int xt_check_match(const struct xt_match *match, unsigned short family,
 		   unsigned int size, const char *table, unsigned int hook_mask,
-		   unsigned short proto, int inv_proto)
+		   unsigned short proto, int inv_proto, const void *entry,
+		   void *matchinfo)
 {
 	if (XT_ALIGN(match->matchsize) != size &&
 	    match->matchsize != -1) {
@@ -351,6 +352,9 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
 		       xt_prefix[family], match->name, match->proto);
 		return -EINVAL;
 	}
+	if (match->checkentry != NULL &&
+	    !match->checkentry(table, entry, match, matchinfo, hook_mask))
+		return -EINVAL;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_check_match);
@@ -469,7 +473,8 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
 
 int xt_check_target(const struct xt_target *target, unsigned short family,
 		    unsigned int size, const char *table, unsigned int hook_mask,
-		    unsigned short proto, int inv_proto)
+		    unsigned short proto, int inv_proto, const void *entry,
+		    void *targinfo)
 {
 	if (XT_ALIGN(target->targetsize) != size) {
 		printk("%s_tables: %s target: invalid size %Zu != %u\n",
@@ -493,6 +498,9 @@ int xt_check_target(const struct xt_target *target, unsigned short family,
 		       xt_prefix[family], target->name, target->proto);
 		return -EINVAL;
 	}
+	if (target->checkentry != NULL &&
+	    !target->checkentry(table, entry, target, targinfo, hook_mask))
+		return -EINVAL;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_check_target);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d1263b3c96c..79ea19375ca 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -51,20 +51,12 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 	t->u.kernel.target = target;
 
 	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
-			      table, hook, 0, 0);
-	if (ret) {
+			      table, hook, 0, 0, NULL, t->data);
+	if (ret < 0) {
 		module_put(t->u.kernel.target->me);
 		return ret;
 	}
-	if (t->u.kernel.target->checkentry
-	    && !t->u.kernel.target->checkentry(table, NULL,
-					       t->u.kernel.target, t->data,
-					       hook)) {
-		module_put(t->u.kernel.target->me);
-		ret = -EINVAL;
-	}
-
-	return ret;
+	return 0;
 }
 
 static void ipt_destroy_target(struct ipt_entry_target *t)
-- 
cgit v1.2.3-70-g09d2


From 77d7358995489bf354fb4f65f4528e47980ffb08 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:17 +0200
Subject: netfilter: ip6tables: fix name of hopbyhop in Kconfig

The module is called hbh, not hopbyhop.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 0cfcce7b18d..f24432462be 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -67,7 +67,7 @@ config IP6_NF_MATCH_RT
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP6_NF_MATCH_OPTS
-	tristate '"hopbyhop" and "dst" opts header match support'
+	tristate '"hbh" hop-by-hop and "dst" opts header match support'
 	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
-- 
cgit v1.2.3-70-g09d2


From 2203eb47603b01b15a4b1d5b1c7886da96158e74 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:17 +0200
Subject: netfilter: ip6tables: fix Kconfig entry dependency for ip6t_LOG

ip6t_LOG does certainly not depend on the filter table.
(Also, move it so that menuconfig still displays it correctly.)

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/Kconfig | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index f24432462be..fee881bd31f 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -136,24 +136,24 @@ config IP6_NF_MATCH_EUI64
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 # The targets
-config IP6_NF_FILTER
-	tristate "Packet filtering"
+config IP6_NF_TARGET_LOG
+	tristate "LOG target support"
 	depends on IP6_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
-	  Packet filtering defines a table `filter', which has a series of
-	  rules for simple packet filtering at local input, forwarding and
-	  local output.  See the man page for iptables(8).
+	  This option adds a `LOG' target, which allows you to create rules in
+	  any iptables table which records the packet header to the syslog.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_TARGET_LOG
-	tristate "LOG target support"
-	depends on IP6_NF_FILTER
+config IP6_NF_FILTER
+	tristate "Packet filtering"
+	depends on IP6_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
-	  This option adds a `LOG' target, which allows you to create rules in
-	  any iptables table which records the packet header to the syslog.
+	  Packet filtering defines a table `filter', which has a series of
+	  rules for simple packet filtering at local input, forwarding and
+	  local output.  See the man page for iptables(8).
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-- 
cgit v1.2.3-70-g09d2


From 20f3c56f4d7c76bcb66050f3364aa8da110f5bbd Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:17 +0200
Subject: netfilter: ebtables: make BRIDGE_NF_EBTABLES a menuconfig option

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/Kconfig | 29 +++++------------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index e7c197ffb2f..366d3e9d51f 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -2,22 +2,21 @@
 # Bridge netfilter configuration
 #
 
-menu "Bridge: Netfilter Configuration"
-	depends on BRIDGE && BRIDGE_NETFILTER
-
-config BRIDGE_NF_EBTABLES
+menuconfig BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
 	select NETFILTER_XTABLES
 	help
 	  ebtables is a general, extensible frame/packet identification
 	  framework. Say 'Y' or 'M' here if you want to do Ethernet
 	  filtering/NAT/brouting on the Ethernet bridge.
+
+if BRIDGE_NF_EBTABLES
+
 #
 # tables
 #
 config BRIDGE_EBT_BROUTE
 	tristate "ebt: broute table support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  The ebtables broute table is used to define rules that decide between
 	  bridging and routing frames, giving Linux the functionality of a
@@ -28,7 +27,6 @@ config BRIDGE_EBT_BROUTE
 
 config BRIDGE_EBT_T_FILTER
 	tristate "ebt: filter table support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  The ebtables filter table is used to define frame filtering rules at
 	  local input, forwarding and local output. See the man page for
@@ -38,7 +36,6 @@ config BRIDGE_EBT_T_FILTER
 
 config BRIDGE_EBT_T_NAT
 	tristate "ebt: nat table support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  The ebtables nat table is used to define rules that alter the MAC
 	  source address (MAC SNAT) or the MAC destination address (MAC DNAT).
@@ -50,7 +47,6 @@ config BRIDGE_EBT_T_NAT
 #
 config BRIDGE_EBT_802_3
 	tristate "ebt: 802.3 filter support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds matching support for 802.3 Ethernet frames.
 
@@ -58,7 +54,6 @@ config BRIDGE_EBT_802_3
 
 config BRIDGE_EBT_AMONG
 	tristate "ebt: among filter support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the among match, which allows matching the MAC source
 	  and/or destination address on a list of addresses. Optionally,
@@ -68,7 +63,6 @@ config BRIDGE_EBT_AMONG
 
 config BRIDGE_EBT_ARP
 	tristate "ebt: ARP filter support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the ARP match, which allows ARP and RARP header field
 	  filtering.
@@ -77,7 +71,6 @@ config BRIDGE_EBT_ARP
 
 config BRIDGE_EBT_IP
 	tristate "ebt: IP filter support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the IP match, which allows basic IP header field
 	  filtering.
@@ -95,7 +88,6 @@ config BRIDGE_EBT_IP6
 
 config BRIDGE_EBT_LIMIT
 	tristate "ebt: limit match support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the limit match, which allows you to control
 	  the rate at which a rule can be matched. This match is the
@@ -106,7 +98,6 @@ config BRIDGE_EBT_LIMIT
 
 config BRIDGE_EBT_MARK
 	tristate "ebt: mark filter support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the mark match, which allows matching frames based on
 	  the 'nfmark' value in the frame. This can be set by the mark target.
@@ -117,7 +108,6 @@ config BRIDGE_EBT_MARK
 
 config BRIDGE_EBT_PKTTYPE
 	tristate "ebt: packet type filter support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the packet type match, which allows matching on the
 	  type of packet based on its Ethernet "class" (as determined by
@@ -128,7 +118,6 @@ config BRIDGE_EBT_PKTTYPE
 
 config BRIDGE_EBT_STP
 	tristate "ebt: STP filter support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the Spanning Tree Protocol match, which
 	  allows STP header field filtering.
@@ -137,7 +126,6 @@ config BRIDGE_EBT_STP
 
 config BRIDGE_EBT_VLAN
 	tristate "ebt: 802.1Q VLAN filter support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the 802.1Q vlan match, which allows the filtering of
 	  802.1Q vlan fields.
@@ -157,7 +145,6 @@ config BRIDGE_EBT_ARPREPLY
 
 config BRIDGE_EBT_DNAT
 	tristate "ebt: dnat target support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the MAC DNAT target, which allows altering the MAC
 	  destination address of frames.
@@ -166,7 +153,6 @@ config BRIDGE_EBT_DNAT
 
 config BRIDGE_EBT_MARK_T
 	tristate "ebt: mark target support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the mark target, which allows marking frames by
 	  setting the 'nfmark' value in the frame.
@@ -177,7 +163,6 @@ config BRIDGE_EBT_MARK_T
 
 config BRIDGE_EBT_REDIRECT
 	tristate "ebt: redirect target support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the MAC redirect target, which allows altering the MAC
 	  destination address of a frame to that of the device it arrived on.
@@ -186,7 +171,6 @@ config BRIDGE_EBT_REDIRECT
 
 config BRIDGE_EBT_SNAT
 	tristate "ebt: snat target support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the MAC SNAT target, which allows altering the MAC
 	  source address of frames.
@@ -197,7 +181,6 @@ config BRIDGE_EBT_SNAT
 #
 config BRIDGE_EBT_LOG
 	tristate "ebt: log support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option adds the log watcher, that you can use in any rule
 	  in any ebtables table. It records info about the frame header
@@ -207,7 +190,6 @@ config BRIDGE_EBT_LOG
 
 config BRIDGE_EBT_ULOG
 	tristate "ebt: ulog support (OBSOLETE)"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option enables the old bridge-specific "ebt_ulog" implementation
 	  which has been obsoleted by the new "nfnetlink_log" code (see
@@ -224,7 +206,6 @@ config BRIDGE_EBT_ULOG
 
 config BRIDGE_EBT_NFLOG
 	tristate "ebt: nflog support"
-	depends on BRIDGE_NF_EBTABLES
 	help
 	  This option enables the nflog watcher, which allows to LOG
 	  messages through the netfilter logging API, which can use
@@ -236,4 +217,4 @@ config BRIDGE_EBT_NFLOG
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-endmenu
+endif # BRIDGE_NF_EBTABLES
-- 
cgit v1.2.3-70-g09d2


From aba0d34800d7f56493b4d5548cc06498a4d69124 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:17 +0200
Subject: netfilter: xtables: sort extensions alphabetically in Kconfig

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/Kconfig |  78 +++++++++++++++++------------------
 net/ipv6/netfilter/Kconfig |  44 ++++++++++----------
 net/netfilter/Kconfig      | 100 ++++++++++++++++++++++-----------------------
 3 files changed, 111 insertions(+), 111 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 07757ac8d5d..087b8290684 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -62,15 +62,16 @@ config IP_NF_IPTABLES
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 # The matches.
-config IP_NF_MATCH_ECN
-	tristate '"ecn" match support'
+config IP_NF_MATCH_ADDRTYPE
+	tristate '"addrtype" address type match support'
 	depends on IP_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
-	  This option adds a `ECN' match, which allows you to match against
-	  the IPv4 and TCP header ECN fields.
+	  This option allows you to match what routing thinks of an address,
+	  eg. UNICAST, LOCAL, BROADCAST, ...
 
-	  To compile it as a module, choose M here.  If unsure, say N.
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
 config IP_NF_MATCH_AH
 	tristate '"ah" match support'
@@ -82,26 +83,25 @@ config IP_NF_MATCH_AH
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_TTL
-	tristate '"ttl" match support'
+config IP_NF_MATCH_ECN
+	tristate '"ecn" match support'
 	depends on IP_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
-	  This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
-	  to match packets by their TTL value.
+	  This option adds a `ECN' match, which allows you to match against
+	  the IPv4 and TCP header ECN fields.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_ADDRTYPE
-	tristate '"addrtype" address type match support'
+config IP_NF_MATCH_TTL
+	tristate '"ttl" match support'
 	depends on IP_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
-	  This option allows you to match what routing thinks of an address,
-	  eg. UNICAST, LOCAL, BROADCAST, ...
+	  This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
+	  to match packets by their TTL value.
 
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  To compile it as a module, choose M here.  If unsure, say N.
 
 # `filter', generic and specific targets
 config IP_NF_FILTER
@@ -186,26 +186,26 @@ config IP_NF_TARGET_MASQUERADE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_REDIRECT
-	tristate "REDIRECT target support"
+config IP_NF_TARGET_NETMAP
+	tristate "NETMAP target support"
 	depends on NF_NAT
 	depends on NETFILTER_ADVANCED
 	help
-	  REDIRECT is a special case of NAT: all incoming connections are
-	  mapped onto the incoming interface's address, causing the packets to
-	  come to the local machine instead of passing through.  This is
-	  useful for transparent proxies.
+	  NETMAP is an implementation of static 1:1 NAT mapping of network
+	  addresses. It maps the network address part, while keeping the host
+	  address part intact.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_NETMAP
-	tristate "NETMAP target support"
+config IP_NF_TARGET_REDIRECT
+	tristate "REDIRECT target support"
 	depends on NF_NAT
 	depends on NETFILTER_ADVANCED
 	help
-	  NETMAP is an implementation of static 1:1 NAT mapping of network
-	  addresses. It maps the network address part, while keeping the host
-	  address part intact.
+	  REDIRECT is a special case of NAT: all incoming connections are
+	  mapped onto the incoming interface's address, causing the packets to
+	  come to the local machine instead of passing through.  This is
+	  useful for transparent proxies.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
@@ -300,6 +300,19 @@ config IP_NF_MANGLE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_NF_TARGET_CLUSTERIP
+	tristate "CLUSTERIP target support (EXPERIMENTAL)"
+	depends on IP_NF_MANGLE && EXPERIMENTAL
+	depends on NF_CONNTRACK_IPV4
+	depends on NETFILTER_ADVANCED
+	select NF_CONNTRACK_MARK
+	help
+	  The CLUSTERIP target allows you to build load-balancing clusters of
+	  network servers without having a dedicated load-balancing
+	  router/server/switch.
+	
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP_NF_TARGET_ECN
 	tristate "ECN target support"
 	depends on IP_NF_MANGLE
@@ -330,19 +343,6 @@ config IP_NF_TARGET_TTL
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_CLUSTERIP
-	tristate "CLUSTERIP target support (EXPERIMENTAL)"
-	depends on IP_NF_MANGLE && EXPERIMENTAL
-	depends on NF_CONNTRACK_IPV4
-	depends on NETFILTER_ADVANCED
-	select NF_CONNTRACK_MARK
-	help
-	  The CLUSTERIP target allows you to build load-balancing clusters of
-	  network servers without having a dedicated load-balancing
-	  router/server/switch.
-	
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 # raw + specific targets
 config IP_NF_RAW
 	tristate  'raw table support (required for NOTRACK/TRACE)'
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index fee881bd31f..91ffba08c29 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -56,23 +56,23 @@ config IP6_NF_IPTABLES
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 # The simple matches.
-config IP6_NF_MATCH_RT
-	tristate '"rt" Routing header match support'
+config IP6_NF_MATCH_AH
+	tristate '"ah" match support'
 	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
-	  rt matching allows you to match packets based on the routing
-	  header of the packet.
+	  This module allows one to match AH packets.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_OPTS
-	tristate '"hbh" hop-by-hop and "dst" opts header match support'
+config IP6_NF_MATCH_EUI64
+	tristate '"eui64" address check'
 	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
-	  This allows one to match packets based on the hop-by-hop
-	  and destination options headers of a packet.
+	  This module performs checking on the IPv6 source address
+	  Compares the last 64 bits with the EUI64 (delivered
+	  from the MAC address) address
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
@@ -86,6 +86,16 @@ config IP6_NF_MATCH_FRAG
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP6_NF_MATCH_OPTS
+	tristate '"hbh" hop-by-hop and "dst" opts header match support'
+	depends on IP6_NF_IPTABLES
+	depends on NETFILTER_ADVANCED
+	help
+	  This allows one to match packets based on the hop-by-hop
+	  and destination options headers of a packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP6_NF_MATCH_HL
 	tristate '"hl" match support'
 	depends on IP6_NF_IPTABLES
@@ -106,15 +116,6 @@ config IP6_NF_MATCH_IPV6HEADER
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_AH
-	tristate '"ah" match support'
-	depends on IP6_NF_IPTABLES
-	depends on NETFILTER_ADVANCED
-	help
-	  This module allows one to match AH packets.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_MATCH_MH
 	tristate '"mh" match support'
 	depends on IP6_NF_IPTABLES
@@ -124,14 +125,13 @@ config IP6_NF_MATCH_MH
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_MATCH_EUI64
-	tristate '"eui64" address check'
+config IP6_NF_MATCH_RT
+	tristate '"rt" Routing header match support'
 	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
-	  This module performs checking on the IPv6 source address
-	  Compares the last 64 bits with the EUI64 (delivered
-	  from the MAC address) address
+	  rt matching allows you to match packets based on the routing
+	  header of the packet.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index de18bba619f..9ad74e8bc5b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -340,6 +340,18 @@ config NETFILTER_XT_TARGET_CONNMARK
 	  <file:Documentation/kbuild/modules.txt>.  The module will be called
 	  ipt_CONNMARK.ko.  If unsure, say `N'.
 
+config NETFILTER_XT_TARGET_CONNSECMARK
+	tristate '"CONNSECMARK" target support'
+	depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
+	default m if NETFILTER_ADVANCED=n
+	help
+	  The CONNSECMARK target copies security markings from packets
+	  to connections, and restores security markings from connections
+	  to packets (if the packets are not already marked).  This would
+	  normally be used in conjunction with the SECMARK target.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_DSCP
 	tristate '"DSCP" and "TOS" target support'
 	depends on NETFILTER_XTABLES
@@ -371,18 +383,6 @@ config NETFILTER_XT_TARGET_MARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NETFILTER_XT_TARGET_NFQUEUE
-	tristate '"NFQUEUE" target Support'
-	depends on NETFILTER_XTABLES
-	depends on NETFILTER_ADVANCED
-	help
-	  This target replaced the old obsolete QUEUE target.
-
-	  As opposed to QUEUE, it supports 65535 different queues,
-	  not just one.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NETFILTER_XT_TARGET_NFLOG
 	tristate '"NFLOG" target support'
 	depends on NETFILTER_XTABLES
@@ -395,6 +395,18 @@ config NETFILTER_XT_TARGET_NFLOG
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_NFQUEUE
+	tristate '"NFQUEUE" target Support'
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	help
+	  This target replaced the old obsolete QUEUE target.
+
+	  As opposed to QUEUE, it supports 65535 different queues,
+	  not just one.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_NOTRACK
 	tristate  '"NOTRACK" target support'
 	depends on NETFILTER_XTABLES
@@ -459,18 +471,6 @@ config NETFILTER_XT_TARGET_SECMARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NETFILTER_XT_TARGET_CONNSECMARK
-	tristate '"CONNSECMARK" target support'
-	depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
-	default m if NETFILTER_ADVANCED=n
-	help
-	  The CONNSECMARK target copies security markings from packets
-	  to connections, and restores security markings from connections
-	  to packets (if the packets are not already marked).  This would
-	  normally be used in conjunction with the SECMARK target.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NETFILTER_XT_TARGET_TCPMSS
 	tristate '"TCPMSS" target support'
 	depends on NETFILTER_XTABLES && (IPV6 || IPV6=n)
@@ -607,6 +607,21 @@ config NETFILTER_XT_MATCH_ESP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_HASHLIMIT
+	tristate '"hashlimit" match support'
+	depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+	depends on NETFILTER_ADVANCED
+	help
+	  This option adds a `hashlimit' match.
+
+	  As opposed to `limit', this match dynamically creates a hash table
+	  of limit buckets, based on your selection of source/destination
+	  addresses and/or ports.
+
+	  It enables you to express policies like `10kpps for any given
+	  destination address' or `500pps from any given source address'
+	  with a single rule.
+
 config NETFILTER_XT_MATCH_HELPER
 	tristate '"helper" match support'
 	depends on NETFILTER_XTABLES
@@ -671,6 +686,17 @@ config NETFILTER_XT_MATCH_MARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_MULTIPORT
+	tristate '"multiport" Multiple port match support'
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	help
+	  Multiport matching allows you to match TCP or UDP packets based on
+	  a series of source or destination ports: normally a rule can only
+	  match a single range of ports.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_OWNER
 	tristate '"owner" match support'
 	depends on NETFILTER_XTABLES
@@ -691,17 +717,6 @@ config NETFILTER_XT_MATCH_POLICY
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NETFILTER_XT_MATCH_MULTIPORT
-	tristate '"multiport" Multiple port match support'
-	depends on NETFILTER_XTABLES
-	depends on NETFILTER_ADVANCED
-	help
-	  Multiport matching allows you to match TCP or UDP packets based on
-	  a series of source or destination ports: normally a rule can only
-	  match a single range of ports.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NETFILTER_XT_MATCH_PHYSDEV
 	tristate '"physdev" match support'
 	depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
@@ -884,20 +899,5 @@ config NETFILTER_XT_MATCH_U32
 
 	  Details and examples are in the kernel module source.
 
-config NETFILTER_XT_MATCH_HASHLIMIT
-	tristate '"hashlimit" match support'
-	depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
-	depends on NETFILTER_ADVANCED
-	help
-	  This option adds a `hashlimit' match.
-
-	  As opposed to `limit', this match dynamically creates a hash table
-	  of limit buckets, based on your selection of source/destination
-	  addresses and/or ports.
-
-	  It enables you to express policies like `10kpps for any given
-	  destination address' or `500pps from any given source address'
-	  with a single rule.
-
 endmenu
 
-- 
cgit v1.2.3-70-g09d2


From c2df73de246ae75705af8ceed4f385b261dea108 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:18 +0200
Subject: netfilter: xtables: use "if" blocks in Kconfig

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/Kconfig | 36 +++++++++-----------
 net/ipv6/netfilter/Kconfig | 17 +++-------
 net/netfilter/Kconfig      | 84 ++++++++++++----------------------------------
 3 files changed, 41 insertions(+), 96 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 087b8290684..3816e1dc929 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,10 +61,11 @@ config IP_NF_IPTABLES
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+if IP_NF_IPTABLES
+
 # The matches.
 config IP_NF_MATCH_ADDRTYPE
 	tristate '"addrtype" address type match support'
-	depends on IP_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option allows you to match what routing thinks of an address,
@@ -75,7 +76,6 @@ config IP_NF_MATCH_ADDRTYPE
 
 config IP_NF_MATCH_AH
 	tristate '"ah" match support'
-	depends on IP_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This match extension allows you to match a range of SPIs
@@ -85,7 +85,6 @@ config IP_NF_MATCH_AH
 
 config IP_NF_MATCH_ECN
 	tristate '"ecn" match support'
-	depends on IP_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `ECN' match, which allows you to match against
@@ -95,7 +94,6 @@ config IP_NF_MATCH_ECN
 
 config IP_NF_MATCH_TTL
 	tristate '"ttl" match support'
-	depends on IP_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
@@ -106,7 +104,6 @@ config IP_NF_MATCH_TTL
 # `filter', generic and specific targets
 config IP_NF_FILTER
 	tristate "Packet filtering"
-	depends on IP_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Packet filtering defines a table `filter', which has a series of
@@ -128,7 +125,6 @@ config IP_NF_TARGET_REJECT
 
 config IP_NF_TARGET_LOG
 	tristate "LOG target support"
-	depends on IP_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `LOG' target, which allows you to create rules in
@@ -138,7 +134,6 @@ config IP_NF_TARGET_LOG
 
 config IP_NF_TARGET_ULOG
 	tristate "ULOG target support"
-	depends on IP_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	---help---
 
@@ -159,7 +154,7 @@ config IP_NF_TARGET_ULOG
 # NAT + specific targets: nf_conntrack
 config NF_NAT
 	tristate "Full NAT"
-	depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4
+	depends on NF_CONNTRACK_IPV4
 	default m if NETFILTER_ADVANCED=n
 	help
 	  The Full NAT option allows masquerading, port forwarding and other
@@ -254,44 +249,43 @@ config NF_NAT_PROTO_SCTP
 
 config NF_NAT_FTP
 	tristate
-	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+	depends on NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_FTP
 
 config NF_NAT_IRC
 	tristate
-	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+	depends on NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_IRC
 
 config NF_NAT_TFTP
 	tristate
-	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+	depends on NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_TFTP
 
 config NF_NAT_AMANDA
 	tristate
-	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+	depends on NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_AMANDA
 
 config NF_NAT_PPTP
 	tristate
-	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+	depends on NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_PPTP
 	select NF_NAT_PROTO_GRE
 
 config NF_NAT_H323
 	tristate
-	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+	depends on NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_H323
 
 config NF_NAT_SIP
 	tristate
-	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
+	depends on NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_SIP
 
 # mangle + specific targets
 config IP_NF_MANGLE
 	tristate "Packet mangling"
-	depends on IP_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `mangle' table to iptables: see the man page for
@@ -346,7 +340,6 @@ config IP_NF_TARGET_TTL
 # raw + specific targets
 config IP_NF_RAW
 	tristate  'raw table support (required for NOTRACK/TRACE)'
-	depends on IP_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `raw' table to iptables. This table is the very
@@ -359,7 +352,6 @@ config IP_NF_RAW
 # security table for MAC policy
 config IP_NF_SECURITY
 	tristate "Security table"
-	depends on IP_NF_IPTABLES
 	depends on SECURITY
 	depends on NETFILTER_ADVANCED
 	help
@@ -368,6 +360,8 @@ config IP_NF_SECURITY
 	 
 	  If unsure, say N.
 
+endif # IP_NF_IPTABLES
+
 # ARP tables
 config IP_NF_ARPTABLES
 	tristate "ARP tables support"
@@ -380,9 +374,10 @@ config IP_NF_ARPTABLES
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+if IP_NF_ARPTABLES
+
 config IP_NF_ARPFILTER
 	tristate "ARP packet filtering"
-	depends on IP_NF_ARPTABLES
 	help
 	  ARP packet filtering defines a table `filter', which has a series of
 	  rules for simple ARP packet filtering at local input and
@@ -393,10 +388,11 @@ config IP_NF_ARPFILTER
 
 config IP_NF_ARP_MANGLE
 	tristate "ARP payload mangling"
-	depends on IP_NF_ARPTABLES
 	help
 	  Allows altering the ARP packet payload: source and destination
 	  hardware and network addresses.
 
+endif # IP_NF_ARPTABLES
+
 endmenu
 
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 91ffba08c29..53ea512c460 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -55,10 +55,11 @@ config IP6_NF_IPTABLES
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+if IP6_NF_IPTABLES
+
 # The simple matches.
 config IP6_NF_MATCH_AH
 	tristate '"ah" match support'
-	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This module allows one to match AH packets.
@@ -67,7 +68,6 @@ config IP6_NF_MATCH_AH
 
 config IP6_NF_MATCH_EUI64
 	tristate '"eui64" address check'
-	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This module performs checking on the IPv6 source address
@@ -78,7 +78,6 @@ config IP6_NF_MATCH_EUI64
 
 config IP6_NF_MATCH_FRAG
 	tristate '"frag" Fragmentation header match support'
-	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  frag matching allows you to match packets based on the fragmentation
@@ -88,7 +87,6 @@ config IP6_NF_MATCH_FRAG
 
 config IP6_NF_MATCH_OPTS
 	tristate '"hbh" hop-by-hop and "dst" opts header match support'
-	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This allows one to match packets based on the hop-by-hop
@@ -98,7 +96,6 @@ config IP6_NF_MATCH_OPTS
 
 config IP6_NF_MATCH_HL
 	tristate '"hl" match support'
-	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  HL matching allows you to match packets based on the hop
@@ -108,7 +105,6 @@ config IP6_NF_MATCH_HL
 
 config IP6_NF_MATCH_IPV6HEADER
 	tristate '"ipv6header" IPv6 Extension Headers Match'
-	depends on IP6_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This module allows one to match packets based upon
@@ -118,7 +114,6 @@ config IP6_NF_MATCH_IPV6HEADER
 
 config IP6_NF_MATCH_MH
 	tristate '"mh" match support'
-	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This module allows one to match MH packets.
@@ -127,7 +122,6 @@ config IP6_NF_MATCH_MH
 
 config IP6_NF_MATCH_RT
 	tristate '"rt" Routing header match support'
-	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  rt matching allows you to match packets based on the routing
@@ -138,7 +132,6 @@ config IP6_NF_MATCH_RT
 # The targets
 config IP6_NF_TARGET_LOG
 	tristate "LOG target support"
-	depends on IP6_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `LOG' target, which allows you to create rules in
@@ -148,7 +141,6 @@ config IP6_NF_TARGET_LOG
 
 config IP6_NF_FILTER
 	tristate "Packet filtering"
-	depends on IP6_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Packet filtering defines a table `filter', which has a series of
@@ -170,7 +162,6 @@ config IP6_NF_TARGET_REJECT
 
 config IP6_NF_MANGLE
 	tristate "Packet mangling"
-	depends on IP6_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `mangle' table to iptables: see the man page for
@@ -198,7 +189,6 @@ config IP6_NF_TARGET_HL
 
 config IP6_NF_RAW
 	tristate  'raw table support (required for TRACE)'
-	depends on IP6_NF_IPTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `raw' table to ip6tables. This table is the very
@@ -211,7 +201,6 @@ config IP6_NF_RAW
 # security table for MAC policy
 config IP6_NF_SECURITY
        tristate "Security table"
-       depends on IP6_NF_IPTABLES
        depends on SECURITY
        depends on NETFILTER_ADVANCED
        help
@@ -220,5 +209,7 @@ config IP6_NF_SECURITY
         
          If unsure, say N.
 
+endif # IP6_NF_IPTABLES
+
 endmenu
 
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9ad74e8bc5b..899e78051d8 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -38,10 +38,11 @@ config NF_CONNTRACK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+if NF_CONNTRACK
+
 config NF_CT_ACCT
 	bool "Connection tracking flow accounting"
 	depends on NETFILTER_ADVANCED
-	depends on NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
 	  keep per-flow packet and byte counters.
@@ -63,7 +64,6 @@ config NF_CT_ACCT
 config NF_CONNTRACK_MARK
 	bool  'Connection mark tracking support'
 	depends on NETFILTER_ADVANCED
-	depends on NF_CONNTRACK
 	help
 	  This option enables support for connection marks, used by the
 	  `CONNMARK' target and `connmark' match. Similar to the mark value
@@ -72,7 +72,7 @@ config NF_CONNTRACK_MARK
 
 config NF_CONNTRACK_SECMARK
 	bool  'Connection tracking security mark support'
-	depends on NF_CONNTRACK && NETWORK_SECMARK
+	depends on NETWORK_SECMARK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables security markings to be applied to
@@ -85,7 +85,6 @@ config NF_CONNTRACK_SECMARK
 
 config NF_CONNTRACK_EVENTS
 	bool "Connection tracking events"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  If this option is enabled, the connection tracking code will
@@ -96,7 +95,7 @@ config NF_CONNTRACK_EVENTS
 
 config NF_CT_PROTO_DCCP
 	tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on EXPERIMENTAL
 	depends on NETFILTER_ADVANCED
 	default IP_DCCP
 	help
@@ -107,11 +106,10 @@ config NF_CT_PROTO_DCCP
 
 config NF_CT_PROTO_GRE
 	tristate
-	depends on NF_CONNTRACK
 
 config NF_CT_PROTO_SCTP
 	tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on EXPERIMENTAL
 	depends on NETFILTER_ADVANCED
 	default IP_SCTP
 	help
@@ -123,7 +121,6 @@ config NF_CT_PROTO_SCTP
 
 config NF_CT_PROTO_UDPLITE
 	tristate 'UDP-Lite protocol connection tracking support'
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, the layer 3 independent connection
@@ -134,7 +131,6 @@ config NF_CT_PROTO_UDPLITE
 
 config NF_CONNTRACK_AMANDA
 	tristate "Amanda backup protocol support"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select TEXTSEARCH
 	select TEXTSEARCH_KMP
@@ -150,7 +146,6 @@ config NF_CONNTRACK_AMANDA
 
 config NF_CONNTRACK_FTP
 	tristate "FTP protocol support"
-	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Tracking FTP connections is problematic: special helpers are
@@ -165,7 +160,7 @@ config NF_CONNTRACK_FTP
 
 config NF_CONNTRACK_H323
 	tristate "H.323 protocol support"
-	depends on NF_CONNTRACK && (IPV6 || IPV6=n)
+	depends on (IPV6 || IPV6=n)
 	depends on NETFILTER_ADVANCED
 	help
 	  H.323 is a VoIP signalling protocol from ITU-T. As one of the most
@@ -185,7 +180,6 @@ config NF_CONNTRACK_H323
 
 config NF_CONNTRACK_IRC
 	tristate "IRC protocol support"
-	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  There is a commonly-used extension to IRC called
@@ -201,7 +195,6 @@ config NF_CONNTRACK_IRC
 
 config NF_CONNTRACK_NETBIOS_NS
 	tristate "NetBIOS name service protocol support"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  NetBIOS name service requests are sent as broadcast messages from an
@@ -221,7 +214,6 @@ config NF_CONNTRACK_NETBIOS_NS
 
 config NF_CONNTRACK_PPTP
 	tristate "PPtP protocol support"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_CT_PROTO_GRE
 	help
@@ -241,7 +233,7 @@ config NF_CONNTRACK_PPTP
 
 config NF_CONNTRACK_SANE
 	tristate "SANE protocol support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on EXPERIMENTAL
 	depends on NETFILTER_ADVANCED
 	help
 	  SANE is a protocol for remote access to scanners as implemented
@@ -255,7 +247,6 @@ config NF_CONNTRACK_SANE
 
 config NF_CONNTRACK_SIP
 	tristate "SIP protocol support"
-	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  SIP is an application-layer control protocol that can establish,
@@ -268,7 +259,6 @@ config NF_CONNTRACK_SIP
 
 config NF_CONNTRACK_TFTP
 	tristate "TFTP protocol support"
-	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
 	  TFTP connection tracking helper, this is required depending
@@ -280,7 +270,6 @@ config NF_CONNTRACK_TFTP
 
 config NF_CT_NETLINK
 	tristate 'Connection tracking netlink interface'
-	depends on NF_CONNTRACK
 	select NETFILTER_NETLINK
 	depends on NF_NAT=n || NF_NAT
 	default m if NETFILTER_ADVANCED=n
@@ -302,6 +291,8 @@ config NETFILTER_TPROXY
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+endif # NF_CONNTRACK
+
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
 	default m if NETFILTER_ADVANCED=n
@@ -309,11 +300,12 @@ config NETFILTER_XTABLES
 	  This is required if you intend to use any of ip_tables,
 	  ip6_tables or arp_tables.
 
+if NETFILTER_XTABLES
+
 # alphabetically ordered list of targets
 
 config NETFILTER_XT_TARGET_CLASSIFY
 	tristate '"CLASSIFY" target support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `CLASSIFY' target, which enables the user to set
@@ -326,7 +318,6 @@ config NETFILTER_XT_TARGET_CLASSIFY
 
 config NETFILTER_XT_TARGET_CONNMARK
 	tristate  '"CONNMARK" target support'
-	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
@@ -342,7 +333,7 @@ config NETFILTER_XT_TARGET_CONNMARK
 
 config NETFILTER_XT_TARGET_CONNSECMARK
 	tristate '"CONNSECMARK" target support'
-	depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
+	depends on NF_CONNTRACK && NF_CONNTRACK_SECMARK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  The CONNSECMARK target copies security markings from packets
@@ -354,7 +345,6 @@ config NETFILTER_XT_TARGET_CONNSECMARK
 
 config NETFILTER_XT_TARGET_DSCP
 	tristate '"DSCP" and "TOS" target support'
-	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NETFILTER_ADVANCED
 	help
@@ -371,7 +361,6 @@ config NETFILTER_XT_TARGET_DSCP
 
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
-	depends on NETFILTER_XTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `MARK' target, which allows you to create rules
@@ -385,7 +374,6 @@ config NETFILTER_XT_TARGET_MARK
 
 config NETFILTER_XT_TARGET_NFLOG
 	tristate '"NFLOG" target support'
-	depends on NETFILTER_XTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables the NFLOG target, which allows to LOG
@@ -397,7 +385,6 @@ config NETFILTER_XT_TARGET_NFLOG
 
 config NETFILTER_XT_TARGET_NFQUEUE
 	tristate '"NFQUEUE" target Support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This target replaced the old obsolete QUEUE target.
@@ -409,7 +396,6 @@ config NETFILTER_XT_TARGET_NFQUEUE
 
 config NETFILTER_XT_TARGET_NOTRACK
 	tristate  '"NOTRACK" target support'
-	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
@@ -424,7 +410,6 @@ config NETFILTER_XT_TARGET_NOTRACK
 
 config NETFILTER_XT_TARGET_RATEEST
 	tristate '"RATEEST" target support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `RATEEST' target, which allows to measure
@@ -450,7 +435,6 @@ config NETFILTER_XT_TARGET_TPROXY
 
 config NETFILTER_XT_TARGET_TRACE
 	tristate  '"TRACE" target support'
-	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
 	depends on NETFILTER_ADVANCED
 	help
@@ -463,7 +447,7 @@ config NETFILTER_XT_TARGET_TRACE
 
 config NETFILTER_XT_TARGET_SECMARK
 	tristate '"SECMARK" target support'
-	depends on NETFILTER_XTABLES && NETWORK_SECMARK
+	depends on NETWORK_SECMARK
 	default m if NETFILTER_ADVANCED=n
 	help
 	  The SECMARK target allows security marking of network
@@ -473,7 +457,7 @@ config NETFILTER_XT_TARGET_SECMARK
 
 config NETFILTER_XT_TARGET_TCPMSS
 	tristate '"TCPMSS" target support'
-	depends on NETFILTER_XTABLES && (IPV6 || IPV6=n)
+	depends on (IPV6 || IPV6=n)
 	default m if NETFILTER_ADVANCED=n
 	---help---
 	  This option adds a `TCPMSS' target, which allows you to alter the
@@ -500,7 +484,7 @@ config NETFILTER_XT_TARGET_TCPMSS
 
 config NETFILTER_XT_TARGET_TCPOPTSTRIP
 	tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NETFILTER_XTABLES
+	depends on EXPERIMENTAL
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NETFILTER_ADVANCED
 	help
@@ -509,7 +493,6 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
 
 config NETFILTER_XT_MATCH_COMMENT
 	tristate  '"comment" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `comment' dummy-match, which allows you to put
@@ -520,7 +503,6 @@ config NETFILTER_XT_MATCH_COMMENT
 
 config NETFILTER_XT_MATCH_CONNBYTES
 	tristate  '"connbytes" per-connection counter match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_CT_ACCT
@@ -533,7 +515,6 @@ config NETFILTER_XT_MATCH_CONNBYTES
 
 config NETFILTER_XT_MATCH_CONNLIMIT
 	tristate '"connlimit" match support"'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	---help---
@@ -542,7 +523,6 @@ config NETFILTER_XT_MATCH_CONNLIMIT
 
 config NETFILTER_XT_MATCH_CONNMARK
 	tristate  '"connmark" connection mark match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
@@ -556,7 +536,6 @@ config NETFILTER_XT_MATCH_CONNMARK
 
 config NETFILTER_XT_MATCH_CONNTRACK
 	tristate '"conntrack" connection tracking match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
@@ -570,7 +549,6 @@ config NETFILTER_XT_MATCH_CONNTRACK
 
 config NETFILTER_XT_MATCH_DCCP
 	tristate '"dccp" protocol match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	default IP_DCCP
 	help
@@ -583,7 +561,6 @@ config NETFILTER_XT_MATCH_DCCP
 
 config NETFILTER_XT_MATCH_DSCP
 	tristate '"dscp" and "tos" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `DSCP' match, which allows you to match against
@@ -599,7 +576,6 @@ config NETFILTER_XT_MATCH_DSCP
 
 config NETFILTER_XT_MATCH_ESP
 	tristate '"esp" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This match extension allows you to match a range of SPIs
@@ -609,7 +585,7 @@ config NETFILTER_XT_MATCH_ESP
 
 config NETFILTER_XT_MATCH_HASHLIMIT
 	tristate '"hashlimit" match support'
-	depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+	depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `hashlimit' match.
@@ -624,7 +600,6 @@ config NETFILTER_XT_MATCH_HASHLIMIT
 
 config NETFILTER_XT_MATCH_HELPER
 	tristate '"helper" match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	help
@@ -635,7 +610,6 @@ config NETFILTER_XT_MATCH_HELPER
 
 config NETFILTER_XT_MATCH_IPRANGE
 	tristate '"iprange" address range match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	---help---
 	This option adds a "iprange" match, which allows you to match based on
@@ -646,7 +620,6 @@ config NETFILTER_XT_MATCH_IPRANGE
 
 config NETFILTER_XT_MATCH_LENGTH
 	tristate '"length" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option allows you to match the length of a packet against a
@@ -656,7 +629,6 @@ config NETFILTER_XT_MATCH_LENGTH
 
 config NETFILTER_XT_MATCH_LIMIT
 	tristate '"limit" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  limit matching allows you to control the rate at which a rule can be
@@ -667,7 +639,6 @@ config NETFILTER_XT_MATCH_LIMIT
 
 config NETFILTER_XT_MATCH_MAC
 	tristate '"mac" address match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  MAC matching allows you to match packets based on the source
@@ -677,7 +648,6 @@ config NETFILTER_XT_MATCH_MAC
 
 config NETFILTER_XT_MATCH_MARK
 	tristate '"mark" match support'
-	depends on NETFILTER_XTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Netfilter mark matching allows you to match packets based on the
@@ -688,7 +658,6 @@ config NETFILTER_XT_MATCH_MARK
 
 config NETFILTER_XT_MATCH_MULTIPORT
 	tristate '"multiport" Multiple port match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  Multiport matching allows you to match TCP or UDP packets based on
@@ -699,7 +668,6 @@ config NETFILTER_XT_MATCH_MULTIPORT
 
 config NETFILTER_XT_MATCH_OWNER
 	tristate '"owner" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	---help---
 	Socket owner matching allows you to match locally-generated packets
@@ -708,7 +676,7 @@ config NETFILTER_XT_MATCH_OWNER
 
 config NETFILTER_XT_MATCH_POLICY
 	tristate 'IPsec "policy" match support'
-	depends on NETFILTER_XTABLES && XFRM
+	depends on XFRM
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Policy matching allows you to match packets based on the
@@ -719,7 +687,7 @@ config NETFILTER_XT_MATCH_POLICY
 
 config NETFILTER_XT_MATCH_PHYSDEV
 	tristate '"physdev" match support'
-	depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
+	depends on BRIDGE && BRIDGE_NETFILTER
 	depends on NETFILTER_ADVANCED
 	help
 	  Physdev packet matching matches against the physical bridge ports
@@ -729,7 +697,6 @@ config NETFILTER_XT_MATCH_PHYSDEV
 
 config NETFILTER_XT_MATCH_PKTTYPE
 	tristate '"pkttype" packet type match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  Packet type matching allows you to match a packet by
@@ -742,7 +709,6 @@ config NETFILTER_XT_MATCH_PKTTYPE
 
 config NETFILTER_XT_MATCH_QUOTA
 	tristate '"quota" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `quota' match, which allows to match on a
@@ -753,7 +719,6 @@ config NETFILTER_XT_MATCH_QUOTA
 
 config NETFILTER_XT_MATCH_RATEEST
 	tristate '"rateest" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	select NETFILTER_XT_TARGET_RATEEST
 	help
@@ -764,7 +729,6 @@ config NETFILTER_XT_MATCH_RATEEST
 
 config NETFILTER_XT_MATCH_REALM
 	tristate  '"realm" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	select NET_CLS_ROUTE
 	help
@@ -779,7 +743,6 @@ config NETFILTER_XT_MATCH_REALM
 
 config NETFILTER_XT_MATCH_RECENT
 	tristate '"recent" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	---help---
 	This match is used for creating one or many lists of recently
@@ -797,7 +760,7 @@ config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
 
 config NETFILTER_XT_MATCH_SCTP
 	tristate  '"sctp" protocol match support (EXPERIMENTAL)'
-	depends on NETFILTER_XTABLES && EXPERIMENTAL
+	depends on EXPERIMENTAL
 	depends on NETFILTER_ADVANCED
 	default IP_SCTP
 	help
@@ -825,7 +788,6 @@ config NETFILTER_XT_MATCH_SOCKET
 
 config NETFILTER_XT_MATCH_STATE
 	tristate '"state" match support'
-	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	help
@@ -837,7 +799,6 @@ config NETFILTER_XT_MATCH_STATE
 
 config NETFILTER_XT_MATCH_STATISTIC
 	tristate '"statistic" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `statistic' match, which allows you to match
@@ -847,7 +808,6 @@ config NETFILTER_XT_MATCH_STATISTIC
 
 config NETFILTER_XT_MATCH_STRING
 	tristate  '"string" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	select TEXTSEARCH
 	select TEXTSEARCH_KMP
@@ -861,7 +821,6 @@ config NETFILTER_XT_MATCH_STRING
 
 config NETFILTER_XT_MATCH_TCPMSS
 	tristate '"tcpmss" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `tcpmss' match, which allows you to examine the
@@ -872,7 +831,6 @@ config NETFILTER_XT_MATCH_TCPMSS
 
 config NETFILTER_XT_MATCH_TIME
 	tristate '"time" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	---help---
 	  This option adds a "time" match, which allows you to match based on
@@ -887,7 +845,6 @@ config NETFILTER_XT_MATCH_TIME
 
 config NETFILTER_XT_MATCH_U32
 	tristate '"u32" match support'
-	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
 	---help---
 	  u32 allows you to extract quantities of up to 4 bytes from a packet,
@@ -899,5 +856,6 @@ config NETFILTER_XT_MATCH_U32
 
 	  Details and examples are in the kernel module source.
 
-endmenu
+endif # NETFILTER_XTABLES
 
+endmenu
-- 
cgit v1.2.3-70-g09d2


From f7108a20dee44e5bb037f9e48f6a207b42e6ae1c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:18 +0200
Subject: netfilter: xtables: move extension arguments into compound structure
 (1/6)

The function signatures for Xtables extensions have grown over time.
It involves a lot of typing/replication, and also a bit of stack space
even if they are not used. Realize an NFWS2008 idea and pack them into
structs. The skb remains outside of the struct so gcc can continue to
apply its optimizations.

This patch does this for match extensions' match functions.

A few ambiguities have also been addressed. The "offset" parameter for
example has been renamed to "fragoff" (there are so many different
offsets already) and "protoff" to "thoff" (there is more than just one
protocol here, so clarify).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h   | 28 ++++++++++++++++------
 net/bridge/netfilter/ebt_802_3.c     |  6 ++---
 net/bridge/netfilter/ebt_among.c     |  6 ++---
 net/bridge/netfilter/ebt_arp.c       |  6 ++---
 net/bridge/netfilter/ebt_ip.c        |  6 ++---
 net/bridge/netfilter/ebt_ip6.c       |  6 ++---
 net/bridge/netfilter/ebt_limit.c     |  6 ++---
 net/bridge/netfilter/ebt_mark_m.c    |  6 ++---
 net/bridge/netfilter/ebt_pkttype.c   |  7 ++----
 net/bridge/netfilter/ebt_stp.c       |  6 ++---
 net/bridge/netfilter/ebt_vlan.c      |  6 ++---
 net/bridge/netfilter/ebtables.c      | 16 ++++++++-----
 net/ipv4/netfilter/ip_tables.c       | 46 ++++++++++++++++--------------------
 net/ipv4/netfilter/ipt_addrtype.c    | 18 +++++---------
 net/ipv4/netfilter/ipt_ah.c          | 14 ++++-------
 net/ipv4/netfilter/ipt_ecn.c         |  9 +++----
 net/ipv4/netfilter/ipt_ttl.c         |  7 ++----
 net/ipv6/netfilter/ip6_tables.c      | 44 +++++++++++++---------------------
 net/ipv6/netfilter/ip6t_ah.c         | 11 ++++-----
 net/ipv6/netfilter/ip6t_eui64.c      |  9 +++----
 net/ipv6/netfilter/ip6t_frag.c       | 11 ++++-----
 net/ipv6/netfilter/ip6t_hbh.c        | 13 ++++------
 net/ipv6/netfilter/ip6t_hl.c         |  7 ++----
 net/ipv6/netfilter/ip6t_ipv6header.c |  7 ++----
 net/ipv6/netfilter/ip6t_mh.c         | 15 +++++-------
 net/ipv6/netfilter/ip6t_rt.c         | 11 ++++-----
 net/netfilter/xt_comment.c           |  5 +---
 net/netfilter/xt_connbytes.c         |  7 ++----
 net/netfilter/xt_connlimit.c         | 17 ++++++-------
 net/netfilter/xt_connmark.c          | 14 ++++-------
 net/netfilter/xt_conntrack.c         | 22 +++++++----------
 net/netfilter/xt_dccp.c              | 16 ++++++-------
 net/netfilter/xt_dscp.c              | 30 ++++++++---------------
 net/netfilter/xt_esp.c               | 13 ++++------
 net/netfilter/xt_hashlimit.c         | 22 +++++++----------
 net/netfilter/xt_helper.c            |  7 ++----
 net/netfilter/xt_iprange.c           | 21 +++++-----------
 net/netfilter/xt_length.c            | 14 ++++-------
 net/netfilter/xt_limit.c             |  7 ++----
 net/netfilter/xt_mac.c               |  7 ++----
 net/netfilter/xt_mark.c              | 13 ++++------
 net/netfilter/xt_multiport.c         | 26 ++++++++------------
 net/netfilter/xt_owner.c             | 21 +++++-----------
 net/netfilter/xt_physdev.c           |  7 ++----
 net/netfilter/xt_pkttype.c           | 11 ++++-----
 net/netfilter/xt_policy.c            | 11 ++++-----
 net/netfilter/xt_quota.c             |  7 ++----
 net/netfilter/xt_rateest.c           | 12 +++-------
 net/netfilter/xt_realm.c             |  7 ++----
 net/netfilter/xt_recent.c            | 17 ++++++-------
 net/netfilter/xt_sctp.c              | 16 ++++++-------
 net/netfilter/xt_socket.c            | 11 ++-------
 net/netfilter/xt_state.c             |  7 ++----
 net/netfilter/xt_statistic.c         |  7 ++----
 net/netfilter/xt_string.c            |  9 +++----
 net/netfilter/xt_tcpmss.c            | 13 ++++------
 net/netfilter/xt_tcpudp.c            | 36 ++++++++++++----------------
 net/netfilter/xt_time.c              |  6 ++---
 net/netfilter/xt_u32.c               |  7 ++----
 59 files changed, 286 insertions(+), 487 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 85aa42785a5..bcd40ec8325 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -173,6 +173,26 @@ struct xt_counters_info
 
 #include <linux/netdevice.h>
 
+/**
+ * struct xt_match_param - parameters for match extensions' match functions
+ *
+ * @in:		input netdevice
+ * @out:	output netdevice
+ * @match:	struct xt_match through which this function was invoked
+ * @matchinfo:	per-match data
+ * @fragoff:	packet is a fragment, this is the data offset
+ * @thoff:	position of transport header relative to skb->data
+ * @hotdrop:	drop packet if we had inspection problems
+ */
+struct xt_match_param {
+	const struct net_device *in, *out;
+	const struct xt_match *match;
+	const void *matchinfo;
+	int fragoff;
+	unsigned int thoff;
+	bool *hotdrop;
+};
+
 struct xt_match
 {
 	struct list_head list;
@@ -185,13 +205,7 @@ struct xt_match
 	   non-linear skb, using skb_header_pointer and
 	   skb_ip_make_writable. */
 	bool (*match)(const struct sk_buff *skb,
-		      const struct net_device *in,
-		      const struct net_device *out,
-		      const struct xt_match *match,
-		      const void *matchinfo,
-		      int offset,
-		      unsigned int protoff,
-		      bool *hotdrop);
+		      const struct xt_match_param *);
 
 	/* Called when user tries to insert an entry of this type. */
 	/* Should return true or false. */
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 6fc2a59e09a..c9e1bc14951 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -13,11 +13,9 @@
 #include <linux/netfilter_bridge/ebt_802_3.h>
 
 static bool
-ebt_802_3_mt(const struct sk_buff *skb, const struct net_device *in,
-	     const struct net_device *out, const struct xt_match *match,
-	     const void *data, int offset, unsigned int protoff, bool *hotdrop)
+ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ebt_802_3_info *info = data;
+	const struct ebt_802_3_info *info = par->matchinfo;
 	const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
 	__be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type;
 
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 084559e1840..0ad0db3e815 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -128,11 +128,9 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
 }
 
 static bool
-ebt_among_mt(const struct sk_buff *skb, const struct net_device *in,
-	     const struct net_device *out, const struct xt_match *match,
-	     const void *data, int offset, unsigned int protoff, bool *hotdrop)
+ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ebt_among_info *info = data;
+	const struct ebt_among_info *info = par->matchinfo;
 	const char *dmac, *smac;
 	const struct ebt_mac_wormhash *wh_dst, *wh_src;
 	__be32 dip = 0, sip = 0;
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index a073dffe7a1..1ff8fa3a9e7 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -16,11 +16,9 @@
 #include <linux/netfilter_bridge/ebt_arp.h>
 
 static bool
-ebt_arp_mt(const struct sk_buff *skb, const struct net_device *in,
-	   const struct net_device *out, const struct xt_match *match,
-	   const void *data, int offset, unsigned int protoff, bool *hotdrop)
+ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ebt_arp_info *info = data;
+	const struct ebt_arp_info *info = par->matchinfo;
 	const struct arphdr *ah;
 	struct arphdr _arph;
 
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index b42c7ce799b..c70ea39840b 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -25,11 +25,9 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip_mt(const struct sk_buff *skb, const struct net_device *in,
-	  const struct net_device *out, const struct xt_match *match,
-	  const void *data, int offset, unsigned int protoff, bool *hotdrop)
+ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ebt_ip_info *info = data;
+	const struct ebt_ip_info *info = par->matchinfo;
 	const struct iphdr *ih;
 	struct iphdr _iph;
 	const struct tcpudphdr *pptr;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 7bd98312967..5acee02de72 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -28,11 +28,9 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip6_mt(const struct sk_buff *skb, const struct net_device *in,
-	   const struct net_device *out, const struct xt_match *match,
-	   const void *data, int offset, unsigned int protoff, bool *hotdrop)
+ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ebt_ip6_info *info = data;
+	const struct ebt_ip6_info *info = par->matchinfo;
 	const struct ipv6hdr *ih6;
 	struct ipv6hdr _ip6h;
 	const struct tcpudphdr *pptr;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 58aaaa14906..9a3ec8cadaa 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -31,11 +31,9 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-ebt_limit_mt(const struct sk_buff *skb, const struct net_device *in,
-	     const struct net_device *out, const struct xt_match *match,
-	     const void *data, int offset, unsigned int protoff, bool *hotdrop)
+ebt_limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	struct ebt_limit_info *info = (void *)data;
+	struct ebt_limit_info *info = (void *)par->matchinfo;
 	unsigned long now = jiffies;
 
 	spin_lock_bh(&limit_lock);
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index aa6781c7f98..5b22ef96127 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -13,11 +13,9 @@
 #include <linux/netfilter_bridge/ebt_mark_m.h>
 
 static bool
-ebt_mark_mt(const struct sk_buff *skb, const struct net_device *in,
-	    const struct net_device *out, const struct xt_match *match,
-	    const void *data, int offset, unsigned int protoff, bool *hotdrop)
+ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ebt_mark_m_info *info = data;
+	const struct ebt_mark_m_info *info = par->matchinfo;
 
 	if (info->bitmask & EBT_MARK_OR)
 		return !!(skb->mark & info->mask) ^ info->invert;
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 1c04ce5a52c..b756f88fb10 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -13,12 +13,9 @@
 #include <linux/netfilter_bridge/ebt_pkttype.h>
 
 static bool
-ebt_pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
-	       const struct net_device *out, const struct xt_match *match,
-	       const void *data, int offset, unsigned int protoff,
-	       bool *hotdrop)
+ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ebt_pkttype_info *info = data;
+	const struct ebt_pkttype_info *info = par->matchinfo;
 
 	return (skb->pkt_type == info->pkt_type) ^ info->invert;
 }
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 28bb48b67a8..06d777c62c3 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -120,11 +120,9 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
 }
 
 static bool
-ebt_stp_mt(const struct sk_buff *skb, const struct net_device *in,
-	   const struct net_device *out, const struct xt_match *match,
-	   const void *data, int offset, unsigned int protoff, bool *hotdrop)
+ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ebt_stp_info *info = data;
+	const struct ebt_stp_info *info = par->matchinfo;
 	const struct stp_header *sp;
 	struct stp_header _stph;
 	const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 5addef6d62f..b05b4a81834 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -41,11 +41,9 @@ MODULE_LICENSE("GPL");
 #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; }
 
 static bool
-ebt_vlan_mt(const struct sk_buff *skb, const struct net_device *in,
-	    const struct net_device *out, const struct xt_match *match,
-	    const void *data, int offset, unsigned int protoff, bool *hotdrop)
+ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ebt_vlan_info *info = data;
+	const struct ebt_vlan_info *info = par->matchinfo;
 	const struct vlan_hdr *fp;
 	struct vlan_hdr _frame;
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 7ee72b71d3c..f8e1822f38d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -74,11 +74,11 @@ static inline int ebt_do_watcher (struct ebt_entry_watcher *w,
 }
 
 static inline int ebt_do_match (struct ebt_entry_match *m,
-   const struct sk_buff *skb, const struct net_device *in,
-   const struct net_device *out, bool *hotdrop)
+   const struct sk_buff *skb, struct xt_match_param *par)
 {
-	return m->u.match->match(skb, in, out, m->u.match,
-	       m->data, 0, 0, hotdrop);
+	par->match     = m->u.match;
+	par->matchinfo = m->data;
+	return m->u.match->match(skb, par);
 }
 
 static inline int ebt_dev_check(char *entry, const struct net_device *device)
@@ -155,6 +155,11 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	char *base;
 	struct ebt_table_info *private;
 	bool hotdrop = false;
+	struct xt_match_param mtpar;
+
+	mtpar.in      = in;
+	mtpar.out     = out;
+	mtpar.hotdrop = &hotdrop;
 
 	read_lock_bh(&table->lock);
 	private = table->private;
@@ -175,8 +180,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 		if (ebt_basic_match(point, eth_hdr(skb), in, out))
 			goto letscontinue;
 
-		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb,
-		    in, out, &hotdrop) != 0)
+		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &mtpar) != 0)
 			goto letscontinue;
 		if (hotdrop) {
 			read_unlock_bh(&table->lock);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b4c74a7a807..99fdb59454f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -186,16 +186,14 @@ ipt_error(struct sk_buff *skb,
 
 /* Performance critical - called for every packet */
 static inline bool
-do_match(struct ipt_entry_match *m,
-	      const struct sk_buff *skb,
-	      const struct net_device *in,
-	      const struct net_device *out,
-	      int offset,
-	      bool *hotdrop)
+do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
+	 struct xt_match_param *par)
 {
+	par->match     = m->u.kernel.match;
+	par->matchinfo = m->data;
+
 	/* Stop iteration if it doesn't match */
-	if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
-				      offset, ip_hdrlen(skb), hotdrop))
+	if (!m->u.kernel.match->match(skb, par))
 		return true;
 	else
 		return false;
@@ -326,7 +324,6 @@ ipt_do_table(struct sk_buff *skb,
 	     struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
-	u_int16_t offset;
 	const struct iphdr *ip;
 	u_int16_t datalen;
 	bool hotdrop = false;
@@ -336,6 +333,7 @@ ipt_do_table(struct sk_buff *skb,
 	void *table_base;
 	struct ipt_entry *e, *back;
 	struct xt_table_info *private;
+	struct xt_match_param mtpar;
 
 	/* Initialization */
 	ip = ip_hdr(skb);
@@ -348,7 +346,11 @@ ipt_do_table(struct sk_buff *skb,
 	 * things we don't know, ie. tcp syn flag or ports).  If the
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
-	offset = ntohs(ip->frag_off) & IP_OFFSET;
+	mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+	mtpar.thoff   = ip_hdrlen(skb);
+	mtpar.hotdrop = &hotdrop;
+	mtpar.in      = in;
+	mtpar.out     = out;
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
@@ -362,12 +364,11 @@ ipt_do_table(struct sk_buff *skb,
 	do {
 		IP_NF_ASSERT(e);
 		IP_NF_ASSERT(back);
-		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
+		if (ip_packet_match(ip, indev, outdev,
+		    &e->ip, mtpar.fragoff)) {
 			struct ipt_entry_target *t;
 
-			if (IPT_MATCH_ITERATE(e, do_match,
-					      skb, in, out,
-					      offset, &hotdrop) != 0)
+			if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
 				goto no_match;
 
 			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
@@ -2116,30 +2117,23 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp_match(const struct sk_buff *skb,
-	   const struct net_device *in,
-	   const struct net_device *out,
-	   const struct xt_match *match,
-	   const void *matchinfo,
-	   int offset,
-	   unsigned int protoff,
-	   bool *hotdrop)
+icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct icmphdr *ic;
 	struct icmphdr _icmph;
-	const struct ipt_icmp *icmpinfo = matchinfo;
+	const struct ipt_icmp *icmpinfo = par->matchinfo;
 
 	/* Must not be a fragment. */
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
+	ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
 	if (ic == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 2c9d88a6c83..e60995e4c20 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -30,12 +30,9 @@ static inline bool match_type(const struct net_device *dev, __be32 addr,
 }
 
 static bool
-addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-	       const struct net_device *out, const struct xt_match *match,
-	       const void *matchinfo, int offset, unsigned int protoff,
-	       bool *hotdrop)
+addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_addrtype_info *info = matchinfo;
+	const struct ipt_addrtype_info *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
 	bool ret = true;
 
@@ -50,20 +47,17 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in,
-	       const struct net_device *out, const struct xt_match *match,
-	       const void *matchinfo, int offset, unsigned int protoff,
-	       bool *hotdrop)
+addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_addrtype_info_v1 *info = matchinfo;
+	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
 	const struct net_device *dev = NULL;
 	bool ret = true;
 
 	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
-		dev = in;
+		dev = par->in;
 	else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
-		dev = out;
+		dev = par->out;
 
 	if (info->source)
 		ret &= match_type(dev, iph->saddr, info->source) ^
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index e2e993edd66..2fce19ef4f3 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -36,27 +36,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool
-ah_mt(const struct sk_buff *skb, const struct net_device *in,
-      const struct net_device *out, const struct xt_match *match,
-      const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct ip_auth_hdr _ahdr;
 	const struct ip_auth_hdr *ah;
-	const struct ipt_ah *ahinfo = matchinfo;
+	const struct ipt_ah *ahinfo = par->matchinfo;
 
 	/* Must not be a fragment. */
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	ah = skb_header_pointer(skb, protoff,
-				sizeof(_ahdr), &_ahdr);
+	ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr);
 	if (ah == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil AH tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 2c45b4be7c3..06915463150 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,12 +67,9 @@ static inline bool match_tcp(const struct sk_buff *skb,
 	return true;
 }
 
-static bool
-ecn_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_ecn_info *info = matchinfo;
+	const struct ipt_ecn_info *info = par->matchinfo;
 
 	if (info->operation & IPT_ECN_OP_MATCH_IP)
 		if (!match_ip(skb, info))
@@ -81,7 +78,7 @@ ecn_mt(const struct sk_buff *skb, const struct net_device *in,
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
 		if (ip_hdr(skb)->protocol != IPPROTO_TCP)
 			return false;
-		if (!match_tcp(skb, info, hotdrop))
+		if (!match_tcp(skb, info, par->hotdrop))
 			return false;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index d4c3fdc2a79..297f1cbf4ff 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -18,12 +18,9 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: IPv4 TTL field match");
 MODULE_LICENSE("GPL");
 
-static bool
-ttl_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_ttl_info *info = matchinfo;
+	const struct ipt_ttl_info *info = par->matchinfo;
 	const u8 ttl = ip_hdr(skb)->ttl;
 
 	switch (info->mode) {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 12c41b8d365..cf2c5370a4e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -215,17 +215,14 @@ ip6t_error(struct sk_buff *skb,
 
 /* Performance critical - called for every packet */
 static inline bool
-do_match(struct ip6t_entry_match *m,
-	      const struct sk_buff *skb,
-	      const struct net_device *in,
-	      const struct net_device *out,
-	      int offset,
-	      unsigned int protoff,
-	      bool *hotdrop)
+do_match(struct ip6t_entry_match *m, const struct sk_buff *skb,
+	 struct xt_match_param *par)
 {
+	par->match     = m->u.kernel.match;
+	par->matchinfo = m->data;
+
 	/* Stop iteration if it doesn't match */
-	if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
-				      offset, protoff, hotdrop))
+	if (!m->u.kernel.match->match(skb, par))
 		return true;
 	else
 		return false;
@@ -355,8 +352,6 @@ ip6t_do_table(struct sk_buff *skb,
 	      struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
-	int offset = 0;
-	unsigned int protoff = 0;
 	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
@@ -364,6 +359,7 @@ ip6t_do_table(struct sk_buff *skb,
 	void *table_base;
 	struct ip6t_entry *e, *back;
 	struct xt_table_info *private;
+	struct xt_match_param mtpar;
 
 	/* Initialization */
 	indev = in ? in->name : nulldevname;
@@ -374,6 +370,9 @@ ip6t_do_table(struct sk_buff *skb,
 	 * things we don't know, ie. tcp syn flag or ports).  If the
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
+	mtpar.hotdrop = &hotdrop;
+	mtpar.in      = in;
+	mtpar.out     = out;
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
@@ -388,12 +387,10 @@ ip6t_do_table(struct sk_buff *skb,
 		IP_NF_ASSERT(e);
 		IP_NF_ASSERT(back);
 		if (ip6_packet_match(skb, indev, outdev, &e->ipv6,
-			&protoff, &offset, &hotdrop)) {
+			&mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
 			struct ip6t_entry_target *t;
 
-			if (IP6T_MATCH_ITERATE(e, do_match,
-					       skb, in, out,
-					       offset, protoff, &hotdrop) != 0)
+			if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
 				goto no_match;
 
 			ADD_COUNTER(e->counters,
@@ -2141,30 +2138,23 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp6_match(const struct sk_buff *skb,
-	   const struct net_device *in,
-	   const struct net_device *out,
-	   const struct xt_match *match,
-	   const void *matchinfo,
-	   int offset,
-	   unsigned int protoff,
-	   bool *hotdrop)
+icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct icmp6hdr *ic;
 	struct icmp6hdr _icmph;
-	const struct ip6t_icmp *icmpinfo = matchinfo;
+	const struct ip6t_icmp *icmpinfo = par->matchinfo;
 
 	/* Must not be a fragment. */
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
+	ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
 	if (ic == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 061f89beeb6..a04f2b8396e 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -36,14 +36,11 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool
-ah_mt6(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct ip_auth_hdr _ah;
 	const struct ip_auth_hdr *ah;
-	const struct ip6t_ah *ahinfo = matchinfo;
+	const struct ip6t_ah *ahinfo = par->matchinfo;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
 	int err;
@@ -51,13 +48,13 @@ ah_mt6(const struct sk_buff *skb, const struct net_device *in,
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*hotdrop = true;
+			*par->hotdrop = true;
 		return false;
 	}
 
 	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
 	if (ah == NULL) {
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index ba38df1116f..db610bacbcc 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -20,18 +20,15 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-eui64_mt6(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+eui64_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	unsigned char eui64[8];
 	int i = 0;
 
 	if (!(skb_mac_header(skb) >= skb->head &&
 	      skb_mac_header(skb) + ETH_HLEN <= skb->data) &&
-	    offset != 0) {
-		*hotdrop = true;
+	    par->fragoff != 0) {
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 972f699af22..6951d0dacf4 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -35,27 +35,24 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 }
 
 static bool
-frag_mt6(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct frag_hdr _frag;
 	const struct frag_hdr *fh;
-	const struct ip6t_frag *fraginfo = matchinfo;
+	const struct ip6t_frag *fraginfo = par->matchinfo;
 	unsigned int ptr;
 	int err;
 
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*hotdrop = true;
+			*par->hotdrop = true;
 		return false;
 	}
 
 	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
 	if (fh == NULL) {
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index d5edb51a595..d3351978819 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -42,14 +42,11 @@ MODULE_ALIAS("ip6t_dst");
  */
 
 static bool
-hbh_mt6(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff,
-        bool *hotdrop)
+hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct ipv6_opt_hdr _optsh;
 	const struct ipv6_opt_hdr *oh;
-	const struct ip6t_opts *optinfo = matchinfo;
+	const struct ip6t_opts *optinfo = par->matchinfo;
 	unsigned int temp;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
@@ -61,16 +58,16 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in,
 	unsigned int optlen;
 	int err;
 
-	err = ipv6_find_hdr(skb, &ptr, match->data, NULL);
+	err = ipv6_find_hdr(skb, &ptr, par->match->data, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*hotdrop = true;
+			*par->hotdrop = true;
 		return false;
 	}
 
 	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
 	if (oh == NULL) {
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 25c1eb92fac..c964dca1132 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -19,12 +19,9 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
 MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match");
 MODULE_LICENSE("GPL");
 
-static bool
-hl_mt6(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ip6t_hl_info *info = matchinfo;
+	const struct ip6t_hl_info *info = par->matchinfo;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 
 	switch (info->mode) {
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index ef0661aacea..6aaca511d47 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -27,12 +27,9 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in,
-               const struct net_device *out, const struct xt_match *match,
-               const void *matchinfo, int offset, unsigned int protoff,
-               bool *hotdrop)
+ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ip6t_ipv6header_info *info = matchinfo;
+	const struct ip6t_ipv6header_info *info = par->matchinfo;
 	unsigned int temp;
 	int len;
 	u8 nexthdr;
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index dd876274ff6..2803258b6d0 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -37,32 +37,29 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
 	return (type >= min && type <= max) ^ invert;
 }
 
-static bool
-mh_mt6(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct ip6_mh _mh;
 	const struct ip6_mh *mh;
-	const struct ip6t_mh *mhinfo = matchinfo;
+	const struct ip6t_mh *mhinfo = par->matchinfo;
 
 	/* Must not be a fragment. */
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	mh = skb_header_pointer(skb, protoff, sizeof(_mh), &_mh);
+	mh = skb_header_pointer(skb, par->thoff, sizeof(_mh), &_mh);
 	if (mh == NULL) {
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil MH tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
 		duprintf("Dropping invalid MH Payload Proto: %u\n",
 			 mh->ip6mh_proto);
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 7c544ae591d..9cf4b8a37af 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -36,14 +36,11 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 	return r;
 }
 
-static bool
-rt_mt6(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct ipv6_rt_hdr _route;
 	const struct ipv6_rt_hdr *rh;
-	const struct ip6t_rt *rtinfo = matchinfo;
+	const struct ip6t_rt *rtinfo = par->matchinfo;
 	unsigned int temp;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
@@ -55,13 +52,13 @@ rt_mt6(const struct sk_buff *skb, const struct net_device *in,
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*hotdrop = true;
+			*par->hotdrop = true;
 		return false;
 	}
 
 	rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
 	if (rh == NULL) {
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index fa211b2ab87..bd7aa57af42 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -16,10 +16,7 @@ MODULE_ALIAS("ipt_comment");
 MODULE_ALIAS("ip6t_comment");
 
 static bool
-comment_mt(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protooff,
-           bool *hotdrop)
+comment_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	/* We always match */
 	return true;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index d2cd22a49c9..30c19b5fe90 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -17,12 +17,9 @@ MODULE_ALIAS("ipt_connbytes");
 MODULE_ALIAS("ip6t_connbytes");
 
 static bool
-connbytes_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_connbytes_info *sinfo = matchinfo;
+	const struct xt_connbytes_info *sinfo = par->matchinfo;
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	u_int64_t what = 0;	/* initialize to make gcc happy */
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index bd00830ff69..8b8f70e7664 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -178,12 +178,9 @@ static int count_them(struct xt_connlimit_data *data,
 }
 
 static bool
-connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_connlimit_info *info = matchinfo;
+	const struct xt_connlimit_info *info = par->matchinfo;
 	union nf_inet_addr addr;
 	struct nf_conntrack_tuple tuple;
 	const struct nf_conntrack_tuple *tuple_ptr = &tuple;
@@ -195,10 +192,10 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
 	if (ct != NULL)
 		tuple_ptr = &ct->tuplehash[0].tuple;
 	else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-				    match->family, &tuple))
+				    par->match->family, &tuple))
 		goto hotdrop;
 
-	if (match->family == NFPROTO_IPV6) {
+	if (par->match->family == NFPROTO_IPV6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
 		memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
 	} else {
@@ -208,19 +205,19 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
 
 	spin_lock_bh(&info->data->lock);
 	connections = count_them(info->data, tuple_ptr, &addr,
-	                         &info->mask, match);
+	                         &info->mask, par->match);
 	spin_unlock_bh(&info->data->lock);
 
 	if (connections < 0) {
 		/* kmalloc failed, drop it entirely */
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
 	return (connections > info->limit) ^ info->inverse;
 
  hotdrop:
-	*hotdrop = true;
+	*par->hotdrop = true;
 	return false;
 }
 
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 0577b8ff4e1..df4f4a865a5 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -34,12 +34,9 @@ MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
 
 static bool
-connmark_mt(const struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, const struct xt_match *match,
-            const void *matchinfo, int offset, unsigned int protoff,
-            bool *hotdrop)
+connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_connmark_mtinfo1 *info = matchinfo;
+	const struct xt_connmark_mtinfo1 *info = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
 	const struct nf_conn *ct;
 
@@ -51,12 +48,9 @@ connmark_mt(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-               const struct net_device *out, const struct xt_match *match,
-               const void *matchinfo, int offset, unsigned int protoff,
-               bool *hotdrop)
+connmark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_connmark_info *info = matchinfo;
+	const struct xt_connmark_info *info = par->matchinfo;
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 392b457f9c2..13a7e4eacdf 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -25,12 +25,9 @@ MODULE_ALIAS("ipt_conntrack");
 MODULE_ALIAS("ip6t_conntrack");
 
 static bool
-conntrack_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-                const struct net_device *out, const struct xt_match *match,
-                const void *matchinfo, int offset, unsigned int protoff,
-                bool *hotdrop)
+conntrack_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_conntrack_info *sinfo = matchinfo;
+	const struct xt_conntrack_info *sinfo = par->matchinfo;
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
@@ -205,12 +202,9 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info,
 }
 
 static bool
-conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_conntrack_mtinfo1 *info = matchinfo;
+	const struct xt_conntrack_mtinfo1 *info = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
 	const struct nf_conn *ct;
 	unsigned int statebit;
@@ -244,22 +238,22 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
 		return false;
 
 	if (info->match_flags & XT_CONNTRACK_ORIGSRC)
-		if (conntrack_mt_origsrc(ct, info, match->family) ^
+		if (conntrack_mt_origsrc(ct, info, par->match->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_ORIGDST)
-		if (conntrack_mt_origdst(ct, info, match->family) ^
+		if (conntrack_mt_origdst(ct, info, par->match->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_ORIGDST))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_REPLSRC)
-		if (conntrack_mt_replsrc(ct, info, match->family) ^
+		if (conntrack_mt_replsrc(ct, info, par->match->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_REPLSRC))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_REPLDST)
-		if (conntrack_mt_repldst(ct, info, match->family) ^
+		if (conntrack_mt_repldst(ct, info, par->match->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_REPLDST))
 			return false;
 
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 87971f47132..7aa30bb9105 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -93,20 +93,18 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
 }
 
 static bool
-dccp_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_dccp_info *info = matchinfo;
+	const struct xt_dccp_info *info = par->matchinfo;
 	const struct dccp_hdr *dh;
 	struct dccp_hdr _dh;
 
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh);
+	dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh);
 	if (dh == NULL) {
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
@@ -118,8 +116,8 @@ dccp_mt(const struct sk_buff *skb, const struct net_device *in,
 			XT_DCCP_DEST_PORTS, info->flags, info->invflags)
 		&& DCCHECK(match_types(dh, info->typemask),
 			   XT_DCCP_TYPE, info->flags, info->invflags)
-		&& DCCHECK(match_option(info->option, skb, protoff, dh,
-					hotdrop),
+		&& DCCHECK(match_option(info->option, skb, par->thoff, dh,
+					par->hotdrop),
 			   XT_DCCP_OPTION, info->flags, info->invflags);
 }
 
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 7f03aa13a95..57d61206135 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -26,23 +26,18 @@ MODULE_ALIAS("ipt_tos");
 MODULE_ALIAS("ip6t_tos");
 
 static bool
-dscp_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_dscp_info *info = matchinfo;
+	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	return (dscp == info->dscp) ^ !!info->invert;
 }
 
 static bool
-dscp_mt6(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_dscp_info *info = matchinfo;
+	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	return (dscp == info->dscp) ^ !!info->invert;
@@ -63,24 +58,19 @@ dscp_mt_check(const char *tablename, const void *info,
 	return true;
 }
 
-static bool tos_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-                      const struct net_device *out,
-                      const struct xt_match *match, const void *matchinfo,
-                      int offset, unsigned int protoff, bool *hotdrop)
+static bool
+tos_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_tos_info *info = matchinfo;
+	const struct ipt_tos_info *info = par->matchinfo;
 
 	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
 }
 
-static bool tos_mt(const struct sk_buff *skb, const struct net_device *in,
-                   const struct net_device *out, const struct xt_match *match,
-                   const void *matchinfo, int offset, unsigned int protoff,
-                   bool *hotdrop)
+static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_tos_match_info *info = matchinfo;
+	const struct xt_tos_match_info *info = par->matchinfo;
 
-	if (match->family == NFPROTO_IPV4)
+	if (par->match->family == NFPROTO_IPV4)
 		return ((ip_hdr(skb)->tos & info->tos_mask) ==
 		       info->tos_value) ^ !!info->invert;
 	else
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 045c4deecaf..6d59f2e7c1c 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -42,26 +42,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool
-esp_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct ip_esp_hdr *eh;
 	struct ip_esp_hdr _esp;
-	const struct xt_esp *espinfo = matchinfo;
+	const struct xt_esp *espinfo = par->matchinfo;
 
 	/* Must not be a fragment. */
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	eh = skb_header_pointer(skb, protoff, sizeof(_esp), &_esp);
+	eh = skb_header_pointer(skb, par->thoff, sizeof(_esp), &_esp);
 	if (eh == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ESP tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 7bae369603d..22a60a728cf 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -563,19 +563,16 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 }
 
 static bool
-hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-                const struct net_device *out, const struct xt_match *match,
-                const void *matchinfo, int offset, unsigned int protoff,
-                bool *hotdrop)
+hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct xt_hashlimit_info *r =
-		((const struct xt_hashlimit_info *)matchinfo)->u.master;
+		((const struct xt_hashlimit_info *)par->matchinfo)->u.master;
 	struct xt_hashlimit_htable *hinfo = r->hinfo;
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
 	struct dsthash_dst dst;
 
-	if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0)
+	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 		goto hotdrop;
 
 	spin_lock_bh(&hinfo->lock);
@@ -613,23 +610,20 @@ hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 	return false;
 
 hotdrop:
-	*hotdrop = true;
+	*par->hotdrop = true;
 	return false;
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_hashlimit_mtinfo1 *info = matchinfo;
+	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
 	struct dsthash_dst dst;
 
-	if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0)
+	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 		goto hotdrop;
 
 	spin_lock_bh(&hinfo->lock);
@@ -666,7 +660,7 @@ hashlimit_mt(const struct sk_buff *skb, const struct net_device *in,
 	return info->cfg.mode & XT_HASHLIMIT_INVERT;
 
  hotdrop:
-	*hotdrop = true;
+	*par->hotdrop = true;
 	return false;
 }
 
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 134d94324eb..73bdc3ba13f 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -24,12 +24,9 @@ MODULE_ALIAS("ip6t_helper");
 
 
 static bool
-helper_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_helper_info *info = matchinfo;
+	const struct xt_helper_info *info = par->matchinfo;
 	const struct nf_conn *ct;
 	const struct nf_conn_help *master_help;
 	const struct nf_conntrack_helper *helper;
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index a7498cc48dc..6f62c36948d 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -17,12 +17,9 @@
 #include <linux/netfilter_ipv4/ipt_iprange.h>
 
 static bool
-iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-              const struct net_device *out, const struct xt_match *match,
-              const void *matchinfo, int offset, unsigned int protoff,
-              bool *hotdrop)
+iprange_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_iprange_info *info = matchinfo;
+	const struct ipt_iprange_info *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
 
 	if (info->flags & IPRANGE_SRC) {
@@ -55,12 +52,9 @@ iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-iprange_mt4(const struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, const struct xt_match *match,
-            const void *matchinfo, int offset, unsigned int protoff,
-            bool *hotdrop)
+iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_iprange_mtinfo *info = matchinfo;
+	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
 	bool m;
 
@@ -111,12 +105,9 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
 }
 
 static bool
-iprange_mt6(const struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, const struct xt_match *match,
-            const void *matchinfo, int offset, unsigned int protoff,
-            bool *hotdrop)
+iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_iprange_mtinfo *info = matchinfo;
+	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	bool m;
 
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index b8612d1914b..c4871ca6c86 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,24 +21,18 @@ MODULE_ALIAS("ipt_length");
 MODULE_ALIAS("ip6t_length");
 
 static bool
-length_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+length_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_length_info *info = matchinfo;
+	const struct xt_length_info *info = par->matchinfo;
 	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
 
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
 
 static bool
-length_mt6(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protoff,
-           bool *hotdrop)
+length_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_length_info *info = matchinfo;
+	const struct xt_length_info *info = par->matchinfo;
 	const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
 				 sizeof(struct ipv6hdr);
 
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 00247bd1095..c475eac5dbe 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -58,13 +58,10 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-limit_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct xt_rateinfo *r =
-		((const struct xt_rateinfo *)matchinfo)->master;
+		((const struct xt_rateinfo *)par->matchinfo)->master;
 	unsigned long now = jiffies;
 
 	spin_lock_bh(&limit_lock);
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 60db240098a..269f9d8aef5 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -24,12 +24,9 @@ MODULE_DESCRIPTION("Xtables: MAC address match");
 MODULE_ALIAS("ipt_mac");
 MODULE_ALIAS("ip6t_mac");
 
-static bool
-mac_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-    const struct xt_mac_info *info = matchinfo;
+    const struct xt_mac_info *info = par->matchinfo;
 
     /* Is mac pointer valid? */
     return skb_mac_header(skb) >= skb->head &&
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 96dd2b63b6b..88547614653 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -23,22 +23,17 @@ MODULE_ALIAS("ipt_mark");
 MODULE_ALIAS("ip6t_mark");
 
 static bool
-mark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protoff,
-           bool *hotdrop)
+mark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_mark_info *info = matchinfo;
+	const struct xt_mark_info *info = par->matchinfo;
 
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
 static bool
-mark_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_mark_mtinfo1 *info = matchinfo;
+	const struct xt_mark_mtinfo1 *info = par->matchinfo;
 
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index f6fe008ab8c..7087e291528 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -95,25 +95,22 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 }
 
 static bool
-multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-                const struct net_device *out, const struct xt_match *match,
-                const void *matchinfo, int offset, unsigned int protoff,
-                bool *hotdrop)
+multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const __be16 *pptr;
 	__be16 _ports[2];
-	const struct xt_multiport *multiinfo = matchinfo;
+	const struct xt_multiport *multiinfo = par->matchinfo;
 
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports);
+	pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports);
 	if (pptr == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
@@ -122,25 +119,22 @@ multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-multiport_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const __be16 *pptr;
 	__be16 _ports[2];
-	const struct xt_multiport_v1 *multiinfo = matchinfo;
+	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
 
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports);
+	pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports);
 	if (pptr == NULL) {
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index d1c3b7ae9b4..493b5eb8d14 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -21,12 +21,9 @@
 #include <linux/netfilter_ipv6/ip6t_owner.h>
 
 static bool
-owner_mt_v0(const struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, const struct xt_match *match,
-            const void *matchinfo, int offset, unsigned int protoff,
-            bool *hotdrop)
+owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ipt_owner_info *info = matchinfo;
+	const struct ipt_owner_info *info = par->matchinfo;
 	const struct file *filp;
 
 	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
@@ -50,12 +47,9 @@ owner_mt_v0(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct ip6t_owner_info *info = matchinfo;
+	const struct ip6t_owner_info *info = par->matchinfo;
 	const struct file *filp;
 
 	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
@@ -79,12 +73,9 @@ owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in,
 }
 
 static bool
-owner_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+owner_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_owner_match_info *info = matchinfo;
+	const struct xt_owner_match_info *info = par->matchinfo;
 	const struct file *filp;
 
 	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 72a0bdd53fa..e980e179d4f 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -21,14 +21,11 @@ MODULE_ALIAS("ipt_physdev");
 MODULE_ALIAS("ip6t_physdev");
 
 static bool
-physdev_mt(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protoff,
-           bool *hotdrop)
+physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	int i;
 	static const char nulldevname[IFNAMSIZ];
-	const struct xt_physdev_info *info = matchinfo;
+	const struct xt_physdev_info *info = par->matchinfo;
 	bool ret;
 	const char *indev, *outdev;
 	const struct nf_bridge_info *nf_bridge;
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 81e86d319a8..37753a37760 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -23,20 +23,17 @@ MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
 static bool
-pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, const struct xt_match *match,
-           const void *matchinfo, int offset, unsigned int protoff,
-           bool *hotdrop)
+pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_pkttype_info *info = matchinfo;
+	const struct xt_pkttype_info *info = par->matchinfo;
 	u_int8_t type;
 
 	if (skb->pkt_type != PACKET_LOOPBACK)
 		type = skb->pkt_type;
-	else if (match->family == NFPROTO_IPV4 &&
+	else if (par->match->family == NFPROTO_IPV4 &&
 	    ipv4_is_multicast(ip_hdr(skb)->daddr))
 		type = PACKET_MULTICAST;
-	else if (match->family == NFPROTO_IPV6 &&
+	else if (par->match->family == NFPROTO_IPV6 &&
 	    ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
 		type = PACKET_MULTICAST;
 	else
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index f1d514e9d0a..b0a00fb0511 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -110,18 +110,15 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 }
 
 static bool
-policy_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_policy_info *info = matchinfo;
+	const struct xt_policy_info *info = par->matchinfo;
 	int ret;
 
 	if (info->flags & XT_POLICY_MATCH_IN)
-		ret = match_policy_in(skb, info, match->family);
+		ret = match_policy_in(skb, info, par->match->family);
 	else
-		ret = match_policy_out(skb, info, match->family);
+		ret = match_policy_out(skb, info, par->match->family);
 
 	if (ret < 0)
 		ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index a3c8798f0cc..3ab92666c14 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -18,13 +18,10 @@ MODULE_ALIAS("ip6t_quota");
 static DEFINE_SPINLOCK(quota_lock);
 
 static bool
-quota_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	struct xt_quota_info *q =
-		((const struct xt_quota_info *)matchinfo)->master;
+		((const struct xt_quota_info *)par->matchinfo)->master;
 	bool ret = q->flags & XT_QUOTA_INVERT;
 
 	spin_lock_bh(&quota_lock);
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 4dcfd7353db..e9f64ef4565 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -14,16 +14,10 @@
 #include <net/netfilter/xt_rateest.h>
 
 
-static bool xt_rateest_mt(const struct sk_buff *skb,
-			  const struct net_device *in,
-			  const struct net_device *out,
-			  const struct xt_match *match,
-			  const void *matchinfo,
-			  int offset,
-			  unsigned int protoff,
-			  bool *hotdrop)
+static bool
+xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_rateest_match_info *info = matchinfo;
+	const struct xt_rateest_match_info *info = par->matchinfo;
 	struct gnet_stats_rate_est *r;
 	u_int32_t bps1, bps2, pps1, pps2;
 	bool ret = true;
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index ef65756d489..b25942110ed 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -22,12 +22,9 @@ MODULE_DESCRIPTION("Xtables: Routing realm match");
 MODULE_ALIAS("ipt_realm");
 
 static bool
-realm_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+realm_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_realm_info *info = matchinfo;
+	const struct xt_realm_info *info = par->matchinfo;
 	const struct dst_entry *dst = skb->dst;
 
 	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 4a916e2624d..baeb90a5623 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -204,19 +204,16 @@ static void recent_table_flush(struct recent_table *t)
 }
 
 static bool
-recent_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_recent_mtinfo *info = matchinfo;
+	const struct xt_recent_mtinfo *info = par->matchinfo;
 	struct recent_table *t;
 	struct recent_entry *e;
 	union nf_inet_addr addr = {};
 	u_int8_t ttl;
 	bool ret = info->invert;
 
-	if (match->family == NFPROTO_IPV4) {
+	if (par->match->family == NFPROTO_IPV4) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		if (info->side == XT_RECENT_DEST)
@@ -237,19 +234,19 @@ recent_mt(const struct sk_buff *skb, const struct net_device *in,
 	}
 
 	/* use TTL as seen before forwarding */
-	if (out && !skb->sk)
+	if (par->out != NULL && skb->sk == NULL)
 		ttl++;
 
 	spin_lock_bh(&recent_lock);
 	t = recent_table_lookup(info->name);
-	e = recent_entry_lookup(t, &addr, match->family,
+	e = recent_entry_lookup(t, &addr, par->match->family,
 				(info->check_set & XT_RECENT_TTL) ? ttl : 0);
 	if (e == NULL) {
 		if (!(info->check_set & XT_RECENT_SET))
 			goto out;
-		e = recent_entry_init(t, &addr, match->family, ttl);
+		e = recent_entry_init(t, &addr, par->match->family, ttl);
 		if (e == NULL)
-			*hotdrop = true;
+			*par->hotdrop = true;
 		ret = !ret;
 		goto out;
 	}
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index ab67aca4d8f..b0014ab65da 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -117,23 +117,21 @@ match_packet(const struct sk_buff *skb,
 }
 
 static bool
-sctp_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_sctp_info *info = matchinfo;
+	const struct xt_sctp_info *info = par->matchinfo;
 	const sctp_sctphdr_t *sh;
 	sctp_sctphdr_t _sh;
 
-	if (offset) {
+	if (par->fragoff != 0) {
 		duprintf("Dropping non-first fragment.. FIXME\n");
 		return false;
 	}
 
-	sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh);
+	sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh);
 	if (sh == NULL) {
 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 	duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
@@ -144,8 +142,8 @@ sctp_mt(const struct sk_buff *skb, const struct net_device *in,
 		&& SCCHECK(ntohs(sh->dest) >= info->dpts[0]
 			&& ntohs(sh->dest) <= info->dpts[1],
 			XT_SCTP_DEST_PORTS, info->flags, info->invflags)
-		&& SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t),
-					info, hotdrop),
+		&& SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
+					info, par->hotdrop),
 			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index ac9db17c7b9..02a8fed2108 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -86,14 +86,7 @@ extract_icmp_fields(const struct sk_buff *skb,
 
 
 static bool
-socket_mt(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const struct xt_match *match,
-	  const void *matchinfo,
-	  int offset,
-	  unsigned int protoff,
-	  bool *hotdrop)
+socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr _hdr, *hp = NULL;
@@ -146,7 +139,7 @@ socket_mt(const struct sk_buff *skb,
 #endif
 
 	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
-				   saddr, daddr, sport, dport, in, false);
+				   saddr, daddr, sport, dport, par->in, false);
 	if (sk != NULL) {
 		bool wildcard = (inet_sk(sk)->rcv_saddr == 0);
 
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index f92f8bcc1e3..29f5a8a1b02 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,12 +21,9 @@ MODULE_ALIAS("ipt_state");
 MODULE_ALIAS("ip6t_state");
 
 static bool
-state_mt(const struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, const struct xt_match *match,
-         const void *matchinfo, int offset, unsigned int protoff,
-         bool *hotdrop)
+state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_state_info *sinfo = matchinfo;
+	const struct xt_state_info *sinfo = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
 
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index f41a92322e6..dcadc491db2 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -25,12 +25,9 @@ MODULE_ALIAS("ip6t_statistic");
 static DEFINE_SPINLOCK(nth_lock);
 
 static bool
-statistic_mt(const struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, const struct xt_match *match,
-             const void *matchinfo, int offset, unsigned int protoff,
-             bool *hotdrop)
+statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
+	struct xt_statistic_info *info = (void *)par->matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
 
 	switch (info->mode) {
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 18d8884e737..33f2d29ca4f 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -22,18 +22,15 @@ MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
 
 static bool
-string_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_string_info *conf = matchinfo;
+	const struct xt_string_info *conf = par->matchinfo;
 	struct ts_state state;
 	int invert;
 
 	memset(&state, 0, sizeof(struct ts_state));
 
-	invert = (match->revision == 0 ? conf->u.v0.invert :
+	invert = (par->match->revision == 0 ? conf->u.v0.invert :
 				    conf->u.v1.flags & XT_STRING_FLAG_INVERT);
 
 	return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 4791c7cbe5a..4809b34b10f 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -25,12 +25,9 @@ MODULE_ALIAS("ipt_tcpmss");
 MODULE_ALIAS("ip6t_tcpmss");
 
 static bool
-tcpmss_mt(const struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, const struct xt_match *match,
-          const void *matchinfo, int offset, unsigned int protoff,
-          bool *hotdrop)
+tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_tcpmss_match_info *info = matchinfo;
+	const struct xt_tcpmss_match_info *info = par->matchinfo;
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
@@ -39,7 +36,7 @@ tcpmss_mt(const struct sk_buff *skb, const struct net_device *in,
 	unsigned int i, optlen;
 
 	/* If we don't have the whole header, drop packet. */
-	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		goto dropit;
 
@@ -52,7 +49,7 @@ tcpmss_mt(const struct sk_buff *skb, const struct net_device *in,
 		goto out;
 
 	/* Truncated options. */
-	op = skb_header_pointer(skb, protoff + sizeof(*th), optlen, _opt);
+	op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt);
 	if (op == NULL)
 		goto dropit;
 
@@ -76,7 +73,7 @@ out:
 	return info->invert;
 
 dropit:
-	*hotdrop = true;
+	*par->hotdrop = true;
 	return false;
 }
 
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 5a6268cbb9f..66cf71b1d59 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -68,25 +68,22 @@ tcp_find_option(u_int8_t option,
 	return invert;
 }
 
-static bool
-tcp_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
-	const struct xt_tcp *tcpinfo = matchinfo;
+	const struct xt_tcp *tcpinfo = par->matchinfo;
 
-	if (offset) {
+	if (par->fragoff != 0) {
 		/* To quote Alan:
 
 		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
 		   causes this. Its a cracker trying to break in by doing a
 		   flag overwrite to pass the direction checks.
 		*/
-		if (offset == 1) {
+		if (par->fragoff == 1) {
 			duprintf("Dropping evil TCP offset=1 frag.\n");
-			*hotdrop = true;
+			*par->hotdrop = true;
 		}
 		/* Must not be a fragment. */
 		return false;
@@ -94,12 +91,12 @@ tcp_mt(const struct sk_buff *skb, const struct net_device *in,
 
 #define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg)))
 
-	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil TCP offset=0 tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
@@ -117,13 +114,13 @@ tcp_mt(const struct sk_buff *skb, const struct net_device *in,
 		return false;
 	if (tcpinfo->option) {
 		if (th->doff * 4 < sizeof(_tcph)) {
-			*hotdrop = true;
+			*par->hotdrop = true;
 			return false;
 		}
-		if (!tcp_find_option(tcpinfo->option, skb, protoff,
+		if (!tcp_find_option(tcpinfo->option, skb, par->thoff,
 				     th->doff*4 - sizeof(_tcph),
 				     tcpinfo->invflags & XT_TCP_INV_OPTION,
-				     hotdrop))
+				     par->hotdrop))
 			return false;
 	}
 	return true;
@@ -141,25 +138,22 @@ tcp_mt_check(const char *tablename, const void *info,
 	return !(tcpinfo->invflags & ~XT_TCP_INV_MASK);
 }
 
-static bool
-udp_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct udphdr *uh;
 	struct udphdr _udph;
-	const struct xt_udp *udpinfo = matchinfo;
+	const struct xt_udp *udpinfo = par->matchinfo;
 
 	/* Must not be a fragment. */
-	if (offset)
+	if (par->fragoff != 0)
 		return false;
 
-	uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
+	uh = skb_header_pointer(skb, par->thoff, sizeof(_udph), &_udph);
 	if (uh == NULL) {
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		duprintf("Dropping evil UDP tinygram.\n");
-		*hotdrop = true;
+		*par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 32d4c769caa..28599d3979c 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -153,11 +153,9 @@ static void localtime_3(struct xtm *r, time_t time)
 }
 
 static bool
-time_mt(const struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, const struct xt_match *match,
-        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+time_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_time_info *info = matchinfo;
+	const struct xt_time_info *info = par->matchinfo;
 	unsigned int packet_time;
 	struct xtm current_time;
 	s64 stamp;
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index a6b971dc5d3..24a52762450 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -87,12 +87,9 @@ static bool u32_match_it(const struct xt_u32 *data,
 	return true;
 }
 
-static bool
-u32_mt(const struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, const struct xt_match *match,
-       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
+static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-	const struct xt_u32 *data = matchinfo;
+	const struct xt_u32 *data = par->matchinfo;
 	bool ret;
 
 	ret = u32_match_it(data, skb);
-- 
cgit v1.2.3-70-g09d2


From 9b4fce7a3508a9776534188b6065b206a9608ccf Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:18 +0200
Subject: netfilter: xtables: move extension arguments into compound structure
 (2/6)

This patch does this for match extensions' checkentry functions.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h   | 32 +++++++++++++++--------
 net/bridge/netfilter/ebt_802_3.c     |  7 ++----
 net/bridge/netfilter/ebt_among.c     |  9 +++----
 net/bridge/netfilter/ebt_arp.c       |  9 +++----
 net/bridge/netfilter/ebt_ip.c        |  9 +++----
 net/bridge/netfilter/ebt_ip6.c       |  9 +++----
 net/bridge/netfilter/ebt_limit.c     |  7 ++----
 net/bridge/netfilter/ebt_mark_m.c    |  7 ++----
 net/bridge/netfilter/ebt_pkttype.c   |  7 ++----
 net/bridge/netfilter/ebt_stp.c       |  9 +++----
 net/bridge/netfilter/ebt_vlan.c      |  9 +++----
 net/bridge/netfilter/ebtables.c      | 19 +++++++++-----
 net/ipv4/netfilter/ip_tables.c       | 49 +++++++++++++++++-------------------
 net/ipv4/netfilter/ipt_addrtype.c    | 13 +++++-----
 net/ipv4/netfilter/ipt_ah.c          |  8 ++----
 net/ipv4/netfilter/ipt_ecn.c         |  9 +++----
 net/ipv6/netfilter/ip6_tables.c      | 48 +++++++++++++++++------------------
 net/ipv6/netfilter/ip6t_ah.c         |  8 ++----
 net/ipv6/netfilter/ip6t_frag.c       |  8 ++----
 net/ipv6/netfilter/ip6t_hbh.c        |  8 ++----
 net/ipv6/netfilter/ip6t_ipv6header.c |  7 ++----
 net/ipv6/netfilter/ip6t_mh.c         |  8 ++----
 net/ipv6/netfilter/ip6t_rt.c         |  8 ++----
 net/netfilter/x_tables.c             | 32 +++++++++++------------
 net/netfilter/xt_connbytes.c         | 14 ++++-------
 net/netfilter/xt_connlimit.c         | 13 ++++------
 net/netfilter/xt_connmark.c          | 20 ++++++---------
 net/netfilter/xt_conntrack.c         |  9 +++----
 net/netfilter/xt_dccp.c              |  7 ++----
 net/netfilter/xt_dscp.c              | 11 +++-----
 net/netfilter/xt_esp.c               |  8 ++----
 net/netfilter/xt_hashlimit.c         | 24 +++++++-----------
 net/netfilter/xt_helper.c            | 11 +++-----
 net/netfilter/xt_limit.c             |  7 ++----
 net/netfilter/xt_mark.c              |  7 ++----
 net/netfilter/xt_multiport.c         | 37 +++++++++------------------
 net/netfilter/xt_owner.c             | 14 +++--------
 net/netfilter/xt_physdev.c           | 13 ++++------
 net/netfilter/xt_policy.c            | 15 +++++------
 net/netfilter/xt_quota.c             |  7 ++----
 net/netfilter/xt_rateest.c           |  8 ++----
 net/netfilter/xt_recent.c            |  7 ++----
 net/netfilter/xt_sctp.c              |  7 ++----
 net/netfilter/xt_state.c             |  9 +++----
 net/netfilter/xt_statistic.c         |  7 ++----
 net/netfilter/xt_string.c            |  9 +++----
 net/netfilter/xt_tcpudp.c            | 16 +++---------
 net/netfilter/xt_time.c              |  7 ++----
 48 files changed, 240 insertions(+), 386 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index bcd40ec8325..763a704ce83 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -193,6 +193,25 @@ struct xt_match_param {
 	bool *hotdrop;
 };
 
+/**
+ * struct xt_mtchk_param - parameters for match extensions'
+ * checkentry functions
+ *
+ * @table:	table the rule is tried to be inserted into
+ * @entryinfo:	the family-specific rule data
+ * 		(struct ipt_ip, ip6t_ip, ebt_entry)
+ * @match:	struct xt_match through which this function was invoked
+ * @matchinfo:	per-match data
+ * @hook_mask:	via which hooks the new rule is reachable
+ */
+struct xt_mtchk_param {
+	const char *table;
+	const void *entryinfo;
+	const struct xt_match *match;
+	void *matchinfo;
+	unsigned int hook_mask;
+};
+
 struct xt_match
 {
 	struct list_head list;
@@ -208,12 +227,7 @@ struct xt_match
 		      const struct xt_match_param *);
 
 	/* Called when user tries to insert an entry of this type. */
-	/* Should return true or false. */
-	bool (*checkentry)(const char *tablename,
-			   const void *ip,
-			   const struct xt_match *match,
-			   void *matchinfo,
-			   unsigned int hook_mask);
+	bool (*checkentry)(const struct xt_mtchk_param *);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_match *match, void *matchinfo);
@@ -342,10 +356,8 @@ extern void xt_unregister_match(struct xt_match *target);
 extern int xt_register_matches(struct xt_match *match, unsigned int n);
 extern void xt_unregister_matches(struct xt_match *match, unsigned int n);
 
-extern int xt_check_match(const struct xt_match *match, unsigned short family,
-			  unsigned int size, const char *table, unsigned int hook,
-			  unsigned short proto, int inv_proto,
-			  const void *entry, void *matchinfo);
+extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family,
+			  unsigned int size, u_int8_t proto, bool inv_proto);
 extern int xt_check_target(const struct xt_target *target, unsigned short family,
 			   unsigned int size, const char *table, unsigned int hook,
 			   unsigned short proto, int inv_proto,
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index c9e1bc14951..bd91dc58d49 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -36,12 +36,9 @@ ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-ebt_802_3_mt_check(const char *table, const void *entry,
-		   const struct xt_match *match, void *data,
-		   unsigned int hook_mask)
+static bool ebt_802_3_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ebt_802_3_info *info = data;
+	const struct ebt_802_3_info *info = par->matchinfo;
 
 	if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK)
 		return false;
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 0ad0db3e815..b595f091f35 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -171,14 +171,11 @@ ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-ebt_among_mt_check(const char *table, const void *entry,
-		   const struct xt_match *match, void *data,
-		   unsigned int hook_mask)
+static bool ebt_among_mt_check(const struct xt_mtchk_param *par)
 {
+	const struct ebt_among_info *info = par->matchinfo;
 	const struct ebt_entry_match *em =
-		container_of(data, const struct ebt_entry_match, data);
-	const struct ebt_among_info *info = data;
+		container_of(par->matchinfo, const struct ebt_entry_match, data);
 	int expected_length = sizeof(struct ebt_among_info);
 	const struct ebt_mac_wormhash *wh_dst, *wh_src;
 	int err;
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 1ff8fa3a9e7..b7ad60419f9 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -100,13 +100,10 @@ ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-ebt_arp_mt_check(const char *table, const void *entry,
-		 const struct xt_match *match, void *data,
-		 unsigned int hook_mask)
+static bool ebt_arp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ebt_arp_info *info = data;
-	const struct ebt_entry *e = entry;
+	const struct ebt_arp_info *info = par->matchinfo;
+	const struct ebt_entry *e = par->entryinfo;
 
 	if ((e->ethproto != htons(ETH_P_ARP) &&
 	   e->ethproto != htons(ETH_P_RARP)) ||
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index c70ea39840b..d771bbfbcbe 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -77,13 +77,10 @@ ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-ebt_ip_mt_check(const char *table, const void *entry,
-		const struct xt_match *match, void *data,
-		unsigned int hook_mask)
+static bool ebt_ip_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ebt_ip_info *info = data;
-	const struct ebt_entry *e = entry;
+	const struct ebt_ip_info *info = par->matchinfo;
+	const struct ebt_entry *e = par->entryinfo;
 
 	if (e->ethproto != htons(ETH_P_IP) ||
 	   e->invflags & EBT_IPROTO)
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 5acee02de72..784a6573876 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -90,13 +90,10 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-ebt_ip6_mt_check(const char *table, const void *entry,
-		 const struct xt_match *match, void *data,
-		 unsigned int hook_mask)
+static bool ebt_ip6_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ebt_entry *e = entry;
-	struct ebt_ip6_info *info = data;
+	const struct ebt_entry *e = par->entryinfo;
+	struct ebt_ip6_info *info = par->matchinfo;
 
 	if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO)
 		return false;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 9a3ec8cadaa..f7bd9192ff0 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -64,12 +64,9 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE;
 }
 
-static bool
-ebt_limit_mt_check(const char *table, const void *e,
-		   const struct xt_match *match, void *data,
-		   unsigned int hook_mask)
+static bool ebt_limit_mt_check(const struct xt_mtchk_param *par)
 {
-	struct ebt_limit_info *info = data;
+	struct ebt_limit_info *info = par->matchinfo;
 
 	/* Check for overflow. */
 	if (info->burst == 0 ||
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 5b22ef96127..ea570f214b1 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -22,12 +22,9 @@ ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool
-ebt_mark_mt_check(const char *table, const void *e,
-		  const struct xt_match *match, void *data,
-		  unsigned int hook_mask)
+static bool ebt_mark_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ebt_mark_m_info *info = data;
+	const struct ebt_mark_m_info *info = par->matchinfo;
 
 	if (info->bitmask & ~EBT_MARK_MASK)
 		return false;
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index b756f88fb10..883e96e2a54 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -20,12 +20,9 @@ ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (skb->pkt_type == info->pkt_type) ^ info->invert;
 }
 
-static bool
-ebt_pkttype_mt_check(const char *table, const void *e,
-		     const struct xt_match *match, void *data,
-		     unsigned int hook_mask)
+static bool ebt_pkttype_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ebt_pkttype_info *info = data;
+	const struct ebt_pkttype_info *info = par->matchinfo;
 
 	if (info->invert != 0 && info->invert != 1)
 		return false;
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 06d777c62c3..48527e62162 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -153,15 +153,12 @@ ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-ebt_stp_mt_check(const char *table, const void *entry,
-		 const struct xt_match *match, void *data,
-		 unsigned int hook_mask)
+static bool ebt_stp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ebt_stp_info *info = data;
+	const struct ebt_stp_info *info = par->matchinfo;
 	const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
 	const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-	const struct ebt_entry *e = entry;
+	const struct ebt_entry *e = par->entryinfo;
 
 	if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
 	    !(info->bitmask & EBT_STP_MASK))
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index b05b4a81834..3dddd489328 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -84,13 +84,10 @@ ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-ebt_vlan_mt_check(const char *table, const void *entry,
-		  const struct xt_match *match, void *data,
-		  unsigned int hook_mask)
+static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 {
-	struct ebt_vlan_info *info = data;
-	const struct ebt_entry *e = entry;
+	struct ebt_vlan_info *info = par->matchinfo;
+	const struct ebt_entry *e = par->entryinfo;
 
 	/* Is it 802.1Q frame checked? */
 	if (e->ethproto != htons(ETH_P_8021Q)) {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f8e1822f38d..5ce37b2f5b8 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -324,9 +324,10 @@ find_table_lock(const char *name, int *error, struct mutex *mutex)
 }
 
 static inline int
-ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
-   const char *name, unsigned int hookmask, unsigned int *cnt)
+ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
+		unsigned int *cnt)
 {
+	const struct ebt_entry *e = par->entryinfo;
 	struct xt_match *match;
 	size_t left = ((char *)e + e->watchers_offset) - (char *)m;
 	int ret;
@@ -343,9 +344,10 @@ ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
 		return -ENOENT;
 	m->u.match = match;
 
-	ret = xt_check_match(match, NFPROTO_BRIDGE, m->match_size,
-	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO,
-	      e, m->data);
+	par->match     = match;
+	par->matchinfo = m->data;
+	ret = xt_check_match(par, NFPROTO_BRIDGE, m->match_size,
+	      e->ethproto, e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(match->me);
 		return ret;
@@ -607,6 +609,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	unsigned int i, j, hook = 0, hookmask = 0;
 	size_t gap;
 	int ret;
+	struct xt_mtchk_param par;
 
 	/* don't mess with the struct ebt_entries */
 	if (e->bitmask == 0)
@@ -647,7 +650,11 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 			hookmask = cl_s[i - 1].hookmask;
 	}
 	i = 0;
-	ret = EBT_MATCH_ITERATE(e, ebt_check_match, e, name, hookmask, &i);
+
+	par.table     = name;
+	par.entryinfo = e;
+	par.hook_mask = hookmask;
+	ret = EBT_MATCH_ITERATE(e, ebt_check_match, &par, &i);
 	if (ret != 0)
 		goto cleanup_matches;
 	j = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 99fdb59454f..4147298a6a8 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -607,20 +607,20 @@ check_entry(struct ipt_entry *e, const char *name)
 }
 
 static int
-check_match(struct ipt_entry_match *m, const char *name,
-			      const struct ipt_ip *ip,
-			      unsigned int hookmask, unsigned int *i)
+check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
+	    unsigned int *i)
 {
-	struct xt_match *match;
+	const struct ipt_ip *ip = par->entryinfo;
 	int ret;
 
-	match = m->u.kernel.match;
-	ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
-			     name, hookmask, ip->proto,
-			     ip->invflags & IPT_INV_PROTO, ip, m->data);
+	par->match     = m->u.kernel.match;
+	par->matchinfo = m->data;
+
+	ret = xt_check_match(par, NFPROTO_IPV4, m->u.match_size - sizeof(*m),
+	      ip->proto, ip->invflags & IPT_INV_PROTO);
 	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
-			 m->u.kernel.match->name);
+			 par.match->name);
 		return ret;
 	}
 	++*i;
@@ -628,10 +628,7 @@ check_match(struct ipt_entry_match *m, const char *name,
 }
 
 static int
-find_check_match(struct ipt_entry_match *m,
-		 const char *name,
-		 const struct ipt_ip *ip,
-		 unsigned int hookmask,
+find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
 		 unsigned int *i)
 {
 	struct xt_match *match;
@@ -646,7 +643,7 @@ find_check_match(struct ipt_entry_match *m,
 	}
 	m->u.kernel.match = match;
 
-	ret = check_match(m, name, ip, hookmask, i);
+	ret = check_match(m, par, i);
 	if (ret)
 		goto err;
 
@@ -683,14 +680,17 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 	struct xt_target *target;
 	int ret;
 	unsigned int j;
+	struct xt_mtchk_param mtpar;
 
 	ret = check_entry(e, name);
 	if (ret)
 		return ret;
 
 	j = 0;
-	ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip,
-				e->comefrom, &j);
+	mtpar.table     = name;
+	mtpar.entryinfo = &e->ip;
+	mtpar.hook_mask = e->comefrom;
+	ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
 	if (ret != 0)
 		goto cleanup_matches;
 
@@ -1644,12 +1644,15 @@ static int
 compat_check_entry(struct ipt_entry *e, const char *name,
 				     unsigned int *i)
 {
+	struct xt_mtchk_param mtpar;
 	unsigned int j;
 	int ret;
 
 	j = 0;
-	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip,
-				e->comefrom, &j);
+	mtpar.table     = name;
+	mtpar.entryinfo = &e->ip;
+	mtpar.hook_mask = e->comefrom;
+	ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
 	if (ret)
 		goto cleanup_matches;
 
@@ -2144,15 +2147,9 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
 				    !!(icmpinfo->invflags&IPT_ICMP_INV));
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-icmp_checkentry(const char *tablename,
-	   const void *entry,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+static bool icmp_checkentry(const struct xt_mtchk_param *par)
 {
-	const struct ipt_icmp *icmpinfo = matchinfo;
+	const struct ipt_icmp *icmpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
 	return !(icmpinfo->invflags & ~IPT_ICMP_INV);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index e60995e4c20..88762f02779 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -68,12 +68,9 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool
-addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void,
-			  const struct xt_match *match, void *matchinfo,
-			  unsigned int hook_mask)
+static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 {
-	struct ipt_addrtype_info_v1 *info = matchinfo;
+	struct ipt_addrtype_info_v1 *info = par->matchinfo;
 
 	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
@@ -82,14 +79,16 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void,
 		return false;
 	}
 
-	if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) &&
+	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
+	    (1 << NF_INET_LOCAL_IN)) &&
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
 		printk(KERN_ERR "ipt_addrtype: output interface limitation "
 				"not valid in PRE_ROUTING and INPUT\n");
 		return false;
 	}
 
-	if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) &&
+	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
+	    (1 << NF_INET_LOCAL_OUT)) &&
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
 		printk(KERN_ERR "ipt_addrtype: input interface limitation "
 				"not valid in POST_ROUTING and OUTPUT\n");
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 2fce19ef4f3..0104c0b399d 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -61,13 +61,9 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			 !!(ahinfo->invflags & IPT_AH_INV_SPI));
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-ah_mt_check(const char *tablename, const void *ip_void,
-            const struct xt_match *match, void *matchinfo,
-            unsigned int hook_mask)
+static bool ah_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ipt_ah *ahinfo = matchinfo;
+	const struct ipt_ah *ahinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
 	if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 06915463150..6289b64144c 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -85,13 +85,10 @@ static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-ecn_mt_check(const char *tablename, const void *ip_void,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool ecn_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ipt_ecn_info *info = matchinfo;
-	const struct ipt_ip *ip = ip_void;
+	const struct ipt_ecn_info *info = par->matchinfo;
+	const struct ipt_ip *ip = par->entryinfo;
 
 	if (info->operation & IPT_ECN_OP_MATCH_MASK)
 		return false;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index cf2c5370a4e..9c843e3777b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -629,20 +629,20 @@ check_entry(struct ip6t_entry *e, const char *name)
 	return 0;
 }
 
-static int check_match(struct ip6t_entry_match *m, const char *name,
-			      const struct ip6t_ip6 *ipv6,
-			      unsigned int hookmask, unsigned int *i)
+static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par,
+		       unsigned int *i)
 {
-	struct xt_match *match;
+	const struct ip6t_ip6 *ipv6 = par->entryinfo;
 	int ret;
 
-	match = m->u.kernel.match;
-	ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m),
-			     name, hookmask, ipv6->proto,
-			     ipv6->invflags & IP6T_INV_PROTO, ipv6, m->data);
+	par->match     = m->u.kernel.match;
+	par->matchinfo = m->data;
+
+	ret = xt_check_match(par, NFPROTO_IPV6, m->u.match_size - sizeof(*m),
+			     ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
 	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
-			 m->u.kernel.match->name);
+			 par.match->name);
 		return ret;
 	}
 	++*i;
@@ -650,10 +650,7 @@ static int check_match(struct ip6t_entry_match *m, const char *name,
 }
 
 static int
-find_check_match(struct ip6t_entry_match *m,
-		 const char *name,
-		 const struct ip6t_ip6 *ipv6,
-		 unsigned int hookmask,
+find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par,
 		 unsigned int *i)
 {
 	struct xt_match *match;
@@ -668,7 +665,7 @@ find_check_match(struct ip6t_entry_match *m,
 	}
 	m->u.kernel.match = match;
 
-	ret = check_match(m, name, ipv6, hookmask, i);
+	ret = check_match(m, par, i);
 	if (ret)
 		goto err;
 
@@ -705,14 +702,17 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 	struct xt_target *target;
 	int ret;
 	unsigned int j;
+	struct xt_mtchk_param mtpar;
 
 	ret = check_entry(e, name);
 	if (ret)
 		return ret;
 
 	j = 0;
-	ret = IP6T_MATCH_ITERATE(e, find_check_match, name, &e->ipv6,
-				 e->comefrom, &j);
+	mtpar.table     = name;
+	mtpar.entryinfo = &e->ipv6;
+	mtpar.hook_mask = e->comefrom;
+	ret = IP6T_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
 	if (ret != 0)
 		goto cleanup_matches;
 
@@ -1669,10 +1669,13 @@ static int compat_check_entry(struct ip6t_entry *e, const char *name,
 {
 	unsigned int j;
 	int ret;
+	struct xt_mtchk_param mtpar;
 
 	j = 0;
-	ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6,
-				 e->comefrom, &j);
+	mtpar.table     = name;
+	mtpar.entryinfo = &e->ipv6;
+	mtpar.hook_mask = e->comefrom;
+	ret = IP6T_MATCH_ITERATE(e, check_match, &mtpar, &j);
 	if (ret)
 		goto cleanup_matches;
 
@@ -2166,14 +2169,9 @@ icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 /* Called when user tries to insert an entry of this type. */
-static bool
-icmp6_checkentry(const char *tablename,
-	   const void *entry,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+static bool icmp6_checkentry(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_icmp *icmpinfo = matchinfo;
+	const struct ip6t_icmp *icmpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
 	return !(icmpinfo->invflags & ~IP6T_ICMP_INV);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index a04f2b8396e..3a82f24746b 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -90,13 +90,9 @@ static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	       !(ahinfo->hdrres && ah->reserved);
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-ah_mt6_check(const char *tablename, const void *entry,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool ah_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_ah *ahinfo = matchinfo;
+	const struct ip6t_ah *ahinfo = par->matchinfo;
 
 	if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
 		pr_debug("ip6t_ah: unknown flags %X\n", ahinfo->invflags);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 6951d0dacf4..673aa0a5084 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -107,13 +107,9 @@ frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 		 && (ntohs(fh->frag_off) & IP6_MF));
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-frag_mt6_check(const char *tablename, const void *ip,
-               const struct xt_match *match, void *matchinfo,
-               unsigned int hook_mask)
+static bool frag_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_frag *fraginfo = matchinfo;
+	const struct ip6t_frag *fraginfo = par->matchinfo;
 
 	if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
 		pr_debug("ip6t_frag: unknown flags %X\n", fraginfo->invflags);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index d3351978819..cbe8dec9744 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -160,13 +160,9 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-hbh_mt6_check(const char *tablename, const void *entry,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+static bool hbh_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_opts *optsinfo = matchinfo;
+	const struct ip6t_opts *optsinfo = par->matchinfo;
 
 	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
 		pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 6aaca511d47..14e6724d567 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -118,12 +118,9 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	}
 }
 
-static bool
-ipv6header_mt6_check(const char *tablename, const void *ip,
-                     const struct xt_match *match, void *matchinfo,
-                     unsigned int hook_mask)
+static bool ipv6header_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_ipv6header_info *info = matchinfo;
+	const struct ip6t_ipv6header_info *info = par->matchinfo;
 
 	/* invflags is 0 or 0xff in hard mode */
 	if ((!info->modeflag) && info->invflags != 0x00 &&
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 2803258b6d0..aafe4e66577 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -67,13 +67,9 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 			  !!(mhinfo->invflags & IP6T_MH_INV_TYPE));
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-mh_mt6_check(const char *tablename, const void *entry,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool mh_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_mh *mhinfo = matchinfo;
+	const struct ip6t_mh *mhinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
 	return !(mhinfo->invflags & ~IP6T_MH_INV_MASK);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 9cf4b8a37af..356b8d6f6ba 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -186,13 +186,9 @@ static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-rt_mt6_check(const char *tablename, const void *entry,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool rt_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_rt *rtinfo = matchinfo;
+	const struct ip6t_rt *rtinfo = par->matchinfo;
 
 	if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
 		pr_debug("ip6t_rt: unknown flags %X\n", rtinfo->invflags);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d1f2fb3e8f2..817ab14f7cd 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -321,39 +321,39 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 }
 EXPORT_SYMBOL_GPL(xt_find_revision);
 
-int xt_check_match(const struct xt_match *match, unsigned short family,
-		   unsigned int size, const char *table, unsigned int hook_mask,
-		   unsigned short proto, int inv_proto, const void *entry,
-		   void *matchinfo)
+int xt_check_match(struct xt_mtchk_param *par, u_int8_t family,
+		   unsigned int size, u_int8_t proto, bool inv_proto)
 {
-	if (XT_ALIGN(match->matchsize) != size &&
-	    match->matchsize != -1) {
+	if (XT_ALIGN(par->match->matchsize) != size &&
+	    par->match->matchsize != -1) {
 		/*
 		 * ebt_among is exempt from centralized matchsize checking
 		 * because it uses a dynamic-size data set.
 		 */
 		printk("%s_tables: %s match: invalid size %Zu != %u\n",
-		       xt_prefix[family], match->name,
-		       XT_ALIGN(match->matchsize), size);
+		       xt_prefix[family], par->match->name,
+		       XT_ALIGN(par->match->matchsize), size);
 		return -EINVAL;
 	}
-	if (match->table && strcmp(match->table, table)) {
+	if (par->match->table != NULL &&
+	    strcmp(par->match->table, par->table) != 0) {
 		printk("%s_tables: %s match: only valid in %s table, not %s\n",
-		       xt_prefix[family], match->name, match->table, table);
+		       xt_prefix[family], par->match->name,
+		       par->match->table, par->table);
 		return -EINVAL;
 	}
-	if (match->hooks && (hook_mask & ~match->hooks) != 0) {
+	if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
 		printk("%s_tables: %s match: bad hook_mask %#x/%#x\n",
-		       xt_prefix[family], match->name, hook_mask, match->hooks);
+		       xt_prefix[family], par->match->name,
+		       par->hook_mask, par->match->hooks);
 		return -EINVAL;
 	}
-	if (match->proto && (match->proto != proto || inv_proto)) {
+	if (par->match->proto && (par->match->proto != proto || inv_proto)) {
 		printk("%s_tables: %s match: only valid for protocol %u\n",
-		       xt_prefix[family], match->name, match->proto);
+		       xt_prefix[family], par->match->name, par->match->proto);
 		return -EINVAL;
 	}
-	if (match->checkentry != NULL &&
-	    !match->checkentry(table, entry, match, matchinfo, hook_mask))
+	if (par->match->checkentry != NULL && !par->match->checkentry(par))
 		return -EINVAL;
 	return 0;
 }
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 30c19b5fe90..43a36c728e5 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -92,12 +92,9 @@ connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		return what >= sinfo->count.from;
 }
 
-static bool
-connbytes_mt_check(const char *tablename, const void *ip,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool connbytes_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_connbytes_info *sinfo = matchinfo;
+	const struct xt_connbytes_info *sinfo = par->matchinfo;
 
 	if (sinfo->what != XT_CONNBYTES_PKTS &&
 	    sinfo->what != XT_CONNBYTES_BYTES &&
@@ -109,17 +106,16 @@ connbytes_mt_check(const char *tablename, const void *ip,
 	    sinfo->direction != XT_CONNBYTES_DIR_BOTH)
 		return false;
 
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->match->family);
 		return false;
 	}
 
 	return true;
 }
 
-static void
-connbytes_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void connbytes_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	nf_ct_l3proto_module_put(match->family);
 }
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 8b8f70e7664..1361e9919cf 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -221,24 +221,21 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool
-connlimit_mt_check(const char *tablename, const void *ip,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool connlimit_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_connlimit_info *info = matchinfo;
+	struct xt_connlimit_info *info = par->matchinfo;
 	unsigned int i;
 
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
-		       "address family %u\n", match->family);
+		       "address family %u\n", par->match->family);
 		return false;
 	}
 
 	/* init private data */
 	info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
 	if (info->data == NULL) {
-		nf_ct_l3proto_module_put(match->family);
+		nf_ct_l3proto_module_put(par->match->family);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index df4f4a865a5..b935b7888a9 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -61,33 +61,27 @@ connmark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((ct->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool
-connmark_mt_check_v0(const char *tablename, const void *ip,
-                     const struct xt_match *match, void *matchinfo,
-                     unsigned int hook_mask)
+static bool connmark_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct xt_connmark_info *cm = matchinfo;
+	const struct xt_connmark_info *cm = par->matchinfo;
 
 	if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
 		printk(KERN_WARNING "connmark: only support 32bit mark\n");
 		return false;
 	}
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->match->family);
 		return false;
 	}
 	return true;
 }
 
-static bool
-connmark_mt_check(const char *tablename, const void *ip,
-                  const struct xt_match *match, void *matchinfo,
-                  unsigned int hook_mask)
+static bool connmark_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", match->family);
+		       "proto=%u\n", par->match->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 13a7e4eacdf..f04c46a02ce 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -278,14 +278,11 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-conntrack_mt_check(const char *tablename, const void *ip,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool conntrack_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->match->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 7aa30bb9105..e5d3e867328 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -121,12 +121,9 @@ dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			   XT_DCCP_OPTION, info->flags, info->invflags);
 }
 
-static bool
-dccp_mt_check(const char *tablename, const void *inf,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+static bool dccp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_dccp_info *info = matchinfo;
+	const struct xt_dccp_info *info = par->matchinfo;
 
 	return !(info->flags & ~XT_DCCP_VALID_FLAGS)
 		&& !(info->invflags & ~XT_DCCP_VALID_FLAGS)
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 57d61206135..c3f8085460d 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -43,15 +43,12 @@ dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (dscp == info->dscp) ^ !!info->invert;
 }
 
-static bool
-dscp_mt_check(const char *tablename, const void *info,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+static bool dscp_mt_check(const struct xt_mtchk_param *par)
 {
-	const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
+	const struct xt_dscp_info *info = par->matchinfo;
 
-	if (dscp > XT_DSCP_MAX) {
-		printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp);
+	if (info->dscp > XT_DSCP_MAX) {
+		printk(KERN_ERR "xt_dscp: dscp %x out of range\n", info->dscp);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 6d59f2e7c1c..609439967c2 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -66,13 +66,9 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			 !!(espinfo->invflags & XT_ESP_INV_SPI));
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-esp_mt_check(const char *tablename, const void *ip_void,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool esp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_esp *espinfo = matchinfo;
+	const struct xt_esp *espinfo = par->matchinfo;
 
 	if (espinfo->invflags & ~XT_ESP_INV_MASK) {
 		duprintf("xt_esp: unknown flags %X\n", espinfo->invflags);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 22a60a728cf..2f73820e46d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -664,12 +664,9 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool
-hashlimit_mt_check_v0(const char *tablename, const void *inf,
-                      const struct xt_match *match, void *matchinfo,
-                      unsigned int hook_mask)
+static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	struct xt_hashlimit_info *r = matchinfo;
+	struct xt_hashlimit_info *r = par->matchinfo;
 
 	/* Check for overflow. */
 	if (r->cfg.burst == 0 ||
@@ -698,8 +695,8 @@ hashlimit_mt_check_v0(const char *tablename, const void *inf,
 	 * the list of htable's in htable_create(), since then we would
 	 * create duplicate proc files. -HW */
 	mutex_lock(&hlimit_mutex);
-	r->hinfo = htable_find_get(r->name, match->family);
-	if (!r->hinfo && htable_create_v0(r, match->family) != 0) {
+	r->hinfo = htable_find_get(r->name, par->match->family);
+	if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) {
 		mutex_unlock(&hlimit_mutex);
 		return false;
 	}
@@ -710,12 +707,9 @@ hashlimit_mt_check_v0(const char *tablename, const void *inf,
 	return true;
 }
 
-static bool
-hashlimit_mt_check(const char *tablename, const void *inf,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_hashlimit_mtinfo1 *info = matchinfo;
+	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 
 	/* Check for overflow. */
 	if (info->cfg.burst == 0 ||
@@ -729,7 +723,7 @@ hashlimit_mt_check(const char *tablename, const void *inf,
 		return false;
 	if (info->name[sizeof(info->name)-1] != '\0')
 		return false;
-	if (match->family == NFPROTO_IPV4) {
+	if (par->match->family == NFPROTO_IPV4) {
 		if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
 			return false;
 	} else {
@@ -744,8 +738,8 @@ hashlimit_mt_check(const char *tablename, const void *inf,
 	 * the list of htable's in htable_create(), since then we would
 	 * create duplicate proc files. -HW */
 	mutex_lock(&hlimit_mutex);
-	info->hinfo = htable_find_get(info->name, match->family);
-	if (!info->hinfo && htable_create(info, match->family) != 0) {
+	info->hinfo = htable_find_get(info->name, par->match->family);
+	if (!info->hinfo && htable_create(info, par->match->family) != 0) {
 		mutex_unlock(&hlimit_mutex);
 		return false;
 	}
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 73bdc3ba13f..86d3c332fcb 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -54,16 +54,13 @@ helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool
-helper_mt_check(const char *tablename, const void *inf,
-                const struct xt_match *match, void *matchinfo,
-                unsigned int hook_mask)
+static bool helper_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_helper_info *info = matchinfo;
+	struct xt_helper_info *info = par->matchinfo;
 
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->match->family);
 		return false;
 	}
 	info->name[29] = '\0';
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index c475eac5dbe..c908d69a559 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -92,12 +92,9 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE;
 }
 
-static bool
-limit_mt_check(const char *tablename, const void *inf,
-               const struct xt_match *match, void *matchinfo,
-               unsigned int hook_mask)
+static bool limit_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_rateinfo *r = matchinfo;
+	struct xt_rateinfo *r = par->matchinfo;
 
 	/* Check for overflow. */
 	if (r->burst == 0
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 88547614653..10b9e34bbc5 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -38,12 +38,9 @@ mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool
-mark_mt_check_v0(const char *tablename, const void *entry,
-                 const struct xt_match *match, void *matchinfo,
-                 unsigned int hook_mask)
+static bool mark_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct xt_mark_info *minfo = matchinfo;
+	const struct xt_mark_info *minfo = par->matchinfo;
 
 	if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
 		printk(KERN_WARNING "mark: only supports 32bit mark\n");
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 7087e291528..d06bb2dd390 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -158,50 +158,37 @@ check(u_int16_t proto,
 		&& count <= XT_MULTI_PORTS;
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-multiport_mt_check_v0(const char *tablename, const void *info,
-                      const struct xt_match *match, void *matchinfo,
-                      unsigned int hook_mask)
+static bool multiport_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct ipt_ip *ip = info;
-	const struct xt_multiport *multiinfo = matchinfo;
+	const struct ipt_ip *ip = par->entryinfo;
+	const struct xt_multiport *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
 		     multiinfo->count);
 }
 
-static bool
-multiport_mt_check(const char *tablename, const void *info,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool multiport_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ipt_ip *ip = info;
-	const struct xt_multiport_v1 *multiinfo = matchinfo;
+	const struct ipt_ip *ip = par->entryinfo;
+	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
 		     multiinfo->count);
 }
 
-static bool
-multiport_mt6_check_v0(const char *tablename, const void *info,
-                       const struct xt_match *match, void *matchinfo,
-                       unsigned int hook_mask)
+static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_ip6 *ip = info;
-	const struct xt_multiport *multiinfo = matchinfo;
+	const struct ip6t_ip6 *ip = par->entryinfo;
+	const struct xt_multiport *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
 		     multiinfo->count);
 }
 
-static bool
-multiport_mt6_check(const char *tablename, const void *info,
-                    const struct xt_match *match, void *matchinfo,
-                    unsigned int hook_mask)
+static bool multiport_mt6_check(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_ip6 *ip = info;
-	const struct xt_multiport_v1 *multiinfo = matchinfo;
+	const struct ip6t_ip6 *ip = par->entryinfo;
+	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
 		     multiinfo->count);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 493b5eb8d14..32f84e84d9e 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -107,12 +107,9 @@ owner_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-owner_mt_check_v0(const char *tablename, const void *ip,
-                  const struct xt_match *match, void *matchinfo,
-                  unsigned int hook_mask)
+static bool owner_mt_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct ipt_owner_info *info = matchinfo;
+	const struct ipt_owner_info *info = par->matchinfo;
 
 	if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) {
 		printk(KERN_WARNING KBUILD_MODNAME
@@ -124,12 +121,9 @@ owner_mt_check_v0(const char *tablename, const void *ip,
 	return true;
 }
 
-static bool
-owner_mt6_check_v0(const char *tablename, const void *ip,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool owner_mt6_check_v0(const struct xt_mtchk_param *par)
 {
-	const struct ip6t_owner_info *info = matchinfo;
+	const struct ip6t_owner_info *info = par->matchinfo;
 
 	if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
 		printk(KERN_WARNING KBUILD_MODNAME
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index e980e179d4f..b01786d2dd9 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -91,12 +91,9 @@ match_outdev:
 	return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT);
 }
 
-static bool
-physdev_mt_check(const char *tablename, const void *ip,
-                 const struct xt_match *match, void *matchinfo,
-                 unsigned int hook_mask)
+static bool physdev_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_physdev_info *info = matchinfo;
+	const struct xt_physdev_info *info = par->matchinfo;
 
 	if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
 	    info->bitmask & ~XT_PHYSDEV_OP_MASK)
@@ -104,12 +101,12 @@ physdev_mt_check(const char *tablename, const void *ip,
 	if (info->bitmask & XT_PHYSDEV_OP_OUT &&
 	    (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
-	    hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
-			 (1 << NF_INET_POST_ROUTING))) {
+	    par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
+	    (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
 		printk(KERN_WARNING "physdev match: using --physdev-out in the "
 		       "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
 		       "traffic is not supported anymore.\n");
-		if (hook_mask & (1 << NF_INET_LOCAL_OUT))
+		if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
 			return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index b0a00fb0511..328bd20ddd2 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -128,26 +128,23 @@ policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool
-policy_mt_check(const char *tablename, const void *ip_void,
-                const struct xt_match *match, void *matchinfo,
-                unsigned int hook_mask)
+static bool policy_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_policy_info *info = matchinfo;
+	const struct xt_policy_info *info = par->matchinfo;
 
 	if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) {
 		printk(KERN_ERR "xt_policy: neither incoming nor "
 				"outgoing policy selected\n");
 		return false;
 	}
-	if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN)
-	    && info->flags & XT_POLICY_MATCH_OUT) {
+	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
+	    (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
 		printk(KERN_ERR "xt_policy: output policy not valid in "
 				"PRE_ROUTING and INPUT\n");
 		return false;
 	}
-	if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT)
-	    && info->flags & XT_POLICY_MATCH_IN) {
+	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
+	    (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) {
 		printk(KERN_ERR "xt_policy: input policy not valid in "
 				"POST_ROUTING and OUTPUT\n");
 		return false;
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 3ab92666c14..c84fce5e0f3 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -37,12 +37,9 @@ quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool
-quota_mt_check(const char *tablename, const void *entry,
-               const struct xt_match *match, void *matchinfo,
-               unsigned int hook_mask)
+static bool quota_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_quota_info *q = matchinfo;
+	struct xt_quota_info *q = par->matchinfo;
 
 	if (q->flags & ~XT_QUOTA_MASK)
 		return false;
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index e9f64ef4565..4b05ce168a7 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -74,13 +74,9 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool xt_rateest_mt_checkentry(const char *tablename,
-				     const void *ip,
-				     const struct xt_match *match,
-				     void *matchinfo,
-				     unsigned int hook_mask)
+static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
 {
-	struct xt_rateest_match_info *info = matchinfo;
+	struct xt_rateest_match_info *info = par->matchinfo;
 	struct xt_rateest *est1, *est2;
 
 	if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index baeb90a5623..a512b49f3fe 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -280,12 +280,9 @@ out:
 	return ret;
 }
 
-static bool
-recent_mt_check(const char *tablename, const void *ip,
-                const struct xt_match *match, void *matchinfo,
-                unsigned int hook_mask)
+static bool recent_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_recent_mtinfo *info = matchinfo;
+	const struct xt_recent_mtinfo *info = par->matchinfo;
 	struct recent_table *t;
 	unsigned i;
 	bool ret = false;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index b0014ab65da..e223cb43ae8 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -147,12 +147,9 @@ sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
-static bool
-sctp_mt_check(const char *tablename, const void *inf,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+static bool sctp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_sctp_info *info = matchinfo;
+	const struct xt_sctp_info *info = par->matchinfo;
 
 	return !(info->flags & ~XT_SCTP_VALID_FLAGS)
 		&& !(info->invflags & ~XT_SCTP_VALID_FLAGS)
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 29f5a8a1b02..88b1235519d 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -37,14 +37,11 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (sinfo->statemask & statebit);
 }
 
-static bool
-state_mt_check(const char *tablename, const void *inf,
-               const struct xt_match *match, void *matchinfo,
-               unsigned int hook_mask)
+static bool state_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", match->family);
+				    "proto=%u\n", par->match->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index dcadc491db2..0d75141139d 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -49,12 +49,9 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool
-statistic_mt_check(const char *tablename, const void *entry,
-                   const struct xt_match *match, void *matchinfo,
-                   unsigned int hook_mask)
+static bool statistic_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_statistic_info *info = matchinfo;
+	struct xt_statistic_info *info = par->matchinfo;
 
 	if (info->mode > XT_STATISTIC_MODE_MAX ||
 	    info->flags & ~XT_STATISTIC_MASK)
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 33f2d29ca4f..c9407aa78f7 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -40,12 +40,9 @@ string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
 
-static bool
-string_mt_check(const char *tablename, const void *ip,
-                const struct xt_match *match, void *matchinfo,
-                unsigned int hook_mask)
+static bool string_mt_check(const struct xt_mtchk_param *par)
 {
-	struct xt_string_info *conf = matchinfo;
+	struct xt_string_info *conf = par->matchinfo;
 	struct ts_config *ts_conf;
 	int flags = TS_AUTOLOAD;
 
@@ -56,7 +53,7 @@ string_mt_check(const char *tablename, const void *ip,
 		return false;
 	if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
 		return false;
-	if (match->revision == 1) {
+	if (par->match->revision == 1) {
 		if (conf->u.v1.flags &
 		    ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT))
 			return false;
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 66cf71b1d59..1ebdc4934ee 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -126,13 +126,9 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-tcp_mt_check(const char *tablename, const void *info,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool tcp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_tcp *tcpinfo = matchinfo;
+	const struct xt_tcp *tcpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
 	return !(tcpinfo->invflags & ~XT_TCP_INV_MASK);
@@ -165,13 +161,9 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			      !!(udpinfo->invflags & XT_UDP_INV_DSTPT));
 }
 
-/* Called when user tries to insert an entry of this type. */
-static bool
-udp_mt_check(const char *tablename, const void *info,
-             const struct xt_match *match, void *matchinfo,
-             unsigned int hook_mask)
+static bool udp_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_udp *udpinfo = matchinfo;
+	const struct xt_udp *udpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
 	return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 28599d3979c..29375ba8db7 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -218,12 +218,9 @@ time_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool
-time_mt_check(const char *tablename, const void *ip,
-              const struct xt_match *match, void *matchinfo,
-              unsigned int hook_mask)
+static bool time_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct xt_time_info *info = matchinfo;
+	const struct xt_time_info *info = par->matchinfo;
 
 	if (info->daytime_start > XT_TIME_MAX_DAYTIME ||
 	    info->daytime_stop > XT_TIME_MAX_DAYTIME) {
-- 
cgit v1.2.3-70-g09d2


From 6be3d8598e883fb632edf059ba2f8d1b9f4da138 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:19 +0200
Subject: netfilter: xtables: move extension arguments into compound structure
 (3/6)

This patch does this for match extensions' destroy functions.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h |  8 +++++++-
 net/bridge/netfilter/ebtables.c    | 20 ++++++++++++--------
 net/ipv4/netfilter/ip_tables.c     | 10 +++++++---
 net/ipv6/netfilter/ip6_tables.c    | 10 +++++++---
 net/netfilter/xt_connbytes.c       |  4 ++--
 net/netfilter/xt_connlimit.c       |  7 +++----
 net/netfilter/xt_connmark.c        |  5 ++---
 net/netfilter/xt_conntrack.c       |  5 ++---
 net/netfilter/xt_hashlimit.c       |  9 ++++-----
 net/netfilter/xt_helper.c          |  4 ++--
 net/netfilter/xt_rateest.c         |  5 ++---
 net/netfilter/xt_recent.c          |  4 ++--
 net/netfilter/xt_state.c           |  4 ++--
 net/netfilter/xt_string.c          |  4 ++--
 14 files changed, 56 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 763a704ce83..c79c8838014 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -212,6 +212,12 @@ struct xt_mtchk_param {
 	unsigned int hook_mask;
 };
 
+/* Match destructor parameters */
+struct xt_mtdtor_param {
+	const struct xt_match *match;
+	void *matchinfo;
+};
+
 struct xt_match
 {
 	struct list_head list;
@@ -230,7 +236,7 @@ struct xt_match
 	bool (*checkentry)(const struct xt_mtchk_param *);
 
 	/* Called when entry of this type deleted. */
-	void (*destroy)(const struct xt_match *match, void *matchinfo);
+	void (*destroy)(const struct xt_mtdtor_param *);
 
 	/* Called when userspace align differs from kernel space one */
 	void (*compat_from_user)(void *dst, void *src);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5ce37b2f5b8..0320b520362 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -558,12 +558,16 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo,
 static inline int
 ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i)
 {
+	struct xt_mtdtor_param par;
+
 	if (i && (*i)-- == 0)
 		return 1;
-	if (m->u.match->destroy)
-		m->u.match->destroy(m->u.match, m->data);
-	module_put(m->u.match->me);
 
+	par.match     = m->u.match;
+	par.matchinfo = m->data;
+	if (par.match->destroy != NULL)
+		par.match->destroy(&par);
+	module_put(par.match->me);
 	return 0;
 }
 
@@ -609,7 +613,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	unsigned int i, j, hook = 0, hookmask = 0;
 	size_t gap;
 	int ret;
-	struct xt_mtchk_param par;
+	struct xt_mtchk_param mtpar;
 
 	/* don't mess with the struct ebt_entries */
 	if (e->bitmask == 0)
@@ -651,10 +655,10 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	}
 	i = 0;
 
-	par.table     = name;
-	par.entryinfo = e;
-	par.hook_mask = hookmask;
-	ret = EBT_MATCH_ITERATE(e, ebt_check_match, &par, &i);
+	mtpar.table     = name;
+	mtpar.entryinfo = e;
+	mtpar.hook_mask = hookmask;
+	ret = EBT_MATCH_ITERATE(e, ebt_check_match, &mtpar, &i);
 	if (ret != 0)
 		goto cleanup_matches;
 	j = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4147298a6a8..12ad4d5c55d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -576,12 +576,16 @@ mark_source_chains(struct xt_table_info *newinfo,
 static int
 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
 {
+	struct xt_mtdtor_param par;
+
 	if (i && (*i)-- == 0)
 		return 1;
 
-	if (m->u.kernel.match->destroy)
-		m->u.kernel.match->destroy(m->u.kernel.match, m->data);
-	module_put(m->u.kernel.match->me);
+	par.match     = m->u.kernel.match;
+	par.matchinfo = m->data;
+	if (par.match->destroy != NULL)
+		par.match->destroy(&par);
+	module_put(par.match->me);
 	return 0;
 }
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 9c843e3777b..891358e89a2 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -599,12 +599,16 @@ mark_source_chains(struct xt_table_info *newinfo,
 static int
 cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
 {
+	struct xt_mtdtor_param par;
+
 	if (i && (*i)-- == 0)
 		return 1;
 
-	if (m->u.kernel.match->destroy)
-		m->u.kernel.match->destroy(m->u.kernel.match, m->data);
-	module_put(m->u.kernel.match->me);
+	par.match     = m->u.kernel.match;
+	par.matchinfo = m->data;
+	if (par.match->destroy != NULL)
+		par.match->destroy(&par);
+	module_put(par.match->me);
 	return 0;
 }
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 43a36c728e5..5bf4aa08b0f 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -115,9 +115,9 @@ static bool connbytes_mt_check(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static void connbytes_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->match->family);
 }
 
 static struct xt_match connbytes_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 1361e9919cf..bfb3ee6c712 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -246,16 +246,15 @@ static bool connlimit_mt_check(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static void
-connlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	const struct xt_connlimit_info *info = matchinfo;
+	const struct xt_connlimit_info *info = par->matchinfo;
 	struct xt_connlimit_conn *conn;
 	struct xt_connlimit_conn *tmp;
 	struct list_head *hash = info->data->iphash;
 	unsigned int i;
 
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->match->family);
 
 	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
 		list_for_each_entry_safe(conn, tmp, &hash[i], list) {
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index b935b7888a9..c708577ea1b 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -87,10 +87,9 @@ static bool connmark_mt_check(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static void
-connmark_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->match->family);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index f04c46a02ce..5cd58d7fcb1 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -288,10 +288,9 @@ static bool conntrack_mt_check(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static void
-conntrack_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->match->family);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 2f73820e46d..6fc4292d46e 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -748,17 +748,16 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 }
 
 static void
-hashlimit_mt_destroy_v0(const struct xt_match *match, void *matchinfo)
+hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par)
 {
-	const struct xt_hashlimit_info *r = matchinfo;
+	const struct xt_hashlimit_info *r = par->matchinfo;
 
 	htable_put(r->hinfo);
 }
 
-static void
-hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	const struct xt_hashlimit_mtinfo1 *info = matchinfo;
+	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 
 	htable_put(info->hinfo);
 }
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 86d3c332fcb..280c984349f 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -67,9 +67,9 @@ static bool helper_mt_check(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static void helper_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void helper_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->match->family);
 }
 
 static struct xt_match helper_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 4b05ce168a7..220a1d588ee 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -117,10 +117,9 @@ err1:
 	return false;
 }
 
-static void xt_rateest_mt_destroy(const struct xt_match *match,
-				  void *matchinfo)
+static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	struct xt_rateest_match_info *info = matchinfo;
+	struct xt_rateest_match_info *info = par->matchinfo;
 
 	xt_rateest_put(info->est1);
 	if (info->est2)
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index a512b49f3fe..4ebd4ca9a99 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -349,9 +349,9 @@ out:
 	return ret;
 }
 
-static void recent_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void recent_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	const struct xt_recent_mtinfo *info = matchinfo;
+	const struct xt_recent_mtinfo *info = par->matchinfo;
 	struct recent_table *t;
 
 	mutex_lock(&recent_mutex);
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 88b1235519d..4c946cbd731 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -47,9 +47,9 @@ static bool state_mt_check(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static void state_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void state_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(match->family);
+	nf_ct_l3proto_module_put(par->match->family);
 }
 
 static struct xt_match state_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index c9407aa78f7..b4d77411131 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -70,9 +70,9 @@ static bool string_mt_check(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static void string_mt_destroy(const struct xt_match *match, void *matchinfo)
+static void string_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
+	textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config);
 }
 
 static struct xt_match xt_string_mt_reg[] __read_mostly = {
-- 
cgit v1.2.3-70-g09d2


From 7eb3558655aaa87a3e71a0c065dfaddda521fa6d Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:19 +0200
Subject: netfilter: xtables: move extension arguments into compound structure
 (4/6)

This patch does this for target extensions' target functions.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h  | 22 +++++++++++++++++-----
 net/bridge/netfilter/ebt_arpreply.c |  8 +++-----
 net/bridge/netfilter/ebt_dnat.c     |  6 ++----
 net/bridge/netfilter/ebt_log.c      | 14 ++++++--------
 net/bridge/netfilter/ebt_mark.c     |  6 ++----
 net/bridge/netfilter/ebt_nflog.c    |  9 ++++-----
 net/bridge/netfilter/ebt_redirect.c | 12 +++++-------
 net/bridge/netfilter/ebt_snat.c     |  6 ++----
 net/bridge/netfilter/ebt_ulog.c     |  9 +++------
 net/bridge/netfilter/ebtables.c     | 27 ++++++++++++++++-----------
 net/ipv4/netfilter/arp_tables.c     | 23 ++++++++++++-----------
 net/ipv4/netfilter/arpt_mangle.c    |  7 ++-----
 net/ipv4/netfilter/ip_tables.c      | 24 ++++++++++--------------
 net/ipv4/netfilter/ipt_CLUSTERIP.c  |  6 ++----
 net/ipv4/netfilter/ipt_ECN.c        |  6 ++----
 net/ipv4/netfilter/ipt_LOG.c        |  8 +++-----
 net/ipv4/netfilter/ipt_MASQUERADE.c | 14 ++++++--------
 net/ipv4/netfilter/ipt_NETMAP.c     | 17 ++++++++---------
 net/ipv4/netfilter/ipt_REDIRECT.c   | 12 +++++-------
 net/ipv4/netfilter/ipt_REJECT.c     |  8 +++-----
 net/ipv4/netfilter/ipt_TTL.c        |  6 ++----
 net/ipv4/netfilter/ipt_ULOG.c       | 10 +++-------
 net/ipv4/netfilter/nf_nat_rule.c    | 32 ++++++++++++--------------------
 net/ipv6/netfilter/ip6_tables.c     | 24 +++++++++++-------------
 net/ipv6/netfilter/ip6t_HL.c        |  6 ++----
 net/ipv6/netfilter/ip6t_LOG.c       |  8 +++-----
 net/ipv6/netfilter/ip6t_REJECT.c    | 18 ++++++++----------
 net/netfilter/xt_CLASSIFY.c         |  6 ++----
 net/netfilter/xt_CONNMARK.c         | 12 ++++--------
 net/netfilter/xt_CONNSECMARK.c      |  6 ++----
 net/netfilter/xt_DSCP.c             | 30 ++++++++++--------------------
 net/netfilter/xt_MARK.c             | 18 ++++++------------
 net/netfilter/xt_NFLOG.c            | 10 ++++------
 net/netfilter/xt_NFQUEUE.c          |  6 ++----
 net/netfilter/xt_NOTRACK.c          |  4 +---
 net/netfilter/xt_RATEEST.c          |  9 ++-------
 net/netfilter/xt_SECMARK.c          |  6 ++----
 net/netfilter/xt_TCPMSS.c           | 12 ++++--------
 net/netfilter/xt_TCPOPTSTRIP.c      | 12 ++++--------
 net/netfilter/xt_TPROXY.c           | 11 +++--------
 net/netfilter/xt_TRACE.c            |  4 +---
 net/sched/act_ipt.c                 | 12 ++++++++----
 42 files changed, 209 insertions(+), 297 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c79c8838014..46d0cb1ad34 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -218,6 +218,22 @@ struct xt_mtdtor_param {
 	void *matchinfo;
 };
 
+/**
+ * struct xt_target_param - parameters for target extensions' target functions
+ *
+ * @hooknum:	hook through which this target was invoked
+ * @target:	struct xt_target through which this function was invoked
+ * @targinfo:	per-target data
+ *
+ * Other fields see above.
+ */
+struct xt_target_param {
+	const struct net_device *in, *out;
+	unsigned int hooknum;
+	const struct xt_target *target;
+	const void *targinfo;
+};
+
 struct xt_match
 {
 	struct list_head list;
@@ -269,11 +285,7 @@ struct xt_target
 	   must now handle non-linear skbs, using skb_copy_bits and
 	   skb_ip_make_writable. */
 	unsigned int (*target)(struct sk_buff *skb,
-			       const struct net_device *in,
-			       const struct net_device *out,
-			       unsigned int hooknum,
-			       const struct xt_target *target,
-			       const void *targinfo);
+			       const struct xt_target_param *);
 
 	/* Called when user tries to insert an entry of this type:
            hook_mask is a bitmask of hooks from which it can be
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index baf5510d044..fc94699f719 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -16,11 +16,9 @@
 #include <linux/netfilter_bridge/ebt_arpreply.h>
 
 static unsigned int
-ebt_arpreply_tg(struct sk_buff *skb, const struct net_device *in,
-		const struct net_device *out, unsigned int hook_nr,
-		const struct xt_target *target, const void *data)
+ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ebt_arpreply_info *info = data;
+	const struct ebt_arpreply_info *info = par->targinfo;
 	const __be32 *siptr, *diptr;
 	__be32 _sip, _dip;
 	const struct arphdr *ap;
@@ -53,7 +51,7 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct net_device *in,
 	if (diptr == NULL)
 		return EBT_DROP;
 
-	arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)in,
+	arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in,
 		 *diptr, shp, info->mac, shp);
 
 	return info->target;
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index cb80101e412..bb5d79e0bee 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -15,11 +15,9 @@
 #include <linux/netfilter_bridge/ebt_nat.h>
 
 static unsigned int
-ebt_dnat_tg(struct sk_buff *skb, const struct net_device *in,
-	    const struct net_device *out, unsigned int hook_nr,
-	    const struct xt_target *target, const void *data)
+ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ebt_nat_info *info = data;
+	const struct ebt_nat_info *info = par->targinfo;
 
 	if (!skb_make_writable(skb, 0))
 		return EBT_DROP;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index b40f9ed4c34..87de5fccb2f 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -195,11 +195,9 @@ out:
 }
 
 static unsigned int
-ebt_log_tg(struct sk_buff *skb, const struct net_device *in,
-	   const struct net_device *out, unsigned int hooknr,
-	   const struct xt_target *target, const void *data)
+ebt_log_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ebt_log_info *info = data;
+	const struct ebt_log_info *info = par->targinfo;
 	struct nf_loginfo li;
 
 	li.type = NF_LOG_TYPE_LOG;
@@ -207,11 +205,11 @@ ebt_log_tg(struct sk_buff *skb, const struct net_device *in,
 	li.u.log.logflags = info->bitmask;
 
 	if (info->bitmask & EBT_LOG_NFLOG)
-		nf_log_packet(NFPROTO_BRIDGE, hooknr, skb, in, out, &li,
-			      "%s", info->prefix);
+		nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
+		              par->out, &li, "%s", info->prefix);
 	else
-		ebt_log_packet(NFPROTO_BRIDGE, hooknr, skb, in, out, &li,
-			       info->prefix);
+		ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
+		               par->out, &li, info->prefix);
 	return EBT_CONTINUE;
 }
 
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index dff19fc91cf..aafc456c3c3 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -19,11 +19,9 @@
 #include <linux/netfilter_bridge/ebt_mark_t.h>
 
 static unsigned int
-ebt_mark_tg(struct sk_buff *skb, const struct net_device *in,
-	    const struct net_device *out, unsigned int hook_nr,
-	    const struct xt_target *target, const void *data)
+ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ebt_mark_t_info *info = data;
+	const struct ebt_mark_t_info *info = par->targinfo;
 	int action = info->target & -16;
 
 	if (action == MARK_SET_VALUE)
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 74b4fa0aabc..6a28d994cf7 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -20,11 +20,9 @@
 #include <net/netfilter/nf_log.h>
 
 static unsigned int
-ebt_nflog_tg(struct sk_buff *skb, const struct net_device *in,
-	     const struct net_device *out, unsigned int hooknr,
-	     const struct xt_target *target, const void *data)
+ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ebt_nflog_info *info = data;
+	const struct ebt_nflog_info *info = par->targinfo;
 	struct nf_loginfo li;
 
 	li.type = NF_LOG_TYPE_ULOG;
@@ -32,7 +30,8 @@ ebt_nflog_tg(struct sk_buff *skb, const struct net_device *in,
 	li.u.ulog.group = info->group;
 	li.u.ulog.qthreshold = info->threshold;
 
-	nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, "%s", info->prefix);
+	nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out,
+	              &li, "%s", info->prefix);
 	return EBT_CONTINUE;
 }
 
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index a50ffbe0e4f..0cfe2fad940 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -16,20 +16,18 @@
 #include <linux/netfilter_bridge/ebt_redirect.h>
 
 static unsigned int
-ebt_redirect_tg(struct sk_buff *skb, const struct net_device *in,
-		const struct net_device *out, unsigned int hooknr,
-		const struct xt_target *target, const void *data)
+ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ebt_redirect_info *info = data;
+	const struct ebt_redirect_info *info = par->targinfo;
 
 	if (!skb_make_writable(skb, 0))
 		return EBT_DROP;
 
-	if (hooknr != NF_BR_BROUTING)
+	if (par->hooknum != NF_BR_BROUTING)
 		memcpy(eth_hdr(skb)->h_dest,
-		       in->br_port->br->dev->dev_addr, ETH_ALEN);
+		       par->in->br_port->br->dev->dev_addr, ETH_ALEN);
 	else
-		memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN);
+		memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN);
 	skb->pkt_type = PACKET_HOST;
 	return info->target;
 }
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 8a55c7d49b5..f55960eee99 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -17,11 +17,9 @@
 #include <linux/netfilter_bridge/ebt_nat.h>
 
 static unsigned int
-ebt_snat_tg(struct sk_buff *skb, const struct net_device *in,
-	    const struct net_device *out, unsigned int hook_nr,
-	    const struct xt_target *target, const void *data)
+ebt_snat_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ebt_nat_info *info = data;
+	const struct ebt_nat_info *info = par->targinfo;
 
 	if (!skb_make_writable(skb, 0))
 		return EBT_DROP;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 25ca6467349..bfedf12cbf4 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -247,13 +247,10 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 }
 
 static unsigned int
-ebt_ulog_tg(struct sk_buff *skb, const struct net_device *in,
-	    const struct net_device *out, unsigned int hooknr,
-	    const struct xt_target *target, const void *data)
+ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ebt_ulog_info *uloginfo = data;
-
-	ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL);
+	ebt_ulog_packet(par->hooknum, skb, par->in, par->out,
+	                par->targinfo, NULL);
 	return EBT_CONTINUE;
 }
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 0320b520362..a1156bab4a0 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -64,11 +64,13 @@ static struct xt_target ebt_standard_target = {
 	.targetsize = sizeof(int),
 };
 
-static inline int ebt_do_watcher (struct ebt_entry_watcher *w,
-   struct sk_buff *skb, unsigned int hooknr, const struct net_device *in,
-   const struct net_device *out)
+static inline int
+ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb,
+	       struct xt_target_param *par)
 {
-	w->u.watcher->target(skb, in, out, hooknr, w->u.watcher, w->data);
+	par->target   = w->u.watcher;
+	par->targinfo = w->data;
+	w->u.watcher->target(skb, par);
 	/* watchers don't give a verdict */
 	return 0;
 }
@@ -156,10 +158,12 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	struct ebt_table_info *private;
 	bool hotdrop = false;
 	struct xt_match_param mtpar;
+	struct xt_target_param tgpar;
 
-	mtpar.in      = in;
-	mtpar.out     = out;
+	mtpar.in      = tgpar.in  = in;
+	mtpar.out     = tgpar.out = out;
 	mtpar.hotdrop = &hotdrop;
+	tgpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
 	private = table->private;
@@ -193,17 +197,18 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 
 		/* these should only watch: not modify, nor tell us
 		   what to do with the packet */
-		EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in,
-		   out);
+		EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &tgpar);
 
 		t = (struct ebt_entry_target *)
 		   (((char *)point) + point->target_offset);
 		/* standard target */
 		if (!t->u.target->target)
 			verdict = ((struct ebt_standard_target *)t)->verdict;
-		else
-			verdict = t->u.target->target(skb, in, out, hook,
-				  t->u.target, t->data);
+		else {
+			tgpar.target   = t->u.target;
+			tgpar.targinfo = t->data;
+			verdict = t->u.target->target(skb, &tgpar);
+		}
 		if (verdict == EBT_ACCEPT) {
 			read_unlock_bh(&table->lock);
 			return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index ae525a9afbe..5b631ad74b5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -200,15 +200,12 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
 	return 1;
 }
 
-static unsigned int arpt_error(struct sk_buff *skb,
-			       const struct net_device *in,
-			       const struct net_device *out,
-			       unsigned int hooknum,
-			       const struct xt_target *target,
-			       const void *targinfo)
+static unsigned int
+arpt_error(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	if (net_ratelimit())
-		printk("arp_tables: error: '%s'\n", (char *)targinfo);
+		printk("arp_tables: error: '%s'\n",
+		       (const char *)par->targinfo);
 
 	return NF_DROP;
 }
@@ -232,6 +229,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	const char *indev, *outdev;
 	void *table_base;
 	const struct xt_table_info *private;
+	struct xt_target_param tgpar;
 
 	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
 		return NF_DROP;
@@ -245,6 +243,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
+	tgpar.in      = in;
+	tgpar.out     = out;
+	tgpar.hooknum = hook;
+
 	arp = arp_hdr(skb);
 	do {
 		if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
@@ -290,11 +292,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 				/* Targets which reenter must return
 				 * abs. verdicts
 				 */
+				tgpar.target   = t->u.kernel.target;
+				tgpar.targinfo = t->data;
 				verdict = t->u.kernel.target->target(skb,
-								     in, out,
-								     hook,
-								     t->u.kernel.target,
-								     t->data);
+								     &tgpar);
 
 				/* Target might have changed stuff. */
 				arp = arp_hdr(skb);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 3f9e4ccd616..0bf81b35369 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -9,12 +9,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
 MODULE_DESCRIPTION("arptables arp payload mangle target");
 
 static unsigned int
-target(struct sk_buff *skb,
-       const struct net_device *in, const struct net_device *out,
-       unsigned int hooknum, const struct xt_target *target,
-       const void *targinfo)
+target(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct arpt_mangle *mangle = targinfo;
+	const struct arpt_mangle *mangle = par->targinfo;
 	const struct arphdr *arp;
 	unsigned char *arpptr;
 	int pln, hln;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 12ad4d5c55d..0f8ecf39022 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -171,15 +171,11 @@ ip_checkentry(const struct ipt_ip *ip)
 }
 
 static unsigned int
-ipt_error(struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  unsigned int hooknum,
-	  const struct xt_target *target,
-	  const void *targinfo)
+ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	if (net_ratelimit())
-		printk("ip_tables: error: `%s'\n", (char *)targinfo);
+		printk("ip_tables: error: `%s'\n",
+		       (const char *)par->targinfo);
 
 	return NF_DROP;
 }
@@ -334,6 +330,7 @@ ipt_do_table(struct sk_buff *skb,
 	struct ipt_entry *e, *back;
 	struct xt_table_info *private;
 	struct xt_match_param mtpar;
+	struct xt_target_param tgpar;
 
 	/* Initialization */
 	ip = ip_hdr(skb);
@@ -349,8 +346,9 @@ ipt_do_table(struct sk_buff *skb,
 	mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 	mtpar.thoff   = ip_hdrlen(skb);
 	mtpar.hotdrop = &hotdrop;
-	mtpar.in      = in;
-	mtpar.out     = out;
+	mtpar.in      = tgpar.in  = in;
+	mtpar.out     = tgpar.out = out;
+	tgpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
@@ -414,16 +412,14 @@ ipt_do_table(struct sk_buff *skb,
 			} else {
 				/* Targets which reenter must return
 				   abs. verdicts */
+				tgpar.target   = t->u.kernel.target;
+				tgpar.targinfo = t->data;
 #ifdef CONFIG_NETFILTER_DEBUG
 				((struct ipt_entry *)table_base)->comefrom
 					= 0xeeeeeeec;
 #endif
 				verdict = t->u.kernel.target->target(skb,
-								     in, out,
-								     hook,
-								     t->u.kernel.target,
-								     t->data);
-
+								     &tgpar);
 #ifdef CONFIG_NETFILTER_DEBUG
 				if (((struct ipt_entry *)table_base)->comefrom
 				    != 0xeeeeeeec
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 63faddc18a1..67e8aa8f34f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -281,11 +281,9 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
  ***********************************************************************/
 
 static unsigned int
-clusterip_tg(struct sk_buff *skb, const struct net_device *in,
-             const struct net_device *out, unsigned int hooknum,
-             const struct xt_target *target, const void *targinfo)
+clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+	const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	u_int32_t hash;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index aee2364afff..e37f181e829 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -77,11 +77,9 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 }
 
 static unsigned int
-ecn_tg(struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, unsigned int hooknum,
-       const struct xt_target *target, const void *targinfo)
+ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ipt_ECN_info *einfo = targinfo;
+	const struct ipt_ECN_info *einfo = par->targinfo;
 
 	if (einfo->operation & IPT_ECN_OP_SET_IP)
 		if (!set_ect_ip(skb, einfo))
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 1c9785df4df..e9942aed35a 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -426,18 +426,16 @@ ipt_log_packet(u_int8_t pf,
 }
 
 static unsigned int
-log_tg(struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, unsigned int hooknum,
-       const struct xt_target *target, const void *targinfo)
+log_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ipt_log_info *loginfo = targinfo;
+	const struct ipt_log_info *loginfo = par->targinfo;
 	struct nf_loginfo li;
 
 	li.type = NF_LOG_TYPE_LOG;
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	ipt_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, &li,
+	ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li,
 		       loginfo->prefix);
 	return XT_CONTINUE;
 }
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 65c811b27b7..e0d9d49b79e 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -50,9 +50,7 @@ masquerade_tg_check(const char *tablename, const void *e,
 }
 
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct net_device *in,
-              const struct net_device *out, unsigned int hooknum,
-              const struct xt_target *target, const void *targinfo)
+masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
@@ -62,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in,
 	const struct rtable *rt;
 	__be32 newsrc;
 
-	NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
+	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
 
 	ct = nf_ct_get(skb, &ctinfo);
 	nat = nfct_nat(ct);
@@ -76,16 +74,16 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in,
 	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
 		return NF_ACCEPT;
 
-	mr = targinfo;
+	mr = par->targinfo;
 	rt = skb->rtable;
-	newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+	newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
 	if (!newsrc) {
-		printk("MASQUERADE: %s ate my IP address\n", out->name);
+		printk("MASQUERADE: %s ate my IP address\n", par->out->name);
 		return NF_DROP;
 	}
 
 	write_lock_bh(&masq_lock);
-	nat->masq_index = out->ifindex;
+	nat->masq_index = par->out->ifindex;
 	write_unlock_bh(&masq_lock);
 
 	/* Transfer from original range. */
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f281500bd7f..cf18f23b346 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -41,24 +41,23 @@ netmap_tg_check(const char *tablename, const void *e,
 }
 
 static unsigned int
-netmap_tg(struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, unsigned int hooknum,
-          const struct xt_target *target, const void *targinfo)
+netmap_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 new_ip, netmask;
-	const struct nf_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 	struct nf_nat_range newrange;
 
-	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING
-		     || hooknum == NF_INET_POST_ROUTING
-		     || hooknum == NF_INET_LOCAL_OUT);
+	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
+		     par->hooknum == NF_INET_POST_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_OUT);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
-	if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_OUT)
+	if (par->hooknum == NF_INET_PRE_ROUTING ||
+	    par->hooknum == NF_INET_LOCAL_OUT)
 		new_ip = ip_hdr(skb)->daddr & ~netmask;
 	else
 		new_ip = ip_hdr(skb)->saddr & ~netmask;
@@ -70,7 +69,7 @@ netmap_tg(struct sk_buff *skb, const struct net_device *in,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(hooknum));
+	return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
 }
 
 static struct xt_target netmap_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index ef496105eae..23adb09ddfb 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -45,24 +45,22 @@ redirect_tg_check(const char *tablename, const void *e,
 }
 
 static unsigned int
-redirect_tg(struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, unsigned int hooknum,
-            const struct xt_target *target, const void *targinfo)
+redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 newdst;
-	const struct nf_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 	struct nf_nat_range newrange;
 
-	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING
-		     || hooknum == NF_INET_LOCAL_OUT);
+	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_OUT);
 
 	ct = nf_ct_get(skb, &ctinfo);
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
 	/* Local packets: make them go to loopback */
-	if (hooknum == NF_INET_LOCAL_OUT)
+	if (par->hooknum == NF_INET_LOCAL_OUT)
 		newdst = htonl(0x7F000001);
 	else {
 		struct in_device *indev;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 9f5da0c2cae..b36071bb107 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -136,11 +136,9 @@ static inline void send_unreach(struct sk_buff *skb_in, int code)
 }
 
 static unsigned int
-reject_tg(struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, unsigned int hooknum,
-          const struct xt_target *target, const void *targinfo)
+reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ipt_reject_info *reject = targinfo;
+	const struct ipt_reject_info *reject = par->targinfo;
 
 	/* WARNING: This code causes reentry within iptables.
 	   This means that the iptables jump stack is now crap.  We
@@ -168,7 +166,7 @@ reject_tg(struct sk_buff *skb, const struct net_device *in,
 		send_unreach(skb, ICMP_PKT_FILTERED);
 		break;
 	case IPT_TCP_RESET:
-		send_reset(skb, hooknum);
+		send_reset(skb, par->hooknum);
 	case IPT_ICMP_ECHOREPLY:
 		/* Doesn't happen. */
 		break;
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 7d01d424a71..05cbfd2f747 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -20,12 +20,10 @@ MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target");
 MODULE_LICENSE("GPL");
 
 static unsigned int
-ttl_tg(struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, unsigned int hooknum,
-       const struct xt_target *target, const void *targinfo)
+ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct iphdr *iph;
-	const struct ipt_TTL_info *info = targinfo;
+	const struct ipt_TTL_info *info = par->targinfo;
 	int new_ttl;
 
 	if (!skb_make_writable(skb, skb->len))
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 9065e4a34fb..46c0df0dc2d 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -281,14 +281,10 @@ alloc_failure:
 }
 
 static unsigned int
-ulog_tg(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
+ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
-
-	ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL);
-
+	ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
+	                par->targinfo, NULL);
 	return XT_CONTINUE;
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index f929352ec0e..83170ff131f 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -67,25 +67,21 @@ static struct xt_table nat_table = {
 };
 
 /* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    unsigned int hooknum,
-				    const struct xt_target *target,
-				    const void *targinfo)
+static unsigned int
+ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
-	NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
+	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
 			    ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
-	NF_CT_ASSERT(out);
+	NF_CT_ASSERT(par->out != NULL);
 
 	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
 }
@@ -109,28 +105,24 @@ static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip)
 	ip_rt_put(rt);
 }
 
-static unsigned int ipt_dnat_target(struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    unsigned int hooknum,
-				    const struct xt_target *target,
-				    const void *targinfo)
+static unsigned int
+ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
-	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
-		     hooknum == NF_INET_LOCAL_OUT);
+	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_OUT);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
-	if (hooknum == NF_INET_LOCAL_OUT &&
+	if (par->hooknum == NF_INET_LOCAL_OUT &&
 	    mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle(dev_net(out), ip_hdr(skb)->daddr,
+		warn_if_extra_mangle(dev_net(par->out), ip_hdr(skb)->daddr,
 				     mr->range[0].min_ip);
 
 	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 891358e89a2..ee0986cdbd6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -200,15 +200,11 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6)
 }
 
 static unsigned int
-ip6t_error(struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  unsigned int hooknum,
-	  const struct xt_target *target,
-	  const void *targinfo)
+ip6t_error(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	if (net_ratelimit())
-		printk("ip6_tables: error: `%s'\n", (char *)targinfo);
+		printk("ip6_tables: error: `%s'\n",
+		       (const char *)par->targinfo);
 
 	return NF_DROP;
 }
@@ -360,6 +356,7 @@ ip6t_do_table(struct sk_buff *skb,
 	struct ip6t_entry *e, *back;
 	struct xt_table_info *private;
 	struct xt_match_param mtpar;
+	struct xt_target_param tgpar;
 
 	/* Initialization */
 	indev = in ? in->name : nulldevname;
@@ -371,8 +368,9 @@ ip6t_do_table(struct sk_buff *skb,
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
 	mtpar.hotdrop = &hotdrop;
-	mtpar.in      = in;
-	mtpar.out     = out;
+	mtpar.in      = tgpar.in  = in;
+	mtpar.out     = tgpar.out = out;
+	tgpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
@@ -438,15 +436,15 @@ ip6t_do_table(struct sk_buff *skb,
 			} else {
 				/* Targets which reenter must return
 				   abs. verdicts */
+				tgpar.target   = t->u.kernel.target;
+				tgpar.targinfo = t->data;
+
 #ifdef CONFIG_NETFILTER_DEBUG
 				((struct ip6t_entry *)table_base)->comefrom
 					= 0xeeeeeeec;
 #endif
 				verdict = t->u.kernel.target->target(skb,
-								     in, out,
-								     hook,
-								     t->u.kernel.target,
-								     t->data);
+								     &tgpar);
 
 #ifdef CONFIG_NETFILTER_DEBUG
 				if (((struct ip6t_entry *)table_base)->comefrom
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 7eebd350916..ac759a54f2c 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -19,12 +19,10 @@ MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target");
 MODULE_LICENSE("GPL");
 
 static unsigned int
-hl_tg6(struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, unsigned int hooknum,
-       const struct xt_target *target, const void *targinfo)
+hl_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct ipv6hdr *ip6h;
-	const struct ip6t_HL_info *info = targinfo;
+	const struct ip6t_HL_info *info = par->targinfo;
 	int new_hl;
 
 	if (!skb_make_writable(skb, skb->len))
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index fd148f3d842..a31d3ecd1fc 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -438,18 +438,16 @@ ip6t_log_packet(u_int8_t pf,
 }
 
 static unsigned int
-log_tg6(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
+log_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ip6t_log_info *loginfo = targinfo;
+	const struct ip6t_log_info *loginfo = par->targinfo;
 	struct nf_loginfo li;
 
 	li.type = NF_LOG_TYPE_LOG;
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	ip6t_log_packet(NFPROTO_IPV6, hooknum, skb, in, out,
+	ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out,
 			&li, loginfo->prefix);
 	return XT_CONTINUE;
 }
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index f1a9fce1ec9..1d5f3a70ed0 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -173,12 +173,10 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
 }
 
 static unsigned int
-reject_tg6(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ip6t_reject_info *reject = targinfo;
-	struct net *net = dev_net(in ? in : out);
+	const struct ip6t_reject_info *reject = par->targinfo;
+	struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
 
 	pr_debug("%s: medium point\n", __func__);
 	/* WARNING: This code causes reentry within ip6tables.
@@ -186,19 +184,19 @@ reject_tg6(struct sk_buff *skb, const struct net_device *in,
 	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IP6T_ICMP6_NO_ROUTE:
-		send_unreach(net, skb, ICMPV6_NOROUTE, hooknum);
+		send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
 		break;
 	case IP6T_ICMP6_ADM_PROHIBITED:
-		send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, hooknum);
+		send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
 		break;
 	case IP6T_ICMP6_NOT_NEIGHBOUR:
-		send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, hooknum);
+		send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
 		break;
 	case IP6T_ICMP6_ADDR_UNREACH:
-		send_unreach(net, skb, ICMPV6_ADDR_UNREACH, hooknum);
+		send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
 		break;
 	case IP6T_ICMP6_PORT_UNREACH:
-		send_unreach(net, skb, ICMPV6_PORT_UNREACH, hooknum);
+		send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
 		break;
 	case IP6T_ICMP6_ECHOREPLY:
 		/* Do nothing */
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 8cffa295dd3..011bc80dd2a 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,11 +27,9 @@ MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
 
 static unsigned int
-classify_tg(struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, unsigned int hooknum,
-            const struct xt_target *target, const void *targinfo)
+classify_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_classify_target_info *clinfo = targinfo;
+	const struct xt_classify_target_info *clinfo = par->targinfo;
 
 	skb->priority = clinfo->priority;
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index e1415c3f5c9..95ed267328a 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -36,11 +36,9 @@ MODULE_ALIAS("ip6t_CONNMARK");
 #include <net/netfilter/nf_conntrack_ecache.h>
 
 static unsigned int
-connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
-               const struct net_device *out, unsigned int hooknum,
-               const struct xt_target *target, const void *targinfo)
+connmark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_connmark_target_info *markinfo = targinfo;
+	const struct xt_connmark_target_info *markinfo = par->targinfo;
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	u_int32_t diff;
@@ -77,11 +75,9 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
 }
 
 static unsigned int
-connmark_tg(struct sk_buff *skb, const struct net_device *in,
-            const struct net_device *out, unsigned int hooknum,
-            const struct xt_target *target, const void *targinfo)
+connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_connmark_tginfo1 *info = targinfo;
+	const struct xt_connmark_tginfo1 *info = par->targinfo;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	u_int32_t newmark;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 5f221c3bd35..2211a2cef28 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -65,11 +65,9 @@ static void secmark_restore(struct sk_buff *skb)
 }
 
 static unsigned int
-connsecmark_tg(struct sk_buff *skb, const struct net_device *in,
-               const struct net_device *out, unsigned int hooknum,
-               const struct xt_target *target, const void *targinfo)
+connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_connsecmark_target_info *info = targinfo;
+	const struct xt_connsecmark_target_info *info = par->targinfo;
 
 	switch (info->mode) {
 	case CONNSECMARK_SAVE:
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index f0b4958528e..c78e80afdf3 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -29,11 +29,9 @@ MODULE_ALIAS("ipt_TOS");
 MODULE_ALIAS("ip6t_TOS");
 
 static unsigned int
-dscp_tg(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
+dscp_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_DSCP_info *dinfo = targinfo;
+	const struct xt_DSCP_info *dinfo = par->targinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
@@ -48,11 +46,9 @@ dscp_tg(struct sk_buff *skb, const struct net_device *in,
 }
 
 static unsigned int
-dscp_tg6(struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, unsigned int hooknum,
-         const struct xt_target *target, const void *targinfo)
+dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_DSCP_info *dinfo = targinfo;
+	const struct xt_DSCP_info *dinfo = par->targinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
@@ -80,11 +76,9 @@ dscp_tg_check(const char *tablename, const void *e_void,
 }
 
 static unsigned int
-tos_tg_v0(struct sk_buff *skb, const struct net_device *in,
-          const struct net_device *out, unsigned int hooknum,
-          const struct xt_target *target, const void *targinfo)
+tos_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct ipt_tos_target_info *info = targinfo;
+	const struct ipt_tos_target_info *info = par->targinfo;
 	struct iphdr *iph = ip_hdr(skb);
 	u_int8_t oldtos;
 
@@ -119,11 +113,9 @@ tos_tg_check_v0(const char *tablename, const void *e_void,
 }
 
 static unsigned int
-tos_tg(struct sk_buff *skb, const struct net_device *in,
-       const struct net_device *out, unsigned int hooknum,
-       const struct xt_target *target, const void *targinfo)
+tos_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_tos_target_info *info = targinfo;
+	const struct xt_tos_target_info *info = par->targinfo;
 	struct iphdr *iph = ip_hdr(skb);
 	u_int8_t orig, nv;
 
@@ -141,11 +133,9 @@ tos_tg(struct sk_buff *skb, const struct net_device *in,
 }
 
 static unsigned int
-tos_tg6(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
+tos_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_tos_target_info *info = targinfo;
+	const struct xt_tos_target_info *info = par->targinfo;
 	struct ipv6hdr *iph = ipv6_hdr(skb);
 	u_int8_t orig, nv;
 
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index c8ea7a80970..27d03f39611 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -25,22 +25,18 @@ MODULE_ALIAS("ipt_MARK");
 MODULE_ALIAS("ip6t_MARK");
 
 static unsigned int
-mark_tg_v0(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+mark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_mark_target_info *markinfo = targinfo;
+	const struct xt_mark_target_info *markinfo = par->targinfo;
 
 	skb->mark = markinfo->mark;
 	return XT_CONTINUE;
 }
 
 static unsigned int
-mark_tg_v1(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+mark_tg_v1(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_mark_target_info_v1 *markinfo = targinfo;
+	const struct xt_mark_target_info_v1 *markinfo = par->targinfo;
 	int mark = 0;
 
 	switch (markinfo->mode) {
@@ -62,11 +58,9 @@ mark_tg_v1(struct sk_buff *skb, const struct net_device *in,
 }
 
 static unsigned int
-mark_tg(struct sk_buff *skb, const struct net_device *in,
-        const struct net_device *out, unsigned int hooknum,
-        const struct xt_target *target, const void *targinfo)
+mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_mark_tginfo2 *info = targinfo;
+	const struct xt_mark_tginfo2 *info = par->targinfo;
 
 	skb->mark = (skb->mark & ~info->mask) ^ info->mark;
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 9b095520176..3218ad63bd1 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -21,11 +21,9 @@ MODULE_ALIAS("ipt_NFLOG");
 MODULE_ALIAS("ip6t_NFLOG");
 
 static unsigned int
-nflog_tg(struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, unsigned int hooknum,
-         const struct xt_target *target, const void *targinfo)
+nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_nflog_info *info = targinfo;
+	const struct xt_nflog_info *info = par->targinfo;
 	struct nf_loginfo li;
 
 	li.type		     = NF_LOG_TYPE_ULOG;
@@ -33,8 +31,8 @@ nflog_tg(struct sk_buff *skb, const struct net_device *in,
 	li.u.ulog.group	     = info->group;
 	li.u.ulog.qthreshold = info->threshold;
 
-	nf_log_packet(target->family, hooknum, skb, in, out, &li,
-		      "%s", info->prefix);
+	nf_log_packet(par->target->family, par->hooknum, skb, par->in,
+	              par->out, &li, "%s", info->prefix);
 	return XT_CONTINUE;
 }
 
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index c03c2e8d06f..2cc1fff4930 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -24,11 +24,9 @@ MODULE_ALIAS("ip6t_NFQUEUE");
 MODULE_ALIAS("arpt_NFQUEUE");
 
 static unsigned int
-nfqueue_tg(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_NFQ_info *tinfo = targinfo;
+	const struct xt_NFQ_info *tinfo = par->targinfo;
 
 	return NF_QUEUE_NR(tinfo->queuenum);
 }
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index b9ee268b37c..cc50295cd11 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -13,9 +13,7 @@ MODULE_ALIAS("ipt_NOTRACK");
 MODULE_ALIAS("ip6t_NOTRACK");
 
 static unsigned int
-notrack_tg(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+notrack_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	/* Previously seen (loopback)? Ignore. */
 	if (skb->nfct != NULL)
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index da7946e6ecb..92e33524f78 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -71,14 +71,9 @@ void xt_rateest_put(struct xt_rateest *est)
 EXPORT_SYMBOL_GPL(xt_rateest_put);
 
 static unsigned int
-xt_rateest_tg(struct sk_buff *skb,
-	      const struct net_device *in,
-	      const struct net_device *out,
-	      unsigned int hooknum,
-	      const struct xt_target *target,
-	      const void *targinfo)
+xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	const struct xt_rateest_target_info *info = targinfo;
+	const struct xt_rateest_target_info *info = par->targinfo;
 	struct gnet_stats_basic *stats = &info->est->bstats;
 
 	spin_lock_bh(&info->est->lock);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 2a2ab833481..ad05214e380 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -29,12 +29,10 @@ MODULE_ALIAS("ip6t_SECMARK");
 static u8 mode;
 
 static unsigned int
-secmark_tg(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+secmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	u32 secmark = 0;
-	const struct xt_secmark_target_info *info = targinfo;
+	const struct xt_secmark_target_info *info = par->targinfo;
 
 	BUG_ON(info->mode != mode);
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index b868f995239..e08762d9b0f 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -174,15 +174,13 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 }
 
 static unsigned int
-tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	__be16 newlen;
 	int ret;
 
-	ret = tcpmss_mangle_packet(skb, targinfo,
+	ret = tcpmss_mangle_packet(skb, par->targinfo,
 				   tcpmss_reverse_mtu(skb, PF_INET),
 				   iph->ihl * 4,
 				   sizeof(*iph) + sizeof(struct tcphdr));
@@ -199,9 +197,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 static unsigned int
-tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
-           const struct net_device *out, unsigned int hooknum,
-           const struct xt_target *target, const void *targinfo)
+tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u8 nexthdr;
@@ -212,7 +208,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
 	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
 	if (tcphoff < 0)
 		return NF_DROP;
-	ret = tcpmss_mangle_packet(skb, targinfo,
+	ret = tcpmss_mangle_packet(skb, par->targinfo,
 				   tcpmss_reverse_mtu(skb, PF_INET6),
 				   tcphoff,
 				   sizeof(*ipv6h) + sizeof(struct tcphdr));
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 2e0ae6cc5d9..9dd8c8ef63e 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -75,19 +75,15 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
 }
 
 static unsigned int
-tcpoptstrip_tg4(struct sk_buff *skb, const struct net_device *in,
-		const struct net_device *out, unsigned int hooknum,
-		const struct xt_target *target, const void *targinfo)
+tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 {
-	return tcpoptstrip_mangle_packet(skb, targinfo, ip_hdrlen(skb),
+	return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb),
 	       sizeof(struct iphdr) + sizeof(struct tcphdr));
 }
 
 #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
 static unsigned int
-tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in,
-		const struct net_device *out, unsigned int hooknum,
-		const struct xt_target *target, const void *targinfo)
+tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	int tcphoff;
@@ -98,7 +94,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in,
 	if (tcphoff < 0)
 		return NF_DROP;
 
-	return tcpoptstrip_mangle_packet(skb, targinfo, tcphoff,
+	return tcpoptstrip_mangle_packet(skb, par->targinfo, tcphoff,
 	       sizeof(*ipv6h) + sizeof(struct tcphdr));
 }
 #endif
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 183f251d2f0..f08c49ea4bd 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -25,15 +25,10 @@
 #include <net/netfilter/nf_tproxy_core.h>
 
 static unsigned int
-tproxy_tg(struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  unsigned int hooknum,
-	  const struct xt_target *target,
-	  const void *targinfo)
+tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	const struct iphdr *iph = ip_hdr(skb);
-	const struct xt_tproxy_target_info *tgi = targinfo;
+	const struct xt_tproxy_target_info *tgi = par->targinfo;
 	struct udphdr _hdr, *hp;
 	struct sock *sk;
 
@@ -44,7 +39,7 @@ tproxy_tg(struct sk_buff *skb,
 	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
 				   iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr,
 				   hp->source, tgi->lport ? tgi->lport : hp->dest,
-				   in, true);
+				   par->in, true);
 
 	/* NOTE: assign_sock consumes our sk reference */
 	if (sk && nf_tproxy_assign_sock(skb, sk)) {
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index da35f9f1cd7..fbb04b86c46 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -11,9 +11,7 @@ MODULE_ALIAS("ipt_TRACE");
 MODULE_ALIAS("ip6t_TRACE");
 
 static unsigned int
-trace_tg(struct sk_buff *skb, const struct net_device *in,
-         const struct net_device *out, unsigned int hooknum,
-         const struct xt_target *target, const void *targinfo)
+trace_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	skb->nf_trace = 1;
 	return XT_CONTINUE;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 79ea19375ca..89791a56429 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -188,6 +188,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 {
 	int ret = 0, result = 0;
 	struct tcf_ipt *ipt = a->priv;
+	struct xt_target_param par;
 
 	if (skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -203,10 +204,13 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 	/* yes, we have to worry about both in and out dev
 	 worry later - danger - this API seems to have changed
 	 from earlier kernels */
-	ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL,
-						   ipt->tcfi_hook,
-						   ipt->tcfi_t->u.kernel.target,
-						   ipt->tcfi_t->data);
+	par.in       = skb->dev;
+	par.out      = NULL;
+	par.hooknum  = ipt->tcfi_hook;
+	par.target   = ipt->tcfi_t->u.kernel.target;
+	par.targinfo = ipt->tcfi_t->data;
+	ret = par.target->target(skb, &par);
+
 	switch (ret) {
 	case NF_ACCEPT:
 		result = TC_ACT_OK;
-- 
cgit v1.2.3-70-g09d2


From af5d6dc200eb0fcc6fbd3df1ab4d8969004cb37f Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:19 +0200
Subject: netfilter: xtables: move extension arguments into compound structure
 (5/6)

This patch does this for target extensions' checkentry functions.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h        | 29 +++++++++++++++++++---------
 include/linux/netfilter_bridge/ebtables.h |  4 ++--
 net/bridge/netfilter/ebt_arpreply.c       | 10 +++-------
 net/bridge/netfilter/ebt_dnat.c           | 19 +++++++++---------
 net/bridge/netfilter/ebt_log.c            |  7 ++-----
 net/bridge/netfilter/ebt_mark.c           |  8 ++------
 net/bridge/netfilter/ebt_nflog.c          |  7 ++-----
 net/bridge/netfilter/ebt_redirect.c       | 17 ++++++++--------
 net/bridge/netfilter/ebt_snat.c           |  8 ++------
 net/bridge/netfilter/ebt_ulog.c           |  7 ++-----
 net/bridge/netfilter/ebtables.c           | 28 +++++++++++++++------------
 net/ipv4/netfilter/arp_tables.c           | 20 ++++++++++---------
 net/ipv4/netfilter/arpt_mangle.c          |  6 ++----
 net/ipv4/netfilter/ip_tables.c            | 17 +++++++++-------
 net/ipv4/netfilter/ipt_CLUSTERIP.c        | 13 +++++--------
 net/ipv4/netfilter/ipt_ECN.c              |  9 +++------
 net/ipv4/netfilter/ipt_LOG.c              |  7 ++-----
 net/ipv4/netfilter/ipt_MASQUERADE.c       |  7 ++-----
 net/ipv4/netfilter/ipt_NETMAP.c           |  7 ++-----
 net/ipv4/netfilter/ipt_REDIRECT.c         |  7 ++-----
 net/ipv4/netfilter/ipt_REJECT.c           |  9 +++------
 net/ipv4/netfilter/ipt_TTL.c              |  7 ++-----
 net/ipv4/netfilter/ipt_ULOG.c             |  7 ++-----
 net/ipv4/netfilter/nf_nat_rule.c          | 16 ++++------------
 net/ipv6/netfilter/ip6_tables.c           | 16 ++++++++++------
 net/ipv6/netfilter/ip6t_HL.c              |  7 ++-----
 net/ipv6/netfilter/ip6t_LOG.c             |  7 ++-----
 net/ipv6/netfilter/ip6t_REJECT.c          |  9 +++------
 net/netfilter/x_tables.c                  | 32 +++++++++++++++----------------
 net/netfilter/xt_CONNMARK.c               | 24 +++++++++--------------
 net/netfilter/xt_CONNSECMARK.c            | 16 +++++++---------
 net/netfilter/xt_DSCP.c                   | 19 +++++++-----------
 net/netfilter/xt_MARK.c                   | 14 ++++----------
 net/netfilter/xt_NFLOG.c                  |  7 ++-----
 net/netfilter/xt_RATEEST.c                |  9 ++-------
 net/netfilter/xt_SECMARK.c                | 12 +++++-------
 net/netfilter/xt_TCPMSS.c                 | 22 ++++++++-------------
 net/netfilter/xt_TPROXY.c                 |  9 ++-------
 net/sched/act_ipt.c                       | 12 +++++++++---
 39 files changed, 208 insertions(+), 283 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 46d0cb1ad34..8daeb496ba7 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -234,6 +234,23 @@ struct xt_target_param {
 	const void *targinfo;
 };
 
+/**
+ * struct xt_tgchk_param - parameters for target extensions'
+ * checkentry functions
+ *
+ * @entryinfo:	the family-specific rule data
+ * 		(struct ipt_entry, ip6t_entry, arpt_entry, ebt_entry)
+ *
+ * Other fields see above.
+ */
+struct xt_tgchk_param {
+	const char *table;
+	void *entryinfo;
+	const struct xt_target *target;
+	void *targinfo;
+	unsigned int hook_mask;
+};
+
 struct xt_match
 {
 	struct list_head list;
@@ -291,11 +308,7 @@ struct xt_target
            hook_mask is a bitmask of hooks from which it can be
            called. */
 	/* Should return true or false. */
-	bool (*checkentry)(const char *tablename,
-			   const void *entry,
-			   const struct xt_target *target,
-			   void *targinfo,
-			   unsigned int hook_mask);
+	bool (*checkentry)(const struct xt_tgchk_param *);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_target *target, void *targinfo);
@@ -376,10 +389,8 @@ extern void xt_unregister_matches(struct xt_match *match, unsigned int n);
 
 extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family,
 			  unsigned int size, u_int8_t proto, bool inv_proto);
-extern int xt_check_target(const struct xt_target *target, unsigned short family,
-			   unsigned int size, const char *table, unsigned int hook,
-			   unsigned short proto, int inv_proto,
-			   const void *entry, void *targinfo);
+extern int xt_check_target(struct xt_tgchk_param *, u_int8_t family,
+			   unsigned int size, u_int8_t proto, bool inv_proto);
 
 extern struct xt_table *xt_register_table(struct net *net,
 					  struct xt_table *table,
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 568a690f6a6..d45e29cd1cf 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -310,9 +310,9 @@ extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb,
 #define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg))
 /* True if the hook mask denotes that the rule is in a base chain,
  * used in the check() functions */
-#define BASE_CHAIN (hookmask & (1 << NF_BR_NUMHOOKS))
+#define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS))
 /* Clear the bit in the hook mask that tells if the rule is on a base chain */
-#define CLEAR_BASE_CHAIN_BIT (hookmask &= ~(1 << NF_BR_NUMHOOKS))
+#define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS))
 /* True if the target is not a standard target */
 #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
 
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index fc94699f719..76584cd72e5 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -57,20 +57,16 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool
-ebt_arpreply_tg_check(const char *tablename, const void *entry,
-		      const struct xt_target *target, void *data,
-		      unsigned int hookmask)
+static bool ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ebt_arpreply_info *info = data;
-	const struct ebt_entry *e = entry;
+	const struct ebt_arpreply_info *info = par->targinfo;
+	const struct ebt_entry *e = par->entryinfo;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
 		return false;
 	if (e->ethproto != htons(ETH_P_ARP) ||
 	    e->invflags & EBT_IPROTO)
 		return false;
-	CLEAR_BASE_CHAIN_BIT;
 	return true;
 }
 
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index bb5d79e0bee..6b49ea9e31f 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -26,19 +26,20 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool
-ebt_dnat_tg_check(const char *tablename, const void *entry,
-		  const struct xt_target *target, void *data,
-		  unsigned int hookmask)
+static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ebt_nat_info *info = data;
+	const struct ebt_nat_info *info = par->targinfo;
+	unsigned int hook_mask;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
 		return false;
-	CLEAR_BASE_CHAIN_BIT;
-	if ( (strcmp(tablename, "nat") ||
-	   (hookmask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) &&
-	   (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) )
+
+	hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS);
+	if ((strcmp(par->table, "nat") != 0 ||
+	    (hook_mask & ~((1 << NF_BR_PRE_ROUTING) |
+	    (1 << NF_BR_LOCAL_OUT)))) &&
+	    (strcmp(par->table, "broute") != 0 ||
+	    hook_mask & ~(1 << NF_BR_BROUTING)))
 		return false;
 	if (INVALID_TARGET)
 		return false;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 87de5fccb2f..3d33c608906 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -24,12 +24,9 @@
 
 static DEFINE_SPINLOCK(ebt_log_lock);
 
-static bool
-ebt_log_tg_check(const char *table, const void *entry,
-		 const struct xt_target *target, void *data,
-		 unsigned int hook_mask)
+static bool ebt_log_tg_check(const struct xt_tgchk_param *par)
 {
-	struct ebt_log_info *info = data;
+	struct ebt_log_info *info = par->targinfo;
 
 	if (info->bitmask & ~EBT_LOG_MASK)
 		return false;
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index aafc456c3c3..2fee7e8e2e9 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -36,18 +36,14 @@ ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static bool
-ebt_mark_tg_check(const char *table, const void *e,
-		  const struct xt_target *target, void *data,
-		  unsigned int hookmask)
+static bool ebt_mark_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ebt_mark_t_info *info = data;
+	const struct ebt_mark_t_info *info = par->targinfo;
 	int tmp;
 
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return false;
-	CLEAR_BASE_CHAIN_BIT;
 	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
 		return false;
 	tmp = info->target & ~EBT_VERDICT_BITS;
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 6a28d994cf7..2a63d996dd4 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -35,12 +35,9 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return EBT_CONTINUE;
 }
 
-static bool
-ebt_nflog_tg_check(const char *table, const void *e,
-		   const struct xt_target *target, void *data,
-		   unsigned int hookmask)
+static bool ebt_nflog_tg_check(const struct xt_tgchk_param *par)
 {
-	struct ebt_nflog_info *info = data;
+	struct ebt_nflog_info *info = par->targinfo;
 
 	if (info->flags & ~EBT_NFLOG_MASK)
 		return false;
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 0cfe2fad940..c8a49f7a57b 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -32,18 +32,19 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool
-ebt_redirect_tg_check(const char *tablename, const void *e,
-		      const struct xt_target *target, void *data,
-		      unsigned int hookmask)
+static bool ebt_redirect_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ebt_redirect_info *info = data;
+	const struct ebt_redirect_info *info = par->targinfo;
+	unsigned int hook_mask;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
 		return false;
-	CLEAR_BASE_CHAIN_BIT;
-	if ( (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) &&
-	     (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) )
+
+	hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS);
+	if ((strcmp(par->table, "nat") != 0 ||
+	    hook_mask & ~(1 << NF_BR_PRE_ROUTING)) &&
+	    (strcmp(par->table, "broute") != 0 ||
+	    hook_mask & ~(1 << NF_BR_BROUTING)))
 		return false;
 	if (INVALID_TARGET)
 		return false;
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index f55960eee99..8d04d4c302b 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -42,18 +42,14 @@ out:
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static bool
-ebt_snat_tg_check(const char *tablename, const void *e,
-		  const struct xt_target *target, void *data,
-		  unsigned int hookmask)
+static bool ebt_snat_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ebt_nat_info *info = data;
+	const struct ebt_nat_info *info = par->targinfo;
 	int tmp;
 
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return false;
-	CLEAR_BASE_CHAIN_BIT;
 
 	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
 		return false;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index bfedf12cbf4..2c6d6823e70 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -254,12 +254,9 @@ ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return EBT_CONTINUE;
 }
 
-static bool
-ebt_ulog_tg_check(const char *table, const void *entry,
-		  const struct xt_target *target, void *data,
-		  unsigned int hookmask)
+static bool ebt_ulog_tg_check(const struct xt_tgchk_param *par)
 {
-	struct ebt_ulog_info *uloginfo = data;
+	struct ebt_ulog_info *uloginfo = par->targinfo;
 
 	if (uloginfo->nlgroup > 31)
 		return false;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index a1156bab4a0..cf823c21c16 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -363,9 +363,10 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
 }
 
 static inline int
-ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
-   const char *name, unsigned int hookmask, unsigned int *cnt)
+ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par,
+		  unsigned int *cnt)
 {
+	const struct ebt_entry *e = par->entryinfo;
 	struct xt_target *watcher;
 	size_t left = ((char *)e + e->target_offset) - (char *)w;
 	int ret;
@@ -383,9 +384,10 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
 		return -ENOENT;
 	w->u.watcher = watcher;
 
-	ret = xt_check_target(watcher, NFPROTO_BRIDGE, w->watcher_size,
-	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO,
-	      e, w->data);
+	par->target   = watcher;
+	par->targinfo = w->data;
+	ret = xt_check_target(par, NFPROTO_BRIDGE, w->watcher_size,
+	      e->ethproto, e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(watcher->me);
 		return ret;
@@ -619,6 +621,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	size_t gap;
 	int ret;
 	struct xt_mtchk_param mtpar;
+	struct xt_tgchk_param tgpar;
 
 	/* don't mess with the struct ebt_entries */
 	if (e->bitmask == 0)
@@ -660,14 +663,14 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	}
 	i = 0;
 
-	mtpar.table     = name;
-	mtpar.entryinfo = e;
-	mtpar.hook_mask = hookmask;
+	mtpar.table     = tgpar.table     = name;
+	mtpar.entryinfo = tgpar.entryinfo = e;
+	mtpar.hook_mask = tgpar.hook_mask = hookmask;
 	ret = EBT_MATCH_ITERATE(e, ebt_check_match, &mtpar, &i);
 	if (ret != 0)
 		goto cleanup_matches;
 	j = 0;
-	ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, e, name, hookmask, &j);
+	ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j);
 	if (ret != 0)
 		goto cleanup_watchers;
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
@@ -703,9 +706,10 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 		goto cleanup_watchers;
 	}
 
-	ret = xt_check_target(target, NFPROTO_BRIDGE, t->target_size,
-	      name, hookmask, e->ethproto, e->invflags & EBT_IPROTO,
-	      e, t->data);
+	tgpar.target   = target;
+	tgpar.targinfo = t->data;
+	ret = xt_check_target(&tgpar, NFPROTO_BRIDGE, t->target_size,
+	      e->ethproto, e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(target->me);
 		goto cleanup_watchers;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 5b631ad74b5..b3238d0101c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -457,16 +457,18 @@ static inline int check_entry(struct arpt_entry *e, const char *name)
 
 static inline int check_target(struct arpt_entry *e, const char *name)
 {
-	struct arpt_entry_target *t;
-	struct xt_target *target;
+	struct arpt_entry_target *t = arpt_get_target(e);
 	int ret;
-
-	t = arpt_get_target(e);
-	target = t->u.kernel.target;
-
-	ret = xt_check_target(target, NFPROTO_ARP,
-			      t->u.target_size - sizeof(*t),
-			      name, e->comefrom, 0, 0, e, t->data);
+	struct xt_tgchk_param par = {
+		.table     = name,
+		.entryinfo = e,
+		.target    = t->u.kernel.target,
+		.targinfo  = t->data,
+		.hook_mask = e->comefrom,
+	};
+
+	ret = xt_check_target(&par, NFPROTO_ARP,
+	      t->u.target_size - sizeof(*t), 0, false);
 	if (ret < 0) {
 		duprintf("arp_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 0bf81b35369..b0d5b1d0a76 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -54,11 +54,9 @@ target(struct sk_buff *skb, const struct xt_target_param *par)
 	return mangle->target;
 }
 
-static bool
-checkentry(const char *tablename, const void *e, const struct xt_target *target,
-	   void *targinfo, unsigned int hook_mask)
+static bool checkentry(const struct xt_tgchk_param *par)
 {
-	const struct arpt_mangle *mangle = targinfo;
+	const struct arpt_mangle *mangle = par->targinfo;
 
 	if (mangle->flags & ~ARPT_MANGLE_MASK ||
 	    !(mangle->flags & ARPT_MANGLE_MASK))
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 0f8ecf39022..e592c54d499 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -655,15 +655,18 @@ err:
 
 static int check_target(struct ipt_entry *e, const char *name)
 {
-	struct ipt_entry_target *t;
-	struct xt_target *target;
+	struct ipt_entry_target *t = ipt_get_target(e);
+	struct xt_tgchk_param par = {
+		.table     = name,
+		.entryinfo = e,
+		.target    = t->u.kernel.target,
+		.targinfo  = t->data,
+		.hook_mask = e->comefrom,
+	};
 	int ret;
 
-	t = ipt_get_target(e);
-	target = t->u.kernel.target;
-	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
-			      name, e->comefrom, e->ip.proto,
-			      e->ip.invflags & IPT_INV_PROTO, e, t->data);
+	ret = xt_check_target(&par, NFPROTO_IPV4, t->u.target_size - sizeof(*t),
+	      e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
 	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 67e8aa8f34f..6c7254e0256 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -347,13 +347,10 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-clusterip_tg_check(const char *tablename, const void *e_void,
-                   const struct xt_target *target, void *targinfo,
-                   unsigned int hook_mask)
+static bool clusterip_tg_check(const struct xt_tgchk_param *par)
 {
-	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
-	const struct ipt_entry *e = e_void;
+	struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+	const struct ipt_entry *e = par->entryinfo;
 
 	struct clusterip_config *config;
 
@@ -404,9 +401,9 @@ clusterip_tg_check(const char *tablename, const void *e_void,
 	}
 	cipinfo->config = config;
 
-	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", target->family);
+				    "proto=%u\n", par->target->family);
 		return false;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index e37f181e829..f7e2fa0974d 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -93,13 +93,10 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-ecn_tg_check(const char *tablename, const void *e_void,
-             const struct xt_target *target, void *targinfo,
-             unsigned int hook_mask)
+static bool ecn_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ipt_ECN_info *einfo = targinfo;
-	const struct ipt_entry *e = e_void;
+	const struct ipt_ECN_info *einfo = par->targinfo;
+	const struct ipt_entry *e = par->entryinfo;
 
 	if (einfo->operation & IPT_ECN_OP_MASK) {
 		printk(KERN_WARNING "ECN: unsupported ECN operation %x\n",
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index e9942aed35a..fc6ce04a3e3 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -440,12 +440,9 @@ log_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-log_tg_check(const char *tablename, const void *e,
-             const struct xt_target *target, void *targinfo,
-             unsigned int hook_mask)
+static bool log_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ipt_log_info *loginfo = targinfo;
+	const struct ipt_log_info *loginfo = par->targinfo;
 
 	if (loginfo->level >= 8) {
 		pr_debug("LOG: level %u >= 8\n", loginfo->level);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index e0d9d49b79e..f389f60cb10 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -31,12 +31,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 static DEFINE_RWLOCK(masq_lock);
 
 /* FIXME: Multiple targets. --RR */
-static bool
-masquerade_tg_check(const char *tablename, const void *e,
-                    const struct xt_target *target, void *targinfo,
-                    unsigned int hook_mask)
+static bool masquerade_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		pr_debug("masquerade_check: bad MAP_IPS.\n");
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index cf18f23b346..7c29582d4ec 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -22,12 +22,9 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
 MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
 
-static bool
-netmap_tg_check(const char *tablename, const void *e,
-                const struct xt_target *target, void *targinfo,
-                unsigned int hook_mask)
+static bool netmap_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
 	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
 		pr_debug("NETMAP:check: bad MAP_IPS.\n");
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 23adb09ddfb..698e5e78685 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -26,12 +26,9 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
 
 /* FIXME: Take multiple ranges --RR */
-static bool
-redirect_tg_check(const char *tablename, const void *e,
-                  const struct xt_target *target, void *targinfo,
-                  unsigned int hook_mask)
+static bool redirect_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		pr_debug("redirect_check: bad MAP_IPS.\n");
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b36071bb107..0b4b6e0ff2b 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -175,13 +175,10 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-static bool
-reject_tg_check(const char *tablename, const void *e_void,
-                const struct xt_target *target, void *targinfo,
-                unsigned int hook_mask)
+static bool reject_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ipt_reject_info *rejinfo = targinfo;
-	const struct ipt_entry *e = e_void;
+	const struct ipt_reject_info *rejinfo = par->targinfo;
+	const struct ipt_entry *e = par->entryinfo;
 
 	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
 		printk("ipt_REJECT: ECHOREPLY no longer supported.\n");
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 05cbfd2f747..6d76aae90cc 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -59,12 +59,9 @@ ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-ttl_tg_check(const char *tablename, const void *e,
-             const struct xt_target *target, void *targinfo,
-             unsigned int hook_mask)
+static bool ttl_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ipt_TTL_info *info = targinfo;
+	const struct ipt_TTL_info *info = par->targinfo;
 
 	if (info->mode > IPT_TTL_MAXMODE) {
 		printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 46c0df0dc2d..18a2826b57c 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -313,12 +313,9 @@ static void ipt_logfn(u_int8_t pf,
 	ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
 }
 
-static bool
-ulog_tg_check(const char *tablename, const void *e,
-              const struct xt_target *target, void *targinfo,
-              unsigned int hookmask)
+static bool ulog_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ipt_ulog_info *loginfo = targinfo;
+	const struct ipt_ulog_info *loginfo = par->targinfo;
 
 	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
 		pr_debug("ipt_ULOG: prefix term %i\n",
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 83170ff131f..bea54a68510 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -128,13 +128,9 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
 	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
 }
 
-static bool ipt_snat_checkentry(const char *tablename,
-				const void *entry,
-				const struct xt_target *target,
-				void *targinfo,
-				unsigned int hook_mask)
+static bool ipt_snat_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
@@ -144,13 +140,9 @@ static bool ipt_snat_checkentry(const char *tablename,
 	return true;
 }
 
-static bool ipt_dnat_checkentry(const char *tablename,
-				const void *entry,
-				const struct xt_target *target,
-				void *targinfo,
-				unsigned int hook_mask)
+static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index ee0986cdbd6..ca14fb8bd36 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -679,15 +679,19 @@ err:
 
 static int check_target(struct ip6t_entry *e, const char *name)
 {
-	struct ip6t_entry_target *t;
-	struct xt_target *target;
+	struct ip6t_entry_target *t = ip6t_get_target(e);
+	struct xt_tgchk_param par = {
+		.table     = name,
+		.entryinfo = e,
+		.target    = t->u.kernel.target,
+		.targinfo  = t->data,
+		.hook_mask = e->comefrom,
+	};
 	int ret;
 
 	t = ip6t_get_target(e);
-	target = t->u.kernel.target;
-	ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t),
-			      name, e->comefrom, e->ipv6.proto,
-			      e->ipv6.invflags & IP6T_INV_PROTO, e, t->data);
+	ret = xt_check_target(&par, NFPROTO_IPV6, t->u.target_size - sizeof(*t),
+	      e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO);
 	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ac759a54f2c..27b5adf670a 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -54,12 +54,9 @@ hl_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-hl_tg6_check(const char *tablename, const void *entry,
-             const struct xt_target *target, void *targinfo,
-             unsigned int hook_mask)
+static bool hl_tg6_check(const struct xt_tgchk_param *par)
 {
-	const struct ip6t_HL_info *info = targinfo;
+	const struct ip6t_HL_info *info = par->targinfo;
 
 	if (info->mode > IP6T_HL_MAXMODE) {
 		printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n",
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index a31d3ecd1fc..caa441d0956 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -453,12 +453,9 @@ log_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 
-static bool
-log_tg6_check(const char *tablename, const void *entry,
-              const struct xt_target *target, void *targinfo,
-              unsigned int hook_mask)
+static bool log_tg6_check(const struct xt_tgchk_param *par)
 {
-	const struct ip6t_log_info *loginfo = targinfo;
+	const struct ip6t_log_info *loginfo = par->targinfo;
 
 	if (loginfo->level >= 8) {
 		pr_debug("LOG: level %u >= 8\n", loginfo->level);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 1d5f3a70ed0..0981b4ccb8b 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -213,13 +213,10 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-static bool
-reject_tg6_check(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool reject_tg6_check(const struct xt_tgchk_param *par)
 {
-	const struct ip6t_reject_info *rejinfo = targinfo;
-	const struct ip6t_entry *e = entry;
+	const struct ip6t_reject_info *rejinfo = par->targinfo;
+	const struct ip6t_entry *e = par->entryinfo;
 
 	if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
 		printk("ip6t_REJECT: ECHOREPLY is not supported.\n");
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 817ab14f7cd..f29513cd139 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -471,35 +471,35 @@ int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
 EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
 #endif /* CONFIG_COMPAT */
 
-int xt_check_target(const struct xt_target *target, unsigned short family,
-		    unsigned int size, const char *table, unsigned int hook_mask,
-		    unsigned short proto, int inv_proto, const void *entry,
-		    void *targinfo)
+int xt_check_target(struct xt_tgchk_param *par, u_int8_t family,
+		    unsigned int size, u_int8_t proto, bool inv_proto)
 {
-	if (XT_ALIGN(target->targetsize) != size) {
+	if (XT_ALIGN(par->target->targetsize) != size) {
 		printk("%s_tables: %s target: invalid size %Zu != %u\n",
-		       xt_prefix[family], target->name,
-		       XT_ALIGN(target->targetsize), size);
+		       xt_prefix[family], par->target->name,
+		       XT_ALIGN(par->target->targetsize), size);
 		return -EINVAL;
 	}
-	if (target->table && strcmp(target->table, table)) {
+	if (par->target->table != NULL &&
+	    strcmp(par->target->table, par->table) != 0) {
 		printk("%s_tables: %s target: only valid in %s table, not %s\n",
-		       xt_prefix[family], target->name, target->table, table);
+		       xt_prefix[family], par->target->name,
+		       par->target->table, par->table);
 		return -EINVAL;
 	}
-	if (target->hooks && (hook_mask & ~target->hooks) != 0) {
+	if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
 		printk("%s_tables: %s target: bad hook_mask %#x/%#x\n",
-		       xt_prefix[family], target->name, hook_mask,
-		       target->hooks);
+		       xt_prefix[family], par->target->name, par->hook_mask,
+		       par->target->hooks);
 		return -EINVAL;
 	}
-	if (target->proto && (target->proto != proto || inv_proto)) {
+	if (par->target->proto && (par->target->proto != proto || inv_proto)) {
 		printk("%s_tables: %s target: only valid for protocol %u\n",
-		       xt_prefix[family], target->name, target->proto);
+		       xt_prefix[family], par->target->name,
+		       par->target->proto);
 		return -EINVAL;
 	}
-	if (target->checkentry != NULL &&
-	    !target->checkentry(table, entry, target, targinfo, hook_mask))
+	if (par->target->checkentry != NULL && !par->target->checkentry(par))
 		return -EINVAL;
 	return 0;
 }
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 95ed267328a..8fc9f35e67d 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -112,18 +112,15 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-connmark_tg_check_v0(const char *tablename, const void *entry,
-                     const struct xt_target *target, void *targinfo,
-                     unsigned int hook_mask)
+static bool connmark_tg_check_v0(const struct xt_tgchk_param *par)
 {
-	const struct xt_connmark_target_info *matchinfo = targinfo;
+	const struct xt_connmark_target_info *matchinfo = par->targinfo;
 
 	if (matchinfo->mode == XT_CONNMARK_RESTORE) {
-		if (strcmp(tablename, "mangle") != 0) {
+		if (strcmp(par->table, "mangle") != 0) {
 			printk(KERN_WARNING "CONNMARK: restore can only be "
 			       "called from \"mangle\" table, not \"%s\"\n",
-			       tablename);
+			       par->table);
 			return false;
 		}
 	}
@@ -131,22 +128,19 @@ connmark_tg_check_v0(const char *tablename, const void *entry,
 		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
 		return false;
 	}
-	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", target->family);
+				    "proto=%u\n", par->target->family);
 		return false;
 	}
 	return true;
 }
 
-static bool
-connmark_tg_check(const char *tablename, const void *entry,
-                  const struct xt_target *target, void *targinfo,
-                  unsigned int hook_mask)
+static bool connmark_tg_check(const struct xt_tgchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", target->family);
+		       "proto=%u\n", par->target->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 2211a2cef28..2041a3d4b4d 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -85,16 +85,14 @@ connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-connsecmark_tg_check(const char *tablename, const void *entry,
-                     const struct xt_target *target, void *targinfo,
-                     unsigned int hook_mask)
+static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_connsecmark_target_info *info = targinfo;
+	const struct xt_connsecmark_target_info *info = par->targinfo;
 
-	if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) {
+	if (strcmp(par->table, "mangle") != 0 &&
+	    strcmp(par->table, "security") != 0) {
 		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
-		       "or \'security\' tables, not \'%s\'.\n", tablename);
+		       "or \'security\' tables, not \'%s\'.\n", par->table);
 		return false;
 	}
 
@@ -108,9 +106,9 @@ connsecmark_tg_check(const char *tablename, const void *entry,
 		return false;
 	}
 
-	if (nf_ct_l3proto_try_module_get(target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", target->family);
+				    "proto=%u\n", par->target->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index c78e80afdf3..6a347e768f8 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -61,15 +61,12 @@ dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-dscp_tg_check(const char *tablename, const void *e_void,
-              const struct xt_target *target, void *targinfo,
-              unsigned int hook_mask)
+static bool dscp_tg_check(const struct xt_tgchk_param *par)
 {
-	const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
+	const struct xt_DSCP_info *info = par->targinfo;
 
-	if (dscp > XT_DSCP_MAX) {
-		printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
+	if (info->dscp > XT_DSCP_MAX) {
+		printk(KERN_WARNING "DSCP: dscp %x out of range\n", info->dscp);
 		return false;
 	}
 	return true;
@@ -95,12 +92,10 @@ tos_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-tos_tg_check_v0(const char *tablename, const void *e_void,
-                const struct xt_target *target, void *targinfo,
-                unsigned int hook_mask)
+static bool tos_tg_check_v0(const struct xt_tgchk_param *par)
 {
-	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
+	const struct ipt_tos_target_info *info = par->targinfo;
+	const uint8_t tos = info->tos;
 
 	if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT &&
 	    tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST &&
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 27d03f39611..123ee0ba78c 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -66,12 +66,9 @@ mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-mark_tg_check_v0(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool mark_tg_check_v0(const struct xt_tgchk_param *par)
 {
-	const struct xt_mark_target_info *markinfo = targinfo;
+	const struct xt_mark_target_info *markinfo = par->targinfo;
 
 	if (markinfo->mark > 0xffffffff) {
 		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
@@ -80,12 +77,9 @@ mark_tg_check_v0(const char *tablename, const void *entry,
 	return true;
 }
 
-static bool
-mark_tg_check_v1(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool mark_tg_check_v1(const struct xt_tgchk_param *par)
 {
-	const struct xt_mark_target_info_v1 *markinfo = targinfo;
+	const struct xt_mark_target_info_v1 *markinfo = par->targinfo;
 
 	if (markinfo->mode != XT_MARK_SET
 	    && markinfo->mode != XT_MARK_AND
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 3218ad63bd1..56ee4f118b5 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -36,12 +36,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-nflog_tg_check(const char *tablename, const void *entry,
-               const struct xt_target *target, void *targetinfo,
-               unsigned int hookmask)
+static bool nflog_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_nflog_info *info = targetinfo;
+	const struct xt_nflog_info *info = par->targinfo;
 
 	if (info->flags & ~XT_NFLOG_MASK)
 		return false;
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 92e33524f78..edf4ab1f30f 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -84,14 +84,9 @@ xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool
-xt_rateest_tg_checkentry(const char *tablename,
-			 const void *entry,
-			 const struct xt_target *target,
-			 void *targinfo,
-			 unsigned int hook_mask)
+static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 {
-	struct xt_rateest_target_info *info = targinfo;
+	struct xt_rateest_target_info *info = par->targinfo;
 	struct xt_rateest *est;
 	struct {
 		struct nlattr		opt;
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index ad05214e380..e5777227192 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -80,16 +80,14 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
 	return true;
 }
 
-static bool
-secmark_tg_check(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool secmark_tg_check(const struct xt_tgchk_param *par)
 {
-	struct xt_secmark_target_info *info = targinfo;
+	struct xt_secmark_target_info *info = par->targinfo;
 
-	if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) {
+	if (strcmp(par->table, "mangle") != 0 &&
+	    strcmp(par->table, "security") != 0) {
 		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
-		       "or \'security\' tables, not \'%s\'.\n", tablename);
+		       "or \'security\' tables, not \'%s\'.\n", par->table);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index e08762d9b0f..4f3b1f80879 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -237,16 +237,13 @@ static inline bool find_syn_match(const struct xt_entry_match *m)
 	return false;
 }
 
-static bool
-tcpmss_tg4_check(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_tcpmss_info *info = targinfo;
-	const struct ipt_entry *e = entry;
+	const struct xt_tcpmss_info *info = par->targinfo;
+	const struct ipt_entry *e = par->entryinfo;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
-	    (hook_mask & ~((1 << NF_INET_FORWARD) |
+	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
 			   (1 << NF_INET_LOCAL_OUT) |
 			   (1 << NF_INET_POST_ROUTING))) != 0) {
 		printk("xt_TCPMSS: path-MTU clamping only supported in "
@@ -260,16 +257,13 @@ tcpmss_tg4_check(const char *tablename, const void *entry,
 }
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-static bool
-tcpmss_tg6_check(const char *tablename, const void *entry,
-                 const struct xt_target *target, void *targinfo,
-                 unsigned int hook_mask)
+static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_tcpmss_info *info = targinfo;
-	const struct ip6t_entry *e = entry;
+	const struct xt_tcpmss_info *info = par->targinfo;
+	const struct ip6t_entry *e = par->entryinfo;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
-	    (hook_mask & ~((1 << NF_INET_FORWARD) |
+	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
 			   (1 << NF_INET_LOCAL_OUT) |
 			   (1 << NF_INET_POST_ROUTING))) != 0) {
 		printk("xt_TCPMSS: path-MTU clamping only supported in "
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index f08c49ea4bd..1340c2fa362 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -59,14 +59,9 @@ tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-static bool
-tproxy_tg_check(const char *tablename,
-		const void *entry,
-		const struct xt_target *target,
-		void *targetinfo,
-		unsigned int hook_mask)
+static bool tproxy_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct ipt_ip *i = entry;
+	const struct ipt_ip *i = par->entryinfo;
 
 	if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
 	    && !(i->invflags & IPT_INV_PROTO))
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 89791a56429..a54dc3f8234 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -40,6 +40,7 @@ static struct tcf_hashinfo ipt_hash_info = {
 
 static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 {
+	struct xt_tgchk_param par;
 	struct xt_target *target;
 	int ret = 0;
 
@@ -49,9 +50,14 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 		return -ENOENT;
 
 	t->u.kernel.target = target;
-
-	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
-			      table, hook, 0, 0, NULL, t->data);
+	par.table     = table;
+	par.entryinfo = NULL;
+	par.target    = target;
+	par.targinfo  = t->data;
+	par.hook_mask = hook;
+
+	ret = xt_check_target(&par, NFPROTO_IPV4,
+	      t->u.target_size - sizeof(*t), 0, false);
 	if (ret < 0) {
 		module_put(t->u.kernel.target->me);
 		return ret;
-- 
cgit v1.2.3-70-g09d2


From a2df1648ba615dd5908e9a1fa7b2f133fa302487 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:19 +0200
Subject: netfilter: xtables: move extension arguments into compound structure
 (6/6)

This patch does this for target extensions' destroy functions.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h |  8 +++++++-
 net/bridge/netfilter/ebtables.c    | 19 +++++++++++++------
 net/ipv4/netfilter/arp_tables.c    |  9 ++++++---
 net/ipv4/netfilter/ip_tables.c     | 10 +++++++---
 net/ipv4/netfilter/ipt_CLUSTERIP.c |  6 +++---
 net/ipv6/netfilter/ip6_tables.c    | 10 +++++++---
 net/netfilter/xt_CONNMARK.c        |  5 ++---
 net/netfilter/xt_CONNSECMARK.c     |  5 ++---
 net/netfilter/xt_RATEEST.c         |  5 ++---
 net/netfilter/xt_SECMARK.c         |  2 +-
 net/sched/act_ipt.c                | 10 +++++++---
 11 files changed, 57 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 8daeb496ba7..e3b3b669a14 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -251,6 +251,12 @@ struct xt_tgchk_param {
 	unsigned int hook_mask;
 };
 
+/* Target destructor parameters */
+struct xt_tgdtor_param {
+	const struct xt_target *target;
+	void *targinfo;
+};
+
 struct xt_match
 {
 	struct list_head list;
@@ -311,7 +317,7 @@ struct xt_target
 	bool (*checkentry)(const struct xt_tgchk_param *);
 
 	/* Called when entry of this type deleted. */
-	void (*destroy)(const struct xt_target *target, void *targinfo);
+	void (*destroy)(const struct xt_tgdtor_param *);
 
 	/* Called when userspace align differs from kernel space one */
 	void (*compat_from_user)(void *dst, void *src);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index cf823c21c16..29d8061fa15 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -581,18 +581,23 @@ ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i)
 static inline int
 ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i)
 {
+	struct xt_tgdtor_param par;
+
 	if (i && (*i)-- == 0)
 		return 1;
-	if (w->u.watcher->destroy)
-		w->u.watcher->destroy(w->u.watcher, w->data);
-	module_put(w->u.watcher->me);
 
+	par.target   = w->u.watcher;
+	par.targinfo = w->data;
+	if (par.target->destroy != NULL)
+		par.target->destroy(&par);
+	module_put(par.target->me);
 	return 0;
 }
 
 static inline int
 ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
 {
+	struct xt_tgdtor_param par;
 	struct ebt_entry_target *t;
 
 	if (e->bitmask == 0)
@@ -603,10 +608,12 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
 	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL);
 	EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL);
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
-	if (t->u.target->destroy)
-		t->u.target->destroy(t->u.target, t->data);
-	module_put(t->u.target->me);
 
+	par.target   = t->u.target;
+	par.targinfo = t->data;
+	if (par.target->destroy != NULL)
+		par.target->destroy(&par);
+	module_put(par.target->me);
 	return 0;
 }
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b3238d0101c..3bab78330cf 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -557,15 +557,18 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
 
 static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
 {
+	struct xt_tgdtor_param par;
 	struct arpt_entry_target *t;
 
 	if (i && (*i)-- == 0)
 		return 1;
 
 	t = arpt_get_target(e);
-	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
-	module_put(t->u.kernel.target->me);
+	par.target   = t->u.kernel.target;
+	par.targinfo = t->data;
+	if (par.target->destroy != NULL)
+		par.target->destroy(&par);
+	module_put(par.target->me);
 	return 0;
 }
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e592c54d499..50b9a6c34c3 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -768,6 +768,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
 static int
 cleanup_entry(struct ipt_entry *e, unsigned int *i)
 {
+	struct xt_tgdtor_param par;
 	struct ipt_entry_target *t;
 
 	if (i && (*i)-- == 0)
@@ -776,9 +777,12 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
 	/* Cleanup all matches */
 	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
 	t = ipt_get_target(e);
-	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
-	module_put(t->u.kernel.target->me);
+
+	par.target   = t->u.kernel.target;
+	par.targinfo = t->data;
+	if (par.target->destroy != NULL)
+		par.target->destroy(&par);
+	module_put(par.target->me);
 	return 0;
 }
 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 6c7254e0256..7ac1677419a 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -411,9 +411,9 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
 }
 
 /* drop reference count of cluster config when rule is deleted */
-static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo)
+static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
 {
-	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+	const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 
 	/* if no more entries are referencing the config, remove it
 	 * from the list and destroy the proc entry */
@@ -421,7 +421,7 @@ static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo)
 
 	clusterip_config_put(cipinfo->config);
 
-	nf_ct_l3proto_module_put(target->family);
+	nf_ct_l3proto_module_put(par->target->family);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index ca14fb8bd36..d934a699463 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -793,6 +793,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
 static int
 cleanup_entry(struct ip6t_entry *e, unsigned int *i)
 {
+	struct xt_tgdtor_param par;
 	struct ip6t_entry_target *t;
 
 	if (i && (*i)-- == 0)
@@ -801,9 +802,12 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i)
 	/* Cleanup all matches */
 	IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
 	t = ip6t_get_target(e);
-	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
-	module_put(t->u.kernel.target->me);
+
+	par.target   = t->u.kernel.target;
+	par.targinfo = t->data;
+	if (par.target->destroy != NULL)
+		par.target->destroy(&par);
+	module_put(par.target->me);
 	return 0;
 }
 
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 8fc9f35e67d..c5a5072e005 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -146,10 +146,9 @@ static bool connmark_tg_check(const struct xt_tgchk_param *par)
 	return true;
 }
 
-static void
-connmark_tg_destroy(const struct xt_target *target, void *targinfo)
+static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
 {
-	nf_ct_l3proto_module_put(target->family);
+	nf_ct_l3proto_module_put(par->target->family);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 2041a3d4b4d..b6e3f3f125f 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -114,10 +114,9 @@ static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
 	return true;
 }
 
-static void
-connsecmark_tg_destroy(const struct xt_target *target, void *targinfo)
+static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
 {
-	nf_ct_l3proto_module_put(target->family);
+	nf_ct_l3proto_module_put(par->target->family);
 }
 
 static struct xt_target connsecmark_tg_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index edf4ab1f30f..43f5676b1af 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -139,10 +139,9 @@ err1:
 	return false;
 }
 
-static void xt_rateest_tg_destroy(const struct xt_target *target,
-				  void *targinfo)
+static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
 {
-	struct xt_rateest_target_info *info = targinfo;
+	struct xt_rateest_target_info *info = par->targinfo;
 
 	xt_rateest_put(info->est);
 }
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index e5777227192..7a6f9e6f5df 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -113,7 +113,7 @@ static bool secmark_tg_check(const struct xt_tgchk_param *par)
 	return true;
 }
 
-static void secmark_tg_destroy(const struct xt_target *target, void *targinfo)
+static void secmark_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	switch (mode) {
 	case SECMARK_MODE_SEL:
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index a54dc3f8234..b951d422db9 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -67,9 +67,13 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 
 static void ipt_destroy_target(struct ipt_entry_target *t)
 {
-	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
-	module_put(t->u.kernel.target->me);
+	struct xt_tgdtor_param par = {
+		.target   = t->u.kernel.target,
+		.targinfo = t->data,
+	};
+	if (par.target->destroy != NULL)
+		par.target->destroy(&par);
+	module_put(par.target->me);
 }
 
 static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
-- 
cgit v1.2.3-70-g09d2


From 916a917dfec18535ff9e2afdafba82e6279eb4f4 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:20 +0200
Subject: netfilter: xtables: provide invoked family value to extensions

By passing in the family through which extensions were invoked, a bit
of data space can be reclaimed. The "family" member will be added to
the parameter structures and the check functions be adjusted.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h | 12 ++++++++++--
 net/bridge/netfilter/ebtables.c    | 11 ++++++++---
 net/ipv4/netfilter/arp_tables.c    |  6 ++++--
 net/ipv4/netfilter/ip_tables.c     | 10 ++++++++--
 net/ipv6/netfilter/ip6_tables.c    | 10 ++++++++--
 net/netfilter/x_tables.c           | 23 ++++++++++++-----------
 net/sched/act_ipt.c                |  4 ++--
 7 files changed, 52 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index e3b3b669a14..be41b609c88 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -183,6 +183,8 @@ struct xt_counters_info
  * @fragoff:	packet is a fragment, this is the data offset
  * @thoff:	position of transport header relative to skb->data
  * @hotdrop:	drop packet if we had inspection problems
+ * @family:	Actual NFPROTO_* through which the function is invoked
+ * 		(helpful when match->family == NFPROTO_UNSPEC)
  */
 struct xt_match_param {
 	const struct net_device *in, *out;
@@ -191,6 +193,7 @@ struct xt_match_param {
 	int fragoff;
 	unsigned int thoff;
 	bool *hotdrop;
+	u_int8_t family;
 };
 
 /**
@@ -210,12 +213,14 @@ struct xt_mtchk_param {
 	const struct xt_match *match;
 	void *matchinfo;
 	unsigned int hook_mask;
+	u_int8_t family;
 };
 
 /* Match destructor parameters */
 struct xt_mtdtor_param {
 	const struct xt_match *match;
 	void *matchinfo;
+	u_int8_t family;
 };
 
 /**
@@ -232,6 +237,7 @@ struct xt_target_param {
 	unsigned int hooknum;
 	const struct xt_target *target;
 	const void *targinfo;
+	u_int8_t family;
 };
 
 /**
@@ -249,12 +255,14 @@ struct xt_tgchk_param {
 	const struct xt_target *target;
 	void *targinfo;
 	unsigned int hook_mask;
+	u_int8_t family;
 };
 
 /* Target destructor parameters */
 struct xt_tgdtor_param {
 	const struct xt_target *target;
 	void *targinfo;
+	u_int8_t family;
 };
 
 struct xt_match
@@ -393,9 +401,9 @@ extern void xt_unregister_match(struct xt_match *target);
 extern int xt_register_matches(struct xt_match *match, unsigned int n);
 extern void xt_unregister_matches(struct xt_match *match, unsigned int n);
 
-extern int xt_check_match(struct xt_mtchk_param *, u_int8_t family,
+extern int xt_check_match(struct xt_mtchk_param *,
 			  unsigned int size, u_int8_t proto, bool inv_proto);
-extern int xt_check_target(struct xt_tgchk_param *, u_int8_t family,
+extern int xt_check_target(struct xt_tgchk_param *,
 			   unsigned int size, u_int8_t proto, bool inv_proto);
 
 extern struct xt_table *xt_register_table(struct net *net,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 29d8061fa15..5bb88eb0aad 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -160,6 +160,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	struct xt_match_param mtpar;
 	struct xt_target_param tgpar;
 
+	mtpar.family  = tgpar.family = NFPROTO_BRIDGE;
 	mtpar.in      = tgpar.in  = in;
 	mtpar.out     = tgpar.out = out;
 	mtpar.hotdrop = &hotdrop;
@@ -351,7 +352,7 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
 
 	par->match     = match;
 	par->matchinfo = m->data;
-	ret = xt_check_match(par, NFPROTO_BRIDGE, m->match_size,
+	ret = xt_check_match(par, m->match_size,
 	      e->ethproto, e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(match->me);
@@ -386,7 +387,7 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par,
 
 	par->target   = watcher;
 	par->targinfo = w->data;
-	ret = xt_check_target(par, NFPROTO_BRIDGE, w->watcher_size,
+	ret = xt_check_target(par, w->watcher_size,
 	      e->ethproto, e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(watcher->me);
@@ -572,6 +573,7 @@ ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i)
 
 	par.match     = m->u.match;
 	par.matchinfo = m->data;
+	par.family    = NFPROTO_BRIDGE;
 	if (par.match->destroy != NULL)
 		par.match->destroy(&par);
 	module_put(par.match->me);
@@ -588,6 +590,7 @@ ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i)
 
 	par.target   = w->u.watcher;
 	par.targinfo = w->data;
+	par.family   = NFPROTO_BRIDGE;
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
@@ -611,6 +614,7 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
 
 	par.target   = t->u.target;
 	par.targinfo = t->data;
+	par.family   = NFPROTO_BRIDGE;
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
@@ -673,6 +677,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	mtpar.table     = tgpar.table     = name;
 	mtpar.entryinfo = tgpar.entryinfo = e;
 	mtpar.hook_mask = tgpar.hook_mask = hookmask;
+	mtpar.family    = tgpar.family    = NFPROTO_BRIDGE;
 	ret = EBT_MATCH_ITERATE(e, ebt_check_match, &mtpar, &i);
 	if (ret != 0)
 		goto cleanup_matches;
@@ -715,7 +720,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 
 	tgpar.target   = target;
 	tgpar.targinfo = t->data;
-	ret = xt_check_target(&tgpar, NFPROTO_BRIDGE, t->target_size,
+	ret = xt_check_target(&tgpar, t->target_size,
 	      e->ethproto, e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(target->me);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3bab78330cf..8d70d29f1cc 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -246,6 +246,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	tgpar.in      = in;
 	tgpar.out     = out;
 	tgpar.hooknum = hook;
+	tgpar.family  = NFPROTO_ARP;
 
 	arp = arp_hdr(skb);
 	do {
@@ -465,10 +466,10 @@ static inline int check_target(struct arpt_entry *e, const char *name)
 		.target    = t->u.kernel.target,
 		.targinfo  = t->data,
 		.hook_mask = e->comefrom,
+		.family    = NFPROTO_ARP,
 	};
 
-	ret = xt_check_target(&par, NFPROTO_ARP,
-	      t->u.target_size - sizeof(*t), 0, false);
+	ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
 	if (ret < 0) {
 		duprintf("arp_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
@@ -566,6 +567,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
 	t = arpt_get_target(e);
 	par.target   = t->u.kernel.target;
 	par.targinfo = t->data;
+	par.family   = NFPROTO_ARP;
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 50b9a6c34c3..213fb27debc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -348,6 +348,7 @@ ipt_do_table(struct sk_buff *skb,
 	mtpar.hotdrop = &hotdrop;
 	mtpar.in      = tgpar.in  = in;
 	mtpar.out     = tgpar.out = out;
+	mtpar.family  = tgpar.family = NFPROTO_IPV4;
 	tgpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
@@ -579,6 +580,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
 
 	par.match     = m->u.kernel.match;
 	par.matchinfo = m->data;
+	par.family    = NFPROTO_IPV4;
 	if (par.match->destroy != NULL)
 		par.match->destroy(&par);
 	module_put(par.match->me);
@@ -616,7 +618,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
 	par->match     = m->u.kernel.match;
 	par->matchinfo = m->data;
 
-	ret = xt_check_match(par, NFPROTO_IPV4, m->u.match_size - sizeof(*m),
+	ret = xt_check_match(par, m->u.match_size - sizeof(*m),
 	      ip->proto, ip->invflags & IPT_INV_PROTO);
 	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
@@ -662,10 +664,11 @@ static int check_target(struct ipt_entry *e, const char *name)
 		.target    = t->u.kernel.target,
 		.targinfo  = t->data,
 		.hook_mask = e->comefrom,
+		.family    = NFPROTO_IPV4,
 	};
 	int ret;
 
-	ret = xt_check_target(&par, NFPROTO_IPV4, t->u.target_size - sizeof(*t),
+	ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
 	      e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
 	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
@@ -693,6 +696,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ip;
 	mtpar.hook_mask = e->comefrom;
+	mtpar.family    = NFPROTO_IPV4;
 	ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
 	if (ret != 0)
 		goto cleanup_matches;
@@ -780,6 +784,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
 
 	par.target   = t->u.kernel.target;
 	par.targinfo = t->data;
+	par.family   = NFPROTO_IPV4;
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
@@ -1659,6 +1664,7 @@ compat_check_entry(struct ipt_entry *e, const char *name,
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ip;
 	mtpar.hook_mask = e->comefrom;
+	mtpar.family    = NFPROTO_IPV4;
 	ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
 	if (ret)
 		goto cleanup_matches;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d934a699463..a33485dc81c 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -370,6 +370,7 @@ ip6t_do_table(struct sk_buff *skb,
 	mtpar.hotdrop = &hotdrop;
 	mtpar.in      = tgpar.in  = in;
 	mtpar.out     = tgpar.out = out;
+	mtpar.family  = tgpar.family = NFPROTO_IPV6;
 	tgpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
@@ -604,6 +605,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
 
 	par.match     = m->u.kernel.match;
 	par.matchinfo = m->data;
+	par.family    = NFPROTO_IPV6;
 	if (par.match->destroy != NULL)
 		par.match->destroy(&par);
 	module_put(par.match->me);
@@ -640,7 +642,7 @@ static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par,
 	par->match     = m->u.kernel.match;
 	par->matchinfo = m->data;
 
-	ret = xt_check_match(par, NFPROTO_IPV6, m->u.match_size - sizeof(*m),
+	ret = xt_check_match(par, m->u.match_size - sizeof(*m),
 			     ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
 	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
@@ -686,11 +688,12 @@ static int check_target(struct ip6t_entry *e, const char *name)
 		.target    = t->u.kernel.target,
 		.targinfo  = t->data,
 		.hook_mask = e->comefrom,
+		.family    = NFPROTO_IPV6,
 	};
 	int ret;
 
 	t = ip6t_get_target(e);
-	ret = xt_check_target(&par, NFPROTO_IPV6, t->u.target_size - sizeof(*t),
+	ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
 	      e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO);
 	if (ret < 0) {
 		duprintf("ip_tables: check failed for `%s'.\n",
@@ -718,6 +721,7 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ipv6;
 	mtpar.hook_mask = e->comefrom;
+	mtpar.family    = NFPROTO_IPV6;
 	ret = IP6T_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
 	if (ret != 0)
 		goto cleanup_matches;
@@ -805,6 +809,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i)
 
 	par.target   = t->u.kernel.target;
 	par.targinfo = t->data;
+	par.family   = NFPROTO_IPV6;
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
@@ -1685,6 +1690,7 @@ static int compat_check_entry(struct ip6t_entry *e, const char *name,
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ipv6;
 	mtpar.hook_mask = e->comefrom;
+	mtpar.family    = NFPROTO_IPV6;
 	ret = IP6T_MATCH_ITERATE(e, check_match, &mtpar, &j);
 	if (ret)
 		goto cleanup_matches;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index f29513cd139..89837a4eef7 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -321,7 +321,7 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 }
 EXPORT_SYMBOL_GPL(xt_find_revision);
 
-int xt_check_match(struct xt_mtchk_param *par, u_int8_t family,
+int xt_check_match(struct xt_mtchk_param *par,
 		   unsigned int size, u_int8_t proto, bool inv_proto)
 {
 	if (XT_ALIGN(par->match->matchsize) != size &&
@@ -331,26 +331,27 @@ int xt_check_match(struct xt_mtchk_param *par, u_int8_t family,
 		 * because it uses a dynamic-size data set.
 		 */
 		printk("%s_tables: %s match: invalid size %Zu != %u\n",
-		       xt_prefix[family], par->match->name,
+		       xt_prefix[par->family], par->match->name,
 		       XT_ALIGN(par->match->matchsize), size);
 		return -EINVAL;
 	}
 	if (par->match->table != NULL &&
 	    strcmp(par->match->table, par->table) != 0) {
 		printk("%s_tables: %s match: only valid in %s table, not %s\n",
-		       xt_prefix[family], par->match->name,
+		       xt_prefix[par->family], par->match->name,
 		       par->match->table, par->table);
 		return -EINVAL;
 	}
 	if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
 		printk("%s_tables: %s match: bad hook_mask %#x/%#x\n",
-		       xt_prefix[family], par->match->name,
+		       xt_prefix[par->family], par->match->name,
 		       par->hook_mask, par->match->hooks);
 		return -EINVAL;
 	}
 	if (par->match->proto && (par->match->proto != proto || inv_proto)) {
 		printk("%s_tables: %s match: only valid for protocol %u\n",
-		       xt_prefix[family], par->match->name, par->match->proto);
+		       xt_prefix[par->family], par->match->name,
+		       par->match->proto);
 		return -EINVAL;
 	}
 	if (par->match->checkentry != NULL && !par->match->checkentry(par))
@@ -471,31 +472,31 @@ int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
 EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
 #endif /* CONFIG_COMPAT */
 
-int xt_check_target(struct xt_tgchk_param *par, u_int8_t family,
+int xt_check_target(struct xt_tgchk_param *par,
 		    unsigned int size, u_int8_t proto, bool inv_proto)
 {
 	if (XT_ALIGN(par->target->targetsize) != size) {
 		printk("%s_tables: %s target: invalid size %Zu != %u\n",
-		       xt_prefix[family], par->target->name,
+		       xt_prefix[par->family], par->target->name,
 		       XT_ALIGN(par->target->targetsize), size);
 		return -EINVAL;
 	}
 	if (par->target->table != NULL &&
 	    strcmp(par->target->table, par->table) != 0) {
 		printk("%s_tables: %s target: only valid in %s table, not %s\n",
-		       xt_prefix[family], par->target->name,
+		       xt_prefix[par->family], par->target->name,
 		       par->target->table, par->table);
 		return -EINVAL;
 	}
 	if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
 		printk("%s_tables: %s target: bad hook_mask %#x/%#x\n",
-		       xt_prefix[family], par->target->name, par->hook_mask,
-		       par->target->hooks);
+		       xt_prefix[par->family], par->target->name,
+		       par->hook_mask, par->target->hooks);
 		return -EINVAL;
 	}
 	if (par->target->proto && (par->target->proto != proto || inv_proto)) {
 		printk("%s_tables: %s target: only valid for protocol %u\n",
-		       xt_prefix[family], par->target->name,
+		       xt_prefix[par->family], par->target->name,
 		       par->target->proto);
 		return -EINVAL;
 	}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index b951d422db9..0453d79ebf5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -55,9 +55,9 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 	par.target    = target;
 	par.targinfo  = t->data;
 	par.hook_mask = hook;
+	par.family    = NFPROTO_IPV4;
 
-	ret = xt_check_target(&par, NFPROTO_IPV4,
-	      t->u.target_size - sizeof(*t), 0, false);
+	ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
 	if (ret < 0) {
 		module_put(t->u.kernel.target->me);
 		return ret;
-- 
cgit v1.2.3-70-g09d2


From 92f3b2b1bc968caaabee8cd78bee75ab7c4af74e Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:20 +0200
Subject: netfilter: xtables: cut down on static data for family-independent
 extensions

Using ->family in struct xt_*_param, multiple struct xt_{match,target}
can be squashed together.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_CONNMARK.c    | 39 +++++++----------------------------
 net/netfilter/xt_CONNSECMARK.c | 40 +++++++++++++----------------------
 net/netfilter/xt_NFLOG.c       | 31 ++++++++++------------------
 net/netfilter/xt_connbytes.c   | 39 +++++++++++++----------------------
 net/netfilter/xt_connlimit.c   | 47 ++++++++++++++++--------------------------
 net/netfilter/xt_connmark.c    | 39 +++++++----------------------------
 net/netfilter/xt_conntrack.c   | 26 +++++++----------------
 net/netfilter/xt_helper.c      | 38 +++++++++++++---------------------
 net/netfilter/xt_pkttype.c     | 30 ++++++++++-----------------
 9 files changed, 104 insertions(+), 225 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index c5a5072e005..d6e5ab46327 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -128,9 +128,9 @@ static bool connmark_tg_check_v0(const struct xt_tgchk_param *par)
 		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
 		return false;
 	}
-	if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->target->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
@@ -138,9 +138,9 @@ static bool connmark_tg_check_v0(const struct xt_tgchk_param *par)
 
 static bool connmark_tg_check(const struct xt_tgchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", par->target->family);
+		       "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
@@ -148,7 +148,7 @@ static bool connmark_tg_check(const struct xt_tgchk_param *par)
 
 static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
 {
-	nf_ct_l3proto_module_put(par->target->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
 #ifdef CONFIG_COMPAT
@@ -186,7 +186,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
 	{
 		.name		= "CONNMARK",
 		.revision	= 0,
-		.family		= NFPROTO_IPV4,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= connmark_tg_check_v0,
 		.destroy	= connmark_tg_destroy,
 		.target		= connmark_tg_v0,
@@ -198,35 +198,10 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
 #endif
 		.me		= THIS_MODULE
 	},
-	{
-		.name		= "CONNMARK",
-		.revision	= 0,
-		.family		= NFPROTO_IPV6,
-		.checkentry	= connmark_tg_check_v0,
-		.destroy	= connmark_tg_destroy,
-		.target		= connmark_tg_v0,
-		.targetsize	= sizeof(struct xt_connmark_target_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_connmark_target_info),
-		.compat_from_user = connmark_tg_compat_from_user_v0,
-		.compat_to_user	= connmark_tg_compat_to_user_v0,
-#endif
-		.me		= THIS_MODULE
-	},
-	{
-		.name           = "CONNMARK",
-		.revision       = 1,
-		.family         = NFPROTO_IPV4,
-		.checkentry     = connmark_tg_check,
-		.target         = connmark_tg,
-		.targetsize     = sizeof(struct xt_connmark_tginfo1),
-		.destroy        = connmark_tg_destroy,
-		.me             = THIS_MODULE,
-	},
 	{
 		.name           = "CONNMARK",
 		.revision       = 1,
-		.family         = NFPROTO_IPV6,
+		.family         = NFPROTO_UNSPEC,
 		.checkentry     = connmark_tg_check,
 		.target         = connmark_tg,
 		.targetsize     = sizeof(struct xt_connmark_tginfo1),
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index b6e3f3f125f..b54c3756fdc 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -106,9 +106,9 @@ static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
 		return false;
 	}
 
-	if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->target->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
@@ -116,40 +116,28 @@ static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
 
 static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
 {
-	nf_ct_l3proto_module_put(par->target->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
-static struct xt_target connsecmark_tg_reg[] __read_mostly = {
-	{
-		.name		= "CONNSECMARK",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= connsecmark_tg_check,
-		.destroy	= connsecmark_tg_destroy,
-		.target		= connsecmark_tg,
-		.targetsize	= sizeof(struct xt_connsecmark_target_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "CONNSECMARK",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= connsecmark_tg_check,
-		.destroy	= connsecmark_tg_destroy,
-		.target		= connsecmark_tg,
-		.targetsize	= sizeof(struct xt_connsecmark_target_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_target connsecmark_tg_reg __read_mostly = {
+	.name       = "CONNSECMARK",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = connsecmark_tg_check,
+	.destroy    = connsecmark_tg_destroy,
+	.target     = connsecmark_tg,
+	.targetsize = sizeof(struct xt_connsecmark_target_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init connsecmark_tg_init(void)
 {
-	return xt_register_targets(connsecmark_tg_reg,
-	       ARRAY_SIZE(connsecmark_tg_reg));
+	return xt_register_target(&connsecmark_tg_reg);
 }
 
 static void __exit connsecmark_tg_exit(void)
 {
-	xt_unregister_targets(connsecmark_tg_reg,
-	                      ARRAY_SIZE(connsecmark_tg_reg));
+	xt_unregister_target(&connsecmark_tg_reg);
 }
 
 module_init(connsecmark_tg_init);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 56ee4f118b5..50e3a52d3b3 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -31,7 +31,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	li.u.ulog.group	     = info->group;
 	li.u.ulog.qthreshold = info->threshold;
 
-	nf_log_packet(par->target->family, par->hooknum, skb, par->in,
+	nf_log_packet(par->family, par->hooknum, skb, par->in,
 	              par->out, &li, "%s", info->prefix);
 	return XT_CONTINUE;
 }
@@ -47,33 +47,24 @@ static bool nflog_tg_check(const struct xt_tgchk_param *par)
 	return true;
 }
 
-static struct xt_target nflog_tg_reg[] __read_mostly = {
-	{
-		.name		= "NFLOG",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= nflog_tg_check,
-		.target		= nflog_tg,
-		.targetsize	= sizeof(struct xt_nflog_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "NFLOG",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= nflog_tg_check,
-		.target		= nflog_tg,
-		.targetsize	= sizeof(struct xt_nflog_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_target nflog_tg_reg __read_mostly = {
+	.name       = "NFLOG",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = nflog_tg_check,
+	.target     = nflog_tg,
+	.targetsize = sizeof(struct xt_nflog_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init nflog_tg_init(void)
 {
-	return xt_register_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
+	return xt_register_target(&nflog_tg_reg);
 }
 
 static void __exit nflog_tg_exit(void)
 {
-	xt_unregister_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
+	xt_unregister_target(&nflog_tg_reg);
 }
 
 module_init(nflog_tg_init);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5bf4aa08b0f..955e6598a7f 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -106,9 +106,9 @@ static bool connbytes_mt_check(const struct xt_mtchk_param *par)
 	    sinfo->direction != XT_CONNBYTES_DIR_BOTH)
 		return false;
 
-	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->match->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 
@@ -117,39 +117,28 @@ static bool connbytes_mt_check(const struct xt_mtchk_param *par)
 
 static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(par->match->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
-static struct xt_match connbytes_mt_reg[] __read_mostly = {
-	{
-		.name		= "connbytes",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= connbytes_mt_check,
-		.match		= connbytes_mt,
-		.destroy	= connbytes_mt_destroy,
-		.matchsize	= sizeof(struct xt_connbytes_info),
-		.me		= THIS_MODULE
-	},
-	{
-		.name		= "connbytes",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= connbytes_mt_check,
-		.match		= connbytes_mt,
-		.destroy	= connbytes_mt_destroy,
-		.matchsize	= sizeof(struct xt_connbytes_info),
-		.me		= THIS_MODULE
-	},
+static struct xt_match connbytes_mt_reg __read_mostly = {
+	.name       = "connbytes",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = connbytes_mt_check,
+	.match      = connbytes_mt,
+	.destroy    = connbytes_mt_destroy,
+	.matchsize  = sizeof(struct xt_connbytes_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init connbytes_mt_init(void)
 {
-	return xt_register_matches(connbytes_mt_reg,
-	       ARRAY_SIZE(connbytes_mt_reg));
+	return xt_register_match(&connbytes_mt_reg);
 }
 
 static void __exit connbytes_mt_exit(void)
 {
-	xt_unregister_matches(connbytes_mt_reg, ARRAY_SIZE(connbytes_mt_reg));
+	xt_unregister_match(&connbytes_mt_reg);
 }
 
 module_init(connbytes_mt_init);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index bfb3ee6c712..7f404cc64c8 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -192,10 +192,10 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (ct != NULL)
 		tuple_ptr = &ct->tuplehash[0].tuple;
 	else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-				    par->match->family, &tuple))
+				    par->family, &tuple))
 		goto hotdrop;
 
-	if (par->match->family == NFPROTO_IPV6) {
+	if (par->family == NFPROTO_IPV6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
 		memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
 	} else {
@@ -226,16 +226,16 @@ static bool connlimit_mt_check(const struct xt_mtchk_param *par)
 	struct xt_connlimit_info *info = par->matchinfo;
 	unsigned int i;
 
-	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
-		       "address family %u\n", par->match->family);
+		       "address family %u\n", par->family);
 		return false;
 	}
 
 	/* init private data */
 	info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
 	if (info->data == NULL) {
-		nf_ct_l3proto_module_put(par->match->family);
+		nf_ct_l3proto_module_put(par->family);
 		return false;
 	}
 
@@ -254,7 +254,7 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
 	struct list_head *hash = info->data->iphash;
 	unsigned int i;
 
-	nf_ct_l3proto_module_put(par->match->family);
+	nf_ct_l3proto_module_put(par->family);
 
 	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
 		list_for_each_entry_safe(conn, tmp, &hash[i], list) {
@@ -266,41 +266,30 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
 	kfree(info->data);
 }
 
-static struct xt_match connlimit_mt_reg[] __read_mostly = {
-	{
-		.name       = "connlimit",
-		.family     = NFPROTO_IPV4,
-		.checkentry = connlimit_mt_check,
-		.match      = connlimit_mt,
-		.matchsize  = sizeof(struct xt_connlimit_info),
-		.destroy    = connlimit_mt_destroy,
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "connlimit",
-		.family     = NFPROTO_IPV6,
-		.checkentry = connlimit_mt_check,
-		.match      = connlimit_mt,
-		.matchsize  = sizeof(struct xt_connlimit_info),
-		.destroy    = connlimit_mt_destroy,
-		.me         = THIS_MODULE,
-	},
+static struct xt_match connlimit_mt_reg __read_mostly = {
+	.name       = "connlimit",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = connlimit_mt_check,
+	.match      = connlimit_mt,
+	.matchsize  = sizeof(struct xt_connlimit_info),
+	.destroy    = connlimit_mt_destroy,
+	.me         = THIS_MODULE,
 };
 
 static int __init connlimit_mt_init(void)
 {
-	return xt_register_matches(connlimit_mt_reg,
-	       ARRAY_SIZE(connlimit_mt_reg));
+	return xt_register_match(&connlimit_mt_reg);
 }
 
 static void __exit connlimit_mt_exit(void)
 {
-	xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
+	xt_unregister_match(&connlimit_mt_reg);
 }
 
 module_init(connlimit_mt_init);
 module_exit(connlimit_mt_exit);
-MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
 MODULE_DESCRIPTION("Xtables: Number of connections matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_connlimit");
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index c708577ea1b..86cacab7a4a 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -69,9 +69,9 @@ static bool connmark_mt_check_v0(const struct xt_mtchk_param *par)
 		printk(KERN_WARNING "connmark: only support 32bit mark\n");
 		return false;
 	}
-	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->match->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
@@ -79,9 +79,9 @@ static bool connmark_mt_check_v0(const struct xt_mtchk_param *par)
 
 static bool connmark_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", par->match->family);
+		       "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
@@ -89,7 +89,7 @@ static bool connmark_mt_check(const struct xt_mtchk_param *par)
 
 static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(par->match->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
 #ifdef CONFIG_COMPAT
@@ -127,7 +127,7 @@ static struct xt_match connmark_mt_reg[] __read_mostly = {
 	{
 		.name		= "connmark",
 		.revision	= 0,
-		.family		= NFPROTO_IPV4,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= connmark_mt_check_v0,
 		.match		= connmark_mt_v0,
 		.destroy	= connmark_mt_destroy,
@@ -139,35 +139,10 @@ static struct xt_match connmark_mt_reg[] __read_mostly = {
 #endif
 		.me		= THIS_MODULE
 	},
-	{
-		.name		= "connmark",
-		.revision	= 0,
-		.family		= NFPROTO_IPV6,
-		.checkentry	= connmark_mt_check_v0,
-		.match		= connmark_mt_v0,
-		.destroy	= connmark_mt_destroy,
-		.matchsize	= sizeof(struct xt_connmark_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_connmark_info),
-		.compat_from_user = connmark_mt_compat_from_user_v0,
-		.compat_to_user	= connmark_mt_compat_to_user_v0,
-#endif
-		.me		= THIS_MODULE
-	},
-	{
-		.name           = "connmark",
-		.revision       = 1,
-		.family         = NFPROTO_IPV4,
-		.checkentry     = connmark_mt_check,
-		.match          = connmark_mt,
-		.matchsize      = sizeof(struct xt_connmark_mtinfo1),
-		.destroy        = connmark_mt_destroy,
-		.me             = THIS_MODULE,
-	},
 	{
 		.name           = "connmark",
 		.revision       = 1,
-		.family         = NFPROTO_IPV6,
+		.family         = NFPROTO_UNSPEC,
 		.checkentry     = connmark_mt_check,
 		.match          = connmark_mt,
 		.matchsize      = sizeof(struct xt_connmark_mtinfo1),
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 5cd58d7fcb1..0b7139f3dd7 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -238,22 +238,22 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		return false;
 
 	if (info->match_flags & XT_CONNTRACK_ORIGSRC)
-		if (conntrack_mt_origsrc(ct, info, par->match->family) ^
+		if (conntrack_mt_origsrc(ct, info, par->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_ORIGDST)
-		if (conntrack_mt_origdst(ct, info, par->match->family) ^
+		if (conntrack_mt_origdst(ct, info, par->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_ORIGDST))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_REPLSRC)
-		if (conntrack_mt_replsrc(ct, info, par->match->family) ^
+		if (conntrack_mt_replsrc(ct, info, par->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_REPLSRC))
 			return false;
 
 	if (info->match_flags & XT_CONNTRACK_REPLDST)
-		if (conntrack_mt_repldst(ct, info, par->match->family) ^
+		if (conntrack_mt_repldst(ct, info, par->family) ^
 		    !(info->invert_flags & XT_CONNTRACK_REPLDST))
 			return false;
 
@@ -280,9 +280,9 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 static bool conntrack_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->match->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
@@ -290,7 +290,7 @@ static bool conntrack_mt_check(const struct xt_mtchk_param *par)
 
 static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(par->match->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
 #ifdef CONFIG_COMPAT
@@ -361,17 +361,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
 	{
 		.name       = "conntrack",
 		.revision   = 1,
-		.family     = NFPROTO_IPV4,
-		.matchsize  = sizeof(struct xt_conntrack_mtinfo1),
-		.match      = conntrack_mt,
-		.checkentry = conntrack_mt_check,
-		.destroy    = conntrack_mt_destroy,
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "conntrack",
-		.revision   = 1,
-		.family     = NFPROTO_IPV6,
+		.family     = NFPROTO_UNSPEC,
 		.matchsize  = sizeof(struct xt_conntrack_mtinfo1),
 		.match      = conntrack_mt,
 		.checkentry = conntrack_mt_check,
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 280c984349f..64fc7f27722 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -58,9 +58,9 @@ static bool helper_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_helper_info *info = par->matchinfo;
 
-	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->match->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	info->name[29] = '\0';
@@ -69,38 +69,28 @@ static bool helper_mt_check(const struct xt_mtchk_param *par)
 
 static void helper_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(par->match->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
-static struct xt_match helper_mt_reg[] __read_mostly = {
-	{
-		.name		= "helper",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= helper_mt_check,
-		.match		= helper_mt,
-		.destroy	= helper_mt_destroy,
-		.matchsize	= sizeof(struct xt_helper_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "helper",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= helper_mt_check,
-		.match		= helper_mt,
-		.destroy	= helper_mt_destroy,
-		.matchsize	= sizeof(struct xt_helper_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match helper_mt_reg __read_mostly = {
+	.name       = "helper",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = helper_mt_check,
+	.match      = helper_mt,
+	.destroy    = helper_mt_destroy,
+	.matchsize  = sizeof(struct xt_helper_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init helper_mt_init(void)
 {
-	return xt_register_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
+	return xt_register_match(&helper_mt_reg);
 }
 
 static void __exit helper_mt_exit(void)
 {
-	xt_unregister_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
+	xt_unregister_match(&helper_mt_reg);
 }
 
 module_init(helper_mt_init);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 37753a37760..69da1d3a1d8 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -30,10 +30,10 @@ pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 	if (skb->pkt_type != PACKET_LOOPBACK)
 		type = skb->pkt_type;
-	else if (par->match->family == NFPROTO_IPV4 &&
+	else if (par->family == NFPROTO_IPV4 &&
 	    ipv4_is_multicast(ip_hdr(skb)->daddr))
 		type = PACKET_MULTICAST;
-	else if (par->match->family == NFPROTO_IPV6 &&
+	else if (par->family == NFPROTO_IPV6 &&
 	    ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
 		type = PACKET_MULTICAST;
 	else
@@ -42,31 +42,23 @@ pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (type == info->pkttype) ^ info->invert;
 }
 
-static struct xt_match pkttype_mt_reg[] __read_mostly = {
-	{
-		.name		= "pkttype",
-		.family		= NFPROTO_IPV4,
-		.match		= pkttype_mt,
-		.matchsize	= sizeof(struct xt_pkttype_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "pkttype",
-		.family		= NFPROTO_IPV6,
-		.match		= pkttype_mt,
-		.matchsize	= sizeof(struct xt_pkttype_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match pkttype_mt_reg __read_mostly = {
+	.name      = "pkttype",
+	.revision  = 0,
+	.family    = NFPROTO_UNSPEC,
+	.match     = pkttype_mt,
+	.matchsize = sizeof(struct xt_pkttype_info),
+	.me        = THIS_MODULE,
 };
 
 static int __init pkttype_mt_init(void)
 {
-	return xt_register_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
+	return xt_register_match(&pkttype_mt_reg);
 }
 
 static void __exit pkttype_mt_exit(void)
 {
-	xt_unregister_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
+	xt_unregister_match(&pkttype_mt_reg);
 }
 
 module_init(pkttype_mt_init);
-- 
cgit v1.2.3-70-g09d2


From ab4f21e6fb1c09b13c4c3cb8357babe8223471bd Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:20 +0200
Subject: netfilter: xtables: use NFPROTO_UNSPEC in more extensions

Lots of extensions are completely family-independent, so squash some code.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_MARK.c    | 34 ++--------------------------------
 net/netfilter/xt_NOTRACK.c | 26 +++++++++-----------------
 net/netfilter/xt_comment.c | 26 +++++++++-----------------
 net/netfilter/xt_mac.c     | 34 +++++++++++-----------------------
 net/netfilter/xt_owner.c   | 12 +-----------
 net/netfilter/xt_physdev.c | 29 ++++++++++-------------------
 net/netfilter/xt_realm.c   |  2 +-
 7 files changed, 43 insertions(+), 120 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 123ee0ba78c..67574bcfb8a 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -149,7 +149,7 @@ static int mark_tg_compat_to_user_v1(void __user *dst, void *src)
 static struct xt_target mark_tg_reg[] __read_mostly = {
 	{
 		.name		= "MARK",
-		.family		= NFPROTO_IPV4,
+		.family		= NFPROTO_UNSPEC,
 		.revision	= 0,
 		.checkentry	= mark_tg_check_v0,
 		.target		= mark_tg_v0,
@@ -164,37 +164,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "MARK",
-		.family		= NFPROTO_IPV4,
-		.revision	= 1,
-		.checkentry	= mark_tg_check_v1,
-		.target		= mark_tg_v1,
-		.targetsize	= sizeof(struct xt_mark_target_info_v1),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
-		.compat_from_user = mark_tg_compat_from_user_v1,
-		.compat_to_user	= mark_tg_compat_to_user_v1,
-#endif
-		.table		= "mangle",
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "MARK",
-		.family		= NFPROTO_IPV6,
-		.revision	= 0,
-		.checkentry	= mark_tg_check_v0,
-		.target		= mark_tg_v0,
-		.targetsize	= sizeof(struct xt_mark_target_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_mark_target_info),
-		.compat_from_user = mark_tg_compat_from_user_v0,
-		.compat_to_user	= mark_tg_compat_to_user_v0,
-#endif
-		.table		= "mangle",
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "MARK",
-		.family		= NFPROTO_IPV6,
+		.family		= NFPROTO_UNSPEC,
 		.revision	= 1,
 		.checkentry	= mark_tg_check_v1,
 		.target		= mark_tg_v1,
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index cc50295cd11..e7a0a54fd4e 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -30,31 +30,23 @@ notrack_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static struct xt_target notrack_tg_reg[] __read_mostly = {
-	{
-		.name		= "NOTRACK",
-		.family		= NFPROTO_IPV4,
-		.target		= notrack_tg,
-		.table		= "raw",
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "NOTRACK",
-		.family		= NFPROTO_IPV6,
-		.target		= notrack_tg,
-		.table		= "raw",
-		.me		= THIS_MODULE,
-	},
+static struct xt_target notrack_tg_reg __read_mostly = {
+	.name     = "NOTRACK",
+	.revision = 0,
+	.family   = NFPROTO_UNSPEC,
+	.target   = notrack_tg,
+	.table    = "raw",
+	.me       = THIS_MODULE,
 };
 
 static int __init notrack_tg_init(void)
 {
-	return xt_register_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
+	return xt_register_target(&notrack_tg_reg);
 }
 
 static void __exit notrack_tg_exit(void)
 {
-	xt_unregister_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
+	xt_unregister_target(&notrack_tg_reg);
 }
 
 module_init(notrack_tg_init);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index bd7aa57af42..e82179832ac 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -22,31 +22,23 @@ comment_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static struct xt_match comment_mt_reg[] __read_mostly = {
-	{
-		.name		= "comment",
-		.family		= NFPROTO_IPV4,
-		.match		= comment_mt,
-		.matchsize	= sizeof(struct xt_comment_info),
-		.me		= THIS_MODULE
-	},
-	{
-		.name		= "comment",
-		.family		= NFPROTO_IPV6,
-		.match		= comment_mt,
-		.matchsize	= sizeof(struct xt_comment_info),
-		.me		= THIS_MODULE
-	},
+static struct xt_match comment_mt_reg __read_mostly = {
+	.name      = "comment",
+	.revision  = 0,
+	.family    = NFPROTO_UNSPEC,
+	.match     = comment_mt,
+	.matchsize = sizeof(struct xt_comment_info),
+	.me        = THIS_MODULE,
 };
 
 static int __init comment_mt_init(void)
 {
-	return xt_register_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
+	return xt_register_match(&comment_mt_reg);
 }
 
 static void __exit comment_mt_exit(void)
 {
-	xt_unregister_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
+	xt_unregister_match(&comment_mt_reg);
 }
 
 module_init(comment_mt_init);
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 269f9d8aef5..c2007116ce5 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -36,37 +36,25 @@ static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		^ info->invert);
 }
 
-static struct xt_match mac_mt_reg[] __read_mostly = {
-	{
-		.name		= "mac",
-		.family		= NFPROTO_IPV4,
-		.match		= mac_mt,
-		.matchsize	= sizeof(struct xt_mac_info),
-		.hooks		= (1 << NF_INET_PRE_ROUTING) |
-				  (1 << NF_INET_LOCAL_IN) |
-				  (1 << NF_INET_FORWARD),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "mac",
-		.family		= NFPROTO_IPV6,
-		.match		= mac_mt,
-		.matchsize	= sizeof(struct xt_mac_info),
-		.hooks		= (1 << NF_INET_PRE_ROUTING) |
-				  (1 << NF_INET_LOCAL_IN) |
-				  (1 << NF_INET_FORWARD),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match mac_mt_reg __read_mostly = {
+	.name      = "mac",
+	.revision  = 0,
+	.family    = NFPROTO_UNSPEC,
+	.match     = mac_mt,
+	.matchsize = sizeof(struct xt_mac_info),
+	.hooks     = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
+	             (1 << NF_INET_FORWARD),
+	.me        = THIS_MODULE,
 };
 
 static int __init mac_mt_init(void)
 {
-	return xt_register_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
+	return xt_register_match(&mac_mt_reg);
 }
 
 static void __exit mac_mt_exit(void)
 {
-	xt_unregister_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
+	xt_unregister_match(&mac_mt_reg);
 }
 
 module_init(mac_mt_init);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 32f84e84d9e..f19ebd9b78f 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -160,17 +160,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = {
 	{
 		.name       = "owner",
 		.revision   = 1,
-		.family     = NFPROTO_IPV4,
-		.match      = owner_mt,
-		.matchsize  = sizeof(struct xt_owner_match_info),
-		.hooks      = (1 << NF_INET_LOCAL_OUT) |
-		              (1 << NF_INET_POST_ROUTING),
-		.me         = THIS_MODULE,
-	},
-	{
-		.name       = "owner",
-		.revision   = 1,
-		.family     = NFPROTO_IPV6,
+		.family     = NFPROTO_UNSPEC,
 		.match      = owner_mt,
 		.matchsize  = sizeof(struct xt_owner_match_info),
 		.hooks      = (1 << NF_INET_LOCAL_OUT) |
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index b01786d2dd9..1bcdfc12cf5 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -112,33 +112,24 @@ static bool physdev_mt_check(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static struct xt_match physdev_mt_reg[] __read_mostly = {
-	{
-		.name		= "physdev",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= physdev_mt_check,
-		.match		= physdev_mt,
-		.matchsize	= sizeof(struct xt_physdev_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "physdev",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= physdev_mt_check,
-		.match		= physdev_mt,
-		.matchsize	= sizeof(struct xt_physdev_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match physdev_mt_reg __read_mostly = {
+	.name       = "physdev",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = physdev_mt_check,
+	.match      = physdev_mt,
+	.matchsize  = sizeof(struct xt_physdev_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init physdev_mt_init(void)
 {
-	return xt_register_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
+	return xt_register_match(&physdev_mt_reg);
 }
 
 static void __exit physdev_mt_exit(void)
 {
-	xt_unregister_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
+	xt_unregister_match(&physdev_mt_reg);
 }
 
 module_init(physdev_mt_init);
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index b25942110ed..67419287bc7 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -36,7 +36,7 @@ static struct xt_match realm_mt_reg __read_mostly = {
 	.matchsize	= sizeof(struct xt_realm_info),
 	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) |
 			  (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN),
-	.family		= NFPROTO_IPV4,
+	.family		= NFPROTO_UNSPEC,
 	.me		= THIS_MODULE
 };
 
-- 
cgit v1.2.3-70-g09d2


From f39a9410ed0503278fd5edc559fa019051413039 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 8 Oct 2008 11:35:20 +0200
Subject: netfilter: xtables: remove bogus mangle table dependency of connmark

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/Kconfig | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 899e78051d8..f70b4145ffc 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -318,7 +318,6 @@ config NETFILTER_XT_TARGET_CLASSIFY
 
 config NETFILTER_XT_TARGET_CONNMARK
 	tristate  '"CONNMARK" target support'
-	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
-- 
cgit v1.2.3-70-g09d2


From 0b0588d42b2774734b51525fe6550d77f8ea9bc0 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:31:18 -0700
Subject: ipv6: local dev is actually unused in ip6_fragment

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3df2c442d90..f22393e1166 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -613,7 +613,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
-	struct net_device *dev;
 	struct sk_buff *frag;
 	struct rt6_info *rt = (struct rt6_info*)skb->dst;
 	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
@@ -624,7 +623,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	int ptr, offset = 0, err=0;
 	u8 *prevhdr, nexthdr = 0;
 
-	dev = rt->u.dst.dev;
 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 	nexthdr = *prevhdr;
 
-- 
cgit v1.2.3-70-g09d2


From 98b3377ca77a06a7bd75a444e9f7136e9bb5112e Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:31:44 -0700
Subject: ipv6: consolidate error paths in ipv6_frag_rcv

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 2eeadfa039c..f4f62f08609 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -578,19 +578,12 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
 
 	/* Jumbo payload inhibits frag. header */
-	if (hdr->payload_len==0) {
-		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
-				  skb_network_header_len(skb));
-		return -1;
-	}
+	if (hdr->payload_len==0)
+		goto fail_hdr;
+
 	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
-				 sizeof(struct frag_hdr)))) {
-		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
-				  skb_network_header_len(skb));
-		return -1;
-	}
+				 sizeof(struct frag_hdr))))
+		goto fail_hdr;
 
 	hdr = ipv6_hdr(skb);
 	fhdr = (struct frag_hdr *)skb_transport_header(skb);
@@ -624,6 +617,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
 	return -1;
+
+fail_hdr:
+	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
+	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
+	return -1;
 }
 
 static struct inet6_protocol frag_protocol =
-- 
cgit v1.2.3-70-g09d2


From 3bd653c8455bc7991bae77968702b31c8f5df883 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:54:51 -0700
Subject: netns: add net parameter to IP6_INC_STATS

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h    |  3 ++-
 net/ipv6/icmp.c       |  2 +-
 net/ipv6/ip6_output.c | 56 +++++++++++++++++++++++++++++++--------------------
 net/ipv6/mcast.c      | 10 ++++-----
 net/ipv6/ndisc.c      |  4 ++--
 net/ipv6/netfilter.c  |  3 ++-
 net/ipv6/raw.c        |  4 ++--
 net/ipv6/reassembly.c |  8 ++++----
 net/ipv6/route.c      |  7 +++++--
 9 files changed, 57 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index dfa7ae3c560..26c17988b90 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -129,7 +129,8 @@ extern struct ctl_path net_ipv6_ctl_path[];
 /* MIBs */
 DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
 
-#define IP6_INC_STATS(idev,field)	_DEVINC(ipv6, , idev, field)
+#define IP6_INC_STATS(net, idev,field)	({ (void)(net); \
+		_DEVINC(ipv6, , idev, field); })
 #define IP6_INC_STATS_BH(idev,field)	_DEVINC(ipv6, _BH, idev, field)
 #define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val)
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b3157a0cc15..758cdd7dc27 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -183,7 +183,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
 	 */
 	dst = ip6_route_output(net, sk, fl);
 	if (dst->error) {
-		IP6_INC_STATS(ip6_dst_idev(dst),
+		IP6_INC_STATS(net, ip6_dst_idev(dst),
 			      IPSTATS_MIB_OUTNOROUTES);
 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
 		res = 1;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f22393e1166..db28c208f32 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -150,13 +150,14 @@ static int ip6_output2(struct sk_buff *skb)
 					ip6_dev_loopback_xmit);
 
 			if (ipv6_hdr(skb)->hop_limit == 0) {
-				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
+				IP6_INC_STATS(dev_net(dev), idev,
+					      IPSTATS_MIB_OUTDISCARDS);
 				kfree_skb(skb);
 				return 0;
 			}
 		}
 
-		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
+		IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS);
 	}
 
 	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
@@ -175,7 +176,8 @@ int ip6_output(struct sk_buff *skb)
 {
 	struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 	if (unlikely(idev->cnf.disable_ipv6)) {
-		IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
+		IP6_INC_STATS(dev_net(skb->dst->dev), idev,
+			      IPSTATS_MIB_OUTDISCARDS);
 		kfree_skb(skb);
 		return 0;
 	}
@@ -194,6 +196,7 @@ int ip6_output(struct sk_buff *skb)
 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	     struct ipv6_txoptions *opt, int ipfragok)
 {
+	struct net *net = sock_net(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *first_hop = &fl->fl6_dst;
 	struct dst_entry *dst = skb->dst;
@@ -216,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 		if (skb_headroom(skb) < head_room) {
 			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 			if (skb2 == NULL) {
-				IP6_INC_STATS(ip6_dst_idev(skb->dst),
+				IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 					      IPSTATS_MIB_OUTDISCARDS);
 				kfree_skb(skb);
 				return -ENOBUFS;
@@ -270,7 +273,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 
 	mtu = dst_mtu(dst);
 	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
-		IP6_INC_STATS(ip6_dst_idev(skb->dst),
+		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 			      IPSTATS_MIB_OUTREQUESTS);
 		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 				dst_output);
@@ -280,7 +283,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 	skb->dev = dst->dev;
 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
+	IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 	kfree_skb(skb);
 	return -EMSGSIZE;
 }
@@ -422,7 +425,7 @@ int ip6_forward(struct sk_buff *skb)
 		goto drop;
 
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
-		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
+		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
 
@@ -468,13 +471,14 @@ int ip6_forward(struct sk_buff *skb)
 		if (proxied > 0)
 			return ip6_input(skb);
 		else if (proxied < 0) {
-			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
+			IP6_INC_STATS(net, ip6_dst_idev(dst),
+				      IPSTATS_MIB_INDISCARDS);
 			goto drop;
 		}
 	}
 
 	if (!xfrm6_route_forward(skb)) {
-		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
+		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
 	dst = skb->dst;
@@ -530,7 +534,7 @@ int ip6_forward(struct sk_buff *skb)
 	}
 
 	if (skb_cow(skb, dst->dev->hard_header_len)) {
-		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
 		goto drop;
 	}
 
@@ -622,6 +626,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	__be32 frag_id = 0;
 	int ptr, offset = 0, err=0;
 	u8 *prevhdr, nexthdr = 0;
+	struct net *net = dev_net(skb->dst->dev);
 
 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 	nexthdr = *prevhdr;
@@ -635,7 +640,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	if (!skb->local_df) {
 		skb->dev = skb->dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
+		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+			      IPSTATS_MIB_FRAGFAILS);
 		kfree_skb(skb);
 		return -EMSGSIZE;
 	}
@@ -684,7 +690,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		*prevhdr = NEXTHDR_FRAGMENT;
 		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 		if (!tmp_hdr) {
-			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
+			IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+				      IPSTATS_MIB_FRAGFAILS);
 			return -ENOMEM;
 		}
 
@@ -735,7 +742,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 			err = output(skb);
 			if(!err)
-				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
+				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+					      IPSTATS_MIB_FRAGCREATES);
 
 			if (err || !frag)
 				break;
@@ -748,7 +756,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		kfree(tmp_hdr);
 
 		if (err == 0) {
-			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
+			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+				      IPSTATS_MIB_FRAGOKS);
 			dst_release(&rt->u.dst);
 			return 0;
 		}
@@ -759,7 +768,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			frag = skb;
 		}
 
-		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
+		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+			      IPSTATS_MIB_FRAGFAILS);
 		dst_release(&rt->u.dst);
 		return err;
 	}
@@ -793,7 +803,7 @@ slow_path:
 
 		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
-			IP6_INC_STATS(ip6_dst_idev(skb->dst),
+			IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 				      IPSTATS_MIB_FRAGFAILS);
 			err = -ENOMEM;
 			goto fail;
@@ -857,15 +867,16 @@ slow_path:
 		if (err)
 			goto fail;
 
-		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
+		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+			      IPSTATS_MIB_FRAGCREATES);
 	}
-	IP6_INC_STATS(ip6_dst_idev(skb->dst),
+	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 		      IPSTATS_MIB_FRAGOKS);
 	kfree_skb(skb);
 	return err;
 
 fail:
-	IP6_INC_STATS(ip6_dst_idev(skb->dst),
+	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 		      IPSTATS_MIB_FRAGFAILS);
 	kfree_skb(skb);
 	return err;
@@ -1385,7 +1396,7 @@ alloc_new_skb:
 	return 0;
 error:
 	inet->cork.length -= length;
-	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 	return err;
 }
 
@@ -1409,6 +1420,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net *net = sock_net(sk);
 	struct ipv6hdr *hdr;
 	struct ipv6_txoptions *opt = np->cork.opt;
 	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
@@ -1462,7 +1474,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb->mark = sk->sk_mark;
 
 	skb->dst = dst_clone(&rt->u.dst);
-	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
 	if (proto == IPPROTO_ICMPV6) {
 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 
@@ -1491,7 +1503,7 @@ void ip6_flush_pending_frames(struct sock *sk)
 
 	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
 		if (skb->dst)
-			IP6_INC_STATS(ip6_dst_idev(skb->dst),
+			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst),
 				      IPSTATS_MIB_OUTDISCARDS);
 		kfree_skb(skb);
 	}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index e7c03bcc278..a1d588d0b5d 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1446,7 +1446,7 @@ static void mld_sendpack(struct sk_buff *skb)
 	int err;
 	struct flowi fl;
 
-	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
 	payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
 	mldlen = skb->tail - skb->transport_header;
 	pip6->payload_len = htons(payload_len);
@@ -1771,7 +1771,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	struct flowi fl;
 
 	rcu_read_lock();
-	IP6_INC_STATS(__in6_dev_get(dev),
+	IP6_INC_STATS(net, __in6_dev_get(dev),
 		      IPSTATS_MIB_OUTREQUESTS);
 	rcu_read_unlock();
 	if (type == ICMPV6_MGM_REDUCTION)
@@ -1787,7 +1787,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 
 	if (skb == NULL) {
 		rcu_read_lock();
-		IP6_INC_STATS(__in6_dev_get(dev),
+		IP6_INC_STATS(net, __in6_dev_get(dev),
 			      IPSTATS_MIB_OUTDISCARDS);
 		rcu_read_unlock();
 		return;
@@ -1841,9 +1841,9 @@ out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(idev, type);
 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
-		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
+		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS);
 	} else
-		IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
+		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 
 	if (likely(idev != NULL))
 		in6_dev_put(idev);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f1c62ba0f56..ce5b61763fd 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -516,7 +516,7 @@ static void __ndisc_send(struct net_device *dev,
 	skb->dst = dst;
 
 	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
 
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 		      dst_output);
@@ -1581,7 +1581,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 
 	buff->dst = dst;
 	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
 		      dst_output);
 	if (!err) {
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 4cb4844a322..6b29b03925f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -33,7 +33,8 @@ int ip6_route_me_harder(struct sk_buff *skb)
 #endif
 
 	if (dst->error) {
-		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+		IP6_INC_STATS(&init_net, ip6_dst_idev(dst),
+			      IPSTATS_MIB_OUTNOROUTES);
 		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
 		dst_release(dst);
 		return -EINVAL;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e53e493606c..2ba04d41dc2 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -638,7 +638,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	if (err)
 		goto error_fault;
 
-	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
 		      dst_output);
 	if (err > 0)
@@ -652,7 +652,7 @@ error_fault:
 	err = -EFAULT;
 	kfree_skb(skb);
 error:
-	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 	return err;
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f4f62f08609..63644075f2d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -433,7 +433,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	return -1;
 
 err:
-	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
+	IP6_INC_STATS(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
+		      IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
 	return -1;
 }
@@ -573,7 +574,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
-	struct net *net;
+	struct net *net = dev_net(skb->dst->dev);
 
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
 
@@ -597,7 +598,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
-	net = dev_net(skb->dev);
 	if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
 		ip6_evictor(net, ip6_dst_idev(skb->dst));
 
@@ -619,7 +619,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	return -1;
 
 fail_hdr:
-	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
+	IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
 	return -1;
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e10a1701550..89dc6992434 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1833,16 +1833,19 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
 {
 	int type;
+	struct dst_entry *dst = skb->dst;
 	switch (ipstats_mib_noroutes) {
 	case IPSTATS_MIB_INNOROUTES:
 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
-			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
+			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+				      IPSTATS_MIB_INADDRERRORS);
 			break;
 		}
 		/* FALLTHROUGH */
 	case IPSTATS_MIB_OUTNOROUTES:
-		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
+		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+			      ipstats_mib_noroutes);
 		break;
 	}
 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
-- 
cgit v1.2.3-70-g09d2


From 483a47d2fe794328d29950fe00ce26dd405d9437 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 11:09:27 -0700
Subject: ipv6: added net argument to IP6_INC_STATS_BH

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h    |  3 ++-
 net/ipv6/exthdrs.c    | 42 ++++++++++++++++++++++++------------------
 net/ipv6/ip6_input.c  | 27 ++++++++++++++++-----------
 net/ipv6/ip6_output.c | 18 +++++++++++-------
 net/ipv6/ip6mr.c      |  3 ++-
 net/ipv6/mcast.c      |  4 ++--
 net/ipv6/reassembly.c | 26 +++++++++++++++-----------
 7 files changed, 72 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 26c17988b90..e7732d3a3f9 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -131,7 +131,8 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
 
 #define IP6_INC_STATS(net, idev,field)	({ (void)(net); \
 		_DEVINC(ipv6, , idev, field); })
-#define IP6_INC_STATS_BH(idev,field)	_DEVINC(ipv6, _BH, idev, field)
+#define IP6_INC_STATS_BH(net, idev,field) ({ (void)(net); \
+		_DEVINC(ipv6, _BH, idev, field); })
 #define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val)
 
 DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 837c830d6d8..6bfffec2371 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -277,7 +277,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -301,7 +301,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
-	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+	IP6_INC_STATS_BH(dev_net(dst->dev),
+			 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 	dst_release(dst);
 	return -1;
 }
@@ -319,7 +320,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	int n, i;
 	struct ipv6_rt_hdr *hdr;
 	struct rt0_hdr *rthdr;
-	int accept_source_route = dev_net(skb->dev)->ipv6.devconf_all->accept_source_route;
+	struct net *net = dev_net(skb->dev);
+	int accept_source_route = net->ipv6.devconf_all->accept_source_route;
 
 	idev = in6_dev_get(skb->dev);
 	if (idev) {
@@ -331,7 +333,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -341,7 +343,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
 	    skb->pkt_type != PACKET_HOST) {
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -356,7 +358,7 @@ looped_back:
 			 * processed by own
 			 */
 			if (!addr) {
-				IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+				IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 						 IPSTATS_MIB_INADDRERRORS);
 				kfree_skb(skb);
 				return -1;
@@ -382,7 +384,7 @@ looped_back:
 			goto unknown_rh;
 		/* Silently discard invalid RTH type 2 */
 		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
-			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
 			kfree_skb(skb);
 			return -1;
@@ -401,7 +403,7 @@ looped_back:
 	n = hdr->hdrlen >> 1;
 
 	if (hdr->segments_left > n) {
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 				  ((&hdr->segments_left) -
@@ -415,7 +417,7 @@ looped_back:
 	if (skb_cloned(skb)) {
 		/* the copy is a forwarded packet */
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
-			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_OUTDISCARDS);
 			kfree_skb(skb);
 			return -1;
@@ -438,13 +440,13 @@ looped_back:
 		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
 				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
 				     IPPROTO_ROUTING) < 0) {
-			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INADDRERRORS);
 			kfree_skb(skb);
 			return -1;
 		}
 		if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) {
-			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INADDRERRORS);
 			kfree_skb(skb);
 			return -1;
@@ -456,7 +458,7 @@ looped_back:
 	}
 
 	if (ipv6_addr_is_multicast(addr)) {
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -476,7 +478,7 @@ looped_back:
 
 	if (skb->dst->dev->flags&IFF_LOOPBACK) {
 		if (ipv6_hdr(skb)->hop_limit <= 1) {
-			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 				    0, skb->dev);
@@ -492,7 +494,7 @@ looped_back:
 	return -1;
 
 unknown_rh:
-	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 			  (&hdr->type) - skb_network_header(skb));
 	return -1;
@@ -579,29 +581,33 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
 {
 	const unsigned char *nh = skb_network_header(skb);
 	u32 pkt_len;
+	struct net *net = dev_net(skb->dst->dev);
 
 	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
 		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
 			       nh[optoff+1]);
-		IP6_INC_STATS_BH(ipv6_skb_idev(skb),
+		IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
 				 IPSTATS_MIB_INHDRERRORS);
 		goto drop;
 	}
 
 	pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
 	if (pkt_len <= IPV6_MAXPLEN) {
-		IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS);
+		IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+				 IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
 		return 0;
 	}
 	if (ipv6_hdr(skb)->payload_len) {
-		IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS);
+		IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+				 IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
 		return 0;
 	}
 
 	if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
-		IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INTRUNCATEDPKTS);
+		IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+				 IPSTATS_MIB_INTRUNCATEDPKTS);
 		goto drop;
 	}
 
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 7e14cccd056..936f48946e2 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -59,6 +59,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	struct ipv6hdr *hdr;
 	u32 		pkt_len;
 	struct inet6_dev *idev;
+	struct net *net = dev_net(skb->dev);
 
 	if (skb->pkt_type == PACKET_OTHERHOST) {
 		kfree_skb(skb);
@@ -69,11 +70,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 
 	idev = __in6_dev_get(skb->dev);
 
-	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES);
+	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INRECEIVES);
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
 	    !idev || unlikely(idev->cnf.disable_ipv6)) {
-		IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
+		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
 		rcu_read_unlock();
 		goto out;
 	}
@@ -118,11 +119,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	/* pkt_len may be zero if Jumbo payload option is present */
 	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
 		if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
-			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+			IP6_INC_STATS_BH(net,
+					 idev, IPSTATS_MIB_INTRUNCATEDPKTS);
 			goto drop;
 		}
 		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
-			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
+			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
 			goto drop;
 		}
 		hdr = ipv6_hdr(skb);
@@ -130,7 +132,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
 		if (ipv6_parse_hopopts(skb) < 0) {
-			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
+			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
 			rcu_read_unlock();
 			return 0;
 		}
@@ -141,7 +143,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL,
 		       ip6_rcv_finish);
 err:
-	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
+	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
 drop:
 	rcu_read_unlock();
 	kfree_skb(skb);
@@ -161,6 +163,7 @@ static int ip6_input_finish(struct sk_buff *skb)
 	int nexthdr, raw;
 	u8 hash;
 	struct inet6_dev *idev;
+	struct net *net = dev_net(skb->dst->dev);
 
 	/*
 	 *	Parse extension headers
@@ -205,24 +208,25 @@ resubmit:
 		if (ret > 0)
 			goto resubmit;
 		else if (ret == 0)
-			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
+			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
 	} else {
 		if (!raw) {
 			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-				IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS);
+				IP6_INC_STATS_BH(net, idev,
+						 IPSTATS_MIB_INUNKNOWNPROTOS);
 				icmpv6_send(skb, ICMPV6_PARAMPROB,
 					    ICMPV6_UNK_NEXTHDR, nhoff,
 					    skb->dev);
 			}
 		} else
-			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
+			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
 		kfree_skb(skb);
 	}
 	rcu_read_unlock();
 	return 0;
 
 discard:
-	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
+	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
 	rcu_read_unlock();
 	kfree_skb(skb);
 	return 0;
@@ -240,7 +244,8 @@ int ip6_mc_input(struct sk_buff *skb)
 	struct ipv6hdr *hdr;
 	int deliver;
 
-	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS);
+	IP6_INC_STATS_BH(dev_net(skb->dst->dev),
+			 ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS);
 
 	hdr = ipv6_hdr(skb);
 	deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index db28c208f32..f0fded630f5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -103,7 +103,8 @@ static int ip6_output_finish(struct sk_buff *skb)
 	else if (dst->neighbour)
 		return dst->neighbour->output(skb);
 
-	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+	IP6_INC_STATS_BH(dev_net(dst->dev),
+			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
 	return -EINVAL;
 
@@ -458,7 +459,8 @@ int ip6_forward(struct sk_buff *skb)
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 			    0, skb->dev);
-		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+		IP6_INC_STATS_BH(net,
+				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 
 		kfree_skb(skb);
 		return -ETIMEDOUT;
@@ -527,8 +529,10 @@ int ip6_forward(struct sk_buff *skb)
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
-		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
-		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
+		IP6_INC_STATS_BH(net,
+				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
+		IP6_INC_STATS_BH(net,
+				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
 		kfree_skb(skb);
 		return -EMSGSIZE;
 	}
@@ -544,12 +548,12 @@ int ip6_forward(struct sk_buff *skb)
 
 	hdr->hop_limit--;
 
-	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
 		       ip6_forward_finish);
 
 error:
-	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 drop:
 	kfree_skb(skb);
 	return -EINVAL;
@@ -991,7 +995,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 
 out_err_release:
 	if (err == -ENETUNREACH)
-		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
+		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
 	dst_release(*dst);
 	*dst = NULL;
 	return err;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 095bc453ff4..182f8a177e7 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1383,7 +1383,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 
 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
 {
-	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
+			 IPSTATS_MIB_OUTFORWDATAGRAMS);
 	return dst_output(skb);
 }
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a1d588d0b5d..88811ebbe4b 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1476,9 +1476,9 @@ out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT);
 		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
-		IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTMCASTPKTS);
+		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS);
 	} else
-		IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTDISCARDS);
+		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
 
 	if (likely(idev != NULL))
 		in6_dev_put(idev);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 63644075f2d..693d20836b3 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -213,8 +213,8 @@ static void ip6_frag_expire(unsigned long data)
 		goto out;
 
 	rcu_read_lock();
-	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
-	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
 	rcu_read_unlock();
 
 	/* Don't send error if the first segment did not arrive. */
@@ -257,7 +257,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst,
 	return container_of(q, struct frag_queue, q);
 
 oom:
-	IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
+	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS);
 	return NULL;
 }
 
@@ -267,6 +267,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	struct sk_buff *prev, *next;
 	struct net_device *dev;
 	int offset, end;
+	struct net *net = dev_net(skb->dst->dev);
 
 	if (fq->q.last_in & INET_FRAG_COMPLETE)
 		goto err;
@@ -276,7 +277,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 				  ((u8 *)&fhdr->frag_off -
@@ -309,7 +310,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			/* RFC2460 says always send parameter problem in
 			 * this case. -DaveM
 			 */
-			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 					  offsetof(struct ipv6hdr, payload_len));
@@ -433,7 +434,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	return -1;
 
 err:
-	IP6_INC_STATS(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
+	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
 		      IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
 	return -1;
@@ -550,7 +551,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 					  head->csum);
 
 	rcu_read_lock();
-	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+	IP6_INC_STATS_BH(dev_net(dev),
+			 __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
 	fq->q.fragments = NULL;
 	return 1;
@@ -564,7 +566,8 @@ out_oom:
 		printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
 out_fail:
 	rcu_read_lock();
-	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+	IP6_INC_STATS_BH(dev_net(dev),
+			 __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
 	rcu_read_unlock();
 	return -1;
 }
@@ -576,7 +579,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct net *net = dev_net(skb->dst->dev);
 
-	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
 
 	/* Jumbo payload inhibits frag. header */
 	if (hdr->payload_len==0)
@@ -592,7 +595,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
 		/* It is not a fragmented frame */
 		skb->transport_header += sizeof(struct frag_hdr);
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
+		IP6_INC_STATS_BH(net,
+				 ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
 
 		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
 		return 1;
@@ -614,7 +618,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return ret;
 	}
 
-	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
 	return -1;
 
-- 
cgit v1.2.3-70-g09d2


From 821d57776d4dda47ef5f0c33fdb3c761214b2f9f Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:32:43 -0700
Subject: ipv6: added net argument to IP6_ADD_STATS_BH

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h    | 3 ++-
 net/ipv6/reassembly.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e7732d3a3f9..ac0487bab8b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -133,7 +133,8 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
 		_DEVINC(ipv6, , idev, field); })
 #define IP6_INC_STATS_BH(net, idev,field) ({ (void)(net); \
 		_DEVINC(ipv6, _BH, idev, field); })
-#define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val)
+#define IP6_ADD_STATS_BH(net, idev,field,val) ({ (void)(net); \
+		_DEVADD(ipv6, _BH, idev, field, val); })
 
 DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 693d20836b3..af12de071f4 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -189,7 +189,7 @@ static void ip6_evictor(struct net *net, struct inet6_dev *idev)
 
 	evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
 	if (evicted)
-		IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
+		IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 static void ip6_frag_expire(unsigned long data)
-- 
cgit v1.2.3-70-g09d2


From a862f6a6dc89c57dd3a959a1636b59f0c27169c2 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:33:06 -0700
Subject: ipv6: added net argument to ICMP6_INC_STATS

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 3 ++-
 net/ipv6/mcast.c   | 2 +-
 net/ipv6/ndisc.c   | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ac0487bab8b..744fe7443cb 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -139,7 +139,8 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
 DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
 
-#define ICMP6_INC_STATS(idev, field)	_DEVINC(icmpv6, , idev, field)
+#define ICMP6_INC_STATS(net, idev, field)	({ (void)(net); \
+		_DEVINC(icmpv6, , idev, field); })
 #define ICMP6_INC_STATS_BH(idev, field)	_DEVINC(icmpv6, _BH, idev, field)
 
 #define ICMP6MSGOUT_INC_STATS(idev, field) \
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 88811ebbe4b..fa413afeb99 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1840,7 +1840,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(idev, type);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS);
 	} else
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ce5b61763fd..6ce238ca538 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -522,7 +522,7 @@ static void __ndisc_send(struct net_device *dev,
 		      dst_output);
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(idev, type);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
 	}
 
 	if (likely(idev != NULL))
@@ -1586,7 +1586,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 		      dst_output);
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
 	}
 
 	if (likely(idev != NULL))
-- 
cgit v1.2.3-70-g09d2


From e41b5368e029e79d11acb5952bc73284e5026c62 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:33:26 -0700
Subject: ipv6: added net argument to ICMP6_INC_STATS_BH

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h    | 3 ++-
 net/dccp/ipv6.c       | 6 ++++--
 net/ipv6/icmp.c       | 4 ++--
 net/ipv6/ip6_output.c | 2 +-
 net/ipv6/mcast.c      | 2 +-
 net/ipv6/tcp_ipv6.c   | 3 ++-
 net/sctp/ipv6.c       | 2 +-
 7 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 744fe7443cb..5107cd92a46 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -141,7 +141,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
 
 #define ICMP6_INC_STATS(net, idev, field)	({ (void)(net); \
 		_DEVINC(icmpv6, , idev, field); })
-#define ICMP6_INC_STATS_BH(idev, field)	_DEVINC(icmpv6, _BH, idev, field)
+#define ICMP6_INC_STATS_BH(net, idev, field)	({ (void)(net); \
+		_DEVINC(icmpv6, _BH, idev, field); })
 
 #define ICMP6MSGOUT_INC_STATS(idev, field) \
 	_DEVINC(icmpv6msg, , idev, field +256)
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index caa7f346962..11062780bb0 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -98,7 +98,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	if (skb->len < offset + sizeof(*dh) ||
 	    skb->len < offset + __dccp_basic_hdr_len(dh)) {
-		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
+				   ICMP6_MIB_INERRORS);
 		return;
 	}
 
@@ -107,7 +108,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			&hdr->saddr, dh->dccph_sport, inet6_iif(skb));
 
 	if (sk == NULL) {
-		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
+				   ICMP6_MIB_INERRORS);
 		return;
 	}
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 758cdd7dc27..4c961556306 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -664,7 +664,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		skb_set_network_header(skb, nh);
 	}
 
-	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
+	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = &ipv6_hdr(skb)->daddr;
@@ -772,7 +772,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	return 0;
 
 discard_it:
-	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
+	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 drop_no_count:
 	kfree_skb(skb);
 	return 0;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f0fded630f5..e7eff320619 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1483,7 +1483,7 @@ int ip6_push_pending_frames(struct sock *sk)
 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 
 		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
-		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
 	}
 
 	err = ip6_local_out(skb);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fa413afeb99..f06ceea1ffa 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1475,7 +1475,7 @@ static void mld_sendpack(struct sk_buff *skb)
 out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT);
-		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
 		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS);
 	} else
 		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6268d266c03..424d9c4a67a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -330,7 +330,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
 
 	if (sk == NULL) {
-		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
+				   ICMP6_MIB_INERRORS);
 		return;
 	}
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index c78da3c9dd3..4124bbb9994 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -156,7 +156,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	skb->network_header   = saveip;
 	skb->transport_header = savesctp;
 	if (!sk) {
-		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
+		ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS);
 		goto out;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 5c5d244bd388fe498dd7f5f57cb7770aae40b9ab Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:33:50 -0700
Subject: ipv6: added net argument to ICMP6MSGOUT_INC_STATS

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 4 ++--
 net/ipv6/mcast.c   | 2 +-
 net/ipv6/ndisc.c   | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 5107cd92a46..7f5a8dec1ae 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -144,8 +144,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
 #define ICMP6_INC_STATS_BH(net, idev, field)	({ (void)(net); \
 		_DEVINC(icmpv6, _BH, idev, field); })
 
-#define ICMP6MSGOUT_INC_STATS(idev, field) \
-	_DEVINC(icmpv6msg, , idev, field +256)
+#define ICMP6MSGOUT_INC_STATS(net, idev, field) ({ (void)(net); \
+	_DEVINC(icmpv6msg, , idev, field +256); })
 #define ICMP6MSGOUT_INC_STATS_BH(idev, field) \
 	_DEVINC(icmpv6msg, _BH, idev, field +256)
 #define ICMP6MSGIN_INC_STATS(idev, field) \
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f06ceea1ffa..a96e4235fbf 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1839,7 +1839,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 		      dst_output);
 out:
 	if (!err) {
-		ICMP6MSGOUT_INC_STATS(idev, type);
+		ICMP6MSGOUT_INC_STATS(net, idev, type);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS);
 	} else
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 6ce238ca538..840b15780a3 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -521,7 +521,7 @@ static void __ndisc_send(struct net_device *dev,
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 		      dst_output);
 	if (!err) {
-		ICMP6MSGOUT_INC_STATS(idev, type);
+		ICMP6MSGOUT_INC_STATS(net, idev, type);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
 	}
 
@@ -1585,7 +1585,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
 		      dst_output);
 	if (!err) {
-		ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
+		ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 5a57d4c7fdac0e227efe8c5739fcbb263d9ae993 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:34:14 -0700
Subject: ipv6: added net argument to ICMP6MSGOUT_INC_STATS_BH

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h    | 4 ++--
 net/ipv6/ip6_output.c | 2 +-
 net/ipv6/mcast.c      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7f5a8dec1ae..4736d8f1f28 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -146,8 +146,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
 
 #define ICMP6MSGOUT_INC_STATS(net, idev, field) ({ (void)(net); \
 	_DEVINC(icmpv6msg, , idev, field +256); })
-#define ICMP6MSGOUT_INC_STATS_BH(idev, field) \
-	_DEVINC(icmpv6msg, _BH, idev, field +256)
+#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \
+	_DEVINC(icmpv6msg, _BH, idev, field +256); })
 #define ICMP6MSGIN_INC_STATS(idev, field) \
 	 _DEVINC(icmpv6msg, , idev, field)
 #define ICMP6MSGIN_INC_STATS_BH(idev, field) \
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e7eff320619..c77db0b95e2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1482,7 +1482,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	if (proto == IPPROTO_ICMPV6) {
 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 
-		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
+		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
 	}
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a96e4235fbf..d7b3c6d398a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1474,7 +1474,7 @@ static void mld_sendpack(struct sk_buff *skb)
 		      dst_output);
 out:
 	if (!err) {
-		ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT);
+		ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
 		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS);
 	} else
-- 
cgit v1.2.3-70-g09d2


From 55d43808eb26e689dacb95b11f956a3b1a56a5f3 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:34:54 -0700
Subject: ipv6: added net argument to ICMP6MSGIN_INC_STATS_BH

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 4 ++--
 net/ipv6/icmp.c    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 01da23c061e..47a76bfbf76 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -148,8 +148,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
 	_DEVINC(icmpv6msg, , idev, field +256); })
 #define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \
 	_DEVINC(icmpv6msg, _BH, idev, field +256); })
-#define ICMP6MSGIN_INC_STATS_BH(idev, field) \
-	_DEVINC(icmpv6msg, _BH, idev, field)
+#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) ({ (void)(net); \
+	_DEVINC(icmpv6msg, _BH, idev, field); })
 
 struct ip6_ra_chain
 {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4c961556306..9b7d19ae5ce 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -693,7 +693,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 
 	type = hdr->icmp6_type;
 
-	ICMP6MSGIN_INC_STATS_BH(idev, type);
+	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
 
 	switch (type) {
 	case ICMPV6_ECHO_REQUEST:
-- 
cgit v1.2.3-70-g09d2


From 9261e53701121f83eb9482347d68833e95315362 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:36:03 -0700
Subject: ipv6: making ip and icmp statistics per/namespace

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h      | 10 ++--------
 include/net/netns/mib.h |  3 +++
 net/ipv6/af_inet6.c     | 38 ++++++++++++++++++--------------------
 net/ipv6/proc.c         |  8 +++++---
 4 files changed, 28 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d0538dd2c44..6d5b58a1c74 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -113,23 +113,20 @@ extern struct ctl_path net_ipv6_ctl_path[];
 #define _DEVINC(net, statname, modifier, idev, field)			\
 ({									\
 	struct inet6_dev *_idev = (idev);				\
-	(void)(net);							\
 	if (likely(_idev != NULL))					\
 		SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \
-	SNMP_INC_STATS##modifier(statname##_statistics, (field));	\
+	SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\
 })
 
 #define _DEVADD(net, statname, modifier, idev, field, val)		\
 ({									\
 	struct inet6_dev *_idev = (idev);				\
-	(void)(net);							\
 	if (likely(_idev != NULL))					\
 		SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \
-	SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\
+	SNMP_ADD_STATS##modifier((net)->mib.statname##_statistics, (field), (val));\
 })
 
 /* MIBs */
-DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
 
 #define IP6_INC_STATS(net, idev,field)		\
 		_DEVINC(net, ipv6, , idev, field)
@@ -138,9 +135,6 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
 #define IP6_ADD_STATS_BH(net, idev,field,val)	\
 		_DEVADD(net, ipv6, _BH, idev, field, val)
 
-DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
-DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
-
 #define ICMP6_INC_STATS(net, idev, field)	\
 		_DEVINC(net, icmpv6, , idev, field)
 #define ICMP6_INC_STATS_BH(net, idev, field)	\
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 4e58f0519ce..10cb7c336de 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -16,6 +16,9 @@ struct netns_mib {
 	struct proc_dir_entry *proc_net_devsnmp6;
 	DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
 	DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
+	DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
+	DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
+	DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
 #endif
 };
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 127b240d2d8..6b509d7700d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -797,31 +797,11 @@ static void ipv6_packet_cleanup(void)
 
 static int __init init_ipv6_mibs(void)
 {
-	if (snmp_mib_init((void **)ipv6_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
-		goto err_ip_mib;
-	if (snmp_mib_init((void **)icmpv6_statistics,
-			  sizeof(struct icmpv6_mib)) < 0)
-		goto err_icmp_mib;
-	if (snmp_mib_init((void **)icmpv6msg_statistics,
-			  sizeof(struct icmpv6msg_mib)) < 0)
-		goto err_icmpmsg_mib;
 	return 0;
-
-err_icmpmsg_mib:
-	snmp_mib_free((void **)icmpv6_statistics);
-err_icmp_mib:
-	snmp_mib_free((void **)ipv6_statistics);
-err_ip_mib:
-	return -ENOMEM;
-
 }
 
 static void cleanup_ipv6_mibs(void)
 {
-	snmp_mib_free((void **)ipv6_statistics);
-	snmp_mib_free((void **)icmpv6_statistics);
-	snmp_mib_free((void **)icmpv6msg_statistics);
 }
 
 static int __net_init ipv6_init_mibs(struct net *net)
@@ -832,8 +812,23 @@ static int __net_init ipv6_init_mibs(struct net *net)
 	if (snmp_mib_init((void **)net->mib.udplite_stats_in6,
 			  sizeof (struct udp_mib)) < 0)
 		goto err_udplite_mib;
+	if (snmp_mib_init((void **)net->mib.ipv6_statistics,
+			  sizeof(struct ipstats_mib)) < 0)
+		goto err_ip_mib;
+	if (snmp_mib_init((void **)net->mib.icmpv6_statistics,
+			  sizeof(struct icmpv6_mib)) < 0)
+		goto err_icmp_mib;
+	if (snmp_mib_init((void **)net->mib.icmpv6msg_statistics,
+			  sizeof(struct icmpv6msg_mib)) < 0)
+		goto err_icmpmsg_mib;
 	return 0;
 
+err_icmpmsg_mib:
+	snmp_mib_free((void **)net->mib.icmpv6_statistics);
+err_icmp_mib:
+	snmp_mib_free((void **)net->mib.ipv6_statistics);
+err_ip_mib:
+	snmp_mib_free((void **)net->mib.udplite_stats_in6);
 err_udplite_mib:
 	snmp_mib_free((void **)net->mib.udp_stats_in6);
 	return -ENOMEM;
@@ -843,6 +838,9 @@ static void __net_exit ipv6_cleanup_mibs(struct net *net)
 {
 	snmp_mib_free((void **)net->mib.udp_stats_in6);
 	snmp_mib_free((void **)net->mib.udplite_stats_in6);
+	snmp_mib_free((void **)net->mib.ipv6_statistics);
+	snmp_mib_free((void **)net->mib.icmpv6_statistics);
+	snmp_mib_free((void **)net->mib.icmpv6msg_statistics);
 }
 
 static int inet6_net_init(struct net *net)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index c78cf754ef3..07f0b76e742 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -177,9 +177,11 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = (struct net *)seq->private;
 
-	snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
-	snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
-	snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics);
+	snmp6_seq_show_item(seq, (void **)net->mib.ipv6_statistics,
+			    snmp6_ipstats_list);
+	snmp6_seq_show_item(seq, (void **)net->mib.icmpv6_statistics,
+			    snmp6_icmp6_list);
+	snmp6_seq_show_icmpv6msg(seq, (void **)net->mib.icmpv6msg_statistics);
 	snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6,
 			    snmp6_udp6_list);
 	snmp6_seq_show_item(seq, (void **)net->mib.udplite_stats_in6,
-- 
cgit v1.2.3-70-g09d2


From 2ca89cea5c9fdafd495fb840fa055383d253174e Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 8 Oct 2008 10:36:24 -0700
Subject: ipv6: remove unused not init_ipv6_mibs/cleanup_ipv6_mibs

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 6b509d7700d..050e14b7f70 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -795,15 +795,6 @@ static void ipv6_packet_cleanup(void)
 	dev_remove_pack(&ipv6_packet_type);
 }
 
-static int __init init_ipv6_mibs(void)
-{
-	return 0;
-}
-
-static void cleanup_ipv6_mibs(void)
-{
-}
-
 static int __net_init ipv6_init_mibs(struct net *net)
 {
 	if (snmp_mib_init((void **)net->mib.udp_stats_in6,
@@ -935,11 +926,6 @@ static int __init inet6_init(void)
 	if (err)
 		goto out_sock_register_fail;
 
-	/* Initialise ipv6 mibs */
-	err = init_ipv6_mibs();
-	if (err)
-		goto out_unregister_sock;
-
 #ifdef CONFIG_SYSCTL
 	err = ipv6_static_sysctl_register();
 	if (err)
@@ -1073,8 +1059,6 @@ register_pernet_fail:
 	ipv6_static_sysctl_unregister();
 static_sysctl_fail:
 #endif
-	cleanup_ipv6_mibs();
-out_unregister_sock:
 	sock_unregister(PF_INET6);
 	rtnl_unregister_all(PF_INET6);
 out_sock_register_fail:
@@ -1131,7 +1115,6 @@ static void __exit inet6_exit(void)
 #ifdef CONFIG_SYSCTL
 	ipv6_static_sysctl_unregister();
 #endif
-	cleanup_ipv6_mibs();
 	proto_unregister(&rawv6_prot);
 	proto_unregister(&udplitev6_prot);
 	proto_unregister(&udpv6_prot);
-- 
cgit v1.2.3-70-g09d2


From 52cd5750e81ec8d213949fa7c0d2e08907bf498b Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 8 Oct 2008 11:34:06 -0700
Subject: tcp: fix length used for checksum in a reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While looking for some common code I came across difference
in checksum calculation between tcp_v6_send_(reset|ack) I
couldn't explain. I checked both v4 and v6 and found out that
both seem to have the same "feature". I couldn't find anything
in rfc nor anywhere else which would state that md5 option
should be ignored like it was in case of reset so I came to
a conclusion that this is probably a genuine bug. I suspect
that addition of md5 just was fooled by the excessive
copy-paste code in those functions and the reset part was
never tested well enough to find out the problem.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 2 +-
 net/ipv6/tcp_ipv6.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 24ffc5e1d3d..ba46769c6e9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -589,7 +589,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 				      ip_hdr(skb)->saddr, /* XXX */
-				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
+				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 424d9c4a67a..e8b0fdd9edb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1012,14 +1012,14 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	}
 #endif
 
-	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
+	buff->csum = csum_partial((char *)t1, tot_len, 0);
 
 	memset(&fl, 0, sizeof(fl));
 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
 
 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
-				    sizeof(*t1), IPPROTO_TCP,
+				    tot_len, IPPROTO_TCP,
 				    buff->csum);
 
 	fl.proto = IPPROTO_TCP;
-- 
cgit v1.2.3-70-g09d2


From 53e915034970935596703a6005cde27c2128b5c3 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 8 Oct 2008 11:36:22 -0700
Subject: pkt_sched: Update qdisc requeue stats in dev_requeue_skb()

After the last change of requeuing there is no info about such
incidents in tc stats. This patch updates the counter, but we should
consider this should differ from previous stats because of additional
checks preventing to repeat this. On the other hand, previous stats
didn't include requeuing of gso_segmented skbs.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 31f6b614b59..7b5572d6beb 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -45,6 +45,7 @@ static inline int qdisc_qlen(struct Qdisc *q)
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	q->gso_skb = skb;
+	q->qstats.requeues++;
 	__netif_schedule(q);
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 9088c5609584684149f3fb5b065aa7f18dcb03ff Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 8 Oct 2008 11:44:17 -0700
Subject: udp: Improve port randomization

Current UDP port allocation is suboptimal.
We select the shortest chain to chose a port (out of 512)
that will hash in this shortest chain.

First, it can lead to give not so ramdom ports and ease
give attackers more opportunities to break the system.

Second, it can consume a lot of CPU to scan all table
in order to find the shortest chain.

Third, in some pathological cases we can fail to find
a free port even if they are plenty of them.

This patch zap the search for a short chain and only
use one random seed. Problem of getting long chains
should be addressed in another way, since we can
obtain long chains with non random ports.

Based on a report and patch from Vitaly Mayatskikh

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 56 ++++++++++++--------------------------------------------
 1 file changed, 12 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 85f8e8e10b1..67d8430b4a2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -155,55 +155,23 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 	write_lock_bh(&udp_hash_lock);
 
 	if (!snum) {
-		int i, low, high, remaining;
-		unsigned rover, best, best_size_so_far;
+		int low, high, remaining;
+		unsigned rand;
+		unsigned short first;
 
 		inet_get_local_port_range(&low, &high);
 		remaining = (high - low) + 1;
 
-		best_size_so_far = UINT_MAX;
-		best = rover = net_random() % remaining + low;
-
-		/* 1st pass: look for empty (or shortest) hash chain */
-		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
-			int size = 0;
-
-			head = &udptable[udp_hashfn(net, rover)];
-			if (hlist_empty(head))
-				goto gotit;
-
-			sk_for_each(sk2, node, head) {
-				if (++size >= best_size_so_far)
-					goto next;
-			}
-			best_size_so_far = size;
-			best = rover;
-		next:
-			/* fold back if end of range */
-			if (++rover > high)
-				rover = low + ((rover - low)
-					       & (UDP_HTABLE_SIZE - 1));
-
-
-		}
-
-		/* 2nd pass: find hole in shortest hash chain */
-		rover = best;
-		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
-			if (! __udp_lib_lport_inuse(net, rover, udptable))
-				goto gotit;
-			rover += UDP_HTABLE_SIZE;
-			if (rover > high)
-				rover = low + ((rover - low)
-					       & (UDP_HTABLE_SIZE - 1));
+		rand = net_random();
+		snum = first = rand % remaining + low;
+		rand |= 1;
+		while (__udp_lib_lport_inuse(net, snum, udptable)) {
+			do {
+				snum = snum + rand;
+			} while (snum < low || snum > high);
+			if (snum == first)
+				goto fail;
 		}
-
-
-		/* All ports in use! */
-		goto fail;
-
-gotit:
-		snum = rover;
 	} else {
 		head = &udptable[udp_hashfn(net, snum)];
 
-- 
cgit v1.2.3-70-g09d2


From 3c689b7320ae6f20dba6a8b71806a6c6fd604ee8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 8 Oct 2008 14:18:04 -0700
Subject: inet: cleanup of local_port_range

I noticed sysctl_local_port_range[] and its associated seqlock
sysctl_local_port_range_lock were on separate cache lines.
Moreover, sysctl_local_port_range[] was close to unrelated
variables, highly modified, leading to cache misses.

Moving these two variables in a structure can help data
locality and moving this structure to read_mostly section
helps sharing of this data among cpus.

Cleanup of extern declarations (moved in include file where
they belong), and use of inet_get_local_port_range()
accessor instead of direct access to ports values.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h                |  4 ++++
 net/ipv4/inet_connection_sock.c | 16 +++++++++-------
 net/ipv4/sysctl_net_ipv4.c      | 23 ++++++++++-------------
 3 files changed, 23 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index d678ea3d474..1cbccaf0de3 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -178,6 +178,10 @@ extern unsigned long snmp_fold_field(void *mib[], int offt);
 extern int snmp_mib_init(void *ptr[2], size_t mibsize);
 extern void snmp_mib_free(void *ptr[2]);
 
+extern struct local_ports {
+	seqlock_t	lock;
+	int		range[2];
+} sysctl_local_ports;
 extern void inet_get_local_port_range(int *low, int *high);
 
 extern int sysctl_ip_default_ttl;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 21fcc5a9045..bd1278a2d82 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -30,20 +30,22 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
 #endif
 
 /*
- * This array holds the first and last local port number.
+ * This struct holds the first and last local port number.
  */
-int sysctl_local_port_range[2] = { 32768, 61000 };
-DEFINE_SEQLOCK(sysctl_port_range_lock);
+struct local_ports sysctl_local_ports __read_mostly = {
+	.lock = SEQLOCK_UNLOCKED,
+	.range = { 32768, 61000 },
+};
 
 void inet_get_local_port_range(int *low, int *high)
 {
 	unsigned seq;
 	do {
-		seq = read_seqbegin(&sysctl_port_range_lock);
+		seq = read_seqbegin(&sysctl_local_ports.lock);
 
-		*low = sysctl_local_port_range[0];
-		*high = sysctl_local_port_range[1];
-	} while (read_seqretry(&sysctl_port_range_lock, seq));
+		*low = sysctl_local_ports.range[0];
+		*high = sysctl_local_ports.range[1];
+	} while (read_seqretry(&sysctl_local_ports.lock, seq));
 }
 EXPORT_SYMBOL(inet_get_local_port_range);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e0689fd7b79..276d047fb85 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -26,16 +26,13 @@ static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
 
-extern seqlock_t sysctl_port_range_lock;
-extern int sysctl_local_port_range[2];
-
 /* Update system visible IP port range */
 static void set_local_port_range(int range[2])
 {
-	write_seqlock(&sysctl_port_range_lock);
-	sysctl_local_port_range[0] = range[0];
-	sysctl_local_port_range[1] = range[1];
-	write_sequnlock(&sysctl_port_range_lock);
+	write_seqlock(&sysctl_local_ports.lock);
+	sysctl_local_ports.range[0] = range[0];
+	sysctl_local_ports.range[1] = range[1];
+	write_sequnlock(&sysctl_local_ports.lock);
 }
 
 /* Validate changes from /proc interface. */
@@ -44,8 +41,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
 				 size_t *lenp, loff_t *ppos)
 {
 	int ret;
-	int range[2] = { sysctl_local_port_range[0],
-			 sysctl_local_port_range[1] };
+	int range[2];
 	ctl_table tmp = {
 		.data = &range,
 		.maxlen = sizeof(range),
@@ -54,6 +50,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
 		.extra2 = &ip_local_port_range_max,
 	};
 
+	inet_get_local_port_range(range, range + 1);
 	ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
 
 	if (write && ret == 0) {
@@ -73,8 +70,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
 					void __user *newval, size_t newlen)
 {
 	int ret;
-	int range[2] = { sysctl_local_port_range[0],
-			 sysctl_local_port_range[1] };
+	int range[2];
 	ctl_table tmp = {
 		.data = &range,
 		.maxlen = sizeof(range),
@@ -83,6 +79,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
 		.extra2 = &ip_local_port_range_max,
 	};
 
+	inet_get_local_port_range(range, range + 1);
 	ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
 	if (ret == 0 && newval && newlen) {
 		if (range[1] < range[0])
@@ -396,8 +393,8 @@ static struct ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_LOCAL_PORT_RANGE,
 		.procname	= "ip_local_port_range",
-		.data		= &sysctl_local_port_range,
-		.maxlen		= sizeof(sysctl_local_port_range),
+		.data		= &sysctl_local_ports.range,
+		.maxlen		= sizeof(sysctl_local_ports.range),
 		.mode		= 0644,
 		.proc_handler	= &ipv4_local_port_range,
 		.strategy	= &ipv4_sysctl_local_port_range,
-- 
cgit v1.2.3-70-g09d2


From 8e1ee18c332e08bee9d8bd66e63cd564fbf17fc2 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 8 Oct 2008 14:18:39 -0700
Subject: sctp: Rework the tsn map to use generic bitmap.

The tsn map currently use is 4K large and is stuck inside
the sctp_association structure making memory references REALLY
expensive.  What we really need is at most 4K worth of bits
so the biggest map we would have is 512 bytes.   Also, the
map is only really usefull when we have gaps to store and
report.  As such, starting with minimal map of say 32 TSNs (bits)
should be enough for normal low-loss operations.  We can grow
the map by some multiple of 32 along with some extra room any
time we receive the TSN which would put us outside of the map
boundry.  As we close gaps, we can shift the map to rebase
it on the latest TSN we've seen.  This saves 4088 bytes per
association just in the map alone along savings from the now
unnecessary structure members.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h |   4 +-
 include/net/sctp/structs.h   |   1 -
 include/net/sctp/tsnmap.h    |  39 ++---
 net/sctp/associola.c         |   9 +-
 net/sctp/sm_make_chunk.c     |   5 +-
 net/sctp/sm_sideeffect.c     |   3 +-
 net/sctp/tsnmap.c            | 331 +++++++++++++++++++------------------------
 net/sctp/ulpevent.c          |  10 +-
 8 files changed, 178 insertions(+), 224 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index c32ddf0279c..b05b0557211 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -261,7 +261,9 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 };
  * must be less than 65535 (2^16 - 1), or we will have overflow
  * problems creating SACK's.
  */
-#define SCTP_TSN_MAP_SIZE 2048
+#define SCTP_TSN_MAP_INITIAL BITS_PER_LONG
+#define SCTP_TSN_MAP_INCREMENT SCTP_TSN_MAP_INITIAL
+#define SCTP_TSN_MAP_SIZE 4096
 #define SCTP_TSN_MAX_GAP  65535
 
 /* We will not record more than this many duplicate TSNs between two
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 94c62e4ddea..9661d7b765f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1545,7 +1545,6 @@ struct sctp_association {
 		 * in tsn_map--we get it by calling sctp_tsnmap_get_ctsn().
 		 */
 		struct sctp_tsnmap tsn_map;
-		__u8 _map[sctp_tsnmap_storage_size(SCTP_TSN_MAP_SIZE)];
 
 		/* Ack State   : This flag indicates if the next received
 		 *             : packet is to be responded to with a
diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h
index 099211bf998..6dabbee8bbd 100644
--- a/include/net/sctp/tsnmap.h
+++ b/include/net/sctp/tsnmap.h
@@ -60,18 +60,7 @@ struct sctp_tsnmap {
 	 * It points at one of the two buffers with which we will
 	 * ping-pong between.
 	 */
-	__u8 *tsn_map;
-
-	/* This marks the tsn which overflows the tsn_map, when the
-	 * cumulative ack point reaches this point we know we can switch
-	 * maps (tsn_map and overflow_map swap).
-	 */
-	__u32 overflow_tsn;
-
-	/* This is the overflow array for tsn_map.
-	 * It points at one of the other ping-pong buffers.
-	 */
-	__u8 *overflow_map;
+	unsigned long *tsn_map;
 
 	/* This is the TSN at tsn_map[0].  */
 	__u32 base_tsn;
@@ -89,15 +78,15 @@ struct sctp_tsnmap {
 	 */
 	__u32 cumulative_tsn_ack_point;
 
+	/* This is the highest TSN we've marked.  */
+	__u32 max_tsn_seen;
+
 	/* This is the minimum number of TSNs we can track.  This corresponds
 	 * to the size of tsn_map.   Note: the overflow_map allows us to
 	 * potentially track more than this quantity.
 	 */
 	__u16 len;
 
-	/* This is the highest TSN we've marked.  */
-	__u32 max_tsn_seen;
-
 	/* Data chunks pending receipt. used by SCTP_STATUS sockopt */
 	__u16 pending_data;
 
@@ -110,24 +99,17 @@ struct sctp_tsnmap {
 
 	/* Record gap ack block information here.  */
 	struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
-
-	int malloced;
-
-	__u8 raw_map[0];
 };
 
 struct sctp_tsnmap_iter {
 	__u32 start;
 };
 
-/* This macro assists in creation of external storage for variable length
- * internal buffers.  We double allocate so the overflow map works.
- */
-#define sctp_tsnmap_storage_size(count) (sizeof(__u8) * (count) * 2)
-
 /* Initialize a block of memory as a tsnmap.  */
 struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *, __u16 len,
-				     __u32 initial_tsn);
+				     __u32 initial_tsn, gfp_t gfp);
+
+void sctp_tsnmap_free(struct sctp_tsnmap *map);
 
 /* Test the tracking state of this TSN.
  * Returns:
@@ -138,7 +120,7 @@ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *, __u16 len,
 int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn);
 
 /* Mark this TSN as seen.  */
-void sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn);
+int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn);
 
 /* Mark this TSN and all lower as seen. */
 void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn);
@@ -183,10 +165,7 @@ static inline struct sctp_gap_ack_block *sctp_tsnmap_get_gabs(struct sctp_tsnmap
 /* Is there a gap in the TSN map?  */
 static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map)
 {
-	int has_gap;
-
-	has_gap = (map->cumulative_tsn_ack_point != map->max_tsn_seen);
-	return has_gap;
+	return (map->cumulative_tsn_ack_point != map->max_tsn_seen);
 }
 
 /* Mark a duplicate TSN.  Note:  limit the storage of duplicate TSN
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index abd51cef241..f4b23043b61 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -283,8 +283,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	if (!sctp_ulpq_init(&asoc->ulpq, asoc))
 		goto fail_init;
 
-	/* Set up the tsn tracking. */
-	sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, 0);
+	memset(&asoc->peer.tsn_map, 0, sizeof(struct sctp_tsnmap));
 
 	asoc->need_ecne = 0;
 
@@ -402,6 +401,8 @@ void sctp_association_free(struct sctp_association *asoc)
 	/* Dispose of any pending chunks on the inqueue. */
 	sctp_inq_free(&asoc->base.inqueue);
 
+	sctp_tsnmap_free(&asoc->peer.tsn_map);
+
 	/* Free ssnmap storage. */
 	sctp_ssnmap_free(asoc->ssnmap);
 
@@ -1122,8 +1123,8 @@ void sctp_assoc_update(struct sctp_association *asoc,
 	asoc->peer.rwnd = new->peer.rwnd;
 	asoc->peer.sack_needed = new->peer.sack_needed;
 	asoc->peer.i = new->peer.i;
-	sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE,
-			 asoc->peer.i.initial_tsn);
+	sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
+			 asoc->peer.i.initial_tsn, GFP_ATOMIC);
 
 	/* Remove any peer addresses not present in the new association. */
 	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 76726bcff3e..6dd9b3ef33d 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2288,8 +2288,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
 	}
 
 	/* Set up the TSN tracking pieces.  */
-	sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE,
-			 asoc->peer.i.initial_tsn);
+	if (!sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
+				asoc->peer.i.initial_tsn, gfp))
+		goto clean_up;
 
 	/* RFC 2960 6.5 Stream Identifier and Stream Sequence Number
 	 *
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 13d9eea5cf1..e1d6076b4f5 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1152,7 +1152,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 
 		case SCTP_CMD_REPORT_TSN:
 			/* Record the arrival of a TSN.  */
-			sctp_tsnmap_mark(&asoc->peer.tsn_map, cmd->obj.u32);
+			error = sctp_tsnmap_mark(&asoc->peer.tsn_map,
+						 cmd->obj.u32);
 			break;
 
 		case SCTP_CMD_REPORT_FWDTSN:
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index f3e58b27590..142ed7ca424 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -43,37 +43,44 @@
  */
 
 #include <linux/types.h>
+#include <linux/bitmap.h>
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
 static void sctp_tsnmap_update(struct sctp_tsnmap *map);
-static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off,
-				     __u16 len, __u16 base,
-				     int *started, __u16 *start,
-				     int *ended, __u16 *end);
+static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off,
+				     __u16 len, __u16 *start, __u16 *end);
+static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap);
 
 /* Initialize a block of memory as a tsnmap.  */
 struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len,
-				     __u32 initial_tsn)
+				     __u32 initial_tsn, gfp_t gfp)
 {
-	map->tsn_map = map->raw_map;
-	map->overflow_map = map->tsn_map + len;
-	map->len = len;
-
-	/* Clear out a TSN ack status.  */
-	memset(map->tsn_map, 0x00, map->len + map->len);
+	if (!map->tsn_map) {
+		map->tsn_map = kzalloc(len>>3, gfp);
+		if (map->tsn_map == NULL)
+			return NULL;
+
+		map->len = len;
+	} else {
+		bitmap_zero(map->tsn_map, map->len);
+	}
 
 	/* Keep track of TSNs represented by tsn_map.  */
 	map->base_tsn = initial_tsn;
-	map->overflow_tsn = initial_tsn + map->len;
 	map->cumulative_tsn_ack_point = initial_tsn - 1;
 	map->max_tsn_seen = map->cumulative_tsn_ack_point;
-	map->malloced = 0;
 	map->num_dup_tsns = 0;
 
 	return map;
 }
 
+void sctp_tsnmap_free(struct sctp_tsnmap *map)
+{
+	map->len = 0;
+	kfree(map->tsn_map);
+}
+
 /* Test the tracking state of this TSN.
  * Returns:
  *   0 if the TSN has not yet been seen
@@ -82,66 +89,69 @@ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len,
  */
 int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn)
 {
-	__s32 gap;
-	int dup;
+	u32 gap;
+
+	/* Check to see if this is an old TSN */
+	if (TSN_lte(tsn, map->cumulative_tsn_ack_point))
+		return 1;
+
+	/* Verify that we can hold this TSN and that it will not
+	 * overlfow our map
+	 */
+	if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE))
+		return -1;
 
 	/* Calculate the index into the mapping arrays.  */
 	gap = tsn - map->base_tsn;
 
-	/* Verify that we can hold this TSN.  */
-	if (gap >= (/* base */ map->len + /* overflow */ map->len)) {
-		dup = -1;
-		goto out;
-	}
-
-	/* Honk if we've already seen this TSN.
-	 * We have three cases:
-	 *	1. The TSN is ancient or belongs to a previous tsn_map.
-	 *	2. The TSN is already marked in the tsn_map.
-	 *	3. The TSN is already marked in the tsn_map_overflow.
-	 */
-	if (gap < 0 ||
-	    (gap < map->len && map->tsn_map[gap]) ||
-	    (gap >= map->len && map->overflow_map[gap - map->len]))
-		dup = 1;
+	/* Check to see if TSN has already been recorded.  */
+	if (gap < map->len && test_bit(gap, map->tsn_map))
+		return 1;
 	else
-		dup = 0;
-
-out:
-	return dup;
+		return 0;
 }
 
 
 /* Mark this TSN as seen.  */
-void sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn)
+int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn)
 {
-	__s32 gap;
+	u16 gap;
 
-	/* Vacuously mark any TSN which precedes the map base or
-	 * exceeds the end of the map.
-	 */
 	if (TSN_lt(tsn, map->base_tsn))
-		return;
-	if (!TSN_lt(tsn, map->base_tsn + map->len + map->len))
-		return;
-
-	/* Bump the max.  */
-	if (TSN_lt(map->max_tsn_seen, tsn))
-		map->max_tsn_seen = tsn;
+		return 0;
 
-	/* Assert: TSN is in range.  */
 	gap = tsn - map->base_tsn;
 
-	/* Mark the TSN as received.  */
-	if (gap < map->len)
-		map->tsn_map[gap]++;
-	else
-		map->overflow_map[gap - map->len]++;
+	if (gap >= map->len && !sctp_tsnmap_grow(map, gap))
+		return -ENOMEM;
 
-	/* Go fixup any internal TSN mapping variables including
-	 * cumulative_tsn_ack_point.
-	 */
-	sctp_tsnmap_update(map);
+	if (!sctp_tsnmap_has_gap(map) && gap == 0) {
+		/* In this case the map has no gaps and the tsn we are
+		 * recording is the next expected tsn.  We don't touch
+		 * the map but simply bump the values.
+		 */
+		map->max_tsn_seen++;
+		map->cumulative_tsn_ack_point++;
+		map->base_tsn++;
+	} else {
+		/* Either we already have a gap, or about to record a gap, so
+		 * have work to do.
+		 *
+		 * Bump the max.
+		 */
+		if (TSN_lt(map->max_tsn_seen, tsn))
+			map->max_tsn_seen = tsn;
+
+		/* Mark the TSN as received.  */
+		set_bit(gap, map->tsn_map);
+
+		/* Go fixup any internal TSN mapping variables including
+		 * cumulative_tsn_ack_point.
+		 */
+		sctp_tsnmap_update(map);
+	}
+
+	return 0;
 }
 
 
@@ -160,66 +170,34 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map,
 					 struct sctp_tsnmap_iter *iter,
 					 __u16 *start, __u16 *end)
 {
-	int started, ended;
-	__u16 start_, end_, offset;
-
-	/* We haven't found a gap yet.  */
-	started = ended = 0;
+	int ended = 0;
+	__u16 start_ = 0, end_ = 0, offset;
 
 	/* If there are no more gap acks possible, get out fast.  */
 	if (TSN_lte(map->max_tsn_seen, iter->start))
 		return 0;
 
-	/* Search the first mapping array.  */
-	if (iter->start - map->base_tsn < map->len) {
-
-		offset = iter->start - map->base_tsn;
-		sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len, 0,
-					 &started, &start_, &ended, &end_);
-	}
-
-	/* Do we need to check the overflow map? */
-	if (!ended) {
-		/* Fix up where we'd like to start searching in the
-		 * overflow map.
-		 */
-		if (iter->start - map->base_tsn < map->len)
-			offset = 0;
-		else
-			offset = iter->start - map->base_tsn - map->len;
-
-		/* Search the overflow map.  */
-		sctp_tsnmap_find_gap_ack(map->overflow_map,
-					 offset,
-					 map->len,
-					 map->len,
-					 &started, &start_,
-					 &ended, &end_);
-	}
+	offset = iter->start - map->base_tsn;
+	sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len,
+				 &start_, &end_);
 
-	/* The Gap Ack Block happens to end at the end of the
-	 * overflow map.
-	 */
-	if (started && !ended) {
-		ended++;
-		end_ = map->len + map->len - 1;
-	}
+	/* The Gap Ack Block happens to end at the end of the map. */
+	if (start_ && !end_)
+		end_ = map->len - 1;
 
 	/* If we found a Gap Ack Block, return the start and end and
 	 * bump the iterator forward.
 	 */
-	if (ended) {
+	if (end_) {
 		/* Fix up the start and end based on the
-		 * Cumulative TSN Ack offset into the map.
+		 * Cumulative TSN Ack which is always 1 behind base.
 		 */
-		int gap = map->cumulative_tsn_ack_point -
-			map->base_tsn;
-
-		*start = start_ - gap;
-		*end = end_ - gap;
+		*start = start_ + 1;
+		*end = end_ + 1;
 
 		/* Move the iterator forward.  */
 		iter->start = map->cumulative_tsn_ack_point + *end + 1;
+		ended = 1;
 	}
 
 	return ended;
@@ -228,35 +206,33 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map,
 /* Mark this and any lower TSN as seen.  */
 void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn)
 {
-	__s32 gap;
+	u32 gap;
 
-	/* Vacuously mark any TSN which precedes the map base or
-	 * exceeds the end of the map.
-	 */
 	if (TSN_lt(tsn, map->base_tsn))
 		return;
-	if (!TSN_lt(tsn, map->base_tsn + map->len + map->len))
+	if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE))
 		return;
 
 	/* Bump the max.  */
 	if (TSN_lt(map->max_tsn_seen, tsn))
 		map->max_tsn_seen = tsn;
 
-	/* Assert: TSN is in range.  */
 	gap = tsn - map->base_tsn + 1;
 
-	/* Mark the TSNs as received.  */
-	if (gap <= map->len)
-		memset(map->tsn_map, 0x01, gap);
-	else {
-		memset(map->tsn_map, 0x01, map->len);
-		memset(map->overflow_map, 0x01, (gap - map->len));
+	map->base_tsn += gap;
+	map->cumulative_tsn_ack_point += gap;
+	if (gap >= map->len) {
+		/* If our gap is larger then the map size, just
+		 * zero out the map.
+		 */
+		bitmap_zero(map->tsn_map, map->len);
+	} else {
+		/* If the gap is smaller then the map size,
+		 * shift the map by 'gap' bits and update further.
+		 */
+		bitmap_shift_right(map->tsn_map, map->tsn_map, gap, map->len);
+		sctp_tsnmap_update(map);
 	}
-
-	/* Go fixup any internal TSN mapping variables including
-	 * cumulative_tsn_ack_point.
-	 */
-	sctp_tsnmap_update(map);
 }
 
 /********************************************************************
@@ -268,27 +244,19 @@ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn)
  */
 static void sctp_tsnmap_update(struct sctp_tsnmap *map)
 {
-	__u32 ctsn;
-
-	ctsn = map->cumulative_tsn_ack_point;
-	do {
-		ctsn++;
-		if (ctsn == map->overflow_tsn) {
-			/* Now tsn_map must have been all '1's,
-			 * so we swap the map and check the overflow table
-			 */
-			__u8 *tmp = map->tsn_map;
-			memset(tmp, 0, map->len);
-			map->tsn_map = map->overflow_map;
-			map->overflow_map = tmp;
-
-			/* Update the tsn_map boundaries.  */
-			map->base_tsn += map->len;
-			map->overflow_tsn += map->len;
-		}
-	} while (map->tsn_map[ctsn - map->base_tsn]);
+	u16 len;
+	unsigned long zero_bit;
+
+
+	len = map->max_tsn_seen - map->cumulative_tsn_ack_point;
+	zero_bit = find_first_zero_bit(map->tsn_map, len);
+	if (!zero_bit)
+		return;		/* The first 0-bit is bit 0.  nothing to do */
+
+	map->base_tsn += zero_bit;
+	map->cumulative_tsn_ack_point += zero_bit;
 
-	map->cumulative_tsn_ack_point = ctsn - 1; /* Back up one. */
+	bitmap_shift_right(map->tsn_map, map->tsn_map, zero_bit, map->len);
 }
 
 /* How many data chunks  are we missing from our peer?
@@ -299,31 +267,19 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map)
 	__u32 max_tsn = map->max_tsn_seen;
 	__u32 base_tsn = map->base_tsn;
 	__u16 pending_data;
-	__s32 gap, start, end, i;
+	u32 gap, i;
 
 	pending_data = max_tsn - cum_tsn;
 	gap = max_tsn - base_tsn;
 
-	if (gap <= 0 || gap >= (map->len + map->len))
+	if (gap == 0 || gap >= map->len)
 		goto out;
 
-	start = ((cum_tsn >= base_tsn) ? (cum_tsn - base_tsn + 1) : 0);
-	end = ((gap > map->len ) ? map->len : gap + 1);
-
-	for (i = start; i < end; i++) {
-		if (map->tsn_map[i])
+	for (i = 0; i < gap+1; i++) {
+		if (test_bit(i, map->tsn_map))
 			pending_data--;
 	}
 
-	if (gap >= map->len) {
-		start = 0;
-		end = gap - map->len + 1;
-		for (i = start; i < end; i++) {
-			if (map->overflow_map[i])
-				pending_data--;
-		}
-	}
-
 out:
 	return pending_data;
 }
@@ -334,10 +290,8 @@ out:
  * The flags "started" and "ended" tell is if we found the beginning
  * or (respectively) the end of a Gap Ack Block.
  */
-static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off,
-				     __u16 len, __u16 base,
-				     int *started, __u16 *start,
-				     int *ended, __u16 *end)
+static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off,
+				     __u16 len, __u16 *start, __u16 *end)
 {
 	int i = off;
 
@@ -348,49 +302,36 @@ static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off,
 	/* Also, stop looking past the maximum TSN seen. */
 
 	/* Look for the start. */
-	if (!(*started)) {
-		for (; i < len; i++) {
-			if (map[i]) {
-				(*started)++;
-				*start = base + i;
-				break;
-			}
-		}
-	}
+	i = find_next_bit(map, len, off);
+	if (i < len)
+		*start = i;
 
 	/* Look for the end.  */
-	if (*started) {
+	if (*start) {
 		/* We have found the start, let's find the
 		 * end.  If we find the end, break out.
 		 */
-		for (; i < len; i++) {
-			if (!map[i]) {
-				(*ended)++;
-				*end = base + i - 1;
-				break;
-			}
-		}
+		i = find_next_zero_bit(map, len, i);
+		if (i < len)
+			*end = i - 1;
 	}
 }
 
 /* Renege that we have seen a TSN.  */
 void sctp_tsnmap_renege(struct sctp_tsnmap *map, __u32 tsn)
 {
-	__s32 gap;
+	u32 gap;
 
 	if (TSN_lt(tsn, map->base_tsn))
 		return;
-	if (!TSN_lt(tsn, map->base_tsn + map->len + map->len))
+	/* Assert: TSN is in range.  */
+	if (!TSN_lt(tsn, map->base_tsn + map->len))
 		return;
 
-	/* Assert: TSN is in range.  */
 	gap = tsn - map->base_tsn;
 
 	/* Pretend we never saw the TSN.  */
-	if (gap < map->len)
-		map->tsn_map[gap] = 0;
-	else
-		map->overflow_map[gap - map->len] = 0;
+	clear_bit(gap, map->tsn_map);
 }
 
 /* How many gap ack blocks do we have recorded? */
@@ -416,3 +357,27 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map)
 	}
 	return gabs;
 }
+
+static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap)
+{
+	unsigned long *new;
+	unsigned long inc;
+	u16  len;
+
+	if (gap >= SCTP_TSN_MAP_SIZE)
+		return 0;
+
+	inc = ALIGN((gap - map->len),BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT;
+	len = min_t(u16, map->len + inc, SCTP_TSN_MAP_SIZE);
+
+	new = kzalloc(len>>3, GFP_ATOMIC);
+	if (!new)
+		return 0;
+
+	bitmap_copy(new, map->tsn_map, map->max_tsn_seen - map->base_tsn);
+	kfree(map->tsn_map);
+	map->tsn_map = new;
+	map->len = len;
+
+	return 1;
+}
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index a1f654aea26..5f186ca550d 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -713,7 +713,9 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	/* Now that all memory allocations for this chunk succeeded, we
 	 * can mark it as received so the tsn_map is updated correctly.
 	 */
-	sctp_tsnmap_mark(&asoc->peer.tsn_map, ntohl(chunk->subh.data_hdr->tsn));
+	if (sctp_tsnmap_mark(&asoc->peer.tsn_map,
+			     ntohl(chunk->subh.data_hdr->tsn)))
+		goto fail_mark;
 
 	/* First calculate the padding, so we don't inadvertently
 	 * pass up the wrong length to the user.
@@ -755,8 +757,12 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	event->msg_flags |= chunk->chunk_hdr->flags;
 	event->iif = sctp_chunk_iif(chunk);
 
-fail:
 	return event;
+
+fail_mark:
+	kfree_skb(skb);
+fail:
+	return NULL;
 }
 
 /* Create a partial delivery related event.
-- 
cgit v1.2.3-70-g09d2


From 02015180e2509afd2e3fe3790a333b30708a116b Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 8 Oct 2008 14:19:01 -0700
Subject: sctp: shrink sctp_tsnmap some more by removing gabs array

The gabs array in the sctp_tsnmap structure is only used
in one place, sctp_make_sack().  As such, carrying the
array around in the sctp_tsnmap and thus directly in
the sctp_association is rather pointless since most
of the time it's just taking up space.  Now, let
sctp_make_sack create and populate it and then throw
it away when it's done.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/tsnmap.h | 14 +++-----------
 net/sctp/sm_make_chunk.c  |  6 ++++--
 net/sctp/tsnmap.c         | 15 ++++++++-------
 3 files changed, 15 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h
index 6dabbee8bbd..4aabc5a96cf 100644
--- a/include/net/sctp/tsnmap.h
+++ b/include/net/sctp/tsnmap.h
@@ -94,11 +94,8 @@ struct sctp_tsnmap {
 	 * every SACK.  Store up to SCTP_MAX_DUP_TSNS worth of
 	 * information.
 	 */
-	__be32 dup_tsns[SCTP_MAX_DUP_TSNS];
 	__u16 num_dup_tsns;
-
-	/* Record gap ack block information here.  */
-	struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
+	__be32 dup_tsns[SCTP_MAX_DUP_TSNS];
 };
 
 struct sctp_tsnmap_iter {
@@ -151,17 +148,12 @@ static inline __be32 *sctp_tsnmap_get_dups(struct sctp_tsnmap *map)
 }
 
 /* How many gap ack blocks do we have recorded? */
-__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map);
+__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map,
+			   struct sctp_gap_ack_block *gabs);
 
 /* Refresh the count on pending data. */
 __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map);
 
-/* Return pointer to gap ack blocks as needed by SACK. */
-static inline struct sctp_gap_ack_block *sctp_tsnmap_get_gabs(struct sctp_tsnmap *map)
-{
-	return map->gabs;
-}
-
 /* Is there a gap in the TSN map?  */
 static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map)
 {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 6dd9b3ef33d..fd8acb48c3f 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -702,12 +702,14 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
 	__u32 ctsn;
 	__u16 num_gabs, num_dup_tsns;
 	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
+	struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
 
+	memset(gabs, 0, sizeof(gabs));
 	ctsn = sctp_tsnmap_get_ctsn(map);
 	SCTP_DEBUG_PRINTK("sackCTSNAck sent:  0x%x.\n", ctsn);
 
 	/* How much room is needed in the chunk? */
-	num_gabs = sctp_tsnmap_num_gabs(map);
+	num_gabs = sctp_tsnmap_num_gabs(map, gabs);
 	num_dup_tsns = sctp_tsnmap_num_dups(map);
 
 	/* Initialize the SACK header.  */
@@ -763,7 +765,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
 	/* Add the gap ack block information.   */
 	if (num_gabs)
 		sctp_addto_chunk(retval, sizeof(__u32) * num_gabs,
-				 sctp_tsnmap_get_gabs(map));
+				 gabs);
 
 	/* Add the duplicate TSN information.  */
 	if (num_dup_tsns)
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index 142ed7ca424..35c73e82553 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -335,10 +335,11 @@ void sctp_tsnmap_renege(struct sctp_tsnmap *map, __u32 tsn)
 }
 
 /* How many gap ack blocks do we have recorded? */
-__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map)
+__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map,
+			   struct sctp_gap_ack_block *gabs)
 {
 	struct sctp_tsnmap_iter iter;
-	int gabs = 0;
+	int ngaps = 0;
 
 	/* Refresh the gap ack information. */
 	if (sctp_tsnmap_has_gap(map)) {
@@ -348,14 +349,14 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map)
 						&start,
 						&end)) {
 
-			map->gabs[gabs].start = htons(start);
-			map->gabs[gabs].end = htons(end);
-			gabs++;
-			if (gabs >= SCTP_MAX_GABS)
+			gabs[ngaps].start = htons(start);
+			gabs[ngaps].end = htons(end);
+			ngaps++;
+			if (ngaps >= SCTP_MAX_GABS)
 				break;
 		}
 	}
-	return gabs;
+	return ngaps;
 }
 
 static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap)
-- 
cgit v1.2.3-70-g09d2


From 53b125779fb0b29e5b316bf3dc7d199e6dcea567 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 8 Oct 2008 14:36:33 -0700
Subject: tcpv6: fix option space offsets with md5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

More breakage :-), part of timestamps just were previously
overwritten.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e8b0fdd9edb..dd7bdde7bdd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1088,7 +1088,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
 		*topt++ = htonl(tcp_time_stamp);
-		*topt = htonl(ts);
+		*topt++ = htonl(ts);
 	}
 
 #ifdef CONFIG_TCP_MD5SIG
-- 
cgit v1.2.3-70-g09d2


From 071d7ab6649eb34a873a53e71635186e9117101d Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Wed, 8 Oct 2008 14:41:35 -0700
Subject: ipvs: Remove stray file left over from ipvs move

Commit cb7f6a7b716e801097b564dec3ccb58d330aef56 ("IPVS: Move IPVS to
net/netfilter/ipvs") has left a stray file in the old location of ipvs.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_dh.c | 261 -----------------------------------------------
 1 file changed, 261 deletions(-)
 delete mode 100644 net/ipv4/ipvs/ip_vs_dh.c

(limited to 'net')

diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
deleted file mode 100644
index a16943fd72f..00000000000
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * IPVS:        Destination Hashing scheduling module
- *
- * Authors:     Wensong Zhang <wensong@gnuchina.org>
- *
- *              Inspired by the consistent hashing scheduler patch from
- *              Thomas Proell <proellt@gmx.de>
- *
- *              This program is free software; you can redistribute it and/or
- *              modify it under the terms of the GNU General Public License
- *              as published by the Free Software Foundation; either version
- *              2 of the License, or (at your option) any later version.
- *
- * Changes:
- *
- */
-
-/*
- * The dh algorithm is to select server by the hash key of destination IP
- * address. The pseudo code is as follows:
- *
- *       n <- servernode[dest_ip];
- *       if (n is dead) OR
- *          (n is overloaded) OR (n.weight <= 0) then
- *                 return NULL;
- *
- *       return n;
- *
- * Notes that servernode is a 256-bucket hash table that maps the hash
- * index derived from packet destination IP address to the current server
- * array. If the dh scheduler is used in cache cluster, it is good to
- * combine it with cache_bypass feature. When the statically assigned
- * server is dead or overloaded, the load balancer can bypass the cache
- * server and send requests to the original server directly.
- *
- */
-
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-
-#include <net/ip_vs.h>
-
-
-/*
- *      IPVS DH bucket
- */
-struct ip_vs_dh_bucket {
-	struct ip_vs_dest       *dest;          /* real server (cache) */
-};
-
-/*
- *     for IPVS DH entry hash table
- */
-#ifndef CONFIG_IP_VS_DH_TAB_BITS
-#define CONFIG_IP_VS_DH_TAB_BITS        8
-#endif
-#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
-#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
-#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
-
-
-/*
- *	Returns hash value for IPVS DH entry
- */
-static inline unsigned ip_vs_dh_hashkey(__be32 addr)
-{
-	return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK;
-}
-
-
-/*
- *      Get ip_vs_dest associated with supplied parameters.
- */
-static inline struct ip_vs_dest *
-ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr)
-{
-	return (tbl[ip_vs_dh_hashkey(addr)]).dest;
-}
-
-
-/*
- *      Assign all the hash buckets of the specified table with the service.
- */
-static int
-ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
-{
-	int i;
-	struct ip_vs_dh_bucket *b;
-	struct list_head *p;
-	struct ip_vs_dest *dest;
-
-	b = tbl;
-	p = &svc->destinations;
-	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
-		if (list_empty(p)) {
-			b->dest = NULL;
-		} else {
-			if (p == &svc->destinations)
-				p = p->next;
-
-			dest = list_entry(p, struct ip_vs_dest, n_list);
-			atomic_inc(&dest->refcnt);
-			b->dest = dest;
-
-			p = p->next;
-		}
-		b++;
-	}
-	return 0;
-}
-
-
-/*
- *      Flush all the hash buckets of the specified table.
- */
-static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
-{
-	int i;
-	struct ip_vs_dh_bucket *b;
-
-	b = tbl;
-	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
-		if (b->dest) {
-			atomic_dec(&b->dest->refcnt);
-			b->dest = NULL;
-		}
-		b++;
-	}
-}
-
-
-static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_dh_bucket *tbl;
-
-	/* allocate the DH table for this service */
-	tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
-		      GFP_ATOMIC);
-	if (tbl == NULL) {
-		IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n");
-		return -ENOMEM;
-	}
-	svc->sched_data = tbl;
-	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
-		  "current service\n",
-		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
-
-	/* assign the hash buckets with the updated service */
-	ip_vs_dh_assign(tbl, svc);
-
-	return 0;
-}
-
-
-static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_dh_bucket *tbl = svc->sched_data;
-
-	/* got to clean up hash buckets here */
-	ip_vs_dh_flush(tbl);
-
-	/* release the table itself */
-	kfree(svc->sched_data);
-	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
-		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
-
-	return 0;
-}
-
-
-static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
-{
-	struct ip_vs_dh_bucket *tbl = svc->sched_data;
-
-	/* got to clean up hash buckets here */
-	ip_vs_dh_flush(tbl);
-
-	/* assign the hash buckets with the updated service */
-	ip_vs_dh_assign(tbl, svc);
-
-	return 0;
-}
-
-
-/*
- *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
- *      consider that the server is overloaded here.
- */
-static inline int is_overloaded(struct ip_vs_dest *dest)
-{
-	return dest->flags & IP_VS_DEST_F_OVERLOAD;
-}
-
-
-/*
- *      Destination hashing scheduling
- */
-static struct ip_vs_dest *
-ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
-{
-	struct ip_vs_dest *dest;
-	struct ip_vs_dh_bucket *tbl;
-	struct iphdr *iph = ip_hdr(skb);
-
-	IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
-
-	tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
-	dest = ip_vs_dh_get(tbl, iph->daddr);
-	if (!dest
-	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
-	    || atomic_read(&dest->weight) <= 0
-	    || is_overloaded(dest)) {
-		return NULL;
-	}
-
-	IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
-		  "--> server %u.%u.%u.%u:%d\n",
-		  NIPQUAD(iph->daddr),
-		  NIPQUAD(dest->addr.ip),
-		  ntohs(dest->port));
-
-	return dest;
-}
-
-
-/*
- *      IPVS DH Scheduler structure
- */
-static struct ip_vs_scheduler ip_vs_dh_scheduler =
-{
-	.name =			"dh",
-	.refcnt =		ATOMIC_INIT(0),
-	.module =		THIS_MODULE,
-	.n_list =		LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	0,
-#endif
-	.init_service =		ip_vs_dh_init_svc,
-	.done_service =		ip_vs_dh_done_svc,
-	.update_service =	ip_vs_dh_update_svc,
-	.schedule =		ip_vs_dh_schedule,
-};
-
-
-static int __init ip_vs_dh_init(void)
-{
-	return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
-}
-
-
-static void __exit ip_vs_dh_cleanup(void)
-{
-	unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
-}
-
-
-module_init(ip_vs_dh_init);
-module_exit(ip_vs_dh_cleanup);
-MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-70-g09d2


From 91da11f870f00a3322b81c73042291d7f0be5a17 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Tue, 7 Oct 2008 13:44:02 +0000
Subject: net: Distributed Switch Architecture protocol support

Distributed Switch Architecture is a protocol for managing hardware
switch chips.  It consists of a set of MII management registers and
commands to configure the switch, and an ethernet header format to
signal which of the ports of the switch a packet was received from
or is intended to be sent to.

The switches that this driver supports are typically embedded in
access points and routers, and a typical setup with a DSA switch
looks something like this:

	+-----------+       +-----------+
	|           | RGMII |           |
	|           +-------+           +------ 1000baseT MDI ("WAN")
	|           |       |  6-port   +------ 1000baseT MDI ("LAN1")
	|    CPU    |       |  ethernet +------ 1000baseT MDI ("LAN2")
	|           |MIImgmt|  switch   +------ 1000baseT MDI ("LAN3")
	|           +-------+  w/5 PHYs +------ 1000baseT MDI ("LAN4")
	|           |       |           |
	+-----------+       +-----------+

The switch driver presents each port on the switch as a separate
network interface to Linux, polls the switch to maintain software
link state of those ports, forwards MII management interface
accesses to those network interfaces (e.g. as done by ethtool) to
the switch, and exposes the switch's hardware statistics counters
via the appropriate Linux kernel interfaces.

This initial patch supports the MII management interface register
layout of the Marvell 88E6123, 88E6161 and 88E6165 switch chips, and
supports the "Ethertype DSA" packet tagging format.

(There is no officially registered ethertype for the Ethertype DSA
packet format, so we just grab a random one.  The ethertype to use
is programmed into the switch, and the switch driver uses the value
of ETH_P_EDSA for this, so this define can be changed at any time in
the future if the one we chose is allocated to another protocol or
if Ethertype DSA gets its own officially registered ethertype, and
everything will continue to work.)

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Tested-by: Nicolas Pitre <nico@marvell.com>
Tested-by: Byron Bradley <byron.bbradley@gmail.com>
Tested-by: Tim Ellis <tim.ellis@mac.com>
Tested-by: Peter van Valderen <linux@ddcrew.com>
Tested-by: Dirk Teurlings <dirk@upexia.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h  |   1 +
 include/linux/netdevice.h |   3 +
 include/net/dsa.h         |  34 ++++
 net/Kconfig               |   1 +
 net/Makefile              |   1 +
 net/dsa/Kconfig           |  31 ++++
 net/dsa/Makefile          |   9 +
 net/dsa/dsa.c             | 369 ++++++++++++++++++++++++++++++++++++++++
 net/dsa/dsa_priv.h        | 110 ++++++++++++
 net/dsa/mv88e6123_61_65.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++
 net/dsa/mv88e6xxx.c       | 377 +++++++++++++++++++++++++++++++++++++++++
 net/dsa/mv88e6xxx.h       |  77 +++++++++
 net/dsa/slave.c           | 288 ++++++++++++++++++++++++++++++++
 net/dsa/tag_edsa.c        | 213 +++++++++++++++++++++++
 14 files changed, 1931 insertions(+)
 create mode 100644 include/net/dsa.h
 create mode 100644 net/dsa/Kconfig
 create mode 100644 net/dsa/Makefile
 create mode 100644 net/dsa/dsa.c
 create mode 100644 net/dsa/dsa_priv.h
 create mode 100644 net/dsa/mv88e6123_61_65.c
 create mode 100644 net/dsa/mv88e6xxx.c
 create mode 100644 net/dsa/mv88e6xxx.h
 create mode 100644 net/dsa/slave.c
 create mode 100644 net/dsa/tag_edsa.c

(limited to 'net')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 723a1c5fbc6..2140aacb633 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -77,6 +77,7 @@
 #define ETH_P_PAE	0x888E		/* Port Access Entity (IEEE 802.1X) */
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
+#define ETH_P_EDSA	0xDADA		/* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
 
 /*
  *	Non DIX types. Won't clash for 1500 types.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9cfd20be8b7..794eeb4b346 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -607,6 +607,9 @@ struct net_device
 
 	/* Protocol specific pointers */
 	
+#ifdef CONFIG_NET_DSA
+	void			*dsa_ptr;	/* dsa specific data */
+#endif
 	void 			*atalk_ptr;	/* AppleTalk link 	*/
 	void			*ip_ptr;	/* IPv4 specific data	*/  
 	void                    *dn_ptr;        /* DECnet specific data */
diff --git a/include/net/dsa.h b/include/net/dsa.h
new file mode 100644
index 00000000000..dc4784f5452
--- /dev/null
+++ b/include/net/dsa.h
@@ -0,0 +1,34 @@
+/*
+ * include/net/dsa.h - Driver for Distributed Switch Architecture switch chips
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_NET_DSA_H
+#define __LINUX_NET_DSA_H
+
+#define DSA_MAX_PORTS	12
+
+struct dsa_platform_data {
+	/*
+	 * Reference to a Linux network interface that connects
+	 * to the switch chip.
+	 */
+	struct device	*netdev;
+
+	/*
+	 * How to access the switch configuration registers, and
+	 * the names of the switch ports (use "cpu" to designate
+	 * the switch port that the cpu is connected to).
+	 */
+	struct device	*mii_bus;
+	int		sw_addr;
+	char		*port_names[DSA_MAX_PORTS];
+};
+
+
+#endif
diff --git a/net/Kconfig b/net/Kconfig
index 9103a16a77b..d789d79551a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -180,6 +180,7 @@ source "net/tipc/Kconfig"
 source "net/atm/Kconfig"
 source "net/802/Kconfig"
 source "net/bridge/Kconfig"
+source "net/dsa/Kconfig"
 source "net/8021q/Kconfig"
 source "net/decnet/Kconfig"
 source "net/llc/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index acaf819f24a..27d1f10dc0e 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_PACKET)		+= packet/
 obj-$(CONFIG_NET_KEY)		+= key/
 obj-$(CONFIG_NET_SCHED)		+= sched/
 obj-$(CONFIG_BRIDGE)		+= bridge/
+obj-$(CONFIG_NET_DSA)		+= dsa/
 obj-$(CONFIG_IPX)		+= ipx/
 obj-$(CONFIG_ATALK)		+= appletalk/
 obj-$(CONFIG_WAN_ROUTER)	+= wanrouter/
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
new file mode 100644
index 00000000000..7cf55e5eb39
--- /dev/null
+++ b/net/dsa/Kconfig
@@ -0,0 +1,31 @@
+menuconfig NET_DSA
+	bool "Distributed Switch Architecture support"
+	default n
+	depends on EXPERIMENTAL
+	---help---
+	  This allows you to use hardware switch chips that use
+	  the Distributed Switch Architecture.
+
+
+if NET_DSA
+
+# tagging formats
+config NET_DSA_TAG_EDSA
+	bool
+	default n
+
+
+# switch drivers
+config NET_DSA_MV88E6XXX
+	bool
+	default n
+
+config NET_DSA_MV88E6123_61_65
+	bool "Marvell 88E6123/6161/6165 ethernet switch chip support"
+	select NET_DSA_MV88E6XXX
+	select NET_DSA_TAG_EDSA
+	---help---
+	  This enables support for the Marvell 88E6123/6161/6165
+	  ethernet switch chips.
+
+endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
new file mode 100644
index 00000000000..b59a6f6bcf5
--- /dev/null
+++ b/net/dsa/Makefile
@@ -0,0 +1,9 @@
+# tagging formats
+obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
+
+# switch drivers
+obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
+obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o
+
+# the core
+obj-$(CONFIG_NET_DSA) += dsa.o slave.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
new file mode 100644
index 00000000000..6cc5be2ec7f
--- /dev/null
+++ b/net/dsa/dsa.c
@@ -0,0 +1,369 @@
+/*
+ * net/dsa/dsa.c - Hardware switch handling
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+char dsa_driver_version[] = "0.1";
+
+
+/* switch driver registration ***********************************************/
+static DEFINE_MUTEX(dsa_switch_drivers_mutex);
+static LIST_HEAD(dsa_switch_drivers);
+
+void register_switch_driver(struct dsa_switch_driver *drv)
+{
+	mutex_lock(&dsa_switch_drivers_mutex);
+	list_add_tail(&drv->list, &dsa_switch_drivers);
+	mutex_unlock(&dsa_switch_drivers_mutex);
+}
+
+void unregister_switch_driver(struct dsa_switch_driver *drv)
+{
+	mutex_lock(&dsa_switch_drivers_mutex);
+	list_del_init(&drv->list);
+	mutex_unlock(&dsa_switch_drivers_mutex);
+}
+
+static struct dsa_switch_driver *
+dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name)
+{
+	struct dsa_switch_driver *ret;
+	struct list_head *list;
+	char *name;
+
+	ret = NULL;
+	name = NULL;
+
+	mutex_lock(&dsa_switch_drivers_mutex);
+	list_for_each(list, &dsa_switch_drivers) {
+		struct dsa_switch_driver *drv;
+
+		drv = list_entry(list, struct dsa_switch_driver, list);
+
+		name = drv->probe(bus, sw_addr);
+		if (name != NULL) {
+			ret = drv;
+			break;
+		}
+	}
+	mutex_unlock(&dsa_switch_drivers_mutex);
+
+	*_name = name;
+
+	return ret;
+}
+
+
+/* basic switch operations **************************************************/
+static struct dsa_switch *
+dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd,
+		 struct mii_bus *bus, struct net_device *dev)
+{
+	struct dsa_switch *ds;
+	int ret;
+	struct dsa_switch_driver *drv;
+	char *name;
+	int i;
+
+	/*
+	 * Probe for switch model.
+	 */
+	drv = dsa_switch_probe(bus, pd->sw_addr, &name);
+	if (drv == NULL) {
+		printk(KERN_ERR "%s: could not detect attached switch\n",
+		       dev->name);
+		return ERR_PTR(-EINVAL);
+	}
+	printk(KERN_INFO "%s: detected a %s switch\n", dev->name, name);
+
+
+	/*
+	 * Allocate and initialise switch state.
+	 */
+	ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL);
+	if (ds == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ds->pd = pd;
+	ds->master_netdev = dev;
+	ds->master_mii_bus = bus;
+
+	ds->drv = drv;
+	ds->tag_protocol = drv->tag_protocol;
+
+
+	/*
+	 * Validate supplied switch configuration.
+	 */
+	ds->cpu_port = -1;
+	for (i = 0; i < DSA_MAX_PORTS; i++) {
+		char *name;
+
+		name = pd->port_names[i];
+		if (name == NULL)
+			continue;
+
+		if (!strcmp(name, "cpu")) {
+			if (ds->cpu_port != -1) {
+				printk(KERN_ERR "multiple cpu ports?!\n");
+				ret = -EINVAL;
+				goto out;
+			}
+			ds->cpu_port = i;
+		} else {
+			ds->valid_port_mask |= 1 << i;
+		}
+	}
+
+	if (ds->cpu_port == -1) {
+		printk(KERN_ERR "no cpu port?!\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+
+	/*
+	 * If we use a tagging format that doesn't have an ethertype
+	 * field, make sure that all packets from this point on get
+	 * sent to the tag format's receive function.  (Which will
+	 * discard received packets until we set ds->ports[] below.)
+	 */
+	wmb();
+	dev->dsa_ptr = (void *)ds;
+
+
+	/*
+	 * Do basic register setup.
+	 */
+	ret = drv->setup(ds);
+	if (ret < 0)
+		goto out;
+
+	ret = drv->set_addr(ds, dev->dev_addr);
+	if (ret < 0)
+		goto out;
+
+	ds->slave_mii_bus = mdiobus_alloc();
+	if (ds->slave_mii_bus == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	dsa_slave_mii_bus_init(ds);
+
+	ret = mdiobus_register(ds->slave_mii_bus);
+	if (ret < 0)
+		goto out_free;
+
+
+	/*
+	 * Create network devices for physical switch ports.
+	 */
+	wmb();
+	for (i = 0; i < DSA_MAX_PORTS; i++) {
+		struct net_device *slave_dev;
+
+		if (!(ds->valid_port_mask & (1 << i)))
+			continue;
+
+		slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]);
+		if (slave_dev == NULL) {
+			printk(KERN_ERR "%s: can't create dsa slave "
+			       "device for port %d(%s)\n",
+			       dev->name, i, pd->port_names[i]);
+			continue;
+		}
+
+		ds->ports[i] = slave_dev;
+	}
+
+	return ds;
+
+out_free:
+	mdiobus_free(ds->slave_mii_bus);
+out:
+	dev->dsa_ptr = NULL;
+	kfree(ds);
+	return ERR_PTR(ret);
+}
+
+static void dsa_switch_destroy(struct dsa_switch *ds)
+{
+}
+
+
+/* link polling *************************************************************/
+static void dsa_link_poll_work(struct work_struct *ugly)
+{
+	struct dsa_switch *ds;
+
+	ds = container_of(ugly, struct dsa_switch, link_poll_work);
+
+	ds->drv->poll_link(ds);
+	mod_timer(&ds->link_poll_timer, round_jiffies(jiffies + HZ));
+}
+
+static void dsa_link_poll_timer(unsigned long _ds)
+{
+	struct dsa_switch *ds = (void *)_ds;
+
+	schedule_work(&ds->link_poll_work);
+}
+
+
+/* platform driver init and cleanup *****************************************/
+static int dev_is_class(struct device *dev, void *class)
+{
+	if (dev->class != NULL && !strcmp(dev->class->name, class))
+		return 1;
+
+	return 0;
+}
+
+static struct device *dev_find_class(struct device *parent, char *class)
+{
+	if (dev_is_class(parent, class)) {
+		get_device(parent);
+		return parent;
+	}
+
+	return device_find_child(parent, class, dev_is_class);
+}
+
+static struct mii_bus *dev_to_mii_bus(struct device *dev)
+{
+	struct device *d;
+
+	d = dev_find_class(dev, "mdio_bus");
+	if (d != NULL) {
+		struct mii_bus *bus;
+
+		bus = to_mii_bus(d);
+		put_device(d);
+
+		return bus;
+	}
+
+	return NULL;
+}
+
+static struct net_device *dev_to_net_device(struct device *dev)
+{
+	struct device *d;
+
+	d = dev_find_class(dev, "net");
+	if (d != NULL) {
+		struct net_device *nd;
+
+		nd = to_net_dev(d);
+		dev_hold(nd);
+		put_device(d);
+
+		return nd;
+	}
+
+	return NULL;
+}
+
+static int dsa_probe(struct platform_device *pdev)
+{
+	static int dsa_version_printed;
+	struct dsa_platform_data *pd = pdev->dev.platform_data;
+	struct net_device *dev;
+	struct mii_bus *bus;
+	struct dsa_switch *ds;
+
+	if (!dsa_version_printed++)
+		printk(KERN_NOTICE "Distributed Switch Architecture "
+			"driver version %s\n", dsa_driver_version);
+
+	if (pd == NULL || pd->mii_bus == NULL || pd->netdev == NULL)
+		return -EINVAL;
+
+	bus = dev_to_mii_bus(pd->mii_bus);
+	if (bus == NULL)
+		return -EINVAL;
+
+	dev = dev_to_net_device(pd->netdev);
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->dsa_ptr != NULL) {
+		dev_put(dev);
+		return -EEXIST;
+	}
+
+	ds = dsa_switch_setup(&pdev->dev, pd, bus, dev);
+	if (IS_ERR(ds)) {
+		dev_put(dev);
+		return PTR_ERR(ds);
+	}
+
+	if (ds->drv->poll_link != NULL) {
+		INIT_WORK(&ds->link_poll_work, dsa_link_poll_work);
+		init_timer(&ds->link_poll_timer);
+		ds->link_poll_timer.data = (unsigned long)ds;
+		ds->link_poll_timer.function = dsa_link_poll_timer;
+		ds->link_poll_timer.expires = round_jiffies(jiffies + HZ);
+		add_timer(&ds->link_poll_timer);
+	}
+
+	platform_set_drvdata(pdev, ds);
+
+	return 0;
+}
+
+static int dsa_remove(struct platform_device *pdev)
+{
+	struct dsa_switch *ds = platform_get_drvdata(pdev);
+
+	if (ds->drv->poll_link != NULL)
+		del_timer_sync(&ds->link_poll_timer);
+
+	flush_scheduled_work();
+
+	dsa_switch_destroy(ds);
+
+	return 0;
+}
+
+static void dsa_shutdown(struct platform_device *pdev)
+{
+}
+
+static struct platform_driver dsa_driver = {
+	.probe		= dsa_probe,
+	.remove		= dsa_remove,
+	.shutdown	= dsa_shutdown,
+	.driver = {
+		.name	= "dsa",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init dsa_init_module(void)
+{
+	return platform_driver_register(&dsa_driver);
+}
+module_init(dsa_init_module);
+
+static void __exit dsa_cleanup_module(void)
+{
+	platform_driver_unregister(&dsa_driver);
+}
+module_exit(dsa_cleanup_module);
+
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>")
+MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:dsa");
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
new file mode 100644
index 00000000000..21ee9052079
--- /dev/null
+++ b/net/dsa/dsa_priv.h
@@ -0,0 +1,110 @@
+/*
+ * net/dsa/dsa_priv.h - Hardware switch handling
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __DSA_PRIV_H
+#define __DSA_PRIV_H
+
+#include <linux/list.h>
+#include <linux/phy.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <net/dsa.h>
+
+struct dsa_switch {
+	/*
+	 * Configuration data for the platform device that owns
+	 * this dsa switch instance.
+	 */
+	struct dsa_platform_data	*pd;
+
+	/*
+	 * References to network device and mii bus to use.
+	 */
+	struct net_device		*master_netdev;
+	struct mii_bus			*master_mii_bus;
+
+	/*
+	 * The used switch driver and frame tagging type.
+	 */
+	struct dsa_switch_driver	*drv;
+	__be16				tag_protocol;
+
+	/*
+	 * Slave mii_bus and devices for the individual ports.
+	 */
+	int				cpu_port;
+	u32				valid_port_mask;
+	struct mii_bus			*slave_mii_bus;
+	struct net_device		*ports[DSA_MAX_PORTS];
+
+	/*
+	 * Link state polling.
+	 */
+	struct work_struct		link_poll_work;
+	struct timer_list		link_poll_timer;
+};
+
+struct dsa_slave_priv {
+	struct net_device	*dev;
+	struct dsa_switch	*parent;
+	int			port;
+	struct phy_device	*phy;
+};
+
+struct dsa_switch_driver {
+	struct list_head	list;
+
+	__be16			tag_protocol;
+	int			priv_size;
+
+	/*
+	 * Probing and setup.
+	 */
+	char	*(*probe)(struct mii_bus *bus, int sw_addr);
+	int	(*setup)(struct dsa_switch *ds);
+	int	(*set_addr)(struct dsa_switch *ds, u8 *addr);
+
+	/*
+	 * Access to the switch's PHY registers.
+	 */
+	int	(*phy_read)(struct dsa_switch *ds, int port, int regnum);
+	int	(*phy_write)(struct dsa_switch *ds, int port,
+			     int regnum, u16 val);
+
+	/*
+	 * Link state polling and IRQ handling.
+	 */
+	void	(*poll_link)(struct dsa_switch *ds);
+
+	/*
+	 * ethtool hardware statistics.
+	 */
+	void	(*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
+	void	(*get_ethtool_stats)(struct dsa_switch *ds,
+				     int port, uint64_t *data);
+	int	(*get_sset_count)(struct dsa_switch *ds);
+};
+
+/* dsa.c */
+extern char dsa_driver_version[];
+void register_switch_driver(struct dsa_switch_driver *type);
+void unregister_switch_driver(struct dsa_switch_driver *type);
+
+/* slave.c */
+void dsa_slave_mii_bus_init(struct dsa_switch *ds);
+struct net_device *dsa_slave_create(struct dsa_switch *ds,
+				    struct device *parent,
+				    int port, char *name);
+
+/* tag_edsa.c */
+int edsa_xmit(struct sk_buff *skb, struct net_device *dev);
+
+
+#endif
diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c
new file mode 100644
index 00000000000..147818cc706
--- /dev/null
+++ b/net/dsa/mv88e6123_61_65.c
@@ -0,0 +1,417 @@
+/*
+ * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include "dsa_priv.h"
+#include "mv88e6xxx.h"
+
+static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr)
+{
+	int ret;
+
+	ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
+	if (ret >= 0) {
+		ret &= 0xfff0;
+		if (ret == 0x1210)
+			return "Marvell 88E6123";
+		if (ret == 0x1610)
+			return "Marvell 88E6161";
+		if (ret == 0x1650)
+			return "Marvell 88E6165";
+	}
+
+	return NULL;
+}
+
+static int mv88e6123_61_65_switch_reset(struct dsa_switch *ds)
+{
+	int i;
+	int ret;
+
+	/*
+	 * Set all ports to the disabled state.
+	 */
+	for (i = 0; i < 8; i++) {
+		ret = REG_READ(REG_PORT(i), 0x04);
+		REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
+	}
+
+	/*
+	 * Wait for transmit queues to drain.
+	 */
+	msleep(2);
+
+	/*
+	 * Reset the switch.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
+
+	/*
+	 * Wait up to one second for reset to complete.
+	 */
+	for (i = 0; i < 1000; i++) {
+		ret = REG_READ(REG_GLOBAL, 0x00);
+		if ((ret & 0xc800) == 0xc800)
+			break;
+
+		msleep(1);
+	}
+	if (i == 1000)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int mv88e6123_61_65_setup_global(struct dsa_switch *ds)
+{
+	int ret;
+	int i;
+
+	/*
+	 * Disable the PHY polling unit (since there won't be any
+	 * external PHYs to poll), don't discard packets with
+	 * excessive collisions, and mask all interrupt sources.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x04, 0x0000);
+
+	/*
+	 * Set the default address aging time to 5 minutes, and
+	 * enable address learn messages to be sent to all message
+	 * ports.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x0a, 0x0148);
+
+	/*
+	 * Configure the priority mapping registers.
+	 */
+	ret = mv88e6xxx_config_prio(ds);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Configure the cpu port, and configure the cpu port as the
+	 * port to which ingress and egress monitor frames are to be
+	 * sent.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1110));
+
+	/*
+	 * Disable remote management for now, and set the switch's
+	 * DSA device number to zero.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x1c, 0x0000);
+
+	/*
+	 * Send all frames with destination addresses matching
+	 * 01:80:c2:00:00:2x to the CPU port.
+	 */
+	REG_WRITE(REG_GLOBAL2, 0x02, 0xffff);
+
+	/*
+	 * Send all frames with destination addresses matching
+	 * 01:80:c2:00:00:0x to the CPU port.
+	 */
+	REG_WRITE(REG_GLOBAL2, 0x03, 0xffff);
+
+	/*
+	 * Disable the loopback filter, disable flow control
+	 * messages, disable flood broadcast override, disable
+	 * removing of provider tags, disable ATU age violation
+	 * interrupts, disable tag flow control, force flow
+	 * control priority to the highest, and send all special
+	 * multicast frames to the CPU at the highest priority.
+	 */
+	REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
+
+	/*
+	 * Map all DSA device IDs to the CPU port.
+	 */
+	for (i = 0; i < 32; i++)
+		REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port);
+
+	/*
+	 * Clear all trunk masks.
+	 */
+	for (i = 0; i < 8; i++)
+		REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff);
+
+	/*
+	 * Clear all trunk mappings.
+	 */
+	for (i = 0; i < 16; i++)
+		REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11));
+
+	/*
+	 * Disable ingress rate limiting by resetting all ingress
+	 * rate limit registers to their initial state.
+	 */
+	for (i = 0; i < 6; i++)
+		REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8));
+
+	/*
+	 * Initialise cross-chip port VLAN table to reset defaults.
+	 */
+	REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000);
+
+	/*
+	 * Clear the priority override table.
+	 */
+	for (i = 0; i < 16; i++)
+		REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8));
+
+	/* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */
+
+	return 0;
+}
+
+static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
+{
+	int addr = REG_PORT(p);
+
+	/*
+	 * MAC Forcing register: don't force link, speed, duplex
+	 * or flow control state to any particular values.
+	 */
+	REG_WRITE(addr, 0x01, 0x0003);
+
+	/*
+	 * Do not limit the period of time that this port can be
+	 * paused for by the remote end or the period of time that
+	 * this port can pause the remote end.
+	 */
+	REG_WRITE(addr, 0x02, 0x0000);
+
+	/*
+	 * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
+	 * configure the EDSA tagging mode if this is the CPU port,
+	 * disable Header mode, enable IGMP/MLD snooping, disable VLAN
+	 * tunneling, determine priority by looking at 802.1p and IP
+	 * priority fields (IP prio has precedence), and set STP state
+	 * to Forwarding.  Finally, if this is the CPU port, additionally
+	 * enable forwarding of unknown unicast and multicast addresses.
+	 */
+	REG_WRITE(addr, 0x04,
+			(p == ds->cpu_port) ? 0x373f : 0x0433);
+
+	/*
+	 * Port Control 1: disable trunking.  Also, if this is the
+	 * CPU port, enable learn messages to be sent to this port.
+	 */
+	REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000);
+
+	/*
+	 * Port based VLAN map: give each port its own address
+	 * database, allow the CPU port to talk to each of the 'real'
+	 * ports, and allow each of the 'real' ports to only talk to
+	 * the CPU port.
+	 */
+	REG_WRITE(addr, 0x06,
+			((p & 0xf) << 12) |
+			 ((p == ds->cpu_port) ?
+				ds->valid_port_mask :
+				(1 << ds->cpu_port)));
+
+	/*
+	 * Default VLAN ID and priority: don't set a default VLAN
+	 * ID, and set the default packet priority to zero.
+	 */
+	REG_WRITE(addr, 0x07, 0x0000);
+
+	/*
+	 * Port Control 2: don't force a good FCS, set the maximum
+	 * frame size to 10240 bytes, don't let the switch add or
+	 * strip 802.1q tags, don't discard tagged or untagged frames
+	 * on this port, do a destination address lookup on all
+	 * received packets as usual, disable ARP mirroring and don't
+	 * send a copy of all transmitted/received frames on this port
+	 * to the CPU.
+	 */
+	REG_WRITE(addr, 0x08, 0x2080);
+
+	/*
+	 * Egress rate control: disable egress rate control.
+	 */
+	REG_WRITE(addr, 0x09, 0x0001);
+
+	/*
+	 * Egress rate control 2: disable egress rate control.
+	 */
+	REG_WRITE(addr, 0x0a, 0x0000);
+
+	/*
+	 * Port Association Vector: when learning source addresses
+	 * of packets, add the address to the address database using
+	 * a port bitmap that has only the bit for this port set and
+	 * the other bits clear.
+	 */
+	REG_WRITE(addr, 0x0b, 1 << p);
+
+	/*
+	 * Port ATU control: disable limiting the number of address
+	 * database entries that this port is allowed to use.
+	 */
+	REG_WRITE(addr, 0x0c, 0x0000);
+
+	/*
+	 * Priorit Override: disable DA, SA and VTU priority override.
+	 */
+	REG_WRITE(addr, 0x0d, 0x0000);
+
+	/*
+	 * Port Ethertype: use the Ethertype DSA Ethertype value.
+	 */
+	REG_WRITE(addr, 0x0f, ETH_P_EDSA);
+
+	/*
+	 * Tag Remap: use an identity 802.1p prio -> switch prio
+	 * mapping.
+	 */
+	REG_WRITE(addr, 0x18, 0x3210);
+
+	/*
+	 * Tag Remap 2: use an identity 802.1p prio -> switch prio
+	 * mapping.
+	 */
+	REG_WRITE(addr, 0x19, 0x7654);
+
+	return 0;
+}
+
+static int mv88e6123_61_65_setup(struct dsa_switch *ds)
+{
+	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	int i;
+	int ret;
+
+	mutex_init(&ps->smi_mutex);
+	mutex_init(&ps->stats_mutex);
+
+	ret = mv88e6123_61_65_switch_reset(ds);
+	if (ret < 0)
+		return ret;
+
+	/* @@@ initialise vtu and atu */
+
+	ret = mv88e6123_61_65_setup_global(ds);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < 6; i++) {
+		ret = mv88e6123_61_65_setup_port(ds, i);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int mv88e6123_61_65_port_to_phy_addr(int port)
+{
+	if (port >= 0 && port <= 4)
+		return port;
+	return -1;
+}
+
+static int
+mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum)
+{
+	int addr = mv88e6123_61_65_port_to_phy_addr(port);
+	return mv88e6xxx_phy_read(ds, addr, regnum);
+}
+
+static int
+mv88e6123_61_65_phy_write(struct dsa_switch *ds,
+			      int port, int regnum, u16 val)
+{
+	int addr = mv88e6123_61_65_port_to_phy_addr(port);
+	return mv88e6xxx_phy_write(ds, addr, regnum, val);
+}
+
+static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = {
+	{ "in_good_octets", 8, 0x00, },
+	{ "in_bad_octets", 4, 0x02, },
+	{ "in_unicast", 4, 0x04, },
+	{ "in_broadcasts", 4, 0x06, },
+	{ "in_multicasts", 4, 0x07, },
+	{ "in_pause", 4, 0x16, },
+	{ "in_undersize", 4, 0x18, },
+	{ "in_fragments", 4, 0x19, },
+	{ "in_oversize", 4, 0x1a, },
+	{ "in_jabber", 4, 0x1b, },
+	{ "in_rx_error", 4, 0x1c, },
+	{ "in_fcs_error", 4, 0x1d, },
+	{ "out_octets", 8, 0x0e, },
+	{ "out_unicast", 4, 0x10, },
+	{ "out_broadcasts", 4, 0x13, },
+	{ "out_multicasts", 4, 0x12, },
+	{ "out_pause", 4, 0x15, },
+	{ "excessive", 4, 0x11, },
+	{ "collisions", 4, 0x1e, },
+	{ "deferred", 4, 0x05, },
+	{ "single", 4, 0x14, },
+	{ "multiple", 4, 0x17, },
+	{ "out_fcs_error", 4, 0x03, },
+	{ "late", 4, 0x1f, },
+	{ "hist_64bytes", 4, 0x08, },
+	{ "hist_65_127bytes", 4, 0x09, },
+	{ "hist_128_255bytes", 4, 0x0a, },
+	{ "hist_256_511bytes", 4, 0x0b, },
+	{ "hist_512_1023bytes", 4, 0x0c, },
+	{ "hist_1024_max_bytes", 4, 0x0d, },
+};
+
+static void
+mv88e6123_61_65_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+{
+	mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats),
+			      mv88e6123_61_65_hw_stats, port, data);
+}
+
+static void
+mv88e6123_61_65_get_ethtool_stats(struct dsa_switch *ds,
+				  int port, uint64_t *data)
+{
+	mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats),
+				    mv88e6123_61_65_hw_stats, port, data);
+}
+
+static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds)
+{
+	return ARRAY_SIZE(mv88e6123_61_65_hw_stats);
+}
+
+static struct dsa_switch_driver mv88e6123_61_65_switch_driver = {
+	.tag_protocol		= __constant_htons(ETH_P_EDSA),
+	.priv_size		= sizeof(struct mv88e6xxx_priv_state),
+	.probe			= mv88e6123_61_65_probe,
+	.setup			= mv88e6123_61_65_setup,
+	.set_addr		= mv88e6xxx_set_addr_indirect,
+	.phy_read		= mv88e6123_61_65_phy_read,
+	.phy_write		= mv88e6123_61_65_phy_write,
+	.poll_link		= mv88e6xxx_poll_link,
+	.get_strings		= mv88e6123_61_65_get_strings,
+	.get_ethtool_stats	= mv88e6123_61_65_get_ethtool_stats,
+	.get_sset_count		= mv88e6123_61_65_get_sset_count,
+};
+
+int __init mv88e6123_61_65_init(void)
+{
+	register_switch_driver(&mv88e6123_61_65_switch_driver);
+	return 0;
+}
+module_init(mv88e6123_61_65_init);
+
+void __exit mv88e6123_61_65_cleanup(void)
+{
+	unregister_switch_driver(&mv88e6123_61_65_switch_driver);
+}
+module_exit(mv88e6123_61_65_cleanup);
diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c
new file mode 100644
index 00000000000..13d2328a240
--- /dev/null
+++ b/net/dsa/mv88e6xxx.c
@@ -0,0 +1,377 @@
+/*
+ * net/dsa/mv88e6xxx.c - Marvell 88e6xxx switch chip support
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include "dsa_priv.h"
+#include "mv88e6xxx.h"
+
+/*
+ * If the switch's ADDR[4:0] strap pins are strapped to zero, it will
+ * use all 32 SMI bus addresses on its SMI bus, and all switch registers
+ * will be directly accessible on some {device address,register address}
+ * pair.  If the ADDR[4:0] pins are not strapped to zero, the switch
+ * will only respond to SMI transactions to that specific address, and
+ * an indirect addressing mechanism needs to be used to access its
+ * registers.
+ */
+static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		ret = mdiobus_read(bus, sw_addr, 0);
+		if (ret < 0)
+			return ret;
+
+		if ((ret & 0x8000) == 0)
+			return 0;
+	}
+
+	return -ETIMEDOUT;
+}
+
+int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg)
+{
+	int ret;
+
+	if (sw_addr == 0)
+		return mdiobus_read(bus, addr, reg);
+
+	/*
+	 * Wait for the bus to become free.
+	 */
+	ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Transmit the read command.
+	 */
+	ret = mdiobus_write(bus, sw_addr, 0, 0x9800 | (addr << 5) | reg);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Wait for the read command to complete.
+	 */
+	ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Read the data.
+	 */
+	ret = mdiobus_read(bus, sw_addr, 1);
+	if (ret < 0)
+		return ret;
+
+	return ret & 0xffff;
+}
+
+int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
+{
+	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	int ret;
+
+	mutex_lock(&ps->smi_mutex);
+	ret = __mv88e6xxx_reg_read(ds->master_mii_bus,
+				   ds->pd->sw_addr, addr, reg);
+	mutex_unlock(&ps->smi_mutex);
+
+	return ret;
+}
+
+int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
+			  int reg, u16 val)
+{
+	int ret;
+
+	if (sw_addr == 0)
+		return mdiobus_write(bus, addr, reg, val);
+
+	/*
+	 * Wait for the bus to become free.
+	 */
+	ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Transmit the data to write.
+	 */
+	ret = mdiobus_write(bus, sw_addr, 1, val);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Transmit the write command.
+	 */
+	ret = mdiobus_write(bus, sw_addr, 0, 0x9400 | (addr << 5) | reg);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Wait for the write command to complete.
+	 */
+	ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
+{
+	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	int ret;
+
+	mutex_lock(&ps->smi_mutex);
+	ret = __mv88e6xxx_reg_write(ds->master_mii_bus,
+				    ds->pd->sw_addr, addr, reg, val);
+	mutex_unlock(&ps->smi_mutex);
+
+	return ret;
+}
+
+int mv88e6xxx_config_prio(struct dsa_switch *ds)
+{
+	/*
+	 * Configure the IP ToS mapping registers.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x10, 0x0000);
+	REG_WRITE(REG_GLOBAL, 0x11, 0x0000);
+	REG_WRITE(REG_GLOBAL, 0x12, 0x5555);
+	REG_WRITE(REG_GLOBAL, 0x13, 0x5555);
+	REG_WRITE(REG_GLOBAL, 0x14, 0xaaaa);
+	REG_WRITE(REG_GLOBAL, 0x15, 0xaaaa);
+	REG_WRITE(REG_GLOBAL, 0x16, 0xffff);
+	REG_WRITE(REG_GLOBAL, 0x17, 0xffff);
+
+	/*
+	 * Configure the IEEE 802.1p priority mapping register.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x18, 0xfa41);
+
+	return 0;
+}
+
+int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < 6; i++) {
+		int j;
+
+		/*
+		 * Write the MAC address byte.
+		 */
+		REG_WRITE(REG_GLOBAL2, 0x0d, 0x8000 | (i << 8) | addr[i]);
+
+		/*
+		 * Wait for the write to complete.
+		 */
+		for (j = 0; j < 16; j++) {
+			ret = REG_READ(REG_GLOBAL2, 0x0d);
+			if ((ret & 0x8000) == 0)
+				break;
+		}
+		if (j == 16)
+			return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum)
+{
+	if (addr >= 0)
+		return mv88e6xxx_reg_read(ds, addr, regnum);
+	return 0xffff;
+}
+
+int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val)
+{
+	if (addr >= 0)
+		return mv88e6xxx_reg_write(ds, addr, regnum, val);
+	return 0;
+}
+
+void mv88e6xxx_poll_link(struct dsa_switch *ds)
+{
+	int i;
+
+	for (i = 0; i < DSA_MAX_PORTS; i++) {
+		struct net_device *dev;
+		int port_status;
+		int link;
+		int speed;
+		int duplex;
+		int fc;
+
+		dev = ds->ports[i];
+		if (dev == NULL)
+			continue;
+
+		link = 0;
+		if (dev->flags & IFF_UP) {
+			port_status = mv88e6xxx_reg_read(ds, REG_PORT(i), 0x00);
+			if (port_status < 0)
+				continue;
+
+			link = !!(port_status & 0x0800);
+		}
+
+		if (!link) {
+			if (netif_carrier_ok(dev)) {
+				printk(KERN_INFO "%s: link down\n", dev->name);
+				netif_carrier_off(dev);
+			}
+			continue;
+		}
+
+		switch (port_status & 0x0300) {
+		case 0x0000:
+			speed = 10;
+			break;
+		case 0x0100:
+			speed = 100;
+			break;
+		case 0x0200:
+			speed = 1000;
+			break;
+		default:
+			speed = -1;
+			break;
+		}
+		duplex = (port_status & 0x0400) ? 1 : 0;
+		fc = (port_status & 0x8000) ? 1 : 0;
+
+		if (!netif_carrier_ok(dev)) {
+			printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, "
+					 "flow control %sabled\n", dev->name,
+					 speed, duplex ? "full" : "half",
+					 fc ? "en" : "dis");
+			netif_carrier_on(dev);
+		}
+	}
+}
+
+static int mv88e6xxx_stats_wait(struct dsa_switch *ds)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < 10; i++) {
+		ret = REG_READ(REG_GLOBAL2, 0x1d);
+		if ((ret & 0x8000) == 0)
+			return 0;
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port)
+{
+	int ret;
+
+	/*
+	 * Snapshot the hardware statistics counters for this port.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x1d, 0xdc00 | port);
+
+	/*
+	 * Wait for the snapshotting to complete.
+	 */
+	ret = mv88e6xxx_stats_wait(ds);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val)
+{
+	u32 _val;
+	int ret;
+
+	*val = 0;
+
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1d, 0xcc00 | stat);
+	if (ret < 0)
+		return;
+
+	ret = mv88e6xxx_stats_wait(ds);
+	if (ret < 0)
+		return;
+
+	ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1e);
+	if (ret < 0)
+		return;
+
+	_val = ret << 16;
+
+	ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1f);
+	if (ret < 0)
+		return;
+
+	*val = _val | ret;
+}
+
+void mv88e6xxx_get_strings(struct dsa_switch *ds,
+			   int nr_stats, struct mv88e6xxx_hw_stat *stats,
+			   int port, uint8_t *data)
+{
+	int i;
+
+	for (i = 0; i < nr_stats; i++) {
+		memcpy(data + i * ETH_GSTRING_LEN,
+		       stats[i].string, ETH_GSTRING_LEN);
+	}
+}
+
+void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
+				 int nr_stats, struct mv88e6xxx_hw_stat *stats,
+				 int port, uint64_t *data)
+{
+	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	int ret;
+	int i;
+
+	mutex_lock(&ps->stats_mutex);
+
+	ret = mv88e6xxx_stats_snapshot(ds, port);
+	if (ret < 0) {
+		mutex_unlock(&ps->stats_mutex);
+		return;
+	}
+
+	/*
+	 * Read each of the counters.
+	 */
+	for (i = 0; i < nr_stats; i++) {
+		struct mv88e6xxx_hw_stat *s = stats + i;
+		u32 low;
+		u32 high;
+
+		mv88e6xxx_stats_read(ds, s->reg, &low);
+		if (s->sizeof_stat == 8)
+			mv88e6xxx_stats_read(ds, s->reg + 1, &high);
+		else
+			high = 0;
+
+		data[i] = (((u64)high) << 32) | low;
+	}
+
+	mutex_unlock(&ps->stats_mutex);
+}
diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h
new file mode 100644
index 00000000000..a004d4d0208
--- /dev/null
+++ b/net/dsa/mv88e6xxx.h
@@ -0,0 +1,77 @@
+/*
+ * net/dsa/mv88e6xxx.h - Marvell 88e6xxx switch chip support
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __MV88E6XXX_H
+#define __MV88E6XXX_H
+
+#define REG_PORT(p)		(0x10 + (p))
+#define REG_GLOBAL		0x1b
+#define REG_GLOBAL2		0x1c
+
+struct mv88e6xxx_priv_state {
+	/*
+	 * When using multi-chip addressing, this mutex protects
+	 * access to the indirect access registers.  (In single-chip
+	 * mode, this mutex is effectively useless.)
+	 */
+	struct mutex	smi_mutex;
+
+	/*
+	 * This mutex serialises access to the statistics unit.
+	 * Hold this mutex over snapshot + dump sequences.
+	 */
+	struct mutex	stats_mutex;
+};
+
+struct mv88e6xxx_hw_stat {
+	char string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int reg;
+};
+
+int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg);
+int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg);
+int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
+                          int reg, u16 val);
+int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val);
+int mv88e6xxx_config_prio(struct dsa_switch *ds);
+int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr);
+int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum);
+int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val);
+void mv88e6xxx_poll_link(struct dsa_switch *ds);
+void mv88e6xxx_get_strings(struct dsa_switch *ds,
+			   int nr_stats, struct mv88e6xxx_hw_stat *stats,
+			   int port, uint8_t *data);
+void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
+				 int nr_stats, struct mv88e6xxx_hw_stat *stats,
+				 int port, uint64_t *data);
+
+#define REG_READ(addr, reg)						\
+	({								\
+		int __ret;						\
+									\
+		__ret = mv88e6xxx_reg_read(ds, addr, reg);		\
+		if (__ret < 0)						\
+			return __ret;					\
+		__ret;							\
+	})
+
+#define REG_WRITE(addr, reg, val)					\
+	({								\
+		int __ret;						\
+									\
+		__ret = mv88e6xxx_reg_write(ds, addr, reg, val);	\
+		if (__ret < 0)						\
+			return __ret;					\
+	})
+
+
+
+#endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
new file mode 100644
index 00000000000..3cb331e98b8
--- /dev/null
+++ b/net/dsa/slave.c
@@ -0,0 +1,288 @@
+/*
+ * net/dsa/slave.c - Slave device handling
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include "dsa_priv.h"
+
+/* slave mii_bus handling ***************************************************/
+static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
+{
+	struct dsa_switch *ds = bus->priv;
+
+	if (ds->valid_port_mask & (1 << addr))
+		return ds->drv->phy_read(ds, addr, reg);
+
+	return 0xffff;
+}
+
+static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val)
+{
+	struct dsa_switch *ds = bus->priv;
+
+	if (ds->valid_port_mask & (1 << addr))
+		return ds->drv->phy_write(ds, addr, reg, val);
+
+	return 0;
+}
+
+void dsa_slave_mii_bus_init(struct dsa_switch *ds)
+{
+	ds->slave_mii_bus->priv = (void *)ds;
+	ds->slave_mii_bus->name = "dsa slave smi";
+	ds->slave_mii_bus->read = dsa_slave_phy_read;
+	ds->slave_mii_bus->write = dsa_slave_phy_write;
+	snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x",
+			ds->master_mii_bus->id, ds->pd->sw_addr);
+	ds->slave_mii_bus->parent = &(ds->master_mii_bus->dev);
+}
+
+
+/* slave device handling ****************************************************/
+static int dsa_slave_open(struct net_device *dev)
+{
+	return 0;
+}
+
+static int dsa_slave_close(struct net_device *dev)
+{
+	return 0;
+}
+
+static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct net_device *master = p->parent->master_netdev;
+
+	if (change & IFF_ALLMULTI)
+		dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
+	if (change & IFF_PROMISC)
+		dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1);
+}
+
+static void dsa_slave_set_rx_mode(struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct net_device *master = p->parent->master_netdev;
+
+	dev_mc_sync(master, dev);
+	dev_unicast_sync(master, dev);
+}
+
+static int dsa_slave_set_mac_address(struct net_device *dev, void *addr)
+{
+	memcpy(dev->dev_addr, addr + 2, 6);
+
+	return 0;
+}
+
+static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct mii_ioctl_data *mii_data = if_mii(ifr);
+
+	if (p->phy != NULL)
+		return phy_mii_ioctl(p->phy, mii_data, cmd);
+
+	return -EOPNOTSUPP;
+}
+
+
+/* ethtool operations *******************************************************/
+static int
+dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	int err;
+
+	err = -EOPNOTSUPP;
+	if (p->phy != NULL) {
+		err = phy_read_status(p->phy);
+		if (err == 0)
+			err = phy_ethtool_gset(p->phy, cmd);
+	}
+
+	return err;
+}
+
+static int
+dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+
+	if (p->phy != NULL)
+		return phy_ethtool_sset(p->phy, cmd);
+
+	return -EOPNOTSUPP;
+}
+
+static void dsa_slave_get_drvinfo(struct net_device *dev,
+				  struct ethtool_drvinfo *drvinfo)
+{
+	strncpy(drvinfo->driver, "dsa", 32);
+	strncpy(drvinfo->version, dsa_driver_version, 32);
+	strncpy(drvinfo->fw_version, "N/A", 32);
+	strncpy(drvinfo->bus_info, "platform", 32);
+}
+
+static int dsa_slave_nway_reset(struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+
+	if (p->phy != NULL)
+		return genphy_restart_aneg(p->phy);
+
+	return -EOPNOTSUPP;
+}
+
+static u32 dsa_slave_get_link(struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+
+	if (p->phy != NULL) {
+		genphy_update_link(p->phy);
+		return p->phy->link;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static void dsa_slave_get_strings(struct net_device *dev,
+				  uint32_t stringset, uint8_t *data)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	if (stringset == ETH_SS_STATS) {
+		int len = ETH_GSTRING_LEN;
+
+		strncpy(data, "tx_packets", len);
+		strncpy(data + len, "tx_bytes", len);
+		strncpy(data + 2 * len, "rx_packets", len);
+		strncpy(data + 3 * len, "rx_bytes", len);
+		if (ds->drv->get_strings != NULL)
+			ds->drv->get_strings(ds, p->port, data + 4 * len);
+	}
+}
+
+static void dsa_slave_get_ethtool_stats(struct net_device *dev,
+					struct ethtool_stats *stats,
+					uint64_t *data)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	data[0] = p->dev->stats.tx_packets;
+	data[1] = p->dev->stats.tx_bytes;
+	data[2] = p->dev->stats.rx_packets;
+	data[3] = p->dev->stats.rx_bytes;
+	if (ds->drv->get_ethtool_stats != NULL)
+		ds->drv->get_ethtool_stats(ds, p->port, data + 4);
+}
+
+static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	if (sset == ETH_SS_STATS) {
+		int count;
+
+		count = 4;
+		if (ds->drv->get_sset_count != NULL)
+			count += ds->drv->get_sset_count(ds);
+
+		return count;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static const struct ethtool_ops dsa_slave_ethtool_ops = {
+	.get_settings		= dsa_slave_get_settings,
+	.set_settings		= dsa_slave_set_settings,
+	.get_drvinfo		= dsa_slave_get_drvinfo,
+	.nway_reset		= dsa_slave_nway_reset,
+	.get_link		= dsa_slave_get_link,
+	.set_sg			= ethtool_op_set_sg,
+	.get_strings		= dsa_slave_get_strings,
+	.get_ethtool_stats	= dsa_slave_get_ethtool_stats,
+	.get_sset_count		= dsa_slave_get_sset_count,
+};
+
+
+/* slave device setup *******************************************************/
+struct net_device *
+dsa_slave_create(struct dsa_switch *ds, struct device *parent,
+		 int port, char *name)
+{
+	struct net_device *master = ds->master_netdev;
+	struct net_device *slave_dev;
+	struct dsa_slave_priv *p;
+	int ret;
+
+	slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv),
+				 name, ether_setup);
+	if (slave_dev == NULL)
+		return slave_dev;
+
+	slave_dev->features = master->vlan_features;
+	SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops);
+	memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN);
+	slave_dev->tx_queue_len = 0;
+	switch (ds->tag_protocol) {
+#ifdef CONFIG_NET_DSA_TAG_EDSA
+	case htons(ETH_P_EDSA):
+		slave_dev->hard_start_xmit = edsa_xmit;
+		break;
+#endif
+	default:
+		BUG();
+	}
+	slave_dev->open = dsa_slave_open;
+	slave_dev->stop = dsa_slave_close;
+	slave_dev->change_rx_flags = dsa_slave_change_rx_flags;
+	slave_dev->set_rx_mode = dsa_slave_set_rx_mode;
+	slave_dev->set_multicast_list = dsa_slave_set_rx_mode;
+	slave_dev->set_mac_address = dsa_slave_set_mac_address;
+	slave_dev->do_ioctl = dsa_slave_ioctl;
+	SET_NETDEV_DEV(slave_dev, parent);
+	slave_dev->vlan_features = master->vlan_features;
+
+	p = netdev_priv(slave_dev);
+	p->dev = slave_dev;
+	p->parent = ds;
+	p->port = port;
+	p->phy = ds->slave_mii_bus->phy_map[port];
+
+	ret = register_netdev(slave_dev);
+	if (ret) {
+		printk(KERN_ERR "%s: error %d registering interface %s\n",
+				master->name, ret, slave_dev->name);
+		free_netdev(slave_dev);
+		return NULL;
+	}
+
+	netif_carrier_off(slave_dev);
+
+	if (p->phy != NULL) {
+		phy_attach(slave_dev, p->phy->dev.bus_id,
+			   0, PHY_INTERFACE_MODE_GMII);
+
+		p->phy->autoneg = AUTONEG_ENABLE;
+		p->phy->speed = 0;
+		p->phy->duplex = 0;
+		p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg;
+		phy_start_aneg(p->phy);
+	}
+
+	return slave_dev;
+}
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
new file mode 100644
index 00000000000..f985ea99384
--- /dev/null
+++ b/net/dsa/tag_edsa.c
@@ -0,0 +1,213 @@
+/*
+ * net/dsa/tag_edsa.c - Ethertype DSA tagging
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include "dsa_priv.h"
+
+#define DSA_HLEN	4
+#define EDSA_HLEN	8
+
+int edsa_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	u8 *edsa_header;
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	/*
+	 * Convert the outermost 802.1q tag to a DSA tag and prepend
+	 * a DSA ethertype field is the packet is tagged, or insert
+	 * a DSA ethertype plus DSA tag between the addresses and the
+	 * current ethertype field if the packet is untagged.
+	 */
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		if (skb_cow_head(skb, DSA_HLEN) < 0)
+			goto out_free;
+		skb_push(skb, DSA_HLEN);
+
+		memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN);
+
+		/*
+		 * Construct tagged FROM_CPU DSA tag from 802.1q tag.
+		 */
+		edsa_header = skb->data + 2 * ETH_ALEN;
+		edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff;
+		edsa_header[1] = ETH_P_EDSA & 0xff;
+		edsa_header[2] = 0x00;
+		edsa_header[3] = 0x00;
+		edsa_header[4] = 0x60;
+		edsa_header[5] = p->port << 3;
+
+		/*
+		 * Move CFI field from byte 6 to byte 5.
+		 */
+		if (edsa_header[6] & 0x10) {
+			edsa_header[5] |= 0x01;
+			edsa_header[6] &= ~0x10;
+		}
+	} else {
+		if (skb_cow_head(skb, EDSA_HLEN) < 0)
+			goto out_free;
+		skb_push(skb, EDSA_HLEN);
+
+		memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN);
+
+		/*
+		 * Construct untagged FROM_CPU DSA tag.
+		 */
+		edsa_header = skb->data + 2 * ETH_ALEN;
+		edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff;
+		edsa_header[1] = ETH_P_EDSA & 0xff;
+		edsa_header[2] = 0x00;
+		edsa_header[3] = 0x00;
+		edsa_header[4] = 0x40;
+		edsa_header[5] = p->port << 3;
+		edsa_header[6] = 0x00;
+		edsa_header[7] = 0x00;
+	}
+
+	skb->protocol = htons(ETH_P_EDSA);
+
+	skb->dev = p->parent->master_netdev;
+	dev_queue_xmit(skb);
+
+	return NETDEV_TX_OK;
+
+out_free:
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
+		    struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct dsa_switch *ds = dev->dsa_ptr;
+	u8 *edsa_header;
+	int source_port;
+
+	if (unlikely(ds == NULL))
+		goto out_drop;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
+		goto out_drop;
+
+	/*
+	 * Skip the two null bytes after the ethertype.
+	 */
+	edsa_header = skb->data + 2;
+
+	/*
+	 * Check that frame type is either TO_CPU or FORWARD, and
+	 * that the source device is zero.
+	 */
+	if ((edsa_header[0] & 0xdf) != 0x00 && (edsa_header[0] & 0xdf) != 0xc0)
+		goto out_drop;
+
+	/*
+	 * Check that the source port is a registered DSA port.
+	 */
+	source_port = (edsa_header[1] >> 3) & 0x1f;
+	if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+		goto out_drop;
+
+	/*
+	 * If the 'tagged' bit is set, convert the DSA tag to a 802.1q
+	 * tag and delete the ethertype part.  If the 'tagged' bit is
+	 * clear, delete the ethertype and the DSA tag parts.
+	 */
+	if (edsa_header[0] & 0x20) {
+		u8 new_header[4];
+
+		/*
+		 * Insert 802.1q ethertype and copy the VLAN-related
+		 * fields, but clear the bit that will hold CFI (since
+		 * DSA uses that bit location for another purpose).
+		 */
+		new_header[0] = (ETH_P_8021Q >> 8) & 0xff;
+		new_header[1] = ETH_P_8021Q & 0xff;
+		new_header[2] = edsa_header[2] & ~0x10;
+		new_header[3] = edsa_header[3];
+
+		/*
+		 * Move CFI bit from its place in the DSA header to
+		 * its 802.1q-designated place.
+		 */
+		if (edsa_header[1] & 0x01)
+			new_header[2] |= 0x10;
+
+		skb_pull_rcsum(skb, DSA_HLEN);
+
+		/*
+		 * Update packet checksum if skb is CHECKSUM_COMPLETE.
+		 */
+		if (skb->ip_summed == CHECKSUM_COMPLETE) {
+			__wsum c = skb->csum;
+			c = csum_add(c, csum_partial(new_header + 2, 2, 0));
+			c = csum_sub(c, csum_partial(edsa_header + 2, 2, 0));
+			skb->csum = c;
+		}
+
+		memcpy(edsa_header, new_header, DSA_HLEN);
+
+		memmove(skb->data - ETH_HLEN,
+			skb->data - ETH_HLEN - DSA_HLEN,
+			2 * ETH_ALEN);
+	} else {
+		/*
+		 * Remove DSA tag and update checksum.
+		 */
+		skb_pull_rcsum(skb, EDSA_HLEN);
+		memmove(skb->data - ETH_HLEN,
+			skb->data - ETH_HLEN - EDSA_HLEN,
+			2 * ETH_ALEN);
+	}
+
+	skb->dev = ds->ports[source_port];
+	skb_push(skb, ETH_HLEN);
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	skb->dev->last_rx = jiffies;
+	skb->dev->stats.rx_packets++;
+	skb->dev->stats.rx_bytes += skb->len;
+
+	netif_receive_skb(skb);
+
+	return 0;
+
+out_drop:
+	kfree_skb(skb);
+out:
+	return 0;
+}
+
+static struct packet_type edsa_packet_type = {
+	.type	= __constant_htons(ETH_P_EDSA),
+	.func	= edsa_rcv,
+};
+
+static int __init edsa_init_module(void)
+{
+	dev_add_pack(&edsa_packet_type);
+	return 0;
+}
+module_init(edsa_init_module);
+
+static void __exit edsa_cleanup_module(void)
+{
+	dev_remove_pack(&edsa_packet_type);
+}
+module_exit(edsa_cleanup_module);
-- 
cgit v1.2.3-70-g09d2


From cf85d08fdf4548ee46657ccfb7f9949a85145db5 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Tue, 7 Oct 2008 13:45:02 +0000
Subject: dsa: add support for original DSA tagging format

Most of the DSA switches currently in the field do not support the
Ethertype DSA tagging format that one of the previous patches added
support for, but only the original DSA tagging format.

The original DSA tagging format carries the same information as the
Ethertype DSA tagging format, but with the difference that it does not
have an ethertype field.  In other words, when receiving a packet that
is tagged with an original DSA tag, there is no way of telling in
eth_type_trans() that this packet is in fact a DSA-tagged packet.

This patch adds a hook into eth_type_trans() which is only compiled in
if support for a switch chip that doesn't support Ethertype DSA is
selected, and which checks whether there is a DSA switch driver
instance attached to this network device which uses the old tag format.
If so, it sets the protocol field to ETH_P_DSA without looking at the
packet, so that the packet ends up in the right place.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Tested-by: Nicolas Pitre <nico@marvell.com>
Tested-by: Peter van Valderen <linux@ddcrew.com>
Tested-by: Dirk Teurlings <dirk@upexia.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h  |   1 +
 include/linux/netdevice.h |  11 +++
 include/net/dsa.h         |   2 +
 net/dsa/Kconfig           |   4 +
 net/dsa/Makefile          |   1 +
 net/dsa/dsa.c             |  16 ++++
 net/dsa/dsa_priv.h        |   3 +
 net/dsa/mv88e6123_61_65.c |  18 +++--
 net/dsa/slave.c           |   5 ++
 net/dsa/tag_dsa.c         | 194 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ethernet/eth.c        |  10 +++
 11 files changed, 258 insertions(+), 7 deletions(-)
 create mode 100644 net/dsa/tag_dsa.c

(limited to 'net')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 2140aacb633..32b9dcda68c 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -101,6 +101,7 @@
 #define ETH_P_ECONET	0x0018		/* Acorn Econet			*/
 #define ETH_P_HDLC	0x0019		/* HDLC frames			*/
 #define ETH_P_ARCNET	0x001A		/* 1A for ArcNet :-)            */
+#define ETH_P_DSA	0x001B		/* Distributed Switch Arch.	*/
 #define ETH_P_PHONET	0x00F5		/* Nokia Phonet frames          */
 
 /*
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 794eeb4b346..97f0c64c152 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -42,6 +42,7 @@
 #include <linux/workqueue.h>
 
 #include <net/net_namespace.h>
+#include <net/dsa.h>
 
 struct vlan_group;
 struct ethtool_ops;
@@ -801,6 +802,16 @@ void dev_net_set(struct net_device *dev, struct net *net)
 #endif
 }
 
+static inline bool netdev_uses_dsa_tags(struct net_device *dev)
+{
+#ifdef CONFIG_NET_DSA_TAG_DSA
+	if (dev->dsa_ptr != NULL)
+		return dsa_uses_dsa_tags(dev->dsa_ptr);
+#endif
+
+	return 0;
+}
+
 /**
  *	netdev_priv - access network device private data
  *	@dev: network device
diff --git a/include/net/dsa.h b/include/net/dsa.h
index dc4784f5452..72e509b6a12 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -30,5 +30,7 @@ struct dsa_platform_data {
 	char		*port_names[DSA_MAX_PORTS];
 };
 
+extern bool dsa_uses_dsa_tags(void *dsa_ptr);
+
 
 #endif
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 7cf55e5eb39..6b68016827d 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -10,6 +10,10 @@ menuconfig NET_DSA
 if NET_DSA
 
 # tagging formats
+config NET_DSA_TAG_DSA
+	bool
+	default n
+
 config NET_DSA_TAG_EDSA
 	bool
 	default n
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index b59a6f6bcf5..8b92123315b 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,4 +1,5 @@
 # tagging formats
+obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
 obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
 
 # switch drivers
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6cc5be2ec7f..f8c549281c3 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -202,6 +202,22 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
 }
 
 
+/* hooks for ethertype-less tagging formats *********************************/
+/*
+ * The original DSA tag format and some other tag formats have no
+ * ethertype, which means that we need to add a little hack to the
+ * networking receive path to make sure that received frames get
+ * the right ->protocol assigned to them when one of those tag
+ * formats is in use.
+ */
+bool dsa_uses_dsa_tags(void *dsa_ptr)
+{
+	struct dsa_switch *ds = dsa_ptr;
+
+	return !!(ds->tag_protocol == htons(ETH_P_DSA));
+}
+
+
 /* link polling *************************************************************/
 static void dsa_link_poll_work(struct work_struct *ugly)
 {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 21ee9052079..2f1d68c495e 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -103,6 +103,9 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds,
 				    struct device *parent,
 				    int port, char *name);
 
+/* tag_dsa.c */
+int dsa_xmit(struct sk_buff *skb, struct net_device *dev);
+
 /* tag_edsa.c */
 int edsa_xmit(struct sk_buff *skb, struct net_device *dev);
 
diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c
index 147818cc706..555b164082f 100644
--- a/net/dsa/mv88e6123_61_65.c
+++ b/net/dsa/mv88e6123_61_65.c
@@ -192,15 +192,19 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
 
 	/*
 	 * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
-	 * configure the EDSA tagging mode if this is the CPU port,
-	 * disable Header mode, enable IGMP/MLD snooping, disable VLAN
-	 * tunneling, determine priority by looking at 802.1p and IP
-	 * priority fields (IP prio has precedence), and set STP state
-	 * to Forwarding.  Finally, if this is the CPU port, additionally
-	 * enable forwarding of unknown unicast and multicast addresses.
+	 * configure the requested (DSA/EDSA) tagging mode if this is
+	 * the CPU port, disable Header mode, enable IGMP/MLD snooping,
+	 * disable VLAN tunneling, determine priority by looking at
+	 * 802.1p and IP priority fields (IP prio has precedence), and
+	 * set STP state to Forwarding.  Finally, if this is the CPU
+	 * port, additionally enable forwarding of unknown unicast and
+	 * multicast addresses.
 	 */
 	REG_WRITE(addr, 0x04,
-			(p == ds->cpu_port) ? 0x373f : 0x0433);
+			(p == ds->cpu_port) ?
+			 (ds->tag_protocol == htons(ETH_P_DSA)) ?
+			  0x053f : 0x373f :
+			 0x0433);
 
 	/*
 	 * Port Control 1: disable trunking.  Also, if this is the
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3cb331e98b8..8f8868dd430 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -239,6 +239,11 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN);
 	slave_dev->tx_queue_len = 0;
 	switch (ds->tag_protocol) {
+#ifdef CONFIG_NET_DSA_TAG_DSA
+	case htons(ETH_P_DSA):
+		slave_dev->hard_start_xmit = dsa_xmit;
+		break;
+#endif
 #ifdef CONFIG_NET_DSA_TAG_EDSA
 	case htons(ETH_P_EDSA):
 		slave_dev->hard_start_xmit = edsa_xmit;
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
new file mode 100644
index 00000000000..bdc0510b53b
--- /dev/null
+++ b/net/dsa/tag_dsa.c
@@ -0,0 +1,194 @@
+/*
+ * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include "dsa_priv.h"
+
+#define DSA_HLEN	4
+
+int dsa_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	u8 *dsa_header;
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	/*
+	 * Convert the outermost 802.1q tag to a DSA tag for tagged
+	 * packets, or insert a DSA tag between the addresses and
+	 * the ethertype field for untagged packets.
+	 */
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		if (skb_cow_head(skb, 0) < 0)
+			goto out_free;
+
+		/*
+		 * Construct tagged FROM_CPU DSA tag from 802.1q tag.
+		 */
+		dsa_header = skb->data + 2 * ETH_ALEN;
+		dsa_header[0] = 0x60;
+		dsa_header[1] = p->port << 3;
+
+		/*
+		 * Move CFI field from byte 2 to byte 1.
+		 */
+		if (dsa_header[2] & 0x10) {
+			dsa_header[1] |= 0x01;
+			dsa_header[2] &= ~0x10;
+		}
+	} else {
+		if (skb_cow_head(skb, DSA_HLEN) < 0)
+			goto out_free;
+		skb_push(skb, DSA_HLEN);
+
+		memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN);
+
+		/*
+		 * Construct untagged FROM_CPU DSA tag.
+		 */
+		dsa_header = skb->data + 2 * ETH_ALEN;
+		dsa_header[0] = 0x40;
+		dsa_header[1] = p->port << 3;
+		dsa_header[2] = 0x00;
+		dsa_header[3] = 0x00;
+	}
+
+	skb->protocol = htons(ETH_P_DSA);
+
+	skb->dev = p->parent->master_netdev;
+	dev_queue_xmit(skb);
+
+	return NETDEV_TX_OK;
+
+out_free:
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct dsa_switch *ds = dev->dsa_ptr;
+	u8 *dsa_header;
+	int source_port;
+
+	if (unlikely(ds == NULL))
+		goto out_drop;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
+		goto out_drop;
+
+	/*
+	 * The ethertype field is part of the DSA header.
+	 */
+	dsa_header = skb->data - 2;
+
+	/*
+	 * Check that frame type is either TO_CPU or FORWARD, and
+	 * that the source device is zero.
+	 */
+	if ((dsa_header[0] & 0xdf) != 0x00 && (dsa_header[0] & 0xdf) != 0xc0)
+		goto out_drop;
+
+	/*
+	 * Check that the source port is a registered DSA port.
+	 */
+	source_port = (dsa_header[1] >> 3) & 0x1f;
+	if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+		goto out_drop;
+
+	/*
+	 * Convert the DSA header to an 802.1q header if the 'tagged'
+	 * bit in the DSA header is set.  If the 'tagged' bit is clear,
+	 * delete the DSA header entirely.
+	 */
+	if (dsa_header[0] & 0x20) {
+		u8 new_header[4];
+
+		/*
+		 * Insert 802.1q ethertype and copy the VLAN-related
+		 * fields, but clear the bit that will hold CFI (since
+		 * DSA uses that bit location for another purpose).
+		 */
+		new_header[0] = (ETH_P_8021Q >> 8) & 0xff;
+		new_header[1] = ETH_P_8021Q & 0xff;
+		new_header[2] = dsa_header[2] & ~0x10;
+		new_header[3] = dsa_header[3];
+
+		/*
+		 * Move CFI bit from its place in the DSA header to
+		 * its 802.1q-designated place.
+		 */
+		if (dsa_header[1] & 0x01)
+			new_header[2] |= 0x10;
+
+		/*
+		 * Update packet checksum if skb is CHECKSUM_COMPLETE.
+		 */
+		if (skb->ip_summed == CHECKSUM_COMPLETE) {
+			__wsum c = skb->csum;
+			c = csum_add(c, csum_partial(new_header + 2, 2, 0));
+			c = csum_sub(c, csum_partial(dsa_header + 2, 2, 0));
+			skb->csum = c;
+		}
+
+		memcpy(dsa_header, new_header, DSA_HLEN);
+	} else {
+		/*
+		 * Remove DSA tag and update checksum.
+		 */
+		skb_pull_rcsum(skb, DSA_HLEN);
+		memmove(skb->data - ETH_HLEN,
+			skb->data - ETH_HLEN - DSA_HLEN,
+			2 * ETH_ALEN);
+	}
+
+	skb->dev = ds->ports[source_port];
+	skb_push(skb, ETH_HLEN);
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	skb->dev->last_rx = jiffies;
+	skb->dev->stats.rx_packets++;
+	skb->dev->stats.rx_bytes += skb->len;
+
+	netif_receive_skb(skb);
+
+	return 0;
+
+out_drop:
+	kfree_skb(skb);
+out:
+	return 0;
+}
+
+static struct packet_type dsa_packet_type = {
+	.type	= __constant_htons(ETH_P_DSA),
+	.func	= dsa_rcv,
+};
+
+static int __init dsa_init_module(void)
+{
+	dev_add_pack(&dsa_packet_type);
+	return 0;
+}
+module_init(dsa_init_module);
+
+static void __exit dsa_cleanup_module(void)
+{
+	dev_remove_pack(&dsa_packet_type);
+}
+module_exit(dsa_cleanup_module);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 647a9edee37..dae47e7a44d 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -57,6 +57,7 @@
 #include <net/sock.h>
 #include <net/ipv6.h>
 #include <net/ip.h>
+#include <net/dsa.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
@@ -184,6 +185,15 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 			skb->pkt_type = PACKET_OTHERHOST;
 	}
 
+	/*
+	 * Some variants of DSA tagging don't have an ethertype field
+	 * at all, so we check here whether one of those tagging
+	 * variants has been configured on the receiving interface,
+	 * and if so, set skb->protocol without looking at the packet.
+	 */
+	if (netdev_uses_dsa_tags(dev))
+		return htons(ETH_P_DSA);
+
 	if (ntohs(eth->h_proto) >= 1536)
 		return eth->h_proto;
 
-- 
cgit v1.2.3-70-g09d2


From 2e5f032095ff101274dfb03d5fd5e06d9aeb83cd Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Tue, 7 Oct 2008 13:45:18 +0000
Subject: dsa: add support for the Marvell 88E6131 switch chip

Add support for the Marvell 88E6131 switch chip.  This chip only
supports the original (ethertype-less) DSA tagging format.

On the 88E6131, there is a PHY Polling Unit (PPU) which has exclusive
access to each of the PHYs's MII management registers.  If we want to
talk to the PHYs from software, we have to disable the PPU and wait
for it to complete its current transaction before we can do so, and we
need to re-enable the PPU afterwards to make sure that the switch will
notice changes in link state and speed on the individual ports as they
occur.

Since disabling the PPU is rather slow, and since MII management
accesses are typically done in bursts, this patch keeps the PPU disabled
for 10ms after a software access completes.  This makes handling the
PPU slightly more complex, but speeds up something like running ethtool
on one of the switch slave interfaces from ~300ms to ~30ms on typical
hardware.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Tested-by: Nicolas Pitre <nico@marvell.com>
Tested-by: Peter van Valderen <linux@ddcrew.com>
Tested-by: Dirk Teurlings <dirk@upexia.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/Kconfig     |  13 ++
 net/dsa/Makefile    |   1 +
 net/dsa/mv88e6131.c | 380 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/dsa/mv88e6xxx.c | 145 ++++++++++++++++++++
 net/dsa/mv88e6xxx.h |  16 +++
 5 files changed, 555 insertions(+)
 create mode 100644 net/dsa/mv88e6131.c

(limited to 'net')

diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 6b68016827d..79bcd76d3f1 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -24,6 +24,19 @@ config NET_DSA_MV88E6XXX
 	bool
 	default n
 
+config NET_DSA_MV88E6XXX_NEED_PPU
+	bool
+	default n
+
+config NET_DSA_MV88E6131
+	bool "Marvell 88E6131 ethernet switch chip support"
+	select NET_DSA_MV88E6XXX
+	select NET_DSA_MV88E6XXX_NEED_PPU
+	select NET_DSA_TAG_DSA
+	---help---
+	  This enables support for the Marvell 88E6131 ethernet switch
+	  chip.
+
 config NET_DSA_MV88E6123_61_65
 	bool "Marvell 88E6123/6161/6165 ethernet switch chip support"
 	select NET_DSA_MV88E6XXX
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 8b92123315b..7fb6f85a69e 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
 # switch drivers
 obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
 obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o
+obj-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o
 
 # the core
 obj-$(CONFIG_NET_DSA) += dsa.o slave.o
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
new file mode 100644
index 00000000000..36e01eb863a
--- /dev/null
+++ b/net/dsa/mv88e6131.c
@@ -0,0 +1,380 @@
+/*
+ * net/dsa/mv88e6131.c - Marvell 88e6131 switch chip support
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include "dsa_priv.h"
+#include "mv88e6xxx.h"
+
+static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr)
+{
+	int ret;
+
+	ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
+	if (ret >= 0) {
+		ret &= 0xfff0;
+		if (ret == 0x1060)
+			return "Marvell 88E6131";
+	}
+
+	return NULL;
+}
+
+static int mv88e6131_switch_reset(struct dsa_switch *ds)
+{
+	int i;
+	int ret;
+
+	/*
+	 * Set all ports to the disabled state.
+	 */
+	for (i = 0; i < 8; i++) {
+		ret = REG_READ(REG_PORT(i), 0x04);
+		REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
+	}
+
+	/*
+	 * Wait for transmit queues to drain.
+	 */
+	msleep(2);
+
+	/*
+	 * Reset the switch.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
+
+	/*
+	 * Wait up to one second for reset to complete.
+	 */
+	for (i = 0; i < 1000; i++) {
+		ret = REG_READ(REG_GLOBAL, 0x00);
+		if ((ret & 0xc800) == 0xc800)
+			break;
+
+		msleep(1);
+	}
+	if (i == 1000)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int mv88e6131_setup_global(struct dsa_switch *ds)
+{
+	int ret;
+	int i;
+
+	/*
+	 * Enable the PHY polling unit, don't discard packets with
+	 * excessive collisions, use a weighted fair queueing scheme
+	 * to arbitrate between packet queues, set the maximum frame
+	 * size to 1632, and mask all interrupt sources.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x04, 0x4400);
+
+	/*
+	 * Set the default address aging time to 5 minutes, and
+	 * enable address learn messages to be sent to all message
+	 * ports.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x0a, 0x0148);
+
+	/*
+	 * Configure the priority mapping registers.
+	 */
+	ret = mv88e6xxx_config_prio(ds);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Set the VLAN ethertype to 0x8100.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x19, 0x8100);
+
+	/*
+	 * Disable ARP mirroring, and configure the cpu port as the
+	 * port to which ingress and egress monitor frames are to be
+	 * sent.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1100) | 0x00f0);
+
+	/*
+	 * Disable cascade port functionality, and set the switch's
+	 * DSA device number to zero.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x1c, 0xe000);
+
+	/*
+	 * Send all frames with destination addresses matching
+	 * 01:80:c2:00:00:0x to the CPU port.
+	 */
+	REG_WRITE(REG_GLOBAL2, 0x03, 0xffff);
+
+	/*
+	 * Ignore removed tag data on doubly tagged packets, disable
+	 * flow control messages, force flow control priority to the
+	 * highest, and send all special multicast frames to the CPU
+	 * port at the higest priority.
+	 */
+	REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
+
+	/*
+	 * Map all DSA device IDs to the CPU port.
+	 */
+	for (i = 0; i < 32; i++)
+		REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port);
+
+	/*
+	 * Clear all trunk masks.
+	 */
+	for (i = 0; i < 8; i++)
+		REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff);
+
+	/*
+	 * Clear all trunk mappings.
+	 */
+	for (i = 0; i < 16; i++)
+		REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11));
+
+	/*
+	 * Force the priority of IGMP/MLD snoop frames and ARP frames
+	 * to the highest setting.
+	 */
+	REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff);
+
+	return 0;
+}
+
+static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
+{
+	int addr = REG_PORT(p);
+
+	/*
+	 * MAC Forcing register: don't force link, speed, duplex
+	 * or flow control state to any particular values.
+	 */
+	REG_WRITE(addr, 0x01, 0x0003);
+
+	/*
+	 * Port Control: disable Core Tag, disable Drop-on-Lock,
+	 * transmit frames unmodified, disable Header mode,
+	 * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN
+	 * tunneling, determine priority by looking at 802.1p and
+	 * IP priority fields (IP prio has precedence), and set STP
+	 * state to Forwarding.  Finally, if this is the CPU port,
+	 * additionally enable DSA tagging and forwarding of unknown
+	 * unicast addresses.
+	 */
+	REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x0537 : 0x0433);
+
+	/*
+	 * Port Control 1: disable trunking.  Also, if this is the
+	 * CPU port, enable learn messages to be sent to this port.
+	 */
+	REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000);
+
+	/*
+	 * Port based VLAN map: give each port its own address
+	 * database, allow the CPU port to talk to each of the 'real'
+	 * ports, and allow each of the 'real' ports to only talk to
+	 * the CPU port.
+	 */
+	REG_WRITE(addr, 0x06,
+			((p & 0xf) << 12) |
+			 ((p == ds->cpu_port) ?
+				ds->valid_port_mask :
+				(1 << ds->cpu_port)));
+
+	/*
+	 * Default VLAN ID and priority: don't set a default VLAN
+	 * ID, and set the default packet priority to zero.
+	 */
+	REG_WRITE(addr, 0x07, 0x0000);
+
+	/*
+	 * Port Control 2: don't force a good FCS, don't use
+	 * VLAN-based, source address-based or destination
+	 * address-based priority overrides, don't let the switch
+	 * add or strip 802.1q tags, don't discard tagged or
+	 * untagged frames on this port, do a destination address
+	 * lookup on received packets as usual, don't send a copy
+	 * of all transmitted/received frames on this port to the
+	 * CPU, and configure the CPU port number.  Also, if this
+	 * is the CPU port, enable forwarding of unknown multicast
+	 * addresses.
+	 */
+	REG_WRITE(addr, 0x08,
+			((p == ds->cpu_port) ? 0x00c0 : 0x0080) |
+			 ds->cpu_port);
+
+	/*
+	 * Rate Control: disable ingress rate limiting.
+	 */
+	REG_WRITE(addr, 0x09, 0x0000);
+
+	/*
+	 * Rate Control 2: disable egress rate limiting.
+	 */
+	REG_WRITE(addr, 0x0a, 0x0000);
+
+	/*
+	 * Port Association Vector: when learning source addresses
+	 * of packets, add the address to the address database using
+	 * a port bitmap that has only the bit for this port set and
+	 * the other bits clear.
+	 */
+	REG_WRITE(addr, 0x0b, 1 << p);
+
+	/*
+	 * Tag Remap: use an identity 802.1p prio -> switch prio
+	 * mapping.
+	 */
+	REG_WRITE(addr, 0x18, 0x3210);
+
+	/*
+	 * Tag Remap 2: use an identity 802.1p prio -> switch prio
+	 * mapping.
+	 */
+	REG_WRITE(addr, 0x19, 0x7654);
+
+	return 0;
+}
+
+static int mv88e6131_setup(struct dsa_switch *ds)
+{
+	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	int i;
+	int ret;
+
+	mutex_init(&ps->smi_mutex);
+	mv88e6xxx_ppu_state_init(ds);
+	mutex_init(&ps->stats_mutex);
+
+	ret = mv88e6131_switch_reset(ds);
+	if (ret < 0)
+		return ret;
+
+	/* @@@ initialise vtu and atu */
+
+	ret = mv88e6131_setup_global(ds);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < 6; i++) {
+		ret = mv88e6131_setup_port(ds, i);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int mv88e6131_port_to_phy_addr(int port)
+{
+	if (port >= 0 && port != 3 && port <= 7)
+		return port;
+	return -1;
+}
+
+static int
+mv88e6131_phy_read(struct dsa_switch *ds, int port, int regnum)
+{
+	int addr = mv88e6131_port_to_phy_addr(port);
+	return mv88e6xxx_phy_read_ppu(ds, addr, regnum);
+}
+
+static int
+mv88e6131_phy_write(struct dsa_switch *ds,
+			      int port, int regnum, u16 val)
+{
+	int addr = mv88e6131_port_to_phy_addr(port);
+	return mv88e6xxx_phy_write_ppu(ds, addr, regnum, val);
+}
+
+static struct mv88e6xxx_hw_stat mv88e6131_hw_stats[] = {
+	{ "in_good_octets", 8, 0x00, },
+	{ "in_bad_octets", 4, 0x02, },
+	{ "in_unicast", 4, 0x04, },
+	{ "in_broadcasts", 4, 0x06, },
+	{ "in_multicasts", 4, 0x07, },
+	{ "in_pause", 4, 0x16, },
+	{ "in_undersize", 4, 0x18, },
+	{ "in_fragments", 4, 0x19, },
+	{ "in_oversize", 4, 0x1a, },
+	{ "in_jabber", 4, 0x1b, },
+	{ "in_rx_error", 4, 0x1c, },
+	{ "in_fcs_error", 4, 0x1d, },
+	{ "out_octets", 8, 0x0e, },
+	{ "out_unicast", 4, 0x10, },
+	{ "out_broadcasts", 4, 0x13, },
+	{ "out_multicasts", 4, 0x12, },
+	{ "out_pause", 4, 0x15, },
+	{ "excessive", 4, 0x11, },
+	{ "collisions", 4, 0x1e, },
+	{ "deferred", 4, 0x05, },
+	{ "single", 4, 0x14, },
+	{ "multiple", 4, 0x17, },
+	{ "out_fcs_error", 4, 0x03, },
+	{ "late", 4, 0x1f, },
+	{ "hist_64bytes", 4, 0x08, },
+	{ "hist_65_127bytes", 4, 0x09, },
+	{ "hist_128_255bytes", 4, 0x0a, },
+	{ "hist_256_511bytes", 4, 0x0b, },
+	{ "hist_512_1023bytes", 4, 0x0c, },
+	{ "hist_1024_max_bytes", 4, 0x0d, },
+};
+
+static void
+mv88e6131_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+{
+	mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6131_hw_stats),
+			      mv88e6131_hw_stats, port, data);
+}
+
+static void
+mv88e6131_get_ethtool_stats(struct dsa_switch *ds,
+				  int port, uint64_t *data)
+{
+	mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6131_hw_stats),
+				    mv88e6131_hw_stats, port, data);
+}
+
+static int mv88e6131_get_sset_count(struct dsa_switch *ds)
+{
+	return ARRAY_SIZE(mv88e6131_hw_stats);
+}
+
+static struct dsa_switch_driver mv88e6131_switch_driver = {
+	.tag_protocol		= __constant_htons(ETH_P_DSA),
+	.priv_size		= sizeof(struct mv88e6xxx_priv_state),
+	.probe			= mv88e6131_probe,
+	.setup			= mv88e6131_setup,
+	.set_addr		= mv88e6xxx_set_addr_direct,
+	.phy_read		= mv88e6131_phy_read,
+	.phy_write		= mv88e6131_phy_write,
+	.poll_link		= mv88e6xxx_poll_link,
+	.get_strings		= mv88e6131_get_strings,
+	.get_ethtool_stats	= mv88e6131_get_ethtool_stats,
+	.get_sset_count		= mv88e6131_get_sset_count,
+};
+
+int __init mv88e6131_init(void)
+{
+	register_switch_driver(&mv88e6131_switch_driver);
+	return 0;
+}
+module_init(mv88e6131_init);
+
+void __exit mv88e6131_cleanup(void)
+{
+	unregister_switch_driver(&mv88e6131_switch_driver);
+}
+module_exit(mv88e6131_cleanup);
diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c
index 13d2328a240..aa6c609c59f 100644
--- a/net/dsa/mv88e6xxx.c
+++ b/net/dsa/mv88e6xxx.c
@@ -165,6 +165,15 @@ int mv88e6xxx_config_prio(struct dsa_switch *ds)
 	return 0;
 }
 
+int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr)
+{
+	REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
+	REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
+	REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
+
+	return 0;
+}
+
 int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr)
 {
 	int i;
@@ -207,6 +216,142 @@ int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val)
 	return 0;
 }
 
+#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU
+static int mv88e6xxx_ppu_disable(struct dsa_switch *ds)
+{
+	int ret;
+	int i;
+
+	ret = REG_READ(REG_GLOBAL, 0x04);
+	REG_WRITE(REG_GLOBAL, 0x04, ret & ~0x4000);
+
+	for (i = 0; i < 1000; i++) {
+	        ret = REG_READ(REG_GLOBAL, 0x00);
+	        msleep(1);
+	        if ((ret & 0xc000) != 0xc000)
+	                return 0;
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int mv88e6xxx_ppu_enable(struct dsa_switch *ds)
+{
+	int ret;
+	int i;
+
+	ret = REG_READ(REG_GLOBAL, 0x04);
+	REG_WRITE(REG_GLOBAL, 0x04, ret | 0x4000);
+
+	for (i = 0; i < 1000; i++) {
+	        ret = REG_READ(REG_GLOBAL, 0x00);
+	        msleep(1);
+	        if ((ret & 0xc000) == 0xc000)
+	                return 0;
+	}
+
+	return -ETIMEDOUT;
+}
+
+static void mv88e6xxx_ppu_reenable_work(struct work_struct *ugly)
+{
+	struct mv88e6xxx_priv_state *ps;
+
+	ps = container_of(ugly, struct mv88e6xxx_priv_state, ppu_work);
+	if (mutex_trylock(&ps->ppu_mutex)) {
+	        struct dsa_switch *ds = ((struct dsa_switch *)ps) - 1;
+
+	        if (mv88e6xxx_ppu_enable(ds) == 0)
+	                ps->ppu_disabled = 0;
+	        mutex_unlock(&ps->ppu_mutex);
+	}
+}
+
+static void mv88e6xxx_ppu_reenable_timer(unsigned long _ps)
+{
+	struct mv88e6xxx_priv_state *ps = (void *)_ps;
+
+	schedule_work(&ps->ppu_work);
+}
+
+static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds)
+{
+	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+	int ret;
+
+	mutex_lock(&ps->ppu_mutex);
+
+	/*
+	 * If the PHY polling unit is enabled, disable it so that
+	 * we can access the PHY registers.  If it was already
+	 * disabled, cancel the timer that is going to re-enable
+	 * it.
+	 */
+	if (!ps->ppu_disabled) {
+	        ret = mv88e6xxx_ppu_disable(ds);
+	        if (ret < 0) {
+	                mutex_unlock(&ps->ppu_mutex);
+	                return ret;
+	        }
+	        ps->ppu_disabled = 1;
+	} else {
+	        del_timer(&ps->ppu_timer);
+	        ret = 0;
+	}
+
+	return ret;
+}
+
+static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds)
+{
+	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+
+	/*
+	 * Schedule a timer to re-enable the PHY polling unit.
+	 */
+	mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10));
+	mutex_unlock(&ps->ppu_mutex);
+}
+
+void mv88e6xxx_ppu_state_init(struct dsa_switch *ds)
+{
+	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
+
+	mutex_init(&ps->ppu_mutex);
+	INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work);
+	init_timer(&ps->ppu_timer);
+	ps->ppu_timer.data = (unsigned long)ps;
+	ps->ppu_timer.function = mv88e6xxx_ppu_reenable_timer;
+}
+
+int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum)
+{
+	int ret;
+
+	ret = mv88e6xxx_ppu_access_get(ds);
+	if (ret >= 0) {
+	        ret = mv88e6xxx_reg_read(ds, addr, regnum);
+	        mv88e6xxx_ppu_access_put(ds);
+	}
+
+	return ret;
+}
+
+int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr,
+			    int regnum, u16 val)
+{
+	int ret;
+
+	ret = mv88e6xxx_ppu_access_get(ds);
+	if (ret >= 0) {
+	        ret = mv88e6xxx_reg_write(ds, addr, regnum, val);
+	        mv88e6xxx_ppu_access_put(ds);
+	}
+
+	return ret;
+}
+#endif
+
 void mv88e6xxx_poll_link(struct dsa_switch *ds)
 {
 	int i;
diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h
index a004d4d0208..eb0e0aaa9f1 100644
--- a/net/dsa/mv88e6xxx.h
+++ b/net/dsa/mv88e6xxx.h
@@ -23,6 +23,17 @@ struct mv88e6xxx_priv_state {
 	 */
 	struct mutex	smi_mutex;
 
+#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU
+	/*
+	 * Handles automatic disabling and re-enabling of the PHY
+	 * polling unit.
+	 */
+	struct mutex		ppu_mutex;
+	int			ppu_disabled;
+	struct work_struct	ppu_work;
+	struct timer_list	ppu_timer;
+#endif
+
 	/*
 	 * This mutex serialises access to the statistics unit.
 	 * Hold this mutex over snapshot + dump sequences.
@@ -42,9 +53,14 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
                           int reg, u16 val);
 int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val);
 int mv88e6xxx_config_prio(struct dsa_switch *ds);
+int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr);
 int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr);
 int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum);
 int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val);
+void mv88e6xxx_ppu_state_init(struct dsa_switch *ds);
+int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum);
+int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr,
+			    int regnum, u16 val);
 void mv88e6xxx_poll_link(struct dsa_switch *ds);
 void mv88e6xxx_get_strings(struct dsa_switch *ds,
 			   int nr_stats, struct mv88e6xxx_hw_stat *stats,
-- 
cgit v1.2.3-70-g09d2


From 396138f03f4521c55ecc3a5dd75d4c56e6323244 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Tue, 7 Oct 2008 13:46:07 +0000
Subject: dsa: add support for Trailer tagging format

This adds support for the Trailer switch tagging format.  This is
another tagging that doesn't explicitly mark tagged packets with a
distinct ethertype, so that we need to add a similar hack in the
receive path as for the Original DSA tagging format.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Tested-by: Byron Bradley <byron.bbradley@gmail.com>
Tested-by: Tim Ellis <tim.ellis@mac.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h  |   1 +
 include/linux/netdevice.h |  10 ++++
 include/net/dsa.h         |   1 +
 net/dsa/Kconfig           |   4 ++
 net/dsa/Makefile          |   1 +
 net/dsa/dsa.c             |   7 +++
 net/dsa/dsa_priv.h        |   3 ++
 net/dsa/slave.c           |   5 ++
 net/dsa/tag_trailer.c     | 130 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ethernet/eth.c        |   2 +
 10 files changed, 164 insertions(+)
 create mode 100644 net/dsa/tag_trailer.c

(limited to 'net')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 32b9dcda68c..a0099e98b5c 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -102,6 +102,7 @@
 #define ETH_P_HDLC	0x0019		/* HDLC frames			*/
 #define ETH_P_ARCNET	0x001A		/* 1A for ArcNet :-)            */
 #define ETH_P_DSA	0x001B		/* Distributed Switch Arch.	*/
+#define ETH_P_TRAILER	0x001C		/* Trailer switch tagging	*/
 #define ETH_P_PHONET	0x00F5		/* Nokia Phonet frames          */
 
 /*
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 97f0c64c152..d3ea3de70a8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -812,6 +812,16 @@ static inline bool netdev_uses_dsa_tags(struct net_device *dev)
 	return 0;
 }
 
+static inline bool netdev_uses_trailer_tags(struct net_device *dev)
+{
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+	if (dev->dsa_ptr != NULL)
+		return dsa_uses_trailer_tags(dev->dsa_ptr);
+#endif
+
+	return 0;
+}
+
 /**
  *	netdev_priv - access network device private data
  *	@dev: network device
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 72e509b6a12..52e97bfca5a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -31,6 +31,7 @@ struct dsa_platform_data {
 };
 
 extern bool dsa_uses_dsa_tags(void *dsa_ptr);
+extern bool dsa_uses_trailer_tags(void *dsa_ptr);
 
 
 #endif
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 79bcd76d3f1..505aa14e67f 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -18,6 +18,10 @@ config NET_DSA_TAG_EDSA
 	bool
 	default n
 
+config NET_DSA_TAG_TRAILER
+	bool
+	default n
+
 
 # switch drivers
 config NET_DSA_MV88E6XXX
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 7fb6f85a69e..63d3c44908b 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,7 @@
 # tagging formats
 obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
 obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
+obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
 
 # switch drivers
 obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index f8c549281c3..33e99462023 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -217,6 +217,13 @@ bool dsa_uses_dsa_tags(void *dsa_ptr)
 	return !!(ds->tag_protocol == htons(ETH_P_DSA));
 }
 
+bool dsa_uses_trailer_tags(void *dsa_ptr)
+{
+	struct dsa_switch *ds = dsa_ptr;
+
+	return !!(ds->tag_protocol == htons(ETH_P_TRAILER));
+}
+
 
 /* link polling *************************************************************/
 static void dsa_link_poll_work(struct work_struct *ugly)
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2f1d68c495e..7063378a1eb 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -109,5 +109,8 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev);
 /* tag_edsa.c */
 int edsa_xmit(struct sk_buff *skb, struct net_device *dev);
 
+/* tag_trailer.c */
+int trailer_xmit(struct sk_buff *skb, struct net_device *dev);
+
 
 #endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8f8868dd430..37616884b8a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -248,6 +248,11 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	case htons(ETH_P_EDSA):
 		slave_dev->hard_start_xmit = edsa_xmit;
 		break;
+#endif
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+	case htons(ETH_P_TRAILER):
+		slave_dev->hard_start_xmit = trailer_xmit;
+		break;
 #endif
 	default:
 		BUG();
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
new file mode 100644
index 00000000000..d3117764b2c
--- /dev/null
+++ b/net/dsa/tag_trailer.c
@@ -0,0 +1,130 @@
+/*
+ * net/dsa/tag_trailer.c - Trailer tag format handling
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include "dsa_priv.h"
+
+int trailer_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct sk_buff *nskb;
+	int padlen;
+	u8 *trailer;
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	/*
+	 * We have to make sure that the trailer ends up as the very
+	 * last 4 bytes of the packet.  This means that we have to pad
+	 * the packet to the minimum ethernet frame size, if necessary,
+	 * before adding the trailer.
+	 */
+	padlen = 0;
+	if (skb->len < 60)
+		padlen = 60 - skb->len;
+
+	nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC);
+	if (nskb == NULL) {
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+	skb_reserve(nskb, NET_IP_ALIGN);
+
+	skb_reset_mac_header(nskb);
+	skb_set_network_header(nskb, skb_network_header(skb) - skb->head);
+	skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head);
+	skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
+	kfree_skb(skb);
+
+	if (padlen) {
+		u8 *pad = skb_put(nskb, padlen);
+		memset(pad, 0, padlen);
+	}
+
+	trailer = skb_put(nskb, 4);
+	trailer[0] = 0x80;
+	trailer[1] = 1 << p->port;
+	trailer[2] = 0x10;
+	trailer[3] = 0x00;
+
+	nskb->protocol = htons(ETH_P_TRAILER);
+
+	nskb->dev = p->parent->master_netdev;
+	dev_queue_xmit(nskb);
+
+	return NETDEV_TX_OK;
+}
+
+static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
+		       struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct dsa_switch *ds = dev->dsa_ptr;
+	u8 *trailer;
+	int source_port;
+
+	if (unlikely(ds == NULL))
+		goto out_drop;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		goto out;
+
+	if (skb_linearize(skb))
+		goto out_drop;
+
+	trailer = skb_tail_pointer(skb) - 4;
+	if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 ||
+	    (trailer[3] & 0xef) != 0x00 || trailer[3] != 0x00)
+		goto out_drop;
+
+	source_port = trailer[1] & 7;
+	if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+		goto out_drop;
+
+	pskb_trim_rcsum(skb, skb->len - 4);
+
+	skb->dev = ds->ports[source_port];
+	skb_push(skb, ETH_HLEN);
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	skb->dev->last_rx = jiffies;
+	skb->dev->stats.rx_packets++;
+	skb->dev->stats.rx_bytes += skb->len;
+
+	netif_receive_skb(skb);
+
+	return 0;
+
+out_drop:
+	kfree_skb(skb);
+out:
+	return 0;
+}
+
+static struct packet_type trailer_packet_type = {
+	.type	= __constant_htons(ETH_P_TRAILER),
+	.func	= trailer_rcv,
+};
+
+static int __init trailer_init_module(void)
+{
+	dev_add_pack(&trailer_packet_type);
+	return 0;
+}
+module_init(trailer_init_module);
+
+static void __exit trailer_cleanup_module(void)
+{
+	dev_remove_pack(&trailer_packet_type);
+}
+module_exit(trailer_cleanup_module);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index dae47e7a44d..b9d85af2dd3 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -193,6 +193,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 */
 	if (netdev_uses_dsa_tags(dev))
 		return htons(ETH_P_DSA);
+	if (netdev_uses_trailer_tags(dev))
+		return htons(ETH_P_TRAILER);
 
 	if (ntohs(eth->h_proto) >= 1536)
 		return eth->h_proto;
-- 
cgit v1.2.3-70-g09d2


From 2e16a77e1e674644b4fe552daa1fb11e32398ae6 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Tue, 7 Oct 2008 13:46:22 +0000
Subject: dsa: add support for the Marvell 88E6060 switch chip

Add support for the Marvell 88E6060 switch chip.  This chip only
supports the Header and Trailer tagging formats, and we use it in
Trailer mode since that mode is slightly easier to handle than
Header mode.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Tested-by: Byron Bradley <byron.bbradley@gmail.com>
Tested-by: Tim Ellis <tim.ellis@mac.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/Kconfig     |   7 ++
 net/dsa/Makefile    |   1 +
 net/dsa/mv88e6060.c | 287 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+)
 create mode 100644 net/dsa/mv88e6060.c

(limited to 'net')

diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 505aa14e67f..3f2fd397926 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -28,6 +28,13 @@ config NET_DSA_MV88E6XXX
 	bool
 	default n
 
+config NET_DSA_MV88E6060
+	bool "Marvell 88E6060 ethernet switch chip support"
+	select NET_DSA_TAG_TRAILER
+	---help---
+	  This enables support for the Marvell 88E6060 ethernet switch
+	  chip.
+
 config NET_DSA_MV88E6XXX_NEED_PPU
 	bool
 	default n
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 63d3c44908b..2374faff4de 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
 
 # switch drivers
 obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
+obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o
 obj-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o
 
diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c
new file mode 100644
index 00000000000..54068ef251e
--- /dev/null
+++ b/net/dsa/mv88e6060.c
@@ -0,0 +1,287 @@
+/*
+ * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include "dsa_priv.h"
+
+#define REG_PORT(p)		(8 + (p))
+#define REG_GLOBAL		0x0f
+
+static int reg_read(struct dsa_switch *ds, int addr, int reg)
+{
+	return mdiobus_read(ds->master_mii_bus, addr, reg);
+}
+
+#define REG_READ(addr, reg)					\
+	({							\
+		int __ret;					\
+								\
+		__ret = reg_read(ds, addr, reg);		\
+		if (__ret < 0)					\
+			return __ret;				\
+		__ret;						\
+	})
+
+
+static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
+{
+	return mdiobus_write(ds->master_mii_bus, addr, reg, val);
+}
+
+#define REG_WRITE(addr, reg, val)				\
+	({							\
+		int __ret;					\
+								\
+		__ret = reg_write(ds, addr, reg, val);		\
+		if (__ret < 0)					\
+			return __ret;				\
+	})
+
+static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr)
+{
+	int ret;
+
+	ret = mdiobus_read(bus, REG_PORT(0), 0x03);
+	if (ret >= 0) {
+		ret &= 0xfff0;
+		if (ret == 0x0600)
+			return "Marvell 88E6060";
+	}
+
+	return NULL;
+}
+
+static int mv88e6060_switch_reset(struct dsa_switch *ds)
+{
+	int i;
+	int ret;
+
+	/*
+	 * Set all ports to the disabled state.
+	 */
+	for (i = 0; i < 6; i++) {
+		ret = REG_READ(REG_PORT(i), 0x04);
+		REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
+	}
+
+	/*
+	 * Wait for transmit queues to drain.
+	 */
+	msleep(2);
+
+	/*
+	 * Reset the switch.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x0A, 0xa130);
+
+	/*
+	 * Wait up to one second for reset to complete.
+	 */
+	for (i = 0; i < 1000; i++) {
+		ret = REG_READ(REG_GLOBAL, 0x00);
+		if ((ret & 0x8000) == 0x0000)
+			break;
+
+		msleep(1);
+	}
+	if (i == 1000)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int mv88e6060_setup_global(struct dsa_switch *ds)
+{
+	/*
+	 * Disable discarding of frames with excessive collisions,
+	 * set the maximum frame size to 1536 bytes, and mask all
+	 * interrupt sources.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x04, 0x0800);
+
+	/*
+	 * Enable automatic address learning, set the address
+	 * database size to 1024 entries, and set the default aging
+	 * time to 5 minutes.
+	 */
+	REG_WRITE(REG_GLOBAL, 0x0a, 0x2130);
+
+	return 0;
+}
+
+static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
+{
+	int addr = REG_PORT(p);
+
+	/*
+	 * Do not force flow control, disable Ingress and Egress
+	 * Header tagging, disable VLAN tunneling, and set the port
+	 * state to Forwarding.  Additionally, if this is the CPU
+	 * port, enable Ingress and Egress Trailer tagging mode.
+	 */
+	REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x4103 : 0x0003);
+
+	/*
+	 * Port based VLAN map: give each port its own address
+	 * database, allow the CPU port to talk to each of the 'real'
+	 * ports, and allow each of the 'real' ports to only talk to
+	 * the CPU port.
+	 */
+	REG_WRITE(addr, 0x06,
+			((p & 0xf) << 12) |
+			 ((p == ds->cpu_port) ?
+				ds->valid_port_mask :
+				(1 << ds->cpu_port)));
+
+	/*
+	 * Port Association Vector: when learning source addresses
+	 * of packets, add the address to the address database using
+	 * a port bitmap that has only the bit for this port set and
+	 * the other bits clear.
+	 */
+	REG_WRITE(addr, 0x0b, 1 << p);
+
+	return 0;
+}
+
+static int mv88e6060_setup(struct dsa_switch *ds)
+{
+	int i;
+	int ret;
+
+	ret = mv88e6060_switch_reset(ds);
+	if (ret < 0)
+		return ret;
+
+	/* @@@ initialise atu */
+
+	ret = mv88e6060_setup_global(ds);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < 6; i++) {
+		ret = mv88e6060_setup_port(ds, i);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr)
+{
+	REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
+	REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
+	REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
+
+	return 0;
+}
+
+static int mv88e6060_port_to_phy_addr(int port)
+{
+	if (port >= 0 && port <= 5)
+		return port;
+	return -1;
+}
+
+static int mv88e6060_phy_read(struct dsa_switch *ds, int port, int regnum)
+{
+	int addr;
+
+	addr = mv88e6060_port_to_phy_addr(port);
+	if (addr == -1)
+		return 0xffff;
+
+	return reg_read(ds, addr, regnum);
+}
+
+static int
+mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
+{
+	int addr;
+
+	addr = mv88e6060_port_to_phy_addr(port);
+	if (addr == -1)
+		return 0xffff;
+
+	return reg_write(ds, addr, regnum, val);
+}
+
+static void mv88e6060_poll_link(struct dsa_switch *ds)
+{
+	int i;
+
+	for (i = 0; i < DSA_MAX_PORTS; i++) {
+		struct net_device *dev;
+		int port_status;
+		int link;
+		int speed;
+		int duplex;
+		int fc;
+
+		dev = ds->ports[i];
+		if (dev == NULL)
+			continue;
+
+		link = 0;
+		if (dev->flags & IFF_UP) {
+			port_status = reg_read(ds, REG_PORT(i), 0x00);
+			if (port_status < 0)
+				continue;
+
+			link = !!(port_status & 0x1000);
+		}
+
+		if (!link) {
+			if (netif_carrier_ok(dev)) {
+				printk(KERN_INFO "%s: link down\n", dev->name);
+				netif_carrier_off(dev);
+			}
+			continue;
+		}
+
+		speed = (port_status & 0x0100) ? 100 : 10;
+		duplex = (port_status & 0x0200) ? 1 : 0;
+		fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0;
+
+		if (!netif_carrier_ok(dev)) {
+			printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, "
+					 "flow control %sabled\n", dev->name,
+					 speed, duplex ? "full" : "half",
+					 fc ? "en" : "dis");
+			netif_carrier_on(dev);
+		}
+	}
+}
+
+static struct dsa_switch_driver mv88e6060_switch_driver = {
+	.tag_protocol	= htons(ETH_P_TRAILER),
+	.probe		= mv88e6060_probe,
+	.setup		= mv88e6060_setup,
+	.set_addr	= mv88e6060_set_addr,
+	.phy_read	= mv88e6060_phy_read,
+	.phy_write	= mv88e6060_phy_write,
+	.poll_link	= mv88e6060_poll_link,
+};
+
+int __init mv88e6060_init(void)
+{
+	register_switch_driver(&mv88e6060_switch_driver);
+	return 0;
+}
+module_init(mv88e6060_init);
+
+void __exit mv88e6060_cleanup(void)
+{
+	unregister_switch_driver(&mv88e6060_switch_driver);
+}
+module_exit(mv88e6060_cleanup);
-- 
cgit v1.2.3-70-g09d2


From 45cec1bac0719c904bb5f4405c2937f7e715888c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 8 Oct 2008 17:33:01 -0700
Subject: dsa: Need to select PHYLIB.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 3f2fd397926..cdce4c6c672 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -2,6 +2,7 @@ menuconfig NET_DSA
 	bool "Distributed Switch Architecture support"
 	default n
 	depends on EXPERIMENTAL
+	select PHYLIB
 	---help---
 	  This allows you to use hardware switch chips that use
 	  the Distributed Switch Architecture.
-- 
cgit v1.2.3-70-g09d2


From c95b819ad75b13102139aad0e7062d927be23cc6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 9 Oct 2008 11:58:54 -0700
Subject: gre: Use needed_headroom

Now that we have dev->needed_headroom, we can use it instead of
having a bogus dev->hard_header_len.  This also allows us to
include dev->hard_header_len in the MTU computation so that when
we do have a meaningful hard_harder_len in future it is included
automatically in figuring out the MTU.

Incidentally, this fixes a bug where we ignored the needed_headroom
field of the underlying device in calculating our own hard_header_len.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2a61158ea72..fd192d67695 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -637,7 +637,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	df = tiph->frag_off;
 	if (df)
-		mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
+		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
 	else
 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 
@@ -785,7 +785,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
 	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
 
-	/* Guess output device to choose reasonable mtu and hard_header_len */
+	/* Guess output device to choose reasonable mtu and needed_headroom */
 
 	if (iph->daddr) {
 		struct flowi fl = { .oif = tunnel->parms.link,
@@ -806,7 +806,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 
 	if (tdev) {
-		hlen = tdev->hard_header_len;
+		hlen = tdev->hard_header_len + tdev->needed_headroom;
 		mtu = tdev->mtu;
 	}
 	dev->iflink = tunnel->parms.link;
@@ -820,8 +820,8 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
 		if (tunnel->parms.o_flags&GRE_SEQ)
 			addend += 4;
 	}
-	dev->hard_header_len = hlen + addend;
-	dev->mtu = mtu - addend;
+	dev->needed_headroom = addend + hlen;
+	dev->mtu = mtu - dev->hard_header_len - addend;
 	tunnel->hlen = addend;
 
 }
@@ -959,7 +959,8 @@ done:
 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
+	if (new_mtu < 68 ||
+	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
 		return -EINVAL;
 	dev->mtu = new_mtu;
 	return 0;
@@ -1085,7 +1086,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 	dev->change_mtu		= ipgre_tunnel_change_mtu;
 
 	dev->type		= ARPHRD_IPGRE;
-	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
+	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
-- 
cgit v1.2.3-70-g09d2


From 42aa916265d740d66ac1f17290366e9494c884c2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 9 Oct 2008 11:59:32 -0700
Subject: gre: Move MTU setting out of ipgre_tunnel_bind_dev

This patch moves the dev->mtu setting out of ipgre_tunnel_bind_dev.
This is in prepartion of using rtnl_link where we'll need to make
the MTU setting conditional on whether the user has supplied an
MTU.  This also requires the move of the ipgre_tunnel_bind_dev
call out of the dev->init function so that we can access the user
parameters later.

This patch also adds a check to prevent setting the MTU below
the minimum of 68.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fd192d67695..80622dd6fb3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -119,6 +119,7 @@
 
 static int ipgre_tunnel_init(struct net_device *dev);
 static void ipgre_tunnel_setup(struct net_device *dev);
+static int ipgre_tunnel_bind_dev(struct net_device *dev);
 
 /* Fallback tunnel: no source, no destination, no key, no options */
 
@@ -289,6 +290,8 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
 	nt = netdev_priv(dev);
 	nt->parms = *parms;
 
+	dev->mtu = ipgre_tunnel_bind_dev(dev);
+
 	if (register_netdevice(dev) < 0)
 		goto failed_free;
 
@@ -773,7 +776,7 @@ tx_error:
 	return 0;
 }
 
-static void ipgre_tunnel_bind_dev(struct net_device *dev)
+static int ipgre_tunnel_bind_dev(struct net_device *dev)
 {
 	struct net_device *tdev = NULL;
 	struct ip_tunnel *tunnel;
@@ -821,9 +824,14 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
 			addend += 4;
 	}
 	dev->needed_headroom = addend + hlen;
-	dev->mtu = mtu - dev->hard_header_len - addend;
+	mtu -= dev->hard_header_len - addend;
+
+	if (mtu < 68)
+		mtu = 68;
+
 	tunnel->hlen = addend;
 
+	return mtu;
 }
 
 static int
@@ -917,7 +925,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				t->parms.iph.frag_off = p.iph.frag_off;
 				if (t->parms.link != p.link) {
 					t->parms.link = p.link;
-					ipgre_tunnel_bind_dev(dev);
+					dev->mtu = ipgre_tunnel_bind_dev(dev);
 					netdev_state_change(dev);
 				}
 			}
@@ -1108,8 +1116,6 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 
-	ipgre_tunnel_bind_dev(dev);
-
 	if (iph->daddr) {
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (ipv4_is_multicast(iph->daddr)) {
-- 
cgit v1.2.3-70-g09d2


From c19e654ddbe3831252f61e76a74d661e1a755530 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 9 Oct 2008 11:59:55 -0700
Subject: gre: Add netlink interface

This patch adds a netlink interface that will eventually displace
the existing ioctl interface.  It utilises the elegant rtnl_link_ops
mechanism.

This also means that user-space no longer needs to rely on the
tunnel interface being of type GRE to identify GRE tunnels.  The
identification can now occur using rtnl_link_ops.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tunnel.h |  19 ++++
 net/ipv4/ip_gre.c         | 247 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 262 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index d4efe401470..aeab2cb32a9 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -2,6 +2,7 @@
 #define _IF_TUNNEL_H_
 
 #include <linux/types.h>
+#include <linux/ip.h>
 
 #define SIOCGETTUNNEL   (SIOCDEVPRIVATE + 0)
 #define SIOCADDTUNNEL   (SIOCDEVPRIVATE + 1)
@@ -47,4 +48,22 @@ struct ip_tunnel_prl {
 /* PRL flags */
 #define	PRL_DEFAULT		0x0001
 
+enum
+{
+	IFLA_GRE_UNSPEC,
+	IFLA_GRE_LINK,
+	IFLA_GRE_IFLAGS,
+	IFLA_GRE_OFLAGS,
+	IFLA_GRE_IKEY,
+	IFLA_GRE_OKEY,
+	IFLA_GRE_LOCAL,
+	IFLA_GRE_REMOTE,
+	IFLA_GRE_TTL,
+	IFLA_GRE_TOS,
+	IFLA_GRE_PMTUDISC,
+	__IFLA_GRE_MAX,
+};
+
+#define IFLA_GRE_MAX	(__IFLA_GRE_MAX - 1)
+
 #endif /* _IF_TUNNEL_H_ */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 80622dd6fb3..25d2c77a7f3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -41,6 +41,7 @@
 #include <net/xfrm.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/rtnetlink.h>
 
 #ifdef CONFIG_IPV6
 #include <net/ipv6.h>
@@ -117,6 +118,7 @@
    Alexey Kuznetsov.
  */
 
+static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 static int ipgre_tunnel_init(struct net_device *dev);
 static void ipgre_tunnel_setup(struct net_device *dev);
 static int ipgre_tunnel_bind_dev(struct net_device *dev);
@@ -286,9 +288,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
 			goto failed_free;
 	}
 
-	dev->init = ipgre_tunnel_init;
 	nt = netdev_priv(dev);
 	nt->parms = *parms;
+	dev->rtnl_link_ops = &ipgre_link_ops;
 
 	dev->mtu = ipgre_tunnel_bind_dev(dev);
 
@@ -1087,6 +1089,7 @@ static int ipgre_close(struct net_device *dev)
 
 static void ipgre_tunnel_setup(struct net_device *dev)
 {
+	dev->init		= ipgre_tunnel_init;
 	dev->uninit		= ipgre_tunnel_uninit;
 	dev->destructor 	= free_netdev;
 	dev->hard_start_xmit	= ipgre_tunnel_xmit;
@@ -1196,6 +1199,7 @@ static int ipgre_init_net(struct net *net)
 
 	ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
 	dev_net_set(ign->fb_tunnel_dev, net);
+	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
 
 	if ((err = register_netdev(ign->fb_tunnel_dev)))
 		goto err_reg_dev;
@@ -1228,6 +1232,229 @@ static struct pernet_operations ipgre_net_ops = {
 	.exit = ipgre_exit_net,
 };
 
+static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	__be16 flags;
+
+	if (!data)
+		return 0;
+
+	flags = 0;
+	if (data[IFLA_GRE_IFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (data[IFLA_GRE_OFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	if (flags & (GRE_VERSION|GRE_ROUTING))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void ipgre_netlink_parms(struct nlattr *data[],
+				struct ip_tunnel_parm *parms)
+{
+	memset(parms, 0, sizeof(parms));
+
+	parms->iph.protocol = IPPROTO_GRE;
+
+	if (!data)
+		return;
+
+	if (data[IFLA_GRE_LINK])
+		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+	if (data[IFLA_GRE_IFLAGS])
+		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+
+	if (data[IFLA_GRE_OFLAGS])
+		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+
+	if (data[IFLA_GRE_IKEY])
+		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+	if (data[IFLA_GRE_OKEY])
+		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+	if (data[IFLA_GRE_LOCAL])
+		memcpy(&parms->iph.saddr, nla_data(data[IFLA_GRE_LOCAL]), 4);
+
+	if (data[IFLA_GRE_REMOTE])
+		memcpy(&parms->iph.daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
+
+	if (data[IFLA_GRE_TTL])
+		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
+
+	if (data[IFLA_GRE_TOS])
+		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
+
+	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
+		parms->iph.frag_off = htons(IP_DF);
+}
+
+static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
+			 struct nlattr *data[])
+{
+	struct ip_tunnel *nt;
+	struct net *net = dev_net(dev);
+	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+	int mtu;
+	int err;
+
+	nt = netdev_priv(dev);
+	ipgre_netlink_parms(data, &nt->parms);
+
+	if (ipgre_tunnel_locate(net, &nt->parms, 0))
+		return -EEXIST;
+
+	mtu = ipgre_tunnel_bind_dev(dev);
+	if (!tb[IFLA_MTU])
+		dev->mtu = mtu;
+
+	err = register_netdevice(dev);
+	if (err)
+		goto out;
+
+	dev_hold(dev);
+	ipgre_tunnel_link(ign, nt);
+
+out:
+	return err;
+}
+
+static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
+			    struct nlattr *data[])
+{
+	struct ip_tunnel *t, *nt;
+	struct net *net = dev_net(dev);
+	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+	struct ip_tunnel_parm p;
+	int mtu;
+
+	if (dev == ign->fb_tunnel_dev)
+		return -EINVAL;
+
+	nt = netdev_priv(dev);
+	ipgre_netlink_parms(data, &p);
+
+	t = ipgre_tunnel_locate(net, &p, 0);
+
+	if (t) {
+		if (t->dev != dev)
+			return -EEXIST;
+	} else {
+		unsigned nflags = 0;
+
+		t = nt;
+
+		if (ipv4_is_multicast(p.iph.daddr))
+			nflags = IFF_BROADCAST;
+		else if (p.iph.daddr)
+			nflags = IFF_POINTOPOINT;
+
+		if ((dev->flags ^ nflags) &
+		    (IFF_POINTOPOINT | IFF_BROADCAST))
+			return -EINVAL;
+
+		ipgre_tunnel_unlink(ign, t);
+		t->parms.iph.saddr = p.iph.saddr;
+		t->parms.iph.daddr = p.iph.daddr;
+		t->parms.i_key = p.i_key;
+		memcpy(dev->dev_addr, &p.iph.saddr, 4);
+		memcpy(dev->broadcast, &p.iph.daddr, 4);
+		ipgre_tunnel_link(ign, t);
+		netdev_state_change(dev);
+	}
+
+	t->parms.o_key = p.o_key;
+	t->parms.iph.ttl = p.iph.ttl;
+	t->parms.iph.tos = p.iph.tos;
+	t->parms.iph.frag_off = p.iph.frag_off;
+
+	if (t->parms.link != p.link) {
+		t->parms.link = p.link;
+		mtu = ipgre_tunnel_bind_dev(dev);
+		if (!tb[IFLA_MTU])
+			dev->mtu = mtu;
+		netdev_state_change(dev);
+	}
+
+	return 0;
+}
+
+static size_t ipgre_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_GRE_LINK */
+		nla_total_size(4) +
+		/* IFLA_GRE_IFLAGS */
+		nla_total_size(2) +
+		/* IFLA_GRE_OFLAGS */
+		nla_total_size(2) +
+		/* IFLA_GRE_IKEY */
+		nla_total_size(4) +
+		/* IFLA_GRE_OKEY */
+		nla_total_size(4) +
+		/* IFLA_GRE_LOCAL */
+		nla_total_size(4) +
+		/* IFLA_GRE_REMOTE */
+		nla_total_size(4) +
+		/* IFLA_GRE_TTL */
+		nla_total_size(1) +
+		/* IFLA_GRE_TOS */
+		nla_total_size(1) +
+		/* IFLA_GRE_PMTUDISC */
+		nla_total_size(1) +
+		0;
+}
+
+static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct ip_tunnel_parm *p = &t->parms;
+
+	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
+	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
+	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
+	NLA_PUT_BE32(skb, IFLA_GRE_IFLAGS, p->i_flags);
+	NLA_PUT_BE32(skb, IFLA_GRE_OFLAGS, p->o_flags);
+	NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
+	NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
+	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
+	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
+	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
+	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
+	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
+	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
+	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
+	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
+	[IFLA_GRE_LOCAL]	= { .len = 4 },
+	[IFLA_GRE_REMOTE]	= { .len = 4 },
+	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
+	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
+	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
+};
+
+static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
+	.kind		= "gre",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ipgre_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= ipgre_tunnel_setup,
+	.validate	= ipgre_tunnel_validate,
+	.newlink	= ipgre_newlink,
+	.changelink	= ipgre_changelink,
+	.get_size	= ipgre_get_size,
+	.fill_info	= ipgre_fill_info,
+};
+
 /*
  *	And now the modules code and kernel interface.
  */
@@ -1245,19 +1472,31 @@ static int __init ipgre_init(void)
 
 	err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
 	if (err < 0)
-		inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
+		goto gen_device_failed;
 
+	err = rtnl_link_register(&ipgre_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
+
+out:
 	return err;
+
+rtnl_link_failed:
+	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
+gen_device_failed:
+	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
+	goto out;
 }
 
 static void __exit ipgre_fini(void)
 {
+	rtnl_link_unregister(&ipgre_link_ops);
+	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
 	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
 		printk(KERN_INFO "ipgre close: can't remove protocol\n");
-
-	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
 }
 
 module_init(ipgre_init);
 module_exit(ipgre_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("rtnl-link-gre");
-- 
cgit v1.2.3-70-g09d2


From e1a8000228e16212c93b23cfbed4d622e2ec7a6b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 9 Oct 2008 12:00:17 -0700
Subject: gre: Add Transparent Ethernet Bridging

This patch adds support for Ethernet over GRE encapsulation.
This is exposed to user-space with a new link type of "gretap"
instead of "gre".  It will create an ARPHRD_ETHER device in
lieu of the usual ARPHRD_IPGRE.

Note that to preserver backwards compatibility all Transparent
Ethernet Bridging packets are passed to an ARPHRD_IPGRE tunnel
if its key matches and there is no ARPHRD_ETHER device whose
key matches more closely.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h |   1 +
 net/ipv4/ip_gre.c        | 206 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 175 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index a0099e98b5c..bf1a53b2682 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -56,6 +56,7 @@
 #define ETH_P_DIAG      0x6005          /* DEC Diagnostics              */
 #define ETH_P_CUST      0x6006          /* DEC Customer use             */
 #define ETH_P_SCA       0x6007          /* DEC Systems Comms Arch       */
+#define ETH_P_TEB	0x6558		/* Trans Ether Bridging		*/
 #define ETH_P_RARP      0x8035		/* Reverse Addr Res packet	*/
 #define ETH_P_ATALK	0x809B		/* Appletalk DDP		*/
 #define ETH_P_AARP	0x80F3		/* Appletalk AARP		*/
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 25d2c77a7f3..44ed9487fa1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -27,6 +27,7 @@
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
 #include <linux/if_ether.h>
 
 #include <net/sock.h>
@@ -166,38 +167,64 @@ static DEFINE_RWLOCK(ipgre_lock);
 /* Given src, dst and key, find appropriate for input tunnel. */
 
 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
-		__be32 remote, __be32 local, __be32 key)
+					      __be32 remote, __be32 local,
+					      __be32 key, __be16 gre_proto)
 {
 	unsigned h0 = HASH(remote);
 	unsigned h1 = HASH(key);
 	struct ip_tunnel *t;
+	struct ip_tunnel *t2 = NULL;
 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+		       ARPHRD_ETHER : ARPHRD_IPGRE;
 
 	for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
-			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
-				return t;
+			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
+				if (t->dev->type == dev_type)
+					return t;
+				if (t->dev->type == ARPHRD_IPGRE && !t2)
+					t2 = t;
+			}
 		}
 	}
+
 	for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
 		if (remote == t->parms.iph.daddr) {
-			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
-				return t;
+			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
+				if (t->dev->type == dev_type)
+					return t;
+				if (t->dev->type == ARPHRD_IPGRE && !t2)
+					t2 = t;
+			}
 		}
 	}
+
 	for (t = ign->tunnels_l[h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr ||
 		     (local == t->parms.iph.daddr &&
 		      ipv4_is_multicast(local))) {
-			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
-				return t;
+			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
+				if (t->dev->type == dev_type)
+					return t;
+				if (t->dev->type == ARPHRD_IPGRE && !t2)
+					t2 = t;
+			}
 		}
 	}
+
 	for (t = ign->tunnels_wc[h1]; t; t = t->next) {
-		if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
-			return t;
+		if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
+			if (t->dev->type == dev_type)
+				return t;
+			if (t->dev->type == ARPHRD_IPGRE && !t2)
+				t2 = t;
+		}
 	}
 
+	if (t2)
+		return t2;
+
 	if (ign->fb_tunnel_dev->flags&IFF_UP)
 		return netdev_priv(ign->fb_tunnel_dev);
 	return NULL;
@@ -252,25 +279,37 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
 	}
 }
 
-static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
-		struct ip_tunnel_parm *parms, int create)
+static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
+					   struct ip_tunnel_parm *parms,
+					   int type)
 {
 	__be32 remote = parms->iph.daddr;
 	__be32 local = parms->iph.saddr;
 	__be32 key = parms->i_key;
-	struct ip_tunnel *t, **tp, *nt;
+	struct ip_tunnel *t, **tp;
+	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+
+	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
+		if (local == t->parms.iph.saddr &&
+		    remote == t->parms.iph.daddr &&
+		    key == t->parms.i_key &&
+		    type == t->dev->type)
+			break;
+
+	return t;
+}
+
+static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
+		struct ip_tunnel_parm *parms, int create)
+{
+	struct ip_tunnel *t, *nt;
 	struct net_device *dev;
 	char name[IFNAMSIZ];
 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 
-	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
-		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
-			if (key == t->parms.i_key)
-				return t;
-		}
-	}
-	if (!create)
-		return NULL;
+	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
+	if (t || !create)
+		return t;
 
 	if (parms->name[0])
 		strlcpy(name, parms->name, IFNAMSIZ);
@@ -385,8 +424,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 
 	read_lock(&ipgre_lock);
 	t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
-			(flags&GRE_KEY) ?
-			*(((__be32*)p) + (grehlen>>2) - 1) : 0);
+				flags & GRE_KEY ?
+				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
+				p[1]);
 	if (t == NULL || t->parms.iph.daddr == 0 ||
 	    ipv4_is_multicast(t->parms.iph.daddr))
 		goto out;
@@ -436,6 +476,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 	u32    seqno = 0;
 	struct ip_tunnel *tunnel;
 	int    offset = 4;
+	__be16 gre_proto;
 
 	if (!pskb_may_pull(skb, 16))
 		goto drop_nolock;
@@ -475,20 +516,22 @@ static int ipgre_rcv(struct sk_buff *skb)
 		}
 	}
 
+	gre_proto = *(__be16 *)(h + 2);
+
 	read_lock(&ipgre_lock);
 	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
-					iph->saddr, iph->daddr, key)) != NULL) {
+					  iph->saddr, iph->daddr, key,
+					  gre_proto))) {
 		struct net_device_stats *stats = &tunnel->dev->stats;
 
 		secpath_reset(skb);
 
-		skb->protocol = *(__be16*)(h + 2);
+		skb->protocol = gre_proto;
 		/* WCCP version 1 and 2 protocol decoding.
 		 * - Change protocol to IP
 		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
 		 */
-		if (flags == 0 &&
-		    skb->protocol == htons(ETH_P_WCCP)) {
+		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
 			skb->protocol = htons(ETH_P_IP);
 			if ((*(h + offset) & 0xF0) != 0x40)
 				offset += 4;
@@ -496,7 +539,6 @@ static int ipgre_rcv(struct sk_buff *skb)
 
 		skb->mac_header = skb->network_header;
 		__pskb_pull(skb, offset);
-		skb_reset_network_header(skb);
 		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
@@ -524,13 +566,30 @@ static int ipgre_rcv(struct sk_buff *skb)
 			}
 			tunnel->i_seqno = seqno + 1;
 		}
+
+		/* Warning: All skb pointers will be invalidated! */
+		if (tunnel->dev->type == ARPHRD_ETHER) {
+			if (!pskb_may_pull(skb, ETH_HLEN)) {
+				stats->rx_length_errors++;
+				stats->rx_errors++;
+				goto drop;
+			}
+
+			iph = ip_hdr(skb);
+			skb->protocol = eth_type_trans(skb, tunnel->dev);
+			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+		}
+
 		stats->rx_packets++;
 		stats->rx_bytes += skb->len;
 		skb->dev = tunnel->dev;
 		dst_release(skb->dst);
 		skb->dst = NULL;
 		nf_reset(skb);
+
+		skb_reset_network_header(skb);
 		ipgre_ecn_decapsulate(iph, skb);
+
 		netif_rx(skb);
 		read_unlock(&ipgre_lock);
 		return(0);
@@ -565,7 +624,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto tx_error;
 	}
 
-	if (dev->header_ops) {
+	if (dev->type == ARPHRD_ETHER)
+		IPCB(skb)->flags = 0;
+
+	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
 		gre_hlen = 0;
 		tiph = (struct iphdr*)skb->data;
 	} else {
@@ -741,8 +803,9 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
 	}
 
-	((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
-	((__be16*)(iph+1))[1] = skb->protocol;
+	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
+	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
+				   htons(ETH_P_TEB) : skb->protocol;
 
 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
 		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
@@ -804,7 +867,9 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 			tdev = rt->u.dst.dev;
 			ip_rt_put(rt);
 		}
-		dev->flags |= IFF_POINTOPOINT;
+
+		if (dev->type != ARPHRD_ETHER)
+			dev->flags |= IFF_POINTOPOINT;
 	}
 
 	if (!tdev && tunnel->parms.link)
@@ -1250,6 +1315,30 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
+static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	__be32 daddr;
+
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+
+	if (!data)
+		goto out;
+
+	if (data[IFLA_GRE_REMOTE]) {
+		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
+		if (!daddr)
+			return -EINVAL;
+	}
+
+out:
+	return ipgre_tunnel_validate(tb, data);
+}
+
 static void ipgre_netlink_parms(struct nlattr *data[],
 				struct ip_tunnel_parm *parms)
 {
@@ -1291,6 +1380,35 @@ static void ipgre_netlink_parms(struct nlattr *data[],
 		parms->iph.frag_off = htons(IP_DF);
 }
 
+static int ipgre_tap_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel;
+
+	tunnel = netdev_priv(dev);
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	ipgre_tunnel_bind_dev(dev);
+
+	return 0;
+}
+
+static void ipgre_tap_setup(struct net_device *dev)
+{
+
+	ether_setup(dev);
+
+	dev->init		= ipgre_tap_init;
+	dev->uninit		= ipgre_tunnel_uninit;
+	dev->destructor 	= free_netdev;
+	dev->hard_start_xmit	= ipgre_tunnel_xmit;
+	dev->change_mtu		= ipgre_tunnel_change_mtu;
+
+	dev->iflink		= 0;
+	dev->features		|= NETIF_F_NETNS_LOCAL;
+}
+
 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
 			 struct nlattr *data[])
 {
@@ -1303,9 +1421,12 @@ static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
 	nt = netdev_priv(dev);
 	ipgre_netlink_parms(data, &nt->parms);
 
-	if (ipgre_tunnel_locate(net, &nt->parms, 0))
+	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
 		return -EEXIST;
 
+	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
+		random_ether_addr(dev->dev_addr);
+
 	mtu = ipgre_tunnel_bind_dev(dev);
 	if (!tb[IFLA_MTU])
 		dev->mtu = mtu;
@@ -1455,6 +1576,19 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
 	.fill_info	= ipgre_fill_info,
 };
 
+static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
+	.kind		= "gretap",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ipgre_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= ipgre_tap_setup,
+	.validate	= ipgre_tap_validate,
+	.newlink	= ipgre_newlink,
+	.changelink	= ipgre_changelink,
+	.get_size	= ipgre_get_size,
+	.fill_info	= ipgre_fill_info,
+};
+
 /*
  *	And now the modules code and kernel interface.
  */
@@ -1478,9 +1612,15 @@ static int __init ipgre_init(void)
 	if (err < 0)
 		goto rtnl_link_failed;
 
+	err = rtnl_link_register(&ipgre_tap_ops);
+	if (err < 0)
+		goto tap_ops_failed;
+
 out:
 	return err;
 
+tap_ops_failed:
+	rtnl_link_unregister(&ipgre_link_ops);
 rtnl_link_failed:
 	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
 gen_device_failed:
@@ -1490,6 +1630,7 @@ gen_device_failed:
 
 static void __exit ipgre_fini(void)
 {
+	rtnl_link_unregister(&ipgre_tap_ops);
 	rtnl_link_unregister(&ipgre_link_ops);
 	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
 	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
@@ -1500,3 +1641,4 @@ module_init(ipgre_init);
 module_exit(ipgre_fini);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("rtnl-link-gre");
+MODULE_ALIAS("rtnl-link-gretap");
-- 
cgit v1.2.3-70-g09d2


From 64194c31a0b6f5d84703b772113aafc400eeaad6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 9 Oct 2008 12:03:17 -0700
Subject: inet: Make tunnel RX/TX byte counters more consistent

This patch makes the RX/TX byte counters for IPIP, GRE and SIT more
consistent.  Previously we included the external IP headers on the
way out but not when the packet is inbound.

The new scheme is to count payload only in both directions.  For
IPIP and SIT this simply means the exclusion of the external IP
header.  For GRE this means that we exclude the GRE header as
well.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipip.h | 2 +-
 net/ipv4/ip_gre.c  | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/ipip.h b/include/net/ipip.h
index a85bda64b85..fdf9bd74370 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -37,7 +37,7 @@ struct ip_tunnel_prl_entry
 
 #define IPTUNNEL_XMIT() do {						\
 	int err;							\
-	int pkt_len = skb->len;						\
+	int pkt_len = skb->len - skb_transport_offset(skb);		\
 									\
 	skb->ip_summed = CHECKSUM_NONE;					\
 	ip_select_ident(iph, &rt->u.dst, NULL);				\
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 44ed9487fa1..0d5e35b0ed5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -477,6 +477,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 	struct ip_tunnel *tunnel;
 	int    offset = 4;
 	__be16 gre_proto;
+	unsigned int len;
 
 	if (!pskb_may_pull(skb, 16))
 		goto drop_nolock;
@@ -567,6 +568,8 @@ static int ipgre_rcv(struct sk_buff *skb)
 			tunnel->i_seqno = seqno + 1;
 		}
 
+		len = skb->len;
+
 		/* Warning: All skb pointers will be invalidated! */
 		if (tunnel->dev->type == ARPHRD_ETHER) {
 			if (!pskb_may_pull(skb, ETH_HLEN)) {
@@ -581,7 +584,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 		}
 
 		stats->rx_packets++;
-		stats->rx_bytes += skb->len;
+		stats->rx_bytes += len;
 		skb->dev = tunnel->dev;
 		dst_release(skb->dst);
 		skb->dst = NULL;
@@ -770,7 +773,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		old_iph = ip_hdr(skb);
 	}
 
-	skb->transport_header = skb->network_header;
+	skb_reset_transport_header(skb);
 	skb_push(skb, gre_hlen);
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-- 
cgit v1.2.3-70-g09d2


From 3d5a019d5761a40465337711ae7d2beb1e9b43ec Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Thu, 9 Oct 2008 14:32:24 -0700
Subject: sctp: Fix the SNMP number of SCTP_MIB_CURRESTAB

RFC3873 defined SCTP_MIB_CURRESTAB:
  sctpCurrEstab OBJECT-TYPE
    SYNTAX         Gauge32
    MAX-ACCESS     read-only
    STATUS         current
    DESCRIPTION
         "The number of associations for which the current state is
         either ESTABLISHED, SHUTDOWN-RECEIVED or SHUTDOWN-PENDING."
    REFERENCE
         "Section 4 in RFC2960 covers the SCTP   Association state
         diagram."

If the T4 RTO timer expires many times(timeout), the association will enter
CLOSED state, so we should dec the number of SCTP_MIB_CURRESTAB, not inc the
number of SCTP_MIB_CURRESTAB.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index ea3a34cbe47..1588d063c68 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5436,7 +5436,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_NO_ERROR));
 		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-		SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
 		return SCTP_DISPOSITION_ABORT;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 56eb82bb8d2cdd8d9f4838eaa109df41d7164ca5 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 9 Oct 2008 14:33:01 -0700
Subject: sctp: Fix SNMP number of SCTP_MIB_ABORTED during violation handling.

If ABORT chunks require authentication and a protocol violation
is triggered, we do not tear down the association.  Subsequently,
we should not increment SCTP_MIB_ABORTED.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 1588d063c68..f5094f1b5af 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4188,11 +4188,10 @@ static sctp_disposition_t sctp_sf_abort_violation(
 		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
 	}
 
-discard:
-	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
-
 	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
 
+discard:
+	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
 	return SCTP_DISPOSITION_ABORT;
 
 nomem_pkt:
@@ -4265,12 +4264,10 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 			SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
 	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
 
 discard:
 	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
-
-	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-
 	return SCTP_DISPOSITION_ABORT;
 nomem:
 	return SCTP_DISPOSITION_NOMEM;
-- 
cgit v1.2.3-70-g09d2


From a1080a8b0bc301c223c4bf0cea4c5e42f43dcf58 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 9 Oct 2008 14:33:26 -0700
Subject: sctp: update SNMP statiscts when T5 timer expired.

The T5 timer is the timer for the over-all shutdown procedure.  If
this timer expires, then shutdown procedure has not completed and we
ABORT the association.  We should update SCTP_MIB_ABORTED and
SCTP_MIB_CURRESTAB  when aborting.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f5094f1b5af..d4c3fbc4671 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5489,6 +5489,9 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 			SCTP_PERR(SCTP_ERROR_NO_ERROR));
 
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+
 	return SCTP_DISPOSITION_DELETE_TCB;
 nomem:
 	return SCTP_DISPOSITION_NOMEM;
-- 
cgit v1.2.3-70-g09d2


From 78e645cb890b0f32ea81a974e29427d9cd2f64f0 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Thu, 9 Oct 2008 14:37:47 -0700
Subject: tcpv[46]: fix md5 pseudoheader address field ordering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Maybe it's just me but I guess those md5 people made a mess
out of it by having *_md5_hash_* to use daddr, saddr order
instead of the one that is natural (and equal to what csum
functions use). For the segment were sending, the original
addresses are reversed so buff's saddr == skb's daddr and
vice-versa.

Maybe I can finally proceed with unification of some code
after fixing it first... :-)

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 4 ++--
 net/ipv6/tcp_ipv6.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ba46769c6e9..5c8fa7f1e32 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -583,8 +583,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 		rep.th.doff = arg.iov[0].iov_len / 4;
 
 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
-				     key, ip_hdr(skb)->daddr,
-				     ip_hdr(skb)->saddr, &rep.th);
+				     key, ip_hdr(skb)->saddr,
+				     ip_hdr(skb)->daddr, &rep.th);
 	}
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index dd7bdde7bdd..eab10bc7ff8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1007,8 +1007,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 			       (TCPOPT_MD5SIG << 8) |
 			       TCPOLEN_MD5SIG);
 		tcp_v6_md5_hash_hdr((__u8 *)&opt[1], key,
-				    &ipv6_hdr(skb)->daddr,
-				    &ipv6_hdr(skb)->saddr, t1);
+				    &ipv6_hdr(skb)->saddr,
+				    &ipv6_hdr(skb)->daddr, t1);
 	}
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 77c676da1b717eed7239144fb539dfc4c7b78e04 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Thu, 9 Oct 2008 14:41:38 -0700
Subject: tcpv6: trivial formatting changes to send_(ack|reset)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index eab10bc7ff8..910603cd12d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -949,7 +949,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	struct flowi fl;
 	struct net *net = dev_net(skb->dst->dev);
 	struct sock *ctl_sk = net->ipv6.tcp_sk;
-	unsigned int tot_len = sizeof(*th);
+	unsigned int tot_len = sizeof(struct tcphdr);
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
 #endif
@@ -1033,7 +1033,6 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	 * namespace
 	 */
 	if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
-
 		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
 			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
 			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -1070,13 +1069,13 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 
 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
 
-	t1 = (struct tcphdr *) skb_push(buff,tot_len);
+	t1 = (struct tcphdr *) skb_push(buff, tot_len);
 
 	/* Swap the send and the receive. */
 	memset(t1, 0, sizeof(*t1));
 	t1->dest = th->source;
 	t1->source = th->dest;
-	t1->doff = tot_len/4;
+	t1->doff = tot_len / 4;
 	t1->seq = htonl(seq);
 	t1->ack_seq = htonl(ack);
 	t1->ack = 1;
-- 
cgit v1.2.3-70-g09d2


From 81ada62d70060023923f46ab666cdc2970e1e0ce Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Thu, 9 Oct 2008 14:42:01 -0700
Subject: tcpv6: convert opt[] -> topt in tcp_v6_send_reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

after this I get:

$ diff-funcs tcp_v6_send_reset tcp_ipv6.c tcp_ipv6.c tcp_v6_send_ack
 --- tcp_ipv6.c:tcp_v6_send_reset()
 +++ tcp_ipv6.c:tcp_v6_send_ack()
@@ -1,4 +1,5 @@
-static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
u32 ts,
+                           struct tcp_md5sig_key *key)
 {
        struct tcphdr *th = tcp_hdr(skb), *t1;
        struct sk_buff *buff;
@@ -7,31 +8,14 @@
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 *topt;
-#ifdef CONFIG_TCP_MD5SIG
-       struct tcp_md5sig_key *key;
-#endif
-
-       if (th->rst)
-               return;
-
-       if (!ipv6_unicast_destination(skb))
-               return;

+       if (ts)
+               tot_len += TCPOLEN_TSTAMP_ALIGNED;
 #ifdef CONFIG_TCP_MD5SIG
-       if (sk)
-               key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
-       else
-               key = NULL;
-
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif

-       /*
-        * We need to grab some memory, and put together an RST,
-        * and then put it into the queue to be sent.
-        */
-
        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (buff == NULL)
@@ -46,18 +30,20 @@
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
-       t1->rst = 1;
-
-       if(th->ack) {
-               t1->seq = th->ack_seq;
-       } else {
-               t1->ack = 1;
-               t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
-                                   + skb->len - (th->doff<<2));
-       }
+       t1->seq = htonl(seq);
+       t1->ack_seq = htonl(ack);
+       t1->ack = 1;
+       t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

+       if (ts) {
+               *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+                               (TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
+               *topt++ = htonl(tcp_time_stamp);
+               *topt++ = htonl(ts);
+       }
+
 #ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
@@ -84,15 +70,10 @@
        fl.fl_ip_sport = t1->source;
        security_skb_classify_flow(skb, &fl);

-       /* Pass a socket to ip6_dst_lookup either it is for RST
-        * Underlying function will use this to retrieve the network
-        * namespace
-        */
        if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
                if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
                        ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
                        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
-                       TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
                        return;
                }
        }


...which starts to be trivial to combine.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 910603cd12d..1941c5c888d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -950,6 +950,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	struct net *net = dev_net(skb->dst->dev);
 	struct sock *ctl_sk = net->ipv6.tcp_sk;
 	unsigned int tot_len = sizeof(struct tcphdr);
+	__be32 *topt;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
 #endif
@@ -999,14 +1000,13 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 				    + skb->len - (th->doff<<2));
 	}
 
+	topt = (__be32 *)(t1 + 1);
+
 #ifdef CONFIG_TCP_MD5SIG
 	if (key) {
-		__be32 *opt = (__be32*)(t1 + 1);
-		opt[0] = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_MD5SIG << 8) |
-			       TCPOLEN_MD5SIG);
-		tcp_v6_md5_hash_hdr((__u8 *)&opt[1], key,
+		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
+		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
 				    &ipv6_hdr(skb)->saddr,
 				    &ipv6_hdr(skb)->daddr, t1);
 	}
-- 
cgit v1.2.3-70-g09d2


From 626e264dd1989bdc98a5eaf2e059af4dba07ac4f Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Thu, 9 Oct 2008 14:42:40 -0700
Subject: tcpv6: combine tcp_v6_send_(reset|ack)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

$ codiff tcp_ipv6.o.old tcp_ipv6.o.new
net/ipv6/tcp_ipv6.c:
  tcp_v6_md5_hash_hdr | -144
  tcp_v6_send_ack     | -585
  tcp_v6_send_reset   | -540
 3 functions changed, 1269 bytes removed, diff: -1269

net/ipv6/tcp_ipv6.c:
  tcp_v6_send_response | +791
 1 function changed, 791 bytes added, diff: +791

tcp_ipv6.o.new:
 4 functions changed, 791 bytes added, 1269 bytes removed, diff: -478

I choose to leave the reset related netns comment in place (not
the one that is killed) as I cannot understand its English so
it's a bit hard for me to evaluate its usefulness :-).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 139 +++++++++++++++-------------------------------------
 1 file changed, 40 insertions(+), 99 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1941c5c888d..13c65144a00 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -942,7 +942,8 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
+				 u32 ts, struct tcp_md5sig_key *key, int rst)
 {
 	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
@@ -951,31 +952,14 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	struct sock *ctl_sk = net->ipv6.tcp_sk;
 	unsigned int tot_len = sizeof(struct tcphdr);
 	__be32 *topt;
-#ifdef CONFIG_TCP_MD5SIG
-	struct tcp_md5sig_key *key;
-#endif
-
-	if (th->rst)
-		return;
-
-	if (!ipv6_unicast_destination(skb))
-		return;
 
+	if (ts)
+		tot_len += TCPOLEN_TSTAMP_ALIGNED;
 #ifdef CONFIG_TCP_MD5SIG
-	if (sk)
-		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
-	else
-		key = NULL;
-
 	if (key)
 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
 
-	/*
-	 * We need to grab some memory, and put together an RST,
-	 * and then put it into the queue to be sent.
-	 */
-
 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
 			 GFP_ATOMIC);
 	if (buff == NULL)
@@ -990,18 +974,21 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	t1->dest = th->source;
 	t1->source = th->dest;
 	t1->doff = tot_len / 4;
-	t1->rst = 1;
-
-	if(th->ack) {
-		t1->seq = th->ack_seq;
-	} else {
-		t1->ack = 1;
-		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
-				    + skb->len - (th->doff<<2));
-	}
+	t1->seq = htonl(seq);
+	t1->ack_seq = htonl(ack);
+	t1->ack = !rst || !th->ack;
+	t1->rst = rst;
+	t1->window = htons(win);
 
 	topt = (__be32 *)(t1 + 1);
 
+	if (ts) {
+		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+		*topt++ = htonl(tcp_time_stamp);
+		*topt++ = htonl(ts);
+	}
+
 #ifdef CONFIG_TCP_MD5SIG
 	if (key) {
 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
@@ -1036,7 +1023,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
 			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
 			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
-			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+			if (rst)
+				TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 			return;
 		}
 	}
@@ -1044,87 +1032,40 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	kfree_skb(buff);
 }
 
-static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
-			    struct tcp_md5sig_key *key)
+static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb), *t1;
-	struct sk_buff *buff;
-	struct flowi fl;
-	struct net *net = dev_net(skb->dst->dev);
-	struct sock *ctl_sk = net->ipv6.tcp_sk;
-	unsigned int tot_len = sizeof(struct tcphdr);
-	__be32 *topt;
-
-	if (ts)
-		tot_len += TCPOLEN_TSTAMP_ALIGNED;
+	struct tcphdr *th = tcp_hdr(skb);
+	u32 seq = 0, ack_seq = 0;
 #ifdef CONFIG_TCP_MD5SIG
-	if (key)
-		tot_len += TCPOLEN_MD5SIG_ALIGNED;
+	struct tcp_md5sig_key *key;
 #endif
 
-	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
-			 GFP_ATOMIC);
-	if (buff == NULL)
+	if (th->rst)
 		return;
 
-	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
-
-	t1 = (struct tcphdr *) skb_push(buff, tot_len);
-
-	/* Swap the send and the receive. */
-	memset(t1, 0, sizeof(*t1));
-	t1->dest = th->source;
-	t1->source = th->dest;
-	t1->doff = tot_len / 4;
-	t1->seq = htonl(seq);
-	t1->ack_seq = htonl(ack);
-	t1->ack = 1;
-	t1->window = htons(win);
-
-	topt = (__be32 *)(t1 + 1);
-
-	if (ts) {
-		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
-				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
-		*topt++ = htonl(tcp_time_stamp);
-		*topt++ = htonl(ts);
-	}
+	if (!ipv6_unicast_destination(skb))
+		return;
 
 #ifdef CONFIG_TCP_MD5SIG
-	if (key) {
-		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
-				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
-		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
-				    &ipv6_hdr(skb)->saddr,
-				    &ipv6_hdr(skb)->daddr, t1);
-	}
+	if (sk)
+		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
+	else
+		key = NULL;
 #endif
 
-	buff->csum = csum_partial((char *)t1, tot_len, 0);
-
-	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
-
-	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
-				    tot_len, IPPROTO_TCP,
-				    buff->csum);
-
-	fl.proto = IPPROTO_TCP;
-	fl.oif = inet6_iif(skb);
-	fl.fl_ip_dport = t1->dest;
-	fl.fl_ip_sport = t1->source;
-	security_skb_classify_flow(skb, &fl);
+	if (th->ack)
+		seq = ntohl(th->ack_seq);
+	else
+		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
+			  (th->doff << 2);
 
-	if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
-		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
-			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
-			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
-			return;
-		}
-	}
+	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1);
+}
 
-	kfree_skb(buff);
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
+			    struct tcp_md5sig_key *key)
+{
+	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0);
 }
 
 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
-- 
cgit v1.2.3-70-g09d2


From f24d43c07e208372aa3d3bff419afbf43ba87698 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 9 Oct 2008 14:51:27 -0700
Subject: udp: complete port availability checking

While looking at UDP port randomization, I noticed it
was litle bit pessimistic, not looking at type of sockets
(IPV6/IPV4) and not looking at bound addresses if any.

We should perform same tests than when binding to a
specific port.

This permits a cleanup of udp_lib_get_port()

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 43 +++++++++++++++++++------------------------
 1 file changed, 19 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 67d8430b4a2..eacf4cfef14 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,14 +122,23 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
 atomic_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
-static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
-					const struct hlist_head udptable[])
+static int udp_lib_lport_inuse(struct net *net, __u16 num,
+			       const struct hlist_head udptable[],
+			       struct sock *sk,
+			       int (*saddr_comp)(const struct sock *sk1,
+						 const struct sock *sk2))
 {
-	struct sock *sk;
+	struct sock *sk2;
 	struct hlist_node *node;
 
-	sk_for_each(sk, node, &udptable[udp_hashfn(net, num)])
-		if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
+	sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)])
+		if (net_eq(sock_net(sk2), net)			&&
+		    sk2 != sk					&&
+		    sk2->sk_hash == num				&&
+		    (!sk2->sk_reuse || !sk->sk_reuse)		&&
+		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+			|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+		    (*saddr_comp)(sk, sk2))
 			return 1;
 	return 0;
 }
@@ -146,9 +155,6 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 					 const struct sock *sk2 )    )
 {
 	struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
-	struct hlist_node *node;
-	struct hlist_head *head;
-	struct sock *sk2;
 	int    error = 1;
 	struct net *net = sock_net(sk);
 
@@ -165,32 +171,21 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		rand = net_random();
 		snum = first = rand % remaining + low;
 		rand |= 1;
-		while (__udp_lib_lport_inuse(net, snum, udptable)) {
+		while (udp_lib_lport_inuse(net, snum, udptable, sk,
+					   saddr_comp)) {
 			do {
 				snum = snum + rand;
 			} while (snum < low || snum > high);
 			if (snum == first)
 				goto fail;
 		}
-	} else {
-		head = &udptable[udp_hashfn(net, snum)];
-
-		sk_for_each(sk2, node, head)
-			if (sk2->sk_hash == snum                             &&
-			    sk2 != sk                                        &&
-			    net_eq(sock_net(sk2), net)			     &&
-			    (!sk2->sk_reuse        || !sk->sk_reuse)         &&
-			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
-			     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (*saddr_comp)(sk, sk2)                             )
-				goto fail;
-	}
+	} else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp))
+		goto fail;
 
 	inet_sk(sk)->num = snum;
 	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {
-		head = &udptable[udp_hashfn(net, snum)];
-		sk_add_node(sk, head);
+		sk_add_node(sk, &udptable[udp_hashfn(net, snum)]);
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	}
 	error = 0;
-- 
cgit v1.2.3-70-g09d2


From fa3e5b4eb8261ae6ee27922881093db973e9d640 Mon Sep 17 00:00:00 2001
From: Guo-Fu Tseng <cooldavid@cooldavid.org>
Date: Thu, 9 Oct 2008 21:11:56 -0700
Subject: tcpv6: fix error with CONFIG_TCP_MD5SIG disabled

This patch fix error with CONFIG_TCP_MD5SIG disabled.

Signed-off-by: Guo-Fu Tseng <cooldavid@cooldavid.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 13c65144a00..e5310c9b84d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1036,9 +1036,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = 0, ack_seq = 0;
-#ifdef CONFIG_TCP_MD5SIG
-	struct tcp_md5sig_key *key;
-#endif
+	struct tcp_md5sig_key *key = NULL;
 
 	if (th->rst)
 		return;
@@ -1049,8 +1047,6 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 #ifdef CONFIG_TCP_MD5SIG
 	if (sk)
 		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
-	else
-		key = NULL;
 #endif
 
 	if (th->ack)
-- 
cgit v1.2.3-70-g09d2


From 561967010edef40f539dacf2aa125e20773ab40b Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:29 -0400
Subject: netlabel: Fix some sparse warnings

Fix a few sparse warnings.  One dealt with a RCU lock being held on error,
another dealt with an improper type caused by a signed/unsigned mixup while
the rest appeared to be caused by using rcu_dereference() in a
list_for_each_entry_rcu() call.  The latter probably isn't a big deal, but
I derive a certain pleasure from knowing that the net/netlabel is nice and
clean.

Thanks to James Morris for pointing out the issues and demonstrating how
to run sparse.

Signed-off-by: Paul Moore <paul.moore@hp.com>
---
 net/netlabel/netlabel_cipso_v4.c   |  4 ++--
 net/netlabel/netlabel_domainhash.c | 12 ++++++------
 net/netlabel/netlabel_unlabeled.c  | 12 ++++++------
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 0aec318bf0e..aaf50032b3a 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -491,7 +491,7 @@ list_start:
 	doi_def = cipso_v4_doi_getdef(doi);
 	if (doi_def == NULL) {
 		ret_val = -EINVAL;
-		goto list_failure;
+		goto list_failure_lock;
 	}
 
 	ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type);
@@ -655,7 +655,7 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
 				  struct netlink_callback *cb)
 {
 	struct netlbl_cipsov4_doiwalk_arg cb_arg;
-	int doi_skip = cb->args[0];
+	u32 doi_skip = cb->args[0];
 
 	cb_arg.nl_cb = cb;
 	cb_arg.skb = skb;
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 643c032a3a5..dc42206c431 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -115,13 +115,13 @@ static u32 netlbl_domhsh_hash(const char *key)
 static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain)
 {
 	u32 bkt;
+	struct list_head *bkt_list;
 	struct netlbl_dom_map *iter;
 
 	if (domain != NULL) {
 		bkt = netlbl_domhsh_hash(domain);
-		list_for_each_entry_rcu(iter,
-				     &rcu_dereference(netlbl_domhsh)->tbl[bkt],
-				     list)
+		bkt_list = &rcu_dereference(netlbl_domhsh)->tbl[bkt];
+		list_for_each_entry_rcu(iter, bkt_list, list)
 			if (iter->valid && strcmp(iter->domain, domain) == 0)
 				return iter;
 	}
@@ -410,6 +410,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
 {
 	int ret_val = -ENOENT;
 	u32 iter_bkt;
+	struct list_head *iter_list;
 	struct netlbl_dom_map *iter_entry;
 	u32 chain_cnt = 0;
 
@@ -417,9 +418,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
 	for (iter_bkt = *skip_bkt;
 	     iter_bkt < rcu_dereference(netlbl_domhsh)->size;
 	     iter_bkt++, chain_cnt = 0) {
-		list_for_each_entry_rcu(iter_entry,
-				&rcu_dereference(netlbl_domhsh)->tbl[iter_bkt],
-				list)
+		iter_list = &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt];
+		list_for_each_entry_rcu(iter_entry, iter_list, list)
 			if (iter_entry->valid) {
 				if (chain_cnt++ < *skip_chain)
 					continue;
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 921c118ead8..cc105a10e3f 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -381,12 +381,12 @@ static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6(
 static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex)
 {
 	u32 bkt;
+	struct list_head *bkt_list;
 	struct netlbl_unlhsh_iface *iter;
 
 	bkt = netlbl_unlhsh_hash(ifindex);
-	list_for_each_entry_rcu(iter,
-				&rcu_dereference(netlbl_unlhsh)->tbl[bkt],
-				list)
+	bkt_list = &rcu_dereference(netlbl_unlhsh)->tbl[bkt];
+	list_for_each_entry_rcu(iter, bkt_list, list)
 		if (iter->valid && iter->ifindex == ifindex)
 			return iter;
 
@@ -1427,6 +1427,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 	struct netlbl_unlhsh_iface *iface;
 	struct netlbl_unlhsh_addr4 *addr4;
 	struct netlbl_unlhsh_addr6 *addr6;
+	struct list_head *iter_list;
 
 	cb_arg.nl_cb = cb;
 	cb_arg.skb = skb;
@@ -1436,9 +1437,8 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 	for (iter_bkt = skip_bkt;
 	     iter_bkt < rcu_dereference(netlbl_unlhsh)->size;
 	     iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) {
-		list_for_each_entry_rcu(iface,
-			        &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt],
-				list) {
+		iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt];
+		list_for_each_entry_rcu(iface, iter_list, list) {
 			if (!iface->valid ||
 			    iter_chain++ < skip_chain)
 				continue;
-- 
cgit v1.2.3-70-g09d2


From 948a72438d4178d0728c4b0a38836d280b846939 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:30 -0400
Subject: netlabel: Remove unneeded in-kernel API functions

After some discussions with the Smack folks, well just Casey, I now have a
better idea of what Smack wants out of NetLabel in the future so I think it
is now safe to do some API "pruning".  If another LSM comes along that
needs this functionality we can always add it back in, but I don't see any
LSMs on the horizon which might make use of these functions.

Thanks to Rami Rosen who suggested removing netlbl_cfg_cipsov4_del() back
in February 2008.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 include/net/netlabel.h       | 13 -------
 net/netlabel/netlabel_kapi.c | 84 ++++++++++++--------------------------------
 2 files changed, 23 insertions(+), 74 deletions(-)

(limited to 'net')

diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index e4d2d6baa98..5303749b709 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -352,12 +352,9 @@ static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr)
 int netlbl_cfg_map_del(const char *domain, struct netlbl_audit *audit_info);
 int netlbl_cfg_unlbl_add_map(const char *domain,
 			     struct netlbl_audit *audit_info);
-int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def,
-			   struct netlbl_audit *audit_info);
 int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
 			       const char *domain,
 			       struct netlbl_audit *audit_info);
-int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info);
 
 /*
  * LSM security attribute operations
@@ -404,22 +401,12 @@ static inline int netlbl_cfg_unlbl_add_map(const char *domain,
 {
 	return -ENOSYS;
 }
-static inline int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def,
-					 struct netlbl_audit *audit_info)
-{
-	return -ENOSYS;
-}
 static inline int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
 					     const char *domain,
 					     struct netlbl_audit *audit_info)
 {
 	return -ENOSYS;
 }
-static inline int netlbl_cfg_cipsov4_del(u32 doi,
-					 struct netlbl_audit *audit_info)
-{
-	return -ENOSYS;
-}
 static inline int netlbl_secattr_catmap_walk(
 	                              struct netlbl_lsm_secattr_catmap *catmap,
 				      u32 offset)
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 39793a1a93a..6c211fe9778 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -82,7 +82,7 @@ int netlbl_cfg_unlbl_add_map(const char *domain,
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
-		goto cfg_unlbl_add_map_failure;
+		return -ENOMEM;
 	if (domain != NULL) {
 		entry->domain = kstrdup(domain, GFP_ATOMIC);
 		if (entry->domain == NULL)
@@ -103,49 +103,6 @@ cfg_unlbl_add_map_failure:
 	return ret_val;
 }
 
-/**
- * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition
- * @doi_def: the DOI definition
- * @audit_info: NetLabel audit information
- *
- * Description:
- * Add a new CIPSOv4 DOI definition to the NetLabel subsystem.  Returns zero on
- * success, negative values on failure.
- *
- */
-int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def,
-			   struct netlbl_audit *audit_info)
-{
-	int ret_val;
-	const char *type_str;
-	struct audit_buffer *audit_buf;
-
-	ret_val = cipso_v4_doi_add(doi_def);
-
-	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
-					      audit_info);
-	if (audit_buf != NULL) {
-		switch (doi_def->type) {
-		case CIPSO_V4_MAP_STD:
-			type_str = "std";
-			break;
-		case CIPSO_V4_MAP_PASS:
-			type_str = "pass";
-			break;
-		default:
-			type_str = "(unknown)";
-		}
-		audit_log_format(audit_buf,
-				 " cipso_doi=%u cipso_type=%s res=%u",
-				 doi_def->doi,
-				 type_str,
-				 ret_val == 0 ? 1 : 0);
-		audit_log_end(audit_buf);
-	}
-
-	return ret_val;
-}
-
 /**
  * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping
  * @doi_def: the DOI definition
@@ -165,10 +122,12 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
 {
 	int ret_val = -ENOMEM;
 	struct netlbl_dom_map *entry;
+	const char *type_str;
+	struct audit_buffer *audit_buf;
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
-		goto cfg_cipsov4_add_map_failure;
+		return -ENOMEM;
 	if (domain != NULL) {
 		entry->domain = kstrdup(domain, GFP_ATOMIC);
 		if (entry->domain == NULL)
@@ -182,7 +141,7 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
 	 * domain mapping for it. */
 
 	rcu_read_lock();
-	ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info);
+	ret_val = cipso_v4_doi_add(doi_def);
 	if (ret_val != 0)
 		goto cfg_cipsov4_add_map_failure_unlock;
 	ret_val = netlbl_domhsh_add(entry, audit_info);
@@ -196,6 +155,24 @@ cfg_cipsov4_add_map_failure_remove_doi:
 	cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free);
 cfg_cipsov4_add_map_failure_unlock:
 	rcu_read_unlock();
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
+					      audit_info);
+	if (audit_buf != NULL) {
+		switch (doi_def->type) {
+		case CIPSO_V4_MAP_STD:
+			type_str = "std";
+			break;
+		case CIPSO_V4_MAP_PASS:
+			type_str = "pass";
+			break;
+		default:
+			type_str = "(unknown)";
+		}
+		audit_log_format(audit_buf,
+				 " cipso_doi=%u cipso_type=%s res=%u",
+				 doi_def->doi, type_str, ret_val == 0 ? 1 : 0);
+		audit_log_end(audit_buf);
+	}
 cfg_cipsov4_add_map_failure:
 	if (entry != NULL)
 		kfree(entry->domain);
@@ -203,21 +180,6 @@ cfg_cipsov4_add_map_failure:
 	return ret_val;
 }
 
-/**
- * netlbl_cfg_cipsov4_del - Removean existing CIPSOv4 DOI definition
- * @doi: the CIPSO DOI value
- * @audit_info: NetLabel audit information
- *
- * Description:
- * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem.
- * Returns zero on success, negative values on failure.
- *
- */
-int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info)
-{
-	return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free);
-}
-
 /*
  * Security Attribute Functions
  */
-- 
cgit v1.2.3-70-g09d2


From dfaebe9825ff34983778f287101bc5f3bce00640 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:31 -0400
Subject: selinux: Fix missing calls to netlbl_skbuff_err()

At some point I think I messed up and dropped the calls to netlbl_skbuff_err()
which are necessary for CIPSO to send error notifications to remote systems.
This patch re-introduces the error handling calls into the SELinux code.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
---
 include/net/netlabel.h              |  6 ++++--
 net/netlabel/netlabel_kapi.c        |  5 +++--
 security/selinux/hooks.c            | 19 +++++++++++++++----
 security/selinux/include/netlabel.h |  9 +++++++++
 security/selinux/netlabel.c         | 20 +++++++++++++++++++-
 5 files changed, 50 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 5303749b709..e16db096126 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -382,7 +382,7 @@ int netlbl_sock_getattr(struct sock *sk,
 int netlbl_skbuff_getattr(const struct sk_buff *skb,
 			  u16 family,
 			  struct netlbl_lsm_secattr *secattr);
-void netlbl_skbuff_err(struct sk_buff *skb, int error);
+void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway);
 
 /*
  * LSM label mapping cache operations
@@ -454,7 +454,9 @@ static inline int netlbl_skbuff_getattr(const struct sk_buff *skb,
 {
 	return -ENOSYS;
 }
-static inline void netlbl_skbuff_err(struct sk_buff *skb, int error)
+static inline void netlbl_skbuff_err(struct sk_buff *skb,
+				     int error,
+				     int gateway)
 {
 	return;
 }
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 6c211fe9778..22faba620e4 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -490,6 +490,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
  * netlbl_skbuff_err - Handle a LSM error on a sk_buff
  * @skb: the packet
  * @error: the error code
+ * @gateway: true if host is acting as a gateway, false otherwise
  *
  * Description:
  * Deal with a LSM problem when handling the packet in @skb, typically this is
@@ -497,10 +498,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
  * according to the packet's labeling protocol.
  *
  */
-void netlbl_skbuff_err(struct sk_buff *skb, int error)
+void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway)
 {
 	if (CIPSO_V4_OPTEXIST(skb))
-		cipso_v4_error(skb, error, 0);
+		cipso_v4_error(skb, error, gateway);
 }
 
 /**
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b520667a24b..a91146a6b37 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4101,6 +4101,8 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
 			return err;
 		err = avc_has_perm(sk_sid, peer_sid,
 				   SECCLASS_PEER, PEER__RECV, &ad);
+		if (err)
+			selinux_netlbl_err(skb, err, 0);
 	} else {
 		err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad);
 		if (err)
@@ -4156,10 +4158,14 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 			return err;
 		err = selinux_inet_sys_rcv_skb(skb->iif, addrp, family,
 					       peer_sid, &ad);
-		if (err)
+		if (err) {
+			selinux_netlbl_err(skb, err, 0);
 			return err;
+		}
 		err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER,
 				   PEER__RECV, &ad);
+		if (err)
+			selinux_netlbl_err(skb, err, 0);
 	}
 
 	if (secmark_active) {
@@ -4396,6 +4402,7 @@ out:
 static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 				       u16 family)
 {
+	int err;
 	char *addrp;
 	u32 peer_sid;
 	struct avc_audit_data ad;
@@ -4419,10 +4426,14 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 	if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0)
 		return NF_DROP;
 
-	if (peerlbl_active)
-		if (selinux_inet_sys_rcv_skb(ifindex, addrp, family,
-					     peer_sid, &ad) != 0)
+	if (peerlbl_active) {
+		err = selinux_inet_sys_rcv_skb(ifindex, addrp, family,
+					       peer_sid, &ad);
+		if (err) {
+			selinux_netlbl_err(skb, err, 1);
 			return NF_DROP;
+		}
+	}
 
 	if (secmark_active)
 		if (avc_has_perm(peer_sid, skb->secmark,
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index 487a7d81fe2..d4e3ac8a7fb 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -39,6 +39,8 @@
 #ifdef CONFIG_NETLABEL
 void selinux_netlbl_cache_invalidate(void);
 
+void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway);
+
 void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec,
 				      int family);
 
@@ -63,6 +65,13 @@ static inline void selinux_netlbl_cache_invalidate(void)
 	return;
 }
 
+static inline void selinux_netlbl_err(struct sk_buff *skb,
+				      int error,
+				      int gateway)
+{
+	return;
+}
+
 static inline void selinux_netlbl_sk_security_reset(
 					       struct sk_security_struct *ssec,
 					       int family)
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index b9ce5fcf343..4053f7fc95f 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -107,6 +107,24 @@ void selinux_netlbl_cache_invalidate(void)
 	netlbl_cache_invalidate();
 }
 
+/**
+ * selinux_netlbl_err - Handle a NetLabel packet error
+ * @skb: the packet
+ * @error: the error code
+ * @gateway: true if host is acting as a gateway, false otherwise
+ *
+ * Description:
+ * When a packet is dropped due to a call to avc_has_perm() pass the error
+ * code to the NetLabel subsystem so any protocol specific processing can be
+ * done.  This is safe to call even if you are unsure if NetLabel labeling is
+ * present on the packet, NetLabel is smart enough to only act when it should.
+ *
+ */
+void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway)
+{
+	netlbl_skbuff_err(skb, error, gateway);
+}
+
 /**
  * selinux_netlbl_sk_security_reset - Reset the NetLabel fields
  * @ssec: the sk_security_struct
@@ -289,7 +307,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
 		return 0;
 
 	if (nlbl_sid != SECINITSID_UNLABELED)
-		netlbl_skbuff_err(skb, rc);
+		netlbl_skbuff_err(skb, rc, 0);
 	return rc;
 }
 
-- 
cgit v1.2.3-70-g09d2


From b1edeb102397546438ab4624489c6ccd7b410d97 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:31 -0400
Subject: netlabel: Replace protocol/NetLabel linking with refrerence counts

NetLabel has always had a list of backpointers in the CIPSO DOI definition
structure which pointed to the NetLabel LSM domain mapping structures which
referenced the CIPSO DOI struct.  The rationale for this was that when an
administrator removed a CIPSO DOI from the system all of the associated
NetLabel LSM domain mappings should be removed as well; a list of
backpointers made this a simple operation.

Unfortunately, while the backpointers did make the removal easier they were
a bit of a mess from an implementation point of view which was making
further development difficult.  Since the removal of a CIPSO DOI is a
realtively rare event it seems to make sense to remove this backpointer
list as the optimization was hurting us more then it was helping.  However,
we still need to be able to track when a CIPSO DOI definition is being used
so replace the backpointer list with a reference count.  In order to
preserve the current functionality of removing the associated LSM domain
mappings when a CIPSO DOI is removed we walk the LSM domain mapping table,
removing the relevant entries.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 include/net/cipso_ipv4.h           |  21 ++--
 net/ipv4/cipso_ipv4.c              | 235 ++++++++++++++++---------------------
 net/netlabel/netlabel_cipso_v4.c   |  77 ++++++------
 net/netlabel/netlabel_domainhash.c |  95 ++++++++-------
 net/netlabel/netlabel_domainhash.h |   2 +
 net/netlabel/netlabel_kapi.c       |  43 ++++---
 net/netlabel/netlabel_mgmt.c       |  24 +---
 security/smack/smackfs.c           |   4 +-
 8 files changed, 235 insertions(+), 266 deletions(-)

(limited to 'net')

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index a6bb94530cf..5fe6556fb3c 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -40,6 +40,7 @@
 #include <linux/net.h>
 #include <linux/skbuff.h>
 #include <net/netlabel.h>
+#include <asm/atomic.h>
 
 /* known doi values */
 #define CIPSO_V4_DOI_UNKNOWN          0x00000000
@@ -79,10 +80,9 @@ struct cipso_v4_doi {
 	} map;
 	u8 tags[CIPSO_V4_TAG_MAXCNT];
 
-	u32 valid;
+	atomic_t refcount;
 	struct list_head list;
 	struct rcu_head rcu;
-	struct list_head dom_list;
 };
 
 /* Standard CIPSO mapping table */
@@ -128,25 +128,26 @@ extern int cipso_v4_rbm_strictvalid;
 
 #ifdef CONFIG_NETLABEL
 int cipso_v4_doi_add(struct cipso_v4_doi *doi_def);
-int cipso_v4_doi_remove(u32 doi,
-			struct netlbl_audit *audit_info,
-			void (*callback) (struct rcu_head * head));
+void cipso_v4_doi_free(struct cipso_v4_doi *doi_def);
+int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info);
 struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi);
+void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def);
 int cipso_v4_doi_walk(u32 *skip_cnt,
 		     int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
 	             void *cb_arg);
-int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain);
-int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
-			       const char *domain);
 #else
 static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
 {
 	return -ENOSYS;
 }
 
+static inline void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
+{
+	return;
+}
+
 static inline int cipso_v4_doi_remove(u32 doi,
-				    struct netlbl_audit *audit_info,
-				    void (*callback) (struct rcu_head * head))
+				      struct netlbl_audit *audit_info)
 {
 	return 0;
 }
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2c0e4572cc9..bf87eddfec3 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -47,17 +47,7 @@
 #include <asm/bug.h>
 #include <asm/unaligned.h>
 
-struct cipso_v4_domhsh_entry {
-	char *domain;
-	u32 valid;
-	struct list_head list;
-	struct rcu_head rcu;
-};
-
 /* List of available DOI definitions */
-/* XXX - Updates should be minimal so having a single lock for the
- * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
- * okay. */
 /* XXX - This currently assumes a minimal number of different DOIs in use,
  * if in practice there are a lot of different DOIs this list should
  * probably be turned into a hash table or something similar so we
@@ -193,25 +183,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
 		bitmap[byte_spot] &= ~bitmask;
 }
 
-/**
- * cipso_v4_doi_domhsh_free - Frees a domain list entry
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that the memory allocated to a domain list entry can be released
- * safely.
- *
- */
-static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
-{
-	struct cipso_v4_domhsh_entry *ptr;
-
-	ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
-	kfree(ptr->domain);
-	kfree(ptr);
-}
-
 /**
  * cipso_v4_cache_entry_free - Frees a cache entry
  * @entry: the entry to free
@@ -457,7 +428,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
 	struct cipso_v4_doi *iter;
 
 	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
-		if (iter->doi == doi && iter->valid)
+		if (iter->doi == doi && atomic_read(&iter->refcount))
 			return iter;
 	return NULL;
 }
@@ -501,9 +472,8 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
 		}
 	}
 
-	doi_def->valid = 1;
+	atomic_set(&doi_def->refcount, 1);
 	INIT_RCU_HEAD(&doi_def->rcu);
-	INIT_LIST_HEAD(&doi_def->dom_list);
 
 	spin_lock(&cipso_v4_doi_list_lock);
 	if (cipso_v4_doi_search(doi_def->doi) != NULL)
@@ -518,60 +488,130 @@ doi_add_failure:
 	return -EEXIST;
 }
 
+/**
+ * cipso_v4_doi_free - Frees a DOI definition
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function frees all of the memory associated with a DOI definition.
+ *
+ */
+void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL)
+		return;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_STD:
+		kfree(doi_def->map.std->lvl.cipso);
+		kfree(doi_def->map.std->lvl.local);
+		kfree(doi_def->map.std->cat.cipso);
+		kfree(doi_def->map.std->cat.local);
+		break;
+	}
+	kfree(doi_def);
+}
+
+/**
+ * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to the DOI definition can be released
+ * safely.
+ *
+ */
+static void cipso_v4_doi_free_rcu(struct rcu_head *entry)
+{
+	struct cipso_v4_doi *doi_def;
+
+	doi_def = container_of(entry, struct cipso_v4_doi, rcu);
+	cipso_v4_doi_free(doi_def);
+}
+
 /**
  * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
  * @doi: the DOI value
  * @audit_secid: the LSM secid to use in the audit message
- * @callback: the DOI cleanup/free callback
  *
  * Description:
- * Removes a DOI definition from the CIPSO engine, @callback is called to
- * free any memory.  The NetLabel routines will be called to release their own
- * LSM domain mappings as well as our own domain list.  Returns zero on
- * success and negative values on failure.
+ * Removes a DOI definition from the CIPSO engine.  The NetLabel routines will
+ * be called to release their own LSM domain mappings as well as our own
+ * domain list.  Returns zero on success and negative values on failure.
  *
  */
-int cipso_v4_doi_remove(u32 doi,
-			struct netlbl_audit *audit_info,
-			void (*callback) (struct rcu_head * head))
+int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
 {
 	struct cipso_v4_doi *doi_def;
-	struct cipso_v4_domhsh_entry *dom_iter;
 
 	spin_lock(&cipso_v4_doi_list_lock);
 	doi_def = cipso_v4_doi_search(doi);
-	if (doi_def != NULL) {
-		doi_def->valid = 0;
-		list_del_rcu(&doi_def->list);
+	if (doi_def == NULL) {
 		spin_unlock(&cipso_v4_doi_list_lock);
-		rcu_read_lock();
-		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
-			if (dom_iter->valid)
-				netlbl_cfg_map_del(dom_iter->domain,
-						   audit_info);
-		rcu_read_unlock();
-		cipso_v4_cache_invalidate();
-		call_rcu(&doi_def->rcu, callback);
-		return 0;
+		return -ENOENT;
+	}
+	if (!atomic_dec_and_test(&doi_def->refcount)) {
+		spin_unlock(&cipso_v4_doi_list_lock);
+		return -EBUSY;
 	}
+	list_del_rcu(&doi_def->list);
 	spin_unlock(&cipso_v4_doi_list_lock);
 
-	return -ENOENT;
+	cipso_v4_cache_invalidate();
+	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+
+	return 0;
 }
 
 /**
- * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition
  * @doi: the DOI value
  *
  * Description:
  * Searches for a valid DOI definition and if one is found it is returned to
  * the caller.  Otherwise NULL is returned.  The caller must ensure that
- * rcu_read_lock() is held while accessing the returned definition.
+ * rcu_read_lock() is held while accessing the returned definition and the DOI
+ * definition reference count is decremented when the caller is done.
  *
  */
 struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
 {
-	return cipso_v4_doi_search(doi);
+	struct cipso_v4_doi *doi_def;
+
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_search(doi);
+	if (doi_def == NULL)
+		goto doi_getdef_return;
+	if (!atomic_inc_not_zero(&doi_def->refcount))
+		doi_def = NULL;
+
+doi_getdef_return:
+	rcu_read_unlock();
+	return doi_def;
+}
+
+/**
+ * cipso_v4_doi_putdef - Releases a reference for the given DOI definition
+ * @doi_def: the DOI definition
+ *
+ * Description:
+ * Releases a DOI definition reference obtained from cipso_v4_doi_getdef().
+ *
+ */
+void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL)
+		return;
+
+	if (!atomic_dec_and_test(&doi_def->refcount))
+		return;
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_del_rcu(&doi_def->list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+
+	cipso_v4_cache_invalidate();
+	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
 }
 
 /**
@@ -597,7 +637,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
-		if (iter_doi->valid) {
+		if (atomic_read(&iter_doi->refcount) > 0) {
 			if (doi_cnt++ < *skip_cnt)
 				continue;
 			ret_val = callback(iter_doi, cb_arg);
@@ -613,85 +653,6 @@ doi_walk_return:
 	return ret_val;
 }
 
-/**
- * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to add
- *
- * Description:
- * Adds the @domain to the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel).  This function
- * does allocate memory.  Returns zero on success, negative values on failure.
- *
- */
-int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
-{
-	struct cipso_v4_domhsh_entry *iter;
-	struct cipso_v4_domhsh_entry *new_dom;
-
-	new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
-	if (new_dom == NULL)
-		return -ENOMEM;
-	if (domain) {
-		new_dom->domain = kstrdup(domain, GFP_KERNEL);
-		if (new_dom->domain == NULL) {
-			kfree(new_dom);
-			return -ENOMEM;
-		}
-	}
-	new_dom->valid = 1;
-	INIT_RCU_HEAD(&new_dom->rcu);
-
-	spin_lock(&cipso_v4_doi_list_lock);
-	list_for_each_entry(iter, &doi_def->dom_list, list)
-		if (iter->valid &&
-		    ((domain != NULL && iter->domain != NULL &&
-		      strcmp(iter->domain, domain) == 0) ||
-		     (domain == NULL && iter->domain == NULL))) {
-			spin_unlock(&cipso_v4_doi_list_lock);
-			kfree(new_dom->domain);
-			kfree(new_dom);
-			return -EEXIST;
-		}
-	list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
-	spin_unlock(&cipso_v4_doi_list_lock);
-
-	return 0;
-}
-
-/**
- * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to remove
- *
- * Description:
- * Removes the @domain from the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel).   Returns zero
- * on success and negative values on error.
- *
- */
-int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
-			       const char *domain)
-{
-	struct cipso_v4_domhsh_entry *iter;
-
-	spin_lock(&cipso_v4_doi_list_lock);
-	list_for_each_entry(iter, &doi_def->dom_list, list)
-		if (iter->valid &&
-		    ((domain != NULL && iter->domain != NULL &&
-		      strcmp(iter->domain, domain) == 0) ||
-		     (domain == NULL && iter->domain == NULL))) {
-			iter->valid = 0;
-			list_del_rcu(&iter->list);
-			spin_unlock(&cipso_v4_doi_list_lock);
-			call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
-			return 0;
-		}
-	spin_unlock(&cipso_v4_doi_list_lock);
-
-	return -ENOENT;
-}
-
 /*
  * Label Mapping Functions
  */
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index aaf50032b3a..5c4f60bbc82 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -43,6 +43,7 @@
 #include "netlabel_user.h"
 #include "netlabel_cipso_v4.h"
 #include "netlabel_mgmt.h"
+#include "netlabel_domainhash.h"
 
 /* Argument struct for cipso_v4_doi_walk() */
 struct netlbl_cipsov4_doiwalk_arg {
@@ -51,6 +52,12 @@ struct netlbl_cipsov4_doiwalk_arg {
 	u32 seq;
 };
 
+/* Argument struct for netlbl_domhsh_walk() */
+struct netlbl_domhsh_walk_arg {
+	struct netlbl_audit *audit_info;
+	u32 doi;
+};
+
 /* NetLabel Generic NETLINK CIPSOv4 family */
 static struct genl_family netlbl_cipsov4_gnl_family = {
 	.id = GENL_ID_GENERATE,
@@ -80,32 +87,6 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1
  * Helper Functions
  */
 
-/**
- * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that the memory allocated to the DOI definition can be released
- * safely.
- *
- */
-void netlbl_cipsov4_doi_free(struct rcu_head *entry)
-{
-	struct cipso_v4_doi *ptr;
-
-	ptr = container_of(entry, struct cipso_v4_doi, rcu);
-	switch (ptr->type) {
-	case CIPSO_V4_MAP_STD:
-		kfree(ptr->map.std->lvl.cipso);
-		kfree(ptr->map.std->lvl.local);
-		kfree(ptr->map.std->cat.cipso);
-		kfree(ptr->map.std->cat.local);
-		break;
-	}
-	kfree(ptr);
-}
-
 /**
  * netlbl_cipsov4_add_common - Parse the common sections of a ADD message
  * @info: the Generic NETLINK info block
@@ -342,7 +323,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
 
 add_std_failure:
 	if (doi_def)
-		netlbl_cipsov4_doi_free(&doi_def->rcu);
+		cipso_v4_doi_free(doi_def);
 	return ret_val;
 }
 
@@ -379,7 +360,7 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info)
 	return 0;
 
 add_pass_failure:
-	netlbl_cipsov4_doi_free(&doi_def->rcu);
+	cipso_v4_doi_free(doi_def);
 	return ret_val;
 }
 
@@ -667,6 +648,29 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
 	return skb->len;
 }
 
+/**
+ * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE
+ * @entry: LSM domain mapping entry
+ * @arg: the netlbl_domhsh_walk_arg structure
+ *
+ * Description:
+ * This function is intended for use by netlbl_cipsov4_remove() as the callback
+ * for the netlbl_domhsh_walk() function; it removes LSM domain map entries
+ * which are associated with the CIPSO DOI specified in @arg.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg)
+{
+	struct netlbl_domhsh_walk_arg *cb_arg = arg;
+
+	if (entry->type == NETLBL_NLTYPE_CIPSOV4 &&
+	    entry->type_def.cipsov4->doi == cb_arg->doi)
+		return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info);
+
+	return 0;
+}
+
 /**
  * netlbl_cipsov4_remove - Handle a REMOVE message
  * @skb: the NETLINK buffer
@@ -681,8 +685,11 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
 {
 	int ret_val = -EINVAL;
 	u32 doi = 0;
+	struct netlbl_domhsh_walk_arg cb_arg;
 	struct audit_buffer *audit_buf;
 	struct netlbl_audit audit_info;
+	u32 skip_bkt = 0;
+	u32 skip_chain = 0;
 
 	if (!info->attrs[NLBL_CIPSOV4_A_DOI])
 		return -EINVAL;
@@ -690,11 +697,15 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
 	doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
 	netlbl_netlink_auditinfo(skb, &audit_info);
 
-	ret_val = cipso_v4_doi_remove(doi,
-				      &audit_info,
-				      netlbl_cipsov4_doi_free);
-	if (ret_val == 0)
-		atomic_dec(&netlabel_mgmt_protocount);
+	cb_arg.doi = doi;
+	cb_arg.audit_info = &audit_info;
+	ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
+				     netlbl_cipsov4_remove_cb, &cb_arg);
+	if (ret_val == 0 || ret_val == -ENOENT) {
+		ret_val = cipso_v4_doi_remove(doi, &audit_info);
+		if (ret_val == 0)
+			atomic_dec(&netlabel_mgmt_protocount);
+	}
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
 					      &audit_info);
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index dc42206c431..0243f0c57b4 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -217,20 +217,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 	u32 bkt;
 	struct audit_buffer *audit_buf;
 
-	switch (entry->type) {
-	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = 0;
-		break;
-	case NETLBL_NLTYPE_CIPSOV4:
-		ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
-						  entry->domain);
-		break;
-	default:
-		return -EINVAL;
-	}
-	if (ret_val != 0)
-		return ret_val;
-
 	entry->valid = 1;
 	INIT_RCU_HEAD(&entry->rcu);
 
@@ -271,16 +257,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 	}
 	rcu_read_unlock();
 
-	if (ret_val != 0) {
-		switch (entry->type) {
-		case NETLBL_NLTYPE_CIPSOV4:
-			if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
-						       entry->domain) != 0)
-				BUG();
-			break;
-		}
-	}
-
 	return ret_val;
 }
 
@@ -302,35 +278,26 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
 }
 
 /**
- * netlbl_domhsh_remove - Removes an entry from the domain hash table
- * @domain: the domain to remove
+ * netlbl_domhsh_remove_entry - Removes a given entry from the domain table
+ * @entry: the entry to remove
  * @audit_info: NetLabel audit information
  *
  * Description:
  * Removes an entry from the domain hash table and handles any updates to the
- * lower level protocol handler (i.e. CIPSO).  Returns zero on success,
- * negative on failure.
+ * lower level protocol handler (i.e. CIPSO).  Caller is responsible for
+ * ensuring that the RCU read lock is held.  Returns zero on success, negative
+ * on failure.
  *
  */
-int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
+int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
+			       struct netlbl_audit *audit_info)
 {
-	int ret_val = -ENOENT;
-	struct netlbl_dom_map *entry;
+	int ret_val = 0;
 	struct audit_buffer *audit_buf;
 
-	rcu_read_lock();
-	if (domain)
-		entry = netlbl_domhsh_search(domain);
-	else
-		entry = netlbl_domhsh_search_def(domain);
 	if (entry == NULL)
-		goto remove_return;
-	switch (entry->type) {
-	case NETLBL_NLTYPE_CIPSOV4:
-		cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
-					   entry->domain);
-		break;
-	}
+		return -ENOENT;
+
 	spin_lock(&netlbl_domhsh_lock);
 	if (entry->valid) {
 		entry->valid = 0;
@@ -338,8 +305,8 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
 			list_del_rcu(&entry->list);
 		else
 			rcu_assign_pointer(netlbl_domhsh_def, NULL);
-		ret_val = 0;
-	}
+	} else
+		ret_val = -ENOENT;
 	spin_unlock(&netlbl_domhsh_lock);
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
@@ -351,10 +318,42 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
 		audit_log_end(audit_buf);
 	}
 
-remove_return:
-	rcu_read_unlock();
-	if (ret_val == 0)
+	if (ret_val == 0) {
+		switch (entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			cipso_v4_doi_putdef(entry->type_def.cipsov4);
+			break;
+		}
 		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
+	}
+
+	return ret_val;
+}
+
+/**
+ * netlbl_domhsh_remove - Removes an entry from the domain hash table
+ * @domain: the domain to remove
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Removes an entry from the domain hash table and handles any updates to the
+ * lower level protocol handler (i.e. CIPSO).  Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
+{
+	int ret_val;
+	struct netlbl_dom_map *entry;
+
+	rcu_read_lock();
+	if (domain)
+		entry = netlbl_domhsh_search(domain);
+	else
+		entry = netlbl_domhsh_search_def(domain);
+	ret_val = netlbl_domhsh_remove_entry(entry, audit_info);
+	rcu_read_unlock();
+
 	return ret_val;
 }
 
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 8220990ceb9..afcc41a7432 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -61,6 +61,8 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 		      struct netlbl_audit *audit_info);
 int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
 			      struct netlbl_audit *audit_info);
+int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
+			       struct netlbl_audit *audit_info);
 int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
 int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
 struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 22faba620e4..7d8ecea9391 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -121,10 +121,15 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
 			       struct netlbl_audit *audit_info)
 {
 	int ret_val = -ENOMEM;
+	u32 doi;
+	u32 doi_type;
 	struct netlbl_dom_map *entry;
 	const char *type_str;
 	struct audit_buffer *audit_buf;
 
+	doi = doi_def->doi;
+	doi_type = doi_def->type;
+
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
 		return -ENOMEM;
@@ -133,32 +138,25 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
 		if (entry->domain == NULL)
 			goto cfg_cipsov4_add_map_failure;
 	}
-	entry->type = NETLBL_NLTYPE_CIPSOV4;
-	entry->type_def.cipsov4 = doi_def;
-
-	/* Grab a RCU read lock here so nothing happens to the doi_def variable
-	 * between adding it to the CIPSOv4 protocol engine and adding a
-	 * domain mapping for it. */
 
-	rcu_read_lock();
 	ret_val = cipso_v4_doi_add(doi_def);
 	if (ret_val != 0)
-		goto cfg_cipsov4_add_map_failure_unlock;
+		goto cfg_cipsov4_add_map_failure_remove_doi;
+	entry->type = NETLBL_NLTYPE_CIPSOV4;
+	entry->type_def.cipsov4 = cipso_v4_doi_getdef(doi);
+	if (entry->type_def.cipsov4 == NULL) {
+		ret_val = -ENOENT;
+		goto cfg_cipsov4_add_map_failure_remove_doi;
+	}
 	ret_val = netlbl_domhsh_add(entry, audit_info);
 	if (ret_val != 0)
-		goto cfg_cipsov4_add_map_failure_remove_doi;
-	rcu_read_unlock();
-
-	return 0;
+		goto cfg_cipsov4_add_map_failure_release_doi;
 
-cfg_cipsov4_add_map_failure_remove_doi:
-	cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free);
-cfg_cipsov4_add_map_failure_unlock:
-	rcu_read_unlock();
+cfg_cipsov4_add_map_return:
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
 					      audit_info);
 	if (audit_buf != NULL) {
-		switch (doi_def->type) {
+		switch (doi_type) {
 		case CIPSO_V4_MAP_STD:
 			type_str = "std";
 			break;
@@ -170,14 +168,21 @@ cfg_cipsov4_add_map_failure_unlock:
 		}
 		audit_log_format(audit_buf,
 				 " cipso_doi=%u cipso_type=%s res=%u",
-				 doi_def->doi, type_str, ret_val == 0 ? 1 : 0);
+				 doi, type_str, ret_val == 0 ? 1 : 0);
 		audit_log_end(audit_buf);
 	}
+
+	return ret_val;
+
+cfg_cipsov4_add_map_failure_release_doi:
+	cipso_v4_doi_putdef(doi_def);
+cfg_cipsov4_add_map_failure_remove_doi:
+	cipso_v4_doi_remove(doi, audit_info);
 cfg_cipsov4_add_map_failure:
 	if (entry != NULL)
 		kfree(entry->domain);
 	kfree(entry);
-	return ret_val;
+	goto cfg_cipsov4_add_map_return;
 }
 
 /*
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 44be5d5261f..c4e18c7bc0c 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -122,18 +122,12 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
 			goto add_failure;
 
 		tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
-		/* We should be holding a rcu_read_lock() here while we hold
-		 * the result but since the entry will always be deleted when
-		 * the CIPSO DOI is deleted we aren't going to keep the
-		 * lock. */
-		rcu_read_lock();
 		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
-		if (entry->type_def.cipsov4 == NULL) {
-			rcu_read_unlock();
+		if (entry->type_def.cipsov4 == NULL)
 			goto add_failure;
-		}
 		ret_val = netlbl_domhsh_add(entry, &audit_info);
-		rcu_read_unlock();
+		if (ret_val != 0)
+			cipso_v4_doi_putdef(entry->type_def.cipsov4);
 		break;
 	default:
 		goto add_failure;
@@ -294,18 +288,12 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 			goto adddef_failure;
 
 		tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
-		/* We should be holding a rcu_read_lock() here while we hold
-		 * the result but since the entry will always be deleted when
-		 * the CIPSO DOI is deleted we aren't going to keep the
-		 * lock. */
-		rcu_read_lock();
 		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
-		if (entry->type_def.cipsov4 == NULL) {
-			rcu_read_unlock();
+		if (entry->type_def.cipsov4 == NULL)
 			goto adddef_failure;
-		}
 		ret_val = netlbl_domhsh_add_default(entry, &audit_info);
-		rcu_read_unlock();
+		if (ret_val != 0)
+			cipso_v4_doi_putdef(entry->type_def.cipsov4);
 		break;
 	default:
 		goto adddef_failure;
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 271a835fbbe..9733f8eb1a2 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -343,9 +343,11 @@ static void smk_cipso_doi(void)
 		doip->tags[rc] = CIPSO_V4_TAG_INVALID;
 
 	rc = netlbl_cfg_cipsov4_add_map(doip, NULL, &audit_info);
-	if (rc != 0)
+	if (rc != 0) {
 		printk(KERN_WARNING "%s:%d add rc = %d\n",
 		       __func__, __LINE__, rc);
+		kfree(doip);
+	}
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 61e1068219950c672ce979719ad2be3aadb00d7d Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:32 -0400
Subject: netlabel: Add a generic way to create ordered linked lists of network
 addrs

Create an ordered IP address linked list mechanism similar to the core
kernel's linked list construct.  The idea behind this list functionality
is to create an extensibile linked list ordered by IP address mask to
ease the matching of network addresses.  The linked list is ordered with
larger address masks at the front of the list and shorter address masks
at the end to facilitate overriding network entries with individual host
or subnet entries.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 net/netlabel/Makefile             |   3 +-
 net/netlabel/netlabel_addrlist.c  | 258 ++++++++++++++++++++++++++++
 net/netlabel/netlabel_addrlist.h  | 174 +++++++++++++++++++
 net/netlabel/netlabel_unlabeled.c | 350 +++++++++++++++-----------------------
 4 files changed, 569 insertions(+), 216 deletions(-)
 create mode 100644 net/netlabel/netlabel_addrlist.c
 create mode 100644 net/netlabel/netlabel_addrlist.h

(limited to 'net')

diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index 8af18c0a47d..ea750e9df65 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -5,7 +5,8 @@
 #
 
 # base objects
-obj-y	:= netlabel_user.o netlabel_kapi.o netlabel_domainhash.o
+obj-y	:= netlabel_user.o netlabel_kapi.o
+obj-y	+= netlabel_domainhash.o netlabel_addrlist.o
 
 # management objects
 obj-y	+= netlabel_mgmt.o
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
new file mode 100644
index 00000000000..dd928aa52db
--- /dev/null
+++ b/net/netlabel/netlabel_addrlist.c
@@ -0,0 +1,258 @@
+/*
+ * NetLabel Network Address Lists
+ *
+ * This file contains network address list functions used to manage ordered
+ * lists of network addresses for use by the NetLabel subsystem.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+#include "netlabel_addrlist.h"
+
+/*
+ * Address List Functions
+ */
+
+/**
+ * netlbl_af4list_search - Search for a matching IPv4 address entry
+ * @addr: IPv4 address
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv4 address list given by @head.  If a matching address entry
+ * is found it is returned, otherwise NULL is returned.  The caller is
+ * responsible for calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
+					     struct list_head *head)
+{
+	struct netlbl_af4list *iter;
+
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid && (addr & iter->mask) == iter->addr)
+			return iter;
+
+	return NULL;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_search - Search for a matching IPv6 address entry
+ * @addr: IPv6 address
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv6 address list given by @head.  If a matching address entry
+ * is found it is returned, otherwise NULL is returned.  The caller is
+ * responsible for calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
+					     struct list_head *head)
+{
+	struct netlbl_af6list *iter;
+
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid &&
+		    ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0)
+			return iter;
+
+	return NULL;
+}
+#endif /* IPv6 */
+
+/**
+ * netlbl_af4list_add - Add a new IPv4 address entry to a list
+ * @entry: address entry
+ * @head: the list head
+ *
+ * Description:
+ * Add a new address entry to the list pointed to by @head.  On success zero is
+ * returned, otherwise a negative value is returned.  The caller is responsible
+ * for calling the necessary locking functions.
+ *
+ */
+int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head)
+{
+	struct netlbl_af4list *iter;
+
+	iter = netlbl_af4list_search(entry->addr, head);
+	if (iter != NULL &&
+	    iter->addr == entry->addr && iter->mask == entry->mask)
+		return -EEXIST;
+
+	/* in order to speed up address searches through the list (the common
+	 * case) we need to keep the list in order based on the size of the
+	 * address mask such that the entry with the widest mask (smallest
+	 * numerical value) appears first in the list */
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid &&
+		    ntohl(entry->mask) > ntohl(iter->mask)) {
+			__list_add_rcu(&entry->list,
+				       iter->list.prev,
+				       &iter->list);
+			return 0;
+		}
+	list_add_tail_rcu(&entry->list, head);
+	return 0;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_add - Add a new IPv6 address entry to a list
+ * @entry: address entry
+ * @head: the list head
+ *
+ * Description:
+ * Add a new address entry to the list pointed to by @head.  On success zero is
+ * returned, otherwise a negative value is returned.  The caller is responsible
+ * for calling the necessary locking functions.
+ *
+ */
+int netlbl_af6list_add(struct netlbl_af6list *entry, struct list_head *head)
+{
+	struct netlbl_af6list *iter;
+
+	iter = netlbl_af6list_search(&entry->addr, head);
+	if (iter != NULL &&
+	    ipv6_addr_equal(&iter->addr, &entry->addr) &&
+	    ipv6_addr_equal(&iter->mask, &entry->mask))
+		return -EEXIST;
+
+	/* in order to speed up address searches through the list (the common
+	 * case) we need to keep the list in order based on the size of the
+	 * address mask such that the entry with the widest mask (smallest
+	 * numerical value) appears first in the list */
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid &&
+		    ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) {
+			__list_add_rcu(&entry->list,
+				       iter->list.prev,
+				       &iter->list);
+			return 0;
+		}
+	list_add_tail_rcu(&entry->list, head);
+	return 0;
+}
+#endif /* IPv6 */
+
+/**
+ * netlbl_af4list_remove_entry - Remove an IPv4 address entry
+ * @entry: address entry
+ *
+ * Description:
+ * Remove the specified IP address entry.  The caller is responsible for
+ * calling the necessary locking functions.
+ *
+ */
+void netlbl_af4list_remove_entry(struct netlbl_af4list *entry)
+{
+	entry->valid = 0;
+	list_del_rcu(&entry->list);
+}
+
+/**
+ * netlbl_af4list_remove - Remove an IPv4 address entry
+ * @addr: IP address
+ * @mask: IP address mask
+ * @head: the list head
+ *
+ * Description:
+ * Remove an IP address entry from the list pointed to by @head.  Returns the
+ * entry on success, NULL on failure.  The caller is responsible for calling
+ * the necessary locking functions.
+ *
+ */
+struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
+					     struct list_head *head)
+{
+	struct netlbl_af4list *entry;
+
+	entry = netlbl_af4list_search(addr, head);
+	if (entry != NULL && entry->addr == addr && entry->mask == mask) {
+		netlbl_af4list_remove_entry(entry);
+		return entry;
+	}
+
+	return NULL;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_remove_entry - Remove an IPv6 address entry
+ * @entry: address entry
+ *
+ * Description:
+ * Remove the specified IP address entry.  The caller is responsible for
+ * calling the necessary locking functions.
+ *
+ */
+void netlbl_af6list_remove_entry(struct netlbl_af6list *entry)
+{
+	entry->valid = 0;
+	list_del_rcu(&entry->list);
+}
+
+/**
+ * netlbl_af6list_remove - Remove an IPv6 address entry
+ * @addr: IP address
+ * @mask: IP address mask
+ * @head: the list head
+ *
+ * Description:
+ * Remove an IP address entry from the list pointed to by @head.  Returns the
+ * entry on success, NULL on failure.  The caller is responsible for calling
+ * the necessary locking functions.
+ *
+ */
+struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
+					     const struct in6_addr *mask,
+					     struct list_head *head)
+{
+	struct netlbl_af6list *entry;
+
+	entry = netlbl_af6list_search(addr, head);
+	if (entry != NULL &&
+	    ipv6_addr_equal(&entry->addr, addr) &&
+	    ipv6_addr_equal(&entry->mask, mask)) {
+		netlbl_af6list_remove_entry(entry);
+		return entry;
+	}
+
+	return NULL;
+}
+#endif /* IPv6 */
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
new file mode 100644
index 00000000000..0c41df057fa
--- /dev/null
+++ b/net/netlabel/netlabel_addrlist.h
@@ -0,0 +1,174 @@
+/*
+ * NetLabel Network Address Lists
+ *
+ * This file contains network address list functions used to manage ordered
+ * lists of network addresses for use by the NetLabel subsystem.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_ADDRLIST_H
+#define _NETLABEL_ADDRLIST_H
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/in6.h>
+
+/**
+ * struct netlbl_af4list - NetLabel IPv4 address list
+ * @addr: IPv4 address
+ * @mask: IPv4 address mask
+ * @valid: valid flag
+ * @list: list structure, used internally
+ */
+struct netlbl_af4list {
+	__be32 addr;
+	__be32 mask;
+
+	u32 valid;
+	struct list_head list;
+};
+
+/**
+ * struct netlbl_af6list - NetLabel IPv6 address list
+ * @addr: IPv6 address
+ * @mask: IPv6 address mask
+ * @valid: valid flag
+ * @list: list structure, used internally
+ */
+struct netlbl_af6list {
+	struct in6_addr addr;
+	struct in6_addr mask;
+
+	u32 valid;
+	struct list_head list;
+};
+
+#define __af4list_entry(ptr) container_of(ptr, struct netlbl_af4list, list)
+
+static inline struct netlbl_af4list *__af4list_valid(struct list_head *s,
+						     struct list_head *h)
+{
+	struct list_head *i = s;
+	struct netlbl_af4list *n = __af4list_entry(s);
+	while (i != h && !n->valid) {
+		i = i->next;
+		n = __af4list_entry(i);
+	}
+	return n;
+}
+
+static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
+							 struct list_head *h)
+{
+	struct list_head *i = s;
+	struct netlbl_af4list *n = __af4list_entry(s);
+	while (i != h && !n->valid) {
+		i = rcu_dereference(i->next);
+		n = __af4list_entry(i);
+	}
+	return n;
+}
+
+#define netlbl_af4list_foreach(iter, head)				\
+	for (iter = __af4list_valid((head)->next, head);		\
+	     prefetch(iter->list.next), &iter->list != (head);		\
+	     iter = __af4list_valid(iter->list.next, head))
+
+#define netlbl_af4list_foreach_rcu(iter, head)				\
+	for (iter = __af4list_valid_rcu((head)->next, head);		\
+	     prefetch(iter->list.next),	&iter->list != (head);		\
+	     iter = __af4list_valid_rcu(iter->list.next, head))
+
+#define netlbl_af4list_foreach_safe(iter, tmp, head)			\
+	for (iter = __af4list_valid((head)->next, head),		\
+		     tmp = __af4list_valid(iter->list.next, head);	\
+	     &iter->list != (head);					\
+	     iter = tmp, tmp = __af4list_valid(iter->list.next, head))
+
+int netlbl_af4list_add(struct netlbl_af4list *entry,
+		       struct list_head *head);
+struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
+					     struct list_head *head);
+void netlbl_af4list_remove_entry(struct netlbl_af4list *entry);
+struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
+					     struct list_head *head);
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
+#define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list)
+
+static inline struct netlbl_af6list *__af6list_valid(struct list_head *s,
+						     struct list_head *h)
+{
+	struct list_head *i = s;
+	struct netlbl_af6list *n = __af6list_entry(s);
+	while (i != h && !n->valid) {
+		i = i->next;
+		n = __af6list_entry(i);
+	}
+	return n;
+}
+
+static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
+							 struct list_head *h)
+{
+	struct list_head *i = s;
+	struct netlbl_af6list *n = __af6list_entry(s);
+	while (i != h && !n->valid) {
+		i = rcu_dereference(i->next);
+		n = __af6list_entry(i);
+	}
+	return n;
+}
+
+#define netlbl_af6list_foreach(iter, head)				\
+	for (iter = __af6list_valid((head)->next, head);		\
+	     prefetch(iter->list.next),	&iter->list != (head);		\
+	     iter = __af6list_valid(iter->list.next, head))
+
+#define netlbl_af6list_foreach_rcu(iter, head)				\
+	for (iter = __af6list_valid_rcu((head)->next, head);		\
+	     prefetch(iter->list.next),	&iter->list != (head);		\
+	     iter = __af6list_valid_rcu(iter->list.next, head))
+
+#define netlbl_af6list_foreach_safe(iter, tmp, head)			\
+	for (iter = __af6list_valid((head)->next, head),		\
+		     tmp = __af6list_valid(iter->list.next, head);	\
+	     &iter->list != (head);					\
+	     iter = tmp, tmp = __af6list_valid(iter->list.next, head))
+
+int netlbl_af6list_add(struct netlbl_af6list *entry,
+		       struct list_head *head);
+struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
+					     const struct in6_addr *mask,
+					     struct list_head *head);
+void netlbl_af6list_remove_entry(struct netlbl_af6list *entry);
+struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
+					     struct list_head *head);
+#endif /* IPV6 */
+
+#endif
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index cc105a10e3f..ab8131a8e48 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -10,7 +10,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2007
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -54,6 +54,7 @@
 #include <asm/atomic.h>
 
 #include "netlabel_user.h"
+#include "netlabel_addrlist.h"
 #include "netlabel_domainhash.h"
 #include "netlabel_unlabeled.h"
 #include "netlabel_mgmt.h"
@@ -76,22 +77,20 @@ struct netlbl_unlhsh_tbl {
 	struct list_head *tbl;
 	u32 size;
 };
+#define netlbl_unlhsh_addr4_entry(iter) \
+	container_of(iter, struct netlbl_unlhsh_addr4, list)
 struct netlbl_unlhsh_addr4 {
-	__be32 addr;
-	__be32 mask;
 	u32 secid;
 
-	u32 valid;
-	struct list_head list;
+	struct netlbl_af4list list;
 	struct rcu_head rcu;
 };
+#define netlbl_unlhsh_addr6_entry(iter) \
+	container_of(iter, struct netlbl_unlhsh_addr6, list)
 struct netlbl_unlhsh_addr6 {
-	struct in6_addr addr;
-	struct in6_addr mask;
 	u32 secid;
 
-	u32 valid;
-	struct list_head list;
+	struct netlbl_af6list list;
 	struct rcu_head rcu;
 };
 struct netlbl_unlhsh_iface {
@@ -274,26 +273,28 @@ static void netlbl_unlhsh_free_addr6(struct rcu_head *entry)
 static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
 {
 	struct netlbl_unlhsh_iface *iface;
-	struct netlbl_unlhsh_addr4 *iter4;
-	struct netlbl_unlhsh_addr4 *tmp4;
-	struct netlbl_unlhsh_addr6 *iter6;
-	struct netlbl_unlhsh_addr6 *tmp6;
+	struct netlbl_af4list *iter4;
+	struct netlbl_af4list *tmp4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+	struct netlbl_af6list *tmp6;
+#endif /* IPv6 */
 
 	iface = container_of(entry, struct netlbl_unlhsh_iface, rcu);
 
 	/* no need for locks here since we are the only one with access to this
 	 * structure */
 
-	list_for_each_entry_safe(iter4, tmp4, &iface->addr4_list, list)
-		if (iter4->valid) {
-			list_del_rcu(&iter4->list);
-			kfree(iter4);
-		}
-	list_for_each_entry_safe(iter6, tmp6, &iface->addr6_list, list)
-		if (iter6->valid) {
-			list_del_rcu(&iter6->list);
-			kfree(iter6);
-		}
+	netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) {
+		netlbl_af4list_remove_entry(iter4);
+		kfree(netlbl_unlhsh_addr4_entry(iter4));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) {
+		netlbl_af6list_remove_entry(iter6);
+		kfree(netlbl_unlhsh_addr6_entry(iter6));
+	}
+#endif /* IPv6 */
 	kfree(iface);
 }
 
@@ -315,59 +316,6 @@ static u32 netlbl_unlhsh_hash(int ifindex)
 	return ifindex & (rcu_dereference(netlbl_unlhsh)->size - 1);
 }
 
-/**
- * netlbl_unlhsh_search_addr4 - Search for a matching IPv4 address entry
- * @addr: IPv4 address
- * @iface: the network interface entry
- *
- * Description:
- * Searches the IPv4 address list of the network interface specified by @iface.
- * If a matching address entry is found it is returned, otherwise NULL is
- * returned.  The caller is responsible for calling the rcu_read_[un]lock()
- * functions.
- *
- */
-static struct netlbl_unlhsh_addr4 *netlbl_unlhsh_search_addr4(
-	                               __be32 addr,
-	                               const struct netlbl_unlhsh_iface *iface)
-{
-	struct netlbl_unlhsh_addr4 *iter;
-
-	list_for_each_entry_rcu(iter, &iface->addr4_list, list)
-		if (iter->valid && (addr & iter->mask) == iter->addr)
-			return iter;
-
-	return NULL;
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-/**
- * netlbl_unlhsh_search_addr6 - Search for a matching IPv6 address entry
- * @addr: IPv6 address
- * @iface: the network interface entry
- *
- * Description:
- * Searches the IPv6 address list of the network interface specified by @iface.
- * If a matching address entry is found it is returned, otherwise NULL is
- * returned.  The caller is responsible for calling the rcu_read_[un]lock()
- * functions.
- *
- */
-static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6(
-	                               const struct in6_addr *addr,
-	                               const struct netlbl_unlhsh_iface *iface)
-{
-	struct netlbl_unlhsh_addr6 *iter;
-
-	list_for_each_entry_rcu(iter, &iface->addr6_list, list)
-		if (iter->valid &&
-		    ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0)
-		return iter;
-
-	return NULL;
-}
-#endif /* IPv6 */
-
 /**
  * netlbl_unlhsh_search_iface - Search for a matching interface entry
  * @ifindex: the network interface
@@ -439,43 +387,26 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface,
 				   const struct in_addr *mask,
 				   u32 secid)
 {
+	int ret_val;
 	struct netlbl_unlhsh_addr4 *entry;
-	struct netlbl_unlhsh_addr4 *iter;
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
 		return -ENOMEM;
 
-	entry->addr = addr->s_addr & mask->s_addr;
-	entry->mask = mask->s_addr;
-	entry->secid = secid;
-	entry->valid = 1;
+	entry->list.addr = addr->s_addr & mask->s_addr;
+	entry->list.mask = mask->s_addr;
+	entry->list.valid = 1;
 	INIT_RCU_HEAD(&entry->rcu);
+	entry->secid = secid;
 
 	spin_lock(&netlbl_unlhsh_lock);
-	iter = netlbl_unlhsh_search_addr4(entry->addr, iface);
-	if (iter != NULL &&
-	    iter->addr == addr->s_addr && iter->mask == mask->s_addr) {
-		spin_unlock(&netlbl_unlhsh_lock);
-		kfree(entry);
-		return -EEXIST;
-	}
-	/* in order to speed up address searches through the list (the common
-	 * case) we need to keep the list in order based on the size of the
-	 * address mask such that the entry with the widest mask (smallest
-	 * numerical value) appears first in the list */
-	list_for_each_entry_rcu(iter, &iface->addr4_list, list)
-		if (iter->valid &&
-		    ntohl(entry->mask) > ntohl(iter->mask)) {
-			__list_add_rcu(&entry->list,
-				       iter->list.prev,
-				       &iter->list);
-			spin_unlock(&netlbl_unlhsh_lock);
-			return 0;
-		}
-	list_add_tail_rcu(&entry->list, &iface->addr4_list);
+	ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list);
 	spin_unlock(&netlbl_unlhsh_lock);
-	return 0;
+
+	if (ret_val != 0)
+		kfree(entry);
+	return ret_val;
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -498,47 +429,29 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface,
 				   const struct in6_addr *mask,
 				   u32 secid)
 {
+	int ret_val;
 	struct netlbl_unlhsh_addr6 *entry;
-	struct netlbl_unlhsh_addr6 *iter;
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
 		return -ENOMEM;
 
-	ipv6_addr_copy(&entry->addr, addr);
-	entry->addr.s6_addr32[0] &= mask->s6_addr32[0];
-	entry->addr.s6_addr32[1] &= mask->s6_addr32[1];
-	entry->addr.s6_addr32[2] &= mask->s6_addr32[2];
-	entry->addr.s6_addr32[3] &= mask->s6_addr32[3];
-	ipv6_addr_copy(&entry->mask, mask);
-	entry->secid = secid;
-	entry->valid = 1;
+	ipv6_addr_copy(&entry->list.addr, addr);
+	entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
+	entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
+	entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
+	entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
+	ipv6_addr_copy(&entry->list.mask, mask);
+	entry->list.valid = 1;
 	INIT_RCU_HEAD(&entry->rcu);
+	entry->secid = secid;
 
 	spin_lock(&netlbl_unlhsh_lock);
-	iter = netlbl_unlhsh_search_addr6(&entry->addr, iface);
-	if (iter != NULL &&
-	    (ipv6_addr_equal(&iter->addr, addr) &&
-	     ipv6_addr_equal(&iter->mask, mask))) {
-		spin_unlock(&netlbl_unlhsh_lock);
-		kfree(entry);
-		return -EEXIST;
-	}
-	/* in order to speed up address searches through the list (the common
-	 * case) we need to keep the list in order based on the size of the
-	 * address mask such that the entry with the widest mask (smallest
-	 * numerical value) appears first in the list */
-	list_for_each_entry_rcu(iter, &iface->addr6_list, list)
-		if (iter->valid &&
-		    ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) {
-			__list_add_rcu(&entry->list,
-				       iter->list.prev,
-				       &iter->list);
-			spin_unlock(&netlbl_unlhsh_lock);
-			return 0;
-		}
-	list_add_tail_rcu(&entry->list, &iface->addr6_list);
+	ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list);
 	spin_unlock(&netlbl_unlhsh_lock);
+
+	if (ret_val != 0)
+		kfree(entry);
 	return 0;
 }
 #endif /* IPv6 */
@@ -719,22 +632,21 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 				      const struct in_addr *mask,
 				      struct netlbl_audit *audit_info)
 {
-	int ret_val = -ENOENT;
+	int ret_val = 0;
+	struct netlbl_af4list *list_entry;
 	struct netlbl_unlhsh_addr4 *entry;
-	struct audit_buffer *audit_buf = NULL;
+	struct audit_buffer *audit_buf;
 	struct net_device *dev;
-	char *secctx = NULL;
+	char *secctx;
 	u32 secctx_len;
 
 	spin_lock(&netlbl_unlhsh_lock);
-	entry = netlbl_unlhsh_search_addr4(addr->s_addr, iface);
-	if (entry != NULL &&
-	    entry->addr == addr->s_addr && entry->mask == mask->s_addr) {
-		entry->valid = 0;
-		list_del_rcu(&entry->list);
-		ret_val = 0;
-	}
+	list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr,
+					   &iface->addr4_list);
 	spin_unlock(&netlbl_unlhsh_lock);
+	if (list_entry == NULL)
+		ret_val = -ENOENT;
+	entry = netlbl_unlhsh_addr4_entry(list_entry);
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
 					      audit_info);
@@ -742,12 +654,12 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 		dev = dev_get_by_index(net, iface->ifindex);
 		netlbl_unlabel_audit_addr4(audit_buf,
 					   (dev != NULL ? dev->name : NULL),
-					   entry->addr, entry->mask);
+					   addr->s_addr, mask->s_addr);
 		if (dev != NULL)
 			dev_put(dev);
-		if (security_secid_to_secctx(entry->secid,
-					     &secctx,
-					     &secctx_len) == 0) {
+		if (entry && security_secid_to_secctx(entry->secid,
+						      &secctx,
+						      &secctx_len) == 0) {
 			audit_log_format(audit_buf, " sec_obj=%s", secctx);
 			security_release_secctx(secctx, secctx_len);
 		}
@@ -781,23 +693,20 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 				      const struct in6_addr *mask,
 				      struct netlbl_audit *audit_info)
 {
-	int ret_val = -ENOENT;
+	int ret_val = 0;
+	struct netlbl_af6list *list_entry;
 	struct netlbl_unlhsh_addr6 *entry;
-	struct audit_buffer *audit_buf = NULL;
+	struct audit_buffer *audit_buf;
 	struct net_device *dev;
-	char *secctx = NULL;
+	char *secctx;
 	u32 secctx_len;
 
 	spin_lock(&netlbl_unlhsh_lock);
-	entry = netlbl_unlhsh_search_addr6(addr, iface);
-	if (entry != NULL &&
-	    (ipv6_addr_equal(&entry->addr, addr) &&
-	     ipv6_addr_equal(&entry->mask, mask))) {
-		entry->valid = 0;
-		list_del_rcu(&entry->list);
-		ret_val = 0;
-	}
+	list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list);
 	spin_unlock(&netlbl_unlhsh_lock);
+	if (list_entry == NULL)
+		ret_val = -ENOENT;
+	entry = netlbl_unlhsh_addr6_entry(list_entry);
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
 					      audit_info);
@@ -808,9 +717,9 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 					   addr, mask);
 		if (dev != NULL)
 			dev_put(dev);
-		if (security_secid_to_secctx(entry->secid,
-					     &secctx,
-					     &secctx_len) == 0) {
+		if (entry && security_secid_to_secctx(entry->secid,
+						      &secctx,
+						      &secctx_len) == 0) {
 			audit_log_format(audit_buf, " sec_obj=%s", secctx);
 			security_release_secctx(secctx, secctx_len);
 		}
@@ -836,16 +745,18 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
  */
 static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
 {
-	struct netlbl_unlhsh_addr4 *iter4;
-	struct netlbl_unlhsh_addr6 *iter6;
+	struct netlbl_af4list *iter4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+#endif /* IPv6 */
 
 	spin_lock(&netlbl_unlhsh_lock);
-	list_for_each_entry_rcu(iter4, &iface->addr4_list, list)
-		if (iter4->valid)
-			goto unlhsh_condremove_failure;
-	list_for_each_entry_rcu(iter6, &iface->addr6_list, list)
-		if (iter6->valid)
-			goto unlhsh_condremove_failure;
+	netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list)
+		goto unlhsh_condremove_failure;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list)
+		goto unlhsh_condremove_failure;
+#endif /* IPv6 */
 	iface->valid = 0;
 	if (iface->ifindex > 0)
 		list_del_rcu(&iface->list);
@@ -1349,7 +1260,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
 	if (addr4) {
 		struct in_addr addr_struct;
 
-		addr_struct.s_addr = addr4->addr;
+		addr_struct.s_addr = addr4->list.addr;
 		ret_val = nla_put(cb_arg->skb,
 				  NLBL_UNLABEL_A_IPV4ADDR,
 				  sizeof(struct in_addr),
@@ -1357,7 +1268,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
 		if (ret_val != 0)
 			goto list_cb_failure;
 
-		addr_struct.s_addr = addr4->mask;
+		addr_struct.s_addr = addr4->list.mask;
 		ret_val = nla_put(cb_arg->skb,
 				  NLBL_UNLABEL_A_IPV4MASK,
 				  sizeof(struct in_addr),
@@ -1370,14 +1281,14 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
 		ret_val = nla_put(cb_arg->skb,
 				  NLBL_UNLABEL_A_IPV6ADDR,
 				  sizeof(struct in6_addr),
-				  &addr6->addr);
+				  &addr6->list.addr);
 		if (ret_val != 0)
 			goto list_cb_failure;
 
 		ret_val = nla_put(cb_arg->skb,
 				  NLBL_UNLABEL_A_IPV6MASK,
 				  sizeof(struct in6_addr),
-				  &addr6->mask);
+				  &addr6->list.mask);
 		if (ret_val != 0)
 			goto list_cb_failure;
 
@@ -1425,9 +1336,11 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 	u32 iter_bkt;
 	u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
 	struct netlbl_unlhsh_iface *iface;
-	struct netlbl_unlhsh_addr4 *addr4;
-	struct netlbl_unlhsh_addr6 *addr6;
 	struct list_head *iter_list;
+	struct netlbl_af4list *addr4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *addr6;
+#endif
 
 	cb_arg.nl_cb = cb;
 	cb_arg.skb = skb;
@@ -1442,38 +1355,38 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 			if (!iface->valid ||
 			    iter_chain++ < skip_chain)
 				continue;
-			list_for_each_entry_rcu(addr4,
-						&iface->addr4_list,
-						list) {
-				if (!addr4->valid || iter_addr4++ < skip_addr4)
+			netlbl_af4list_foreach_rcu(addr4,
+						   &iface->addr4_list) {
+				if (iter_addr4++ < skip_addr4)
 					continue;
 				if (netlbl_unlabel_staticlist_gen(
-					             NLBL_UNLABEL_C_STATICLIST,
-						     iface,
-						     addr4,
-						     NULL,
-						     &cb_arg) < 0) {
+					      NLBL_UNLABEL_C_STATICLIST,
+					      iface,
+					      netlbl_unlhsh_addr4_entry(addr4),
+					      NULL,
+					      &cb_arg) < 0) {
 					iter_addr4--;
 					iter_chain--;
 					goto unlabel_staticlist_return;
 				}
 			}
-			list_for_each_entry_rcu(addr6,
-						&iface->addr6_list,
-						list) {
-				if (!addr6->valid || iter_addr6++ < skip_addr6)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+			netlbl_af6list_foreach_rcu(addr6,
+						   &iface->addr6_list) {
+				if (iter_addr6++ < skip_addr6)
 					continue;
 				if (netlbl_unlabel_staticlist_gen(
-						     NLBL_UNLABEL_C_STATICLIST,
-						     iface,
-						     NULL,
-						     addr6,
-						     &cb_arg) < 0) {
+					      NLBL_UNLABEL_C_STATICLIST,
+					      iface,
+					      NULL,
+					      netlbl_unlhsh_addr6_entry(addr6),
+					      &cb_arg) < 0) {
 					iter_addr6--;
 					iter_chain--;
 					goto unlabel_staticlist_return;
 				}
 			}
+#endif /* IPv6 */
 		}
 	}
 
@@ -1504,9 +1417,12 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 	struct netlbl_unlhsh_iface *iface;
 	u32 skip_addr4 = cb->args[0];
 	u32 skip_addr6 = cb->args[1];
-	u32 iter_addr4 = 0, iter_addr6 = 0;
-	struct netlbl_unlhsh_addr4 *addr4;
-	struct netlbl_unlhsh_addr6 *addr6;
+	u32 iter_addr4 = 0;
+	struct netlbl_af4list *addr4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	u32 iter_addr6 = 0;
+	struct netlbl_af6list *addr6;
+#endif
 
 	cb_arg.nl_cb = cb;
 	cb_arg.skb = skb;
@@ -1517,30 +1433,32 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 	if (iface == NULL || !iface->valid)
 		goto unlabel_staticlistdef_return;
 
-	list_for_each_entry_rcu(addr4, &iface->addr4_list, list) {
-		if (!addr4->valid || iter_addr4++ < skip_addr4)
+	netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) {
+		if (iter_addr4++ < skip_addr4)
 			continue;
 		if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
-					   iface,
-					   addr4,
-					   NULL,
-					   &cb_arg) < 0) {
+					      iface,
+					      netlbl_unlhsh_addr4_entry(addr4),
+					      NULL,
+					      &cb_arg) < 0) {
 			iter_addr4--;
 			goto unlabel_staticlistdef_return;
 		}
 	}
-	list_for_each_entry_rcu(addr6, &iface->addr6_list, list) {
-		if (!addr6->valid || iter_addr6++ < skip_addr6)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) {
+		if (iter_addr6++ < skip_addr6)
 			continue;
 		if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
-					   iface,
-					   NULL,
-					   addr6,
-					   &cb_arg) < 0) {
+					      iface,
+					      NULL,
+					      netlbl_unlhsh_addr6_entry(addr6),
+					      &cb_arg) < 0) {
 			iter_addr6--;
 			goto unlabel_staticlistdef_return;
 		}
 	}
+#endif /* IPv6 */
 
 unlabel_staticlistdef_return:
 	rcu_read_unlock();
@@ -1718,25 +1636,27 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
 	switch (family) {
 	case PF_INET: {
 		struct iphdr *hdr4;
-		struct netlbl_unlhsh_addr4 *addr4;
+		struct netlbl_af4list *addr4;
 
 		hdr4 = ip_hdr(skb);
-		addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface);
+		addr4 = netlbl_af4list_search(hdr4->saddr,
+					      &iface->addr4_list);
 		if (addr4 == NULL)
 			goto unlabel_getattr_nolabel;
-		secattr->attr.secid = addr4->secid;
+		secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid;
 		break;
 	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case PF_INET6: {
 		struct ipv6hdr *hdr6;
-		struct netlbl_unlhsh_addr6 *addr6;
+		struct netlbl_af6list *addr6;
 
 		hdr6 = ipv6_hdr(skb);
-		addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface);
+		addr6 = netlbl_af6list_search(&hdr6->saddr,
+					      &iface->addr6_list);
 		if (addr6 == NULL)
 			goto unlabel_getattr_nolabel;
-		secattr->attr.secid = addr6->secid;
+		secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid;
 		break;
 	}
 #endif /* IPv6 */
-- 
cgit v1.2.3-70-g09d2


From 63c41688743760631188cf0f4ae986a6793ccb0a Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:32 -0400
Subject: netlabel: Add network address selectors to the NetLabel/LSM domain
 mapping

This patch extends the NetLabel traffic labeling capabilities to individual
packets based not only on the LSM domain but the by the destination address
as well.  The changes here only affect the core NetLabel infrastructre,
changes to the NetLabel KAPI and individial protocol engines are also
required but are split out into a different patch to ease review.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 include/net/netlabel.h             |   7 +-
 net/netlabel/netlabel_addrlist.c   | 130 ++++++++++++
 net/netlabel/netlabel_addrlist.h   |  15 ++
 net/netlabel/netlabel_domainhash.c | 290 +++++++++++++++++++++++----
 net/netlabel/netlabel_domainhash.h |  38 +++-
 net/netlabel/netlabel_kapi.c       |   7 +-
 net/netlabel/netlabel_mgmt.c       | 398 ++++++++++++++++++++++++++++---------
 net/netlabel/netlabel_mgmt.h       |  59 +++++-
 net/netlabel/netlabel_unlabeled.c  |  96 ++-------
 9 files changed, 816 insertions(+), 224 deletions(-)

(limited to 'net')

diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index e16db096126..0729f8ce504 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -9,7 +9,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -72,8 +72,9 @@ struct cipso_v4_doi;
 /* NetLabel NETLINK protocol version
  *  1: initial version
  *  2: added static labels for unlabeled connections
+ *  3: network selectors added to the NetLabel/LSM domain mapping
  */
-#define NETLBL_PROTO_VERSION            2
+#define NETLBL_PROTO_VERSION            3
 
 /* NetLabel NETLINK types/families */
 #define NETLBL_NLTYPE_NONE              0
@@ -87,6 +88,8 @@ struct cipso_v4_doi;
 #define NETLBL_NLTYPE_CIPSOV6_NAME      "NLBL_CIPSOv6"
 #define NETLBL_NLTYPE_UNLABELED         5
 #define NETLBL_NLTYPE_UNLABELED_NAME    "NLBL_UNLBL"
+#define NETLBL_NLTYPE_ADDRSELECT        6
+#define NETLBL_NLTYPE_ADDRSELECT_NAME   "NLBL_ADRSEL"
 
 /*
  * NetLabel - Kernel API for accessing the network packet label mappings.
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index dd928aa52db..b0925a30335 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -39,6 +39,7 @@
 #include <linux/ipv6.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <linux/audit.h>
 
 #include "netlabel_addrlist.h"
 
@@ -69,6 +70,32 @@ struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
 	return NULL;
 }
 
+/**
+ * netlbl_af4list_search_exact - Search for an exact IPv4 address entry
+ * @addr: IPv4 address
+ * @mask: IPv4 address mask
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv4 address list given by @head.  If an exact match if found
+ * it is returned, otherwise NULL is returned.  The caller is responsible for
+ * calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
+						   __be32 mask,
+						   struct list_head *head)
+{
+	struct netlbl_af4list *iter;
+
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid && iter->addr == addr && iter->mask == mask)
+			return iter;
+
+	return NULL;
+}
+
+
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 /**
  * netlbl_af6list_search - Search for a matching IPv6 address entry
@@ -93,6 +120,33 @@ struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
 
 	return NULL;
 }
+
+/**
+ * netlbl_af6list_search_exact - Search for an exact IPv6 address entry
+ * @addr: IPv6 address
+ * @mask: IPv6 address mask
+ * @head: the list head
+ *
+ * Description:
+ * Searches the IPv6 address list given by @head.  If an exact match if found
+ * it is returned, otherwise NULL is returned.  The caller is responsible for
+ * calling the rcu_read_[un]lock() functions.
+ *
+ */
+struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr,
+						   const struct in6_addr *mask,
+						   struct list_head *head)
+{
+	struct netlbl_af6list *iter;
+
+	list_for_each_entry_rcu(iter, head, list)
+		if (iter->valid &&
+		    ipv6_addr_equal(&iter->addr, addr) &&
+		    ipv6_addr_equal(&iter->mask, mask))
+			return iter;
+
+	return NULL;
+}
 #endif /* IPv6 */
 
 /**
@@ -256,3 +310,79 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
 	return NULL;
 }
 #endif /* IPv6 */
+
+/*
+ * Audit Helper Functions
+ */
+
+/**
+ * netlbl_af4list_audit_addr - Audit an IPv4 address
+ * @audit_buf: audit buffer
+ * @src: true if source address, false if destination
+ * @dev: network interface
+ * @addr: IP address
+ * @mask: IP address mask
+ *
+ * Description:
+ * Write the IPv4 address and address mask, if necessary, to @audit_buf.
+ *
+ */
+void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
+					int src, const char *dev,
+					__be32 addr, __be32 mask)
+{
+	u32 mask_val = ntohl(mask);
+	char *dir = (src ? "src" : "dst");
+
+	if (dev != NULL)
+		audit_log_format(audit_buf, " netif=%s", dev);
+	audit_log_format(audit_buf, " %s=" NIPQUAD_FMT, dir, NIPQUAD(addr));
+	if (mask_val != 0xffffffff) {
+		u32 mask_len = 0;
+		while (mask_val > 0) {
+			mask_val <<= 1;
+			mask_len++;
+		}
+		audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len);
+	}
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_af6list_audit_addr - Audit an IPv6 address
+ * @audit_buf: audit buffer
+ * @src: true if source address, false if destination
+ * @dev: network interface
+ * @addr: IP address
+ * @mask: IP address mask
+ *
+ * Description:
+ * Write the IPv6 address and address mask, if necessary, to @audit_buf.
+ *
+ */
+void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
+				 int src,
+				 const char *dev,
+				 const struct in6_addr *addr,
+				 const struct in6_addr *mask)
+{
+	char *dir = (src ? "src" : "dst");
+
+	if (dev != NULL)
+		audit_log_format(audit_buf, " netif=%s", dev);
+	audit_log_format(audit_buf, " %s=" NIP6_FMT, dir, NIP6(*addr));
+	if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
+		u32 mask_len = 0;
+		u32 mask_val;
+		int iter = -1;
+		while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff)
+			mask_len += 32;
+		mask_val = ntohl(mask->s6_addr32[iter]);
+		while (mask_val > 0) {
+			mask_val <<= 1;
+			mask_len++;
+		}
+		audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len);
+	}
+}
+#endif /* IPv6 */
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index 0c41df057fa..0242bead405 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -36,6 +36,7 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 #include <linux/in6.h>
+#include <linux/audit.h>
 
 /**
  * struct netlbl_af4list - NetLabel IPv4 address list
@@ -116,6 +117,12 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
 void netlbl_af4list_remove_entry(struct netlbl_af4list *entry);
 struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
 					     struct list_head *head);
+struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
+						   __be32 mask,
+						   struct list_head *head);
+void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
+			       int src, const char *dev,
+			       __be32 addr, __be32 mask);
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 
@@ -169,6 +176,14 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
 void netlbl_af6list_remove_entry(struct netlbl_af6list *entry);
 struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
 					     struct list_head *head);
+struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr,
+						   const struct in6_addr *mask,
+						   struct list_head *head);
+void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
+			       int src,
+			       const char *dev,
+			       const struct in6_addr *addr,
+			       const struct in6_addr *mask);
 #endif /* IPV6 */
 
 #endif
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 0243f0c57b4..5fadf10e5dd 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -11,7 +11,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -40,6 +40,7 @@
 #include <asm/bug.h>
 
 #include "netlabel_mgmt.h"
+#include "netlabel_addrlist.h"
 #include "netlabel_domainhash.h"
 #include "netlabel_user.h"
 
@@ -72,8 +73,28 @@ static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
 static void netlbl_domhsh_free_entry(struct rcu_head *entry)
 {
 	struct netlbl_dom_map *ptr;
+	struct netlbl_af4list *iter4;
+	struct netlbl_af4list *tmp4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+	struct netlbl_af6list *tmp6;
+#endif /* IPv6 */
 
 	ptr = container_of(entry, struct netlbl_dom_map, rcu);
+	if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) {
+		netlbl_af4list_foreach_safe(iter4, tmp4,
+					    &ptr->type_def.addrsel->list4) {
+			netlbl_af4list_remove_entry(iter4);
+			kfree(netlbl_domhsh_addr4_entry(iter4));
+		}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		netlbl_af6list_foreach_safe(iter6, tmp6,
+					    &ptr->type_def.addrsel->list6) {
+			netlbl_af6list_remove_entry(iter6);
+			kfree(netlbl_domhsh_addr6_entry(iter6));
+		}
+#endif /* IPv6 */
+	}
 	kfree(ptr->domain);
 	kfree(ptr);
 }
@@ -156,6 +177,69 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain)
 	return entry;
 }
 
+/**
+ * netlbl_domhsh_audit_add - Generate an audit entry for an add event
+ * @entry: the entry being added
+ * @addr4: the IPv4 address information
+ * @addr6: the IPv6 address information
+ * @result: the result code
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Generate an audit record for adding a new NetLabel/LSM mapping entry with
+ * the given information.  Caller is responsibile for holding the necessary
+ * locks.
+ *
+ */
+static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
+				    struct netlbl_af4list *addr4,
+				    struct netlbl_af6list *addr6,
+				    int result,
+				    struct netlbl_audit *audit_info)
+{
+	struct audit_buffer *audit_buf;
+	struct cipso_v4_doi *cipsov4 = NULL;
+	u32 type;
+
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
+	if (audit_buf != NULL) {
+		audit_log_format(audit_buf, " nlbl_domain=%s",
+				 entry->domain ? entry->domain : "(default)");
+		if (addr4 != NULL) {
+			struct netlbl_domaddr4_map *map4;
+			map4 = netlbl_domhsh_addr4_entry(addr4);
+			type = map4->type;
+			cipsov4 = map4->type_def.cipsov4;
+			netlbl_af4list_audit_addr(audit_buf, 0, NULL,
+						  addr4->addr, addr4->mask);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		} else if (addr6 != NULL) {
+			struct netlbl_domaddr6_map *map6;
+			map6 = netlbl_domhsh_addr6_entry(addr6);
+			type = map6->type;
+			netlbl_af6list_audit_addr(audit_buf, 0, NULL,
+						  &addr6->addr, &addr6->mask);
+#endif /* IPv6 */
+		} else {
+			type = entry->type;
+			cipsov4 = entry->type_def.cipsov4;
+		}
+		switch (type) {
+		case NETLBL_NLTYPE_UNLABELED:
+			audit_log_format(audit_buf, " nlbl_protocol=unlbl");
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			BUG_ON(cipsov4 == NULL);
+			audit_log_format(audit_buf,
+					 " nlbl_protocol=cipsov4 cipso_doi=%u",
+					 cipsov4->doi);
+			break;
+		}
+		audit_log_format(audit_buf, " res=%u", result == 0 ? 1 : 0);
+		audit_log_end(audit_buf);
+	}
+}
+
 /*
  * Domain Hash Table Functions
  */
@@ -213,50 +297,106 @@ int __init netlbl_domhsh_init(u32 size)
 int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 		      struct netlbl_audit *audit_info)
 {
-	int ret_val;
-	u32 bkt;
-	struct audit_buffer *audit_buf;
-
-	entry->valid = 1;
-	INIT_RCU_HEAD(&entry->rcu);
+	int ret_val = 0;
+	struct netlbl_dom_map *entry_old;
+	struct netlbl_af4list *iter4;
+	struct netlbl_af4list *tmp4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+	struct netlbl_af6list *tmp6;
+#endif /* IPv6 */
 
 	rcu_read_lock();
+
 	spin_lock(&netlbl_domhsh_lock);
-	if (entry->domain != NULL) {
-		bkt = netlbl_domhsh_hash(entry->domain);
-		if (netlbl_domhsh_search(entry->domain) == NULL)
+	if (entry->domain != NULL)
+		entry_old = netlbl_domhsh_search(entry->domain);
+	else
+		entry_old = netlbl_domhsh_search_def(entry->domain);
+	if (entry_old == NULL) {
+		entry->valid = 1;
+		INIT_RCU_HEAD(&entry->rcu);
+
+		if (entry->domain != NULL) {
+			u32 bkt = netlbl_domhsh_hash(entry->domain);
 			list_add_tail_rcu(&entry->list,
 				    &rcu_dereference(netlbl_domhsh)->tbl[bkt]);
-		else
-			ret_val = -EEXIST;
-	} else {
-		INIT_LIST_HEAD(&entry->list);
-		if (rcu_dereference(netlbl_domhsh_def) == NULL)
+		} else {
+			INIT_LIST_HEAD(&entry->list);
 			rcu_assign_pointer(netlbl_domhsh_def, entry);
-		else
-			ret_val = -EEXIST;
-	}
-	spin_unlock(&netlbl_domhsh_lock);
-	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
-	if (audit_buf != NULL) {
-		audit_log_format(audit_buf,
-				 " nlbl_domain=%s",
-				 entry->domain ? entry->domain : "(default)");
-		switch (entry->type) {
-		case NETLBL_NLTYPE_UNLABELED:
-			audit_log_format(audit_buf, " nlbl_protocol=unlbl");
-			break;
-		case NETLBL_NLTYPE_CIPSOV4:
-			audit_log_format(audit_buf,
-					 " nlbl_protocol=cipsov4 cipso_doi=%u",
-					 entry->type_def.cipsov4->doi);
-			break;
 		}
-		audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
-		audit_log_end(audit_buf);
-	}
-	rcu_read_unlock();
 
+		if (entry->type == NETLBL_NLTYPE_ADDRSELECT) {
+			netlbl_af4list_foreach_rcu(iter4,
+					       &entry->type_def.addrsel->list4)
+				netlbl_domhsh_audit_add(entry, iter4, NULL,
+							ret_val, audit_info);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+			netlbl_af6list_foreach_rcu(iter6,
+					       &entry->type_def.addrsel->list6)
+				netlbl_domhsh_audit_add(entry, NULL, iter6,
+							ret_val, audit_info);
+#endif /* IPv6 */
+		} else
+			netlbl_domhsh_audit_add(entry, NULL, NULL,
+						ret_val, audit_info);
+	} else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT &&
+		   entry->type == NETLBL_NLTYPE_ADDRSELECT) {
+		struct list_head *old_list4;
+		struct list_head *old_list6;
+
+		old_list4 = &entry_old->type_def.addrsel->list4;
+		old_list6 = &entry_old->type_def.addrsel->list6;
+
+		/* we only allow the addition of address selectors if all of
+		 * the selectors do not exist in the existing domain map */
+		netlbl_af4list_foreach_rcu(iter4,
+					   &entry->type_def.addrsel->list4)
+			if (netlbl_af4list_search_exact(iter4->addr,
+							iter4->mask,
+							old_list4)) {
+				ret_val = -EEXIST;
+				goto add_return;
+			}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		netlbl_af6list_foreach_rcu(iter6,
+					   &entry->type_def.addrsel->list6)
+			if (netlbl_af6list_search_exact(&iter6->addr,
+							&iter6->mask,
+							old_list6)) {
+				ret_val = -EEXIST;
+				goto add_return;
+			}
+#endif /* IPv6 */
+
+		netlbl_af4list_foreach_safe(iter4, tmp4,
+					    &entry->type_def.addrsel->list4) {
+			netlbl_af4list_remove_entry(iter4);
+			iter4->valid = 1;
+			ret_val = netlbl_af4list_add(iter4, old_list4);
+			netlbl_domhsh_audit_add(entry_old, iter4, NULL,
+						ret_val, audit_info);
+			if (ret_val != 0)
+				goto add_return;
+		}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		netlbl_af6list_foreach_safe(iter6, tmp6,
+					    &entry->type_def.addrsel->list6) {
+			netlbl_af6list_remove_entry(iter6);
+			iter6->valid = 1;
+			ret_val = netlbl_af6list_add(iter6, old_list6);
+			netlbl_domhsh_audit_add(entry_old, NULL, iter6,
+						ret_val, audit_info);
+			if (ret_val != 0)
+				goto add_return;
+		}
+#endif /* IPv6 */
+	} else
+		ret_val = -EINVAL;
+
+add_return:
+	spin_unlock(&netlbl_domhsh_lock);
+	rcu_read_unlock();
 	return ret_val;
 }
 
@@ -319,7 +459,19 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
 	}
 
 	if (ret_val == 0) {
+		struct netlbl_af4list *iter4;
+		struct netlbl_domaddr4_map *map4;
+
 		switch (entry->type) {
+		case NETLBL_NLTYPE_ADDRSELECT:
+			netlbl_af4list_foreach_rcu(iter4,
+					     &entry->type_def.addrsel->list4) {
+				map4 = netlbl_domhsh_addr4_entry(iter4);
+				cipso_v4_doi_putdef(map4->type_def.cipsov4);
+			}
+			/* no need to check the IPv6 list since we currently
+			 * support only unlabeled protocols for IPv6 */
+			break;
 		case NETLBL_NLTYPE_CIPSOV4:
 			cipso_v4_doi_putdef(entry->type_def.cipsov4);
 			break;
@@ -387,6 +539,70 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
 	return netlbl_domhsh_search_def(domain);
 }
 
+/**
+ * netlbl_domhsh_getentry_af4 - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ * @addr: the IP address to search for
+ *
+ * Description:
+ * Look through the domain hash table searching for an entry to match @domain
+ * and @addr, return a pointer to a copy of the entry or NULL.  The caller is
+ * responsible for ensuring that rcu_read_[un]lock() is called.
+ *
+ */
+struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
+						       __be32 addr)
+{
+	struct netlbl_dom_map *dom_iter;
+	struct netlbl_af4list *addr_iter;
+
+	dom_iter = netlbl_domhsh_search_def(domain);
+	if (dom_iter == NULL)
+		return NULL;
+	if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
+		return NULL;
+
+	addr_iter = netlbl_af4list_search(addr,
+					  &dom_iter->type_def.addrsel->list4);
+	if (addr_iter == NULL)
+		return NULL;
+
+	return netlbl_domhsh_addr4_entry(addr_iter);
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ * @addr: the IP address to search for
+ *
+ * Description:
+ * Look through the domain hash table searching for an entry to match @domain
+ * and @addr, return a pointer to a copy of the entry or NULL.  The caller is
+ * responsible for ensuring that rcu_read_[un]lock() is called.
+ *
+ */
+struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
+						   const struct in6_addr *addr)
+{
+	struct netlbl_dom_map *dom_iter;
+	struct netlbl_af6list *addr_iter;
+
+	dom_iter = netlbl_domhsh_search_def(domain);
+	if (dom_iter == NULL)
+		return NULL;
+	if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
+		return NULL;
+
+	addr_iter = netlbl_af6list_search(addr,
+					  &dom_iter->type_def.addrsel->list6);
+	if (addr_iter == NULL)
+		return NULL;
+
+	return netlbl_domhsh_addr6_entry(addr_iter);
+}
+#endif /* IPv6 */
+
 /**
  * netlbl_domhsh_walk - Iterate through the domain mapping hash table
  * @skip_bkt: the number of buckets to skip at the start
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index afcc41a7432..bfcb6763a1a 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -11,7 +11,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -36,16 +36,43 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 
+#include "netlabel_addrlist.h"
+
 /* Domain hash table size */
 /* XXX - currently this number is an uneducated guess */
 #define NETLBL_DOMHSH_BITSIZE       7
 
-/* Domain mapping definition struct */
+/* Domain mapping definition structures */
+#define netlbl_domhsh_addr4_entry(iter) \
+	container_of(iter, struct netlbl_domaddr4_map, list)
+struct netlbl_domaddr4_map {
+	u32 type;
+	union {
+		struct cipso_v4_doi *cipsov4;
+	} type_def;
+
+	struct netlbl_af4list list;
+};
+#define netlbl_domhsh_addr6_entry(iter) \
+	container_of(iter, struct netlbl_domaddr6_map, list)
+struct netlbl_domaddr6_map {
+	u32 type;
+
+	/* NOTE: no 'type_def' union needed at present since we don't currently
+	 *       support any IPv6 labeling protocols */
+
+	struct netlbl_af6list list;
+};
+struct netlbl_domaddr_map {
+	struct list_head list4;
+	struct list_head list6;
+};
 struct netlbl_dom_map {
 	char *domain;
 	u32 type;
 	union {
 		struct cipso_v4_doi *cipsov4;
+		struct netlbl_domaddr_map *addrsel;
 	} type_def;
 
 	u32 valid;
@@ -66,9 +93,16 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
 int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
 int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
 struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
+struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
+						       __be32 addr);
 int netlbl_domhsh_walk(u32 *skip_bkt,
 		     u32 *skip_chain,
 		     int (*callback) (struct netlbl_dom_map *entry, void *arg),
 		     void *cb_arg);
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
+						  const struct in6_addr *addr);
+#endif /* IPv6 */
+
 #endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 7d8ecea9391..8b820dc9806 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -419,7 +419,9 @@ int netlbl_enabled(void)
  * Attach the correct label to the given socket using the security attributes
  * specified in @secattr.  This function requires exclusive access to @sk,
  * which means it either needs to be in the process of being created or locked.
- * Returns zero on success, negative values on failure.
+ * Returns zero on success, -EDESTADDRREQ if the domain is configured to use
+ * network address selectors (can't blindly label the socket), and negative
+ * values on all other failures.
  *
  */
 int netlbl_sock_setattr(struct sock *sk,
@@ -433,6 +435,9 @@ int netlbl_sock_setattr(struct sock *sk,
 	if (dom_entry == NULL)
 		goto socket_setattr_return;
 	switch (dom_entry->type) {
+	case NETLBL_NLTYPE_ADDRSELECT:
+		ret_val = -EDESTADDRREQ;
+		break;
 	case NETLBL_NLTYPE_CIPSOV4:
 		ret_val = cipso_v4_sock_setattr(sk,
 						dom_entry->type_def.cipsov4,
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index c4e18c7bc0c..ee769ecaa13 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -10,7 +10,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -32,9 +32,13 @@
 #include <linux/socket.h>
 #include <linux/string.h>
 #include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/in6.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
 #include <asm/atomic.h>
@@ -71,79 +75,336 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
 };
 
 /*
- * NetLabel Command Handlers
+ * Helper Functions
  */
 
 /**
  * netlbl_mgmt_add - Handle an ADD message
- * @skb: the NETLINK buffer
  * @info: the Generic NETLINK info block
+ * @audit_info: NetLabel audit information
  *
  * Description:
- * Process a user generated ADD message and add the domains from the message
- * to the hash table.  See netlabel.h for a description of the message format.
- * Returns zero on success, negative values on failure.
+ * Helper function for the ADD and ADDDEF messages to add the domain mappings
+ * from the message to the hash table.  See netlabel.h for a description of the
+ * message format.  Returns zero on success, negative values on failure.
  *
  */
-static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+static int netlbl_mgmt_add_common(struct genl_info *info,
+				  struct netlbl_audit *audit_info)
 {
 	int ret_val = -EINVAL;
 	struct netlbl_dom_map *entry = NULL;
-	size_t tmp_size;
+	struct netlbl_domaddr_map *addrmap = NULL;
+	struct cipso_v4_doi *cipsov4 = NULL;
 	u32 tmp_val;
-	struct netlbl_audit audit_info;
-
-	if (!info->attrs[NLBL_MGMT_A_DOMAIN] ||
-	    !info->attrs[NLBL_MGMT_A_PROTOCOL])
-		goto add_failure;
-
-	netlbl_netlink_auditinfo(skb, &audit_info);
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (entry == NULL) {
 		ret_val = -ENOMEM;
 		goto add_failure;
 	}
-	tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
-	entry->domain = kmalloc(tmp_size, GFP_KERNEL);
-	if (entry->domain == NULL) {
-		ret_val = -ENOMEM;
-		goto add_failure;
-	}
 	entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
-	nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
+	if (info->attrs[NLBL_MGMT_A_DOMAIN]) {
+		size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
+		entry->domain = kmalloc(tmp_size, GFP_KERNEL);
+		if (entry->domain == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		nla_strlcpy(entry->domain,
+			    info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
+	}
+
+	/* NOTE: internally we allow/use a entry->type value of
+	 *       NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users
+	 *       to pass that as a protocol value because we need to know the
+	 *       "real" protocol */
 
 	switch (entry->type) {
 	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = netlbl_domhsh_add(entry, &audit_info);
 		break;
 	case NETLBL_NLTYPE_CIPSOV4:
 		if (!info->attrs[NLBL_MGMT_A_CV4DOI])
 			goto add_failure;
 
 		tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
-		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
-		if (entry->type_def.cipsov4 == NULL)
+		cipsov4 = cipso_v4_doi_getdef(tmp_val);
+		if (cipsov4 == NULL)
 			goto add_failure;
-		ret_val = netlbl_domhsh_add(entry, &audit_info);
-		if (ret_val != 0)
-			cipso_v4_doi_putdef(entry->type_def.cipsov4);
+		entry->type_def.cipsov4 = cipsov4;
 		break;
 	default:
 		goto add_failure;
 	}
+
+	if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) {
+		struct in_addr *addr;
+		struct in_addr *mask;
+		struct netlbl_domaddr4_map *map;
+
+		addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL);
+		if (addrmap == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		INIT_LIST_HEAD(&addrmap->list4);
+		INIT_LIST_HEAD(&addrmap->list6);
+
+		if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) !=
+		    sizeof(struct in_addr)) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) !=
+		    sizeof(struct in_addr)) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]);
+		mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]);
+
+		map = kzalloc(sizeof(*map), GFP_KERNEL);
+		if (map == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		map->list.addr = addr->s_addr & mask->s_addr;
+		map->list.mask = mask->s_addr;
+		map->list.valid = 1;
+		map->type = entry->type;
+		if (cipsov4)
+			map->type_def.cipsov4 = cipsov4;
+
+		ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
+		if (ret_val != 0) {
+			kfree(map);
+			goto add_failure;
+		}
+
+		entry->type = NETLBL_NLTYPE_ADDRSELECT;
+		entry->type_def.addrsel = addrmap;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	} else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) {
+		struct in6_addr *addr;
+		struct in6_addr *mask;
+		struct netlbl_domaddr6_map *map;
+
+		addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL);
+		if (addrmap == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		INIT_LIST_HEAD(&addrmap->list4);
+		INIT_LIST_HEAD(&addrmap->list6);
+
+		if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) !=
+		    sizeof(struct in6_addr)) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) !=
+		    sizeof(struct in6_addr)) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]);
+		mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]);
+
+		map = kzalloc(sizeof(*map), GFP_KERNEL);
+		if (map == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		ipv6_addr_copy(&map->list.addr, addr);
+		map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
+		map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
+		map->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
+		map->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
+		ipv6_addr_copy(&map->list.mask, mask);
+		map->list.valid = 1;
+		map->type = entry->type;
+
+		ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
+		if (ret_val != 0) {
+			kfree(map);
+			goto add_failure;
+		}
+
+		entry->type = NETLBL_NLTYPE_ADDRSELECT;
+		entry->type_def.addrsel = addrmap;
+#endif /* IPv6 */
+	}
+
+	ret_val = netlbl_domhsh_add(entry, audit_info);
 	if (ret_val != 0)
 		goto add_failure;
 
 	return 0;
 
 add_failure:
+	if (cipsov4)
+		cipso_v4_doi_putdef(cipsov4);
 	if (entry)
 		kfree(entry->domain);
+	kfree(addrmap);
 	kfree(entry);
 	return ret_val;
 }
 
+/**
+ * netlbl_mgmt_listentry - List a NetLabel/LSM domain map entry
+ * @skb: the NETLINK buffer
+ * @entry: the map entry
+ *
+ * Description:
+ * This function is a helper function used by the LISTALL and LISTDEF command
+ * handlers.  The caller is responsibile for ensuring that the RCU read lock
+ * is held.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_listentry(struct sk_buff *skb,
+				 struct netlbl_dom_map *entry)
+{
+	int ret_val;
+	struct nlattr *nla_a;
+	struct nlattr *nla_b;
+	struct netlbl_af4list *iter4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct netlbl_af6list *iter6;
+#endif
+
+	if (entry->domain != NULL) {
+		ret_val = nla_put_string(skb,
+					 NLBL_MGMT_A_DOMAIN, entry->domain);
+		if (ret_val != 0)
+			return ret_val;
+	}
+
+	switch (entry->type) {
+	case NETLBL_NLTYPE_ADDRSELECT:
+		nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST);
+		if (nla_a == NULL)
+			return -ENOMEM;
+
+		netlbl_af4list_foreach_rcu(iter4,
+					   &entry->type_def.addrsel->list4) {
+			struct netlbl_domaddr4_map *map4;
+			struct in_addr addr_struct;
+
+			nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
+			if (nla_b == NULL)
+				return -ENOMEM;
+
+			addr_struct.s_addr = iter4->addr;
+			ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR,
+					  sizeof(struct in_addr),
+					  &addr_struct);
+			if (ret_val != 0)
+				return ret_val;
+			addr_struct.s_addr = iter4->mask;
+			ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK,
+					  sizeof(struct in_addr),
+					  &addr_struct);
+			if (ret_val != 0)
+				return ret_val;
+			map4 = netlbl_domhsh_addr4_entry(iter4);
+			ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
+					      map4->type);
+			if (ret_val != 0)
+				return ret_val;
+			switch (map4->type) {
+			case NETLBL_NLTYPE_CIPSOV4:
+				ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
+						  map4->type_def.cipsov4->doi);
+				if (ret_val != 0)
+					return ret_val;
+				break;
+			}
+
+			nla_nest_end(skb, nla_b);
+		}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		netlbl_af6list_foreach_rcu(iter6,
+					   &entry->type_def.addrsel->list6) {
+			struct netlbl_domaddr6_map *map6;
+
+			nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
+			if (nla_b == NULL)
+				return -ENOMEM;
+
+			ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR,
+					  sizeof(struct in6_addr),
+					  &iter6->addr);
+			if (ret_val != 0)
+				return ret_val;
+			ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK,
+					  sizeof(struct in6_addr),
+					  &iter6->mask);
+			if (ret_val != 0)
+				return ret_val;
+			map6 = netlbl_domhsh_addr6_entry(iter6);
+			ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
+					      map6->type);
+			if (ret_val != 0)
+				return ret_val;
+
+			nla_nest_end(skb, nla_b);
+		}
+#endif /* IPv6 */
+
+		nla_nest_end(skb, nla_a);
+		break;
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+		if (ret_val != 0)
+			return ret_val;
+		ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
+				      entry->type_def.cipsov4->doi);
+		break;
+	}
+
+	return ret_val;
+}
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_mgmt_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADD message and add the domains from the message
+ * to the hash table.  See netlabel.h for a description of the message format.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+{
+	struct netlbl_audit audit_info;
+
+	if ((!info->attrs[NLBL_MGMT_A_DOMAIN]) ||
+	    (!info->attrs[NLBL_MGMT_A_PROTOCOL]) ||
+	    (info->attrs[NLBL_MGMT_A_IPV4ADDR] &&
+	     info->attrs[NLBL_MGMT_A_IPV6ADDR]) ||
+	    (info->attrs[NLBL_MGMT_A_IPV4MASK] &&
+	     info->attrs[NLBL_MGMT_A_IPV6MASK]) ||
+	    ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^
+	     (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) ||
+	    ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^
+	     (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
+		return -EINVAL;
+
+	netlbl_netlink_auditinfo(skb, &audit_info);
+
+	return netlbl_mgmt_add_common(info, &audit_info);
+}
+
 /**
  * netlbl_mgmt_remove - Handle a REMOVE message
  * @skb: the NETLINK buffer
@@ -192,23 +453,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
 	if (data == NULL)
 		goto listall_cb_failure;
 
-	ret_val = nla_put_string(cb_arg->skb,
-				 NLBL_MGMT_A_DOMAIN,
-				 entry->domain);
+	ret_val = netlbl_mgmt_listentry(cb_arg->skb, entry);
 	if (ret_val != 0)
 		goto listall_cb_failure;
-	ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type);
-	if (ret_val != 0)
-		goto listall_cb_failure;
-	switch (entry->type) {
-	case NETLBL_NLTYPE_CIPSOV4:
-		ret_val = nla_put_u32(cb_arg->skb,
-				      NLBL_MGMT_A_CV4DOI,
-				      entry->type_def.cipsov4->doi);
-		if (ret_val != 0)
-			goto listall_cb_failure;
-		break;
-	}
 
 	cb_arg->seq++;
 	return genlmsg_end(cb_arg->skb, data);
@@ -262,50 +509,22 @@ static int netlbl_mgmt_listall(struct sk_buff *skb,
  */
 static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 {
-	int ret_val = -EINVAL;
-	struct netlbl_dom_map *entry = NULL;
-	u32 tmp_val;
 	struct netlbl_audit audit_info;
 
-	if (!info->attrs[NLBL_MGMT_A_PROTOCOL])
-		goto adddef_failure;
+	if ((!info->attrs[NLBL_MGMT_A_PROTOCOL]) ||
+	    (info->attrs[NLBL_MGMT_A_IPV4ADDR] &&
+	     info->attrs[NLBL_MGMT_A_IPV6ADDR]) ||
+	    (info->attrs[NLBL_MGMT_A_IPV4MASK] &&
+	     info->attrs[NLBL_MGMT_A_IPV6MASK]) ||
+	    ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^
+	     (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) ||
+	    ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^
+	     (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
+		return -EINVAL;
 
 	netlbl_netlink_auditinfo(skb, &audit_info);
 
-	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-	if (entry == NULL) {
-		ret_val = -ENOMEM;
-		goto adddef_failure;
-	}
-	entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
-
-	switch (entry->type) {
-	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = netlbl_domhsh_add_default(entry, &audit_info);
-		break;
-	case NETLBL_NLTYPE_CIPSOV4:
-		if (!info->attrs[NLBL_MGMT_A_CV4DOI])
-			goto adddef_failure;
-
-		tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
-		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
-		if (entry->type_def.cipsov4 == NULL)
-			goto adddef_failure;
-		ret_val = netlbl_domhsh_add_default(entry, &audit_info);
-		if (ret_val != 0)
-			cipso_v4_doi_putdef(entry->type_def.cipsov4);
-		break;
-	default:
-		goto adddef_failure;
-	}
-	if (ret_val != 0)
-		goto adddef_failure;
-
-	return 0;
-
-adddef_failure:
-	kfree(entry);
-	return ret_val;
+	return netlbl_mgmt_add_common(info, &audit_info);
 }
 
 /**
@@ -359,19 +578,10 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
 		ret_val = -ENOENT;
 		goto listdef_failure_lock;
 	}
-	ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type);
-	if (ret_val != 0)
-		goto listdef_failure_lock;
-	switch (entry->type) {
-	case NETLBL_NLTYPE_CIPSOV4:
-		ret_val = nla_put_u32(ans_skb,
-				      NLBL_MGMT_A_CV4DOI,
-				      entry->type_def.cipsov4->doi);
-		if (ret_val != 0)
-			goto listdef_failure_lock;
-		break;
-	}
+	ret_val = netlbl_mgmt_listentry(ans_skb, entry);
 	rcu_read_unlock();
+	if (ret_val != 0)
+		goto listdef_failure;
 
 	genlmsg_end(ans_skb, data);
 	return genlmsg_reply(ans_skb, info);
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index a43bff169d6..05d96431f81 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -45,6 +45,16 @@
  *     NLBL_MGMT_A_DOMAIN
  *     NLBL_MGMT_A_PROTOCOL
  *
+ *   If IPv4 is specified the following attributes are required:
+ *
+ *     NLBL_MGMT_A_IPV4ADDR
+ *     NLBL_MGMT_A_IPV4MASK
+ *
+ *   If IPv6 is specified the following attributes are required:
+ *
+ *     NLBL_MGMT_A_IPV6ADDR
+ *     NLBL_MGMT_A_IPV6MASK
+ *
  *   If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
  *
  *     NLBL_MGMT_A_CV4DOI
@@ -68,13 +78,24 @@
  *   Required attributes:
  *
  *     NLBL_MGMT_A_DOMAIN
+ *
+ *   If the IP address selectors are not used the following attribute is
+ *   required:
+ *
  *     NLBL_MGMT_A_PROTOCOL
  *
- *   If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ *   If the IP address selectors are used then the following attritbute is
+ *   required:
+ *
+ *     NLBL_MGMT_A_SELECTORLIST
+ *
+ *   If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following
+ *   attributes are required:
  *
  *     NLBL_MGMT_A_CV4DOI
  *
- *   If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ *   If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other
+ *   attributes are required.
  *
  * o ADDDEF:
  *   Sent by an application to set the default domain mapping for the NetLabel
@@ -100,15 +121,23 @@
  *   application there is no payload.  On success the kernel should send a
  *   response using the following format.
  *
- *   Required attributes:
+ *   If the IP address selectors are not used the following attribute is
+ *   required:
  *
  *     NLBL_MGMT_A_PROTOCOL
  *
- *   If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
+ *   If the IP address selectors are used then the following attritbute is
+ *   required:
+ *
+ *     NLBL_MGMT_A_SELECTORLIST
+ *
+ *   If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following
+ *   attributes are required:
  *
  *     NLBL_MGMT_A_CV4DOI
  *
- *   If using NETLBL_NLTYPE_UNLABELED no other attributes are required.
+ *   If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other
+ *   attributes are required.
  *
  * o PROTOCOLS:
  *   Sent by an application to request a list of configured NetLabel protocols
@@ -162,6 +191,26 @@ enum {
 	NLBL_MGMT_A_CV4DOI,
 	/* (NLA_U32)
 	 * the CIPSOv4 DOI value */
+	NLBL_MGMT_A_IPV6ADDR,
+	/* (NLA_BINARY, struct in6_addr)
+	 * an IPv6 address */
+	NLBL_MGMT_A_IPV6MASK,
+	/* (NLA_BINARY, struct in6_addr)
+	 * an IPv6 address mask */
+	NLBL_MGMT_A_IPV4ADDR,
+	/* (NLA_BINARY, struct in_addr)
+	 * an IPv4 address */
+	NLBL_MGMT_A_IPV4MASK,
+	/* (NLA_BINARY, struct in_addr)
+	 * and IPv4 address mask */
+	NLBL_MGMT_A_ADDRSELECTOR,
+	/* (NLA_NESTED)
+	 * an IP address selector, must contain an address, mask, and protocol
+	 * attribute plus any protocol specific attributes */
+	NLBL_MGMT_A_SELECTORLIST,
+	/* (NLA_NESTED)
+	 * the selector list, there must be at least one
+	 * NLBL_MGMT_A_ADDRSELECTOR attribute */
 	__NLBL_MGMT_A_MAX,
 };
 #define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index ab8131a8e48..e8a5c32b0f1 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -145,76 +145,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1
 	[NLBL_UNLABEL_A_SECCTX] = { .type = NLA_BINARY }
 };
 
-/*
- * Audit Helper Functions
- */
-
-/**
- * netlbl_unlabel_audit_addr4 - Audit an IPv4 address
- * @audit_buf: audit buffer
- * @dev: network interface
- * @addr: IP address
- * @mask: IP address mask
- *
- * Description:
- * Write the IPv4 address and address mask, if necessary, to @audit_buf.
- *
- */
-static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf,
-				     const char *dev,
-				     __be32 addr, __be32 mask)
-{
-	u32 mask_val = ntohl(mask);
-
-	if (dev != NULL)
-		audit_log_format(audit_buf, " netif=%s", dev);
-	audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr));
-	if (mask_val != 0xffffffff) {
-		u32 mask_len = 0;
-		while (mask_val > 0) {
-			mask_val <<= 1;
-			mask_len++;
-		}
-		audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
-	}
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-/**
- * netlbl_unlabel_audit_addr6 - Audit an IPv6 address
- * @audit_buf: audit buffer
- * @dev: network interface
- * @addr: IP address
- * @mask: IP address mask
- *
- * Description:
- * Write the IPv6 address and address mask, if necessary, to @audit_buf.
- *
- */
-static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf,
-				     const char *dev,
-				     const struct in6_addr *addr,
-				     const struct in6_addr *mask)
-{
-	if (dev != NULL)
-		audit_log_format(audit_buf, " netif=%s", dev);
-	audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr));
-	if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
-		u32 mask_len = 0;
-		u32 mask_val;
-		int iter = -1;
-		while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff)
-			mask_len += 32;
-		mask_val = ntohl(mask->s6_addr32[iter]);
-		while (mask_val > 0) {
-			mask_val <<= 1;
-			mask_len++;
-		}
-		audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
-	}
-}
-#endif /* IPv6 */
-
 /*
  * Unlabeled Connection Hash Table Functions
  */
@@ -571,10 +501,10 @@ static int netlbl_unlhsh_add(struct net *net,
 		mask4 = (struct in_addr *)mask;
 		ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid);
 		if (audit_buf != NULL)
-			netlbl_unlabel_audit_addr4(audit_buf,
-						   dev_name,
-						   addr4->s_addr,
-						   mask4->s_addr);
+			netlbl_af4list_audit_addr(audit_buf, 1,
+						  dev_name,
+						  addr4->s_addr,
+						  mask4->s_addr);
 		break;
 	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -585,9 +515,9 @@ static int netlbl_unlhsh_add(struct net *net,
 		mask6 = (struct in6_addr *)mask;
 		ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid);
 		if (audit_buf != NULL)
-			netlbl_unlabel_audit_addr6(audit_buf,
-						   dev_name,
-						   addr6, mask6);
+			netlbl_af6list_audit_addr(audit_buf, 1,
+						  dev_name,
+						  addr6, mask6);
 		break;
 	}
 #endif /* IPv6 */
@@ -652,9 +582,9 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 					      audit_info);
 	if (audit_buf != NULL) {
 		dev = dev_get_by_index(net, iface->ifindex);
-		netlbl_unlabel_audit_addr4(audit_buf,
-					   (dev != NULL ? dev->name : NULL),
-					   addr->s_addr, mask->s_addr);
+		netlbl_af4list_audit_addr(audit_buf, 1,
+					  (dev != NULL ? dev->name : NULL),
+					  addr->s_addr, mask->s_addr);
 		if (dev != NULL)
 			dev_put(dev);
 		if (entry && security_secid_to_secctx(entry->secid,
@@ -712,9 +642,9 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 					      audit_info);
 	if (audit_buf != NULL) {
 		dev = dev_get_by_index(net, iface->ifindex);
-		netlbl_unlabel_audit_addr6(audit_buf,
-					   (dev != NULL ? dev->name : NULL),
-					   addr, mask);
+		netlbl_af6list_audit_addr(audit_buf, 1,
+					  (dev != NULL ? dev->name : NULL),
+					  addr, mask);
 		if (dev != NULL)
 			dev_put(dev);
 		if (entry && security_secid_to_secctx(entry->secid,
-- 
cgit v1.2.3-70-g09d2


From 948bf85c1bc9a84754786a9d5dd99b7ecc46451e Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:32 -0400
Subject: netlabel: Add functionality to set the security attributes of a
 packet

This patch builds upon the new NetLabel address selector functionality by
providing the NetLabel KAPI and CIPSO engine support needed to enable the
new packet-based labeling.  The only new addition to the NetLabel KAPI at
this point is shown below:

 * int netlbl_skbuff_setattr(skb, family, secattr)

... and is designed to be called from a Netfilter hook after the packet's
IP header has been populated such as in the FORWARD or LOCAL_OUT hooks.

This patch also provides the necessary SELinux hooks to support this new
functionality.  Smack support is not currently included due to uncertainty
regarding the permissions needed to expand the Smack network access controls.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 include/net/cipso_ipv4.h            |  16 +++
 include/net/netlabel.h              |   9 ++
 net/ipv4/cipso_ipv4.c               | 222 +++++++++++++++++++++++++++++-------
 net/netlabel/netlabel_kapi.c        |  60 ++++++++++
 security/selinux/hooks.c            |  50 +++++++-
 security/selinux/include/netlabel.h |   9 ++
 security/selinux/include/objsec.h   |   1 +
 security/selinux/netlabel.c         |  68 ++++++++++-
 8 files changed, 393 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 5fe6556fb3c..2ce093ba553 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -208,6 +208,10 @@ int cipso_v4_sock_setattr(struct sock *sk,
 			  const struct cipso_v4_doi *doi_def,
 			  const struct netlbl_lsm_secattr *secattr);
 int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr);
+int cipso_v4_skbuff_setattr(struct sk_buff *skb,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr);
+int cipso_v4_skbuff_delattr(struct sk_buff *skb);
 int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
 			    struct netlbl_lsm_secattr *secattr);
 int cipso_v4_validate(unsigned char **option);
@@ -232,6 +236,18 @@ static inline int cipso_v4_sock_getattr(struct sock *sk,
 	return -ENOSYS;
 }
 
+static inline int cipso_v4_skbuff_setattr(struct sk_buff *skb,
+				      const struct cipso_v4_doi *doi_def,
+				      const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_skbuff_delattr(struct sk_buff *skb)
+{
+	return -ENOSYS;
+}
+
 static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
 					  struct netlbl_lsm_secattr *secattr)
 {
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 0729f8ce504..3f67e6d49e4 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -382,6 +382,9 @@ int netlbl_sock_setattr(struct sock *sk,
 			const struct netlbl_lsm_secattr *secattr);
 int netlbl_sock_getattr(struct sock *sk,
 			struct netlbl_lsm_secattr *secattr);
+int netlbl_skbuff_setattr(struct sk_buff *skb,
+			  u16 family,
+			  const struct netlbl_lsm_secattr *secattr);
 int netlbl_skbuff_getattr(const struct sk_buff *skb,
 			  u16 family,
 			  struct netlbl_lsm_secattr *secattr);
@@ -451,6 +454,12 @@ static inline int netlbl_sock_getattr(struct sock *sk,
 {
 	return -ENOSYS;
 }
+static inline int netlbl_skbuff_setattr(struct sk_buff *skb,
+				      u16 family,
+				      const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
 static inline int netlbl_skbuff_getattr(const struct sk_buff *skb,
 					u16 family,
 					struct netlbl_lsm_secattr *secattr)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index bf87eddfec3..e13d6dbb66a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -13,7 +13,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -1665,48 +1665,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 }
 
 /**
- * cipso_v4_sock_setattr - Add a CIPSO option to a socket
- * @sk: the socket
+ * cipso_v4_genopt - Generate a CIPSO option
+ * @buf: the option buffer
+ * @buf_len: the size of opt_buf
  * @doi_def: the CIPSO DOI to use
- * @secattr: the specific security attributes of the socket
+ * @secattr: the security attributes
  *
  * Description:
- * Set the CIPSO option on the given socket using the DOI definition and
- * security attributes passed to the function.  This function requires
- * exclusive access to @sk, which means it either needs to be in the
- * process of being created or locked.  Returns zero on success and negative
- * values on failure.
+ * Generate a CIPSO option using the DOI definition and security attributes
+ * passed to the function.  Returns the length of the option on success and
+ * negative values on failure.
  *
  */
-int cipso_v4_sock_setattr(struct sock *sk,
-			  const struct cipso_v4_doi *doi_def,
-			  const struct netlbl_lsm_secattr *secattr)
+static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
+			   const struct cipso_v4_doi *doi_def,
+			   const struct netlbl_lsm_secattr *secattr)
 {
-	int ret_val = -EPERM;
+	int ret_val;
 	u32 iter;
-	unsigned char *buf;
-	u32 buf_len = 0;
-	u32 opt_len;
-	struct ip_options *opt = NULL;
-	struct inet_sock *sk_inet;
-	struct inet_connection_sock *sk_conn;
-
-	/* In the case of sock_create_lite(), the sock->sk field is not
-	 * defined yet but it is not a problem as the only users of these
-	 * "lite" PF_INET sockets are functions which do an accept() call
-	 * afterwards so we will label the socket as part of the accept(). */
-	if (sk == NULL)
-		return 0;
 
-	/* We allocate the maximum CIPSO option size here so we are probably
-	 * being a little wasteful, but it makes our life _much_ easier later
-	 * on and after all we are only talking about 40 bytes. */
-	buf_len = CIPSO_V4_OPT_LEN_MAX;
-	buf = kmalloc(buf_len, GFP_ATOMIC);
-	if (buf == NULL) {
-		ret_val = -ENOMEM;
-		goto socket_setattr_failure;
-	}
+	if (buf_len <= CIPSO_V4_HDR_LEN)
+		return -ENOSPC;
 
 	/* XXX - This code assumes only one tag per CIPSO option which isn't
 	 * really a good assumption to make but since we only support the MAC
@@ -1734,8 +1713,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
 						   buf_len - CIPSO_V4_HDR_LEN);
 			break;
 		default:
-			ret_val = -EPERM;
-			goto socket_setattr_failure;
+			return -EPERM;
 		}
 
 		iter++;
@@ -1743,9 +1721,58 @@ int cipso_v4_sock_setattr(struct sock *sk,
 		 iter < CIPSO_V4_TAG_MAXCNT &&
 		 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
 	if (ret_val < 0)
-		goto socket_setattr_failure;
+		return ret_val;
 	cipso_v4_gentag_hdr(doi_def, buf, ret_val);
-	buf_len = CIPSO_V4_HDR_LEN + ret_val;
+	return CIPSO_V4_HDR_LEN + ret_val;
+}
+
+/**
+ * cipso_v4_sock_setattr - Add a CIPSO option to a socket
+ * @sk: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function.  This function requires
+ * exclusive access to @sk, which means it either needs to be in the
+ * process of being created or locked.  Returns zero on success and negative
+ * values on failure.
+ *
+ */
+int cipso_v4_sock_setattr(struct sock *sk,
+			  const struct cipso_v4_doi *doi_def,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	unsigned char *buf = NULL;
+	u32 buf_len;
+	u32 opt_len;
+	struct ip_options *opt = NULL;
+	struct inet_sock *sk_inet;
+	struct inet_connection_sock *sk_conn;
+
+	/* In the case of sock_create_lite(), the sock->sk field is not
+	 * defined yet but it is not a problem as the only users of these
+	 * "lite" PF_INET sockets are functions which do an accept() call
+	 * afterwards so we will label the socket as part of the accept(). */
+	if (sk == NULL)
+		return 0;
+
+	/* We allocate the maximum CIPSO option size here so we are probably
+	 * being a little wasteful, but it makes our life _much_ easier later
+	 * on and after all we are only talking about 40 bytes. */
+	buf_len = CIPSO_V4_OPT_LEN_MAX;
+	buf = kmalloc(buf_len, GFP_ATOMIC);
+	if (buf == NULL) {
+		ret_val = -ENOMEM;
+		goto socket_setattr_failure;
+	}
+
+	ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+	if (ret_val < 0)
+		goto socket_setattr_failure;
+	buf_len = ret_val;
 
 	/* We can't use ip_options_get() directly because it makes a call to
 	 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
@@ -1853,6 +1880,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 				secattr);
 }
 
+/**
+ * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Set the CIPSO option on the given packet based on the security attributes.
+ * Returns a pointer to the IP header on success and NULL on failure.
+ *
+ */
+int cipso_v4_skbuff_setattr(struct sk_buff *skb,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct iphdr *iph;
+	struct ip_options *opt = &IPCB(skb)->opt;
+	unsigned char buf[CIPSO_V4_OPT_LEN_MAX];
+	u32 buf_len = CIPSO_V4_OPT_LEN_MAX;
+	u32 opt_len;
+	int len_delta;
+
+	buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+	if (buf_len < 0)
+		return buf_len;
+	opt_len = (buf_len + 3) & ~3;
+
+	/* we overwrite any existing options to ensure that we have enough
+	 * room for the CIPSO option, the reason is that we _need_ to guarantee
+	 * that the security label is applied to the packet - we do the same
+	 * thing when using the socket options and it hasn't caused a problem,
+	 * if we need to we can always revisit this choice later */
+
+	len_delta = opt_len - opt->optlen;
+	/* if we don't ensure enough headroom we could panic on the skb_push()
+	 * call below so make sure we have enough, we are also "mangling" the
+	 * packet so we should probably do a copy-on-write call anyway */
+	ret_val = skb_cow(skb, skb_headroom(skb) + len_delta);
+	if (ret_val < 0)
+		return ret_val;
+
+	if (len_delta > 0) {
+		/* we assume that the header + opt->optlen have already been
+		 * "pushed" in ip_options_build() or similar */
+		iph = ip_hdr(skb);
+		skb_push(skb, len_delta);
+		memmove((char *)iph - len_delta, iph, iph->ihl << 2);
+		skb_reset_network_header(skb);
+		iph = ip_hdr(skb);
+	} else if (len_delta < 0) {
+		iph = ip_hdr(skb);
+		memset(iph + 1, IPOPT_NOP, opt->optlen);
+	} else
+		iph = ip_hdr(skb);
+
+	if (opt->optlen > 0)
+		memset(opt, 0, sizeof(*opt));
+	opt->optlen = opt_len;
+	opt->cipso = sizeof(struct iphdr);
+	opt->is_changed = 1;
+
+	/* we have to do the following because we are being called from a
+	 * netfilter hook which means the packet already has had the header
+	 * fields populated and the checksum calculated - yes this means we
+	 * are doing more work than needed but we do it to keep the core
+	 * stack clean and tidy */
+	memcpy(iph + 1, buf, buf_len);
+	if (opt_len > buf_len)
+		memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len);
+	if (len_delta != 0) {
+		iph->ihl = 5 + (opt_len >> 2);
+		iph->tot_len = htons(skb->len);
+	}
+	ip_send_check(iph);
+
+	return 0;
+}
+
+/**
+ * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet
+ * @skb: the packet
+ *
+ * Description:
+ * Removes any and all CIPSO options from the given packet.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+int cipso_v4_skbuff_delattr(struct sk_buff *skb)
+{
+	int ret_val;
+	struct iphdr *iph;
+	struct ip_options *opt = &IPCB(skb)->opt;
+	unsigned char *cipso_ptr;
+
+	if (opt->cipso == 0)
+		return 0;
+
+	/* since we are changing the packet we should make a copy */
+	ret_val = skb_cow(skb, skb_headroom(skb));
+	if (ret_val < 0)
+		return ret_val;
+
+	/* the easiest thing to do is just replace the cipso option with noop
+	 * options since we don't change the size of the packet, although we
+	 * still need to recalculate the checksum */
+
+	iph = ip_hdr(skb);
+	cipso_ptr = (unsigned char *)iph + opt->cipso;
+	memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]);
+	opt->cipso = 0;
+	opt->is_changed = 1;
+
+	ip_send_check(iph);
+
+	return 0;
+}
+
 /**
  * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
  * @skb: the packet
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 8b820dc9806..cc8047d1f50 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -472,6 +472,66 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 	return cipso_v4_sock_getattr(sk, secattr);
 }
 
+/**
+ * netlbl_skbuff_setattr - Label a packet using the correct protocol
+ * @skb: the packet
+ * @family: protocol family
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given packet using the security attributes
+ * specified in @secattr.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_skbuff_setattr(struct sk_buff *skb,
+			  u16 family,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct iphdr *hdr4;
+	struct netlbl_domaddr4_map *af4_entry;
+
+	rcu_read_lock();
+	switch (family) {
+	case AF_INET:
+		hdr4 = ip_hdr(skb);
+		af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
+						       hdr4->daddr);
+		if (af4_entry == NULL) {
+			ret_val = -ENOENT;
+			goto skbuff_setattr_return;
+		}
+		switch (af4_entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			ret_val = cipso_v4_skbuff_setattr(skb,
+						   af4_entry->type_def.cipsov4,
+						   secattr);
+			break;
+		case NETLBL_NLTYPE_UNLABELED:
+			/* just delete the protocols we support for right now
+			 * but we could remove other protocols if needed */
+			ret_val = cipso_v4_skbuff_delattr(skb);
+			break;
+		default:
+			ret_val = -ENOENT;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		/* since we don't support any IPv6 labeling protocols right
+		 * now we can optimize everything away until we do */
+		ret_val = 0;
+		break;
+#endif /* IPv6 */
+	default:
+		ret_val = 0;
+	}
+
+skbuff_setattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
 /**
  * netlbl_skbuff_getattr - Determine the security attributes of a packet
  * @skb: the packet
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a91146a6b37..7432bdd5d36 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4407,13 +4407,15 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 	u32 peer_sid;
 	struct avc_audit_data ad;
 	u8 secmark_active;
+	u8 netlbl_active;
 	u8 peerlbl_active;
 
 	if (!selinux_policycap_netpeer)
 		return NF_ACCEPT;
 
 	secmark_active = selinux_secmark_enabled();
-	peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+	netlbl_active = netlbl_enabled();
+	peerlbl_active = netlbl_active || selinux_xfrm_enabled();
 	if (!secmark_active && !peerlbl_active)
 		return NF_ACCEPT;
 
@@ -4440,6 +4442,14 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 				 SECCLASS_PACKET, PACKET__FORWARD_IN, &ad))
 			return NF_DROP;
 
+	if (netlbl_active)
+		/* we do this in the FORWARD path and not the POST_ROUTING
+		 * path because we want to make sure we apply the necessary
+		 * labeling before IPsec is applied so we can leverage AH
+		 * protection */
+		if (selinux_netlbl_skbuff_setsid(skb, family, peer_sid) != 0)
+			return NF_DROP;
+
 	return NF_ACCEPT;
 }
 
@@ -4463,6 +4473,37 @@ static unsigned int selinux_ipv6_forward(unsigned int hooknum,
 }
 #endif	/* IPV6 */
 
+static unsigned int selinux_ip_output(struct sk_buff *skb,
+				      u16 family)
+{
+	u32 sid;
+
+	if (!netlbl_enabled())
+		return NF_ACCEPT;
+
+	/* we do this in the LOCAL_OUT path and not the POST_ROUTING path
+	 * because we want to make sure we apply the necessary labeling
+	 * before IPsec is applied so we can leverage AH protection */
+	if (skb->sk) {
+		struct sk_security_struct *sksec = skb->sk->sk_security;
+		sid = sksec->sid;
+	} else
+		sid = SECINITSID_KERNEL;
+	if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0)
+		return NF_DROP;
+
+	return NF_ACCEPT;
+}
+
+static unsigned int selinux_ipv4_output(unsigned int hooknum,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	return selinux_ip_output(skb, PF_INET);
+}
+
 static int selinux_ip_postroute_iptables_compat(struct sock *sk,
 						int ifindex,
 						struct avc_audit_data *ad,
@@ -5700,6 +5741,13 @@ static struct nf_hook_ops selinux_ipv4_ops[] = {
 		.pf =		PF_INET,
 		.hooknum =	NF_INET_FORWARD,
 		.priority =	NF_IP_PRI_SELINUX_FIRST,
+	},
+	{
+		.hook =		selinux_ipv4_output,
+		.owner =	THIS_MODULE,
+		.pf =		PF_INET,
+		.hooknum =	NF_INET_LOCAL_OUT,
+		.priority =	NF_IP_PRI_SELINUX_FIRST,
 	}
 };
 
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index d4e3ac8a7fb..b3e6ae071fc 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -48,6 +48,9 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
 				 u16 family,
 				 u32 *type,
 				 u32 *sid);
+int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
+				 u16 family,
+				 u32 sid);
 
 void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock);
 int selinux_netlbl_socket_post_create(struct socket *sock);
@@ -88,6 +91,12 @@ static inline int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
 	*sid = SECSID_NULL;
 	return 0;
 }
+static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
+					       u16 family,
+					       u32 sid)
+{
+	return 0;
+}
 
 static inline void selinux_netlbl_sock_graft(struct sock *sk,
 					     struct socket *sock)
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 91070ab874c..f46dd1c3d01 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -117,6 +117,7 @@ struct sk_security_struct {
 		NLBL_UNSET = 0,
 		NLBL_REQUIRE,
 		NLBL_LABELED,
+		NLBL_REQSKB,
 	} nlbl_state;
 #endif
 };
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index 4053f7fc95f..090404d6e51 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -9,7 +9,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2007
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2007, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -31,6 +31,8 @@
 #include <linux/rcupdate.h>
 #include <net/sock.h>
 #include <net/netlabel.h>
+#include <net/inet_sock.h>
+#include <net/inet_connection_sock.h>
 
 #include "objsec.h"
 #include "security.h"
@@ -77,6 +79,8 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 	int rc;
 	struct sk_security_struct *sksec = sk->sk_security;
 	struct netlbl_lsm_secattr secattr;
+	struct inet_sock *sk_inet;
+	struct inet_connection_sock *sk_conn;
 
 	if (sksec->nlbl_state != NLBL_REQUIRE)
 		return 0;
@@ -87,8 +91,29 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 	if (rc != 0)
 		goto sock_setsid_return;
 	rc = netlbl_sock_setattr(sk, &secattr);
-	if (rc == 0)
+	switch (rc) {
+	case 0:
 		sksec->nlbl_state = NLBL_LABELED;
+		break;
+	case -EDESTADDRREQ:
+		/* we are going to possibly end up labeling the individual
+		 * packets later which is problematic for stream sockets
+		 * because of the additional IP header size, our solution is to
+		 * allow for the maximum IP header length (40 bytes for IPv4,
+		 * we don't have to worry about IPv6 yet) just in case */
+		sk_inet = inet_sk(sk);
+		if (sk_inet->is_icsk) {
+			sk_conn = inet_csk(sk);
+			if (sk_inet->opt)
+				sk_conn->icsk_ext_hdr_len -=
+							   sk_inet->opt->optlen;
+			sk_conn->icsk_ext_hdr_len += 40;
+			sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+		}
+		sksec->nlbl_state = NLBL_REQSKB;
+		rc = 0;
+		break;
+	}
 
 sock_setsid_return:
 	netlbl_secattr_destroy(&secattr);
@@ -182,6 +207,45 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
 	return rc;
 }
 
+/**
+ * selinux_netlbl_skbuff_setsid - Set the NetLabel on a packet given a sid
+ * @skb: the packet
+ * @family: protocol family
+ * @sid: the SID
+ *
+ * Description
+ * Call the NetLabel mechanism to set the label of a packet using @sid.
+ * Returns zero on auccess, negative values on failure.
+ *
+ */
+int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
+				 u16 family,
+				 u32 sid)
+{
+	int rc;
+	struct netlbl_lsm_secattr secattr;
+	struct sock *sk;
+
+	/* if this is a locally generated packet check to see if it is already
+	 * being labeled by it's parent socket, if it is just exit */
+	sk = skb->sk;
+	if (sk != NULL) {
+		struct sk_security_struct *sksec = sk->sk_security;
+		if (sksec->nlbl_state != NLBL_REQSKB)
+			return 0;
+	}
+
+	netlbl_secattr_init(&secattr);
+	rc = security_netlbl_sid_to_secattr(sid, &secattr);
+	if (rc != 0)
+		goto skbuff_setsid_return;
+	rc = netlbl_skbuff_setattr(skb, family, &secattr);
+
+skbuff_setsid_return:
+	netlbl_secattr_destroy(&secattr);
+	return rc;
+}
+
 /**
  * selinux_netlbl_sock_graft - Netlabel the new socket
  * @sk: the new connection
-- 
cgit v1.2.3-70-g09d2


From 014ab19a69c325f52d7bae54ceeda73d6307ae0c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:33 -0400
Subject: selinux: Set socket NetLabel based on connection endpoint

Previous work enabled the use of address based NetLabel selectors, which while
highly useful, brought the potential for additional per-packet overhead when
used.  This patch attempts to solve that by applying NetLabel socket labels
when sockets are connect()'d.  This should alleviate the per-packet NetLabel
labeling for all connected sockets (yes, it even works for connected DGRAM
sockets).

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 include/net/cipso_ipv4.h            |   5 ++
 include/net/netlabel.h              |  13 ++++
 net/ipv4/cipso_ipv4.c               |  74 ++++++++++++++++++
 net/netlabel/netlabel_kapi.c        |  78 ++++++++++++++++++-
 security/selinux/hooks.c            |  11 +--
 security/selinux/include/netlabel.h |  19 ++++-
 security/selinux/include/objsec.h   |   1 +
 security/selinux/netlabel.c         | 147 +++++++++++++++++++++++++++++-------
 8 files changed, 311 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 2ce093ba553..811febf97ca 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -207,6 +207,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway);
 int cipso_v4_sock_setattr(struct sock *sk,
 			  const struct cipso_v4_doi *doi_def,
 			  const struct netlbl_lsm_secattr *secattr);
+void cipso_v4_sock_delattr(struct sock *sk);
 int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr);
 int cipso_v4_skbuff_setattr(struct sk_buff *skb,
 			    const struct cipso_v4_doi *doi_def,
@@ -230,6 +231,10 @@ static inline int cipso_v4_sock_setattr(struct sock *sk,
 	return -ENOSYS;
 }
 
+static inline void cipso_v4_sock_delattr(struct sock *sk)
+{
+}
+
 static inline int cipso_v4_sock_getattr(struct sock *sk,
 					struct netlbl_lsm_secattr *secattr)
 {
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 3f67e6d49e4..074cad40ac6 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -380,8 +380,12 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap,
 int netlbl_enabled(void);
 int netlbl_sock_setattr(struct sock *sk,
 			const struct netlbl_lsm_secattr *secattr);
+void netlbl_sock_delattr(struct sock *sk);
 int netlbl_sock_getattr(struct sock *sk,
 			struct netlbl_lsm_secattr *secattr);
+int netlbl_conn_setattr(struct sock *sk,
+			struct sockaddr *addr,
+			const struct netlbl_lsm_secattr *secattr);
 int netlbl_skbuff_setattr(struct sk_buff *skb,
 			  u16 family,
 			  const struct netlbl_lsm_secattr *secattr);
@@ -449,11 +453,20 @@ static inline int netlbl_sock_setattr(struct sock *sk,
 {
 	return -ENOSYS;
 }
+static inline void netlbl_sock_delattr(struct sock *sk)
+{
+}
 static inline int netlbl_sock_getattr(struct sock *sk,
 				      struct netlbl_lsm_secattr *secattr)
 {
 	return -ENOSYS;
 }
+static inline int netlbl_conn_setattr(struct sock *sk,
+				      struct sockaddr *addr,
+				      const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
 static inline int netlbl_skbuff_setattr(struct sk_buff *skb,
 				      u16 family,
 				      const struct netlbl_lsm_secattr *secattr)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index e13d6dbb66a..23768b9d6b6 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1809,6 +1809,80 @@ socket_setattr_failure:
 	return ret_val;
 }
 
+/**
+ * cipso_v4_sock_delattr - Delete the CIPSO option from a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Removes the CIPSO option from a socket, if present.
+ *
+ */
+void cipso_v4_sock_delattr(struct sock *sk)
+{
+	u8 hdr_delta;
+	struct ip_options *opt;
+	struct inet_sock *sk_inet;
+
+	sk_inet = inet_sk(sk);
+	opt = sk_inet->opt;
+	if (opt == NULL || opt->cipso == 0)
+		return;
+
+	if (opt->srr || opt->rr || opt->ts || opt->router_alert) {
+		u8 cipso_len;
+		u8 cipso_off;
+		unsigned char *cipso_ptr;
+		int iter;
+		int optlen_new;
+
+		cipso_off = opt->cipso - sizeof(struct iphdr);
+		cipso_ptr = &opt->__data[cipso_off];
+		cipso_len = cipso_ptr[1];
+
+		if (opt->srr > opt->cipso)
+			opt->srr -= cipso_len;
+		if (opt->rr > opt->cipso)
+			opt->rr -= cipso_len;
+		if (opt->ts > opt->cipso)
+			opt->ts -= cipso_len;
+		if (opt->router_alert > opt->cipso)
+			opt->router_alert -= cipso_len;
+		opt->cipso = 0;
+
+		memmove(cipso_ptr, cipso_ptr + cipso_len,
+			opt->optlen - cipso_off - cipso_len);
+
+		/* determining the new total option length is tricky because of
+		 * the padding necessary, the only thing i can think to do at
+		 * this point is walk the options one-by-one, skipping the
+		 * padding at the end to determine the actual option size and
+		 * from there we can determine the new total option length */
+		iter = 0;
+		optlen_new = 0;
+		while (iter < opt->optlen)
+			if (opt->__data[iter] != IPOPT_NOP) {
+				iter += opt->__data[iter + 1];
+				optlen_new = iter;
+			} else
+				iter++;
+		hdr_delta = opt->optlen;
+		opt->optlen = (optlen_new + 3) & ~3;
+		hdr_delta -= opt->optlen;
+	} else {
+		/* only the cipso option was present on the socket so we can
+		 * remove the entire option struct */
+		sk_inet->opt = NULL;
+		hdr_delta = opt->optlen;
+		kfree(opt);
+	}
+
+	if (sk_inet->is_icsk && hdr_delta > 0) {
+		struct inet_connection_sock *sk_conn = inet_csk(sk);
+		sk_conn->icsk_ext_hdr_len -= hdr_delta;
+		sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+	}
+}
+
 /**
  * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions
  * @cipso: the CIPSO v4 option
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index cc8047d1f50..78fc557689b 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -10,7 +10,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -455,6 +455,20 @@ socket_setattr_return:
 	return ret_val;
 }
 
+/**
+ * netlbl_sock_delattr - Delete all the NetLabel labels on a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Remove all the NetLabel labeling from @sk.  The caller is responsible for
+ * ensuring that @sk is locked.
+ *
+ */
+void netlbl_sock_delattr(struct sock *sk)
+{
+	cipso_v4_sock_delattr(sk);
+}
+
 /**
  * netlbl_sock_getattr - Determine the security attributes of a sock
  * @sk: the sock
@@ -472,6 +486,68 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 	return cipso_v4_sock_getattr(sk, secattr);
 }
 
+/**
+ * netlbl_conn_setattr - Label a connected socket using the correct protocol
+ * @sk: the socket to label
+ * @addr: the destination address
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given connected socket using the security
+ * attributes specified in @secattr.  The caller is responsible for ensuring
+ * that @sk is locked.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_conn_setattr(struct sock *sk,
+			struct sockaddr *addr,
+			const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct sockaddr_in *addr4;
+	struct netlbl_domaddr4_map *af4_entry;
+
+	rcu_read_lock();
+	switch (addr->sa_family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)addr;
+		af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
+						       addr4->sin_addr.s_addr);
+		if (af4_entry == NULL) {
+			ret_val = -ENOENT;
+			goto conn_setattr_return;
+		}
+		switch (af4_entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			ret_val = cipso_v4_sock_setattr(sk,
+						   af4_entry->type_def.cipsov4,
+						   secattr);
+			break;
+		case NETLBL_NLTYPE_UNLABELED:
+			/* just delete the protocols we support for right now
+			 * but we could remove other protocols if needed */
+			cipso_v4_sock_delattr(sk);
+			ret_val = 0;
+			break;
+		default:
+			ret_val = -ENOENT;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		/* since we don't support any IPv6 labeling protocols right
+		 * now we can optimize everything away until we do */
+		ret_val = 0;
+		break;
+#endif /* IPv6 */
+	default:
+		ret_val = 0;
+	}
+
+conn_setattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
 /**
  * netlbl_skbuff_setattr - Label a packet using the correct protocol
  * @skb: the packet
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 7432bdd5d36..632ac3e80a6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3794,6 +3794,7 @@ out:
 
 static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen)
 {
+	struct sock *sk = sock->sk;
 	struct inode_security_struct *isec;
 	int err;
 
@@ -3807,7 +3808,6 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address,
 	isec = SOCK_INODE(sock)->i_security;
 	if (isec->sclass == SECCLASS_TCP_SOCKET ||
 	    isec->sclass == SECCLASS_DCCP_SOCKET) {
-		struct sock *sk = sock->sk;
 		struct avc_audit_data ad;
 		struct sockaddr_in *addr4 = NULL;
 		struct sockaddr_in6 *addr6 = NULL;
@@ -3841,6 +3841,8 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address,
 			goto out;
 	}
 
+	err = selinux_netlbl_socket_connect(sk, address);
+
 out:
 	return err;
 }
@@ -4290,8 +4292,6 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent)
 	    sk->sk_family == PF_UNIX)
 		isec->sid = sksec->sid;
 	sksec->sclass = isec->sclass;
-
-	selinux_netlbl_sock_graft(sk, parent);
 }
 
 static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
@@ -4342,8 +4342,7 @@ static void selinux_inet_csk_clone(struct sock *newsk,
 	selinux_netlbl_sk_security_reset(newsksec, req->rsk_ops->family);
 }
 
-static void selinux_inet_conn_established(struct sock *sk,
-				struct sk_buff *skb)
+static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb)
 {
 	u16 family = sk->sk_family;
 	struct sk_security_struct *sksec = sk->sk_security;
@@ -4353,6 +4352,8 @@ static void selinux_inet_conn_established(struct sock *sk,
 		family = PF_INET;
 
 	selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid);
+
+	selinux_netlbl_inet_conn_established(sk, family);
 }
 
 static void selinux_req_classify_flow(const struct request_sock *req,
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index b3e6ae071fc..982bac0ac32 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -52,7 +52,7 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
 				 u16 family,
 				 u32 sid);
 
-void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock);
+void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family);
 int selinux_netlbl_socket_post_create(struct socket *sock);
 int selinux_netlbl_inode_permission(struct inode *inode, int mask);
 int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
@@ -62,6 +62,8 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
 int selinux_netlbl_socket_setsockopt(struct socket *sock,
 				     int level,
 				     int optname);
+int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr);
+
 #else
 static inline void selinux_netlbl_cache_invalidate(void)
 {
@@ -98,8 +100,14 @@ static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
 	return 0;
 }
 
-static inline void selinux_netlbl_sock_graft(struct sock *sk,
-					     struct socket *sock)
+static inline int selinux_netlbl_conn_setsid(struct sock *sk,
+					     struct sockaddr *addr)
+{
+	return 0;
+}
+
+static inline void selinux_netlbl_inet_conn_established(struct sock *sk,
+							u16 family)
 {
 	return;
 }
@@ -125,6 +133,11 @@ static inline int selinux_netlbl_socket_setsockopt(struct socket *sock,
 {
 	return 0;
 }
+static inline int selinux_netlbl_socket_connect(struct sock *sk,
+						struct sockaddr *addr)
+{
+	return 0;
+}
 #endif /* CONFIG_NETLABEL */
 
 #endif
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index f46dd1c3d01..ad34787c6c0 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -118,6 +118,7 @@ struct sk_security_struct {
 		NLBL_REQUIRE,
 		NLBL_LABELED,
 		NLBL_REQSKB,
+		NLBL_CONNLABELED,
 	} nlbl_state;
 #endif
 };
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index 090404d6e51..b22b7dafa0e 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -29,10 +29,12 @@
 
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <net/sock.h>
 #include <net/netlabel.h>
-#include <net/inet_sock.h>
-#include <net/inet_connection_sock.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
 
 #include "objsec.h"
 #include "security.h"
@@ -79,8 +81,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 	int rc;
 	struct sk_security_struct *sksec = sk->sk_security;
 	struct netlbl_lsm_secattr secattr;
-	struct inet_sock *sk_inet;
-	struct inet_connection_sock *sk_conn;
 
 	if (sksec->nlbl_state != NLBL_REQUIRE)
 		return 0;
@@ -96,20 +96,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 		sksec->nlbl_state = NLBL_LABELED;
 		break;
 	case -EDESTADDRREQ:
-		/* we are going to possibly end up labeling the individual
-		 * packets later which is problematic for stream sockets
-		 * because of the additional IP header size, our solution is to
-		 * allow for the maximum IP header length (40 bytes for IPv4,
-		 * we don't have to worry about IPv6 yet) just in case */
-		sk_inet = inet_sk(sk);
-		if (sk_inet->is_icsk) {
-			sk_conn = inet_csk(sk);
-			if (sk_inet->opt)
-				sk_conn->icsk_ext_hdr_len -=
-							   sk_inet->opt->optlen;
-			sk_conn->icsk_ext_hdr_len += 40;
-			sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
-		}
 		sksec->nlbl_state = NLBL_REQSKB;
 		rc = 0;
 		break;
@@ -247,21 +233,77 @@ skbuff_setsid_return:
 }
 
 /**
- * selinux_netlbl_sock_graft - Netlabel the new socket
+ * selinux_netlbl_inet_conn_established - Netlabel the newly accepted connection
  * @sk: the new connection
- * @sock: the new socket
  *
  * Description:
- * The connection represented by @sk is being grafted onto @sock so set the
- * socket's NetLabel to match the SID of @sk.
+ * A new connection has been established on @sk so make sure it is labeled
+ * correctly with the NetLabel susbsystem.
  *
  */
-void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
+void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family)
 {
-	/* Try to set the NetLabel on the socket to save time later, if we fail
-	 * here we will pick up the pieces in later calls to
-	 * selinux_netlbl_inode_permission(). */
-	selinux_netlbl_sock_setsid(sk);
+	int rc;
+	struct sk_security_struct *sksec = sk->sk_security;
+	struct netlbl_lsm_secattr secattr;
+	struct inet_sock *sk_inet = inet_sk(sk);
+	struct sockaddr_in addr;
+
+	if (sksec->nlbl_state != NLBL_REQUIRE)
+		return;
+
+	netlbl_secattr_init(&secattr);
+	if (security_netlbl_sid_to_secattr(sksec->sid, &secattr) != 0)
+		goto inet_conn_established_return;
+
+	rc = netlbl_sock_setattr(sk, &secattr);
+	switch (rc) {
+	case 0:
+		sksec->nlbl_state = NLBL_LABELED;
+		break;
+	case -EDESTADDRREQ:
+		/* no PF_INET6 support yet because we don't support any IPv6
+		 * labeling protocols */
+		if (family != PF_INET) {
+			sksec->nlbl_state = NLBL_UNSET;
+			goto inet_conn_established_return;
+		}
+
+		addr.sin_family = family;
+		addr.sin_addr.s_addr = sk_inet->daddr;
+		if (netlbl_conn_setattr(sk, (struct sockaddr *)&addr,
+					&secattr) != 0) {
+			/* we failed to label the connected socket (could be
+			 * for a variety of reasons, the actual "why" isn't
+			 * important here) so we have to go to our backup plan,
+			 * labeling the packets individually in the netfilter
+			 * local output hook.  this is okay but we need to
+			 * adjust the MSS of the connection to take into
+			 * account any labeling overhead, since we don't know
+			 * the exact overhead at this point we'll use the worst
+			 * case value which is 40 bytes for IPv4 */
+			struct inet_connection_sock *sk_conn = inet_csk(sk);
+			sk_conn->icsk_ext_hdr_len += 40 -
+				      (sk_inet->opt ? sk_inet->opt->optlen : 0);
+			sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+
+			sksec->nlbl_state = NLBL_REQSKB;
+		} else
+			sksec->nlbl_state = NLBL_CONNLABELED;
+		break;
+	default:
+		/* note that we are failing to label the socket which could be
+		 * a bad thing since it means traffic could leave the system
+		 * without the desired labeling, however, all is not lost as
+		 * we have a check in selinux_netlbl_inode_permission() to
+		 * pick up the pieces that we might drop here because we can't
+		 * return an error code */
+		break;
+	}
+
+inet_conn_established_return:
+	netlbl_secattr_destroy(&secattr);
+	return;
 }
 
 /**
@@ -398,7 +440,8 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock,
 	struct netlbl_lsm_secattr secattr;
 
 	if (level == IPPROTO_IP && optname == IP_OPTIONS &&
-	    sksec->nlbl_state == NLBL_LABELED) {
+	    (sksec->nlbl_state == NLBL_LABELED ||
+	     sksec->nlbl_state == NLBL_CONNLABELED)) {
 		netlbl_secattr_init(&secattr);
 		lock_sock(sk);
 		rc = netlbl_sock_getattr(sk, &secattr);
@@ -410,3 +453,51 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock,
 
 	return rc;
 }
+
+/**
+ * selinux_netlbl_socket_connect - Label a client-side socket on connect
+ * @sk: the socket to label
+ * @addr: the destination address
+ *
+ * Description:
+ * Attempt to label a connected socket with NetLabel using the given address.
+ * Returns zero values on success, negative values on failure.
+ *
+ */
+int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
+{
+	int rc;
+	struct sk_security_struct *sksec = sk->sk_security;
+	struct netlbl_lsm_secattr secattr;
+
+	if (sksec->nlbl_state != NLBL_REQSKB &&
+	    sksec->nlbl_state != NLBL_CONNLABELED)
+		return 0;
+
+	netlbl_secattr_init(&secattr);
+	local_bh_disable();
+	bh_lock_sock_nested(sk);
+
+	/* connected sockets are allowed to disconnect when the address family
+	 * is set to AF_UNSPEC, if that is what is happening we want to reset
+	 * the socket */
+	if (addr->sa_family == AF_UNSPEC) {
+		netlbl_sock_delattr(sk);
+		sksec->nlbl_state = NLBL_REQSKB;
+		rc = 0;
+		goto socket_connect_return;
+	}
+	rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr);
+	if (rc != 0)
+		goto socket_connect_return;
+	rc = netlbl_conn_setattr(sk, addr, &secattr);
+	if (rc != 0)
+		goto socket_connect_return;
+	sksec->nlbl_state = NLBL_CONNLABELED;
+
+socket_connect_return:
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	netlbl_secattr_destroy(&secattr);
+	return rc;
+}
-- 
cgit v1.2.3-70-g09d2


From 15c45f7b2e81655f6eb500ec949c8bd70a04325a Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:34 -0400
Subject: cipso: Add support for native local labeling and fixup mapping names

This patch accomplishes three minor tasks: add a new tag type for local
labeling, rename the CIPSO_V4_MAP_STD define to CIPSO_V4_MAP_TRANS and
replace some of the CIPSO "magic numbers" with constants from the header
file.  The first change allows CIPSO to support full LSM labels/contexts,
not just MLS attributes.  The second change brings the mapping names inline
with what userspace is using, compatibility is preserved since we don't
actually change the value.  The last change is to aid readability and help
prevent mistakes.

Signed-off-by: Paul Moore <paul.moore@hp.com>
---
 include/net/cipso_ipv4.h         |  13 ++--
 net/ipv4/cipso_ipv4.c            | 127 +++++++++++++++++++++++++++++++++------
 net/ipv4/ip_options.c            |   2 +-
 net/netlabel/netlabel_cipso_v4.c |  14 ++---
 net/netlabel/netlabel_cipso_v4.h |   4 +-
 net/netlabel/netlabel_kapi.c     |   4 +-
 6 files changed, 128 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 811febf97ca..9909774eb99 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -45,7 +45,7 @@
 /* known doi values */
 #define CIPSO_V4_DOI_UNKNOWN          0x00000000
 
-/* tag types */
+/* standard tag types */
 #define CIPSO_V4_TAG_INVALID          0
 #define CIPSO_V4_TAG_RBITMAP          1
 #define CIPSO_V4_TAG_ENUM             2
@@ -53,10 +53,14 @@
 #define CIPSO_V4_TAG_PBITMAP          6
 #define CIPSO_V4_TAG_FREEFORM         7
 
+/* non-standard tag types (tags > 127) */
+#define CIPSO_V4_TAG_LOCAL            128
+
 /* doi mapping types */
 #define CIPSO_V4_MAP_UNKNOWN          0
-#define CIPSO_V4_MAP_STD              1
+#define CIPSO_V4_MAP_TRANS            1
 #define CIPSO_V4_MAP_PASS             2
+#define CIPSO_V4_MAP_LOCAL            3
 
 /* limits */
 #define CIPSO_V4_MAX_REM_LVLS         255
@@ -215,7 +219,7 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb,
 int cipso_v4_skbuff_delattr(struct sk_buff *skb);
 int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
 			    struct netlbl_lsm_secattr *secattr);
-int cipso_v4_validate(unsigned char **option);
+int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option);
 #else
 static inline void cipso_v4_error(struct sk_buff *skb,
 				  int error,
@@ -259,7 +263,8 @@ static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
 	return -ENOSYS;
 }
 
-static inline int cipso_v4_validate(unsigned char **option)
+static inline int cipso_v4_validate(const struct sk_buff *skb,
+				    unsigned char **option)
 {
 	return -ENOSYS;
 }
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 23768b9d6b6..490e035c6d9 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -109,6 +109,19 @@ int cipso_v4_rbm_strictvalid = 1;
  * be omitted. */
 #define CIPSO_V4_TAG_RNG_CAT_MAX      8
 
+/* Base length of the local tag (non-standard tag).
+ *  Tag definition (may change between kernel versions)
+ *
+ * 0          8          16         24         32
+ * +----------+----------+----------+----------+
+ * | 10000000 | 00000110 | 32-bit secid value  |
+ * +----------+----------+----------+----------+
+ * | in (host byte order)|
+ * +----------+----------+
+ *
+ */
+#define CIPSO_V4_TAG_LOC_BLEN         6
+
 /*
  * Helper Functions
  */
@@ -467,6 +480,10 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
 			if (doi_def->type != CIPSO_V4_MAP_PASS)
 				return -EINVAL;
 			break;
+		case CIPSO_V4_TAG_LOCAL:
+			if (doi_def->type != CIPSO_V4_MAP_LOCAL)
+				return -EINVAL;
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -502,7 +519,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
 		return;
 
 	switch (doi_def->type) {
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		kfree(doi_def->map.std->lvl.cipso);
 		kfree(doi_def->map.std->lvl.local);
 		kfree(doi_def->map.std->cat.cipso);
@@ -673,7 +690,7 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
 	switch (doi_def->type) {
 	case CIPSO_V4_MAP_PASS:
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
 			return 0;
 		break;
@@ -702,7 +719,7 @@ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
 	case CIPSO_V4_MAP_PASS:
 		*net_lvl = host_lvl;
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		if (host_lvl < doi_def->map.std->lvl.local_size &&
 		    doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) {
 			*net_lvl = doi_def->map.std->lvl.local[host_lvl];
@@ -736,7 +753,7 @@ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
 	case CIPSO_V4_MAP_PASS:
 		*host_lvl = net_lvl;
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		map_tbl = doi_def->map.std;
 		if (net_lvl < map_tbl->lvl.cipso_size &&
 		    map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
@@ -773,7 +790,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
 	switch (doi_def->type) {
 	case CIPSO_V4_MAP_PASS:
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		cipso_cat_size = doi_def->map.std->cat.cipso_size;
 		cipso_array = doi_def->map.std->cat.cipso;
 		for (;;) {
@@ -821,7 +838,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
 	u32 host_cat_size = 0;
 	u32 *host_cat_array = NULL;
 
-	if (doi_def->type == CIPSO_V4_MAP_STD) {
+	if (doi_def->type == CIPSO_V4_MAP_TRANS) {
 		host_cat_size = doi_def->map.std->cat.local_size;
 		host_cat_array = doi_def->map.std->cat.local;
 	}
@@ -836,7 +853,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
 		case CIPSO_V4_MAP_PASS:
 			net_spot = host_spot;
 			break;
-		case CIPSO_V4_MAP_STD:
+		case CIPSO_V4_MAP_TRANS:
 			if (host_spot >= host_cat_size)
 				return -EPERM;
 			net_spot = host_cat_array[host_spot];
@@ -882,7 +899,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
 	u32 net_cat_size = 0;
 	u32 *net_cat_array = NULL;
 
-	if (doi_def->type == CIPSO_V4_MAP_STD) {
+	if (doi_def->type == CIPSO_V4_MAP_TRANS) {
 		net_cat_size = doi_def->map.std->cat.cipso_size;
 		net_cat_array = doi_def->map.std->cat.cipso;
 	}
@@ -902,7 +919,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
 		case CIPSO_V4_MAP_PASS:
 			host_spot = net_spot;
 			break;
-		case CIPSO_V4_MAP_STD:
+		case CIPSO_V4_MAP_TRANS:
 			if (net_spot >= net_cat_size)
 				return -EPERM;
 			host_spot = net_cat_array[net_spot];
@@ -1238,7 +1255,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
 	} else
 		tag_len = 4;
 
-	buffer[0] = 0x01;
+	buffer[0] = CIPSO_V4_TAG_RBITMAP;
 	buffer[1] = tag_len;
 	buffer[3] = level;
 
@@ -1334,7 +1351,7 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def,
 	} else
 		tag_len = 4;
 
-	buffer[0] = 0x02;
+	buffer[0] = CIPSO_V4_TAG_ENUM;
 	buffer[1] = tag_len;
 	buffer[3] = level;
 
@@ -1430,7 +1447,7 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def,
 	} else
 		tag_len = 4;
 
-	buffer[0] = 0x05;
+	buffer[0] = CIPSO_V4_TAG_RANGE;
 	buffer[1] = tag_len;
 	buffer[3] = level;
 
@@ -1483,6 +1500,54 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
 	return 0;
 }
 
+/**
+ * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the local tag.  Returns the size of the tag
+ * on success, negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def,
+			       const struct netlbl_lsm_secattr *secattr,
+			       unsigned char *buffer,
+			       u32 buffer_len)
+{
+	if (!(secattr->flags & NETLBL_SECATTR_SECID))
+		return -EPERM;
+
+	buffer[0] = CIPSO_V4_TAG_LOCAL;
+	buffer[1] = CIPSO_V4_TAG_LOC_BLEN;
+	*(u32 *)&buffer[2] = secattr->attr.secid;
+
+	return CIPSO_V4_TAG_LOC_BLEN;
+}
+
+/**
+ * cipso_v4_parsetag_loc - Parse a CIPSO local tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO local tag and return the security attributes in @secattr.
+ * Return zero on success, negatives values on failure.
+ *
+ */
+static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def,
+				 const unsigned char *tag,
+				 struct netlbl_lsm_secattr *secattr)
+{
+	secattr->attr.secid = *(u32 *)&tag[2];
+	secattr->flags |= NETLBL_SECATTR_SECID;
+
+	return 0;
+}
+
 /**
  * cipso_v4_validate - Validate a CIPSO option
  * @option: the start of the option, on error it is set to point to the error
@@ -1502,7 +1567,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
  *   that is unrecognized."
  *
  */
-int cipso_v4_validate(unsigned char **option)
+int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
 {
 	unsigned char *opt = *option;
 	unsigned char *tag;
@@ -1527,7 +1592,7 @@ int cipso_v4_validate(unsigned char **option)
 		goto validate_return_locked;
 	}
 
-	opt_iter = 6;
+	opt_iter = CIPSO_V4_HDR_LEN;
 	tag = opt + opt_iter;
 	while (opt_iter < opt_len) {
 		for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
@@ -1545,7 +1610,7 @@ int cipso_v4_validate(unsigned char **option)
 
 		switch (tag[0]) {
 		case CIPSO_V4_TAG_RBITMAP:
-			if (tag_len < 4) {
+			if (tag_len < CIPSO_V4_TAG_RBM_BLEN) {
 				err_offset = opt_iter + 1;
 				goto validate_return_locked;
 			}
@@ -1563,7 +1628,7 @@ int cipso_v4_validate(unsigned char **option)
 					err_offset = opt_iter + 3;
 					goto validate_return_locked;
 				}
-				if (tag_len > 4 &&
+				if (tag_len > CIPSO_V4_TAG_RBM_BLEN &&
 				    cipso_v4_map_cat_rbm_valid(doi_def,
 							    &tag[4],
 							    tag_len - 4) < 0) {
@@ -1573,7 +1638,7 @@ int cipso_v4_validate(unsigned char **option)
 			}
 			break;
 		case CIPSO_V4_TAG_ENUM:
-			if (tag_len < 4) {
+			if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) {
 				err_offset = opt_iter + 1;
 				goto validate_return_locked;
 			}
@@ -1583,7 +1648,7 @@ int cipso_v4_validate(unsigned char **option)
 				err_offset = opt_iter + 3;
 				goto validate_return_locked;
 			}
-			if (tag_len > 4 &&
+			if (tag_len > CIPSO_V4_TAG_ENUM_BLEN &&
 			    cipso_v4_map_cat_enum_valid(doi_def,
 							&tag[4],
 							tag_len - 4) < 0) {
@@ -1592,7 +1657,7 @@ int cipso_v4_validate(unsigned char **option)
 			}
 			break;
 		case CIPSO_V4_TAG_RANGE:
-			if (tag_len < 4) {
+			if (tag_len < CIPSO_V4_TAG_RNG_BLEN) {
 				err_offset = opt_iter + 1;
 				goto validate_return_locked;
 			}
@@ -1602,7 +1667,7 @@ int cipso_v4_validate(unsigned char **option)
 				err_offset = opt_iter + 3;
 				goto validate_return_locked;
 			}
-			if (tag_len > 4 &&
+			if (tag_len > CIPSO_V4_TAG_RNG_BLEN &&
 			    cipso_v4_map_cat_rng_valid(doi_def,
 						       &tag[4],
 						       tag_len - 4) < 0) {
@@ -1610,6 +1675,19 @@ int cipso_v4_validate(unsigned char **option)
 				goto validate_return_locked;
 			}
 			break;
+		case CIPSO_V4_TAG_LOCAL:
+			/* This is a non-standard tag that we only allow for
+			 * local connections, so if the incoming interface is
+			 * not the loopback device drop the packet. */
+			if (!(skb->dev->flags & IFF_LOOPBACK)) {
+				err_offset = opt_iter;
+				goto validate_return_locked;
+			}
+			if (tag_len != CIPSO_V4_TAG_LOC_BLEN) {
+				err_offset = opt_iter + 1;
+				goto validate_return_locked;
+			}
+			break;
 		default:
 			err_offset = opt_iter;
 			goto validate_return_locked;
@@ -1712,6 +1790,12 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
 						   &buf[CIPSO_V4_HDR_LEN],
 						   buf_len - CIPSO_V4_HDR_LEN);
 			break;
+		case CIPSO_V4_TAG_LOCAL:
+			ret_val = cipso_v4_gentag_loc(doi_def,
+						   secattr,
+						   &buf[CIPSO_V4_HDR_LEN],
+						   buf_len - CIPSO_V4_HDR_LEN);
+			break;
 		default:
 			return -EPERM;
 		}
@@ -1921,6 +2005,9 @@ static int cipso_v4_getattr(const unsigned char *cipso,
 	case CIPSO_V4_TAG_RANGE:
 		ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr);
 		break;
+	case CIPSO_V4_TAG_LOCAL:
+		ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr);
+		break;
 	}
 	if (ret_val == 0)
 		secattr->type = NETLBL_NLTYPE_CIPSOV4;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index be3f18a7a40..2c88da6e786 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -438,7 +438,7 @@ int ip_options_compile(struct net *net,
 				goto error;
 			}
 			opt->cipso = optptr - iph;
-			if (cipso_v4_validate(&optptr)) {
+			if (cipso_v4_validate(skb, &optptr)) {
 				pp_ptr = optptr;
 				goto error;
 			}
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 5c4f60bbc82..db83a67cbc7 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -132,9 +132,9 @@ static int netlbl_cipsov4_add_common(struct genl_info *info,
  * @info: the Generic NETLINK info block
  *
  * Description:
- * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message
- * and add it to the CIPSO V4 engine.  Return zero on success and non-zero on
- * error.
+ * Create a new CIPSO_V4_MAP_TRANS DOI definition based on the given ADD
+ * message and add it to the CIPSO V4 engine.  Return zero on success and
+ * non-zero on error.
  *
  */
 static int netlbl_cipsov4_add_std(struct genl_info *info)
@@ -164,7 +164,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
 		ret_val = -ENOMEM;
 		goto add_std_failure;
 	}
-	doi_def->type = CIPSO_V4_MAP_STD;
+	doi_def->type = CIPSO_V4_MAP_TRANS;
 
 	ret_val = netlbl_cipsov4_add_common(info, doi_def);
 	if (ret_val != 0)
@@ -393,8 +393,8 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
 
 	type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
 	switch (type) {
-	case CIPSO_V4_MAP_STD:
-		type_str = "std";
+	case CIPSO_V4_MAP_TRANS:
+		type_str = "trans";
 		ret_val = netlbl_cipsov4_add_std(info);
 		break;
 	case CIPSO_V4_MAP_PASS:
@@ -497,7 +497,7 @@ list_start:
 	nla_nest_end(ans_skb, nla_a);
 
 	switch (doi_def->type) {
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST);
 		if (nla_a == NULL) {
 			ret_val = -ENOMEM;
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 220cb9d06b4..fb3957f1d69 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -45,7 +45,7 @@
  *     NLBL_CIPSOV4_A_MTYPE
  *     NLBL_CIPSOV4_A_TAGLST
  *
- *   If using CIPSO_V4_MAP_STD the following attributes are required:
+ *   If using CIPSO_V4_MAP_TRANS the following attributes are required:
  *
  *     NLBL_CIPSOV4_A_MLSLVLLST
  *     NLBL_CIPSOV4_A_MLSCATLST
@@ -76,7 +76,7 @@
  *     NLBL_CIPSOV4_A_MTYPE
  *     NLBL_CIPSOV4_A_TAGLST
  *
- *   If using CIPSO_V4_MAP_STD the following attributes are required:
+ *   If using CIPSO_V4_MAP_TRANS the following attributes are required:
  *
  *     NLBL_CIPSOV4_A_MLSLVLLST
  *     NLBL_CIPSOV4_A_MLSCATLST
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 78fc557689b..8435b15c3f7 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -157,8 +157,8 @@ cfg_cipsov4_add_map_return:
 					      audit_info);
 	if (audit_buf != NULL) {
 		switch (doi_type) {
-		case CIPSO_V4_MAP_STD:
-			type_str = "std";
+		case CIPSO_V4_MAP_TRANS:
+			type_str = "trans";
 			break;
 		case CIPSO_V4_MAP_PASS:
 			type_str = "pass";
-- 
cgit v1.2.3-70-g09d2


From d91d40799165b0c84c97e7c71fb8039494ff07dc Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:34 -0400
Subject: netlabel: Add configuration support for local labeling

Add the necessary NetLabel support for the new CIPSO mapping,
CIPSO_V4_MAP_LOCAL, which allows full LSM label/context support.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 include/net/netlabel.h           |  3 ++-
 net/netlabel/netlabel_cipso_v4.c | 41 ++++++++++++++++++++++++++++++++++++++++
 net/netlabel/netlabel_cipso_v4.h |  6 ++++--
 net/netlabel/netlabel_kapi.c     |  3 +++
 4 files changed, 50 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index d56517ac3ba..17c442a4514 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -72,7 +72,8 @@ struct cipso_v4_doi;
 /* NetLabel NETLINK protocol version
  *  1: initial version
  *  2: added static labels for unlabeled connections
- *  3: network selectors added to the NetLabel/LSM domain mapping
+ *  3: network selectors added to the NetLabel/LSM domain mapping and the
+ *     CIPSO_V4_MAP_LOCAL CIPSO mapping was added
  */
 #define NETLBL_PROTO_VERSION            3
 
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index db83a67cbc7..fff32b70efa 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -364,6 +364,43 @@ add_pass_failure:
 	return ret_val;
 }
 
+/**
+ * netlbl_cipsov4_add_local - Adds a CIPSO V4 DOI definition
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_LOCAL DOI definition based on the given ADD
+ * message and add it to the CIPSO V4 engine.  Return zero on success and
+ * non-zero on error.
+ *
+ */
+static int netlbl_cipsov4_add_local(struct genl_info *info)
+{
+	int ret_val;
+	struct cipso_v4_doi *doi_def = NULL;
+
+	if (!info->attrs[NLBL_CIPSOV4_A_TAGLST])
+		return -EINVAL;
+
+	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+	if (doi_def == NULL)
+		return -ENOMEM;
+	doi_def->type = CIPSO_V4_MAP_LOCAL;
+
+	ret_val = netlbl_cipsov4_add_common(info, doi_def);
+	if (ret_val != 0)
+		goto add_local_failure;
+
+	ret_val = cipso_v4_doi_add(doi_def);
+	if (ret_val != 0)
+		goto add_local_failure;
+	return 0;
+
+add_local_failure:
+	cipso_v4_doi_free(doi_def);
+	return ret_val;
+}
+
 /**
  * netlbl_cipsov4_add - Handle an ADD message
  * @skb: the NETLINK buffer
@@ -401,6 +438,10 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
 		type_str = "pass";
 		ret_val = netlbl_cipsov4_add_pass(info);
 		break;
+	case CIPSO_V4_MAP_LOCAL:
+		type_str = "local";
+		ret_val = netlbl_cipsov4_add_local(info);
+		break;
 	}
 	if (ret_val == 0)
 		atomic_inc(&netlabel_mgmt_protocount);
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index fb3957f1d69..c8a4079261f 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -50,7 +50,8 @@
  *     NLBL_CIPSOV4_A_MLSLVLLST
  *     NLBL_CIPSOV4_A_MLSCATLST
  *
- *   If using CIPSO_V4_MAP_PASS no additional attributes are required.
+ *   If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes
+ *   are required.
  *
  * o REMOVE:
  *   Sent by an application to remove a specific DOI mapping table from the
@@ -81,7 +82,8 @@
  *     NLBL_CIPSOV4_A_MLSLVLLST
  *     NLBL_CIPSOV4_A_MLSCATLST
  *
- *   If using CIPSO_V4_MAP_PASS no additional attributes are required.
+ *   If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes
+ *   are required.
  *
  * o LISTALL:
  *   This message is sent by an application to list the valid DOIs on the
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 8435b15c3f7..b32eceb3ab0 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -163,6 +163,9 @@ cfg_cipsov4_add_map_return:
 		case CIPSO_V4_MAP_PASS:
 			type_str = "pass";
 			break;
+		case CIPSO_V4_MAP_LOCAL:
+			type_str = "local";
+			break;
 		default:
 			type_str = "(unknown)";
 		}
-- 
cgit v1.2.3-70-g09d2


From ba9e64b1c23f1dd22fea14c310f739d84ac8b748 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 10 Oct 2008 12:10:30 -0700
Subject: gre: fix copy and paste error

The flags are dumped twice, the keys not at all.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0d5e35b0ed5..c0755e98b81 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1539,8 +1539,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
 	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
 	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
-	NLA_PUT_BE32(skb, IFLA_GRE_IFLAGS, p->i_flags);
-	NLA_PUT_BE32(skb, IFLA_GRE_OFLAGS, p->o_flags);
+	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
+	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
 	NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
 	NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
 	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
-- 
cgit v1.2.3-70-g09d2


From 4d74f8ba1fb152ae07eb858abb713e094e77b7d5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 10 Oct 2008 12:11:06 -0700
Subject: gre: minor cleanups in netlink interface

- use typeful helpers for IFLA_GRE_LOCAL/IFLA_GRE_REMOTE
- replace magic value by FIELD_SIZEOF
- use MODULE_ALIAS_RTNL_LINK macro

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c0755e98b81..05ebce2881e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1368,10 +1368,10 @@ static void ipgre_netlink_parms(struct nlattr *data[],
 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
 
 	if (data[IFLA_GRE_LOCAL])
-		memcpy(&parms->iph.saddr, nla_data(data[IFLA_GRE_LOCAL]), 4);
+		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
 
 	if (data[IFLA_GRE_REMOTE])
-		memcpy(&parms->iph.daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
+		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
 
 	if (data[IFLA_GRE_TTL])
 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
@@ -1541,8 +1541,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
 	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
 	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
-	NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
-	NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
+	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
+	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
 	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
 	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
 	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
@@ -1559,8 +1559,8 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
-	[IFLA_GRE_LOCAL]	= { .len = 4 },
-	[IFLA_GRE_REMOTE]	= { .len = 4 },
+	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
@@ -1643,5 +1643,5 @@ static void __exit ipgre_fini(void)
 module_init(ipgre_init);
 module_exit(ipgre_fini);
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("rtnl-link-gre");
-MODULE_ALIAS("rtnl-link-gretap");
+MODULE_ALIAS_RTNL_LINK("gre");
+MODULE_ALIAS_RTNL_LINK("gretap");
-- 
cgit v1.2.3-70-g09d2


From 1839faab9a2747bcd30ee14e50575a39bf6735d4 Mon Sep 17 00:00:00 2001
From: Tobias Brunner <tobias.brunner@strongswan.org>
Date: Fri, 10 Oct 2008 14:07:03 -0700
Subject: af_key: fix SADB_X_SPDDELETE response

When deleting an SPD entry using SADB_X_SPDDELETE, c.data.byid is not
initialized to zero in pfkey_spddelete(). Thus, key_notify_policy()
responds with a PF_KEY message of type SADB_X_SPDDELETE2 instead of
SADB_X_SPDDELETE.

Signed-off-by: Tobias Brunner <tobias.brunner@strongswan.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 362fe317e1f..e55e0441e4d 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2341,6 +2341,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 
 	c.seq = hdr->sadb_msg_seq;
 	c.pid = hdr->sadb_msg_pid;
+	c.data.byid = 0;
 	c.event = XFRM_MSG_DELPOLICY;
 	km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
 
-- 
cgit v1.2.3-70-g09d2


From f901b64472fdabc72eca2b9426fa4e96972b64c4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 11 Oct 2008 12:18:04 -0700
Subject: ipvs: Add proper dependencies on IP_VS, and fix description header
 line.

Linus noted a build failure case:

net/netfilter/ipvs/ip_vs_xmit.c: In function 'ip_vs_tunnel_xmit':
net/netfilter/ipvs/ip_vs_xmit.c:616: error: implicit declaration of function 'ip_select_ident'

The proper include file (net/ip.h) is being included in ip_vs_xmit.c to get
that declaration.  So the only possible case where this can happen is if
CONFIG_INET is not enabled.

This seems to be purely a missing dependency in the ipvs/Kconfig file IP_VS
entry.

Also, while we're here, remove the out of date "EXPERIMENTAL" string in the
IP_VS config help header line.  IP_VS no longer depends upon CONFIG_EXPERIMENTAL

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index de6004de80b..05048e40326 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -2,8 +2,8 @@
 # IP Virtual Server configuration
 #
 menuconfig IP_VS
-	tristate "IP virtual server support (EXPERIMENTAL)"
-	depends on NETFILTER
+	tristate "IP virtual server support"
+	depends on NET && INET && NETFILTER
 	---help---
 	  IP Virtual Server support will let you build a high-performance
 	  virtual server based on cluster of two or more real servers. This
-- 
cgit v1.2.3-70-g09d2


From 7bb82d924536cfa62db73dd381b07d9e9b084ffa Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 11 Oct 2008 12:20:15 -0700
Subject: gre: Initialise rtnl_link tunnel parameters properly

Brown paper bag error of calling memset with sizeof(p) instead
of sizeof(*p).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 05ebce2881e..85c487b8572 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1345,7 +1345,7 @@ out:
 static void ipgre_netlink_parms(struct nlattr *data[],
 				struct ip_tunnel_parm *parms)
 {
-	memset(parms, 0, sizeof(parms));
+	memset(parms, 0, sizeof(*parms));
 
 	parms->iph.protocol = IPPROTO_GRE;
 
-- 
cgit v1.2.3-70-g09d2


From ff71268aa4e9d961643c5e0ea5e14a3dd6d27f28 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Sun, 12 Oct 2008 21:03:38 -0700
Subject: wireless: remove duplicated #include

Removed duplicated include <linux/list.h> in
net/wireless/core.c.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/core.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5cadbeb76a1..24fdd4cd22c 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -13,7 +13,6 @@
 #include <linux/debugfs.h>
 #include <linux/notifier.h>
 #include <linux/device.h>
-#include <linux/list.h>
 #include <net/genetlink.h>
 #include <net/cfg80211.h>
 #include <net/wireless.h>
-- 
cgit v1.2.3-70-g09d2


From ab396eb03f33a2e2afb7b0603a43929bf5857c45 Mon Sep 17 00:00:00 2001
From: Dimitris Michailidis <dm@chelsio.com>
Date: Sun, 12 Oct 2008 21:07:34 -0700
Subject: net: Fix off-by-one in skb_dma_map

The unwind loop iterates down to -1 instead of stopping at 0 and ends up
accessing ->frags[-1].

Signed-off-by: Dimitris Michailidis <dm@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skb_dma_map.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
index 1f49afcd8e8..86234923a3b 100644
--- a/net/core/skb_dma_map.c
+++ b/net/core/skb_dma_map.c
@@ -35,7 +35,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
 	return 0;
 
 unwind:
-	while (i-- >= 0) {
+	while (--i >= 0) {
 		skb_frag_t *fp = &sp->frags[i];
 
 		dma_unmap_page(dev, sp->dma_maps[i + 1],
-- 
cgit v1.2.3-70-g09d2


From 14717f811b6662ca77bf39c07f5589c3b084f942 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Sun, 12 Oct 2008 21:08:34 -0700
Subject: netfilter: remove unused #include <version.h>

The file(s) below do not use LINUX_VERSION_CODE nor KERNEL_VERSION.
  net/netfilter/nf_tproxy_core.c

This patch removes the said #include <version.h>.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_tproxy_core.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index fe34f4bf74c..cdc97f3105a 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -10,7 +10,6 @@
  *
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 
 #include <linux/net.h>
-- 
cgit v1.2.3-70-g09d2


From bf94e17bc8d35fc339945a42990a2f2b5e9b5a40 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 12 Oct 2008 23:51:38 -0700
Subject: net/mac80211/rx.c: fix build error

older versions of gcc do not recognize that ieee80211_rx_h_mesh_fwding()
is unused when CONFIG_MAC80211_MESH is disabled:

  net/built-in.o: In function `ieee80211_rx_h_mesh_fwding':
  rx.c:(.text+0xd89af): undefined reference to `mpp_path_lookup'
  rx.c:(.text+0xd89c6): undefined reference to `mpp_path_add'

as this code construct:

        if (ieee80211_vif_is_mesh(&sdata->vif))
                CALL_RXH(ieee80211_rx_h_mesh_fwding);

still causes ieee80211_rx_h_mesh_fwding() to be linked in.

Protect these places with an #ifdef.

commit b0dee578 ("Fix modpost failure when rx handlers are not inlined.")
solved part of this problem - this patch is still needed.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/rx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 77e7b014872..cf6b121e1bb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1379,6 +1379,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 }
 
+#ifdef CONFIG_MAC80211_MESH
 static ieee80211_rx_result
 ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 {
@@ -1453,7 +1454,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	else
 		return RX_DROP_MONITOR;
 }
-
+#endif
 
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
@@ -1780,8 +1781,10 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
 	/* must be after MMIC verify so header is counted in MPDU mic */
 	CALL_RXH(ieee80211_rx_h_remove_qos_control)
 	CALL_RXH(ieee80211_rx_h_amsdu)
+#ifdef CONFIG_MAC80211_MESH
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		CALL_RXH(ieee80211_rx_h_mesh_fwding);
+#endif
 	CALL_RXH(ieee80211_rx_h_data)
 	CALL_RXH(ieee80211_rx_h_ctrl)
 	CALL_RXH(ieee80211_rx_h_action)
-- 
cgit v1.2.3-70-g09d2


From a447c0932445f92ce6f4c1bd020f62c5097a7842 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 13 Oct 2008 10:46:57 +0100
Subject: vfs: Use const for kernel parser table

This is a much better version of a previous patch to make the parser
tables constant. Rather than changing the typedef, we put the "const" in
all the various places where its required, allowing the __initconst
exception for nfsroot which was the cause of the previous trouble.

This was posted for review some time ago and I believe its been in -mm
since then.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Alexander Viro <aviro@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/cell/spufs/inode.c | 2 +-
 arch/s390/hypfs/inode.c                   | 2 +-
 drivers/infiniband/ulp/srp/ib_srp.c       | 2 +-
 drivers/usb/core/inode.c                  | 2 +-
 fs/9p/v9fs.c                              | 2 +-
 fs/adfs/super.c                           | 2 +-
 fs/affs/super.c                           | 2 +-
 fs/afs/super.c                            | 2 +-
 fs/autofs/inode.c                         | 2 +-
 fs/autofs4/inode.c                        | 2 +-
 fs/befs/linuxvfs.c                        | 2 +-
 fs/devpts/inode.c                         | 2 +-
 fs/ecryptfs/main.c                        | 2 +-
 fs/ext2/super.c                           | 2 +-
 fs/ext3/super.c                           | 2 +-
 fs/ext4/super.c                           | 2 +-
 fs/fat/inode.c                            | 6 +++---
 fs/fuse/inode.c                           | 2 +-
 fs/gfs2/mount.c                           | 2 +-
 fs/hfs/super.c                            | 2 +-
 fs/hfsplus/options.c                      | 2 +-
 fs/hpfs/super.c                           | 2 +-
 fs/hugetlbfs/inode.c                      | 2 +-
 fs/isofs/inode.c                          | 2 +-
 fs/jfs/super.c                            | 2 +-
 fs/nfs/nfsroot.c                          | 2 +-
 fs/nfs/super.c                            | 6 +++---
 fs/ocfs2/super.c                          | 2 +-
 fs/omfs/inode.c                           | 2 +-
 fs/ubifs/super.c                          | 2 +-
 fs/udf/super.c                            | 2 +-
 fs/ufs/super.c                            | 4 ++--
 fs/xfs/linux-2.6/xfs_super.c              | 2 +-
 include/linux/parser.h                    | 2 +-
 lib/parser.c                              | 2 +-
 net/9p/client.c                           | 2 +-
 net/9p/trans_fd.c                         | 2 +-
 security/selinux/hooks.c                  | 2 +-
 38 files changed, 43 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 690ca7b0dcf..2c8b8091250 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -659,7 +659,7 @@ enum {
 	Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
 };
 
-static match_table_t spufs_tokens = {
+static const match_table_t spufs_tokens = {
 	{ Opt_uid,   "uid=%d" },
 	{ Opt_gid,   "gid=%d" },
 	{ Opt_mode,  "mode=%o" },
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 7383781f3e6..36313801cd5 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -219,7 +219,7 @@ static int hypfs_release(struct inode *inode, struct file *filp)
 
 enum { opt_uid, opt_gid, opt_err };
 
-static match_table_t hypfs_tokens = {
+static const match_table_t hypfs_tokens = {
 	{opt_uid, "uid=%u"},
 	{opt_gid, "gid=%u"},
 	{opt_err, NULL}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index ed7c5f72cb8..5b8b533f290 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1683,7 +1683,7 @@ enum {
 				   SRP_OPT_SERVICE_ID),
 };
 
-static match_table_t srp_opt_tokens = {
+static const match_table_t srp_opt_tokens = {
 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
 	{ SRP_OPT_DGID,			"dgid=%s" 		},
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index db410e92c80..77fa7a08080 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -97,7 +97,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_devuid, "devuid=%u"},
 	{Opt_devgid, "devgid=%u"},
 	{Opt_devmode, "devmode=%o"},
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 047c791427a..c061c3f18e7 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -55,7 +55,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_debug, "debug=%x"},
 	{Opt_dfltuid, "dfltuid=%u"},
 	{Opt_dfltgid, "dfltgid=%u"},
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 26f3b43726b..7f83a46f2b7 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -157,7 +157,7 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
 
 enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err};
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_ownmask, "ownmask=%o"},
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 3a89094f93d..8989c93193e 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -135,7 +135,7 @@ enum {
 	Opt_verbose, Opt_volume, Opt_ignore, Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_bs, "bs=%u"},
 	{Opt_mode, "mode=%o"},
 	{Opt_mufs, "mufs"},
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 250d8c4d66e..aee239a048c 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -64,7 +64,7 @@ enum {
 	afs_opt_vol,
 };
 
-static match_table_t afs_options_list = {
+static const match_table_t afs_options_list = {
 	{ afs_opt_cell,		"cell=%s"	},
 	{ afs_opt_rwpath,	"rwpath"	},
 	{ afs_opt_vol,		"vol=%s"	},
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index dda510d31f8..b70eea1e8c5 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -59,7 +59,7 @@ static const struct super_operations autofs_sops = {
 
 enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
 
-static match_table_t autofs_tokens = {
+static const match_table_t autofs_tokens = {
 	{Opt_fd, "fd=%u"},
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 7bb3e5ba053..45d55819203 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -213,7 +213,7 @@ static const struct super_operations autofs4_sops = {
 enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
 	Opt_indirect, Opt_direct, Opt_offset};
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_fd, "fd=%u"},
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 740f53672a8..9286b2af893 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -650,7 +650,7 @@ enum {
 	Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err,
 };
 
-static match_table_t befs_tokens = {
+static const match_table_t befs_tokens = {
 	{Opt_uid, "uid=%d"},
 	{Opt_gid, "gid=%d"},
 	{Opt_charset, "iocharset=%s"},
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index a70d5d0890c..4a714f6c1be 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -49,7 +49,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_mode, "mode=%o"},
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 448dfd597b5..8ebe9a5d1d9 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -211,7 +211,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
        ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
        ecryptfs_opt_encrypted_view, ecryptfs_opt_err };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{ecryptfs_opt_sig, "sig=%s"},
 	{ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"},
 	{ecryptfs_opt_cipher, "cipher=%s"},
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index fd88c7b43e6..647cd888ac8 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -393,7 +393,7 @@ enum {
 	Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_bsd_df, "bsddf"},
 	{Opt_minix_df, "minixdf"},
 	{Opt_grpid, "grpid"},
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f38a5afc39a..399a96a6c55 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -760,7 +760,7 @@ enum {
 	Opt_grpquota
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_bsd_df, "bsddf"},
 	{Opt_minix_df, "minixdf"},
 	{Opt_grpid, "grpid"},
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb940c22ab0..dea8f13c2fd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -919,7 +919,7 @@ enum {
 	Opt_inode_readahead_blks
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_bsd_df, "bsddf"},
 	{Opt_minix_df, "minixdf"},
 	{Opt_grpid, "grpid"},
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 80ff3381fa2..d12cdf2a040 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -855,7 +855,7 @@ enum {
 	Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
 };
 
-static match_table_t fat_tokens = {
+static const match_table_t fat_tokens = {
 	{Opt_check_r, "check=relaxed"},
 	{Opt_check_s, "check=strict"},
 	{Opt_check_n, "check=normal"},
@@ -890,14 +890,14 @@ static match_table_t fat_tokens = {
 	{Opt_tz_utc, "tz=UTC"},
 	{Opt_err, NULL},
 };
-static match_table_t msdos_tokens = {
+static const match_table_t msdos_tokens = {
 	{Opt_nodots, "nodots"},
 	{Opt_nodots, "dotsOK=no"},
 	{Opt_dots, "dots"},
 	{Opt_dots, "dotsOK=yes"},
 	{Opt_err, NULL}
 };
-static match_table_t vfat_tokens = {
+static const match_table_t vfat_tokens = {
 	{Opt_charset, "iocharset=%s"},
 	{Opt_shortname_lower, "shortname=lower"},
 	{Opt_shortname_win95, "shortname=win95"},
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d2249f174e2..6a84388cacf 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -354,7 +354,7 @@ enum {
 	OPT_ERR
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{OPT_FD,			"fd=%u"},
 	{OPT_ROOTMODE,			"rootmode=%o"},
 	{OPT_USER_ID,			"user_id=%u"},
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index df48333e6f0..f96eb90a2cf 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -46,7 +46,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_lockproto, "lockproto=%s"},
 	{Opt_locktable, "locktable=%s"},
 	{Opt_hostdata, "hostdata=%s"},
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4abb1047c68..3c7c7637719 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -173,7 +173,7 @@ enum {
 	opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{ opt_uid, "uid=%u" },
 	{ opt_gid, "gid=%u" },
 	{ opt_umask, "umask=%o" },
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 9997cbf8beb..9699c56d323 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -25,7 +25,7 @@ enum {
 	opt_force, opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{ opt_creator, "creator=%s" },
 	{ opt_type, "type=%s" },
 	{ opt_umask, "umask=%o" },
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index b8ae9c90ada..29ad461d568 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -215,7 +215,7 @@ enum {
 	Opt_timeshift, Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_help, "help"},
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3f58923fb39..61edc701b0e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -57,7 +57,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_size,	"size=%s"},
 	{Opt_nr_inodes,	"nr_inodes=%s"},
 	{Opt_mode,	"mode=%o"},
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 26948a6033b..3f8af0f1505 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -310,7 +310,7 @@ enum {
 	Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_norock, "norock"},
 	{Opt_nojoliet, "nojoliet"},
 	{Opt_unhide, "unhide"},
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 3630718be39..0dae345e481 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -199,7 +199,7 @@ enum {
 	Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_integrity, "integrity"},
 	{Opt_nointegrity, "nointegrity"},
 	{Opt_iocharset, "iocharset=%s"},
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 46763d1cd39..8478fc25dae 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t __initdata tokens = {
+static match_table_t __initconst tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rsize, "rsize=%u"},
 	{Opt_wsize, "wsize=%u"},
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e9b20173fef..ffb697416cb 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -98,7 +98,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t nfs_mount_option_tokens = {
+static const match_table_t nfs_mount_option_tokens = {
 	{ Opt_userspace, "bg" },
 	{ Opt_userspace, "fg" },
 	{ Opt_userspace, "retry=%s" },
@@ -163,7 +163,7 @@ enum {
 	Opt_xprt_err
 };
 
-static match_table_t nfs_xprt_protocol_tokens = {
+static const match_table_t nfs_xprt_protocol_tokens = {
 	{ Opt_xprt_udp, "udp" },
 	{ Opt_xprt_tcp, "tcp" },
 	{ Opt_xprt_rdma, "rdma" },
@@ -180,7 +180,7 @@ enum {
 	Opt_sec_err
 };
 
-static match_table_t nfs_secflavor_tokens = {
+static const match_table_t nfs_secflavor_tokens = {
 	{ Opt_sec_none, "none" },
 	{ Opt_sec_none, "null" },
 	{ Opt_sec_sys, "sys" },
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 88255d3f52b..70334d85aff 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -157,7 +157,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_barrier, "barrier=%u"},
 	{Opt_err_panic, "errors=panic"},
 	{Opt_err_ro, "errors=remount-ro"},
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d29047b1b9b..cbf047a847c 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -346,7 +346,7 @@ enum {
 	Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_umask, "umask=%o"},
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3f4902060c7..9a9220333b3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -848,7 +848,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_fast_unmount, "fast_unmount"},
 	{Opt_norm_unmount, "norm_unmount"},
 	{Opt_err, NULL},
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 5698bbf83bb..e25e7010627 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -369,7 +369,7 @@ enum {
 	Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_novrs,	"novrs"},
 	{Opt_nostrict,	"nostrict"},
 	{Opt_bs,	"bs=%u"},
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3141969b456..e65212dfb60 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -309,7 +309,7 @@ enum {
        Opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_type_old, "ufstype=old"},
 	{Opt_type_sunx86, "ufstype=sunx86"},
 	{Opt_type_sun, "ufstype=sun"},
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
 	unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
-	struct match_token *tp = tokens;
+	const struct match_token *tp = tokens;
 
 	while (tp->token != Opt_onerror_panic && tp->token != mval)
 		++tp;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 18d3c848783..7227b2efef2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -158,7 +158,7 @@ enum {
 	Opt_barrier, Opt_nobarrier, Opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
 	{Opt_err, NULL}
diff --git a/include/linux/parser.h b/include/linux/parser.h
index 7dcd0507575..ea2281e726f 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -25,7 +25,7 @@ typedef struct {
 	char *to;
 } substring_t;
 
-int match_token(char *, match_table_t table, substring_t args[]);
+int match_token(char *, const match_table_t table, substring_t args[]);
 int match_int(substring_t *, int *result);
 int match_octal(substring_t *, int *result);
 int match_hex(substring_t *, int *result);
diff --git a/lib/parser.c b/lib/parser.c
index 4f0cbc03e0e..b00d02059a5 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -100,7 +100,7 @@ static int match_one(char *s, const char *p, substring_t args[])
  * format identifiers which will be taken into account when matching the
  * tokens, and whose locations will be returned in the @args array.
  */
-int match_token(char *s, match_table_t table, substring_t args[])
+int match_token(char *s, const match_table_t table, substring_t args[])
 {
 	const struct match_token *p;
 
diff --git a/net/9p/client.c b/net/9p/client.c
index 10e320307ec..e053e06028a 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -52,7 +52,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_msize, "msize=%u"},
 	{Opt_legacy, "noextend"},
 	{Opt_trans, "trans=%s"},
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index d652baf5ff9..6dabbdb6665 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -86,7 +86,7 @@ enum {
 	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 88f19536efa..576e5119907 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -325,7 +325,7 @@ enum {
 	Opt_rootcontext = 4,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_context, CONTEXT_STR "%s"},
 	{Opt_fscontext, FSCONTEXT_STR "%s"},
 	{Opt_defcontext, DEFCONTEXT_STR "%s"},
-- 
cgit v1.2.3-70-g09d2


From b4bb4ac8cb05ab5c13dfb7b47ef243982d3ad526 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Mon, 13 Oct 2008 18:43:59 -0700
Subject: pktgen: fix skb leak in case of failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Seems that skb goes into void unless something magic happened
in pskb_expand_head in case of failure.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a756847e381..99f656d35b4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2474,7 +2474,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 				if (ret < 0) {
 					printk(KERN_ERR "Error expanding "
 					       "ipsec packet %d\n",ret);
-					return 0;
+					goto err;
 				}
 			}
 
@@ -2484,8 +2484,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 			if (ret) {
 				printk(KERN_ERR "Error creating ipsec "
 				       "packet %d\n",ret);
-				kfree_skb(skb);
-				return 0;
+				goto err;
 			}
 			/* restore ll */
 			eth = (__u8 *) skb_push(skb, ETH_HLEN);
@@ -2494,6 +2493,9 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 		}
 	}
 	return 1;
+err:
+	kfree_skb(skb);
+	return 0;
 }
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From e7dc849494608fca7a7493c07eb190219c00d064 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 13 Oct 2008 18:54:07 -0700
Subject: netns: mib6 section fixlet

  LD      net/ipv6/ipv6.o
WARNING: net/ipv6/ipv6.o(.text+0xd8): Section mismatch in reference from the function inet6_net_init() to the function .init.text:ipv6_init_mibs()

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 050e14b7f70..01edac88851 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -834,7 +834,7 @@ static void __net_exit ipv6_cleanup_mibs(struct net *net)
 	snmp_mib_free((void **)net->mib.icmpv6msg_statistics);
 }
 
-static int inet6_net_init(struct net *net)
+static int __net_init inet6_net_init(struct net *net)
 {
 	int err = 0;
 
-- 
cgit v1.2.3-70-g09d2


From 510149e31974fdbb2c00c9bee6c0e2a688e61c85 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 13 Oct 2008 18:58:48 -0700
Subject: dsa: fix compile bug on s390

git commit 45cec1bac0719c904bb5f4405c2937f7e715888c
"dsa: Need to select PHYLIB." causes this build bug on s390:

drivers/built-in.o: In function `phy_stop_interrupts':
/home/heicarst/linux-2.6/drivers/net/phy/phy.c:631: undefined reference to `free_irq'
/home/heicarst/linux-2.6/drivers/net/phy/phy.c:646: undefined reference to `enable_irq'
drivers/built-in.o: In function `phy_start_interrupts':
/home/heicarst/linux-2.6/drivers/net/phy/phy.c:601: undefined reference to `request_irq'
drivers/built-in.o: In function `phy_interrupt':
/home/heicarst/linux-2.6/drivers/net/phy/phy.c:528: undefined reference to `disable_irq_nosync'
drivers/built-in.o: In function `phy_change':
/home/heicarst/linux-2.6/drivers/net/phy/phy.c:674: undefined reference to `enable_irq'
/home/heicarst/linux-2.6/drivers/net/phy/phy.c:692: undefined reference to `disable_irq'

PHYLIB has alread a depend on !S390, however select PHYLIB at DSA overrides
that unfortunately. So add a depend on !S390 to DSA as well.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cdce4c6c672..49211b35725 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,7 +1,7 @@
 menuconfig NET_DSA
 	bool "Distributed Switch Architecture support"
 	default n
-	depends on EXPERIMENTAL
+	depends on EXPERIMENTAL && !S390
 	select PHYLIB
 	---help---
 	  This allows you to use hardware switch chips that use
-- 
cgit v1.2.3-70-g09d2


From 113aa838ec3a235d883f8357d31d90e16c47fc89 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Mon, 13 Oct 2008 19:01:08 -0700
Subject: net: Rationalise email address: Network Specific Parts

Clean up the various different email addresses of mine listed in the code
to a single current and valid address. As Dave says his network merges
for 2.6.28 are now done this seems a good point to send them in where
they won't risk disrupting real changes.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/3c501.c              | 12 ++++++------
 drivers/net/3c515.c              |  2 +-
 drivers/net/appletalk/cops.c     |  2 +-
 drivers/net/eexpress.c           |  2 +-
 drivers/net/ibmlana.c            |  2 +-
 drivers/net/macmace.c            |  2 +-
 drivers/net/pcmcia/3c589_cs.c    |  2 +-
 drivers/net/pcmcia/nmclan_cs.c   |  2 +-
 drivers/net/tlan.c               |  3 ++-
 drivers/net/tokenring/smctr.c    |  2 +-
 drivers/net/tulip/dmfe.c         |  4 ++--
 drivers/net/via-velocity.c       |  2 +-
 drivers/net/wan/z85230.c         |  3 ++-
 drivers/net/wan/z85230.h         |  2 +-
 drivers/net/wireless/wavelan.c   |  2 +-
 drivers/net/wireless/wavelan.p.h |  2 +-
 include/linux/if_ether.h         |  2 +-
 include/linux/if_fddi.h          |  2 +-
 include/linux/if_hippi.h         |  2 +-
 include/linux/igmp.h             |  2 +-
 include/linux/netdevice.h        |  2 +-
 net/802/psnap.c                  |  2 +-
 net/appletalk/ddp.c              |  4 ++--
 net/core/datagram.c              |  2 +-
 net/core/dev_mcast.c             |  2 +-
 net/core/skbuff.c                |  2 +-
 net/core/stream.c                |  2 +-
 net/ipv4/icmp.c                  |  2 +-
 net/ipv4/igmp.c                  |  2 +-
 net/ipv4/ip_fragment.c           |  2 +-
 net/ipv4/ip_input.c              |  2 +-
 net/ipv4/ipip.c                  |  2 +-
 net/ipv4/ipmr.c                  |  2 +-
 net/ipv4/udp.c                   |  2 +-
 net/netlink/af_netlink.c         |  2 +-
 net/sunrpc/xprtsock.c            |  4 ++--
 net/unix/af_unix.c               |  2 +-
 37 files changed, 47 insertions(+), 45 deletions(-)

(limited to 'net')

diff --git a/drivers/net/3c501.c b/drivers/net/3c501.c
index 5ba4bab6d43..7d15e7c6bca 100644
--- a/drivers/net/3c501.c
+++ b/drivers/net/3c501.c
@@ -17,7 +17,7 @@
 	Annapolis MD 21403
 
     Fixed (again!) the missing interrupt locking on TX/RX shifting.
-	Alan Cox <Alan.Cox@linux.org>
+	Alan Cox <alan@lxorguk.ukuu.org.uk>
 
     Removed calls to init_etherdev since they are no longer needed, and
     cleaned up modularization just a bit. The driver still allows only
@@ -29,16 +29,16 @@
     the board. Now getting 150K/second FTP with a 3c501 card. Still playing
     with a TX-TX optimisation to see if we can touch 180-200K/second as seems
     theoretically maximum.
-		19950402 Alan Cox <Alan.Cox@linux.org>
+		19950402 Alan Cox <alan@lxorguk.ukuu.org.uk>
 
     Cleaned up for 2.3.x because we broke SMP now.
-		20000208 Alan Cox <alan@redhat.com>
+		20000208 Alan Cox <alan@lxorguk.ukuu.org.uk>
 
     Check up pass for 2.5. Nothing significant changed
-		20021009 Alan Cox <alan@redhat.com>
+		20021009 Alan Cox <alan@lxorguk.ukuu.org.uk>
 
     Fixed zero fill corner case
-		20030104 Alan Cox <alan@redhat.com>
+		20030104 Alan Cox <alan@lxorguk.ukuu.org.uk>
 
 
    For the avoidance of doubt the "preferred form" of this code is one which
@@ -104,7 +104,7 @@
 
 
 static const char version[] =
-	DRV_NAME ".c: " DRV_VERSION " Alan Cox (alan@redhat.com).\n";
+	DRV_NAME ".c: " DRV_VERSION " Alan Cox (alan@lxorguk.ukuu.org.uk).\n";
 
 /*
  *	Braindamage remaining:
diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c
index e4e3241628d..a0f8b6e2d0a 100644
--- a/drivers/net/3c515.c
+++ b/drivers/net/3c515.c
@@ -18,7 +18,7 @@
 
 	2001/11/17 - Added ethtool support (jgarzik)
 
-	2002/10/28 - Locking updates for 2.5 (alan@redhat.com)
+	2002/10/28 - Locking updates for 2.5 (alan@lxorguk.ukuu.org.uk)
 
 */
 
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index a0b4c851607..735fc947640 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -4,7 +4,7 @@
  *      - Jay Schulist <jschlst@samba.org>
  *
  *	With more than a little help from;
- *	- Alan Cox <Alan.Cox@linux.org> 
+ *	- Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  *      Derived from:
  *      - skeleton.c: A network driver outline for linux.
diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c
index 795c594a4b7..b751c1b96cf 100644
--- a/drivers/net/eexpress.c
+++ b/drivers/net/eexpress.c
@@ -8,7 +8,7 @@
  *
  * Many modifications, and currently maintained, by
  *  Philip Blundell <philb@gnu.org>
- * Added the Compaq LTE  Alan Cox <alan@redhat.com>
+ * Added the Compaq LTE  Alan Cox <alan@lxorguk.ukuu.org.uk>
  * Added MCA support Adam Fritzler
  *
  * Note - this driver is experimental still - it has problems on faster
diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c
index 95e3464068d..f02764725a2 100644
--- a/drivers/net/ibmlana.c
+++ b/drivers/net/ibmlana.c
@@ -71,7 +71,7 @@ History:
   June 1st, 2000
 	corrected version codes, added support for the latest 2.3 changes
   Oct 28th, 2002
-  	cleaned up for the 2.5 tree <alan@redhat.com>
+	cleaned up for the 2.5 tree <alan@lxorguk.ukuu.org.uk>
 
  *************************************************************************/
 
diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c
index 51ad3765e07..85587a6667b 100644
--- a/drivers/net/macmace.c
+++ b/drivers/net/macmace.c
@@ -9,7 +9,7 @@
  *	2 of the License, or (at your option) any later version.
  *
  *	Copyright (C) 1996 Paul Mackerras.
- *	Copyright (C) 1998 Alan Cox <alan@redhat.com>
+ *	Copyright (C) 1998 Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  *	Modified heavily by Joshua M. Thompson based on Dave Huang's NetBSD driver
  *
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index 549a6455842..29bd8532d32 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -15,7 +15,7 @@
     incorporated herein by reference.
     Donald Becker may be reached at becker@scyld.com
     
-    Updated for 2.5.x by Alan Cox <alan@redhat.com>
+    Updated for 2.5.x by Alan Cox <alan@lxorguk.ukuu.org.uk>
 
 ======================================================================*/
 
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index cfcbea9b7e2..ee5dcbfd0ba 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -69,7 +69,7 @@ Driver Notes and Issues
 History
 -------------------------------------------------------------------------------
 Log: nmclan_cs.c,v
- * 2.5.75-ac1 2003/07/11 Alan Cox <alan@redhat.com>
+ * 2.5.75-ac1 2003/07/11 Alan Cox <alan@lxorguk.ukuu.org.uk>
  * Fixed hang on card eject as we probe it
  * Cleaned up to use new style locking.
  *
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index ec871f64676..c41d6876136 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -29,7 +29,8 @@
  *
  *	Tigran Aivazian <tigran@sco.com>:	TLan_PciProbe() now uses
  *						new PCI BIOS interface.
- *	Alan Cox	<alan@redhat.com>:	Fixed the out of memory
+ *	Alan Cox	<alan@lxorguk.ukuu.org.uk>:
+ *						Fixed the out of memory
  *						handling.
  *
  *	Torben Mathiasen <torben.mathiasen@compaq.com> New Maintainer!
diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c
index fa73e6eed6b..ed50d288e49 100644
--- a/drivers/net/tokenring/smctr.c
+++ b/drivers/net/tokenring/smctr.c
@@ -25,7 +25,7 @@
  *  To do:
  *    1. Multicast support.
  *
- *  Initial 2.5 cleanup Alan Cox <alan@redhat.com>  2002/10/28
+ *  Initial 2.5 cleanup Alan Cox <alan@lxorguk.ukuu.org.uk>  2002/10/28
  */
 
 #include <linux/module.h>
diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
index 656200472fa..8e46a513a25 100644
--- a/drivers/net/tulip/dmfe.c
+++ b/drivers/net/tulip/dmfe.c
@@ -23,7 +23,7 @@
     Marcelo Tosatti <marcelo@conectiva.com.br> :
     Made it compile in 2.3 (device to net_device)
 
-    Alan Cox <alan@redhat.com> :
+    Alan Cox <alan@lxorguk.ukuu.org.uk> :
     Cleaned up for kernel merge.
     Removed the back compatibility support
     Reformatted, fixing spelling etc as I went
@@ -49,7 +49,7 @@
     support.  Updated PCI resource allocation.  Do not
     forget to unmap PCI mapped skbs.
 
-    Alan Cox <alan@redhat.com>
+    Alan Cox <alan@lxorguk.ukuu.org.uk>
     Added new PCI identifiers provided by Clear Zhang at ALi
     for their 1563 ethernet device.
 
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index ad20f96edfa..2dced383bcf 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -12,7 +12,7 @@
  *	Scatter gather
  *	More testing
  *
- * The changes are (c) Copyright 2004, Red Hat Inc. <alan@redhat.com>
+ * The changes are (c) Copyright 2004, Red Hat Inc. <alan@lxorguk.ukuu.org.uk>
  * Additional fixes and clean up: Francois Romieu
  *
  * This source has not been verified for use in safety critical systems.
diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 243bd8d918f..ccd9cd35ecb 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -18,7 +18,8 @@
  *	DMA now uses get_free_page as kmalloc buffers may span a 64K 
  *	boundary.
  *
- *	Modified for SMP safety and SMP locking by Alan Cox <alan@redhat.com>
+ *	Modified for SMP safety and SMP locking by Alan Cox
+ *					<alan@lxorguk.ukuu.org.uk>
  *
  *	Performance
  *
diff --git a/drivers/net/wan/z85230.h b/drivers/net/wan/z85230.h
index 4f372396c51..85b3e785d48 100644
--- a/drivers/net/wan/z85230.h
+++ b/drivers/net/wan/z85230.h
@@ -2,7 +2,7 @@
  *	Description of Z8530 Z85C30 and Z85230 communications chips
  *
  * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
- * Copyright (C) 1998 Alan Cox <alan@redhat.com>
+ * Copyright (C) 1998 Alan Cox <alan@lxorguk.ukuu.org.uk>
  */
 
 #ifndef _Z8530_H
diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 136220b5ca8..e939a73ff79 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -4387,7 +4387,7 @@ MODULE_LICENSE("GPL");
  *
  * Thanks go also to:
  *	James Ashton (jaa101@syseng.anu.edu.au),
- *	Alan Cox (alan@redhat.com),
+ *	Alan Cox (alan@lxorguk.ukuu.org.uk),
  *	Allan Creighton (allanc@cs.usyd.edu.au),
  *	Matthew Geier (matthew@cs.usyd.edu.au),
  *	Remo di Giovanni (remo@cs.usyd.edu.au),
diff --git a/drivers/net/wireless/wavelan.p.h b/drivers/net/wireless/wavelan.p.h
index b33ac47dd8d..44d31bbf39e 100644
--- a/drivers/net/wireless/wavelan.p.h
+++ b/drivers/net/wireless/wavelan.p.h
@@ -186,7 +186,7 @@
  *
  * Thanks go also to:
  *	James Ashton <jaa101@syseng.anu.edu.au>,
- *	Alan Cox <alan@redhat.com>,
+ *	Alan Cox <alan@lxorguk.ukuu.org.uk>,
  *	Allan Creighton <allanc@cs.usyd.edu.au>,
  *	Matthew Geier <matthew@cs.usyd.edu.au>,
  *	Remo di Giovanni <remo@cs.usyd.edu.au>,
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index bf1a53b2682..7f3c735f422 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -9,7 +9,7 @@
  *
  * Author:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Donald Becker, <becker@super.org>
- *		Alan Cox, <alan@redhat.com>
+ *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *		Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk>
  *
  *		This program is free software; you can redistribute it and/or
diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h
index ae77daed6c2..45de1046dbb 100644
--- a/include/linux/if_fddi.h
+++ b/include/linux/if_fddi.h
@@ -12,7 +12,7 @@
  *		if_fddi.h is based on previous if_ether.h and if_tr.h work by
  *			Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *			Donald Becker, <becker@super.org>
- *			Alan Cox, <alan@redhat.com>
+ *			Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *			Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk>
  *			Peter De Schrijver, <stud11@cc4.kuleuven.ac.be>
  *
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h
index 94d31ca7d71..f0f23516bb5 100644
--- a/include/linux/if_hippi.h
+++ b/include/linux/if_hippi.h
@@ -9,7 +9,7 @@
  *
  * Author:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Donald Becker, <becker@super.org>
- *		Alan Cox, <alan@redhat.com>
+ *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *		Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk>
  *		Jes Sorensen, <Jes.Sorensen@cern.ch>
  *
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 7bb3c095c15..f734a0ba069 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -2,7 +2,7 @@
  *	Linux NET3:	Internet Group Management Protocol  [IGMP]
  *
  *	Authors:
- *		Alan Cox <Alan.Cox@linux.org>
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  *	Extended to talk the BSD extended IGMP protocol of mrouted 3.6
  *
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d3ea3de70a8..64875859d65 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -11,7 +11,7 @@
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Corey Minyard <wf-rch!minyard@relay.EU.net>
  *		Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
- *		Alan Cox, <Alan.Cox@linux.org>
+ *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *		Bjorn Ekwall. <bj0rn@blox.se>
  *              Pekka Riikonen <priikone@poseidon.pspt.fi>
  *
diff --git a/net/802/psnap.c b/net/802/psnap.c
index b3cfe5a14fc..70980baeb68 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -1,7 +1,7 @@
 /*
  *	SNAP data link layer. Derived from 802.2
  *
- *		Alan Cox <Alan.Cox@linux.org>,
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>,
  *		from the 802.2 layer by Greg Page.
  *		Merged in additions from Greg Page's psnap.c.
  *
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 0c850427a85..d3134e7e6ee 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -2,7 +2,7 @@
  *	DDP:	An implementation of the AppleTalk DDP protocol for
  *		Ethernet 'ELAP'.
  *
- *		Alan Cox  <Alan.Cox@linux.org>
+ *		Alan Cox  <alan@lxorguk.ukuu.org.uk>
  *
  *		With more than a little assistance from
  *
@@ -1934,6 +1934,6 @@ static void __exit atalk_exit(void)
 module_exit(atalk_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Alan Cox <Alan.Cox@linux.org>");
+MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>");
 MODULE_DESCRIPTION("AppleTalk 0.20\n");
 MODULE_ALIAS_NETPROTO(PF_APPLETALK);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 52f577a0f54..ee631843c2f 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -9,7 +9,7 @@
  *	identical recvmsg() code. So we share it here. The poll was
  *	shared before but buried in udp.c so I moved it.
  *
- *	Authors:	Alan Cox <alan@redhat.com>. (datagram_poll() from old
+ *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
  *						     udp.c code)
  *
  *	Fixes:
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 5402b3b38e0..9e2fa39f22a 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -6,7 +6,7 @@
  *		Richard Underwood <richard@wuzz.demon.co.uk>
  *
  *	Stir fried together from the IP multicast and CAP patches above
- *		Alan Cox <Alan.Cox@linux.org>
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  *	Fixes:
  *		Alan Cox	:	Update the device on a real delete
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7f7bb1a636d..4e22e3a3535 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1,7 +1,7 @@
 /*
  *	Routines having to do with the 'struct sk_buff' memory handlers.
  *
- *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
+ *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
  *			Florian La Roche <rzsfl@rz.uni-sb.de>
  *
  *	Fixes:
diff --git a/net/core/stream.c b/net/core/stream.c
index a6b3437ff08..8727cead64a 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -9,7 +9,7 @@
  *
  *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *                     (from old tcp.c code)
- *                     Alan Cox <alan@redhat.com> (Borrowed comments 8-))
+ *                     Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-))
  */
 
 #include <linux/module.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 55c355e6323..72b2de76f1c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1,7 +1,7 @@
 /*
  *	NET3:	Implementation of the ICMP protocol layer.
  *
- *		Alan Cox, <alan@redhat.com>
+ *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7f9e337e390..a0d86455c53 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -9,7 +9,7 @@
  *	seems to fall out with gcc 2.6.2.
  *
  *	Authors:
- *		Alan Cox <Alan.Cox@linux.org>
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2152d222b95..e4f81f54bef 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -6,7 +6,7 @@
  *		The IP fragmentation functionality.
  *
  * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
- *		Alan Cox <Alan.Cox@linux.org>
+ *		Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
  * Fixes:
  *		Alan Cox	:	Split from ip.c , see ip_input.c for history.
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e0bed56c51f..861978a4f1a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -8,7 +8,7 @@
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Donald Becker, <becker@super.org>
- *		Alan Cox, <Alan.Cox@linux.org>
+ *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *		Richard Underwood
  *		Stefan Becker, <stefanb@yello.ping.de>
  *		Jorge Cwik, <jorge@laser.satlink.net>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4c6d2caf920..29609d29df7 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -41,7 +41,7 @@
 		Made the tunnels use dev->name not tunnel: when error reporting.
 		Added tx_dropped stat
 
-		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
+		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95
 
 	Reworked:
 		Changed to tunnel to destination gateway in addition to the
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c519b8d30ee..b42e082cc17 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1,7 +1,7 @@
 /*
  *	IP multicast routing support for mrouted 3.6/3.8
  *
- *		(c) 1995 Alan Cox, <alan@redhat.com>
+ *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *	  Linux Consultancy and Custom Driver Development
  *
  *	This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index eacf4cfef14..2095abc3cab 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -8,7 +8,7 @@
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- *		Alan Cox, <Alan.Cox@linux.org>
+ *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *		Hirokazu Takahashi, <taka@valinux.co.jp>
  *
  * Fixes:
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b0eacc0007c..2fd8afac5f7 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1,7 +1,7 @@
 /*
  * NETLINK      Kernel-user communication protocol.
  *
- * 		Authors:	Alan Cox <alan@redhat.com>
+ * 		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
  * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  *
  *		This program is free software; you can redistribute it and/or
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4486c59c3ac..9a288d5eea6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -3,8 +3,8 @@
  *
  * Client-side transport implementation for sockets.
  *
- * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
- * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
+ * TCP callback races fixes (C) 1998 Red Hat
+ * TCP send fixes (C) 1998 Red Hat
  * TCP NFS related read + write fixes
  *  (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
  *
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 015606b54d9..c647aab8d41 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1,7 +1,7 @@
 /*
  * NET4:	Implementation of BSD Unix domain sockets.
  *
- * Authors:	Alan Cox, <alan.cox@linux.org>
+ * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
-- 
cgit v1.2.3-70-g09d2


From 56f26f7b78af36d0f048a9403084870d2ffb549f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 13 Oct 2008 21:59:03 +0200
Subject: net/rfkill/rfkill-input.c needs <linux/sched.h>

For some m68k configs, I get:

| net/rfkill/rfkill-input.c: In function 'rfkill_start':
| net/rfkill/rfkill-input.c:208: error: dereferencing pointer to incomplete type

As the incomplete type is `struct task_struct', including <linux/sched.h> fixes
it.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/rfkill/rfkill-input.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index e5b69556bb5..21124ec0a73 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -16,6 +16,7 @@
 #include <linux/workqueue.h>
 #include <linux/init.h>
 #include <linux/rfkill.h>
+#include <linux/sched.h>
 
 #include "rfkill-input.h"
 
-- 
cgit v1.2.3-70-g09d2


From 85cdaf524b7ddab627e7d15405693f2511ef7505 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 16 May 2008 11:49:15 +0200
Subject: HID: make a bus from hid code

Make a bus from hid core. This is the first step for converting all the
quirks and separate almost-drivers into real drivers attached to this bus.

It's implemented to change behaviour in very tiny manner, so that no driver
needs to be changed this time.

Also add generic drivers for both usb and bt into usbhid or hidp
respectively which will bind all non-blacklisted device. Those blacklisted
will be either grabbed by special drivers or by nobody if they are broken at
the very rude base.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c        | 474 +++++++++++++++++++++++++++++++++++-------
 drivers/hid/hid-input.c       |   2 +-
 drivers/hid/usbhid/hid-core.c |  44 +++-
 drivers/hid/usbhid/usbhid.h   |   2 +-
 include/linux/hid.h           | 104 ++++++++-
 net/bluetooth/hidp/core.c     |  64 +++++-
 6 files changed, 586 insertions(+), 104 deletions(-)

(limited to 'net')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 426ac5add58..017fc20167d 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -534,9 +534,10 @@ static void hid_free_report(struct hid_report *report)
  * Free a device structure, all reports, and all fields.
  */
 
-void hid_free_device(struct hid_device *device)
+static void hid_device_release(struct device *dev)
 {
-	unsigned i,j;
+	struct hid_device *device = container_of(dev, struct hid_device, dev);
+	unsigned i, j;
 
 	for (i = 0; i < HID_REPORT_TYPES; i++) {
 		struct hid_report_enum *report_enum = device->report_enum + i;
@@ -552,7 +553,6 @@ void hid_free_device(struct hid_device *device)
 	kfree(device->collection);
 	kfree(device);
 }
-EXPORT_SYMBOL_GPL(hid_free_device);
 
 /*
  * Fetch a report description item from the data stream. We support long
@@ -622,18 +622,24 @@ static u8 *fetch_item(__u8 *start, __u8 *end, struct hid_item *item)
 	return NULL;
 }
 
-/*
+/**
+ * hid_parse_report - parse device report
+ *
+ * @device: hid device
+ * @start: report start
+ * @size: report size
+ *
  * Parse a report description into a hid_device structure. Reports are
  * enumerated, fields are attached to these reports.
+ * 0 returned on success, otherwise nonzero error value.
  */
-
-struct hid_device *hid_parse_report(__u8 *start, unsigned size)
+int hid_parse_report(struct hid_device *device, __u8 *start,
+		unsigned size)
 {
-	struct hid_device *device;
 	struct hid_parser *parser;
 	struct hid_item item;
 	__u8 *end;
-	unsigned i;
+	int ret;
 	static int (*dispatch_type[])(struct hid_parser *parser,
 				      struct hid_item *item) = {
 		hid_parser_main,
@@ -642,76 +648,54 @@ struct hid_device *hid_parse_report(__u8 *start, unsigned size)
 		hid_parser_reserved
 	};
 
-	if (!(device = kzalloc(sizeof(struct hid_device), GFP_KERNEL)))
-		return NULL;
-
-	if (!(device->collection = kzalloc(sizeof(struct hid_collection) *
-				   HID_DEFAULT_NUM_COLLECTIONS, GFP_KERNEL))) {
-		kfree(device);
-		return NULL;
-	}
-	device->collection_size = HID_DEFAULT_NUM_COLLECTIONS;
-
-	for (i = 0; i < HID_REPORT_TYPES; i++)
-		INIT_LIST_HEAD(&device->report_enum[i].report_list);
-
-	if (!(device->rdesc = kmalloc(size, GFP_KERNEL))) {
-		kfree(device->collection);
-		kfree(device);
-		return NULL;
-	}
+	device->rdesc = kmalloc(size, GFP_KERNEL);
+	if (device->rdesc == NULL)
+		return -ENOMEM;
 	memcpy(device->rdesc, start, size);
 	device->rsize = size;
 
-	if (!(parser = vmalloc(sizeof(struct hid_parser)))) {
-		kfree(device->rdesc);
-		kfree(device->collection);
-		kfree(device);
-		return NULL;
+	parser = vmalloc(sizeof(struct hid_parser));
+	if (!parser) {
+		ret = -ENOMEM;
+		goto err;
 	}
+
 	memset(parser, 0, sizeof(struct hid_parser));
 	parser->device = device;
 
 	end = start + size;
+	ret = -EINVAL;
 	while ((start = fetch_item(start, end, &item)) != NULL) {
 
 		if (item.format != HID_ITEM_FORMAT_SHORT) {
 			dbg_hid("unexpected long global item\n");
-			hid_free_device(device);
-			vfree(parser);
-			return NULL;
+			goto err;
 		}
 
 		if (dispatch_type[item.type](parser, &item)) {
 			dbg_hid("item %u %u %u %u parsing failed\n",
 				item.format, (unsigned)item.size, (unsigned)item.type, (unsigned)item.tag);
-			hid_free_device(device);
-			vfree(parser);
-			return NULL;
+			goto err;
 		}
 
 		if (start == end) {
 			if (parser->collection_stack_ptr) {
 				dbg_hid("unbalanced collection at end of report description\n");
-				hid_free_device(device);
-				vfree(parser);
-				return NULL;
+				goto err;
 			}
 			if (parser->local.delimiter_depth) {
 				dbg_hid("unbalanced delimiter at end of report description\n");
-				hid_free_device(device);
-				vfree(parser);
-				return NULL;
+				goto err;
 			}
 			vfree(parser);
-			return device;
+			return 0;
 		}
 	}
 
 	dbg_hid("item fetching failed at offset %d\n", (int)(end - start));
-	hid_free_device(device);
+err:
 	vfree(parser);
-	return NULL;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hid_parse_report);
 
@@ -815,9 +799,73 @@ static __inline__ int search(__s32 *array, __s32 value, unsigned n)
 	return -1;
 }
 
-static void hid_process_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value, int interrupt)
+/**
+ * hid_match_report - check if driver's raw_event should be called
+ *
+ * @hid: hid device
+ * @report_type: type to match against
+ *
+ * compare hid->driver->report_table->report_type to report->type
+ */
+static int hid_match_report(struct hid_device *hid, struct hid_report *report)
 {
+	const struct hid_report_id *id = hid->driver->report_table;
+
+	if (!id) /* NULL means all */
+		return 1;
+
+	for (; id->report_type != HID_TERMINATOR; id++)
+		if (id->report_type == HID_ANY_ID ||
+				id->report_type == report->type)
+			return 1;
+	return 0;
+}
+
+/**
+ * hid_match_usage - check if driver's event should be called
+ *
+ * @hid: hid device
+ * @usage: usage to match against
+ *
+ * compare hid->driver->usage_table->usage_{type,code} to
+ * usage->usage_{type,code}
+ */
+static int hid_match_usage(struct hid_device *hid, struct hid_usage *usage)
+{
+	const struct hid_usage_id *id = hid->driver->usage_table;
+
+	if (!id) /* NULL means all */
+		return 1;
+
+	for (; id->usage_type != HID_ANY_ID - 1; id++)
+		if ((id->usage_hid == HID_ANY_ID ||
+				id->usage_hid == usage->hid) &&
+				(id->usage_type == HID_ANY_ID ||
+				id->usage_type == usage->type) &&
+				(id->usage_code == HID_ANY_ID ||
+				 id->usage_code == usage->code))
+			return 1;
+	return 0;
+}
+
+static void hid_process_event(struct hid_device *hid, struct hid_field *field,
+		struct hid_usage *usage, __s32 value, int interrupt)
+{
+	struct hid_driver *hdrv = hid->driver;
+	int ret;
+
 	hid_dump_input(usage, value);
+
+	if (hdrv && hdrv->event && hid_match_usage(hid, usage)) {
+		ret = hdrv->event(hid, field, usage, value);
+		if (ret != 0) {
+			if (ret < 0)
+				dbg_hid("%s's event failed with %d\n",
+						hdrv->name, ret);
+			return;
+		}
+	}
+
 	if (hid->claimed & HID_CLAIMED_INPUT)
 		hidinput_hid_event(hid, field, usage, value);
 	if (hid->claimed & HID_CLAIMED_HIDDEV && interrupt && hid->hiddev_hid_event)
@@ -946,44 +994,47 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value)
 }
 EXPORT_SYMBOL_GPL(hid_set_field);
 
-int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int interrupt)
+static struct hid_report *hid_get_report(struct hid_report_enum *report_enum,
+		const u8 *data)
 {
-	struct hid_report_enum *report_enum = hid->report_enum + type;
 	struct hid_report *report;
-	int n, rsize, i;
+	unsigned int n = 0;	/* Normally report number is 0 */
 
-	if (!hid)
-		return -ENODEV;
+	/* Device uses numbered reports, data[0] is report number */
+	if (report_enum->numbered)
+		n = *data;
 
-	if (!size) {
-		dbg_hid("empty report\n");
-		return -1;
-	}
+	report = report_enum->report_id_hash[n];
+	if (report == NULL)
+		dbg_hid("undefined report_id %u received\n", n);
 
-	dbg_hid("report (size %u) (%snumbered)\n", size, report_enum->numbered ? "" : "un");
+	return report;
+}
 
-	n = 0;                          /* Normally report number is 0 */
-	if (report_enum->numbered) {    /* Device uses numbered reports, data[0] is report number */
-		n = *data++;
-		size--;
-	}
+void hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size,
+		int interrupt)
+{
+	struct hid_report_enum *report_enum = hid->report_enum + type;
+	struct hid_report *report;
+	unsigned int a;
+	int rsize, csize = size;
+	u8 *cdata = data;
 
-	/* dump the report */
-	dbg_hid("report %d (size %u) = ", n, size);
-	for (i = 0; i < size; i++)
-		dbg_hid_line(" %02x", data[i]);
-	dbg_hid_line("\n");
+	report = hid_get_report(report_enum, data);
+	if (!report)
+		return;
 
-	if (!(report = report_enum->report_id_hash[n])) {
-		dbg_hid("undefined report_id %d received\n", n);
-		return -1;
+	if (report_enum->numbered) {
+		cdata++;
+		csize--;
 	}
 
 	rsize = ((report->size - 1) >> 3) + 1;
 
-	if (size < rsize) {
-		dbg_hid("report %d is too short, (%d < %d)\n", report->id, size, rsize);
-		memset(data + size, 0, rsize - size);
+	if (csize < rsize) {
+		dbg_hid("report %d is too short, (%d < %d)\n", report->id,
+				csize, rsize);
+		memset(cdata + csize, 0, rsize - csize);
 	}
 
 	if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_report_event)
@@ -996,24 +1047,295 @@ int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int i
 			hidraw_report_event(hid, data, size);
 	}
 
-	for (n = 0; n < report->maxfield; n++)
-		hid_input_field(hid, report->field[n], data, interrupt);
+	for (a = 0; a < report->maxfield; a++)
+		hid_input_field(hid, report->field[a], cdata, interrupt);
 
 	if (hid->claimed & HID_CLAIMED_INPUT)
 		hidinput_report_event(hid, report);
+}
+EXPORT_SYMBOL_GPL(hid_report_raw_event);
+
+/**
+ * hid_input_report - report data from lower layer (usb, bt...)
+ *
+ * @hid: hid device
+ * @type: HID report type (HID_*_REPORT)
+ * @data: report contents
+ * @size: size of data parameter
+ * @interrupt: called from atomic?
+ *
+ * This is data entry for lower layers.
+ */
+int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int interrupt)
+{
+	struct hid_report_enum *report_enum = hid->report_enum + type;
+	struct hid_driver *hdrv = hid->driver;
+	struct hid_report *report;
+	unsigned int i;
+	int ret;
+
+	if (!hid || !hid->driver)
+		return -ENODEV;
+
+	if (!size) {
+		dbg_hid("empty report\n");
+		return -1;
+	}
+
+	dbg_hid("report (size %u) (%snumbered)\n", size, report_enum->numbered ? "" : "un");
+
+	report = hid_get_report(report_enum, data);
+	if (!report)
+		return -1;
+
+	/* dump the report */
+	dbg_hid("report %d (size %u) = ", report->id, size);
+	for (i = 0; i < size; i++)
+		dbg_hid_line(" %02x", data[i]);
+	dbg_hid_line("\n");
+
+	if (hdrv && hdrv->raw_event && hid_match_report(hid, report)) {
+		ret = hdrv->raw_event(hid, report, data, size);
+		if (ret != 0)
+			return ret < 0 ? ret : 0;
+	}
+
+	hid_report_raw_event(hid, type, data, size, interrupt);
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(hid_input_report);
 
+static bool hid_match_one_id(struct hid_device *hdev,
+		const struct hid_device_id *id)
+{
+	return id->bus == hdev->bus &&
+		(id->vendor == HID_ANY_ID || id->vendor == hdev->vendor) &&
+		(id->product == HID_ANY_ID || id->product == hdev->product);
+}
+
+static const struct hid_device_id *hid_match_id(struct hid_device *hdev,
+		const struct hid_device_id *id)
+{
+	for (; id->bus; id++)
+		if (hid_match_one_id(hdev, id))
+			return id;
+
+	return NULL;
+}
+
+static const struct hid_device_id hid_blacklist[] = {
+	{ }
+};
+
+static int hid_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct hid_driver *hdrv = container_of(drv, struct hid_driver, driver);
+	struct hid_device *hdev = container_of(dev, struct hid_device, dev);
+
+	if (!hid_match_id(hdev, hdrv->id_table))
+		return 0;
+
+	/* generic wants all non-blacklisted */
+	if (!strncmp(hdrv->name, "generic-", 8))
+		return !hid_match_id(hdev, hid_blacklist);
+
+	return 1;
+}
+
+static int hid_device_probe(struct device *dev)
+{
+	struct hid_driver *hdrv = container_of(dev->driver,
+			struct hid_driver, driver);
+	struct hid_device *hdev = container_of(dev, struct hid_device, dev);
+	const struct hid_device_id *id;
+	int ret = 0;
+
+	if (!hdev->driver) {
+		if (hdrv->probe) {
+			ret = -ENODEV;
+
+			id = hid_match_id(hdev, hdrv->id_table);
+			if (id)
+				ret = hdrv->probe(hdev, id);
+		}
+		if (!ret)
+			hdev->driver = hdrv;
+	}
+	return ret;
+}
+
+static int hid_device_remove(struct device *dev)
+{
+	struct hid_device *hdev = container_of(dev, struct hid_device, dev);
+	struct hid_driver *hdrv = hdev->driver;
+
+	if (hdrv) {
+		if (hdrv->remove)
+			hdrv->remove(hdev);
+		hdev->driver = NULL;
+	}
+
+	return 0;
+}
+
+static int hid_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct hid_device *hdev = container_of(dev, struct hid_device, dev);
+
+	if (add_uevent_var(env, "HID_ID=%04X:%08X:%08X",
+			hdev->bus, hdev->vendor, hdev->product))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "HID_NAME=%s", hdev->name))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "HID_PHYS=%s", hdev->phys))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "HID_UNIQ=%s", hdev->uniq))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MODALIAS=hid:b%04Xv%08Xp%08X",
+			hdev->bus, hdev->vendor, hdev->product))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static struct bus_type hid_bus_type = {
+	.name		= "hid",
+	.match		= hid_bus_match,
+	.probe		= hid_device_probe,
+	.remove		= hid_device_remove,
+	.uevent		= hid_uevent,
+};
+
+int hid_add_device(struct hid_device *hdev)
+{
+	static atomic_t id = ATOMIC_INIT(0);
+	int ret;
+
+	if (WARN_ON(hdev->status & HID_STAT_ADDED))
+		return -EBUSY;
+
+	/* XXX hack, any other cleaner solution < 20 bus_id bytes? */
+	sprintf(hdev->dev.bus_id, "%04X:%04X:%04X.%04X", hdev->bus,
+			hdev->vendor, hdev->product, atomic_inc_return(&id));
+
+	ret = device_add(&hdev->dev);
+	if (!ret)
+		hdev->status |= HID_STAT_ADDED;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hid_add_device);
+
+/**
+ * hid_allocate_device - allocate new hid device descriptor
+ *
+ * Allocate and initialize hid device, so that hid_destroy_device might be
+ * used to free it.
+ *
+ * New hid_device pointer is returned on success, otherwise ERR_PTR encoded
+ * error value.
+ */
+struct hid_device *hid_allocate_device(void)
+{
+	struct hid_device *hdev;
+	unsigned int i;
+	int ret = -ENOMEM;
+
+	hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
+	if (hdev == NULL)
+		return ERR_PTR(ret);
+
+	device_initialize(&hdev->dev);
+	hdev->dev.release = hid_device_release;
+	hdev->dev.bus = &hid_bus_type;
+
+	hdev->collection = kcalloc(HID_DEFAULT_NUM_COLLECTIONS,
+			sizeof(struct hid_collection), GFP_KERNEL);
+	if (hdev->collection == NULL)
+		goto err;
+	hdev->collection_size = HID_DEFAULT_NUM_COLLECTIONS;
+
+	for (i = 0; i < HID_REPORT_TYPES; i++)
+		INIT_LIST_HEAD(&hdev->report_enum[i].report_list);
+
+	return hdev;
+err:
+	put_device(&hdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(hid_allocate_device);
+
+static void hid_remove_device(struct hid_device *hdev)
+{
+	if (hdev->status & HID_STAT_ADDED) {
+		device_del(&hdev->dev);
+		hdev->status &= ~HID_STAT_ADDED;
+	}
+}
+
+/**
+ * hid_destroy_device - free previously allocated device
+ *
+ * @hdev: hid device
+ *
+ * If you allocate hid_device through hid_allocate_device, you should ever
+ * free by this function.
+ */
+void hid_destroy_device(struct hid_device *hdev)
+{
+	hid_remove_device(hdev);
+	put_device(&hdev->dev);
+}
+EXPORT_SYMBOL_GPL(hid_destroy_device);
+
+int __hid_register_driver(struct hid_driver *hdrv, struct module *owner,
+		const char *mod_name)
+{
+	hdrv->driver.name = hdrv->name;
+	hdrv->driver.bus = &hid_bus_type;
+	hdrv->driver.owner = owner;
+	hdrv->driver.mod_name = mod_name;
+
+	return driver_register(&hdrv->driver);
+}
+EXPORT_SYMBOL_GPL(__hid_register_driver);
+
+void hid_unregister_driver(struct hid_driver *hdrv)
+{
+	driver_unregister(&hdrv->driver);
+}
+EXPORT_SYMBOL_GPL(hid_unregister_driver);
+
 static int __init hid_init(void)
 {
-	return hidraw_init();
+	int ret;
+
+	ret = bus_register(&hid_bus_type);
+	if (ret) {
+		printk(KERN_ERR "HID: can't register hid bus\n");
+		goto err;
+	}
+
+	ret = hidraw_init();
+	if (ret)
+		goto err_bus;
+
+	return 0;
+err_bus:
+	bus_unregister(&hid_bus_type);
+err:
+	return ret;
 }
 
 static void __exit hid_exit(void)
 {
 	hidraw_exit();
+	bus_unregister(&hid_bus_type);
 }
 
 module_init(hid_init);
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 1b2e8dc3398..4ae5603804e 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1032,7 +1032,7 @@ int hidinput_connect(struct hid_device *hid)
 				input_dev->id.vendor  = hid->vendor;
 				input_dev->id.product = hid->product;
 				input_dev->id.version = hid->version;
-				input_dev->dev.parent = hid->dev;
+				input_dev->dev.parent = hid->dev.parent;
 				hidinput->input = input_dev;
 				list_add_tail(&hidinput->list, &hid->inputs);
 			}
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 27fe4d8912c..5955d05ae54 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -770,8 +770,15 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 		dbg_hid_line(" %02x", (unsigned char) rdesc[n]);
 	dbg_hid_line("\n");
 
-	if (!(hid = hid_parse_report(rdesc, n))) {
+	hid = hid_allocate_device();
+	if (IS_ERR(hid)) {
+		kfree(rdesc);
+		return NULL;
+	}
+
+	if (hid_parse_report(hid, rdesc, n)) {
 		dbg_hid("parsing report descriptor failed\n");
+		hid_destroy_device(hid);
 		kfree(rdesc);
 		return NULL;
 	}
@@ -798,10 +805,8 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 	if (insize > HID_MAX_BUFFER_SIZE)
 		insize = HID_MAX_BUFFER_SIZE;
 
-	if (hid_alloc_buffers(dev, hid)) {
-		hid_free_buffers(dev, hid);
+	if (hid_alloc_buffers(dev, hid))
 		goto fail;
-	}
 
 	hid->name[0] = 0;
 
@@ -881,7 +886,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 
 	hid->version = le16_to_cpu(hdesc->bcdHID);
 	hid->country = hdesc->bCountryCode;
-	hid->dev = &intf->dev;
+	hid->dev.parent = &intf->dev;
 	usbhid->intf = intf;
 	usbhid->ifnum = interface->desc.bInterfaceNumber;
 
@@ -925,7 +930,7 @@ fail:
 	hid_free_buffers(dev, hid);
 	kfree(usbhid);
 fail_no_usbhid:
-	hid_free_device(hid);
+	hid_destroy_device(hid);
 
 	return NULL;
 }
@@ -964,14 +969,14 @@ static void hid_disconnect(struct usb_interface *intf)
 
 	hid_free_buffers(hid_to_usb_dev(hid), hid);
 	kfree(usbhid);
-	hid_free_device(hid);
+	hid_destroy_device(hid);
 }
 
 static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id)
 {
 	struct hid_device *hid;
 	char path[64];
-	int i;
+	int i, ret;
 	char *c;
 
 	dbg_hid("HID probe called for ifnum %d\n",
@@ -1037,7 +1042,12 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	printk(": USB HID v%x.%02x %s [%s] on %s\n",
 		hid->version >> 8, hid->version & 0xff, c, hid->name, path);
 
-	return 0;
+	ret = hid_add_device(hid);
+	if (ret) {
+		dev_err(&intf->dev, "can't add hid device: %d\n", ret);
+		hid_disconnect(intf);
+	}
+	return ret;
 }
 
 static int hid_suspend(struct usb_interface *intf, pm_message_t message)
@@ -1107,9 +1117,22 @@ static struct usb_driver hid_driver = {
 	.supports_autosuspend = 1,
 };
 
+static const struct hid_device_id hid_usb_table[] = {
+	{ HID_USB_DEVICE(HID_ANY_ID, HID_ANY_ID) },
+	{ }
+};
+
+static struct hid_driver hid_usb_driver = {
+	.name = "generic-usb",
+	.id_table = hid_usb_table,
+};
+
 static int __init hid_init(void)
 {
 	int retval;
+	retval = hid_register_driver(&hid_usb_driver);
+	if (retval)
+		goto hid_register_fail;
 	retval = usbhid_quirks_init(quirks_param);
 	if (retval)
 		goto usbhid_quirks_init_fail;
@@ -1127,6 +1150,8 @@ usb_register_fail:
 hiddev_init_fail:
 	usbhid_quirks_exit();
 usbhid_quirks_init_fail:
+	hid_unregister_driver(&hid_usb_driver);
+hid_register_fail:
 	return retval;
 }
 
@@ -1135,6 +1160,7 @@ static void __exit hid_exit(void)
 	usb_deregister(&hid_driver);
 	hiddev_exit();
 	usbhid_quirks_exit();
+	hid_unregister_driver(&hid_usb_driver);
 }
 
 module_init(hid_init);
diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h
index 62d2d7c925b..b47f991867e 100644
--- a/drivers/hid/usbhid/usbhid.h
+++ b/drivers/hid/usbhid/usbhid.h
@@ -82,7 +82,7 @@ struct usbhid_device {
 };
 
 #define	hid_to_usb_dev(hid_dev) \
-	container_of(hid_dev->dev->parent, struct usb_device, dev)
+	container_of(hid_dev->dev.parent->parent, struct usb_device, dev)
 
 #endif
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index b7a17762a0b..c4bea0eda85 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -418,6 +418,8 @@ struct hid_control_fifo {
 #define HID_CLAIMED_HIDDEV	2
 #define HID_CLAIMED_HIDRAW	4
 
+#define HID_STAT_ADDED		1
+
 #define HID_CTRL_RUNNING	1
 #define HID_OUT_RUNNING		2
 #define HID_IN_RUNNING		3
@@ -432,22 +434,26 @@ struct hid_input {
 	struct input_dev *input;
 };
 
+struct hid_driver;
+
 struct hid_device {							/* device report descriptor */
-	 __u8 *rdesc;
+	__u8 *rdesc;
 	unsigned rsize;
 	struct hid_collection *collection;				/* List of HID collections */
 	unsigned collection_size;					/* Number of allocated hid_collections */
 	unsigned maxcollection;						/* Number of parsed collections */
 	unsigned maxapplication;					/* Number of applications */
-	unsigned short bus;                                             /* BUS ID */
-	unsigned short vendor;                                          /* Vendor ID */
-	unsigned short product;                                         /* Product ID */
-	unsigned version;						/* HID version */
+	__u16 bus;							/* BUS ID */
+	__u32 vendor;							/* Vendor ID */
+	__u32 product;							/* Product ID */
+	__u32 version;							/* HID version */
 	unsigned country;						/* HID country */
 	struct hid_report_enum report_enum[HID_REPORT_TYPES];
 
-	struct device *dev;						/* device */
+	struct device dev;						/* device */
+	struct hid_driver *driver;
 
+	unsigned int status;						/* see STAT flags above */
 	unsigned claimed;						/* Claimed by hidinput, hiddev? */
 	unsigned quirks;						/* Various quirks the device can pull on us */
 
@@ -483,6 +489,16 @@ struct hid_device {							/* device report descriptor */
 #endif
 };
 
+static inline void *hid_get_drvdata(struct hid_device *hdev)
+{
+	return dev_get_drvdata(&hdev->dev);
+}
+
+static inline void hid_set_drvdata(struct hid_device *hdev, void *data)
+{
+	dev_set_drvdata(&hdev->dev, data);
+}
+
 #define HID_GLOBAL_STACK_SIZE 4
 #define HID_COLLECTION_STACK_SIZE 4
 
@@ -511,6 +527,61 @@ struct hid_descriptor {
 	struct hid_class_descriptor desc[1];
 } __attribute__ ((packed));
 
+#define HID_DEVICE(b, ven, prod) \
+	.bus = (b), \
+	.vendor = (ven), .product = (prod)
+
+#define HID_USB_DEVICE(ven, prod)	HID_DEVICE(BUS_USB, ven, prod)
+#define HID_BLUETOOTH_DEVICE(ven, prod)	HID_DEVICE(BUS_BLUETOOTH, ven, prod)
+
+#define HID_REPORT_ID(rep) \
+	.report_type = (rep)
+#define HID_USAGE_ID(uhid, utype, ucode) \
+	.usage_hid = (uhid), .usage_type = (utype), .usage_code = (ucode)
+/* we don't want to catch types and codes equal to 0 */
+#define HID_TERMINATOR		(HID_ANY_ID - 1)
+
+struct hid_report_id {
+	__u32 report_type;
+};
+struct hid_usage_id {
+	__u32 usage_hid;
+	__u32 usage_type;
+	__u32 usage_code;
+};
+
+/**
+ * struct hid_driver
+ * @name: driver name (e.g. "Footech_bar-wheel")
+ * @id_table: which devices is this driver for (must be non-NULL for probe
+ * 	      to be called)
+ * @probe: new device inserted
+ * @remove: device removed (NULL if not a hot-plug capable driver)
+ * @report_table: on which reports to call raw_event (NULL means all)
+ * @raw_event: if report in report_table, this hook is called (NULL means nop)
+ * @usage_table: on which events to call event (NULL means all)
+ * @event: if usage in usage_table, this hook is called (NULL means nop)
+ *
+ * raw_event and event should return 0 on no action performed, 1 when no
+ * further processing should be done and negative on error
+ */
+struct hid_driver {
+	char *name;
+	const struct hid_device_id *id_table;
+
+	int (*probe)(struct hid_device *dev, const struct hid_device_id *id);
+	void (*remove)(struct hid_device *dev);
+
+	const struct hid_report_id *report_table;
+	int (*raw_event)(struct hid_device *hdev, struct hid_report *report,
+			u8 *data, int size);
+	const struct hid_usage_id *usage_table;
+	int (*event)(struct hid_device *hdev, struct hid_field *field,
+			struct hid_usage *usage, __s32 value);
+/* private: */
+	struct device_driver driver;
+};
+
 /* Applications from HID Usage Tables 4/8/99 Version 1.1 */
 /* We ignore a few input applications that are not widely used */
 #define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || (a == 0x000d0002))
@@ -521,6 +592,17 @@ struct hid_descriptor {
 extern int hid_debug;
 #endif
 
+extern int hid_add_device(struct hid_device *);
+extern void hid_destroy_device(struct hid_device *);
+
+extern int __must_check __hid_register_driver(struct hid_driver *,
+		struct module *, const char *mod_name);
+static inline int __must_check hid_register_driver(struct hid_driver *driver)
+{
+	return __hid_register_driver(driver, THIS_MODULE, KBUILD_MODNAME);
+}
+extern void hid_unregister_driver(struct hid_driver *);
+
 extern void hidinput_hid_event(struct hid_device *, struct hid_field *, struct hid_usage *, __s32);
 extern void hidinput_report_event(struct hid_device *hid, struct hid_report *report);
 extern int hidinput_connect(struct hid_device *);
@@ -533,8 +615,14 @@ int hidinput_mapping_quirks(struct hid_usage *, struct input_dev *, unsigned lon
 int hidinput_event_quirks(struct hid_device *, struct hid_field *, struct hid_usage *, __s32);
 int hidinput_apple_event(struct hid_device *, struct input_dev *, struct hid_usage *, __s32);
 void hid_output_report(struct hid_report *report, __u8 *data);
-void hid_free_device(struct hid_device *device);
-struct hid_device *hid_parse_report(__u8 *start, unsigned size);
+struct hid_device *hid_allocate_device(void);
+int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
+
+void hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size,
+		int interrupt);
+
+extern int hid_generic_init(void);
+extern void hid_generic_exit(void);
 
 /* HID quirks API */
 u32 usbhid_lookup_quirk(const u16 idVendor, const u16 idProduct);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 96434d774c8..56a51f91591 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -578,7 +578,7 @@ static int hidp_session(void *arg)
 	if (session->hid) {
 		if (session->hid->claimed & HID_CLAIMED_INPUT)
 			hidinput_disconnect(session->hid);
-		hid_free_device(session->hid);
+		hid_destroy_device(session->hid);
 	}
 
 	/* Wakeup user-space polling for socket errors */
@@ -698,12 +698,13 @@ static void hidp_setup_quirks(struct hid_device *hid)
 			hid->quirks = hidp_blacklist[n].quirks;
 }
 
-static void hidp_setup_hid(struct hidp_session *session,
+static int hidp_setup_hid(struct hidp_session *session,
 				struct hidp_connadd_req *req)
 {
 	struct hid_device *hid = session->hid;
 	struct hid_report *report;
 	bdaddr_t src, dst;
+	int ret;
 
 	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
 	baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
@@ -721,7 +722,7 @@ static void hidp_setup_hid(struct hidp_session *session,
 	strncpy(hid->phys, batostr(&src), 64);
 	strncpy(hid->uniq, batostr(&dst), 64);
 
-	hid->dev = hidp_get_device(session);
+	hid->dev.parent = hidp_get_device(session);
 
 	hid->hid_open  = hidp_open;
 	hid->hid_close = hidp_close;
@@ -738,6 +739,15 @@ static void hidp_setup_hid(struct hidp_session *session,
 
 	if (hidinput_connect(hid) == 0)
 		hid->claimed |= HID_CLAIMED_INPUT;
+
+	ret = hid_add_device(hid);
+	if (ret) {
+		if (hid->claimed & HID_CLAIMED_INPUT)
+			hidinput_disconnect(hid);
+		skb_queue_purge(&session->intr_transmit);
+	}
+
+	return ret;
 }
 
 int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock)
@@ -771,11 +781,19 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 			return -EFAULT;
 		}
 
-		session->hid = hid_parse_report(buf, req->rd_size);
+		session->hid = hid_allocate_device();
+		if (IS_ERR(session->hid)) {
+			kfree(buf);
+			kfree(session);
+			return PTR_ERR(session->hid);
+		}
+
+		err = hid_parse_report(session->hid, buf, req->rd_size);
 
 		kfree(buf);
 
-		if (!session->hid) {
+		if (err) {
+			hid_destroy_device(session->hid);
 			kfree(session);
 			return -EINVAL;
 		}
@@ -822,8 +840,11 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 			goto failed;
 	}
 
-	if (session->hid)
-		hidp_setup_hid(session, req);
+	if (session->hid) {
+		err = hidp_setup_hid(session, req);
+		if (err)
+			goto failed;
+	}
 
 	__hidp_link_session(session);
 
@@ -859,7 +880,7 @@ failed:
 	up_write(&hidp_session_sem);
 
 	if (session->hid)
-		hid_free_device(session->hid);
+		hid_destroy_device(session->hid);
 
 	input_free_device(session->input);
 	kfree(session);
@@ -950,18 +971,43 @@ int hidp_get_conninfo(struct hidp_conninfo *ci)
 	return err;
 }
 
+static const struct hid_device_id hidp_table[] = {
+	{ HID_BLUETOOTH_DEVICE(HID_ANY_ID, HID_ANY_ID) },
+	{ }
+};
+
+static struct hid_driver hidp_driver = {
+	.name = "generic-bluetooth",
+	.id_table = hidp_table,
+};
+
 static int __init hidp_init(void)
 {
+	int ret;
+
 	l2cap_load();
 
 	BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION);
 
-	return hidp_init_sockets();
+	ret = hid_register_driver(&hidp_driver);
+	if (ret)
+		goto err;
+
+	ret = hidp_init_sockets();
+	if (ret)
+		goto err_drv;
+
+	return 0;
+err_drv:
+	hid_unregister_driver(&hidp_driver);
+err:
+	return ret;
 }
 
 static void __exit hidp_exit(void)
 {
 	hidp_cleanup_sockets();
+	hid_unregister_driver(&hidp_driver);
 }
 
 module_init(hidp_init);
-- 
cgit v1.2.3-70-g09d2


From c500c9714011edab021591340042787722db9cf0 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 16 May 2008 11:49:16 +0200
Subject: HID: hid, make parsing event driven

Next step for complete hid bus, this patch includes:
- call parser either from probe or from hid-core if there is no probe.
- add ll_driver structure and centralize some stuff there (open, close...)
- split and merge usb_hid_configure and hid_probe into several functions
  to allow hooks/fixes between them

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c        |  24 ++-
 drivers/hid/hid-input.c       |  20 ++-
 drivers/hid/hidraw.c          |   6 +-
 drivers/hid/usbhid/hid-core.c | 330 ++++++++++++++++++++++++------------------
 include/linux/hid.h           | 104 ++++++++++++-
 net/bluetooth/hidp/core.c     | 191 +++++++++++++-----------
 net/bluetooth/hidp/hidp.h     |   2 +
 7 files changed, 438 insertions(+), 239 deletions(-)

(limited to 'net')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 017fc20167d..3dacbcd7e41 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -648,6 +648,9 @@ int hid_parse_report(struct hid_device *device, __u8 *start,
 		hid_parser_reserved
 	};
 
+	if (device->driver->report_fixup)
+		device->driver->report_fixup(device, start, size);
+
 	device->rdesc = kmalloc(size, GFP_KERNEL);
 	if (device->rdesc == NULL)
 		return -ENOMEM;
@@ -1152,15 +1155,20 @@ static int hid_device_probe(struct device *dev)
 	int ret = 0;
 
 	if (!hdev->driver) {
-		if (hdrv->probe) {
-			ret = -ENODEV;
+		id = hid_match_id(hdev, hdrv->id_table);
+		if (id == NULL)
+			return -ENODEV;
 
-			id = hid_match_id(hdev, hdrv->id_table);
-			if (id)
-				ret = hdrv->probe(hdev, id);
+		hdev->driver = hdrv;
+		if (hdrv->probe) {
+			ret = hdrv->probe(hdev, id);
+		} else { /* default probe */
+			ret = hid_parse(hdev);
+			if (!ret)
+				ret = hid_hw_start(hdev);
 		}
-		if (!ret)
-			hdev->driver = hdrv;
+		if (ret)
+			hdev->driver = NULL;
 	}
 	return ret;
 }
@@ -1173,6 +1181,8 @@ static int hid_device_remove(struct device *dev)
 	if (hdrv) {
 		if (hdrv->remove)
 			hdrv->remove(hdev);
+		else /* default remove */
+			hid_hw_stop(hdev);
 		hdev->driver = NULL;
 	}
 
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 4ae5603804e..9fa7239ab31 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -390,6 +390,15 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 	if (ret)
 		goto mapped;
 
+	if (device->driver->input_mapping) {
+		int ret = device->driver->input_mapping(device, hidinput, field,
+				usage, &bit, &max);
+		if (ret > 0)
+			goto mapped;
+		if (ret < 0)
+			goto ignore;
+	}
+
 	switch (usage->hid & HID_USAGE_PAGE) {
 
 		case HID_UP_UNDEFINED:
@@ -755,6 +764,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 	}
 
 mapped:
+	if (device->driver->input_mapped && device->driver->input_mapped(device,
+				hidinput, field, usage, &bit, &max) < 0)
+		goto ignore;
+
 	if (device->quirks & HID_QUIRK_MIGHTYMOUSE) {
 		if (usage->hid == HID_GD_Z)
 			map_rel(REL_HWHEEL);
@@ -961,14 +974,14 @@ static int hidinput_open(struct input_dev *dev)
 {
 	struct hid_device *hid = input_get_drvdata(dev);
 
-	return hid->hid_open(hid);
+	return hid->ll_driver->open(hid);
 }
 
 static void hidinput_close(struct input_dev *dev)
 {
 	struct hid_device *hid = input_get_drvdata(dev);
 
-	hid->hid_close(hid);
+	hid->ll_driver->close(hid);
 }
 
 /*
@@ -1019,7 +1032,8 @@ int hidinput_connect(struct hid_device *hid)
 				}
 
 				input_set_drvdata(input_dev, hid);
-				input_dev->event = hid->hidinput_input_event;
+				input_dev->event =
+					hid->ll_driver->hidinput_input_event;
 				input_dev->open = hidinput_open;
 				input_dev->close = hidinput_close;
 				input_dev->setkeycode = hidinput_setkeycode;
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index c40f0403eda..4be240e74d4 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -181,7 +181,7 @@ static int hidraw_open(struct inode *inode, struct file *file)
 
 	dev = hidraw_table[minor];
 	if (!dev->open++)
-		dev->hid->hid_open(dev->hid);
+		dev->hid->ll_driver->open(dev->hid);
 
 out_unlock:
 	spin_unlock(&minors_lock);
@@ -207,7 +207,7 @@ static int hidraw_release(struct inode * inode, struct file * file)
 	dev = hidraw_table[minor];
 	if (!dev->open--) {
 		if (list->hidraw->exist)
-			dev->hid->hid_close(dev->hid);
+			dev->hid->ll_driver->close(dev->hid);
 		else
 			kfree(list->hidraw);
 	}
@@ -367,7 +367,7 @@ void hidraw_disconnect(struct hid_device *hid)
 	device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor));
 
 	if (hidraw->open) {
-		hid->hid_close(hid);
+		hid->ll_driver->close(hid);
 		wake_up_interruptible(&hidraw->wait);
 	} else {
 		kfree(hidraw);
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 5955d05ae54..d2a3461909a 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -701,17 +701,84 @@ static void hid_fixup_sony_ps3_controller(struct usb_device *dev, int ifnum)
 	kfree(buf);
 }
 
-static struct hid_device *usb_hid_configure(struct usb_interface *intf)
+static int usbhid_start_finish(struct hid_device *hid)
 {
+	struct usb_interface *intf = to_usb_interface(hid->dev.parent);
+	char path[64], *type;
+	unsigned int i;
+
+	usbhid_init_reports(hid);
+	hid_dump_device(hid);
+	if (hid->quirks & HID_QUIRK_RESET_LEDS)
+		usbhid_set_leds(hid);
+
+	if (!hidinput_connect(hid))
+		hid->claimed |= HID_CLAIMED_INPUT;
+	if (!hiddev_connect(hid))
+		hid->claimed |= HID_CLAIMED_HIDDEV;
+	if (!hidraw_connect(hid))
+		hid->claimed |= HID_CLAIMED_HIDRAW;
+
+	if (!hid->claimed) {
+		printk(KERN_ERR "HID device claimed by neither input, hiddev "
+				"nor hidraw\n");
+		return -ENODEV;
+	}
+
+	if ((hid->claimed & HID_CLAIMED_INPUT))
+		hid_ff_init(hid);
+
+	if (hid->quirks & HID_QUIRK_SONY_PS3_CONTROLLER)
+		hid_fixup_sony_ps3_controller(interface_to_usbdev(intf),
+			intf->cur_altsetting->desc.bInterfaceNumber);
+
+	printk(KERN_INFO);
+
+	if (hid->claimed & HID_CLAIMED_INPUT)
+		printk("input");
+	if ((hid->claimed & HID_CLAIMED_INPUT) &&
+			((hid->claimed & HID_CLAIMED_HIDDEV) ||
+				hid->claimed & HID_CLAIMED_HIDRAW))
+		printk(",");
+	if (hid->claimed & HID_CLAIMED_HIDDEV)
+		printk("hiddev%d", hid->minor);
+	if ((hid->claimed & HID_CLAIMED_INPUT) &&
+			(hid->claimed & HID_CLAIMED_HIDDEV) &&
+			(hid->claimed & HID_CLAIMED_HIDRAW))
+		printk(",");
+	if (hid->claimed & HID_CLAIMED_HIDRAW)
+		printk("hidraw%d", ((struct hidraw *)hid->hidraw)->minor);
+
+	type = "Device";
+	for (i = 0; i < hid->maxcollection; i++) {
+		if (hid->collection[i].type == HID_COLLECTION_APPLICATION &&
+		    (hid->collection[i].usage & HID_USAGE_PAGE) ==
+						HID_UP_GENDESK &&
+		    (hid->collection[i].usage & 0xffff) <
+						ARRAY_SIZE(hid_types)) {
+			type = hid_types[hid->collection[i].usage & 0xffff];
+			break;
+		}
+	}
+
+	usb_make_path(interface_to_usbdev(intf), path, 63);
+
+	printk(": USB HID v%x.%02x %s [%s] on %s\n",
+		hid->version >> 8, hid->version & 0xff, type, hid->name, path);
+
+	return 0;
+}
+
+static int usbhid_parse(struct hid_device *hid)
+{
+	struct usb_interface *intf = to_usb_interface(hid->dev.parent);
 	struct usb_host_interface *interface = intf->cur_altsetting;
 	struct usb_device *dev = interface_to_usbdev (intf);
 	struct hid_descriptor *hdesc;
-	struct hid_device *hid;
 	u32 quirks = 0;
-	unsigned int insize = 0, rsize = 0;
+	unsigned int rsize = 0;
 	char *rdesc;
-	int n, len;
-	struct usbhid_device *usbhid;
+	int ret, n;
 
 	quirks = usbhid_lookup_quirk(le16_to_cpu(dev->descriptor.idVendor),
 			le16_to_cpu(dev->descriptor.idProduct));
@@ -725,40 +792,44 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 	}
 
 	if (quirks & HID_QUIRK_IGNORE)
-		return NULL;
+		return -ENODEV;
 
 	if ((quirks & HID_QUIRK_IGNORE_MOUSE) &&
 		(interface->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE))
-			return NULL;
-
+			return -ENODEV;
 
 	if (usb_get_extra_descriptor(interface, HID_DT_HID, &hdesc) &&
 	    (!interface->desc.bNumEndpoints ||
 	     usb_get_extra_descriptor(&interface->endpoint[0], HID_DT_HID, &hdesc))) {
 		dbg_hid("class descriptor not present\n");
-		return NULL;
+		return -ENODEV;
 	}
 
+	hid->version = le16_to_cpu(hdesc->bcdHID);
+	hid->country = hdesc->bCountryCode;
+
 	for (n = 0; n < hdesc->bNumDescriptors; n++)
 		if (hdesc->desc[n].bDescriptorType == HID_DT_REPORT)
 			rsize = le16_to_cpu(hdesc->desc[n].wDescriptorLength);
 
 	if (!rsize || rsize > HID_MAX_DESCRIPTOR_SIZE) {
 		dbg_hid("weird size of report descriptor (%u)\n", rsize);
-		return NULL;
+		return -EINVAL;
 	}
 
 	if (!(rdesc = kmalloc(rsize, GFP_KERNEL))) {
 		dbg_hid("couldn't allocate rdesc memory\n");
-		return NULL;
+		return -ENOMEM;
 	}
 
 	hid_set_idle(dev, interface->desc.bInterfaceNumber, 0, 0);
 
-	if ((n = hid_get_class_descriptor(dev, interface->desc.bInterfaceNumber, HID_DT_REPORT, rdesc, rsize)) < 0) {
+	ret = hid_get_class_descriptor(dev, interface->desc.bInterfaceNumber,
+			HID_DT_REPORT, rdesc, rsize);
+	if (ret < 0) {
 		dbg_hid("reading report descriptor failed\n");
 		kfree(rdesc);
-		return NULL;
+		goto err;
 	}
 
 	usbhid_fixup_report_descriptor(le16_to_cpu(dev->descriptor.idVendor),
@@ -770,24 +841,36 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 		dbg_hid_line(" %02x", (unsigned char) rdesc[n]);
 	dbg_hid_line("\n");
 
-	hid = hid_allocate_device();
-	if (IS_ERR(hid)) {
-		kfree(rdesc);
-		return NULL;
-	}
-
-	if (hid_parse_report(hid, rdesc, n)) {
+	ret = hid_parse_report(hid, rdesc, rsize);
+	kfree(rdesc);
+	if (ret) {
 		dbg_hid("parsing report descriptor failed\n");
-		hid_destroy_device(hid);
-		kfree(rdesc);
-		return NULL;
+		goto err;
 	}
 
-	kfree(rdesc);
 	hid->quirks = quirks;
 
-	if (!(usbhid = kzalloc(sizeof(struct usbhid_device), GFP_KERNEL)))
-		goto fail_no_usbhid;
+	return 0;
+err:
+	return ret;
+}
+
+static int usbhid_start(struct hid_device *hid)
+{
+	struct usb_interface *intf = to_usb_interface(hid->dev.parent);
+	struct usb_host_interface *interface = intf->cur_altsetting;
+	struct usb_device *dev = interface_to_usbdev(intf);
+	struct usbhid_device *usbhid;
+	unsigned int n, insize = 0;
+	int ret;
+
+	WARN_ON(hid->driver_data);
+
+	usbhid = kzalloc(sizeof(struct usbhid_device), GFP_KERNEL);
+	if (usbhid == NULL) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
 	hid->driver_data = usbhid;
 	usbhid->hid = hid;
@@ -805,27 +888,12 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 	if (insize > HID_MAX_BUFFER_SIZE)
 		insize = HID_MAX_BUFFER_SIZE;
 
-	if (hid_alloc_buffers(dev, hid))
+	if (hid_alloc_buffers(dev, hid)) {
+		ret = -ENOMEM;
 		goto fail;
-
-	hid->name[0] = 0;
-
-	if (dev->manufacturer)
-		strlcpy(hid->name, dev->manufacturer, sizeof(hid->name));
-
-	if (dev->product) {
-		if (dev->manufacturer)
-			strlcat(hid->name, " ", sizeof(hid->name));
-		strlcat(hid->name, dev->product, sizeof(hid->name));
 	}
 
-	if (!strlen(hid->name))
-		snprintf(hid->name, sizeof(hid->name), "HID %04x:%04x",
-			 le16_to_cpu(dev->descriptor.idVendor),
-			 le16_to_cpu(dev->descriptor.idProduct));
-
 	for (n = 0; n < interface->desc.bNumEndpoints; n++) {
-
 		struct usb_endpoint_descriptor *endpoint;
 		int pipe;
 		int interval;
@@ -837,7 +905,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 		interval = endpoint->bInterval;
 
 		/* Some vendors give fullspeed interval on highspeed devides */
-		if (quirks & HID_QUIRK_FULLSPEED_INTERVAL  &&
+		if (hid->quirks & HID_QUIRK_FULLSPEED_INTERVAL &&
 		    dev->speed == USB_SPEED_HIGH) {
 			interval = fls(endpoint->bInterval*8);
 			printk(KERN_INFO "%s: Fixing fullspeed to highspeed interval: %d -> %d\n",
@@ -848,6 +916,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 		if (hid->collection->usage == HID_GD_MOUSE && hid_mousepoll_interval > 0)
 			interval = hid_mousepoll_interval;
 
+		ret = -ENOMEM;
 		if (usb_endpoint_dir_in(endpoint)) {
 			if (usbhid->urbin)
 				continue;
@@ -873,6 +942,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 
 	if (!usbhid->urbin) {
 		err_hid("couldn't find an input interrupt endpoint");
+		ret = -ENODEV;
 		goto fail;
 	}
 
@@ -884,44 +954,26 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 	spin_lock_init(&usbhid->outlock);
 	spin_lock_init(&usbhid->ctrllock);
 
-	hid->version = le16_to_cpu(hdesc->bcdHID);
-	hid->country = hdesc->bCountryCode;
-	hid->dev.parent = &intf->dev;
 	usbhid->intf = intf;
 	usbhid->ifnum = interface->desc.bInterfaceNumber;
 
-	hid->bus = BUS_USB;
-	hid->vendor = le16_to_cpu(dev->descriptor.idVendor);
-	hid->product = le16_to_cpu(dev->descriptor.idProduct);
-
-	usb_make_path(dev, hid->phys, sizeof(hid->phys));
-	strlcat(hid->phys, "/input", sizeof(hid->phys));
-	len = strlen(hid->phys);
-	if (len < sizeof(hid->phys) - 1)
-		snprintf(hid->phys + len, sizeof(hid->phys) - len,
-			 "%d", intf->altsetting[0].desc.bInterfaceNumber);
-
-	if (usb_string(dev, dev->descriptor.iSerialNumber, hid->uniq, 64) <= 0)
-		hid->uniq[0] = 0;
-
 	usbhid->urbctrl = usb_alloc_urb(0, GFP_KERNEL);
-	if (!usbhid->urbctrl)
+	if (!usbhid->urbctrl) {
+		ret = -ENOMEM;
 		goto fail;
+	}
 
 	usb_fill_control_urb(usbhid->urbctrl, dev, 0, (void *) usbhid->cr,
 			     usbhid->ctrlbuf, 1, hid_ctrl, hid);
 	usbhid->urbctrl->setup_dma = usbhid->cr_dma;
 	usbhid->urbctrl->transfer_dma = usbhid->ctrlbuf_dma;
 	usbhid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP);
-	hid->hidinput_input_event = usb_hidinput_input_event;
-	hid->hid_open = usbhid_open;
-	hid->hid_close = usbhid_close;
-#ifdef CONFIG_USB_HIDDEV
-	hid->hiddev_hid_event = hiddev_hid_event;
-	hid->hiddev_report_event = hiddev_report_event;
-#endif
-	hid->hid_output_raw_report = usbhid_output_raw_report;
-	return hid;
+
+	ret = usbhid_start_finish(hid);
+	if (ret)
+		goto fail;
+
+	return 0;
 
 fail:
 	usb_free_urb(usbhid->urbin);
@@ -929,24 +981,18 @@ fail:
 	usb_free_urb(usbhid->urbctrl);
 	hid_free_buffers(dev, hid);
 	kfree(usbhid);
-fail_no_usbhid:
-	hid_destroy_device(hid);
-
-	return NULL;
+err:
+	return ret;
 }
 
-static void hid_disconnect(struct usb_interface *intf)
+static void usbhid_stop(struct hid_device *hid)
 {
-	struct hid_device *hid = usb_get_intfdata (intf);
-	struct usbhid_device *usbhid;
+	struct usbhid_device *usbhid = hid->driver_data;
 
-	if (!hid)
+	if (WARN_ON(!usbhid))
 		return;
 
-	usbhid = hid->driver_data;
-
 	spin_lock_irq(&usbhid->inlock);	/* Sync with error handler */
-	usb_set_intfdata(intf, NULL);
 	set_bit(HID_DISCONNECTED, &usbhid->iofl);
 	spin_unlock_irq(&usbhid->inlock);
 	usb_kill_urb(usbhid->urbin);
@@ -963,93 +1009,99 @@ static void hid_disconnect(struct usb_interface *intf)
 	if (hid->claimed & HID_CLAIMED_HIDRAW)
 		hidraw_disconnect(hid);
 
+	hid->claimed = 0;
+
 	usb_free_urb(usbhid->urbin);
 	usb_free_urb(usbhid->urbctrl);
 	usb_free_urb(usbhid->urbout);
 
 	hid_free_buffers(hid_to_usb_dev(hid), hid);
 	kfree(usbhid);
-	hid_destroy_device(hid);
+	hid->driver_data = NULL;
 }
 
+static struct hid_ll_driver usb_hid_driver = {
+	.parse = usbhid_parse,
+	.start = usbhid_start,
+	.stop = usbhid_stop,
+	.open = usbhid_open,
+	.close = usbhid_close,
+	.hidinput_input_event = usb_hidinput_input_event,
+};
+
 static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id)
 {
+	struct usb_device *dev = interface_to_usbdev(intf);
 	struct hid_device *hid;
-	char path[64];
-	int i, ret;
-	char *c;
+	size_t len;
+	int ret;
 
 	dbg_hid("HID probe called for ifnum %d\n",
 			intf->altsetting->desc.bInterfaceNumber);
 
-	if (!(hid = usb_hid_configure(intf)))
-		return -ENODEV;
-
-	usbhid_init_reports(hid);
-	hid_dump_device(hid);
-	if (hid->quirks & HID_QUIRK_RESET_LEDS)
-		usbhid_set_leds(hid);
-
-	if (!hidinput_connect(hid))
-		hid->claimed |= HID_CLAIMED_INPUT;
-	if (!hiddev_connect(hid))
-		hid->claimed |= HID_CLAIMED_HIDDEV;
-	if (!hidraw_connect(hid))
-		hid->claimed |= HID_CLAIMED_HIDRAW;
+	hid = hid_allocate_device();
+	if (IS_ERR(hid))
+		return PTR_ERR(hid);
 
 	usb_set_intfdata(intf, hid);
+	hid->ll_driver = &usb_hid_driver;
+	hid->hid_output_raw_report = usbhid_output_raw_report;
+#ifdef CONFIG_USB_HIDDEV
+	hid->hiddev_hid_event = hiddev_hid_event;
+	hid->hiddev_report_event = hiddev_report_event;
+#endif
+	hid->dev.parent = &intf->dev;
+	hid->bus = BUS_USB;
+	hid->vendor = le16_to_cpu(dev->descriptor.idVendor);
+	hid->product = le16_to_cpu(dev->descriptor.idProduct);
+	hid->name[0] = 0;
 
-	if (!hid->claimed) {
-		printk ("HID device claimed by neither input, hiddev nor hidraw\n");
-		hid_disconnect(intf);
-		return -ENODEV;
-	}
-
-	if ((hid->claimed & HID_CLAIMED_INPUT))
-		hid_ff_init(hid);
-
-	if (hid->quirks & HID_QUIRK_SONY_PS3_CONTROLLER)
-		hid_fixup_sony_ps3_controller(interface_to_usbdev(intf),
-			intf->cur_altsetting->desc.bInterfaceNumber);
-
-	printk(KERN_INFO);
-
-	if (hid->claimed & HID_CLAIMED_INPUT)
-		printk("input");
-	if ((hid->claimed & HID_CLAIMED_INPUT) && ((hid->claimed & HID_CLAIMED_HIDDEV) ||
-				hid->claimed & HID_CLAIMED_HIDRAW))
-		printk(",");
-	if (hid->claimed & HID_CLAIMED_HIDDEV)
-		printk("hiddev%d", hid->minor);
-	if ((hid->claimed & HID_CLAIMED_INPUT) && (hid->claimed & HID_CLAIMED_HIDDEV) &&
-			(hid->claimed & HID_CLAIMED_HIDRAW))
-		printk(",");
-	if (hid->claimed & HID_CLAIMED_HIDRAW)
-		printk("hidraw%d", ((struct hidraw*)hid->hidraw)->minor);
+	if (dev->manufacturer)
+		strlcpy(hid->name, dev->manufacturer, sizeof(hid->name));
 
-	c = "Device";
-	for (i = 0; i < hid->maxcollection; i++) {
-		if (hid->collection[i].type == HID_COLLECTION_APPLICATION &&
-		    (hid->collection[i].usage & HID_USAGE_PAGE) == HID_UP_GENDESK &&
-		    (hid->collection[i].usage & 0xffff) < ARRAY_SIZE(hid_types)) {
-			c = hid_types[hid->collection[i].usage & 0xffff];
-			break;
-		}
+	if (dev->product) {
+		if (dev->manufacturer)
+			strlcat(hid->name, " ", sizeof(hid->name));
+		strlcat(hid->name, dev->product, sizeof(hid->name));
 	}
 
-	usb_make_path(interface_to_usbdev(intf), path, 63);
+	if (!strlen(hid->name))
+		snprintf(hid->name, sizeof(hid->name), "HID %04x:%04x",
+			 le16_to_cpu(dev->descriptor.idVendor),
+			 le16_to_cpu(dev->descriptor.idProduct));
 
-	printk(": USB HID v%x.%02x %s [%s] on %s\n",
-		hid->version >> 8, hid->version & 0xff, c, hid->name, path);
+	usb_make_path(dev, hid->phys, sizeof(hid->phys));
+	strlcat(hid->phys, "/input", sizeof(hid->phys));
+	len = strlen(hid->phys);
+	if (len < sizeof(hid->phys) - 1)
+		snprintf(hid->phys + len, sizeof(hid->phys) - len,
+			 "%d", intf->altsetting[0].desc.bInterfaceNumber);
+
+	if (usb_string(dev, dev->descriptor.iSerialNumber, hid->uniq, 64) <= 0)
+		hid->uniq[0] = 0;
 
 	ret = hid_add_device(hid);
 	if (ret) {
 		dev_err(&intf->dev, "can't add hid device: %d\n", ret);
-		hid_disconnect(intf);
+		goto err;
 	}
+
+	return 0;
+err:
+	hid_destroy_device(hid);
 	return ret;
 }
 
+static void hid_disconnect(struct usb_interface *intf)
+{
+	struct hid_device *hid = usb_get_intfdata(intf);
+
+	if (WARN_ON(!hid))
+		return;
+
+	hid_destroy_device(hid);
+}
+
 static int hid_suspend(struct usb_interface *intf, pm_message_t message)
 {
 	struct hid_device *hid = usb_get_intfdata (intf);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index c4bea0eda85..ac2584fe65c 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -419,6 +419,7 @@ struct hid_control_fifo {
 #define HID_CLAIMED_HIDRAW	4
 
 #define HID_STAT_ADDED		1
+#define HID_STAT_PARSED		2
 
 #define HID_CTRL_RUNNING	1
 #define HID_OUT_RUNNING		2
@@ -435,6 +436,7 @@ struct hid_input {
 };
 
 struct hid_driver;
+struct hid_ll_driver;
 
 struct hid_device {							/* device report descriptor */
 	__u8 *rdesc;
@@ -452,6 +454,7 @@ struct hid_device {							/* device report descriptor */
 
 	struct device dev;						/* device */
 	struct hid_driver *driver;
+	struct hid_ll_driver *ll_driver;
 
 	unsigned int status;						/* see STAT flags above */
 	unsigned claimed;						/* Claimed by hidinput, hiddev? */
@@ -471,11 +474,6 @@ struct hid_device {							/* device report descriptor */
 
 	__s32 delayed_value;						/* For A4 Tech mice hwheel quirk */
 
-	/* device-specific function pointers */
-	int (*hidinput_input_event) (struct input_dev *, unsigned int, unsigned int, int);
-	int (*hid_open) (struct hid_device *);
-	void (*hid_close) (struct hid_device *);
-
 	/* hiddev event handler */
 	void (*hiddev_hid_event) (struct hid_device *, struct hid_field *field,
 				  struct hid_usage *, __s32);
@@ -561,9 +559,22 @@ struct hid_usage_id {
  * @raw_event: if report in report_table, this hook is called (NULL means nop)
  * @usage_table: on which events to call event (NULL means all)
  * @event: if usage in usage_table, this hook is called (NULL means nop)
+ * @report_fixup: called before report descriptor parsing (NULL means nop)
+ * @input_mapping: invoked on input registering before mapping an usage
+ * @input_mapped: invoked on input registering after mapping an usage
  *
  * raw_event and event should return 0 on no action performed, 1 when no
  * further processing should be done and negative on error
+ *
+ * input_mapping shall return a negative value to completely ignore this usage
+ * (e.g. doubled or invalid usage), zero to continue with parsing of this
+ * usage by generic code (no special handling needed) or positive to skip
+ * generic parsing (needed special handling which was done in the hook already)
+ * input_mapped shall return negative to inform the layer that this usage
+ * should not be considered for further processing or zero to notify that
+ * no processing was performed and should be done in a generic manner
+ * Both these functions may be NULL which means the same behavior as returning
+ * zero from them.
  */
 struct hid_driver {
 	char *name;
@@ -578,10 +589,43 @@ struct hid_driver {
 	const struct hid_usage_id *usage_table;
 	int (*event)(struct hid_device *hdev, struct hid_field *field,
 			struct hid_usage *usage, __s32 value);
+
+	void (*report_fixup)(struct hid_device *hdev, __u8 *buf,
+			unsigned int size);
+
+	int (*input_mapping)(struct hid_device *hdev,
+			struct hid_input *hidinput, struct hid_field *field,
+			struct hid_usage *usage, unsigned long **bit, int *max);
+	int (*input_mapped)(struct hid_device *hdev,
+			struct hid_input *hidinput, struct hid_field *field,
+			struct hid_usage *usage, unsigned long **bit, int *max);
 /* private: */
 	struct device_driver driver;
 };
 
+/**
+ * hid_ll_driver - low level driver callbacks
+ * @start: called on probe to start the device
+ * @stop: called on remove
+ * @open: called by input layer on open
+ * @close: called by input layer on close
+ * @hidinput_input_event: event input event (e.g. ff or leds)
+ * @parse: this method is called only once to parse the device data,
+ *	   shouldn't allocate anything to not leak memory
+ */
+struct hid_ll_driver {
+	int (*start)(struct hid_device *hdev);
+	void (*stop)(struct hid_device *hdev);
+
+	int (*open)(struct hid_device *hdev);
+	void (*close)(struct hid_device *hdev);
+
+	int (*hidinput_input_event) (struct input_dev *idev, unsigned int type,
+			unsigned int code, int value);
+
+	int (*parse)(struct hid_device *hdev);
+};
+
 /* Applications from HID Usage Tables 4/8/99 Version 1.1 */
 /* We ignore a few input applications that are not widely used */
 #define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || (a == 0x000d0002))
@@ -618,6 +662,56 @@ void hid_output_report(struct hid_report *report, __u8 *data);
 struct hid_device *hid_allocate_device(void);
 int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
 
+/**
+ * hid_parse - parse HW reports
+ *
+ * @hdev: hid device
+ *
+ * Call this from probe after you set up the device (if needed). Your
+ * report_fixup will be called (if non-NULL) after reading raw report from
+ * device before passing it to hid layer for real parsing.
+ */
+static inline int __must_check hid_parse(struct hid_device *hdev)
+{
+	int ret;
+
+	if (hdev->status & HID_STAT_PARSED)
+		return 0;
+
+	ret = hdev->ll_driver->parse(hdev);
+	if (!ret)
+		hdev->status |= HID_STAT_PARSED;
+
+	return ret;
+}
+
+/**
+ * hid_hw_start - start underlaying HW
+ *
+ * @hdev: hid device
+ *
+ * Call this in probe function *after* hid_parse. This will setup HW buffers
+ * and start the device (if not deffered to device open). hid_hw_stop must be
+ * called if this was successfull.
+ */
+static inline int __must_check hid_hw_start(struct hid_device *hdev)
+{
+	return hdev->ll_driver->start(hdev);
+}
+
+/**
+ * hid_hw_stop - stop underlaying HW
+ *
+ * @hdev: hid device
+ *
+ * This is usually called from remove function or from probe when something
+ * failed and hid_hw_start was called already.
+ */
+static inline void hid_hw_stop(struct hid_device *hdev)
+{
+	hdev->ll_driver->stop(hdev);
+}
+
 void hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size,
 		int interrupt);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 56a51f91591..d8029cfcd45 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -623,9 +623,15 @@ static struct device *hidp_get_device(struct hidp_session *session)
 static int hidp_setup_input(struct hidp_session *session,
 				struct hidp_connadd_req *req)
 {
-	struct input_dev *input = session->input;
+	struct input_dev *input;
 	int i;
 
+	input = input_allocate_device();
+	if (!input)
+		return -ENOMEM;
+
+	session->input = input;
+
 	input_set_drvdata(input, session);
 
 	input->name = "Bluetooth HID Boot Protocol Device";
@@ -698,55 +704,117 @@ static void hidp_setup_quirks(struct hid_device *hid)
 			hid->quirks = hidp_blacklist[n].quirks;
 }
 
+static int hidp_parse(struct hid_device *hid)
+{
+	struct hidp_session *session = hid->driver_data;
+	struct hidp_connadd_req *req = session->req;
+	unsigned char *buf;
+	int ret;
+
+	buf = kmalloc(req->rd_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, req->rd_data, req->rd_size)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	ret = hid_parse_report(session->hid, buf, req->rd_size);
+
+	kfree(buf);
+
+	if (ret)
+		return ret;
+
+	session->req = NULL;
+
+	hidp_setup_quirks(hid);
+	return 0;
+}
+
+static int hidp_start(struct hid_device *hid)
+{
+	struct hidp_session *session = hid->driver_data;
+	struct hid_report *report;
+
+	list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].
+			report_list, list)
+		hidp_send_report(session, report);
+
+	list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].
+			report_list, list)
+		hidp_send_report(session, report);
+
+	if (hidinput_connect(hid) == 0)
+		hid->claimed |= HID_CLAIMED_INPUT;
+
+	return 0;
+}
+
+static void hidp_stop(struct hid_device *hid)
+{
+	struct hidp_session *session = hid->driver_data;
+
+	skb_queue_purge(&session->ctrl_transmit);
+	skb_queue_purge(&session->intr_transmit);
+
+	if (hid->claimed & HID_CLAIMED_INPUT)
+		hidinput_disconnect(hid);
+	hid->claimed = 0;
+}
+
+static struct hid_ll_driver hidp_hid_driver = {
+	.parse = hidp_parse,
+	.start = hidp_start,
+	.stop = hidp_stop,
+	.open  = hidp_open,
+	.close = hidp_close,
+	.hidinput_input_event = hidp_hidinput_event,
+};
+
 static int hidp_setup_hid(struct hidp_session *session,
 				struct hidp_connadd_req *req)
 {
-	struct hid_device *hid = session->hid;
-	struct hid_report *report;
+	struct hid_device *hid;
 	bdaddr_t src, dst;
 	int ret;
 
-	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
-	baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
+	hid = hid_allocate_device();
+	if (IS_ERR(hid)) {
+		ret = PTR_ERR(session->hid);
+		goto err;
+	}
 
+	session->hid = hid;
+	session->req = req;
 	hid->driver_data = session;
 
-	hid->country = req->country;
+	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
+	baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
 
 	hid->bus     = BUS_BLUETOOTH;
 	hid->vendor  = req->vendor;
 	hid->product = req->product;
 	hid->version = req->version;
+	hid->country = req->country;
 
 	strncpy(hid->name, req->name, 128);
 	strncpy(hid->phys, batostr(&src), 64);
 	strncpy(hid->uniq, batostr(&dst), 64);
 
 	hid->dev.parent = hidp_get_device(session);
-
-	hid->hid_open  = hidp_open;
-	hid->hid_close = hidp_close;
-
-	hid->hidinput_input_event = hidp_hidinput_event;
-
-	hidp_setup_quirks(hid);
-
-	list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list)
-		hidp_send_report(session, report);
-
-	list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].report_list, list)
-		hidp_send_report(session, report);
-
-	if (hidinput_connect(hid) == 0)
-		hid->claimed |= HID_CLAIMED_INPUT;
+	hid->ll_driver = &hidp_hid_driver;
 
 	ret = hid_add_device(hid);
-	if (ret) {
-		if (hid->claimed & HID_CLAIMED_INPUT)
-			hidinput_disconnect(hid);
-		skb_queue_purge(&session->intr_transmit);
-	}
+	if (ret)
+		goto err_hid;
 
+	return 0;
+err_hid:
+	hid_destroy_device(hid);
+	session->hid = NULL;
+err:
 	return ret;
 }
 
@@ -767,46 +835,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 
 	BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size);
 
-	if (req->rd_size > 0) {
-		unsigned char *buf = kmalloc(req->rd_size, GFP_KERNEL);
-
-		if (!buf) {
-			kfree(session);
-			return -ENOMEM;
-		}
-
-		if (copy_from_user(buf, req->rd_data, req->rd_size)) {
-			kfree(buf);
-			kfree(session);
-			return -EFAULT;
-		}
-
-		session->hid = hid_allocate_device();
-		if (IS_ERR(session->hid)) {
-			kfree(buf);
-			kfree(session);
-			return PTR_ERR(session->hid);
-		}
-
-		err = hid_parse_report(session->hid, buf, req->rd_size);
-
-		kfree(buf);
-
-		if (err) {
-			hid_destroy_device(session->hid);
-			kfree(session);
-			return -EINVAL;
-		}
-	}
-
-	if (!session->hid) {
-		session->input = input_allocate_device();
-		if (!session->input) {
-			kfree(session);
-			return -ENOMEM;
-		}
-	}
-
 	down_write(&hidp_session_sem);
 
 	s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst);
@@ -834,16 +862,16 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 	session->flags   = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
 	session->idle_to = req->idle_to;
 
-	if (session->input) {
-		err = hidp_setup_input(session, req);
-		if (err < 0)
-			goto failed;
-	}
-
-	if (session->hid) {
+	if (req->rd_size > 0) {
 		err = hidp_setup_hid(session, req);
 		if (err)
-			goto failed;
+			goto err_skb;
+	}
+
+	if (!session->hid) {
+		err = hidp_setup_input(session, req);
+		if (err < 0)
+			goto err_skb;
 	}
 
 	__hidp_link_session(session);
@@ -871,16 +899,15 @@ unlink:
 
 	__hidp_unlink_session(session);
 
-	if (session->input) {
+	if (session->input)
 		input_unregister_device(session->input);
-		session->input = NULL; /* don't try to free it here */
-	}
-
-failed:
-	up_write(&hidp_session_sem);
-
 	if (session->hid)
 		hid_destroy_device(session->hid);
+err_skb:
+	skb_queue_purge(&session->ctrl_transmit);
+	skb_queue_purge(&session->intr_transmit);
+failed:
+	up_write(&hidp_session_sem);
 
 	input_free_device(session->input);
 	kfree(session);
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 343fb0566b3..e503c89057a 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -151,6 +151,8 @@ struct hidp_session {
 
 	struct sk_buff_head ctrl_transmit;
 	struct sk_buff_head intr_transmit;
+
+	struct hidp_connadd_req *req;
 };
 
 static inline void hidp_schedule(struct hidp_session *session)
-- 
cgit v1.2.3-70-g09d2


From d458a9dfc4de24870b8c747484b1988726534bee Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 16 May 2008 11:49:20 +0200
Subject: HID: move ignore quirks

Move ignore quirks from usbhid-quirks into hid-core code. Also don't output
warning when ENODEV is error code in usbhid and try ordinal input in hidp
when that error is returned.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c          | 163 +++++++++++++++++++++++++++++++
 drivers/hid/hid-ids.h           |  66 +------------
 drivers/hid/usbhid/hid-core.c   |   6 +-
 drivers/hid/usbhid/hid-quirks.c | 210 ----------------------------------------
 include/linux/hid.h             |   1 -
 net/bluetooth/hidp/core.c       |   2 +-
 6 files changed, 168 insertions(+), 280 deletions(-)

(limited to 'net')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index c3ff7b13a4b..08470ab295b 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1237,6 +1237,164 @@ static struct bus_type hid_bus_type = {
 	.uevent		= hid_uevent,
 };
 
+static const struct hid_device_id hid_ignore_list[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_ACECAD_FLAIR) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_ACECAD_302) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ADS_TECH, USB_DEVICE_ID_ADS_TECH_RADIO_SI470X) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_01) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_10) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_20) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_21) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_22) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_23) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_24) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AIRCABLE, USB_DEVICE_ID_AIRCABLE1) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ALCOR, USB_DEVICE_ID_ALCOR_USBRS232) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_LCM)},
+	{ HID_USB_DEVICE(USB_VENDOR_ID_BERKSHIRE, USB_DEVICE_ID_BERKSHIRE_PCWD) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CIDC, 0x0103) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI470X) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM109) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_HIDCOM) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_ULTRAMOUSE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0001) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0002) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0003) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, 0x0004) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_4_PHIDGETSERVO_30) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_1_PHIDGETSERVO_30) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_0_4_IF_KIT) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_16_16_IF_KIT) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_8_8_8_IF_KIT) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_8_7_IF_KIT) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_8_8_IF_KIT) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GLAB, USB_DEVICE_ID_PHIDGET_MOTORCONTROL) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_SUPER_Q2) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_GOGOPEN) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_PENPOWER) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GRETAGMACBETH, USB_DEVICE_ID_GRETAGMACBETH_HUEY) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GRIFFIN, USB_DEVICE_ID_POWERMATE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GRIFFIN, USB_DEVICE_ID_SOUNDKNOB) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_90) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_100) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_101) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_103) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_104) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_105) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_106) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_107) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_108) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_200) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_201) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_202) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_203) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_204) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_205) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_206) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_207) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_300) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_301) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_302) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_303) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_304) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_305) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_306) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_307) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_308) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_309) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_400) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_401) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_402) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_403) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_404) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_405) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_500) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_501) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_502) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_503) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_504) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1000) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1001) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1002) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1003) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1004) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1005) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1006) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1007) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_IMATION, USB_DEVICE_ID_DISC_STAKKA) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_KBGEAR, USB_DEVICE_ID_KBGEAR_JAMSTUDIO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_GPEN_560) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CASSY) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POCKETCASSY) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOBILECASSY) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY1) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY2) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_VIDEOCOM) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_COM3LAB) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_TELEPORT) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_NETWORKANALYSER) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERCONTROL) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETEST) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1024LS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1208LS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS1) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT1) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT2) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_NATIONAL_SEMICONDUCTOR, USB_DEVICE_ID_N_S_HARMONY) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 20) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 30) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 100) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 108) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 118) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 200) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 300) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 400) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 500) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0001) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LABPRO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_GOTEMP) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_SKIP) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_CYCLOPS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LCSPEC) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WACOM, HID_ANY_ID) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_4_PHIDGETSERVO_20) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_1_PHIDGETSERVO_20) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_8_8_4_IF_KIT) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) },
+	{ }
+};
+
+static bool hid_ignore(struct hid_device *hdev)
+{
+	switch (hdev->vendor) {
+	case USB_VENDOR_ID_CODEMERCS:
+		/* ignore all Code Mercenaries IOWarrior devices */
+		if (hdev->product >= USB_DEVICE_ID_CODEMERCS_IOW_FIRST &&
+				hdev->product <= USB_DEVICE_ID_CODEMERCS_IOW_LAST)
+			return true;
+		break;
+	case USB_VENDOR_ID_LOGITECH:
+		if (hdev->product >= USB_DEVICE_ID_LOGITECH_HARMONY_FIRST &&
+				hdev->product <= USB_DEVICE_ID_LOGITECH_HARMONY_LAST)
+			return true;
+		break;
+	}
+
+	return !!hid_match_id(hdev, hid_ignore_list);
+}
+
 int hid_add_device(struct hid_device *hdev)
 {
 	static atomic_t id = ATOMIC_INIT(0);
@@ -1245,6 +1403,11 @@ int hid_add_device(struct hid_device *hdev)
 	if (WARN_ON(hdev->status & HID_STAT_ADDED))
 		return -EBUSY;
 
+	/* we need to kill them here, otherwise they will stay allocated to
+	 * wait for coming driver */
+	if (hid_ignore(hdev))
+		return -ENODEV;
+
 	/* XXX hack, any other cleaner solution < 20 bus_id bytes? */
 	sprintf(hdev->dev.bus_id, "%04X:%04X:%04X.%04X", hdev->bus,
 			hdev->vendor, hdev->product, atomic_inc_return(&id));
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index f4ef84ebe97..23d021bd95d 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -251,70 +251,8 @@
 #define USB_DEVICE_ID_LOGITECH_LX3	0xc044
 #define USB_DEVICE_ID_LOGITECH_V150	0xc047
 #define USB_DEVICE_ID_LOGITECH_RECEIVER	0xc101
-#define USB_DEVICE_ID_LOGITECH_HARMONY  0xc110
-#define USB_DEVICE_ID_LOGITECH_HARMONY_2 0xc111
-#define USB_DEVICE_ID_LOGITECH_HARMONY_3 0xc112
-#define USB_DEVICE_ID_LOGITECH_HARMONY_4 0xc113
-#define USB_DEVICE_ID_LOGITECH_HARMONY_5 0xc114
-#define USB_DEVICE_ID_LOGITECH_HARMONY_6 0xc115
-#define USB_DEVICE_ID_LOGITECH_HARMONY_7 0xc116
-#define USB_DEVICE_ID_LOGITECH_HARMONY_8 0xc117
-#define USB_DEVICE_ID_LOGITECH_HARMONY_9 0xc118
-#define USB_DEVICE_ID_LOGITECH_HARMONY_10 0xc119
-#define USB_DEVICE_ID_LOGITECH_HARMONY_11 0xc11a
-#define USB_DEVICE_ID_LOGITECH_HARMONY_12 0xc11b
-#define USB_DEVICE_ID_LOGITECH_HARMONY_13 0xc11c
-#define USB_DEVICE_ID_LOGITECH_HARMONY_14 0xc11d
-#define USB_DEVICE_ID_LOGITECH_HARMONY_15 0xc11e
-#define USB_DEVICE_ID_LOGITECH_HARMONY_16 0xc11f
-#define USB_DEVICE_ID_LOGITECH_HARMONY_17 0xc120
-#define USB_DEVICE_ID_LOGITECH_HARMONY_18 0xc121
-#define USB_DEVICE_ID_LOGITECH_HARMONY_19 0xc122
-#define USB_DEVICE_ID_LOGITECH_HARMONY_20 0xc123
-#define USB_DEVICE_ID_LOGITECH_HARMONY_21 0xc124
-#define USB_DEVICE_ID_LOGITECH_HARMONY_22 0xc125
-#define USB_DEVICE_ID_LOGITECH_HARMONY_23 0xc126
-#define USB_DEVICE_ID_LOGITECH_HARMONY_24 0xc127
-#define USB_DEVICE_ID_LOGITECH_HARMONY_25 0xc128
-#define USB_DEVICE_ID_LOGITECH_HARMONY_26 0xc129
-#define USB_DEVICE_ID_LOGITECH_HARMONY_27 0xc12a
-#define USB_DEVICE_ID_LOGITECH_HARMONY_28 0xc12b
-#define USB_DEVICE_ID_LOGITECH_HARMONY_29 0xc12c
-#define USB_DEVICE_ID_LOGITECH_HARMONY_30 0xc12d
-#define USB_DEVICE_ID_LOGITECH_HARMONY_31 0xc12e
-#define USB_DEVICE_ID_LOGITECH_HARMONY_32 0xc12f
-#define USB_DEVICE_ID_LOGITECH_HARMONY_33 0xc130
-#define USB_DEVICE_ID_LOGITECH_HARMONY_34 0xc131
-#define USB_DEVICE_ID_LOGITECH_HARMONY_35 0xc132
-#define USB_DEVICE_ID_LOGITECH_HARMONY_36 0xc133
-#define USB_DEVICE_ID_LOGITECH_HARMONY_37 0xc134
-#define USB_DEVICE_ID_LOGITECH_HARMONY_38 0xc135
-#define USB_DEVICE_ID_LOGITECH_HARMONY_39 0xc136
-#define USB_DEVICE_ID_LOGITECH_HARMONY_40 0xc137
-#define USB_DEVICE_ID_LOGITECH_HARMONY_41 0xc138
-#define USB_DEVICE_ID_LOGITECH_HARMONY_42 0xc139
-#define USB_DEVICE_ID_LOGITECH_HARMONY_43 0xc13a
-#define USB_DEVICE_ID_LOGITECH_HARMONY_44 0xc13b
-#define USB_DEVICE_ID_LOGITECH_HARMONY_45 0xc13c
-#define USB_DEVICE_ID_LOGITECH_HARMONY_46 0xc13d
-#define USB_DEVICE_ID_LOGITECH_HARMONY_47 0xc13e
-#define USB_DEVICE_ID_LOGITECH_HARMONY_48 0xc13f
-#define USB_DEVICE_ID_LOGITECH_HARMONY_49 0xc140
-#define USB_DEVICE_ID_LOGITECH_HARMONY_50 0xc141
-#define USB_DEVICE_ID_LOGITECH_HARMONY_51 0xc142
-#define USB_DEVICE_ID_LOGITECH_HARMONY_52 0xc143
-#define USB_DEVICE_ID_LOGITECH_HARMONY_53 0xc144
-#define USB_DEVICE_ID_LOGITECH_HARMONY_54 0xc145
-#define USB_DEVICE_ID_LOGITECH_HARMONY_55 0xc146
-#define USB_DEVICE_ID_LOGITECH_HARMONY_56 0xc147
-#define USB_DEVICE_ID_LOGITECH_HARMONY_57 0xc148
-#define USB_DEVICE_ID_LOGITECH_HARMONY_58 0xc149
-#define USB_DEVICE_ID_LOGITECH_HARMONY_59 0xc14a
-#define USB_DEVICE_ID_LOGITECH_HARMONY_60 0xc14b
-#define USB_DEVICE_ID_LOGITECH_HARMONY_61 0xc14c
-#define USB_DEVICE_ID_LOGITECH_HARMONY_62 0xc14d
-#define USB_DEVICE_ID_LOGITECH_HARMONY_63 0xc14e
-#define USB_DEVICE_ID_LOGITECH_HARMONY_64 0xc14f
+#define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST  0xc110
+#define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f
 #define USB_DEVICE_ID_LOGITECH_EXTREME_3D	0xc215
 #define USB_DEVICE_ID_LOGITECH_WHEEL	0xc294
 #define USB_DEVICE_ID_LOGITECH_ELITE_KBD	0xc30a
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index d2a3461909a..9f5e100e95e 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -791,9 +791,6 @@ static int usbhid_parse(struct hid_device *hid)
 				quirks |= HID_QUIRK_NOGET;
 	}
 
-	if (quirks & HID_QUIRK_IGNORE)
-		return -ENODEV;
-
 	if ((quirks & HID_QUIRK_IGNORE_MOUSE) &&
 		(interface->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE))
 			return -ENODEV;
@@ -1082,7 +1079,8 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id)
 
 	ret = hid_add_device(hid);
 	if (ret) {
-		dev_err(&intf->dev, "can't add hid device: %d\n", ret);
+		if (ret != -ENODEV)
+			dev_err(&intf->dev, "can't add hid device: %d\n", ret);
 		goto err;
 	}
 
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 48fdaa5db73..466d608ba6a 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -55,140 +55,6 @@ static const struct hid_blacklist {
 
 	{ USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193, HID_QUIRK_HWHEEL_WHEEL_INVERT },
 
-	{ USB_VENDOR_ID_ADS_TECH, USB_DEVICE_ID_ADS_TECH_RADIO_SI470X, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_01, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_10, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_20, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_21, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_22, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_23, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_24, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_AIRCABLE, USB_DEVICE_ID_AIRCABLE1, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ALCOR, USB_DEVICE_ID_ALCOR_USBRS232, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_LCM, HID_QUIRK_IGNORE},
-	{ USB_VENDOR_ID_BERKSHIRE, USB_DEVICE_ID_BERKSHIRE_PCWD, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_CIDC, 0x0103, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI470X, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM109, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_HIDCOM, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_ULTRAMOUSE, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GENERAL_TOUCH, 0x0001, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GENERAL_TOUCH, 0x0002, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GENERAL_TOUCH, 0x0003, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GENERAL_TOUCH, 0x0004, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GLAB, USB_DEVICE_ID_4_PHIDGETSERVO_30, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GLAB, USB_DEVICE_ID_1_PHIDGETSERVO_30, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_0_4_IF_KIT, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_16_16_IF_KIT, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GLAB, USB_DEVICE_ID_8_8_8_IF_KIT, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_8_7_IF_KIT, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GLAB, USB_DEVICE_ID_0_8_8_IF_KIT, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GLAB, USB_DEVICE_ID_PHIDGET_MOTORCONTROL, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_SUPER_Q2, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_GOGOPEN, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GOTOP, USB_DEVICE_ID_PENPOWER, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GRETAGMACBETH, USB_DEVICE_ID_GRETAGMACBETH_HUEY, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GRIFFIN, USB_DEVICE_ID_POWERMATE, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GRIFFIN, USB_DEVICE_ID_SOUNDKNOB, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_90, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_100, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_101, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_103, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_104, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_105, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_106, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_107, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_108, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_200, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_201, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_202, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_203, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_204, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_205, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_206, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_207, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_300, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_301, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_302, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_303, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_304, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_305, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_306, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_307, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_308, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_309, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_400, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_401, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_402, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_403, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_404, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_405, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_500, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_501, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_502, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_503, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_504, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1000, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1001, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1002, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1003, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1004, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1005, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1006, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_GTCO, USB_DEVICE_ID_GTCO_1007, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_IMATION, USB_DEVICE_ID_DISC_STAKKA, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_KBGEAR, USB_DEVICE_ID_KBGEAR_JAMSTUDIO, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CASSY, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POCKETCASSY, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOBILECASSY, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY1, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY2, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_VIDEOCOM, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_COM3LAB, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_TELEPORT, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_NETWORKANALYSER, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERCONTROL, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETEST, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1024LS, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1208LS, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS1, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 20, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 30, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 100, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 108, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 118, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 200, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 300, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 400, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ONTRAK, USB_DEVICE_ID_ONTRAK_ADU100 + 500, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_PANJIT, 0x0001, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_PANJIT, 0x0002, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_PANJIT, 0x0003, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_PANJIT, 0x0004, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LABPRO, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_GOTEMP, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_SKIP, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_CYCLOPS, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LCSPEC, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_4_PHIDGETSERVO_20, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_1_PHIDGETSERVO_20, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_8_8_4_IF_KIT, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K, HID_QUIRK_IGNORE },
-
-	{ USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_ACECAD_FLAIR, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_ACECAD_302, HID_QUIRK_IGNORE },
-
-	{ USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT1, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT2, HID_QUIRK_IGNORE },
 
 	{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K, HID_QUIRK_MICROSOFT_KEYS },
 	{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_LK6K, HID_QUIRK_MICROSOFT_KEYS },
@@ -246,72 +112,6 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
 
 	{ USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_W7658, HID_QUIRK_RESET_LEDS },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_2, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_3, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_4, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_5, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_6, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_7, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_8, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_9, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_10, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_11, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_12, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_13, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_14, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_15, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_16, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_17, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_18, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_19, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_20, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_21, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_22, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_23, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_24, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_25, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_26, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_27, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_28, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_29, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_30, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_31, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_32, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_33, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_34, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_35, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_36, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_37, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_38, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_39, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_40, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_41, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_42, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_43, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_44, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_45, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_46, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_47, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_48, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_49, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_50, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_51, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_52, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_53, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_54, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_55, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_56, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_57, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_58, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_59, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_60, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_61, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_62, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_63, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_64, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_NATIONAL_SEMICONDUCTOR, USB_DEVICE_ID_N_S_HARMONY, HID_QUIRK_IGNORE },
-	{ USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_GPEN_560, HID_QUIRK_IGNORE },
 
 	{ 0, 0 }
 };
@@ -552,16 +352,6 @@ u32 usbhid_lookup_quirk(const u16 idVendor, const u16 idProduct)
 	u32 quirks = 0;
 	const struct hid_blacklist *bl_entry = NULL;
 
-	/* Ignore all Wacom devices */
-	if (idVendor == USB_VENDOR_ID_WACOM)
-		return HID_QUIRK_IGNORE;
-
-	/* ignore all Code Mercenaries IOWarrior devices */
-	if (idVendor == USB_VENDOR_ID_CODEMERCS)
-		if (idProduct >= USB_DEVICE_ID_CODEMERCS_IOW_FIRST &&
-				idProduct <= USB_DEVICE_ID_CODEMERCS_IOW_LAST)
-			return HID_QUIRK_IGNORE;
-
 	/* NCR devices must not be queried for reports */
 	if (idVendor == USB_VENDOR_ID_NCR &&
 			idProduct >= USB_DEVICE_ID_NCR_FIRST &&
diff --git a/include/linux/hid.h b/include/linux/hid.h
index e9c4154ba33..0644fd33b98 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -257,7 +257,6 @@ struct hid_item {
 
 #define HID_QUIRK_INVERT			0x00000001
 #define HID_QUIRK_NOTOUCH			0x00000002
-#define HID_QUIRK_IGNORE			0x00000004
 #define HID_QUIRK_NOGET				0x00000008
 #define HID_QUIRK_HIDDEV			0x00000010
 #define HID_QUIRK_BADPAD			0x00000020
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index d8029cfcd45..4ae3207e8a9 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -864,7 +864,7 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 
 	if (req->rd_size > 0) {
 		err = hidp_setup_hid(session, req);
-		if (err)
+		if (err && err != -ENODEV)
 			goto err_skb;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 8c19a51591d06f5226499972567f528cf6066bb7 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Wed, 18 Jun 2008 23:36:49 +0200
Subject: HID: move apple quirks

Move them from the core code to a separate driver.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig             |  14 ++
 drivers/hid/Makefile            |   1 +
 drivers/hid/hid-apple.c         | 479 ++++++++++++++++++++++++++++++++++++++++
 drivers/hid/hid-core.c          |  32 +++
 drivers/hid/hid-input-quirks.c  |   8 -
 drivers/hid/hid-input.c         | 221 +-----------------
 drivers/hid/usbhid/Kconfig      |  11 -
 drivers/hid/usbhid/hid-core.c   |   4 -
 drivers/hid/usbhid/hid-quirks.c |  49 ----
 drivers/hid/usbhid/usbmouse.c   |   5 +
 include/linux/hid.h             |  13 --
 net/bluetooth/hidp/core.c       |  22 --
 12 files changed, 532 insertions(+), 327 deletions(-)
 create mode 100644 drivers/hid/hid-apple.c

(limited to 'net')

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 066e8c08c26..41283fff514 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -70,6 +70,20 @@ source "drivers/hid/usbhid/Kconfig"
 menu "Special HID drivers"
 	depends on HID
 
+config HID_APPLE
+	tristate "Apple"
+	default m
+	depends on (USB_HID || BT_HIDP)
+	---help---
+	Support for some Apple devices which less or more break
+	HID specification.
+
+	Say Y here if you want support for the special keys (Fn, Numlock) on
+	Apple iBooks, PowerBooks, MacBooks, MacBook Pros and aluminum USB
+	keyboards.
+
+	If unsure, say M.
+
 config HID_LOGITECH
 	tristate "Logitech"
 	default m
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index cae036bfe83..4a14821ceef 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_HID)		+= hid.o
 hid-$(CONFIG_HID_DEBUG)		+= hid-debug.o
 hid-$(CONFIG_HIDRAW)		+= hidraw.o
 
+obj-$(CONFIG_HID_APPLE)		+= hid-apple.o
 obj-$(CONFIG_HID_LOGITECH)	+= hid-logitech.o
 
 obj-$(CONFIG_USB_HID)		+= usbhid/
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
new file mode 100644
index 00000000000..5642e2c685f
--- /dev/null
+++ b/drivers/hid/hid-apple.c
@@ -0,0 +1,479 @@
+/*
+ *  USB HID quirks support for Linux
+ *
+ *  Copyright (c) 1999 Andreas Gal
+ *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
+ *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
+ *  Copyright (c) 2006-2007 Jiri Kosina
+ *  Copyright (c) 2007 Paul Walmsley
+ *  Copyright (c) 2008 Jiri Slaby <jirislaby@gmail.com>
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+#include <linux/usb.h>
+
+#include "hid-ids.h"
+
+#define APPLE_RDESC_JIS		0x0001
+#define APPLE_IGNORE_MOUSE	0x0002
+#define APPLE_HAS_FN		0x0004
+#define APPLE_HIDDEV		0x0008
+#define APPLE_ISO_KEYBOARD	0x0010
+#define APPLE_MIGHTYMOUSE	0x0020
+#define APPLE_INVERT_HWHEEL	0x0040
+#define APPLE_IGNORE_HIDINPUT	0x0080
+#define APPLE_NUMLOCK_EMULATION	0x0100
+
+#define APPLE_FLAG_FKEY		0x01
+
+static unsigned int fnmode = 1;
+module_param(fnmode, uint, 0644);
+MODULE_PARM_DESC(fnmode, "Mode of fn key on Apple keyboards (0 = disabled, "
+		"[1] = fkeyslast, 2 = fkeysfirst)");
+
+struct apple_sc {
+	unsigned long quirks;
+	unsigned int fn_on;
+	DECLARE_BITMAP(pressed_fn, KEY_CNT);
+	DECLARE_BITMAP(pressed_numlock, KEY_CNT);
+};
+
+struct apple_key_translation {
+	u16 from;
+	u16 to;
+	u8 flags;
+};
+
+static struct apple_key_translation apple_fn_keys[] = {
+	{ KEY_BACKSPACE, KEY_DELETE },
+	{ KEY_F1,	KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY },
+	{ KEY_F2,	KEY_BRIGHTNESSUP,   APPLE_FLAG_FKEY },
+	{ KEY_F3,	KEY_FN_F5,          APPLE_FLAG_FKEY }, /* Exposé */
+	{ KEY_F4,	KEY_FN_F4,          APPLE_FLAG_FKEY }, /* Dashboard */
+	{ KEY_F5,	KEY_KBDILLUMDOWN,   APPLE_FLAG_FKEY },
+	{ KEY_F6,	KEY_KBDILLUMUP,     APPLE_FLAG_FKEY },
+	{ KEY_F7,	KEY_PREVIOUSSONG,   APPLE_FLAG_FKEY },
+	{ KEY_F8,	KEY_PLAYPAUSE,      APPLE_FLAG_FKEY },
+	{ KEY_F9,	KEY_NEXTSONG,       APPLE_FLAG_FKEY },
+	{ KEY_F10,	KEY_MUTE,           APPLE_FLAG_FKEY },
+	{ KEY_F11,	KEY_VOLUMEDOWN,     APPLE_FLAG_FKEY },
+	{ KEY_F12,	KEY_VOLUMEUP,       APPLE_FLAG_FKEY },
+	{ KEY_UP,	KEY_PAGEUP },
+	{ KEY_DOWN,	KEY_PAGEDOWN },
+	{ KEY_LEFT,	KEY_HOME },
+	{ KEY_RIGHT,	KEY_END },
+	{ }
+};
+
+static struct apple_key_translation powerbook_fn_keys[] = {
+	{ KEY_BACKSPACE, KEY_DELETE },
+	{ KEY_F1,	KEY_BRIGHTNESSDOWN,     APPLE_FLAG_FKEY },
+	{ KEY_F2,	KEY_BRIGHTNESSUP,       APPLE_FLAG_FKEY },
+	{ KEY_F3,	KEY_MUTE,               APPLE_FLAG_FKEY },
+	{ KEY_F4,	KEY_VOLUMEDOWN,         APPLE_FLAG_FKEY },
+	{ KEY_F5,	KEY_VOLUMEUP,           APPLE_FLAG_FKEY },
+	{ KEY_F6,	KEY_NUMLOCK,            APPLE_FLAG_FKEY },
+	{ KEY_F7,	KEY_SWITCHVIDEOMODE,    APPLE_FLAG_FKEY },
+	{ KEY_F8,	KEY_KBDILLUMTOGGLE,     APPLE_FLAG_FKEY },
+	{ KEY_F9,	KEY_KBDILLUMDOWN,       APPLE_FLAG_FKEY },
+	{ KEY_F10,	KEY_KBDILLUMUP,         APPLE_FLAG_FKEY },
+	{ KEY_UP,	KEY_PAGEUP },
+	{ KEY_DOWN,	KEY_PAGEDOWN },
+	{ KEY_LEFT,	KEY_HOME },
+	{ KEY_RIGHT,	KEY_END },
+	{ }
+};
+
+static struct apple_key_translation powerbook_numlock_keys[] = {
+	{ KEY_J,	KEY_KP1 },
+	{ KEY_K,	KEY_KP2 },
+	{ KEY_L,	KEY_KP3 },
+	{ KEY_U,	KEY_KP4 },
+	{ KEY_I,	KEY_KP5 },
+	{ KEY_O,	KEY_KP6 },
+	{ KEY_7,	KEY_KP7 },
+	{ KEY_8,	KEY_KP8 },
+	{ KEY_9,	KEY_KP9 },
+	{ KEY_M,	KEY_KP0 },
+	{ KEY_DOT,	KEY_KPDOT },
+	{ KEY_SLASH,	KEY_KPPLUS },
+	{ KEY_SEMICOLON, KEY_KPMINUS },
+	{ KEY_P,	KEY_KPASTERISK },
+	{ KEY_MINUS,	KEY_KPEQUAL },
+	{ KEY_0,	KEY_KPSLASH },
+	{ KEY_F6,	KEY_NUMLOCK },
+	{ KEY_KPENTER,	KEY_KPENTER },
+	{ KEY_BACKSPACE, KEY_BACKSPACE },
+	{ }
+};
+
+static struct apple_key_translation apple_iso_keyboard[] = {
+	{ KEY_GRAVE,	KEY_102ND },
+	{ KEY_102ND,	KEY_GRAVE },
+	{ }
+};
+
+static struct apple_key_translation *apple_find_translation(
+		struct apple_key_translation *table, u16 from)
+{
+	struct apple_key_translation *trans;
+
+	/* Look for the translation */
+	for (trans = table; trans->from; trans++)
+		if (trans->from == from)
+			return trans;
+
+	return NULL;
+}
+
+static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
+		struct hid_usage *usage, __s32 value)
+{
+	struct apple_sc *asc = hid_get_drvdata(hid);
+	struct apple_key_translation *trans;
+
+	if (usage->code == KEY_FN) {
+		asc->fn_on = !!value;
+		input_event(input, usage->type, usage->code, value);
+		return 1;
+	}
+
+	if (fnmode) {
+		int do_translate;
+
+		trans = apple_find_translation((hid->product < 0x220 ||
+					hid->product >= 0x300) ?
+					powerbook_fn_keys : apple_fn_keys,
+					usage->code);
+		if (trans) {
+			if (test_bit(usage->code, asc->pressed_fn))
+				do_translate = 1;
+			else if (trans->flags & APPLE_FLAG_FKEY)
+				do_translate = (fnmode == 2 && asc->fn_on) ||
+					(fnmode == 1 && !asc->fn_on);
+			else
+				do_translate = asc->fn_on;
+
+			if (do_translate) {
+				if (value)
+					set_bit(usage->code, asc->pressed_fn);
+				else
+					clear_bit(usage->code, asc->pressed_fn);
+
+				input_event(input, usage->type, trans->to,
+						value);
+
+				return 1;
+			}
+		}
+
+		if (asc->quirks & APPLE_NUMLOCK_EMULATION &&
+				(test_bit(usage->code, asc->pressed_numlock) ||
+				test_bit(LED_NUML, input->led))) {
+			trans = apple_find_translation(powerbook_numlock_keys,
+					usage->code);
+
+			if (trans) {
+				if (value)
+					set_bit(usage->code,
+							asc->pressed_numlock);
+				else
+					clear_bit(usage->code,
+							asc->pressed_numlock);
+
+				input_event(input, usage->type, trans->to,
+						value);
+			}
+
+			return 1;
+		}
+	}
+
+	if (asc->quirks & APPLE_ISO_KEYBOARD) {
+		trans = apple_find_translation(apple_iso_keyboard, usage->code);
+		if (trans) {
+			input_event(input, usage->type, trans->to, value);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int apple_event(struct hid_device *hdev, struct hid_field *field,
+		struct hid_usage *usage, __s32 value)
+{
+	struct apple_sc *asc = hid_get_drvdata(hdev);
+
+	if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput ||
+			!usage->type)
+		return 0;
+
+	if ((asc->quirks & APPLE_INVERT_HWHEEL) &&
+			usage->code == REL_HWHEEL) {
+		input_event(field->hidinput->input, usage->type, usage->code,
+				-value);
+		return 1;
+	}
+
+	if ((asc->quirks & APPLE_HAS_FN) &&
+			hidinput_apple_event(hdev, field->hidinput->input,
+				usage, value))
+		return 1;
+
+
+	return 0;
+}
+
+/*
+ * MacBook JIS keyboard has wrong logical maximum
+ */
+static void apple_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+		unsigned int rsize)
+{
+	struct apple_sc *asc = hid_get_drvdata(hdev);
+
+	if ((asc->quirks & APPLE_RDESC_JIS) && rsize >= 60 &&
+			rdesc[53] == 0x65 && rdesc[59] == 0x65) {
+		dev_info(&hdev->dev, "fixing up MacBook JIS keyboard report "
+				"descriptor\n");
+		rdesc[53] = rdesc[59] = 0xe7;
+	}
+}
+
+static void apple_setup_input(struct input_dev *input)
+{
+	struct apple_key_translation *trans;
+
+	set_bit(KEY_NUMLOCK, input->keybit);
+
+	/* Enable all needed keys */
+	for (trans = apple_fn_keys; trans->from; trans++)
+		set_bit(trans->to, input->keybit);
+
+	for (trans = powerbook_fn_keys; trans->from; trans++)
+		set_bit(trans->to, input->keybit);
+
+	for (trans = powerbook_numlock_keys; trans->from; trans++)
+		set_bit(trans->to, input->keybit);
+
+	for (trans = apple_iso_keyboard; trans->from; trans++)
+		set_bit(trans->to, input->keybit);
+}
+
+static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+		struct hid_field *field, struct hid_usage *usage,
+		unsigned long **bit, int *max)
+{
+	if (usage->hid == (HID_UP_CUSTOM | 0x0003)) {
+		/* The fn key on Apple USB keyboards */
+		set_bit(EV_REP, hi->input->evbit);
+		hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN);
+		apple_setup_input(hi->input);
+		return 1;
+	}
+
+	/* we want the hid layer to go through standard path (set and ignore) */
+	return 0;
+}
+
+static int apple_input_mapped(struct hid_device *hdev, struct hid_input *hi,
+		struct hid_field *field, struct hid_usage *usage,
+		unsigned long **bit, int *max)
+{
+	struct apple_sc *asc = hid_get_drvdata(hdev);
+
+	if (asc->quirks & APPLE_MIGHTYMOUSE) {
+		if (usage->hid == HID_GD_Z)
+			hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL);
+		else if (usage->code == BTN_1)
+			hid_map_usage(hi, usage, bit, max, EV_KEY, BTN_2);
+		else if (usage->code == BTN_2)
+			hid_map_usage(hi, usage, bit, max, EV_KEY, BTN_1);
+	}
+
+	return 0;
+}
+
+static int apple_probe(struct hid_device *hdev,
+		const struct hid_device_id *id)
+{
+	unsigned long quirks = id->driver_data;
+	struct apple_sc *asc;
+	int ret;
+
+	/* return something else or move to hid layer? device will reside
+	   allocated */
+	if (id->bus == BUS_USB && (quirks & APPLE_IGNORE_MOUSE) &&
+			to_usb_interface(hdev->dev.parent)->cur_altsetting->
+			desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE)
+		return -ENODEV;
+
+	asc = kzalloc(sizeof(*asc), GFP_KERNEL);
+	if (asc == NULL) {
+		dev_err(&hdev->dev, "can't alloc apple descriptor\n");
+		return -ENOMEM;
+	}
+
+	asc->quirks = quirks;
+
+	hid_set_drvdata(hdev, asc);
+
+	if (quirks & APPLE_HIDDEV)
+		hdev->quirks |= HID_QUIRK_HIDDEV;
+	if (quirks & APPLE_IGNORE_HIDINPUT)
+		hdev->quirks |= HID_QUIRK_IGNORE_HIDINPUT;
+
+	ret = hid_parse(hdev);
+	if (ret) {
+		dev_err(&hdev->dev, "parse failed\n");
+		goto err_free;
+	}
+
+	ret = hid_hw_start(hdev);
+	if (ret) {
+		dev_err(&hdev->dev, "hw start failed\n");
+		goto err_free;
+	}
+
+	return 0;
+err_free:
+	kfree(asc);
+	return ret;
+}
+
+static void apple_remove(struct hid_device *hdev)
+{
+	hid_hw_stop(hdev);
+	kfree(hid_get_drvdata(hdev));
+}
+
+static const struct hid_device_id apple_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4),
+		.driver_data = APPLE_HIDDEV | APPLE_IGNORE_HIDINPUT },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE),
+		.driver_data = APPLE_MIGHTYMOUSE | APPLE_INVERT_HWHEEL },
+
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE | APPLE_ISO_KEYBOARD },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE | APPLE_ISO_KEYBOARD },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE | APPLE_ISO_KEYBOARD },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE | APPLE_RDESC_JIS},
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ANSI),
+		.driver_data = APPLE_HAS_FN },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ISO),
+		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_JIS),
+		.driver_data = APPLE_HAS_FN },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_ISO_KEYBOARD },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI),
+		.driver_data = APPLE_HAS_FN | APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO),
+		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS),
+		.driver_data = APPLE_HAS_FN | APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI),
+		.driver_data = APPLE_HAS_FN | APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO),
+		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS),
+		.driver_data = APPLE_HAS_FN | APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY),
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
+			APPLE_IGNORE_MOUSE },
+
+	/* Apple wireless Mighty Mouse */
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, 0x030c),
+		.driver_data = APPLE_MIGHTYMOUSE | APPLE_INVERT_HWHEEL },
+
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, apple_devices);
+
+static struct hid_driver apple_driver = {
+	.name = "apple",
+	.id_table = apple_devices,
+	.report_fixup = apple_report_fixup,
+	.probe = apple_probe,
+	.remove = apple_remove,
+	.event = apple_event,
+	.input_mapping = apple_input_mapping,
+	.input_mapped = apple_input_mapped,
+};
+
+static int apple_init(void)
+{
+	int ret;
+
+	ret = hid_register_driver(&apple_driver);
+	if (ret)
+		printk(KERN_ERR "can't register apple driver\n");
+
+	return ret;
+}
+
+static void apple_exit(void)
+{
+	hid_unregister_driver(&apple_driver);
+}
+
+module_init(apple_init);
+module_exit(apple_exit);
+MODULE_LICENSE("GPL");
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 08470ab295b..8e3c264c9b2 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1130,6 +1130,36 @@ static const struct hid_device_id *hid_match_id(struct hid_device *hdev,
 }
 
 static const struct hid_device_id hid_blacklist[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER_2) },
@@ -1144,6 +1174,8 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_V150) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_EXTREME_3D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WHEEL) },
+
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, 0x030c) },
 	{ }
 };
 
diff --git a/drivers/hid/hid-input-quirks.c b/drivers/hid/hid-input-quirks.c
index 10451ca3a78..878e193c6d7 100644
--- a/drivers/hid/hid-input-quirks.c
+++ b/drivers/hid/hid-input-quirks.c
@@ -331,19 +331,11 @@ int hidinput_event_quirks(struct hid_device *hid, struct hid_field *field, struc
 		return 1;
 	}
 
-	if ((hid->quirks & HID_QUIRK_INVERT_HWHEEL) && (usage->code == REL_HWHEEL)) {
-		input_event(input, usage->type, usage->code, -value);
-		return 1;
-	}
-
 	if ((hid->quirks & HID_QUIRK_2WHEEL_MOUSE_HACK_ON) && (usage->code == REL_WHEEL)) {
 		input_event(input, usage->type, REL_HWHEEL, value);
 		return 1;
 	}
 
-	if ((hid->quirks & HID_QUIRK_APPLE_HAS_FN) && hidinput_apple_event(hid, input, usage, value))
-		return 1;
-
 	/* Handling MS keyboards special buttons */
 	if (hid->quirks & HID_QUIRK_MICROSOFT_KEYS && 
 			usage->hid == (HID_UP_MSVENDOR | 0xff05)) {
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 4f2bac010f5..76ddf23f196 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -32,11 +32,6 @@
 #include <linux/hid.h>
 #include <linux/hid-debug.h>
 
-static int hid_apple_fnmode = 1;
-module_param_named(pb_fnmode, hid_apple_fnmode, int, 0644);
-MODULE_PARM_DESC(pb_fnmode,
-		"Mode of fn key on Apple keyboards (0 = disabled, 1 = fkeyslast, 2 = fkeysfirst)");
-
 #define unk	KEY_UNKNOWN
 
 static const unsigned char hid_keyboard[256] = {
@@ -73,202 +68,6 @@ static const struct {
 #define map_key_clear(c)	hid_map_usage_clear(hidinput, usage, &bit, \
 		&max, EV_KEY, (c))
 
-#ifdef CONFIG_USB_HIDINPUT_POWERBOOK
-
-struct hidinput_key_translation {
-	u16 from;
-	u16 to;
-	u8 flags;
-};
-
-#define APPLE_FLAG_FKEY 0x01
-
-static struct hidinput_key_translation apple_fn_keys[] = {
-	{ KEY_BACKSPACE, KEY_DELETE },
-	{ KEY_F1,       KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY },
-	{ KEY_F2,       KEY_BRIGHTNESSUP,   APPLE_FLAG_FKEY },
-	{ KEY_F3,       KEY_FN_F5,          APPLE_FLAG_FKEY }, /* Expos� */
-	{ KEY_F4,       KEY_FN_F4,          APPLE_FLAG_FKEY }, /* Dashboard */
-	{ KEY_F5,       KEY_KBDILLUMDOWN,   APPLE_FLAG_FKEY },
-	{ KEY_F6,       KEY_KBDILLUMUP,     APPLE_FLAG_FKEY },
-	{ KEY_F7,       KEY_PREVIOUSSONG,   APPLE_FLAG_FKEY },
-	{ KEY_F8,       KEY_PLAYPAUSE,      APPLE_FLAG_FKEY },
-	{ KEY_F9,       KEY_NEXTSONG,       APPLE_FLAG_FKEY },
-	{ KEY_F10,      KEY_MUTE,           APPLE_FLAG_FKEY },
-	{ KEY_F11,      KEY_VOLUMEDOWN,     APPLE_FLAG_FKEY },
-	{ KEY_F12,      KEY_VOLUMEUP,       APPLE_FLAG_FKEY },
-	{ KEY_UP,       KEY_PAGEUP },
-	{ KEY_DOWN,     KEY_PAGEDOWN },
-	{ KEY_LEFT,     KEY_HOME },
-	{ KEY_RIGHT,    KEY_END },
-	{ }
-};
-
-static struct hidinput_key_translation powerbook_fn_keys[] = {
-	{ KEY_BACKSPACE, KEY_DELETE },
-	{ KEY_F1,       KEY_BRIGHTNESSDOWN,     APPLE_FLAG_FKEY },
-	{ KEY_F2,       KEY_BRIGHTNESSUP,       APPLE_FLAG_FKEY },
-	{ KEY_F3,       KEY_MUTE,               APPLE_FLAG_FKEY },
-	{ KEY_F4,       KEY_VOLUMEDOWN,         APPLE_FLAG_FKEY },
-	{ KEY_F5,       KEY_VOLUMEUP,           APPLE_FLAG_FKEY },
-	{ KEY_F6,       KEY_NUMLOCK,            APPLE_FLAG_FKEY },
-	{ KEY_F7,       KEY_SWITCHVIDEOMODE,    APPLE_FLAG_FKEY },
-	{ KEY_F8,       KEY_KBDILLUMTOGGLE,     APPLE_FLAG_FKEY },
-	{ KEY_F9,       KEY_KBDILLUMDOWN,       APPLE_FLAG_FKEY },
-	{ KEY_F10,      KEY_KBDILLUMUP,         APPLE_FLAG_FKEY },
-	{ KEY_UP,       KEY_PAGEUP },
-	{ KEY_DOWN,     KEY_PAGEDOWN },
-	{ KEY_LEFT,     KEY_HOME },
-	{ KEY_RIGHT,    KEY_END },
-	{ }
-};
-
-static struct hidinput_key_translation powerbook_numlock_keys[] = {
-	{ KEY_J,        KEY_KP1 },
-	{ KEY_K,        KEY_KP2 },
-	{ KEY_L,        KEY_KP3 },
-	{ KEY_U,        KEY_KP4 },
-	{ KEY_I,        KEY_KP5 },
-	{ KEY_O,        KEY_KP6 },
-	{ KEY_7,        KEY_KP7 },
-	{ KEY_8,        KEY_KP8 },
-	{ KEY_9,        KEY_KP9 },
-	{ KEY_M,        KEY_KP0 },
-	{ KEY_DOT,      KEY_KPDOT },
-	{ KEY_SLASH,    KEY_KPPLUS },
-	{ KEY_SEMICOLON, KEY_KPMINUS },
-	{ KEY_P,        KEY_KPASTERISK },
-	{ KEY_MINUS,    KEY_KPEQUAL },
-	{ KEY_0,        KEY_KPSLASH },
-	{ KEY_F6,       KEY_NUMLOCK },
-	{ KEY_KPENTER,  KEY_KPENTER },
-	{ KEY_BACKSPACE, KEY_BACKSPACE },
-	{ }
-};
-
-static struct hidinput_key_translation apple_iso_keyboard[] = {
-	{ KEY_GRAVE,    KEY_102ND },
-	{ KEY_102ND,    KEY_GRAVE },
-	{ }
-};
-
-static struct hidinput_key_translation *find_translation(struct hidinput_key_translation *table, u16 from)
-{
-	struct hidinput_key_translation *trans;
-
-	/* Look for the translation */
-	for (trans = table; trans->from; trans++)
-		if (trans->from == from)
-			return trans;
-
-	return NULL;
-}
-
-int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
-		struct hid_usage *usage, __s32 value)
-{
-	struct hidinput_key_translation *trans;
-
-	if (usage->code == KEY_FN) {
-		if (value) hid->quirks |=  HID_QUIRK_APPLE_FN_ON;
-		else       hid->quirks &= ~HID_QUIRK_APPLE_FN_ON;
-
-		input_event(input, usage->type, usage->code, value);
-
-		return 1;
-	}
-
-	if (hid_apple_fnmode) {
-		int do_translate;
-
-		trans = find_translation((hid->product < 0x220 ||
-					  hid->product >= 0x300) ?
-					 powerbook_fn_keys : apple_fn_keys,
-					 usage->code);
-		if (trans) {
-			if (test_bit(usage->code, hid->apple_pressed_fn))
-				do_translate = 1;
-			else if (trans->flags & APPLE_FLAG_FKEY)
-				do_translate =
-					(hid_apple_fnmode == 2 &&  (hid->quirks & HID_QUIRK_APPLE_FN_ON)) ||
-					(hid_apple_fnmode == 1 && !(hid->quirks & HID_QUIRK_APPLE_FN_ON));
-			else
-				do_translate = (hid->quirks & HID_QUIRK_APPLE_FN_ON);
-
-			if (do_translate) {
-				if (value)
-					set_bit(usage->code, hid->apple_pressed_fn);
-				else
-					clear_bit(usage->code, hid->apple_pressed_fn);
-
-				input_event(input, usage->type, trans->to, value);
-
-				return 1;
-			}
-		}
-
-		if (hid->quirks & HID_QUIRK_APPLE_NUMLOCK_EMULATION && (
-				test_bit(usage->code, hid->pb_pressed_numlock) ||
-				test_bit(LED_NUML, input->led))) {
-			trans = find_translation(powerbook_numlock_keys, usage->code);
-
-			if (trans) {
-				if (value)
-					set_bit(usage->code, hid->pb_pressed_numlock);
-				else
-					clear_bit(usage->code, hid->pb_pressed_numlock);
-
-				input_event(input, usage->type, trans->to, value);
-			}
-
-			return 1;
-		}
-	}
-
-	if (hid->quirks & HID_QUIRK_APPLE_ISO_KEYBOARD) {
-		trans = find_translation(apple_iso_keyboard, usage->code);
-		if (trans) {
-			input_event(input, usage->type, trans->to, value);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static void hidinput_apple_setup(struct input_dev *input)
-{
-	struct hidinput_key_translation *trans;
-
-	set_bit(KEY_NUMLOCK, input->keybit);
-
-	/* Enable all needed keys */
-	for (trans = apple_fn_keys; trans->from; trans++)
-		set_bit(trans->to, input->keybit);
-
-	for (trans = powerbook_fn_keys; trans->from; trans++)
-		set_bit(trans->to, input->keybit);
-
-	for (trans = powerbook_numlock_keys; trans->from; trans++)
-		set_bit(trans->to, input->keybit);
-
-	for (trans = apple_iso_keyboard; trans->from; trans++)
-		set_bit(trans->to, input->keybit);
-
-}
-#else
-inline int hidinput_apple_event(struct hid_device *hid,
-				       struct input_dev *input,
-				       struct hid_usage *usage, __s32 value)
-{
-	return 0;
-}
-
-static inline void hidinput_apple_setup(struct input_dev *input)
-{
-}
-#endif
-
 static inline int match_scancode(int code, int scancode)
 {
 	if (scancode == 0)
@@ -696,16 +495,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		case HID_UP_CUSTOM: /* Reported on Logitech and Apple USB keyboards */
 
 			set_bit(EV_REP, input->evbit);
-			switch(usage->hid & HID_USAGE) {
-				case 0x003:
-					/* The fn key on Apple USB keyboards */
-					map_key_clear(KEY_FN);
-					hidinput_apple_setup(input);
-					break;
-
-				default:    goto ignore;
-			}
-			break;
+			goto ignore;
 
 		case HID_UP_LOGIVENDOR:
 
@@ -742,15 +532,6 @@ mapped:
 				hidinput, field, usage, &bit, &max) < 0)
 		goto ignore;
 
-	if (device->quirks & HID_QUIRK_MIGHTYMOUSE) {
-		if (usage->hid == HID_GD_Z)
-			map_rel(REL_HWHEEL);
-		else if (usage->code == BTN_1)
-			map_key(BTN_2);
-		else if (usage->code == BTN_2)
-			map_key(BTN_1);
-	}
-
 	if ((device->quirks & (HID_QUIRK_2WHEEL_MOUSE_HACK_7 | HID_QUIRK_2WHEEL_MOUSE_HACK_5 |
 			HID_QUIRK_2WHEEL_MOUSE_HACK_B8)) && (usage->type == EV_REL) &&
 			(usage->code == REL_WHEEL))
diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig
index 18f09104765..177bfa1a3e5 100644
--- a/drivers/hid/usbhid/Kconfig
+++ b/drivers/hid/usbhid/Kconfig
@@ -24,17 +24,6 @@ config USB_HID
 comment "Input core support is needed for USB HID input layer or HIDBP support"
 	depends on USB_HID && INPUT=n
 
-config USB_HIDINPUT_POWERBOOK
-	bool "Enable support for Apple laptop/aluminum USB special keys"
-	default n
-	depends on USB_HID
-	help
-	  Say Y here if you want support for the special keys (Fn, Numlock) on
-	  Apple iBooks, PowerBooks, MacBooks, MacBook Pros and aluminum USB
-	  keyboards.
-
-	  If unsure, say N.
-
 config HID_FF
 	bool "Force feedback support (EXPERIMENTAL)"
 	depends on USB_HID && EXPERIMENTAL
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 9f5e100e95e..97268082073 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -791,10 +791,6 @@ static int usbhid_parse(struct hid_device *hid)
 				quirks |= HID_QUIRK_NOGET;
 	}
 
-	if ((quirks & HID_QUIRK_IGNORE_MOUSE) &&
-		(interface->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE))
-			return -ENODEV;
-
 	if (usb_get_extra_descriptor(interface, HID_DT_HID, &hdesc) &&
 	    (!interface->desc.bNumEndpoints ||
 	     usb_get_extra_descriptor(&interface->endpoint[0], HID_DT_HID, &hdesc))) {
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 466d608ba6a..4c5ee9ecd40 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -49,7 +49,6 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016, HID_QUIRK_FULLSPEED_INTERVAL },
 
 	{ USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM, HID_QUIRK_HIDDEV },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4, HID_QUIRK_HIDDEV | HID_QUIRK_IGNORE_HIDINPUT },
 	{ USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE, HID_QUIRK_HIDDEV | HID_QUIRK_IGNORE_HIDINPUT },
 	{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV, HID_QUIRK_HIDINPUT },
 
@@ -59,8 +58,6 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K, HID_QUIRK_MICROSOFT_KEYS },
 	{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_LK6K, HID_QUIRK_MICROSOFT_KEYS },
 
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL },
-
 	{ USB_VENDOR_ID_PANTHERLORD, USB_DEVICE_ID_PANTHERLORD_TWIN_USB_JOYSTICK, HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS },
 	{ USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII, HID_QUIRK_MULTI_INPUT },
 
@@ -82,35 +79,6 @@ static const struct hid_blacklist {
 
 	{ USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SMARTJOY_DUAL_PLUS, HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT },
 
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE | HID_QUIRK_APPLE_ISO_KEYBOARD},
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE | HID_QUIRK_APPLE_ISO_KEYBOARD},
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE | HID_QUIRK_APPLE_ISO_KEYBOARD},
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ANSI, HID_QUIRK_APPLE_HAS_FN },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_APPLE_ISO_KEYBOARD },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_JIS, HID_QUIRK_APPLE_HAS_FN },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_APPLE_ISO_KEYBOARD },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_APPLE_ISO_KEYBOARD | HID_QUIRK_IGNORE_MOUSE},
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE},
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE},
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_APPLE_ISO_KEYBOARD | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS, HID_QUIRK_APPLE_HAS_FN  | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-
 	{ USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_W7658, HID_QUIRK_RESET_LEDS },
 
 	{ 0, 0 }
@@ -129,8 +97,6 @@ static const struct hid_rdesc_blacklist {
 
 	{ USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E, HID_QUIRK_RDESC_BUTTON_CONSUMER },
 
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS, HID_QUIRK_RDESC_MACBOOK_JIS },
-
 	{ USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE, HID_QUIRK_RDESC_PETALYNX },
 
 	{ USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE, HID_QUIRK_RDESC_SAMSUNG_REMOTE },
@@ -461,18 +427,6 @@ static void usbhid_fixup_cypress_descriptor(unsigned char *rdesc, int rsize)
 		printk(KERN_INFO "Fixing up Cypress report descriptor\n");
 }
 
-/*
- * MacBook JIS keyboard has wrong logical maximum
- */
-static void usbhid_fixup_macbook_descriptor(unsigned char *rdesc, int rsize)
-{
-	if (rsize >= 60 && rdesc[53] == 0x65
-			&& rdesc[59] == 0x65) {
-		printk(KERN_INFO "Fixing up MacBook JIS keyboard report descriptor\n");
-		rdesc[53] = rdesc[59] = 0xe7;
-	}
-}
-
 static void usbhid_fixup_button_consumer_descriptor(unsigned char *rdesc, int rsize)
 {
 	if (rsize >= 30 && rdesc[29] == 0x05
@@ -515,9 +469,6 @@ static void __usbhid_fixup_report_descriptor(__u32 quirks, char *rdesc, unsigned
 	if (quirks & HID_QUIRK_RDESC_PETALYNX)
 		usbhid_fixup_petalynx_descriptor(rdesc, rsize);
 
-	if (quirks & HID_QUIRK_RDESC_MACBOOK_JIS)
-		usbhid_fixup_macbook_descriptor(rdesc, rsize);
-
 	if (quirks & HID_QUIRK_RDESC_BUTTON_CONSUMER)
 		usbhid_fixup_button_consumer_descriptor(rdesc, rsize);
 
diff --git a/drivers/hid/usbhid/usbmouse.c b/drivers/hid/usbhid/usbmouse.c
index 35689ef172c..a89646a3c1f 100644
--- a/drivers/hid/usbhid/usbmouse.c
+++ b/drivers/hid/usbhid/usbmouse.c
@@ -31,6 +31,11 @@
 #include <linux/usb/input.h>
 #include <linux/hid.h>
 
+/* for apple IDs */
+#ifdef CONFIG_USB_HID_MODULE
+#include "../hid-ids.h"
+#endif
+
 /*
  * Version Information
  */
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 0644fd33b98..75cc1531dd8 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -264,13 +264,7 @@ struct hid_item {
 #define HID_QUIRK_2WHEEL_MOUSE_HACK_7		0x00000080
 #define HID_QUIRK_2WHEEL_MOUSE_HACK_5		0x00000100
 #define HID_QUIRK_2WHEEL_MOUSE_HACK_ON		0x00000200
-#define HID_QUIRK_MIGHTYMOUSE			0x00000400
-#define HID_QUIRK_APPLE_HAS_FN			0x00000800
-#define HID_QUIRK_APPLE_FN_ON			0x00001000
-#define HID_QUIRK_INVERT_HWHEEL			0x00002000
-#define HID_QUIRK_APPLE_ISO_KEYBOARD		0x00004000
 #define HID_QUIRK_SKIP_OUTPUT_REPORTS		0x00010000
-#define HID_QUIRK_IGNORE_MOUSE			0x00020000
 #define HID_QUIRK_SONY_PS3_CONTROLLER		0x00040000
 #define HID_QUIRK_RESET_LEDS			0x00100000
 #define HID_QUIRK_HIDINPUT			0x00200000
@@ -279,7 +273,6 @@ struct hid_item {
 #define HID_QUIRK_HWHEEL_WHEEL_INVERT		0x04000000
 #define HID_QUIRK_MICROSOFT_KEYS		0x08000000
 #define HID_QUIRK_FULLSPEED_INTERVAL		0x10000000
-#define HID_QUIRK_APPLE_NUMLOCK_EMULATION	0x20000000
 
 /*
  * Separate quirks for runtime report descriptor fixup
@@ -288,7 +281,6 @@ struct hid_item {
 #define HID_QUIRK_RDESC_CYMOTION		0x00000001
 #define HID_QUIRK_RDESC_SWAPPED_MIN_MAX		0x00000004
 #define HID_QUIRK_RDESC_PETALYNX		0x00000008
-#define HID_QUIRK_RDESC_MACBOOK_JIS		0x00000010
 #define HID_QUIRK_RDESC_BUTTON_CONSUMER		0x00000020
 #define HID_QUIRK_RDESC_SAMSUNG_REMOTE		0x00000040
 #define HID_QUIRK_RDESC_MICROSOFT_RECV_1028	0x00000080
@@ -475,10 +467,6 @@ struct hid_device {							/* device report descriptor */
 
 	/* handler for raw output data, used by hidraw */
 	int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t);
-#ifdef CONFIG_USB_HIDINPUT_POWERBOOK
-	unsigned long apple_pressed_fn[BITS_TO_LONGS(KEY_CNT)];
-	unsigned long pb_pressed_numlock[BITS_TO_LONGS(KEY_CNT)];
-#endif
 };
 
 static inline void *hid_get_drvdata(struct hid_device *hdev)
@@ -652,7 +640,6 @@ int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int
 int hidinput_mapping_quirks(struct hid_usage *, struct hid_input *,
 		unsigned long **, int *);
 int hidinput_event_quirks(struct hid_device *, struct hid_field *, struct hid_usage *, __s32);
-int hidinput_apple_event(struct hid_device *, struct input_dev *, struct hid_usage *, __s32);
 void hid_output_report(struct hid_report *report, __u8 *data);
 struct hid_device *hid_allocate_device(void);
 int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 4ae3207e8a9..f3d830747b9 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -683,27 +683,6 @@ static void hidp_close(struct hid_device *hid)
 {
 }
 
-static const struct {
-	__u16 idVendor;
-	__u16 idProduct;
-	unsigned quirks;
-} hidp_blacklist[] = {
-	/* Apple wireless Mighty Mouse */
-	{ 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL },
-
-	{ }	/* Terminating entry */
-};
-
-static void hidp_setup_quirks(struct hid_device *hid)
-{
-	unsigned int n;
-
-	for (n = 0; hidp_blacklist[n].idVendor; n++)
-		if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) &&
-				hidp_blacklist[n].idProduct == le16_to_cpu(hid->product))
-			hid->quirks = hidp_blacklist[n].quirks;
-}
-
 static int hidp_parse(struct hid_device *hid)
 {
 	struct hidp_session *session = hid->driver_data;
@@ -729,7 +708,6 @@ static int hidp_parse(struct hid_device *hid)
 
 	session->req = NULL;
 
-	hidp_setup_quirks(hid);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 93c10132a7ac160df3175b53f7ee857625412165 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Fri, 27 Jun 2008 00:04:24 +0200
Subject: HID: move connect quirks

Move connecting from usbhid to the hid layer and fix also hidp in
that manner.
This removes all the ignore/force hidinput/hiddev connecting quirks.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-a4tech.c      |  2 +-
 drivers/hid/hid-apple.c       | 13 ++++----
 drivers/hid/hid-belkin.c      |  6 ++--
 drivers/hid/hid-core.c        | 76 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/hid/hid-cypress.c     |  2 +-
 drivers/hid/hid-dell.c        |  2 +-
 drivers/hid/hid-input.c       | 23 ++++++-------
 drivers/hid/hid-logitech.c    |  2 +-
 drivers/hid/hid-microsoft.c   |  5 ++-
 drivers/hid/hid-petalynx.c    |  2 +-
 drivers/hid/hid-samsung.c     |  5 ++-
 drivers/hid/hid-sony.c        |  5 ++-
 drivers/hid/usbhid/hid-core.c | 76 ++++---------------------------------------
 drivers/hid/usbhid/hiddev.c   | 20 +++++++-----
 include/linux/hid.h           | 37 +++++++++++++++++----
 include/linux/hiddev.h        |  6 ++--
 net/bluetooth/hidp/core.c     |  3 --
 17 files changed, 159 insertions(+), 126 deletions(-)

(limited to 'net')

diff --git a/drivers/hid/hid-a4tech.c b/drivers/hid/hid-a4tech.c
index 26e16083a1f..ebca00e6c10 100644
--- a/drivers/hid/hid-a4tech.c
+++ b/drivers/hid/hid-a4tech.c
@@ -107,7 +107,7 @@ static int a4_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index 2a68661fcea..f0b177844cf 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -309,6 +309,7 @@ static int apple_probe(struct hid_device *hdev,
 {
 	unsigned long quirks = id->driver_data;
 	struct apple_sc *asc;
+	unsigned int connect_mask = HID_CONNECT_DEFAULT;
 	int ret;
 
 	/* return something else or move to hid layer? device will reside
@@ -328,18 +329,18 @@ static int apple_probe(struct hid_device *hdev,
 
 	hid_set_drvdata(hdev, asc);
 
-	if (quirks & APPLE_HIDDEV)
-		hdev->quirks |= HID_QUIRK_HIDDEV;
-	if (quirks & APPLE_IGNORE_HIDINPUT)
-		hdev->quirks |= HID_QUIRK_IGNORE_HIDINPUT;
-
 	ret = hid_parse(hdev);
 	if (ret) {
 		dev_err(&hdev->dev, "parse failed\n");
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	if (quirks & APPLE_HIDDEV)
+		connect_mask |= HID_CONNECT_HIDDEV_FORCE;
+	if (quirks & APPLE_IGNORE_HIDINPUT)
+		connect_mask &= ~HID_CONNECT_HIDINPUT;
+
+	ret = hid_hw_start(hdev, connect_mask);
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/hid-belkin.c b/drivers/hid/hid-belkin.c
index 050b9892d7e..12c8a9ba6ed 100644
--- a/drivers/hid/hid-belkin.c
+++ b/drivers/hid/hid-belkin.c
@@ -54,16 +54,14 @@ static int belkin_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	hid_set_drvdata(hdev, (void *)quirks);
 
-	if (quirks & BELKIN_HIDDEV)
-		hdev->quirks |= HID_QUIRK_HIDDEV;
-
 	ret = hid_parse(hdev);
 	if (ret) {
 		dev_err(&hdev->dev, "parse failed\n");
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT |
+		((quirks & BELKIN_HIDDEV) ? HID_CONNECT_HIDDEV_FORCE : 0));
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index ea5f8bc900e..699547ce257 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1113,6 +1113,80 @@ int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int i
 }
 EXPORT_SYMBOL_GPL(hid_input_report);
 
+int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
+{
+	static const char *types[] = { "Device", "Pointer", "Mouse", "Device",
+		"Joystick", "Gamepad", "Keyboard", "Keypad",
+		"Multi-Axis Controller"
+	};
+	const char *type, *bus;
+	char buf[64];
+	unsigned int i;
+	int len;
+
+	if (hdev->bus != BUS_USB)
+		connect_mask &= ~HID_CONNECT_HIDDEV;
+
+	if ((connect_mask & HID_CONNECT_HIDINPUT) && !hidinput_connect(hdev,
+				connect_mask & HID_CONNECT_HIDINPUT_FORCE))
+		hdev->claimed |= HID_CLAIMED_INPUT;
+	if ((connect_mask & HID_CONNECT_HIDDEV) && hdev->hiddev_connect &&
+			!hdev->hiddev_connect(hdev,
+				connect_mask & HID_CONNECT_HIDDEV_FORCE))
+		hdev->claimed |= HID_CLAIMED_HIDDEV;
+	if ((connect_mask & HID_CONNECT_HIDRAW) && !hidraw_connect(hdev))
+		hdev->claimed |= HID_CLAIMED_HIDRAW;
+
+	if (!hdev->claimed) {
+		dev_err(&hdev->dev, "claimed by neither input, hiddev nor "
+				"hidraw\n");
+		return -ENODEV;
+	}
+
+	if ((hdev->claimed & HID_CLAIMED_INPUT) &&
+			(connect_mask & HID_CONNECT_FF) && hdev->ff_init)
+		hdev->ff_init(hdev);
+
+	len = 0;
+	if (hdev->claimed & HID_CLAIMED_INPUT)
+		len += sprintf(buf + len, "input");
+	if (hdev->claimed & HID_CLAIMED_HIDDEV)
+		len += sprintf(buf + len, "%shiddev%d", len ? "," : "",
+				hdev->minor);
+	if (hdev->claimed & HID_CLAIMED_HIDRAW)
+		len += sprintf(buf + len, "%shidraw%d", len ? "," : "",
+				((struct hidraw *)hdev->hidraw)->minor);
+
+	type = "Device";
+	for (i = 0; i < hdev->maxcollection; i++) {
+		struct hid_collection *col = &hdev->collection[i];
+		if (col->type == HID_COLLECTION_APPLICATION &&
+		   (col->usage & HID_USAGE_PAGE) == HID_UP_GENDESK &&
+		   (col->usage & 0xffff) < ARRAY_SIZE(types)) {
+			type = types[col->usage & 0xffff];
+			break;
+		}
+	}
+
+	switch (hdev->bus) {
+	case BUS_USB:
+		bus = "USB";
+		break;
+	case BUS_BLUETOOTH:
+		bus = "BLUETOOTH";
+		break;
+	default:
+		bus = "<UNKNOWN>";
+	}
+
+	dev_info(&hdev->dev, "%s: %s HID v%x.%02x %s [%s] on %s\n",
+			buf, bus, hdev->version >> 8, hdev->version & 0xff,
+			type, hdev->name, hdev->phys);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hid_connect);
+
 static bool hid_match_one_id(struct hid_device *hdev,
 		const struct hid_device_id *id)
 {
@@ -1238,7 +1312,7 @@ static int hid_device_probe(struct device *dev)
 		} else { /* default probe */
 			ret = hid_parse(hdev);
 			if (!ret)
-				ret = hid_hw_start(hdev);
+				ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 		}
 		if (ret)
 			hdev->driver = NULL;
diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c
index a1e13f15f0a..5d69d27b935 100644
--- a/drivers/hid/hid-cypress.c
+++ b/drivers/hid/hid-cypress.c
@@ -110,7 +110,7 @@ static int cp_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/hid-dell.c b/drivers/hid/hid-dell.c
index 5d1d54cfa87..788faa6b6ca 100644
--- a/drivers/hid/hid-dell.c
+++ b/drivers/hid/hid-dell.c
@@ -34,7 +34,7 @@ static int dell_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 0a68935c20b..7f183b7147e 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -700,7 +700,7 @@ static void hidinput_close(struct input_dev *dev)
  * Read all reports and initialize the absolute field values.
  */
 
-int hidinput_connect(struct hid_device *hid)
+int hidinput_connect(struct hid_device *hid, unsigned int force)
 {
 	struct hid_report *report;
 	struct hid_input *hidinput = NULL;
@@ -708,19 +708,20 @@ int hidinput_connect(struct hid_device *hid)
 	int i, j, k;
 	int max_report_type = HID_OUTPUT_REPORT;
 
-	if (hid->quirks & HID_QUIRK_IGNORE_HIDINPUT)
-		return -1;
-
 	INIT_LIST_HEAD(&hid->inputs);
 
-	for (i = 0; i < hid->maxcollection; i++)
-		if (hid->collection[i].type == HID_COLLECTION_APPLICATION ||
-		    hid->collection[i].type == HID_COLLECTION_PHYSICAL)
-			if (IS_INPUT_APPLICATION(hid->collection[i].usage))
-				break;
+	if (!force) {
+		for (i = 0; i < hid->maxcollection; i++) {
+			struct hid_collection *col = &hid->collection[i];
+			if (col->type == HID_COLLECTION_APPLICATION ||
+					col->type == HID_COLLECTION_PHYSICAL)
+				if (IS_INPUT_APPLICATION(col->usage))
+					break;
+		}
 
-	if (i == hid->maxcollection && (hid->quirks & HID_QUIRK_HIDINPUT) == 0)
-		return -1;
+		if (i == hid->maxcollection)
+			return -1;
+	}
 
 	if (hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORTS)
 		max_report_type = HID_INPUT_REPORT;
diff --git a/drivers/hid/hid-logitech.c b/drivers/hid/hid-logitech.c
index b2aaebe1ac0..732258241c0 100644
--- a/drivers/hid/hid-logitech.c
+++ b/drivers/hid/hid-logitech.c
@@ -237,7 +237,7 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c
index 1fa8b813d44..d718b1607d0 100644
--- a/drivers/hid/hid-microsoft.c
+++ b/drivers/hid/hid-microsoft.c
@@ -154,8 +154,6 @@ static int ms_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	hid_set_drvdata(hdev, (void *)quirks);
 
-	if (quirks & MS_HIDINPUT)
-		hdev->quirks |= HID_QUIRK_HIDINPUT;
 	if (quirks & MS_NOGET)
 		hdev->quirks |= HID_QUIRK_NOGET;
 
@@ -165,7 +163,8 @@ static int ms_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT | ((quirks & MS_HIDINPUT) ?
+				HID_CONNECT_HIDINPUT_FORCE : 0));
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/hid-petalynx.c b/drivers/hid/hid-petalynx.c
index 4109244f1d7..10945fe12d5 100644
--- a/drivers/hid/hid-petalynx.c
+++ b/drivers/hid/hid-petalynx.c
@@ -80,7 +80,7 @@ static int pl_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c
index 8771bfae02f..15f3c049245 100644
--- a/drivers/hid/hid-samsung.c
+++ b/drivers/hid/hid-samsung.c
@@ -52,15 +52,14 @@ static int samsung_probe(struct hid_device *hdev,
 {
 	int ret;
 
-	hdev->quirks |= HID_QUIRK_HIDDEV | HID_QUIRK_IGNORE_HIDINPUT;
-
 	ret = hid_parse(hdev);
 	if (ret) {
 		dev_err(&hdev->dev, "parse failed\n");
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	ret = hid_hw_start(hdev, (HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDINPUT) |
+			HID_CONNECT_HIDDEV_FORCE);
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 97668c68f0a..3af8095a7de 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -57,15 +57,14 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
 	int ret;
 
-	hdev->quirks |= HID_QUIRK_HIDDEV;
-
 	ret = hid_parse(hdev);
 	if (ret) {
 		dev_err(&hdev->dev, "parse failed\n");
 		goto err_free;
 	}
 
-	ret = hid_hw_start(hdev);
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT |
+			HID_CONNECT_HIDDEV_FORCE);
 	if (ret) {
 		dev_err(&hdev->dev, "hw start failed\n");
 		goto err_free;
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index b41d0110a75..0513b60728d 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -44,8 +44,6 @@
 #define DRIVER_DESC "USB HID core driver"
 #define DRIVER_LICENSE "GPL"
 
-static char *hid_types[] = {"Device", "Pointer", "Mouse", "Device", "Joystick",
-				"Gamepad", "Keyboard", "Keypad", "Multi-Axis Controller"};
 /*
  * Module parameters.
  */
@@ -670,70 +668,6 @@ static void hid_free_buffers(struct usb_device *dev, struct hid_device *hid)
 	usb_buffer_free(dev, usbhid->bufsize, usbhid->ctrlbuf, usbhid->ctrlbuf_dma);
 }
 
-static int usbhid_start_finish(struct hid_device *hid)
-{
-	struct usb_interface *intf = to_usb_interface(hid->dev.parent);
-	char path[64], *type;
-	unsigned int i;
-
-	usbhid_init_reports(hid);
-	hid_dump_device(hid);
-	if (hid->quirks & HID_QUIRK_RESET_LEDS)
-		usbhid_set_leds(hid);
-
-	if (!hidinput_connect(hid))
-		hid->claimed |= HID_CLAIMED_INPUT;
-	if (!hiddev_connect(hid))
-		hid->claimed |= HID_CLAIMED_HIDDEV;
-	if (!hidraw_connect(hid))
-		hid->claimed |= HID_CLAIMED_HIDRAW;
-
-	if (!hid->claimed) {
-		printk(KERN_ERR "HID device claimed by neither input, hiddev "
-				"nor hidraw\n");
-		return -ENODEV;
-	}
-
-	if ((hid->claimed & HID_CLAIMED_INPUT))
-		hid_ff_init(hid);
-
-	printk(KERN_INFO);
-
-	if (hid->claimed & HID_CLAIMED_INPUT)
-		printk("input");
-	if ((hid->claimed & HID_CLAIMED_INPUT) &&
-			((hid->claimed & HID_CLAIMED_HIDDEV) ||
-				hid->claimed & HID_CLAIMED_HIDRAW))
-		printk(",");
-	if (hid->claimed & HID_CLAIMED_HIDDEV)
-		printk("hiddev%d", hid->minor);
-	if ((hid->claimed & HID_CLAIMED_INPUT) &&
-			(hid->claimed & HID_CLAIMED_HIDDEV) &&
-			(hid->claimed & HID_CLAIMED_HIDRAW))
-		printk(",");
-	if (hid->claimed & HID_CLAIMED_HIDRAW)
-		printk("hidraw%d", ((struct hidraw *)hid->hidraw)->minor);
-
-	type = "Device";
-	for (i = 0; i < hid->maxcollection; i++) {
-		if (hid->collection[i].type == HID_COLLECTION_APPLICATION &&
-		    (hid->collection[i].usage & HID_USAGE_PAGE) ==
-						HID_UP_GENDESK &&
-		    (hid->collection[i].usage & 0xffff) <
-						ARRAY_SIZE(hid_types)) {
-			type = hid_types[hid->collection[i].usage & 0xffff];
-			break;
-		}
-	}
-
-	usb_make_path(interface_to_usbdev(intf), path, 63);
-
-	printk(": USB HID v%x.%02x %s [%s] on %s\n",
-		hid->version >> 8, hid->version & 0xff, type, hid->name, path);
-
-	return 0;
-}
-
 static int usbhid_parse(struct hid_device *hid)
 {
 	struct usb_interface *intf = to_usb_interface(hid->dev.parent);
@@ -923,9 +857,11 @@ static int usbhid_start(struct hid_device *hid)
 	usbhid->urbctrl->transfer_dma = usbhid->ctrlbuf_dma;
 	usbhid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP);
 
-	ret = usbhid_start_finish(hid);
-	if (ret)
-		goto fail;
+	usbhid_init_reports(hid);
+	hid_dump_device(hid);
+
+	if (hid->quirks & HID_QUIRK_RESET_LEDS)
+		usbhid_set_leds(hid);
 
 	return 0;
 
@@ -1000,7 +936,9 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	usb_set_intfdata(intf, hid);
 	hid->ll_driver = &usb_hid_driver;
 	hid->hid_output_raw_report = usbhid_output_raw_report;
+	hid->ff_init = hid_ff_init;
 #ifdef CONFIG_USB_HIDDEV
+	hid->hiddev_connect = hiddev_connect;
 	hid->hiddev_hid_event = hiddev_hid_event;
 	hid->hiddev_report_event = hiddev_report_event;
 #endif
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 842e9edb888..babd65dd46a 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -790,21 +790,23 @@ static struct usb_class_driver hiddev_class = {
 /*
  * This is where hid.c calls us to connect a hid device to the hiddev driver
  */
-int hiddev_connect(struct hid_device *hid)
+int hiddev_connect(struct hid_device *hid, unsigned int force)
 {
 	struct hiddev *hiddev;
 	struct usbhid_device *usbhid = hid->driver_data;
-	int i;
 	int retval;
 
-	for (i = 0; i < hid->maxcollection; i++)
-		if (hid->collection[i].type ==
-		    HID_COLLECTION_APPLICATION &&
-		    !IS_INPUT_APPLICATION(hid->collection[i].usage))
-			break;
+	if (!force) {
+		unsigned int i;
+		for (i = 0; i < hid->maxcollection; i++)
+			if (hid->collection[i].type ==
+			    HID_COLLECTION_APPLICATION &&
+			    !IS_INPUT_APPLICATION(hid->collection[i].usage))
+				break;
 
-	if (i == hid->maxcollection && (hid->quirks & HID_QUIRK_HIDDEV) == 0)
-		return -1;
+		if (i == hid->maxcollection)
+			return -1;
+	}
 
 	if (!(hiddev = kzalloc(sizeof(struct hiddev), GFP_KERNEL)))
 		return -1;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 43aa51a7fa9..043209f7bfc 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -246,6 +246,19 @@ struct hid_item {
 #define HID_OUTPUT_REPORT	1
 #define HID_FEATURE_REPORT	2
 
+/*
+ * HID connect requests
+ */
+
+#define HID_CONNECT_HIDINPUT		0x01
+#define HID_CONNECT_HIDINPUT_FORCE	0x02
+#define HID_CONNECT_HIDRAW		0x04
+#define HID_CONNECT_HIDDEV		0x08
+#define HID_CONNECT_HIDDEV_FORCE	0x10
+#define HID_CONNECT_FF			0x20
+#define HID_CONNECT_DEFAULT	(HID_CONNECT_HIDINPUT|HID_CONNECT_HIDRAW| \
+		HID_CONNECT_HIDDEV|HID_CONNECT_FF)
+
 /*
  * HID device quirks.
  */
@@ -258,13 +271,10 @@ struct hid_item {
 #define HID_QUIRK_INVERT			0x00000001
 #define HID_QUIRK_NOTOUCH			0x00000002
 #define HID_QUIRK_NOGET				0x00000008
-#define HID_QUIRK_HIDDEV			0x00000010
 #define HID_QUIRK_BADPAD			0x00000020
 #define HID_QUIRK_MULTI_INPUT			0x00000040
 #define HID_QUIRK_SKIP_OUTPUT_REPORTS		0x00010000
 #define HID_QUIRK_RESET_LEDS			0x00100000
-#define HID_QUIRK_HIDINPUT			0x00200000
-#define HID_QUIRK_IGNORE_HIDINPUT		0x01000000
 #define HID_QUIRK_FULLSPEED_INTERVAL		0x10000000
 
 /*
@@ -439,7 +449,11 @@ struct hid_device {							/* device report descriptor */
 
 	void *driver_data;
 
+	/* temporary hid_ff handling (until moved to the drivers) */
+	int (*ff_init)(struct hid_device *);
+
 	/* hiddev event handler */
+	int (*hiddev_connect)(struct hid_device *, unsigned int);
 	void (*hiddev_hid_event) (struct hid_device *, struct hid_field *field,
 				  struct hid_usage *, __s32);
 	void (*hiddev_report_event) (struct hid_device *, struct hid_report *);
@@ -610,7 +624,7 @@ extern void hid_unregister_driver(struct hid_driver *);
 
 extern void hidinput_hid_event(struct hid_device *, struct hid_field *, struct hid_usage *, __s32);
 extern void hidinput_report_event(struct hid_device *hid, struct hid_report *report);
-extern int hidinput_connect(struct hid_device *);
+extern int hidinput_connect(struct hid_device *hid, unsigned int force);
 extern void hidinput_disconnect(struct hid_device *);
 
 int hid_set_field(struct hid_field *, unsigned, __s32);
@@ -619,6 +633,7 @@ int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int
 void hid_output_report(struct hid_report *report, __u8 *data);
 struct hid_device *hid_allocate_device(void);
 int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
+int hid_connect(struct hid_device *hid, unsigned int connect_mask);
 
 /**
  * hid_map_usage - map usage input bits
@@ -700,14 +715,22 @@ static inline int __must_check hid_parse(struct hid_device *hdev)
  * hid_hw_start - start underlaying HW
  *
  * @hdev: hid device
+ * @connect_mask: which outputs to connect, see HID_CONNECT_*
  *
  * Call this in probe function *after* hid_parse. This will setup HW buffers
  * and start the device (if not deffered to device open). hid_hw_stop must be
  * called if this was successfull.
  */
-static inline int __must_check hid_hw_start(struct hid_device *hdev)
+static inline int __must_check hid_hw_start(struct hid_device *hdev,
+		unsigned int connect_mask)
 {
-	return hdev->ll_driver->start(hdev);
+	int ret = hdev->ll_driver->start(hdev);
+	if (ret || !connect_mask)
+		return ret;
+	ret = hid_connect(hdev, connect_mask);
+	if (ret)
+		hdev->ll_driver->stop(hdev);
+	return ret;
 }
 
 /**
@@ -749,7 +772,7 @@ static inline int hid_pidff_init(struct hid_device *hid) { return -ENODEV; }
 #endif
 
 #else
-static inline int hid_ff_init(struct hid_device *hid) { return -1; }
+#define hid_ff_init	NULL
 #endif
 
 #ifdef CONFIG_HID_DEBUG
diff --git a/include/linux/hiddev.h b/include/linux/hiddev.h
index a416b904ba9..6ae77b3468f 100644
--- a/include/linux/hiddev.h
+++ b/include/linux/hiddev.h
@@ -217,7 +217,7 @@ struct hid_field;
 struct hid_report;
 
 #ifdef CONFIG_USB_HIDDEV
-int hiddev_connect(struct hid_device *);
+int hiddev_connect(struct hid_device *hid, unsigned int force);
 void hiddev_disconnect(struct hid_device *);
 void hiddev_hid_event(struct hid_device *hid, struct hid_field *field,
 		      struct hid_usage *usage, __s32 value);
@@ -225,7 +225,9 @@ void hiddev_report_event(struct hid_device *hid, struct hid_report *report);
 int __init hiddev_init(void);
 void hiddev_exit(void);
 #else
-static inline int hiddev_connect(struct hid_device *hid) { return -1; }
+static inline int hiddev_connect(struct hid_device *hid,
+		unsigned int force)
+{ return -1; }
 static inline void hiddev_disconnect(struct hid_device *hid) { }
 static inline void hiddev_hid_event(struct hid_device *hid, struct hid_field *field,
 		      struct hid_usage *usage, __s32 value) { }
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index f3d830747b9..acdeab3d980 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -724,9 +724,6 @@ static int hidp_start(struct hid_device *hid)
 			report_list, list)
 		hidp_send_report(session, report);
 
-	if (hidinput_connect(hid) == 0)
-		hid->claimed |= HID_CLAIMED_INPUT;
-
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2